Got chunked uploading kinda working
continuous-integration/drone/push Build is failing Details

First pass of FE (front-end) chunked uploading (there is 1 byte less per chunk per file, no idea why)
More formatting!
This commit is contained in:
Jordi Loyzaga 2024-09-18 20:15:18 -06:00
parent cc46df81fe
commit eeaa1805bf
10 changed files with 182 additions and 116 deletions

View File

@ -39,14 +39,14 @@ CONFIG_KEYS = {
"verbose_name": "Max per chunk size in bytes", "verbose_name": "Max per chunk size in bytes",
"native_type": int, "native_type": int,
"sensitive": False, "sensitive": False,
"default": 1024 * 1024 * 1, # 20 MB "default": 1024 * 1024 * 2, # 2 MB
}, },
"MAX_FILE_BYTES": { "MAX_FILE_BYTES": {
"description": "Max total file size in bytes", "description": "Max total file size in bytes",
"verbose_name": "Max upload size in bytes", "verbose_name": "Max upload size in bytes",
"native_type": int, "native_type": int,
"sensitive": False, "sensitive": False,
"default": 1024 * 1024 * 30, # 200 MB "default": 1024 * 1024 * 30, # 300 MB
}, },
"ENABLE_BROWSABLE_API": { "ENABLE_BROWSABLE_API": {
"description": "REST Framework browsable API is enabled (Always enabled if DEBUG is true)", "description": "REST Framework browsable API is enabled (Always enabled if DEBUG is true)",

View File

@ -33,7 +33,9 @@ def cast_to_native_type(key, value, native_type):
try: try:
return native_type(value) return native_type(value)
except ValueError as e: except ValueError as e:
message = f"Received unexpected value type for configuration key {key}\nValue: {value}\nExpected type : {native_type}" message = f"Received unexpected value type for configuration key {key}\n\
Value: {value}\n\
Expected type : {native_type}"
raise ValueError(message) from e raise ValueError(message) from e

View File

@ -75,8 +75,6 @@ TEMPLATES = [
WSGI_APPLICATION = "lockbox.wsgi.application" WSGI_APPLICATION = "lockbox.wsgi.application"
# Password validation # Password validation
# https://docs.djangoproject.com/en/4.2/ref/settings/#auth-password-validators
AUTH_PASSWORD_VALIDATORS = [ AUTH_PASSWORD_VALIDATORS = [
{ {
"NAME": "django.contrib.auth.password_validation.UserAttributeSimilarityValidator", "NAME": "django.contrib.auth.password_validation.UserAttributeSimilarityValidator",

View File

@ -1,11 +1,15 @@
from pathlib import Path from pathlib import Path
# TODO: LOG MEEEEE
# TODO: Figure out file owner in system, permissions, GUID
# Whats the default path if not provided? // docker volume
def validate_paths(media_path): def validate_paths(media_path):
"""Validates media path and validates that we can actually write to that location
Args:
media_path (str): Absolute-path-like string where files are stored
Raises:
e: Any exception that might happen (Permission Denied, Path does not exist, etc.)
"""
try: try:
Path(media_path).mkdir(exist_ok=True) Path(media_path).mkdir(exist_ok=True)
except Exception as e: except Exception as e:

View File

@ -1,65 +1,95 @@
const fileInput = document.getElementById('file-upload'); const fileInput = document.getElementById("file-upload");
const upload_ready = false; const uploadButton = document.getElementById("upload-button");
const fileSizeReport = document.getElementById("file-size");
const progressBar = document.getElementById("progressBar");
fileInput.value = '';
fileInput.addEventListener('change', handleFileChange); fileInput.addEventListener('change', handleFileChange);
uploadButton.addEventListener('click', handleFileUpload);
function handleFileChange(event) { function handleFileChange(event) {
const file = event.target.files[0]; const file = event.target.files[0];
const file_size = file.size; const file_size = file.size;
fileSizeReport.textContent = "File size is: " + file.size;
console.log("Max file bytes is : ", max_file_bytes);
console.log("File size is: ", file_size);
if (file_size > max_file_bytes){ if (file_size > max_file_bytes){
console.log("PLACEHOLDER: Size too big man."); console.log("File size is too large");
// Handle this.
return return
} }
console.log("Ready!");
console.log("PLACEHOLDER: Ready!");
handleFileUpload(file);
} }
function handleFileUpload(file) { async function handleFileUpload(event) {
let start = 0; let headers = new Headers();
let end = 0; headers.append("Content-Type", "application/json");
let chunk; const request_args = {
let file_id = null; method: "POST",
headers: headers,
while (start < file.size) { body: JSON.stringify(
chunk = file.slice(start, start + chunk_size); {
end = chunk.size - start; "expected_size": fileInput.files[0].size
console.log("LID: ", file_id);
file_id = uploadChunk(chunk, start, end, file.size, file_id);
start += chunk_size;
} }
)
};
const response = await fetch(uploadPath, request_args);
if (!response.ok) {
throw new Error(`Response status: ${response.status}`);
}
const file = await response.json();
await uploadChunks(file);
} }
function uploadChunk(chunk, start, end, total, file_id=null) { function updateProgressBar(remaining, total) {
const formData = new FormData(); let current_percent = Math.round((total - remaining) / (total / 100));
const range_header = `bytes ${start}-${end}/${total}`; progressBar.textContent = current_percent;
formData.append('file', chunk);
if (file_id) {
formData.append("lid", file_id);
} }
let request = new Request(".", { async function uploadChunks(remoteFile){
method: 'POST', const chunkPath = chunkPathTemplate.replace("@", remoteFile.lid);
body: formData, let file = fileInput.files[0];
headers: {
'X-CSRFToken': csrftoken, let bytes_remaining = remoteFile.expected_size
'Content-range': range_header let last_transfer_position = 0;
} let to_transfer = remoteFile.max_size_chunk_bytes;
}) console.log("Chunk size is: " + remoteFile.max_size_chunk_bytes);
return _uploadChunk(request);
while (bytes_remaining >= 0) {
updateProgressBar(bytes_remaining, remoteFile.expected_size);
if (bytes_remaining <= remoteFile.max_size_chunk_bytes) {
to_transfer = bytes_remaining;
bytes_remaining = 0;
} }
async function _uploadChunk(request) { await uploadChunk(file, [last_transfer_position, last_transfer_position += to_transfer], chunkPath);
const _response = await fetch(request) last_transfer_position += 1;
.then(async (response)=>response.json()) bytes_remaining -= to_transfer;
.then((data) =>{ }
return data.lid; console.log("Done!")
}) progressBar.textContent = 100;
return _response;
}
async function uploadChunk(file, byte_range, chunkPath) {
console.log(byte_range);
let file_bytes_target = file.slice(byte_range[0], byte_range[1]);
let body = new FormData();
body.append("Content", file_bytes_target);
let headers = new Headers();
headers.append("Content-Disposition", 'attachment; filename="DUMMY"');
headers.append("Content-Range", "bytes " + byte_range[0] + "-" + byte_range[1])
const request_args = {
method: "PUT",
headers: headers,
body: body
};
const response = await fetch(chunkPath, request_args);
if (!response.ok) {
throw new Error(`Response status: ${response.status}`);
}
} }

View File

@ -1,4 +1,4 @@
# Generated by Django 4.2.15 on 2024-09-17 06:52 # Generated by Django 4.2.15 on 2024-09-17 19:51
import common.utils import common.utils
from django.conf import settings from django.conf import settings
@ -24,13 +24,13 @@ class Migration(migrations.Migration):
('date_created', models.DateTimeField(blank=True, help_text='date at which this object was created', verbose_name='date created')), ('date_created', models.DateTimeField(blank=True, help_text='date at which this object was created', verbose_name='date created')),
('date_updated', models.DateTimeField(blank=True, help_text='date at which this object was last updated', verbose_name='date updated')), ('date_updated', models.DateTimeField(blank=True, help_text='date at which this object was last updated', verbose_name='date updated')),
('mime_type', models.CharField(blank=True, help_text='reported mime-type', max_length=128, null=True, verbose_name='mime-type')), ('mime_type', models.CharField(blank=True, help_text='reported mime-type', max_length=128, null=True, verbose_name='mime-type')),
('file', models.FileField(blank=True, help_text='actual file', null=True, upload_to=storage.models.upload_to_fielpath, verbose_name='file')), ('file', models.FileField(blank=True, help_text='actual file', null=True, upload_to=storage.models._upload_to_fielpath, verbose_name='file')),
('status', models.CharField(choices=[('uploading', 'uploading'), ('completed', 'completed'), ('abandoned', 'abandoned'), ('error', 'error')], default='uploading', help_text='upload status for file', max_length=10, verbose_name='status')), ('status', models.CharField(choices=[('uploading', 'uploading'), ('completed', 'completed'), ('abandoned', 'abandoned'), ('error', 'error')], default='uploading', help_text='upload status for file', max_length=10, verbose_name='status')),
('datetime_completed', models.DateTimeField(blank=True, help_text="datetime at which this file's upload was completed", null=True, verbose_name='completed on')), ('datetime_completed', models.DateTimeField(blank=True, help_text="datetime at which this file's upload was completed", null=True, verbose_name='completed on')),
('expires', models.BooleanField(default=False, help_text="will be scrubbed on 'date_expires'", verbose_name='expires')), ('expires', models.BooleanField(default=False, help_text="will be scrubbed on 'date_expires'", verbose_name='expires')),
('delete_on_expiration', models.BooleanField(default=False, help_text='will be deleted if expired and expires is true', verbose_name='delete on expiration')), ('delete_on_expiration', models.BooleanField(default=False, help_text='will be deleted if expired and expires is true', verbose_name='delete on expiration')),
('size', models.PositiveBigIntegerField(blank=True, help_text='total size on disk for this file', null=True, verbose_name='size (bytes)')), ('size', models.PositiveBigIntegerField(blank=True, help_text='total size on disk for this file', null=True, verbose_name='size (bytes)')),
('expected_size', models.PositiveBigIntegerField(blank=True, help_text='expected file size', null=True, verbose_name='expected size (bytes)')), ('expected_size', models.PositiveBigIntegerField(help_text='expected file size', verbose_name='expected size (bytes)')),
('max_size_chunk_bytes', models.PositiveBigIntegerField(default=common.utils.get_max_size_chunk_bytes, help_text='max size of each individual chunk for this file', verbose_name='maximum size of chunks (bytes)')), ('max_size_chunk_bytes', models.PositiveBigIntegerField(default=common.utils.get_max_size_chunk_bytes, help_text='max size of each individual chunk for this file', verbose_name='maximum size of chunks (bytes)')),
('last_end_bytes', models.BigIntegerField(blank=True, help_text='last uploaded bytes position', null=True, verbose_name='last end bytes')), ('last_end_bytes', models.BigIntegerField(blank=True, help_text='last uploaded bytes position', null=True, verbose_name='last end bytes')),
('owner', models.ForeignKey(blank=True, help_text='Who owns this file', null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='files_owned', to=settings.AUTH_USER_MODEL, verbose_name='owner')), ('owner', models.ForeignKey(blank=True, help_text='Who owns this file', null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='files_owned', to=settings.AUTH_USER_MODEL, verbose_name='owner')),

View File

@ -6,6 +6,7 @@ from common.constants import UPLOAD_ERROR_CODES, UPLOAD_STATUS_TYPES
from common.models import LockboxBase from common.models import LockboxBase
from common.utils import get_config, get_max_size_chunk_bytes from common.utils import get_config, get_max_size_chunk_bytes
from django.conf import settings from django.conf import settings
from django.core.exceptions import ValidationError
from django.db import models, transaction from django.db import models, transaction
from django.utils import timezone from django.utils import timezone
from django.utils.translation import gettext_lazy as _ from django.utils.translation import gettext_lazy as _
@ -18,7 +19,7 @@ class UploadError(Exception):
super().__init__(*args, **kwargs) super().__init__(*args, **kwargs)
def upload_to_fielpath(instance, filename): def _upload_to_fielpath(instance, filename):
return Path(str(instance.lid)).joinpath(f"{filename}{settings.INCOMPLETE_EXT}") return Path(str(instance.lid)).joinpath(f"{filename}{settings.INCOMPLETE_EXT}")
@ -36,7 +37,7 @@ class File(LockboxBase):
blank=True, blank=True,
verbose_name=_("file"), verbose_name=_("file"),
help_text=_("actual file"), help_text=_("actual file"),
upload_to=upload_to_fielpath, upload_to=_upload_to_fielpath,
) )
UPLOAD_CHOICES = ( UPLOAD_CHOICES = (
@ -97,8 +98,8 @@ class File(LockboxBase):
) )
expected_size = models.PositiveBigIntegerField( expected_size = models.PositiveBigIntegerField(
null=True, null=False,
blank=True, blank=False,
verbose_name=_("expected size (bytes)"), verbose_name=_("expected size (bytes)"),
help_text=_("expected file size"), help_text=_("expected file size"),
) )
@ -143,20 +144,20 @@ class File(LockboxBase):
@property @property
def md5(self): def md5(self):
if self.exists: if self.exists:
return md5(open(self.file, "rb").read()).hexdigest() self.file.open("rb")
md5_hash = md5(self.file.read()).hexdigest()
self.file.close()
return md5_hash
return None return None
@property @property
def abandoned(self): def abandoned(self):
return self.date_created <= timezone.now() + timedelta( return self.date_created <= timezone.now() + timedelta(minutes=get_config("ABANDONED_DELTA_MINUTES"))
minutes=get_config("ABANDONED_DELTA_MINUTES")
)
@classmethod @classmethod
def abandoned_condition(): def abandoned_condition():
return models.Q( return models.Q(
date_created__lte=timezone.now() date_created__lte=timezone.now() + timedelta(minutes=get_config("ABANDONED_DELTA_MINUTES"))
+ timedelta(minutes=get_config("ABANDONED_DELTA_MINUTES"))
) )
@property @property
@ -174,6 +175,16 @@ class File(LockboxBase):
return Path(self.file.path).is_file() return Path(self.file.path).is_file()
def append_chunk(self, chunk_file, chunk_data): def append_chunk(self, chunk_file, chunk_data):
"""Append chunks to a file
Args:
chunk_file (UploadedFile): Django provided abstraction that contains the actual file in memory
chunk_data (Dict): Additional chunk parameters: start_bytes, end_bytes, size
Raises:
UploadError: The file you are trying to append to is not found in FS
UploadError: Expected last_end_bytes + 1 but got a different number
"""
# Override in case recently abandoned # Override in case recently abandoned
# Will persist if it does not error out. # Will persist if it does not error out.
self.status = UPLOAD_STATUS_TYPES.UPLOADING self.status = UPLOAD_STATUS_TYPES.UPLOADING
@ -192,24 +203,32 @@ class File(LockboxBase):
if self.last_end_bytes and self.last_end_bytes + 1 != chunk_data["start_bytes"]: if self.last_end_bytes and self.last_end_bytes + 1 != chunk_data["start_bytes"]:
# Client screwed up, this is not where we left # Client screwed up, this is not where we left
raise UploadError( raise UploadError(
"Mismatch in expected chunk", code=UPLOAD_ERROR_CODES.CHUNK_MISMATCH "Mismatch in expected chunk",
code=UPLOAD_ERROR_CODES.CHUNK_MISMATCH
) )
self.last_end_bytes = chunk_data["end_bytes"] self.last_end_bytes = chunk_data["end_bytes"]
if self.expected_size == self.last_end_bytes:
# File is one shot chunk.
if chunk_data["start_bytes"] == 0: if chunk_data["start_bytes"] == 0:
self.file = chunk_file self.file = chunk_file
self.save() self.save()
else:
chunk_file.open("rb")
self.file.open("ab")
self.file.write(chunk_file.read())
self.file.close()
self.save()
if self.expected_size == self.last_end_bytes:
self.save()
self.finalize() self.finalize()
return return
# This is an n + 1 chunk.
print("Appending bytes yo")
chunk_file.seek(0)
self.save()
def finalize(self): def finalize(self):
"""Finalizes the file
Sets file status to 'completed'
Sets datetime_completed to now
Renames file from file.extension.incomplete to file.extension
"""
self.refresh_from_db() self.refresh_from_db()
self.status = UPLOAD_STATUS_TYPES.COMPLETED self.status = UPLOAD_STATUS_TYPES.COMPLETED
self.datetime_completed = timezone.now() self.datetime_completed = timezone.now()
@ -223,31 +242,17 @@ class File(LockboxBase):
def save(self, *args, **kwargs): def save(self, *args, **kwargs):
if not self.max_size_chunk_bytes: if not self.max_size_chunk_bytes:
self.max_size_chunk_bytes = get_max_size_chunk_bytes() self.max_size_chunk_bytes = get_max_size_chunk_bytes()
if self.expected_size > get_config("MAX_FILE_BYTES"):
raise ValidationError(f"Expected size: {self.expected_size} > than config MAX_SIZE_BYTES")
return super().save(*args, **kwargs) return super().save(*args, **kwargs)
def delete(self, *args, **kwargs): def delete(self, *args, **kwargs):
with transaction.atomic(): with transaction.atomic():
if self.file: if self.file:
if Path(self.file.path).is_file(): if Path(self.file.path).is_file():
self.file.storage.delete(self.file.path) self.file.storage.delete(self.file.path)
# Delete containing directory (UUID)
self.file.storage.delete(Path(self.file.path).parent) self.file.storage.delete(Path(self.file.path).parent)
result = super().delete(*args, **kwargs) result = super().delete(*args, **kwargs)
return result return result
# class FileShare(LockboxBase):
# file = models.ForeignKey(
# "storage.File",
# null=False,
# blank=False,
# on_delete=models.CASCADE,
# related_name="shares",
# )
# def __str__(self):
# return self.file.name
# class Meta:
# verbose_name = _("share")
# verbose_name_plural = _("shares")

View File

@ -1,4 +1,8 @@
from common.constants import CONTENT_RANGE_HEADER, CONTENT_RANGE_HEADER_PATTERN from common.constants import (
CONTENT_RANGE_HEADER,
CONTENT_RANGE_HEADER_PATTERN,
UPLOAD_STATUS_TYPES,
)
from django.core.exceptions import ValidationError from django.core.exceptions import ValidationError
from rest_framework import status from rest_framework import status
from rest_framework.decorators import action from rest_framework.decorators import action
@ -15,6 +19,15 @@ class FileModelViewSet(ModelViewSet):
queryset = File.objects.all() queryset = File.objects.all()
serializer_class = FileSerializer serializer_class = FileSerializer
@action(detail=True, methods=["GET"])
def md5(self, request, pk=None):
try:
file = File.objects.filter(lid=pk).first()
except ValidationError:
raise UserValidationError(f"UUID {pk} is not a valid UUID")
return Response({"md5": file.md5}, status=status.HTTP_200_OK)
@action(detail=True, methods=["PUT"]) @action(detail=True, methods=["PUT"])
def append_chunk(self, request, filename="DUMMY", format=None, pk=None): def append_chunk(self, request, filename="DUMMY", format=None, pk=None):
try: try:
@ -25,11 +38,22 @@ class FileModelViewSet(ModelViewSet):
if not file: if not file:
raise NotFound(f"File with ID {pk} was not found") raise NotFound(f"File with ID {pk} was not found")
chunk_data = self.get_content_range(request) if file.status in [UPLOAD_STATUS_TYPES.COMPLETED, UPLOAD_STATUS_TYPES.ERROR]:
raise UserValidationError(
f"Cannot append chunk to file, status is: {file.status}"
)
chunk_data = self._get_content_range(request)
if not chunk_data: if not chunk_data:
raise UserValidationError("Missing content-range headers") raise UserValidationError("Missing content-range headers")
chunk_file = request.FILES["Content"] chunk_file = request.FILES.get("Content", None)
if not chunk_file:
raise UserValidationError(
"Could not find 'Content' in request body"
)
if chunk_file.size > file.max_size_chunk_bytes: if chunk_file.size > file.max_size_chunk_bytes:
raise UserValidationError( raise UserValidationError(
f"Chunk size is greater than files max chunk size: {chunk_file.size} > {file.max_size_chunk_bytes}" f"Chunk size is greater than files max chunk size: {chunk_file.size} > {file.max_size_chunk_bytes}"
@ -48,7 +72,7 @@ class FileModelViewSet(ModelViewSet):
return Response({"code": e.code}, status=status.HTTP_400_BAD_REQUEST) return Response({"code": e.code}, status=status.HTTP_400_BAD_REQUEST)
return Response(status=status.HTTP_201_CREATED) return Response(status=status.HTTP_201_CREATED)
def get_content_range(self, request): def _get_content_range(self, request):
content_range = request.META.get(CONTENT_RANGE_HEADER, None) content_range = request.META.get(CONTENT_RANGE_HEADER, None)
if not content_range: if not content_range:
return None return None

View File

@ -1,3 +1,4 @@
from common.utils import get_config
from django.shortcuts import render from django.shortcuts import render
from django.views import View from django.views import View
@ -5,4 +6,7 @@ from django.views import View
# Static view # Static view
class FileUploadView(View): class FileUploadView(View):
def get(self, request): def get(self, request):
return render(request, "storage/upload.html") context = {
"max_file_bytes": get_config("MAX_FILE_BYTES"),
}
return render(request, "storage/upload.html", context=context)

View File

@ -1,40 +1,39 @@
{% extends "base.html" %} {% extends "base.html" %}
{% block title %}Upload a file{% endblock %} {% block title %}Upload a file{% endblock %}
{% block postjs %} {% block postjs %}
{% load static %} {% load static %}
{{ block.super }} {{ block.super }}
<script src="{% static 'js/utils.js' %}"></script> <script src="{% static 'js/utils.js' %}"></script>
<script> <script>
const chunk_size = {{ max_chunk_bytes }};
const max_file_bytes = {{ max_file_bytes }}; const max_file_bytes = {{ max_file_bytes }};
const csrftoken = getCookie('csrftoken'); const uploadPath = "{% url 'file-list' %}";
const chunkPathTemplate = "{% url 'file-append-chunk' pk='@'%}";
console.log("upload path is: " + uploadPath);
console.log("chunk path is: " + chunkPathTemplate);
</script> </script>
<script src="{% static 'js/chunked_uploader.js' %}"></script> <script src="{% static 'js/chunked_uploader.js' %}"></script>
{% endblock %} {% endblock %}
{% block content %} {% block content %}
<p> Upload file </p> <p> Upload file </p>
<p id="max_size">Max size allowed size is: {{max_file_bytes}} bytes</p>
<table>
{{form}}
</table>
<p id="file-size"></p> <p id="file-size"></p>
<form method="post">{% csrf_token %}
<table>
<form>
<label for="expected_size">Expected Size:</label><br>
<input type="file" id="file-upload"> <input type="file" id="file-upload">
<input type="button" id="upload-button" value="Upload">
</form> </form>
</table>
<p id="progressBar"></p>
{% endblock %} {% endblock %}