Got chunked uploading kinda working
continuous-integration/drone/push Build is failing Details

First pass of FE chunked uploading (there's 1 byte missing per chunk per file, no idea why)
More formatting!
This commit is contained in:
Jordi Loyzaga 2024-09-18 20:15:18 -06:00
parent cc46df81fe
commit eeaa1805bf
10 changed files with 182 additions and 116 deletions

View File

@ -39,14 +39,14 @@ CONFIG_KEYS = {
"verbose_name": "Max per chunk size in bytes",
"native_type": int,
"sensitive": False,
"default": 1024 * 1024 * 1, # 20 MB
"default": 1024 * 1024 * 2, # 2 MB
},
"MAX_FILE_BYTES": {
"description": "Max total file size in bytes",
"verbose_name": "Max upload size in bytes",
"native_type": int,
"sensitive": False,
"default": 1024 * 1024 * 30, # 200 MB
"default": 1024 * 1024 * 30, # 300 MB
},
"ENABLE_BROWSABLE_API": {
"description": "REST Framework browsable API is enabled (Always enabled if DEBUG is true)",

View File

@ -33,7 +33,9 @@ def cast_to_native_type(key, value, native_type):
try:
return native_type(value)
except ValueError as e:
message = f"Received unexpected value type for configuration key {key}\nValue: {value}\nExpected type : {native_type}"
message = f"Received unexpected value type for configuration key {key}\n\
Value: {value}\n\
Expected type : {native_type}"
raise ValueError(message) from e

View File

@ -75,8 +75,6 @@ TEMPLATES = [
WSGI_APPLICATION = "lockbox.wsgi.application"
# Password validation
# https://docs.djangoproject.com/en/4.2/ref/settings/#auth-password-validators
AUTH_PASSWORD_VALIDATORS = [
{
"NAME": "django.contrib.auth.password_validation.UserAttributeSimilarityValidator",

View File

@ -1,11 +1,15 @@
from pathlib import Path
# TODO: LOG MEEEEE
# TODO: Figure out file owner in system, permissions, GUID
# What's the default path if not provided? // docker volume
def validate_paths(media_path):
"""Validates media path and validates that we can actually write to that location
Args:
media_path (str): Absolute-path-like string where files are stored
Raises:
e: Any exception that might happen (Permission Denied, Path does not exist, etc.)
"""
try:
Path(media_path).mkdir(exist_ok=True)
except Exception as e:

View File

@ -1,65 +1,95 @@
const fileInput = document.getElementById('file-upload');
const upload_ready = false;
const fileInput = document.getElementById("file-upload");
const uploadButton = document.getElementById("upload-button");
const fileSizeReport = document.getElementById("file-size");
const progressBar = document.getElementById("progressBar");
fileInput.value = '';
fileInput.addEventListener('change', handleFileChange);
uploadButton.addEventListener('click', handleFileUpload);
function handleFileChange(event) {
const file = event.target.files[0];
const file_size = file.size;
console.log("Max file bytes is : ", max_file_bytes);
console.log("File size is: ", file_size);
fileSizeReport.textContent = "File size is: " + file.size;
if (file_size > max_file_bytes){
console.log("PLACEHOLDER: Size too big man.");
console.log("File size is too large");
// Handle this.
return
}
console.log("PLACEHOLDER: Ready!");
handleFileUpload(file);
console.log("Ready!");
}
function handleFileUpload(file) {
let start = 0;
let end = 0;
let chunk;
let file_id = null;
while (start < file.size) {
chunk = file.slice(start, start + chunk_size);
end = chunk.size - start;
console.log("LID: ", file_id);
file_id = uploadChunk(chunk, start, end, file.size, file_id);
start += chunk_size;
/**
 * Click handler for the upload button.
 *
 * Registers the currently selected file with the server by POSTing its size
 * as JSON to `uploadPath`, then hands the parsed JSON response to
 * `uploadChunks`, which performs the actual transfer.
 *
 * @param {Event} event - The click event (unused).
 * @returns {Promise<void>} Resolves once `uploadChunks` has finished, or
 *   immediately when no file is selected.
 * @throws {Error} When the registration request returns a non-OK status.
 */
async function handleFileUpload(event) {
    const selectedFile = fileInput.files[0];
    if (!selectedFile) {
        // Without this guard, reading `.size` of undefined throws a TypeError
        // whenever the button is clicked before a file has been chosen.
        console.log("No file selected");
        return;
    }

    const headers = new Headers();
    headers.append("Content-Type", "application/json");

    const request_args = {
        method: "POST",
        headers: headers,
        body: JSON.stringify({ "expected_size": selectedFile.size }),
    };

    const response = await fetch(uploadPath, request_args);
    if (!response.ok) {
        throw new Error(`Response status: ${response.status}`);
    }

    // The response body describes the remote file record that chunks are sent to.
    const file = await response.json();
    await uploadChunks(file);
}
function uploadChunk(chunk, start, end, total, file_id=null) {
const formData = new FormData();
const range_header = `bytes ${start}-${end}/${total}`;
formData.append('file', chunk);
/**
 * Show upload progress as a whole-number percentage in the progress element.
 *
 * @param {number} remaining - Bytes still left to transfer.
 * @param {number} total - Total number of bytes expected for the upload.
 */
function updateProgressBar(remaining, total) {
    if (total === 0) {
        // A zero-byte upload has nothing left to send. Report it as complete
        // rather than computing 0 / 0 and rendering "NaN".
        progressBar.textContent = 100;
        return;
    }
    const currentPercent = Math.round((total - remaining) / (total / 100));
    progressBar.textContent = currentPercent;
}
async function uploadChunks(remoteFile){
const chunkPath = chunkPathTemplate.replace("@", remoteFile.lid);
let file = fileInput.files[0];
if (file_id) {
formData.append("lid", file_id);
}
let bytes_remaining = remoteFile.expected_size
let last_transfer_position = 0;
let to_transfer = remoteFile.max_size_chunk_bytes;
console.log("Chunk size is: " + remoteFile.max_size_chunk_bytes);
let request = new Request(".", {
method: 'POST',
body: formData,
headers: {
'X-CSRFToken': csrftoken,
'Content-range': range_header
while (bytes_remaining >= 0) {
updateProgressBar(bytes_remaining, remoteFile.expected_size);
if (bytes_remaining <= remoteFile.max_size_chunk_bytes) {
to_transfer = bytes_remaining;
bytes_remaining = 0;
}
})
return _uploadChunk(request);
}
await uploadChunk(file, [last_transfer_position, last_transfer_position += to_transfer], chunkPath);
last_transfer_position += 1;
bytes_remaining -= to_transfer;
}
console.log("Done!")
progressBar.textContent = 100;
async function _uploadChunk(request) {
const _response = await fetch(request)
.then(async (response)=>response.json())
.then((data) =>{
return data.lid;
})
return _response;
}
/**
 * PUT one slice of a file to the chunk endpoint as multipart form data.
 *
 * The slice is attached under the "Content" form field and the byte range is
 * reported in a Content-Range header.
 *
 * NOTE(review): `slice` treats its end argument as exclusive, while the same
 * end value is sent in the Content-Range header. Confirm this matches what
 * the server expects; it may be related to the missing byte per chunk.
 *
 * @param {Blob} file - Source file (or any Blob) to slice the chunk from.
 * @param {number[]} byte_range - Two-element array: [start, end].
 * @param {string} chunkPath - URL the chunk is sent to.
 * @returns {Promise<void>}
 * @throws {Error} When the server answers with a non-OK status.
 */
async function uploadChunk(file, byte_range, chunkPath) {
    console.log(byte_range);
    const [rangeStart, rangeEnd] = byte_range;

    const payload = new FormData();
    payload.append("Content", file.slice(rangeStart, rangeEnd));

    const response = await fetch(chunkPath, {
        method: "PUT",
        headers: new Headers({
            "Content-Disposition": 'attachment; filename="DUMMY"',
            "Content-Range": `bytes ${rangeStart}-${rangeEnd}`,
        }),
        body: payload,
    });

    if (response.ok) {
        return;
    }
    throw new Error(`Response status: ${response.status}`);
}

View File

@ -1,4 +1,4 @@
# Generated by Django 4.2.15 on 2024-09-17 06:52
# Generated by Django 4.2.15 on 2024-09-17 19:51
import common.utils
from django.conf import settings
@ -24,13 +24,13 @@ class Migration(migrations.Migration):
('date_created', models.DateTimeField(blank=True, help_text='date at which this object was created', verbose_name='date created')),
('date_updated', models.DateTimeField(blank=True, help_text='date at which this object was last updated', verbose_name='date updated')),
('mime_type', models.CharField(blank=True, help_text='reported mime-type', max_length=128, null=True, verbose_name='mime-type')),
('file', models.FileField(blank=True, help_text='actual file', null=True, upload_to=storage.models.upload_to_fielpath, verbose_name='file')),
('file', models.FileField(blank=True, help_text='actual file', null=True, upload_to=storage.models._upload_to_fielpath, verbose_name='file')),
('status', models.CharField(choices=[('uploading', 'uploading'), ('completed', 'completed'), ('abandoned', 'abandoned'), ('error', 'error')], default='uploading', help_text='upload status for file', max_length=10, verbose_name='status')),
('datetime_completed', models.DateTimeField(blank=True, help_text="datetime at which this file's upload was completed", null=True, verbose_name='completed on')),
('expires', models.BooleanField(default=False, help_text="will be scrubbed on 'date_expires'", verbose_name='expires')),
('delete_on_expiration', models.BooleanField(default=False, help_text='will be deleted if expired and expires is true', verbose_name='delete on expiration')),
('size', models.PositiveBigIntegerField(blank=True, help_text='total size on disk for this file', null=True, verbose_name='size (bytes)')),
('expected_size', models.PositiveBigIntegerField(blank=True, help_text='expected file size', null=True, verbose_name='expected size (bytes)')),
('expected_size', models.PositiveBigIntegerField(help_text='expected file size', verbose_name='expected size (bytes)')),
('max_size_chunk_bytes', models.PositiveBigIntegerField(default=common.utils.get_max_size_chunk_bytes, help_text='max size of each individual chunk for this file', verbose_name='maximum size of chunks (bytes)')),
('last_end_bytes', models.BigIntegerField(blank=True, help_text='last uploaded bytes position', null=True, verbose_name='last end bytes')),
('owner', models.ForeignKey(blank=True, help_text='Who owns this file', null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='files_owned', to=settings.AUTH_USER_MODEL, verbose_name='owner')),

View File

@ -6,6 +6,7 @@ from common.constants import UPLOAD_ERROR_CODES, UPLOAD_STATUS_TYPES
from common.models import LockboxBase
from common.utils import get_config, get_max_size_chunk_bytes
from django.conf import settings
from django.core.exceptions import ValidationError
from django.db import models, transaction
from django.utils import timezone
from django.utils.translation import gettext_lazy as _
@ -18,7 +19,7 @@ class UploadError(Exception):
super().__init__(*args, **kwargs)
def upload_to_fielpath(instance, filename):
def _upload_to_fielpath(instance, filename):
return Path(str(instance.lid)).joinpath(f"{filename}{settings.INCOMPLETE_EXT}")
@ -36,7 +37,7 @@ class File(LockboxBase):
blank=True,
verbose_name=_("file"),
help_text=_("actual file"),
upload_to=upload_to_fielpath,
upload_to=_upload_to_fielpath,
)
UPLOAD_CHOICES = (
@ -97,8 +98,8 @@ class File(LockboxBase):
)
expected_size = models.PositiveBigIntegerField(
null=True,
blank=True,
null=False,
blank=False,
verbose_name=_("expected size (bytes)"),
help_text=_("expected file size"),
)
@ -143,20 +144,20 @@ class File(LockboxBase):
@property
def md5(self):
if self.exists:
return md5(open(self.file, "rb").read()).hexdigest()
self.file.open("rb")
md5_hash = md5(self.file.read()).hexdigest()
self.file.close()
return md5_hash
return None
@property
def abandoned(self):
return self.date_created <= timezone.now() + timedelta(
minutes=get_config("ABANDONED_DELTA_MINUTES")
)
return self.date_created <= timezone.now() + timedelta(minutes=get_config("ABANDONED_DELTA_MINUTES"))
@classmethod
def abandoned_condition():
return models.Q(
date_created__lte=timezone.now()
+ timedelta(minutes=get_config("ABANDONED_DELTA_MINUTES"))
date_created__lte=timezone.now() + timedelta(minutes=get_config("ABANDONED_DELTA_MINUTES"))
)
@property
@ -174,6 +175,16 @@ class File(LockboxBase):
return Path(self.file.path).is_file()
def append_chunk(self, chunk_file, chunk_data):
"""Append chunks to a file
Args:
chunk_file (UploadedFile): Django provided abstraction that contains the actual file in memory
chunk_data (Dict): Additional chunk parameters: start_bytes, end_bytes, size
Raises:
UploadError: The file you are trying to append to is not found in FS
UploadError: Expected last_end_bytes + 1 but got a different number
"""
# Override in case recently abandoned
# Will persist if it does not error out.
self.status = UPLOAD_STATUS_TYPES.UPLOADING
@ -192,24 +203,32 @@ class File(LockboxBase):
if self.last_end_bytes and self.last_end_bytes + 1 != chunk_data["start_bytes"]:
# Client screwed up, this is not where we left off
raise UploadError(
"Mismatch in expected chunk", code=UPLOAD_ERROR_CODES.CHUNK_MISMATCH
"Mismatch in expected chunk",
code=UPLOAD_ERROR_CODES.CHUNK_MISMATCH
)
self.last_end_bytes = chunk_data["end_bytes"]
if chunk_data["start_bytes"] == 0:
self.file = chunk_file
self.save()
else:
chunk_file.open("rb")
self.file.open("ab")
self.file.write(chunk_file.read())
self.file.close()
self.save()
if self.expected_size == self.last_end_bytes:
# File is one shot chunk.
if chunk_data["start_bytes"] == 0:
self.file = chunk_file
self.save()
self.finalize()
return
# This is an n + 1 chunk.
print("Appending bytes yo")
chunk_file.seek(0)
self.save()
def finalize(self):
"""Finalizes the file
Sets file status to 'completed'
Sets datetime_completed to now
Renames file from file.extension.incomplete to file.extension
"""
self.refresh_from_db()
self.status = UPLOAD_STATUS_TYPES.COMPLETED
self.datetime_completed = timezone.now()
@ -223,31 +242,17 @@ class File(LockboxBase):
def save(self, *args, **kwargs):
    """Persist the file record after applying defaults and the size limit.

    Fills in ``max_size_chunk_bytes`` from ``get_max_size_chunk_bytes()``
    when it is unset, then rejects records whose ``expected_size`` exceeds
    the ``MAX_FILE_BYTES`` configuration value.

    Raises:
        ValidationError: ``expected_size`` is larger than ``MAX_FILE_BYTES``.
    """
    if not self.max_size_chunk_bytes:
        self.max_size_chunk_bytes = get_max_size_chunk_bytes()

    if self.expected_size > get_config("MAX_FILE_BYTES"):
        # Name the config key that is actually checked so the error is actionable.
        raise ValidationError(f"Expected size: {self.expected_size} > than config MAX_FILE_BYTES")
    return super().save(*args, **kwargs)
def delete(self, *args, **kwargs):
"""Delete this record together with its stored file.

Runs inside a database transaction. When a file is attached, the stored
file and its containing directory are removed through the field's storage
backend before the model row itself is deleted.

Returns:
    Whatever the parent class ``delete`` returns.
"""
# NOTE(review): indentation is not visible in this view, so the exact nesting
# of the storage calls under the two ``if`` checks should be confirmed.
with transaction.atomic():
if self.file:
if Path(self.file.path).is_file():
self.file.storage.delete(self.file.path)
# Delete containing directory (UUID)
self.file.storage.delete(Path(self.file.path).parent)
result = super().delete(*args, **kwargs)
return result
# class FileShare(LockboxBase):
# file = models.ForeignKey(
# "storage.File",
# null=False,
# blank=False,
# on_delete=models.CASCADE,
# related_name="shares",
# )
# def __str__(self):
# return self.file.name
# class Meta:
# verbose_name = _("share")
# verbose_name_plural = _("shares")

View File

@ -1,4 +1,8 @@
from common.constants import CONTENT_RANGE_HEADER, CONTENT_RANGE_HEADER_PATTERN
from common.constants import (
CONTENT_RANGE_HEADER,
CONTENT_RANGE_HEADER_PATTERN,
UPLOAD_STATUS_TYPES,
)
from django.core.exceptions import ValidationError
from rest_framework import status
from rest_framework.decorators import action
@ -15,6 +19,15 @@ class FileModelViewSet(ModelViewSet):
queryset = File.objects.all()
serializer_class = FileSerializer
@action(detail=True, methods=["GET"])
def md5(self, request, pk=None):
    """Return the MD5 hex digest of a stored file.

    Args:
        request: Incoming request (unused).
        pk: ``lid`` of the file to look up.

    Raises:
        UserValidationError: ``pk`` is not a valid UUID.
        NotFound: No file with the given ``lid`` exists.

    Returns:
        Response: ``{"md5": <value of file.md5>}`` with HTTP 200.
    """
    try:
        file = File.objects.filter(lid=pk).first()
    except ValidationError:
        raise UserValidationError(f"UUID {pk} is not a valid UUID")

    # ``.first()`` yields None for an unknown id; answer 404 instead of
    # failing with AttributeError on ``file.md5`` (mirrors ``append_chunk``).
    if not file:
        raise NotFound(f"File with ID {pk} was not found")

    return Response({"md5": file.md5}, status=status.HTTP_200_OK)
@action(detail=True, methods=["PUT"])
def append_chunk(self, request, filename="DUMMY", format=None, pk=None):
try:
@ -25,11 +38,22 @@ class FileModelViewSet(ModelViewSet):
if not file:
raise NotFound(f"File with ID {pk} was not found")
chunk_data = self.get_content_range(request)
if file.status in [UPLOAD_STATUS_TYPES.COMPLETED, UPLOAD_STATUS_TYPES.ERROR]:
raise UserValidationError(
f"Cannot append chunk to file, status is: {file.status}"
)
chunk_data = self._get_content_range(request)
if not chunk_data:
raise UserValidationError("Missing content-range headers")
chunk_file = request.FILES["Content"]
chunk_file = request.FILES.get("Content", None)
if not chunk_file:
raise UserValidationError(
"Could not find 'Content' in request body"
)
if chunk_file.size > file.max_size_chunk_bytes:
raise UserValidationError(
f"Chunk size is greater than files max chunk size: {chunk_file.size} > {file.max_size_chunk_bytes}"
@ -48,7 +72,7 @@ class FileModelViewSet(ModelViewSet):
return Response({"code": e.code}, status=status.HTTP_400_BAD_REQUEST)
return Response(status=status.HTTP_201_CREATED)
def get_content_range(self, request):
def _get_content_range(self, request):
content_range = request.META.get(CONTENT_RANGE_HEADER, None)
if not content_range:
return None

View File

@ -1,3 +1,4 @@
from common.utils import get_config
from django.shortcuts import render
from django.views import View
@ -5,4 +6,7 @@ from django.views import View
# Static view
class FileUploadView(View):
def get(self, request):
return render(request, "storage/upload.html")
context = {
"max_file_bytes": get_config("MAX_FILE_BYTES"),
}
return render(request, "storage/upload.html", context=context)

View File

@ -1,40 +1,39 @@
{% extends "base.html" %}
{% block title %}Upload a file{% endblock %}
{% block postjs %}
{% load static %}
{{ block.super }}
<script src="{% static 'js/utils.js' %}"></script>
<script>
const chunk_size = {{ max_chunk_bytes }};
const max_file_bytes = {{ max_file_bytes }};
const csrftoken = getCookie('csrftoken');
const uploadPath = "{% url 'file-list' %}";
const chunkPathTemplate = "{% url 'file-append-chunk' pk='@'%}";
console.log("upload path is: " + uploadPath);
console.log("chunk path is: " + chunkPathTemplate);
</script>
<script src="{% static 'js/chunked_uploader.js' %}"></script>
{% endblock %}
{% block content %}
<p> Upload file </p>
<table>
{{form}}
</table>
<p id="max_size">Max size allowed size is: {{max_file_bytes}} bytes</p>
<p id="file-size"></p>
<form method="post">{% csrf_token %}
<input type="file" id="file-upload">
</form>
<table>
<form>
<label for="expected_size">Expected Size:</label><br>
<input type="file" id="file-upload">
<input type="button" id="upload-button" value="Upload">
</form>
</table>
<p id="progressBar"></p>
{% endblock %}