Upload is now working
continuous-integration/drone/push Build is failing Details

Added file hash validation (client vs server)
Added mime guessing
Added upload checkpoints
Improved error handling
This commit is contained in:
Jordi Loyzaga 2024-09-19 03:54:52 -06:00
parent ea84012059
commit 3279d6c5dc
12 changed files with 155 additions and 53 deletions

8
.pre-commit-config.yaml Normal file
View File

@ -0,0 +1,8 @@
repos:
- repo: local
hooks:
- id: flake8
name: flake8
entry: flake8
language: system
files: '\.py$'

View File

@ -14,7 +14,7 @@ class UPLOAD_STATUS_TYPES:
class UPLOAD_ERROR_CODES:
FILE_MISSING = "file_missing"
CHUNK_MISMATCH = "chunk_mismatch"
VERIFICATION_FAILED = "verification_failed"
# Config
@ -48,6 +48,13 @@ CONFIG_KEYS = {
"sensitive": False,
"default": 1024 * 1024 * 30, # 30 MiB (31,457,280 bytes)
},
"VERIFY_ENABLE": {
"description": "Verify uploaded file integrity(sha256)",
"verbose_name": "File integrity verification",
"native_type": bool,
"sensitive": False,
"default": True
},
"ENABLE_BROWSABLE_API": {
"description": "REST Framework browsable API is enabled (Always enabled if DEBUG is true)",
"verbose_name": "Enable browsable API",

View File

@ -118,6 +118,7 @@ STORAGES = {
MEDIA_ROOT = Path("/home/kitty/src/lockbox/FILES")
MEDIA_URL = "files/"
INCOMPLETE_EXT = ".incomplete"
DEFAULT_FILE_HEADER_BYTES = 2048
validate_paths(MEDIA_ROOT)

View File

@ -3,6 +3,8 @@ const uploadButton = document.getElementById("upload-button");
const fileSizeReport = document.getElementById("file-size");
const progressBar = document.getElementById("progressBar");
let isReady = false;
fileInput.value = '';
fileInput.addEventListener('change', handleFileChange);
uploadButton.addEventListener('click', handleFileUpload);
@ -10,71 +12,85 @@ uploadButton.addEventListener('click', handleFileUpload);
function handleFileChange(event) {
const file = event.target.files[0];
const file_size = file.size;
fileSizeReport.textContent = "File size is: " + file.size;
fileSizeReport.textContent = "File size is: " + file.size + " bytes";
if (file_size > max_file_bytes){
console.log("File size is too large");
// Handle this.
isReady = false;
return
}
console.log("Ready!");
isReady = true;
}
async function handleFileUpload(event) {
if (!isReady){
console.log("Not ready");
return
}
isReady = false;
const file = fileInput.files[0];
let headers = new Headers();
headers.append("Content-Type", "application/json");
const request_args = {
method: "POST",
headers: headers,
body: JSON.stringify(
{
"expected_size": fileInput.files[0].size
"filename": file.name,
"expected_size": file.size,
"sha256": await getHash(file),
}
)
};
const response = await fetch(uploadPath, request_args);
if (!response.ok) {
throw new Error(`Response status: ${response.status}`);
}
const file = await response.json();
await uploadChunks(file);
const apifile = await response.json();
await uploadChunks(apifile);
}
function updateProgressBar(remaining, total) {
let current_percent = Math.round((total - remaining) / (total / 100));
progressBar.textContent = current_percent;
progressBar.textContent = current_percent + " %";
}
async function uploadChunks(remoteFile){
const chunkPath = chunkPathTemplate.replace("@", remoteFile.lid);
let file = fileInput.files[0];
let bytes_remaining = remoteFile.expected_size
let last_transfer_position = 0;
let bytes_remaining = remoteFile.expected_size;
let last_transfer_position = remoteFile.last_end_bytes; // Start where we left, default is 0;
let to_transfer = remoteFile.max_size_chunk_bytes;
console.log("Chunk size is: " + remoteFile.max_size_chunk_bytes);
while (bytes_remaining >= 0) {
updateProgressBar(bytes_remaining, remoteFile.expected_size);
if (bytes_remaining <= remoteFile.max_size_chunk_bytes) {
to_transfer = bytes_remaining;
bytes_remaining = 0;
}
await uploadChunk(file, [last_transfer_position, last_transfer_position += to_transfer], chunkPath);
last_transfer_position += 1;
bytes_remaining -= to_transfer;
updateProgressBar(bytes_remaining, remoteFile.expected_size);
}
console.log("Done!")
progressBar.textContent = 100;
console.log("Done!");
progressBar.textContent = "Done!";
}
async function uploadChunk(file, byte_range, chunkPath) {
console.log(byte_range);
let file_bytes_target = file.slice(byte_range[0], byte_range[1]);
let body = new FormData();
body.append("Content", file_bytes_target);
let headers = new Headers();

View File

@ -12,4 +12,32 @@ function getCookie(name) {
}
}
return cookieValue;
}
/**
 * Packs the bytes of an ArrayBuffer into big-endian 32-bit words and wraps
 * them in a CryptoJS WordArray.
 *
 * @param {ArrayBuffer} ab - Raw bytes to convert.
 * @returns {object} The value returned by CryptoJS.lib.WordArray.create,
 *     built from the packed words and the original byte length.
 */
function arrayBufferToWordArray(ab) {
    const bytes = new Uint8Array(ab);
    const words = [];
    for (let offset = 0; offset < bytes.length; offset += 4) {
        let word = 0;
        for (let k = 0; k < 4; k += 1) {
            const position = offset + k;
            // Positions past the end of the buffer contribute zero bits,
            // so a trailing partial word is zero padded on the right.
            const value = position < bytes.length ? bytes[position] : 0;
            word = (word << 8) | value;
        }
        words.push(word);
    }
    // The byte length is passed separately so padding is not counted as data.
    return CryptoJS.lib.WordArray.create(words, bytes.length);
}
/**
 * Computes the SHA-256 digest of a file's contents, hex encoded.
 *
 * The whole blob is read into memory with a FileReader, converted with
 * arrayBufferToWordArray and hashed with CryptoJS.
 *
 * @param {Blob} file - File (or any Blob) whose contents are hashed.
 * @returns {Promise<string>} Digest encoded with CryptoJS.enc.Hex.
 *     The promise rejects with the FileReader error event if reading fails.
 */
async function getHash(file) {
    // FileReader is callback based; wrap it in a promise so it can be awaited.
    const read = (blob) => new Promise((resolve, reject) => {
        const reader = new FileReader();
        reader.onload = (event) => resolve(event.target.result);
        reader.onerror = reject;
        reader.readAsArrayBuffer(blob);
    });
    const file_bytes = await read(file);
    // Declared with const: a bare assignment would create an implicit global
    // (and throws a ReferenceError in strict mode or ES modules).
    const hash = CryptoJS.SHA256(arrayBufferToWordArray(file_bytes));
    return hash.toString(CryptoJS.enc.Hex);
}

View File

@ -1,4 +1,4 @@
# Generated by Django 4.2.15 on 2024-09-17 19:51
# Generated by Django 4.2.15 on 2024-09-19 09:40
import common.utils
from django.conf import settings
@ -25,14 +25,16 @@ class Migration(migrations.Migration):
('date_updated', models.DateTimeField(blank=True, help_text='date at which this object was last updated', verbose_name='date updated')),
('mime_type', models.CharField(blank=True, help_text='reported mime-type', max_length=128, null=True, verbose_name='mime-type')),
('file', models.FileField(blank=True, help_text='actual file', null=True, upload_to=storage.models._upload_to_fielpath, verbose_name='file')),
('filename', models.CharField(help_text='file name', max_length=256, verbose_name='filename')),
('status', models.CharField(choices=[('uploading', 'uploading'), ('completed', 'completed'), ('abandoned', 'abandoned'), ('error', 'error')], default='uploading', help_text='upload status for file', max_length=10, verbose_name='status')),
('datetime_completed', models.DateTimeField(blank=True, help_text="datetime at which this file's upload was completed", null=True, verbose_name='completed on')),
('expires', models.BooleanField(default=False, help_text="will be scrubbed on 'date_expires'", verbose_name='expires')),
('sha256', models.CharField(help_text='file hash (sha256)', max_length=64, verbose_name='hash (sha256)')),
('delete_on_expiration', models.BooleanField(default=False, help_text='will be deleted if expired and expires is true', verbose_name='delete on expiration')),
('size', models.PositiveBigIntegerField(blank=True, help_text='total size on disk for this file', null=True, verbose_name='size (bytes)')),
('expected_size', models.PositiveBigIntegerField(help_text='expected file size', verbose_name='expected size (bytes)')),
('max_size_chunk_bytes', models.PositiveBigIntegerField(default=common.utils.get_max_size_chunk_bytes, help_text='max size of each individual chunk for this file', verbose_name='maximum size of chunks (bytes)')),
('last_end_bytes', models.BigIntegerField(blank=True, help_text='last uploaded bytes position', null=True, verbose_name='last end bytes')),
('last_end_bytes', models.BigIntegerField(default=0, help_text='last uploaded bytes position', verbose_name='last end bytes')),
('owner', models.ForeignKey(blank=True, help_text='Who owns this file', null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='files_owned', to=settings.AUTH_USER_MODEL, verbose_name='owner')),
],
options={

View File

@ -1,7 +1,8 @@
from datetime import timedelta
from hashlib import md5
from hashlib import sha256
from pathlib import Path
import magic
from common.constants import UPLOAD_ERROR_CODES, UPLOAD_STATUS_TYPES
from common.models import LockboxBase
from common.utils import get_config, get_max_size_chunk_bytes
@ -20,7 +21,7 @@ class UploadError(Exception):
def _upload_to_fielpath(instance, filename):
return Path(str(instance.lid)).joinpath(f"{filename}{settings.INCOMPLETE_EXT}")
return Path(str(instance.lid)).joinpath(f"{instance.filename}{settings.INCOMPLETE_EXT}")
class File(LockboxBase):
@ -40,6 +41,14 @@ class File(LockboxBase):
upload_to=_upload_to_fielpath,
)
filename = models.CharField(
null=False,
blank=False,
max_length=256, # safeish in most FS
verbose_name=_("filename"),
help_text=_("file name")
)
UPLOAD_CHOICES = (
(UPLOAD_STATUS_TYPES.UPLOADING, _(UPLOAD_STATUS_TYPES.UPLOADING)),
(UPLOAD_STATUS_TYPES.COMPLETED, _(UPLOAD_STATUS_TYPES.COMPLETED)),
@ -82,6 +91,14 @@ class File(LockboxBase):
help_text=_("will be scrubbed on 'date_expires'"),
)
sha256 = models.CharField(
null=False,
blank=False,
max_length=64,
verbose_name=_("hash (sha256)"),
help_text=_("file hash (sha256)")
)
delete_on_expiration = models.BooleanField(
null=False,
blank=False,
@ -113,8 +130,9 @@ class File(LockboxBase):
)
last_end_bytes = models.BigIntegerField(
null=True,
blank=True,
null=False,
blank=False,
default=0,
verbose_name=("last end bytes"),
help_text=_("last uploaded bytes position"),
)
@ -141,15 +159,6 @@ class File(LockboxBase):
verbose_name = _("file")
verbose_name_plural = _("files")
@property
def md5(self):
if self.exists:
self.file.open("rb")
md5_hash = md5(self.file.read()).hexdigest()
self.file.close()
return md5_hash
return None
@property
def abandoned(self):
    """Whether this file was created at least ABANDONED_DELTA_MINUTES ago.

    Returns:
        bool: True when ``date_created`` is at or before the cutoff
        ``now - ABANDONED_DELTA_MINUTES``, False otherwise.
    """
    # The cutoff lies in the past. Adding the delta instead would place it in
    # the future and make every already-created file count as abandoned.
    cutoff = timezone.now() - timedelta(minutes=get_config("ABANDONED_DELTA_MINUTES"))
    return self.date_created <= cutoff
@ -200,7 +209,7 @@ class File(LockboxBase):
code=UPLOAD_ERROR_CODES.FILE_MISSING,
)
if self.last_end_bytes and self.last_end_bytes + 1 != chunk_data["start_bytes"]:
if self.last_end_bytes and self.last_end_bytes != chunk_data["start_bytes"]:
# Client screwed up, this is not where we left
raise UploadError(
"Mismatch in expected chunk",
@ -225,20 +234,48 @@ class File(LockboxBase):
def finalize(self):
"""Finalizes the file
Guesses mimetype
Validates file hash if enabled
Renames file to the originally provided filename, whatever it is.
Sets file status to 'completed'
Sets datetime_completed to now
Renames file from file.extention.incomplete to file.extention
"""
self.refresh_from_db()
self.status = UPLOAD_STATUS_TYPES.COMPLETED
self.datetime_completed = timezone.now()
final_name = self.file.name.replace(settings.INCOMPLETE_EXT, "")
final_path = settings.MEDIA_ROOT / final_name
self.mime_type = self.guess_type()
if get_config("VERIFY_ENABLE"):
result = self.verify()
if not result:
self.status = UPLOAD_STATUS_TYPES.ERROR
raise UploadError(
"File verification failed",
code=UPLOAD_ERROR_CODES.VERIFICATION_FAILED
)
final_path = settings.MEDIA_ROOT / str(self.lid) / self.filename
with transaction.atomic():
Path(self.file.path).rename(final_path)
self.file.name = final_name
self.file.name = self.filename
self.status = UPLOAD_STATUS_TYPES.COMPLETED
self.datetime_completed = timezone.now()
self.save()
def verify(self):
    """Check the stored file against the hash recorded in ``self.sha256``.

    The file is hashed in fixed-size blocks so it is never loaded into memory
    in one piece, and it is closed even if reading fails.

    Returns:
        bool: True when the computed SHA-256 hex digest equals ``self.sha256``.

    Raises:
        Exception: if ``self.exists`` is falsy, so there is no file to hash.
    """
    if not self.exists:
        raise Exception(f"Fatal: Could not get file hash - file {self.file.path} does not exist")

    digest = sha256()
    self.file.open("rb")
    try:
        # read() returns b"" at end of file, which stops the iterator.
        for block in iter(lambda: self.file.read(1024 * 1024), b""):
            digest.update(block)
    finally:
        self.file.close()
    return digest.hexdigest() == self.sha256
def guess_type(self):
    """Guess the file's MIME type from its leading bytes.

    Reads the first ``settings.DEFAULT_FILE_HEADER_BYTES`` bytes of the file
    and passes them to ``magic.from_buffer(..., mime=True)``.

    Returns:
        The value returned by ``magic.from_buffer`` for the header bytes.
    """
    self.file.open("rb")
    try:
        self.file.seek(0)
        header = self.file.read(settings.DEFAULT_FILE_HEADER_BYTES)
        return magic.from_buffer(header, mime=True)
    finally:
        # Close the handle even when reading or detection raises.
        self.file.close()
def save(self, *args, **kwargs):
if not self.max_size_chunk_bytes:
self.max_size_chunk_bytes = get_max_size_chunk_bytes()

View File

@ -19,15 +19,6 @@ class FileModelViewSet(ModelViewSet):
queryset = File.objects.all()
serializer_class = FileSerializer
@action(detail=True, methods=["GET"])
def md5(self, request, pk=None):
try:
file = File.objects.filter(lid=pk).first()
except ValidationError:
raise UserValidationError(f"UUID {pk} is not a valid UUID")
return Response({"md5": file.md5}, status=status.HTTP_200_OK)
@action(detail=True, methods=["PUT"])
def append_chunk(self, request, filename="DUMMY", format=None, pk=None):
try:
@ -54,6 +45,7 @@ class FileModelViewSet(ModelViewSet):
"Could not find 'Content' in request body"
)
# Bytes are inclusive for slicing but not for size, go figure.
if chunk_file.size > file.max_size_chunk_bytes:
raise UserValidationError(
f"Chunk size is greater than files max chunk size: {chunk_file.size} > {file.max_size_chunk_bytes}"

View File

@ -10,6 +10,7 @@
{% block prejs %}
{% endblock %}
<script src="https://cdnjs.cloudflare.com/ajax/libs/crypto-js/4.0.0/crypto-js.min.js"></script>
<title>{% block title %}Lockbox{% endblock %}</title>
</head>

View File

@ -10,9 +10,7 @@
const max_file_bytes = {{ max_file_bytes }};
const uploadPath = "{% url 'file-list' %}";
const chunkPathTemplate = "{% url 'file-append-chunk' pk='@'%}";
console.log("upload path is: " + uploadPath);
console.log("chunk path is: " + chunkPathTemplate);
const returnPath = "{% url 'file-append-chunk' pk='@'%}"
</script>
@ -22,18 +20,18 @@
{% block content %}
<p> Upload file </p>
<p id="max_size">Max size allowed size is: {{max_file_bytes}} bytes</p>
<p id="max_size">Max size allowed: {{max_file_bytes}} bytes</p>
<p id="file-size"></p>
<table>
<form>
<label for="expected_size">Expected Size:</label><br>
<input type="file" id="file-upload">
<input type="button" id="upload-button" value="Upload">
</form>
</table>
<p>Progress: </p>
<p id="progressBar"></p>
{% endblock %}

13
poetry.lock generated
View File

@ -344,6 +344,17 @@ files = [
[package.extras]
cli = ["click (>=5.0)"]
[[package]]
name = "python-magic"
version = "0.4.27"
description = "File type identification using libmagic"
optional = false
python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
files = [
{file = "python-magic-0.4.27.tar.gz", hash = "sha256:c1ba14b08e4a5f5c31a302b7721239695b2f0f058d125bd5ce1ee36b9d9d3c3b"},
{file = "python_magic-0.4.27-py2.py3-none-any.whl", hash = "sha256:c212960ad306f700aa0d01e5d7a325d20548ff97eb9920dcd29513174f0294d3"},
]
[[package]]
name = "sqlparse"
version = "0.5.1"
@ -387,4 +398,4 @@ brotli = ["brotli"]
[metadata]
lock-version = "2.0"
python-versions = "~3.12"
content-hash = "155d31f2edffb6e6ea604c7a1115fa072072a5370e012eea577644e0a337f0b0"
content-hash = "cf73bb83fc48555289dd3949c6bf10a7feab817496ab8f4826222a2f9b2bad0a"

View File

@ -13,6 +13,7 @@ whitenoise = "^6.6.0"
djangorestframework = "^3.14.0"
drf-nested-routers = "^0.93.5"
python-dotenv = "^1.0.1"
python-magic = "^0.4.27"
[tool.poetry.group.dev.dependencies]
pytest = "^8.0.0"