From 3279d6c5dc41841a1907c1e1e42acb06529dd213 Mon Sep 17 00:00:00 2001 From: Jordi Loyzaga Date: Thu, 19 Sep 2024 03:54:52 -0600 Subject: [PATCH] Upload is now working Added file hash validation (client vs server) Added mime guessing Added upload checkpoints Improved error handling --- .pre-commit-config.yaml | 8 +++ lockbox/common/constants.py | 9 ++- lockbox/lockbox/settings.py | 1 + lockbox/static/js/chunked_uploader.js | 46 ++++++++----- lockbox/static/js/utils.js | 28 ++++++++ lockbox/storage/migrations/0001_initial.py | 6 +- lockbox/storage/models.py | 77 ++++++++++++++++------ lockbox/storage/views_api.py | 10 +-- lockbox/templates/base.html | 1 + lockbox/templates/storage/upload.html | 8 +-- poetry.lock | 13 +++- pyproject.toml | 1 + 12 files changed, 155 insertions(+), 53 deletions(-) create mode 100644 .pre-commit-config.yaml diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..fe5b5f7 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,8 @@ +repos: + - repo: local + hooks: + - id: flake8 + name: flake8 + entry: flake8 + language: system + files: '\.py$' diff --git a/lockbox/common/constants.py b/lockbox/common/constants.py index 375fc3a..1181c89 100644 --- a/lockbox/common/constants.py +++ b/lockbox/common/constants.py @@ -14,7 +14,7 @@ class UPLOAD_STATUS_TYPES: class UPLOAD_ERROR_CODES: FILE_MISSING = "file_missing" CHUNK_MISMATCH = "chunk_mismatch" - + VERIFICATION_FAILED = "verification_failed" # Config @@ -48,6 +48,13 @@ CONFIG_KEYS = { "sensitive": False, "default": 1024 * 1024 * 30, # 300 MB }, + "VERIFY_ENABLE": { + "description": "Verify uploaded file integrity(sha256)", + "verbose_name": "File integrity verification", + "native_type": bool, + "sensitive": False, + "default": True + }, "ENABLE_BROWSABLE_API": { "description": "REST Framework browsable API is enabled (Always enabled if DEBUG is true)", "verbose_name": "Enable browsable API", diff --git a/lockbox/lockbox/settings.py b/lockbox/lockbox/settings.py index 42be9d3..7a521ba 100644 --- a/lockbox/lockbox/settings.py +++ b/lockbox/lockbox/settings.py @@ -118,6 +118,7 @@ STORAGES = { MEDIA_ROOT = Path("/home/kitty/src/lockbox/FILES") MEDIA_URL = "files/" INCOMPLETE_EXT = ".incomplete" +DEFAULT_FILE_HEADER_BYTES = 2048 validate_paths(MEDIA_ROOT) diff --git a/lockbox/static/js/chunked_uploader.js b/lockbox/static/js/chunked_uploader.js index 1bc696d..f0af91c 100644 --- a/lockbox/static/js/chunked_uploader.js +++ b/lockbox/static/js/chunked_uploader.js @@ -3,6 +3,8 @@ const uploadButton = document.getElementById("upload-button"); const fileSizeReport = document.getElementById("file-size"); const progressBar = document.getElementById("progressBar"); + +let isReady = false; fileInput.value = ''; fileInput.addEventListener('change', handleFileChange); uploadButton.addEventListener('click', handleFileUpload); @@ -10,71 +12,85 @@ uploadButton.addEventListener('click', handleFileUpload); function handleFileChange(event) { const file = event.target.files[0]; const file_size = file.size; - fileSizeReport.textContent = "File size is: " + file.size; + fileSizeReport.textContent = "File size is: " + file.size + " bytes"; if (file_size > max_file_bytes){ console.log("File size is too large"); - // Handle this. + isReady = false; return } console.log("Ready!"); + isReady = true; } async function handleFileUpload(event) { + + if (!isReady){ + console.log("Not ready"); + return + } + + isReady = false; + + const file = fileInput.files[0]; + let headers = new Headers(); headers.append("Content-Type", "application/json"); + const request_args = { method: "POST", headers: headers, body: JSON.stringify( { - "expected_size": fileInput.files[0].size + "filename": file.name, + "expected_size": file.size, + "sha256": await getHash(file), } ) }; + const response = await fetch(uploadPath, request_args); + if (!response.ok) { throw new Error(`Response status: ${response.status}`); } - const file = await response.json(); - await uploadChunks(file); + + const apifile = await response.json(); + await uploadChunks(apifile); } function updateProgressBar(remaining, total) { let current_percent = Math.round((total - remaining) / (total / 100)); - progressBar.textContent = current_percent; + progressBar.textContent = current_percent + " %"; } async function uploadChunks(remoteFile){ const chunkPath = chunkPathTemplate.replace("@", remoteFile.lid); let file = fileInput.files[0]; - let bytes_remaining = remoteFile.expected_size - let last_transfer_position = 0; + let bytes_remaining = remoteFile.expected_size; + let last_transfer_position = remoteFile.last_end_bytes; // Start where we left, default is 0; let to_transfer = remoteFile.max_size_chunk_bytes; console.log("Chunk size is: " + remoteFile.max_size_chunk_bytes); while (bytes_remaining >= 0) { - updateProgressBar(bytes_remaining, remoteFile.expected_size); if (bytes_remaining <= remoteFile.max_size_chunk_bytes) { to_transfer = bytes_remaining; bytes_remaining = 0; } - + await uploadChunk(file, [last_transfer_position, last_transfer_position += to_transfer], chunkPath); - last_transfer_position += 1; bytes_remaining -= to_transfer; + updateProgressBar(bytes_remaining, remoteFile.expected_size); } - console.log("Done!") - progressBar.textContent = 100; + console.log("Done!"); + progressBar.textContent = "Done!"; } async function uploadChunk(file, byte_range, chunkPath) { - console.log(byte_range); let file_bytes_target = file.slice(byte_range[0], byte_range[1]); let body = new FormData(); - body.append("Content", file_bytes_target); let headers = new Headers(); diff --git a/lockbox/static/js/utils.js b/lockbox/static/js/utils.js index b672cc3..64bd50a 100644 --- a/lockbox/static/js/utils.js +++ b/lockbox/static/js/utils.js @@ -12,4 +12,32 @@ function getCookie(name) { } } return cookieValue; +} + +function arrayBufferToWordArray(ab) { + var i8a = new Uint8Array(ab); + var a = []; + for (var i = 0; i < i8a.length; i += 4) { + a.push(i8a[i] << 24 | i8a[i + 1] << 16 | i8a[i + 2] << 8 | i8a[i + 3]); + } + return CryptoJS.lib.WordArray.create(a, i8a.length); +} + +async function getHash(file) { + + // I hate this language so much. + const read = (blob) => new Promise((resolve, reject) => { + const reader = new FileReader(); + reader.onload = (event) => resolve(event.target.result); + reader.onerror = reject; + reader.readAsArrayBuffer(blob); + + }); + const file_bytes = await read(file); + hash = CryptoJS.SHA256( + arrayBufferToWordArray( + file_bytes + ) + ); + return hash.toString(CryptoJS.enc.Hex); } \ No newline at end of file diff --git a/lockbox/storage/migrations/0001_initial.py b/lockbox/storage/migrations/0001_initial.py index 53694a8..1b91554 100644 --- a/lockbox/storage/migrations/0001_initial.py +++ b/lockbox/storage/migrations/0001_initial.py @@ -1,4 +1,4 @@ -# Generated by Django 4.2.15 on 2024-09-17 19:51 +# Generated by Django 4.2.15 on 2024-09-19 09:40 import common.utils from django.conf import settings @@ -25,14 +25,16 @@ class Migration(migrations.Migration): ('date_updated', models.DateTimeField(blank=True, help_text='date at which this object was last updated', verbose_name='date updated')), ('mime_type', models.CharField(blank=True, help_text='reported mime-type', max_length=128, null=True, verbose_name='mime-type')), ('file', models.FileField(blank=True, help_text='actual file', null=True, upload_to=storage.models._upload_to_fielpath, verbose_name='file')), + ('filename', models.CharField(help_text='file name', max_length=256, verbose_name='filename')), ('status', models.CharField(choices=[('uploading', 'uploading'), ('completed', 'completed'), ('abandoned', 'abandoned'), ('error', 'error')], default='uploading', help_text='upload status for file', max_length=10, verbose_name='status')), ('datetime_completed', models.DateTimeField(blank=True, help_text="datetime at which this file's upload was completed", null=True, verbose_name='completed on')), ('expires', models.BooleanField(default=False, help_text="will be scrubbed on 'date_expires'", verbose_name='expires')), + ('sha256', models.CharField(help_text='file hash (sha256)', max_length=64, verbose_name='hash (sha256)')), ('delete_on_expiration', models.BooleanField(default=False, help_text='will be deleted if expired and expires is true', verbose_name='delete on expiration')), ('size', models.PositiveBigIntegerField(blank=True, help_text='total size on disk for this file', null=True, verbose_name='size (bytes)')), ('expected_size', models.PositiveBigIntegerField(help_text='expected file size', verbose_name='expected size (bytes)')), ('max_size_chunk_bytes', models.PositiveBigIntegerField(default=common.utils.get_max_size_chunk_bytes, help_text='max size of each individual chunk for this file', verbose_name='maximum size of chunks (bytes)')), - ('last_end_bytes', models.BigIntegerField(blank=True, help_text='last uploaded bytes position', null=True, verbose_name='last end bytes')), + ('last_end_bytes', models.BigIntegerField(default=0, help_text='last uploaded bytes position', verbose_name='last end bytes')), ('owner', models.ForeignKey(blank=True, help_text='Who owns this file', null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='files_owned', to=settings.AUTH_USER_MODEL, verbose_name='owner')), ], options={ diff --git a/lockbox/storage/models.py b/lockbox/storage/models.py index 9f94a41..b5e9273 100644 --- a/lockbox/storage/models.py +++ b/lockbox/storage/models.py @@ -1,7 +1,8 @@ from datetime import timedelta -from hashlib import md5 +from hashlib import sha256 from pathlib import Path +import magic from common.constants import UPLOAD_ERROR_CODES, UPLOAD_STATUS_TYPES from common.models import LockboxBase from common.utils import get_config, get_max_size_chunk_bytes @@ -20,7 +21,7 @@ class UploadError(Exception): def _upload_to_fielpath(instance, filename): - return Path(str(instance.lid)).joinpath(f"{filename}{settings.INCOMPLETE_EXT}") + return Path(str(instance.lid)).joinpath(f"{instance.filename}{settings.INCOMPLETE_EXT}") class File(LockboxBase): @@ -40,6 +41,14 @@ class File(LockboxBase): upload_to=_upload_to_fielpath, ) + filename = models.CharField( + null=False, + blank=False, + max_length=256, # safeish in most FS + verbose_name=_("filename"), + help_text=_("file name") + ) + UPLOAD_CHOICES = ( (UPLOAD_STATUS_TYPES.UPLOADING, _(UPLOAD_STATUS_TYPES.UPLOADING)), (UPLOAD_STATUS_TYPES.COMPLETED, _(UPLOAD_STATUS_TYPES.COMPLETED)), @@ -82,6 +91,14 @@ class File(LockboxBase): help_text=_("will be scrubbed on 'date_expires'"), ) + sha256 = models.CharField( + null=False, + blank=False, + max_length=64, + verbose_name=_("hash (sha256)"), + help_text=_("file hash (sha256)") + ) + delete_on_expiration = models.BooleanField( null=False, blank=False, @@ -113,8 +130,9 @@ class File(LockboxBase): ) last_end_bytes = models.BigIntegerField( - null=True, - blank=True, + null=False, + blank=False, + default=0, verbose_name=("last end bytes"), help_text=_("last uploaded bytes position"), ) @@ -141,15 +159,6 @@ class File(LockboxBase): verbose_name = _("file") verbose_name_plural = _("files") - @property - def md5(self): - if self.exists: - self.file.open("rb") - md5_hash = md5(self.file.read()).hexdigest() - self.file.close() - return md5_hash - return None - @property def abandoned(self): return self.date_created <= timezone.now() + timedelta(minutes=get_config("ABANDONED_DELTA_MINUTES")) @@ -200,7 +209,7 @@ class File(LockboxBase): code=UPLOAD_ERROR_CODES.FILE_MISSING, ) - if self.last_end_bytes and self.last_end_bytes + 1 != chunk_data["start_bytes"]: + if self.last_end_bytes and self.last_end_bytes != chunk_data["start_bytes"]: # Client screwed up, this is not where we left raise UploadError( "Mismatch in expected chunk", @@ -225,20 +234,48 @@ class File(LockboxBase): def finalize(self): """Finalizes the file + Guesses mimetype + Validates file hash if enabled + Renames file to the originally provided filename, whatever it is. Sets file status to 'completed' Sets datetime_completed to now - Renames file from file.extention.incomplete to file.extention """ self.refresh_from_db() - self.status = UPLOAD_STATUS_TYPES.COMPLETED - self.datetime_completed = timezone.now() - final_name = self.file.name.replace(settings.INCOMPLETE_EXT, "") - final_path = settings.MEDIA_ROOT / final_name + self.mime_type = self.guess_type() + + if get_config("VERIFY_ENABLE"): + result = self.verify() + if not result: + self.status = UPLOAD_STATUS_TYPES.ERROR + raise UploadError( + "File verification failed", + code=UPLOAD_ERROR_CODES.VERIFICATION_FAILED + ) + + final_path = settings.MEDIA_ROOT / str(self.lid) / self.filename + with transaction.atomic(): Path(self.file.path).rename(final_path) - self.file.name = final_name + self.file.name = self.filename + self.status = UPLOAD_STATUS_TYPES.COMPLETED + self.datetime_completed = timezone.now() self.save() + def verify(self): + if self.exists: + self.file.open("rb") + sha256_hash = sha256(self.file.read()).hexdigest() + self.file.close() + return sha256_hash == self.sha256 + raise Exception(f"Fatal: Could get file hash - file {self.file.path} does not exist") + + def guess_type(self): + self.file.open("rb") + self.file.seek(0) + mime_type = magic.from_buffer(self.file.read(settings.DEFAULT_FILE_HEADER_BYTES), mime=True) + self.file.close() + return mime_type + def save(self, *args, **kwargs): if not self.max_size_chunk_bytes: self.max_size_chunk_bytes = get_max_size_chunk_bytes() diff --git a/lockbox/storage/views_api.py b/lockbox/storage/views_api.py index bc78fac..9cf47a1 100644 --- a/lockbox/storage/views_api.py +++ b/lockbox/storage/views_api.py @@ -19,15 +19,6 @@ class FileModelViewSet(ModelViewSet): queryset = File.objects.all() serializer_class = FileSerializer - @action(detail=True, methods=["GET"]) - def md5(self, request, pk=None): - try: - file = File.objects.filter(lid=pk).first() - except ValidationError: - raise UserValidationError(f"UUID {pk} is not a valid UUID") - - return Response({"md5": file.md5}, status=status.HTTP_200_OK) - @action(detail=True, methods=["PUT"]) def append_chunk(self, request, filename="DUMMY", format=None, pk=None): try: @@ -54,6 +45,7 @@ class FileModelViewSet(ModelViewSet): "Could not find 'Content' in request body" ) + # Bytes are inclusive for slicing but not for size, go figure. if chunk_file.size > file.max_size_chunk_bytes: raise UserValidationError( f"Chunk size is greater than files max chunk size: {chunk_file.size} > {file.max_size_chunk_bytes}" diff --git a/lockbox/templates/base.html b/lockbox/templates/base.html index 5562f9a..1f72515 100644 --- a/lockbox/templates/base.html +++ b/lockbox/templates/base.html @@ -10,6 +10,7 @@ {% block prejs %} {% endblock %} + {% block title %}Lockbox{% endblock %} diff --git a/lockbox/templates/storage/upload.html b/lockbox/templates/storage/upload.html index 813fe6f..c7d1690 100644 --- a/lockbox/templates/storage/upload.html +++ b/lockbox/templates/storage/upload.html @@ -10,9 +10,7 @@ const max_file_bytes = {{ max_file_bytes }}; const uploadPath = "{% url 'file-list' %}"; const chunkPathTemplate = "{% url 'file-append-chunk' pk='@'%}"; - - console.log("upload path is: " + uploadPath); - console.log("chunk path is: " + chunkPathTemplate); + const returnPath = "{% url 'file-append-chunk' pk='@'%}" @@ -22,18 +20,18 @@ {% block content %}

Upload file

-

Max size allowed size is: {{max_file_bytes}} bytes

+

Max size allowed: {{max_file_bytes}} bytes

-
+

Progress:

{% endblock %} diff --git a/poetry.lock b/poetry.lock index 891bf02..9316d3b 100644 --- a/poetry.lock +++ b/poetry.lock @@ -344,6 +344,17 @@ files = [ [package.extras] cli = ["click (>=5.0)"] +[[package]] +name = "python-magic" +version = "0.4.27" +description = "File type identification using libmagic" +optional = false +python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*" +files = [ + {file = "python-magic-0.4.27.tar.gz", hash = "sha256:c1ba14b08e4a5f5c31a302b7721239695b2f0f058d125bd5ce1ee36b9d9d3c3b"}, + {file = "python_magic-0.4.27-py2.py3-none-any.whl", hash = "sha256:c212960ad306f700aa0d01e5d7a325d20548ff97eb9920dcd29513174f0294d3"}, +] + [[package]] name = "sqlparse" version = "0.5.1" @@ -387,4 +398,4 @@ brotli = ["brotli"] [metadata] lock-version = "2.0" python-versions = "~3.12" -content-hash = "155d31f2edffb6e6ea604c7a1115fa072072a5370e012eea577644e0a337f0b0" +content-hash = "cf73bb83fc48555289dd3949c6bf10a7feab817496ab8f4826222a2f9b2bad0a" diff --git a/pyproject.toml b/pyproject.toml index 999fe64..6c7df1f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -13,6 +13,7 @@ whitenoise = "^6.6.0" djangorestframework = "^3.14.0" drf-nested-routers = "^0.93.5" python-dotenv = "^1.0.1" +python-magic = "^0.4.27" [tool.poetry.group.dev.dependencies] pytest = "^8.0.0"