Upload is now working
continuous-integration/drone/push: Build is failing

Added file hash validation (client vs server)
Added MIME type guessing
Added upload checkpoints
Improved error handling
Jordi Loyzaga 2024-09-19 03:54:52 -06:00
parent ea84012059
commit 3279d6c5dc
12 changed files with 155 additions and 53 deletions
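
Taken together, the changes below add a checkpointed, hash-verified upload flow: the client first POSTs the filename, expected size and a client-side SHA-256 to the file endpoint, then PUTs chunks (multipart field "Content") of at most max_size_chunk_bytes, resuming from the server-reported last_end_bytes checkpoint; on completion the server guesses the MIME type and, when VERIFY_ENABLE is set, recomputes the hash before renaming the .incomplete file. A minimal Python sketch of a client for that flow (illustrative only, not part of this commit; the concrete URLs are assumptions standing in for the DRF routes named 'file-list' and 'file-append-chunk', and finalize-on-last-chunk is implied rather than shown in this diff):

    import hashlib
    from pathlib import Path

    import requests

    FILES_URL = "http://localhost:8000/api/files/"  # hypothetical path for the 'file-list' route

    def upload(path):
        data = Path(path).read_bytes()
        # 1. Register the file: name, expected size and the client-side sha256.
        created = requests.post(FILES_URL, json={
            "filename": Path(path).name,
            "expected_size": len(data),
            "sha256": hashlib.sha256(data).hexdigest(),
        }).json()
        # hypothetical path for the 'file-append-chunk' route
        chunk_url = f"{FILES_URL}{created['lid']}/append_chunk/"
        # 2. PUT chunks, resuming from the server-side checkpoint.
        pos = created["last_end_bytes"]
        while pos < len(data):
            chunk = data[pos:pos + created["max_size_chunk_bytes"]]
            requests.put(chunk_url, files={"Content": ("chunk", chunk)}).raise_for_status()
            pos += len(chunk)  # the next start must equal the new last_end_bytes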

8
.pre-commit-config.yaml Normal file
View File

@@ -0,0 +1,8 @@
+repos:
+  - repo: local
+    hooks:
+      - id: flake8
+        name: flake8
+        entry: flake8
+        language: system
+        files: '\.py$'

View File

@@ -14,7 +14,7 @@ class UPLOAD_STATUS_TYPES:
class UPLOAD_ERROR_CODES:
    FILE_MISSING = "file_missing"
    CHUNK_MISMATCH = "chunk_mismatch"
+    VERIFICATION_FAILED = "verification_failed"

# Config
@@ -48,6 +48,13 @@ CONFIG_KEYS = {
        "sensitive": False,
        "default": 1024 * 1024 * 30,  # 300 MB
    },
+    "VERIFY_ENABLE": {
+        "description": "Verify uploaded file integrity (sha256)",
+        "verbose_name": "File integrity verification",
+        "native_type": bool,
+        "sensitive": False,
+        "default": True
+    },
    "ENABLE_BROWSABLE_API": {
        "description": "REST Framework browsable API is enabled (Always enabled if DEBUG is true)",
        "verbose_name": "Enable browsable API",

View File

@@ -118,6 +118,7 @@ STORAGES = {
MEDIA_ROOT = Path("/home/kitty/src/lockbox/FILES")
MEDIA_URL = "files/"
INCOMPLETE_EXT = ".incomplete"
+DEFAULT_FILE_HEADER_BYTES = 2048

validate_paths(MEDIA_ROOT)

View File

@@ -3,6 +3,8 @@ const uploadButton = document.getElementById("upload-button");
const fileSizeReport = document.getElementById("file-size");
const progressBar = document.getElementById("progressBar");
+let isReady = false;

fileInput.value = '';
fileInput.addEventListener('change', handleFileChange);
uploadButton.addEventListener('click', handleFileUpload);
@@ -10,71 +12,85 @@ uploadButton.addEventListener('click', handleFileUpload);
function handleFileChange(event) {
    const file = event.target.files[0];
    const file_size = file.size;
-    fileSizeReport.textContent = "File size is: " + file.size;
+    fileSizeReport.textContent = "File size is: " + file.size + " bytes";
    if (file_size > max_file_bytes){
        console.log("File size is too large");
-        // Handle this.
+        isReady = false;
        return
    }
    console.log("Ready!");
+    isReady = true;
}

async function handleFileUpload(event) {
+    if (!isReady){
+        console.log("Not ready");
+        return
+    }
+    isReady = false;
+    const file = fileInput.files[0];
    let headers = new Headers();
    headers.append("Content-Type", "application/json");
    const request_args = {
        method: "POST",
        headers: headers,
        body: JSON.stringify(
            {
-                "expected_size": fileInput.files[0].size
+                "filename": file.name,
+                "expected_size": file.size,
+                "sha256": await getHash(file),
            }
        )
    };
    const response = await fetch(uploadPath, request_args);
    if (!response.ok) {
        throw new Error(`Response status: ${response.status}`);
    }
-    const file = await response.json();
-    await uploadChunks(file);
+    const apifile = await response.json();
+    await uploadChunks(apifile);
}

function updateProgressBar(remaining, total) {
    let current_percent = Math.round((total - remaining) / (total / 100));
-    progressBar.textContent = current_percent;
+    progressBar.textContent = current_percent + " %";
}

async function uploadChunks(remoteFile){
    const chunkPath = chunkPathTemplate.replace("@", remoteFile.lid);
    let file = fileInput.files[0];
-    let bytes_remaining = remoteFile.expected_size
+    let bytes_remaining = remoteFile.expected_size;
-    let last_transfer_position = 0;
+    let last_transfer_position = remoteFile.last_end_bytes; // Start where we left off; default is 0
    let to_transfer = remoteFile.max_size_chunk_bytes;
    console.log("Chunk size is: " + remoteFile.max_size_chunk_bytes);
    while (bytes_remaining >= 0) {
-        updateProgressBar(bytes_remaining, remoteFile.expected_size);
        if (bytes_remaining <= remoteFile.max_size_chunk_bytes) {
            to_transfer = bytes_remaining;
            bytes_remaining = 0;
        }
        await uploadChunk(file, [last_transfer_position, last_transfer_position += to_transfer], chunkPath);
-        last_transfer_position += 1;
        bytes_remaining -= to_transfer;
+        updateProgressBar(bytes_remaining, remoteFile.expected_size);
    }
-    console.log("Done!")
+    console.log("Done!");
-    progressBar.textContent = 100;
+    progressBar.textContent = "Done!";
}

async function uploadChunk(file, byte_range, chunkPath) {
+    console.log(byte_range);
    let file_bytes_target = file.slice(byte_range[0], byte_range[1]);
    let body = new FormData();
    body.append("Content", file_bytes_target);
    let headers = new Headers();
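
The loop above slices half-open [start, end) byte ranges, and because the server now expects the next chunk to start exactly at last_end_bytes (the "+ 1" offset was dropped on both sides), a resumed upload simply begins at the checkpoint. A quick sketch of that arithmetic, with a made-up 70 MiB file and 30 MiB chunk size:

    # Illustrative only - the 70 MiB file and 30 MiB chunk size are invented numbers.
    def chunk_ranges(expected_size, max_chunk, start=0):
        """Yield half-open [start, end) ranges; `start` is the server checkpoint (last_end_bytes)."""
        pos = start
        while pos < expected_size:
            end = min(pos + max_chunk, expected_size)
            yield (pos, end)
            pos = end  # the next chunk starts exactly where the previous one ended

    print(list(chunk_ranges(70 * 1024 * 1024, 30 * 1024 * 1024)))
    # [(0, 31457280), (31457280, 62914560), (62914560, 73400320)]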

View File

@@ -12,4 +12,32 @@ function getCookie(name) {
        }
    }
    return cookieValue;
}

+function arrayBufferToWordArray(ab) {
+    var i8a = new Uint8Array(ab);
+    var a = [];
+    for (var i = 0; i < i8a.length; i += 4) {
+        a.push(i8a[i] << 24 | i8a[i + 1] << 16 | i8a[i + 2] << 8 | i8a[i + 3]);
+    }
+    return CryptoJS.lib.WordArray.create(a, i8a.length);
+}

+async function getHash(file) {
+    // I hate this language so much.
+    const read = (blob) => new Promise((resolve, reject) => {
+        const reader = new FileReader();
+        reader.onload = (event) => resolve(event.target.result);
+        reader.onerror = reject;
+        reader.readAsArrayBuffer(blob);
+    });
+    const file_bytes = await read(file);
+    const hash = CryptoJS.SHA256(
+        arrayBufferToWordArray(
+            file_bytes
+        )
+    );
+    return hash.toString(CryptoJS.enc.Hex);
+}
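
arrayBufferToWordArray exists because CryptoJS hashes 32-bit WordArrays rather than raw ArrayBuffers: the loop packs bytes big-endian into words, and the second argument to WordArray.create (i8a.length) records the real byte count so a trailing partial word is not over-counted. The hex digest it returns is what the server later recomputes with hashlib; a sketch of that server-side counterpart (illustrative, mirroring File.verify() further down in this commit):

    import hashlib

    def server_side_digest(path):
        # What the CryptoJS digest sent by the browser has to match.
        with open(path, "rb") as fh:
            return hashlib.sha256(fh.read()).hexdigest()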

View File

@@ -1,4 +1,4 @@
-# Generated by Django 4.2.15 on 2024-09-17 19:51
+# Generated by Django 4.2.15 on 2024-09-19 09:40

import common.utils
from django.conf import settings

@@ -25,14 +25,16 @@ class Migration(migrations.Migration):
                ('date_updated', models.DateTimeField(blank=True, help_text='date at which this object was last updated', verbose_name='date updated')),
                ('mime_type', models.CharField(blank=True, help_text='reported mime-type', max_length=128, null=True, verbose_name='mime-type')),
                ('file', models.FileField(blank=True, help_text='actual file', null=True, upload_to=storage.models._upload_to_fielpath, verbose_name='file')),
+                ('filename', models.CharField(help_text='file name', max_length=256, verbose_name='filename')),
                ('status', models.CharField(choices=[('uploading', 'uploading'), ('completed', 'completed'), ('abandoned', 'abandoned'), ('error', 'error')], default='uploading', help_text='upload status for file', max_length=10, verbose_name='status')),
                ('datetime_completed', models.DateTimeField(blank=True, help_text="datetime at which this file's upload was completed", null=True, verbose_name='completed on')),
                ('expires', models.BooleanField(default=False, help_text="will be scrubbed on 'date_expires'", verbose_name='expires')),
+                ('sha256', models.CharField(help_text='file hash (sha256)', max_length=64, verbose_name='hash (sha256)')),
                ('delete_on_expiration', models.BooleanField(default=False, help_text='will be deleted if expired and expires is true', verbose_name='delete on expiration')),
                ('size', models.PositiveBigIntegerField(blank=True, help_text='total size on disk for this file', null=True, verbose_name='size (bytes)')),
                ('expected_size', models.PositiveBigIntegerField(help_text='expected file size', verbose_name='expected size (bytes)')),
                ('max_size_chunk_bytes', models.PositiveBigIntegerField(default=common.utils.get_max_size_chunk_bytes, help_text='max size of each individual chunk for this file', verbose_name='maximum size of chunks (bytes)')),
-                ('last_end_bytes', models.BigIntegerField(blank=True, help_text='last uploaded bytes position', null=True, verbose_name='last end bytes')),
+                ('last_end_bytes', models.BigIntegerField(default=0, help_text='last uploaded bytes position', verbose_name='last end bytes')),
                ('owner', models.ForeignKey(blank=True, help_text='Who owns this file', null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='files_owned', to=settings.AUTH_USER_MODEL, verbose_name='owner')),
            ],
            options={

View File

@@ -1,7 +1,8 @@
from datetime import timedelta
-from hashlib import md5
+from hashlib import sha256
from pathlib import Path

+import magic
from common.constants import UPLOAD_ERROR_CODES, UPLOAD_STATUS_TYPES
from common.models import LockboxBase
from common.utils import get_config, get_max_size_chunk_bytes

@@ -20,7 +21,7 @@ class UploadError(Exception):
def _upload_to_fielpath(instance, filename):
-    return Path(str(instance.lid)).joinpath(f"{filename}{settings.INCOMPLETE_EXT}")
+    return Path(str(instance.lid)).joinpath(f"{instance.filename}{settings.INCOMPLETE_EXT}")

class File(LockboxBase):

@@ -40,6 +41,14 @@ class File(LockboxBase):
        upload_to=_upload_to_fielpath,
    )

+    filename = models.CharField(
+        null=False,
+        blank=False,
+        max_length=256,  # safeish in most FS
+        verbose_name=_("filename"),
+        help_text=_("file name")
+    )

    UPLOAD_CHOICES = (
        (UPLOAD_STATUS_TYPES.UPLOADING, _(UPLOAD_STATUS_TYPES.UPLOADING)),
        (UPLOAD_STATUS_TYPES.COMPLETED, _(UPLOAD_STATUS_TYPES.COMPLETED)),

@@ -82,6 +91,14 @@ class File(LockboxBase):
        help_text=_("will be scrubbed on 'date_expires'"),
    )

+    sha256 = models.CharField(
+        null=False,
+        blank=False,
+        max_length=64,
+        verbose_name=_("hash (sha256)"),
+        help_text=_("file hash (sha256)")
+    )

    delete_on_expiration = models.BooleanField(
        null=False,
        blank=False,

@@ -113,8 +130,9 @@ class File(LockboxBase):
    )

    last_end_bytes = models.BigIntegerField(
-        null=True,
-        blank=True,
+        null=False,
+        blank=False,
+        default=0,
        verbose_name=("last end bytes"),
        help_text=_("last uploaded bytes position"),
    )

@@ -141,15 +159,6 @@ class File(LockboxBase):
        verbose_name = _("file")
        verbose_name_plural = _("files")

-    @property
-    def md5(self):
-        if self.exists:
-            self.file.open("rb")
-            md5_hash = md5(self.file.read()).hexdigest()
-            self.file.close()
-            return md5_hash
-        return None

    @property
    def abandoned(self):
        return self.date_created <= timezone.now() + timedelta(minutes=get_config("ABANDONED_DELTA_MINUTES"))

@@ -200,7 +209,7 @@ class File(LockboxBase):
                code=UPLOAD_ERROR_CODES.FILE_MISSING,
            )

-        if self.last_end_bytes and self.last_end_bytes + 1 != chunk_data["start_bytes"]:
+        if self.last_end_bytes and self.last_end_bytes != chunk_data["start_bytes"]:
            # Client screwed up, this is not where we left
            raise UploadError(
                "Mismatch in expected chunk",

@@ -225,20 +234,48 @@ class File(LockboxBase):
    def finalize(self):
        """Finalizes the file

+        Guesses mimetype
+        Validates file hash if enabled
+        Renames file to the originally provided filename, whatever it is.
        Sets file status to 'completed'
        Sets datetime_completed to now
-        Renames file from file.extention.incomplete to file.extention
        """
        self.refresh_from_db()
-        self.status = UPLOAD_STATUS_TYPES.COMPLETED
-        self.datetime_completed = timezone.now()
-        final_name = self.file.name.replace(settings.INCOMPLETE_EXT, "")
-        final_path = settings.MEDIA_ROOT / final_name
+        self.mime_type = self.guess_type()
+        if get_config("VERIFY_ENABLE"):
+            result = self.verify()
+            if not result:
+                self.status = UPLOAD_STATUS_TYPES.ERROR
+                raise UploadError(
+                    "File verification failed",
+                    code=UPLOAD_ERROR_CODES.VERIFICATION_FAILED
+                )
+        final_path = settings.MEDIA_ROOT / str(self.lid) / self.filename
        with transaction.atomic():
            Path(self.file.path).rename(final_path)
-            self.file.name = final_name
+            self.file.name = self.filename
+            self.status = UPLOAD_STATUS_TYPES.COMPLETED
+            self.datetime_completed = timezone.now()
            self.save()

+    def verify(self):
+        if self.exists:
+            self.file.open("rb")
+            sha256_hash = sha256(self.file.read()).hexdigest()
+            self.file.close()
+            return sha256_hash == self.sha256
+        raise Exception(f"Fatal: Could not get file hash - file {self.file.path} does not exist")

+    def guess_type(self):
+        self.file.open("rb")
+        self.file.seek(0)
+        mime_type = magic.from_buffer(self.file.read(settings.DEFAULT_FILE_HEADER_BYTES), mime=True)
+        self.file.close()
+        return mime_type

    def save(self, *args, **kwargs):
        if not self.max_size_chunk_bytes:
            self.max_size_chunk_bytes = get_max_size_chunk_bytes()
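
One thing to note about the new verify(): it loads the whole file into memory with a single read() before hashing, which is fine for small uploads but costly for files near the size limit. A streamed variant is possible with the same hashlib API (a sketch under that assumption, not part of this commit):

    from hashlib import sha256

    def verify_streaming(file_field, expected_hex):
        """Hash a Django FieldFile in chunks instead of one read() call (sketch)."""
        digest = sha256()
        file_field.open("rb")
        try:
            for block in file_field.chunks():  # Django's default 64 KiB chunks
                digest.update(block)
        finally:
            file_field.close()
        return digest.hexdigest() == expected_hex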

View File

@@ -19,15 +19,6 @@ class FileModelViewSet(ModelViewSet):
    queryset = File.objects.all()
    serializer_class = FileSerializer

-    @action(detail=True, methods=["GET"])
-    def md5(self, request, pk=None):
-        try:
-            file = File.objects.filter(lid=pk).first()
-        except ValidationError:
-            raise UserValidationError(f"UUID {pk} is not a valid UUID")
-        return Response({"md5": file.md5}, status=status.HTTP_200_OK)

    @action(detail=True, methods=["PUT"])
    def append_chunk(self, request, filename="DUMMY", format=None, pk=None):
        try:

@@ -54,6 +45,7 @@ class FileModelViewSet(ModelViewSet):
                "Could not find 'Content' in request body"
            )

+        # Bytes are inclusive for slicing but not for size, go figure.
        if chunk_file.size > file.max_size_chunk_bytes:
            raise UserValidationError(
                f"Chunk size is greater than files max chunk size: {chunk_file.size} > {file.max_size_chunk_bytes}"

View File

@@ -10,6 +10,7 @@
    {% block prejs %}
    {% endblock %}
+    <script src="https://cdnjs.cloudflare.com/ajax/libs/crypto-js/4.0.0/crypto-js.min.js"></script>
    <title>{% block title %}Lockbox{% endblock %}</title>
</head>

View File

@@ -10,9 +10,7 @@
        const max_file_bytes = {{ max_file_bytes }};
        const uploadPath = "{% url 'file-list' %}";
        const chunkPathTemplate = "{% url 'file-append-chunk' pk='@'%}";
-        const returnPath = "{% url 'file-append-chunk' pk='@'%}"
-        console.log("upload path is: " + uploadPath);
-        console.log("chunk path is: " + chunkPathTemplate);
    </script>

@@ -22,18 +20,18 @@
{% block content %}
    <p> Upload file </p>
-    <p id="max_size">Max size allowed size is: {{max_file_bytes}} bytes</p>
+    <p id="max_size">Max size allowed: {{max_file_bytes}} bytes</p>
    <p id="file-size"></p>
    <table>
        <form>
-            <label for="expected_size">Expected Size:</label><br>
            <input type="file" id="file-upload">
            <input type="button" id="upload-button" value="Upload">
        </form>
    </table>
+    <p>Progress: </p>
    <p id="progressBar"></p>
{% endblock %}

13
poetry.lock generated
View File

@@ -344,6 +344,17 @@ files = [
[package.extras]
cli = ["click (>=5.0)"]

+[[package]]
+name = "python-magic"
+version = "0.4.27"
+description = "File type identification using libmagic"
+optional = false
+python-versions = ">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*"
+files = [
+    {file = "python-magic-0.4.27.tar.gz", hash = "sha256:c1ba14b08e4a5f5c31a302b7721239695b2f0f058d125bd5ce1ee36b9d9d3c3b"},
+    {file = "python_magic-0.4.27-py2.py3-none-any.whl", hash = "sha256:c212960ad306f700aa0d01e5d7a325d20548ff97eb9920dcd29513174f0294d3"},
+]

[[package]]
name = "sqlparse"
version = "0.5.1"

@@ -387,4 +398,4 @@ brotli = ["brotli"]
[metadata]
lock-version = "2.0"
python-versions = "~3.12"
-content-hash = "155d31f2edffb6e6ea604c7a1115fa072072a5370e012eea577644e0a337f0b0"
+content-hash = "cf73bb83fc48555289dd3949c6bf10a7feab817496ab8f4826222a2f9b2bad0a"

View File

@@ -13,6 +13,7 @@ whitenoise = "^6.6.0"
djangorestframework = "^3.14.0"
drf-nested-routers = "^0.93.5"
python-dotenv = "^1.0.1"
+python-magic = "^0.4.27"

[tool.poetry.group.dev.dependencies]
pytest = "^8.0.0"