diff --git a/.gitignore b/.gitignore index 48c0084..7c5ef38 100644 --- a/.gitignore +++ b/.gitignore @@ -94,4 +94,5 @@ venv.bak/ lockbox/media lockbox/staticfiles -TODO.txt \ No newline at end of file +TODO.txt +FILES \ No newline at end of file diff --git a/lockbox/common/constants.py b/lockbox/common/constants.py index e0647cf..5300fa0 100644 --- a/lockbox/common/constants.py +++ b/lockbox/common/constants.py @@ -7,18 +7,15 @@ class UPLOAD_STATUS_TYPES: UPLOADING = "uploading" COMPLETED = "completed" ABANDONED = "abandoned" - PROCESSING = "processing" + ERROR = "error" + +class UPLOAD_ERROR_CODES: + FILE_MISSING = "file_missing" + CHUNK_MISMATCH = "chunk_mismatch" # Config CONFIG_KEYS = { - "EXPIRATION_DELTA_MINUTES": { - "description": "Date created + this delta at which file expires", - "verbose_name": "File expiration delta (minutes)", - "native_type": int, - "sensitive": False, - "default": 120, - }, "ABANDONED_DELTA_MINUTES": { "description": "Date created + this delta at which a file is marked as abandoned", "verbose_name": "Uncompleted file abandoned max age", diff --git a/lockbox/lockbox/settings.py b/lockbox/lockbox/settings.py index c91150f..68080c3 100644 --- a/lockbox/lockbox/settings.py +++ b/lockbox/lockbox/settings.py @@ -120,6 +120,7 @@ STORAGES = { # Storage MEDIA_ROOT = Path("/home/kitty/src/lockbox/FILES") MEDIA_URL = "files/" +INCOMPLETE_EXT = ".incomplete" validate_paths(MEDIA_ROOT) diff --git a/lockbox/lockbox/setup.py b/lockbox/lockbox/setup.py index 0e9fd78..4900290 100644 --- a/lockbox/lockbox/setup.py +++ b/lockbox/lockbox/setup.py @@ -1,11 +1,10 @@ -import os +from pathlib import Path # TODO: LOG MEEEEE # TODO: Figure out file owner in system, permissions, GUID # Whats the default path if not provided? // docker volume def validate_paths(media_path): - if not os.path.isdir(media_path): try: - os.makedirs(media_path) + Path(media_path).mkdir(exist_ok=True) except Exception as e: raise e \ No newline at end of file diff --git a/lockbox/storage/migrations/0001_initial.py b/lockbox/storage/migrations/0001_initial.py index 7f458eb..4249925 100644 --- a/lockbox/storage/migrations/0001_initial.py +++ b/lockbox/storage/migrations/0001_initial.py @@ -1,4 +1,4 @@ -# Generated by Django 4.2.15 on 2024-09-16 11:24 +# Generated by Django 4.2.15 on 2024-09-17 06:52 import common.utils from django.conf import settings @@ -23,15 +23,16 @@ class Migration(migrations.Migration): ('lid', models.UUIDField(default=uuid.uuid4, primary_key=True, serialize=False, verbose_name='lockbox ID')), ('date_created', models.DateTimeField(blank=True, help_text='date at which this object was created', verbose_name='date created')), ('date_updated', models.DateTimeField(blank=True, help_text='date at which this object was last updated', verbose_name='date updated')), - ('filename', models.CharField(help_text='Name of the file', max_length=255, verbose_name='name')), - ('extension', models.CharField(blank=True, help_text='reported filesystem extension (not mime type)', max_length=128, null=True, verbose_name='extension')), - ('file', models.FileField(blank=True, help_text='actual file', null=True, upload_to='', verbose_name='file')), - ('status', models.CharField(choices=[('uploading', 'uploading'), ('completed', 'completed'), ('processing', 'processing'), ('abandoned', 'abandoned')], default='uploading', help_text='upload status for file', max_length=10, verbose_name='status')), - ('date_completed', models.DateTimeField(blank=True, help_text="datetime at which this file's upload was completed", null=True, verbose_name='completed on')), + ('mime_type', models.CharField(blank=True, help_text='reported mime-type', max_length=128, null=True, verbose_name='mime-type')), + ('file', models.FileField(blank=True, help_text='actual file', null=True, upload_to=storage.models.upload_to_fielpath, verbose_name='file')), + ('status', models.CharField(choices=[('uploading', 'uploading'), ('completed', 'completed'), ('abandoned', 'abandoned'), ('error', 'error')], default='uploading', help_text='upload status for file', max_length=10, verbose_name='status')), + ('datetime_completed', models.DateTimeField(blank=True, help_text="datetime at which this file's upload was completed", null=True, verbose_name='completed on')), ('expires', models.BooleanField(default=False, help_text="will be scrubbed on 'date_expires'", verbose_name='expires')), ('delete_on_expiration', models.BooleanField(default=False, help_text='will be deleted if expired and expires is true', verbose_name='delete on expiration')), - ('size_on_disk', models.PositiveBigIntegerField(blank=True, help_text='total size on disk for this file', null=True, verbose_name='size on disk (bytes)')), + ('size', models.PositiveBigIntegerField(blank=True, help_text='total size on disk for this file', null=True, verbose_name='size (bytes)')), + ('expected_size', models.PositiveBigIntegerField(blank=True, help_text='expected file size', null=True, verbose_name='expected size (bytes)')), ('max_size_chunk_bytes', models.PositiveBigIntegerField(default=common.utils.get_max_size_chunk_bytes, help_text='max size of each individual chunk for this file', verbose_name='maximum size of chunks (bytes)')), + ('last_end_bytes', models.BigIntegerField(blank=True, help_text='last uploaded bytes position', null=True, verbose_name='last end bytes')), ('owner', models.ForeignKey(blank=True, help_text='Who owns this file', null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='files_owned', to=settings.AUTH_USER_MODEL, verbose_name='owner')), ], options={ @@ -39,23 +40,4 @@ class Migration(migrations.Migration): 'verbose_name_plural': 'files', }, ), - migrations.CreateModel( - name='FileChunk', - fields=[ - ('lid', models.UUIDField(default=uuid.uuid4, primary_key=True, serialize=False, verbose_name='lockbox ID')), - ('date_created', models.DateTimeField(blank=True, help_text='date at which this object was created', verbose_name='date created')), - ('date_updated', models.DateTimeField(blank=True, help_text='date at which this object was last updated', verbose_name='date updated')), - ('chunk', models.FileField(help_text='chunk file', upload_to=storage.models.get_upload_path_chunk, verbose_name='chunk file')), - ('chunk_id', models.BigIntegerField(help_text='chunk id', verbose_name='chunk id')), - ('size', models.BigIntegerField(help_text='chunk size', verbose_name='size')), - ('start_bytes', models.BigIntegerField(help_text='part of file start', verbose_name='start bytes')), - ('end_bytes', models.BigIntegerField(help_text='part of file end', verbose_name='end bytes')), - ('file', models.ForeignKey(on_delete=django.db.models.deletion.CASCADE, related_name='chunks', to='storage.file')), - ], - options={ - 'verbose_name': 'file chunk', - 'verbose_name_plural': 'file chunks', - 'unique_together': {('file', 'chunk_id')}, - }, - ), ] diff --git a/lockbox/storage/models.py b/lockbox/storage/models.py index 8ea2ace..7cd56c0 100644 --- a/lockbox/storage/models.py +++ b/lockbox/storage/models.py @@ -1,47 +1,33 @@ from datetime import timedelta from pathlib import Path - -from common.constants import UPLOAD_STATUS_TYPES +from common.constants import UPLOAD_STATUS_TYPES, UPLOAD_ERROR_CODES from common.models import LockboxBase from common.utils import get_config, get_max_size_chunk_bytes -from django.conf import settings from django.core.files.uploadedfile import UploadedFile from django.db import models, transaction from django.utils import timezone from django.utils.translation import gettext_lazy as _ +from django.conf import settings +from hashlib import md5 -def get_upload_path_chunk(instance, filename): - # TODO: How do we reconcile storage? - # TODO: Do we autodetect existing files task? - # TODO: Figure out absolute storage :(, custom storage and custom filefield? why is this not a def behaviour? +class UploadError(Exception): + + def __init__(self, *args, **kwargs): + self.code = kwargs.pop("code") + super().__init__(*args, **kwargs) - filename = f"{instance.chunk_id}.chunk" - chunk_dir = settings.MEDIA_ROOT / str(instance.file.lid) - - if not Path.exists(chunk_dir): - Path.mkdir(chunk_dir) - - target_path = Path(chunk_dir) / Path(filename) - print(target_path) - return target_path +def upload_to_fielpath(instance, filename): + return Path(str(instance.lid)).joinpath(f"{filename}{settings.INCOMPLETE_EXT}") class File(LockboxBase): - filename = models.CharField( - max_length=255, - null=False, - blank=False, - verbose_name = _("name"), - help_text=_("Name of the file"), - ) - - extension = models.CharField( + mime_type = models.CharField( max_length=128, blank=True, null=True, - verbose_name=_("extension"), - help_text=_("reported filesystem extension (not mime type)"), + verbose_name=_("mime-type"), + help_text=_("reported mime-type"), ) file = models.FileField( @@ -49,14 +35,14 @@ class File(LockboxBase): blank=True, verbose_name=_("file"), help_text=_("actual file"), + upload_to=upload_to_fielpath ) - # TODO: Make this an FSM UPLOAD_CHOICES = ( (UPLOAD_STATUS_TYPES.UPLOADING, _(UPLOAD_STATUS_TYPES.UPLOADING)), (UPLOAD_STATUS_TYPES.COMPLETED, _(UPLOAD_STATUS_TYPES.COMPLETED)), - (UPLOAD_STATUS_TYPES.PROCESSING, _(UPLOAD_STATUS_TYPES.PROCESSING)), (UPLOAD_STATUS_TYPES.ABANDONED, _(UPLOAD_STATUS_TYPES.ABANDONED)), + (UPLOAD_STATUS_TYPES.ERROR, _(UPLOAD_STATUS_TYPES.ERROR)), ) status = models.CharField( @@ -69,7 +55,7 @@ class File(LockboxBase): help_text=_("upload status for file"), ) - date_completed = models.DateTimeField( + datetime_completed = models.DateTimeField( null=True, blank=True, verbose_name=_("completed on"), @@ -102,13 +88,20 @@ class File(LockboxBase): help_text=_("will be deleted if expired and expires is true"), ) - size_on_disk = models.PositiveBigIntegerField( + size = models.PositiveBigIntegerField( null=True, blank=True, - verbose_name=_("size on disk (bytes)"), + verbose_name=_("size (bytes)"), help_text=_("total size on disk for this file"), ) + expected_size = models.PositiveBigIntegerField( + null=True, + blank=True, + verbose_name=_("expected size (bytes)"), + help_text=_("expected file size"), + ) + max_size_chunk_bytes = models.PositiveBigIntegerField( null=False, blank=False, @@ -117,157 +110,122 @@ class File(LockboxBase): help_text=_("max size of each individual chunk for this file"), ) + last_end_bytes = models.BigIntegerField( + null=True, + blank=True, + verbose_name=("last end bytes"), + help_text=_("last uploaded bytes position"), + ) + readonly_fields = [ - "extension", + "mime_type", "status", - "date_completed", - "size_on_disk", + "datetime_completed", + "size", "file", "max_size_chunk_bytes", + "last_end_bytes", *LockboxBase.readonly_fields, ] def __str__(self): - return self.filename + name = "NO NAME" + + if self.file: + name = self.file.name + return f"{name} ({self.lid})" + class Meta: verbose_name = _("file") verbose_name_plural = _("files") @property - def checksum(self): - return 0 - - @property - def date_expires(self): - return self.date_created + timedelta(minutes=get_config("EXPIRATION_DELTA_MINUTES")) + def md5(self): + if self.exists: + return md5(open(self.file, "rb").read()).hexdigest() + return None @property def abandoned(self): - return self.date_created + timedelta(minutes=get_config("ABANDONED_DELTA_MINUTES")) + return self.date_created <= timezone.now() + timedelta(minutes=get_config("ABANDONED_DELTA_MINUTES")) + + @classmethod + def abandoned_condition(): + return models.Q(date_created__lte=timezone.now() + timedelta(minutes=get_config("ABANDONED_DELTA_MINUTES"))) @property def expired(self): return self.date_expires <= timezone.now() - - @property - def last_chunk_id(self): - last_chunk_id = self.chunks.order_by("-chunk_id").values("chunk_id").first() - if last_chunk_id: - return last_chunk_id.get("chunk_id") - return - 1 - - def create_chunk(self, chunk_file, chunk_data): - chunk = FileChunk( - file=self, - chunk=chunk_file, - chunk_id=self.last_chunk_id, - **chunk_data - ) - chunk.save() - return chunk - + @classmethod + def expired_conditon(): + return models.Q(date_expires__lte=timezone.now()) + + @property + def exists(self): + if not self.file: + return False + return Path(self.file.path).is_file() + + def append_chunk(self, chunk_file, chunk_data): + # Override in case recently abandoned + # Will persist if it does not error out. + self.status = UPLOAD_STATUS_TYPES.UPLOADING + + # Do not rely on DB file state, check for actual file. + if not self.exists: + # Oh oh, we are uploading a n + 1 chunk but theres no file + if chunk_data["start_bytes"] != 0: + self.status = UPLOAD_STATUS_TYPES.ERROR + self.save() + raise UploadError("File for uploaded chunk no longer exists", code=UPLOAD_ERROR_CODES.FILE_MISSING) + + if self.last_end_bytes and self.last_end_bytes + 1 != chunk_data["start_bytes"]: + # Client screwed up, this is not where we left + raise UploadError("Mismatch in expected chunk", code=UPLOAD_ERROR_CODES.CHUNK_MISMATCH) + + self.last_end_bytes = chunk_data["end_bytes"] + if self.expected_size == self.last_end_bytes: + # File is one shot chunk. + if chunk_data["start_bytes"] == 0: + self.file = chunk_file + self.save() + self.finalize() + return + + # This is an n + 1 chunk. + print("Appending bytes yo") + chunk_file.seek(0) + self.save() + + def finalize(self): + self.refresh_from_db() + self.status = UPLOAD_STATUS_TYPES.COMPLETED + self.datetime_completed = timezone.now() + final_name = self.file.name.replace(settings.INCOMPLETE_EXT, "") + final_path = settings.MEDIA_ROOT / final_name + with transaction.atomic(): + Path(self.file.path).rename(final_path) + self.file.name = final_name + self.save() def save(self, *args, **kwargs): - self.max_size_chunk_bytes = get_max_size_chunk_bytes() + if not self.max_size_chunk_bytes: + self.max_size_chunk_bytes = get_max_size_chunk_bytes() return super().save(*args, **kwargs) def delete(self, *args, **kwargs): - if self.file: - storage, path = self.file.storage, self.file.path - - if self.file: - # TODO: Figure out if file exists and try to delete it if error, report error. - storage.delete(path) - with transaction.atomic(): - self.chunks.all().delete() + if self.file: + if Path(self.file.path).is_file(): + self.file.storage.delete(self.file.path) + self.file.storage.delete(Path(self.file.path).parent) result = super().delete(*args, **kwargs) return result - def handler_bytes(self): - # TODO: This is a naive approach, we almost never want to do this. - self.file.close() - self.file.open(mode="rb") - return UploadedFile(file=self.file, name=self.filename, size=self.offset) - -class FileChunk(LockboxBase): - file = models.ForeignKey( - "storage.File", - null=False, - blank=False, - on_delete=models.CASCADE, - related_name="chunks", - ) - - chunk = models.FileField( - upload_to=get_upload_path_chunk, - null=False, - blank=False, - verbose_name=_("chunk file"), - help_text=_("chunk file"), - ) - - chunk_id = models.BigIntegerField( - null=False, - blank=False, - verbose_name=_("chunk id"), - help_text=_("chunk id"), - ) - - size = models.BigIntegerField( - null=False, - blank=False, - verbose_name=("size"), - help_text=_("chunk size"), - ) - - start_bytes = models.BigIntegerField( - null=False, - blank=False, - verbose_name=("start bytes"), - help_text=_("part of file start"), - ) - - end_bytes = models.BigIntegerField( - null=False, - blank=False, - verbose_name=("end bytes"), - help_text=_("part of file end"), - ) - - readonly_fields = [ - "file", - "chunk_id", - "start", - "end", - "size", - *LockboxBase.readonly_fields, - ] - - def __str__(self): - return f"{self.file.filename}.{self.chunk_id}.chunk" - - class Meta: - verbose_name = _("file chunk") - verbose_name_plural = _("file chunks") - unique_together = ("file", "chunk_id") - - def save(self, *args, **kwargs): - self.chunk_id = self.file.last_chunk_id + 1 - return super().save(*args, **kwargs) - - def delete(self, *args, **kwargs): - if self.file: - storage, path = self.file.storage, self.file.path - - if self.file: - # TODO: Figure out if file exists and try to delete it if error, report error. - storage.delete(path) - return super().delete(*args, **kwargs) # class FileShare(LockboxBase): # file = models.ForeignKey( diff --git a/lockbox/storage/serializers.py b/lockbox/storage/serializers.py index 5445205..7966ec6 100644 --- a/lockbox/storage/serializers.py +++ b/lockbox/storage/serializers.py @@ -1,6 +1,6 @@ from rest_framework import serializers -from storage.models import File, FileChunk +from storage.models import File class FileSerializer(serializers.ModelSerializer): @@ -9,21 +9,3 @@ class FileSerializer(serializers.ModelSerializer): model = File fields = "__all__" read_only_fields = File.readonly_fields - - -class FileChunkSerializer(serializers.ModelSerializer): - - class Meta: - model = FileChunk - fields = "__all__" - read_only_fields = FileChunk.readonly_fields - - def validate(self, data): - data = super().validate(data) - file = File.objects.get(lid=data["file"]) - - if data["size"] > file.max_size_chunk_bytes: - detail = f"'size' param is larger than max chunk size for file:\ - {data["size"]} > {file.max_size_chunk_bytes}" - raise serializers.ValidationError(detail) - return data diff --git a/lockbox/storage/urls.py b/lockbox/storage/urls.py index ab12874..d55f170 100644 --- a/lockbox/storage/urls.py +++ b/lockbox/storage/urls.py @@ -7,11 +7,7 @@ from storage import views_api, views_client router = SimpleRouter() router.register(r'files', views_api.FileModelViewSet) -chunk_router = NestedSimpleRouter(router, r'files', lookup="file") -chunk_router.register(r'chunks', views_api.FileChunkViewSet, basename="file-chunks") - urlpatterns = [ path("api/", include(router.urls)), - path("api/", include(chunk_router.urls)), path("upload/", views_client.FileUploadView.as_view(), name="client-fileupload"), ] diff --git a/lockbox/storage/views_api.py b/lockbox/storage/views_api.py index fd5f7f7..895f77c 100644 --- a/lockbox/storage/views_api.py +++ b/lockbox/storage/views_api.py @@ -8,12 +8,14 @@ from rest_framework import status from rest_framework.decorators import action from rest_framework.response import Response from rest_framework.viewsets import ModelViewSet -from rest_framework.exceptions import NotFound, ValidationError +from rest_framework.exceptions import NotFound +from rest_framework.exceptions import ValidationError as UserValidationError from rest_framework.parsers import FileUploadParser -# from user.models import LockboxUser -from storage.models import File, FileChunk -from storage.serializers import FileChunkSerializer, FileSerializer +from django.core.exceptions import ValidationError + +from storage.models import File, UploadError +from storage.serializers import FileSerializer class FileModelViewSet(ModelViewSet): @@ -21,48 +23,38 @@ class FileModelViewSet(ModelViewSet): queryset = File.objects.all() serializer_class = FileSerializer - @action(detail=True, methods=["GET"]) - def last_chunk_position(self, request, pk=None): - file = self.get_object() - last_chunk_id = file.last_chunk_id - last_postion = 0 - if last_chunk_id != -1: - last_chunk = self.chunks.order_by("-chunk_id").values("end_bytes").first() - if last_chunk: - last_postion = last_chunk_id.get("end_bytes") - return Response({"last_chunk_position": last_postion}, status=status.HTTP_200_OK) + @action(detail=True, methods=["PUT"]) + def append_chunk(self, request, filename="DUMMY", format=None, pk=None): + try: + file = File.objects.filter(lid=pk).first() + except ValidationError: + raise UserValidationError(f"UUID {pk} is not a valid UUID") - -class FileChunkViewSet(ModelViewSet): - model = FileChunk - queryset = FileChunk.objects.all() - serializer_class = FileChunkSerializer - parser_classes = (FileUploadParser,) - - def create(self, request, filename="DUMMY", format=None, file_pk=None): - file = File.objects.filter(lid=str(file_pk)).first() if not file: - raise NotFound(f"File with ID {file_pk} was not found") + raise NotFound(f"File with ID {pk} was not found") chunk_data = self.get_content_range(request) if not chunk_data: - raise ValidationError( + raise UserValidationError( f"Missing content range headers" ) - chunk_file = request.FILES["file"] + chunk_file = request.FILES["Content"] if chunk_file.size > file.max_size_chunk_bytes: - raise ValidationError( + raise UserValidationError( f"Chunk size is greater than files max chunk size: {chunk_file.size} > {file.max_size_chunk_bytes}") range_size = chunk_data["end_bytes"] - chunk_data["start_bytes"] if chunk_file.size != range_size: - raise ValidationError( + raise UserValidationError( f"Actual chunk size mismatches content-range header: {chunk_file.size} != {range_size}" ) chunk_data["size"] = chunk_file.size - file.create_chunk(chunk_file=chunk_file, chunk_data=chunk_data) + try: + file.append_chunk(chunk_file, chunk_data) + except UploadError as e: + return Response({"code": e.code}, status=status.HTTP_400_BAD_REQUEST) return Response(status=status.HTTP_201_CREATED) def get_content_range(self, request): @@ -77,4 +69,4 @@ class FileChunkViewSet(ModelViewSet): return { "start_bytes": int(match.group('start')), "end_bytes": int(match.group('end')), - } \ No newline at end of file + }