Blob Storage App¶
Stores and retrieves large binary data (PDFs, images, archives, etc.).
Quick Start¶
from htk.apps.blob_storage.models import Blob
# Persist raw bytes as a new blob owned by the current user
blob = Blob.objects.create(
    name='document.pdf',
    data=pdf_bytes,
    content_type='application/pdf',
    user=request.user,
)
# Load a stored blob by its id and read the payload back
blob = Blob.objects.get(id=blob_id)
binary_data = blob.data
# Remove the blob once it is no longer needed
blob.delete()
Storing Blobs¶
Store File Content¶
from htk.apps.blob_storage.models import Blob
# Persist an uploaded file, taking its name and MIME type from the upload itself
uploaded_file = request.FILES['file']
blob = Blob.objects.create(
    name=uploaded_file.name,
    data=uploaded_file.read(),
    content_type=uploaded_file.content_type,
    user=request.user,
)
Store with Metadata¶
from htk.apps.blob_storage.models import Blob
# Pass an access-control flag and a free-form metadata dict at creation time
blob = Blob.objects.create(
    name='report.xlsx',
    data=excel_bytes,
    content_type='application/vnd.openxmlformats-officedocument.spreadsheetml.sheet',
    user=request.user,
    is_private=True,  # Access control
    metadata={'version': '1.0', 'created_by': 'system', 'source': 'export'},
)
Store with Size Limit¶
from htk.apps.blob_storage.models import Blob
MAX_BLOB_SIZE = 100 * 1024 ** 2  # 100MB


def store_blob_safe(file_obj, user, name):
    """Store an uploaded file as a blob, rejecting anything over MAX_BLOB_SIZE.

    Args:
        file_obj: Upload object exposing ``size``, ``read()`` and ``content_type``.
        user: Owner to record on the blob.
        name: Name to store the blob under.

    Returns:
        The newly created ``Blob``.

    Raises:
        ValueError: If ``file_obj.size`` exceeds ``MAX_BLOB_SIZE``.
    """
    upload_size = file_obj.size
    # Check the size before reading so an oversized upload is never read
    if upload_size > MAX_BLOB_SIZE:
        raise ValueError(f'File too large: {upload_size}')
    return Blob.objects.create(
        name=name,
        data=file_obj.read(),
        content_type=file_obj.content_type,
        user=user,
        size=upload_size,
    )
Retrieving Blobs¶
Get Blob by ID¶
from htk.apps.blob_storage.models import Blob
from django.http import FileResponse
def download_blob(request, blob_id):
    """Serve a stored blob to the client as a file download.

    Args:
        request: The incoming HTTP request; ``request.user`` is the requester.
        blob_id: Id of the blob to download.

    Returns:
        A ``FileResponse`` that sends the blob's bytes as an attachment.

    Raises:
        Http404: If no blob with ``blob_id`` exists.
        PermissionDenied: If the blob is private and not owned by the requester.
    """
    import io

    from django.core.exceptions import PermissionDenied
    from django.shortcuts import get_object_or_404

    # A missing id is a client error (404), not an unhandled DoesNotExist (500)
    blob = get_object_or_404(Blob, id=blob_id)
    # Check access
    if blob.is_private and blob.user != request.user:
        raise PermissionDenied
    # FileResponse streams from a file-like object, so wrap the raw bytes.
    # as_attachment/filename let Django build a correctly escaped
    # Content-Disposition header instead of interpolating the name by hand.
    return FileResponse(
        io.BytesIO(blob.data),
        as_attachment=True,
        filename=blob.name,
        content_type=blob.content_type,
    )
List User's Blobs¶
from htk.apps.blob_storage.models import Blob
# All blobs owned by the current user
user_blobs = Blob.objects.filter(user=request.user)
# Only the PDFs among them
documents = user_blobs.filter(content_type='application/pdf')
# First ten when ordered by creation time, descending
recent = user_blobs.order_by('-created')[:10]
Common Patterns¶
Virus Scanning¶
from htk.apps.blob_storage.models import Blob
import subprocess
def scan_blob_for_virus(blob):
    """Scan a blob's bytes with ClamAV.

    Args:
        blob: Object whose ``data`` attribute holds the bytes to scan.

    Returns:
        True when ``clamscan`` exits with status 0 (clean), False otherwise.
    """
    import tempfile

    # clamscan is given a file path, so write the payload to a temporary
    # file that is removed automatically when the with-block exits.
    with tempfile.NamedTemporaryFile() as scratch:
        scratch.write(blob.data)
        scratch.flush()  # push buffered bytes out before the scanner reads the file
        completed = subprocess.run(['clamscan', scratch.name], capture_output=True)
    return completed.returncode == 0  # 0 = clean
Compression¶
import zlib
from htk.apps.blob_storage.models import Blob
def store_blob_compressed(data, name, user):
    """Store a blob with its payload zlib-compressed.

    Args:
        data: Bytes to compress and store.
        name: Name to store the blob under.
        user: Owner to record on the blob.

    Returns:
        The newly created ``Blob``, flagged ``is_compressed=True`` and carrying
        both the original and the compressed byte counts.
    """
    compressed = zlib.compress(data)
    blob = Blob.objects.create(
        name=name,
        data=compressed,
        # zlib.compress() emits a zlib stream (RFC 1950), not a gzip file, so
        # label it with the zlib media type rather than 'application/x-gzip'.
        content_type='application/zlib',
        user=user,
        is_compressed=True,
        original_size=len(data),
        compressed_size=len(compressed),
    )
    return blob
def retrieve_blob_decompressed(blob):
    """Return the blob's payload, decompressing it when it was stored compressed.

    Args:
        blob: Object exposing ``is_compressed`` and ``data``.

    Returns:
        ``zlib.decompress(blob.data)`` when ``blob.is_compressed`` is truthy,
        otherwise ``blob.data`` unchanged.
    """
    if not blob.is_compressed:
        return blob.data
    return zlib.decompress(blob.data)
S3 Storage Integration¶
from htk.apps.blob_storage.models import Blob
from htk.lib.aws.s3.utils import S3Manager
def store_blob_to_s3(file_data, name, user, content_type='application/octet-stream'):
    """Store blob to S3 instead of database.

    Args:
        file_data: Payload to upload.
        name: Name of the blob; also the last segment of the S3 key.
        user: Owner of the blob; ``user.id`` is part of the S3 key.
        content_type: MIME type to record on the blob. Defaults to the generic
            ``application/octet-stream`` so existing three-argument calls keep
            working (the original body referenced an undefined ``content_type``).

    Returns:
        The newly created ``Blob`` whose ``s3_path`` points at the uploaded object.
    """
    s3 = S3Manager()
    # Upload to S3
    s3_path = f'blobs/{user.id}/{name}'
    s3.put_file('data-bucket', s3_path, file_data)
    # Create record pointing to S3
    blob = Blob.objects.create(
        name=name,
        s3_path=s3_path,  # Store reference instead of data
        content_type=content_type,
        user=user,
    )
    return blob
Expiring Blobs¶
from django.utils import timezone
from datetime import timedelta
from htk.apps.blob_storage.models import Blob
def create_temporary_blob(data, name, user, ttl_hours=24):
    """Create a blob whose ``expires_at`` is ``ttl_hours`` from now.

    Args:
        data: Bytes to store.
        name: Name to store the blob under.
        user: Owner to record on the blob.
        ttl_hours: Hours from now at which the blob expires (default 24).

    Returns:
        The newly created ``Blob``.
    """
    lifetime = timedelta(hours=ttl_hours)
    return Blob.objects.create(
        name=name,
        data=data,
        user=user,
        expires_at=timezone.now() + lifetime,
    )
def cleanup_expired_blobs():
    """Delete every blob whose ``expires_at`` is earlier than the current time."""
    now = timezone.now()
    expired = Blob.objects.filter(expires_at__lt=now)
    expired.delete()
Access Control¶
from htk.apps.blob_storage.models import Blob
def get_blob_safe(user, blob_id):
    """Get blob only if user has access.

    Access policy (matches the check in ``download_blob`` on this page, plus a
    staff override): the owner and staff users may always read a blob; any
    other user may read it only when it is not private.

    The original checked ``blob.user.is_staff`` after the ownership test, which
    locked non-staff owners out of their own private blobs.

    Args:
        user: The user requesting access.
        blob_id: Id of the blob to fetch.

    Returns:
        The ``Blob`` when access is allowed.

    Raises:
        Blob.DoesNotExist: If no blob with ``blob_id`` exists.
        PermissionDenied: If the blob is private and ``user`` is neither its
            owner nor staff.
    """
    from django.core.exceptions import PermissionDenied

    blob = Blob.objects.get(id=blob_id)
    # Owner and staff always have access
    if blob.user == user or user.is_staff:
        return blob
    # Everyone else is limited to non-private blobs
    if blob.is_private:
        raise PermissionDenied('Blob is private')
    return blob
Models¶
Blob¶
class Blob(models.Model):
    # Model listing for a stored binary object and its descriptive fields.
    # NOTE(review): examples elsewhere on this page pass metadata, s3_path,
    # is_compressed, original_size and compressed_size, which are not declared
    # here — confirm whether this listing is abridged.

    # Optional owner (nullable); CASCADE deletes the blob when its user is deleted
    user = ForeignKey(User, null=True, blank=True, on_delete=models.CASCADE)
    # Name of the blob, at most 255 characters
    name = CharField(max_length=255)
    # Raw binary payload
    data = BinaryField()
    # Content type string, at most 100 characters (examples pass MIME types)
    content_type = CharField(max_length=100)
    # Private flag; blobs are not private unless this is set
    is_private = BooleanField(default=False)
    size = IntegerField() # bytes
    # Optional expiry timestamp; may be null
    expires_at = DateTimeField(null=True, blank=True)
    # Set automatically when the row is first created
    created = DateTimeField(auto_now_add=True)
    # Refreshed automatically on every save
    updated = DateTimeField(auto_now=True)
Configuration¶
# settings.py
BLOB_STORAGE_MAX_SIZE = 100 * 1024 ** 2  # 100MB
BLOB_STORAGE_ALLOWED_TYPES = ['application/pdf', 'image/jpeg', 'image/png', 'application/zip']
BLOB_STORAGE_ENABLE_COMPRESSION = False
BLOB_STORAGE_ENABLE_S3 = False  # Use S3 instead of DB
BLOB_STORAGE_ENABLE_VIRUS_SCAN = False  # ClamAV scan
Best Practices¶
- Validate file types - Check both the MIME type and the file extension
- Scan for viruses - Use ClamAV or a similar scanner
- Set size limits - Reject oversized uploads
- Expire temporary files - Clean up expired blobs regularly
- Use S3 for large files - A database is not ideal for storing large files
- Compress when appropriate - Reduce storage usage
- Track access - Log downloads