mirror of
https://github.com/ghndrx/file-transformer-s3.git
synced 2026-02-10 06:45:05 +00:00
Initial commit: File Transformer S3 project with React dashboard and Knative functions
This commit is contained in:
30
functions/download/Dockerfile
Normal file
30
functions/download/Dockerfile
Normal file
@@ -0,0 +1,30 @@
|
||||
FROM python:3.11-slim

# Set working directory
WORKDIR /app

# Install system dependencies (gcc is needed to build any Python packages
# that ship C extensions without a prebuilt wheel)
RUN apt-get update && apt-get install -y \
    gcc \
    && rm -rf /var/lib/apt/lists/*

# Copy requirements first so the dependency layer is cached independently
# of application code changes
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy application code
COPY app.py .

# Create non-root user and hand it ownership of the application directory
RUN useradd --create-home --shell /bin/bash app && chown -R app:app /app
USER app

# Expose port
EXPOSE 5000

# Health check.
# The slim base image does not include curl and nothing above installs it, so
# the probe uses the Python interpreter that is guaranteed to be present.
# urlopen raises (non-zero exit) on connection failure or an HTTP error status.
HEALTHCHECK --interval=30s --timeout=30s --start-period=5s --retries=3 \
    CMD ["python", "-c", "import urllib.request; urllib.request.urlopen('http://localhost:5000/health', timeout=10)"]

# Run the application
CMD ["gunicorn", "--bind", "0.0.0.0:5000", "--workers", "4", "--timeout", "300", "app:app"]
|
||||
201
functions/download/app.py
Normal file
201
functions/download/app.py
Normal file
@@ -0,0 +1,201 @@
|
||||
import os
|
||||
import logging
|
||||
from datetime import datetime
|
||||
from typing import Optional
|
||||
|
||||
from flask import Flask, request, jsonify, send_file
|
||||
import psycopg2
|
||||
from psycopg2.extras import RealDictCursor
|
||||
from minio import Minio
|
||||
from minio.error import S3Error
|
||||
import structlog
|
||||
import io
|
||||
|
||||
# Configure structured logging
|
||||
# Structured logging: every event passes through this processor chain in
# order and is rendered as a JSON document by the final processor.
structlog.configure(
    processors=[
        structlog.stdlib.filter_by_level,
        structlog.stdlib.add_logger_name,
        structlog.stdlib.add_log_level,
        structlog.stdlib.PositionalArgumentsFormatter(),
        structlog.processors.TimeStamper(fmt="iso"),
        structlog.processors.StackInfoRenderer(),
        structlog.processors.format_exc_info,
        structlog.processors.UnicodeDecoder(),
        structlog.processors.JSONRenderer(),
    ],
    context_class=dict,
    logger_factory=structlog.stdlib.LoggerFactory(),
    wrapper_class=structlog.stdlib.BoundLogger,
    cache_logger_on_first_use=True,
)

logger = structlog.get_logger()

app = Flask(__name__)

# ---------------------------------------------------------------------------
# Configuration, read once from the environment at import time.
# NOTE(review): the fallback values below embed credentials; they look like
# local-development defaults -- confirm real deployments always override them.
# ---------------------------------------------------------------------------
MINIO_ENDPOINT = os.environ.get('MINIO_ENDPOINT', 'localhost:9000')
MINIO_ACCESS_KEY = os.environ.get('MINIO_ACCESS_KEY', 'minioadmin')
MINIO_SECRET_KEY = os.environ.get('MINIO_SECRET_KEY', 'minioadmin123')
MINIO_BUCKET_NAME = os.environ.get('MINIO_BUCKET_NAME', 'file-transformer-bucket')
# Only the exact string "true" (case-insensitive) enables TLS.
MINIO_USE_SSL = os.environ.get('MINIO_USE_SSL', 'false').lower() == 'true'

POSTGRES_URL = os.environ.get(
    'POSTGRES_URL',
    'postgresql://file_user:secure_password_123@localhost:5432/file_transformer',
)

# Shared MinIO client used by all request handlers in this module.
minio_client = Minio(
    MINIO_ENDPOINT,
    access_key=MINIO_ACCESS_KEY,
    secret_key=MINIO_SECRET_KEY,
    secure=MINIO_USE_SSL,
)
|
||||
|
||||
def get_db_connection():
    """Open a new psycopg2 connection to the database at POSTGRES_URL.

    A fresh connection is created on every call; callers in this module
    close it themselves once they are done.
    """
    connection = psycopg2.connect(POSTGRES_URL)
    return connection
|
||||
|
||||
def get_file_info(file_id: str) -> Optional[dict]:
    """Fetch the metadata row for a file that is not marked deleted.

    Args:
        file_id: Value matched against the ``id`` column of ``files``.

    Returns:
        The row as a plain dict, or None when no matching row exists or the
        query fails (the failure is logged rather than raised).
    """
    query = """
        SELECT id, filename, original_filename, file_size, file_type,
               mime_type, bucket_name, object_key, status
        FROM files
        WHERE id = %s AND status != 'deleted'
    """
    conn = get_db_connection()
    try:
        with conn.cursor(cursor_factory=RealDictCursor) as cursor:
            cursor.execute(query, (file_id,))
            row = cursor.fetchone()
        if row:
            return dict(row)
        return None
    except Exception as e:
        logger.error("Failed to get file info", error=str(e))
        return None
    finally:
        # Always release the connection, whether the query succeeded or not.
        conn.close()
|
||||
|
||||
def log_file_access(file_id: str, action: str, ip_address: str, user_agent: Optional[str]):
    """Record a file access in ``file_access_logs`` on a best-effort basis.

    Audit logging must never break the request it describes: every failure
    (including failing to connect to the database) is logged and swallowed.

    Args:
        file_id: ID of the file that was accessed.
        action: Short label for the access type (e.g. 'download', 'view').
        ip_address: Client address as reported by the request.
        user_agent: Client User-Agent header, or None when absent.
    """
    conn = None
    try:
        # Connecting inside the try keeps a database outage from propagating
        # to the caller, consistent with how insert failures are handled.
        conn = get_db_connection()
        with conn.cursor() as cur:
            cur.execute("""
                INSERT INTO file_access_logs (file_id, action, ip_address, user_agent)
                VALUES (%s, %s, %s, %s)
            """, (file_id, action, ip_address, user_agent))
            conn.commit()
    except Exception as e:
        logger.error("Failed to log file access", error=str(e))
        if conn is not None:
            try:
                conn.rollback()
            except Exception as rollback_error:
                # A broken connection cannot be rolled back; nothing was
                # committed, so logging the failure is sufficient.
                logger.error("Failed to roll back file access log",
                             error=str(rollback_error))
    finally:
        if conn is not None:
            conn.close()
|
||||
|
||||
@app.route('/health', methods=['GET'])
def health_check():
    """Return a static JSON payload identifying the service as healthy.

    No dependencies (database, object storage) are probed here.
    """
    payload = {'status': 'healthy', 'service': 'file-download'}
    return jsonify(payload)
|
||||
|
||||
@app.route('/download/<file_id>', methods=['GET'])
def download_file(file_id: str):
    """Stream a stored file back to the client as an attachment.

    Looks up the file's metadata, reads the object from MinIO into memory,
    records the access, and returns the bytes under the original filename.

    Responses:
        200 with the file body on success.
        404 when the metadata row is missing/deleted or the object cannot be
            fetched from storage.
        500 on any other unexpected error.
    """
    try:
        # Get file information
        file_info = get_file_info(file_id)
        if not file_info:
            return jsonify({'error': 'File not found'}), 404

        # Defensive guard kept in case a deleted row is ever returned.
        if file_info['status'] == 'deleted':
            return jsonify({'error': 'File has been deleted'}), 404

        # Get file from MinIO
        try:
            response = minio_client.get_object(
                file_info['bucket_name'],
                file_info['object_key'],
            )
            try:
                file_data = response.read()
            finally:
                # Release the pooled HTTP connection even if read() fails;
                # otherwise a failed read leaks the connection.
                response.close()
                response.release_conn()
        except S3Error as e:
            logger.error("Failed to get file from MinIO", error=str(e))
            return jsonify({'error': 'File not found in storage'}), 404

        # Log access
        log_file_access(file_id, 'download', request.remote_addr,
                        request.headers.get('User-Agent'))

        # Wrap the bytes in a file-like object for Flask to serve; a freshly
        # constructed BytesIO is already positioned at offset 0.
        file_stream = io.BytesIO(file_data)

        logger.info("File download completed",
                    file_id=file_id,
                    filename=file_info['filename'],
                    size=len(file_data))

        return send_file(
            file_stream,
            mimetype=file_info['mime_type'],
            as_attachment=True,
            download_name=file_info['original_filename'],
        )

    except Exception as e:
        # Top-level boundary for the request: log and return a generic 500.
        logger.error("Download error", error=str(e))
        return jsonify({'error': 'Internal server error'}), 500
|
||||
|
||||
@app.route('/files/<file_id>/info', methods=['GET'])
def get_file_info_endpoint(file_id: str):
    """Return a file's metadata as JSON without transferring its contents.

    Responses:
        200 with the metadata dict, 404 when no matching file is found,
        500 on an unexpected error.
    """
    try:
        metadata = get_file_info(file_id)
        if not metadata:
            return jsonify({'error': 'File not found'}), 404

        # Record the metadata view in the access log.
        client_ip = request.remote_addr
        client_agent = request.headers.get('User-Agent')
        log_file_access(file_id, 'view', client_ip, client_agent)

        return jsonify(metadata), 200

    except Exception as e:
        logger.error("Error fetching file info", error=str(e))
        return jsonify({'error': 'Internal server error'}), 500
|
||||
|
||||
@app.route('/files/<file_id>/url', methods=['GET'])
def get_download_url(file_id: str):
    """Return a time-limited presigned URL for downloading a file from storage.

    Responses:
        200 with ``file_id``, ``filename``, ``download_url`` and
            ``expires_in`` (seconds).
        404 when the file is missing or deleted.
        500 when the URL cannot be generated or on an unexpected error.
    """
    # Local import: the module's top-level import brings in only `datetime`.
    from datetime import timedelta

    expires_in_seconds = 3600  # 1 hour

    try:
        file_info = get_file_info(file_id)
        if not file_info:
            return jsonify({'error': 'File not found'}), 404

        # Defensive guard kept in case a deleted row is ever returned.
        if file_info['status'] == 'deleted':
            return jsonify({'error': 'File has been deleted'}), 404

        # Generate presigned URL
        try:
            # The MinIO client requires `expires` to be a timedelta; passing a
            # bare int raises inside the client and turned every request into
            # a 500.
            url = minio_client.presigned_get_object(
                file_info['bucket_name'],
                file_info['object_key'],
                expires=timedelta(seconds=expires_in_seconds),
            )
        except S3Error as e:
            logger.error("Failed to generate presigned URL", error=str(e))
            return jsonify({'error': 'Failed to generate download URL'}), 500

        # Log access
        log_file_access(file_id, 'url_generated', request.remote_addr,
                        request.headers.get('User-Agent'))

        return jsonify({
            'file_id': file_id,
            'filename': file_info['original_filename'],
            'download_url': url,
            'expires_in': expires_in_seconds,
        }), 200

    except Exception as e:
        # Top-level boundary for the request: log and return a generic 500.
        logger.error("Error generating download URL", error=str(e))
        return jsonify({'error': 'Internal server error'}), 500
|
||||
|
||||
if __name__ == '__main__':
    # Runs Flask's built-in server when this module is executed directly,
    # listening on all interfaces with debug mode off.
    app.run(debug=False, port=5000, host='0.0.0.0')
|
||||
24
functions/download/requirements.txt
Normal file
24
functions/download/requirements.txt
Normal file
@@ -0,0 +1,24 @@
|
||||
# Core dependencies
|
||||
flask==2.3.3
|
||||
gunicorn==21.2.0
|
||||
python-dotenv==1.0.0
|
||||
|
||||
# Database
|
||||
psycopg2-binary==2.9.7
|
||||
sqlalchemy==2.0.21
|
||||
|
||||
# MinIO/S3
|
||||
minio==7.1.17
|
||||
boto3==1.28.44
|
||||
|
||||
# HTTP requests
|
||||
requests==2.31.0
|
||||
|
||||
# JSON and data handling
|
||||
pydantic==2.1.1
|
||||
|
||||
# Logging
|
||||
structlog==23.1.0
|
||||
|
||||
# Utilities
|
||||
python-dateutil==2.8.2
|
||||
Reference in New Issue
Block a user