# Source-viewer residue (not part of the program): Files - 201 lines - 6.8 KiB - Python

import os
import logging
from datetime import datetime
from typing import Optional
from flask import Flask, request, jsonify, send_file
import psycopg2
from psycopg2.extras import RealDictCursor
from minio import Minio
from minio.error import S3Error
import structlog
import io
# Configure structured logging.
# Processors run in the order listed; JSONRenderer is last so that every
# event leaves the chain as a single JSON document.
# NOTE(review): stdlib ``logging`` is not configured anywhere in the visible
# file. With ``filter_by_level`` in the chain, events below the stdlib
# logger's effective level are presumably dropped (info-level included if the
# default level applies) - confirm logging is configured by the deployment.
structlog.configure(
    logger_factory=structlog.stdlib.LoggerFactory(),
    wrapper_class=structlog.stdlib.BoundLogger,
    context_class=dict,
    cache_logger_on_first_use=True,
    processors=[
        # Level filtering and standard metadata.
        structlog.stdlib.filter_by_level,
        structlog.stdlib.add_logger_name,
        structlog.stdlib.add_log_level,
        structlog.stdlib.PositionalArgumentsFormatter(),
        # Timestamp in ISO format.
        structlog.processors.TimeStamper(fmt="iso"),
        # Stack / exception rendering.
        structlog.processors.StackInfoRenderer(),
        structlog.processors.format_exc_info,
        structlog.processors.UnicodeDecoder(),
        # Final rendering step.
        structlog.processors.JSONRenderer(),
    ],
)

# Module-wide logger used by every function below.
logger = structlog.get_logger()
# Flask application object; the route decorators below register against it.
app = Flask(__name__)

# Configuration: every setting is read from the environment, falling back to
# the literal default when the variable is unset.
# NOTE(review): the fallback credentials look like local-development values;
# confirm they are always overridden in deployed environments.
MINIO_ENDPOINT = os.environ.get('MINIO_ENDPOINT', 'localhost:9000')
MINIO_ACCESS_KEY = os.environ.get('MINIO_ACCESS_KEY', 'minioadmin')
MINIO_SECRET_KEY = os.environ.get('MINIO_SECRET_KEY', 'minioadmin123')
MINIO_BUCKET_NAME = os.environ.get('MINIO_BUCKET_NAME', 'file-transformer-bucket')
# Only the exact (case-insensitive) string "true" enables TLS.
MINIO_USE_SSL = os.environ.get('MINIO_USE_SSL', 'false').lower() == 'true'
POSTGRES_URL = os.environ.get(
    'POSTGRES_URL',
    'postgresql://file_user:secure_password_123@localhost:5432/file_transformer',
)

# Initialize the MinIO client shared by all request handlers.
minio_client = Minio(
    MINIO_ENDPOINT,
    secure=MINIO_USE_SSL,
    access_key=MINIO_ACCESS_KEY,
    secret_key=MINIO_SECRET_KEY,
)
def get_db_connection():
    """Open a new PostgreSQL connection using ``POSTGRES_URL``.

    A fresh connection is created on every call; the caller is responsible
    for closing it. Any error raised by ``psycopg2.connect`` propagates.
    """
    connection = psycopg2.connect(POSTGRES_URL)
    return connection
def get_file_info(file_id: str) -> Optional[dict]:
    """Look up the metadata row of a non-deleted file.

    Args:
        file_id: Value matched against ``files.id``.

    Returns:
        A plain ``dict`` of the selected columns, or ``None`` when no row
        matches or when the query fails (the failure is logged, not raised).

    Raises:
        Whatever ``get_db_connection`` raises: the connection is opened
        before the ``try`` block, so connect errors are not swallowed here.
    """
    select_sql = (
        "\n"
        "SELECT id, filename, original_filename, file_size, file_type,\n"
        "mime_type, bucket_name, object_key, status\n"
        "FROM files\n"
        "WHERE id = %s AND status != 'deleted'\n"
    )
    connection = get_db_connection()
    try:
        with connection.cursor(cursor_factory=RealDictCursor) as cursor:
            cursor.execute(select_sql, (file_id,))
            row = cursor.fetchone()
        if row:
            # Convert the cursor's row object into an ordinary dict.
            return dict(row)
        return None
    except Exception as e:
        logger.error("Failed to get file info", error=str(e))
        return None
    finally:
        connection.close()
def log_file_access(file_id: str, action: str, ip_address: Optional[str], user_agent: Optional[str]):
    """Record a file access in ``file_access_logs`` (best effort).

    Audit logging must never break the request being served, so every
    failure - including failure to open the database connection - is logged
    and swallowed.

    Args:
        file_id: Identifier of the accessed file.
        action: Short label for what was done (the routes in this file pass
            'download', 'view' and 'url_generated').
        ip_address: Client address; may be ``None`` when the server cannot
            determine it.
        user_agent: Value of the ``User-Agent`` header, if any.

    Returns:
        None.
    """
    insert_sql = (
        "\n"
        "INSERT INTO file_access_logs (file_id, action, ip_address, user_agent)\n"
        "VALUES (%s, %s, %s, %s)\n"
    )
    conn = None
    try:
        # Opened inside the try so that a connect failure is handled the same
        # way as an insert failure instead of escaping to the caller.
        conn = get_db_connection()
        with conn.cursor() as cur:
            cur.execute(insert_sql, (file_id, action, ip_address, user_agent))
        conn.commit()
    except Exception as e:
        # No explicit rollback: closing the connection below discards the
        # uncommitted transaction, and rollback() on a broken connection
        # could itself raise out of this handler.
        logger.error("Failed to log file access", error=str(e))
    finally:
        if conn is not None:
            conn.close()
@app.route('/health', methods=['GET'])
def health_check():
    """Liveness probe: always answers with a static JSON body.

    No dependency (database, object storage) is contacted.
    """
    body = {'status': 'healthy', 'service': 'file-download'}
    return jsonify(body)
@app.route('/download/<file_id>', methods=['GET'])
def download_file(file_id: str):
    """Stream a stored file back to the client as an attachment.

    Args:
        file_id: Identifier taken from the URL and looked up via
            ``get_file_info``.

    Returns:
        * the file contents (``send_file``) on success;
        * 404 JSON when the metadata row is missing/deleted or the object
          cannot be fetched from MinIO;
        * 500 JSON on any other unexpected error.

    Note:
        The whole object is read into memory before it is sent.
    """
    try:
        # Get file information
        file_info = get_file_info(file_id)
        if not file_info:
            return jsonify({'error': 'File not found'}), 404
        # get_file_info already excludes deleted rows in its query; this
        # check is kept as a second guard.
        if file_info['status'] == 'deleted':
            return jsonify({'error': 'File has been deleted'}), 404

        # Get file from MinIO
        try:
            response = minio_client.get_object(
                file_info['bucket_name'],
                file_info['object_key']
            )
            try:
                file_data = response.read()
            finally:
                # Always hand the HTTP connection back to the pool, even when
                # read() fails part-way; otherwise the connection leaks.
                response.close()
                response.release_conn()
        except S3Error as e:
            logger.error("Failed to get file from MinIO", error=str(e))
            return jsonify({'error': 'File not found in storage'}), 404

        # Log access
        log_file_access(file_id, 'download', request.remote_addr, request.headers.get('User-Agent'))

        # Create file-like object for Flask to serve (a new BytesIO is
        # already positioned at offset 0).
        file_stream = io.BytesIO(file_data)
        logger.info("File download completed",
                    file_id=file_id,
                    filename=file_info['filename'],
                    size=len(file_data))
        return send_file(
            file_stream,
            mimetype=file_info['mime_type'],
            as_attachment=True,
            download_name=file_info['original_filename']
        )
    except Exception as e:
        logger.error("Download error", error=str(e))
        return jsonify({'error': 'Internal server error'}), 500
@app.route('/files/<file_id>/info', methods=['GET'])
def get_file_info_endpoint(file_id: str):
    """Return a file's metadata as JSON without transferring its contents.

    Responds 200 with the record from ``get_file_info``, 404 when there is
    no such record, and 500 on any unexpected error. A successful lookup is
    recorded with action 'view'.
    """
    try:
        record = get_file_info(file_id)
        if record:
            # Log access
            client_ip = request.remote_addr
            agent = request.headers.get('User-Agent')
            log_file_access(file_id, 'view', client_ip, agent)
            return jsonify(record), 200
        return jsonify({'error': 'File not found'}), 404
    except Exception as e:
        logger.error("Error fetching file info", error=str(e))
        return jsonify({'error': 'Internal server error'}), 500
@app.route('/files/<file_id>/url', methods=['GET'])
def get_download_url(file_id: str):
    """Return a presigned MinIO URL for downloading the file directly.

    Args:
        file_id: Identifier taken from the URL and looked up via
            ``get_file_info``.

    Returns:
        * 200 JSON with ``file_id``, ``filename``, ``download_url`` and
          ``expires_in`` (seconds) on success;
        * 404 JSON when the metadata row is missing/deleted;
        * 500 JSON when the URL cannot be generated or on any other error.
    """
    # Function-scope import: the module header only imports ``datetime``
    # from the datetime package.
    from datetime import timedelta

    # The MinIO SDK requires the expiry as a timedelta; passing a bare int
    # fails inside the SDK when it calls ``expires.total_seconds()``.
    url_lifetime = timedelta(hours=1)

    try:
        file_info = get_file_info(file_id)
        if not file_info:
            return jsonify({'error': 'File not found'}), 404
        # get_file_info already excludes deleted rows in its query; this
        # check is kept as a second guard.
        if file_info['status'] == 'deleted':
            return jsonify({'error': 'File has been deleted'}), 404

        # Generate presigned URL
        try:
            url = minio_client.presigned_get_object(
                file_info['bucket_name'],
                file_info['object_key'],
                expires=url_lifetime
            )
        except S3Error as e:
            logger.error("Failed to generate presigned URL", error=str(e))
            return jsonify({'error': 'Failed to generate download URL'}), 500

        # Log access
        log_file_access(file_id, 'url_generated', request.remote_addr, request.headers.get('User-Agent'))
        return jsonify({
            'file_id': file_id,
            'filename': file_info['original_filename'],
            'download_url': url,
            # Derived from the same value handed to MinIO so the two cannot
            # drift apart (3600 for the one-hour lifetime).
            'expires_in': int(url_lifetime.total_seconds())
        }), 200
    except Exception as e:
        logger.error("Error generating download URL", error=str(e))
        return jsonify({'error': 'Internal server error'}), 500
if __name__ == '__main__':
    # Direct execution only: serve on every interface, port 5000, with the
    # Flask debugger switched off.
    app.run(
        debug=False,
        port=5000,
        host='0.0.0.0',
    )