"""File download service.

Exposes HTTP endpoints that look up file metadata in PostgreSQL, fetch the
corresponding object from MinIO (or hand out a presigned URL for it), and
record each access in the ``file_access_logs`` table.
"""

import os
import logging
from datetime import datetime, timedelta
from typing import Optional

from flask import Flask, request, jsonify, send_file
import psycopg2
from psycopg2.extras import RealDictCursor
from minio import Minio
from minio.error import S3Error
import structlog
import io

# Configure structured logging
structlog.configure(
    processors=[
        structlog.stdlib.filter_by_level,
        structlog.stdlib.add_logger_name,
        structlog.stdlib.add_log_level,
        structlog.stdlib.PositionalArgumentsFormatter(),
        structlog.processors.TimeStamper(fmt="iso"),
        structlog.processors.StackInfoRenderer(),
        structlog.processors.format_exc_info,
        structlog.processors.UnicodeDecoder(),
        structlog.processors.JSONRenderer()
    ],
    context_class=dict,
    logger_factory=structlog.stdlib.LoggerFactory(),
    wrapper_class=structlog.stdlib.BoundLogger,
    cache_logger_on_first_use=True,
)

logger = structlog.get_logger()

app = Flask(__name__)

# Configuration
# NOTE(review): the fallback credentials below are development defaults that
# are baked into the source; production deployments should always supply the
# corresponding environment variables.
MINIO_ENDPOINT = os.getenv('MINIO_ENDPOINT', 'localhost:9000')
MINIO_ACCESS_KEY = os.getenv('MINIO_ACCESS_KEY', 'minioadmin')
MINIO_SECRET_KEY = os.getenv('MINIO_SECRET_KEY', 'minioadmin123')
MINIO_BUCKET_NAME = os.getenv('MINIO_BUCKET_NAME', 'file-transformer-bucket')
MINIO_USE_SSL = os.getenv('MINIO_USE_SSL', 'false').lower() == 'true'
POSTGRES_URL = os.getenv('POSTGRES_URL', 'postgresql://file_user:secure_password_123@localhost:5432/file_transformer')

# Lifetime of presigned download URLs, in seconds (1 hour).
PRESIGNED_URL_TTL_SECONDS = 3600

# Initialize MinIO client
minio_client = Minio(
    MINIO_ENDPOINT,
    access_key=MINIO_ACCESS_KEY,
    secret_key=MINIO_SECRET_KEY,
    secure=MINIO_USE_SSL
)


def get_db_connection():
    """Create a database connection.

    Returns:
        A new psycopg2 connection to ``POSTGRES_URL``. The caller is
        responsible for closing it.
    """
    return psycopg2.connect(POSTGRES_URL)


def get_file_info(file_id: str) -> Optional[dict]:
    """Get file information from database.

    Args:
        file_id: Value matched against ``files.id``.

    Returns:
        A dict with the selected columns of the matching row, or ``None``
        when no non-deleted row matches or when the query fails (the failure
        is logged, not raised).

    Raises:
        Whatever ``get_db_connection`` raises if the connection itself cannot
        be opened; only query errors are swallowed here.
    """
    conn = get_db_connection()
    try:
        with conn.cursor(cursor_factory=RealDictCursor) as cur:
            # Rows marked 'deleted' are excluded here, so callers never see
            # a record whose status is 'deleted'.
            cur.execute("""
                SELECT id, filename, original_filename, file_size, file_type,
                       mime_type, bucket_name, object_key, status
                FROM files
                WHERE id = %s AND status != 'deleted'
            """, (file_id,))
            file_record = cur.fetchone()
            return dict(file_record) if file_record else None
    except Exception as e:
        logger.error("Failed to get file info", error=str(e))
        return None
    finally:
        conn.close()


def log_file_access(file_id: str, action: str, ip_address: Optional[str], user_agent: Optional[str]):
    """Log file access for audit purposes.

    This is best-effort: any failure (including failing to open the database
    connection) is logged and swallowed so that it does not fail the request
    being audited.

    Args:
        file_id: Identifier of the file that was accessed.
        action: Short label for the kind of access (e.g. ``'download'``).
        ip_address: Client address as reported by the request, if any.
        user_agent: Client ``User-Agent`` header, if any.
    """
    conn = None
    try:
        # Opened inside the try so a connection failure is handled the same
        # way as an insert failure.
        conn = get_db_connection()
        with conn.cursor() as cur:
            cur.execute("""
                INSERT INTO file_access_logs (file_id, action, ip_address, user_agent)
                VALUES (%s, %s, %s, %s)
            """, (file_id, action, ip_address, user_agent))
        conn.commit()
    except Exception as e:
        logger.error("Failed to log file access", error=str(e))
        if conn is not None:
            conn.rollback()
    finally:
        if conn is not None:
            conn.close()


def _log_request_access(file_id: str, action: str) -> None:
    """Record an access to *file_id* using the current request's client details."""
    log_file_access(
        file_id,
        action,
        request.remote_addr,
        request.headers.get('User-Agent')
    )


@app.route('/health', methods=['GET'])
def health_check():
    """Health check endpoint."""
    return jsonify({'status': 'healthy', 'service': 'file-download'})


@app.route('/download/<file_id>', methods=['GET'])
def download_file(file_id: str):
    """Download file by ID.

    Responds with the file contents as an attachment, 404 when the file is
    unknown (or deleted) or missing from storage, and 500 on any other error.
    """
    try:
        # Get file information (deleted files are already filtered out)
        file_info = get_file_info(file_id)
        if not file_info:
            return jsonify({'error': 'File not found'}), 404

        # Get file from MinIO
        response = None
        try:
            response = minio_client.get_object(
                file_info['bucket_name'],
                file_info['object_key']
            )
            file_data = response.read()
        except S3Error as e:
            logger.error("Failed to get file from MinIO", error=str(e))
            return jsonify({'error': 'File not found in storage'}), 404
        finally:
            # Always hand the connection back to the pool, even when the
            # read fails part-way through.
            if response is not None:
                response.close()
                response.release_conn()

        # Log access
        _log_request_access(file_id, 'download')

        # Create file-like object for Flask to serve
        file_stream = io.BytesIO(file_data)

        logger.info(
            "File download completed",
            file_id=file_id,
            filename=file_info['filename'],
            size=len(file_data)
        )

        return send_file(
            file_stream,
            mimetype=file_info['mime_type'],
            as_attachment=True,
            download_name=file_info['original_filename']
        )
    except Exception as e:
        logger.error("Download error", error=str(e))
        return jsonify({'error': 'Internal server error'}), 500


@app.route('/files/<file_id>/info', methods=['GET'])
def get_file_info_endpoint(file_id: str):
    """Get file information without downloading.

    Responds with the file's metadata record as JSON, 404 when the file is
    unknown (or deleted), and 500 on any other error.
    """
    try:
        file_info = get_file_info(file_id)
        if not file_info:
            return jsonify({'error': 'File not found'}), 404

        # Log access
        _log_request_access(file_id, 'view')

        return jsonify(file_info), 200
    except Exception as e:
        logger.error("Error fetching file info", error=str(e))
        return jsonify({'error': 'Internal server error'}), 500


@app.route('/files/<file_id>/url', methods=['GET'])
def get_download_url(file_id: str):
    """Get presigned download URL.

    Responds with a JSON object containing a presigned MinIO URL valid for
    ``PRESIGNED_URL_TTL_SECONDS`` seconds, 404 when the file is unknown (or
    deleted), and 500 when the URL cannot be generated.
    """
    try:
        file_info = get_file_info(file_id)
        if not file_info:
            return jsonify({'error': 'File not found'}), 404

        # Generate presigned URL
        try:
            url = minio_client.presigned_get_object(
                file_info['bucket_name'],
                file_info['object_key'],
                # The MinIO client expects a timedelta, not a bare integer.
                expires=timedelta(seconds=PRESIGNED_URL_TTL_SECONDS)
            )
        except S3Error as e:
            logger.error("Failed to generate presigned URL", error=str(e))
            return jsonify({'error': 'Failed to generate download URL'}), 500

        # Log access
        _log_request_access(file_id, 'url_generated')

        return jsonify({
            'file_id': file_id,
            'filename': file_info['original_filename'],
            'download_url': url,
            'expires_in': PRESIGNED_URL_TTL_SECONDS
        }), 200
    except Exception as e:
        logger.error("Error generating download URL", error=str(e))
        return jsonify({'error': 'Internal server error'}), 500


if __name__ == '__main__':
    app.run(host='0.0.0.0', port=5000, debug=False)