From fd9abd02100066047ce55f11720d5f172cd9f839 Mon Sep 17 00:00:00 2001 From: greg Date: Fri, 4 Jul 2025 08:01:46 -0700 Subject: [PATCH] Initial commit: File Transformer S3 project with React dashboard and Knative functions --- .gitignore | 192 +++++++++ Makefile | 144 +++++++ README.md | 264 ++++++++++++ TROUBLESHOOTING.md | 227 ++++++++++ api-gateway/Dockerfile | 30 ++ api-gateway/app.py | 165 +++++++ api-gateway/requirements.txt | 11 + dashboard/Dockerfile | 35 ++ dashboard/nginx.conf | 72 ++++ dashboard/package.json | 62 +++ dashboard/src/App.css | 115 +++++ dashboard/src/App.js | 67 +++ dashboard/src/components/Header.js | 30 ++ dashboard/src/components/Layout.js | 20 + dashboard/src/components/ProtectedRoute.js | 15 + dashboard/src/components/Sidebar.js | 59 +++ dashboard/src/contexts/AuthContext.js | 66 +++ dashboard/src/index.css | 20 + dashboard/src/index.js | 11 + dashboard/src/pages/Buckets.js | 94 ++++ dashboard/src/pages/Dashboard.js | 223 ++++++++++ dashboard/src/pages/Files.js | 102 +++++ dashboard/src/pages/Login.js | 111 +++++ dashboard/src/pages/Settings.js | 205 +++++++++ dashboard/src/pages/Transformations.js | 120 ++++++ dashboard/src/pages/Upload.js | 159 +++++++ dashboard/src/services/api.js | 180 ++++++++ dashboard/tailwind.config.js | 43 ++ database/init.sql | 174 ++++++++ docker-compose.yml | 195 +++++++++ env.example | 82 ++++ functions/Makefile | 29 ++ functions/download/Dockerfile | 30 ++ functions/download/app.py | 201 +++++++++ functions/download/requirements.txt | 24 ++ functions/metadata/Dockerfile | 30 ++ functions/metadata/app.py | 307 +++++++++++++ functions/metadata/requirements.txt | 24 ++ functions/requirements.txt | 51 +++ functions/transform/Dockerfile | 33 ++ functions/transform/app.py | 473 +++++++++++++++++++++ functions/transform/requirements.txt | 33 ++ functions/upload/Dockerfile | 31 ++ functions/upload/app.py | 287 +++++++++++++ functions/upload/requirements.txt | 27 ++ k8s/dashboard.yaml | 82 ++++ 
k8s/functions/download-service.yaml | 85 ++++ k8s/functions/metadata-service.yaml | 85 ++++ k8s/functions/transform-service.yaml | 63 +++ k8s/functions/upload-service.yaml | 63 +++ k8s/minio.yaml | 112 +++++ k8s/namespace.yaml | 7 + k8s/postgres.yaml | 100 +++++ setup.sh | 114 +++++ 54 files changed, 5584 insertions(+) create mode 100644 .gitignore create mode 100644 Makefile create mode 100644 README.md create mode 100644 TROUBLESHOOTING.md create mode 100644 api-gateway/Dockerfile create mode 100644 api-gateway/app.py create mode 100644 api-gateway/requirements.txt create mode 100644 dashboard/Dockerfile create mode 100644 dashboard/nginx.conf create mode 100644 dashboard/package.json create mode 100644 dashboard/src/App.css create mode 100644 dashboard/src/App.js create mode 100644 dashboard/src/components/Header.js create mode 100644 dashboard/src/components/Layout.js create mode 100644 dashboard/src/components/ProtectedRoute.js create mode 100644 dashboard/src/components/Sidebar.js create mode 100644 dashboard/src/contexts/AuthContext.js create mode 100644 dashboard/src/index.css create mode 100644 dashboard/src/index.js create mode 100644 dashboard/src/pages/Buckets.js create mode 100644 dashboard/src/pages/Dashboard.js create mode 100644 dashboard/src/pages/Files.js create mode 100644 dashboard/src/pages/Login.js create mode 100644 dashboard/src/pages/Settings.js create mode 100644 dashboard/src/pages/Transformations.js create mode 100644 dashboard/src/pages/Upload.js create mode 100644 dashboard/src/services/api.js create mode 100644 dashboard/tailwind.config.js create mode 100644 database/init.sql create mode 100644 docker-compose.yml create mode 100644 env.example create mode 100644 functions/Makefile create mode 100644 functions/download/Dockerfile create mode 100644 functions/download/app.py create mode 100644 functions/download/requirements.txt create mode 100644 functions/metadata/Dockerfile create mode 100644 functions/metadata/app.py create mode 
100644 functions/metadata/requirements.txt create mode 100644 functions/requirements.txt create mode 100644 functions/transform/Dockerfile create mode 100644 functions/transform/app.py create mode 100644 functions/transform/requirements.txt create mode 100644 functions/upload/Dockerfile create mode 100644 functions/upload/app.py create mode 100644 functions/upload/requirements.txt create mode 100644 k8s/dashboard.yaml create mode 100644 k8s/functions/download-service.yaml create mode 100644 k8s/functions/metadata-service.yaml create mode 100644 k8s/functions/transform-service.yaml create mode 100644 k8s/functions/upload-service.yaml create mode 100644 k8s/minio.yaml create mode 100644 k8s/namespace.yaml create mode 100644 k8s/postgres.yaml create mode 100755 setup.sh diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..1c263be --- /dev/null +++ b/.gitignore @@ -0,0 +1,192 @@ +# Dependencies +node_modules/ +npm-debug.log* +yarn-debug.log* +yarn-error.log* +__pycache__/ +*.py[cod] +*$py.class +*.so +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# Environment variables +.env +.env.local +.env.development.local +.env.test.local +.env.production.local + +# IDE +.vscode/ +.idea/ +*.swp +*.swo +*~ + +# OS +.DS_Store +.DS_Store? 
+._* +.Spotlight-V100 +.Trashes +ehthumbs.db +Thumbs.db + +# Logs +logs +*.log +npm-debug.log* +yarn-debug.log* +yarn-error.log* +lerna-debug.log* + +# Runtime data +pids +*.pid +*.seed +*.pid.lock + +# Coverage directory used by tools like istanbul +coverage/ +*.lcov + +# nyc test coverage +.nyc_output + +# Dependency directories +jspm_packages/ + +# Optional npm cache directory +.npm + +# Optional eslint cache +.eslintcache + +# Microbundle cache +.rpt2_cache/ +.rts2_cache_cjs/ +.rts2_cache_es/ +.rts2_cache_umd/ + +# Optional REPL history +.node_repl_history + +# Output of 'npm pack' +*.tgz + +# Yarn Integrity file +.yarn-integrity + +# parcel-bundler cache (https://parceljs.org/) +.cache +.parcel-cache + +# Next.js build output +.next + +# Nuxt.js build / generate output +.nuxt +dist + +# Gatsby files +.cache/ +public + +# Storybook build outputs +.out +.storybook-out + +# Temporary folders +tmp/ +temp/ + +# Docker +.dockerignore + +# Kubernetes +*.kubeconfig + +# Database +*.db +*.sqlite +*.sqlite3 + +# MinIO data +minio_data/ + +# PostgreSQL data +postgres_data/ + +# Build artifacts +build/ +dist/ +*.tar.gz +*.zip + +# Test artifacts +.coverage +.pytest_cache/ +htmlcov/ + +# Python virtual environments +venv/ +env/ +ENV/ +env.bak/ +venv.bak/ + +# Jupyter Notebook +.ipynb_checkpoints + +# pyenv +.python-version + +# pipenv +Pipfile.lock + +# PEP 582 +__pypackages__/ + +# Celery +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# Local development +.local/ \ No newline at end of file diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..37f5fa6 --- /dev/null +++ b/Makefile @@ -0,0 +1,144 @@ +.PHONY: help setup install-deps build-dashboard build-functions deploy-local deploy-knative clean logs status + +# Default 
# Every target here is a command, not a file, so all of them are declared phony.
.PHONY: help setup setup-full install-deps build-dashboard build-functions \
	deploy-local deploy-knative deploy-all clean logs status reset-db \
	dev-dashboard dev-functions

# Look up KEY in .env: first line that starts with "KEY=", everything after the
# first '=' (so values that themselves contain '=' survive). Expands to an empty
# string when .env or the key is missing.
env_value = $(shell grep -E '^$(1)=' .env 2>/dev/null | head -n 1 | cut -d '=' -f 2-)

# Default target
help:
	@echo "File Transformer S3 - Available Commands:"
	@echo ""
	@echo "Setup & Installation:"
	@echo "  setup-full       - Full setup with dependency installation (recommended)"
	@echo "  setup            - Initial setup (creates .env, installs dependencies)"
	@echo "  install-deps     - Install all dependencies"
	@echo "  build-dashboard  - Build React dashboard"
	@echo "  build-functions  - Build Knative functions"
	@echo ""
	@echo "Deployment:"
	@echo "  deploy-local     - Deploy locally with Docker Compose"
	@echo "  deploy-knative   - Deploy to Knative cluster"
	@echo "  deploy-all       - Deploy everything"
	@echo ""
	@echo "Management:"
	@echo "  logs             - View logs from all services"
	@echo "  status           - Check status of all services"
	@echo "  clean            - Clean up all resources"
	@echo "  reset-db         - Reset PostgreSQL database"
	@echo ""
	@echo "Development:"
	@echo "  dev-dashboard    - Start dashboard in development mode"
	@echo "  dev-functions    - Start functions in development mode"

# Environment setup: create .env from the committed template (env.example),
# never overwriting an existing .env, then install dependencies.
setup:
	@echo "Setting up File Transformer S3..."
	@if [ ! -f .env ]; then \
		cp env.example .env; \
		echo "Created .env from env.example"; \
	else \
		echo ".env already exists"; \
	fi
	@echo "Please edit .env with your configuration values"
	@$(MAKE) install-deps

# Full setup with dependency installation
setup-full:
	@echo "Running full setup with dependency installation..."
	@./setup.sh

# Install dependencies.
# The tool checks use the POSIX redirection ">/dev/null 2>&1": make runs recipes
# with /bin/sh, where the bash-only "&>" would background the command instead.
install-deps:
	@echo "Installing dependencies..."
	@if command -v npm >/dev/null 2>&1; then \
		cd dashboard && npm install; \
	else \
		echo "⚠️  npm not found. Please install Node.js and npm first."; \
		echo "   Run: ./setup.sh"; \
		exit 1; \
	fi
	@if command -v pip3 >/dev/null 2>&1; then \
		pip3 install -r functions/requirements.txt; \
	else \
		echo "⚠️  pip3 not found. Please install Python3 and pip first."; \
		echo "   Run: ./setup.sh"; \
		exit 1; \
	fi
	@echo "Dependencies installed successfully"

# Build dashboard
build-dashboard:
	@echo "Building React dashboard..."
	@if command -v npm >/dev/null 2>&1; then \
		cd dashboard && npm run build; \
	else \
		echo "⚠️  npm not found. Please install Node.js and npm first."; \
		echo "   Run: ./setup.sh"; \
		exit 1; \
	fi
	@echo "Dashboard built successfully"

# Build functions (delegates to functions/Makefile)
build-functions:
	@echo "Building Knative functions..."
	@cd functions && $(MAKE) build
	@echo "Functions built successfully"

# Deploy locally
deploy-local:
	@echo "Deploying locally with Docker Compose..."
	@docker-compose up -d
	@echo "Local deployment complete"
	@echo "Dashboard: http://localhost:$(call env_value,REACT_APP_PORT)"
	@echo "MinIO Console: http://localhost:$(call env_value,MINIO_CONSOLE_PORT)"

# Deploy to Knative: namespace first, then backing services, then functions and UI.
deploy-knative:
	@echo "Deploying to Knative cluster..."
	@kubectl apply -f k8s/namespace.yaml
	@kubectl apply -f k8s/postgres.yaml
	@kubectl apply -f k8s/minio.yaml
	@kubectl apply -f k8s/functions/
	@kubectl apply -f k8s/dashboard.yaml
	@echo "Knative deployment complete"

# Deploy everything
deploy-all: build-dashboard build-functions deploy-knative

# View logs
logs:
	@echo "Viewing logs from all services..."
	@docker-compose logs -f

# Check status
status:
	@echo "Checking service status..."
	@docker-compose ps
	@echo ""
	@echo "Dashboard: http://localhost:$(call env_value,REACT_APP_PORT)"
	@echo "MinIO Console: http://localhost:$(call env_value,MINIO_CONSOLE_PORT)"

# Clean up. WARNING: "down -v" also deletes the Compose volumes (database and
# object-store data), and "system prune" removes unused Docker resources host-wide.
clean:
	@echo "Cleaning up resources..."
	@docker-compose down -v
	@docker system prune -f
	@echo "Cleanup complete"

# Reset database: drop and recreate the public schema, then replay init.sql.
# NOTE(review): assumes docker-compose.yml mounts database/init.sql at
# /docker-entrypoint-initdb.d/init.sql inside the postgres container - confirm.
reset-db:
	@echo "Resetting PostgreSQL database..."
	@docker-compose exec postgres psql -U $(call env_value,POSTGRES_USER) -d $(call env_value,POSTGRES_DB) -c "DROP SCHEMA public CASCADE; CREATE SCHEMA public;"
	@docker-compose exec postgres psql -U $(call env_value,POSTGRES_USER) -d $(call env_value,POSTGRES_DB) -f /docker-entrypoint-initdb.d/init.sql
	@echo "Database reset complete"

# Development mode
dev-dashboard:
	@echo "Starting dashboard in development mode..."
	@if command -v npm >/dev/null 2>&1; then \
		cd dashboard && npm start; \
	else \
		echo "⚠️  npm not found. Please install Node.js and npm first."; \
		echo "   Run: ./setup.sh"; \
		exit 1; \
	fi

# NOTE(review): the Flask apps live in functions/<name>/app.py, so "flask run"
# from functions/ presumably needs FLASK_APP to point at one of them - confirm.
dev-functions:
	@echo "Starting functions in development mode..."
	@cd functions && python -m flask run --host=0.0.0.0 --port=5000
+ +## 🚀 Features + +- **Modern React Dashboard**: Beautiful, responsive UI for managing files and transformations +- **Knative Functions**: Serverless Python functions for file processing +- **PostgreSQL Database**: Robust data storage with comprehensive schema +- **MinIO Storage**: S3-compatible object storage +- **Environment-Driven**: Fully configurable via environment variables +- **Docker & Kubernetes**: Complete containerization and orchestration +- **Automated Setup**: Makefile for easy deployment and management + +## 🏗️ Architecture + +``` +┌─────────────────┐ ┌─────────────────┐ ┌─────────────────┐ +│ React │ │ Knative │ │ PostgreSQL │ +│ Dashboard │◄──►│ Functions │◄──►│ Database │ +└─────────────────┘ └─────────────────┘ └─────────────────┘ + │ │ │ + │ │ │ + ▼ ▼ ▼ +┌─────────────────┐ ┌─────────────────┐ ┌─────────────────┐ +│ MinIO │ │ API Gateway │ │ File Storage │ +│ Console │ │ (Optional) │ │ & Processing │ +└─────────────────┘ └─────────────────┘ └─────────────────┘ +``` + +## 📋 Prerequisites + +- Docker and Docker Compose +- Kubernetes cluster with Knative installed +- kubectl configured +- Node.js 18+ (for development) +- Python 3.11+ (for development) + +## 🛠️ Quick Start + +### 1. Clone and Setup + +```bash +git clone <repository-url> +cd file-transformer-s3 +make setup +``` + +### 2. Configure Environment + +Edit the `.env` file with your configuration: + +```bash +cp env.example .env +# Edit .env with your settings +``` + +### 3. Local Development + +```bash +# Start all services locally +make deploy-local + +# Or start individual components +make dev-dashboard +make dev-functions +``` + +### 4. 
Kubernetes Deployment + +```bash +# Deploy to Knative cluster +make deploy-knative + +# Check status +make status +``` + +## 📁 Project Structure + +``` +file-transformer-s3/ +├── dashboard/ # React frontend +│ ├── src/ +│ │ ├── components/ # React components +│ │ ├── pages/ # Page components +│ │ ├── services/ # API services +│ │ └── contexts/ # React contexts +│ ├── Dockerfile +│ └── package.json +├── functions/ # Knative functions +│ ├── upload/ # File upload function +│ ├── transform/ # File transformation function +│ ├── download/ # File download function +│ ├── metadata/ # File metadata function +│ └── requirements.txt +├── k8s/ # Kubernetes manifests +│ ├── namespace.yaml +│ ├── postgres.yaml +│ ├── minio.yaml +│ ├── dashboard.yaml +│ └── functions/ +├── database/ # Database scripts +│ └── init.sql +├── docker-compose.yml # Local development +├── Makefile # Automation scripts +├── env.example # Environment template +└── README.md +``` + +## 🔧 Configuration + +### Environment Variables + +Key configuration options in `.env`: + +```bash +# Application +APP_NAME=file-transformer-s3 +APP_ENV=development + +# Dashboard +REACT_APP_PORT=3000 +REACT_APP_API_BASE_URL=http://localhost:8080 + +# PostgreSQL +POSTGRES_HOST=localhost +POSTGRES_PORT=5432 +POSTGRES_DB=file_transformer +POSTGRES_USER=file_user +POSTGRES_PASSWORD=secure_password_123 + +# MinIO +MINIO_ENDPOINT=localhost:9000 +MINIO_ACCESS_KEY=minioadmin +MINIO_SECRET_KEY=minioadmin123 +MINIO_BUCKET_NAME=file-transformer-bucket + +# Knative Functions +KNATIVE_NAMESPACE=file-transformer +FUNCTION_UPLOAD_ENDPOINT=http://file-upload-service.file-transformer.svc.cluster.local +FUNCTION_TRANSFORM_ENDPOINT=http://file-transform-service.file-transformer.svc.cluster.local +``` + +## 🎯 Available Commands + +### Setup & Installation +```bash +make setup # Initial setup +make install-deps # Install dependencies +make build-dashboard # Build React dashboard +make build-functions # Build Knative functions +``` + +### 
Deployment +```bash +make deploy-local # Deploy locally with Docker Compose +make deploy-knative # Deploy to Knative cluster +make deploy-all # Deploy everything +``` + +### Management +```bash +make logs # View logs from all services +make status # Check status of all services +make clean # Clean up all resources +make reset-db # Reset PostgreSQL database +``` + +### Development +```bash +make dev-dashboard # Start dashboard in development mode +make dev-functions # Start functions in development mode +``` + +## 📊 Dashboard Features + +- **File Management**: Upload, download, delete, and view files +- **Transformation Pipeline**: Convert files between formats +- **Real-time Monitoring**: Live status updates and progress tracking +- **Analytics**: File type distribution and storage usage +- **Bucket Management**: MinIO bucket operations +- **User Management**: Authentication and authorization + +## 🔄 File Transformations + +Supported transformations: + +- **Text Extraction**: Extract text from PDF and DOCX files +- **Format Conversion**: CSV/Excel to JSON, image format conversion +- **Image Processing**: Resize, compress, and convert images +- **Document Processing**: PDF manipulation and text extraction + +## 🗄️ Database Schema + +Key tables: + +- `files`: File metadata and storage information +- `transformations`: Transformation job tracking +- `buckets`: MinIO bucket management +- `users`: User authentication and authorization +- `sessions`: User session management +- `file_access_logs`: Audit trail for file operations + +## 🔒 Security + +- JWT-based authentication +- Role-based access control +- Secure file upload validation +- Audit logging for all operations +- Environment variable configuration +- Non-root container execution + +## 📈 Monitoring & Logging + +- Structured logging with structlog +- Health check endpoints +- Prometheus metrics (planned) +- Real-time dashboard updates +- Error tracking and reporting + +## 🚀 Production Deployment + +### 
Prerequisites +- Kubernetes cluster with Knative +- Ingress controller (nginx-ingress) +- Persistent volume provisioner +- Container registry access + +### Deployment Steps +1. Build and push container images +2. Apply Kubernetes manifests +3. Configure ingress and DNS +4. Set up monitoring and logging +5. Configure backups and disaster recovery + +## 🤝 Contributing + +1. Fork the repository +2. Create a feature branch +3. Make your changes +4. Add tests +5. Submit a pull request + +## 📄 License + +This project is licensed under the MIT License - see the LICENSE file for details. + +## 🆘 Support + +For support and questions: +- Create an issue in the repository +- Check the documentation +- Review the troubleshooting guide + +## 🔮 Roadmap + +- [ ] Advanced file transformations +- [ ] Batch processing capabilities +- [ ] Webhook integrations +- [ ] Advanced analytics +- [ ] Multi-tenant support +- [ ] API rate limiting +- [ ] Advanced security features \ No newline at end of file diff --git a/TROUBLESHOOTING.md b/TROUBLESHOOTING.md new file mode 100644 index 0000000..8ad7566 --- /dev/null +++ b/TROUBLESHOOTING.md @@ -0,0 +1,227 @@ +# Troubleshooting Guide + +## Common Issues and Solutions + +### 1. Node.js/npm not found + +**Error:** `bash: npm: command not found` + +**Solution:** +```bash +# On Arch Linux +sudo pacman -S nodejs npm + +# On Ubuntu/Debian +curl -fsSL https://deb.nodesource.com/setup_lts.x | sudo -E bash - +sudo apt-get install -y nodejs + +# Or run the setup script +./setup.sh +``` + +### 2. Python/pip not found + +**Error:** `bash: pip: command not found` + +**Solution:** +```bash +# On Arch Linux +sudo pacman -S python python-pip + +# On Ubuntu/Debian +sudo apt-get update +sudo apt-get install -y python3 python3-pip + +# Or run the setup script +./setup.sh +``` + +### 3. 
Docker not found or permission denied + +**Error:** `docker: command not found` or `Got permission denied while trying to connect to the Docker daemon` + +**Solution:** +```bash +# Install Docker +# On Arch Linux +sudo pacman -S docker docker-compose + +# On Ubuntu/Debian +curl -fsSL https://get.docker.com -o get-docker.sh +sudo sh get-docker.sh + +# Add user to docker group +sudo usermod -aG docker $USER + +# Start Docker service +sudo systemctl enable docker +sudo systemctl start docker + +# Log out and back in for group changes to take effect +``` + +### 4. Port already in use + +**Error:** `Error starting userland proxy: listen tcp 0.0.0.0:3000: bind: address already in use` + +**Solution:** +```bash +# Check what's using the port +sudo lsof -i :3000 + +# Kill the process or change the port in .env file +# Edit .env and change REACT_APP_PORT to another value like 3001 +``` + +### 5. Database connection failed + +**Error:** `could not connect to server: Connection refused` + +**Solution:** +```bash +# Check if PostgreSQL is running +docker-compose ps + +# Start the services +docker-compose up -d + +# Check logs +docker-compose logs postgres +``` + +### 6. MinIO connection failed + +**Error:** `MinIO connection error` + +**Solution:** +```bash +# Check if MinIO is running +docker-compose ps + +# Start the services +docker-compose up -d + +# Check MinIO logs +docker-compose logs minio + +# Access MinIO console at http://localhost:9001 +``` + +### 7. Build failures + +**Error:** `npm ERR!` or `pip install` failures + +**Solution:** +```bash +# Clear npm cache +npm cache clean --force + +# Clear pip cache +pip cache purge + +# Reinstall dependencies +make clean +make install-deps +``` + +### 8. Permission issues + +**Error:** `Permission denied` when running scripts + +**Solution:** +```bash +# Make scripts executable +chmod +x setup.sh +chmod +x *.sh + +# Check file permissions +ls -la +``` + +### 9. 
Environment variables not loaded + +**Error:** `Environment variable not found` + +**Solution:** +```bash +# Check if .env file exists +ls -la .env + +# Create .env from template +cp env.example .env + +# Edit .env with your values +nano .env +``` + +### 10. React app not starting + +**Error:** `Module not found` or React compilation errors + +**Solution:** +```bash +# Clear node_modules and reinstall +cd dashboard +rm -rf node_modules package-lock.json +npm install + +# Check for missing dependencies +npm list --depth=0 +``` + +## Quick Fix Commands + +### Reset everything and start fresh: +```bash +# Stop all services +docker-compose down -v + +# Clean up +make clean + +# Full setup +make setup-full + +# Start services +make deploy-local +``` + +### Check system status: +```bash +# Check all services +make status + +# Check logs +make logs + +# Check dependencies +which node npm python3 docker +``` + +### Development mode: +```bash +# Start dashboard in dev mode +make dev-dashboard + +# Start functions in dev mode +make dev-functions +``` + +## Getting Help + +If you're still experiencing issues: + +1. Check the logs: `make logs` +2. Verify your environment: `make status` +3. Check the documentation in `README.md` +4. 
FROM python:3.11-slim

# Set working directory
WORKDIR /app

# Install system dependencies.
# curl is required by the HEALTHCHECK below; the slim base image does not ship it,
# so without it every health probe fails and the container is marked unhealthy.
RUN apt-get update && apt-get install -y --no-install-recommends \
    gcc \
    curl \
    && rm -rf /var/lib/apt/lists/*

# Copy requirements first so the dependency layer is cached across code changes
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy application code
COPY app.py .

# Create non-root user and hand it ownership of the application directory
RUN useradd --create-home --shell /bin/bash app && chown -R app:app /app
USER app

# Expose port
EXPOSE 8080

# Health check against the gateway's /health endpoint
HEALTHCHECK --interval=30s --timeout=30s --start-period=5s --retries=3 \
    CMD curl -f http://localhost:8080/health || exit 1

# Run the application (the 300s worker timeout matches the gateway's longest upstream timeout)
CMD ["gunicorn", "--bind", "0.0.0.0:8080", "--workers", "4", "--timeout", "300", "app:app"]
"""API gateway: forwards dashboard HTTP requests to the function services.

Each route relays the incoming request to one of the upload, transform,
download or metadata services and returns that service's response. When a
service cannot be reached, or its reply cannot be read, the gateway answers
503 with a JSON error body.
"""
import logging
import os

import requests
from flask import Flask, jsonify, request
from flask_cors import CORS

app = Flask(__name__)
CORS(app)

# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Function service URLs (overridable through the environment)
UPLOAD_SERVICE_URL = os.getenv('UPLOAD_SERVICE_URL', 'http://function-upload:5000')
TRANSFORM_SERVICE_URL = os.getenv('TRANSFORM_SERVICE_URL', 'http://function-transform:5000')
DOWNLOAD_SERVICE_URL = os.getenv('DOWNLOAD_SERVICE_URL', 'http://function-download:5000')
METADATA_SERVICE_URL = os.getenv('METADATA_SERVICE_URL', 'http://function-metadata:5000')

# Upstream headers that must not be copied onto a relayed response:
# hop-by-hop headers describe the upstream connection only, and
# Content-Encoding / Content-Length describe the body before `requests`
# decoded it, so they no longer match the bytes being returned.
_DROPPED_RESPONSE_HEADERS = frozenset({
    'connection',
    'keep-alive',
    'proxy-authenticate',
    'proxy-authorization',
    'te',
    'trailers',
    'transfer-encoding',
    'upgrade',
    'content-encoding',
    'content-length',
})


def _forward_json(method, url, service_name, **kwargs):
    """Send one request upstream and relay its JSON body and status code.

    Args:
        method: HTTP method name passed to ``requests.request``.
        url: Absolute URL of the upstream endpoint.
        service_name: Capitalised service label used in the log line and in
            the error payload (for example ``'Metadata'``).
        **kwargs: Extra arguments for ``requests.request`` (``timeout``,
            ``json``, ``params``, ``files``, ``data``).

    Returns:
        A ``(response, status)`` tuple for Flask: the upstream JSON and status
        on success, or a 503 error payload when ``requests`` raises.
    """
    try:
        response = requests.request(method, url, **kwargs)
        return jsonify(response.json()), response.status_code
    except requests.exceptions.RequestException as e:
        logger.error("%s service error: %s", service_name, e)
        return jsonify({'error': f'{service_name} service unavailable'}), 503


@app.route('/health', methods=['GET'])
def health_check():
    """Health check endpoint."""
    return jsonify({'status': 'healthy', 'service': 'api-gateway'})


@app.route('/files/upload', methods=['POST'])
def upload_file():
    """Route file upload requests to upload service."""
    # Rebuild the multipart parts explicitly so the client's filename and MIME
    # type are forwarded; `multi=True` keeps repeated form fields.
    files = [
        (field, (upload.filename, upload.stream,
                 upload.mimetype or 'application/octet-stream'))
        for field, upload in request.files.items(multi=True)
    ]
    return _forward_json(
        'post',
        f"{UPLOAD_SERVICE_URL}/upload",
        'Upload',
        files=files,
        data=list(request.form.items(multi=True)),
        timeout=300,
    )


@app.route('/files/<file_id>/transform', methods=['POST'])
def transform_file(file_id):
    """Route transformation requests to transform service."""
    return _forward_json(
        'post',
        f"{TRANSFORM_SERVICE_URL}/transform/{file_id}",
        'Transform',
        json=request.get_json(),
        timeout=300,
    )


@app.route('/files/<file_id>/download', methods=['GET'])
def download_file(file_id):
    """Route download requests to download service.

    Relays the upstream body and status, copying every upstream header except
    those listed in ``_DROPPED_RESPONSE_HEADERS``.
    """
    try:
        response = requests.get(
            f"{DOWNLOAD_SERVICE_URL}/download/{file_id}",
            timeout=300,
        )
    except requests.exceptions.RequestException as e:
        logger.error("Download service error: %s", e)
        return jsonify({'error': 'Download service unavailable'}), 503

    headers = [
        (name, value)
        for name, value in response.headers.items()
        if name.lower() not in _DROPPED_RESPONSE_HEADERS
    ]
    return response.content, response.status_code, headers


@app.route('/files/<file_id>/metadata', methods=['GET'])
def get_file_metadata(file_id):
    """Route metadata requests to metadata service."""
    return _forward_json(
        'get',
        f"{METADATA_SERVICE_URL}/files/{file_id}/metadata",
        'Metadata',
        timeout=30,
    )


@app.route('/files/<file_id>/metadata', methods=['PUT'])
def update_file_metadata(file_id):
    """Route metadata update requests to metadata service."""
    return _forward_json(
        'put',
        f"{METADATA_SERVICE_URL}/files/{file_id}/metadata",
        'Metadata',
        json=request.get_json(),
        timeout=30,
    )


@app.route('/files', methods=['GET'])
def get_files():
    """Route file listing requests to metadata service."""
    return _forward_json(
        'get',
        f"{METADATA_SERVICE_URL}/files",
        'Metadata',
        params=request.args,
        timeout=30,
    )


@app.route('/transformations', methods=['GET'])
def get_transformations():
    """Route transformation listing requests to metadata service."""
    return _forward_json(
        'get',
        f"{METADATA_SERVICE_URL}/transformations",
        'Metadata',
        params=request.args,
        timeout=30,
    )


@app.route('/dashboard/stats', methods=['GET'])
def get_dashboard_stats():
    """Route dashboard stats requests to metadata service."""
    return _forward_json(
        'get',
        f"{METADATA_SERVICE_URL}/stats",
        'Metadata',
        timeout=30,
    )


@app.route('/buckets', methods=['GET'])
def get_buckets():
    """Route bucket requests to metadata service."""
    return _forward_json(
        'get',
        f"{METADATA_SERVICE_URL}/buckets",
        'Metadata',
        timeout=30,
    )


# Auth endpoints (placeholder for now).
# NOTE(review): /auth/login returns a fixed token without checking any
# credentials - replace with real authentication before exposing this service.
@app.route('/auth/login', methods=['POST'])
def login():
    """Placeholder login endpoint."""
    return jsonify({'token': 'dummy-token', 'user': {'id': 1, 'username': 'admin'}}), 200


@app.route('/auth/logout', methods=['POST'])
def logout():
    """Placeholder logout endpoint."""
    return jsonify({'message': 'Logged out successfully'}), 200


@app.route('/auth/profile', methods=['GET'])
def get_profile():
    """Placeholder profile endpoint."""
    return jsonify({'id': 1, 'username': 'admin', 'email': 'admin@example.com'}), 200


if __name__ == '__main__':
    app.run(host='0.0.0.0', port=8080, debug=False)
events {
    worker_connections 1024;
}

http {
    include /etc/nginx/mime.types;
    default_type application/octet-stream;

    # Logging
    log_format main '$remote_addr - $remote_user [$time_local] "$request" '
                    '$status $body_bytes_sent "$http_referer" '
                    '"$http_user_agent" "$http_x_forwarded_for"';

    access_log /var/log/nginx/access.log main;
    error_log /var/log/nginx/error.log warn;

    # Gzip compression
    gzip on;
    gzip_vary on;
    gzip_min_length 1024;
    gzip_proxied any;
    gzip_comp_level 6;
    gzip_types
        text/plain
        text/css
        text/xml
        text/javascript
        application/json
        application/javascript
        application/xml+rss
        application/atom+xml
        image/svg+xml;

    server {
        listen 3000;
        server_name localhost;
        root /usr/share/nginx/html;
        index index.html;

        # Security headers.
        # add_header is inherited by a location only if that location declares
        # no add_header of its own, so any location that adds a header must
        # repeat this set.
        add_header X-Frame-Options "SAMEORIGIN" always;
        add_header X-XSS-Protection "1; mode=block" always;
        add_header X-Content-Type-Options "nosniff" always;
        add_header Referrer-Policy "no-referrer-when-downgrade" always;
        add_header Content-Security-Policy "default-src 'self' http: https: data: blob: 'unsafe-inline'" always;

        # Handle React Router: unknown paths fall back to the SPA entry point
        location / {
            try_files $uri $uri/ /index.html;
        }

        # Cache static assets
        location ~* \.(js|css|png|jpg|jpeg|gif|ico|svg)$ {
            expires 1y;
            add_header Cache-Control "public, immutable";

            # Repeated here because the Cache-Control add_header above would
            # otherwise drop the server-level security headers for assets.
            add_header X-Frame-Options "SAMEORIGIN" always;
            add_header X-XSS-Protection "1; mode=block" always;
            add_header X-Content-Type-Options "nosniff" always;
            add_header Referrer-Policy "no-referrer-when-downgrade" always;
            add_header Content-Security-Policy "default-src 'self' http: https: data: blob: 'unsafe-inline'" always;
        }

        # Health check endpoint.
        # default_type sets the Content-Type of the literal "return" body;
        # using add_header for it would emit a second Content-Type header.
        location /health {
            access_log off;
            default_type text/plain;
            return 200 "healthy\n";
        }

        # Error pages
        error_page 404 /index.html;
        error_page 500 502 503 504 /50x.html;
        location = /50x.html {
            root /usr/share/nginx/html;
        }
    }
}
a/dashboard/src/App.css b/dashboard/src/App.css new file mode 100644 index 0000000..54161ad --- /dev/null +++ b/dashboard/src/App.css @@ -0,0 +1,115 @@ +@tailwind base; +@tailwind components; +@tailwind utilities; + +@layer base { + html { + font-family: 'Inter', system-ui, sans-serif; + } +} + +@layer components { + .btn-primary { + @apply bg-blue-600 hover:bg-blue-700 text-white font-medium py-2 px-4 rounded-lg transition-colors duration-200; + } + + .btn-secondary { + @apply bg-gray-200 hover:bg-gray-300 text-gray-800 font-medium py-2 px-4 rounded-lg transition-colors duration-200; + } + + .btn-danger { + @apply bg-red-600 hover:bg-red-700 text-white font-medium py-2 px-4 rounded-lg transition-colors duration-200; + } + + .card { + @apply bg-white rounded-lg shadow-sm border border-gray-200 p-6; + } + + .input-field { + @apply w-full px-3 py-2 border border-gray-300 rounded-lg focus:outline-none focus:ring-2 focus:ring-blue-500 focus:border-transparent; + } + + .table-header { + @apply px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider; + } + + .table-cell { + @apply px-6 py-4 whitespace-nowrap text-sm text-gray-900; + } +} + +/* Custom scrollbar */ +::-webkit-scrollbar { + width: 6px; +} + +::-webkit-scrollbar-track { + background: #f1f1f1; +} + +::-webkit-scrollbar-thumb { + background: #c1c1c1; + border-radius: 3px; +} + +::-webkit-scrollbar-thumb:hover { + background: #a8a8a8; +} + +/* Loading animation */ +.loading-spinner { + border: 2px solid #f3f3f3; + border-top: 2px solid #3498db; + border-radius: 50%; + width: 20px; + height: 20px; + animation: spin 1s linear infinite; +} + +@keyframes spin { + 0% { transform: rotate(0deg); } + 100% { transform: rotate(360deg); } +} + +/* File upload dropzone */ +.dropzone { + border: 2px dashed #d1d5db; + border-radius: 8px; + padding: 40px; + text-align: center; + transition: border-color 0.2s ease; +} + +.dropzone:hover { + border-color: #3b82f6; +} + +.dropzone.drag-active { + 
border-color: #3b82f6; + background-color: #eff6ff; +} + +/* Status badges */ +.status-badge { + @apply inline-flex items-center px-2.5 py-0.5 rounded-full text-xs font-medium; +} + +.status-uploaded { + @apply bg-blue-100 text-blue-800; +} + +.status-processing { + @apply bg-yellow-100 text-yellow-800; +} + +.status-transformed { + @apply bg-green-100 text-green-800; +} + +.status-error { + @apply bg-red-100 text-red-800; +} + +.status-deleted { + @apply bg-gray-100 text-gray-800; +} \ No newline at end of file diff --git a/dashboard/src/App.js b/dashboard/src/App.js new file mode 100644 index 0000000..43545de --- /dev/null +++ b/dashboard/src/App.js @@ -0,0 +1,67 @@ +import React from 'react'; +import { BrowserRouter as Router, Routes, Route } from 'react-router-dom'; +import { QueryClient, QueryClientProvider } from 'react-query'; +import { Toaster } from 'react-hot-toast'; +import Layout from './components/Layout'; +import Dashboard from './pages/Dashboard'; +import Files from './pages/Files'; +import Upload from './pages/Upload'; +import Transformations from './pages/Transformations'; +import Buckets from './pages/Buckets'; +import Settings from './pages/Settings'; +import Login from './pages/Login'; +import { AuthProvider } from './contexts/AuthContext'; +import ProtectedRoute from './components/ProtectedRoute'; +import './App.css'; + +const queryClient = new QueryClient({ + defaultOptions: { + queries: { + retry: 1, + refetchOnWindowFocus: false, + }, + }, +}); + +function App() { + return ( + + + +
+ + + } /> + + + + } + > + } /> + } /> + } /> + } /> + } /> + } /> + + +
+
+
+
+ ); +} + +export default App; \ No newline at end of file diff --git a/dashboard/src/components/Header.js b/dashboard/src/components/Header.js new file mode 100644 index 0000000..50410d5 --- /dev/null +++ b/dashboard/src/components/Header.js @@ -0,0 +1,30 @@ +import React from 'react'; +import { BellIcon } from '@heroicons/react/24/outline'; + +const Header = () => { + return ( +
+
+
+

File Transformer S3

+
+
+ +
+
+ A +
+ Admin +
+
+
+
+ ); +}; + +export default Header; \ No newline at end of file diff --git a/dashboard/src/components/Layout.js b/dashboard/src/components/Layout.js new file mode 100644 index 0000000..96dd56a --- /dev/null +++ b/dashboard/src/components/Layout.js @@ -0,0 +1,20 @@ +import React from 'react'; +import { Outlet } from 'react-router-dom'; +import Sidebar from './Sidebar'; +import Header from './Header'; + +const Layout = () => { + return ( +
+ +
+
+
+ +
+
+
+ ); +}; + +export default Layout; \ No newline at end of file diff --git a/dashboard/src/components/ProtectedRoute.js b/dashboard/src/components/ProtectedRoute.js new file mode 100644 index 0000000..920ad50 --- /dev/null +++ b/dashboard/src/components/ProtectedRoute.js @@ -0,0 +1,15 @@ +import React from 'react'; +import { Navigate } from 'react-router-dom'; + +const ProtectedRoute = ({ children }) => { + // For now, always allow access. In a real app, you'd check authentication here + const isAuthenticated = true; // Replace with actual auth check + + if (!isAuthenticated) { + return ; + } + + return children; +}; + +export default ProtectedRoute; \ No newline at end of file diff --git a/dashboard/src/components/Sidebar.js b/dashboard/src/components/Sidebar.js new file mode 100644 index 0000000..819a553 --- /dev/null +++ b/dashboard/src/components/Sidebar.js @@ -0,0 +1,59 @@ +import React from 'react'; +import { NavLink } from 'react-router-dom'; +import { + HomeIcon, + DocumentTextIcon, + CloudArrowUpIcon, + CogIcon, + FolderIcon, + ChartBarIcon +} from '@heroicons/react/24/outline'; + +const Sidebar = () => { + const navigation = [ + { name: 'Dashboard', href: '/', icon: HomeIcon }, + { name: 'Files', href: '/files', icon: DocumentTextIcon }, + { name: 'Upload', href: '/upload', icon: CloudArrowUpIcon }, + { name: 'Transformations', href: '/transformations', icon: CogIcon }, + { name: 'Buckets', href: '/buckets', icon: FolderIcon }, + { name: 'Analytics', href: '/analytics', icon: ChartBarIcon }, + { name: 'Settings', href: '/settings', icon: CogIcon }, + ]; + + return ( +
+
+
+
+
+

File Transformer

+
+ +
+
+
+
+ ); +}; + +export default Sidebar; \ No newline at end of file diff --git a/dashboard/src/contexts/AuthContext.js b/dashboard/src/contexts/AuthContext.js new file mode 100644 index 0000000..9016bf3 --- /dev/null +++ b/dashboard/src/contexts/AuthContext.js @@ -0,0 +1,66 @@ +import React, { createContext, useContext, useState, useEffect } from 'react'; + +const AuthContext = createContext(); + +export const useAuth = () => { + const context = useContext(AuthContext); + if (!context) { + throw new Error('useAuth must be used within an AuthProvider'); + } + return context; +}; + +export const AuthProvider = ({ children }) => { + const [user, setUser] = useState(null); + const [loading, setLoading] = useState(true); + + useEffect(() => { + // Check for existing token + const token = localStorage.getItem('authToken'); + if (token) { + // In a real app, validate the token with the backend + setUser({ id: '1', username: 'admin', role: 'admin' }); + } + setLoading(false); + }, []); + + const login = async (credentials) => { + try { + // In a real app, make API call to login + const response = await fetch('/api/auth/login', { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify(credentials), + }); + + if (response.ok) { + const data = await response.json(); + localStorage.setItem('authToken', data.token); + setUser(data.user); + return { success: true }; + } else { + return { success: false, error: 'Invalid credentials' }; + } + } catch (error) { + return { success: false, error: 'Network error' }; + } + }; + + const logout = () => { + localStorage.removeItem('authToken'); + setUser(null); + }; + + const value = { + user, + login, + logout, + loading, + }; + + return ( + + {children} + + ); +}; \ No newline at end of file diff --git a/dashboard/src/index.css b/dashboard/src/index.css new file mode 100644 index 0000000..a7a23e2 --- /dev/null +++ b/dashboard/src/index.css @@ -0,0 +1,20 @@ +@import 
url('https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&display=swap'); + +body { + margin: 0; + font-family: 'Inter', -apple-system, BlinkMacSystemFont, 'Segoe UI', 'Roboto', 'Oxygen', + 'Ubuntu', 'Cantarell', 'Fira Sans', 'Droid Sans', 'Helvetica Neue', + sans-serif; + -webkit-font-smoothing: antialiased; + -moz-osx-font-smoothing: grayscale; + background-color: #f9fafb; +} + +code { + font-family: source-code-pro, Menlo, Monaco, Consolas, 'Courier New', + monospace; +} + +* { + box-sizing: border-box; +} \ No newline at end of file diff --git a/dashboard/src/index.js b/dashboard/src/index.js new file mode 100644 index 0000000..3d817e7 --- /dev/null +++ b/dashboard/src/index.js @@ -0,0 +1,11 @@ +import React from 'react'; +import ReactDOM from 'react-dom/client'; +import './index.css'; +import App from './App'; + +const root = ReactDOM.createRoot(document.getElementById('root')); +root.render( + + + +); \ No newline at end of file diff --git a/dashboard/src/pages/Buckets.js b/dashboard/src/pages/Buckets.js new file mode 100644 index 0000000..092d738 --- /dev/null +++ b/dashboard/src/pages/Buckets.js @@ -0,0 +1,94 @@ +import React from 'react'; +import { useQuery } from 'react-query'; +import { FolderIcon, PlusIcon } from '@heroicons/react/24/outline'; +import { bucketsAPI } from '../services/api'; + +const Buckets = () => { + const { data: buckets, isLoading, error } = useQuery('buckets', bucketsAPI.getBuckets); + + if (isLoading) { + return ( +
+
+
+ ); + } + + if (error) { + return ( +
+

Error loading buckets: {error.message}

+
+ ); + } + + return ( +
+
+
+

Buckets

+

Manage MinIO storage buckets

+
+ +
+ +
+ {buckets?.map((bucket) => ( +
+
+
+ +
+

{bucket.name}

+

{bucket.description || 'No description'}

+
+
+ + {bucket.is_active ? 'Active' : 'Inactive'} + +
+ +
+
+ Created + + {new Date(bucket.created_at).toLocaleDateString()} + +
+
+ +
+ + +
+
+ ))} +
+ + {buckets?.length === 0 && ( +
+ +

No buckets

+

+ Get started by creating a new bucket. +

+
+ +
+
+ )} +
+ ); +}; + +export default Buckets; \ No newline at end of file diff --git a/dashboard/src/pages/Dashboard.js b/dashboard/src/pages/Dashboard.js new file mode 100644 index 0000000..ff44b27 --- /dev/null +++ b/dashboard/src/pages/Dashboard.js @@ -0,0 +1,223 @@ +import React from 'react'; +import { useQuery } from 'react-query'; +import { + CloudArrowUpIcon, + DocumentTextIcon, + CogIcon, + ExclamationTriangleIcon, + ArrowUpIcon, + ArrowDownIcon +} from '@heroicons/react/24/outline'; +import { BarChart, Bar, XAxis, YAxis, CartesianGrid, Tooltip, ResponsiveContainer, PieChart, Pie, Cell } from 'recharts'; +import { format } from 'date-fns'; +import { motion } from 'framer-motion'; +import { getDashboardStats, getRecentFiles, getRecentTransformations } from '../services/api'; + +const COLORS = ['#3b82f6', '#10b981', '#f59e0b', '#ef4444']; + +const Dashboard = () => { + const { data: stats, isLoading: statsLoading } = useQuery('dashboardStats', getDashboardStats); + const { data: recentFiles, isLoading: filesLoading } = useQuery('recentFiles', getRecentFiles); + const { data: recentTransformations, isLoading: transformationsLoading } = useQuery('recentTransformations', getRecentTransformations); + + const fileTypeData = [ + { name: 'PDF', value: stats?.fileTypes?.pdf || 0 }, + { name: 'DOC', value: stats?.fileTypes?.doc || 0 }, + { name: 'TXT', value: stats?.fileTypes?.txt || 0 }, + { name: 'CSV', value: stats?.fileTypes?.csv || 0 }, + ]; + + const statusData = [ + { name: 'Uploaded', value: stats?.statusCounts?.uploaded || 0 }, + { name: 'Processing', value: stats?.statusCounts?.processing || 0 }, + { name: 'Transformed', value: stats?.statusCounts?.transformed || 0 }, + { name: 'Error', value: stats?.statusCounts?.error || 0 }, + ]; + + const StatCard = ({ title, value, icon: Icon, change, changeType = 'up' }) => ( + +
+
+

{title}

+

{value}

+ {change && ( +
+ {changeType === 'up' ? ( + + ) : ( + + )} + + {change} + +
+ )} +
+
+ +
+
+
+ ); + + if (statsLoading) { + return ( +
+
+
+ ); + } + + return ( +
+ {/* Header */} +
+

Dashboard

+

Overview of your file transformation system

+
+ + {/* Stats Cards */} +
+ + + + +
+ + {/* Charts */} +
+ {/* File Types Chart */} +
+

File Types Distribution

+ + + `${name} ${(percent * 100).toFixed(0)}%`} + outerRadius={80} + fill="#8884d8" + dataKey="value" + > + {fileTypeData.map((entry, index) => ( + + ))} + + + + +
+ + {/* Status Chart */} +
+

File Status Overview

+ + + + + + + + + +
+
+ + {/* Recent Activity */} +
+ {/* Recent Files */} +
+

Recent Files

+ {filesLoading ? ( +
+
+
+ ) : ( +
+ {recentFiles?.slice(0, 5).map((file) => ( +
+
+ +
+

{file.filename}

+

+ {format(new Date(file.created_at), 'MMM dd, yyyy HH:mm')} +

+
+
+ + {file.status} + +
+ ))} +
+ )} +
+ + {/* Recent Transformations */} +
+

Recent Transformations

+ {transformationsLoading ? ( +
+
+
+ ) : ( +
+ {recentTransformations?.slice(0, 5).map((transformation) => ( +
+
+ +
+

{transformation.transformation_type}

+

+ {format(new Date(transformation.created_at), 'MMM dd, yyyy HH:mm')} +

+
+
+ + {transformation.status} + +
+ ))} +
+ )} +
+
+
+ ); +}; + +export default Dashboard; \ No newline at end of file diff --git a/dashboard/src/pages/Files.js b/dashboard/src/pages/Files.js new file mode 100644 index 0000000..ce01a01 --- /dev/null +++ b/dashboard/src/pages/Files.js @@ -0,0 +1,102 @@ +import React from 'react'; +import { useQuery } from 'react-query'; +import { DocumentTextIcon, ArrowDownTrayIcon, TrashIcon } from '@heroicons/react/24/outline'; +import { filesAPI } from '../services/api'; + +const Files = () => { + const { data: files, isLoading, error } = useQuery('files', filesAPI.getFiles); + + if (isLoading) { + return ( +
+
+
+ ); + } + + if (error) { + return ( +
+

Error loading files: {error.message}

+
+ ); + } + + return ( +
+
+

Files

+

Manage your uploaded files

+
+ +
+
+

All Files

+
+
+ + + + + + + + + + + + + {files?.map((file) => ( + + + + + + + + + ))} + +
FileSizeTypeStatusUploadedActions
+
+ +
+
+ {file.filename} +
+
+ {file.original_filename} +
+
+
+
+ + {(file.file_size / 1024 / 1024).toFixed(2)} MB + + + {file.file_type} + + + {file.status} + + + + {new Date(file.created_at).toLocaleDateString()} + + +
+ + +
+
+
+
+
+ ); +}; + +export default Files; \ No newline at end of file diff --git a/dashboard/src/pages/Login.js b/dashboard/src/pages/Login.js new file mode 100644 index 0000000..b42ce32 --- /dev/null +++ b/dashboard/src/pages/Login.js @@ -0,0 +1,111 @@ +import React, { useState } from 'react'; +import { useNavigate } from 'react-router-dom'; +import { useAuth } from '../contexts/AuthContext'; +import toast from 'react-hot-toast'; + +const Login = () => { + const [credentials, setCredentials] = useState({ + username: '', + password: '' + }); + const [loading, setLoading] = useState(false); + const { login } = useAuth(); + const navigate = useNavigate(); + + const handleSubmit = async (e) => { + e.preventDefault(); + setLoading(true); + + try { + const result = await login(credentials); + if (result.success) { + toast.success('Login successful!'); + navigate('/'); + } else { + toast.error(result.error || 'Login failed'); + } + } catch (error) { + toast.error('An error occurred during login'); + } finally { + setLoading(false); + } + }; + + const handleChange = (e) => { + setCredentials({ + ...credentials, + [e.target.name]: e.target.value + }); + }; + + return ( +
+
+
+

+ Sign in to File Transformer +

+

+ Access your file transformation dashboard +

+
+
+
+
+ + +
+
+ + +
+
+ +
+ +
+ +
+

+ Default credentials: admin / admin123 +

+
+
+
+
+ ); +}; + +export default Login; \ No newline at end of file diff --git a/dashboard/src/pages/Settings.js b/dashboard/src/pages/Settings.js new file mode 100644 index 0000000..802b3f9 --- /dev/null +++ b/dashboard/src/pages/Settings.js @@ -0,0 +1,205 @@ +import React from 'react'; +import { CogIcon, ShieldCheckIcon, CircleStackIcon, CloudIcon } from '@heroicons/react/24/outline'; + +const Settings = () => { + return ( +
+
+

Settings

+

Configure your file transformation system

+
+ +
+ {/* System Configuration */} +
+
+ +

System Configuration

+
+ +
+
+ + +
+ +
+ + +
+ +
+ +
+ + +
+
+
+
+ + {/* Security Settings */} +
+
+ +

Security

+
+ +
+
+ + +
+ +
+ + +
+ +
+ + +
+
+
+ + {/* Database Configuration */} +
+
+ +

Database

+
+ +
+
+ + +
+ +
+ + +
+ +
+ + +
+
+
+ + {/* Storage Configuration */} +
+
+ +

Storage

+
+ +
+
+ + +
+ +
+ + +
+ +
+ +
+ + +
+
+
+
+
+ + {/* Action Buttons */} +
+ + +
+
+ ); +}; + +export default Settings; \ No newline at end of file diff --git a/dashboard/src/pages/Transformations.js b/dashboard/src/pages/Transformations.js new file mode 100644 index 0000000..ca6d31f --- /dev/null +++ b/dashboard/src/pages/Transformations.js @@ -0,0 +1,120 @@ +import React from 'react'; +import { useQuery } from 'react-query'; +import { CogIcon, CheckCircleIcon, XCircleIcon, ClockIcon } from '@heroicons/react/24/outline'; +import { transformationsAPI } from '../services/api'; + +const Transformations = () => { + const { data: transformations, isLoading, error } = useQuery('transformations', transformationsAPI.getTransformations); + + const getStatusIcon = (status) => { + switch (status) { + case 'completed': + return ; + case 'failed': + return ; + case 'processing': + return ; + default: + return ; + } + }; + + if (isLoading) { + return ( +
+
+
+ ); + } + + if (error) { + return ( +
+

Error loading transformations: {error.message}

+
+ ); + } + + return ( +
+
+

Transformations

+

Monitor file transformation jobs

+
+ +
+
+

All Transformations

+
+
+ + + + + + + + + + + + + {transformations?.map((transformation) => ( + + + + + + + + + ))} + +
TypeFileStatusStartedCompletedActions
+ + {transformation.transformation_type} + + + + {transformation.file_id} + + +
+ {getStatusIcon(transformation.status)} + + {transformation.status} + +
+
+ + {transformation.started_at + ? new Date(transformation.started_at).toLocaleString() + : '-' + } + + + + {transformation.completed_at + ? new Date(transformation.completed_at).toLocaleString() + : '-' + } + + +
+ {transformation.status === 'failed' && ( + + )} + +
+
+
+
+
+ ); +}; + +export default Transformations; \ No newline at end of file diff --git a/dashboard/src/pages/Upload.js b/dashboard/src/pages/Upload.js new file mode 100644 index 0000000..745c640 --- /dev/null +++ b/dashboard/src/pages/Upload.js @@ -0,0 +1,159 @@ +import React, { useState, useCallback } from 'react'; +import { useDropzone } from 'react-dropzone'; +import { CloudArrowUpIcon, DocumentTextIcon } from '@heroicons/react/24/outline'; +import { uploadFileWithProgress } from '../services/api'; +import toast from 'react-hot-toast'; + +const Upload = () => { + const [uploading, setUploading] = useState(false); + const [uploadProgress, setUploadProgress] = useState({}); + + const onDrop = useCallback(async (acceptedFiles) => { + setUploading(true); + + for (const file of acceptedFiles) { + try { + setUploadProgress(prev => ({ ...prev, [file.name]: 0 })); + + await uploadFileWithProgress(file, (progress) => { + setUploadProgress(prev => ({ ...prev, [file.name]: progress })); + }); + + toast.success(`${file.name} uploaded successfully!`); + setUploadProgress(prev => { + const newProgress = { ...prev }; + delete newProgress[file.name]; + return newProgress; + }); + } catch (error) { + toast.error(`Failed to upload ${file.name}: ${error.message}`); + setUploadProgress(prev => { + const newProgress = { ...prev }; + delete newProgress[file.name]; + return newProgress; + }); + } + } + + setUploading(false); + }, []); + + const { getRootProps, getInputProps, isDragActive } = useDropzone({ + onDrop, + accept: { + 'application/pdf': ['.pdf'], + 'application/vnd.openxmlformats-officedocument.wordprocessingml.document': ['.docx'], + 'application/msword': ['.doc'], + 'text/plain': ['.txt'], + 'text/csv': ['.csv'], + 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet': ['.xlsx'], + 'application/vnd.ms-excel': ['.xls'], + 'application/json': ['.json'], + 'application/xml': ['.xml'], + 'image/*': ['.jpg', '.jpeg', '.png', '.gif', '.bmp', '.tiff'] + }, + maxSize: 
100 * 1024 * 1024, // 100MB + }); + + return ( +
+
+

Upload Files

+

Upload files for transformation and processing

+
+ +
+
+ + +
+ {isDragActive ? ( +

Drop the files here...

+ ) : ( + <> +

+ Drag and drop files here, or click to select files +

+

+ Supports PDF, DOC, TXT, CSV, Excel, JSON, XML, and image files (max 100MB) +

+ + )} +
+
+
+ + {/* Upload Progress */} + {Object.keys(uploadProgress).length > 0 && ( +
+

Upload Progress

+
+ {Object.entries(uploadProgress).map(([filename, progress]) => ( +
+ +
+
+ {filename} + {progress}% +
+
+
+
+
+
+ ))} +
+
+ )} + + {/* File Type Information */} +
+
+

Supported File Types

+
+
+ Documents + PDF, DOC, DOCX, TXT +
+
+ Data Files + CSV, XLS, XLSX, JSON, XML +
+
+ Images + JPG, PNG, GIF, BMP, TIFF +
+
+
+ +
+

Transformation Options

+
+
+ Text Extraction + Available +
+
+ Format Conversion + Available +
+
+ Image Processing + Available +
+
+
+
+
+ ); +}; + +export default Upload; \ No newline at end of file diff --git a/dashboard/src/services/api.js b/dashboard/src/services/api.js new file mode 100644 index 0000000..a7c1a81 --- /dev/null +++ b/dashboard/src/services/api.js @@ -0,0 +1,180 @@ +import axios from 'axios'; + +const API_BASE_URL = process.env.REACT_APP_API_BASE_URL || 'http://localhost:8080'; + +// Create axios instance with default config +const api = axios.create({ + baseURL: API_BASE_URL, + timeout: 10000, + headers: { + 'Content-Type': 'application/json', + }, +}); + +// Request interceptor to add auth token +api.interceptors.request.use( + (config) => { + const token = localStorage.getItem('authToken'); + if (token) { + config.headers.Authorization = `Bearer ${token}`; + } + return config; + }, + (error) => { + return Promise.reject(error); + } +); + +// Response interceptor to handle auth errors +api.interceptors.response.use( + (response) => response, + (error) => { + if (error.response?.status === 401) { + localStorage.removeItem('authToken'); + window.location.href = '/login'; + } + return Promise.reject(error); + } +); + +// Auth API +export const authAPI = { + login: (credentials) => api.post('/auth/login', credentials), + logout: () => api.post('/auth/logout'), + register: (userData) => api.post('/auth/register', userData), + getProfile: () => api.get('/auth/profile'), +}; + +// Files API +export const filesAPI = { + getFiles: (params = {}) => api.get('/files', { params }), + getFile: (id) => api.get(`/files/${id}`), + uploadFile: (file, onProgress) => { + const formData = new FormData(); + formData.append('file', file); + + return api.post('/files/upload', formData, { + headers: { + 'Content-Type': 'multipart/form-data', + }, + onUploadProgress: onProgress, + }); + }, + deleteFile: (id) => api.delete(`/files/${id}`), + downloadFile: (id) => api.get(`/files/${id}/download`, { responseType: 'blob' }), + updateFile: (id, data) => api.put(`/files/${id}`, data), + getFileMetadata: (id) 
=> api.get(`/files/${id}/metadata`), +}; + +// Transformations API +export const transformationsAPI = { + getTransformations: (params = {}) => api.get('/transformations', { params }), + getTransformation: (id) => api.get(`/transformations/${id}`), + createTransformation: (data) => api.post('/transformations', data), + updateTransformation: (id, data) => api.put(`/transformations/${id}`, data), + deleteTransformation: (id) => api.delete(`/transformations/${id}`), + retryTransformation: (id) => api.post(`/transformations/${id}/retry`), + getTransformationLogs: (id) => api.get(`/transformations/${id}/logs`), +}; + +// Buckets API +export const bucketsAPI = { + getBuckets: () => api.get('/buckets'), + getBucket: (name) => api.get(`/buckets/${name}`), + createBucket: (data) => api.post('/buckets', data), + deleteBucket: (name) => api.delete(`/buckets/${name}`), + getBucketStats: (name) => api.get(`/buckets/${name}/stats`), + getBucketFiles: (name, params = {}) => api.get(`/buckets/${name}/files`, { params }), +}; + +// Dashboard API +export const dashboardAPI = { + getStats: () => api.get('/dashboard/stats'), + getRecentFiles: (limit = 10) => api.get('/dashboard/recent-files', { params: { limit } }), + getRecentTransformations: (limit = 10) => api.get('/dashboard/recent-transformations', { params: { limit } }), + getFileTypeStats: () => api.get('/dashboard/file-types'), + getStatusStats: () => api.get('/dashboard/status-counts'), + getStorageStats: () => api.get('/dashboard/storage'), +}; + +// MinIO API (direct integration) +export const minioAPI = { + getBuckets: () => api.get('/minio/buckets'), + getObjects: (bucketName, prefix = '') => api.get(`/minio/buckets/${bucketName}/objects`, { params: { prefix } }), + uploadObject: (bucketName, objectKey, file) => { + const formData = new FormData(); + formData.append('file', file); + formData.append('objectKey', objectKey); + + return api.post(`/minio/buckets/${bucketName}/upload`, formData, { + headers: { + 'Content-Type': 
'multipart/form-data', + }, + }); + }, + deleteObject: (bucketName, objectKey) => api.delete(`/minio/buckets/${bucketName}/objects/${objectKey}`), + getObjectUrl: (bucketName, objectKey) => api.get(`/minio/buckets/${bucketName}/objects/${objectKey}/url`), +}; + +// Convenience functions for common operations +export const getDashboardStats = async () => { + const [stats, fileTypes, statusCounts, storage] = await Promise.all([ + dashboardAPI.getStats(), + dashboardAPI.getFileTypeStats(), + dashboardAPI.getStatusStats(), + dashboardAPI.getStorageStats(), + ]); + + return { + ...stats.data, + fileTypes: fileTypes.data, + statusCounts: statusCounts.data, + storage: storage.data, + }; +}; + +export const getRecentFiles = async () => { + const response = await dashboardAPI.getRecentFiles(); + return response.data; +}; + +export const getRecentTransformations = async () => { + const response = await dashboardAPI.getRecentTransformations(); + return response.data; +}; + +export const uploadFileWithProgress = (file, onProgress) => { + return filesAPI.uploadFile(file, (progressEvent) => { + const percentCompleted = Math.round((progressEvent.loaded * 100) / progressEvent.total); + onProgress(percentCompleted); + }); +}; + +export const downloadFileAsBlob = async (fileId, filename) => { + const response = await filesAPI.downloadFile(fileId); + const url = window.URL.createObjectURL(new Blob([response.data])); + const link = document.createElement('a'); + link.href = url; + link.setAttribute('download', filename); + document.body.appendChild(link); + link.click(); + link.remove(); + window.URL.revokeObjectURL(url); +}; + +// Error handling utility +export const handleAPIError = (error) => { + if (error.response) { + // Server responded with error status + const message = error.response.data?.message || error.response.data?.error || 'An error occurred'; + return { error: true, message, status: error.response.status }; + } else if (error.request) { + // Request was made but no 
response received + return { error: true, message: 'Network error. Please check your connection.', status: 0 }; + } else { + // Something else happened + return { error: true, message: error.message || 'An unexpected error occurred.', status: 0 }; + } +}; + +export default api; \ No newline at end of file diff --git a/dashboard/tailwind.config.js b/dashboard/tailwind.config.js new file mode 100644 index 0000000..41abbb1 --- /dev/null +++ b/dashboard/tailwind.config.js @@ -0,0 +1,43 @@ +/** @type {import('tailwindcss').Config} */ +module.exports = { + content: [ + "./src/**/*.{js,jsx,ts,tsx}", + ], + theme: { + extend: { + colors: { + primary: { + 50: '#eff6ff', + 100: '#dbeafe', + 200: '#bfdbfe', + 300: '#93c5fd', + 400: '#60a5fa', + 500: '#3b82f6', + 600: '#2563eb', + 700: '#1d4ed8', + 800: '#1e40af', + 900: '#1e3a8a', + }, + }, + fontFamily: { + sans: ['Inter', 'system-ui', 'sans-serif'], + }, + animation: { + 'fade-in': 'fadeIn 0.5s ease-in-out', + 'slide-up': 'slideUp 0.3s ease-out', + 'pulse-slow': 'pulse 3s cubic-bezier(0.4, 0, 0.6, 1) infinite', + }, + keyframes: { + fadeIn: { + '0%': { opacity: '0' }, + '100%': { opacity: '1' }, + }, + slideUp: { + '0%': { transform: 'translateY(10px)', opacity: '0' }, + '100%': { transform: 'translateY(0)', opacity: '1' }, + }, + }, + }, + }, + plugins: [], +} \ No newline at end of file diff --git a/database/init.sql b/database/init.sql new file mode 100644 index 0000000..8c81fe7 --- /dev/null +++ b/database/init.sql @@ -0,0 +1,174 @@ +-- File Transformer S3 Database Schema +-- This script initializes the database with all necessary tables + +-- Create extensions +CREATE EXTENSION IF NOT EXISTS "uuid-ossp"; +CREATE EXTENSION IF NOT EXISTS "pgcrypto"; + +-- Create tables +CREATE TABLE IF NOT EXISTS files ( + id UUID PRIMARY KEY DEFAULT uuid_generate_v4(), + filename VARCHAR(255) NOT NULL, + original_filename VARCHAR(255) NOT NULL, + file_path VARCHAR(500) NOT NULL, + file_size BIGINT NOT NULL, + file_type VARCHAR(50) NOT 
NULL, + mime_type VARCHAR(100), + bucket_name VARCHAR(100) NOT NULL, + object_key VARCHAR(500) NOT NULL, + checksum VARCHAR(64), + status VARCHAR(20) DEFAULT 'uploaded' CHECK (status IN ('uploaded', 'processing', 'transformed', 'error', 'deleted')), + transformation_type VARCHAR(50), + transformation_config JSONB, + metadata JSONB, + created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP, + updated_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP, + processed_at TIMESTAMP WITH TIME ZONE, + deleted_at TIMESTAMP WITH TIME ZONE +); + +CREATE TABLE IF NOT EXISTS transformations ( + id UUID PRIMARY KEY DEFAULT uuid_generate_v4(), + file_id UUID NOT NULL REFERENCES files(id) ON DELETE CASCADE, + transformation_type VARCHAR(50) NOT NULL, + input_path VARCHAR(500) NOT NULL, + output_path VARCHAR(500), + status VARCHAR(20) DEFAULT 'pending' CHECK (status IN ('pending', 'processing', 'completed', 'failed')), + config JSONB, + result JSONB, + error_message TEXT, + started_at TIMESTAMP WITH TIME ZONE, + completed_at TIMESTAMP WITH TIME ZONE, + created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP, + updated_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP +); + +CREATE TABLE IF NOT EXISTS buckets ( + id UUID PRIMARY KEY DEFAULT uuid_generate_v4(), + name VARCHAR(100) UNIQUE NOT NULL, + description TEXT, + is_active BOOLEAN DEFAULT true, + created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP, + updated_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP +); + +CREATE TABLE IF NOT EXISTS users ( + id UUID PRIMARY KEY DEFAULT uuid_generate_v4(), + username VARCHAR(50) UNIQUE NOT NULL, + email VARCHAR(255) UNIQUE NOT NULL, + password_hash VARCHAR(255) NOT NULL, + is_active BOOLEAN DEFAULT true, + role VARCHAR(20) DEFAULT 'user' CHECK (role IN ('admin', 'user')), + created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP, + updated_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP +); + +CREATE TABLE IF NOT EXISTS sessions ( + id UUID 
-- Audit trail of file operations.
-- file_id / user_id are nullable and use ON DELETE SET NULL so that log rows
-- survive the deletion of the file or user they refer to.
CREATE TABLE IF NOT EXISTS file_access_logs (
    id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
    file_id UUID REFERENCES files(id) ON DELETE SET NULL,
    user_id UUID REFERENCES users(id) ON DELETE SET NULL,
    -- 'url_generated' covers issuing a presigned download URL, which is a
    -- distinct audit event from a direct download; without it such inserts
    -- are rejected by the CHECK constraint and the audit row is lost.
    action VARCHAR(50) NOT NULL CHECK (action IN ('upload', 'download', 'view', 'delete', 'transform', 'url_generated')),
    ip_address INET,
    user_agent TEXT,
    created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP
);
-- Create triggers for updated_at
-- CREATE TRIGGER has no IF NOT EXISTS form, so each trigger is dropped first.
-- This keeps the script re-runnable, like the CREATE ... IF NOT EXISTS,
-- CREATE OR REPLACE and ON CONFLICT statements elsewhere in this file.
DROP TRIGGER IF EXISTS update_files_updated_at ON files;
CREATE TRIGGER update_files_updated_at BEFORE UPDATE ON files
    FOR EACH ROW EXECUTE FUNCTION update_updated_at_column();

DROP TRIGGER IF EXISTS update_transformations_updated_at ON transformations;
CREATE TRIGGER update_transformations_updated_at BEFORE UPDATE ON transformations
    FOR EACH ROW EXECUTE FUNCTION update_updated_at_column();

DROP TRIGGER IF EXISTS update_buckets_updated_at ON buckets;
CREATE TRIGGER update_buckets_updated_at BEFORE UPDATE ON buckets
    FOR EACH ROW EXECUTE FUNCTION update_updated_at_column();

DROP TRIGGER IF EXISTS update_users_updated_at ON users;
CREATE TRIGGER update_users_updated_at BEFORE UPDATE ON users
    FOR EACH ROW EXECUTE FUNCTION update_updated_at_column();
processing_files, + COUNT(CASE WHEN f.status = 'transformed' THEN 1 END) as transformed_files, + COUNT(CASE WHEN f.status = 'error' THEN 1 END) as error_files +FROM buckets b +LEFT JOIN files f ON b.name = f.bucket_name +GROUP BY b.name; + +-- Grant permissions (adjust as needed for your setup) +-- GRANT ALL PRIVILEGES ON ALL TABLES IN SCHEMA public TO file_user; +-- GRANT ALL PRIVILEGES ON ALL SEQUENCES IN SCHEMA public TO file_user; \ No newline at end of file diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..7900401 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,195 @@ +services: + # PostgreSQL Database + postgres: + image: postgres:15-alpine + container_name: file-transformer-postgres + environment: + POSTGRES_DB: ${POSTGRES_DB:-file_transformer} + POSTGRES_USER: ${POSTGRES_USER:-file_user} + POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:-secure_password_123} + ports: + - "${POSTGRES_PORT:-5432}:5432" + volumes: + - postgres_data:/var/lib/postgresql/data + - ./database/init.sql:/docker-entrypoint-initdb.d/init.sql + networks: + - file-transformer-network + healthcheck: + test: ["CMD-SHELL", "pg_isready -U ${POSTGRES_USER:-file_user}"] + interval: 10s + timeout: 5s + retries: 5 + + # MinIO Object Storage + minio: + image: minio/minio:latest + container_name: file-transformer-minio + environment: + MINIO_ROOT_USER: ${MINIO_ACCESS_KEY:-minioadmin} + MINIO_ROOT_PASSWORD: ${MINIO_SECRET_KEY:-minioadmin123} + ports: + - "${MINIO_API_PORT:-9000}:9000" + - "${MINIO_CONSOLE_PORT:-9001}:9001" + volumes: + - minio_data:/data + networks: + - file-transformer-network + command: server /data --console-address ":9001" + healthcheck: + test: ["CMD", "curl", "-f", "http://localhost:9000/minio/health/live"] + interval: 30s + timeout: 20s + retries: 3 + + # MinIO Client for bucket setup + minio-client: + image: minio/mc:latest + container_name: file-transformer-minio-client + depends_on: + - minio + environment: + MINIO_ACCESS_KEY: 
${MINIO_ACCESS_KEY:-minioadmin} + MINIO_SECRET_KEY: ${MINIO_SECRET_KEY:-minioadmin123} + networks: + - file-transformer-network + command: > + sh -c " + sleep 10 && + mc alias set local http://minio:9000 ${MINIO_ACCESS_KEY:-minioadmin} ${MINIO_SECRET_KEY:-minioadmin123} && + mc mb local/${MINIO_BUCKET_NAME:-file-transformer-bucket} --ignore-existing && + mc policy set public local/${MINIO_BUCKET_NAME:-file-transformer-bucket} && + echo 'MinIO bucket setup complete' + " + + # React Dashboard + dashboard: + build: + context: ./dashboard + dockerfile: Dockerfile + container_name: file-transformer-dashboard + ports: + - "${REACT_APP_PORT:-3000}:3000" + environment: + - REACT_APP_API_BASE_URL=${REACT_APP_API_BASE_URL:-http://localhost:8080} + - REACT_APP_MINIO_ENDPOINT=${REACT_APP_MINIO_ENDPOINT:-http://localhost:9000} + - REACT_APP_MINIO_CONSOLE=${REACT_APP_MINIO_CONSOLE:-http://localhost:9001} + networks: + - file-transformer-network + depends_on: + - postgres + - minio + volumes: + - ./dashboard:/app + - /app/node_modules + + # API Gateway (for local development) + api-gateway: + build: + context: ./api-gateway + dockerfile: Dockerfile + container_name: file-transformer-api-gateway + ports: + - "8080:8080" + environment: + - POSTGRES_URL=${POSTGRES_URL} + - MINIO_ENDPOINT=${MINIO_ENDPOINT} + - MINIO_ACCESS_KEY=${MINIO_ACCESS_KEY} + - MINIO_SECRET_KEY=${MINIO_SECRET_KEY} + - MINIO_BUCKET_NAME=${MINIO_BUCKET_NAME} + - JWT_SECRET=${JWT_SECRET} + - CORS_ORIGINS=${CORS_ORIGINS} + networks: + - file-transformer-network + depends_on: + - postgres + - minio + + # File Upload Function (local development) + function-upload: + build: + context: ./functions/upload + dockerfile: Dockerfile + container_name: file-transformer-upload-function + ports: + - "5001:5000" + environment: + - MINIO_ENDPOINT=${MINIO_ENDPOINT} + - MINIO_ACCESS_KEY=${MINIO_ACCESS_KEY} + - MINIO_SECRET_KEY=${MINIO_SECRET_KEY} + - MINIO_BUCKET_NAME=${MINIO_BUCKET_NAME} + - POSTGRES_URL=${POSTGRES_URL} + 
networks: + - file-transformer-network + depends_on: + - postgres + - minio + + # File Transform Function (local development) + function-transform: + build: + context: ./functions/transform + dockerfile: Dockerfile + container_name: file-transformer-transform-function + ports: + - "5002:5000" + environment: + - MINIO_ENDPOINT=${MINIO_ENDPOINT} + - MINIO_ACCESS_KEY=${MINIO_ACCESS_KEY} + - MINIO_SECRET_KEY=${MINIO_SECRET_KEY} + - MINIO_BUCKET_NAME=${MINIO_BUCKET_NAME} + - POSTGRES_URL=${POSTGRES_URL} + networks: + - file-transformer-network + depends_on: + - postgres + - minio + + # File Download Function (local development) + function-download: + build: + context: ./functions/download + dockerfile: Dockerfile + container_name: file-transformer-download-function + ports: + - "5003:5000" + environment: + - MINIO_ENDPOINT=${MINIO_ENDPOINT} + - MINIO_ACCESS_KEY=${MINIO_ACCESS_KEY} + - MINIO_SECRET_KEY=${MINIO_SECRET_KEY} + - MINIO_BUCKET_NAME=${MINIO_BUCKET_NAME} + - POSTGRES_URL=${POSTGRES_URL} + networks: + - file-transformer-network + depends_on: + - postgres + - minio + + # File Metadata Function (local development) + function-metadata: + build: + context: ./functions/metadata + dockerfile: Dockerfile + container_name: file-transformer-metadata-function + ports: + - "5004:5000" + environment: + - MINIO_ENDPOINT=${MINIO_ENDPOINT} + - MINIO_ACCESS_KEY=${MINIO_ACCESS_KEY} + - MINIO_SECRET_KEY=${MINIO_SECRET_KEY} + - MINIO_BUCKET_NAME=${MINIO_BUCKET_NAME} + - POSTGRES_URL=${POSTGRES_URL} + networks: + - file-transformer-network + depends_on: + - postgres + - minio + +volumes: + postgres_data: + driver: local + minio_data: + driver: local + +networks: + file-transformer-network: + driver: bridge \ No newline at end of file diff --git a/env.example b/env.example new file mode 100644 index 0000000..bfd452a --- /dev/null +++ b/env.example @@ -0,0 +1,82 @@ +# ============================================================================= +# File Transformer S3 - Environment 
Configuration +# ============================================================================= + +# Application Configuration +APP_NAME=file-transformer-s3 +APP_ENV=development +DEBUG=true + +# Dashboard Configuration +REACT_APP_PORT=3000 +REACT_APP_API_BASE_URL=http://localhost:8080 +REACT_APP_MINIO_ENDPOINT=http://localhost:9000 +REACT_APP_MINIO_CONSOLE=http://localhost:9001 + +# PostgreSQL Configuration +POSTGRES_HOST=localhost +POSTGRES_PORT=5432 +POSTGRES_DB=file_transformer +POSTGRES_USER=file_user +POSTGRES_PASSWORD=secure_password_123 +POSTGRES_URL=postgresql://${POSTGRES_USER}:${POSTGRES_PASSWORD}@${POSTGRES_HOST}:${POSTGRES_PORT}/${POSTGRES_DB} + +# MinIO Configuration +MINIO_ENDPOINT=localhost:9000 +MINIO_ACCESS_KEY=minioadmin +MINIO_SECRET_KEY=minioadmin123 +MINIO_BUCKET_NAME=file-transformer-bucket +MINIO_CONSOLE_PORT=9001 +MINIO_API_PORT=9000 +MINIO_USE_SSL=false + +# Knative Functions Configuration +KNATIVE_NAMESPACE=file-transformer +KNATIVE_SERVICE_ACCOUNT=file-transformer-sa + +# Function Endpoints (internal cluster services) +FUNCTION_UPLOAD_ENDPOINT=http://file-upload-service.file-transformer.svc.cluster.local +FUNCTION_TRANSFORM_ENDPOINT=http://file-transform-service.file-transformer.svc.cluster.local +FUNCTION_DOWNLOAD_ENDPOINT=http://file-download-service.file-transformer.svc.cluster.local +FUNCTION_METADATA_ENDPOINT=http://file-metadata-service.file-transformer.svc.cluster.local + +# Local Development Function Endpoints +LOCAL_FUNCTION_UPLOAD_ENDPOINT=http://localhost:5001 +LOCAL_FUNCTION_TRANSFORM_ENDPOINT=http://localhost:5002 +LOCAL_FUNCTION_DOWNLOAD_ENDPOINT=http://localhost:5003 +LOCAL_FUNCTION_METADATA_ENDPOINT=http://localhost:5004 + +# File Processing Configuration +MAX_FILE_SIZE=100MB +ALLOWED_FILE_TYPES=pdf,doc,docx,txt,csv,xlsx,xls,json,xml +PROCESSING_TIMEOUT=300 +CHUNK_SIZE=1048576 + +# Security Configuration +JWT_SECRET=your-super-secret-jwt-key-change-this-in-production +JWT_EXPIRY=24h 
+CORS_ORIGINS=http://localhost:3000,http://localhost:8080 + +# Logging Configuration +LOG_LEVEL=INFO +LOG_FORMAT=json + +# Monitoring Configuration +METRICS_PORT=9090 +HEALTH_CHECK_PORT=8081 + +# Docker Configuration +DOCKER_REGISTRY=your-registry.com +DOCKER_NAMESPACE=file-transformer +DOCKER_TAG=latest + +# Kubernetes Configuration +K8S_NAMESPACE=file-transformer +K8S_REPLICAS=2 +K8S_RESOURCES_CPU=500m +K8S_RESOURCES_MEMORY=512Mi + +# Backup Configuration +BACKUP_ENABLED=true +BACKUP_SCHEDULE=0 2 * * * +BACKUP_RETENTION_DAYS=30 \ No newline at end of file diff --git a/functions/Makefile b/functions/Makefile new file mode 100644 index 0000000..10356b9 --- /dev/null +++ b/functions/Makefile @@ -0,0 +1,29 @@ +.PHONY: build build-upload build-transform build-download build-metadata clean + +# Build all functions +build: build-upload build-transform build-download build-metadata + +# Build upload function +build-upload: + @echo "Building upload function..." + @cd upload && docker build -t function-upload:latest . + +# Build transform function +build-transform: + @echo "Building transform function..." + @cd transform && docker build -t function-transform:latest . + +# Build download function +build-download: + @echo "Building download function..." + @cd download && docker build -t function-download:latest . + +# Build metadata function +build-metadata: + @echo "Building metadata function..." + @cd metadata && docker build -t function-metadata:latest . + +# Clean all function images +clean: + @echo "Cleaning function images..." 
FROM python:3.11-slim

# Set working directory
WORKDIR /app

# Install system dependencies
RUN apt-get update && apt-get install -y \
    gcc \
    && rm -rf /var/lib/apt/lists/*

# Copy requirements and install Python dependencies
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy application code
COPY app.py .

# Create non-root user
RUN useradd --create-home --shell /bin/bash app && chown -R app:app /app
USER app

# Expose port
EXPOSE 5000

# Health check
# The slim base image ships without curl, so probe /health with the Python
# interpreter that is already present; urlopen raises (non-zero exit) on
# connection failures and HTTP error statuses.
HEALTHCHECK --interval=30s --timeout=30s --start-period=5s --retries=3 \
    CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:5000/health', timeout=5)" || exit 1

# Run the application
CMD ["gunicorn", "--bind", "0.0.0.0:5000", "--workers", "4", "--timeout", "300", "app:app"]
def get_file_info(file_id: str) -> Optional[dict]:
    """Look up a non-deleted file row by its ID.

    Args:
        file_id: Primary key of the row in the ``files`` table.

    Returns:
        The selected columns as a plain ``dict``, or ``None`` when no
        matching row exists or the query fails (the failure is logged).
    """
    connection = get_db_connection()
    try:
        with connection.cursor(cursor_factory=RealDictCursor) as cursor:
            cursor.execute("""
                SELECT id, filename, original_filename, file_size, file_type,
                       mime_type, bucket_name, object_key, status
                FROM files
                WHERE id = %s AND status != 'deleted'
            """, (file_id,))

            row = cursor.fetchone()
            if not row:
                return None
            return dict(row)
    except Exception as e:
        # Best-effort lookup: callers treat None as "not found".
        logger.error("Failed to get file info", error=str(e))
        return None
    finally:
        connection.close()
@app.route('/download/<file_id>', methods=['GET'])
def download_file(file_id: str):
    """Download a file by ID.

    Args:
        file_id: Identifier of the file, taken from the URL path.

    Returns:
        The object's bytes as an attachment named after the original
        filename; a JSON error with status 404 when the file is unknown (or
        marked deleted) or missing from storage; status 500 on any other
        failure.
    """
    try:
        # get_file_info() already excludes rows whose status is 'deleted',
        # so a missing record covers both "unknown" and "deleted".
        file_info = get_file_info(file_id)
        if not file_info:
            return jsonify({'error': 'File not found'}), 404

        # Get file from MinIO
        response = None
        try:
            response = minio_client.get_object(
                file_info['bucket_name'],
                file_info['object_key']
            )
            file_data = response.read()
        except S3Error as e:
            logger.error("Failed to get file from MinIO", error=str(e))
            return jsonify({'error': 'File not found in storage'}), 404
        finally:
            # Always hand the HTTP connection back to the pool, including
            # when read() fails part-way through.
            if response is not None:
                response.close()
                response.release_conn()

        # Log access
        log_file_access(file_id, 'download', request.remote_addr, request.headers.get('User-Agent'))

        logger.info("File download completed",
                    file_id=file_id,
                    filename=file_info['filename'],
                    size=len(file_data))

        # A fresh BytesIO is positioned at offset 0, ready for send_file.
        return send_file(
            io.BytesIO(file_data),
            mimetype=file_info['mime_type'],
            as_attachment=True,
            download_name=file_info['original_filename']
        )

    except Exception as e:
        logger.error("Download error", error=str(e))
        return jsonify({'error': 'Internal server error'}), 500
@app.route('/files/<file_id>/url', methods=['GET'])
def get_download_url(file_id: str):
    """Get presigned download URL.

    Args:
        file_id: Identifier of the file, taken from the URL path.

    Returns:
        JSON with ``file_id``, ``filename``, ``download_url`` and
        ``expires_in`` (seconds); a JSON error with status 404 when the file
        is unknown or marked deleted; status 500 when the URL cannot be
        generated or on any other failure.
    """
    # Local import: the module only imports `datetime` from the datetime package.
    from datetime import timedelta

    expires_in = 3600  # 1 hour

    try:
        # get_file_info() already excludes rows whose status is 'deleted'.
        file_info = get_file_info(file_id)
        if not file_info:
            return jsonify({'error': 'File not found'}), 404

        # Generate presigned URL
        try:
            # The MinIO SDK expects `expires` as a timedelta, not an int.
            url = minio_client.presigned_get_object(
                file_info['bucket_name'],
                file_info['object_key'],
                expires=timedelta(seconds=expires_in)
            )
        except S3Error as e:
            logger.error("Failed to generate presigned URL", error=str(e))
            return jsonify({'error': 'Failed to generate download URL'}), 500

        # Log access. 'download' is an action accepted by the CHECK
        # constraint on file_access_logs.action.
        log_file_access(file_id, 'download', request.remote_addr, request.headers.get('User-Agent'))

        return jsonify({
            'file_id': file_id,
            'filename': file_info['original_filename'],
            'download_url': url,
            'expires_in': expires_in
        }), 200

    except Exception as e:
        logger.error("Error generating download URL", error=str(e))
        return jsonify({'error': 'Internal server error'}), 500
FROM python:3.11-slim

# Set working directory
WORKDIR /app

# Install system dependencies
RUN apt-get update && apt-get install -y \
    gcc \
    && rm -rf /var/lib/apt/lists/*

# Copy requirements and install Python dependencies
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy application code
COPY app.py .

# Create non-root user
RUN useradd --create-home --shell /bin/bash app && chown -R app:app /app
USER app

# Expose port
EXPOSE 5000

# Health check
# The slim base image ships without curl, so probe /health with the Python
# interpreter that is already present; urlopen raises (non-zero exit) on
# connection failures and HTTP error statuses.
HEALTHCHECK --interval=30s --timeout=30s --start-period=5s --retries=3 \
    CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:5000/health', timeout=5)" || exit 1

# Run the application
CMD ["gunicorn", "--bind", "0.0.0.0:5000", "--workers", "4", "--timeout", "300", "app:app"]
def get_file_metadata(file_id: str) -> Optional[Dict[str, Any]]:
    """Fetch the full metadata row for a file.

    The query does not filter on status, so rows marked 'deleted' are
    returned as well.

    Args:
        file_id: Primary key of the row in the ``files`` table.

    Returns:
        The selected columns as a plain ``dict``, or ``None`` when no row
        matches or the query fails (the failure is logged).
    """
    connection = get_db_connection()
    try:
        with connection.cursor(cursor_factory=RealDictCursor) as cursor:
            cursor.execute("""
                SELECT id, filename, original_filename, file_size, file_type,
                       mime_type, bucket_name, object_key, checksum, status,
                       transformation_type, transformation_config, metadata,
                       created_at, updated_at, processed_at
                FROM files
                WHERE id = %s
            """, (file_id,))

            row = cursor.fetchone()
            if not row:
                return None
            return dict(row)
    except Exception as e:
        # Best-effort lookup: callers treat None as "not found".
        logger.error("Failed to get file metadata", error=str(e))
        return None
    finally:
        connection.close()
def update_file_metadata(file_id: str, metadata: Dict[str, Any]) -> bool:
    """Replace the JSON ``metadata`` column of a file row.

    Args:
        file_id: Primary key of the row in the ``files`` table.
        metadata: JSON-serialisable mapping stored as the new metadata.

    Returns:
        ``True`` when the statement was committed (even if no row matched
        ``file_id``), ``False`` when it failed; failures are logged and
        rolled back.
    """
    conn = get_db_connection()
    try:
        with conn.cursor() as cur:
            # Let the database stamp updated_at. A naive datetime.utcnow()
            # written to a TIMESTAMP WITH TIME ZONE column is interpreted in
            # the session time zone, which is only correct when that is UTC.
            cur.execute("""
                UPDATE files
                SET metadata = %s, updated_at = CURRENT_TIMESTAMP
                WHERE id = %s
            """, (json.dumps(metadata), file_id))
            conn.commit()
            return True
    except Exception as e:
        logger.error("Failed to update file metadata", error=str(e))
        conn.rollback()
        return False
    finally:
        conn.close()
@app.route('/files/<file_id>/metadata', methods=['GET'])
def get_file_metadata_endpoint(file_id: str):
    """Get comprehensive file metadata.

    Combines the database row, the file's transformations, its ten most
    recent access-log entries, and whether the object is present in MinIO.

    Args:
        file_id: Identifier of the file, taken from the URL path.

    Returns:
        JSON with ``file``, ``transformations``, ``access_logs`` and
        ``storage`` keys; a JSON error with status 404 when the file is
        unknown; status 500 on any other failure.
    """
    try:
        # Get basic file metadata
        file_metadata = get_file_metadata(file_id)
        if not file_metadata:
            return jsonify({'error': 'File not found'}), 404

        # Get transformations
        transformations = get_file_transformations(file_id)

        # Get recent access logs
        access_logs = get_file_access_logs(file_id, limit=10)

        # Check if file exists in MinIO; an S3Error from stat_object is
        # treated as "object missing".
        try:
            minio_client.stat_object(
                file_metadata['bucket_name'],
                file_metadata['object_key']
            )
            minio_exists = True
        except S3Error:
            minio_exists = False

        response_data = {
            'file': file_metadata,
            'transformations': transformations,
            'access_logs': access_logs,
            'storage': {
                'minio_exists': minio_exists,
                'bucket': file_metadata['bucket_name'],
                'object_key': file_metadata['object_key']
            }
        }

        return jsonify(response_data), 200

    except Exception as e:
        logger.error("Error fetching file metadata", error=str(e))
        return jsonify({'error': 'Internal server error'}), 500
@app.route('/files/<file_id>/access-logs', methods=['GET'])
def get_file_access_logs_endpoint(file_id: str):
    """Get access logs for a file.

    Query parameters:
        limit: Maximum number of entries to return (default 50). Values are
            clamped to the range 0..1000.

    Args:
        file_id: Identifier of the file, taken from the URL path.

    Returns:
        JSON list of access-log entries, newest first; a JSON error with
        status 404 when the file is unknown; status 500 on any other failure.
    """
    max_limit = 1000

    try:
        # Check if file exists
        file_metadata = get_file_metadata(file_id)
        if not file_metadata:
            return jsonify({'error': 'File not found'}), 404

        limit = request.args.get('limit', 50, type=int)
        # A negative LIMIT makes the query fail, which the helper reports as
        # an empty list; clamp so the caller gets real data, and cap the
        # upper bound so one request cannot pull the whole log table.
        limit = max(0, min(limit, max_limit))

        access_logs = get_file_access_logs(file_id, limit=limit)
        return jsonify(access_logs), 200

    except Exception as e:
        logger.error("Error fetching file access logs", error=str(e))
        return jsonify({'error': 'Internal server error'}), 500
FROM python:3.11-slim

# Set working directory
WORKDIR /app

# Install system dependencies
# libgl1 replaces the transitional libgl1-mesa-glx package, which is no
# longer installable on current Debian releases.
RUN apt-get update && apt-get install -y \
    gcc \
    libmagic1 \
    libgl1 \
    libglib2.0-0 \
    && rm -rf /var/lib/apt/lists/*

# Copy requirements and install Python dependencies
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

# Copy application code
COPY app.py .

# Create non-root user
RUN useradd --create-home --shell /bin/bash app && chown -R app:app /app
USER app

# Expose port
EXPOSE 5000

# Health check
# The slim base image ships without curl, so probe /health with the Python
# interpreter that is already present; urlopen raises (non-zero exit) on
# connection failures and HTTP error statuses.
HEALTHCHECK --interval=30s --timeout=30s --start-period=5s --retries=3 \
    CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:5000/health', timeout=5)" || exit 1

# Run the application
CMD ["gunicorn", "--bind", "0.0.0.0:5000", "--workers", "2", "--timeout", "600", "app:app"]
Flask(__name__) + +# Configuration +MINIO_ENDPOINT = os.getenv('MINIO_ENDPOINT', 'localhost:9000') +MINIO_ACCESS_KEY = os.getenv('MINIO_ACCESS_KEY', 'minioadmin') +MINIO_SECRET_KEY = os.getenv('MINIO_SECRET_KEY', 'minioadmin123') +MINIO_BUCKET_NAME = os.getenv('MINIO_BUCKET_NAME', 'file-transformer-bucket') +MINIO_USE_SSL = os.getenv('MINIO_USE_SSL', 'false').lower() == 'true' + +POSTGRES_URL = os.getenv('POSTGRES_URL', 'postgresql://file_user:secure_password_123@localhost:5432/file_transformer') + +# Initialize MinIO client +minio_client = Minio( + MINIO_ENDPOINT, + access_key=MINIO_ACCESS_KEY, + secret_key=MINIO_SECRET_KEY, + secure=MINIO_USE_SSL +) + +def get_db_connection(): + """Create a database connection.""" + return psycopg2.connect(POSTGRES_URL) + +def get_file_from_minio(object_key: str) -> bytes: + """Download file from MinIO.""" + try: + response = minio_client.get_object(MINIO_BUCKET_NAME, object_key) + return response.read() + except S3Error as e: + logger.error("Failed to get file from MinIO", object_key=object_key, error=str(e)) + raise + +def upload_file_to_minio(file_data: bytes, object_key: str) -> bool: + """Upload file to MinIO.""" + try: + minio_client.put_object( + MINIO_BUCKET_NAME, + object_key, + file_data, + length=len(file_data) + ) + return True + except S3Error as e: + logger.error("Failed to upload file to MinIO", object_key=object_key, error=str(e)) + return False + +def extract_text_from_pdf(file_data: bytes) -> str: + """Extract text from PDF file.""" + try: + pdf_reader = PyPDF2.PdfReader(io.BytesIO(file_data)) + text = "" + for page in pdf_reader.pages: + text += page.extract_text() + "\n" + return text.strip() + except Exception as e: + logger.error("PDF text extraction failed", error=str(e)) + raise + +def extract_text_from_docx(file_data: bytes) -> str: + """Extract text from DOCX file.""" + try: + doc = Document(io.BytesIO(file_data)) + text = "" + for paragraph in doc.paragraphs: + text += paragraph.text + "\n" + return 
text.strip() + except Exception as e: + logger.error("DOCX text extraction failed", error=str(e)) + raise + +def convert_csv_to_json(file_data: bytes) -> List[Dict[str, Any]]: + """Convert CSV to JSON format.""" + try: + df = pd.read_csv(io.BytesIO(file_data)) + return df.to_dict('records') + except Exception as e: + logger.error("CSV to JSON conversion failed", error=str(e)) + raise + +def convert_excel_to_json(file_data: bytes) -> List[Dict[str, Any]]: + """Convert Excel to JSON format.""" + try: + df = pd.read_excel(io.BytesIO(file_data)) + return df.to_dict('records') + except Exception as e: + logger.error("Excel to JSON conversion failed", error=str(e)) + raise + +def resize_image(file_data: bytes, width: int, height: int) -> bytes: + """Resize image to specified dimensions.""" + try: + image = Image.open(io.BytesIO(file_data)) + resized_image = image.resize((width, height), Image.Resampling.LANCZOS) + + output = io.BytesIO() + resized_image.save(output, format=image.format or 'JPEG') + return output.getvalue() + except Exception as e: + logger.error("Image resize failed", error=str(e)) + raise + +def convert_image_format(file_data: bytes, target_format: str) -> bytes: + """Convert image to different format.""" + try: + image = Image.open(io.BytesIO(file_data)) + + output = io.BytesIO() + image.save(output, format=target_format.upper()) + return output.getvalue() + except Exception as e: + logger.error("Image format conversion failed", error=str(e)) + raise + +def create_transformation_record(file_id: str, transformation_type: str, config: Dict[str, Any]) -> str: + """Create transformation record in database.""" + conn = get_db_connection() + try: + with conn.cursor(cursor_factory=RealDictCursor) as cur: + cur.execute(""" + INSERT INTO transformations ( + file_id, transformation_type, input_path, status, config, created_at + ) VALUES (%s, %s, %s, %s, %s, %s) + RETURNING id + """, ( + file_id, + transformation_type, + f"files/{file_id}", + 'pending', + 
json.dumps(config), + datetime.utcnow() + )) + + transformation_id = cur.fetchone()['id'] + conn.commit() + return str(transformation_id) + except Exception as e: + conn.rollback() + logger.error("Failed to create transformation record", error=str(e)) + raise + finally: + conn.close() + +def update_transformation_status(transformation_id: str, status: str, result: Optional[Dict[str, Any]] = None, error_message: Optional[str] = None): + """Update transformation status in database.""" + conn = get_db_connection() + try: + with conn.cursor() as cur: + if status == 'processing': + cur.execute(""" + UPDATE transformations + SET status = %s, started_at = %s + WHERE id = %s + """, (status, datetime.utcnow(), transformation_id)) + elif status == 'completed': + cur.execute(""" + UPDATE transformations + SET status = %s, completed_at = %s, result = %s + WHERE id = %s + """, (status, datetime.utcnow(), json.dumps(result), transformation_id)) + elif status == 'failed': + cur.execute(""" + UPDATE transformations + SET status = %s, completed_at = %s, error_message = %s + WHERE id = %s + """, (status, datetime.utcnow(), error_message, transformation_id)) + + conn.commit() + except Exception as e: + conn.rollback() + logger.error("Failed to update transformation status", error=str(e)) + raise + finally: + conn.close() + +def get_file_info(file_id: str) -> Optional[Dict[str, Any]]: + """Get file information from database.""" + conn = get_db_connection() + try: + with conn.cursor(cursor_factory=RealDictCursor) as cur: + cur.execute(""" + SELECT id, filename, file_type, mime_type, object_key, status + FROM files + WHERE id = %s + """, (file_id,)) + + file_record = cur.fetchone() + return dict(file_record) if file_record else None + except Exception as e: + logger.error("Failed to get file info", error=str(e)) + return None + finally: + conn.close() + +@app.route('/health', methods=['GET']) +def health_check(): + """Health check endpoint.""" + return jsonify({'status': 'healthy', 
'service': 'file-transform'}) + +@app.route('/transform', methods=['POST']) +def transform_file(): + """Handle file transformation request.""" + try: + data = request.get_json() + if not data: + return jsonify({'error': 'No data provided'}), 400 + + file_id = data.get('file_id') + transformation_type = data.get('transformation_type') + config = data.get('config', {}) + + if not file_id or not transformation_type: + return jsonify({'error': 'file_id and transformation_type are required'}), 400 + + # Get file information + file_info = get_file_info(file_id) + if not file_info: + return jsonify({'error': 'File not found'}), 404 + + if file_info['status'] == 'deleted': + return jsonify({'error': 'File has been deleted'}), 400 + + # Create transformation record + transformation_id = create_transformation_record(file_id, transformation_type, config) + + # Update status to processing + update_transformation_status(transformation_id, 'processing') + + logger.info("Starting transformation", + file_id=file_id, + transformation_id=transformation_id, + transformation_type=transformation_type) + + try: + # Get file from MinIO + file_data = get_file_from_minio(file_info['object_key']) + + # Perform transformation based on type + result = None + output_data = None + + if transformation_type == 'extract_text': + if file_info['file_type'] == 'pdf': + result = extract_text_from_pdf(file_data) + elif file_info['file_type'] in ['docx', 'doc']: + result = extract_text_from_docx(file_data) + else: + raise ValueError(f"Text extraction not supported for file type: {file_info['file_type']}") + + # Save extracted text as new file + output_filename = f"{Path(file_info['filename']).stem}_extracted.txt" + output_object_key = f"transformations/{transformation_id}/{output_filename}" + output_data = result.encode('utf-8') + + elif transformation_type == 'csv_to_json': + if file_info['file_type'] != 'csv': + raise ValueError("CSV to JSON conversion only supports CSV files") + + result = 
convert_csv_to_json(file_data) + output_filename = f"{Path(file_info['filename']).stem}.json" + output_object_key = f"transformations/{transformation_id}/{output_filename}" + output_data = json.dumps(result, indent=2).encode('utf-8') + + elif transformation_type == 'excel_to_json': + if file_info['file_type'] not in ['xlsx', 'xls']: + raise ValueError("Excel to JSON conversion only supports Excel files") + + result = convert_excel_to_json(file_data) + output_filename = f"{Path(file_info['filename']).stem}.json" + output_object_key = f"transformations/{transformation_id}/{output_filename}" + output_data = json.dumps(result, indent=2).encode('utf-8') + + elif transformation_type == 'resize_image': + if not file_info['mime_type'].startswith('image/'): + raise ValueError("Image resize only supports image files") + + width = config.get('width', 800) + height = config.get('height', 600) + output_data = resize_image(file_data, width, height) + + output_filename = f"{Path(file_info['filename']).stem}_resized.{Path(file_info['filename']).suffix}" + output_object_key = f"transformations/{transformation_id}/{output_filename}" + + elif transformation_type == 'convert_image': + if not file_info['mime_type'].startswith('image/'): + raise ValueError("Image conversion only supports image files") + + target_format = config.get('format', 'JPEG') + output_data = convert_image_format(file_data, target_format) + + output_filename = f"{Path(file_info['filename']).stem}.{target_format.lower()}" + output_object_key = f"transformations/{transformation_id}/{output_filename}" + + else: + raise ValueError(f"Unsupported transformation type: {transformation_type}") + + # Upload transformed file to MinIO + if output_data: + if not upload_file_to_minio(output_data, output_object_key): + raise Exception("Failed to upload transformed file") + + # Update transformation as completed + update_transformation_status(transformation_id, 'completed', { + 'output_object_key': output_object_key, + 
'output_filename': output_filename, + 'result': result if isinstance(result, (str, list, dict)) else None + }) + + # Update file status + conn = get_db_connection() + with conn.cursor() as cur: + cur.execute(""" + UPDATE files + SET status = 'transformed', transformation_type = %s, processed_at = %s + WHERE id = %s + """, (transformation_type, datetime.utcnow(), file_id)) + conn.commit() + conn.close() + + response_data = { + 'transformation_id': transformation_id, + 'file_id': file_id, + 'transformation_type': transformation_type, + 'status': 'completed', + 'output_object_key': output_object_key, + 'output_filename': output_filename, + 'completed_at': datetime.utcnow().isoformat() + } + + logger.info("Transformation completed", + transformation_id=transformation_id, + file_id=file_id) + + return jsonify(response_data), 200 + + except Exception as e: + error_message = str(e) + logger.error("Transformation failed", + transformation_id=transformation_id, + file_id=file_id, + error=error_message) + + # Update transformation as failed + update_transformation_status(transformation_id, 'failed', error_message=error_message) + + return jsonify({ + 'transformation_id': transformation_id, + 'file_id': file_id, + 'status': 'failed', + 'error': error_message + }), 500 + + except Exception as e: + logger.error("Transform request error", error=str(e)) + return jsonify({'error': 'Internal server error'}), 500 + +@app.route('/transformations/', methods=['GET']) +def get_transformation_status(transformation_id: str): + """Get transformation status and details.""" + try: + conn = get_db_connection() + with conn.cursor(cursor_factory=RealDictCursor) as cur: + cur.execute(""" + SELECT id, file_id, transformation_type, input_path, output_path, + status, config, result, error_message, started_at, completed_at, created_at + FROM transformations + WHERE id = %s + """, (transformation_id,)) + + transformation = cur.fetchone() + if not transformation: + return jsonify({'error': 
'Transformation not found'}), 404 + + return jsonify(dict(transformation)), 200 + + except Exception as e: + logger.error("Error fetching transformation", error=str(e)) + return jsonify({'error': 'Internal server error'}), 500 + finally: + conn.close() + +@app.route('/transformations//retry', methods=['POST']) +def retry_transformation(transformation_id: str): + """Retry a failed transformation.""" + try: + conn = get_db_connection() + with conn.cursor(cursor_factory=RealDictCursor) as cur: + # Get transformation details + cur.execute(""" + SELECT file_id, transformation_type, config + FROM transformations + WHERE id = %s + """, (transformation_id,)) + + transformation = cur.fetchone() + if not transformation: + return jsonify({'error': 'Transformation not found'}), 404 + + if transformation['status'] != 'failed': + return jsonify({'error': 'Only failed transformations can be retried'}), 400 + + # Reset transformation status + cur.execute(""" + UPDATE transformations + SET status = 'pending', started_at = NULL, completed_at = NULL, + error_message = NULL, result = NULL + WHERE id = %s + """, (transformation_id,)) + conn.commit() + + # Trigger new transformation + transform_data = { + 'file_id': transformation['file_id'], + 'transformation_type': transformation['transformation_type'], + 'config': transformation['config'] or {} + } + + # Call transform endpoint internally + with app.test_client() as client: + response = client.post('/transform', json=transform_data) + return response.get_json(), response.status_code + + except Exception as e: + logger.error("Error retrying transformation", error=str(e)) + return jsonify({'error': 'Internal server error'}), 500 + finally: + conn.close() + +if __name__ == '__main__': + app.run(host='0.0.0.0', port=5000, debug=False) \ No newline at end of file diff --git a/functions/transform/requirements.txt b/functions/transform/requirements.txt new file mode 100644 index 0000000..e2c4ce7 --- /dev/null +++ 
b/functions/transform/requirements.txt @@ -0,0 +1,33 @@ +# Core dependencies +flask==2.3.3 +gunicorn==21.2.0 +python-dotenv==1.0.0 + +# Database +psycopg2-binary==2.9.7 +sqlalchemy==2.0.21 + +# MinIO/S3 +minio==7.1.17 +boto3==1.28.44 + +# File processing +python-magic==0.4.27 +Pillow==10.0.1 +PyPDF2==3.0.1 +python-docx==0.8.11 +openpyxl==3.1.2 +pandas==2.0.3 +numpy==1.24.3 + +# HTTP requests +requests==2.31.0 + +# JSON and data handling +pydantic==2.1.1 + +# Logging +structlog==23.1.0 + +# Utilities +python-dateutil==2.8.2 \ No newline at end of file diff --git a/functions/upload/Dockerfile b/functions/upload/Dockerfile new file mode 100644 index 0000000..bd7a83a --- /dev/null +++ b/functions/upload/Dockerfile @@ -0,0 +1,31 @@ +FROM python:3.11-slim + +# Set working directory +WORKDIR /app + +# Install system dependencies +RUN apt-get update && apt-get install -y \ + gcc \ + libmagic1 \ + && rm -rf /var/lib/apt/lists/* + +# Copy requirements and install Python dependencies +COPY requirements.txt . +RUN pip install --no-cache-dir -r requirements.txt + +# Copy application code +COPY app.py . 
+ +# Create non-root user +RUN useradd --create-home --shell /bin/bash app && chown -R app:app /app +USER app + +# Expose port +EXPOSE 5000 + +# Health check +HEALTHCHECK --interval=30s --timeout=30s --start-period=5s --retries=3 \ + CMD curl -f http://localhost:5000/health || exit 1 + +# Run the application +CMD ["gunicorn", "--bind", "0.0.0.0:5000", "--workers", "4", "--timeout", "300", "app:app"] \ No newline at end of file diff --git a/functions/upload/app.py b/functions/upload/app.py new file mode 100644 index 0000000..2b325b6 --- /dev/null +++ b/functions/upload/app.py @@ -0,0 +1,287 @@ +import os +import uuid +import hashlib +import magic +import logging +from datetime import datetime +from typing import Dict, Any, Optional + +from flask import Flask, request, jsonify +from werkzeug.utils import secure_filename +import psycopg2 +from psycopg2.extras import RealDictCursor +from minio import Minio +from minio.error import S3Error +import structlog + +# Configure structured logging +structlog.configure( + processors=[ + structlog.stdlib.filter_by_level, + structlog.stdlib.add_logger_name, + structlog.stdlib.add_log_level, + structlog.stdlib.PositionalArgumentsFormatter(), + structlog.processors.TimeStamper(fmt="iso"), + structlog.processors.StackInfoRenderer(), + structlog.processors.format_exc_info, + structlog.processors.UnicodeDecoder(), + structlog.processors.JSONRenderer() + ], + context_class=dict, + logger_factory=structlog.stdlib.LoggerFactory(), + wrapper_class=structlog.stdlib.BoundLogger, + cache_logger_on_first_use=True, +) + +logger = structlog.get_logger() + +app = Flask(__name__) + +# Configuration +MINIO_ENDPOINT = os.getenv('MINIO_ENDPOINT', 'localhost:9000') +MINIO_ACCESS_KEY = os.getenv('MINIO_ACCESS_KEY', 'minioadmin') +MINIO_SECRET_KEY = os.getenv('MINIO_SECRET_KEY', 'minioadmin123') +MINIO_BUCKET_NAME = os.getenv('MINIO_BUCKET_NAME', 'file-transformer-bucket') +MINIO_USE_SSL = os.getenv('MINIO_USE_SSL', 'false').lower() == 'true' + 
+POSTGRES_URL = os.getenv('POSTGRES_URL', 'postgresql://file_user:secure_password_123@localhost:5432/file_transformer') + +# Initialize MinIO client +minio_client = Minio( + MINIO_ENDPOINT, + access_key=MINIO_ACCESS_KEY, + secret_key=MINIO_SECRET_KEY, + secure=MINIO_USE_SSL +) + +def get_db_connection(): + """Create a database connection.""" + return psycopg2.connect(POSTGRES_URL) + +def calculate_file_hash(file_data: bytes) -> str: + """Calculate SHA-256 hash of file data.""" + return hashlib.sha256(file_data).hexdigest() + +def get_file_metadata(file_data: bytes, filename: str) -> Dict[str, Any]: + """Extract file metadata including MIME type and size.""" + mime_type = magic.from_buffer(file_data, mime=True) + file_size = len(file_data) + + # Determine file type from extension + file_extension = filename.rsplit('.', 1)[1].lower() if '.' in filename else '' + + return { + 'mime_type': mime_type, + 'file_size': file_size, + 'file_type': file_extension, + 'checksum': calculate_file_hash(file_data) + } + +def save_file_to_database(file_data: bytes, filename: str, object_key: str, metadata: Dict[str, Any]) -> str: + """Save file information to PostgreSQL database.""" + conn = get_db_connection() + try: + with conn.cursor(cursor_factory=RealDictCursor) as cur: + # Insert file record + cur.execute(""" + INSERT INTO files ( + filename, original_filename, file_path, file_size, + file_type, mime_type, bucket_name, object_key, + checksum, status, created_at + ) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s) + RETURNING id + """, ( + filename, + filename, + object_key, + metadata['file_size'], + metadata['file_type'], + metadata['mime_type'], + MINIO_BUCKET_NAME, + object_key, + metadata['checksum'], + 'uploaded', + datetime.utcnow() + )) + + file_id = cur.fetchone()['id'] + conn.commit() + return str(file_id) + except Exception as e: + conn.rollback() + logger.error("Database error", error=str(e)) + raise + finally: + conn.close() + +def upload_to_minio(file_data: 
bytes, object_key: str) -> bool: + """Upload file to MinIO bucket.""" + try: + # Ensure bucket exists + if not minio_client.bucket_exists(MINIO_BUCKET_NAME): + minio_client.make_bucket(MINIO_BUCKET_NAME) + logger.info("Created bucket", bucket=MINIO_BUCKET_NAME) + + # Upload file + minio_client.put_object( + MINIO_BUCKET_NAME, + object_key, + file_data, + length=len(file_data) + ) + + logger.info("File uploaded to MinIO", bucket=MINIO_BUCKET_NAME, object_key=object_key) + return True + except S3Error as e: + logger.error("MinIO upload error", error=str(e)) + return False + +@app.route('/health', methods=['GET']) +def health_check(): + """Health check endpoint.""" + return jsonify({'status': 'healthy', 'service': 'file-upload'}) + +@app.route('/upload', methods=['POST']) +def upload_file(): + """Handle file upload request.""" + try: + # Check if file is present in request + if 'file' not in request.files: + return jsonify({'error': 'No file provided'}), 400 + + file = request.files['file'] + if file.filename == '': + return jsonify({'error': 'No file selected'}), 400 + + # Read file data + file_data = file.read() + if not file_data: + return jsonify({'error': 'Empty file'}), 400 + + # Secure filename and generate object key + filename = secure_filename(file.filename) + file_id = str(uuid.uuid4()) + object_key = f"uploads/{file_id}/{filename}" + + # Extract metadata + metadata = get_file_metadata(file_data, filename) + + logger.info("Processing file upload", + filename=filename, + size=metadata['file_size'], + mime_type=metadata['mime_type']) + + # Upload to MinIO + if not upload_to_minio(file_data, object_key): + return jsonify({'error': 'Failed to upload file to storage'}), 500 + + # Save to database + db_file_id = save_file_to_database(file_data, filename, object_key, metadata) + + # Log access + log_file_access(db_file_id, 'upload', request.remote_addr, request.headers.get('User-Agent')) + + response_data = { + 'file_id': db_file_id, + 'filename': filename, + 
'object_key': object_key, + 'file_size': metadata['file_size'], + 'mime_type': metadata['mime_type'], + 'checksum': metadata['checksum'], + 'status': 'uploaded', + 'uploaded_at': datetime.utcnow().isoformat() + } + + logger.info("File upload completed", file_id=db_file_id, filename=filename) + return jsonify(response_data), 201 + + except Exception as e: + logger.error("Upload error", error=str(e)) + return jsonify({'error': 'Internal server error'}), 500 + +def log_file_access(file_id: str, action: str, ip_address: str, user_agent: Optional[str]): + """Log file access for audit purposes.""" + conn = get_db_connection() + try: + with conn.cursor() as cur: + cur.execute(""" + INSERT INTO file_access_logs (file_id, action, ip_address, user_agent) + VALUES (%s, %s, %s, %s) + """, (file_id, action, ip_address, user_agent)) + conn.commit() + except Exception as e: + logger.error("Failed to log file access", error=str(e)) + conn.rollback() + finally: + conn.close() + +@app.route('/files/', methods=['GET']) +def get_file_info(file_id: str): + """Get file information by ID.""" + try: + conn = get_db_connection() + with conn.cursor(cursor_factory=RealDictCursor) as cur: + cur.execute(""" + SELECT id, filename, original_filename, file_size, file_type, + mime_type, bucket_name, object_key, checksum, status, + created_at, updated_at + FROM files + WHERE id = %s + """, (file_id,)) + + file_record = cur.fetchone() + if not file_record: + return jsonify({'error': 'File not found'}), 404 + + return jsonify(dict(file_record)), 200 + + except Exception as e: + logger.error("Error fetching file info", error=str(e)) + return jsonify({'error': 'Internal server error'}), 500 + finally: + conn.close() + +@app.route('/files/', methods=['DELETE']) +def delete_file(file_id: str): + """Delete file from storage and database.""" + try: + conn = get_db_connection() + with conn.cursor(cursor_factory=RealDictCursor) as cur: + # Get file info + cur.execute("SELECT object_key FROM files WHERE id = 
%s", (file_id,)) + file_record = cur.fetchone() + + if not file_record: + return jsonify({'error': 'File not found'}), 404 + + object_key = file_record['object_key'] + + # Delete from MinIO + try: + minio_client.remove_object(MINIO_BUCKET_NAME, object_key) + logger.info("File deleted from MinIO", object_key=object_key) + except S3Error as e: + logger.warning("File not found in MinIO", object_key=object_key, error=str(e)) + + # Mark as deleted in database + cur.execute(""" + UPDATE files + SET status = 'deleted', deleted_at = %s + WHERE id = %s + """, (datetime.utcnow(), file_id)) + + conn.commit() + + # Log access + log_file_access(file_id, 'delete', request.remote_addr, request.headers.get('User-Agent')) + + return jsonify({'message': 'File deleted successfully'}), 200 + + except Exception as e: + logger.error("Error deleting file", error=str(e)) + return jsonify({'error': 'Internal server error'}), 500 + finally: + conn.close() + +if __name__ == '__main__': + app.run(host='0.0.0.0', port=5000, debug=False) \ No newline at end of file diff --git a/functions/upload/requirements.txt b/functions/upload/requirements.txt new file mode 100644 index 0000000..e15cd49 --- /dev/null +++ b/functions/upload/requirements.txt @@ -0,0 +1,27 @@ +# Core dependencies +flask==2.3.3 +gunicorn==21.2.0 +python-dotenv==1.0.0 + +# Database +psycopg2-binary==2.9.7 +sqlalchemy==2.0.21 + +# MinIO/S3 +minio==7.1.17 +boto3==1.28.44 + +# File processing +python-magic==0.4.27 + +# HTTP requests +requests==2.31.0 + +# JSON and data handling +pydantic==2.1.1 + +# Logging +structlog==23.1.0 + +# Utilities +python-dateutil==2.8.2 \ No newline at end of file diff --git a/k8s/dashboard.yaml b/k8s/dashboard.yaml new file mode 100644 index 0000000..b3885d4 --- /dev/null +++ b/k8s/dashboard.yaml @@ -0,0 +1,82 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: file-transformer-dashboard + namespace: file-transformer + labels: + app: file-transformer-dashboard +spec: + replicas: 2 + selector: + 
matchLabels: + app: file-transformer-dashboard + template: + metadata: + labels: + app: file-transformer-dashboard + spec: + containers: + - name: dashboard + image: file-transformer/dashboard:latest + ports: + - containerPort: 3000 + env: + - name: REACT_APP_API_BASE_URL + value: "http://api-gateway.file-transformer.svc.cluster.local:8080" + - name: REACT_APP_MINIO_ENDPOINT + value: "http://minio.file-transformer.svc.cluster.local:9000" + - name: REACT_APP_MINIO_CONSOLE + value: "http://minio.file-transformer.svc.cluster.local:9001" + resources: + requests: + memory: "128Mi" + cpu: "100m" + limits: + memory: "256Mi" + cpu: "200m" + livenessProbe: + httpGet: + path: / + port: 3000 + initialDelaySeconds: 30 + periodSeconds: 10 + readinessProbe: + httpGet: + path: / + port: 3000 + initialDelaySeconds: 5 + periodSeconds: 5 +--- +apiVersion: v1 +kind: Service +metadata: + name: file-transformer-dashboard + namespace: file-transformer +spec: + selector: + app: file-transformer-dashboard + ports: + - port: 80 + targetPort: 3000 + type: ClusterIP +--- +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: file-transformer-dashboard-ingress + namespace: file-transformer + annotations: + nginx.ingress.kubernetes.io/rewrite-target: / + nginx.ingress.kubernetes.io/ssl-redirect: "false" +spec: + rules: + - host: dashboard.file-transformer.local + http: + paths: + - path: / + pathType: Prefix + backend: + service: + name: file-transformer-dashboard + port: + number: 80 \ No newline at end of file diff --git a/k8s/functions/download-service.yaml b/k8s/functions/download-service.yaml new file mode 100644 index 0000000..939de76 --- /dev/null +++ b/k8s/functions/download-service.yaml @@ -0,0 +1,85 @@ +apiVersion: serving.knative.dev/v1 +kind: Service +metadata: + name: function-download + namespace: file-transformer + labels: + app: file-transformer + function: download +spec: + template: + metadata: + annotations: + autoscaling.knative.dev/minScale: "0" + 
autoscaling.knative.dev/maxScale: "10"
+        autoscaling.knative.dev/target: "1"
+    spec:
+      containerConcurrency: 10
+      timeoutSeconds: 300
+      containers:
+        - image: function-download:latest
+          imagePullPolicy: IfNotPresent
+          ports:
+            - containerPort: 5000
+          env:
+            - name: MINIO_ENDPOINT
+              value: "minio.file-transformer.svc.cluster.local:9000"  # Service is named "minio" in k8s/minio.yaml
+            - name: MINIO_ACCESS_KEY
+              valueFrom:
+                secretKeyRef:
+                  name: minio-secret
+                  key: accesskey  # key name as defined in minio-secret
+            - name: MINIO_SECRET_KEY
+              valueFrom:
+                secretKeyRef:
+                  name: minio-secret
+                  key: secretkey  # key name as defined in minio-secret
+            - name: MINIO_BUCKET_NAME
+              value: "file-transformer-bucket"
+            - name: MINIO_USE_SSL
+              value: "false"
+            - name: POSTGRES_URL
+              valueFrom:
+                secretKeyRef:
+                  name: postgres-secret
+                  key: database-url  # must exist in postgres-secret (k8s/postgres.yaml)
+          resources:
+            requests:
+              memory: "256Mi"
+              cpu: "250m"
+            limits:
+              memory: "512Mi"
+              cpu: "500m"
+          readinessProbe:
+            httpGet:
+              path: /health
+              port: 5000
+            initialDelaySeconds: 5
+            periodSeconds: 10
+          livenessProbe:
+            httpGet:
+              path: /health
+              port: 5000
+            initialDelaySeconds: 15
+            periodSeconds: 30
+---
+apiVersion: networking.k8s.io/v1
+kind: Ingress
+metadata:
+  name: function-download-ingress
+  namespace: file-transformer
+  annotations:
+    nginx.ingress.kubernetes.io/rewrite-target: /
+    nginx.ingress.kubernetes.io/ssl-redirect: "false"
+spec:
+  rules:
+    - host: download.file-transformer.local
+      http:
+        paths:
+          - path: /
+            pathType: Prefix
+            backend:
+              service:
+                name: function-download
+                port:
+                  number: 80
\ No newline at end of file
diff --git a/k8s/functions/metadata-service.yaml b/k8s/functions/metadata-service.yaml
new file mode 100644
index 0000000..8ca4611
--- /dev/null
+++ b/k8s/functions/metadata-service.yaml
@@ -0,0 +1,85 @@
+apiVersion: serving.knative.dev/v1
+kind: Service
+metadata:
+  name: function-metadata
+  namespace: file-transformer
+  labels:
+    app: file-transformer
+    function: metadata
+spec:
+  template:
+    metadata:
+      annotations:
+        autoscaling.knative.dev/minScale: "0"
+        autoscaling.knative.dev/maxScale: "10"
+        autoscaling.knative.dev/target: "1"
+    spec:
+      containerConcurrency: 10
+      timeoutSeconds: 300
+      containers:
+        - image: function-metadata:latest
+          imagePullPolicy: IfNotPresent
+          ports:
+            - containerPort: 5000
+          env:
+            - name: MINIO_ENDPOINT
+              value: "minio.file-transformer.svc.cluster.local:9000"  # Service is named "minio" in k8s/minio.yaml
+            - name: MINIO_ACCESS_KEY
+              valueFrom:
+                secretKeyRef:
+                  name: minio-secret
+                  key: accesskey  # key name as defined in minio-secret
+            - name: MINIO_SECRET_KEY
+              valueFrom:
+                secretKeyRef:
+                  name: minio-secret
+                  key: secretkey  # key name as defined in minio-secret
+            - name: MINIO_BUCKET_NAME
+              value: "file-transformer-bucket"
+            - name: MINIO_USE_SSL
+              value: "false"
+            - name: POSTGRES_URL
+              valueFrom:
+                secretKeyRef:
+                  name: postgres-secret
+                  key: database-url  # must exist in postgres-secret (k8s/postgres.yaml)
+          resources:
+            requests:
+              memory: "256Mi"
+              cpu: "250m"
+            limits:
+              memory: "512Mi"
+              cpu: "500m"
+          readinessProbe:
+            httpGet:
+              path: /health
+              port: 5000
+            initialDelaySeconds: 5
+            periodSeconds: 10
+          livenessProbe:
+            httpGet:
+              path: /health
+              port: 5000
+            initialDelaySeconds: 15
+            periodSeconds: 30
+---
+apiVersion: networking.k8s.io/v1
+kind: Ingress
+metadata:
+  name: function-metadata-ingress
+  namespace: file-transformer
+  annotations:
+    nginx.ingress.kubernetes.io/rewrite-target: /
+    nginx.ingress.kubernetes.io/ssl-redirect: "false"
+spec:
+  rules:
+    - host: metadata.file-transformer.local
+      http:
+        paths:
+          - path: /
+            pathType: Prefix
+            backend:
+              service:
+                name: function-metadata
+                port:
+                  number: 80
\ No newline at end of file
diff --git a/k8s/functions/transform-service.yaml b/k8s/functions/transform-service.yaml
new file mode 100644
index 0000000..5fe6519
--- /dev/null
+++ b/k8s/functions/transform-service.yaml
@@ -0,0 +1,63 @@
+apiVersion: serving.knative.dev/v1
+kind: Service
+metadata:
+  name: file-transform-service
+  namespace: file-transformer
+spec:
+  template:
+    metadata:
+      annotations:
+        autoscaling.knative.dev/minScale: "0"
+        autoscaling.knative.dev/maxScale: "10"
+        autoscaling.knative.dev/target: "1"
+    spec:
+      containerConcurrency: 5
+      timeoutSeconds: 600
+      containers:
+        - image: file-transformer/transform-function:latest
+          ports:
+            - containerPort: 5000
+          env:
+            - name: MINIO_ENDPOINT
+              value: "minio.file-transformer.svc.cluster.local:9000"
+            - name: MINIO_ACCESS_KEY
+              valueFrom:
+                secretKeyRef:
+                  name: minio-secret
+                  key: accesskey
+            - name: MINIO_SECRET_KEY
+              valueFrom:
+                secretKeyRef:
+                  name: minio-secret
+                  key: secretkey
+            - name: MINIO_BUCKET_NAME
+              valueFrom:
+                configMapKeyRef:
+                  name: minio-config
+                  key: MINIO_BUCKET_NAME
+            - name: POSTGRES_PASSWORD  # listed before POSTGRES_URL: $(VAR) only expands earlier entries
+              valueFrom:
+                secretKeyRef:
+                  name: postgres-secret
+                  key: postgres-password
+            - name: POSTGRES_URL
+              value: "postgresql://file_user:$(POSTGRES_PASSWORD)@postgres.file-transformer.svc.cluster.local:5432/file_transformer"
+          resources:
+            requests:
+              memory: "512Mi"
+              cpu: "500m"
+            limits:
+              memory: "1Gi"
+              cpu: "1000m"
+          livenessProbe:
+            httpGet:
+              path: /health
+              port: 5000
+            initialDelaySeconds: 30
+            periodSeconds: 10
+          readinessProbe:
+            httpGet:
+              path: /health
+              port: 5000
+            initialDelaySeconds: 5
+            periodSeconds: 5
\ No newline at end of file
diff --git a/k8s/functions/upload-service.yaml b/k8s/functions/upload-service.yaml
new file mode 100644
index 0000000..c1d2932
--- /dev/null
+++ b/k8s/functions/upload-service.yaml
@@ -0,0 +1,63 @@
+apiVersion: serving.knative.dev/v1
+kind: Service
+metadata:
+  name: file-upload-service
+  namespace: file-transformer
+spec:
+  template:
+    metadata:
+      annotations:
+        autoscaling.knative.dev/minScale: "0"
+        autoscaling.knative.dev/maxScale: "10"
+        autoscaling.knative.dev/target: "1"
+    spec:
+      containerConcurrency: 10
+      timeoutSeconds: 300
+      containers:
+        - image: file-transformer/upload-function:latest
+          ports:
+            - containerPort: 5000
+          env:
+            - name: MINIO_ENDPOINT
+              value: "minio.file-transformer.svc.cluster.local:9000"
+            - name: MINIO_ACCESS_KEY
+              valueFrom:
+                secretKeyRef:
+                  name: minio-secret
+                  key: accesskey
+            - name: MINIO_SECRET_KEY
+              valueFrom:
+                secretKeyRef:
+                  name: minio-secret
+                  key: secretkey
+            - name: MINIO_BUCKET_NAME
+              valueFrom:
+                configMapKeyRef:
+                  name: minio-config
+                  key: MINIO_BUCKET_NAME
+            - name: POSTGRES_PASSWORD  # listed before POSTGRES_URL: $(VAR) only expands earlier entries
+              valueFrom:
+                secretKeyRef:
+                  name: postgres-secret
+                  key: postgres-password
+            - name: POSTGRES_URL
+              value: "postgresql://file_user:$(POSTGRES_PASSWORD)@postgres.file-transformer.svc.cluster.local:5432/file_transformer"
+          resources:
+            requests:
+              memory: "256Mi"
+              cpu: "250m"
+            limits:
+              memory: "512Mi"
+              cpu: "500m"
+          livenessProbe:
+            httpGet:
+              path: /health
+              port: 5000
+            initialDelaySeconds: 30
+            periodSeconds: 10
+          readinessProbe:
+            httpGet:
+              path: /health
+              port: 5000
+            initialDelaySeconds: 5
+            periodSeconds: 5
\ No newline at end of file
diff --git a/k8s/minio.yaml b/k8s/minio.yaml
new file mode 100644
index 0000000..7c9e6fa
--- /dev/null
+++ b/k8s/minio.yaml
@@ -0,0 +1,112 @@
+apiVersion: v1
+kind: Secret
+metadata:
+  name: minio-secret
+  namespace: file-transformer
+type: Opaque
+data:
+  accesskey: bWluaW9hZG1pbg== # minioadmin
+  secretkey: bWluaW9hZG1pbjEyMw== # minioadmin123
+---
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: minio-config
+  namespace: file-transformer
+data:
+  MINIO_BUCKET_NAME: file-transformer-bucket
+---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: minio
+  namespace: file-transformer
+  labels:
+    app: minio
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: minio
+  template:
+    metadata:
+      labels:
+        app: minio
+    spec:
+      containers:
+        - name: minio
+          image: minio/minio:latest
+          ports:
+            - containerPort: 9000
+              name: api
+            - containerPort: 9001
+              name: console
+          env:
+            - name: MINIO_ROOT_USER
+              valueFrom:
+                secretKeyRef:
+                  name: minio-secret
+                  key: accesskey
+            - name: MINIO_ROOT_PASSWORD
+              valueFrom:
+                secretKeyRef:
+                  name: minio-secret
+                  key: secretkey
+            - name: MINIO_BUCKET_NAME
+              valueFrom:
+                configMapKeyRef:
+                  name: minio-config
+                  key: MINIO_BUCKET_NAME
+          command:
+            - /bin/bash
+            - -c
+            - |
+              minio server /data --console-address ":9001" &
+              sleep 10
+              mc alias set local http://localhost:9000 $MINIO_ROOT_USER $MINIO_ROOT_PASSWORD
+              mc mb local/$MINIO_BUCKET_NAME --ignore-existing
+              mc anonymous set public local/$MINIO_BUCKET_NAME  # 'mc policy set' was renamed to 'mc anonymous set'
+              wait
+          volumeMounts:
+            - name: minio-storage
+              mountPath: /data
+          resources:
+            requests:
+              memory: "256Mi"
+              cpu: "250m"
+            limits:
+              memory: "512Mi"
+              cpu: "500m"
+      volumes:
+        - name: minio-storage
+          persistentVolumeClaim:
+            claimName: minio-pvc
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: minio
+  namespace: file-transformer
+spec:
+  selector:
+    app: minio
+  ports:
+    - name: api
+      port: 9000
+      targetPort: 9000
+    - name: console
+      port: 9001
+      targetPort: 9001
+  type: ClusterIP
+---
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+  name: minio-pvc
+  namespace: file-transformer
+spec:
+  accessModes:
+    - ReadWriteOnce
+  resources:
+    requests:
+      storage: 20Gi
\ No newline at end of file
diff --git a/k8s/namespace.yaml b/k8s/namespace.yaml
new file mode 100644
index 0000000..5572843
--- /dev/null
+++ b/k8s/namespace.yaml
@@ -0,0 +1,7 @@
+apiVersion: v1
+kind: Namespace
+metadata:
+  name: file-transformer
+  labels:
+    name: file-transformer
+    app: file-transformer-s3
\ No newline at end of file
diff --git a/k8s/postgres.yaml b/k8s/postgres.yaml
new file mode 100644
index 0000000..979d667
--- /dev/null
+++ b/k8s/postgres.yaml
@@ -0,0 +1,102 @@
+apiVersion: v1
+kind: Secret
+metadata:
+  name: postgres-secret
+  namespace: file-transformer
+type: Opaque
+data:
+  postgres-password: c2VjdXJlX3Bhc3N3b3JkXzEyMw== # secure_password_123
+stringData:  # plain-text entries; referenced as key "database-url" by the download/metadata functions
+  database-url: "postgresql://file_user:secure_password_123@postgres.file-transformer.svc.cluster.local:5432/file_transformer"
+---
+apiVersion: v1
+kind: ConfigMap
+metadata:
+  name: postgres-config
+  namespace: file-transformer
+data:
+  POSTGRES_DB: file_transformer
+  POSTGRES_USER: file_user
+---
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: postgres
+  namespace: file-transformer
+  labels:
+    app: postgres
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: postgres
+  template:
+    metadata:
+      labels:
+        app: postgres
+    spec:
+      containers:
+        - name: postgres
+          image: postgres:15-alpine
+          ports:
+            - containerPort: 5432
+          env:
+            - name: POSTGRES_DB
+              valueFrom:
+                configMapKeyRef:
+                  name: postgres-config
+                  key: POSTGRES_DB
+            - name: POSTGRES_USER
+              valueFrom:
+                configMapKeyRef:
+                  name: postgres-config
+                  key: POSTGRES_USER
+            - name: POSTGRES_PASSWORD
+              valueFrom:
+                secretKeyRef:
+                  name: postgres-secret
+                  key: postgres-password
+          volumeMounts:
+            - name: postgres-storage
+              mountPath: /var/lib/postgresql/data
+            - name: postgres-init
+              mountPath: /docker-entrypoint-initdb.d
+          resources:
+            requests:
+              memory: "256Mi"
+              cpu: "250m"
+            limits:
+              memory: "512Mi"
+              cpu: "500m"
+      volumes:
+        - name: postgres-storage
+          persistentVolumeClaim:
+            claimName: postgres-pvc
+        - name: postgres-init
+          configMap:
+            name: postgres-init-script  # NOTE(review): not defined in this manifest; confirm it is created elsewhere
+---
+apiVersion: v1
+kind: Service
+metadata:
+  name: postgres
+  namespace: file-transformer
+spec:
+  selector:
+    app: postgres
+  ports:
+    - port: 5432
+      targetPort: 5432
+  type: ClusterIP
+---
+apiVersion: v1
+kind: PersistentVolumeClaim
+metadata:
+  name: postgres-pvc
+  namespace: file-transformer
+spec:
+  accessModes:
+    - ReadWriteOnce
+  resources:
+    requests:
+      storage: 10Gi
\ No newline at end of file
diff --git a/setup.sh b/setup.sh
new file mode 100755
index 0000000..a97c1dc
--- /dev/null
+++ b/setup.sh
@@ -0,0 +1,114 @@
+#!/bin/bash
+
+# File Transformer S3 Setup Script
+# This script helps set up the development environment
+
+set -e
+
+echo "🚀 Setting up File Transformer S3..."
+
+# Check if running on Arch Linux
+if command -v pacman &> /dev/null; then
+    echo "📦 Detected Arch Linux - installing dependencies with pacman..."
+
+    # Check if Node.js is installed
+    if ! command -v node &> /dev/null; then
+        echo "Installing Node.js and npm..."
+        sudo pacman -S --noconfirm nodejs npm
+    else
+        echo "✅ Node.js already installed"
+    fi
+
+    # Check if Python3 is installed
+    if ! command -v python3 &> /dev/null; then
+        echo "Installing Python3..."
+        sudo pacman -S --noconfirm python python-pip
+    else
+        echo "✅ Python3 already installed"
+    fi
+
+    # Check if Docker is installed
+    if ! command -v docker &> /dev/null; then
+        echo "Installing Docker..."
+        sudo pacman -S --noconfirm docker docker-compose
+        sudo systemctl enable docker
+        sudo systemctl start docker
+        sudo usermod -aG docker "$USER"  # quoted so the value is passed as a single argument
+        echo "⚠️ Please log out and back in for Docker group changes to take effect"
+    else
+        echo "✅ Docker already installed"
+    fi
+
+# Check if running on Ubuntu/Debian
+elif command -v apt &> /dev/null; then
+    echo "📦 Detected Ubuntu/Debian - installing dependencies with apt..."
+
+    # Check if Node.js is installed
+    if ! command -v node &> /dev/null; then
+        echo "Installing Node.js and npm..."
+        curl -fsSL https://deb.nodesource.com/setup_lts.x | sudo -E bash -
+        sudo apt-get install -y nodejs
+    else
+        echo "✅ Node.js already installed"
+    fi
+
+    # Check if Python3 is installed
+    if ! command -v python3 &> /dev/null; then
+        echo "Installing Python3..."
+        sudo apt-get update
+        sudo apt-get install -y python3 python3-pip
+    else
+        echo "✅ Python3 already installed"
+    fi
+
+    # Check if Docker is installed
+    if ! command -v docker &> /dev/null; then
+        echo "Installing Docker..."
+        curl -fsSL https://get.docker.com -o get-docker.sh
+        sudo sh get-docker.sh
+        sudo usermod -aG docker "$USER"  # quoted so the value is passed as a single argument
+        rm get-docker.sh
+        echo "⚠️ Please log out and back in for Docker group changes to take effect"
+    else
+        echo "✅ Docker already installed"
+    fi
+
+else
+    echo "⚠️ Unsupported package manager. Please install manually:"
+    echo " - Node.js and npm"
+    echo " - Python3 and pip"
+    echo " - Docker and docker-compose"
+fi
+
+# Create .env file if it doesn't exist
+if [ ! -f .env ]; then
+    echo "📝 Creating .env file from template..."
+    cp env.example .env
+    echo "✅ Created .env file"
+    echo "⚠️ Please edit .env with your configuration values"
+else
+    echo "✅ .env file already exists"
+fi
+
+# Install Node.js dependencies
+echo "📦 Installing Node.js dependencies..."
+cd dashboard
+npm install
+cd ..
+
+# Install Python dependencies
+echo "🐍 Installing Python dependencies..."
+pip3 install -r functions/requirements.txt
+
+echo "✅ Setup complete!"
+echo ""
+echo "Next steps:"
+echo "1. Edit .env file with your configuration"
+echo "2. Run 'make deploy-local' to start the system"
+echo "3. Or run 'make dev-dashboard' for development mode"
+echo ""
+echo "Available commands:"
+echo " make help - Show all available commands"
+echo " make deploy-local - Deploy with Docker Compose"
+echo " make dev-dashboard - Start dashboard in dev mode"
+echo " make build-functions - Build function containers"
\ No newline at end of file