Initial commit: File Transformer S3 project with React dashboard and Knative functions

greg committed 2025-07-04 08:01:46 -07:00
commit fd9abd0210
54 changed files with 5584 additions and 0 deletions

192  .gitignore  vendored  Normal file

@@ -0,0 +1,192 @@
# Dependencies
node_modules/
npm-debug.log*
yarn-debug.log*
yarn-error.log*
__pycache__/
*.py[cod]
*$py.class
*.so
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# Environment variables
.env
.env.local
.env.development.local
.env.test.local
.env.production.local
# IDE
.vscode/
.idea/
*.swp
*.swo
*~
# OS
.DS_Store
.DS_Store?
._*
.Spotlight-V100
.Trashes
ehthumbs.db
Thumbs.db
# Logs
logs
*.log
npm-debug.log*
yarn-debug.log*
yarn-error.log*
lerna-debug.log*
# Runtime data
pids
*.pid
*.seed
*.pid.lock
# Coverage directory used by tools like istanbul
coverage/
*.lcov
# nyc test coverage
.nyc_output
# Dependency directories
jspm_packages/
# Optional npm cache directory
.npm
# Optional eslint cache
.eslintcache
# Microbundle cache
.rpt2_cache/
.rts2_cache_cjs/
.rts2_cache_es/
.rts2_cache_umd/
# Optional REPL history
.node_repl_history
# Output of 'npm pack'
*.tgz
# Yarn Integrity file
.yarn-integrity
# parcel-bundler cache (https://parceljs.org/)
.cache
.parcel-cache
# Next.js build output
.next
# Nuxt.js build / generate output
.nuxt
dist
# Gatsby files
.cache/
public
# Storybook build outputs
.out
.storybook-out
# Temporary folders
tmp/
temp/
# Docker
.dockerignore
# Kubernetes
*.kubeconfig
# Database
*.db
*.sqlite
*.sqlite3
# MinIO data
minio_data/
# PostgreSQL data
postgres_data/
# Build artifacts
build/
dist/
*.tar.gz
*.zip
# Test artifacts
.coverage
.pytest_cache/
htmlcov/
# Python virtual environments
venv/
env/
ENV/
env.bak/
venv.bak/
# Jupyter Notebook
.ipynb_checkpoints
# pyenv
.python-version
# pipenv
Pipfile.lock
# PEP 582
__pypackages__/
# Celery
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# Local development
.local/

144  Makefile  Normal file

@@ -0,0 +1,144 @@
.PHONY: help setup setup-full install-deps build-dashboard build-functions deploy-local deploy-knative deploy-all clean logs status reset-db dev-dashboard dev-functions
# Default target
help:
@echo "File Transformer S3 - Available Commands:"
@echo ""
@echo "Setup & Installation:"
@echo " setup-full - Full setup with dependency installation (recommended)"
@echo " setup - Initial setup (creates .env, installs dependencies)"
@echo " install-deps - Install all dependencies"
@echo " build-dashboard - Build React dashboard"
@echo " build-functions - Build Knative functions"
@echo ""
@echo "Deployment:"
@echo " deploy-local - Deploy locally with Docker Compose"
@echo " deploy-knative - Deploy to Knative cluster"
@echo " deploy-all - Deploy everything"
@echo ""
@echo "Management:"
@echo " logs - View logs from all services"
@echo " status - Check status of all services"
@echo " clean - Clean up all resources"
@echo " reset-db - Reset PostgreSQL database"
@echo ""
@echo "Development:"
@echo " dev-dashboard - Start dashboard in development mode"
@echo " dev-functions - Start functions in development mode"
# Environment setup
setup:
@echo "Setting up File Transformer S3..."
@if [ ! -f .env ]; then \
cp env.example .env; \
echo "Created .env from env.example"; \
else \
echo ".env already exists"; \
fi
@echo "Please edit .env with your configuration values"
@make install-deps
# Full setup with dependency installation
setup-full:
@echo "Running full setup with dependency installation..."
@./setup.sh
# Install dependencies
install-deps:
@echo "Installing dependencies..."
@if command -v npm &> /dev/null; then \
cd dashboard && npm install; \
else \
echo "⚠️ npm not found. Please install Node.js and npm first."; \
echo " Run: ./setup.sh"; \
exit 1; \
fi
@if command -v pip3 &> /dev/null; then \
pip3 install -r functions/requirements.txt; \
else \
echo "⚠️ pip3 not found. Please install Python3 and pip first."; \
echo " Run: ./setup.sh"; \
exit 1; \
fi
@echo "Dependencies installed successfully"
# Build dashboard
build-dashboard:
@echo "Building React dashboard..."
@if command -v npm &> /dev/null; then \
cd dashboard && npm run build; \
else \
echo "⚠️ npm not found. Please install Node.js and npm first."; \
echo " Run: ./setup.sh"; \
exit 1; \
fi
@echo "Dashboard built successfully"
# Build functions
build-functions:
@echo "Building Knative functions..."
@cd functions && make build
@echo "Functions built successfully"
# Deploy locally
deploy-local:
@echo "Deploying locally with Docker Compose..."
@docker-compose up -d
@echo "Local deployment complete"
@echo "Dashboard: http://localhost:$(shell grep REACT_APP_PORT .env | cut -d '=' -f2)"
@echo "MinIO Console: http://localhost:$(shell grep MINIO_CONSOLE_PORT .env | cut -d '=' -f2)"
# Deploy to Knative
deploy-knative:
@echo "Deploying to Knative cluster..."
@kubectl apply -f k8s/namespace.yaml
@kubectl apply -f k8s/postgres.yaml
@kubectl apply -f k8s/minio.yaml
@kubectl apply -f k8s/functions/
@kubectl apply -f k8s/dashboard.yaml
@echo "Knative deployment complete"
# Deploy everything
deploy-all: build-dashboard build-functions deploy-knative
# View logs
logs:
@echo "Viewing logs from all services..."
@docker-compose logs -f
# Check status
status:
@echo "Checking service status..."
@docker-compose ps
@echo ""
@echo "Dashboard: http://localhost:$(shell grep REACT_APP_PORT .env | cut -d '=' -f2)"
@echo "MinIO Console: http://localhost:$(shell grep MINIO_CONSOLE_PORT .env | cut -d '=' -f2)"
# Clean up
clean:
@echo "Cleaning up resources..."
@docker-compose down -v
@docker system prune -f
@echo "Cleanup complete"
# Reset database
reset-db:
@echo "Resetting PostgreSQL database..."
@docker-compose exec postgres psql -U $(shell grep POSTGRES_USER .env | cut -d '=' -f2) -d $(shell grep POSTGRES_DB .env | cut -d '=' -f2) -c "DROP SCHEMA public CASCADE; CREATE SCHEMA public;"
@docker-compose exec postgres psql -U $(shell grep POSTGRES_USER .env | cut -d '=' -f2) -d $(shell grep POSTGRES_DB .env | cut -d '=' -f2) -f /docker-entrypoint-initdb.d/init.sql
@echo "Database reset complete"
# Development mode
dev-dashboard:
@echo "Starting dashboard in development mode..."
@if command -v npm &> /dev/null; then \
cd dashboard && npm start; \
else \
echo "⚠️ npm not found. Please install Node.js and npm first."; \
echo " Run: ./setup.sh"; \
exit 1; \
fi
dev-functions:
@echo "Starting functions in development mode..."
@cd functions && python -m flask run --host=0.0.0.0 --port=5000

264  README.md  Normal file

@@ -0,0 +1,264 @@
# File Transformer S3
A comprehensive file transformation system with a React dashboard, Knative functions, PostgreSQL database, and MinIO S3-compatible storage.
## 🚀 Features
- **Modern React Dashboard**: Beautiful, responsive UI for managing files and transformations
- **Knative Functions**: Serverless Python functions for file processing
- **PostgreSQL Database**: Robust data storage with comprehensive schema
- **MinIO Storage**: S3-compatible object storage
- **Environment-Driven**: Fully configurable via environment variables
- **Docker & Kubernetes**: Complete containerization and orchestration
- **Automated Setup**: Makefile for easy deployment and management
## 🏗️ Architecture
```
┌─────────────────┐    ┌─────────────────┐    ┌─────────────────┐
│      React      │    │     Knative     │    │   PostgreSQL    │
│    Dashboard    │◄──►│    Functions    │◄──►│    Database     │
└─────────────────┘    └─────────────────┘    └─────────────────┘
         │                      │                      │
         │                      │                      │
         ▼                      ▼                      ▼
┌─────────────────┐    ┌─────────────────┐    ┌─────────────────┐
│      MinIO      │    │   API Gateway   │    │  File Storage   │
│     Console     │    │   (Optional)    │    │  & Processing   │
└─────────────────┘    └─────────────────┘    └─────────────────┘
```
## 📋 Prerequisites
- Docker and Docker Compose
- Kubernetes cluster with Knative installed
- kubectl configured
- Node.js 18+ (for development)
- Python 3.11+ (for development)
## 🛠️ Quick Start
### 1. Clone and Setup
```bash
git clone <repository-url>
cd file-transformer-s3
make setup
```
### 2. Configure Environment
Edit the `.env` file with your configuration:
```bash
cp env.example .env
# Edit .env with your settings
```
### 3. Local Development
```bash
# Start all services locally
make deploy-local
# Or start individual components
make dev-dashboard
make dev-functions
```
### 4. Kubernetes Deployment
```bash
# Deploy to Knative cluster
make deploy-knative
# Check status
make status
```
## 📁 Project Structure
```
file-transformer-s3/
├── dashboard/              # React frontend
│   ├── src/
│   │   ├── components/     # React components
│   │   ├── pages/          # Page components
│   │   ├── services/       # API services
│   │   └── contexts/       # React contexts
│   ├── Dockerfile
│   └── package.json
├── functions/              # Knative functions
│   ├── upload/             # File upload function
│   ├── transform/          # File transformation function
│   ├── download/           # File download function
│   ├── metadata/           # File metadata function
│   └── requirements.txt
├── k8s/                    # Kubernetes manifests
│   ├── namespace.yaml
│   ├── postgres.yaml
│   ├── minio.yaml
│   ├── dashboard.yaml
│   └── functions/
├── database/               # Database scripts
│   └── init.sql
├── docker-compose.yml      # Local development
├── Makefile                # Automation scripts
├── env.example             # Environment template
└── README.md
```
## 🔧 Configuration
### Environment Variables
Key configuration options in `.env`:
```bash
# Application
APP_NAME=file-transformer-s3
APP_ENV=development
# Dashboard
REACT_APP_PORT=3000
REACT_APP_API_BASE_URL=http://localhost:8080
# PostgreSQL
POSTGRES_HOST=localhost
POSTGRES_PORT=5432
POSTGRES_DB=file_transformer
POSTGRES_USER=file_user
POSTGRES_PASSWORD=secure_password_123
# MinIO
MINIO_ENDPOINT=localhost:9000
MINIO_ACCESS_KEY=minioadmin
MINIO_SECRET_KEY=minioadmin123
MINIO_BUCKET_NAME=file-transformer-bucket
# Knative Functions
KNATIVE_NAMESPACE=file-transformer
FUNCTION_UPLOAD_ENDPOINT=http://file-upload-service.file-transformer.svc.cluster.local
FUNCTION_TRANSFORM_ENDPOINT=http://file-transform-service.file-transformer.svc.cluster.local
```
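For reference, the Python functions can pick these values up with `python-dotenv` (already pinned in the gateway requirements). A minimal sketch, assuming the variable names above and a `.env` file in the working directory:
```python
import os

from dotenv import load_dotenv

# Read .env from the current working directory (location is an assumption for local development).
load_dotenv()

POSTGRES_DSN = (
    f"postgresql://{os.getenv('POSTGRES_USER')}:{os.getenv('POSTGRES_PASSWORD')}"
    f"@{os.getenv('POSTGRES_HOST', 'localhost')}:{os.getenv('POSTGRES_PORT', '5432')}"
    f"/{os.getenv('POSTGRES_DB', 'file_transformer')}"
)
MINIO_ENDPOINT = os.getenv('MINIO_ENDPOINT', 'localhost:9000')
MINIO_BUCKET = os.getenv('MINIO_BUCKET_NAME', 'file-transformer-bucket')
```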
## 🎯 Available Commands
### Setup & Installation
```bash
make setup # Initial setup
make install-deps # Install dependencies
make build-dashboard # Build React dashboard
make build-functions # Build Knative functions
```
### Deployment
```bash
make deploy-local # Deploy locally with Docker Compose
make deploy-knative # Deploy to Knative cluster
make deploy-all # Deploy everything
```
### Management
```bash
make logs # View logs from all services
make status # Check status of all services
make clean # Clean up all resources
make reset-db # Reset PostgreSQL database
```
### Development
```bash
make dev-dashboard # Start dashboard in development mode
make dev-functions # Start functions in development mode
```
## 📊 Dashboard Features
- **File Management**: Upload, download, delete, and view files
- **Transformation Pipeline**: Convert files between formats
- **Real-time Monitoring**: Live status updates and progress tracking
- **Analytics**: File type distribution and storage usage
- **Bucket Management**: MinIO bucket operations
- **User Management**: Authentication and authorization
## 🔄 File Transformations
Supported transformations:
- **Text Extraction**: Extract text from PDF and DOCX files
- **Format Conversion**: CSV/Excel to JSON, image format conversion
- **Image Processing**: Resize, compress, and convert images
- **Document Processing**: PDF manipulation and text extraction
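As a rough usage sketch, a transformation can be requested through the API gateway once a file has been uploaded. The `transformation_type` key mirrors the column of the same name in the `transformations` table; the exact set of accepted values is defined by the transform function and is assumed here:
```python
import requests

API_BASE = "http://localhost:8080"  # default REACT_APP_API_BASE_URL
file_id = "123"                     # ID returned by the upload endpoint (placeholder value)

# POST /files/<file_id>/transform is proxied by the API gateway to the transform function.
resp = requests.post(
    f"{API_BASE}/files/{file_id}/transform",
    json={"transformation_type": "text_extraction"},  # assumed value
    timeout=300,
)
resp.raise_for_status()
print(resp.json())
```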
## 🗄️ Database Schema
Key tables:
- `files`: File metadata and storage information
- `transformations`: Transformation job tracking
- `buckets`: MinIO bucket management
- `users`: User authentication and authorization
- `sessions`: User session management
- `file_access_logs`: Audit trail for file operations
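The authoritative definitions live in `database/init.sql`. Purely as an illustration, a query against the `files` table might look like the sketch below; the column names follow the fields the dashboard displays, and `psycopg2` is assumed as the driver:
```python
import os

import psycopg2  # assumed driver; check functions/requirements.txt for the one actually used

conn = psycopg2.connect(
    host=os.getenv("POSTGRES_HOST", "localhost"),
    port=os.getenv("POSTGRES_PORT", "5432"),
    dbname=os.getenv("POSTGRES_DB", "file_transformer"),
    user=os.getenv("POSTGRES_USER", "file_user"),
    password=os.getenv("POSTGRES_PASSWORD", ""),
)
with conn, conn.cursor() as cur:
    # filename, file_size, status and created_at are the fields the dashboard lists per file
    cur.execute(
        "SELECT filename, file_size, status, created_at "
        "FROM files ORDER BY created_at DESC LIMIT 10"
    )
    for row in cur.fetchall():
        print(row)
conn.close()
```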
## 🔒 Security
- JWT-based authentication
- Role-based access control
- Secure file upload validation
- Audit logging for all operations
- Environment variable configuration
- Non-root container execution
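The gateway's auth endpoints are placeholders for now, so the flow below is only a sketch of the intended call pattern once real JWT issuance is wired in; the `Authorization: Bearer` header is the usual convention and is not yet enforced anywhere:
```python
import requests

API_BASE = "http://localhost:8080"

# The placeholder /auth/login currently returns a dummy token regardless of credentials.
token = requests.post(
    f"{API_BASE}/auth/login",
    json={"username": "admin", "password": "admin123"},
    timeout=30,
).json()["token"]

# Subsequent calls would carry the token; the gateway does not validate it yet.
files = requests.get(
    f"{API_BASE}/files",
    headers={"Authorization": f"Bearer {token}"},
    timeout=30,
).json()
print(files)
```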
## 📈 Monitoring & Logging
- Structured logging with structlog
- Health check endpoints
- Prometheus metrics (planned)
- Real-time dashboard updates
- Error tracking and reporting
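`structlog` is pinned in the gateway requirements; a minimal JSON-logging setup looks like the sketch below (the processor chain is a reasonable default, not necessarily the one the functions use):
```python
import logging

import structlog

structlog.configure(
    processors=[
        structlog.processors.add_log_level,
        structlog.processors.TimeStamper(fmt="iso"),
        structlog.processors.JSONRenderer(),
    ],
    wrapper_class=structlog.make_filtering_bound_logger(logging.INFO),
)

log = structlog.get_logger()
# Emits a single JSON line with level, timestamp and the bound key/value pairs.
log.info("file_uploaded", file_id="123", bucket="file-transformer-bucket")
```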
## 🚀 Production Deployment
### Prerequisites
- Kubernetes cluster with Knative
- Ingress controller (nginx-ingress)
- Persistent volume provisioner
- Container registry access
### Deployment Steps
1. Build and push container images
2. Apply Kubernetes manifests
3. Configure ingress and DNS
4. Set up monitoring and logging
5. Configure backups and disaster recovery
## 🤝 Contributing
1. Fork the repository
2. Create a feature branch
3. Make your changes
4. Add tests
5. Submit a pull request
## 📄 License
This project is licensed under the MIT License - see the LICENSE file for details.
## 🆘 Support
For support and questions:
- Create an issue in the repository
- Check the documentation
- Review the troubleshooting guide
## 🔮 Roadmap
- [ ] Advanced file transformations
- [ ] Batch processing capabilities
- [ ] Webhook integrations
- [ ] Advanced analytics
- [ ] Multi-tenant support
- [ ] API rate limiting
- [ ] Advanced security features

227  TROUBLESHOOTING.md  Normal file

@@ -0,0 +1,227 @@
# Troubleshooting Guide
## Common Issues and Solutions
### 1. Node.js/npm not found
**Error:** `bash: npm: command not found`
**Solution:**
```bash
# On Arch Linux
sudo pacman -S nodejs npm
# On Ubuntu/Debian
curl -fsSL https://deb.nodesource.com/setup_lts.x | sudo -E bash -
sudo apt-get install -y nodejs
# Or run the setup script
./setup.sh
```
### 2. Python/pip not found
**Error:** `bash: pip: command not found`
**Solution:**
```bash
# On Arch Linux
sudo pacman -S python python-pip
# On Ubuntu/Debian
sudo apt-get update
sudo apt-get install -y python3 python3-pip
# Or run the setup script
./setup.sh
```
### 3. Docker not found or permission denied
**Error:** `docker: command not found` or `Got permission denied while trying to connect to the Docker daemon`
**Solution:**
```bash
# Install Docker
# On Arch Linux
sudo pacman -S docker docker-compose
# On Ubuntu/Debian
curl -fsSL https://get.docker.com -o get-docker.sh
sudo sh get-docker.sh
# Add user to docker group
sudo usermod -aG docker $USER
# Start Docker service
sudo systemctl enable docker
sudo systemctl start docker
# Log out and back in for group changes to take effect
```
### 4. Port already in use
**Error:** `Error starting userland proxy: listen tcp 0.0.0.0:3000: bind: address already in use`
**Solution:**
```bash
# Check what's using the port
sudo lsof -i :3000
# Kill the process or change the port in .env file
# Edit .env and change REACT_APP_PORT to another value like 3001
```
### 5. Database connection failed
**Error:** `could not connect to server: Connection refused`
**Solution:**
```bash
# Check if PostgreSQL is running
docker-compose ps
# Start the services
docker-compose up -d
# Check logs
docker-compose logs postgres
```
### 6. MinIO connection failed
**Error:** `MinIO connection error`
**Solution:**
```bash
# Check if MinIO is running
docker-compose ps
# Start the services
docker-compose up -d
# Check MinIO logs
docker-compose logs minio
# Access MinIO console at http://localhost:9001
```
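If the container is up but uploads still fail, the MinIO Python client can confirm the endpoint and credentials from outside the stack (the `minio` package is assumed to be installed; credentials are the defaults from the README configuration section):
```python
from minio import Minio  # assumed client library: pip install minio

client = Minio(
    "localhost:9000",
    access_key="minioadmin",
    secret_key="minioadmin123",
    secure=False,  # the local docker-compose setup runs MinIO without TLS
)

# bucket_exists both authenticates and checks the bucket in a single call.
print("bucket exists:", client.bucket_exists("file-transformer-bucket"))
```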
### 7. Build failures
**Error:** `npm ERR!` or `pip install` failures
**Solution:**
```bash
# Clear npm cache
npm cache clean --force
# Clear pip cache
pip cache purge
# Reinstall dependencies
make clean
make install-deps
```
### 8. Permission issues
**Error:** `Permission denied` when running scripts
**Solution:**
```bash
# Make scripts executable
chmod +x setup.sh
chmod +x *.sh
# Check file permissions
ls -la
```
### 9. Environment variables not loaded
**Error:** `Environment variable not found`
**Solution:**
```bash
# Check if .env file exists
ls -la .env
# Create .env from template
cp env.example .env
# Edit .env with your values
nano .env
```
### 10. React app not starting
**Error:** `Module not found` or React compilation errors
**Solution:**
```bash
# Clear node_modules and reinstall
cd dashboard
rm -rf node_modules package-lock.json
npm install
# Check for missing dependencies
npm list --depth=0
```
## Quick Fix Commands
### Reset everything and start fresh:
```bash
# Stop all services
docker-compose down -v
# Clean up
make clean
# Full setup
make setup-full
# Start services
make deploy-local
```
### Check system status:
```bash
# Check all services
make status
# Check logs
make logs
# Check dependencies
which node npm python3 docker
```
### Development mode:
```bash
# Start dashboard in dev mode
make dev-dashboard
# Start functions in dev mode
make dev-functions
```
## Getting Help
If you're still experiencing issues:
1. Check the logs: `make logs`
2. Verify your environment: `make status`
3. Check the documentation in `README.md`
4. Ensure all dependencies are installed: `./setup.sh`
## System Requirements
- **OS:** Linux (Arch, Ubuntu, Debian supported)
- **Node.js:** 16.x or higher
- **Python:** 3.8 or higher
- **Docker:** 20.x or higher
- **Docker Compose:** 2.x or higher
- **Memory:** At least 4GB RAM
- **Disk:** At least 10GB free space

30  api-gateway/Dockerfile  Normal file

@@ -0,0 +1,30 @@
FROM python:3.11-slim
# Set working directory
WORKDIR /app
# Install system dependencies
RUN apt-get update && apt-get install -y \
gcc \
curl \
&& rm -rf /var/lib/apt/lists/*
# Copy requirements and install Python dependencies
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt
# Copy application code
COPY app.py .
# Create non-root user
RUN useradd --create-home --shell /bin/bash app && chown -R app:app /app
USER app
# Expose port
EXPOSE 8080
# Health check
HEALTHCHECK --interval=30s --timeout=30s --start-period=5s --retries=3 \
CMD curl -f http://localhost:8080/health || exit 1
# Run the application
CMD ["gunicorn", "--bind", "0.0.0.0:8080", "--workers", "4", "--timeout", "300", "app:app"]

165  api-gateway/app.py  Normal file

@@ -0,0 +1,165 @@
import os
import requests
from flask import Flask, request, jsonify
from flask_cors import CORS
import logging
app = Flask(__name__)
CORS(app)
# Configure logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)
# Function service URLs
UPLOAD_SERVICE_URL = os.getenv('UPLOAD_SERVICE_URL', 'http://function-upload:5000')
TRANSFORM_SERVICE_URL = os.getenv('TRANSFORM_SERVICE_URL', 'http://function-transform:5000')
DOWNLOAD_SERVICE_URL = os.getenv('DOWNLOAD_SERVICE_URL', 'http://function-download:5000')
METADATA_SERVICE_URL = os.getenv('METADATA_SERVICE_URL', 'http://function-metadata:5000')
@app.route('/health', methods=['GET'])
def health_check():
"""Health check endpoint."""
return jsonify({'status': 'healthy', 'service': 'api-gateway'})
@app.route('/files/upload', methods=['POST'])
def upload_file():
"""Route file upload requests to upload service."""
try:
# Re-wrap incoming files so the original filenames and content types survive the proxy hop
upload_files = {
name: (f.filename, f.stream, f.content_type)
for name, f in request.files.items()
}
response = requests.post(
f"{UPLOAD_SERVICE_URL}/upload",
files=upload_files,
data=request.form,
timeout=300
)
return jsonify(response.json()), response.status_code
except requests.exceptions.RequestException as e:
logger.error(f"Upload service error: {e}")
return jsonify({'error': 'Upload service unavailable'}), 503
@app.route('/files/<file_id>/transform', methods=['POST'])
def transform_file(file_id):
"""Route transformation requests to transform service."""
try:
response = requests.post(
f"{TRANSFORM_SERVICE_URL}/transform/{file_id}",
json=request.get_json(),
timeout=300
)
return jsonify(response.json()), response.status_code
except requests.exceptions.RequestException as e:
logger.error(f"Transform service error: {e}")
return jsonify({'error': 'Transform service unavailable'}), 503
@app.route('/files/<file_id>/download', methods=['GET'])
def download_file(file_id):
"""Route download requests to download service."""
try:
response = requests.get(
f"{DOWNLOAD_SERVICE_URL}/download/{file_id}",
timeout=300
)
return response.content, response.status_code, response.headers.items()
except requests.exceptions.RequestException as e:
logger.error(f"Download service error: {e}")
return jsonify({'error': 'Download service unavailable'}), 503
@app.route('/files/<file_id>/metadata', methods=['GET'])
def get_file_metadata(file_id):
"""Route metadata requests to metadata service."""
try:
response = requests.get(
f"{METADATA_SERVICE_URL}/files/{file_id}/metadata",
timeout=30
)
return jsonify(response.json()), response.status_code
except requests.exceptions.RequestException as e:
logger.error(f"Metadata service error: {e}")
return jsonify({'error': 'Metadata service unavailable'}), 503
@app.route('/files/<file_id>/metadata', methods=['PUT'])
def update_file_metadata(file_id):
"""Route metadata update requests to metadata service."""
try:
response = requests.put(
f"{METADATA_SERVICE_URL}/files/{file_id}/metadata",
json=request.get_json(),
timeout=30
)
return jsonify(response.json()), response.status_code
except requests.exceptions.RequestException as e:
logger.error(f"Metadata service error: {e}")
return jsonify({'error': 'Metadata service unavailable'}), 503
@app.route('/files', methods=['GET'])
def get_files():
"""Route file listing requests to metadata service."""
try:
response = requests.get(
f"{METADATA_SERVICE_URL}/files",
params=request.args,
timeout=30
)
return jsonify(response.json()), response.status_code
except requests.exceptions.RequestException as e:
logger.error(f"Metadata service error: {e}")
return jsonify({'error': 'Metadata service unavailable'}), 503
@app.route('/transformations', methods=['GET'])
def get_transformations():
"""Route transformation listing requests to metadata service."""
try:
response = requests.get(
f"{METADATA_SERVICE_URL}/transformations",
params=request.args,
timeout=30
)
return jsonify(response.json()), response.status_code
except requests.exceptions.RequestException as e:
logger.error(f"Metadata service error: {e}")
return jsonify({'error': 'Metadata service unavailable'}), 503
@app.route('/dashboard/stats', methods=['GET'])
def get_dashboard_stats():
"""Route dashboard stats requests to metadata service."""
try:
response = requests.get(
f"{METADATA_SERVICE_URL}/stats",
timeout=30
)
return jsonify(response.json()), response.status_code
except requests.exceptions.RequestException as e:
logger.error(f"Metadata service error: {e}")
return jsonify({'error': 'Metadata service unavailable'}), 503
@app.route('/buckets', methods=['GET'])
def get_buckets():
"""Route bucket requests to metadata service."""
try:
response = requests.get(
f"{METADATA_SERVICE_URL}/buckets",
timeout=30
)
return jsonify(response.json()), response.status_code
except requests.exceptions.RequestException as e:
logger.error(f"Metadata service error: {e}")
return jsonify({'error': 'Metadata service unavailable'}), 503
# Auth endpoints (placeholder for now)
@app.route('/auth/login', methods=['POST'])
def login():
"""Placeholder login endpoint."""
return jsonify({'token': 'dummy-token', 'user': {'id': 1, 'username': 'admin'}}), 200
@app.route('/auth/logout', methods=['POST'])
def logout():
"""Placeholder logout endpoint."""
return jsonify({'message': 'Logged out successfully'}), 200
@app.route('/auth/profile', methods=['GET'])
def get_profile():
"""Placeholder profile endpoint."""
return jsonify({'id': 1, 'username': 'admin', 'email': 'admin@example.com'}), 200
if __name__ == '__main__':
app.run(host='0.0.0.0', port=8080, debug=False)


@@ -0,0 +1,11 @@
# Core dependencies
flask==2.3.3
flask-cors==4.0.0
gunicorn==21.2.0
python-dotenv==1.0.0
# HTTP requests
requests==2.31.0
# Logging
structlog==23.1.0

35  dashboard/Dockerfile  Normal file

@@ -0,0 +1,35 @@
# Build stage
FROM node:18-alpine as build
WORKDIR /app
# Copy package files
COPY package*.json ./
# Install dependencies (use npm install instead of npm ci for better compatibility)
RUN npm install --only=production --no-optional
# Copy source code
COPY . .
# Build the application
RUN npm run build
# Production stage
FROM nginx:alpine
# Copy built application
COPY --from=build /app/build /usr/share/nginx/html
# Copy nginx configuration
COPY nginx.conf /etc/nginx/nginx.conf
# Expose port
EXPOSE 3000
# Health check
HEALTHCHECK --interval=30s --timeout=30s --start-period=5s --retries=3 \
CMD curl -f http://localhost:3000 || exit 1
# Start nginx
CMD ["nginx", "-g", "daemon off;"]

72  dashboard/nginx.conf  Normal file

@@ -0,0 +1,72 @@
events {
worker_connections 1024;
}
http {
include /etc/nginx/mime.types;
default_type application/octet-stream;
# Logging
log_format main '$remote_addr - $remote_user [$time_local] "$request" '
'$status $body_bytes_sent "$http_referer" '
'"$http_user_agent" "$http_x_forwarded_for"';
access_log /var/log/nginx/access.log main;
error_log /var/log/nginx/error.log warn;
# Gzip compression
gzip on;
gzip_vary on;
gzip_min_length 1024;
gzip_proxied any;
gzip_comp_level 6;
gzip_types
text/plain
text/css
text/xml
text/javascript
application/json
application/javascript
application/xml+rss
application/atom+xml
image/svg+xml;
server {
listen 3000;
server_name localhost;
root /usr/share/nginx/html;
index index.html;
# Security headers
add_header X-Frame-Options "SAMEORIGIN" always;
add_header X-XSS-Protection "1; mode=block" always;
add_header X-Content-Type-Options "nosniff" always;
add_header Referrer-Policy "no-referrer-when-downgrade" always;
add_header Content-Security-Policy "default-src 'self' http: https: data: blob: 'unsafe-inline'" always;
# Handle React Router
location / {
try_files $uri $uri/ /index.html;
}
# Cache static assets
location ~* \.(js|css|png|jpg|jpeg|gif|ico|svg)$ {
expires 1y;
add_header Cache-Control "public, immutable";
}
# Health check endpoint
location /health {
access_log off;
return 200 "healthy\n";
add_header Content-Type text/plain;
}
# Error pages
error_page 404 /index.html;
error_page 500 502 503 504 /50x.html;
location = /50x.html {
root /usr/share/nginx/html;
}
}
}

62  dashboard/package.json  Normal file

@@ -0,0 +1,62 @@
{
"name": "file-transformer-dashboard",
"version": "1.0.0",
"description": "React dashboard for File Transformer S3",
"private": true,
"dependencies": {
"@testing-library/jest-dom": "^5.16.4",
"@testing-library/react": "^13.3.0",
"@testing-library/user-event": "^13.5.0",
"react": "^18.2.0",
"react-dom": "^18.2.0",
"react-scripts": "5.0.1",
"react-router-dom": "^6.3.0",
"axios": "^1.4.0",
"react-dropzone": "^14.2.3",
"react-query": "^3.39.3",
"react-hot-toast": "^2.4.0",
"lucide-react": "^0.263.1",
"clsx": "^1.2.1",
"tailwindcss": "^3.3.2",
"autoprefixer": "^10.4.14",
"postcss": "^8.4.24",
"@headlessui/react": "^1.7.15",
"@heroicons/react": "^2.0.18",
"recharts": "^2.7.2",
"date-fns": "^2.30.0",
"react-hook-form": "^7.45.1",
"react-select": "^5.7.3",
"react-table": "^7.8.0",
"framer-motion": "^10.12.16"
},
"scripts": {
"start": "react-scripts start",
"build": "react-scripts build",
"test": "react-scripts test",
"eject": "react-scripts eject"
},
"eslintConfig": {
"extends": [
"react-app",
"react-app/jest"
]
},
"browserslist": {
"production": [
">0.2%",
"not dead",
"not op_mini all"
],
"development": [
"last 1 chrome version",
"last 1 firefox version",
"last 1 safari version"
]
},
"devDependencies": {
"@types/react": "^18.2.15",
"@types/react-dom": "^18.2.7",
"typescript": "^4.9.5"
},
"proxy": "http://localhost:8080"
}

115  dashboard/src/App.css  Normal file

@@ -0,0 +1,115 @@
@tailwind base;
@tailwind components;
@tailwind utilities;
@layer base {
html {
font-family: 'Inter', system-ui, sans-serif;
}
}
@layer components {
.btn-primary {
@apply bg-blue-600 hover:bg-blue-700 text-white font-medium py-2 px-4 rounded-lg transition-colors duration-200;
}
.btn-secondary {
@apply bg-gray-200 hover:bg-gray-300 text-gray-800 font-medium py-2 px-4 rounded-lg transition-colors duration-200;
}
.btn-danger {
@apply bg-red-600 hover:bg-red-700 text-white font-medium py-2 px-4 rounded-lg transition-colors duration-200;
}
.card {
@apply bg-white rounded-lg shadow-sm border border-gray-200 p-6;
}
.input-field {
@apply w-full px-3 py-2 border border-gray-300 rounded-lg focus:outline-none focus:ring-2 focus:ring-blue-500 focus:border-transparent;
}
.table-header {
@apply px-6 py-3 text-left text-xs font-medium text-gray-500 uppercase tracking-wider;
}
.table-cell {
@apply px-6 py-4 whitespace-nowrap text-sm text-gray-900;
}
}
/* Custom scrollbar */
::-webkit-scrollbar {
width: 6px;
}
::-webkit-scrollbar-track {
background: #f1f1f1;
}
::-webkit-scrollbar-thumb {
background: #c1c1c1;
border-radius: 3px;
}
::-webkit-scrollbar-thumb:hover {
background: #a8a8a8;
}
/* Loading animation */
.loading-spinner {
border: 2px solid #f3f3f3;
border-top: 2px solid #3498db;
border-radius: 50%;
width: 20px;
height: 20px;
animation: spin 1s linear infinite;
}
@keyframes spin {
0% { transform: rotate(0deg); }
100% { transform: rotate(360deg); }
}
/* File upload dropzone */
.dropzone {
border: 2px dashed #d1d5db;
border-radius: 8px;
padding: 40px;
text-align: center;
transition: border-color 0.2s ease;
}
.dropzone:hover {
border-color: #3b82f6;
}
.dropzone.drag-active {
border-color: #3b82f6;
background-color: #eff6ff;
}
/* Status badges */
.status-badge {
@apply inline-flex items-center px-2.5 py-0.5 rounded-full text-xs font-medium;
}
.status-uploaded {
@apply bg-blue-100 text-blue-800;
}
.status-processing {
@apply bg-yellow-100 text-yellow-800;
}
.status-transformed {
@apply bg-green-100 text-green-800;
}
.status-error {
@apply bg-red-100 text-red-800;
}
.status-deleted {
@apply bg-gray-100 text-gray-800;
}

67  dashboard/src/App.js  Normal file

@@ -0,0 +1,67 @@
import React from 'react';
import { BrowserRouter as Router, Routes, Route } from 'react-router-dom';
import { QueryClient, QueryClientProvider } from 'react-query';
import { Toaster } from 'react-hot-toast';
import Layout from './components/Layout';
import Dashboard from './pages/Dashboard';
import Files from './pages/Files';
import Upload from './pages/Upload';
import Transformations from './pages/Transformations';
import Buckets from './pages/Buckets';
import Settings from './pages/Settings';
import Login from './pages/Login';
import { AuthProvider } from './contexts/AuthContext';
import ProtectedRoute from './components/ProtectedRoute';
import './App.css';
const queryClient = new QueryClient({
defaultOptions: {
queries: {
retry: 1,
refetchOnWindowFocus: false,
},
},
});
function App() {
return (
<QueryClientProvider client={queryClient}>
<AuthProvider>
<Router>
<div className="App">
<Toaster
position="top-right"
toastOptions={{
duration: 4000,
style: {
background: '#363636',
color: '#fff',
},
}}
/>
<Routes>
<Route path="/login" element={<Login />} />
<Route
path="/"
element={
<ProtectedRoute>
<Layout />
</ProtectedRoute>
}
>
<Route index element={<Dashboard />} />
<Route path="files" element={<Files />} />
<Route path="upload" element={<Upload />} />
<Route path="transformations" element={<Transformations />} />
<Route path="buckets" element={<Buckets />} />
<Route path="settings" element={<Settings />} />
</Route>
</Routes>
</div>
</Router>
</AuthProvider>
</QueryClientProvider>
);
}
export default App;


@@ -0,0 +1,30 @@
import React from 'react';
import { BellIcon } from '@heroicons/react/24/outline';
const Header = () => {
return (
<header className="bg-white shadow-sm border-b border-gray-200">
<div className="flex items-center justify-between h-16 px-4 sm:px-6 lg:px-8">
<div className="flex items-center">
<h2 className="text-lg font-medium text-gray-900">File Transformer S3</h2>
</div>
<div className="flex items-center space-x-4">
<button
type="button"
className="p-2 text-gray-400 hover:text-gray-500 focus:outline-none focus:ring-2 focus:ring-blue-500"
>
<BellIcon className="h-6 w-6" />
</button>
<div className="flex items-center space-x-3">
<div className="h-8 w-8 rounded-full bg-blue-500 flex items-center justify-center">
<span className="text-sm font-medium text-white">A</span>
</div>
<span className="text-sm font-medium text-gray-700">Admin</span>
</div>
</div>
</div>
</header>
);
};
export default Header;


@@ -0,0 +1,20 @@
import React from 'react';
import { Outlet } from 'react-router-dom';
import Sidebar from './Sidebar';
import Header from './Header';
const Layout = () => {
return (
<div className="flex h-screen bg-gray-50">
<Sidebar />
<div className="flex-1 flex flex-col overflow-hidden">
<Header />
<main className="flex-1 overflow-x-hidden overflow-y-auto bg-gray-50 p-6">
<Outlet />
</main>
</div>
</div>
);
};
export default Layout;


@@ -0,0 +1,15 @@
import React from 'react';
import { Navigate } from 'react-router-dom';
const ProtectedRoute = ({ children }) => {
// For now, always allow access. In a real app, you'd check authentication here
const isAuthenticated = true; // Replace with actual auth check
if (!isAuthenticated) {
return <Navigate to="/login" replace />;
}
return children;
};
export default ProtectedRoute;


@@ -0,0 +1,59 @@
import React from 'react';
import { NavLink } from 'react-router-dom';
import {
HomeIcon,
DocumentTextIcon,
CloudArrowUpIcon,
CogIcon,
FolderIcon,
ChartBarIcon
} from '@heroicons/react/24/outline';
const Sidebar = () => {
const navigation = [
{ name: 'Dashboard', href: '/', icon: HomeIcon },
{ name: 'Files', href: '/files', icon: DocumentTextIcon },
{ name: 'Upload', href: '/upload', icon: CloudArrowUpIcon },
{ name: 'Transformations', href: '/transformations', icon: CogIcon },
{ name: 'Buckets', href: '/buckets', icon: FolderIcon },
{ name: 'Analytics', href: '/analytics', icon: ChartBarIcon },
{ name: 'Settings', href: '/settings', icon: CogIcon },
];
return (
<div className="hidden md:flex md:flex-shrink-0">
<div className="flex flex-col w-64">
<div className="flex flex-col h-0 flex-1 bg-white border-r border-gray-200">
<div className="flex-1 flex flex-col pt-5 pb-4 overflow-y-auto">
<div className="flex items-center flex-shrink-0 px-4">
<h1 className="text-xl font-semibold text-gray-900">File Transformer</h1>
</div>
<nav className="mt-5 flex-1 px-2 space-y-1">
{navigation.map((item) => (
<NavLink
key={item.name}
to={item.href}
className={({ isActive }) =>
`group flex items-center px-2 py-2 text-sm font-medium rounded-md ${
isActive
? 'bg-blue-100 text-blue-900'
: 'text-gray-600 hover:bg-gray-50 hover:text-gray-900'
}`
}
>
<item.icon
className="mr-3 flex-shrink-0 h-6 w-6"
aria-hidden="true"
/>
{item.name}
</NavLink>
))}
</nav>
</div>
</div>
</div>
</div>
);
};
export default Sidebar;


@@ -0,0 +1,66 @@
import React, { createContext, useContext, useState, useEffect } from 'react';
const AuthContext = createContext();
export const useAuth = () => {
const context = useContext(AuthContext);
if (!context) {
throw new Error('useAuth must be used within an AuthProvider');
}
return context;
};
export const AuthProvider = ({ children }) => {
const [user, setUser] = useState(null);
const [loading, setLoading] = useState(true);
useEffect(() => {
// Check for existing token
const token = localStorage.getItem('authToken');
if (token) {
// In a real app, validate the token with the backend
setUser({ id: '1', username: 'admin', role: 'admin' });
}
setLoading(false);
}, []);
const login = async (credentials) => {
try {
// In a real app, make API call to login
const response = await fetch('/api/auth/login', {
method: 'POST',
headers: { 'Content-Type': 'application/json' },
body: JSON.stringify(credentials),
});
if (response.ok) {
const data = await response.json();
localStorage.setItem('authToken', data.token);
setUser(data.user);
return { success: true };
} else {
return { success: false, error: 'Invalid credentials' };
}
} catch (error) {
return { success: false, error: 'Network error' };
}
};
const logout = () => {
localStorage.removeItem('authToken');
setUser(null);
};
const value = {
user,
login,
logout,
loading,
};
return (
<AuthContext.Provider value={value}>
{children}
</AuthContext.Provider>
);
};

20  dashboard/src/index.css  Normal file

@@ -0,0 +1,20 @@
@import url('https://fonts.googleapis.com/css2?family=Inter:wght@300;400;500;600;700&display=swap');
body {
margin: 0;
font-family: 'Inter', -apple-system, BlinkMacSystemFont, 'Segoe UI', 'Roboto', 'Oxygen',
'Ubuntu', 'Cantarell', 'Fira Sans', 'Droid Sans', 'Helvetica Neue',
sans-serif;
-webkit-font-smoothing: antialiased;
-moz-osx-font-smoothing: grayscale;
background-color: #f9fafb;
}
code {
font-family: source-code-pro, Menlo, Monaco, Consolas, 'Courier New',
monospace;
}
* {
box-sizing: border-box;
}

11  dashboard/src/index.js  Normal file

@@ -0,0 +1,11 @@
import React from 'react';
import ReactDOM from 'react-dom/client';
import './index.css';
import App from './App';
const root = ReactDOM.createRoot(document.getElementById('root'));
root.render(
<React.StrictMode>
<App />
</React.StrictMode>
);


@@ -0,0 +1,94 @@
import React from 'react';
import { useQuery } from 'react-query';
import { FolderIcon, PlusIcon } from '@heroicons/react/24/outline';
import { bucketsAPI } from '../services/api';
const Buckets = () => {
const { data: buckets, isLoading, error } = useQuery('buckets', bucketsAPI.getBuckets);
if (isLoading) {
return (
<div className="flex items-center justify-center h-64">
<div className="loading-spinner"></div>
</div>
);
}
if (error) {
return (
<div className="text-center py-12">
<p className="text-red-600">Error loading buckets: {error.message}</p>
</div>
);
}
return (
<div className="space-y-6">
<div className="flex justify-between items-center">
<div>
<h1 className="text-2xl font-bold text-gray-900">Buckets</h1>
<p className="text-gray-600">Manage MinIO storage buckets</p>
</div>
<button className="btn-primary flex items-center">
<PlusIcon className="w-4 h-4 mr-2" />
Create Bucket
</button>
</div>
<div className="grid grid-cols-1 md:grid-cols-2 lg:grid-cols-3 gap-6">
{buckets?.map((bucket) => (
<div key={bucket.name} className="card hover:shadow-md transition-shadow">
<div className="flex items-center justify-between">
<div className="flex items-center">
<FolderIcon className="w-8 h-8 text-blue-500 mr-3" />
<div>
<h3 className="text-lg font-medium text-gray-900">{bucket.name}</h3>
<p className="text-sm text-gray-500">{bucket.description || 'No description'}</p>
</div>
</div>
<span className={`inline-flex items-center px-2.5 py-0.5 rounded-full text-xs font-medium ${
bucket.is_active
? 'bg-green-100 text-green-800'
: 'bg-gray-100 text-gray-800'
}`}>
{bucket.is_active ? 'Active' : 'Inactive'}
</span>
</div>
<div className="mt-4 pt-4 border-t border-gray-200">
<div className="flex justify-between text-sm">
<span className="text-gray-600">Created</span>
<span className="text-gray-900">
{new Date(bucket.created_at).toLocaleDateString()}
</span>
</div>
</div>
<div className="mt-4 flex space-x-2">
<button className="btn-secondary text-sm">View Files</button>
<button className="btn-secondary text-sm">Settings</button>
</div>
</div>
))}
</div>
{buckets?.length === 0 && (
<div className="text-center py-12">
<FolderIcon className="mx-auto h-12 w-12 text-gray-400" />
<h3 className="mt-2 text-sm font-medium text-gray-900">No buckets</h3>
<p className="mt-1 text-sm text-gray-500">
Get started by creating a new bucket.
</p>
<div className="mt-6">
<button className="btn-primary">
<PlusIcon className="w-4 h-4 mr-2" />
Create Bucket
</button>
</div>
</div>
)}
</div>
);
};
export default Buckets;


@@ -0,0 +1,223 @@
import React from 'react';
import { useQuery } from 'react-query';
import {
CloudArrowUpIcon,
DocumentTextIcon,
CogIcon,
ExclamationTriangleIcon,
ArrowUpIcon,
ArrowDownIcon
} from '@heroicons/react/24/outline';
import { BarChart, Bar, XAxis, YAxis, CartesianGrid, Tooltip, ResponsiveContainer, PieChart, Pie, Cell } from 'recharts';
import { format } from 'date-fns';
import { motion } from 'framer-motion';
import { getDashboardStats, getRecentFiles, getRecentTransformations } from '../services/api';
const COLORS = ['#3b82f6', '#10b981', '#f59e0b', '#ef4444'];
const Dashboard = () => {
const { data: stats, isLoading: statsLoading } = useQuery('dashboardStats', getDashboardStats);
const { data: recentFiles, isLoading: filesLoading } = useQuery('recentFiles', getRecentFiles);
const { data: recentTransformations, isLoading: transformationsLoading } = useQuery('recentTransformations', getRecentTransformations);
const fileTypeData = [
{ name: 'PDF', value: stats?.fileTypes?.pdf || 0 },
{ name: 'DOC', value: stats?.fileTypes?.doc || 0 },
{ name: 'TXT', value: stats?.fileTypes?.txt || 0 },
{ name: 'CSV', value: stats?.fileTypes?.csv || 0 },
];
const statusData = [
{ name: 'Uploaded', value: stats?.statusCounts?.uploaded || 0 },
{ name: 'Processing', value: stats?.statusCounts?.processing || 0 },
{ name: 'Transformed', value: stats?.statusCounts?.transformed || 0 },
{ name: 'Error', value: stats?.statusCounts?.error || 0 },
];
const StatCard = ({ title, value, icon: Icon, change, changeType = 'up' }) => (
<motion.div
initial={{ opacity: 0, y: 20 }}
animate={{ opacity: 1, y: 0 }}
transition={{ duration: 0.5 }}
className="card"
>
<div className="flex items-center justify-between">
<div>
<p className="text-sm font-medium text-gray-600">{title}</p>
<p className="text-2xl font-bold text-gray-900">{value}</p>
{change && (
<div className="flex items-center mt-2">
{changeType === 'up' ? (
<ArrowUpIcon className="w-4 h-4 text-green-500" />
) : (
<ArrowDownIcon className="w-4 h-4 text-red-500" />
)}
<span className={`text-sm font-medium ${
changeType === 'up' ? 'text-green-600' : 'text-red-600'
}`}>
{change}
</span>
</div>
)}
</div>
<div className="p-3 bg-blue-100 rounded-lg">
<Icon className="w-6 h-6 text-blue-600" />
</div>
</div>
</motion.div>
);
if (statsLoading) {
return (
<div className="flex items-center justify-center h-64">
<div className="loading-spinner"></div>
</div>
);
}
return (
<div className="space-y-6">
{/* Header */}
<div>
<h1 className="text-2xl font-bold text-gray-900">Dashboard</h1>
<p className="text-gray-600">Overview of your file transformation system</p>
</div>
{/* Stats Cards */}
<div className="grid grid-cols-1 md:grid-cols-2 lg:grid-cols-4 gap-6">
<StatCard
title="Total Files"
value={stats?.totalFiles || 0}
icon={DocumentTextIcon}
change="+12%"
changeType="up"
/>
<StatCard
title="Storage Used"
value={`${((stats?.totalSize || 0) / (1024 * 1024 * 1024)).toFixed(1)} GB`}
icon={CloudArrowUpIcon}
change="+8%"
changeType="up"
/>
<StatCard
title="Active Transformations"
value={stats?.activeTransformations || 0}
icon={CogIcon}
change="-3%"
changeType="down"
/>
<StatCard
title="Failed Jobs"
value={stats?.failedJobs || 0}
icon={ExclamationTriangleIcon}
change="+2%"
changeType="up"
/>
</div>
{/* Charts */}
<div className="grid grid-cols-1 lg:grid-cols-2 gap-6">
{/* File Types Chart */}
<div className="card">
<h3 className="text-lg font-semibold text-gray-900 mb-4">File Types Distribution</h3>
<ResponsiveContainer width="100%" height={300}>
<PieChart>
<Pie
data={fileTypeData}
cx="50%"
cy="50%"
labelLine={false}
label={({ name, percent }) => `${name} ${(percent * 100).toFixed(0)}%`}
outerRadius={80}
fill="#8884d8"
dataKey="value"
>
{fileTypeData.map((entry, index) => (
<Cell key={`cell-${index}`} fill={COLORS[index % COLORS.length]} />
))}
</Pie>
<Tooltip />
</PieChart>
</ResponsiveContainer>
</div>
{/* Status Chart */}
<div className="card">
<h3 className="text-lg font-semibold text-gray-900 mb-4">File Status Overview</h3>
<ResponsiveContainer width="100%" height={300}>
<BarChart data={statusData}>
<CartesianGrid strokeDasharray="3 3" />
<XAxis dataKey="name" />
<YAxis />
<Tooltip />
<Bar dataKey="value" fill="#3b82f6" />
</BarChart>
</ResponsiveContainer>
</div>
</div>
{/* Recent Activity */}
<div className="grid grid-cols-1 lg:grid-cols-2 gap-6">
{/* Recent Files */}
<div className="card">
<h3 className="text-lg font-semibold text-gray-900 mb-4">Recent Files</h3>
{filesLoading ? (
<div className="flex items-center justify-center h-32">
<div className="loading-spinner"></div>
</div>
) : (
<div className="space-y-3">
{recentFiles?.slice(0, 5).map((file) => (
<div key={file.id} className="flex items-center justify-between p-3 bg-gray-50 rounded-lg">
<div className="flex items-center space-x-3">
<DocumentTextIcon className="w-5 h-5 text-gray-400" />
<div>
<p className="text-sm font-medium text-gray-900">{file.filename}</p>
<p className="text-xs text-gray-500">
{format(new Date(file.created_at), 'MMM dd, yyyy HH:mm')}
</p>
</div>
</div>
<span className={`status-badge status-${file.status}`}>
{file.status}
</span>
</div>
))}
</div>
)}
</div>
{/* Recent Transformations */}
<div className="card">
<h3 className="text-lg font-semibold text-gray-900 mb-4">Recent Transformations</h3>
{transformationsLoading ? (
<div className="flex items-center justify-center h-32">
<div className="loading-spinner"></div>
</div>
) : (
<div className="space-y-3">
{recentTransformations?.slice(0, 5).map((transformation) => (
<div key={transformation.id} className="flex items-center justify-between p-3 bg-gray-50 rounded-lg">
<div className="flex items-center space-x-3">
<CogIcon className="w-5 h-5 text-gray-400" />
<div>
<p className="text-sm font-medium text-gray-900">{transformation.transformation_type}</p>
<p className="text-xs text-gray-500">
{format(new Date(transformation.created_at), 'MMM dd, yyyy HH:mm')}
</p>
</div>
</div>
<span className={`status-badge status-${transformation.status}`}>
{transformation.status}
</span>
</div>
))}
</div>
)}
</div>
</div>
</div>
);
};
export default Dashboard;


@@ -0,0 +1,102 @@
import React from 'react';
import { useQuery } from 'react-query';
import { DocumentTextIcon, ArrowDownTrayIcon, TrashIcon } from '@heroicons/react/24/outline';
import { filesAPI } from '../services/api';
const Files = () => {
const { data: files, isLoading, error } = useQuery('files', filesAPI.getFiles);
if (isLoading) {
return (
<div className="flex items-center justify-center h-64">
<div className="loading-spinner"></div>
</div>
);
}
if (error) {
return (
<div className="text-center py-12">
<p className="text-red-600">Error loading files: {error.message}</p>
</div>
);
}
return (
<div className="space-y-6">
<div>
<h1 className="text-2xl font-bold text-gray-900">Files</h1>
<p className="text-gray-600">Manage your uploaded files</p>
</div>
<div className="bg-white shadow rounded-lg">
<div className="px-6 py-4 border-b border-gray-200">
<h3 className="text-lg font-medium text-gray-900">All Files</h3>
</div>
<div className="overflow-x-auto">
<table className="min-w-full divide-y divide-gray-200">
<thead className="bg-gray-50">
<tr>
<th className="table-header">File</th>
<th className="table-header">Size</th>
<th className="table-header">Type</th>
<th className="table-header">Status</th>
<th className="table-header">Uploaded</th>
<th className="table-header">Actions</th>
</tr>
</thead>
<tbody className="bg-white divide-y divide-gray-200">
{files?.map((file) => (
<tr key={file.id} className="hover:bg-gray-50">
<td className="table-cell">
<div className="flex items-center">
<DocumentTextIcon className="w-5 h-5 text-gray-400 mr-3" />
<div>
<div className="text-sm font-medium text-gray-900">
{file.filename}
</div>
<div className="text-sm text-gray-500">
{file.original_filename}
</div>
</div>
</div>
</td>
<td className="table-cell">
<span className="text-sm text-gray-900">
{(file.file_size / 1024 / 1024).toFixed(2)} MB
</span>
</td>
<td className="table-cell">
<span className="text-sm text-gray-900">{file.file_type}</span>
</td>
<td className="table-cell">
<span className={`status-badge status-${file.status}`}>
{file.status}
</span>
</td>
<td className="table-cell">
<span className="text-sm text-gray-900">
{new Date(file.created_at).toLocaleDateString()}
</span>
</td>
<td className="table-cell">
<div className="flex space-x-2">
<button className="text-blue-600 hover:text-blue-900">
<ArrowDownTrayIcon className="w-4 h-4" />
</button>
<button className="text-red-600 hover:text-red-900">
<TrashIcon className="w-4 h-4" />
</button>
</div>
</td>
</tr>
))}
</tbody>
</table>
</div>
</div>
</div>
);
};
export default Files;


@@ -0,0 +1,111 @@
import React, { useState } from 'react';
import { useNavigate } from 'react-router-dom';
import { useAuth } from '../contexts/AuthContext';
import toast from 'react-hot-toast';
const Login = () => {
const [credentials, setCredentials] = useState({
username: '',
password: ''
});
const [loading, setLoading] = useState(false);
const { login } = useAuth();
const navigate = useNavigate();
const handleSubmit = async (e) => {
e.preventDefault();
setLoading(true);
try {
const result = await login(credentials);
if (result.success) {
toast.success('Login successful!');
navigate('/');
} else {
toast.error(result.error || 'Login failed');
}
} catch (error) {
toast.error('An error occurred during login');
} finally {
setLoading(false);
}
};
const handleChange = (e) => {
setCredentials({
...credentials,
[e.target.name]: e.target.value
});
};
return (
<div className="min-h-screen flex items-center justify-center bg-gray-50 py-12 px-4 sm:px-6 lg:px-8">
<div className="max-w-md w-full space-y-8">
<div>
<h2 className="mt-6 text-center text-3xl font-extrabold text-gray-900">
Sign in to File Transformer
</h2>
<p className="mt-2 text-center text-sm text-gray-600">
Access your file transformation dashboard
</p>
</div>
<form className="mt-8 space-y-6" onSubmit={handleSubmit}>
<div className="rounded-md shadow-sm -space-y-px">
<div>
<label htmlFor="username" className="sr-only">
Username
</label>
<input
id="username"
name="username"
type="text"
required
className="appearance-none rounded-none relative block w-full px-3 py-2 border border-gray-300 placeholder-gray-500 text-gray-900 rounded-t-md focus:outline-none focus:ring-blue-500 focus:border-blue-500 focus:z-10 sm:text-sm"
placeholder="Username"
value={credentials.username}
onChange={handleChange}
/>
</div>
<div>
<label htmlFor="password" className="sr-only">
Password
</label>
<input
id="password"
name="password"
type="password"
required
className="appearance-none rounded-none relative block w-full px-3 py-2 border border-gray-300 placeholder-gray-500 text-gray-900 rounded-b-md focus:outline-none focus:ring-blue-500 focus:border-blue-500 focus:z-10 sm:text-sm"
placeholder="Password"
value={credentials.password}
onChange={handleChange}
/>
</div>
</div>
<div>
<button
type="submit"
disabled={loading}
className="group relative w-full flex justify-center py-2 px-4 border border-transparent text-sm font-medium rounded-md text-white bg-blue-600 hover:bg-blue-700 focus:outline-none focus:ring-2 focus:ring-offset-2 focus:ring-blue-500 disabled:opacity-50 disabled:cursor-not-allowed"
>
{loading ? (
<div className="loading-spinner"></div>
) : (
'Sign in'
)}
</button>
</div>
<div className="text-center">
<p className="text-sm text-gray-600">
Default credentials: admin / admin123
</p>
</div>
</form>
</div>
</div>
);
};
export default Login;


@@ -0,0 +1,205 @@
import React from 'react';
import { CogIcon, ShieldCheckIcon, CircleStackIcon, CloudIcon } from '@heroicons/react/24/outline';
const Settings = () => {
return (
<div className="space-y-6">
<div>
<h1 className="text-2xl font-bold text-gray-900">Settings</h1>
<p className="text-gray-600">Configure your file transformation system</p>
</div>
<div className="grid grid-cols-1 lg:grid-cols-2 gap-6">
{/* System Configuration */}
<div className="card">
<div className="flex items-center mb-4">
<CogIcon className="w-6 h-6 text-blue-500 mr-2" />
<h3 className="text-lg font-medium text-gray-900">System Configuration</h3>
</div>
<div className="space-y-4">
<div>
<label className="block text-sm font-medium text-gray-700 mb-1">
Application Name
</label>
<input
type="text"
className="input-field"
defaultValue="File Transformer S3"
/>
</div>
<div>
<label className="block text-sm font-medium text-gray-700 mb-1">
Environment
</label>
<select className="input-field">
<option value="development">Development</option>
<option value="staging">Staging</option>
<option value="production">Production</option>
</select>
</div>
<div>
<label className="block text-sm font-medium text-gray-700 mb-1">
Debug Mode
</label>
<div className="flex items-center">
<input
type="checkbox"
className="h-4 w-4 text-blue-600 focus:ring-blue-500 border-gray-300 rounded"
defaultChecked
/>
<label className="ml-2 text-sm text-gray-700">Enable debug logging</label>
</div>
</div>
</div>
</div>
{/* Security Settings */}
<div className="card">
<div className="flex items-center mb-4">
<ShieldCheckIcon className="w-6 h-6 text-green-500 mr-2" />
<h3 className="text-lg font-medium text-gray-900">Security</h3>
</div>
<div className="space-y-4">
<div>
<label className="block text-sm font-medium text-gray-700 mb-1">
Session Timeout (hours)
</label>
<input
type="number"
className="input-field"
defaultValue="24"
min="1"
max="168"
/>
</div>
<div>
<label className="block text-sm font-medium text-gray-700 mb-1">
Max File Size (MB)
</label>
<input
type="number"
className="input-field"
defaultValue="100"
min="1"
max="1000"
/>
</div>
<div>
<label className="block text-sm font-medium text-gray-700 mb-1">
Allowed File Types
</label>
<input
type="text"
className="input-field"
defaultValue="pdf,doc,docx,txt,csv,xlsx,xls,json,xml"
placeholder="Comma-separated file extensions"
/>
</div>
</div>
</div>
{/* Database Configuration */}
<div className="card">
<div className="flex items-center mb-4">
<CircleStackIcon className="w-6 h-6 text-purple-500 mr-2" />
<h3 className="text-lg font-medium text-gray-900">Database</h3>
</div>
<div className="space-y-4">
<div>
<label className="block text-sm font-medium text-gray-700 mb-1">
Database Host
</label>
<input
type="text"
className="input-field"
defaultValue="localhost"
/>
</div>
<div>
<label className="block text-sm font-medium text-gray-700 mb-1">
Database Port
</label>
<input
type="number"
className="input-field"
defaultValue="5432"
/>
</div>
<div>
<label className="block text-sm font-medium text-gray-700 mb-1">
Database Name
</label>
<input
type="text"
className="input-field"
defaultValue="file_transformer"
/>
</div>
</div>
</div>
{/* Storage Configuration */}
<div className="card">
<div className="flex items-center mb-4">
<CloudIcon className="w-6 h-6 text-orange-500 mr-2" />
<h3 className="text-lg font-medium text-gray-900">Storage</h3>
</div>
<div className="space-y-4">
<div>
<label className="block text-sm font-medium text-gray-700 mb-1">
MinIO Endpoint
</label>
<input
type="text"
className="input-field"
defaultValue="localhost:9000"
/>
</div>
<div>
<label className="block text-sm font-medium text-gray-700 mb-1">
Default Bucket
</label>
<input
type="text"
className="input-field"
defaultValue="file-transformer-bucket"
/>
</div>
<div>
<label className="block text-sm font-medium text-gray-700 mb-1">
Use SSL
</label>
<div className="flex items-center">
<input
type="checkbox"
className="h-4 w-4 text-blue-600 focus:ring-blue-500 border-gray-300 rounded"
/>
<label className="ml-2 text-sm text-gray-700">Enable SSL for MinIO</label>
</div>
</div>
</div>
</div>
</div>
{/* Action Buttons */}
<div className="flex justify-end space-x-4">
<button className="btn-secondary">Reset to Defaults</button>
<button className="btn-primary">Save Settings</button>
</div>
</div>
);
};
export default Settings;


@@ -0,0 +1,120 @@
import React from 'react';
import { useQuery } from 'react-query';
import { CogIcon, CheckCircleIcon, XCircleIcon, ClockIcon } from '@heroicons/react/24/outline';
import { transformationsAPI } from '../services/api';
const Transformations = () => {
const { data: transformations, isLoading, error } = useQuery('transformations', transformationsAPI.getTransformations);
const getStatusIcon = (status) => {
switch (status) {
case 'completed':
return <CheckCircleIcon className="w-5 h-5 text-green-500" />;
case 'failed':
return <XCircleIcon className="w-5 h-5 text-red-500" />;
case 'processing':
return <CogIcon className="w-5 h-5 text-blue-500 animate-spin" />;
default:
return <ClockIcon className="w-5 h-5 text-gray-500" />;
}
};
if (isLoading) {
return (
<div className="flex items-center justify-center h-64">
<div className="loading-spinner"></div>
</div>
);
}
if (error) {
return (
<div className="text-center py-12">
<p className="text-red-600">Error loading transformations: {error.message}</p>
</div>
);
}
return (
<div className="space-y-6">
<div>
<h1 className="text-2xl font-bold text-gray-900">Transformations</h1>
<p className="text-gray-600">Monitor file transformation jobs</p>
</div>
<div className="bg-white shadow rounded-lg">
<div className="px-6 py-4 border-b border-gray-200">
<h3 className="text-lg font-medium text-gray-900">All Transformations</h3>
</div>
<div className="overflow-x-auto">
<table className="min-w-full divide-y divide-gray-200">
<thead className="bg-gray-50">
<tr>
<th className="table-header">Type</th>
<th className="table-header">File</th>
<th className="table-header">Status</th>
<th className="table-header">Started</th>
<th className="table-header">Completed</th>
<th className="table-header">Actions</th>
</tr>
</thead>
<tbody className="bg-white divide-y divide-gray-200">
{transformations?.map((transformation) => (
<tr key={transformation.id} className="hover:bg-gray-50">
<td className="table-cell">
<span className="text-sm font-medium text-gray-900">
{transformation.transformation_type}
</span>
</td>
<td className="table-cell">
<span className="text-sm text-gray-900">
{transformation.file_id}
</span>
</td>
<td className="table-cell">
<div className="flex items-center">
{getStatusIcon(transformation.status)}
<span className={`ml-2 status-badge status-${transformation.status}`}>
{transformation.status}
</span>
</div>
</td>
<td className="table-cell">
<span className="text-sm text-gray-900">
{transformation.started_at
? new Date(transformation.started_at).toLocaleString()
: '-'
}
</span>
</td>
<td className="table-cell">
<span className="text-sm text-gray-900">
{transformation.completed_at
? new Date(transformation.completed_at).toLocaleString()
: '-'
}
</span>
</td>
<td className="table-cell">
<div className="flex space-x-2">
{transformation.status === 'failed' && (
<button className="text-blue-600 hover:text-blue-900 text-sm">
Retry
</button>
)}
<button className="text-gray-600 hover:text-gray-900 text-sm">
View Logs
</button>
</div>
</td>
</tr>
))}
</tbody>
</table>
</div>
</div>
</div>
);
};
export default Transformations;

View File

@@ -0,0 +1,159 @@
import React, { useState, useCallback } from 'react';
import { useDropzone } from 'react-dropzone';
import { CloudArrowUpIcon, DocumentTextIcon } from '@heroicons/react/24/outline';
import { uploadFileWithProgress } from '../services/api';
import toast from 'react-hot-toast';
const Upload = () => {
const [uploading, setUploading] = useState(false);
const [uploadProgress, setUploadProgress] = useState({});
const onDrop = useCallback(async (acceptedFiles) => {
setUploading(true);
for (const file of acceptedFiles) {
try {
setUploadProgress(prev => ({ ...prev, [file.name]: 0 }));
await uploadFileWithProgress(file, (progress) => {
setUploadProgress(prev => ({ ...prev, [file.name]: progress }));
});
toast.success(`${file.name} uploaded successfully!`);
setUploadProgress(prev => {
const newProgress = { ...prev };
delete newProgress[file.name];
return newProgress;
});
} catch (error) {
toast.error(`Failed to upload ${file.name}: ${error.message}`);
setUploadProgress(prev => {
const newProgress = { ...prev };
delete newProgress[file.name];
return newProgress;
});
}
}
setUploading(false);
}, []);
const { getRootProps, getInputProps, isDragActive } = useDropzone({
onDrop,
accept: {
'application/pdf': ['.pdf'],
'application/vnd.openxmlformats-officedocument.wordprocessingml.document': ['.docx'],
'application/msword': ['.doc'],
'text/plain': ['.txt'],
'text/csv': ['.csv'],
'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet': ['.xlsx'],
'application/vnd.ms-excel': ['.xls'],
'application/json': ['.json'],
'application/xml': ['.xml'],
'image/*': ['.jpg', '.jpeg', '.png', '.gif', '.bmp', '.tiff']
},
maxSize: 100 * 1024 * 1024, // 100MB
});
return (
<div className="space-y-6">
<div>
<h1 className="text-2xl font-bold text-gray-900">Upload Files</h1>
<p className="text-gray-600">Upload files for transformation and processing</p>
</div>
<div className="card">
<div
{...getRootProps()}
className={`dropzone ${isDragActive ? 'drag-active' : ''} ${
uploading ? 'opacity-50 pointer-events-none' : ''
}`}
>
<input {...getInputProps()} />
<CloudArrowUpIcon className="mx-auto h-12 w-12 text-gray-400" />
<div className="mt-4 text-center">
{isDragActive ? (
<p className="text-lg text-blue-600">Drop the files here...</p>
) : (
<>
<p className="text-lg text-gray-900">
Drag and drop files here, or click to select files
</p>
<p className="text-sm text-gray-500 mt-2">
Supports PDF, DOC, TXT, CSV, Excel, JSON, XML, and image files (max 100MB)
</p>
</>
)}
</div>
</div>
</div>
{/* Upload Progress */}
{Object.keys(uploadProgress).length > 0 && (
<div className="card">
<h3 className="text-lg font-medium text-gray-900 mb-4">Upload Progress</h3>
<div className="space-y-3">
{Object.entries(uploadProgress).map(([filename, progress]) => (
<div key={filename} className="flex items-center space-x-3">
<DocumentTextIcon className="w-5 h-5 text-gray-400" />
<div className="flex-1">
<div className="flex justify-between text-sm">
<span className="text-gray-900">{filename}</span>
<span className="text-gray-500">{progress}%</span>
</div>
<div className="mt-1 bg-gray-200 rounded-full h-2">
<div
className="bg-blue-600 h-2 rounded-full transition-all duration-300"
style={{ width: `${progress}%` }}
/>
</div>
</div>
</div>
))}
</div>
</div>
)}
{/* File Type Information */}
<div className="grid grid-cols-1 md:grid-cols-2 gap-6">
<div className="card">
<h3 className="text-lg font-medium text-gray-900 mb-4">Supported File Types</h3>
<div className="space-y-2">
<div className="flex justify-between">
<span className="text-gray-600">Documents</span>
<span className="text-gray-900">PDF, DOC, DOCX, TXT</span>
</div>
<div className="flex justify-between">
<span className="text-gray-600">Data Files</span>
<span className="text-gray-900">CSV, XLS, XLSX, JSON, XML</span>
</div>
<div className="flex justify-between">
<span className="text-gray-600">Images</span>
<span className="text-gray-900">JPG, PNG, GIF, BMP, TIFF</span>
</div>
</div>
</div>
<div className="card">
<h3 className="text-lg font-medium text-gray-900 mb-4">Transformation Options</h3>
<div className="space-y-2">
<div className="flex justify-between">
<span className="text-gray-600">Text Extraction</span>
<span className="text-green-600">Available</span>
</div>
<div className="flex justify-between">
<span className="text-gray-600">Format Conversion</span>
<span className="text-green-600">Available</span>
</div>
<div className="flex justify-between">
<span className="text-gray-600">Image Processing</span>
<span className="text-green-600">Available</span>
</div>
</div>
</div>
</div>
</div>
);
};
export default Upload;

View File

@@ -0,0 +1,180 @@
import axios from 'axios';
const API_BASE_URL = process.env.REACT_APP_API_BASE_URL || 'http://localhost:8080';
// Create axios instance with default config
const api = axios.create({
baseURL: API_BASE_URL,
timeout: 10000,
headers: {
'Content-Type': 'application/json',
},
});
// Request interceptor to add auth token
api.interceptors.request.use(
(config) => {
const token = localStorage.getItem('authToken');
if (token) {
config.headers.Authorization = `Bearer ${token}`;
}
return config;
},
(error) => {
return Promise.reject(error);
}
);
// Response interceptor to handle auth errors
api.interceptors.response.use(
(response) => response,
(error) => {
if (error.response?.status === 401) {
localStorage.removeItem('authToken');
window.location.href = '/login';
}
return Promise.reject(error);
}
);
// Auth API
export const authAPI = {
login: (credentials) => api.post('/auth/login', credentials),
logout: () => api.post('/auth/logout'),
register: (userData) => api.post('/auth/register', userData),
getProfile: () => api.get('/auth/profile'),
};
// Files API
export const filesAPI = {
getFiles: (params = {}) => api.get('/files', { params }),
getFile: (id) => api.get(`/files/${id}`),
uploadFile: (file, onProgress) => {
const formData = new FormData();
formData.append('file', file);
return api.post('/files/upload', formData, {
headers: {
'Content-Type': 'multipart/form-data',
},
onUploadProgress: onProgress,
});
},
deleteFile: (id) => api.delete(`/files/${id}`),
downloadFile: (id) => api.get(`/files/${id}/download`, { responseType: 'blob' }),
updateFile: (id, data) => api.put(`/files/${id}`, data),
getFileMetadata: (id) => api.get(`/files/${id}/metadata`),
};
// Transformations API
export const transformationsAPI = {
getTransformations: (params = {}) => api.get('/transformations', { params }),
getTransformation: (id) => api.get(`/transformations/${id}`),
createTransformation: (data) => api.post('/transformations', data),
updateTransformation: (id, data) => api.put(`/transformations/${id}`, data),
deleteTransformation: (id) => api.delete(`/transformations/${id}`),
retryTransformation: (id) => api.post(`/transformations/${id}/retry`),
getTransformationLogs: (id) => api.get(`/transformations/${id}/logs`),
};
// Buckets API
export const bucketsAPI = {
getBuckets: () => api.get('/buckets'),
getBucket: (name) => api.get(`/buckets/${name}`),
createBucket: (data) => api.post('/buckets', data),
deleteBucket: (name) => api.delete(`/buckets/${name}`),
getBucketStats: (name) => api.get(`/buckets/${name}/stats`),
getBucketFiles: (name, params = {}) => api.get(`/buckets/${name}/files`, { params }),
};
// Dashboard API
export const dashboardAPI = {
getStats: () => api.get('/dashboard/stats'),
getRecentFiles: (limit = 10) => api.get('/dashboard/recent-files', { params: { limit } }),
getRecentTransformations: (limit = 10) => api.get('/dashboard/recent-transformations', { params: { limit } }),
getFileTypeStats: () => api.get('/dashboard/file-types'),
getStatusStats: () => api.get('/dashboard/status-counts'),
getStorageStats: () => api.get('/dashboard/storage'),
};
// MinIO API (direct integration)
export const minioAPI = {
getBuckets: () => api.get('/minio/buckets'),
getObjects: (bucketName, prefix = '') => api.get(`/minio/buckets/${bucketName}/objects`, { params: { prefix } }),
uploadObject: (bucketName, objectKey, file) => {
const formData = new FormData();
formData.append('file', file);
formData.append('objectKey', objectKey);
return api.post(`/minio/buckets/${bucketName}/upload`, formData, {
headers: {
'Content-Type': 'multipart/form-data',
},
});
},
deleteObject: (bucketName, objectKey) => api.delete(`/minio/buckets/${bucketName}/objects/${objectKey}`),
getObjectUrl: (bucketName, objectKey) => api.get(`/minio/buckets/${bucketName}/objects/${objectKey}/url`),
};
// Convenience functions for common operations
export const getDashboardStats = async () => {
const [stats, fileTypes, statusCounts, storage] = await Promise.all([
dashboardAPI.getStats(),
dashboardAPI.getFileTypeStats(),
dashboardAPI.getStatusStats(),
dashboardAPI.getStorageStats(),
]);
return {
...stats.data,
fileTypes: fileTypes.data,
statusCounts: statusCounts.data,
storage: storage.data,
};
};
export const getRecentFiles = async () => {
const response = await dashboardAPI.getRecentFiles();
return response.data;
};
export const getRecentTransformations = async () => {
const response = await dashboardAPI.getRecentTransformations();
return response.data;
};
export const uploadFileWithProgress = (file, onProgress) => {
return filesAPI.uploadFile(file, (progressEvent) => {
const percentCompleted = Math.round((progressEvent.loaded * 100) / progressEvent.total);
onProgress(percentCompleted);
});
};
export const downloadFileAsBlob = async (fileId, filename) => {
const response = await filesAPI.downloadFile(fileId);
const url = window.URL.createObjectURL(new Blob([response.data]));
const link = document.createElement('a');
link.href = url;
link.setAttribute('download', filename);
document.body.appendChild(link);
link.click();
link.remove();
window.URL.revokeObjectURL(url);
};
// Error handling utility
export const handleAPIError = (error) => {
if (error.response) {
// Server responded with error status
const message = error.response.data?.message || error.response.data?.error || 'An error occurred';
return { error: true, message, status: error.response.status };
} else if (error.request) {
// Request was made but no response received
return { error: true, message: 'Network error. Please check your connection.', status: 0 };
} else {
// Something else happened
return { error: true, message: error.message || 'An unexpected error occurred.', status: 0 };
}
};
export default api;

View File

@@ -0,0 +1,43 @@
/** @type {import('tailwindcss').Config} */
module.exports = {
content: [
"./src/**/*.{js,jsx,ts,tsx}",
],
theme: {
extend: {
colors: {
primary: {
50: '#eff6ff',
100: '#dbeafe',
200: '#bfdbfe',
300: '#93c5fd',
400: '#60a5fa',
500: '#3b82f6',
600: '#2563eb',
700: '#1d4ed8',
800: '#1e40af',
900: '#1e3a8a',
},
},
fontFamily: {
sans: ['Inter', 'system-ui', 'sans-serif'],
},
animation: {
'fade-in': 'fadeIn 0.5s ease-in-out',
'slide-up': 'slideUp 0.3s ease-out',
'pulse-slow': 'pulse 3s cubic-bezier(0.4, 0, 0.6, 1) infinite',
},
keyframes: {
fadeIn: {
'0%': { opacity: '0' },
'100%': { opacity: '1' },
},
slideUp: {
'0%': { transform: 'translateY(10px)', opacity: '0' },
'100%': { transform: 'translateY(0)', opacity: '1' },
},
},
},
},
plugins: [],
}

174
database/init.sql Normal file
View File

@@ -0,0 +1,174 @@
-- File Transformer S3 Database Schema
-- This script initializes the database with all necessary tables
-- Create extensions
CREATE EXTENSION IF NOT EXISTS "uuid-ossp";
CREATE EXTENSION IF NOT EXISTS "pgcrypto";
-- Create tables
CREATE TABLE IF NOT EXISTS files (
id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
filename VARCHAR(255) NOT NULL,
original_filename VARCHAR(255) NOT NULL,
file_path VARCHAR(500) NOT NULL,
file_size BIGINT NOT NULL,
file_type VARCHAR(50) NOT NULL,
mime_type VARCHAR(100),
bucket_name VARCHAR(100) NOT NULL,
object_key VARCHAR(500) NOT NULL,
checksum VARCHAR(64),
status VARCHAR(20) DEFAULT 'uploaded' CHECK (status IN ('uploaded', 'processing', 'transformed', 'error', 'deleted')),
transformation_type VARCHAR(50),
transformation_config JSONB,
metadata JSONB,
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
processed_at TIMESTAMP WITH TIME ZONE,
deleted_at TIMESTAMP WITH TIME ZONE
);
CREATE TABLE IF NOT EXISTS transformations (
id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
file_id UUID NOT NULL REFERENCES files(id) ON DELETE CASCADE,
transformation_type VARCHAR(50) NOT NULL,
input_path VARCHAR(500) NOT NULL,
output_path VARCHAR(500),
status VARCHAR(20) DEFAULT 'pending' CHECK (status IN ('pending', 'processing', 'completed', 'failed')),
config JSONB,
result JSONB,
error_message TEXT,
started_at TIMESTAMP WITH TIME ZONE,
completed_at TIMESTAMP WITH TIME ZONE,
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP
);
CREATE TABLE IF NOT EXISTS buckets (
id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
name VARCHAR(100) UNIQUE NOT NULL,
description TEXT,
is_active BOOLEAN DEFAULT true,
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP
);
CREATE TABLE IF NOT EXISTS users (
id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
username VARCHAR(50) UNIQUE NOT NULL,
email VARCHAR(255) UNIQUE NOT NULL,
password_hash VARCHAR(255) NOT NULL,
is_active BOOLEAN DEFAULT true,
role VARCHAR(20) DEFAULT 'user' CHECK (role IN ('admin', 'user')),
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP
);
CREATE TABLE IF NOT EXISTS sessions (
id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
user_id UUID NOT NULL REFERENCES users(id) ON DELETE CASCADE,
token VARCHAR(255) UNIQUE NOT NULL,
expires_at TIMESTAMP WITH TIME ZONE NOT NULL,
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP
);
CREATE TABLE IF NOT EXISTS file_access_logs (
id UUID PRIMARY KEY DEFAULT uuid_generate_v4(),
file_id UUID REFERENCES files(id) ON DELETE SET NULL,
user_id UUID REFERENCES users(id) ON DELETE SET NULL,
action VARCHAR(50) NOT NULL CHECK (action IN ('upload', 'download', 'view', 'delete', 'transform')),
ip_address INET,
user_agent TEXT,
created_at TIMESTAMP WITH TIME ZONE DEFAULT CURRENT_TIMESTAMP
);
-- Create indexes for better performance
CREATE INDEX IF NOT EXISTS idx_files_status ON files(status);
CREATE INDEX IF NOT EXISTS idx_files_file_type ON files(file_type);
CREATE INDEX IF NOT EXISTS idx_files_created_at ON files(created_at);
CREATE INDEX IF NOT EXISTS idx_files_bucket_name ON files(bucket_name);
CREATE INDEX IF NOT EXISTS idx_files_object_key ON files(object_key);
CREATE INDEX IF NOT EXISTS idx_transformations_file_id ON transformations(file_id);
CREATE INDEX IF NOT EXISTS idx_transformations_status ON transformations(status);
CREATE INDEX IF NOT EXISTS idx_transformations_type ON transformations(transformation_type);
CREATE INDEX IF NOT EXISTS idx_sessions_user_id ON sessions(user_id);
CREATE INDEX IF NOT EXISTS idx_sessions_token ON sessions(token);
CREATE INDEX IF NOT EXISTS idx_sessions_expires_at ON sessions(expires_at);
CREATE INDEX IF NOT EXISTS idx_file_access_logs_file_id ON file_access_logs(file_id);
CREATE INDEX IF NOT EXISTS idx_file_access_logs_user_id ON file_access_logs(user_id);
CREATE INDEX IF NOT EXISTS idx_file_access_logs_action ON file_access_logs(action);
CREATE INDEX IF NOT EXISTS idx_file_access_logs_created_at ON file_access_logs(created_at);
-- Create updated_at trigger function
CREATE OR REPLACE FUNCTION update_updated_at_column()
RETURNS TRIGGER AS $$
BEGIN
NEW.updated_at = CURRENT_TIMESTAMP;
RETURN NEW;
END;
$$ LANGUAGE plpgsql;
-- Create triggers for updated_at
CREATE TRIGGER update_files_updated_at BEFORE UPDATE ON files
FOR EACH ROW EXECUTE FUNCTION update_updated_at_column();
CREATE TRIGGER update_transformations_updated_at BEFORE UPDATE ON transformations
FOR EACH ROW EXECUTE FUNCTION update_updated_at_column();
CREATE TRIGGER update_buckets_updated_at BEFORE UPDATE ON buckets
FOR EACH ROW EXECUTE FUNCTION update_updated_at_column();
CREATE TRIGGER update_users_updated_at BEFORE UPDATE ON users
FOR EACH ROW EXECUTE FUNCTION update_updated_at_column();
-- Insert default bucket
INSERT INTO buckets (name, description)
VALUES ('file-transformer-bucket', 'Default bucket for file transformations')
ON CONFLICT (name) DO NOTHING;
-- Insert default admin user (password: admin123)
INSERT INTO users (username, email, password_hash, role)
VALUES (
'admin',
'admin@file-transformer.local',
crypt('admin123', gen_salt('bf')),
'admin'
) ON CONFLICT (username) DO NOTHING;
-- Create views for common queries
CREATE OR REPLACE VIEW file_summary AS
SELECT
f.id,
f.filename,
f.original_filename,
f.file_size,
f.file_type,
f.status,
f.transformation_type,
f.created_at,
f.updated_at,
COUNT(t.id) as transformation_count,
COUNT(CASE WHEN t.status = 'completed' THEN 1 END) as completed_transformations,
COUNT(CASE WHEN t.status = 'failed' THEN 1 END) as failed_transformations
FROM files f
LEFT JOIN transformations t ON f.id = t.file_id
GROUP BY f.id, f.filename, f.original_filename, f.file_size, f.file_type, f.status, f.transformation_type, f.created_at, f.updated_at;
CREATE OR REPLACE VIEW bucket_stats AS
SELECT
b.name as bucket_name,
COUNT(f.id) as total_files,
SUM(f.file_size) as total_size,
COUNT(CASE WHEN f.status = 'uploaded' THEN 1 END) as uploaded_files,
COUNT(CASE WHEN f.status = 'processing' THEN 1 END) as processing_files,
COUNT(CASE WHEN f.status = 'transformed' THEN 1 END) as transformed_files,
COUNT(CASE WHEN f.status = 'error' THEN 1 END) as error_files
FROM buckets b
LEFT JOIN files f ON b.name = f.bucket_name
GROUP BY b.name;
-- Grant permissions (adjust as needed for your setup)
-- GRANT ALL PRIVILEGES ON ALL TABLES IN SCHEMA public TO file_user;
-- GRANT ALL PRIVILEGES ON ALL SEQUENCES IN SCHEMA public TO file_user;
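A minimal query sketch (assuming the POSTGRES_URL default from env.example and psycopg2, which the functions already pin) against the file_summary and bucket_stats views defined above:
import os
import psycopg2
from psycopg2.extras import RealDictCursor

# Connection string mirrors the env.example default; adjust for your environment.
POSTGRES_URL = os.getenv('POSTGRES_URL', 'postgresql://file_user:secure_password_123@localhost:5432/file_transformer')

conn = psycopg2.connect(POSTGRES_URL)
try:
    with conn.cursor(cursor_factory=RealDictCursor) as cur:
        # Per-file rollup of transformation counts
        cur.execute("SELECT * FROM file_summary ORDER BY created_at DESC LIMIT 10")
        for row in cur.fetchall():
            print(row['filename'], row['status'], row['transformation_count'])
        # Per-bucket storage totals
        cur.execute("SELECT * FROM bucket_stats")
        print(cur.fetchall())
finally:
    conn.close()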

195
docker-compose.yml Normal file
View File

@@ -0,0 +1,195 @@
services:
# PostgreSQL Database
postgres:
image: postgres:15-alpine
container_name: file-transformer-postgres
environment:
POSTGRES_DB: ${POSTGRES_DB:-file_transformer}
POSTGRES_USER: ${POSTGRES_USER:-file_user}
POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:-secure_password_123}
ports:
- "${POSTGRES_PORT:-5432}:5432"
volumes:
- postgres_data:/var/lib/postgresql/data
- ./database/init.sql:/docker-entrypoint-initdb.d/init.sql
networks:
- file-transformer-network
healthcheck:
test: ["CMD-SHELL", "pg_isready -U ${POSTGRES_USER:-file_user}"]
interval: 10s
timeout: 5s
retries: 5
# MinIO Object Storage
minio:
image: minio/minio:latest
container_name: file-transformer-minio
environment:
MINIO_ROOT_USER: ${MINIO_ACCESS_KEY:-minioadmin}
MINIO_ROOT_PASSWORD: ${MINIO_SECRET_KEY:-minioadmin123}
ports:
- "${MINIO_API_PORT:-9000}:9000"
- "${MINIO_CONSOLE_PORT:-9001}:9001"
volumes:
- minio_data:/data
networks:
- file-transformer-network
command: server /data --console-address ":9001"
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:9000/minio/health/live"]
interval: 30s
timeout: 20s
retries: 3
# MinIO Client for bucket setup
minio-client:
image: minio/mc:latest
container_name: file-transformer-minio-client
depends_on:
- minio
environment:
MINIO_ACCESS_KEY: ${MINIO_ACCESS_KEY:-minioadmin}
MINIO_SECRET_KEY: ${MINIO_SECRET_KEY:-minioadmin123}
networks:
- file-transformer-network
command: >
sh -c "
sleep 10 &&
mc alias set local http://minio:9000 ${MINIO_ACCESS_KEY:-minioadmin} ${MINIO_SECRET_KEY:-minioadmin123} &&
mc mb local/${MINIO_BUCKET_NAME:-file-transformer-bucket} --ignore-existing &&
mc anonymous set public local/${MINIO_BUCKET_NAME:-file-transformer-bucket} &&
echo 'MinIO bucket setup complete'
"
# React Dashboard
dashboard:
build:
context: ./dashboard
dockerfile: Dockerfile
container_name: file-transformer-dashboard
ports:
- "${REACT_APP_PORT:-3000}:3000"
environment:
- REACT_APP_API_BASE_URL=${REACT_APP_API_BASE_URL:-http://localhost:8080}
- REACT_APP_MINIO_ENDPOINT=${REACT_APP_MINIO_ENDPOINT:-http://localhost:9000}
- REACT_APP_MINIO_CONSOLE=${REACT_APP_MINIO_CONSOLE:-http://localhost:9001}
networks:
- file-transformer-network
depends_on:
- postgres
- minio
volumes:
- ./dashboard:/app
- /app/node_modules
# API Gateway (for local development)
api-gateway:
build:
context: ./api-gateway
dockerfile: Dockerfile
container_name: file-transformer-api-gateway
ports:
- "8080:8080"
environment:
- POSTGRES_URL=${POSTGRES_URL}
- MINIO_ENDPOINT=${MINIO_ENDPOINT}
- MINIO_ACCESS_KEY=${MINIO_ACCESS_KEY}
- MINIO_SECRET_KEY=${MINIO_SECRET_KEY}
- MINIO_BUCKET_NAME=${MINIO_BUCKET_NAME}
- JWT_SECRET=${JWT_SECRET}
- CORS_ORIGINS=${CORS_ORIGINS}
networks:
- file-transformer-network
depends_on:
- postgres
- minio
# File Upload Function (local development)
function-upload:
build:
context: ./functions/upload
dockerfile: Dockerfile
container_name: file-transformer-upload-function
ports:
- "5001:5000"
environment:
- MINIO_ENDPOINT=${MINIO_ENDPOINT}
- MINIO_ACCESS_KEY=${MINIO_ACCESS_KEY}
- MINIO_SECRET_KEY=${MINIO_SECRET_KEY}
- MINIO_BUCKET_NAME=${MINIO_BUCKET_NAME}
- POSTGRES_URL=${POSTGRES_URL}
networks:
- file-transformer-network
depends_on:
- postgres
- minio
# File Transform Function (local development)
function-transform:
build:
context: ./functions/transform
dockerfile: Dockerfile
container_name: file-transformer-transform-function
ports:
- "5002:5000"
environment:
- MINIO_ENDPOINT=${MINIO_ENDPOINT}
- MINIO_ACCESS_KEY=${MINIO_ACCESS_KEY}
- MINIO_SECRET_KEY=${MINIO_SECRET_KEY}
- MINIO_BUCKET_NAME=${MINIO_BUCKET_NAME}
- POSTGRES_URL=${POSTGRES_URL}
networks:
- file-transformer-network
depends_on:
- postgres
- minio
# File Download Function (local development)
function-download:
build:
context: ./functions/download
dockerfile: Dockerfile
container_name: file-transformer-download-function
ports:
- "5003:5000"
environment:
- MINIO_ENDPOINT=${MINIO_ENDPOINT}
- MINIO_ACCESS_KEY=${MINIO_ACCESS_KEY}
- MINIO_SECRET_KEY=${MINIO_SECRET_KEY}
- MINIO_BUCKET_NAME=${MINIO_BUCKET_NAME}
- POSTGRES_URL=${POSTGRES_URL}
networks:
- file-transformer-network
depends_on:
- postgres
- minio
# File Metadata Function (local development)
function-metadata:
build:
context: ./functions/metadata
dockerfile: Dockerfile
container_name: file-transformer-metadata-function
ports:
- "5004:5000"
environment:
- MINIO_ENDPOINT=${MINIO_ENDPOINT}
- MINIO_ACCESS_KEY=${MINIO_ACCESS_KEY}
- MINIO_SECRET_KEY=${MINIO_SECRET_KEY}
- MINIO_BUCKET_NAME=${MINIO_BUCKET_NAME}
- POSTGRES_URL=${POSTGRES_URL}
networks:
- file-transformer-network
depends_on:
- postgres
- minio
volumes:
postgres_data:
driver: local
minio_data:
driver: local
networks:
file-transformer-network:
driver: bridge
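Once the stack is up, a quick sanity-check sketch — assuming the MinIO defaults above and the minio Python SDK pinned in the function requirements — to confirm the bucket created by the minio-client service:
import os
from minio import Minio

# Endpoint and credentials mirror the compose defaults above; adjust as needed.
client = Minio(
    os.getenv('MINIO_ENDPOINT', 'localhost:9000'),
    access_key=os.getenv('MINIO_ACCESS_KEY', 'minioadmin'),
    secret_key=os.getenv('MINIO_SECRET_KEY', 'minioadmin123'),
    secure=os.getenv('MINIO_USE_SSL', 'false').lower() == 'true',
)
bucket = os.getenv('MINIO_BUCKET_NAME', 'file-transformer-bucket')
print('bucket exists:', client.bucket_exists(bucket))
for obj in client.list_objects(bucket, recursive=True):
    print(obj.object_name, obj.size)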

82
env.example Normal file
View File

@@ -0,0 +1,82 @@
# =============================================================================
# File Transformer S3 - Environment Configuration
# =============================================================================
# Application Configuration
APP_NAME=file-transformer-s3
APP_ENV=development
DEBUG=true
# Dashboard Configuration
REACT_APP_PORT=3000
REACT_APP_API_BASE_URL=http://localhost:8080
REACT_APP_MINIO_ENDPOINT=http://localhost:9000
REACT_APP_MINIO_CONSOLE=http://localhost:9001
# PostgreSQL Configuration
POSTGRES_HOST=localhost
POSTGRES_PORT=5432
POSTGRES_DB=file_transformer
POSTGRES_USER=file_user
POSTGRES_PASSWORD=secure_password_123
POSTGRES_URL=postgresql://${POSTGRES_USER}:${POSTGRES_PASSWORD}@${POSTGRES_HOST}:${POSTGRES_PORT}/${POSTGRES_DB}
# MinIO Configuration
MINIO_ENDPOINT=localhost:9000
MINIO_ACCESS_KEY=minioadmin
MINIO_SECRET_KEY=minioadmin123
MINIO_BUCKET_NAME=file-transformer-bucket
MINIO_CONSOLE_PORT=9001
MINIO_API_PORT=9000
MINIO_USE_SSL=false
# Knative Functions Configuration
KNATIVE_NAMESPACE=file-transformer
KNATIVE_SERVICE_ACCOUNT=file-transformer-sa
# Function Endpoints (internal cluster services)
FUNCTION_UPLOAD_ENDPOINT=http://file-upload-service.file-transformer.svc.cluster.local
FUNCTION_TRANSFORM_ENDPOINT=http://file-transform-service.file-transformer.svc.cluster.local
FUNCTION_DOWNLOAD_ENDPOINT=http://file-download-service.file-transformer.svc.cluster.local
FUNCTION_METADATA_ENDPOINT=http://file-metadata-service.file-transformer.svc.cluster.local
# Local Development Function Endpoints
LOCAL_FUNCTION_UPLOAD_ENDPOINT=http://localhost:5001
LOCAL_FUNCTION_TRANSFORM_ENDPOINT=http://localhost:5002
LOCAL_FUNCTION_DOWNLOAD_ENDPOINT=http://localhost:5003
LOCAL_FUNCTION_METADATA_ENDPOINT=http://localhost:5004
# File Processing Configuration
MAX_FILE_SIZE=100MB
ALLOWED_FILE_TYPES=pdf,doc,docx,txt,csv,xlsx,xls,json,xml
PROCESSING_TIMEOUT=300
CHUNK_SIZE=1048576
# Security Configuration
JWT_SECRET=your-super-secret-jwt-key-change-this-in-production
JWT_EXPIRY=24h
CORS_ORIGINS=http://localhost:3000,http://localhost:8080
# Logging Configuration
LOG_LEVEL=INFO
LOG_FORMAT=json
# Monitoring Configuration
METRICS_PORT=9090
HEALTH_CHECK_PORT=8081
# Docker Configuration
DOCKER_REGISTRY=your-registry.com
DOCKER_NAMESPACE=file-transformer
DOCKER_TAG=latest
# Kubernetes Configuration
K8S_NAMESPACE=file-transformer
K8S_REPLICAS=2
K8S_RESOURCES_CPU=500m
K8S_RESOURCES_MEMORY=512Mi
# Backup Configuration
BACKUP_ENABLED=true
BACKUP_SCHEDULE=0 2 * * *
BACKUP_RETENTION_DAYS=30
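A short sketch (assuming python-dotenv, which the functions pin, and a .env copied from this file) showing how a local script might pick up these values:
import os
from dotenv import load_dotenv

# Hypothetical local helper: copy env.example to .env first, then load it into the process environment.
load_dotenv('.env')

print(os.getenv('POSTGRES_URL'))
print(os.getenv('MINIO_ENDPOINT', 'localhost:9000'))
print(os.getenv('MINIO_USE_SSL', 'false').lower() == 'true')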

29
functions/Makefile Normal file
View File

@@ -0,0 +1,29 @@
.PHONY: build build-upload build-transform build-download build-metadata clean
# Build all functions
build: build-upload build-transform build-download build-metadata
# Build upload function
build-upload:
@echo "Building upload function..."
@cd upload && docker build -t function-upload:latest .
# Build transform function
build-transform:
@echo "Building transform function..."
@cd transform && docker build -t function-transform:latest .
# Build download function
build-download:
@echo "Building download function..."
@cd download && docker build -t function-download:latest .
# Build metadata function
build-metadata:
@echo "Building metadata function..."
@cd metadata && docker build -t function-metadata:latest .
# Clean all function images
clean:
@echo "Cleaning function images..."
@docker rmi function-upload:latest function-transform:latest function-download:latest function-metadata:latest 2>/dev/null || true

View File

@@ -0,0 +1,30 @@
FROM python:3.11-slim
# Set working directory
WORKDIR /app
# Install system dependencies (curl is needed by the HEALTHCHECK below)
RUN apt-get update && apt-get install -y \
gcc \
curl \
&& rm -rf /var/lib/apt/lists/*
# Copy requirements and install Python dependencies
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt
# Copy application code
COPY app.py .
# Create non-root user
RUN useradd --create-home --shell /bin/bash app && chown -R app:app /app
USER app
# Expose port
EXPOSE 5000
# Health check
HEALTHCHECK --interval=30s --timeout=30s --start-period=5s --retries=3 \
CMD curl -f http://localhost:5000/health || exit 1
# Run the application
CMD ["gunicorn", "--bind", "0.0.0.0:5000", "--workers", "4", "--timeout", "300", "app:app"]

201
functions/download/app.py Normal file
View File

@@ -0,0 +1,201 @@
import os
import logging
from datetime import datetime
from typing import Optional
from flask import Flask, request, jsonify, send_file
import psycopg2
from psycopg2.extras import RealDictCursor
from minio import Minio
from minio.error import S3Error
import structlog
import io
# Configure structured logging
structlog.configure(
processors=[
structlog.stdlib.filter_by_level,
structlog.stdlib.add_logger_name,
structlog.stdlib.add_log_level,
structlog.stdlib.PositionalArgumentsFormatter(),
structlog.processors.TimeStamper(fmt="iso"),
structlog.processors.StackInfoRenderer(),
structlog.processors.format_exc_info,
structlog.processors.UnicodeDecoder(),
structlog.processors.JSONRenderer()
],
context_class=dict,
logger_factory=structlog.stdlib.LoggerFactory(),
wrapper_class=structlog.stdlib.BoundLogger,
cache_logger_on_first_use=True,
)
logger = structlog.get_logger()
app = Flask(__name__)
# Configuration
MINIO_ENDPOINT = os.getenv('MINIO_ENDPOINT', 'localhost:9000')
MINIO_ACCESS_KEY = os.getenv('MINIO_ACCESS_KEY', 'minioadmin')
MINIO_SECRET_KEY = os.getenv('MINIO_SECRET_KEY', 'minioadmin123')
MINIO_BUCKET_NAME = os.getenv('MINIO_BUCKET_NAME', 'file-transformer-bucket')
MINIO_USE_SSL = os.getenv('MINIO_USE_SSL', 'false').lower() == 'true'
POSTGRES_URL = os.getenv('POSTGRES_URL', 'postgresql://file_user:secure_password_123@localhost:5432/file_transformer')
# Initialize MinIO client
minio_client = Minio(
MINIO_ENDPOINT,
access_key=MINIO_ACCESS_KEY,
secret_key=MINIO_SECRET_KEY,
secure=MINIO_USE_SSL
)
def get_db_connection():
"""Create a database connection."""
return psycopg2.connect(POSTGRES_URL)
def get_file_info(file_id: str) -> Optional[dict]:
"""Get file information from database."""
conn = get_db_connection()
try:
with conn.cursor(cursor_factory=RealDictCursor) as cur:
cur.execute("""
SELECT id, filename, original_filename, file_size, file_type,
mime_type, bucket_name, object_key, status
FROM files
WHERE id = %s AND status != 'deleted'
""", (file_id,))
file_record = cur.fetchone()
return dict(file_record) if file_record else None
except Exception as e:
logger.error("Failed to get file info", error=str(e))
return None
finally:
conn.close()
def log_file_access(file_id: str, action: str, ip_address: str, user_agent: Optional[str]):
"""Log file access for audit purposes."""
conn = get_db_connection()
try:
with conn.cursor() as cur:
cur.execute("""
INSERT INTO file_access_logs (file_id, action, ip_address, user_agent)
VALUES (%s, %s, %s, %s)
""", (file_id, action, ip_address, user_agent))
conn.commit()
except Exception as e:
logger.error("Failed to log file access", error=str(e))
conn.rollback()
finally:
conn.close()
@app.route('/health', methods=['GET'])
def health_check():
"""Health check endpoint."""
return jsonify({'status': 'healthy', 'service': 'file-download'})
@app.route('/download/<file_id>', methods=['GET'])
def download_file(file_id: str):
"""Download file by ID."""
try:
# Get file information
file_info = get_file_info(file_id)
if not file_info:
return jsonify({'error': 'File not found'}), 404
if file_info['status'] == 'deleted':
return jsonify({'error': 'File has been deleted'}), 404
# Get file from MinIO
try:
response = minio_client.get_object(
file_info['bucket_name'],
file_info['object_key']
)
file_data = response.read()
response.close()
response.release_conn()
except S3Error as e:
logger.error("Failed to get file from MinIO", error=str(e))
return jsonify({'error': 'File not found in storage'}), 404
# Log access
log_file_access(file_id, 'download', request.remote_addr, request.headers.get('User-Agent'))
# Create file-like object for Flask to serve
file_stream = io.BytesIO(file_data)
file_stream.seek(0)
logger.info("File download completed",
file_id=file_id,
filename=file_info['filename'],
size=len(file_data))
return send_file(
file_stream,
mimetype=file_info['mime_type'],
as_attachment=True,
download_name=file_info['original_filename']
)
except Exception as e:
logger.error("Download error", error=str(e))
return jsonify({'error': 'Internal server error'}), 500
@app.route('/files/<file_id>/info', methods=['GET'])
def get_file_info_endpoint(file_id: str):
"""Get file information without downloading."""
try:
file_info = get_file_info(file_id)
if not file_info:
return jsonify({'error': 'File not found'}), 404
# Log access
log_file_access(file_id, 'view', request.remote_addr, request.headers.get('User-Agent'))
return jsonify(file_info), 200
except Exception as e:
logger.error("Error fetching file info", error=str(e))
return jsonify({'error': 'Internal server error'}), 500
@app.route('/files/<file_id>/url', methods=['GET'])
def get_download_url(file_id: str):
"""Get presigned download URL."""
try:
file_info = get_file_info(file_id)
if not file_info:
return jsonify({'error': 'File not found'}), 404
if file_info['status'] == 'deleted':
return jsonify({'error': 'File has been deleted'}), 404
# Generate presigned URL
try:
url = minio_client.presigned_get_object(
file_info['bucket_name'],
file_info['object_key'],
expires=3600 # 1 hour
)
except S3Error as e:
logger.error("Failed to generate presigned URL", error=str(e))
return jsonify({'error': 'Failed to generate download URL'}), 500
# Log access
log_file_access(file_id, 'url_generated', request.remote_addr, request.headers.get('User-Agent'))
return jsonify({
'file_id': file_id,
'filename': file_info['original_filename'],
'download_url': url,
'expires_in': 3600
}), 200
except Exception as e:
logger.error("Error generating download URL", error=str(e))
return jsonify({'error': 'Internal server error'}), 500
if __name__ == '__main__':
app.run(host='0.0.0.0', port=5000, debug=False)
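A minimal client sketch (assuming the local port mapping 5003 from docker-compose.yml and a placeholder file_id) that fetches a presigned URL from this service and downloads the object directly:
import requests

BASE_URL = 'http://localhost:5003'  # host port mapped to the download function in docker-compose.yml
file_id = '00000000-0000-0000-0000-000000000000'  # placeholder UUID of an existing file record

resp = requests.get(f'{BASE_URL}/files/{file_id}/url', timeout=10)
resp.raise_for_status()
info = resp.json()

# The presigned URL points straight at MinIO, so the payload does not pass through the function.
data = requests.get(info['download_url'], timeout=60).content
with open(info['filename'], 'wb') as fh:
    fh.write(data)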

View File

@@ -0,0 +1,24 @@
# Core dependencies
flask==2.3.3
gunicorn==21.2.0
python-dotenv==1.0.0
# Database
psycopg2-binary==2.9.7
sqlalchemy==2.0.21
# MinIO/S3
minio==7.1.17
boto3==1.28.44
# HTTP requests
requests==2.31.0
# JSON and data handling
pydantic==2.1.1
# Logging
structlog==23.1.0
# Utilities
python-dateutil==2.8.2

View File

@@ -0,0 +1,30 @@
FROM python:3.11-slim
# Set working directory
WORKDIR /app
# Install system dependencies (curl is needed by the HEALTHCHECK below)
RUN apt-get update && apt-get install -y \
gcc \
curl \
&& rm -rf /var/lib/apt/lists/*
# Copy requirements and install Python dependencies
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt
# Copy application code
COPY app.py .
# Create non-root user
RUN useradd --create-home --shell /bin/bash app && chown -R app:app /app
USER app
# Expose port
EXPOSE 5000
# Health check
HEALTHCHECK --interval=30s --timeout=30s --start-period=5s --retries=3 \
CMD curl -f http://localhost:5000/health || exit 1
# Run the application
CMD ["gunicorn", "--bind", "0.0.0.0:5000", "--workers", "4", "--timeout", "300", "app:app"]

307
functions/metadata/app.py Normal file
View File

@@ -0,0 +1,307 @@
import os
import json
import logging
from datetime import datetime
from typing import Optional, Dict, Any
from flask import Flask, request, jsonify
import psycopg2
from psycopg2.extras import RealDictCursor
from minio import Minio
from minio.error import S3Error
import structlog
# Configure structured logging
structlog.configure(
processors=[
structlog.stdlib.filter_by_level,
structlog.stdlib.add_logger_name,
structlog.stdlib.add_log_level,
structlog.stdlib.PositionalArgumentsFormatter(),
structlog.processors.TimeStamper(fmt="iso"),
structlog.processors.StackInfoRenderer(),
structlog.processors.format_exc_info,
structlog.processors.UnicodeDecoder(),
structlog.processors.JSONRenderer()
],
context_class=dict,
logger_factory=structlog.stdlib.LoggerFactory(),
wrapper_class=structlog.stdlib.BoundLogger,
cache_logger_on_first_use=True,
)
logger = structlog.get_logger()
app = Flask(__name__)
# Configuration
MINIO_ENDPOINT = os.getenv('MINIO_ENDPOINT', 'localhost:9000')
MINIO_ACCESS_KEY = os.getenv('MINIO_ACCESS_KEY', 'minioadmin')
MINIO_SECRET_KEY = os.getenv('MINIO_SECRET_KEY', 'minioadmin123')
MINIO_BUCKET_NAME = os.getenv('MINIO_BUCKET_NAME', 'file-transformer-bucket')
MINIO_USE_SSL = os.getenv('MINIO_USE_SSL', 'false').lower() == 'true'
POSTGRES_URL = os.getenv('POSTGRES_URL', 'postgresql://file_user:secure_password_123@localhost:5432/file_transformer')
# Initialize MinIO client
minio_client = Minio(
MINIO_ENDPOINT,
access_key=MINIO_ACCESS_KEY,
secret_key=MINIO_SECRET_KEY,
secure=MINIO_USE_SSL
)
def get_db_connection():
"""Create a database connection."""
return psycopg2.connect(POSTGRES_URL)
def get_file_metadata(file_id: str) -> Optional[Dict[str, Any]]:
"""Get file metadata from database."""
conn = get_db_connection()
try:
with conn.cursor(cursor_factory=RealDictCursor) as cur:
cur.execute("""
SELECT id, filename, original_filename, file_size, file_type,
mime_type, bucket_name, object_key, checksum, status,
transformation_type, transformation_config, metadata,
created_at, updated_at, processed_at
FROM files
WHERE id = %s
""", (file_id,))
file_record = cur.fetchone()
return dict(file_record) if file_record else None
except Exception as e:
logger.error("Failed to get file metadata", error=str(e))
return None
finally:
conn.close()
def get_file_transformations(file_id: str) -> list:
"""Get transformations for a file."""
conn = get_db_connection()
try:
with conn.cursor(cursor_factory=RealDictCursor) as cur:
cur.execute("""
SELECT id, transformation_type, input_path, output_path,
status, config, result, error_message,
started_at, completed_at, created_at
FROM transformations
WHERE file_id = %s
ORDER BY created_at DESC
""", (file_id,))
transformations = cur.fetchall()
return [dict(t) for t in transformations]
except Exception as e:
logger.error("Failed to get file transformations", error=str(e))
return []
finally:
conn.close()
def get_file_access_logs(file_id: str, limit: int = 50) -> list:
"""Get access logs for a file."""
conn = get_db_connection()
try:
with conn.cursor(cursor_factory=RealDictCursor) as cur:
cur.execute("""
SELECT id, action, ip_address, user_agent, created_at
FROM file_access_logs
WHERE file_id = %s
ORDER BY created_at DESC
LIMIT %s
""", (file_id, limit))
logs = cur.fetchall()
return [dict(log) for log in logs]
except Exception as e:
logger.error("Failed to get file access logs", error=str(e))
return []
finally:
conn.close()
def update_file_metadata(file_id: str, metadata: Dict[str, Any]) -> bool:
"""Update file metadata."""
conn = get_db_connection()
try:
with conn.cursor() as cur:
cur.execute("""
UPDATE files
SET metadata = %s, updated_at = %s
WHERE id = %s
""", (json.dumps(metadata), datetime.utcnow(), file_id))
conn.commit()
return True
except Exception as e:
logger.error("Failed to update file metadata", error=str(e))
conn.rollback()
return False
finally:
conn.close()
def get_storage_stats() -> Dict[str, Any]:
"""Get storage statistics."""
conn = get_db_connection()
try:
with conn.cursor(cursor_factory=RealDictCursor) as cur:
# Total files and size
cur.execute("""
SELECT COUNT(*) as total_files,
SUM(file_size) as total_size,
COUNT(CASE WHEN status = 'uploaded' THEN 1 END) as uploaded_files,
COUNT(CASE WHEN status = 'processing' THEN 1 END) as processing_files,
COUNT(CASE WHEN status = 'transformed' THEN 1 END) as transformed_files,
COUNT(CASE WHEN status = 'error' THEN 1 END) as error_files,
COUNT(CASE WHEN status = 'deleted' THEN 1 END) as deleted_files
FROM files
""")
stats = cur.fetchone()
# File types distribution
cur.execute("""
SELECT file_type, COUNT(*) as count
FROM files
WHERE status != 'deleted'
GROUP BY file_type
ORDER BY count DESC
""")
file_types = cur.fetchall()
# Recent activity
cur.execute("""
SELECT COUNT(*) as recent_uploads
FROM files
WHERE created_at >= NOW() - INTERVAL '24 hours'
""")
recent = cur.fetchone()
return {
'stats': dict(stats),
'file_types': [dict(ft) for ft in file_types],
'recent_uploads': recent['recent_uploads'] if recent else 0
}
except Exception as e:
logger.error("Failed to get storage stats", error=str(e))
return {}
finally:
conn.close()
@app.route('/health', methods=['GET'])
def health_check():
"""Health check endpoint."""
return jsonify({'status': 'healthy', 'service': 'file-metadata'})
@app.route('/files/<file_id>/metadata', methods=['GET'])
def get_file_metadata_endpoint(file_id: str):
"""Get comprehensive file metadata."""
try:
# Get basic file metadata
file_metadata = get_file_metadata(file_id)
if not file_metadata:
return jsonify({'error': 'File not found'}), 404
# Get transformations
transformations = get_file_transformations(file_id)
# Get recent access logs
access_logs = get_file_access_logs(file_id, limit=10)
# Check if file exists in MinIO
minio_exists = False
try:
minio_client.stat_object(
file_metadata['bucket_name'],
file_metadata['object_key']
)
minio_exists = True
except S3Error:
minio_exists = False
response_data = {
'file': file_metadata,
'transformations': transformations,
'access_logs': access_logs,
'storage': {
'minio_exists': minio_exists,
'bucket': file_metadata['bucket_name'],
'object_key': file_metadata['object_key']
}
}
return jsonify(response_data), 200
except Exception as e:
logger.error("Error fetching file metadata", error=str(e))
return jsonify({'error': 'Internal server error'}), 500
@app.route('/files/<file_id>/metadata', methods=['PUT'])
def update_file_metadata_endpoint(file_id: str):
"""Update file metadata."""
try:
data = request.get_json()
if not data:
return jsonify({'error': 'No data provided'}), 400
# Check if file exists
file_metadata = get_file_metadata(file_id)
if not file_metadata:
return jsonify({'error': 'File not found'}), 404
# Update metadata
success = update_file_metadata(file_id, data)
if not success:
return jsonify({'error': 'Failed to update metadata'}), 500
logger.info("File metadata updated", file_id=file_id)
return jsonify({'message': 'Metadata updated successfully'}), 200
except Exception as e:
logger.error("Error updating file metadata", error=str(e))
return jsonify({'error': 'Internal server error'}), 500
@app.route('/files/<file_id>/transformations', methods=['GET'])
def get_file_transformations_endpoint(file_id: str):
"""Get transformations for a file."""
try:
# Check if file exists
file_metadata = get_file_metadata(file_id)
if not file_metadata:
return jsonify({'error': 'File not found'}), 404
transformations = get_file_transformations(file_id)
return jsonify(transformations), 200
except Exception as e:
logger.error("Error fetching file transformations", error=str(e))
return jsonify({'error': 'Internal server error'}), 500
@app.route('/files/<file_id>/access-logs', methods=['GET'])
def get_file_access_logs_endpoint(file_id: str):
"""Get access logs for a file."""
try:
# Check if file exists
file_metadata = get_file_metadata(file_id)
if not file_metadata:
return jsonify({'error': 'File not found'}), 404
limit = request.args.get('limit', 50, type=int)
access_logs = get_file_access_logs(file_id, limit=limit)
return jsonify(access_logs), 200
except Exception as e:
logger.error("Error fetching file access logs", error=str(e))
return jsonify({'error': 'Internal server error'}), 500
@app.route('/stats', methods=['GET'])
def get_stats_endpoint():
"""Get system statistics."""
try:
stats = get_storage_stats()
return jsonify(stats), 200
except Exception as e:
logger.error("Error fetching stats", error=str(e))
return jsonify({'error': 'Internal server error'}), 500
if __name__ == '__main__':
app.run(host='0.0.0.0', port=5000, debug=False)
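A small client sketch (assuming the local port mapping 5004 from docker-compose.yml and a placeholder file_id) that reads the system stats and replaces a file's metadata document:
import requests

BASE_URL = 'http://localhost:5004'  # host port mapped to the metadata function in docker-compose.yml
file_id = '00000000-0000-0000-0000-000000000000'  # placeholder UUID of an existing file record

# System-wide storage statistics
print(requests.get(f'{BASE_URL}/stats', timeout=10).json())

# PUT replaces the stored metadata JSON for the file wholesale
resp = requests.put(
    f'{BASE_URL}/files/{file_id}/metadata',
    json={'source': 'manual-upload', 'reviewed': True},
    timeout=10,
)
print(resp.status_code, resp.json())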

View File

@@ -0,0 +1,24 @@
# Core dependencies
flask==2.3.3
gunicorn==21.2.0
python-dotenv==1.0.0
# Database
psycopg2-binary==2.9.7
sqlalchemy==2.0.21
# MinIO/S3
minio==7.1.17
boto3==1.28.44
# HTTP requests
requests==2.31.0
# JSON and data handling
pydantic==2.1.1
# Logging
structlog==23.1.0
# Utilities
python-dateutil==2.8.2

View File

@@ -0,0 +1,51 @@
# Core dependencies
flask==2.3.3
gunicorn==21.2.0
python-dotenv==1.0.0
# Database
psycopg2-binary==2.9.7
sqlalchemy==2.0.21
alembic==1.12.0
# MinIO/S3
minio==7.1.17
boto3==1.28.44
# File processing
python-magic==0.4.27
Pillow==10.0.1
PyPDF2==3.0.1
python-docx==0.8.11
openpyxl==3.1.2
pandas==2.0.3
numpy==1.24.3
# HTTP requests
requests==2.31.0
httpx==0.24.1
# JSON and data handling
pydantic==2.1.1
marshmallow==3.20.1
# Authentication and security
PyJWT==2.8.0
bcrypt==4.0.1
cryptography==41.0.4
# Logging and monitoring
structlog==23.1.0
prometheus-client==0.17.1
# Utilities
python-dateutil==2.8.2
pytz==2023.3
click==8.1.7
# Development and testing
pytest==7.4.2
pytest-cov==4.1.0
black==23.7.0
flake8==6.0.0
mypy==1.5.1

View File

@@ -0,0 +1,33 @@
FROM python:3.11-slim
# Set working directory
WORKDIR /app
# Install system dependencies (curl is needed by the HEALTHCHECK below)
RUN apt-get update && apt-get install -y \
gcc \
curl \
libmagic1 \
libgl1-mesa-glx \
libglib2.0-0 \
&& rm -rf /var/lib/apt/lists/*
# Copy requirements and install Python dependencies
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt
# Copy application code
COPY app.py .
# Create non-root user
RUN useradd --create-home --shell /bin/bash app && chown -R app:app /app
USER app
# Expose port
EXPOSE 5000
# Health check
HEALTHCHECK --interval=30s --timeout=30s --start-period=5s --retries=3 \
CMD curl -f http://localhost:5000/health || exit 1
# Run the application
CMD ["gunicorn", "--bind", "0.0.0.0:5000", "--workers", "2", "--timeout", "600", "app:app"]

473
functions/transform/app.py Normal file
View File

@@ -0,0 +1,473 @@
import os
import uuid
import json
import tempfile
import logging
from datetime import datetime
from typing import Dict, Any, Optional, List
from pathlib import Path
from flask import Flask, request, jsonify
import psycopg2
from psycopg2.extras import RealDictCursor
from minio import Minio
from minio.error import S3Error
import structlog
# File processing imports
import PyPDF2
from docx import Document
import pandas as pd
from PIL import Image
import io
# Configure structured logging
structlog.configure(
processors=[
structlog.stdlib.filter_by_level,
structlog.stdlib.add_logger_name,
structlog.stdlib.add_log_level,
structlog.stdlib.PositionalArgumentsFormatter(),
structlog.processors.TimeStamper(fmt="iso"),
structlog.processors.StackInfoRenderer(),
structlog.processors.format_exc_info,
structlog.processors.UnicodeDecoder(),
structlog.processors.JSONRenderer()
],
context_class=dict,
logger_factory=structlog.stdlib.LoggerFactory(),
wrapper_class=structlog.stdlib.BoundLogger,
cache_logger_on_first_use=True,
)
logger = structlog.get_logger()
app = Flask(__name__)
# Configuration
MINIO_ENDPOINT = os.getenv('MINIO_ENDPOINT', 'localhost:9000')
MINIO_ACCESS_KEY = os.getenv('MINIO_ACCESS_KEY', 'minioadmin')
MINIO_SECRET_KEY = os.getenv('MINIO_SECRET_KEY', 'minioadmin123')
MINIO_BUCKET_NAME = os.getenv('MINIO_BUCKET_NAME', 'file-transformer-bucket')
MINIO_USE_SSL = os.getenv('MINIO_USE_SSL', 'false').lower() == 'true'
POSTGRES_URL = os.getenv('POSTGRES_URL', 'postgresql://file_user:secure_password_123@localhost:5432/file_transformer')
# Initialize MinIO client
minio_client = Minio(
MINIO_ENDPOINT,
access_key=MINIO_ACCESS_KEY,
secret_key=MINIO_SECRET_KEY,
secure=MINIO_USE_SSL
)
def get_db_connection():
"""Create a database connection."""
return psycopg2.connect(POSTGRES_URL)
def get_file_from_minio(object_key: str) -> bytes:
"""Download file from MinIO."""
try:
response = minio_client.get_object(MINIO_BUCKET_NAME, object_key)
return response.read()
except S3Error as e:
logger.error("Failed to get file from MinIO", object_key=object_key, error=str(e))
raise
def upload_file_to_minio(file_data: bytes, object_key: str) -> bool:
"""Upload file to MinIO."""
try:
minio_client.put_object(
MINIO_BUCKET_NAME,
object_key,
io.BytesIO(file_data),  # put_object expects a readable stream, not raw bytes
length=len(file_data)
)
return True
except S3Error as e:
logger.error("Failed to upload file to MinIO", object_key=object_key, error=str(e))
return False
def extract_text_from_pdf(file_data: bytes) -> str:
"""Extract text from PDF file."""
try:
pdf_reader = PyPDF2.PdfReader(io.BytesIO(file_data))
text = ""
for page in pdf_reader.pages:
text += page.extract_text() + "\n"
return text.strip()
except Exception as e:
logger.error("PDF text extraction failed", error=str(e))
raise
def extract_text_from_docx(file_data: bytes) -> str:
"""Extract text from DOCX file."""
try:
doc = Document(io.BytesIO(file_data))
text = ""
for paragraph in doc.paragraphs:
text += paragraph.text + "\n"
return text.strip()
except Exception as e:
logger.error("DOCX text extraction failed", error=str(e))
raise
def convert_csv_to_json(file_data: bytes) -> List[Dict[str, Any]]:
"""Convert CSV to JSON format."""
try:
df = pd.read_csv(io.BytesIO(file_data))
return df.to_dict('records')
except Exception as e:
logger.error("CSV to JSON conversion failed", error=str(e))
raise
def convert_excel_to_json(file_data: bytes) -> List[Dict[str, Any]]:
"""Convert Excel to JSON format."""
try:
df = pd.read_excel(io.BytesIO(file_data))
return df.to_dict('records')
except Exception as e:
logger.error("Excel to JSON conversion failed", error=str(e))
raise
def resize_image(file_data: bytes, width: int, height: int) -> bytes:
"""Resize image to specified dimensions."""
try:
image = Image.open(io.BytesIO(file_data))
resized_image = image.resize((width, height), Image.Resampling.LANCZOS)
output = io.BytesIO()
resized_image.save(output, format=image.format or 'JPEG')
return output.getvalue()
except Exception as e:
logger.error("Image resize failed", error=str(e))
raise
def convert_image_format(file_data: bytes, target_format: str) -> bytes:
"""Convert image to different format."""
try:
image = Image.open(io.BytesIO(file_data))
fmt = 'JPEG' if target_format.upper() in ('JPG', 'JPEG') else target_format.upper()
if fmt == 'JPEG' and image.mode in ('RGBA', 'P', 'LA'):
image = image.convert('RGB')  # JPEG has no alpha channel
output = io.BytesIO()
image.save(output, format=fmt)
return output.getvalue()
except Exception as e:
logger.error("Image format conversion failed", error=str(e))
raise
def create_transformation_record(file_id: str, transformation_type: str, config: Dict[str, Any]) -> str:
"""Create transformation record in database."""
conn = get_db_connection()
try:
with conn.cursor(cursor_factory=RealDictCursor) as cur:
cur.execute("""
INSERT INTO transformations (
file_id, transformation_type, input_path, status, config, created_at
) VALUES (%s, %s, %s, %s, %s, %s)
RETURNING id
""", (
file_id,
transformation_type,
f"files/{file_id}",
'pending',
json.dumps(config),
datetime.utcnow()
))
transformation_id = cur.fetchone()['id']
conn.commit()
return str(transformation_id)
except Exception as e:
conn.rollback()
logger.error("Failed to create transformation record", error=str(e))
raise
finally:
conn.close()
def update_transformation_status(transformation_id: str, status: str, result: Optional[Dict[str, Any]] = None, error_message: Optional[str] = None):
"""Update transformation status in database."""
conn = get_db_connection()
try:
with conn.cursor() as cur:
if status == 'processing':
cur.execute("""
UPDATE transformations
SET status = %s, started_at = %s
WHERE id = %s
""", (status, datetime.utcnow(), transformation_id))
elif status == 'completed':
cur.execute("""
UPDATE transformations
SET status = %s, completed_at = %s, result = %s
WHERE id = %s
""", (status, datetime.utcnow(), json.dumps(result), transformation_id))
elif status == 'failed':
cur.execute("""
UPDATE transformations
SET status = %s, completed_at = %s, error_message = %s
WHERE id = %s
""", (status, datetime.utcnow(), error_message, transformation_id))
conn.commit()
except Exception as e:
conn.rollback()
logger.error("Failed to update transformation status", error=str(e))
raise
finally:
conn.close()
def get_file_info(file_id: str) -> Optional[Dict[str, Any]]:
"""Get file information from database."""
conn = get_db_connection()
try:
with conn.cursor(cursor_factory=RealDictCursor) as cur:
cur.execute("""
SELECT id, filename, file_type, mime_type, object_key, status
FROM files
WHERE id = %s
""", (file_id,))
file_record = cur.fetchone()
return dict(file_record) if file_record else None
except Exception as e:
logger.error("Failed to get file info", error=str(e))
return None
finally:
conn.close()
@app.route('/health', methods=['GET'])
def health_check():
"""Health check endpoint."""
return jsonify({'status': 'healthy', 'service': 'file-transform'})
@app.route('/transform', methods=['POST'])
def transform_file():
"""Handle file transformation request."""
try:
data = request.get_json()
if not data:
return jsonify({'error': 'No data provided'}), 400
file_id = data.get('file_id')
transformation_type = data.get('transformation_type')
config = data.get('config', {})
if not file_id or not transformation_type:
return jsonify({'error': 'file_id and transformation_type are required'}), 400
# Get file information
file_info = get_file_info(file_id)
if not file_info:
return jsonify({'error': 'File not found'}), 404
if file_info['status'] == 'deleted':
return jsonify({'error': 'File has been deleted'}), 400
# Create transformation record
transformation_id = create_transformation_record(file_id, transformation_type, config)
# Update status to processing
update_transformation_status(transformation_id, 'processing')
logger.info("Starting transformation",
file_id=file_id,
transformation_id=transformation_id,
transformation_type=transformation_type)
try:
# Get file from MinIO
file_data = get_file_from_minio(file_info['object_key'])
# Perform transformation based on type
result = None
output_data = None
if transformation_type == 'extract_text':
if file_info['file_type'] == 'pdf':
result = extract_text_from_pdf(file_data)
elif file_info['file_type'] == 'docx':
# python-docx reads .docx only; legacy .doc files fall through to the unsupported-type error below
result = extract_text_from_docx(file_data)
else:
raise ValueError(f"Text extraction not supported for file type: {file_info['file_type']}")
# Save extracted text as new file
output_filename = f"{Path(file_info['filename']).stem}_extracted.txt"
output_object_key = f"transformations/{transformation_id}/{output_filename}"
output_data = result.encode('utf-8')
elif transformation_type == 'csv_to_json':
if file_info['file_type'] != 'csv':
raise ValueError("CSV to JSON conversion only supports CSV files")
result = convert_csv_to_json(file_data)
output_filename = f"{Path(file_info['filename']).stem}.json"
output_object_key = f"transformations/{transformation_id}/{output_filename}"
output_data = json.dumps(result, indent=2).encode('utf-8')
elif transformation_type == 'excel_to_json':
if file_info['file_type'] not in ['xlsx', 'xls']:
raise ValueError("Excel to JSON conversion only supports Excel files")
result = convert_excel_to_json(file_data)
output_filename = f"{Path(file_info['filename']).stem}.json"
output_object_key = f"transformations/{transformation_id}/{output_filename}"
output_data = json.dumps(result, indent=2).encode('utf-8')
elif transformation_type == 'resize_image':
if not file_info['mime_type'].startswith('image/'):
raise ValueError("Image resize only supports image files")
width = config.get('width', 800)
height = config.get('height', 600)
output_data = resize_image(file_data, width, height)
output_filename = f"{Path(file_info['filename']).stem}_resized.{Path(file_info['filename']).suffix}"
output_object_key = f"transformations/{transformation_id}/{output_filename}"
elif transformation_type == 'convert_image':
if not file_info['mime_type'].startswith('image/'):
raise ValueError("Image conversion only supports image files")
target_format = config.get('format', 'JPEG')
output_data = convert_image_format(file_data, target_format)
output_filename = f"{Path(file_info['filename']).stem}.{target_format.lower()}"
output_object_key = f"transformations/{transformation_id}/{output_filename}"
else:
raise ValueError(f"Unsupported transformation type: {transformation_type}")
# Upload transformed file to MinIO
if output_data:
if not upload_file_to_minio(output_data, output_object_key):
raise Exception("Failed to upload transformed file")
# Update transformation as completed
update_transformation_status(transformation_id, 'completed', {
'output_object_key': output_object_key,
'output_filename': output_filename,
'result': result if isinstance(result, (str, list, dict)) else None
})
# Update file status
conn = get_db_connection()
with conn.cursor() as cur:
cur.execute("""
UPDATE files
SET status = 'transformed', transformation_type = %s, processed_at = %s
WHERE id = %s
""", (transformation_type, datetime.utcnow(), file_id))
conn.commit()
conn.close()
response_data = {
'transformation_id': transformation_id,
'file_id': file_id,
'transformation_type': transformation_type,
'status': 'completed',
'output_object_key': output_object_key,
'output_filename': output_filename,
'completed_at': datetime.utcnow().isoformat()
}
logger.info("Transformation completed",
transformation_id=transformation_id,
file_id=file_id)
return jsonify(response_data), 200
except Exception as e:
error_message = str(e)
logger.error("Transformation failed",
transformation_id=transformation_id,
file_id=file_id,
error=error_message)
# Update transformation as failed
update_transformation_status(transformation_id, 'failed', error_message=error_message)
return jsonify({
'transformation_id': transformation_id,
'file_id': file_id,
'status': 'failed',
'error': error_message
}), 500
except Exception as e:
logger.error("Transform request error", error=str(e))
return jsonify({'error': 'Internal server error'}), 500
@app.route('/transformations/<transformation_id>', methods=['GET'])
def get_transformation_status(transformation_id: str):
"""Get transformation status and details."""
    conn = None
    try:
        conn = get_db_connection()
with conn.cursor(cursor_factory=RealDictCursor) as cur:
cur.execute("""
SELECT id, file_id, transformation_type, input_path, output_path,
status, config, result, error_message, started_at, completed_at, created_at
FROM transformations
WHERE id = %s
""", (transformation_id,))
transformation = cur.fetchone()
if not transformation:
return jsonify({'error': 'Transformation not found'}), 404
return jsonify(dict(transformation)), 200
except Exception as e:
logger.error("Error fetching transformation", error=str(e))
return jsonify({'error': 'Internal server error'}), 500
    finally:
        if conn is not None:
            conn.close()
@app.route('/transformations/<transformation_id>/retry', methods=['POST'])
def retry_transformation(transformation_id: str):
"""Retry a failed transformation."""
    conn = None
    try:
        conn = get_db_connection()
with conn.cursor(cursor_factory=RealDictCursor) as cur:
# Get transformation details
cur.execute("""
                SELECT file_id, transformation_type, config, status
FROM transformations
WHERE id = %s
""", (transformation_id,))
transformation = cur.fetchone()
if not transformation:
return jsonify({'error': 'Transformation not found'}), 404
if transformation['status'] != 'failed':
return jsonify({'error': 'Only failed transformations can be retried'}), 400
# Reset transformation status
cur.execute("""
UPDATE transformations
SET status = 'pending', started_at = NULL, completed_at = NULL,
error_message = NULL, result = NULL
WHERE id = %s
""", (transformation_id,))
conn.commit()
# Trigger new transformation
transform_data = {
'file_id': transformation['file_id'],
'transformation_type': transformation['transformation_type'],
'config': transformation['config'] or {}
}
# Call transform endpoint internally
with app.test_client() as client:
response = client.post('/transform', json=transform_data)
return response.get_json(), response.status_code
except Exception as e:
logger.error("Error retrying transformation", error=str(e))
return jsonify({'error': 'Internal server error'}), 500
    finally:
        if conn is not None:
            conn.close()
if __name__ == '__main__':
app.run(host='0.0.0.0', port=5000, debug=False)
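For reference, a minimal client sketch of the transform flow implemented above. This is an assumption-laden example, not part of the commit: the base URL and the placeholder file id are made up, while the endpoint paths and JSON keys (file_id, transformation_type, config) mirror the handlers in this file.

import requests

BASE_URL = "http://localhost:5000"  # assumption: docker-compose port mapping or a Knative route

# /transform runs synchronously and returns the finished record (or a 500 with details)
resp = requests.post(f"{BASE_URL}/transform", json={
    "file_id": "<file-id-from-upload>",   # placeholder: id returned by the upload function
    "transformation_type": "csv_to_json",
    "config": {},
})
result = resp.json()
print(resp.status_code, result)

# The record can be re-read later, and failed runs can be retried
transformation_id = result.get("transformation_id")
record = requests.get(f"{BASE_URL}/transformations/{transformation_id}").json()
if record.get("status") == "failed":
    requests.post(f"{BASE_URL}/transformations/{transformation_id}/retry")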

View File

@@ -0,0 +1,33 @@
# Core dependencies
flask==2.3.3
gunicorn==21.2.0
python-dotenv==1.0.0
# Database
psycopg2-binary==2.9.7
sqlalchemy==2.0.21
# MinIO/S3
minio==7.1.17
boto3==1.28.44
# File processing
python-magic==0.4.27
Pillow==10.0.1
PyPDF2==3.0.1
python-docx==0.8.11
openpyxl==3.1.2
pandas==2.0.3
numpy==1.24.3
# HTTP requests
requests==2.31.0
# JSON and data handling
pydantic==2.1.1
# Logging
structlog==23.1.0
# Utilities
python-dateutil==2.8.2

View File

@@ -0,0 +1,31 @@
FROM python:3.11-slim
# Set working directory
WORKDIR /app
# Install system dependencies
RUN apt-get update && apt-get install -y \
    gcc \
    curl \
    libmagic1 \
&& rm -rf /var/lib/apt/lists/*
# Copy requirements and install Python dependencies
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt
# Copy application code
COPY app.py .
# Create non-root user
RUN useradd --create-home --shell /bin/bash app && chown -R app:app /app
USER app
# Expose port
EXPOSE 5000
# Health check
HEALTHCHECK --interval=30s --timeout=30s --start-period=5s --retries=3 \
CMD curl -f http://localhost:5000/health || exit 1
# Run the application
CMD ["gunicorn", "--bind", "0.0.0.0:5000", "--workers", "4", "--timeout", "300", "app:app"]

287
functions/upload/app.py Normal file
View File

@@ -0,0 +1,287 @@
import io
import os
import uuid
import hashlib
import magic
import logging
from datetime import datetime
from typing import Dict, Any, Optional
from flask import Flask, request, jsonify
from werkzeug.utils import secure_filename
import psycopg2
from psycopg2.extras import RealDictCursor
from minio import Minio
from minio.error import S3Error
import structlog
# Configure structured logging
structlog.configure(
processors=[
structlog.stdlib.filter_by_level,
structlog.stdlib.add_logger_name,
structlog.stdlib.add_log_level,
structlog.stdlib.PositionalArgumentsFormatter(),
structlog.processors.TimeStamper(fmt="iso"),
structlog.processors.StackInfoRenderer(),
structlog.processors.format_exc_info,
structlog.processors.UnicodeDecoder(),
structlog.processors.JSONRenderer()
],
context_class=dict,
logger_factory=structlog.stdlib.LoggerFactory(),
wrapper_class=structlog.stdlib.BoundLogger,
cache_logger_on_first_use=True,
)
logger = structlog.get_logger()
app = Flask(__name__)
# Configuration
MINIO_ENDPOINT = os.getenv('MINIO_ENDPOINT', 'localhost:9000')
MINIO_ACCESS_KEY = os.getenv('MINIO_ACCESS_KEY', 'minioadmin')
MINIO_SECRET_KEY = os.getenv('MINIO_SECRET_KEY', 'minioadmin123')
MINIO_BUCKET_NAME = os.getenv('MINIO_BUCKET_NAME', 'file-transformer-bucket')
MINIO_USE_SSL = os.getenv('MINIO_USE_SSL', 'false').lower() == 'true'
POSTGRES_URL = os.getenv('POSTGRES_URL', 'postgresql://file_user:secure_password_123@localhost:5432/file_transformer')
# Initialize MinIO client
minio_client = Minio(
MINIO_ENDPOINT,
access_key=MINIO_ACCESS_KEY,
secret_key=MINIO_SECRET_KEY,
secure=MINIO_USE_SSL
)
def get_db_connection():
"""Create a database connection."""
return psycopg2.connect(POSTGRES_URL)
def calculate_file_hash(file_data: bytes) -> str:
"""Calculate SHA-256 hash of file data."""
return hashlib.sha256(file_data).hexdigest()
def get_file_metadata(file_data: bytes, filename: str) -> Dict[str, Any]:
"""Extract file metadata including MIME type and size."""
mime_type = magic.from_buffer(file_data, mime=True)
file_size = len(file_data)
# Determine file type from extension
file_extension = filename.rsplit('.', 1)[1].lower() if '.' in filename else ''
return {
'mime_type': mime_type,
'file_size': file_size,
'file_type': file_extension,
'checksum': calculate_file_hash(file_data)
}
def save_file_to_database(file_data: bytes, filename: str, object_key: str, metadata: Dict[str, Any]) -> str:
"""Save file information to PostgreSQL database."""
conn = get_db_connection()
try:
with conn.cursor(cursor_factory=RealDictCursor) as cur:
# Insert file record
cur.execute("""
INSERT INTO files (
filename, original_filename, file_path, file_size,
file_type, mime_type, bucket_name, object_key,
checksum, status, created_at
) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
RETURNING id
""", (
filename,
filename,
object_key,
metadata['file_size'],
metadata['file_type'],
metadata['mime_type'],
MINIO_BUCKET_NAME,
object_key,
metadata['checksum'],
'uploaded',
datetime.utcnow()
))
file_id = cur.fetchone()['id']
conn.commit()
return str(file_id)
except Exception as e:
conn.rollback()
logger.error("Database error", error=str(e))
raise
finally:
conn.close()
def upload_to_minio(file_data: bytes, object_key: str) -> bool:
"""Upload file to MinIO bucket."""
try:
# Ensure bucket exists
if not minio_client.bucket_exists(MINIO_BUCKET_NAME):
minio_client.make_bucket(MINIO_BUCKET_NAME)
logger.info("Created bucket", bucket=MINIO_BUCKET_NAME)
# Upload file
minio_client.put_object(
MINIO_BUCKET_NAME,
object_key,
            io.BytesIO(file_data),  # put_object expects a file-like stream, not raw bytes
length=len(file_data)
)
logger.info("File uploaded to MinIO", bucket=MINIO_BUCKET_NAME, object_key=object_key)
return True
except S3Error as e:
logger.error("MinIO upload error", error=str(e))
return False
@app.route('/health', methods=['GET'])
def health_check():
"""Health check endpoint."""
return jsonify({'status': 'healthy', 'service': 'file-upload'})
@app.route('/upload', methods=['POST'])
def upload_file():
"""Handle file upload request."""
try:
# Check if file is present in request
if 'file' not in request.files:
return jsonify({'error': 'No file provided'}), 400
file = request.files['file']
if file.filename == '':
return jsonify({'error': 'No file selected'}), 400
# Read file data
file_data = file.read()
if not file_data:
return jsonify({'error': 'Empty file'}), 400
# Secure filename and generate object key
filename = secure_filename(file.filename)
file_id = str(uuid.uuid4())
object_key = f"uploads/{file_id}/{filename}"
# Extract metadata
metadata = get_file_metadata(file_data, filename)
logger.info("Processing file upload",
filename=filename,
size=metadata['file_size'],
mime_type=metadata['mime_type'])
# Upload to MinIO
if not upload_to_minio(file_data, object_key):
return jsonify({'error': 'Failed to upload file to storage'}), 500
# Save to database
db_file_id = save_file_to_database(file_data, filename, object_key, metadata)
# Log access
log_file_access(db_file_id, 'upload', request.remote_addr, request.headers.get('User-Agent'))
response_data = {
'file_id': db_file_id,
'filename': filename,
'object_key': object_key,
'file_size': metadata['file_size'],
'mime_type': metadata['mime_type'],
'checksum': metadata['checksum'],
'status': 'uploaded',
'uploaded_at': datetime.utcnow().isoformat()
}
logger.info("File upload completed", file_id=db_file_id, filename=filename)
return jsonify(response_data), 201
except Exception as e:
logger.error("Upload error", error=str(e))
return jsonify({'error': 'Internal server error'}), 500
def log_file_access(file_id: str, action: str, ip_address: str, user_agent: Optional[str]):
"""Log file access for audit purposes."""
conn = get_db_connection()
try:
with conn.cursor() as cur:
cur.execute("""
INSERT INTO file_access_logs (file_id, action, ip_address, user_agent)
VALUES (%s, %s, %s, %s)
""", (file_id, action, ip_address, user_agent))
conn.commit()
except Exception as e:
logger.error("Failed to log file access", error=str(e))
conn.rollback()
finally:
conn.close()
@app.route('/files/<file_id>', methods=['GET'])
def get_file_info(file_id: str):
"""Get file information by ID."""
    conn = None
    try:
        conn = get_db_connection()
with conn.cursor(cursor_factory=RealDictCursor) as cur:
cur.execute("""
SELECT id, filename, original_filename, file_size, file_type,
mime_type, bucket_name, object_key, checksum, status,
created_at, updated_at
FROM files
WHERE id = %s
""", (file_id,))
file_record = cur.fetchone()
if not file_record:
return jsonify({'error': 'File not found'}), 404
return jsonify(dict(file_record)), 200
except Exception as e:
logger.error("Error fetching file info", error=str(e))
return jsonify({'error': 'Internal server error'}), 500
    finally:
        if conn is not None:
            conn.close()
@app.route('/files/<file_id>', methods=['DELETE'])
def delete_file(file_id: str):
"""Delete file from storage and database."""
    conn = None
    try:
        conn = get_db_connection()
with conn.cursor(cursor_factory=RealDictCursor) as cur:
# Get file info
cur.execute("SELECT object_key FROM files WHERE id = %s", (file_id,))
file_record = cur.fetchone()
if not file_record:
return jsonify({'error': 'File not found'}), 404
object_key = file_record['object_key']
# Delete from MinIO
try:
minio_client.remove_object(MINIO_BUCKET_NAME, object_key)
logger.info("File deleted from MinIO", object_key=object_key)
except S3Error as e:
logger.warning("File not found in MinIO", object_key=object_key, error=str(e))
# Mark as deleted in database
cur.execute("""
UPDATE files
SET status = 'deleted', deleted_at = %s
WHERE id = %s
""", (datetime.utcnow(), file_id))
conn.commit()
# Log access
log_file_access(file_id, 'delete', request.remote_addr, request.headers.get('User-Agent'))
return jsonify({'message': 'File deleted successfully'}), 200
except Exception as e:
logger.error("Error deleting file", error=str(e))
return jsonify({'error': 'Internal server error'}), 500
    finally:
        if conn is not None:
            conn.close()
if __name__ == '__main__':
app.run(host='0.0.0.0', port=5000, debug=False)
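A matching client sketch for this upload service. Again a hedged example, not part of the commit: the base URL and the local filename are assumptions, while the multipart field name "file" and the /files/<id> lookup mirror the handlers above.

import requests

UPLOAD_URL = "http://localhost:5001"  # assumption: wherever this upload function is exposed

# POST the file as multipart/form-data under the field name "file"
with open("report.csv", "rb") as fh:  # hypothetical local file
    resp = requests.post(f"{UPLOAD_URL}/upload", files={"file": fh})
resp.raise_for_status()
file_id = resp.json()["file_id"]

# Read back the stored metadata; DELETE soft-deletes the record and removes the object
info = requests.get(f"{UPLOAD_URL}/files/{file_id}").json()
print(info["mime_type"], info["checksum"])
# requests.delete(f"{UPLOAD_URL}/files/{file_id}")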

View File

@@ -0,0 +1,27 @@
# Core dependencies
flask==2.3.3
gunicorn==21.2.0
python-dotenv==1.0.0
# Database
psycopg2-binary==2.9.7
sqlalchemy==2.0.21
# MinIO/S3
minio==7.1.17
boto3==1.28.44
# File processing
python-magic==0.4.27
# HTTP requests
requests==2.31.0
# JSON and data handling
pydantic==2.1.1
# Logging
structlog==23.1.0
# Utilities
python-dateutil==2.8.2

82
k8s/dashboard.yaml Normal file
View File

@@ -0,0 +1,82 @@
apiVersion: apps/v1
kind: Deployment
metadata:
name: file-transformer-dashboard
namespace: file-transformer
labels:
app: file-transformer-dashboard
spec:
replicas: 2
selector:
matchLabels:
app: file-transformer-dashboard
template:
metadata:
labels:
app: file-transformer-dashboard
spec:
containers:
- name: dashboard
image: file-transformer/dashboard:latest
ports:
- containerPort: 3000
env:
- name: REACT_APP_API_BASE_URL
value: "http://api-gateway.file-transformer.svc.cluster.local:8080"
- name: REACT_APP_MINIO_ENDPOINT
value: "http://minio.file-transformer.svc.cluster.local:9000"
- name: REACT_APP_MINIO_CONSOLE
value: "http://minio.file-transformer.svc.cluster.local:9001"
resources:
requests:
memory: "128Mi"
cpu: "100m"
limits:
memory: "256Mi"
cpu: "200m"
livenessProbe:
httpGet:
path: /
port: 3000
initialDelaySeconds: 30
periodSeconds: 10
readinessProbe:
httpGet:
path: /
port: 3000
initialDelaySeconds: 5
periodSeconds: 5
---
apiVersion: v1
kind: Service
metadata:
name: file-transformer-dashboard
namespace: file-transformer
spec:
selector:
app: file-transformer-dashboard
ports:
- port: 80
targetPort: 3000
type: ClusterIP
---
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
name: file-transformer-dashboard-ingress
namespace: file-transformer
annotations:
nginx.ingress.kubernetes.io/rewrite-target: /
nginx.ingress.kubernetes.io/ssl-redirect: "false"
spec:
rules:
- host: dashboard.file-transformer.local
http:
paths:
- path: /
pathType: Prefix
backend:
service:
name: file-transformer-dashboard
port:
number: 80

View File

@@ -0,0 +1,85 @@
apiVersion: serving.knative.dev/v1
kind: Service
metadata:
name: function-download
namespace: file-transformer
labels:
app: file-transformer
function: download
spec:
template:
metadata:
annotations:
autoscaling.knative.dev/minScale: "0"
autoscaling.knative.dev/maxScale: "10"
autoscaling.knative.dev/target: "1"
spec:
containerConcurrency: 10
timeoutSeconds: 300
containers:
- image: function-download:latest
imagePullPolicy: IfNotPresent
ports:
- containerPort: 5000
env:
- name: MINIO_ENDPOINT
value: "minio-service:9000"
- name: MINIO_ACCESS_KEY
valueFrom:
secretKeyRef:
name: minio-secret
key: access-key
- name: MINIO_SECRET_KEY
valueFrom:
secretKeyRef:
name: minio-secret
key: secret-key
- name: MINIO_BUCKET_NAME
value: "file-transformer-bucket"
- name: MINIO_USE_SSL
value: "false"
- name: POSTGRES_URL
valueFrom:
secretKeyRef:
name: postgres-secret
key: database-url
resources:
requests:
memory: "256Mi"
cpu: "250m"
limits:
memory: "512Mi"
cpu: "500m"
readinessProbe:
httpGet:
path: /health
port: 5000
initialDelaySeconds: 5
periodSeconds: 10
livenessProbe:
httpGet:
path: /health
port: 5000
initialDelaySeconds: 15
periodSeconds: 30
---
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
name: function-download-ingress
namespace: file-transformer
annotations:
nginx.ingress.kubernetes.io/rewrite-target: /
nginx.ingress.kubernetes.io/ssl-redirect: "false"
spec:
rules:
- host: download.file-transformer.local
http:
paths:
- path: /
pathType: Prefix
backend:
service:
name: function-download
port:
number: 80

View File

@@ -0,0 +1,85 @@
apiVersion: serving.knative.dev/v1
kind: Service
metadata:
name: function-metadata
namespace: file-transformer
labels:
app: file-transformer
function: metadata
spec:
template:
metadata:
annotations:
autoscaling.knative.dev/minScale: "0"
autoscaling.knative.dev/maxScale: "10"
autoscaling.knative.dev/target: "1"
spec:
containerConcurrency: 10
timeoutSeconds: 300
containers:
- image: function-metadata:latest
imagePullPolicy: IfNotPresent
ports:
- containerPort: 5000
env:
- name: MINIO_ENDPOINT
value: "minio-service:9000"
- name: MINIO_ACCESS_KEY
valueFrom:
secretKeyRef:
name: minio-secret
key: access-key
- name: MINIO_SECRET_KEY
valueFrom:
secretKeyRef:
name: minio-secret
key: secret-key
- name: MINIO_BUCKET_NAME
value: "file-transformer-bucket"
- name: MINIO_USE_SSL
value: "false"
- name: POSTGRES_URL
valueFrom:
secretKeyRef:
name: postgres-secret
key: database-url
resources:
requests:
memory: "256Mi"
cpu: "250m"
limits:
memory: "512Mi"
cpu: "500m"
readinessProbe:
httpGet:
path: /health
port: 5000
initialDelaySeconds: 5
periodSeconds: 10
livenessProbe:
httpGet:
path: /health
port: 5000
initialDelaySeconds: 15
periodSeconds: 30
---
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
name: function-metadata-ingress
namespace: file-transformer
annotations:
nginx.ingress.kubernetes.io/rewrite-target: /
nginx.ingress.kubernetes.io/ssl-redirect: "false"
spec:
rules:
- host: metadata.file-transformer.local
http:
paths:
- path: /
pathType: Prefix
backend:
service:
name: function-metadata
port:
number: 80

View File

@@ -0,0 +1,63 @@
apiVersion: serving.knative.dev/v1
kind: Service
metadata:
name: file-transform-service
namespace: file-transformer
spec:
template:
metadata:
annotations:
autoscaling.knative.dev/minScale: "0"
autoscaling.knative.dev/maxScale: "10"
autoscaling.knative.dev/target: "1"
spec:
containerConcurrency: 5
timeoutSeconds: 600
containers:
- image: file-transformer/transform-function:latest
ports:
- containerPort: 5000
env:
- name: MINIO_ENDPOINT
value: "minio.file-transformer.svc.cluster.local:9000"
- name: MINIO_ACCESS_KEY
valueFrom:
secretKeyRef:
name: minio-secret
key: accesskey
- name: MINIO_SECRET_KEY
valueFrom:
secretKeyRef:
name: minio-secret
key: secretkey
- name: MINIO_BUCKET_NAME
valueFrom:
configMapKeyRef:
name: minio-config
key: MINIO_BUCKET_NAME
- name: POSTGRES_URL
value: "postgresql://file_user:$(POSTGRES_PASSWORD)@postgres.file-transformer.svc.cluster.local:5432/file_transformer"
- name: POSTGRES_PASSWORD
valueFrom:
secretKeyRef:
name: postgres-secret
key: postgres-password
resources:
requests:
memory: "512Mi"
cpu: "500m"
limits:
memory: "1Gi"
cpu: "1000m"
livenessProbe:
httpGet:
path: /health
port: 5000
initialDelaySeconds: 30
periodSeconds: 10
readinessProbe:
httpGet:
path: /health
port: 5000
initialDelaySeconds: 5
periodSeconds: 5

View File

@@ -0,0 +1,63 @@
apiVersion: serving.knative.dev/v1
kind: Service
metadata:
name: file-upload-service
namespace: file-transformer
spec:
template:
metadata:
annotations:
autoscaling.knative.dev/minScale: "0"
autoscaling.knative.dev/maxScale: "10"
autoscaling.knative.dev/target: "1"
spec:
containerConcurrency: 10
timeoutSeconds: 300
containers:
- image: file-transformer/upload-function:latest
ports:
- containerPort: 5000
env:
- name: MINIO_ENDPOINT
value: "minio.file-transformer.svc.cluster.local:9000"
- name: MINIO_ACCESS_KEY
valueFrom:
secretKeyRef:
name: minio-secret
key: accesskey
- name: MINIO_SECRET_KEY
valueFrom:
secretKeyRef:
name: minio-secret
key: secretkey
- name: MINIO_BUCKET_NAME
valueFrom:
configMapKeyRef:
name: minio-config
key: MINIO_BUCKET_NAME
- name: POSTGRES_URL
value: "postgresql://file_user:$(POSTGRES_PASSWORD)@postgres.file-transformer.svc.cluster.local:5432/file_transformer"
- name: POSTGRES_PASSWORD
valueFrom:
secretKeyRef:
name: postgres-secret
key: postgres-password
resources:
requests:
memory: "256Mi"
cpu: "250m"
limits:
memory: "512Mi"
cpu: "500m"
livenessProbe:
httpGet:
path: /health
port: 5000
initialDelaySeconds: 30
periodSeconds: 10
readinessProbe:
httpGet:
path: /health
port: 5000
initialDelaySeconds: 5
periodSeconds: 5

112
k8s/minio.yaml Normal file
View File

@@ -0,0 +1,112 @@
apiVersion: v1
kind: Secret
metadata:
name: minio-secret
namespace: file-transformer
type: Opaque
data:
accesskey: bWluaW9hZG1pbg== # minioadmin
secretkey: bWluaW9hZG1pbjEyMw== # minioadmin123
---
apiVersion: v1
kind: ConfigMap
metadata:
name: minio-config
namespace: file-transformer
data:
MINIO_BUCKET_NAME: file-transformer-bucket
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: minio
namespace: file-transformer
labels:
app: minio
spec:
replicas: 1
selector:
matchLabels:
app: minio
template:
metadata:
labels:
app: minio
spec:
containers:
- name: minio
image: minio/minio:latest
ports:
- containerPort: 9000
name: api
- containerPort: 9001
name: console
env:
- name: MINIO_ROOT_USER
valueFrom:
secretKeyRef:
name: minio-secret
key: accesskey
- name: MINIO_ROOT_PASSWORD
valueFrom:
secretKeyRef:
name: minio-secret
key: secretkey
- name: MINIO_BUCKET_NAME
valueFrom:
configMapKeyRef:
name: minio-config
key: MINIO_BUCKET_NAME
command:
- /bin/bash
- -c
- |
minio server /data --console-address ":9001" &
sleep 10
mc alias set local http://localhost:9000 $MINIO_ROOT_USER $MINIO_ROOT_PASSWORD
mc mb local/$MINIO_BUCKET_NAME --ignore-existing
mc policy set public local/$MINIO_BUCKET_NAME
wait
volumeMounts:
- name: minio-storage
mountPath: /data
resources:
requests:
memory: "256Mi"
cpu: "250m"
limits:
memory: "512Mi"
cpu: "500m"
volumes:
- name: minio-storage
persistentVolumeClaim:
claimName: minio-pvc
---
apiVersion: v1
kind: Service
metadata:
name: minio
namespace: file-transformer
spec:
selector:
app: minio
ports:
- name: api
port: 9000
targetPort: 9000
- name: console
port: 9001
targetPort: 9001
type: ClusterIP
---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: minio-pvc
namespace: file-transformer
spec:
accessModes:
- ReadWriteOnce
resources:
requests:
storage: 20Gi
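A quick smoke test for this MinIO deployment, sketched under the assumption that `kubectl port-forward -n file-transformer svc/minio 9000:9000` is running and the default credentials from the Secret above are unchanged.

from minio import Minio

# Credentials and bucket mirror minio-secret / minio-config above;
# localhost:9000 assumes an active port-forward to the minio Service.
client = Minio(
    "localhost:9000",
    access_key="minioadmin",
    secret_key="minioadmin123",
    secure=False,
)

print("bucket exists:", client.bucket_exists("file-transformer-bucket"))
for obj in client.list_objects("file-transformer-bucket", prefix="uploads/", recursive=True):
    print(obj.object_name, obj.size)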

7
k8s/namespace.yaml Normal file
View File

@@ -0,0 +1,7 @@
apiVersion: v1
kind: Namespace
metadata:
name: file-transformer
labels:
name: file-transformer
app: file-transformer-s3

100
k8s/postgres.yaml Normal file
View File

@@ -0,0 +1,100 @@
apiVersion: v1
kind: Secret
metadata:
name: postgres-secret
namespace: file-transformer
type: Opaque
data:
postgres-password: c2VjdXJlX3Bhc3N3b3JkXzEyMw== # secure_password_123
---
apiVersion: v1
kind: ConfigMap
metadata:
name: postgres-config
namespace: file-transformer
data:
POSTGRES_DB: file_transformer
POSTGRES_USER: file_user
---
apiVersion: apps/v1
kind: Deployment
metadata:
name: postgres
namespace: file-transformer
labels:
app: postgres
spec:
replicas: 1
selector:
matchLabels:
app: postgres
template:
metadata:
labels:
app: postgres
spec:
containers:
- name: postgres
image: postgres:15-alpine
ports:
- containerPort: 5432
env:
- name: POSTGRES_DB
valueFrom:
configMapKeyRef:
name: postgres-config
key: POSTGRES_DB
- name: POSTGRES_USER
valueFrom:
configMapKeyRef:
name: postgres-config
key: POSTGRES_USER
- name: POSTGRES_PASSWORD
valueFrom:
secretKeyRef:
name: postgres-secret
key: postgres-password
volumeMounts:
- name: postgres-storage
mountPath: /var/lib/postgresql/data
- name: postgres-init
mountPath: /docker-entrypoint-initdb.d
resources:
requests:
memory: "256Mi"
cpu: "250m"
limits:
memory: "512Mi"
cpu: "500m"
volumes:
- name: postgres-storage
persistentVolumeClaim:
claimName: postgres-pvc
- name: postgres-init
configMap:
name: postgres-init-script
---
apiVersion: v1
kind: Service
metadata:
name: postgres
namespace: file-transformer
spec:
selector:
app: postgres
ports:
- port: 5432
targetPort: 5432
type: ClusterIP
---
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
name: postgres-pvc
namespace: file-transformer
spec:
accessModes:
- ReadWriteOnce
resources:
requests:
storage: 10Gi
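Similarly, a minimal connectivity check for this PostgreSQL deployment, assuming `kubectl port-forward -n file-transformer svc/postgres 5432:5432` and the credentials above; the schema (the files and transformations tables used by the functions) is expected to come from the postgres-init-script ConfigMap, which is not shown here.

import psycopg2

# DSN mirrors postgres-config / postgres-secret above; localhost assumes a port-forward.
conn = psycopg2.connect("postgresql://file_user:secure_password_123@localhost:5432/file_transformer")
try:
    with conn.cursor() as cur:
        cur.execute("SELECT count(*) FROM files")
        print("files:", cur.fetchone()[0])
finally:
    conn.close()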

114
setup.sh Executable file
View File

@@ -0,0 +1,114 @@
#!/bin/bash
# File Transformer S3 Setup Script
# This script helps set up the development environment
set -e
echo "🚀 Setting up File Transformer S3..."
# Check if running on Arch Linux
if command -v pacman &> /dev/null; then
echo "📦 Detected Arch Linux - installing dependencies with pacman..."
# Check if Node.js is installed
if ! command -v node &> /dev/null; then
echo "Installing Node.js and npm..."
sudo pacman -S --noconfirm nodejs npm
else
echo "✅ Node.js already installed"
fi
# Check if Python3 is installed
if ! command -v python3 &> /dev/null; then
echo "Installing Python3..."
sudo pacman -S --noconfirm python python-pip
else
echo "✅ Python3 already installed"
fi
# Check if Docker is installed
if ! command -v docker &> /dev/null; then
echo "Installing Docker..."
sudo pacman -S --noconfirm docker docker-compose
sudo systemctl enable docker
sudo systemctl start docker
sudo usermod -aG docker $USER
echo "⚠️ Please log out and back in for Docker group changes to take effect"
else
echo "✅ Docker already installed"
fi
# Check if running on Ubuntu/Debian
elif command -v apt &> /dev/null; then
echo "📦 Detected Ubuntu/Debian - installing dependencies with apt..."
# Check if Node.js is installed
if ! command -v node &> /dev/null; then
echo "Installing Node.js and npm..."
curl -fsSL https://deb.nodesource.com/setup_lts.x | sudo -E bash -
sudo apt-get install -y nodejs
else
echo "✅ Node.js already installed"
fi
# Check if Python3 is installed
if ! command -v python3 &> /dev/null; then
echo "Installing Python3..."
sudo apt-get update
sudo apt-get install -y python3 python3-pip
else
echo "✅ Python3 already installed"
fi
# Check if Docker is installed
if ! command -v docker &> /dev/null; then
echo "Installing Docker..."
curl -fsSL https://get.docker.com -o get-docker.sh
sudo sh get-docker.sh
sudo usermod -aG docker $USER
rm get-docker.sh
echo "⚠️ Please log out and back in for Docker group changes to take effect"
else
echo "✅ Docker already installed"
fi
else
echo "⚠️ Unsupported package manager. Please install manually:"
echo " - Node.js and npm"
echo " - Python3 and pip"
echo " - Docker and docker-compose"
fi
# Create .env file if it doesn't exist
if [ ! -f .env ]; then
echo "📝 Creating .env file from template..."
    cp .env.example .env
echo "✅ Created .env file"
echo "⚠️ Please edit .env with your configuration values"
else
echo "✅ .env file already exists"
fi
# Install Node.js dependencies
echo "📦 Installing Node.js dependencies..."
cd dashboard
npm install
cd ..
# Install Python dependencies
echo "🐍 Installing Python dependencies..."
pip3 install -r functions/requirements.txt
echo "✅ Setup complete!"
echo ""
echo "Next steps:"
echo "1. Edit .env file with your configuration"
echo "2. Run 'make deploy-local' to start the system"
echo "3. Or run 'make dev-dashboard' for development mode"
echo ""
echo "Available commands:"
echo " make help - Show all available commands"
echo " make deploy-local - Deploy with Docker Compose"
echo " make dev-dashboard - Start dashboard in dev mode"
echo " make build-functions - Build function containers"