mirror of
https://github.com/ghndrx/kubeflow-pipelines.git
synced 2026-02-10 06:45:13 +00:00
- Downloaded 191K DDI pairs from TDC DrugBank
- Fetched 1,634 drug names from PubChem API (96% hit rate)
- Created complete training dataset with:
  - Real drug names (not just IDs)
  - 86 interaction type descriptions
  - Severity labels (minor/moderate/major/contraindicated)
- Bundled 34MB data file in Docker image
- Handler loads real data instead of curated samples
21 lines
493 B
Docker
21 lines
493 B
Docker
# Image for the handler script: a PyTorch/CUDA base image plus the Python
# dependencies, the data directory, and handler.py, all installed under /app.
FROM runpod/pytorch:2.4.0-py3.11-cuda12.4.1-devel-ubuntu22.04

# All relative paths below (requirements.txt, handler.py) resolve against /app.
WORKDIR /app

# Copy only the dependency manifest first so the install layer below stays
# cached until requirements.txt itself changes.
COPY requirements.txt /app/requirements.txt

# Install dependencies; --no-cache-dir keeps pip's download cache out of the layer.
RUN pip install --no-cache-dir -r requirements.txt

# Copy the data directory BEFORE the handler: a changed layer invalidates every
# layer after it, so with this order an edit to handler.py no longer forces the
# data layer to be rebuilt. The resulting filesystem is the same either way.
# (Assumes data/ changes less often than handler.py -- confirm.)
COPY data/ /app/data/
COPY handler.py /app/handler.py

# Runtime environment:
#   PYTHONUNBUFFERED - Python writes stdout/stderr without buffering.
#   HF_HOME          - Hugging Face cache root (used only if the handler relies
#                      on Hugging Face libraries; not visible from this file).
#   DDI_DATA_PATH    - location of the data file inside the directory copied
#                      above; presumably read by handler.py -- confirm.
ENV PYTHONUNBUFFERED=1 \
    HF_HOME=/tmp/huggingface \
    DDI_DATA_PATH=/app/data/drugbank_ddi_complete.jsonl

# NOTE(review): no USER instruction is set, so the process runs as the base
# image's default user (presumably root). Confirm whether the target platform
# permits a non-root user before adding one.

# Exec form so python is started directly rather than through a shell. The -u
# flag duplicates PYTHONUNBUFFERED=1 and is kept so output stays unbuffered even
# if that variable is overridden at run time.
CMD ["python", "-u", "handler.py"]