From 222be0fb68e62dc26bb9d47755a9860ba16dac5d Mon Sep 17 00:00:00 2001 From: Greg Hendrickson Date: Tue, 3 Feb 2026 00:23:16 +0000 Subject: [PATCH] Use Tailscale endpoints, add RunPod Docker build files --- components/runpod_trainer/Dockerfile | 13 +- components/runpod_trainer/requirements.txt | 9 + ddi_data_prep.yaml | 265 +++++++++++++++++++++ pipelines/ddi_data_prep.py | 210 ++++++++++++++++ pipelines/ddi_training_runpod.py | 4 +- 5 files changed, 491 insertions(+), 10 deletions(-) create mode 100644 components/runpod_trainer/requirements.txt create mode 100644 ddi_data_prep.yaml create mode 100644 pipelines/ddi_data_prep.py diff --git a/components/runpod_trainer/Dockerfile b/components/runpod_trainer/Dockerfile index 3994840..19e37bf 100644 --- a/components/runpod_trainer/Dockerfile +++ b/components/runpod_trainer/Dockerfile @@ -2,20 +2,17 @@ FROM runpod/pytorch:2.1.0-py3.10-cuda11.8.0-devel-ubuntu22.04 WORKDIR /app +# Copy requirements first for better caching +COPY requirements.txt /app/requirements.txt + # Install dependencies -RUN pip install --no-cache-dir \ - runpod \ - transformers \ - datasets \ - accelerate \ - boto3 \ - scikit-learn \ - scipy +RUN pip install --no-cache-dir -r requirements.txt # Copy handler COPY handler.py /app/handler.py # Set environment variables ENV PYTHONUNBUFFERED=1 +ENV HF_HOME=/tmp/huggingface CMD ["python", "-u", "handler.py"] diff --git a/components/runpod_trainer/requirements.txt b/components/runpod_trainer/requirements.txt new file mode 100644 index 0000000..90f528e --- /dev/null +++ b/components/runpod_trainer/requirements.txt @@ -0,0 +1,9 @@ +runpod>=1.6.0 +transformers>=4.36.0 +datasets>=2.16.0 +accelerate>=0.25.0 +boto3>=1.34.0 +scikit-learn>=1.3.0 +scipy>=1.11.0 +torch>=2.1.0 +safetensors>=0.4.0 diff --git a/ddi_data_prep.yaml b/ddi_data_prep.yaml new file mode 100644 index 0000000..5a0bd5f --- /dev/null +++ b/ddi_data_prep.yaml @@ -0,0 +1,265 @@ +# PIPELINE DEFINITION +# Name: ddi-data-preparation +# Description: Prepare DDI training data and configuration +# Inputs: +# epochs: int [Default: 3.0] +# learning_rate: float [Default: 2e-05] +# minio_endpoint: str [Default: 'http://minio.minio.svc.cluster.local:9000'] +# model_name: str [Default: 'emilyalsentzer/Bio_ClinicalBERT'] +components: + comp-create-ddi-dataset: + executorLabel: exec-create-ddi-dataset + inputDefinitions: + parameters: + minio_access_key: + parameterType: STRING + minio_endpoint: + parameterType: STRING + minio_secret_key: + parameterType: STRING + output_path: + defaultValue: ddi_train.json + isOptional: true + parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: STRING + comp-create-training-config: + executorLabel: exec-create-training-config + inputDefinitions: + parameters: + batch_size: + defaultValue: 16.0 + isOptional: true + parameterType: NUMBER_INTEGER + dataset_path: + parameterType: STRING + epochs: + defaultValue: 3.0 + isOptional: true + parameterType: NUMBER_INTEGER + learning_rate: + defaultValue: 2.0e-05 + isOptional: true + parameterType: NUMBER_DOUBLE + minio_access_key: + parameterType: STRING + minio_endpoint: + parameterType: STRING + minio_secret_key: + parameterType: STRING + model_name: + defaultValue: emilyalsentzer/Bio_ClinicalBERT + isOptional: true + parameterType: STRING + outputDefinitions: + parameters: + Output: + parameterType: STRING +deploymentSpec: + executors: + exec-create-ddi-dataset: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - create_ddi_dataset + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'boto3' 'requests'\ + \ && python3 -m pip install --quiet --no-warn-script-location 'kfp==2.15.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef create_ddi_dataset(\n minio_endpoint: str,\n minio_access_key:\ + \ str,\n minio_secret_key: str,\n output_path: str = \"ddi_train.json\"\ + \n) -> str:\n \"\"\"Create DDI training dataset and upload to MinIO.\"\ + \"\"\n import json\n import boto3\n\n # DDI training data (drug\ + \ pairs with interaction severity)\n # Labels: 0=none, 1=minor, 2=moderate,\ + \ 3=major, 4=contraindicated\n training_data = [\n # Major interactions\n\ + \ {\"text\": \"Patient taking warfarin and aspirin together\", \"\ + label\": 3},\n {\"text\": \"Concurrent use of simvastatin and amiodarone\"\ + , \"label\": 3},\n {\"text\": \"Methotrexate and NSAIDs used together\"\ + , \"label\": 3},\n {\"text\": \"Ciprofloxacin and theophylline interaction\"\ + , \"label\": 3},\n {\"text\": \"Digoxin and amiodarone combination\ + \ therapy\", \"label\": 3},\n {\"text\": \"Lithium and ACE inhibitors\ + \ together\", \"label\": 3},\n\n # Contraindicated\n {\"text\"\ + : \"Fluoxetine and tramadol co-administration\", \"label\": 4},\n \ + \ {\"text\": \"SSRIs with MAO inhibitors\", \"label\": 4},\n {\"\ + text\": \"Benzodiazepines with opioids\", \"label\": 4},\n {\"text\"\ + : \"Metronidazole and alcohol consumption\", \"label\": 4},\n {\"\ + text\": \"Linezolid with serotonergic drugs\", \"label\": 4},\n\n \ + \ # Moderate\n {\"text\": \"Patient prescribed omeprazole with clopidogrel\"\ + , \"label\": 2},\n {\"text\": \"Atorvastatin given with diltiazem\"\ + , \"label\": 2},\n {\"text\": \"ACE inhibitor with potassium supplement\"\ + , \"label\": 2},\n {\"text\": \"Metformin with contrast dye procedures\"\ + , \"label\": 2},\n\n # Minor\n {\"text\": \"Levothyroxine\ + \ taken with calcium supplements\", \"label\": 1},\n {\"text\": \"\ + Antacids with oral antibiotics timing\", \"label\": 1},\n {\"text\"\ + : \"Iron supplements with dairy products\", \"label\": 1},\n\n #\ + \ No interaction\n {\"text\": \"Metformin administered with lisinopril\"\ + , \"label\": 0},\n {\"text\": \"Amlodipine with metoprolol combination\"\ + , \"label\": 0},\n {\"text\": \"Omeprazole and acetaminophen together\"\ + , \"label\": 0},\n {\"text\": \"Vitamin D with calcium supplements\"\ + , \"label\": 0},\n ]\n\n # Upload to MinIO\n s3 = boto3.client(\n\ + \ 's3',\n endpoint_url=minio_endpoint,\n aws_access_key_id=minio_access_key,\n\ + \ aws_secret_access_key=minio_secret_key,\n region_name='us-east-1'\n\ + \ )\n\n data_json = json.dumps(training_data, indent=2)\n s3.put_object(\n\ + \ Bucket='datasets',\n Key=output_path,\n Body=data_json.encode('utf-8'),\n\ + \ ContentType='application/json'\n )\n\n print(f\"\u2705 Uploaded\ + \ {len(training_data)} samples to datasets/{output_path}\")\n print(f\"\ + \ - Contraindicated: {sum(1 for d in training_data if d['label'] == 4)}\"\ + )\n print(f\" - Major: {sum(1 for d in training_data if d['label']\ + \ == 3)}\")\n print(f\" - Moderate: {sum(1 for d in training_data if\ + \ d['label'] == 2)}\")\n print(f\" - Minor: {sum(1 for d in training_data\ + \ if d['label'] == 1)}\")\n print(f\" - None: {sum(1 for d in training_data\ + \ if d['label'] == 0)}\")\n\n return f\"s3://datasets/{output_path}\"\ + \n\n" + image: python:3.11-slim + exec-create-training-config: + container: + args: + - --executor_input + - '{{$}}' + - --function_to_execute + - create_training_config + command: + - sh + - -c + - "\nif ! [ -x \"$(command -v pip)\" ]; then\n python3 -m ensurepip ||\ + \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\ + \ python3 -m pip install --quiet --no-warn-script-location 'boto3' && \ + \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.15.2'\ + \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\ + $0\" \"$@\"\n" + - sh + - -ec + - 'program_path=$(mktemp -d) + + + printf "%s" "$0" > "$program_path/ephemeral_component.py" + + _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main --component_module_path "$program_path/ephemeral_component.py" "$@" + + ' + - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\ + \ *\n\ndef create_training_config(\n minio_endpoint: str,\n minio_access_key:\ + \ str,\n minio_secret_key: str,\n dataset_path: str,\n model_name:\ + \ str = \"emilyalsentzer/Bio_ClinicalBERT\",\n epochs: int = 3,\n \ + \ learning_rate: float = 2e-5,\n batch_size: int = 16\n) -> str:\n \ + \ \"\"\"Create training configuration file.\"\"\"\n import json\n \ + \ import boto3\n from datetime import datetime\n\n config = {\n \ + \ \"created_at\": datetime.utcnow().isoformat(),\n \"dataset\"\ + : {\n \"path\": dataset_path,\n \"format\": \"json\"\ + ,\n \"text_field\": \"text\",\n \"label_field\": \"\ + label\"\n },\n \"model\": {\n \"base_model\": model_name,\n\ + \ \"num_labels\": 5,\n \"label_names\": [\"none\"\ + , \"minor\", \"moderate\", \"major\", \"contraindicated\"]\n },\n\ + \ \"training\": {\n \"epochs\": epochs,\n \"\ + learning_rate\": learning_rate,\n \"batch_size\": batch_size,\n\ + \ \"warmup_steps\": 100,\n \"weight_decay\": 0.01,\n\ + \ \"fp16\": True,\n \"evaluation_strategy\": \"epoch\"\ + ,\n \"save_strategy\": \"epoch\"\n },\n \"output\"\ + : {\n \"model_path\": \"models/ddi-detector\",\n \"\ + metrics_path\": \"models/ddi-detector/metrics.json\"\n }\n }\n\ + \n s3 = boto3.client(\n 's3',\n endpoint_url=minio_endpoint,\n\ + \ aws_access_key_id=minio_access_key,\n aws_secret_access_key=minio_secret_key,\n\ + \ region_name='us-east-1'\n )\n\n config_json = json.dumps(config,\ + \ indent=2)\n config_path = \"configs/ddi_training_config.json\"\n\n\ + \ s3.put_object(\n Bucket='training-data',\n Key=config_path,\n\ + \ Body=config_json.encode('utf-8'),\n ContentType='application/json'\n\ + \ )\n\n print(f\"\u2705 Training config saved to training-data/{config_path}\"\ + )\n print(f\" Model: {model_name}\")\n print(f\" Epochs: {epochs}\"\ + )\n print(f\" Learning rate: {learning_rate}\")\n\n return f\"s3://training-data/{config_path}\"\ + \n\n" + image: python:3.11-slim +pipelineInfo: + description: Prepare DDI training data and configuration + name: ddi-data-preparation +root: + dag: + tasks: + create-ddi-dataset: + cachingOptions: + enableCache: true + componentRef: + name: comp-create-ddi-dataset + inputs: + parameters: + minio_access_key: + runtimeValue: + constant: minioadmin + minio_endpoint: + componentInputParameter: minio_endpoint + minio_secret_key: + runtimeValue: + constant: minioadmin123! + output_path: + runtimeValue: + constant: ddi_train.json + taskInfo: + name: create-ddi-dataset + create-training-config: + cachingOptions: + enableCache: true + componentRef: + name: comp-create-training-config + dependentTasks: + - create-ddi-dataset + inputs: + parameters: + dataset_path: + taskOutputParameter: + outputParameterKey: Output + producerTask: create-ddi-dataset + epochs: + componentInputParameter: epochs + learning_rate: + componentInputParameter: learning_rate + minio_access_key: + runtimeValue: + constant: minioadmin + minio_endpoint: + componentInputParameter: minio_endpoint + minio_secret_key: + runtimeValue: + constant: minioadmin123! + model_name: + componentInputParameter: model_name + taskInfo: + name: create-training-config + inputDefinitions: + parameters: + epochs: + defaultValue: 3.0 + isOptional: true + parameterType: NUMBER_INTEGER + learning_rate: + defaultValue: 2.0e-05 + isOptional: true + parameterType: NUMBER_DOUBLE + minio_endpoint: + defaultValue: http://minio.minio.svc.cluster.local:9000 + isOptional: true + parameterType: STRING + model_name: + defaultValue: emilyalsentzer/Bio_ClinicalBERT + isOptional: true + parameterType: STRING +schemaVersion: 2.1.0 +sdkVersion: kfp-2.15.2 diff --git a/pipelines/ddi_data_prep.py b/pipelines/ddi_data_prep.py new file mode 100644 index 0000000..01fa5c8 --- /dev/null +++ b/pipelines/ddi_data_prep.py @@ -0,0 +1,210 @@ +""" +DDI Data Preparation Pipeline + +Prepares training data for DDI detection model. +Training can be triggered manually on RunPod or any GPU environment. +""" +from kfp import dsl +from kfp import compiler + + +@dsl.component( + base_image="python:3.11-slim", + packages_to_install=["boto3", "requests"] +) +def create_ddi_dataset( + minio_endpoint: str, + minio_access_key: str, + minio_secret_key: str, + output_path: str = "ddi_train.json" +) -> str: + """Create DDI training dataset and upload to MinIO.""" + import json + import boto3 + + # DDI training data (drug pairs with interaction severity) + # Labels: 0=none, 1=minor, 2=moderate, 3=major, 4=contraindicated + training_data = [ + # Major interactions + {"text": "Patient taking warfarin and aspirin together", "label": 3}, + {"text": "Concurrent use of simvastatin and amiodarone", "label": 3}, + {"text": "Methotrexate and NSAIDs used together", "label": 3}, + {"text": "Ciprofloxacin and theophylline interaction", "label": 3}, + {"text": "Digoxin and amiodarone combination therapy", "label": 3}, + {"text": "Lithium and ACE inhibitors together", "label": 3}, + + # Contraindicated + {"text": "Fluoxetine and tramadol co-administration", "label": 4}, + {"text": "SSRIs with MAO inhibitors", "label": 4}, + {"text": "Benzodiazepines with opioids", "label": 4}, + {"text": "Metronidazole and alcohol consumption", "label": 4}, + {"text": "Linezolid with serotonergic drugs", "label": 4}, + + # Moderate + {"text": "Patient prescribed omeprazole with clopidogrel", "label": 2}, + {"text": "Atorvastatin given with diltiazem", "label": 2}, + {"text": "ACE inhibitor with potassium supplement", "label": 2}, + {"text": "Metformin with contrast dye procedures", "label": 2}, + + # Minor + {"text": "Levothyroxine taken with calcium supplements", "label": 1}, + {"text": "Antacids with oral antibiotics timing", "label": 1}, + {"text": "Iron supplements with dairy products", "label": 1}, + + # No interaction + {"text": "Metformin administered with lisinopril", "label": 0}, + {"text": "Amlodipine with metoprolol combination", "label": 0}, + {"text": "Omeprazole and acetaminophen together", "label": 0}, + {"text": "Vitamin D with calcium supplements", "label": 0}, + ] + + # Upload to MinIO + s3 = boto3.client( + 's3', + endpoint_url=minio_endpoint, + aws_access_key_id=minio_access_key, + aws_secret_access_key=minio_secret_key, + region_name='us-east-1' + ) + + data_json = json.dumps(training_data, indent=2) + s3.put_object( + Bucket='datasets', + Key=output_path, + Body=data_json.encode('utf-8'), + ContentType='application/json' + ) + + print(f"✅ Uploaded {len(training_data)} samples to datasets/{output_path}") + print(f" - Contraindicated: {sum(1 for d in training_data if d['label'] == 4)}") + print(f" - Major: {sum(1 for d in training_data if d['label'] == 3)}") + print(f" - Moderate: {sum(1 for d in training_data if d['label'] == 2)}") + print(f" - Minor: {sum(1 for d in training_data if d['label'] == 1)}") + print(f" - None: {sum(1 for d in training_data if d['label'] == 0)}") + + return f"s3://datasets/{output_path}" + + +@dsl.component( + base_image="python:3.11-slim", + packages_to_install=["boto3"] +) +def create_training_config( + minio_endpoint: str, + minio_access_key: str, + minio_secret_key: str, + dataset_path: str, + model_name: str = "emilyalsentzer/Bio_ClinicalBERT", + epochs: int = 3, + learning_rate: float = 2e-5, + batch_size: int = 16 +) -> str: + """Create training configuration file.""" + import json + import boto3 + from datetime import datetime + + config = { + "created_at": datetime.utcnow().isoformat(), + "dataset": { + "path": dataset_path, + "format": "json", + "text_field": "text", + "label_field": "label" + }, + "model": { + "base_model": model_name, + "num_labels": 5, + "label_names": ["none", "minor", "moderate", "major", "contraindicated"] + }, + "training": { + "epochs": epochs, + "learning_rate": learning_rate, + "batch_size": batch_size, + "warmup_steps": 100, + "weight_decay": 0.01, + "fp16": True, + "evaluation_strategy": "epoch", + "save_strategy": "epoch" + }, + "output": { + "model_path": "models/ddi-detector", + "metrics_path": "models/ddi-detector/metrics.json" + } + } + + s3 = boto3.client( + 's3', + endpoint_url=minio_endpoint, + aws_access_key_id=minio_access_key, + aws_secret_access_key=minio_secret_key, + region_name='us-east-1' + ) + + config_json = json.dumps(config, indent=2) + config_path = "configs/ddi_training_config.json" + + s3.put_object( + Bucket='training-data', + Key=config_path, + Body=config_json.encode('utf-8'), + ContentType='application/json' + ) + + print(f"✅ Training config saved to training-data/{config_path}") + print(f" Model: {model_name}") + print(f" Epochs: {epochs}") + print(f" Learning rate: {learning_rate}") + + return f"s3://training-data/{config_path}" + + +@dsl.pipeline( + name="ddi-data-preparation", + description="Prepare DDI training data and configuration" +) +def ddi_data_prep_pipeline( + model_name: str = "emilyalsentzer/Bio_ClinicalBERT", + epochs: int = 3, + learning_rate: float = 2e-5, + minio_endpoint: str = "http://minio.minio.svc.cluster.local:9000", +): + """ + Data preparation pipeline: + 1. Create DDI training dataset + 2. Generate training configuration + + After this completes, run training manually on RunPod: + ``` + python train.py --config s3://training-data/configs/ddi_training_config.json + ``` + """ + minio_access_key = "minioadmin" + minio_secret_key = "minioadmin123!" + + # Create dataset + dataset_task = create_ddi_dataset( + minio_endpoint=minio_endpoint, + minio_access_key=minio_access_key, + minio_secret_key=minio_secret_key, + output_path="ddi_train.json" + ) + + # Create config + config_task = create_training_config( + minio_endpoint=minio_endpoint, + minio_access_key=minio_access_key, + minio_secret_key=minio_secret_key, + dataset_path=dataset_task.output, + model_name=model_name, + epochs=epochs, + learning_rate=learning_rate + ) + + +if __name__ == "__main__": + compiler.Compiler().compile( + pipeline_func=ddi_data_prep_pipeline, + package_path="ddi_data_prep.yaml" + ) + print("Pipeline compiled to ddi_data_prep.yaml") diff --git a/pipelines/ddi_training_runpod.py b/pipelines/ddi_training_runpod.py index 8535705..fa41acd 100644 --- a/pipelines/ddi_training_runpod.py +++ b/pipelines/ddi_training_runpod.py @@ -198,8 +198,8 @@ def ddi_training_pipeline( learning_rate: float = 2e-5, model_version: str = "v1", - # MinIO settings - use internal cluster service URL - minio_endpoint: str = "http://minio.minio.svc.cluster.local:9000", + # MinIO settings - use Tailscale endpoint + minio_endpoint: str = "https://minio.walleye-frog.ts.net", ): """ Full DDI training pipeline: