Use Tailscale endpoints, add RunPod Docker build files

2026-02-10 06:45:13 +00:00 · 2026-02-03 00:23:16 +00:00
parent 07bb8aa6bb
commit 222be0fb68
5 changed files with 491 additions and 10 deletions
--- a/components/runpod_trainer/Dockerfile
+++ b/components/runpod_trainer/Dockerfile
@@ -2,20 +2,17 @@ FROM runpod/pytorch:2.1.0-py3.10-cuda11.8.0-devel-ubuntu22.04
 WORKDIR /app
 # Copy requirements first for better caching
 COPY requirements.txt /app/requirements.txt
 # Install dependencies
-RUN pip install --no-cache-dir \
-    runpod \
-    transformers \
-    datasets \
-    accelerate \
-    boto3 \
-    scikit-learn \
-    scipy
+# requirements.txt (copied above) is the single source of truth for Python deps;
+# no inline package list may follow this line, since it has no continuation.
+RUN pip install --no-cache-dir -r requirements.txt
 # Copy handler
 COPY handler.py /app/handler.py
 # Set environment variables
 ENV PYTHONUNBUFFERED=1
 ENV HF_HOME=/tmp/huggingface
 CMD ["python", "-u", "handler.py"]
--- a/components/runpod_trainer/requirements.txt
+++ b/components/runpod_trainer/requirements.txt
@@ -0,0 +1,9 @@
 runpod>=1.6.0
 transformers>=4.36.0
 datasets>=2.16.0
 accelerate>=0.25.0
 boto3>=1.34.0
 scikit-learn>=1.3.0
 scipy>=1.11.0
 torch>=2.1.0
 safetensors>=0.4.0
--- a/ddi_data_prep.yaml
+++ b/ddi_data_prep.yaml
@@ -0,0 +1,265 @@
 # PIPELINE DEFINITION
 # Name: ddi-data-preparation
 # Description: Prepare DDI training data and configuration
 # Inputs:
 #    epochs: int [Default: 3.0]
 #    learning_rate: float [Default: 2e-05]
 #    minio_endpoint: str [Default: 'http://minio.minio.svc.cluster.local:9000']
 #    model_name: str [Default: 'emilyalsentzer/Bio_ClinicalBERT']
 components:
  comp-create-ddi-dataset:
    executorLabel: exec-create-ddi-dataset
    inputDefinitions:
      parameters:
        minio_access_key:
          parameterType: STRING
        minio_endpoint:
          parameterType: STRING
        minio_secret_key:
          parameterType: STRING
        output_path:
          defaultValue: ddi_train.json
          isOptional: true
          parameterType: STRING
    outputDefinitions:
      parameters:
        Output:
          parameterType: STRING
  comp-create-training-config:
    executorLabel: exec-create-training-config
    inputDefinitions:
      parameters:
        batch_size:
          defaultValue: 16.0
          isOptional: true
          parameterType: NUMBER_INTEGER
        dataset_path:
          parameterType: STRING
        epochs:
          defaultValue: 3.0
          isOptional: true
          parameterType: NUMBER_INTEGER
        learning_rate:
          defaultValue: 2.0e-05
          isOptional: true
          parameterType: NUMBER_DOUBLE
        minio_access_key:
          parameterType: STRING
        minio_endpoint:
          parameterType: STRING
        minio_secret_key:
          parameterType: STRING
        model_name:
          defaultValue: emilyalsentzer/Bio_ClinicalBERT
          isOptional: true
          parameterType: STRING
    outputDefinitions:
      parameters:
        Output:
          parameterType: STRING
 deploymentSpec:
  executors:
    exec-create-ddi-dataset:
      container:
        args:
        - --executor_input
        - '{{$}}'
        - --function_to_execute
        - create_ddi_dataset
        command:
        - sh
        - -c
        - "\nif ! [ -x \"$(command -v pip)\" ]; then\n    python3 -m ensurepip ||\
          \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\
          \ python3 -m pip install --quiet --no-warn-script-location 'boto3' 'requests'\
          \  &&  python3 -m pip install --quiet --no-warn-script-location 'kfp==2.15.2'\
          \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\
          $0\" \"$@\"\n"
        - sh
        - -ec
        - 'program_path=$(mktemp -d)
          printf "%s" "$0" > "$program_path/ephemeral_component.py"
          _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main                         --component_module_path                         "$program_path/ephemeral_component.py"                         "$@"
          '
        - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\
          \ *\n\ndef create_ddi_dataset(\n    minio_endpoint: str,\n    minio_access_key:\
          \ str,\n    minio_secret_key: str,\n    output_path: str = \"ddi_train.json\"\
          \n) -> str:\n    \"\"\"Create DDI training dataset and upload to MinIO.\"\
          \"\"\n    import json\n    import boto3\n\n    # DDI training data (drug\
          \ pairs with interaction severity)\n    # Labels: 0=none, 1=minor, 2=moderate,\
          \ 3=major, 4=contraindicated\n    training_data = [\n        # Major interactions\n\
          \        {\"text\": \"Patient taking warfarin and aspirin together\", \"\
          label\": 3},\n        {\"text\": \"Concurrent use of simvastatin and amiodarone\"\
          , \"label\": 3},\n        {\"text\": \"Methotrexate and NSAIDs used together\"\
          , \"label\": 3},\n        {\"text\": \"Ciprofloxacin and theophylline interaction\"\
          , \"label\": 3},\n        {\"text\": \"Digoxin and amiodarone combination\
          \ therapy\", \"label\": 3},\n        {\"text\": \"Lithium and ACE inhibitors\
          \ together\", \"label\": 3},\n\n        # Contraindicated\n        {\"text\"\
          : \"Fluoxetine and tramadol co-administration\", \"label\": 4},\n      \
          \  {\"text\": \"SSRIs with MAO inhibitors\", \"label\": 4},\n        {\"\
          text\": \"Benzodiazepines with opioids\", \"label\": 4},\n        {\"text\"\
          : \"Metronidazole and alcohol consumption\", \"label\": 4},\n        {\"\
          text\": \"Linezolid with serotonergic drugs\", \"label\": 4},\n\n      \
          \  # Moderate\n        {\"text\": \"Patient prescribed omeprazole with clopidogrel\"\
          , \"label\": 2},\n        {\"text\": \"Atorvastatin given with diltiazem\"\
          , \"label\": 2},\n        {\"text\": \"ACE inhibitor with potassium supplement\"\
          , \"label\": 2},\n        {\"text\": \"Metformin with contrast dye procedures\"\
          , \"label\": 2},\n\n        # Minor\n        {\"text\": \"Levothyroxine\
          \ taken with calcium supplements\", \"label\": 1},\n        {\"text\": \"\
          Antacids with oral antibiotics timing\", \"label\": 1},\n        {\"text\"\
          : \"Iron supplements with dairy products\", \"label\": 1},\n\n        #\
          \ No interaction\n        {\"text\": \"Metformin administered with lisinopril\"\
          , \"label\": 0},\n        {\"text\": \"Amlodipine with metoprolol combination\"\
          , \"label\": 0},\n        {\"text\": \"Omeprazole and acetaminophen together\"\
          , \"label\": 0},\n        {\"text\": \"Vitamin D with calcium supplements\"\
          , \"label\": 0},\n    ]\n\n    # Upload to MinIO\n    s3 = boto3.client(\n\
          \        's3',\n        endpoint_url=minio_endpoint,\n        aws_access_key_id=minio_access_key,\n\
          \        aws_secret_access_key=minio_secret_key,\n        region_name='us-east-1'\n\
          \    )\n\n    data_json = json.dumps(training_data, indent=2)\n    s3.put_object(\n\
          \        Bucket='datasets',\n        Key=output_path,\n        Body=data_json.encode('utf-8'),\n\
          \        ContentType='application/json'\n    )\n\n    print(f\"\u2705 Uploaded\
          \ {len(training_data)} samples to datasets/{output_path}\")\n    print(f\"\
          \   - Contraindicated: {sum(1 for d in training_data if d['label'] == 4)}\"\
          )\n    print(f\"   - Major: {sum(1 for d in training_data if d['label']\
          \ == 3)}\")\n    print(f\"   - Moderate: {sum(1 for d in training_data if\
          \ d['label'] == 2)}\")\n    print(f\"   - Minor: {sum(1 for d in training_data\
          \ if d['label'] == 1)}\")\n    print(f\"   - None: {sum(1 for d in training_data\
          \ if d['label'] == 0)}\")\n\n    return f\"s3://datasets/{output_path}\"\
          \n\n"
        image: python:3.11-slim
    exec-create-training-config:
      container:
        args:
        - --executor_input
        - '{{$}}'
        - --function_to_execute
        - create_training_config
        command:
        - sh
        - -c
        - "\nif ! [ -x \"$(command -v pip)\" ]; then\n    python3 -m ensurepip ||\
          \ python3 -m ensurepip --user || apt-get install python3-pip\nfi\n\nPIP_DISABLE_PIP_VERSION_CHECK=1\
          \ python3 -m pip install --quiet --no-warn-script-location 'boto3'  && \
          \ python3 -m pip install --quiet --no-warn-script-location 'kfp==2.15.2'\
          \ '--no-deps' 'typing-extensions>=3.7.4,<5; python_version<\"3.9\"' && \"\
          $0\" \"$@\"\n"
        - sh
        - -ec
        - 'program_path=$(mktemp -d)
          printf "%s" "$0" > "$program_path/ephemeral_component.py"
          _KFP_RUNTIME=true python3 -m kfp.dsl.executor_main                         --component_module_path                         "$program_path/ephemeral_component.py"                         "$@"
          '
        - "\nimport kfp\nfrom kfp import dsl\nfrom kfp.dsl import *\nfrom typing import\
          \ *\n\ndef create_training_config(\n    minio_endpoint: str,\n    minio_access_key:\
          \ str,\n    minio_secret_key: str,\n    dataset_path: str,\n    model_name:\
          \ str = \"emilyalsentzer/Bio_ClinicalBERT\",\n    epochs: int = 3,\n   \
          \ learning_rate: float = 2e-5,\n    batch_size: int = 16\n) -> str:\n  \
          \  \"\"\"Create training configuration file.\"\"\"\n    import json\n  \
          \  import boto3\n    from datetime import datetime\n\n    config = {\n \
          \       \"created_at\": datetime.utcnow().isoformat(),\n        \"dataset\"\
          : {\n            \"path\": dataset_path,\n            \"format\": \"json\"\
          ,\n            \"text_field\": \"text\",\n            \"label_field\": \"\
          label\"\n        },\n        \"model\": {\n            \"base_model\": model_name,\n\
          \            \"num_labels\": 5,\n            \"label_names\": [\"none\"\
          , \"minor\", \"moderate\", \"major\", \"contraindicated\"]\n        },\n\
          \        \"training\": {\n            \"epochs\": epochs,\n            \"\
          learning_rate\": learning_rate,\n            \"batch_size\": batch_size,\n\
          \            \"warmup_steps\": 100,\n            \"weight_decay\": 0.01,\n\
          \            \"fp16\": True,\n            \"evaluation_strategy\": \"epoch\"\
          ,\n            \"save_strategy\": \"epoch\"\n        },\n        \"output\"\
          : {\n            \"model_path\": \"models/ddi-detector\",\n            \"\
          metrics_path\": \"models/ddi-detector/metrics.json\"\n        }\n    }\n\
          \n    s3 = boto3.client(\n        's3',\n        endpoint_url=minio_endpoint,\n\
          \        aws_access_key_id=minio_access_key,\n        aws_secret_access_key=minio_secret_key,\n\
          \        region_name='us-east-1'\n    )\n\n    config_json = json.dumps(config,\
          \ indent=2)\n    config_path = \"configs/ddi_training_config.json\"\n\n\
          \    s3.put_object(\n        Bucket='training-data',\n        Key=config_path,\n\
          \        Body=config_json.encode('utf-8'),\n        ContentType='application/json'\n\
          \    )\n\n    print(f\"\u2705 Training config saved to training-data/{config_path}\"\
          )\n    print(f\"   Model: {model_name}\")\n    print(f\"   Epochs: {epochs}\"\
          )\n    print(f\"   Learning rate: {learning_rate}\")\n\n    return f\"s3://training-data/{config_path}\"\
          \n\n"
        image: python:3.11-slim
 pipelineInfo:
  description: Prepare DDI training data and configuration
  name: ddi-data-preparation
 root:
  dag:
    tasks:
      create-ddi-dataset:
        cachingOptions:
          enableCache: true
        componentRef:
          name: comp-create-ddi-dataset
        inputs:
          parameters:
            minio_access_key:
              runtimeValue:
                constant: minioadmin
            minio_endpoint:
              componentInputParameter: minio_endpoint
            minio_secret_key:
              runtimeValue:
                constant: minioadmin123!
            output_path:
              runtimeValue:
                constant: ddi_train.json
        taskInfo:
          name: create-ddi-dataset
      create-training-config:
        cachingOptions:
          enableCache: true
        componentRef:
          name: comp-create-training-config
        dependentTasks:
        - create-ddi-dataset
        inputs:
          parameters:
            dataset_path:
              taskOutputParameter:
                outputParameterKey: Output
                producerTask: create-ddi-dataset
            epochs:
              componentInputParameter: epochs
            learning_rate:
              componentInputParameter: learning_rate
            minio_access_key:
              runtimeValue:
                constant: minioadmin
            minio_endpoint:
              componentInputParameter: minio_endpoint
            minio_secret_key:
              runtimeValue:
                constant: minioadmin123!
            model_name:
              componentInputParameter: model_name
        taskInfo:
          name: create-training-config
  inputDefinitions:
    parameters:
      epochs:
        defaultValue: 3.0
        isOptional: true
        parameterType: NUMBER_INTEGER
      learning_rate:
        defaultValue: 2.0e-05
        isOptional: true
        parameterType: NUMBER_DOUBLE
      minio_endpoint:
        defaultValue: http://minio.minio.svc.cluster.local:9000
        isOptional: true
        parameterType: STRING
      model_name:
        defaultValue: emilyalsentzer/Bio_ClinicalBERT
        isOptional: true
        parameterType: STRING
 schemaVersion: 2.1.0
 sdkVersion: kfp-2.15.2
--- a/pipelines/ddi_data_prep.py
+++ b/pipelines/ddi_data_prep.py
@@ -0,0 +1,210 @@
 """
 DDI Data Preparation Pipeline
 Prepares training data for DDI detection model.
 Training can be triggered manually on RunPod or any GPU environment.
 """
 from kfp import dsl
 from kfp import compiler
@dsl.component(
    base_image="python:3.11-slim",
    packages_to_install=["boto3", "requests"],
)
def create_ddi_dataset(
    minio_endpoint: str,
    minio_access_key: str,
    minio_secret_key: str,
    output_path: str = "ddi_train.json",
) -> str:
    """Build the built-in DDI sample set and store it as JSON in MinIO.

    Each sample is a short clinical phrase tagged with an interaction
    severity label (0=none, 1=minor, 2=moderate, 3=major,
    4=contraindicated). The list is serialized with two-space indentation
    and written to the ``datasets`` bucket under ``output_path``; a
    per-severity summary is printed afterwards.

    Args:
        minio_endpoint: URL handed to boto3 as ``endpoint_url``.
        minio_access_key: Access key for the S3 client.
        minio_secret_key: Secret key for the S3 client.
        output_path: Object key inside the ``datasets`` bucket.

    Returns:
        The ``s3://datasets/<output_path>`` URI of the uploaded object.
    """
    # Imports stay inside the function: the compiled component runs this
    # function's source on its own, outside this module.
    import json
    import boto3

    # Phrases grouped by severity label, listed in the order in which they
    # are written to the uploaded file.
    phrases_by_label = (
        # Major interactions
        (3, (
            "Patient taking warfarin and aspirin together",
            "Concurrent use of simvastatin and amiodarone",
            "Methotrexate and NSAIDs used together",
            "Ciprofloxacin and theophylline interaction",
            "Digoxin and amiodarone combination therapy",
            "Lithium and ACE inhibitors together",
        )),
        # Contraindicated
        (4, (
            "Fluoxetine and tramadol co-administration",
            "SSRIs with MAO inhibitors",
            "Benzodiazepines with opioids",
            "Metronidazole and alcohol consumption",
            "Linezolid with serotonergic drugs",
        )),
        # Moderate
        (2, (
            "Patient prescribed omeprazole with clopidogrel",
            "Atorvastatin given with diltiazem",
            "ACE inhibitor with potassium supplement",
            "Metformin with contrast dye procedures",
        )),
        # Minor
        (1, (
            "Levothyroxine taken with calcium supplements",
            "Antacids with oral antibiotics timing",
            "Iron supplements with dairy products",
        )),
        # No interaction
        (0, (
            "Metformin administered with lisinopril",
            "Amlodipine with metoprolol combination",
            "Omeprazole and acetaminophen together",
            "Vitamin D with calcium supplements",
        )),
    )
    training_data = [
        {"text": phrase, "label": severity}
        for severity, phrases in phrases_by_label
        for phrase in phrases
    ]

    # S3-compatible client pointed at the MinIO endpoint.
    client_settings = {
        "endpoint_url": minio_endpoint,
        "aws_access_key_id": minio_access_key,
        "aws_secret_access_key": minio_secret_key,
        "region_name": "us-east-1",
    }
    s3 = boto3.client("s3", **client_settings)

    payload = json.dumps(training_data, indent=2).encode("utf-8")
    s3.put_object(
        Bucket="datasets",
        Key=output_path,
        Body=payload,
        ContentType="application/json",
    )

    # Summary: total sample count, then one line per severity class.
    print(f"✅ Uploaded {len(training_data)} samples to datasets/{output_path}")
    print(f"   - Contraindicated: {sum(1 for d in training_data if d['label'] == 4)}")
    print(f"   - Major: {sum(1 for d in training_data if d['label'] == 3)}")
    print(f"   - Moderate: {sum(1 for d in training_data if d['label'] == 2)}")
    print(f"   - Minor: {sum(1 for d in training_data if d['label'] == 1)}")
    print(f"   - None: {sum(1 for d in training_data if d['label'] == 0)}")

    return f"s3://datasets/{output_path}"
@dsl.component(
    base_image="python:3.11-slim",
    packages_to_install=["boto3"],
)
def create_training_config(
    minio_endpoint: str,
    minio_access_key: str,
    minio_secret_key: str,
    dataset_path: str,
    model_name: str = "emilyalsentzer/Bio_ClinicalBERT",
    epochs: int = 3,
    learning_rate: float = 2e-5,
    batch_size: int = 16,
) -> str:
    """Write the DDI training configuration as JSON to MinIO.

    The configuration records the dataset location and field names, the
    base model with its five severity labels, the training hyperparameters
    and the output paths. It is uploaded to the ``training-data`` bucket
    under ``configs/ddi_training_config.json``.

    Args:
        minio_endpoint: URL handed to boto3 as ``endpoint_url``.
        minio_access_key: Access key for the S3 client.
        minio_secret_key: Secret key for the S3 client.
        dataset_path: Location of the training dataset, stored verbatim
            under ``dataset.path``.
        model_name: Base model identifier stored under ``model.base_model``.
        epochs: Number of training epochs.
        learning_rate: Optimizer learning rate.
        batch_size: Training batch size.

    Returns:
        The ``s3://training-data/<config key>`` URI of the uploaded config.
    """
    # Imports stay inside the function: the compiled component runs this
    # function's source on its own, outside this module.
    import json
    import boto3
    from datetime import datetime, timezone

    # datetime.utcnow() is deprecated since Python 3.12. Take an aware UTC
    # "now" and drop tzinfo so the stored string keeps the same naive ISO
    # format (no "+00:00" suffix) that existing consumers already read.
    created_at = datetime.now(timezone.utc).replace(tzinfo=None).isoformat()

    config = {
        "created_at": created_at,
        "dataset": {
            "path": dataset_path,
            "format": "json",
            "text_field": "text",
            "label_field": "label",
        },
        "model": {
            "base_model": model_name,
            "num_labels": 5,
            # Index in this list == integer label used in the dataset.
            "label_names": ["none", "minor", "moderate", "major", "contraindicated"],
        },
        "training": {
            "epochs": epochs,
            "learning_rate": learning_rate,
            "batch_size": batch_size,
            "warmup_steps": 100,
            "weight_decay": 0.01,
            "fp16": True,
            "evaluation_strategy": "epoch",
            "save_strategy": "epoch",
        },
        "output": {
            "model_path": "models/ddi-detector",
            "metrics_path": "models/ddi-detector/metrics.json",
        },
    }

    s3 = boto3.client(
        's3',
        endpoint_url=minio_endpoint,
        aws_access_key_id=minio_access_key,
        aws_secret_access_key=minio_secret_key,
        region_name='us-east-1',
    )

    config_json = json.dumps(config, indent=2)
    config_path = "configs/ddi_training_config.json"

    s3.put_object(
        Bucket='training-data',
        Key=config_path,
        Body=config_json.encode('utf-8'),
        ContentType='application/json',
    )

    print(f"✅ Training config saved to training-data/{config_path}")
    print(f"   Model: {model_name}")
    print(f"   Epochs: {epochs}")
    print(f"   Learning rate: {learning_rate}")

    return f"s3://training-data/{config_path}"
@dsl.pipeline(
    name="ddi-data-preparation",
    description="Prepare DDI training data and configuration",
)
def ddi_data_prep_pipeline(
    model_name: str = "emilyalsentzer/Bio_ClinicalBERT",
    epochs: int = 3,
    learning_rate: float = 2e-5,
    minio_endpoint: str = "http://minio.minio.svc.cluster.local:9000",
    minio_access_key: str = "minioadmin",
    minio_secret_key: str = "minioadmin123!",
):
    """
    Data preparation pipeline:
    1. Create DDI training dataset
    2. Generate training configuration

    Args:
        model_name: Base model identifier written into the training config.
        epochs: Number of training epochs written into the config.
        learning_rate: Learning rate written into the config.
        minio_endpoint: MinIO URL used by both steps.
        minio_access_key: MinIO access key used by both steps.
        minio_secret_key: MinIO secret key used by both steps.

    After this completes, run training manually on RunPod:
    ```
    python train.py --config s3://training-data/configs/ddi_training_config.json
    ```
    """
    # NOTE(review): the credential defaults reproduce the values that used to
    # be hard-coded in this body, so existing runs behave the same. They are
    # still stored in plain text in the compiled pipeline; override them per
    # run and consider moving them to a cluster secret.

    # Step 1: upload the dataset; its output is the dataset's s3:// URI.
    dataset_task = create_ddi_dataset(
        minio_endpoint=minio_endpoint,
        minio_access_key=minio_access_key,
        minio_secret_key=minio_secret_key,
        output_path="ddi_train.json",
    )

    # Step 2: write the training config. Consuming dataset_task.output makes
    # this step depend on step 1. The returned task is not needed further.
    create_training_config(
        minio_endpoint=minio_endpoint,
        minio_access_key=minio_access_key,
        minio_secret_key=minio_secret_key,
        dataset_path=dataset_task.output,
        model_name=model_name,
        epochs=epochs,
        learning_rate=learning_rate,
    )
 if __name__ == "__main__":
     # Running this module as a script compiles the pipeline definition to a
     # YAML package in the current working directory.
     pipeline_compiler = compiler.Compiler()
     pipeline_compiler.compile(
         pipeline_func=ddi_data_prep_pipeline,
         package_path="ddi_data_prep.yaml",
     )
     print("Pipeline compiled to ddi_data_prep.yaml")
--- a/pipelines/ddi_training_runpod.py
+++ b/pipelines/ddi_training_runpod.py
@@ -198,8 +198,8 @@ def ddi_training_pipeline(
    learning_rate: float = 2e-5,
    model_version: str = "v1",
-    # MinIO settings - use internal cluster service URL
+    # MinIO settings - use Tailscale endpoint
-    minio_endpoint: str = "http://minio.minio.svc.cluster.local:9000",
+    minio_endpoint: str = "https://minio.walleye-frog.ts.net",
 ):
    """
    Full DDI training pipeline: