From 0f4858d22f72f3cf441ac915ab724d0872a6b087 Mon Sep 17 00:00:00 2001
From: Greg Hendrickson
Date: Tue, 3 Feb 2026 02:38:15 +0000
Subject: [PATCH] fix: disable checkpoint saving to avoid tensor contiguity error

---
 components/runpod_trainer/handler.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/components/runpod_trainer/handler.py b/components/runpod_trainer/handler.py
index 830b8ce..bbca46c 100644
--- a/components/runpod_trainer/handler.py
+++ b/components/runpod_trainer/handler.py
@@ -103,7 +103,7 @@ def train_ddi_model(job_input: Dict[str, Any]) -> Dict[str, Any]:
         warmup_steps=50,
         weight_decay=0.01,
         logging_steps=10,
-        save_strategy='epoch',
+        save_strategy='no',  # Don't save checkpoints (avoids tensor contiguity issues)
         fp16=torch.cuda.is_available(),
         report_to='none',
     )