mirror of
https://github.com/ghndrx/kubeflow-pipelines.git
synced 2026-02-10 06:45:13 +00:00
fix: disable checkpoint saving to avoid tensor contiguity error
This commit is contained in:
@@ -103,7 +103,7 @@ def train_ddi_model(job_input: Dict[str, Any]) -> Dict[str, Any]:
|
|||||||
warmup_steps=50,
|
warmup_steps=50,
|
||||||
weight_decay=0.01,
|
weight_decay=0.01,
|
||||||
logging_steps=10,
|
logging_steps=10,
|
||||||
save_strategy='epoch',
|
save_strategy='no', # Don't save checkpoints (avoids tensor contiguity issues)
|
||||||
fp16=torch.cuda.is_available(),
|
fp16=torch.cuda.is_available(),
|
||||||
report_to='none',
|
report_to='none',
|
||||||
)
|
)
|
||||||
|
|||||||
Reference in New Issue
Block a user