gpu-goblin / tests /fixtures /sample_train.yaml
bharathtelu's picture
Deploy auto-tune UI + scripts (work-from-91d0cf0)
a9aa4ae verified
Raw
History Blame Contribute Delete
825 Bytes
# YAML training config — same schema as JSON, exercised separately so we know
# yaml.safe_load + dict extraction agree with the JSON path.
# All keys below are top-level scalars; none of them is nested.
# Model identifier and batch/sequence sizing.
model_name_or_path: Qwen/Qwen2.5-7B-Instruct
per_device_train_batch_size: 2
gradient_accumulation_steps: 16
max_seq_length: 8192
# Optimizer, schedule and precision.
# learning_rate keeps the decimal point and signed exponent on purpose: YAML 1.1
# loaders such as PyYAML resolve `1.0e-4` as a float, but `1e-4` as a string.
learning_rate: 1.0e-4
warmup_steps: 50
bf16: true
optim: adamw_torch
# Memory / compilation toggles (canonical lowercase booleans).
gradient_checkpointing: true
torch_compile: false
# Data loader settings.
dataloader_num_workers: 8
dataloader_pin_memory: true
dataloader_prefetch_factor: 2
dataloader_persistent_workers: true
# Attention backend, run length and output location.
attn_implementation: sdpa
num_train_epochs: 1
output_dir: ./out
# Secrets that should be scrubbed:
# NOTE(review): these values look like synthetic placeholders for the scrubber
# tests — confirm they are not real credentials. They are kept byte-identical
# because tests presumably match on them.
hub_token: "hf_yamlsamplehfabcdefghijklmnopqrs1"
auth_header: "Bearer eyJ.payload.signaturetoken"
# NOTE(review): presumably listed here because the path embeds a user name
# under /home — confirm against the scrubber's rules.
data_path: "/home/teamuser/datasets/alpaca-cleaned"
# Nested mapping of environment variables. The children must be indented under
# `env_vars`; at column 0 `env_vars` loads as null and the variables become
# top-level keys. Values stay quoted so they load as strings, not integers.
env_vars:
  HSA_FORCE_FINE_GRAIN_PCIE: "1"
  MIOPEN_FIND_MODE: "3"