{
  "experiment": "r300_qwen3teacher_hardnegs",
  "rank": 300,
  "d_model": 300,
  "vocab_size": 82741,
  "seq_len": 512,
  "batch_size": 256,
  "total_steps": 150000,
  "warmup": 1000,
  "lr": 0.0005,
  "min_lr": 1e-05,
  "weight_decay": 0.01,
  "grad_clip": 1.0,
  "tau": 0.05,
  "lambda_mse": 1.0,
  "H_hard_negatives_per_anchor": 64,
  "hard_neg_repo": "jsanzolac/qwen3_emb_512_hard_negatives",
  "warm_start_repo": "jsanzolac/bpe_glove_300_lora_r300_qwen3",
  "warm_start_path": "rank_300/checkpoint_final.pt",
  "glove_repo": "jsanzolac/drifting-glove-distilled-r300",
  "created_at": "2026-05-21T14:16:30.508447"
}