{
  "variant": 1,
  "augmentation": "per_token_ffn",
  "source_repo": "jsanzolac/bpe_glove_512_lora_v1",
  "source_rank": 512,
  "rank": 512,
  "d_model": 512,
  "vocab_size": 90181,
  "seq_len": 512,
  "batch_size": 32,
  "total_steps": 30000,
  "warmup": 500,
  "lr": 0.0005,
  "min_lr": 1e-05,
  "weight_decay": 0.01,
  "grad_clip": 1.0,
  "tau": 0.05,
  "lambda_nce": 1.0,
  "lambda_density": 0.1,
  "ffn_dropout": 0.1,
  "teacher_model": "Qwen/Qwen3-Embedding-8B",
  "teacher_max_len": 512,
  "glove_repo": "jsanzolac/bpe_glove_512",
  "created_at": "2026-05-16T17:00:45.934253"
}