Junyi42 commited on
Commit
049acc1
·
verified ·
1 Parent(s): 9da4252

Upload checkpoints_vlm_gym_counting_mark_all_one_image_lr2e_5_ce_no_mse_ins/checkpoints_vlm_gym_counting_mark_all_one_image_lr2e_5_ce_no_mse_ins

Browse files
checkpoints_vlm_gym_counting_mark_all_one_image_lr2e_5_ce_no_mse_ins/checkpoints_vlm_gym_counting_mark_all_one_image_lr2e_5_ce_no_mse_ins/wandb/offline-run-20260125_192135-checkpoints_vlm_gym_counting_mark_all_one_image_lr2e_5_ce_no_mse_ins-run0/files/output.log CHANGED
@@ -168,6 +168,13 @@ Preparing Dataset vlm_gym_counting_mark_all_celoss_no_mse_evalonce/vlm_gym_count
168
  fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
169
  fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
170
  ce_avg: 0.5390675663948059, mse_avg: 0.0
 
 
 
 
 
 
 
171
  wandb: Detected [huggingface_hub.inference] in use.
172
  wandb: Use W&B Weave for improved LLM call tracing. Install Weave with `pip install weave` then add `import weave` to the top of your script.
173
  wandb: For more information, check out the docs at: https://weave-docs.wandb.ai/
@@ -1195,13 +1202,13 @@ wandb: For more information, check out the docs at: https://weave-docs.wandb.ai/
1195
  [2026-01-25 20:15:54] (step=0001014) Train Loss mse: 0.0000, Train Loss ce: 0.5307, Train Steps/Sec: 0.38,
1196
  [2026-01-25 20:15:57] (step=0001015) Train Loss mse: 0.0000, Train Loss ce: 0.5537, Train Steps/Sec: 0.29,
1197
  [2026-01-25 20:16:01] (step=0001016) Train Loss mse: 0.0000, Train Loss ce: 0.5289, Train Steps/Sec: 0.28,
1198
- base_dir is /dev/shm/models/checkpoints_vlm_gym_counting_mark_all_one_image_lr2e_5_ce_no_mse_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_counting_mark_all_one_image_lr2e_5_ce_no_mse_ins_step1000
1199
- Preparing Dataset vlm_gym_counting_mark_all_celoss_no_mse_evalonce/vlm_gym_counting_mark_all_val
1200
- [eval debug] first 3 batch fingerprints:
1201
- fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
1202
- fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
1203
- fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
1204
- ce_avg: 0.6019229888916016, mse_avg: 0.0
1205
  base_dir is /dev/shm/models/checkpoints_vlm_gym_counting_mark_all_one_image_lr2e_5_ce_no_mse_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_counting_mark_all_one_image_lr2e_5_ce_no_mse_ins_step1500
1206
  Preparing Dataset vlm_gym_counting_mark_all_celoss_no_mse_evalonce/vlm_gym_counting_mark_all_val
1207
  [eval debug] first 3 batch fingerprints:
@@ -1216,13 +1223,6 @@ Preparing Dataset vlm_gym_counting_mark_all_celoss_no_mse_evalonce/vlm_gym_count
1216
  fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
1217
  fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
1218
  ce_avg: 0.8126255869865417, mse_avg: 0.0
1219
- [2026-01-25 20:16:04] (step=0001017) Train Loss mse: 0.0000, Train Loss ce: 0.5496, Train Steps/Sec: 0.30,
1220
- [2026-01-25 20:16:06] (step=0001018) Train Loss mse: 0.0000, Train Loss ce: 0.5517, Train Steps/Sec: 0.39,
1221
- [2026-01-25 20:16:08] (step=0001019) Train Loss mse: 0.0000, Train Loss ce: 0.5016, Train Steps/Sec: 0.52,
1222
- [2026-01-25 20:16:12] (step=0001020) Train Loss mse: 0.0000, Train Loss ce: 0.5413, Train Steps/Sec: 0.27,
1223
- [2026-01-25 20:16:15] (step=0001021) Train Loss mse: 0.0000, Train Loss ce: 0.5106, Train Steps/Sec: 0.33,
1224
- [2026-01-25 20:16:19] (step=0001022) Train Loss mse: 0.0000, Train Loss ce: 0.5708, Train Steps/Sec: 0.28,
1225
- [2026-01-25 20:16:21] (step=0001023) Train Loss mse: 0.0000, Train Loss ce: 0.5335, Train Steps/Sec: 0.38,
1226
  [2026-01-25 20:16:24] (step=0001024) Train Loss mse: 0.0000, Train Loss ce: 0.5224, Train Steps/Sec: 0.45,
1227
  [2026-01-25 20:16:28] (step=0001025) Train Loss mse: 0.0000, Train Loss ce: 0.5477, Train Steps/Sec: 0.25,
1228
  [2026-01-25 20:16:31] (step=0001026) Train Loss mse: 0.0000, Train Loss ce: 0.5332, Train Steps/Sec: 0.33,
@@ -2616,20 +2616,6 @@ ce_avg: 0.8126255869865417, mse_avg: 0.0
2616
  [2026-01-25 21:22:01] (step=0002414) Train Loss mse: 0.0000, Train Loss ce: 0.4751, Train Steps/Sec: 0.32,
2617
  [2026-01-25 21:22:06] (step=0002415) Train Loss mse: 0.0000, Train Loss ce: 0.5217, Train Steps/Sec: 0.23,
2618
  [2026-01-25 21:22:09] (step=0002416) Train Loss mse: 0.0000, Train Loss ce: 0.5465, Train Steps/Sec: 0.33,
2619
- base_dir is /dev/shm/models/checkpoints_vlm_gym_counting_mark_all_one_image_lr2e_5_ce_no_mse_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_counting_mark_all_one_image_lr2e_5_ce_no_mse_ins_step2500
2620
- Preparing Dataset vlm_gym_counting_mark_all_celoss_no_mse_evalonce/vlm_gym_counting_mark_all_val
2621
- [eval debug] first 3 batch fingerprints:
2622
- fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
2623
- fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
2624
- fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
2625
- ce_avg: 0.9854414463043213, mse_avg: 0.0
2626
- base_dir is /dev/shm/models/checkpoints_vlm_gym_counting_mark_all_one_image_lr2e_5_ce_no_mse_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_counting_mark_all_one_image_lr2e_5_ce_no_mse_ins_step3000
2627
- Preparing Dataset vlm_gym_counting_mark_all_celoss_no_mse_evalonce/vlm_gym_counting_mark_all_val
2628
- [eval debug] first 3 batch fingerprints:
2629
- fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
2630
- fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
2631
- fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
2632
- ce_avg: 0.9968664646148682, mse_avg: 0.0
2633
  [2026-01-25 21:22:11] (step=0002417) Train Loss mse: 0.0000, Train Loss ce: 0.5468, Train Steps/Sec: 0.35,
2634
  [2026-01-25 21:22:13] (step=0002418) Train Loss mse: 0.0000, Train Loss ce: 0.4715, Train Steps/Sec: 0.50,
2635
  [2026-01-25 21:22:16] (step=0002419) Train Loss mse: 0.0000, Train Loss ce: 0.4833, Train Steps/Sec: 0.45,
@@ -2646,6 +2632,27 @@ ce_avg: 0.9968664646148682, mse_avg: 0.0
2646
  [2026-01-25 21:22:46] (step=0002430) Train Loss mse: 0.0000, Train Loss ce: 0.5360, Train Steps/Sec: 0.26,
2647
  [2026-01-25 21:22:49] (step=0002431) Train Loss mse: 0.0000, Train Loss ce: 0.4942, Train Steps/Sec: 0.29,
2648
  [2026-01-25 21:22:52] (step=0002432) Train Loss mse: 0.0000, Train Loss ce: 0.5290, Train Steps/Sec: 0.43,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2649
  [2026-01-25 21:22:54] (step=0002433) Train Loss mse: 0.0000, Train Loss ce: 0.5440, Train Steps/Sec: 0.43,
2650
  [2026-01-25 21:22:58] (step=0002434) Train Loss mse: 0.0000, Train Loss ce: 0.5570, Train Steps/Sec: 0.29,
2651
  [2026-01-25 21:23:00] (step=0002435) Train Loss mse: 0.0000, Train Loss ce: 0.4797, Train Steps/Sec: 0.43,
@@ -3627,27 +3634,6 @@ ce_avg: 0.9968664646148682, mse_avg: 0.0
3627
  [2026-01-25 22:09:11] (step=0003411) Train Loss mse: 0.0000, Train Loss ce: 0.4740, Train Steps/Sec: 0.36,
3628
  [2026-01-25 22:09:14] (step=0003412) Train Loss mse: 0.0000, Train Loss ce: 0.5099, Train Steps/Sec: 0.34,
3629
  [2026-01-25 22:09:17] (step=0003413) Train Loss mse: 0.0000, Train Loss ce: 0.5017, Train Steps/Sec: 0.33,
3630
- base_dir is /dev/shm/models/checkpoints_vlm_gym_counting_mark_all_one_image_lr2e_5_ce_no_mse_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_counting_mark_all_one_image_lr2e_5_ce_no_mse_ins_step3500
3631
- Preparing Dataset vlm_gym_counting_mark_all_celoss_no_mse_evalonce/vlm_gym_counting_mark_all_val
3632
- [eval debug] first 3 batch fingerprints:
3633
- fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
3634
- fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
3635
- fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
3636
- ce_avg: 0.9615826606750488, mse_avg: 0.0
3637
- base_dir is /dev/shm/models/checkpoints_vlm_gym_counting_mark_all_one_image_lr2e_5_ce_no_mse_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_counting_mark_all_one_image_lr2e_5_ce_no_mse_ins_step4000
3638
- Preparing Dataset vlm_gym_counting_mark_all_celoss_no_mse_evalonce/vlm_gym_counting_mark_all_val
3639
- [eval debug] first 3 batch fingerprints:
3640
- fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
3641
- fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
3642
- fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
3643
- ce_avg: 0.8829993009567261, mse_avg: 0.0
3644
- base_dir is /dev/shm/models/checkpoints_vlm_gym_counting_mark_all_one_image_lr2e_5_ce_no_mse_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_counting_mark_all_one_image_lr2e_5_ce_no_mse_ins_step4500
3645
- Preparing Dataset vlm_gym_counting_mark_all_celoss_no_mse_evalonce/vlm_gym_counting_mark_all_val
3646
- [eval debug] first 3 batch fingerprints:
3647
- fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
3648
- fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
3649
- fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
3650
- ce_avg: 0.8653228282928467, mse_avg: 0.0
3651
  [2026-01-25 22:09:20] (step=0003414) Train Loss mse: 0.0000, Train Loss ce: 0.4866, Train Steps/Sec: 0.31,
3652
  [2026-01-25 22:09:23] (step=0003415) Train Loss mse: 0.0000, Train Loss ce: 0.4945, Train Steps/Sec: 0.35,
3653
  [2026-01-25 22:09:26] (step=0003416) Train Loss mse: 0.0000, Train Loss ce: 0.4950, Train Steps/Sec: 0.40,
@@ -3669,6 +3655,20 @@ ce_avg: 0.8653228282928467, mse_avg: 0.0
3669
  [2026-01-25 22:10:14] (step=0003432) Train Loss mse: 0.0000, Train Loss ce: 0.5193, Train Steps/Sec: 0.29,
3670
  [2026-01-25 22:10:18] (step=0003433) Train Loss mse: 0.0000, Train Loss ce: 0.5097, Train Steps/Sec: 0.26,
3671
  [2026-01-25 22:10:21] (step=0003434) Train Loss mse: 0.0000, Train Loss ce: 0.4901, Train Steps/Sec: 0.36,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3672
  [2026-01-25 22:10:23] (step=0003435) Train Loss mse: 0.0000, Train Loss ce: 0.4939, Train Steps/Sec: 0.43,
3673
  [2026-01-25 22:10:27] (step=0003436) Train Loss mse: 0.0000, Train Loss ce: 0.5293, Train Steps/Sec: 0.32,
3674
  [2026-01-25 22:10:29] (step=0003437) Train Loss mse: 0.0000, Train Loss ce: 0.5026, Train Steps/Sec: 0.34,
@@ -5162,13 +5162,6 @@ ce_avg: 0.8653228282928467, mse_avg: 0.0
5162
  [2026-01-25 23:21:40] (step=0004925) Train Loss mse: 0.0000, Train Loss ce: 0.4804, Train Steps/Sec: 0.41,
5163
  [2026-01-25 23:21:43] (step=0004926) Train Loss mse: 0.0000, Train Loss ce: 0.4675, Train Steps/Sec: 0.45,
5164
  [2026-01-25 23:21:45] (step=0004927) Train Loss mse: 0.0000, Train Loss ce: 0.4538, Train Steps/Sec: 0.50,
5165
- base_dir is /dev/shm/models/checkpoints_vlm_gym_counting_mark_all_one_image_lr2e_5_ce_no_mse_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_counting_mark_all_one_image_lr2e_5_ce_no_mse_ins_step5000
5166
- Preparing Dataset vlm_gym_counting_mark_all_celoss_no_mse_evalonce/vlm_gym_counting_mark_all_val
5167
- [eval debug] first 3 batch fingerprints:
5168
- fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
5169
- fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
5170
- fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
5171
- ce_avg: 0.847433865070343, mse_avg: 0.0
5172
  [2026-01-25 23:21:47] (step=0004928) Train Loss mse: 0.0000, Train Loss ce: 0.4813, Train Steps/Sec: 0.40,
5173
  [2026-01-25 23:21:51] (step=0004929) Train Loss mse: 0.0000, Train Loss ce: 0.5108, Train Steps/Sec: 0.28,
5174
  [2026-01-25 23:21:54] (step=0004930) Train Loss mse: 0.0000, Train Loss ce: 0.5030, Train Steps/Sec: 0.32,
@@ -5223,6 +5216,13 @@ ce_avg: 0.847433865070343, mse_avg: 0.0
5223
  [2026-01-25 23:24:13] (step=0004979) Train Loss mse: 0.0000, Train Loss ce: 0.5016, Train Steps/Sec: 0.40,
5224
  [2026-01-25 23:24:16] (step=0004980) Train Loss mse: 0.0000, Train Loss ce: 0.5237, Train Steps/Sec: 0.34,
5225
  [2026-01-25 23:24:19] (step=0004981) Train Loss mse: 0.0000, Train Loss ce: 0.4824, Train Steps/Sec: 0.44,
 
 
 
 
 
 
 
5226
  [2026-01-25 23:24:22] (step=0004982) Train Loss mse: 0.0000, Train Loss ce: 0.5167, Train Steps/Sec: 0.26,
5227
  [2026-01-25 23:24:25] (step=0004983) Train Loss mse: 0.0000, Train Loss ce: 0.4746, Train Steps/Sec: 0.40,
5228
  [2026-01-25 23:24:27] (step=0004984) Train Loss mse: 0.0000, Train Loss ce: 0.5120, Train Steps/Sec: 0.38,
 
168
  fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
169
  fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
170
  ce_avg: 0.5390675663948059, mse_avg: 0.0
171
+ base_dir is /dev/shm/models/checkpoints_vlm_gym_counting_mark_all_one_image_lr2e_5_ce_no_mse_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_counting_mark_all_one_image_lr2e_5_ce_no_mse_ins_step1000
172
+ Preparing Dataset vlm_gym_counting_mark_all_celoss_no_mse_evalonce/vlm_gym_counting_mark_all_val
173
+ [eval debug] first 3 batch fingerprints:
174
+ fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
175
+ fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
176
+ fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
177
+ ce_avg: 0.6019229888916016, mse_avg: 0.0
178
  wandb: Detected [huggingface_hub.inference] in use.
179
  wandb: Use W&B Weave for improved LLM call tracing. Install Weave with `pip install weave` then add `import weave` to the top of your script.
180
  wandb: For more information, check out the docs at: https://weave-docs.wandb.ai/
 
1202
  [2026-01-25 20:15:54] (step=0001014) Train Loss mse: 0.0000, Train Loss ce: 0.5307, Train Steps/Sec: 0.38,
1203
  [2026-01-25 20:15:57] (step=0001015) Train Loss mse: 0.0000, Train Loss ce: 0.5537, Train Steps/Sec: 0.29,
1204
  [2026-01-25 20:16:01] (step=0001016) Train Loss mse: 0.0000, Train Loss ce: 0.5289, Train Steps/Sec: 0.28,
1205
+ [2026-01-25 20:16:04] (step=0001017) Train Loss mse: 0.0000, Train Loss ce: 0.5496, Train Steps/Sec: 0.30,
1206
+ [2026-01-25 20:16:06] (step=0001018) Train Loss mse: 0.0000, Train Loss ce: 0.5517, Train Steps/Sec: 0.39,
1207
+ [2026-01-25 20:16:08] (step=0001019) Train Loss mse: 0.0000, Train Loss ce: 0.5016, Train Steps/Sec: 0.52,
1208
+ [2026-01-25 20:16:12] (step=0001020) Train Loss mse: 0.0000, Train Loss ce: 0.5413, Train Steps/Sec: 0.27,
1209
+ [2026-01-25 20:16:15] (step=0001021) Train Loss mse: 0.0000, Train Loss ce: 0.5106, Train Steps/Sec: 0.33,
1210
+ [2026-01-25 20:16:19] (step=0001022) Train Loss mse: 0.0000, Train Loss ce: 0.5708, Train Steps/Sec: 0.28,
1211
+ [2026-01-25 20:16:21] (step=0001023) Train Loss mse: 0.0000, Train Loss ce: 0.5335, Train Steps/Sec: 0.38,
1212
  base_dir is /dev/shm/models/checkpoints_vlm_gym_counting_mark_all_one_image_lr2e_5_ce_no_mse_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_counting_mark_all_one_image_lr2e_5_ce_no_mse_ins_step1500
1213
  Preparing Dataset vlm_gym_counting_mark_all_celoss_no_mse_evalonce/vlm_gym_counting_mark_all_val
1214
  [eval debug] first 3 batch fingerprints:
 
1223
  fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
1224
  fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
1225
  ce_avg: 0.8126255869865417, mse_avg: 0.0
 
 
 
 
 
 
 
1226
  [2026-01-25 20:16:24] (step=0001024) Train Loss mse: 0.0000, Train Loss ce: 0.5224, Train Steps/Sec: 0.45,
1227
  [2026-01-25 20:16:28] (step=0001025) Train Loss mse: 0.0000, Train Loss ce: 0.5477, Train Steps/Sec: 0.25,
1228
  [2026-01-25 20:16:31] (step=0001026) Train Loss mse: 0.0000, Train Loss ce: 0.5332, Train Steps/Sec: 0.33,
 
2616
  [2026-01-25 21:22:01] (step=0002414) Train Loss mse: 0.0000, Train Loss ce: 0.4751, Train Steps/Sec: 0.32,
2617
  [2026-01-25 21:22:06] (step=0002415) Train Loss mse: 0.0000, Train Loss ce: 0.5217, Train Steps/Sec: 0.23,
2618
  [2026-01-25 21:22:09] (step=0002416) Train Loss mse: 0.0000, Train Loss ce: 0.5465, Train Steps/Sec: 0.33,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2619
  [2026-01-25 21:22:11] (step=0002417) Train Loss mse: 0.0000, Train Loss ce: 0.5468, Train Steps/Sec: 0.35,
2620
  [2026-01-25 21:22:13] (step=0002418) Train Loss mse: 0.0000, Train Loss ce: 0.4715, Train Steps/Sec: 0.50,
2621
  [2026-01-25 21:22:16] (step=0002419) Train Loss mse: 0.0000, Train Loss ce: 0.4833, Train Steps/Sec: 0.45,
 
2632
  [2026-01-25 21:22:46] (step=0002430) Train Loss mse: 0.0000, Train Loss ce: 0.5360, Train Steps/Sec: 0.26,
2633
  [2026-01-25 21:22:49] (step=0002431) Train Loss mse: 0.0000, Train Loss ce: 0.4942, Train Steps/Sec: 0.29,
2634
  [2026-01-25 21:22:52] (step=0002432) Train Loss mse: 0.0000, Train Loss ce: 0.5290, Train Steps/Sec: 0.43,
2635
+ base_dir is /dev/shm/models/checkpoints_vlm_gym_counting_mark_all_one_image_lr2e_5_ce_no_mse_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_counting_mark_all_one_image_lr2e_5_ce_no_mse_ins_step2500
2636
+ Preparing Dataset vlm_gym_counting_mark_all_celoss_no_mse_evalonce/vlm_gym_counting_mark_all_val
2637
+ [eval debug] first 3 batch fingerprints:
2638
+ fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
2639
+ fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
2640
+ fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
2641
+ ce_avg: 0.9854414463043213, mse_avg: 0.0
2642
+ base_dir is /dev/shm/models/checkpoints_vlm_gym_counting_mark_all_one_image_lr2e_5_ce_no_mse_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_counting_mark_all_one_image_lr2e_5_ce_no_mse_ins_step3000
2643
+ Preparing Dataset vlm_gym_counting_mark_all_celoss_no_mse_evalonce/vlm_gym_counting_mark_all_val
2644
+ [eval debug] first 3 batch fingerprints:
2645
+ fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
2646
+ fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
2647
+ fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
2648
+ ce_avg: 0.9968664646148682, mse_avg: 0.0
2649
+ base_dir is /dev/shm/models/checkpoints_vlm_gym_counting_mark_all_one_image_lr2e_5_ce_no_mse_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_counting_mark_all_one_image_lr2e_5_ce_no_mse_ins_step3500
2650
+ Preparing Dataset vlm_gym_counting_mark_all_celoss_no_mse_evalonce/vlm_gym_counting_mark_all_val
2651
+ [eval debug] first 3 batch fingerprints:
2652
+ fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
2653
+ fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
2654
+ fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
2655
+ ce_avg: 0.9615826606750488, mse_avg: 0.0
2656
  [2026-01-25 21:22:54] (step=0002433) Train Loss mse: 0.0000, Train Loss ce: 0.5440, Train Steps/Sec: 0.43,
2657
  [2026-01-25 21:22:58] (step=0002434) Train Loss mse: 0.0000, Train Loss ce: 0.5570, Train Steps/Sec: 0.29,
2658
  [2026-01-25 21:23:00] (step=0002435) Train Loss mse: 0.0000, Train Loss ce: 0.4797, Train Steps/Sec: 0.43,
 
3634
  [2026-01-25 22:09:11] (step=0003411) Train Loss mse: 0.0000, Train Loss ce: 0.4740, Train Steps/Sec: 0.36,
3635
  [2026-01-25 22:09:14] (step=0003412) Train Loss mse: 0.0000, Train Loss ce: 0.5099, Train Steps/Sec: 0.34,
3636
  [2026-01-25 22:09:17] (step=0003413) Train Loss mse: 0.0000, Train Loss ce: 0.5017, Train Steps/Sec: 0.33,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3637
  [2026-01-25 22:09:20] (step=0003414) Train Loss mse: 0.0000, Train Loss ce: 0.4866, Train Steps/Sec: 0.31,
3638
  [2026-01-25 22:09:23] (step=0003415) Train Loss mse: 0.0000, Train Loss ce: 0.4945, Train Steps/Sec: 0.35,
3639
  [2026-01-25 22:09:26] (step=0003416) Train Loss mse: 0.0000, Train Loss ce: 0.4950, Train Steps/Sec: 0.40,
 
3655
  [2026-01-25 22:10:14] (step=0003432) Train Loss mse: 0.0000, Train Loss ce: 0.5193, Train Steps/Sec: 0.29,
3656
  [2026-01-25 22:10:18] (step=0003433) Train Loss mse: 0.0000, Train Loss ce: 0.5097, Train Steps/Sec: 0.26,
3657
  [2026-01-25 22:10:21] (step=0003434) Train Loss mse: 0.0000, Train Loss ce: 0.4901, Train Steps/Sec: 0.36,
3658
+ base_dir is /dev/shm/models/checkpoints_vlm_gym_counting_mark_all_one_image_lr2e_5_ce_no_mse_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_counting_mark_all_one_image_lr2e_5_ce_no_mse_ins_step4000
3659
+ Preparing Dataset vlm_gym_counting_mark_all_celoss_no_mse_evalonce/vlm_gym_counting_mark_all_val
3660
+ [eval debug] first 3 batch fingerprints:
3661
+ fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
3662
+ fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
3663
+ fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
3664
+ ce_avg: 0.8829993009567261, mse_avg: 0.0
3665
+ base_dir is /dev/shm/models/checkpoints_vlm_gym_counting_mark_all_one_image_lr2e_5_ce_no_mse_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_counting_mark_all_one_image_lr2e_5_ce_no_mse_ins_step4500
3666
+ Preparing Dataset vlm_gym_counting_mark_all_celoss_no_mse_evalonce/vlm_gym_counting_mark_all_val
3667
+ [eval debug] first 3 batch fingerprints:
3668
+ fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
3669
+ fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
3670
+ fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
3671
+ ce_avg: 0.8653228282928467, mse_avg: 0.0
3672
  [2026-01-25 22:10:23] (step=0003435) Train Loss mse: 0.0000, Train Loss ce: 0.4939, Train Steps/Sec: 0.43,
3673
  [2026-01-25 22:10:27] (step=0003436) Train Loss mse: 0.0000, Train Loss ce: 0.5293, Train Steps/Sec: 0.32,
3674
  [2026-01-25 22:10:29] (step=0003437) Train Loss mse: 0.0000, Train Loss ce: 0.5026, Train Steps/Sec: 0.34,
 
5162
  [2026-01-25 23:21:40] (step=0004925) Train Loss mse: 0.0000, Train Loss ce: 0.4804, Train Steps/Sec: 0.41,
5163
  [2026-01-25 23:21:43] (step=0004926) Train Loss mse: 0.0000, Train Loss ce: 0.4675, Train Steps/Sec: 0.45,
5164
  [2026-01-25 23:21:45] (step=0004927) Train Loss mse: 0.0000, Train Loss ce: 0.4538, Train Steps/Sec: 0.50,
 
 
 
 
 
 
 
5165
  [2026-01-25 23:21:47] (step=0004928) Train Loss mse: 0.0000, Train Loss ce: 0.4813, Train Steps/Sec: 0.40,
5166
  [2026-01-25 23:21:51] (step=0004929) Train Loss mse: 0.0000, Train Loss ce: 0.5108, Train Steps/Sec: 0.28,
5167
  [2026-01-25 23:21:54] (step=0004930) Train Loss mse: 0.0000, Train Loss ce: 0.5030, Train Steps/Sec: 0.32,
 
5216
  [2026-01-25 23:24:13] (step=0004979) Train Loss mse: 0.0000, Train Loss ce: 0.5016, Train Steps/Sec: 0.40,
5217
  [2026-01-25 23:24:16] (step=0004980) Train Loss mse: 0.0000, Train Loss ce: 0.5237, Train Steps/Sec: 0.34,
5218
  [2026-01-25 23:24:19] (step=0004981) Train Loss mse: 0.0000, Train Loss ce: 0.4824, Train Steps/Sec: 0.44,
5219
+ base_dir is /dev/shm/models/checkpoints_vlm_gym_counting_mark_all_one_image_lr2e_5_ce_no_mse_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_counting_mark_all_one_image_lr2e_5_ce_no_mse_ins_step5000
5220
+ Preparing Dataset vlm_gym_counting_mark_all_celoss_no_mse_evalonce/vlm_gym_counting_mark_all_val
5221
+ [eval debug] first 3 batch fingerprints:
5222
+ fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
5223
+ fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
5224
+ fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
5225
+ ce_avg: 0.847433865070343, mse_avg: 0.0
5226
  [2026-01-25 23:24:22] (step=0004982) Train Loss mse: 0.0000, Train Loss ce: 0.5167, Train Steps/Sec: 0.26,
5227
  [2026-01-25 23:24:25] (step=0004983) Train Loss mse: 0.0000, Train Loss ce: 0.4746, Train Steps/Sec: 0.40,
5228
  [2026-01-25 23:24:27] (step=0004984) Train Loss mse: 0.0000, Train Loss ce: 0.5120, Train Steps/Sec: 0.38,