Upload checkpoints_vlm_gym_counting_mark_all_one_image_lr2e_5_ce_no_mse_ins/checkpoints_vlm_gym_counting_mark_all_one_image_lr2e_5_ce_no_mse_ins

Browse files

Files changed (1) hide show

checkpoints_vlm_gym_counting_mark_all_one_image_lr2e_5_ce_no_mse_ins/checkpoints_vlm_gym_counting_mark_all_one_image_lr2e_5_ce_no_mse_ins/wandb/offline-run-20260125_192135-checkpoints_vlm_gym_counting_mark_all_one_image_lr2e_5_ce_no_mse_ins-run0/files/output.log +56 -56

checkpoints_vlm_gym_counting_mark_all_one_image_lr2e_5_ce_no_mse_ins/checkpoints_vlm_gym_counting_mark_all_one_image_lr2e_5_ce_no_mse_ins/wandb/offline-run-20260125_192135-checkpoints_vlm_gym_counting_mark_all_one_image_lr2e_5_ce_no_mse_ins-run0/files/output.log CHANGED Viewed

@@ -168,6 +168,13 @@ Preparing Dataset vlm_gym_counting_mark_all_celoss_no_mse_evalonce/vlm_gym_count
   fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
   fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
 ce_avg: 0.5390675663948059, mse_avg: 0.0
 wandb: Detected [huggingface_hub.inference] in use.
 wandb: Use W&B Weave for improved LLM call tracing. Install Weave with `pip install weave` then add `import weave` to the top of your script.
 wandb: For more information, check out the docs at: https://weave-docs.wandb.ai/
@@ -1195,13 +1202,13 @@ wandb: For more information, check out the docs at: https://weave-docs.wandb.ai/
 [[34m2026-01-25 20:15:54[39m] (step=0001014) Train Loss mse: 0.0000, Train Loss ce: 0.5307, Train Steps/Sec: 0.38,
 [[34m2026-01-25 20:15:57[39m] (step=0001015) Train Loss mse: 0.0000, Train Loss ce: 0.5537, Train Steps/Sec: 0.29,
 [[34m2026-01-25 20:16:01[39m] (step=0001016) Train Loss mse: 0.0000, Train Loss ce: 0.5289, Train Steps/Sec: 0.28,
-base_dir is /dev/shm/models/checkpoints_vlm_gym_counting_mark_all_one_image_lr2e_5_ce_no_mse_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_counting_mark_all_one_image_lr2e_5_ce_no_mse_ins_step1000
-Preparing Dataset vlm_gym_counting_mark_all_celoss_no_mse_evalonce/vlm_gym_counting_mark_all_val
-[eval debug] first 3 batch fingerprints:
-  fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
-  fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
-  fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
-ce_avg: 0.6019229888916016, mse_avg: 0.0
 base_dir is /dev/shm/models/checkpoints_vlm_gym_counting_mark_all_one_image_lr2e_5_ce_no_mse_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_counting_mark_all_one_image_lr2e_5_ce_no_mse_ins_step1500
 Preparing Dataset vlm_gym_counting_mark_all_celoss_no_mse_evalonce/vlm_gym_counting_mark_all_val
 [eval debug] first 3 batch fingerprints:
@@ -1216,13 +1223,6 @@ Preparing Dataset vlm_gym_counting_mark_all_celoss_no_mse_evalonce/vlm_gym_count
   fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
   fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
 ce_avg: 0.8126255869865417, mse_avg: 0.0
-[[34m2026-01-25 20:16:04[39m] (step=0001017) Train Loss mse: 0.0000, Train Loss ce: 0.5496, Train Steps/Sec: 0.30,
-[[34m2026-01-25 20:16:06[39m] (step=0001018) Train Loss mse: 0.0000, Train Loss ce: 0.5517, Train Steps/Sec: 0.39,
-[[34m2026-01-25 20:16:08[39m] (step=0001019) Train Loss mse: 0.0000, Train Loss ce: 0.5016, Train Steps/Sec: 0.52,
-[[34m2026-01-25 20:16:12[39m] (step=0001020) Train Loss mse: 0.0000, Train Loss ce: 0.5413, Train Steps/Sec: 0.27,
-[[34m2026-01-25 20:16:15[39m] (step=0001021) Train Loss mse: 0.0000, Train Loss ce: 0.5106, Train Steps/Sec: 0.33,
-[[34m2026-01-25 20:16:19[39m] (step=0001022) Train Loss mse: 0.0000, Train Loss ce: 0.5708, Train Steps/Sec: 0.28,
-[[34m2026-01-25 20:16:21[39m] (step=0001023) Train Loss mse: 0.0000, Train Loss ce: 0.5335, Train Steps/Sec: 0.38,
 [[34m2026-01-25 20:16:24[39m] (step=0001024) Train Loss mse: 0.0000, Train Loss ce: 0.5224, Train Steps/Sec: 0.45,
 [[34m2026-01-25 20:16:28[39m] (step=0001025) Train Loss mse: 0.0000, Train Loss ce: 0.5477, Train Steps/Sec: 0.25,
 [[34m2026-01-25 20:16:31[39m] (step=0001026) Train Loss mse: 0.0000, Train Loss ce: 0.5332, Train Steps/Sec: 0.33,
@@ -2616,20 +2616,6 @@ ce_avg: 0.8126255869865417, mse_avg: 0.0
 [[34m2026-01-25 21:22:01[39m] (step=0002414) Train Loss mse: 0.0000, Train Loss ce: 0.4751, Train Steps/Sec: 0.32,
 [[34m2026-01-25 21:22:06[39m] (step=0002415) Train Loss mse: 0.0000, Train Loss ce: 0.5217, Train Steps/Sec: 0.23,
 [[34m2026-01-25 21:22:09[39m] (step=0002416) Train Loss mse: 0.0000, Train Loss ce: 0.5465, Train Steps/Sec: 0.33,
-base_dir is /dev/shm/models/checkpoints_vlm_gym_counting_mark_all_one_image_lr2e_5_ce_no_mse_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_counting_mark_all_one_image_lr2e_5_ce_no_mse_ins_step2500
-Preparing Dataset vlm_gym_counting_mark_all_celoss_no_mse_evalonce/vlm_gym_counting_mark_all_val
-[eval debug] first 3 batch fingerprints:
-  fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
-  fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
-  fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
-ce_avg: 0.9854414463043213, mse_avg: 0.0
-base_dir is /dev/shm/models/checkpoints_vlm_gym_counting_mark_all_one_image_lr2e_5_ce_no_mse_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_counting_mark_all_one_image_lr2e_5_ce_no_mse_ins_step3000
-Preparing Dataset vlm_gym_counting_mark_all_celoss_no_mse_evalonce/vlm_gym_counting_mark_all_val
-[eval debug] first 3 batch fingerprints:
-  fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
-  fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
-  fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
-ce_avg: 0.9968664646148682, mse_avg: 0.0
 [[34m2026-01-25 21:22:11[39m] (step=0002417) Train Loss mse: 0.0000, Train Loss ce: 0.5468, Train Steps/Sec: 0.35,
 [[34m2026-01-25 21:22:13[39m] (step=0002418) Train Loss mse: 0.0000, Train Loss ce: 0.4715, Train Steps/Sec: 0.50,
 [[34m2026-01-25 21:22:16[39m] (step=0002419) Train Loss mse: 0.0000, Train Loss ce: 0.4833, Train Steps/Sec: 0.45,
@@ -2646,6 +2632,27 @@ ce_avg: 0.9968664646148682, mse_avg: 0.0
 [[34m2026-01-25 21:22:46[39m] (step=0002430) Train Loss mse: 0.0000, Train Loss ce: 0.5360, Train Steps/Sec: 0.26,
 [[34m2026-01-25 21:22:49[39m] (step=0002431) Train Loss mse: 0.0000, Train Loss ce: 0.4942, Train Steps/Sec: 0.29,
 [[34m2026-01-25 21:22:52[39m] (step=0002432) Train Loss mse: 0.0000, Train Loss ce: 0.5290, Train Steps/Sec: 0.43,
 [[34m2026-01-25 21:22:54[39m] (step=0002433) Train Loss mse: 0.0000, Train Loss ce: 0.5440, Train Steps/Sec: 0.43,
 [[34m2026-01-25 21:22:58[39m] (step=0002434) Train Loss mse: 0.0000, Train Loss ce: 0.5570, Train Steps/Sec: 0.29,
 [[34m2026-01-25 21:23:00[39m] (step=0002435) Train Loss mse: 0.0000, Train Loss ce: 0.4797, Train Steps/Sec: 0.43,
@@ -3627,27 +3634,6 @@ ce_avg: 0.9968664646148682, mse_avg: 0.0
 [[34m2026-01-25 22:09:11[39m] (step=0003411) Train Loss mse: 0.0000, Train Loss ce: 0.4740, Train Steps/Sec: 0.36,
 [[34m2026-01-25 22:09:14[39m] (step=0003412) Train Loss mse: 0.0000, Train Loss ce: 0.5099, Train Steps/Sec: 0.34,
 [[34m2026-01-25 22:09:17[39m] (step=0003413) Train Loss mse: 0.0000, Train Loss ce: 0.5017, Train Steps/Sec: 0.33,
-base_dir is /dev/shm/models/checkpoints_vlm_gym_counting_mark_all_one_image_lr2e_5_ce_no_mse_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_counting_mark_all_one_image_lr2e_5_ce_no_mse_ins_step3500
-Preparing Dataset vlm_gym_counting_mark_all_celoss_no_mse_evalonce/vlm_gym_counting_mark_all_val
-[eval debug] first 3 batch fingerprints:
-  fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
-  fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
-  fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
-ce_avg: 0.9615826606750488, mse_avg: 0.0
-base_dir is /dev/shm/models/checkpoints_vlm_gym_counting_mark_all_one_image_lr2e_5_ce_no_mse_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_counting_mark_all_one_image_lr2e_5_ce_no_mse_ins_step4000
-Preparing Dataset vlm_gym_counting_mark_all_celoss_no_mse_evalonce/vlm_gym_counting_mark_all_val
-[eval debug] first 3 batch fingerprints:
-  fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
-  fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
-  fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
-ce_avg: 0.8829993009567261, mse_avg: 0.0
-base_dir is /dev/shm/models/checkpoints_vlm_gym_counting_mark_all_one_image_lr2e_5_ce_no_mse_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_counting_mark_all_one_image_lr2e_5_ce_no_mse_ins_step4500
-Preparing Dataset vlm_gym_counting_mark_all_celoss_no_mse_evalonce/vlm_gym_counting_mark_all_val
-[eval debug] first 3 batch fingerprints:
-  fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
-  fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
-  fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
-ce_avg: 0.8653228282928467, mse_avg: 0.0
 [[34m2026-01-25 22:09:20[39m] (step=0003414) Train Loss mse: 0.0000, Train Loss ce: 0.4866, Train Steps/Sec: 0.31,
 [[34m2026-01-25 22:09:23[39m] (step=0003415) Train Loss mse: 0.0000, Train Loss ce: 0.4945, Train Steps/Sec: 0.35,
 [[34m2026-01-25 22:09:26[39m] (step=0003416) Train Loss mse: 0.0000, Train Loss ce: 0.4950, Train Steps/Sec: 0.40,
@@ -3669,6 +3655,20 @@ ce_avg: 0.8653228282928467, mse_avg: 0.0
 [[34m2026-01-25 22:10:14[39m] (step=0003432) Train Loss mse: 0.0000, Train Loss ce: 0.5193, Train Steps/Sec: 0.29,
 [[34m2026-01-25 22:10:18[39m] (step=0003433) Train Loss mse: 0.0000, Train Loss ce: 0.5097, Train Steps/Sec: 0.26,
 [[34m2026-01-25 22:10:21[39m] (step=0003434) Train Loss mse: 0.0000, Train Loss ce: 0.4901, Train Steps/Sec: 0.36,
 [[34m2026-01-25 22:10:23[39m] (step=0003435) Train Loss mse: 0.0000, Train Loss ce: 0.4939, Train Steps/Sec: 0.43,
 [[34m2026-01-25 22:10:27[39m] (step=0003436) Train Loss mse: 0.0000, Train Loss ce: 0.5293, Train Steps/Sec: 0.32,
 [[34m2026-01-25 22:10:29[39m] (step=0003437) Train Loss mse: 0.0000, Train Loss ce: 0.5026, Train Steps/Sec: 0.34,
@@ -5162,13 +5162,6 @@ ce_avg: 0.8653228282928467, mse_avg: 0.0
 [[34m2026-01-25 23:21:40[39m] (step=0004925) Train Loss mse: 0.0000, Train Loss ce: 0.4804, Train Steps/Sec: 0.41,
 [[34m2026-01-25 23:21:43[39m] (step=0004926) Train Loss mse: 0.0000, Train Loss ce: 0.4675, Train Steps/Sec: 0.45,
 [[34m2026-01-25 23:21:45[39m] (step=0004927) Train Loss mse: 0.0000, Train Loss ce: 0.4538, Train Steps/Sec: 0.50,
-base_dir is /dev/shm/models/checkpoints_vlm_gym_counting_mark_all_one_image_lr2e_5_ce_no_mse_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_counting_mark_all_one_image_lr2e_5_ce_no_mse_ins_step5000
-Preparing Dataset vlm_gym_counting_mark_all_celoss_no_mse_evalonce/vlm_gym_counting_mark_all_val
-[eval debug] first 3 batch fingerprints:
-  fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
-  fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
-  fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
-ce_avg: 0.847433865070343, mse_avg: 0.0
 [[34m2026-01-25 23:21:47[39m] (step=0004928) Train Loss mse: 0.0000, Train Loss ce: 0.4813, Train Steps/Sec: 0.40,
 [[34m2026-01-25 23:21:51[39m] (step=0004929) Train Loss mse: 0.0000, Train Loss ce: 0.5108, Train Steps/Sec: 0.28,
 [[34m2026-01-25 23:21:54[39m] (step=0004930) Train Loss mse: 0.0000, Train Loss ce: 0.5030, Train Steps/Sec: 0.32,
@@ -5223,6 +5216,13 @@ ce_avg: 0.847433865070343, mse_avg: 0.0
 [[34m2026-01-25 23:24:13[39m] (step=0004979) Train Loss mse: 0.0000, Train Loss ce: 0.5016, Train Steps/Sec: 0.40,
 [[34m2026-01-25 23:24:16[39m] (step=0004980) Train Loss mse: 0.0000, Train Loss ce: 0.5237, Train Steps/Sec: 0.34,
 [[34m2026-01-25 23:24:19[39m] (step=0004981) Train Loss mse: 0.0000, Train Loss ce: 0.4824, Train Steps/Sec: 0.44,
 [[34m2026-01-25 23:24:22[39m] (step=0004982) Train Loss mse: 0.0000, Train Loss ce: 0.5167, Train Steps/Sec: 0.26,
 [[34m2026-01-25 23:24:25[39m] (step=0004983) Train Loss mse: 0.0000, Train Loss ce: 0.4746, Train Steps/Sec: 0.40,
 [[34m2026-01-25 23:24:27[39m] (step=0004984) Train Loss mse: 0.0000, Train Loss ce: 0.5120, Train Steps/Sec: 0.38,

   fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
   fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
 ce_avg: 0.5390675663948059, mse_avg: 0.0
+base_dir is /dev/shm/models/checkpoints_vlm_gym_counting_mark_all_one_image_lr2e_5_ce_no_mse_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_counting_mark_all_one_image_lr2e_5_ce_no_mse_ins_step1000
+Preparing Dataset vlm_gym_counting_mark_all_celoss_no_mse_evalonce/vlm_gym_counting_mark_all_val
+[eval debug] first 3 batch fingerprints:
+  fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
+  fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
+  fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
+ce_avg: 0.6019229888916016, mse_avg: 0.0
 wandb: Detected [huggingface_hub.inference] in use.
 wandb: Use W&B Weave for improved LLM call tracing. Install Weave with `pip install weave` then add `import weave` to the top of your script.
 wandb: For more information, check out the docs at: https://weave-docs.wandb.ai/
 [[34m2026-01-25 20:15:54[39m] (step=0001014) Train Loss mse: 0.0000, Train Loss ce: 0.5307, Train Steps/Sec: 0.38,
 [[34m2026-01-25 20:15:57[39m] (step=0001015) Train Loss mse: 0.0000, Train Loss ce: 0.5537, Train Steps/Sec: 0.29,
 [[34m2026-01-25 20:16:01[39m] (step=0001016) Train Loss mse: 0.0000, Train Loss ce: 0.5289, Train Steps/Sec: 0.28,
+[[34m2026-01-25 20:16:04[39m] (step=0001017) Train Loss mse: 0.0000, Train Loss ce: 0.5496, Train Steps/Sec: 0.30,
+[[34m2026-01-25 20:16:06[39m] (step=0001018) Train Loss mse: 0.0000, Train Loss ce: 0.5517, Train Steps/Sec: 0.39,
+[[34m2026-01-25 20:16:08[39m] (step=0001019) Train Loss mse: 0.0000, Train Loss ce: 0.5016, Train Steps/Sec: 0.52,
+[[34m2026-01-25 20:16:12[39m] (step=0001020) Train Loss mse: 0.0000, Train Loss ce: 0.5413, Train Steps/Sec: 0.27,
+[[34m2026-01-25 20:16:15[39m] (step=0001021) Train Loss mse: 0.0000, Train Loss ce: 0.5106, Train Steps/Sec: 0.33,
+[[34m2026-01-25 20:16:19[39m] (step=0001022) Train Loss mse: 0.0000, Train Loss ce: 0.5708, Train Steps/Sec: 0.28,
+[[34m2026-01-25 20:16:21[39m] (step=0001023) Train Loss mse: 0.0000, Train Loss ce: 0.5335, Train Steps/Sec: 0.38,
 base_dir is /dev/shm/models/checkpoints_vlm_gym_counting_mark_all_one_image_lr2e_5_ce_no_mse_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_counting_mark_all_one_image_lr2e_5_ce_no_mse_ins_step1500
 Preparing Dataset vlm_gym_counting_mark_all_celoss_no_mse_evalonce/vlm_gym_counting_mark_all_val
 [eval debug] first 3 batch fingerprints:
   fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
   fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
 ce_avg: 0.8126255869865417, mse_avg: 0.0
 [[34m2026-01-25 20:16:24[39m] (step=0001024) Train Loss mse: 0.0000, Train Loss ce: 0.5224, Train Steps/Sec: 0.45,
 [[34m2026-01-25 20:16:28[39m] (step=0001025) Train Loss mse: 0.0000, Train Loss ce: 0.5477, Train Steps/Sec: 0.25,
 [[34m2026-01-25 20:16:31[39m] (step=0001026) Train Loss mse: 0.0000, Train Loss ce: 0.5332, Train Steps/Sec: 0.33,
 [[34m2026-01-25 21:22:01[39m] (step=0002414) Train Loss mse: 0.0000, Train Loss ce: 0.4751, Train Steps/Sec: 0.32,
 [[34m2026-01-25 21:22:06[39m] (step=0002415) Train Loss mse: 0.0000, Train Loss ce: 0.5217, Train Steps/Sec: 0.23,
 [[34m2026-01-25 21:22:09[39m] (step=0002416) Train Loss mse: 0.0000, Train Loss ce: 0.5465, Train Steps/Sec: 0.33,
 [[34m2026-01-25 21:22:11[39m] (step=0002417) Train Loss mse: 0.0000, Train Loss ce: 0.5468, Train Steps/Sec: 0.35,
 [[34m2026-01-25 21:22:13[39m] (step=0002418) Train Loss mse: 0.0000, Train Loss ce: 0.4715, Train Steps/Sec: 0.50,
 [[34m2026-01-25 21:22:16[39m] (step=0002419) Train Loss mse: 0.0000, Train Loss ce: 0.4833, Train Steps/Sec: 0.45,
 [[34m2026-01-25 21:22:46[39m] (step=0002430) Train Loss mse: 0.0000, Train Loss ce: 0.5360, Train Steps/Sec: 0.26,
 [[34m2026-01-25 21:22:49[39m] (step=0002431) Train Loss mse: 0.0000, Train Loss ce: 0.4942, Train Steps/Sec: 0.29,
 [[34m2026-01-25 21:22:52[39m] (step=0002432) Train Loss mse: 0.0000, Train Loss ce: 0.5290, Train Steps/Sec: 0.43,
+base_dir is /dev/shm/models/checkpoints_vlm_gym_counting_mark_all_one_image_lr2e_5_ce_no_mse_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_counting_mark_all_one_image_lr2e_5_ce_no_mse_ins_step2500
+Preparing Dataset vlm_gym_counting_mark_all_celoss_no_mse_evalonce/vlm_gym_counting_mark_all_val
+[eval debug] first 3 batch fingerprints:
+  fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
+  fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
+  fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
+ce_avg: 0.9854414463043213, mse_avg: 0.0
+base_dir is /dev/shm/models/checkpoints_vlm_gym_counting_mark_all_one_image_lr2e_5_ce_no_mse_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_counting_mark_all_one_image_lr2e_5_ce_no_mse_ins_step3000
+Preparing Dataset vlm_gym_counting_mark_all_celoss_no_mse_evalonce/vlm_gym_counting_mark_all_val
+[eval debug] first 3 batch fingerprints:
+  fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
+  fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
+  fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
+ce_avg: 0.9968664646148682, mse_avg: 0.0
+base_dir is /dev/shm/models/checkpoints_vlm_gym_counting_mark_all_one_image_lr2e_5_ce_no_mse_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_counting_mark_all_one_image_lr2e_5_ce_no_mse_ins_step3500
+Preparing Dataset vlm_gym_counting_mark_all_celoss_no_mse_evalonce/vlm_gym_counting_mark_all_val
+[eval debug] first 3 batch fingerprints:
+  fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
+  fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
+  fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
+ce_avg: 0.9615826606750488, mse_avg: 0.0
 [[34m2026-01-25 21:22:54[39m] (step=0002433) Train Loss mse: 0.0000, Train Loss ce: 0.5440, Train Steps/Sec: 0.43,
 [[34m2026-01-25 21:22:58[39m] (step=0002434) Train Loss mse: 0.0000, Train Loss ce: 0.5570, Train Steps/Sec: 0.29,
 [[34m2026-01-25 21:23:00[39m] (step=0002435) Train Loss mse: 0.0000, Train Loss ce: 0.4797, Train Steps/Sec: 0.43,
 [[34m2026-01-25 22:09:11[39m] (step=0003411) Train Loss mse: 0.0000, Train Loss ce: 0.4740, Train Steps/Sec: 0.36,
 [[34m2026-01-25 22:09:14[39m] (step=0003412) Train Loss mse: 0.0000, Train Loss ce: 0.5099, Train Steps/Sec: 0.34,
 [[34m2026-01-25 22:09:17[39m] (step=0003413) Train Loss mse: 0.0000, Train Loss ce: 0.5017, Train Steps/Sec: 0.33,
 [[34m2026-01-25 22:09:20[39m] (step=0003414) Train Loss mse: 0.0000, Train Loss ce: 0.4866, Train Steps/Sec: 0.31,
 [[34m2026-01-25 22:09:23[39m] (step=0003415) Train Loss mse: 0.0000, Train Loss ce: 0.4945, Train Steps/Sec: 0.35,
 [[34m2026-01-25 22:09:26[39m] (step=0003416) Train Loss mse: 0.0000, Train Loss ce: 0.4950, Train Steps/Sec: 0.40,
 [[34m2026-01-25 22:10:14[39m] (step=0003432) Train Loss mse: 0.0000, Train Loss ce: 0.5193, Train Steps/Sec: 0.29,
 [[34m2026-01-25 22:10:18[39m] (step=0003433) Train Loss mse: 0.0000, Train Loss ce: 0.5097, Train Steps/Sec: 0.26,
 [[34m2026-01-25 22:10:21[39m] (step=0003434) Train Loss mse: 0.0000, Train Loss ce: 0.4901, Train Steps/Sec: 0.36,
+base_dir is /dev/shm/models/checkpoints_vlm_gym_counting_mark_all_one_image_lr2e_5_ce_no_mse_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_counting_mark_all_one_image_lr2e_5_ce_no_mse_ins_step4000
+Preparing Dataset vlm_gym_counting_mark_all_celoss_no_mse_evalonce/vlm_gym_counting_mark_all_val
+[eval debug] first 3 batch fingerprints:
+  fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
+  fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
+  fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
+ce_avg: 0.8829993009567261, mse_avg: 0.0
+base_dir is /dev/shm/models/checkpoints_vlm_gym_counting_mark_all_one_image_lr2e_5_ce_no_mse_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_counting_mark_all_one_image_lr2e_5_ce_no_mse_ins_step4500
+Preparing Dataset vlm_gym_counting_mark_all_celoss_no_mse_evalonce/vlm_gym_counting_mark_all_val
+[eval debug] first 3 batch fingerprints:
+  fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
+  fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
+  fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
+ce_avg: 0.8653228282928467, mse_avg: 0.0
 [[34m2026-01-25 22:10:23[39m] (step=0003435) Train Loss mse: 0.0000, Train Loss ce: 0.4939, Train Steps/Sec: 0.43,
 [[34m2026-01-25 22:10:27[39m] (step=0003436) Train Loss mse: 0.0000, Train Loss ce: 0.5293, Train Steps/Sec: 0.32,
 [[34m2026-01-25 22:10:29[39m] (step=0003437) Train Loss mse: 0.0000, Train Loss ce: 0.5026, Train Steps/Sec: 0.34,
 [[34m2026-01-25 23:21:40[39m] (step=0004925) Train Loss mse: 0.0000, Train Loss ce: 0.4804, Train Steps/Sec: 0.41,
 [[34m2026-01-25 23:21:43[39m] (step=0004926) Train Loss mse: 0.0000, Train Loss ce: 0.4675, Train Steps/Sec: 0.45,
 [[34m2026-01-25 23:21:45[39m] (step=0004927) Train Loss mse: 0.0000, Train Loss ce: 0.4538, Train Steps/Sec: 0.50,
 [[34m2026-01-25 23:21:47[39m] (step=0004928) Train Loss mse: 0.0000, Train Loss ce: 0.4813, Train Steps/Sec: 0.40,
 [[34m2026-01-25 23:21:51[39m] (step=0004929) Train Loss mse: 0.0000, Train Loss ce: 0.5108, Train Steps/Sec: 0.28,
 [[34m2026-01-25 23:21:54[39m] (step=0004930) Train Loss mse: 0.0000, Train Loss ce: 0.5030, Train Steps/Sec: 0.32,
 [[34m2026-01-25 23:24:13[39m] (step=0004979) Train Loss mse: 0.0000, Train Loss ce: 0.5016, Train Steps/Sec: 0.40,
 [[34m2026-01-25 23:24:16[39m] (step=0004980) Train Loss mse: 0.0000, Train Loss ce: 0.5237, Train Steps/Sec: 0.34,
 [[34m2026-01-25 23:24:19[39m] (step=0004981) Train Loss mse: 0.0000, Train Loss ce: 0.4824, Train Steps/Sec: 0.44,
+base_dir is /dev/shm/models/checkpoints_vlm_gym_counting_mark_all_one_image_lr2e_5_ce_no_mse_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_counting_mark_all_one_image_lr2e_5_ce_no_mse_ins_step5000
+Preparing Dataset vlm_gym_counting_mark_all_celoss_no_mse_evalonce/vlm_gym_counting_mark_all_val
+[eval debug] first 3 batch fingerprints:
+  fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
+  fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
+  fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
+ce_avg: 0.847433865070343, mse_avg: 0.0
 [[34m2026-01-25 23:24:22[39m] (step=0004982) Train Loss mse: 0.0000, Train Loss ce: 0.5167, Train Steps/Sec: 0.26,
 [[34m2026-01-25 23:24:25[39m] (step=0004983) Train Loss mse: 0.0000, Train Loss ce: 0.4746, Train Steps/Sec: 0.40,
 [[34m2026-01-25 23:24:27[39m] (step=0004984) Train Loss mse: 0.0000, Train Loss ce: 0.5120, Train Steps/Sec: 0.38,