Upload checkpoints_vlm_gym_counting_mark_all_one_image_lr2e_5_ce_no_mse_ins/checkpoints_vlm_gym_counting_mark_all_one_image_lr2e_5_ce_no_mse_ins
Browse files
checkpoints_vlm_gym_counting_mark_all_one_image_lr2e_5_ce_no_mse_ins/checkpoints_vlm_gym_counting_mark_all_one_image_lr2e_5_ce_no_mse_ins/wandb/offline-run-20260125_192135-checkpoints_vlm_gym_counting_mark_all_one_image_lr2e_5_ce_no_mse_ins-run0/files/output.log
CHANGED
|
@@ -168,6 +168,13 @@ Preparing Dataset vlm_gym_counting_mark_all_celoss_no_mse_evalonce/vlm_gym_count
|
|
| 168 |
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
|
| 169 |
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
|
| 170 |
ce_avg: 0.5390675663948059, mse_avg: 0.0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 171 |
wandb: Detected [huggingface_hub.inference] in use.
|
| 172 |
wandb: Use W&B Weave for improved LLM call tracing. Install Weave with `pip install weave` then add `import weave` to the top of your script.
|
| 173 |
wandb: For more information, check out the docs at: https://weave-docs.wandb.ai/
|
|
@@ -1195,13 +1202,13 @@ wandb: For more information, check out the docs at: https://weave-docs.wandb.ai/
|
|
| 1195 |
[[34m2026-01-25 20:15:54[39m] (step=0001014) Train Loss mse: 0.0000, Train Loss ce: 0.5307, Train Steps/Sec: 0.38,
|
| 1196 |
[[34m2026-01-25 20:15:57[39m] (step=0001015) Train Loss mse: 0.0000, Train Loss ce: 0.5537, Train Steps/Sec: 0.29,
|
| 1197 |
[[34m2026-01-25 20:16:01[39m] (step=0001016) Train Loss mse: 0.0000, Train Loss ce: 0.5289, Train Steps/Sec: 0.28,
|
| 1198 |
-
|
| 1199 |
-
|
| 1200 |
-
[
|
| 1201 |
-
|
| 1202 |
-
|
| 1203 |
-
|
| 1204 |
-
|
| 1205 |
base_dir is /dev/shm/models/checkpoints_vlm_gym_counting_mark_all_one_image_lr2e_5_ce_no_mse_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_counting_mark_all_one_image_lr2e_5_ce_no_mse_ins_step1500
|
| 1206 |
Preparing Dataset vlm_gym_counting_mark_all_celoss_no_mse_evalonce/vlm_gym_counting_mark_all_val
|
| 1207 |
[eval debug] first 3 batch fingerprints:
|
|
@@ -1216,13 +1223,6 @@ Preparing Dataset vlm_gym_counting_mark_all_celoss_no_mse_evalonce/vlm_gym_count
|
|
| 1216 |
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
|
| 1217 |
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
|
| 1218 |
ce_avg: 0.8126255869865417, mse_avg: 0.0
|
| 1219 |
-
[[34m2026-01-25 20:16:04[39m] (step=0001017) Train Loss mse: 0.0000, Train Loss ce: 0.5496, Train Steps/Sec: 0.30,
|
| 1220 |
-
[[34m2026-01-25 20:16:06[39m] (step=0001018) Train Loss mse: 0.0000, Train Loss ce: 0.5517, Train Steps/Sec: 0.39,
|
| 1221 |
-
[[34m2026-01-25 20:16:08[39m] (step=0001019) Train Loss mse: 0.0000, Train Loss ce: 0.5016, Train Steps/Sec: 0.52,
|
| 1222 |
-
[[34m2026-01-25 20:16:12[39m] (step=0001020) Train Loss mse: 0.0000, Train Loss ce: 0.5413, Train Steps/Sec: 0.27,
|
| 1223 |
-
[[34m2026-01-25 20:16:15[39m] (step=0001021) Train Loss mse: 0.0000, Train Loss ce: 0.5106, Train Steps/Sec: 0.33,
|
| 1224 |
-
[[34m2026-01-25 20:16:19[39m] (step=0001022) Train Loss mse: 0.0000, Train Loss ce: 0.5708, Train Steps/Sec: 0.28,
|
| 1225 |
-
[[34m2026-01-25 20:16:21[39m] (step=0001023) Train Loss mse: 0.0000, Train Loss ce: 0.5335, Train Steps/Sec: 0.38,
|
| 1226 |
[[34m2026-01-25 20:16:24[39m] (step=0001024) Train Loss mse: 0.0000, Train Loss ce: 0.5224, Train Steps/Sec: 0.45,
|
| 1227 |
[[34m2026-01-25 20:16:28[39m] (step=0001025) Train Loss mse: 0.0000, Train Loss ce: 0.5477, Train Steps/Sec: 0.25,
|
| 1228 |
[[34m2026-01-25 20:16:31[39m] (step=0001026) Train Loss mse: 0.0000, Train Loss ce: 0.5332, Train Steps/Sec: 0.33,
|
|
@@ -2616,20 +2616,6 @@ ce_avg: 0.8126255869865417, mse_avg: 0.0
|
|
| 2616 |
[[34m2026-01-25 21:22:01[39m] (step=0002414) Train Loss mse: 0.0000, Train Loss ce: 0.4751, Train Steps/Sec: 0.32,
|
| 2617 |
[[34m2026-01-25 21:22:06[39m] (step=0002415) Train Loss mse: 0.0000, Train Loss ce: 0.5217, Train Steps/Sec: 0.23,
|
| 2618 |
[[34m2026-01-25 21:22:09[39m] (step=0002416) Train Loss mse: 0.0000, Train Loss ce: 0.5465, Train Steps/Sec: 0.33,
|
| 2619 |
-
base_dir is /dev/shm/models/checkpoints_vlm_gym_counting_mark_all_one_image_lr2e_5_ce_no_mse_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_counting_mark_all_one_image_lr2e_5_ce_no_mse_ins_step2500
|
| 2620 |
-
Preparing Dataset vlm_gym_counting_mark_all_celoss_no_mse_evalonce/vlm_gym_counting_mark_all_val
|
| 2621 |
-
[eval debug] first 3 batch fingerprints:
|
| 2622 |
-
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
|
| 2623 |
-
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
|
| 2624 |
-
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
|
| 2625 |
-
ce_avg: 0.9854414463043213, mse_avg: 0.0
|
| 2626 |
-
base_dir is /dev/shm/models/checkpoints_vlm_gym_counting_mark_all_one_image_lr2e_5_ce_no_mse_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_counting_mark_all_one_image_lr2e_5_ce_no_mse_ins_step3000
|
| 2627 |
-
Preparing Dataset vlm_gym_counting_mark_all_celoss_no_mse_evalonce/vlm_gym_counting_mark_all_val
|
| 2628 |
-
[eval debug] first 3 batch fingerprints:
|
| 2629 |
-
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
|
| 2630 |
-
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
|
| 2631 |
-
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
|
| 2632 |
-
ce_avg: 0.9968664646148682, mse_avg: 0.0
|
| 2633 |
[[34m2026-01-25 21:22:11[39m] (step=0002417) Train Loss mse: 0.0000, Train Loss ce: 0.5468, Train Steps/Sec: 0.35,
|
| 2634 |
[[34m2026-01-25 21:22:13[39m] (step=0002418) Train Loss mse: 0.0000, Train Loss ce: 0.4715, Train Steps/Sec: 0.50,
|
| 2635 |
[[34m2026-01-25 21:22:16[39m] (step=0002419) Train Loss mse: 0.0000, Train Loss ce: 0.4833, Train Steps/Sec: 0.45,
|
|
@@ -2646,6 +2632,27 @@ ce_avg: 0.9968664646148682, mse_avg: 0.0
|
|
| 2646 |
[[34m2026-01-25 21:22:46[39m] (step=0002430) Train Loss mse: 0.0000, Train Loss ce: 0.5360, Train Steps/Sec: 0.26,
|
| 2647 |
[[34m2026-01-25 21:22:49[39m] (step=0002431) Train Loss mse: 0.0000, Train Loss ce: 0.4942, Train Steps/Sec: 0.29,
|
| 2648 |
[[34m2026-01-25 21:22:52[39m] (step=0002432) Train Loss mse: 0.0000, Train Loss ce: 0.5290, Train Steps/Sec: 0.43,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2649 |
[[34m2026-01-25 21:22:54[39m] (step=0002433) Train Loss mse: 0.0000, Train Loss ce: 0.5440, Train Steps/Sec: 0.43,
|
| 2650 |
[[34m2026-01-25 21:22:58[39m] (step=0002434) Train Loss mse: 0.0000, Train Loss ce: 0.5570, Train Steps/Sec: 0.29,
|
| 2651 |
[[34m2026-01-25 21:23:00[39m] (step=0002435) Train Loss mse: 0.0000, Train Loss ce: 0.4797, Train Steps/Sec: 0.43,
|
|
@@ -3627,27 +3634,6 @@ ce_avg: 0.9968664646148682, mse_avg: 0.0
|
|
| 3627 |
[[34m2026-01-25 22:09:11[39m] (step=0003411) Train Loss mse: 0.0000, Train Loss ce: 0.4740, Train Steps/Sec: 0.36,
|
| 3628 |
[[34m2026-01-25 22:09:14[39m] (step=0003412) Train Loss mse: 0.0000, Train Loss ce: 0.5099, Train Steps/Sec: 0.34,
|
| 3629 |
[[34m2026-01-25 22:09:17[39m] (step=0003413) Train Loss mse: 0.0000, Train Loss ce: 0.5017, Train Steps/Sec: 0.33,
|
| 3630 |
-
base_dir is /dev/shm/models/checkpoints_vlm_gym_counting_mark_all_one_image_lr2e_5_ce_no_mse_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_counting_mark_all_one_image_lr2e_5_ce_no_mse_ins_step3500
|
| 3631 |
-
Preparing Dataset vlm_gym_counting_mark_all_celoss_no_mse_evalonce/vlm_gym_counting_mark_all_val
|
| 3632 |
-
[eval debug] first 3 batch fingerprints:
|
| 3633 |
-
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
|
| 3634 |
-
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
|
| 3635 |
-
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
|
| 3636 |
-
ce_avg: 0.9615826606750488, mse_avg: 0.0
|
| 3637 |
-
base_dir is /dev/shm/models/checkpoints_vlm_gym_counting_mark_all_one_image_lr2e_5_ce_no_mse_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_counting_mark_all_one_image_lr2e_5_ce_no_mse_ins_step4000
|
| 3638 |
-
Preparing Dataset vlm_gym_counting_mark_all_celoss_no_mse_evalonce/vlm_gym_counting_mark_all_val
|
| 3639 |
-
[eval debug] first 3 batch fingerprints:
|
| 3640 |
-
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
|
| 3641 |
-
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
|
| 3642 |
-
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
|
| 3643 |
-
ce_avg: 0.8829993009567261, mse_avg: 0.0
|
| 3644 |
-
base_dir is /dev/shm/models/checkpoints_vlm_gym_counting_mark_all_one_image_lr2e_5_ce_no_mse_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_counting_mark_all_one_image_lr2e_5_ce_no_mse_ins_step4500
|
| 3645 |
-
Preparing Dataset vlm_gym_counting_mark_all_celoss_no_mse_evalonce/vlm_gym_counting_mark_all_val
|
| 3646 |
-
[eval debug] first 3 batch fingerprints:
|
| 3647 |
-
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
|
| 3648 |
-
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
|
| 3649 |
-
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
|
| 3650 |
-
ce_avg: 0.8653228282928467, mse_avg: 0.0
|
| 3651 |
[[34m2026-01-25 22:09:20[39m] (step=0003414) Train Loss mse: 0.0000, Train Loss ce: 0.4866, Train Steps/Sec: 0.31,
|
| 3652 |
[[34m2026-01-25 22:09:23[39m] (step=0003415) Train Loss mse: 0.0000, Train Loss ce: 0.4945, Train Steps/Sec: 0.35,
|
| 3653 |
[[34m2026-01-25 22:09:26[39m] (step=0003416) Train Loss mse: 0.0000, Train Loss ce: 0.4950, Train Steps/Sec: 0.40,
|
|
@@ -3669,6 +3655,20 @@ ce_avg: 0.8653228282928467, mse_avg: 0.0
|
|
| 3669 |
[[34m2026-01-25 22:10:14[39m] (step=0003432) Train Loss mse: 0.0000, Train Loss ce: 0.5193, Train Steps/Sec: 0.29,
|
| 3670 |
[[34m2026-01-25 22:10:18[39m] (step=0003433) Train Loss mse: 0.0000, Train Loss ce: 0.5097, Train Steps/Sec: 0.26,
|
| 3671 |
[[34m2026-01-25 22:10:21[39m] (step=0003434) Train Loss mse: 0.0000, Train Loss ce: 0.4901, Train Steps/Sec: 0.36,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3672 |
[[34m2026-01-25 22:10:23[39m] (step=0003435) Train Loss mse: 0.0000, Train Loss ce: 0.4939, Train Steps/Sec: 0.43,
|
| 3673 |
[[34m2026-01-25 22:10:27[39m] (step=0003436) Train Loss mse: 0.0000, Train Loss ce: 0.5293, Train Steps/Sec: 0.32,
|
| 3674 |
[[34m2026-01-25 22:10:29[39m] (step=0003437) Train Loss mse: 0.0000, Train Loss ce: 0.5026, Train Steps/Sec: 0.34,
|
|
@@ -5162,13 +5162,6 @@ ce_avg: 0.8653228282928467, mse_avg: 0.0
|
|
| 5162 |
[[34m2026-01-25 23:21:40[39m] (step=0004925) Train Loss mse: 0.0000, Train Loss ce: 0.4804, Train Steps/Sec: 0.41,
|
| 5163 |
[[34m2026-01-25 23:21:43[39m] (step=0004926) Train Loss mse: 0.0000, Train Loss ce: 0.4675, Train Steps/Sec: 0.45,
|
| 5164 |
[[34m2026-01-25 23:21:45[39m] (step=0004927) Train Loss mse: 0.0000, Train Loss ce: 0.4538, Train Steps/Sec: 0.50,
|
| 5165 |
-
base_dir is /dev/shm/models/checkpoints_vlm_gym_counting_mark_all_one_image_lr2e_5_ce_no_mse_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_counting_mark_all_one_image_lr2e_5_ce_no_mse_ins_step5000
|
| 5166 |
-
Preparing Dataset vlm_gym_counting_mark_all_celoss_no_mse_evalonce/vlm_gym_counting_mark_all_val
|
| 5167 |
-
[eval debug] first 3 batch fingerprints:
|
| 5168 |
-
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
|
| 5169 |
-
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
|
| 5170 |
-
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
|
| 5171 |
-
ce_avg: 0.847433865070343, mse_avg: 0.0
|
| 5172 |
[[34m2026-01-25 23:21:47[39m] (step=0004928) Train Loss mse: 0.0000, Train Loss ce: 0.4813, Train Steps/Sec: 0.40,
|
| 5173 |
[[34m2026-01-25 23:21:51[39m] (step=0004929) Train Loss mse: 0.0000, Train Loss ce: 0.5108, Train Steps/Sec: 0.28,
|
| 5174 |
[[34m2026-01-25 23:21:54[39m] (step=0004930) Train Loss mse: 0.0000, Train Loss ce: 0.5030, Train Steps/Sec: 0.32,
|
|
@@ -5223,6 +5216,13 @@ ce_avg: 0.847433865070343, mse_avg: 0.0
|
|
| 5223 |
[[34m2026-01-25 23:24:13[39m] (step=0004979) Train Loss mse: 0.0000, Train Loss ce: 0.5016, Train Steps/Sec: 0.40,
|
| 5224 |
[[34m2026-01-25 23:24:16[39m] (step=0004980) Train Loss mse: 0.0000, Train Loss ce: 0.5237, Train Steps/Sec: 0.34,
|
| 5225 |
[[34m2026-01-25 23:24:19[39m] (step=0004981) Train Loss mse: 0.0000, Train Loss ce: 0.4824, Train Steps/Sec: 0.44,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5226 |
[[34m2026-01-25 23:24:22[39m] (step=0004982) Train Loss mse: 0.0000, Train Loss ce: 0.5167, Train Steps/Sec: 0.26,
|
| 5227 |
[[34m2026-01-25 23:24:25[39m] (step=0004983) Train Loss mse: 0.0000, Train Loss ce: 0.4746, Train Steps/Sec: 0.40,
|
| 5228 |
[[34m2026-01-25 23:24:27[39m] (step=0004984) Train Loss mse: 0.0000, Train Loss ce: 0.5120, Train Steps/Sec: 0.38,
|
|
|
|
| 168 |
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
|
| 169 |
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
|
| 170 |
ce_avg: 0.5390675663948059, mse_avg: 0.0
|
| 171 |
+
base_dir is /dev/shm/models/checkpoints_vlm_gym_counting_mark_all_one_image_lr2e_5_ce_no_mse_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_counting_mark_all_one_image_lr2e_5_ce_no_mse_ins_step1000
|
| 172 |
+
Preparing Dataset vlm_gym_counting_mark_all_celoss_no_mse_evalonce/vlm_gym_counting_mark_all_val
|
| 173 |
+
[eval debug] first 3 batch fingerprints:
|
| 174 |
+
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
|
| 175 |
+
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
|
| 176 |
+
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
|
| 177 |
+
ce_avg: 0.6019229888916016, mse_avg: 0.0
|
| 178 |
wandb: Detected [huggingface_hub.inference] in use.
|
| 179 |
wandb: Use W&B Weave for improved LLM call tracing. Install Weave with `pip install weave` then add `import weave` to the top of your script.
|
| 180 |
wandb: For more information, check out the docs at: https://weave-docs.wandb.ai/
|
|
|
|
| 1202 |
[[34m2026-01-25 20:15:54[39m] (step=0001014) Train Loss mse: 0.0000, Train Loss ce: 0.5307, Train Steps/Sec: 0.38,
|
| 1203 |
[[34m2026-01-25 20:15:57[39m] (step=0001015) Train Loss mse: 0.0000, Train Loss ce: 0.5537, Train Steps/Sec: 0.29,
|
| 1204 |
[[34m2026-01-25 20:16:01[39m] (step=0001016) Train Loss mse: 0.0000, Train Loss ce: 0.5289, Train Steps/Sec: 0.28,
|
| 1205 |
+
[[34m2026-01-25 20:16:04[39m] (step=0001017) Train Loss mse: 0.0000, Train Loss ce: 0.5496, Train Steps/Sec: 0.30,
|
| 1206 |
+
[[34m2026-01-25 20:16:06[39m] (step=0001018) Train Loss mse: 0.0000, Train Loss ce: 0.5517, Train Steps/Sec: 0.39,
|
| 1207 |
+
[[34m2026-01-25 20:16:08[39m] (step=0001019) Train Loss mse: 0.0000, Train Loss ce: 0.5016, Train Steps/Sec: 0.52,
|
| 1208 |
+
[[34m2026-01-25 20:16:12[39m] (step=0001020) Train Loss mse: 0.0000, Train Loss ce: 0.5413, Train Steps/Sec: 0.27,
|
| 1209 |
+
[[34m2026-01-25 20:16:15[39m] (step=0001021) Train Loss mse: 0.0000, Train Loss ce: 0.5106, Train Steps/Sec: 0.33,
|
| 1210 |
+
[[34m2026-01-25 20:16:19[39m] (step=0001022) Train Loss mse: 0.0000, Train Loss ce: 0.5708, Train Steps/Sec: 0.28,
|
| 1211 |
+
[[34m2026-01-25 20:16:21[39m] (step=0001023) Train Loss mse: 0.0000, Train Loss ce: 0.5335, Train Steps/Sec: 0.38,
|
| 1212 |
base_dir is /dev/shm/models/checkpoints_vlm_gym_counting_mark_all_one_image_lr2e_5_ce_no_mse_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_counting_mark_all_one_image_lr2e_5_ce_no_mse_ins_step1500
|
| 1213 |
Preparing Dataset vlm_gym_counting_mark_all_celoss_no_mse_evalonce/vlm_gym_counting_mark_all_val
|
| 1214 |
[eval debug] first 3 batch fingerprints:
|
|
|
|
| 1223 |
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
|
| 1224 |
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
|
| 1225 |
ce_avg: 0.8126255869865417, mse_avg: 0.0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1226 |
[[34m2026-01-25 20:16:24[39m] (step=0001024) Train Loss mse: 0.0000, Train Loss ce: 0.5224, Train Steps/Sec: 0.45,
|
| 1227 |
[[34m2026-01-25 20:16:28[39m] (step=0001025) Train Loss mse: 0.0000, Train Loss ce: 0.5477, Train Steps/Sec: 0.25,
|
| 1228 |
[[34m2026-01-25 20:16:31[39m] (step=0001026) Train Loss mse: 0.0000, Train Loss ce: 0.5332, Train Steps/Sec: 0.33,
|
|
|
|
| 2616 |
[[34m2026-01-25 21:22:01[39m] (step=0002414) Train Loss mse: 0.0000, Train Loss ce: 0.4751, Train Steps/Sec: 0.32,
|
| 2617 |
[[34m2026-01-25 21:22:06[39m] (step=0002415) Train Loss mse: 0.0000, Train Loss ce: 0.5217, Train Steps/Sec: 0.23,
|
| 2618 |
[[34m2026-01-25 21:22:09[39m] (step=0002416) Train Loss mse: 0.0000, Train Loss ce: 0.5465, Train Steps/Sec: 0.33,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2619 |
[[34m2026-01-25 21:22:11[39m] (step=0002417) Train Loss mse: 0.0000, Train Loss ce: 0.5468, Train Steps/Sec: 0.35,
|
| 2620 |
[[34m2026-01-25 21:22:13[39m] (step=0002418) Train Loss mse: 0.0000, Train Loss ce: 0.4715, Train Steps/Sec: 0.50,
|
| 2621 |
[[34m2026-01-25 21:22:16[39m] (step=0002419) Train Loss mse: 0.0000, Train Loss ce: 0.4833, Train Steps/Sec: 0.45,
|
|
|
|
| 2632 |
[[34m2026-01-25 21:22:46[39m] (step=0002430) Train Loss mse: 0.0000, Train Loss ce: 0.5360, Train Steps/Sec: 0.26,
|
| 2633 |
[[34m2026-01-25 21:22:49[39m] (step=0002431) Train Loss mse: 0.0000, Train Loss ce: 0.4942, Train Steps/Sec: 0.29,
|
| 2634 |
[[34m2026-01-25 21:22:52[39m] (step=0002432) Train Loss mse: 0.0000, Train Loss ce: 0.5290, Train Steps/Sec: 0.43,
|
| 2635 |
+
base_dir is /dev/shm/models/checkpoints_vlm_gym_counting_mark_all_one_image_lr2e_5_ce_no_mse_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_counting_mark_all_one_image_lr2e_5_ce_no_mse_ins_step2500
|
| 2636 |
+
Preparing Dataset vlm_gym_counting_mark_all_celoss_no_mse_evalonce/vlm_gym_counting_mark_all_val
|
| 2637 |
+
[eval debug] first 3 batch fingerprints:
|
| 2638 |
+
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
|
| 2639 |
+
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
|
| 2640 |
+
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
|
| 2641 |
+
ce_avg: 0.9854414463043213, mse_avg: 0.0
|
| 2642 |
+
base_dir is /dev/shm/models/checkpoints_vlm_gym_counting_mark_all_one_image_lr2e_5_ce_no_mse_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_counting_mark_all_one_image_lr2e_5_ce_no_mse_ins_step3000
|
| 2643 |
+
Preparing Dataset vlm_gym_counting_mark_all_celoss_no_mse_evalonce/vlm_gym_counting_mark_all_val
|
| 2644 |
+
[eval debug] first 3 batch fingerprints:
|
| 2645 |
+
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
|
| 2646 |
+
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
|
| 2647 |
+
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
|
| 2648 |
+
ce_avg: 0.9968664646148682, mse_avg: 0.0
|
| 2649 |
+
base_dir is /dev/shm/models/checkpoints_vlm_gym_counting_mark_all_one_image_lr2e_5_ce_no_mse_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_counting_mark_all_one_image_lr2e_5_ce_no_mse_ins_step3500
|
| 2650 |
+
Preparing Dataset vlm_gym_counting_mark_all_celoss_no_mse_evalonce/vlm_gym_counting_mark_all_val
|
| 2651 |
+
[eval debug] first 3 batch fingerprints:
|
| 2652 |
+
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
|
| 2653 |
+
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
|
| 2654 |
+
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
|
| 2655 |
+
ce_avg: 0.9615826606750488, mse_avg: 0.0
|
| 2656 |
[[34m2026-01-25 21:22:54[39m] (step=0002433) Train Loss mse: 0.0000, Train Loss ce: 0.5440, Train Steps/Sec: 0.43,
|
| 2657 |
[[34m2026-01-25 21:22:58[39m] (step=0002434) Train Loss mse: 0.0000, Train Loss ce: 0.5570, Train Steps/Sec: 0.29,
|
| 2658 |
[[34m2026-01-25 21:23:00[39m] (step=0002435) Train Loss mse: 0.0000, Train Loss ce: 0.4797, Train Steps/Sec: 0.43,
|
|
|
|
| 3634 |
[[34m2026-01-25 22:09:11[39m] (step=0003411) Train Loss mse: 0.0000, Train Loss ce: 0.4740, Train Steps/Sec: 0.36,
|
| 3635 |
[[34m2026-01-25 22:09:14[39m] (step=0003412) Train Loss mse: 0.0000, Train Loss ce: 0.5099, Train Steps/Sec: 0.34,
|
| 3636 |
[[34m2026-01-25 22:09:17[39m] (step=0003413) Train Loss mse: 0.0000, Train Loss ce: 0.5017, Train Steps/Sec: 0.33,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3637 |
[[34m2026-01-25 22:09:20[39m] (step=0003414) Train Loss mse: 0.0000, Train Loss ce: 0.4866, Train Steps/Sec: 0.31,
|
| 3638 |
[[34m2026-01-25 22:09:23[39m] (step=0003415) Train Loss mse: 0.0000, Train Loss ce: 0.4945, Train Steps/Sec: 0.35,
|
| 3639 |
[[34m2026-01-25 22:09:26[39m] (step=0003416) Train Loss mse: 0.0000, Train Loss ce: 0.4950, Train Steps/Sec: 0.40,
|
|
|
|
| 3655 |
[[34m2026-01-25 22:10:14[39m] (step=0003432) Train Loss mse: 0.0000, Train Loss ce: 0.5193, Train Steps/Sec: 0.29,
|
| 3656 |
[[34m2026-01-25 22:10:18[39m] (step=0003433) Train Loss mse: 0.0000, Train Loss ce: 0.5097, Train Steps/Sec: 0.26,
|
| 3657 |
[[34m2026-01-25 22:10:21[39m] (step=0003434) Train Loss mse: 0.0000, Train Loss ce: 0.4901, Train Steps/Sec: 0.36,
|
| 3658 |
+
base_dir is /dev/shm/models/checkpoints_vlm_gym_counting_mark_all_one_image_lr2e_5_ce_no_mse_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_counting_mark_all_one_image_lr2e_5_ce_no_mse_ins_step4000
|
| 3659 |
+
Preparing Dataset vlm_gym_counting_mark_all_celoss_no_mse_evalonce/vlm_gym_counting_mark_all_val
|
| 3660 |
+
[eval debug] first 3 batch fingerprints:
|
| 3661 |
+
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
|
| 3662 |
+
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
|
| 3663 |
+
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
|
| 3664 |
+
ce_avg: 0.8829993009567261, mse_avg: 0.0
|
| 3665 |
+
base_dir is /dev/shm/models/checkpoints_vlm_gym_counting_mark_all_one_image_lr2e_5_ce_no_mse_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_counting_mark_all_one_image_lr2e_5_ce_no_mse_ins_step4500
|
| 3666 |
+
Preparing Dataset vlm_gym_counting_mark_all_celoss_no_mse_evalonce/vlm_gym_counting_mark_all_val
|
| 3667 |
+
[eval debug] first 3 batch fingerprints:
|
| 3668 |
+
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
|
| 3669 |
+
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
|
| 3670 |
+
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
|
| 3671 |
+
ce_avg: 0.8653228282928467, mse_avg: 0.0
|
| 3672 |
[[34m2026-01-25 22:10:23[39m] (step=0003435) Train Loss mse: 0.0000, Train Loss ce: 0.4939, Train Steps/Sec: 0.43,
|
| 3673 |
[[34m2026-01-25 22:10:27[39m] (step=0003436) Train Loss mse: 0.0000, Train Loss ce: 0.5293, Train Steps/Sec: 0.32,
|
| 3674 |
[[34m2026-01-25 22:10:29[39m] (step=0003437) Train Loss mse: 0.0000, Train Loss ce: 0.5026, Train Steps/Sec: 0.34,
|
|
|
|
| 5162 |
[[34m2026-01-25 23:21:40[39m] (step=0004925) Train Loss mse: 0.0000, Train Loss ce: 0.4804, Train Steps/Sec: 0.41,
|
| 5163 |
[[34m2026-01-25 23:21:43[39m] (step=0004926) Train Loss mse: 0.0000, Train Loss ce: 0.4675, Train Steps/Sec: 0.45,
|
| 5164 |
[[34m2026-01-25 23:21:45[39m] (step=0004927) Train Loss mse: 0.0000, Train Loss ce: 0.4538, Train Steps/Sec: 0.50,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 5165 |
[[34m2026-01-25 23:21:47[39m] (step=0004928) Train Loss mse: 0.0000, Train Loss ce: 0.4813, Train Steps/Sec: 0.40,
|
| 5166 |
[[34m2026-01-25 23:21:51[39m] (step=0004929) Train Loss mse: 0.0000, Train Loss ce: 0.5108, Train Steps/Sec: 0.28,
|
| 5167 |
[[34m2026-01-25 23:21:54[39m] (step=0004930) Train Loss mse: 0.0000, Train Loss ce: 0.5030, Train Steps/Sec: 0.32,
|
|
|
|
| 5216 |
[[34m2026-01-25 23:24:13[39m] (step=0004979) Train Loss mse: 0.0000, Train Loss ce: 0.5016, Train Steps/Sec: 0.40,
|
| 5217 |
[[34m2026-01-25 23:24:16[39m] (step=0004980) Train Loss mse: 0.0000, Train Loss ce: 0.5237, Train Steps/Sec: 0.34,
|
| 5218 |
[[34m2026-01-25 23:24:19[39m] (step=0004981) Train Loss mse: 0.0000, Train Loss ce: 0.4824, Train Steps/Sec: 0.44,
|
| 5219 |
+
base_dir is /dev/shm/models/checkpoints_vlm_gym_counting_mark_all_one_image_lr2e_5_ce_no_mse_ins/eval_used_rows, step_tag is checkpoints_vlm_gym_counting_mark_all_one_image_lr2e_5_ce_no_mse_ins_step5000
|
| 5220 |
+
Preparing Dataset vlm_gym_counting_mark_all_celoss_no_mse_evalonce/vlm_gym_counting_mark_all_val
|
| 5221 |
+
[eval debug] first 3 batch fingerprints:
|
| 5222 |
+
fp[0]: [{'data_indexes': [0], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
|
| 5223 |
+
fp[1]: [{'data_indexes': [8], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
|
| 5224 |
+
fp[2]: [{'data_indexes': [16], 'worker_id': 0, 'dataset_name': 'vlm_gym_counting_mark_all_celoss_no_mse_evalonce'}]
|
| 5225 |
+
ce_avg: 0.847433865070343, mse_avg: 0.0
|
| 5226 |
[[34m2026-01-25 23:24:22[39m] (step=0004982) Train Loss mse: 0.0000, Train Loss ce: 0.5167, Train Steps/Sec: 0.26,
|
| 5227 |
[[34m2026-01-25 23:24:25[39m] (step=0004983) Train Loss mse: 0.0000, Train Loss ce: 0.4746, Train Steps/Sec: 0.40,
|
| 5228 |
[[34m2026-01-25 23:24:27[39m] (step=0004984) Train Loss mse: 0.0000, Train Loss ce: 0.5120, Train Steps/Sec: 0.38,
|