cy0307 committed 20 days ago

Commit

2bd8497

verified ·

1 Parent(s): d96f266

Publish validation-aware Qwen3-Omni diagnostic mirrors

Browse files

Files changed (40) hide show

EVALUATION_PROTOCOL.md +8 -8
PROJECT_STATUS.md +12 -9
README.md +138 -47
RESEARCH_ROADMAP.md +26 -19
RESEARCH_TAKEAWAYS.md +7 -6
data/evaluation_protocol.json +8 -7
data/foundation_model_plan.json +2 -2
data/omni_finetune_verified_result.json +78 -0
data/project_status.json +117 -102
data/research_roadmap.json +32 -9
data/research_roadmap_interactive.json +37 -13
data/research_takeaways.json +13 -9
docs/data/evaluation_protocol.json +8 -7
docs/data/foundation_model_plan.json +2 -2
docs/data/omni_finetune_verified_result.json +78 -0
docs/data/project_status.json +117 -102
docs/data/research_roadmap.json +32 -9
docs/data/research_roadmap_interactive.json +37 -13
docs/data/research_takeaways.json +13 -9
metrics/evaluation_protocol.json +8 -7
metrics/foundation_model_plan.json +2 -2
metrics/omni_finetune_verified_result.json +78 -0
metrics/project_status.json +117 -102
metrics/research_roadmap.json +32 -9
metrics/research_roadmap_interactive.json +37 -13
metrics/research_takeaways.json +13 -9
results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval/PUBLIC_RESULT_SUMMARY.md +25 -0
results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval/dataset/dataset_manifest.json +0 -0
results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval/dataset/episode_manifest.json +0 -0
results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval/eval/RUN_REPORT.md +13 -0
results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval/eval/confusion_matrix.csv +0 -0
results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval/eval/metrics.json +1578 -0
results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval/eval/per_class_metrics.csv +1211 -0
results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval/eval/predictions.csv +0 -0
results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval/eval/predictions.jsonl +0 -0
results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval/package_audit.json +24 -0
results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval/training/progress.jsonl +47 -0
results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval/training/training_metadata.json +79 -0
results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval/validation/eval.json +81 -0
results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval/verified_result_summary.json +168 -0

EVALUATION_PROTOCOL.md CHANGED Viewed

@@ -70,25 +70,25 @@ are not foundation models.
 ## Current Limitations
-- Cross-episode generalization is evaluated in the later multi-episode stage.
 - Feature-vector reconstruction is separate from pixel depth, mesh, NeRF, or Gaussian reconstruction.
-- Qwen3-Omni setup artifacts are preparation artifacts until the selected held-out pilot runs.
 - Full audio-visual representation learning still needs multi-episode training; the current report includes single-episode audio/no-audio ablations.
 ## Scale-Up Gate
-The full Qwen3-Omni fine-tuning pilot requires all of the following before
-reporting held-out model metrics:
 - selected prepared Xperience-10M episodes
 - held-out episode split with no train/test episode leakage
 - manifest, training metadata, progress logs, metrics, predictions, and run report
 - held-out evaluation on test episodes rather than train windows
-Current status: prepared; selected data relay in progress. Read
-`results/omni_finetune/DATA_ACCESS_STATUS.md` and
-`results/omni_finetune/MULTI_EPISODE_ACCESS_STATUS.md` before interpreting any
-Qwen3-Omni artifact.
 ## Machine-Readable Copy

 ## Current Limitations
+- Cross-episode generalization for Qwen3-Omni has a first verified diagnostic pilot, but strong model quality is not yet shown.
 - Feature-vector reconstruction is separate from pixel depth, mesh, NeRF, or Gaussian reconstruction.
+- The verified validation-aware Qwen3-Omni diagnostic pilot has weak held-out metrics and needs structured-output and task-quality improvements before larger model-quality claims.
 - Full audio-visual representation learning still needs multi-episode training; the current report includes single-episode audio/no-audio ablations.
 ## Scale-Up Gate
+The next Qwen3-Omni quality pilot must provide all of the following before
+any improvement in held-out model quality is claimed:
 - selected prepared Xperience-10M episodes
 - held-out episode split with no train/test episode leakage
+- validation samples during training
 - manifest, training metadata, progress logs, metrics, predictions, and run report
 - held-out evaluation on test episodes rather than train windows
+Current status: verified diagnostic pilot; quality target not met. Read
+`docs/data/omni_finetune_verified_result.json` before interpreting any
+Qwen3-Omni metric.
 ## Machine-Readable Copy

PROJECT_STATUS.md CHANGED Viewed

@@ -2,8 +2,8 @@
 This is the fastest way to understand the current research project state.
 It summarizes what has already been implemented from the public
-Xperience-10M sample, what is being prepared for multi-episode training, and
-which artifacts support the next development step.
 ## Research Positioning
@@ -21,15 +21,15 @@ scale-up readiness; it is not presented as final full-dataset model quality.
 | Neural heads | Verified | `scripts/neural_task_models.py`, `results/episode_task_suite/neural_mlp/` | Each task also has a compact PyTorch MLP run over the same feature tensor and chronological split. |
 | Audio contribution study | Verified | `scripts/audio_ablation_and_raw_upgrade.py`, `results/audio_ablation/`, `docs/data/audio_ablation_summary.json` | Audio variants are compared across all 12 task contracts; audio improves the primary metric on 6 of 12 tasks, and a 588-d audio-window representation improves over the baseline audio variant on 6 of 12 tasks. |
 | Research takeaways | Verified | `RESEARCH_TAKEAWAYS.md`, `docs/data/research_takeaways.json`, `scripts/build_research_takeaways.py` | The main result interpretation is generated from committed metrics: chronological class shift, neural gains on dynamics/order/alignment, open retrieval/reconstruction problems, and the need for held-out episodes. |
-| Research roadmap | Current | `RESEARCH_ROADMAP.md`, `docs/data/research_roadmap.json` | The roadmap connects public-sample task development to 128-episode data preparation, Qwen3-Omni LoRA, foundation-model selection, robustness runs, world/policy branches, and the future Xperience-native pretraining goal. |
 | Foundation-model plan | Current | `FOUNDATION_MODEL_PLAN.md`, `docs/data/foundation_model_plan.json` | Qwen3-Omni remains the first trainable held-out LoRA baseline; Cosmos 3 is added as the first world-model/action-generation branch; OpenVLA/openpi/GR00T are policy candidates after action targets are explicit. |
 | Xperience Embodied Foundation Model | Future goal | `XPERIENCE_EMBODIED_FOUNDATION_MODEL_PRETRAINING.md` | A future full-corpus pretraining plan describes target modules, objectives, staged scale-up, hardware ranges, and evaluation for a domain-specific embodied foundation model. |
 | Evaluation protocol | Verified | `EVALUATION_PROTOCOL.md`, `docs/data/evaluation_protocol.json`, `scripts/build_evaluation_protocol.py` | Windowing, chronological split, per-task metrics, leakage controls, and current limitations are generated from committed metric artifacts. |
 | Dataset context | Verified | `XPERIENCE10M_DATASET_CARD_ALIGNMENT.md`, official Xperience-10M and sample cards | The README and dashboard distinguish the public sample used here from the gated full dataset used for the selected multi-episode pilot. |
-| Public dashboard and Hub pages | Verified | GitHub Pages, HF Space, artifact dataset, baseline model repo, Qwen3-Omni LoRA repo | Readers can move between the website, code, derived artifacts, baseline weights, and Qwen3-Omni pilot status without needing internal setup details. |
 | Public package policy | Verified | `DATA_NOTICE.md`, `REPRODUCIBILITY.md` | Raw Xperience-10M data, private gated files, large archives, credentials, and full Qwen weights are not redistributed. |
 | Reproducibility | Verified for the public sample | `REPRODUCIBILITY.md`, `docs/data/reproducibility_matrix.json`, `notes/reproducibility_audit.md` | The public sample workflow has explicit commands, expected outputs, and exact-match reproduction evidence. |
-| Qwen3-Omni fine-tuning | Data preparation; full metrics pending | `results/omni_finetune/DATA_ACCESS_STATUS.md`, `results/omni_finetune/MULTI_EPISODE_ACCESS_STATUS.md` | The gated full dataset is available for a selected 128-episode pilot; final held-out metrics require completed preprocessing, manifest construction, training, and evaluation. |
 | Raw Xperience-10M redistribution | Not included | `DATA_NOTICE.md`, `docs/data/publication_audit.json` | Raw MP4, HDF5, RRD files, private gated data, and full Qwen weights are intentionally excluded. |
 ## Fast Research Route
@@ -53,15 +53,18 @@ scale-up readiness; it is not presented as final full-dataset model quality.
    controls.
 10. Inspect `XPERIENCE10M_DATASET_CARD_ALIGNMENT.md` only if you need the
    detailed upstream dataset-card context.
-11. Inspect `results/omni_finetune/DATA_ACCESS_STATUS.md` before judging
-   Qwen3-Omni scale-up status.
 ## Current Reading Notes
 - Cross-episode generalization is a later multi-episode evaluation target; the
   current results use one public sample episode.
-- Older pilot path names refer to setup files, not completed held-out
-  training results.
 - The current reconstruction task reconstructs feature vectors, not pixel
   depth, meshes, NeRF outputs, or Gaussian splats.
 - Audio is part of the current 8,546-dimensional baseline feature vector.

 This is the fastest way to understand the current research project state.
 It summarizes what has already been implemented from the public
+Xperience-10M sample, what the first multi-episode Qwen3-Omni diagnostic pilot
+shows, and which artifacts support the next development step.
 ## Research Positioning
 | Neural heads | Verified | `scripts/neural_task_models.py`, `results/episode_task_suite/neural_mlp/` | Each task also has a compact PyTorch MLP run over the same feature tensor and chronological split. |
 | Audio contribution study | Verified | `scripts/audio_ablation_and_raw_upgrade.py`, `results/audio_ablation/`, `docs/data/audio_ablation_summary.json` | Audio variants are compared across all 12 task contracts; audio improves the primary metric on 6 of 12 tasks, and a 588-d audio-window representation improves over the baseline audio variant on 6 of 12 tasks. |
 | Research takeaways | Verified | `RESEARCH_TAKEAWAYS.md`, `docs/data/research_takeaways.json`, `scripts/build_research_takeaways.py` | The main result interpretation is generated from committed metrics: chronological class shift, neural gains on dynamics/order/alignment, open retrieval/reconstruction problems, and the need for held-out episodes. |
+| Research roadmap | Current | `RESEARCH_ROADMAP.md`, `docs/data/research_roadmap.json` | The roadmap connects public-sample task development to the verified validation-aware Qwen3-Omni diagnostic baseline, structured-output improvement pass, robustness runs, world/policy branches, and the future Xperience-native pretraining goal. |
 | Foundation-model plan | Current | `FOUNDATION_MODEL_PLAN.md`, `docs/data/foundation_model_plan.json` | Qwen3-Omni remains the first trainable held-out LoRA baseline; Cosmos 3 is added as the first world-model/action-generation branch; OpenVLA/openpi/GR00T are policy candidates after action targets are explicit. |
 | Xperience Embodied Foundation Model | Future goal | `XPERIENCE_EMBODIED_FOUNDATION_MODEL_PRETRAINING.md` | A future full-corpus pretraining plan describes target modules, objectives, staged scale-up, hardware ranges, and evaluation for a domain-specific embodied foundation model. |
 | Evaluation protocol | Verified | `EVALUATION_PROTOCOL.md`, `docs/data/evaluation_protocol.json`, `scripts/build_evaluation_protocol.py` | Windowing, chronological split, per-task metrics, leakage controls, and current limitations are generated from committed metric artifacts. |
 | Dataset context | Verified | `XPERIENCE10M_DATASET_CARD_ALIGNMENT.md`, official Xperience-10M and sample cards | The README and dashboard distinguish the public sample used here from the gated full dataset used for the selected multi-episode pilot. |
+| Public dashboard and Hub pages | Verified | GitHub Pages, HF Space, artifact dataset, baseline model repo, Qwen3-Omni LoRA repo | Readers can move between the website, code, derived artifacts, baseline weights, and Qwen3-Omni pilot status without needing local infrastructure details. |
 | Public package policy | Verified | `DATA_NOTICE.md`, `REPRODUCIBILITY.md` | Raw Xperience-10M data, private gated files, large archives, credentials, and full Qwen weights are not redistributed. |
 | Reproducibility | Verified for the public sample | `REPRODUCIBILITY.md`, `docs/data/reproducibility_matrix.json`, `notes/reproducibility_audit.md` | The public sample workflow has explicit commands, expected outputs, and exact-match reproduction evidence. |
+| Qwen3-Omni fine-tuning | Verified validation-aware diagnostic held-out pilot; quality target not met | `docs/data/omni_finetune_verified_result.json`, `results/omni_finetune/verified_public/`, `scripts/omni/package_verified_omni_result.py`, `scripts/omni/audit_verified_omni_package.py` | The selected 96/16/16 episode split produced a validation-aware public-safe held-out package with 3,808 exported windows, 512 validation windows, and 448 test predictions. JSON validity is 87.50%, below the 98% target, so the result is a diagnostic baseline and the next pass should focus on structured-output improvements and error analysis. |
 | Raw Xperience-10M redistribution | Not included | `DATA_NOTICE.md`, `docs/data/publication_audit.json` | Raw MP4, HDF5, RRD files, private gated data, and full Qwen weights are intentionally excluded. |
 ## Fast Research Route
    controls.
 10. Inspect `XPERIENCE10M_DATASET_CARD_ALIGNMENT.md` only if you need the
    detailed upstream dataset-card context.
+11. Inspect `docs/data/omni_finetune_verified_result.json` before judging the
+   Qwen3-Omni diagnostic pilot.
 ## Current Reading Notes
 - Cross-episode generalization for the public-sample task suite is a later
   multi-episode evaluation target; the task-suite results use one public sample episode.
+- Public-facing fine-tuning results should come from the verified result
+  package, not from live process logs or setup-only artifacts.
+- The first Qwen3-Omni held-out package verifies that the pipeline runs end to
+  end; it does not demonstrate strong model quality: JSON validity is 87.50%,
+  action macro-F1 is 0.0027, and subtask accuracy is 0.0067.
 - The current reconstruction task reconstructs feature vectors, not pixel
   depth, meshes, NeRF outputs, or Gaussian splats.
 - Audio is part of the current 8,546-dimensional baseline feature vector.

README.md CHANGED Viewed

@@ -1,28 +1,3 @@
----
-license: other
-library_name: pytorch
-tags:
-  - robotics
-  - embodied-ai
-  - multimodal
-  - ropedia
-  - xperience-10m
-  - evaluation
-  - baseline
-  - neural-network
-  - pytorch
-  - retrieval
-  - audio
-datasets:
-  - ropedia-ai/xperience-10m
-  - ropedia-ai/xperience-10m-sample
-  - cy0307/ropedia-xperience-10m-task-suite-artifacts
-metrics:
-  - accuracy
-  - f1
-  - mean_squared_error
----
 # Ropedia Xperience-10M Task Suite
 [![Website](https://img.shields.io/badge/site-GitHub%20Pages-1f63e9)](https://chaoyue0307.github.io/ropedia-xperience-10m-task-suite/)
@@ -34,7 +9,7 @@ metrics:
 [![License](https://img.shields.io/badge/license-code%20MIT%20%2B%20data%20terms-ccffa0)](LICENSE)
 <p align="center">
-  <img src="assets/brand/xperience10m-logo-social-card.png" alt="Ropedia Xperience-10M Task Suite logo card" width="760">
 </p>
 A research-development project built on the public Xperience-10M sample episode
@@ -99,7 +74,7 @@ before the multi-episode omni-model stage becomes a real held-out evaluation.
 | Task suite | 12 human-readable embodied-AI task contracts with input, process, output, metrics, predictions, and case-study walkthroughs |
 | Baselines | Minimal linear/ridge/logistic heads plus compact PyTorch MLP task heads over the same chronological split |
 | Research directions | Task mapping and extension probes for human modeling, 3D/4D reconstruction, egocentric interaction, and world modeling |
-| Scale-up path | The gated Xperience-10M dataset is available for a selected 128-episode pilot before Qwen3-Omni LoRA, followed by Cosmos 3/world-model and VLA/policy branches; the long-term goal is an Xperience-native embodied foundation model if full-corpus data, storage, and compute are available |
 | Public surfaces | GitHub repo, GitHub Pages dashboard, GHCR static-site package, HF Space, HF artifact dataset, HF baseline-model repo, and HF collection |
 For the fastest interpretation of the current metrics, start with
@@ -136,7 +111,7 @@ This project is best read as a staged embodied-AI research study:
 | Task suite | Twelve human-readable tasks cover action, procedure, contact, object, language, retrieval, reconstruction, order, and synchronization questions. | [`RESEARCH_TAKEAWAYS.md`](RESEARCH_TAKEAWAYS.md), [`results/episode_task_suite/summary_report.json`](results/episode_task_suite/summary_report.json) |
 | Baselines | Minimal heads and compact PyTorch MLP heads provide a first controlled comparison on the same chronological split. | [`results/episode_task_suite/neural_mlp/`](results/episode_task_suite/neural_mlp/) |
 | Diagnostics | Audio contribution, modality ablations, timeline overlays, object labels, and alignment stress tests show which signals are useful and which tasks remain hard. | [`results/audio_ablation/AUDIO_ABLATION_SUMMARY.md`](results/audio_ablation/AUDIO_ABLATION_SUMMARY.md), [`docs/single_episode_explorer.html`](docs/single_episode_explorer.html) |
-| Scale-up | A selected 128-episode Qwen3-Omni LoRA pilot is being prepared from the gated dataset; held-out model metrics will be added only after training and evaluation finish. The long-term native-pretraining plan is documented separately as a future research goal. | [`RESEARCH_ROADMAP.md`](RESEARCH_ROADMAP.md), [`FOUNDATION_MODEL_PLAN.md`](FOUNDATION_MODEL_PLAN.md), [`XPERIENCE_EMBODIED_FOUNDATION_MODEL_PRETRAINING.md`](XPERIENCE_EMBODIED_FOUNDATION_MODEL_PRETRAINING.md), [`results/omni_finetune/DATA_ACCESS_STATUS.md`](results/omni_finetune/DATA_ACCESS_STATUS.md) |
 Detailed dataset notes, reproduction checks, and generated JSON reports are
 included for readers who want to inspect the implementation, but they are
@@ -158,7 +133,7 @@ They give the current research state in one compact table:
 | Dataset context | Official Xperience-10M links, sample-vs-gated-data boundary, modality coverage, and redistribution policy are documented |
 | Evaluation protocol | Verified generated protocol for windowing, split policy, leakage controls, and per-task metrics |
 | Website and Hub pages | Public dashboard, Hugging Face Space, artifact dataset, baseline model repo, and collection use the same project framing and links |
-| Qwen3-Omni multi-episode pilot | The gated Xperience-10M dataset is available for selected 128-episode preparation, with full metrics pending completed preprocessing, training, and held-out evaluation |
 | Raw Xperience-10M data / full Qwen weights | Not redistributed |
 ## 90-Second Research Project Path
@@ -177,7 +152,7 @@ If you are reading the project cold, open these in order:
 | 8 | What research directions does this support? | [`RESEARCH_ROADMAP.md`](RESEARCH_ROADMAP.md), [`docs/data/research_directions.json`](docs/data/research_directions.json), [`docs/data/research_direction_extensions.json`](docs/data/research_direction_extensions.json) | The tasks are mapped to human modeling, 3D/4D reconstruction, egocentric interaction, and world modeling. |
 | 9 | Which foundation model comes next? | [`FOUNDATION_MODEL_PLAN.md`](FOUNDATION_MODEL_PLAN.md), [`docs/data/foundation_model_plan.json`](docs/data/foundation_model_plan.json), [`XPERIENCE_EMBODIED_FOUNDATION_MODEL_PRETRAINING.md`](XPERIENCE_EMBODIED_FOUNDATION_MODEL_PRETRAINING.md) | Qwen3-Omni is the first held-out LoRA baseline; Cosmos 3 is the first world-model branch; policy models wait for explicit action targets; Xperience-native pretraining is the full-corpus future goal. |
 | 10 | How do I reproduce it? | [`REPRODUCIBILITY.md`](REPRODUCIBILITY.md), [`notes/reproducibility_audit.md`](notes/reproducibility_audit.md) | Public commands and expected outputs are documented for the sample-episode task suite. |
-| 11 | What is still pending? | [`DATA_ACCESS_STATUS.md`](results/omni_finetune/DATA_ACCESS_STATUS.md), [`MULTI_EPISODE_ACCESS_STATUS.md`](results/omni_finetune/MULTI_EPISODE_ACCESS_STATUS.md) | Multi-episode Qwen3-Omni model quality will be reported after preprocessing, training, and held-out evaluation complete. |
 A compact reader-path summary is available at
 [`docs/data/project_packet.json`](docs/data/project_packet.json).
@@ -233,8 +208,9 @@ The current verified public-sample subset is:
 Detailed dataset notes are available in
 [`XPERIENCE10M_DATASET_CARD_ALIGNMENT.md`](XPERIENCE10M_DATASET_CARD_ALIGNMENT.md)
 for readers who need the full upstream-card and access-term context. The
-practical boundary is simple: current results come from the public sample, and
-multi-episode model quality is pending the selected held-out pilot.
 Start with the visual dashboard:
@@ -504,8 +480,9 @@ python scripts/train_all_modalities_model.py --workspace /path/to/workspace
 ## Xperience-10M Fine-Tuning Exploration
 This repo includes a first Qwen3-Omni fine-tuning path over Xperience-10M. The
-current artifacts are setup-stage evidence, with held-out multi-episode metrics
-pending completed staging, preprocessing, training, and evaluation.
 The useful distinction is:
 - direct Qwen3-Omni inputs: RGB/fisheye video, embedded MP4 audio, and language
@@ -522,11 +499,12 @@ adds depth/pose/mocap/IMU features, LoRA adapters are trained on prepared
 train/val episodes, and sealed held-out test evaluation produces predictions,
 metrics, run reports, and upload-ready adapter artifacts.
-The current scale-up artifacts show that the export, manifest, sensor-feature,
-LoRA, and evaluation scripts can run on the available sample episode. They do
-not show a real multi-episode result. A real pilot requires valid prepared
-episodes, held-out episode splits, training metadata, predictions, metrics, and
-a run report; the current selected pilot target is 128 episodes.
 ### Sample Count Decision
@@ -564,9 +542,15 @@ Current status in this repo:
 - public_sample_valid_episodes: 1 (degraded-valid: annotation + fisheye_cam0.mp4)
 - gated_metadata_audit: 12,102 complete visible episodes across 802 complete sessions
-- selected_episode_plan: 128 metadata-balanced episodes, 96/16/16 train/val/test
 - selected_download_size: 277.71 GiB excluding `visualization.rrd`
-- ready_for_held_out_pilot: false until the selected episodes are fully prepared and checked
 - gated dataset: available for selected multi-episode data preparation
 - source_discovery: `results/omni_finetune/source_discovery.json`
 - data_status: `results/omni_finetune/DATA_ACCESS_STATUS.md`
@@ -615,8 +599,8 @@ Once all selected episodes are complete, use the fixed selected-episode split:
 - 16 held-out test episodes.
 The clean full-run launcher validates the selected split, exports all splits in
-parallel, trains Qwen3-Omni LoRA on train/val only, then evaluates on the held-
-out test split:
 ```bash
 RUN_ID=xperience10m_qwen3_omni_128ep_fullsplit_fast8gpu \
@@ -624,9 +608,17 @@ DATA_ROOT=/path/to/xperience10m_128 \
 SELECTION_JSON=results/omni_finetune/xperience10m_128_episode_selection.json \
 MODEL_DIR=/path/to/Qwen__Qwen3-Omni-30B-A3B-Instruct \
 NUM_PROCESSES=8 \
 scripts/omni/run_128_fullsplit_parallel_export_8gpu.sh
 ```
 Monitor the run with:
 ```bash
@@ -634,6 +626,10 @@ python scripts/omni/monitor_omni_progress.py \
   --run-id xperience10m_qwen3_omni_128ep_fullsplit_fast8gpu
 ```
 Validate the run artifacts stage by stage:
 ```bash
@@ -647,6 +643,62 @@ python scripts/omni/validate_omni_finetune_run.py \
   --min-json-validity 0.98
 ```
 After dataset export, a model-neutral window index can be created for future
 backbones:
@@ -659,14 +711,18 @@ This produces `window_index.jsonl` and `window_index_manifest.json` so
 Cosmos-style world models and VLA/policy branches can reuse the same split-checked
 windows without depending on Qwen chat-message records.
-### Uploading the pilot Qwen3-Omni LoRA
-A prepared upload package is available at `results/omni_finetune/hf_upload`.
 ```bash
 python3 scripts/omni/upload_qwen3_omni_lora_to_hf.py \
-  --repo-id cy0307/ropedia-qwen3-omni-lora-readiness \
-  --source-dir results/omni_finetune/hf_upload \
   --message "Upload Xperience-10M Qwen3-Omni LoRA pilot"
 ```
@@ -703,6 +759,41 @@ registry can be checked with:
 python scripts/omni/backbone_registry.py --validate --json
 ```
 ## Additional Development Directions
 Beyond backbone selection and fine-tuning, Xperience-10M supports several

 # Ropedia Xperience-10M Task Suite
 [![Website](https://img.shields.io/badge/site-GitHub%20Pages-1f63e9)](https://chaoyue0307.github.io/ropedia-xperience-10m-task-suite/)
 [![License](https://img.shields.io/badge/license-code%20MIT%20%2B%20data%20terms-ccffa0)](LICENSE)
 <p align="center">
+  <img src="docs/assets/brand/xperience10m-logo-social-card.png" alt="Ropedia Xperience-10M Task Suite logo card" width="760">
 </p>
 A research-development project built on the public Xperience-10M sample episode
 | Task suite | 12 human-readable embodied-AI task contracts with input, process, output, metrics, predictions, and case-study walkthroughs |
 | Baselines | Minimal linear/ridge/logistic heads plus compact PyTorch MLP task heads over the same chronological split |
 | Research directions | Task mapping and extension probes for human modeling, 3D/4D reconstruction, egocentric interaction, and world modeling |
+| Scale-up path | A first selected-episode Qwen3-Omni LoRA diagnostic pilot has completed on the 96/16/16 split; it proves the multi-episode export/train/eval/package loop, but the weak held-out metrics make it a baseline for error analysis rather than a strong model. Cosmos 3/world-model and VLA/policy branches reuse the same split and package contract after their targets are implemented. |
 | Public surfaces | GitHub repo, GitHub Pages dashboard, GHCR static-site package, HF Space, HF artifact dataset, HF baseline-model repo, and HF collection |
 For the fastest interpretation of the current metrics, start with
 | Task suite | Twelve human-readable tasks cover action, procedure, contact, object, language, retrieval, reconstruction, order, and synchronization questions. | [`RESEARCH_TAKEAWAYS.md`](RESEARCH_TAKEAWAYS.md), [`results/episode_task_suite/summary_report.json`](results/episode_task_suite/summary_report.json) |
 | Baselines | Minimal heads and compact PyTorch MLP heads provide a first controlled comparison on the same chronological split. | [`results/episode_task_suite/neural_mlp/`](results/episode_task_suite/neural_mlp/) |
 | Diagnostics | Audio contribution, modality ablations, timeline overlays, object labels, and alignment stress tests show which signals are useful and which tasks remain hard. | [`results/audio_ablation/AUDIO_ABLATION_SUMMARY.md`](results/audio_ablation/AUDIO_ABLATION_SUMMARY.md), [`docs/single_episode_explorer.html`](docs/single_episode_explorer.html) |
+| Scale-up | The selected 128-episode Qwen3-Omni LoRA diagnostic pilot has a verified validation-aware held-out package: 96/16/16 selected episodes, 3,808 exported windows, 512 validation windows, 448 held-out test windows, and public-safe metrics/predictions. JSON validity is 87.50%, below the 98% target, so the next pass focuses on structured-output reliability and task-quality error analysis. | [`RESEARCH_ROADMAP.md`](RESEARCH_ROADMAP.md), [`FOUNDATION_MODEL_PLAN.md`](FOUNDATION_MODEL_PLAN.md), [`docs/data/omni_finetune_verified_result.json`](docs/data/omni_finetune_verified_result.json), [`results/omni_finetune/verified_public/`](results/omni_finetune/verified_public/) |
 Detailed dataset notes, reproduction checks, and generated JSON reports are
 included for readers who want to inspect the implementation, but they are
 | Dataset context | Official Xperience-10M links, sample-vs-gated-data boundary, modality coverage, and redistribution policy are documented |
 | Evaluation protocol | Verified generated protocol for windowing, split policy, leakage controls, and per-task metrics |
 | Website and Hub pages | Public dashboard, Hugging Face Space, artifact dataset, baseline model repo, and collection use the same project framing and links |
+| Qwen3-Omni multi-episode pilot | Verified diagnostic result package exists for the selected 96/16/16 episode split; current held-out metrics are weak and below the JSON-validity quality target |
 | Raw Xperience-10M data / full Qwen weights | Not redistributed |
 ## 90-Second Research Project Path
 | 8 | What research directions does this support? | [`RESEARCH_ROADMAP.md`](RESEARCH_ROADMAP.md), [`docs/data/research_directions.json`](docs/data/research_directions.json), [`docs/data/research_direction_extensions.json`](docs/data/research_direction_extensions.json) | The tasks are mapped to human modeling, 3D/4D reconstruction, egocentric interaction, and world modeling. |
 | 9 | Which foundation model comes next? | [`FOUNDATION_MODEL_PLAN.md`](FOUNDATION_MODEL_PLAN.md), [`docs/data/foundation_model_plan.json`](docs/data/foundation_model_plan.json), [`XPERIENCE_EMBODIED_FOUNDATION_MODEL_PRETRAINING.md`](XPERIENCE_EMBODIED_FOUNDATION_MODEL_PRETRAINING.md) | Qwen3-Omni is the first held-out LoRA baseline; Cosmos 3 is the first world-model branch; policy models wait for explicit action targets; Xperience-native pretraining is the full-corpus future goal. |
 | 10 | How do I reproduce it? | [`REPRODUCIBILITY.md`](REPRODUCIBILITY.md), [`notes/reproducibility_audit.md`](notes/reproducibility_audit.md) | Public commands and expected outputs are documented for the sample-episode task suite. |
+| 11 | What is still pending? | [`docs/data/omni_finetune_verified_result.json`](docs/data/omni_finetune_verified_result.json), [`DATA_ACCESS_STATUS.md`](results/omni_finetune/DATA_ACCESS_STATUS.md), [`MULTI_EPISODE_ACCESS_STATUS.md`](results/omni_finetune/MULTI_EPISODE_ACCESS_STATUS.md) | The first held-out diagnostic pilot is verified; strong model quality remains pending because JSON validity is 87.50% and action/subtask metrics remain weak. |
 A compact reader-path summary is available at
 [`docs/data/project_packet.json`](docs/data/project_packet.json).
 Detailed dataset notes are available in
 [`XPERIENCE10M_DATASET_CARD_ALIGNMENT.md`](XPERIENCE10M_DATASET_CARD_ALIGNMENT.md)
 for readers who need the full upstream-card and access-term context. The
+practical boundary is simple: current task-suite results come from the public
+sample, and the first multi-episode Qwen3-Omni diagnostic pilot is verified but
+does not yet demonstrate strong model quality.
 Start with the visual dashboard:
 ## Xperience-10M Fine-Tuning Exploration
 This repo includes a first Qwen3-Omni fine-tuning path over Xperience-10M. The
+repository separates public-sample evidence from multi-episode fine-tuning
+artifacts. The validation-aware selected-episode held-out package is now verified as a
+diagnostic pilot, not a strong final model.
 The useful distinction is:
 - direct Qwen3-Omni inputs: RGB/fisheye video, embedded MP4 audio, and language
 train/val episodes, and sealed held-out test evaluation produces predictions,
 metrics, run reports, and upload-ready adapter artifacts.
+The scale-up path requires valid prepared episodes, held-out episode splits,
+training metadata, predictions, metrics, and a run report. A result is ready
+for public README, website, or Hugging Face updates only after the validator
+passes and `scripts/omni/package_verified_omni_result.py` creates a
+public-safe derived-artifact package. The current verified package is listed in
+[`docs/data/omni_finetune_verified_result.json`](docs/data/omni_finetune_verified_result.json).
 ### Sample Count Decision
 - public_sample_valid_episodes: 1 (degraded-valid: annotation + fisheye_cam0.mp4)
 - gated_metadata_audit: 12,102 complete visible episodes across 802 complete sessions
+- selected_episode_plan: 128 source-balanced episodes, 96/16/16 train/val/test
 - selected_download_size: 277.71 GiB excluding `visualization.rrd`
+- verified_validation_aware_diagnostic_package: true
+- selected_split: 96 train / 16 validation / 16 held-out test episodes
+- exported_windows: 2,848 train / 512 validation / 448 test
+- validation_samples_used: 512
+- held_out_eval: 448 test windows from 14 exported test episodes (2 of the 16 selected test episodes were not exported)
+- train_loss / val_loss: 0.4130 / 0.0331
+- current_quality_target: JSON validity >= 98%; the current held-out value is 87.50%, so the target is not yet met
 - gated dataset: available for selected multi-episode data preparation
 - source_discovery: `results/omni_finetune/source_discovery.json`
 - data_status: `results/omni_finetune/DATA_ACCESS_STATUS.md`
 - 16 held-out test episodes.
 The clean full-run launcher validates the selected split, exports all splits in
+parallel, trains Qwen3-Omni LoRA on train episodes while optionally monitoring
+validation loss, then evaluates on the held-out test split:
 ```bash
 RUN_ID=xperience10m_qwen3_omni_128ep_fullsplit_fast8gpu \
 SELECTION_JSON=results/omni_finetune/xperience10m_128_episode_selection.json \
 MODEL_DIR=/path/to/Qwen__Qwen3-Omni-30B-A3B-Instruct \
 NUM_PROCESSES=8 \
+TRAIN_VAL_SPLIT=val \
+MAX_VAL_SAMPLES=512 \
 scripts/omni/run_128_fullsplit_parallel_export_8gpu.sh
 ```
+The current verified diagnostic package uses the same selected split and 8-GPU
+training path, records validation loss over 512 validation windows, and keeps
+the held-out test split sealed for final evaluation. The next pass should keep
+this package contract while tightening JSON decoding, target formatting, and
+action/subtask error analysis.
 Monitor the run with:
 ```bash
   --run-id xperience10m_qwen3_omni_128ep_fullsplit_fast8gpu
 ```
+The monitor reads training `progress.jsonl`, new evaluator partial-prediction
+progress, and legacy generation logs, so long held-out evals can still expose
+sample-level progress even before final metrics are written.
 Validate the run artifacts stage by stage:
 ```bash
   --min-json-validity 0.98
 ```
+After the eval validator passes, create the public-safe result package:
+```bash
+python scripts/omni/package_verified_omni_result.py \
+  --dataset-run-id xperience10m_qwen3_omni_128ep_fullsplit_fast8gpu \
+  --train-run-id <train_run_id> \
+  --eval-run-id <eval_run_id>
+```
+For long-running remote jobs, the packaging step can be watched automatically:
+```bash
+python scripts/omni/watch_verified_omni_package.py \
+  --dataset-run-id xperience10m_qwen3_omni_128ep_fullsplit_fast8gpu \
+  --train-run-id <train_run_id> \
+  --eval-run-id <eval_run_id>
+```
+While waiting, the watcher can append `eval_progress_observed` events from
+partial prediction files or legacy generation logs. This keeps the package
+status file useful during long held-out evaluations.
+The package copies only small derived artifacts such as metrics, predictions,
+confusion matrices, run reports, manifests, validation summaries, and training
+metadata. The exact required eval files and primary metrics come from the
+selected backbone contract in `configs/omni_backbones`, so Qwen3-Omni,
+Cosmos-style world models, and VLA/policy branches can share the same verified
+publication gate once their model-specific evaluators exist. The package
+excludes raw Xperience-10M files, base-model weights, adapter or checkpoint
+weights, full checkpoints, and large archives.
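+For hardware setups that can run multiple eval workers, the Qwen evaluator also
+supports deterministic sample shards. Run one evaluator per shard with the same
+`--sample-stride` and a distinct `--sample-offset` (shard 0 of 4 is shown
+below), then merge the shard directories: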
+```bash
+python scripts/omni/eval_qwen3_omni_lora.py \
+  --dataset-jsonl results/omni_finetune/xperience10m_qwen3_omni_128ep_fullsplit_fast8gpu_dataset/dataset.jsonl \
+  --adapter-dir checkpoints/<train_run_id>/adapter_lora \
+  --run-id <eval_shard_0> \
+  --eval-split test \
+  --sample-offset 0 \
+  --sample-stride 4
+python scripts/omni/merge_qwen3_omni_eval_shards.py \
+  --dataset-jsonl results/omni_finetune/xperience10m_qwen3_omni_128ep_fullsplit_fast8gpu_dataset/dataset.jsonl \
+  --output-dir results/omni_finetune/<merged_eval_run_id> \
+  --shard-dir results/omni_finetune/<eval_shard_0> \
+  --shard-dir results/omni_finetune/<eval_shard_1> \
+  --shard-dir results/omni_finetune/<eval_shard_2> \
+  --shard-dir results/omni_finetune/<eval_shard_3>
+```
+Only the merged eval directory should be validated and reported publicly,
+because the merger checks coverage and recomputes the metrics from all
+held-out predictions.
 After dataset export, a model-neutral window index can be created for future
 backbones:
 style world models and VLA/policy branches can reuse the same split-checked
 windows without depending on Qwen chat-message records.
+### Uploading Qwen3-Omni LoRA artifacts
+The public-safe verified package intentionally excludes raw data, base Qwen
+weights, LoRA weights, and full checkpoints. Adapter upload is a separate step:
+use it only when the intended adapter directory is present and the model card
+clearly distinguishes older smoke weights from the selected-episode diagnostic
+or validation-aware run.
 ```bash
 python3 scripts/omni/upload_qwen3_omni_lora_to_hf.py \
+  --repo-id cy0307/ropedia-qwen3-omni-lora-smoke \
+  --source-dir /path/to/adapter_upload_package \
   --message "Upload Xperience-10M Qwen3-Omni LoRA pilot"
 ```
 python scripts/omni/backbone_registry.py --validate --json
 ```
+Verify that every configured backbone can pass the public-safe packaging
+contract on synthetic derived artifacts:
+```bash
+python scripts/omni/smoke_test_backbone_packaging.py
+```
+After a real held-out package is created, audit it before updating README,
+website, or Hugging Face pages:
+```bash
+python scripts/omni/audit_verified_omni_package.py \
+  --package-dir results/omni_finetune/verified_public/<eval_run_id>
+```
+Create a new planned backbone branch from an existing contract template with:
+```bash
+python scripts/omni/scaffold_omni_backbone.py \
+  --template-backbone policy_vla_branch \
+  --id new_policy_branch \
+  --display-name "New Policy Branch" \
+  --model-family "Model family name" \
+  --dataset-contract xperience10m_observation_action_v1 \
+  --training-objective observation_to_action_policy \
+  --checkpoint-gate policy_checkpoint_action_space_and_normalizer \
+  --dry-run
+```
+Each backbone config declares the checkpoint gate, required train/eval files,
+allowed public artifacts, and forbidden private or heavyweight artifacts. This
+keeps Qwen3-Omni, Cosmos-style world models, and policy/VLA branches on the same
+split, validation, and publication discipline even though their training targets
+are different.
 ## Additional Development Directions
 Beyond backbone selection and fine-tuning, Xperience-10M supports several

RESEARCH_ROADMAP.md CHANGED Viewed

@@ -10,26 +10,29 @@ should exist before the stage is treated as complete.
 | Stage | Status | Entry condition | Research deliverables | Completion evidence |
 | --- | --- | --- | --- | --- |
 | Public-Sample Task Lab | Implemented | One public Xperience-10M sample episode is available. | 1,161 aligned windows, 12 task contracts, minimal heads, neural MLP heads, modality atlas, task walkthroughs, and derived figures. | `PROJECT_STATUS.md`, `EVALUATION_PROTOCOL.md`, `RESEARCH_TAKEAWAYS.md`, `docs/data/summary_metrics.json`, `results/episode_task_suite/summary_report.json` |
-| Multi-Episode Data Preparation | Active | Gated dataset availability and enough storage for selected episodes. | 128 selected episodes, episode manifest, missing-view manifest, held-out episode split, and source-discovery report. | `results/omni_finetune/DATA_ACCESS_STATUS.md`, `results/omni_finetune/MULTI_EPISODE_ACCESS_STATUS.md`, `results/omni_finetune/source_discovery.json` |
-| Qwen3-Omni LoRA Pilot | Next | Selected episodes prepared locally with no train/test episode leakage. | Dataset JSONL/media manifests, LoRA adapter checkpoint, progress logs, held-out predictions, metrics, confusion matrices, and run report. | `dataset_manifest.json`, `training_metadata.json`, `progress.jsonl`, `metrics.json`, `predictions.jsonl`, `RUN_REPORT.md` |
-| Foundation-Model Selection Matrix | Next | The selected pilot episodes are prepared, or a 3-8 episode dry run is available for preprocessing checks. | Backbone registry, Cosmos 3 world-model branch plan, Qwen3-Omni baseline plan, OpenVLA/openpi/GR00T policy candidates, and model-specific evaluation additions. | `FOUNDATION_MODEL_PLAN.md`, `docs/data/foundation_model_plan.json`, `research_roadmap_interactive.json` |
-| 64-128 Episode Robustness Run | Planned | The selected-episode pilot trains and evaluates cleanly. | Split-by-session metrics, modality ablations, calibration/object/language error analysis, and sensitivity to missing views. | Held-out metrics by session, task, and modality; ablation tables; qualitative error analysis. |
 | Cosmos 3 and Policy-Model Extensions | Planned | Enough multi-episode data, compute budget, and model-specific action/world-state targets. | Cosmos 3 future-window or action-conditioned world-model probes, OpenVLA/openpi/GR00T action-policy baselines, modality-conditioning checks, affordance tasks, and synthetic-data usefulness tests. | Task-specific held-out evaluations, qualitative inspection, and updated model cards. |
 | Xperience Embodied Foundation Model Pretraining | Future | Full-corpus access, PB-scale storage path, multi-node compute, and positive scaling evidence from smaller runs. | Xperience-native temporal multimodal model, full-corpus manifests, pretraining shards, scaling curves, held-out evaluations, and model card. | Pretraining metadata, checkpoint inventory, held-out metrics, scaling report, and data-boundary report. |
 ## Current Decision Point
-The useful next decision is data scale plus backbone fit: keep the public-sample
-task suite as the development harness, stage enough official Xperience-10M
-episodes to run the held-out Qwen3-Omni pilot, then choose larger model branches
-by task fit. Qwen3-Omni remains the first trainable multimodal LoRA target.
-Cosmos 3 becomes the first world-model/action-generation branch. OpenVLA,
-openpi, GR00T, Octo, and SmolVLA-style models become policy/action branches only
-after the action target is explicit. A from-scratch Xperience Embodied
-Foundation Model is the long-term native-pretraining goal, not the immediate
-experiment. The public sample is already enough for task design, feature
-contracts, walkthroughs, and baseline comparisons. It is not enough to measure
-general embodied-AI model quality.
 ## Additional Concrete Development Directions
@@ -38,7 +41,7 @@ depend on immediately training a larger foundation model:
 | Direction | First artifact | Research value |
 | --- | --- | --- |
-| Episode taxonomy and data engine | Episode atlas, category tags, balance report, and split builder. | Makes episode selection representative and auditable. |
 | Standardized benchmark protocol | Fixed splits, task cards, metric scripts, and leakage checks. | Makes future model comparisons fair. |
 | Multimodal representation learning | Contrastive and masked-window objectives over synchronized modalities. | Learns reusable encoders before expensive large-model training. |
 | Skill and procedure graph mining | Steps, transitions, preconditions, effects, and temporal skill graphs. | Connects perception to planning and long-horizon reasoning. |
@@ -72,7 +75,8 @@ Evidence to inspect:
 This stage expands the same data contract to official gated episodes. The key
 research requirement is episode-level separation: training and test examples
 must come from different episodes, not different windows inside the same
-episode.
 Evidence to inspect:
@@ -84,8 +88,11 @@ Evidence to inspect:
 ### 3. Qwen3-Omni LoRA Pilot
 This stage uses Qwen3-Omni as the multimodal backbone and trains lightweight
-LoRA adapters. The first target is a complete held-out-episode training and
-evaluation loop with inspectable manifests, predictions, and metrics.
 Expected outputs:

 | Stage | Status | Entry condition | Research deliverables | Completion evidence |
 | --- | --- | --- | --- | --- |
 | Public-Sample Task Lab | Implemented | One public Xperience-10M sample episode is available. | 1,161 aligned windows, 12 task contracts, minimal heads, neural MLP heads, modality atlas, task walkthroughs, and derived figures. | `PROJECT_STATUS.md`, `EVALUATION_PROTOCOL.md`, `RESEARCH_TAKEAWAYS.md`, `docs/data/summary_metrics.json`, `results/episode_task_suite/summary_report.json` |
+| Multi-Episode Data Preparation | Implemented for first selected pilot | Gated dataset availability and enough storage for selected episodes. | 128 selected episodes, episode manifest, missing-view manifest, held-out episode split, and source-discovery report. | `results/omni_finetune/DATA_ACCESS_STATUS.md`, `results/omni_finetune/MULTI_EPISODE_ACCESS_STATUS.md`, `results/omni_finetune/xperience10m_128_episode_selection.json` |
+| Qwen3-Omni LoRA Validation-Aware Diagnostic Pilot | Verified baseline | Selected episodes prepared locally with no train/test episode leakage. | Dataset JSONL/media manifests, LoRA adapter checkpoint, progress logs, validation monitoring, held-out predictions, metrics, confusion matrices, and run report. | `docs/data/omni_finetune_verified_result.json`, `results/omni_finetune/verified_public/`, `metrics.json`, `predictions.jsonl`, `RUN_REPORT.md` |
+| Structured-Output And Error-Analysis Pass | Active next step | The validation-aware diagnostic package exists and shows weak held-out quality. | Same 96/16/16 split, stricter JSON decoding or target formatting, action/subtask error analysis, held-out test evaluation, and comparison to the verified validation-aware baseline. | Updated quality-target report, error-analysis tables, held-out metrics, and verified public package. |
+| Foundation-Model Selection Matrix | Current | The selected pilot episodes are prepared, or a 3-8 episode dry run is available for preprocessing checks. | Backbone registry, Cosmos 3 world-model branch plan, Qwen3-Omni baseline plan, OpenVLA/openpi/GR00T policy candidates, and model-specific evaluation additions. | `FOUNDATION_MODEL_PLAN.md`, `docs/data/foundation_model_plan.json`, `docs/data/research_roadmap_interactive.json` |
+| 64-128 Episode Robustness Run | Planned | The validation-aware selected-episode pilot trains and evaluates cleanly. | Split-by-session metrics, modality ablations, calibration/object/language error analysis, and sensitivity to missing views. | Held-out metrics by session, task, and modality; ablation tables; qualitative error analysis. |
 | Cosmos 3 and Policy-Model Extensions | Planned | Enough multi-episode data, compute budget, and model-specific action/world-state targets. | Cosmos 3 future-window or action-conditioned world-model probes, OpenVLA/openpi/GR00T action-policy baselines, modality-conditioning checks, affordance tasks, and synthetic-data usefulness tests. | Task-specific held-out evaluations, qualitative inspection, and updated model cards. |
 | Xperience Embodied Foundation Model Pretraining | Future | Full-corpus access, PB-scale storage path, multi-node compute, and positive scaling evidence from smaller runs. | Xperience-native temporal multimodal model, full-corpus manifests, pretraining shards, scaling curves, held-out evaluations, and model card. | Pretraining metadata, checkpoint inventory, held-out metrics, scaling report, and data-boundary report. |
 ## Current Decision Point
+The useful next decision is model-quality improvement plus backbone fit: keep
+the public-sample task suite as the development harness, use the verified
+Qwen3-Omni validation-aware diagnostic pilot as the first cross-episode
+baseline, then improve format reliability and task quality before claiming
+model quality.
+Qwen3-Omni remains the first trainable multimodal LoRA target. Cosmos 3 becomes
+the first world-model/action-generation branch. OpenVLA, openpi, GR00T, Octo,
+and SmolVLA-style models become policy/action branches only after the action
+target is explicit. A from-scratch Xperience Embodied Foundation Model is the
+long-term native-pretraining goal, not the immediate experiment. The public
+sample is already enough for task design, feature contracts, walkthroughs, and
+baseline comparisons. The first multi-episode pilot is enough to verify the
+end-to-end training loop, but its weak metrics should not be read as final
+model quality.
 ## Additional Concrete Development Directions
 | Direction | First artifact | Research value |
 | --- | --- | --- |
+| Episode taxonomy and data engine | Episode atlas, category tags, balance report, and split builder. | Makes episode selection representative and measurable. |
 | Standardized benchmark protocol | Fixed splits, task cards, metric scripts, and leakage checks. | Makes future model comparisons fair. |
 | Multimodal representation learning | Contrastive and masked-window objectives over synchronized modalities. | Learns reusable encoders before expensive large-model training. |
 | Skill and procedure graph mining | Steps, transitions, preconditions, effects, and temporal skill graphs. | Connects perception to planning and long-horizon reasoning. |
 This stage expands the same data contract to official gated episodes. The key
 research requirement is episode-level separation: training and test examples
 must come from different episodes, not different windows inside the same
+episode. The first selected 96/16/16 split has been used for a verified
+Qwen3-Omni diagnostic pilot.
 Evidence to inspect:
 ### 3. Qwen3-Omni LoRA Pilot
 This stage uses Qwen3-Omni as the multimodal backbone and trains lightweight
+LoRA adapters. The first held-out diagnostic package now exists. It proves the
+export, training, evaluation, validation, and public-safe packaging loop, but
+the metrics are weak: JSON validity is 87.50%, action macro-F1 is 0.0027, and
+subtask accuracy is 0.0067. Treat it as a baseline and error-analysis starting
+point.
 Expected outputs:

RESEARCH_TAKEAWAYS.md CHANGED Viewed

@@ -97,17 +97,18 @@ Current scope: This is a single-episode ablation over fixed ridge heads. It vali
 ### The next scientific unit is held-out episodes, not more adjacent windows
-The prepared Qwen3-Omni path now targets a selected 128-episode pilot; held-out metrics will be reported after staging, training, and evaluation complete.
 | Metric | Value |
 | --- | ---: |
-| `target_episodes` | 128 |
-| `selected_sessions` | 128 |
-| `valid_candidates` | 12,102 |
-Source: `results/omni_finetune/MULTI_EPISODE_ACCESS_STATUS.md`.
-Current scope: The selected-episode Qwen3-Omni fine-tune requires completed data preparation and held-out evaluation; the 32-episode Qwen3-Omni fine-tune requires gated data preparation before any real held-out metric is reported.
 ## How To Read These Results

 ### The next scientific unit is held-out episodes, not more adjacent windows
+The selected Qwen3-Omni path now has a verified validation-aware held-out diagnostic pilot. It proves the cross-episode train/validation/eval loop, but the weak metrics show that structured-output reliability and task-quality error analysis are the next modeling problems.
 | Metric | Value |
 | --- | ---: |
+| `selected_episodes` | 128 |
+| `held_out_test_windows` | 448 |
+| `json_validity_rate` | 0.8750 |
+| `action_macro_f1` | 0.0027 |
+Source: `docs/data/omni_finetune_verified_result.json`.
+Current scope: The selected-episode Qwen3-Omni validation-aware diagnostic pilot is verified, but held-out quality is still weak and JSON validity remains below the 98% target.
 ## How To Read These Results

data/evaluation_protocol.json CHANGED Viewed

@@ -2,7 +2,7 @@
   "title": "Ropedia Xperience-10M Task Suite Evaluation Protocol",
   "status": "pass",
   "version": "2026-06-01",
-  "generated_at_utc": "2026-06-04T16:42:13+00:00",
   "source_files": [
     "docs/data/summary_metrics.json",
     "results/episode_task_suite/summary_report.json",
@@ -303,22 +303,23 @@
     "Report unseen test classes when the chronological split exposes labels absent from the train segment."
   ],
   "current_limitations": [
-    "Cross-episode generalization is evaluated in the later multi-episode stage.",
     "Feature-vector reconstruction is separate from pixel depth, mesh, NeRF, or Gaussian reconstruction.",
-    "Qwen3-Omni setup artifacts are preparation artifacts until the selected held-out pilot runs.",
     "Full audio-visual representation learning still needs multi-episode training; the current report includes single-episode audio/no-audio ablations."
   ],
   "scale_up_gate": {
-    "required_before_full_omni_pilot": [
       "selected prepared Xperience-10M episodes",
       "held-out episode split with no train/test episode leakage",
       "manifest, training metadata, progress logs, metrics, predictions, and run report",
       "held-out evaluation on test episodes rather than train windows"
     ],
-    "current_status": "prepared; selected data relay in progress",
     "evidence": [
-      "results/omni_finetune/DATA_ACCESS_STATUS.md",
-      "results/omni_finetune/MULTI_EPISODE_ACCESS_STATUS.md"
     ]
   }
 }

   "title": "Ropedia Xperience-10M Task Suite Evaluation Protocol",
   "status": "pass",
   "version": "2026-06-01",
+  "generated_at_utc": "2026-06-06T13:49:32+00:00",
   "source_files": [
     "docs/data/summary_metrics.json",
     "results/episode_task_suite/summary_report.json",
     "Report unseen test classes when the chronological split exposes labels absent from the train segment."
   ],
   "current_limitations": [
+    "Cross-episode generalization for Qwen3-Omni has a first verified diagnostic pilot, but strong model quality is not yet shown.",
     "Feature-vector reconstruction is separate from pixel depth, mesh, NeRF, or Gaussian reconstruction.",
+    "The verified validation-aware Qwen3-Omni diagnostic pilot has weak held-out metrics and needs structured-output and task-quality improvements before larger model-quality claims.",
     "Full audio-visual representation learning still needs multi-episode training; the current report includes single-episode audio/no-audio ablations."
   ],
   "scale_up_gate": {
+    "required_before_next_omni_quality_pilot": [
       "selected prepared Xperience-10M episodes",
       "held-out episode split with no train/test episode leakage",
+      "validation samples during training",
       "manifest, training metadata, progress logs, metrics, predictions, and run report",
       "held-out evaluation on test episodes rather than train windows"
     ],
+    "current_status": "verified diagnostic pilot; quality target not met",
     "evidence": [
+      "docs/data/omni_finetune_verified_result.json",
+      "results/omni_finetune/verified_public/"
     ]
   }
 }

data/foundation_model_plan.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "title": "Xperience-10M Foundation Model Plan",
   "status": "planning_artifact",
-  "current_boundary": "No held-out multi-episode foundation-model result has been completed in this repo. The current foundation-model artifacts are setup-stage until enough valid episodes are prepared and evaluated.",
   "backbone_registry": {
     "config_dir": "configs/omni_backbones",
     "validator": "scripts/omni/backbone_registry.py --validate --json",
@@ -206,7 +206,7 @@
     {
       "step": 2,
       "name": "First held-out baseline",
-      "action": "Run Qwen3-Omni LoRA to establish the full train/eval loop."
     },
     {
       "step": 3,

 {
   "title": "Xperience-10M Foundation Model Plan",
   "status": "planning_artifact",
+  "current_boundary": "A first held-out multi-episode validation-aware Qwen3-Omni diagnostic pilot is verified in this repo, but it is not a strong model result. The current foundation-model work should treat it as the baseline train/eval/package loop before further validation-aware Qwen3-Omni reruns, Cosmos-style world modeling, or policy/VLA branches.",
   "backbone_registry": {
     "config_dir": "configs/omni_backbones",
     "validator": "scripts/omni/backbone_registry.py --validate --json",
     {
       "step": 2,
       "name": "First held-out baseline",
+      "action": "Run validation-aware Qwen3-Omni LoRA to improve the verified diagnostic baseline."
     },
     {
       "step": 3,

data/omni_finetune_verified_result.json ADDED Viewed

	@@ -0,0 +1,78 @@

+{
+  "title": "Verified Qwen3-Omni LoRA Validation-Aware Held-Out Pilot",
+  "status": "verified_validation_aware_diagnostic_pilot",
+  "status_date": "2026-06-06",
+  "backbone": "Qwen/Qwen3-Omni-30B-A3B-Instruct",
+  "adapter": "Qwen3-Omni LoRA",
+  "dataset": "Ropedia Xperience-10M selected 128-episode pilot",
+  "split_policy": {
+    "unit": "episode",
+    "selected_episode_counts": {
+      "train": 96,
+      "val": 16,
+      "test": 16
+    },
+    "exported_window_counts": {
+      "train": 2848,
+      "val": 512,
+      "test": 448
+    },
+    "exported_episode_counts": {
+      "train": 89,
+      "val": 16,
+      "test": 14
+    },
+    "skipped_selected_episodes": 9,
+    "leakage_policy": "Train, validation, and test are separated by episode/session; test windows are used only for held-out evaluation."
+  },
+  "training": {
+    "num_processes": 8,
+    "epochs": 1,
+    "lora_rank": 16,
+    "lora_alpha": 32,
+    "lora_dropout": 0.05,
+    "num_train_samples": 2848,
+    "num_val_samples": 512,
+    "history": [
+      {
+        "epoch": 1,
+        "train_loss": 0.41304643672440994,
+        "val_loss": 0.0330660454928875,
+        "global_step": 356
+      }
+    ],
+    "loss": "answer-token cross entropy over supervised JSON tokens",
+    "note": "This validation-aware run uses the selected validation split only for loss monitoring during training (the 2,848 training samples come from the train split alone) and preserves the held-out test split for final evaluation."
+  },
+  "evaluation": {
+    "split": "test",
+    "num_samples": 448,
+    "held_out_episode_count": 14,
+    "json_validity_rate": 0.875,
+    "action_macro_f1": 0.0026621494447581404,
+    "subtask_accuracy": 0.006696428571428571,
+    "transition_accuracy": 0.8504464285714286,
+    "next_action_accuracy": 0.024553571428571428,
+    "contact_accuracy": 0.6450892857142857,
+    "object_micro_f1": 0.22299431459254582,
+    "quality_target": {
+      "json_validity_rate": 0.98,
+      "status": "not_met"
+    },
+    "previous_diagnostic_json_validity_rate": 0.8526785714285714
+  },
+  "interpretation": "This is a real held-out multi-episode validation-aware diagnostic pilot proving the export, LoRA training with validation monitoring, evaluation, validation, and public-safe packaging loop. JSON validity improved over the earlier no-validation diagnostic run, but task-quality metrics remain weak, so it should be used as a baseline and error-analysis starting point rather than a strong Xperience-10M model.",
+  "public_package": {
+    "path": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval",
+    "audit_status": "pass",
+    "contains_raw_xperience10m_data": false,
+    "contains_qwen_base_weights": false,
+    "contains_lora_weights": false
+  },
+  "required_next_steps": [
+    "Improve JSON-format reliability through prompt, decoding, constrained parsing, or target formatting changes.",
+    "Add error analysis by episode, action family, object category, and missing-modality state.",
+    "Run a second validation-aware Qwen3-Omni pass only after the JSON/output contract is tightened.",
+    "Keep the same verified package contract for Cosmos-style world-model and VLA/policy branches."
+  ]
+}

data/project_status.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
   "title": "Ropedia Xperience-10M Task Suite Project Status",
   "version": "2026-06-01",
-  "decision": "public_sample_pipeline_verified_multi_episode_omni_data_staging",
-  "research_positioning": "A research-engineering study that makes one public Xperience-10M sample episode inspectable, defines embodied-AI tasks over synchronized modalities, records baseline behavior, and keeps later multi-episode model-quality claims separate from current single-episode evidence.",
   "scope_boundary": {
     "validated_episode_count": 1,
     "aligned_frames": 5821,
@@ -13,7 +13,20 @@
     "direction_extension_probe_count": 4,
     "audio_featurized": true,
     "raw_xperience10m_data_redistributed": false,
-    "qwen3_omni_32_episode_claim": false
   },
   "rows": [
     {
@@ -36,92 +49,92 @@
       ],
       "readout": "All 12 task contracts have committed metrics, predictions, and minimal baseline outputs."
     },
-        {
-            "area": "Neural heads",
-            "status": "verified",
-            "evidence": [
-                "scripts/neural_task_models.py",
-                "results/episode_task_suite/neural_mlp/"
-            ],
-            "readout": "Each task also has a compact PyTorch MLP run over the same feature tensor and chronological split."
-        },
-        {
-            "area": "Audio contribution study",
-            "status": "verified",
-            "evidence": [
-                "scripts/audio_ablation_and_raw_upgrade.py",
-                "results/audio_ablation/",
-                "docs/data/audio_ablation_summary.json"
-            ],
-            "readout": "Audio variants improve the primary metric on 6 of 12 task contracts in this single-episode setting."
-        },
-        {
-            "area": "Evaluation protocol",
-            "status": "verified",
-            "evidence": [
-                "EVALUATION_PROTOCOL.md",
-                "docs/data/evaluation_protocol.json",
-                "scripts/build_evaluation_protocol.py"
-            ],
-            "readout": "Windowing, chronological split, per-task metrics, leakage controls, and current limitations are generated from committed metric artifacts."
-        },
-        {
-            "area": "Research takeaways",
-            "status": "verified",
-            "evidence": [
-                "RESEARCH_TAKEAWAYS.md",
-                "docs/data/research_takeaways.json",
-                "scripts/build_research_takeaways.py"
-            ],
-            "readout": "The main result interpretation is generated from committed metrics: chronological class shift, neural gains on dynamics/order/alignment, open retrieval/reconstruction problems, and the need for held-out episodes."
-        },
-        {
-            "area": "Research roadmap",
-            "status": "current",
-            "evidence": [
-                "RESEARCH_ROADMAP.md",
-                "docs/data/research_roadmap.json"
-            ],
-            "readout": "The roadmap connects public-sample task development to 128-episode data preparation, Qwen3-Omni LoRA, foundation-model selection, robustness runs, world/policy branches, and the future Xperience-native pretraining goal."
-        },
-        {
-            "area": "Foundation-model plan",
-            "status": "current",
-            "evidence": [
-                "FOUNDATION_MODEL_PLAN.md",
-                "docs/data/foundation_model_plan.json"
-            ],
-            "readout": "Qwen3-Omni remains the first trainable held-out LoRA baseline; Cosmos 3 is added as the first world-model/action-generation branch; OpenVLA/openpi/GR00T are policy candidates after action targets are explicit."
-        },
-        {
-            "area": "Xperience Embodied Foundation Model",
-            "status": "future_goal",
-            "evidence": [
-                "XPERIENCE_EMBODIED_FOUNDATION_MODEL_PRETRAINING.md"
-            ],
-            "readout": "A future full-corpus pretraining plan describes target modules, objectives, staged scale-up, hardware ranges, and evaluation for a domain-specific embodied foundation model."
-        },
-        {
-            "area": "Official dataset wording",
-            "status": "verified",
       "evidence": [
         "XPERIENCE10M_DATASET_CARD_ALIGNMENT.md",
         "docs/data/xperience10m_dataset_card_alignment.json"
       ],
-            "readout": "Public wording is aligned to the official gated Xperience-10M dataset card, public sample card, and HF API metadata, including modalities, scale, access path, sample license/tooling, and current project coverage."
-        },
-        {
-            "area": "Source alignment",
-            "status": "verified",
-            "evidence": [
-                "SOURCE_ALIGNMENT_AUDIT.md",
-                "docs/data/source_alignment_audit.json",
-                "scripts/validate_source_alignment.py"
-            ],
-            "readout": "Source facts, sample details, API-listing notes, and project coverage are checked across repo docs, website, and HF cards."
-        },
-        {
-            "area": "Website and HF mirrors",
       "status": "verified",
       "evidence": [
         "docs/data/website_integrity.json",
@@ -152,12 +165,14 @@
     },
     {
       "area": "Qwen3-Omni fine-tuning",
-      "status": "data_preparation_full_metrics_pending",
       "evidence": [
-        "results/omni_finetune/DATA_ACCESS_STATUS.md",
-        "results/omni_finetune/MULTI_EPISODE_ACCESS_STATUS.md"
       ],
-      "readout": "The gated full dataset is available for a selected 128-episode pilot; final held-out metrics require completed preprocessing, manifest construction, training, and held-out evaluation."
     },
     {
       "area": "Raw Xperience-10M redistribution",
@@ -171,21 +186,21 @@
   ],
   "fast_research_route": [
     "Read PROJECT_STATUS.md and EVIDENCE_CONTRACT.md to establish what is implemented.",
-        "Open docs/data/project_packet.json for the machine-readable project path.",
-        "Inspect RESEARCH_TAKEAWAYS.md and docs/data/research_takeaways.json before interpreting model scores.",
-        "Inspect RESEARCH_ROADMAP.md and docs/data/research_roadmap.json for the path from public-sample task work to multi-episode modeling.",
-        "Inspect FOUNDATION_MODEL_PLAN.md and docs/data/foundation_model_plan.json before choosing a backbone branch.",
-        "Inspect XPERIENCE_EMBODIED_FOUNDATION_MODEL_PRETRAINING.md for the long-term full-corpus pretraining goal.",
-        "Inspect docs/data/summary_metrics.json and results/episode_task_suite/neural_mlp/ to check the 12-task outputs.",
-        "Inspect results/audio_ablation/AUDIO_ABLATION_SUMMARY.md before judging whether audio helps the current task suite.",
-        "Inspect EVALUATION_PROTOCOL.md before judging task metrics or leakage controls.",
-        "Inspect SOURCE_ALIGNMENT_AUDIT.md before judging source-card consistency across public surfaces.",
-        "Inspect XPERIENCE10M_DATASET_CARD_ALIGNMENT.md before judging dataset wording.",
-    "Inspect results/omni_finetune/DATA_ACCESS_STATUS.md before judging Qwen3-Omni scale-up status."
   ],
   "current_reading_notes": [
-    "Cross-episode generalization is evaluated in the later multi-episode stage.",
-    "Older pilot path names refer to setup files, not completed held-out training results.",
     "The current reconstruction task reconstructs feature vectors, not pixel-depth, mesh, NeRF, or Gaussian reconstruction.",
     "Audio is one of the synchronized source modalities in the current task representation.",
     "The audio ablation report compares audio/no-audio variants across all 12 task contracts in results/audio_ablation/.",

 {
   "title": "Ropedia Xperience-10M Task Suite Project Status",
   "version": "2026-06-01",
+  "decision": "public_sample_pipeline_verified_qwen3_omni_validation_aware_diagnostic_pilot",
+  "research_positioning": "A research-engineering study that makes one public Xperience-10M sample episode inspectable, defines embodied-AI tasks over synchronized modalities, records baseline behavior, and uses the selected-episode Qwen3-Omni validation-aware diagnostic pilot as a verified but weak cross-episode baseline.",
   "scope_boundary": {
     "validated_episode_count": 1,
     "aligned_frames": 5821,
     "direction_extension_probe_count": 4,
     "audio_featurized": true,
     "raw_xperience10m_data_redistributed": false,
+    "qwen3_omni_32_episode_claim": false,
+    "qwen3_omni_verified_diagnostic_pilot": true,
+    "qwen3_omni_selected_episode_counts": {
+      "train": 96,
+      "val": 16,
+      "test": 16
+    },
+    "qwen3_omni_exported_window_counts": {
+      "train": 2848,
+      "val": 512,
+      "test": 448
+    },
+    "qwen3_omni_json_validity_rate": 0.875,
+    "qwen3_omni_validation_aware": true
   },
   "rows": [
     {
       ],
       "readout": "All 12 task contracts have committed metrics, predictions, and minimal baseline outputs."
     },
+    {
+      "area": "Neural heads",
+      "status": "verified",
+      "evidence": [
+        "scripts/neural_task_models.py",
+        "results/episode_task_suite/neural_mlp/"
+      ],
+      "readout": "Each task also has a compact PyTorch MLP run over the same feature tensor and chronological split."
+    },
+    {
+      "area": "Audio contribution study",
+      "status": "verified",
+      "evidence": [
+        "scripts/audio_ablation_and_raw_upgrade.py",
+        "results/audio_ablation/",
+        "docs/data/audio_ablation_summary.json"
+      ],
+      "readout": "Audio variants improve the primary metric on 6 of 12 task contracts in this single-episode setting."
+    },
+    {
+      "area": "Evaluation protocol",
+      "status": "verified",
+      "evidence": [
+        "EVALUATION_PROTOCOL.md",
+        "docs/data/evaluation_protocol.json",
+        "scripts/build_evaluation_protocol.py"
+      ],
+      "readout": "Windowing, chronological split, per-task metrics, leakage controls, and current limitations are generated from committed metric artifacts."
+    },
+    {
+      "area": "Research takeaways",
+      "status": "verified",
+      "evidence": [
+        "RESEARCH_TAKEAWAYS.md",
+        "docs/data/research_takeaways.json",
+        "scripts/build_research_takeaways.py"
+      ],
+      "readout": "The main result interpretation is generated from committed metrics: chronological class shift, neural gains on dynamics/order/alignment, open retrieval/reconstruction problems, and the need for held-out episodes."
+    },
+    {
+      "area": "Research roadmap",
+      "status": "current",
+      "evidence": [
+        "RESEARCH_ROADMAP.md",
+        "docs/data/research_roadmap.json"
+      ],
+      "readout": "The roadmap connects public-sample task development to the verified Qwen3-Omni diagnostic pilot, validation-aware diagnostics, foundation-model selection, robustness runs, world/policy branches, and the future Xperience-native pretraining goal."
+    },
+    {
+      "area": "Foundation-model plan",
+      "status": "current",
+      "evidence": [
+        "FOUNDATION_MODEL_PLAN.md",
+        "docs/data/foundation_model_plan.json"
+      ],
+      "readout": "Qwen3-Omni remains the first trainable held-out LoRA baseline; Cosmos 3 is added as the first world-model/action-generation branch; OpenVLA/openpi/GR00T are policy candidates after action targets are explicit."
+    },
+    {
+      "area": "Xperience Embodied Foundation Model",
+      "status": "future_goal",
+      "evidence": [
+        "XPERIENCE_EMBODIED_FOUNDATION_MODEL_PRETRAINING.md"
+      ],
+      "readout": "A future full-corpus pretraining plan describes target modules, objectives, staged scale-up, hardware ranges, and evaluation for a domain-specific embodied foundation model."
+    },
+    {
+      "area": "Official dataset wording",
+      "status": "verified",
       "evidence": [
         "XPERIENCE10M_DATASET_CARD_ALIGNMENT.md",
         "docs/data/xperience10m_dataset_card_alignment.json"
       ],
+      "readout": "Public wording is aligned to the official gated Xperience-10M dataset card, public sample card, and HF API metadata, including modalities, scale, access path, sample license/tooling, and current project coverage."
+    },
+    {
+      "area": "Source alignment",
+      "status": "verified",
+      "evidence": [
+        "SOURCE_ALIGNMENT_AUDIT.md",
+        "docs/data/source_alignment_audit.json",
+        "scripts/validate_source_alignment.py"
+      ],
+      "readout": "Source facts, sample details, API-listing notes, and project coverage are checked across repo docs, website, and HF cards."
+    },
+    {
+      "area": "Website and HF mirrors",
       "status": "verified",
       "evidence": [
         "docs/data/website_integrity.json",
     },
     {
       "area": "Qwen3-Omni fine-tuning",
+      "status": "verified_validation_aware_diagnostic_pilot_quality_target_not_met",
       "evidence": [
+        "docs/data/omni_finetune_verified_result.json",
+        "results/omni_finetune/verified_public/",
+        "scripts/omni/package_verified_omni_result.py",
+        "scripts/omni/audit_verified_omni_package.py"
       ],
+      "readout": "The selected 96/16/16 episode split (89/16/14 episodes exported after 9 selected episodes were skipped) produced a validation-aware public-safe held-out package with 3,808 exported windows in total (2,848 train, 512 validation, and 448 test) and predictions for all 448 test windows. JSON validity is 87.50%, below the 98% target, so it is a stronger diagnostic baseline but not a strong model-quality result."
     },
     {
       "area": "Raw Xperience-10M redistribution",
   ],
   "fast_research_route": [
     "Read PROJECT_STATUS.md and EVIDENCE_CONTRACT.md to establish what is implemented.",
+    "Open docs/data/project_packet.json for the machine-readable project path.",
+    "Inspect RESEARCH_TAKEAWAYS.md and docs/data/research_takeaways.json before interpreting model scores.",
+    "Inspect RESEARCH_ROADMAP.md and docs/data/research_roadmap.json for the path from public-sample task work to multi-episode modeling.",
+    "Inspect FOUNDATION_MODEL_PLAN.md and docs/data/foundation_model_plan.json before choosing a backbone branch.",
+    "Inspect XPERIENCE_EMBODIED_FOUNDATION_MODEL_PRETRAINING.md for the long-term full-corpus pretraining goal.",
+    "Inspect docs/data/summary_metrics.json and results/episode_task_suite/neural_mlp/ to check the 12-task outputs.",
+    "Inspect results/audio_ablation/AUDIO_ABLATION_SUMMARY.md before judging whether audio helps the current task suite.",
+    "Inspect EVALUATION_PROTOCOL.md before judging task metrics or leakage controls.",
+    "Inspect SOURCE_ALIGNMENT_AUDIT.md before judging source-card consistency across public surfaces.",
+    "Inspect XPERIENCE10M_DATASET_CARD_ALIGNMENT.md before judging dataset wording.",
+    "Inspect docs/data/omni_finetune_verified_result.json before judging the Qwen3-Omni diagnostic pilot."
   ],
   "current_reading_notes": [
+    "The validation-aware Qwen3-Omni diagnostic pilot is verified, but current held-out quality is still weak.",
+    "Use docs/data/omni_finetune_verified_result.json and the latest verified_public validation-aware package for current held-out results.",
     "The current reconstruction task reconstructs feature vectors, not pixel-depth, mesh, NeRF, or Gaussian reconstruction.",
     "Audio is one of the synchronized source modalities in the current task representation.",
     "The audio ablation report compares audio/no-audio variants across all 12 task contracts in results/audio_ablation/.",

data/research_roadmap.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "title": "Ropedia Xperience-10M Research Roadmap",
-  "summary": "Staged path from the public-sample task lab to multi-episode held-out evaluation, foundation-model selection, world/policy branches, and a future Xperience-native embodied foundation model.",
-  "current_decision_point": "Keep the public-sample task suite as the development harness, prepare the selected official Xperience-10M episodes for the held-out Qwen3-Omni pilot, then branch into Cosmos 3 world modeling and policy-model experiments after the data preparation path is stable. The Xperience Embodied Foundation Model is a later full-corpus pretraining goal, not a current result.",
   "additional_development_directions": {
     "source_document": "ADDITIONAL_DEVELOPMENT_DIRECTIONS.md",
     "source_json": "docs/data/additional_development_directions.json",
@@ -33,8 +33,8 @@
     },
     {
       "id": "multi_episode_data_staging",
-      "name": "Multi-Episode Data Staging",
-      "status": "active",
       "entry_condition": "Gated dataset availability and enough storage for selected episodes.",
       "deliverables": [
         "128 selected episodes",
@@ -48,23 +48,26 @@
         "results/omni_finetune/MULTI_EPISODE_ACCESS_STATUS.md",
         "results/omni_finetune/source_discovery.json"
       ],
-      "reader_takeaway": "The next scale decision is data preparation, with train/test separation at the episode level."
     },
     {
-      "id": "qwen3_omni_lora_pilot",
-      "name": "Qwen3-Omni LoRA Pilot",
-      "status": "next",
       "entry_condition": "Selected episodes are prepared locally with no train/test episode leakage.",
       "deliverables": [
         "dataset JSONL/media manifests",
         "LoRA adapter checkpoint",
         "progress logs",
         "held-out predictions",
         "metrics",
         "confusion matrices",
         "run report"
       ],
       "completion_evidence": [
         "dataset_manifest.json",
         "training_metadata.json",
         "progress.jsonl",
@@ -72,7 +75,27 @@
         "predictions.jsonl",
         "RUN_REPORT.md"
       ],
-      "reader_takeaway": "The first omni-model pilot should establish a complete held-out-episode training and evaluation loop."
     },
     {
       "id": "foundation_model_selection_matrix",

 {
   "title": "Ropedia Xperience-10M Research Roadmap",
+  "summary": "Staged path from the public-sample task lab to a verified validation-aware Qwen3-Omni diagnostic pilot, structured-output improvement pass, foundation-model selection, world/policy branches, and a future Xperience-native embodied foundation model.",
+  "current_decision_point": "Keep the public-sample task suite as the development harness, use the verified selected-episode Qwen3-Omni validation-aware diagnostic pilot as the first cross-episode baseline, improve structured-output reliability and task-quality error analysis, then branch into Cosmos 3 world modeling and policy-model experiments after their targets are implemented. The Xperience Embodied Foundation Model is a later full-corpus pretraining goal, not a current result.",
   "additional_development_directions": {
     "source_document": "ADDITIONAL_DEVELOPMENT_DIRECTIONS.md",
     "source_json": "docs/data/additional_development_directions.json",
     },
     {
       "id": "multi_episode_data_staging",
+      "name": "Multi-Episode Data Preparation",
+      "status": "implemented_for_first_pilot",
       "entry_condition": "Gated dataset availability and enough storage for selected episodes.",
       "deliverables": [
         "128 selected episodes",
         "results/omni_finetune/MULTI_EPISODE_ACCESS_STATUS.md",
         "results/omni_finetune/source_discovery.json"
       ],
+      "reader_takeaway": "The first selected split is available for Qwen3-Omni diagnostics, with train/test separation at the episode level."
     },
     {
+      "id": "qwen3_omni_lora_diagnostic_pilot",
+      "name": "Qwen3-Omni LoRA Validation-Aware Diagnostic Pilot",
+      "status": "verified_baseline",
       "entry_condition": "Selected episodes are prepared locally with no train/test episode leakage.",
       "deliverables": [
         "dataset JSONL/media manifests",
         "LoRA adapter checkpoint",
         "progress logs",
+        "validation monitoring",
         "held-out predictions",
         "metrics",
         "confusion matrices",
         "run report"
       ],
       "completion_evidence": [
+        "docs/data/omni_finetune_verified_result.json",
+        "results/omni_finetune/verified_public/",
         "dataset_manifest.json",
         "training_metadata.json",
         "progress.jsonl",
         "predictions.jsonl",
         "RUN_REPORT.md"
       ],
+      "reader_takeaway": "The first omni-model pilot establishes the full held-out training/validation/evaluation loop, but the weak metrics make it a diagnostic baseline."
+    },
+    {
+      "id": "qwen3_omni_structured_output_error_analysis",
+      "name": "Structured-Output And Error-Analysis Pass",
+      "status": "active_next_step",
+      "entry_condition": "The validation-aware diagnostic package exists and shows weak held-out quality.",
+      "deliverables": [
+        "same 96/16/16 episode split",
+        "stricter JSON decoding or target formatting",
+        "episode/action/object error analysis",
+        "held-out test evaluation",
+        "comparison to the verified validation-aware baseline"
+      ],
+      "completion_evidence": [
+        "quality-target report",
+        "error-analysis tables",
+        "held-out metrics",
+        "verified public-safe package"
+      ],
+      "reader_takeaway": "The next pass should improve output reliability and task metrics before larger model-quality claims."
     },
     {
       "id": "foundation_model_selection_matrix",

data/research_roadmap_interactive.json CHANGED Viewed

@@ -127,7 +127,7 @@
       "Build the episode taxonomy and data-quality diagnostics first.",
       "Lock the benchmark protocol and split manifests before reporting model scores.",
       "Add representation-learning and skill-graph objectives once enough episodes are staged.",
-      "Add affordance, 3D/4D memory, and policy-retargeting branches after labels and action targets are auditable."
     ],
     "public_boundary": "These are proposed development tracks. They are not reported as completed held-out benchmark results.",
     "source_document": "ADDITIONAL_DEVELOPMENT_DIRECTIONS.md",
@@ -2035,7 +2035,7 @@
         "step": 1
       },
       {
-        "action": "Run Qwen3-Omni LoRA to establish the full train/eval loop.",
         "name": "First held-out baseline",
         "step": 2
       },
@@ -2222,7 +2222,7 @@
     ],
     "status": "planning_artifact"
   },
-  "generated_at_utc": "2026-06-04T21:22:15+00:00",
   "omni_plan": {
     "adapter": "LoRA rank 16, alpha 32, dropout 0.05",
     "backbone": "Qwen/Qwen3-Omni-30B-A3B-Instruct",
@@ -2279,13 +2279,15 @@
       ],
       "entry_condition": "Gated dataset availability and enough storage for selected episodes.",
       "id": "multi_episode_data_staging",
-      "name": "Multi-Episode Data Staging",
-      "reader_takeaway": "The next scale decision is data preparation, with train/test separation at the episode level.",
-      "stage": "scale_up",
-      "status": "active"
     },
     {
       "completion_evidence": [
         "dataset_manifest.json",
         "training_metadata.json",
         "progress.jsonl",
@@ -2297,17 +2299,39 @@
         "dataset JSONL/media manifests",
         "LoRA adapter checkpoint",
         "progress logs",
         "held-out predictions",
         "metrics",
         "confusion matrices",
         "run report"
       ],
       "entry_condition": "Selected episodes are prepared locally with no train/test episode leakage.",
-      "id": "qwen3_omni_lora_pilot",
-      "name": "Qwen3-Omni LoRA Pilot",
-      "reader_takeaway": "The first omni-model pilot should establish a complete held-out-episode training and evaluation loop.",
-      "stage": "omni",
-      "status": "next"
     },
     {
       "completion_evidence": [
@@ -2404,7 +2428,7 @@
       "visualization.rrd"
     ],
     "selection_strategy": "stratified_round_robin_by_top_level_session",
-    "status": "selected_episode_preparation",
     "target_episodes": 128,
     "valid_candidates": 12102
   },

       "Build the episode taxonomy and data-quality diagnostics first.",
       "Lock the benchmark protocol and split manifests before reporting model scores.",
       "Add representation-learning and skill-graph objectives once enough episodes are staged.",
+      "Add affordance, 3D/4D memory, and policy-retargeting branches after labels and action targets are measurable."
     ],
     "public_boundary": "These are proposed development tracks. They are not reported as completed held-out benchmark results.",
     "source_document": "ADDITIONAL_DEVELOPMENT_DIRECTIONS.md",
         "step": 1
       },
       {
+        "action": "Run validation-aware Qwen3-Omni LoRA to improve the verified diagnostic baseline.",
         "name": "First held-out baseline",
         "step": 2
       },
     ],
     "status": "planning_artifact"
   },
+  "generated_at_utc": "2026-06-06T13:49:32+00:00",
   "omni_plan": {
     "adapter": "LoRA rank 16, alpha 32, dropout 0.05",
     "backbone": "Qwen/Qwen3-Omni-30B-A3B-Instruct",
       ],
       "entry_condition": "Gated dataset availability and enough storage for selected episodes.",
       "id": "multi_episode_data_staging",
+      "name": "Multi-Episode Data Preparation",
+      "reader_takeaway": "The first selected split is available for Qwen3-Omni diagnostics, with train/test separation at the episode level.",
+      "stage": "future",
+      "status": "implemented_for_first_pilot"
     },
     {
       "completion_evidence": [
+        "docs/data/omni_finetune_verified_result.json",
+        "results/omni_finetune/verified_public/",
         "dataset_manifest.json",
         "training_metadata.json",
         "progress.jsonl",
         "dataset JSONL/media manifests",
         "LoRA adapter checkpoint",
         "progress logs",
+        "validation monitoring",
         "held-out predictions",
         "metrics",
         "confusion matrices",
         "run report"
       ],
       "entry_condition": "Selected episodes are prepared locally with no train/test episode leakage.",
+      "id": "qwen3_omni_lora_diagnostic_pilot",
+      "name": "Qwen3-Omni LoRA Validation-Aware Diagnostic Pilot",
+      "reader_takeaway": "The first omni-model pilot establishes the full held-out training/validation/evaluation loop, but the weak metrics make it a diagnostic baseline.",
+      "stage": "future",
+      "status": "verified_baseline"
+    },
+    {
+      "completion_evidence": [
+        "quality-target report",
+        "error-analysis tables",
+        "held-out metrics",
+        "verified public-safe package"
+      ],
+      "deliverables": [
+        "same 96/16/16 episode split",
+        "stricter JSON decoding or target formatting",
+        "episode/action/object error analysis",
+        "held-out test evaluation",
+        "comparison to the verified validation-aware baseline"
+      ],
+      "entry_condition": "The validation-aware diagnostic package exists and shows weak held-out quality.",
+      "id": "qwen3_omni_structured_output_error_analysis",
+      "name": "Structured-Output And Error-Analysis Pass",
+      "reader_takeaway": "The next pass should improve output reliability and task metrics before larger model-quality claims.",
+      "stage": "future",
+      "status": "active_next_step"
     },
     {
       "completion_evidence": [
       "visualization.rrd"
     ],
     "selection_strategy": "stratified_round_robin_by_top_level_session",
+    "status": "verified_validation_aware_diagnostic_pilot",
     "target_episodes": 128,
     "valid_candidates": 12102
   },

data/research_takeaways.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "title": "Ropedia Xperience-10M Research Takeaways",
   "status": "pass",
-  "generated_at_utc": "2026-06-04T16:42:13+00:00",
   "source_files": [
     "docs/data/summary_metrics.json",
     "results/episode_task_suite/summary_report.json",
@@ -166,23 +166,27 @@
     {
       "id": "scale_requires_episodes",
       "title": "The next scientific unit is held-out episodes, not more adjacent windows",
-      "readout": "The prepared Qwen3-Omni path now targets a selected 128-episode pilot; held-out metrics will be reported after staging, training, and evaluation complete.",
       "evidence": [
         {
-          "label": "target_episodes",
           "value": 128
         },
         {
-          "label": "selected_sessions",
-          "value": 128
         },
         {
-          "label": "valid_candidates",
-          "value": 12102
         }
       ],
-      "source": "results/omni_finetune/MULTI_EPISODE_ACCESS_STATUS.md",
-      "current_scope": "The selected-episode Qwen3-Omni fine-tune requires completed data preparation and held-out evaluation; the 32-episode Qwen3-Omni fine-tune requires gated data preparation before any real held-out metric is reported."
     }
   ]
 }

 {
   "title": "Ropedia Xperience-10M Research Takeaways",
   "status": "pass",
+  "generated_at_utc": "2026-06-06T13:49:32+00:00",
   "source_files": [
     "docs/data/summary_metrics.json",
     "results/episode_task_suite/summary_report.json",
     {
       "id": "scale_requires_episodes",
       "title": "The next scientific unit is held-out episodes, not more adjacent windows",
+      "readout": "The selected Qwen3-Omni path now has a verified validation-aware held-out diagnostic pilot. It proves the cross-episode train/validation/eval loop, but the weak metrics show that structured-output reliability and task-quality error analysis are the next modeling problems.",
       "evidence": [
         {
+          "label": "selected_episodes",
           "value": 128
         },
         {
+          "label": "held_out_test_windows",
+          "value": 448
+        },
+        {
+          "label": "json_validity_rate",
+          "value": 0.875
         },
         {
+          "label": "action_macro_f1",
+          "value": 0.0026621494447581404
         }
       ],
+      "source": "docs/data/omni_finetune_verified_result.json",
+      "current_scope": "The selected-episode Qwen3-Omni validation-aware diagnostic pilot is verified, but held-out quality is still weak and JSON validity remains below the 98% target."
     }
   ]
 }

docs/data/evaluation_protocol.json CHANGED Viewed

@@ -2,7 +2,7 @@
   "title": "Ropedia Xperience-10M Task Suite Evaluation Protocol",
   "status": "pass",
   "version": "2026-06-01",
-  "generated_at_utc": "2026-06-04T16:42:13+00:00",
   "source_files": [
     "docs/data/summary_metrics.json",
     "results/episode_task_suite/summary_report.json",
@@ -303,22 +303,23 @@
     "Report unseen test classes when the chronological split exposes labels absent from the train segment."
   ],
   "current_limitations": [
-    "Cross-episode generalization is evaluated in the later multi-episode stage.",
     "Feature-vector reconstruction is separate from pixel depth, mesh, NeRF, or Gaussian reconstruction.",
-    "Qwen3-Omni setup artifacts are preparation artifacts until the selected held-out pilot runs.",
     "Full audio-visual representation learning still needs multi-episode training; the current report includes single-episode audio/no-audio ablations."
   ],
   "scale_up_gate": {
-    "required_before_full_omni_pilot": [
       "selected prepared Xperience-10M episodes",
       "held-out episode split with no train/test episode leakage",
       "manifest, training metadata, progress logs, metrics, predictions, and run report",
       "held-out evaluation on test episodes rather than train windows"
     ],
-    "current_status": "prepared; selected data relay in progress",
     "evidence": [
-      "results/omni_finetune/DATA_ACCESS_STATUS.md",
-      "results/omni_finetune/MULTI_EPISODE_ACCESS_STATUS.md"
     ]
   }
 }

   "title": "Ropedia Xperience-10M Task Suite Evaluation Protocol",
   "status": "pass",
   "version": "2026-06-01",
+  "generated_at_utc": "2026-06-06T13:49:32+00:00",
   "source_files": [
     "docs/data/summary_metrics.json",
     "results/episode_task_suite/summary_report.json",
     "Report unseen test classes when the chronological split exposes labels absent from the train segment."
   ],
   "current_limitations": [
+    "Cross-episode generalization for Qwen3-Omni has a first verified diagnostic pilot, but strong model quality is not yet shown.",
     "Feature-vector reconstruction is separate from pixel depth, mesh, NeRF, or Gaussian reconstruction.",
+    "The verified validation-aware Qwen3-Omni diagnostic pilot has weak held-out metrics and needs structured-output and task-quality improvements before larger model-quality claims.",
     "Full audio-visual representation learning still needs multi-episode training; the current report includes single-episode audio/no-audio ablations."
   ],
   "scale_up_gate": {
+    "required_before_next_omni_quality_pilot": [
       "selected prepared Xperience-10M episodes",
       "held-out episode split with no train/test episode leakage",
+      "validation samples during training",
       "manifest, training metadata, progress logs, metrics, predictions, and run report",
       "held-out evaluation on test episodes rather than train windows"
     ],
+    "current_status": "verified diagnostic pilot; quality target not met",
     "evidence": [
+      "docs/data/omni_finetune_verified_result.json",
+      "results/omni_finetune/verified_public/"
     ]
   }
 }

docs/data/foundation_model_plan.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "title": "Xperience-10M Foundation Model Plan",
   "status": "planning_artifact",
-  "current_boundary": "No held-out multi-episode foundation-model result has been completed in this repo. The current foundation-model artifacts are setup-stage until enough valid episodes are prepared and evaluated.",
   "backbone_registry": {
     "config_dir": "configs/omni_backbones",
     "validator": "scripts/omni/backbone_registry.py --validate --json",
@@ -206,7 +206,7 @@
     {
       "step": 2,
       "name": "First held-out baseline",
-      "action": "Run Qwen3-Omni LoRA to establish the full train/eval loop."
     },
     {
       "step": 3,

 {
   "title": "Xperience-10M Foundation Model Plan",
   "status": "planning_artifact",
+  "current_boundary": "A first held-out multi-episode Qwen3-Omni diagnostic pilot is verified in this repo, but it is not a strong model result. The current foundation-model work should treat it as the baseline train/eval/package loop before validation-aware Qwen reruns, Cosmos-style world modeling, or policy/VLA branches.",
   "backbone_registry": {
     "config_dir": "configs/omni_backbones",
     "validator": "scripts/omni/backbone_registry.py --validate --json",
     {
       "step": 2,
       "name": "First held-out baseline",
+      "action": "Run validation-aware Qwen3-Omni LoRA to improve the verified diagnostic baseline."
     },
     {
       "step": 3,

docs/data/omni_finetune_verified_result.json ADDED Viewed

	@@ -0,0 +1,78 @@

+{
+  "title": "Verified Qwen3-Omni LoRA Validation-Aware Held-Out Pilot",
+  "status": "verified_validation_aware_diagnostic_pilot",
+  "status_date": "2026-06-06",
+  "backbone": "Qwen/Qwen3-Omni-30B-A3B-Instruct",
+  "adapter": "Qwen3-Omni LoRA",
+  "dataset": "Ropedia Xperience-10M selected 128-episode pilot",
+  "split_policy": {
+    "unit": "episode",
+    "selected_episode_counts": {
+      "train": 96,
+      "val": 16,
+      "test": 16
+    },
+    "exported_window_counts": {
+      "train": 2848,
+      "val": 512,
+      "test": 448
+    },
+    "exported_episode_counts": {
+      "train": 89,
+      "val": 16,
+      "test": 14
+    },
+    "skipped_selected_episodes": 9,
+    "leakage_policy": "Train, validation, and test are separated by episode/session; test windows are used only for held-out evaluation."
+  },
+  "training": {
+    "num_processes": 8,
+    "epochs": 1,
+    "lora_rank": 16,
+    "lora_alpha": 32,
+    "lora_dropout": 0.05,
+    "num_train_samples": 2848,
+    "num_val_samples": 512,
+    "history": [
+      {
+        "epoch": 1,
+        "train_loss": 0.41304643672440994,
+        "val_loss": 0.0330660454928875,
+        "global_step": 356
+      }
+    ],
+    "loss": "answer-token cross entropy over supervised JSON tokens",
+    "note": "This validation-aware run monitors validation loss on the selected validation split during training and preserves the held-out test split for final evaluation."
+  },
+  "evaluation": {
+    "split": "test",
+    "num_samples": 448,
+    "held_out_episode_count": 14,
+    "json_validity_rate": 0.875,
+    "action_macro_f1": 0.0026621494447581404,
+    "subtask_accuracy": 0.006696428571428571,
+    "transition_accuracy": 0.8504464285714286,
+    "next_action_accuracy": 0.024553571428571428,
+    "contact_accuracy": 0.6450892857142857,
+    "object_micro_f1": 0.22299431459254582,
+    "quality_target": {
+      "json_validity_rate": 0.98,
+      "status": "not_met"
+    },
+    "previous_diagnostic_json_validity_rate": 0.8526785714285714
+  },
+  "interpretation": "This is a real held-out multi-episode validation-aware diagnostic pilot that proves the full loop of export, LoRA training with validation monitoring, evaluation, validation, and public-safe packaging. JSON validity improved from 85.27% in the earlier no-validation diagnostic run to 87.50%, but it remains below the 98% target and task-quality metrics remain weak, so it should be used as a baseline and error-analysis starting point rather than as a strong Xperience-10M model.",
+  "public_package": {
+    "path": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval",
+    "audit_status": "pass",
+    "contains_raw_xperience10m_data": false,
+    "contains_qwen_base_weights": false,
+    "contains_lora_weights": false
+  },
+  "required_next_steps": [
+    "Improve JSON-format reliability through prompt, decoding, constrained parsing, or target formatting changes.",
+    "Add error analysis by episode, action family, object category, and missing-modality state.",
+    "Run a second validation-aware Qwen3-Omni pass only after the JSON/output contract is tightened.",
+    "Keep the same verified package contract for Cosmos-style world-model and VLA/policy branches."
+  ]
+}

docs/data/project_status.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
   "title": "Ropedia Xperience-10M Task Suite Project Status",
   "version": "2026-06-01",
-  "decision": "public_sample_pipeline_verified_multi_episode_omni_data_staging",
-  "research_positioning": "A research-engineering study that makes one public Xperience-10M sample episode inspectable, defines embodied-AI tasks over synchronized modalities, records baseline behavior, and keeps later multi-episode model-quality claims separate from current single-episode evidence.",
   "scope_boundary": {
     "validated_episode_count": 1,
     "aligned_frames": 5821,
@@ -13,7 +13,20 @@
     "direction_extension_probe_count": 4,
     "audio_featurized": true,
     "raw_xperience10m_data_redistributed": false,
-    "qwen3_omni_32_episode_claim": false
   },
   "rows": [
     {
@@ -36,92 +49,92 @@
       ],
       "readout": "All 12 task contracts have committed metrics, predictions, and minimal baseline outputs."
     },
-        {
-            "area": "Neural heads",
-            "status": "verified",
-            "evidence": [
-                "scripts/neural_task_models.py",
-                "results/episode_task_suite/neural_mlp/"
-            ],
-            "readout": "Each task also has a compact PyTorch MLP run over the same feature tensor and chronological split."
-        },
-        {
-            "area": "Audio contribution study",
-            "status": "verified",
-            "evidence": [
-                "scripts/audio_ablation_and_raw_upgrade.py",
-                "results/audio_ablation/",
-                "docs/data/audio_ablation_summary.json"
-            ],
-            "readout": "Audio variants improve the primary metric on 6 of 12 task contracts in this single-episode setting."
-        },
-        {
-            "area": "Evaluation protocol",
-            "status": "verified",
-            "evidence": [
-                "EVALUATION_PROTOCOL.md",
-                "docs/data/evaluation_protocol.json",
-                "scripts/build_evaluation_protocol.py"
-            ],
-            "readout": "Windowing, chronological split, per-task metrics, leakage controls, and current limitations are generated from committed metric artifacts."
-        },
-        {
-            "area": "Research takeaways",
-            "status": "verified",
-            "evidence": [
-                "RESEARCH_TAKEAWAYS.md",
-                "docs/data/research_takeaways.json",
-                "scripts/build_research_takeaways.py"
-            ],
-            "readout": "The main result interpretation is generated from committed metrics: chronological class shift, neural gains on dynamics/order/alignment, open retrieval/reconstruction problems, and the need for held-out episodes."
-        },
-        {
-            "area": "Research roadmap",
-            "status": "current",
-            "evidence": [
-                "RESEARCH_ROADMAP.md",
-                "docs/data/research_roadmap.json"
-            ],
-            "readout": "The roadmap connects public-sample task development to 128-episode data preparation, Qwen3-Omni LoRA, foundation-model selection, robustness runs, world/policy branches, and the future Xperience-native pretraining goal."
-        },
-        {
-            "area": "Foundation-model plan",
-            "status": "current",
-            "evidence": [
-                "FOUNDATION_MODEL_PLAN.md",
-                "docs/data/foundation_model_plan.json"
-            ],
-            "readout": "Qwen3-Omni remains the first trainable held-out LoRA baseline; Cosmos 3 is added as the first world-model/action-generation branch; OpenVLA/openpi/GR00T are policy candidates after action targets are explicit."
-        },
-        {
-            "area": "Xperience Embodied Foundation Model",
-            "status": "future_goal",
-            "evidence": [
-                "XPERIENCE_EMBODIED_FOUNDATION_MODEL_PRETRAINING.md"
-            ],
-            "readout": "A future full-corpus pretraining plan describes target modules, objectives, staged scale-up, hardware ranges, and evaluation for a domain-specific embodied foundation model."
-        },
-        {
-            "area": "Official dataset wording",
-            "status": "verified",
       "evidence": [
         "XPERIENCE10M_DATASET_CARD_ALIGNMENT.md",
         "docs/data/xperience10m_dataset_card_alignment.json"
       ],
-            "readout": "Public wording is aligned to the official gated Xperience-10M dataset card, public sample card, and HF API metadata, including modalities, scale, access path, sample license/tooling, and current project coverage."
-        },
-        {
-            "area": "Source alignment",
-            "status": "verified",
-            "evidence": [
-                "SOURCE_ALIGNMENT_AUDIT.md",
-                "docs/data/source_alignment_audit.json",
-                "scripts/validate_source_alignment.py"
-            ],
-            "readout": "Source facts, sample details, API-listing notes, and project coverage are checked across repo docs, website, and HF cards."
-        },
-        {
-            "area": "Website and HF mirrors",
       "status": "verified",
       "evidence": [
         "docs/data/website_integrity.json",
@@ -152,12 +165,14 @@
     },
     {
       "area": "Qwen3-Omni fine-tuning",
-      "status": "data_preparation_full_metrics_pending",
       "evidence": [
-        "results/omni_finetune/DATA_ACCESS_STATUS.md",
-        "results/omni_finetune/MULTI_EPISODE_ACCESS_STATUS.md"
       ],
-      "readout": "The gated full dataset is available for a selected 128-episode pilot; final held-out metrics require completed preprocessing, manifest construction, training, and held-out evaluation."
     },
     {
       "area": "Raw Xperience-10M redistribution",
@@ -171,21 +186,21 @@
   ],
   "fast_research_route": [
     "Read PROJECT_STATUS.md and EVIDENCE_CONTRACT.md to establish what is implemented.",
-        "Open docs/data/project_packet.json for the machine-readable project path.",
-        "Inspect RESEARCH_TAKEAWAYS.md and docs/data/research_takeaways.json before interpreting model scores.",
-        "Inspect RESEARCH_ROADMAP.md and docs/data/research_roadmap.json for the path from public-sample task work to multi-episode modeling.",
-        "Inspect FOUNDATION_MODEL_PLAN.md and docs/data/foundation_model_plan.json before choosing a backbone branch.",
-        "Inspect XPERIENCE_EMBODIED_FOUNDATION_MODEL_PRETRAINING.md for the long-term full-corpus pretraining goal.",
-        "Inspect docs/data/summary_metrics.json and results/episode_task_suite/neural_mlp/ to check the 12-task outputs.",
-        "Inspect results/audio_ablation/AUDIO_ABLATION_SUMMARY.md before judging whether audio helps the current task suite.",
-        "Inspect EVALUATION_PROTOCOL.md before judging task metrics or leakage controls.",
-        "Inspect SOURCE_ALIGNMENT_AUDIT.md before judging source-card consistency across public surfaces.",
-        "Inspect XPERIENCE10M_DATASET_CARD_ALIGNMENT.md before judging dataset wording.",
-    "Inspect results/omni_finetune/DATA_ACCESS_STATUS.md before judging Qwen3-Omni scale-up status."
   ],
   "current_reading_notes": [
-    "Cross-episode generalization is evaluated in the later multi-episode stage.",
-    "Older pilot path names refer to setup files, not completed held-out training results.",
     "The current reconstruction task reconstructs feature vectors, not pixel-depth, mesh, NeRF, or Gaussian reconstruction.",
     "Audio is one of the synchronized source modalities in the current task representation.",
     "The audio ablation report compares audio/no-audio variants across all 12 task contracts in results/audio_ablation/.",

 {
   "title": "Ropedia Xperience-10M Task Suite Project Status",
   "version": "2026-06-01",
+  "decision": "public_sample_pipeline_verified_qwen3_omni_validation_aware_diagnostic_pilot",
+  "research_positioning": "A research-engineering study that makes one public Xperience-10M sample episode inspectable, defines embodied-AI tasks over synchronized modalities, records baseline behavior, and uses the selected-episode Qwen3-Omni validation-aware diagnostic pilot as a verified but weak cross-episode baseline.",
   "scope_boundary": {
     "validated_episode_count": 1,
     "aligned_frames": 5821,
     "direction_extension_probe_count": 4,
     "audio_featurized": true,
     "raw_xperience10m_data_redistributed": false,
+    "qwen3_omni_32_episode_claim": false,
+    "qwen3_omni_verified_diagnostic_pilot": true,
+    "qwen3_omni_selected_episode_counts": {
+      "train": 96,
+      "val": 16,
+      "test": 16
+    },
+    "qwen3_omni_exported_window_counts": {
+      "train": 2848,
+      "val": 512,
+      "test": 448
+    },
+    "qwen3_omni_json_validity_rate": 0.875,
+    "qwen3_omni_validation_aware": true
   },
   "rows": [
     {
       ],
       "readout": "All 12 task contracts have committed metrics, predictions, and minimal baseline outputs."
     },
+    {
+      "area": "Neural heads",
+      "status": "verified",
+      "evidence": [
+        "scripts/neural_task_models.py",
+        "results/episode_task_suite/neural_mlp/"
+      ],
+      "readout": "Each task also has a compact PyTorch MLP run over the same feature tensor and chronological split."
+    },
+    {
+      "area": "Audio contribution study",
+      "status": "verified",
+      "evidence": [
+        "scripts/audio_ablation_and_raw_upgrade.py",
+        "results/audio_ablation/",
+        "docs/data/audio_ablation_summary.json"
+      ],
+      "readout": "Audio variants improve the primary metric on 6 of 12 task contracts in this single-episode setting."
+    },
+    {
+      "area": "Evaluation protocol",
+      "status": "verified",
+      "evidence": [
+        "EVALUATION_PROTOCOL.md",
+        "docs/data/evaluation_protocol.json",
+        "scripts/build_evaluation_protocol.py"
+      ],
+      "readout": "Windowing, chronological split, per-task metrics, leakage controls, and current limitations are generated from committed metric artifacts."
+    },
+    {
+      "area": "Research takeaways",
+      "status": "verified",
+      "evidence": [
+        "RESEARCH_TAKEAWAYS.md",
+        "docs/data/research_takeaways.json",
+        "scripts/build_research_takeaways.py"
+      ],
+      "readout": "The main result interpretation is generated from committed metrics: chronological class shift, neural gains on dynamics/order/alignment, open retrieval/reconstruction problems, and the need for held-out episodes."
+    },
+    {
+      "area": "Research roadmap",
+      "status": "current",
+      "evidence": [
+        "RESEARCH_ROADMAP.md",
+        "docs/data/research_roadmap.json"
+      ],
+      "readout": "The roadmap connects public-sample task development to the verified validation-aware Qwen3-Omni diagnostic pilot, the structured-output and error-analysis pass, foundation-model selection, robustness runs, world/policy branches, and the future Xperience-native pretraining goal."
+    },
+    {
+      "area": "Foundation-model plan",
+      "status": "current",
+      "evidence": [
+        "FOUNDATION_MODEL_PLAN.md",
+        "docs/data/foundation_model_plan.json"
+      ],
+      "readout": "Qwen3-Omni remains the first trainable held-out LoRA baseline; Cosmos 3 is added as the first world-model/action-generation branch; OpenVLA/openpi/GR00T are policy candidates after action targets are explicit."
+    },
+    {
+      "area": "Xperience Embodied Foundation Model",
+      "status": "future_goal",
+      "evidence": [
+        "XPERIENCE_EMBODIED_FOUNDATION_MODEL_PRETRAINING.md"
+      ],
+      "readout": "A future full-corpus pretraining plan describes target modules, objectives, staged scale-up, hardware ranges, and evaluation for a domain-specific embodied foundation model."
+    },
+    {
+      "area": "Official dataset wording",
+      "status": "verified",
       "evidence": [
         "XPERIENCE10M_DATASET_CARD_ALIGNMENT.md",
         "docs/data/xperience10m_dataset_card_alignment.json"
       ],
+      "readout": "Public wording is aligned to the official gated Xperience-10M dataset card, public sample card, and HF API metadata, including modalities, scale, access path, sample license/tooling, and current project coverage."
+    },
+    {
+      "area": "Source alignment",
+      "status": "verified",
+      "evidence": [
+        "SOURCE_ALIGNMENT_AUDIT.md",
+        "docs/data/source_alignment_audit.json",
+        "scripts/validate_source_alignment.py"
+      ],
+      "readout": "Source facts, sample details, API-listing notes, and project coverage are checked across repo docs, website, and HF cards."
+    },
+    {
+      "area": "Website and HF mirrors",
       "status": "verified",
       "evidence": [
         "docs/data/website_integrity.json",
     },
     {
       "area": "Qwen3-Omni fine-tuning",
+      "status": "verified_validation_aware_diagnostic_pilot_quality_target_not_met",
       "evidence": [
+        "docs/data/omni_finetune_verified_result.json",
+        "results/omni_finetune/verified_public/",
+        "scripts/omni/package_verified_omni_result.py",
+        "scripts/omni/audit_verified_omni_package.py"
       ],
+      "readout": "The selected 96/16/16 episode split produced a validation-aware public-safe held-out package with 3,808 exported windows (2,848 train, 512 validation, 448 test) and 448 held-out test predictions. JSON validity is 87.50%, below the 98% target, so it is a stronger diagnostic baseline than the earlier run but not a strong model-quality result."
     },
     {
       "area": "Raw Xperience-10M redistribution",
   ],
   "fast_research_route": [
     "Read PROJECT_STATUS.md and EVIDENCE_CONTRACT.md to establish what is implemented.",
+    "Open docs/data/project_packet.json for the machine-readable project path.",
+    "Inspect RESEARCH_TAKEAWAYS.md and docs/data/research_takeaways.json before interpreting model scores.",
+    "Inspect RESEARCH_ROADMAP.md and docs/data/research_roadmap.json for the path from public-sample task work to multi-episode modeling.",
+    "Inspect FOUNDATION_MODEL_PLAN.md and docs/data/foundation_model_plan.json before choosing a backbone branch.",
+    "Inspect XPERIENCE_EMBODIED_FOUNDATION_MODEL_PRETRAINING.md for the long-term full-corpus pretraining goal.",
+    "Inspect docs/data/summary_metrics.json and results/episode_task_suite/neural_mlp/ to check the 12-task outputs.",
+    "Inspect results/audio_ablation/AUDIO_ABLATION_SUMMARY.md before judging whether audio helps the current task suite.",
+    "Inspect EVALUATION_PROTOCOL.md before judging task metrics or leakage controls.",
+    "Inspect SOURCE_ALIGNMENT_AUDIT.md before judging source-card consistency across public surfaces.",
+    "Inspect XPERIENCE10M_DATASET_CARD_ALIGNMENT.md before judging dataset wording.",
+    "Inspect docs/data/omni_finetune_verified_result.json before judging the Qwen3-Omni diagnostic pilot."
   ],
   "current_reading_notes": [
+    "The validation-aware Qwen3-Omni diagnostic pilot is verified, but current held-out quality is still weak.",
+    "Use docs/data/omni_finetune_verified_result.json and the latest verified_public validation-aware package for current held-out results.",
     "The current reconstruction task reconstructs feature vectors, not pixel-depth, mesh, NeRF, or Gaussian reconstruction.",
     "Audio is one of the synchronized source modalities in the current task representation.",
     "The audio ablation report compares audio/no-audio variants across all 12 task contracts in results/audio_ablation/.",

docs/data/research_roadmap.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "title": "Ropedia Xperience-10M Research Roadmap",
-  "summary": "Staged path from the public-sample task lab to multi-episode held-out evaluation, foundation-model selection, world/policy branches, and a future Xperience-native embodied foundation model.",
-  "current_decision_point": "Keep the public-sample task suite as the development harness, prepare the selected official Xperience-10M episodes for the held-out Qwen3-Omni pilot, then branch into Cosmos 3 world modeling and policy-model experiments after the data preparation path is stable. The Xperience Embodied Foundation Model is a later full-corpus pretraining goal, not a current result.",
   "additional_development_directions": {
     "source_document": "ADDITIONAL_DEVELOPMENT_DIRECTIONS.md",
     "source_json": "docs/data/additional_development_directions.json",
@@ -33,8 +33,8 @@
     },
     {
       "id": "multi_episode_data_staging",
-      "name": "Multi-Episode Data Staging",
-      "status": "active",
       "entry_condition": "Gated dataset availability and enough storage for selected episodes.",
       "deliverables": [
         "128 selected episodes",
@@ -48,23 +48,26 @@
         "results/omni_finetune/MULTI_EPISODE_ACCESS_STATUS.md",
         "results/omni_finetune/source_discovery.json"
       ],
-      "reader_takeaway": "The next scale decision is data preparation, with train/test separation at the episode level."
     },
     {
-      "id": "qwen3_omni_lora_pilot",
-      "name": "Qwen3-Omni LoRA Pilot",
-      "status": "next",
       "entry_condition": "Selected episodes are prepared locally with no train/test episode leakage.",
       "deliverables": [
         "dataset JSONL/media manifests",
         "LoRA adapter checkpoint",
         "progress logs",
         "held-out predictions",
         "metrics",
         "confusion matrices",
         "run report"
       ],
       "completion_evidence": [
         "dataset_manifest.json",
         "training_metadata.json",
         "progress.jsonl",
@@ -72,7 +75,27 @@
         "predictions.jsonl",
         "RUN_REPORT.md"
       ],
-      "reader_takeaway": "The first omni-model pilot should establish a complete held-out-episode training and evaluation loop."
     },
     {
       "id": "foundation_model_selection_matrix",

 {
   "title": "Ropedia Xperience-10M Research Roadmap",
+  "summary": "Staged path from the public-sample task lab to a verified validation-aware Qwen3-Omni diagnostic pilot, structured-output improvement pass, foundation-model selection, world/policy branches, and a future Xperience-native embodied foundation model.",
+  "current_decision_point": "Keep the public-sample task suite as the development harness, use the verified selected-episode Qwen3-Omni validation-aware diagnostic pilot as the first cross-episode baseline, improve structured-output reliability and task-quality error analysis, then branch into Cosmos 3 world modeling and policy-model experiments after their targets are implemented. The Xperience Embodied Foundation Model is a later full-corpus pretraining goal, not a current result.",
   "additional_development_directions": {
     "source_document": "ADDITIONAL_DEVELOPMENT_DIRECTIONS.md",
     "source_json": "docs/data/additional_development_directions.json",
     },
     {
       "id": "multi_episode_data_staging",
+      "name": "Multi-Episode Data Preparation",
+      "status": "implemented_for_first_pilot",
       "entry_condition": "Gated dataset availability and enough storage for selected episodes.",
       "deliverables": [
         "128 selected episodes",
         "results/omni_finetune/MULTI_EPISODE_ACCESS_STATUS.md",
         "results/omni_finetune/source_discovery.json"
       ],
+      "reader_takeaway": "The first selected split is available for Qwen3-Omni diagnostics, with train/validation/test separation at the episode level."
     },
     {
+      "id": "qwen3_omni_lora_diagnostic_pilot",
+      "name": "Qwen3-Omni LoRA Validation-Aware Diagnostic Pilot",
+      "status": "verified_baseline",
       "entry_condition": "Selected episodes are prepared locally with no train/test episode leakage.",
       "deliverables": [
         "dataset JSONL/media manifests",
         "LoRA adapter checkpoint",
         "progress logs",
+        "validation monitoring",
         "held-out predictions",
         "metrics",
         "confusion matrices",
         "run report"
       ],
       "completion_evidence": [
+        "docs/data/omni_finetune_verified_result.json",
+        "results/omni_finetune/verified_public/",
         "dataset_manifest.json",
         "training_metadata.json",
         "progress.jsonl",
         "predictions.jsonl",
         "RUN_REPORT.md"
       ],
+      "reader_takeaway": "The first omni-model pilot establishes the full held-out training/validation/evaluation loop, but the weak metrics make it a diagnostic baseline."
+    },
+    {
+      "id": "qwen3_omni_structured_output_error_analysis",
+      "name": "Structured-Output And Error-Analysis Pass",
+      "status": "active_next_step",
+      "entry_condition": "The validation-aware diagnostic package exists and shows weak held-out quality.",
+      "deliverables": [
+        "same 96/16/16 episode split",
+        "stricter JSON decoding or target formatting",
+        "episode/action/object error analysis",
+        "held-out test evaluation",
+        "comparison to the verified validation-aware baseline"
+      ],
+      "completion_evidence": [
+        "quality-target report",
+        "error-analysis tables",
+        "held-out metrics",
+        "verified public-safe package"
+      ],
+      "reader_takeaway": "The next pass should improve output reliability and task metrics before larger model-quality claims."
     },
     {
       "id": "foundation_model_selection_matrix",

docs/data/research_roadmap_interactive.json CHANGED Viewed

@@ -127,7 +127,7 @@
       "Build the episode taxonomy and data-quality diagnostics first.",
       "Lock the benchmark protocol and split manifests before reporting model scores.",
       "Add representation-learning and skill-graph objectives once enough episodes are staged.",
-      "Add affordance, 3D/4D memory, and policy-retargeting branches after labels and action targets are auditable."
     ],
     "public_boundary": "These are proposed development tracks. They are not reported as completed held-out benchmark results.",
     "source_document": "ADDITIONAL_DEVELOPMENT_DIRECTIONS.md",
@@ -2035,7 +2035,7 @@
         "step": 1
       },
       {
-        "action": "Run Qwen3-Omni LoRA to establish the full train/eval loop.",
         "name": "First held-out baseline",
         "step": 2
       },
@@ -2222,7 +2222,7 @@
     ],
     "status": "planning_artifact"
   },
-  "generated_at_utc": "2026-06-04T21:22:15+00:00",
   "omni_plan": {
     "adapter": "LoRA rank 16, alpha 32, dropout 0.05",
     "backbone": "Qwen/Qwen3-Omni-30B-A3B-Instruct",
@@ -2279,13 +2279,15 @@
       ],
       "entry_condition": "Gated dataset availability and enough storage for selected episodes.",
       "id": "multi_episode_data_staging",
-      "name": "Multi-Episode Data Staging",
-      "reader_takeaway": "The next scale decision is data preparation, with train/test separation at the episode level.",
-      "stage": "scale_up",
-      "status": "active"
     },
     {
       "completion_evidence": [
         "dataset_manifest.json",
         "training_metadata.json",
         "progress.jsonl",
@@ -2297,17 +2299,39 @@
         "dataset JSONL/media manifests",
         "LoRA adapter checkpoint",
         "progress logs",
         "held-out predictions",
         "metrics",
         "confusion matrices",
         "run report"
       ],
       "entry_condition": "Selected episodes are prepared locally with no train/test episode leakage.",
-      "id": "qwen3_omni_lora_pilot",
-      "name": "Qwen3-Omni LoRA Pilot",
-      "reader_takeaway": "The first omni-model pilot should establish a complete held-out-episode training and evaluation loop.",
-      "stage": "omni",
-      "status": "next"
     },
     {
       "completion_evidence": [
@@ -2404,7 +2428,7 @@
       "visualization.rrd"
     ],
     "selection_strategy": "stratified_round_robin_by_top_level_session",
-    "status": "selected_episode_preparation",
     "target_episodes": 128,
     "valid_candidates": 12102
   },

       "Build the episode taxonomy and data-quality diagnostics first.",
       "Lock the benchmark protocol and split manifests before reporting model scores.",
       "Add representation-learning and skill-graph objectives once enough episodes are staged.",
+      "Add affordance, 3D/4D memory, and policy-retargeting branches after labels and action targets are measurable."
     ],
     "public_boundary": "These are proposed development tracks. They are not reported as completed held-out benchmark results.",
     "source_document": "ADDITIONAL_DEVELOPMENT_DIRECTIONS.md",
         "step": 1
       },
       {
+        "action": "Run validation-aware Qwen3-Omni LoRA to improve the verified diagnostic baseline.",
         "name": "First held-out baseline",
         "step": 2
       },
     ],
     "status": "planning_artifact"
   },
+  "generated_at_utc": "2026-06-06T13:49:32+00:00",
   "omni_plan": {
     "adapter": "LoRA rank 16, alpha 32, dropout 0.05",
     "backbone": "Qwen/Qwen3-Omni-30B-A3B-Instruct",
       ],
       "entry_condition": "Gated dataset availability and enough storage for selected episodes.",
       "id": "multi_episode_data_staging",
+      "name": "Multi-Episode Data Preparation",
+      "reader_takeaway": "The first selected split is available for Qwen3-Omni diagnostics, with train/test separation at the episode level.",
+      "stage": "scale_up",
+      "status": "implemented_for_first_pilot"
     },
     {
       "completion_evidence": [
+        "docs/data/omni_finetune_verified_result.json",
+        "results/omni_finetune/verified_public/",
         "dataset_manifest.json",
         "training_metadata.json",
         "progress.jsonl",
         "dataset JSONL/media manifests",
         "LoRA adapter checkpoint",
         "progress logs",
+        "validation monitoring",
         "held-out predictions",
         "metrics",
         "confusion matrices",
         "run report"
       ],
       "entry_condition": "Selected episodes are prepared locally with no train/test episode leakage.",
+      "id": "qwen3_omni_lora_diagnostic_pilot",
+      "name": "Qwen3-Omni LoRA Validation-Aware Diagnostic Pilot",
+      "reader_takeaway": "The first omni-model pilot establishes the full held-out training/validation/evaluation loop, but the weak metrics make it a diagnostic baseline.",
+      "stage": "omni",
+      "status": "verified_baseline"
+    },
+    {
+      "completion_evidence": [
+        "quality-target report",
+        "error-analysis tables",
+        "held-out metrics",
+        "verified public-safe package"
+      ],
+      "deliverables": [
+        "same 96/16/16 episode split",
+        "stricter JSON decoding or target formatting",
+        "episode/action/object error analysis",
+        "held-out test evaluation",
+        "comparison to the verified validation-aware baseline"
+      ],
+      "entry_condition": "The validation-aware diagnostic package exists and shows weak held-out quality.",
+      "id": "qwen3_omni_structured_output_error_analysis",
+      "name": "Structured-Output And Error-Analysis Pass",
+      "reader_takeaway": "The next pass should improve output reliability and task metrics before larger model-quality claims.",
+      "stage": "future",
+      "status": "active_next_step"
     },
     {
       "completion_evidence": [
       "visualization.rrd"
     ],
     "selection_strategy": "stratified_round_robin_by_top_level_session",
+    "status": "verified_validation_aware_diagnostic_pilot",
     "target_episodes": 128,
     "valid_candidates": 12102
   },

docs/data/research_takeaways.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "title": "Ropedia Xperience-10M Research Takeaways",
   "status": "pass",
-  "generated_at_utc": "2026-06-04T16:42:13+00:00",
   "source_files": [
     "docs/data/summary_metrics.json",
     "results/episode_task_suite/summary_report.json",
@@ -166,23 +166,27 @@
     {
       "id": "scale_requires_episodes",
       "title": "The next scientific unit is held-out episodes, not more adjacent windows",
-      "readout": "The prepared Qwen3-Omni path now targets a selected 128-episode pilot; held-out metrics will be reported after staging, training, and evaluation complete.",
       "evidence": [
         {
-          "label": "target_episodes",
           "value": 128
         },
         {
-          "label": "selected_sessions",
-          "value": 128
         },
         {
-          "label": "valid_candidates",
-          "value": 12102
         }
       ],
-      "source": "results/omni_finetune/MULTI_EPISODE_ACCESS_STATUS.md",
-      "current_scope": "The selected-episode Qwen3-Omni fine-tune requires completed data preparation and held-out evaluation; the 32-episode Qwen3-Omni fine-tune requires gated data preparation before any real held-out metric is reported."
     }
   ]
 }

 {
   "title": "Ropedia Xperience-10M Research Takeaways",
   "status": "pass",
+  "generated_at_utc": "2026-06-06T13:49:32+00:00",
   "source_files": [
     "docs/data/summary_metrics.json",
     "results/episode_task_suite/summary_report.json",
     {
       "id": "scale_requires_episodes",
       "title": "The next scientific unit is held-out episodes, not more adjacent windows",
+      "readout": "The selected Qwen3-Omni path now has a verified validation-aware held-out diagnostic pilot. It proves the cross-episode train/validation/eval loop, but the weak metrics show that structured-output reliability and task-quality error analysis are the next modeling problems.",
       "evidence": [
         {
+          "label": "selected_episodes",
           "value": 128
         },
         {
+          "label": "held_out_test_windows",
+          "value": 448
+        },
+        {
+          "label": "json_validity_rate",
+          "value": 0.875
         },
         {
+          "label": "action_macro_f1",
+          "value": 0.0026621494447581404
         }
       ],
+      "source": "docs/data/omni_finetune_verified_result.json",
+      "current_scope": "The selected-episode Qwen3-Omni validation-aware diagnostic pilot is verified, but held-out quality is still weak and JSON validity remains below the 98% target."
     }
   ]
 }

metrics/evaluation_protocol.json CHANGED Viewed

@@ -2,7 +2,7 @@
   "title": "Ropedia Xperience-10M Task Suite Evaluation Protocol",
   "status": "pass",
   "version": "2026-06-01",
-  "generated_at_utc": "2026-06-04T16:42:13+00:00",
   "source_files": [
     "docs/data/summary_metrics.json",
     "results/episode_task_suite/summary_report.json",
@@ -303,22 +303,23 @@
     "Report unseen test classes when the chronological split exposes labels absent from the train segment."
   ],
   "current_limitations": [
-    "Cross-episode generalization is evaluated in the later multi-episode stage.",
     "Feature-vector reconstruction is separate from pixel depth, mesh, NeRF, or Gaussian reconstruction.",
-    "Qwen3-Omni setup artifacts are preparation artifacts until the selected held-out pilot runs.",
     "Full audio-visual representation learning still needs multi-episode training; the current report includes single-episode audio/no-audio ablations."
   ],
   "scale_up_gate": {
-    "required_before_full_omni_pilot": [
       "selected prepared Xperience-10M episodes",
       "held-out episode split with no train/test episode leakage",
       "manifest, training metadata, progress logs, metrics, predictions, and run report",
       "held-out evaluation on test episodes rather than train windows"
     ],
-    "current_status": "prepared; selected data relay in progress",
     "evidence": [
-      "results/omni_finetune/DATA_ACCESS_STATUS.md",
-      "results/omni_finetune/MULTI_EPISODE_ACCESS_STATUS.md"
     ]
   }
 }

   "title": "Ropedia Xperience-10M Task Suite Evaluation Protocol",
   "status": "pass",
   "version": "2026-06-01",
+  "generated_at_utc": "2026-06-06T13:49:32+00:00",
   "source_files": [
     "docs/data/summary_metrics.json",
     "results/episode_task_suite/summary_report.json",
     "Report unseen test classes when the chronological split exposes labels absent from the train segment."
   ],
   "current_limitations": [
+    "Cross-episode generalization for Qwen3-Omni has a first verified diagnostic pilot, but strong model quality is not yet shown.",
     "Feature-vector reconstruction is separate from pixel depth, mesh, NeRF, or Gaussian reconstruction.",
+    "The verified validation-aware Qwen3-Omni diagnostic pilot has weak held-out metrics and needs structured-output and task-quality improvements before larger model-quality claims.",
     "Full audio-visual representation learning still needs multi-episode training; the current report includes single-episode audio/no-audio ablations."
   ],
   "scale_up_gate": {
+    "required_before_next_omni_quality_pilot": [
       "selected prepared Xperience-10M episodes",
       "held-out episode split with no train/test episode leakage",
+      "validation samples during training",
       "manifest, training metadata, progress logs, metrics, predictions, and run report",
       "held-out evaluation on test episodes rather than train windows"
     ],
+    "current_status": "verified diagnostic pilot; quality target not met",
     "evidence": [
+      "docs/data/omni_finetune_verified_result.json",
+      "results/omni_finetune/verified_public/"
     ]
   }
 }

metrics/foundation_model_plan.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "title": "Xperience-10M Foundation Model Plan",
   "status": "planning_artifact",
-  "current_boundary": "No held-out multi-episode foundation-model result has been completed in this repo. The current foundation-model artifacts are setup-stage until enough valid episodes are prepared and evaluated.",
   "backbone_registry": {
     "config_dir": "configs/omni_backbones",
     "validator": "scripts/omni/backbone_registry.py --validate --json",
@@ -206,7 +206,7 @@
     {
       "step": 2,
       "name": "First held-out baseline",
-      "action": "Run Qwen3-Omni LoRA to establish the full train/eval loop."
     },
     {
       "step": 3,

 {
   "title": "Xperience-10M Foundation Model Plan",
   "status": "planning_artifact",
+  "current_boundary": "A first held-out multi-episode Qwen3-Omni diagnostic pilot is verified in this repo, but it is not a strong model result. The current foundation-model work should treat it as the baseline train/eval/package loop before further validation-aware Qwen reruns, Cosmos-style world modeling, or policy/VLA branches.",
   "backbone_registry": {
     "config_dir": "configs/omni_backbones",
     "validator": "scripts/omni/backbone_registry.py --validate --json",
     {
       "step": 2,
       "name": "First held-out baseline",
+      "action": "Run validation-aware Qwen3-Omni LoRA to improve the verified diagnostic baseline."
     },
     {
       "step": 3,

metrics/omni_finetune_verified_result.json ADDED Viewed

	@@ -0,0 +1,78 @@

+{
+  "title": "Verified Qwen3-Omni LoRA Validation-Aware Held-Out Pilot",
+  "status": "verified_validation_aware_diagnostic_pilot",
+  "status_date": "2026-06-06",
+  "backbone": "Qwen/Qwen3-Omni-30B-A3B-Instruct",
+  "adapter": "Qwen3-Omni LoRA",
+  "dataset": "Ropedia Xperience-10M selected 128-episode pilot",
+  "split_policy": {
+    "unit": "episode",
+    "selected_episode_counts": {
+      "train": 96,
+      "val": 16,
+      "test": 16
+    },
+    "exported_window_counts": {
+      "train": 2848,
+      "val": 512,
+      "test": 448
+    },
+    "exported_episode_counts": {
+      "train": 89,
+      "val": 16,
+      "test": 14
+    },
+    "skipped_selected_episodes": 9,
+    "leakage_policy": "Train, validation, and test are separated by episode/session; test windows are used only for held-out evaluation."
+  },
+  "training": {
+    "num_processes": 8,
+    "epochs": 1,
+    "lora_rank": 16,
+    "lora_alpha": 32,
+    "lora_dropout": 0.05,
+    "num_train_samples": 2848,
+    "num_val_samples": 512,
+    "history": [
+      {
+        "epoch": 1,
+        "train_loss": 0.41304643672440994,
+        "val_loss": 0.0330660454928875,
+        "global_step": 356
+      }
+    ],
+    "loss": "answer-token cross entropy over supervised JSON tokens",
+    "note": "This validation-aware run monitors loss on the selected validation split during training and preserves the held-out test split for final evaluation."
+  },
+  "evaluation": {
+    "split": "test",
+    "num_samples": 448,
+    "held_out_episode_count": 14,
+    "json_validity_rate": 0.875,
+    "action_macro_f1": 0.0026621494447581404,
+    "subtask_accuracy": 0.006696428571428571,
+    "transition_accuracy": 0.8504464285714286,
+    "next_action_accuracy": 0.024553571428571428,
+    "contact_accuracy": 0.6450892857142857,
+    "object_micro_f1": 0.22299431459254582,
+    "quality_target": {
+      "json_validity_rate": 0.98,
+      "status": "not_met"
+    },
+    "previous_diagnostic_json_validity_rate": 0.8526785714285714
+  },
+  "interpretation": "This is a real held-out multi-episode validation-aware diagnostic pilot proving the full loop of export, LoRA training with validation monitoring, held-out evaluation, result verification, and public-safe packaging. JSON validity improved over the earlier no-validation diagnostic run, but task-quality metrics remain weak, so it should be used as a baseline and error-analysis starting point rather than a strong Xperience-10M model.",
+  "public_package": {
+    "path": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval",
+    "audit_status": "pass",
+    "contains_raw_xperience10m_data": false,
+    "contains_qwen_base_weights": false,
+    "contains_lora_weights": false
+  },
+  "required_next_steps": [
+    "Improve JSON-format reliability through prompt, decoding, constrained parsing, or target formatting changes.",
+    "Add error analysis by episode, action family, object category, and missing-modality state.",
+    "Run a second validation-aware Qwen3-Omni pass only after the JSON/output contract is tightened.",
+    "Keep the same verified package contract for Cosmos-style world-model and VLA/policy branches."
+  ]
+}

metrics/project_status.json CHANGED Viewed

@@ -1,8 +1,8 @@
 {
   "title": "Ropedia Xperience-10M Task Suite Project Status",
   "version": "2026-06-01",
-  "decision": "public_sample_pipeline_verified_multi_episode_omni_data_staging",
-  "research_positioning": "A research-engineering study that makes one public Xperience-10M sample episode inspectable, defines embodied-AI tasks over synchronized modalities, records baseline behavior, and keeps later multi-episode model-quality claims separate from current single-episode evidence.",
   "scope_boundary": {
     "validated_episode_count": 1,
     "aligned_frames": 5821,
@@ -13,7 +13,20 @@
     "direction_extension_probe_count": 4,
     "audio_featurized": true,
     "raw_xperience10m_data_redistributed": false,
-    "qwen3_omni_32_episode_claim": false
   },
   "rows": [
     {
@@ -36,92 +49,92 @@
       ],
       "readout": "All 12 task contracts have committed metrics, predictions, and minimal baseline outputs."
     },
-        {
-            "area": "Neural heads",
-            "status": "verified",
-            "evidence": [
-                "scripts/neural_task_models.py",
-                "results/episode_task_suite/neural_mlp/"
-            ],
-            "readout": "Each task also has a compact PyTorch MLP run over the same feature tensor and chronological split."
-        },
-        {
-            "area": "Audio contribution study",
-            "status": "verified",
-            "evidence": [
-                "scripts/audio_ablation_and_raw_upgrade.py",
-                "results/audio_ablation/",
-                "docs/data/audio_ablation_summary.json"
-            ],
-            "readout": "Audio variants improve the primary metric on 6 of 12 task contracts in this single-episode setting."
-        },
-        {
-            "area": "Evaluation protocol",
-            "status": "verified",
-            "evidence": [
-                "EVALUATION_PROTOCOL.md",
-                "docs/data/evaluation_protocol.json",
-                "scripts/build_evaluation_protocol.py"
-            ],
-            "readout": "Windowing, chronological split, per-task metrics, leakage controls, and current limitations are generated from committed metric artifacts."
-        },
-        {
-            "area": "Research takeaways",
-            "status": "verified",
-            "evidence": [
-                "RESEARCH_TAKEAWAYS.md",
-                "docs/data/research_takeaways.json",
-                "scripts/build_research_takeaways.py"
-            ],
-            "readout": "The main result interpretation is generated from committed metrics: chronological class shift, neural gains on dynamics/order/alignment, open retrieval/reconstruction problems, and the need for held-out episodes."
-        },
-        {
-            "area": "Research roadmap",
-            "status": "current",
-            "evidence": [
-                "RESEARCH_ROADMAP.md",
-                "docs/data/research_roadmap.json"
-            ],
-            "readout": "The roadmap connects public-sample task development to 128-episode data preparation, Qwen3-Omni LoRA, foundation-model selection, robustness runs, world/policy branches, and the future Xperience-native pretraining goal."
-        },
-        {
-            "area": "Foundation-model plan",
-            "status": "current",
-            "evidence": [
-                "FOUNDATION_MODEL_PLAN.md",
-                "docs/data/foundation_model_plan.json"
-            ],
-            "readout": "Qwen3-Omni remains the first trainable held-out LoRA baseline; Cosmos 3 is added as the first world-model/action-generation branch; OpenVLA/openpi/GR00T are policy candidates after action targets are explicit."
-        },
-        {
-            "area": "Xperience Embodied Foundation Model",
-            "status": "future_goal",
-            "evidence": [
-                "XPERIENCE_EMBODIED_FOUNDATION_MODEL_PRETRAINING.md"
-            ],
-            "readout": "A future full-corpus pretraining plan describes target modules, objectives, staged scale-up, hardware ranges, and evaluation for a domain-specific embodied foundation model."
-        },
-        {
-            "area": "Official dataset wording",
-            "status": "verified",
       "evidence": [
         "XPERIENCE10M_DATASET_CARD_ALIGNMENT.md",
         "docs/data/xperience10m_dataset_card_alignment.json"
       ],
-            "readout": "Public wording is aligned to the official gated Xperience-10M dataset card, public sample card, and HF API metadata, including modalities, scale, access path, sample license/tooling, and current project coverage."
-        },
-        {
-            "area": "Source alignment",
-            "status": "verified",
-            "evidence": [
-                "SOURCE_ALIGNMENT_AUDIT.md",
-                "docs/data/source_alignment_audit.json",
-                "scripts/validate_source_alignment.py"
-            ],
-            "readout": "Source facts, sample details, API-listing notes, and project coverage are checked across repo docs, website, and HF cards."
-        },
-        {
-            "area": "Website and HF mirrors",
       "status": "verified",
       "evidence": [
         "docs/data/website_integrity.json",
@@ -152,12 +165,14 @@
     },
     {
       "area": "Qwen3-Omni fine-tuning",
-      "status": "data_preparation_full_metrics_pending",
       "evidence": [
-        "results/omni_finetune/DATA_ACCESS_STATUS.md",
-        "results/omni_finetune/MULTI_EPISODE_ACCESS_STATUS.md"
       ],
-      "readout": "The gated full dataset is available for a selected 128-episode pilot; final held-out metrics require completed preprocessing, manifest construction, training, and held-out evaluation."
     },
     {
       "area": "Raw Xperience-10M redistribution",
@@ -171,21 +186,21 @@
   ],
   "fast_research_route": [
     "Read PROJECT_STATUS.md and EVIDENCE_CONTRACT.md to establish what is implemented.",
-        "Open docs/data/project_packet.json for the machine-readable project path.",
-        "Inspect RESEARCH_TAKEAWAYS.md and docs/data/research_takeaways.json before interpreting model scores.",
-        "Inspect RESEARCH_ROADMAP.md and docs/data/research_roadmap.json for the path from public-sample task work to multi-episode modeling.",
-        "Inspect FOUNDATION_MODEL_PLAN.md and docs/data/foundation_model_plan.json before choosing a backbone branch.",
-        "Inspect XPERIENCE_EMBODIED_FOUNDATION_MODEL_PRETRAINING.md for the long-term full-corpus pretraining goal.",
-        "Inspect docs/data/summary_metrics.json and results/episode_task_suite/neural_mlp/ to check the 12-task outputs.",
-        "Inspect results/audio_ablation/AUDIO_ABLATION_SUMMARY.md before judging whether audio helps the current task suite.",
-        "Inspect EVALUATION_PROTOCOL.md before judging task metrics or leakage controls.",
-        "Inspect SOURCE_ALIGNMENT_AUDIT.md before judging source-card consistency across public surfaces.",
-        "Inspect XPERIENCE10M_DATASET_CARD_ALIGNMENT.md before judging dataset wording.",
-    "Inspect results/omni_finetune/DATA_ACCESS_STATUS.md before judging Qwen3-Omni scale-up status."
   ],
   "current_reading_notes": [
-    "Cross-episode generalization is evaluated in the later multi-episode stage.",
-    "Older pilot path names refer to setup files, not completed held-out training results.",
     "The current reconstruction task reconstructs feature vectors, not pixel-depth, mesh, NeRF, or Gaussian reconstruction.",
     "Audio is one of the synchronized source modalities in the current task representation.",
     "The audio ablation report compares audio/no-audio variants across all 12 task contracts in results/audio_ablation/.",

 {
   "title": "Ropedia Xperience-10M Task Suite Project Status",
   "version": "2026-06-01",
+  "decision": "public_sample_pipeline_verified_qwen3_omni_validation_aware_diagnostic_pilot",
+  "research_positioning": "A research-engineering study that makes one public Xperience-10M sample episode inspectable, defines embodied-AI tasks over synchronized modalities, records baseline behavior, and uses the selected-episode Qwen3-Omni validation-aware diagnostic pilot as a verified but weak cross-episode baseline.",
   "scope_boundary": {
     "validated_episode_count": 1,
     "aligned_frames": 5821,
     "direction_extension_probe_count": 4,
     "audio_featurized": true,
     "raw_xperience10m_data_redistributed": false,
+    "qwen3_omni_32_episode_claim": false,
+    "qwen3_omni_verified_diagnostic_pilot": true,
+    "qwen3_omni_selected_episode_counts": {
+      "train": 96,
+      "val": 16,
+      "test": 16
+    },
+    "qwen3_omni_exported_window_counts": {
+      "train": 2848,
+      "val": 512,
+      "test": 448
+    },
+    "qwen3_omni_json_validity_rate": 0.875,
+    "qwen3_omni_validation_aware": true
   },
   "rows": [
     {
       ],
       "readout": "All 12 task contracts have committed metrics, predictions, and minimal baseline outputs."
     },
+    {
+      "area": "Neural heads",
+      "status": "verified",
+      "evidence": [
+        "scripts/neural_task_models.py",
+        "results/episode_task_suite/neural_mlp/"
+      ],
+      "readout": "Each task also has a compact PyTorch MLP run over the same feature tensor and chronological split."
+    },
+    {
+      "area": "Audio contribution study",
+      "status": "verified",
+      "evidence": [
+        "scripts/audio_ablation_and_raw_upgrade.py",
+        "results/audio_ablation/",
+        "docs/data/audio_ablation_summary.json"
+      ],
+      "readout": "Audio variants improve the primary metric on 6 of 12 task contracts in this single-episode setting."
+    },
+    {
+      "area": "Evaluation protocol",
+      "status": "verified",
+      "evidence": [
+        "EVALUATION_PROTOCOL.md",
+        "docs/data/evaluation_protocol.json",
+        "scripts/build_evaluation_protocol.py"
+      ],
+      "readout": "Windowing, chronological split, per-task metrics, leakage controls, and current limitations are generated from committed metric artifacts."
+    },
+    {
+      "area": "Research takeaways",
+      "status": "verified",
+      "evidence": [
+        "RESEARCH_TAKEAWAYS.md",
+        "docs/data/research_takeaways.json",
+        "scripts/build_research_takeaways.py"
+      ],
+      "readout": "The main result interpretation is generated from committed metrics: chronological class shift, neural gains on dynamics/order/alignment, open retrieval/reconstruction problems, and the need for held-out episodes."
+    },
+    {
+      "area": "Research roadmap",
+      "status": "current",
+      "evidence": [
+        "RESEARCH_ROADMAP.md",
+        "docs/data/research_roadmap.json"
+      ],
+      "readout": "The roadmap connects public-sample task development to the verified validation-aware Qwen3-Omni diagnostic pilot, the structured-output and error-analysis pass, foundation-model selection, robustness runs, world/policy branches, and the future Xperience-native pretraining goal."
+    },
+    {
+      "area": "Foundation-model plan",
+      "status": "current",
+      "evidence": [
+        "FOUNDATION_MODEL_PLAN.md",
+        "docs/data/foundation_model_plan.json"
+      ],
+      "readout": "Qwen3-Omni remains the first trainable held-out LoRA baseline; Cosmos 3 is added as the first world-model/action-generation branch; OpenVLA/openpi/GR00T are policy candidates after action targets are explicit."
+    },
+    {
+      "area": "Xperience Embodied Foundation Model",
+      "status": "future_goal",
+      "evidence": [
+        "XPERIENCE_EMBODIED_FOUNDATION_MODEL_PRETRAINING.md"
+      ],
+      "readout": "A future full-corpus pretraining plan describes target modules, objectives, staged scale-up, hardware ranges, and evaluation for a domain-specific embodied foundation model."
+    },
+    {
+      "area": "Official dataset wording",
+      "status": "verified",
       "evidence": [
         "XPERIENCE10M_DATASET_CARD_ALIGNMENT.md",
         "docs/data/xperience10m_dataset_card_alignment.json"
       ],
+      "readout": "Public wording is aligned to the official gated Xperience-10M dataset card, public sample card, and HF API metadata, including modalities, scale, access path, sample license/tooling, and current project coverage."
+    },
+    {
+      "area": "Source alignment",
+      "status": "verified",
+      "evidence": [
+        "SOURCE_ALIGNMENT_AUDIT.md",
+        "docs/data/source_alignment_audit.json",
+        "scripts/validate_source_alignment.py"
+      ],
+      "readout": "Source facts, sample details, API-listing notes, and project coverage are checked across repo docs, website, and HF cards."
+    },
+    {
+      "area": "Website and HF mirrors",
       "status": "verified",
       "evidence": [
         "docs/data/website_integrity.json",
     },
     {
       "area": "Qwen3-Omni fine-tuning",
+      "status": "verified_validation_aware_diagnostic_pilot_quality_target_not_met",
       "evidence": [
+        "docs/data/omni_finetune_verified_result.json",
+        "results/omni_finetune/verified_public/",
+        "scripts/omni/package_verified_omni_result.py",
+        "scripts/omni/audit_verified_omni_package.py"
       ],
+      "readout": "The selected 96/16/16 episode split produced a validation-aware public-safe held-out package with 3,808 exported windows in total (2,848 train, 512 validation, 448 test) and 448 held-out test predictions. JSON validity is 87.50%, below the 98% target, so it is a stronger diagnostic baseline but not a strong model-quality result."
     },
     {
       "area": "Raw Xperience-10M redistribution",
   ],
   "fast_research_route": [
     "Read PROJECT_STATUS.md and EVIDENCE_CONTRACT.md to establish what is implemented.",
+    "Open docs/data/project_packet.json for the machine-readable project path.",
+    "Inspect RESEARCH_TAKEAWAYS.md and docs/data/research_takeaways.json before interpreting model scores.",
+    "Inspect RESEARCH_ROADMAP.md and docs/data/research_roadmap.json for the path from public-sample task work to multi-episode modeling.",
+    "Inspect FOUNDATION_MODEL_PLAN.md and docs/data/foundation_model_plan.json before choosing a backbone branch.",
+    "Inspect XPERIENCE_EMBODIED_FOUNDATION_MODEL_PRETRAINING.md for the long-term full-corpus pretraining goal.",
+    "Inspect docs/data/summary_metrics.json and results/episode_task_suite/neural_mlp/ to check the 12-task outputs.",
+    "Inspect results/audio_ablation/AUDIO_ABLATION_SUMMARY.md before judging whether audio helps the current task suite.",
+    "Inspect EVALUATION_PROTOCOL.md before judging task metrics or leakage controls.",
+    "Inspect SOURCE_ALIGNMENT_AUDIT.md before judging source-card consistency across public surfaces.",
+    "Inspect XPERIENCE10M_DATASET_CARD_ALIGNMENT.md before judging dataset wording.",
+    "Inspect docs/data/omni_finetune_verified_result.json before judging the Qwen3-Omni diagnostic pilot."
   ],
   "current_reading_notes": [
+    "The validation-aware Qwen3-Omni diagnostic pilot is verified, but current held-out quality is still weak.",
+    "Use docs/data/omni_finetune_verified_result.json and the latest verified_public validation-aware package for current held-out results.",
     "The current reconstruction task reconstructs feature vectors, not pixel-depth, mesh, NeRF, or Gaussian reconstruction.",
     "Audio is one of the synchronized source modalities in the current task representation.",
     "The audio ablation report compares audio/no-audio variants across all 12 task contracts in results/audio_ablation/.",

metrics/research_roadmap.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "title": "Ropedia Xperience-10M Research Roadmap",
-  "summary": "Staged path from the public-sample task lab to multi-episode held-out evaluation, foundation-model selection, world/policy branches, and a future Xperience-native embodied foundation model.",
-  "current_decision_point": "Keep the public-sample task suite as the development harness, prepare the selected official Xperience-10M episodes for the held-out Qwen3-Omni pilot, then branch into Cosmos 3 world modeling and policy-model experiments after the data preparation path is stable. The Xperience Embodied Foundation Model is a later full-corpus pretraining goal, not a current result.",
   "additional_development_directions": {
     "source_document": "ADDITIONAL_DEVELOPMENT_DIRECTIONS.md",
     "source_json": "docs/data/additional_development_directions.json",
@@ -33,8 +33,8 @@
     },
     {
       "id": "multi_episode_data_staging",
-      "name": "Multi-Episode Data Staging",
-      "status": "active",
       "entry_condition": "Gated dataset availability and enough storage for selected episodes.",
       "deliverables": [
         "128 selected episodes",
@@ -48,23 +48,26 @@
         "results/omni_finetune/MULTI_EPISODE_ACCESS_STATUS.md",
         "results/omni_finetune/source_discovery.json"
       ],
-      "reader_takeaway": "The next scale decision is data preparation, with train/test separation at the episode level."
     },
     {
-      "id": "qwen3_omni_lora_pilot",
-      "name": "Qwen3-Omni LoRA Pilot",
-      "status": "next",
       "entry_condition": "Selected episodes are prepared locally with no train/test episode leakage.",
       "deliverables": [
         "dataset JSONL/media manifests",
         "LoRA adapter checkpoint",
         "progress logs",
         "held-out predictions",
         "metrics",
         "confusion matrices",
         "run report"
       ],
       "completion_evidence": [
         "dataset_manifest.json",
         "training_metadata.json",
         "progress.jsonl",
@@ -72,7 +75,27 @@
         "predictions.jsonl",
         "RUN_REPORT.md"
       ],
-      "reader_takeaway": "The first omni-model pilot should establish a complete held-out-episode training and evaluation loop."
     },
     {
       "id": "foundation_model_selection_matrix",

 {
   "title": "Ropedia Xperience-10M Research Roadmap",
+  "summary": "Staged path from the public-sample task lab to a verified validation-aware Qwen3-Omni diagnostic pilot, structured-output improvement pass, foundation-model selection, world/policy branches, and a future Xperience-native embodied foundation model.",
+  "current_decision_point": "Keep the public-sample task suite as the development harness, use the verified selected-episode Qwen3-Omni validation-aware diagnostic pilot as the first cross-episode baseline, improve structured-output reliability and task-quality error analysis, then branch into Cosmos 3 world modeling and policy-model experiments after their targets are implemented. The Xperience Embodied Foundation Model is a later full-corpus pretraining goal, not a current result.",
   "additional_development_directions": {
     "source_document": "ADDITIONAL_DEVELOPMENT_DIRECTIONS.md",
     "source_json": "docs/data/additional_development_directions.json",
     },
     {
       "id": "multi_episode_data_staging",
+      "name": "Multi-Episode Data Preparation",
+      "status": "implemented_for_first_pilot",
       "entry_condition": "Gated dataset availability and enough storage for selected episodes.",
       "deliverables": [
         "128 selected episodes",
         "results/omni_finetune/MULTI_EPISODE_ACCESS_STATUS.md",
         "results/omni_finetune/source_discovery.json"
       ],
+      "reader_takeaway": "The first selected split is available for Qwen3-Omni diagnostics, with train/test separation at the episode level."
     },
     {
+      "id": "qwen3_omni_lora_diagnostic_pilot",
+      "name": "Qwen3-Omni LoRA Validation-Aware Diagnostic Pilot",
+      "status": "verified_baseline",
       "entry_condition": "Selected episodes are prepared locally with no train/test episode leakage.",
       "deliverables": [
         "dataset JSONL/media manifests",
         "LoRA adapter checkpoint",
         "progress logs",
+        "validation monitoring",
         "held-out predictions",
         "metrics",
         "confusion matrices",
         "run report"
       ],
       "completion_evidence": [
+        "docs/data/omni_finetune_verified_result.json",
+        "results/omni_finetune/verified_public/",
         "dataset_manifest.json",
         "training_metadata.json",
         "progress.jsonl",
         "predictions.jsonl",
         "RUN_REPORT.md"
       ],
+      "reader_takeaway": "The first omni-model pilot establishes the full held-out training/validation/evaluation loop, but the weak metrics make it a diagnostic baseline."
+    },
+    {
+      "id": "qwen3_omni_structured_output_error_analysis",
+      "name": "Structured-Output And Error-Analysis Pass",
+      "status": "active_next_step",
+      "entry_condition": "The validation-aware diagnostic package exists and shows weak held-out quality.",
+      "deliverables": [
+        "same 96/16/16 episode split",
+        "stricter JSON decoding or target formatting",
+        "episode/action/object error analysis",
+        "held-out test evaluation",
+        "comparison to the verified validation-aware baseline"
+      ],
+      "completion_evidence": [
+        "quality-target report",
+        "error-analysis tables",
+        "held-out metrics",
+        "verified public-safe package"
+      ],
+      "reader_takeaway": "The next pass should improve output reliability and task metrics before larger model-quality claims."
     },
     {
       "id": "foundation_model_selection_matrix",

metrics/research_roadmap_interactive.json CHANGED Viewed

@@ -127,7 +127,7 @@
       "Build the episode taxonomy and data-quality diagnostics first.",
       "Lock the benchmark protocol and split manifests before reporting model scores.",
       "Add representation-learning and skill-graph objectives once enough episodes are staged.",
-      "Add affordance, 3D/4D memory, and policy-retargeting branches after labels and action targets are auditable."
     ],
     "public_boundary": "These are proposed development tracks. They are not reported as completed held-out benchmark results.",
     "source_document": "ADDITIONAL_DEVELOPMENT_DIRECTIONS.md",
@@ -2035,7 +2035,7 @@
         "step": 1
       },
       {
-        "action": "Run Qwen3-Omni LoRA to establish the full train/eval loop.",
         "name": "First held-out baseline",
         "step": 2
       },
@@ -2222,7 +2222,7 @@
     ],
     "status": "planning_artifact"
   },
-  "generated_at_utc": "2026-06-04T21:22:15+00:00",
   "omni_plan": {
     "adapter": "LoRA rank 16, alpha 32, dropout 0.05",
     "backbone": "Qwen/Qwen3-Omni-30B-A3B-Instruct",
@@ -2279,13 +2279,15 @@
       ],
       "entry_condition": "Gated dataset availability and enough storage for selected episodes.",
       "id": "multi_episode_data_staging",
-      "name": "Multi-Episode Data Staging",
-      "reader_takeaway": "The next scale decision is data preparation, with train/test separation at the episode level.",
-      "stage": "scale_up",
-      "status": "active"
     },
     {
       "completion_evidence": [
         "dataset_manifest.json",
         "training_metadata.json",
         "progress.jsonl",
@@ -2297,17 +2299,39 @@
         "dataset JSONL/media manifests",
         "LoRA adapter checkpoint",
         "progress logs",
         "held-out predictions",
         "metrics",
         "confusion matrices",
         "run report"
       ],
       "entry_condition": "Selected episodes are prepared locally with no train/test episode leakage.",
-      "id": "qwen3_omni_lora_pilot",
-      "name": "Qwen3-Omni LoRA Pilot",
-      "reader_takeaway": "The first omni-model pilot should establish a complete held-out-episode training and evaluation loop.",
-      "stage": "omni",
-      "status": "next"
     },
     {
       "completion_evidence": [
@@ -2404,7 +2428,7 @@
       "visualization.rrd"
     ],
     "selection_strategy": "stratified_round_robin_by_top_level_session",
-    "status": "selected_episode_preparation",
     "target_episodes": 128,
     "valid_candidates": 12102
   },

       "Build the episode taxonomy and data-quality diagnostics first.",
       "Lock the benchmark protocol and split manifests before reporting model scores.",
       "Add representation-learning and skill-graph objectives once enough episodes are staged.",
+      "Add affordance, 3D/4D memory, and policy-retargeting branches after labels and action targets are measurable."
     ],
     "public_boundary": "These are proposed development tracks. They are not reported as completed held-out benchmark results.",
     "source_document": "ADDITIONAL_DEVELOPMENT_DIRECTIONS.md",
         "step": 1
       },
       {
+        "action": "Run validation-aware Qwen3-Omni LoRA to improve the verified diagnostic baseline.",
         "name": "First held-out baseline",
         "step": 2
       },
     ],
     "status": "planning_artifact"
   },
+  "generated_at_utc": "2026-06-06T13:49:32+00:00",
   "omni_plan": {
     "adapter": "LoRA rank 16, alpha 32, dropout 0.05",
     "backbone": "Qwen/Qwen3-Omni-30B-A3B-Instruct",
       ],
       "entry_condition": "Gated dataset availability and enough storage for selected episodes.",
       "id": "multi_episode_data_staging",
+      "name": "Multi-Episode Data Preparation",
+      "reader_takeaway": "The first selected split is available for Qwen3-Omni diagnostics, with train/test separation at the episode level.",
+      "stage": "future",
+      "status": "implemented_for_first_pilot"
     },
     {
       "completion_evidence": [
+        "docs/data/omni_finetune_verified_result.json",
+        "results/omni_finetune/verified_public/",
         "dataset_manifest.json",
         "training_metadata.json",
         "progress.jsonl",
         "dataset JSONL/media manifests",
         "LoRA adapter checkpoint",
         "progress logs",
+        "validation monitoring",
         "held-out predictions",
         "metrics",
         "confusion matrices",
         "run report"
       ],
       "entry_condition": "Selected episodes are prepared locally with no train/test episode leakage.",
+      "id": "qwen3_omni_lora_diagnostic_pilot",
+      "name": "Qwen3-Omni LoRA Validation-Aware Diagnostic Pilot",
+      "reader_takeaway": "The first omni-model pilot establishes the full held-out training/validation/evaluation loop, but the weak metrics make it a diagnostic baseline.",
+      "stage": "future",
+      "status": "verified_baseline"
+    },
+    {
+      "completion_evidence": [
+        "quality-target report",
+        "error-analysis tables",
+        "held-out metrics",
+        "verified public-safe package"
+      ],
+      "deliverables": [
+        "same 96/16/16 episode split",
+        "stricter JSON decoding or target formatting",
+        "episode/action/object error analysis",
+        "held-out test evaluation",
+        "comparison to the verified validation-aware baseline"
+      ],
+      "entry_condition": "The validation-aware diagnostic package exists and shows weak held-out quality.",
+      "id": "qwen3_omni_structured_output_error_analysis",
+      "name": "Structured-Output And Error-Analysis Pass",
+      "reader_takeaway": "The next pass should improve output reliability and task metrics before larger model-quality claims.",
+      "stage": "future",
+      "status": "active_next_step"
     },
     {
       "completion_evidence": [
       "visualization.rrd"
     ],
     "selection_strategy": "stratified_round_robin_by_top_level_session",
+    "status": "verified_validation_aware_diagnostic_pilot",
     "target_episodes": 128,
     "valid_candidates": 12102
   },

metrics/research_takeaways.json CHANGED Viewed

@@ -1,7 +1,7 @@
 {
   "title": "Ropedia Xperience-10M Research Takeaways",
   "status": "pass",
-  "generated_at_utc": "2026-06-04T16:42:13+00:00",
   "source_files": [
     "docs/data/summary_metrics.json",
     "results/episode_task_suite/summary_report.json",
@@ -166,23 +166,27 @@
     {
       "id": "scale_requires_episodes",
       "title": "The next scientific unit is held-out episodes, not more adjacent windows",
-      "readout": "The prepared Qwen3-Omni path now targets a selected 128-episode pilot; held-out metrics will be reported after staging, training, and evaluation complete.",
       "evidence": [
         {
-          "label": "target_episodes",
           "value": 128
         },
         {
-          "label": "selected_sessions",
-          "value": 128
         },
         {
-          "label": "valid_candidates",
-          "value": 12102
         }
       ],
-      "source": "results/omni_finetune/MULTI_EPISODE_ACCESS_STATUS.md",
-      "current_scope": "The selected-episode Qwen3-Omni fine-tune requires completed data preparation and held-out evaluation; the 32-episode Qwen3-Omni fine-tune requires gated data preparation before any real held-out metric is reported."
     }
   ]
 }

 {
   "title": "Ropedia Xperience-10M Research Takeaways",
   "status": "pass",
+  "generated_at_utc": "2026-06-06T13:49:32+00:00",
   "source_files": [
     "docs/data/summary_metrics.json",
     "results/episode_task_suite/summary_report.json",
     {
       "id": "scale_requires_episodes",
       "title": "The next scientific unit is held-out episodes, not more adjacent windows",
+      "readout": "The selected Qwen3-Omni path now has a verified validation-aware held-out diagnostic pilot. It demonstrates that the cross-episode train/validation/eval loop runs end to end, but the weak metrics show that structured-output reliability and task-quality error analysis are the next modeling problems.",
       "evidence": [
         {
+          "label": "selected_episodes",
           "value": 128
         },
         {
+          "label": "held_out_test_windows",
+          "value": 448
+        },
+        {
+          "label": "json_validity_rate",
+          "value": 0.875
         },
         {
+          "label": "action_macro_f1",
+          "value": 0.0026621494447581404
         }
       ],
+      "source": "docs/data/omni_finetune_verified_result.json",
+      "current_scope": "The selected-episode Qwen3-Omni validation-aware diagnostic pilot is verified, but held-out quality is still weak and JSON validity (87.5%) remains below the 98% target."
     }
   ]
 }

results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval/PUBLIC_RESULT_SUMMARY.md ADDED Viewed

	@@ -0,0 +1,25 @@

+# Verified Omni Fine-Tuning Result
+- Backbone: `qwen3_omni_lora`
+- Dataset run: `xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605`
+- Training run: `xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_lora`
+- Evaluation run: `xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval`
+- Validation status: `verified`
+- Held-out eval split: `test`
+- Held-out episodes: `14`
+- Prediction rows: `448`
+## Primary Metrics
+- json_validity_rate: `0.875`
+- action_macro_f1: `0.0026621494447581404`
+- subtask_accuracy: `0.006696428571428571`
+- transition_accuracy: `0.8504464285714286`
+- next_action_accuracy: `0.024553571428571428`
+- contact_accuracy: `0.6450892857142857`
+- object_micro_f1: `0.22299431459254582`
+- held_out_episode_count: `14`
+Raw Xperience-10M files, base-model weights, adapter or checkpoint weights, full checkpoints, and large archives are not included.
+Use this package as the source for README, website, and Hugging Face updates.

results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval/dataset/dataset_manifest.json ADDED Viewed

The diff for this file is too large to render. See raw diff

results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval/dataset/episode_manifest.json ADDED Viewed

The diff for this file is too large to render. See raw diff

results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval/eval/RUN_REPORT.md ADDED Viewed

	@@ -0,0 +1,13 @@

+# Qwen3-Omni LoRA Evaluation
+- Base model: `<workspace-parent>/modelscope_models/Qwen__Qwen3-Omni-30B-A3B-Instruct`
+- Adapter: `<project>/checkpoints/xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_lora/adapter_lora`
+- Dataset: `<project>/results/omni_finetune/xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_dataset/dataset.jsonl`
+- Eval split: `test`
+- Samples: `448`
+- Episodes: `14`
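+- Accuracy: `0.0246` (`next_action_accuracy` in `PUBLIC_RESULT_SUMMARY.md`)
+- Macro-F1: `0.0027` (`action_macro_f1` in `PUBLIC_RESULT_SUMMARY.md`)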
+- Unseen eval labels: `144`
+Artifacts include `metrics.json`, `predictions.csv`, `per_class_metrics.csv`, and `confusion_matrix.csv`.

results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval/eval/confusion_matrix.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval/eval/metrics.json ADDED Viewed

	@@ -0,0 +1,1578 @@

+{
+  "num_samples": 448,
+  "accuracy": 0.024553571428571428,
+  "macro_f1": 0.0026621494447581404,
+  "labels": [
+    "Adjust Mahjong tile",
+    "Adjust Mahjong tile alignment",
+    "Adjust Mahjong tile on the stack",
+    "Adjust Mahjong tiles",
+    "Adjust bead piles",
+    "Adjust canned food on shelf",
+    "Adjust cans in bin",
+    "Adjust cans in container",
+    "Adjust cans in tray",
+    "Adjust cardboard",
+    "Adjust cardboard divider",
+    "Adjust cardboard position",
+    "Adjust container on shelf",
+    "Adjust container position",
+    "Adjust containers on shelf",
+    "Adjust foam strip",
+    "Adjust grip",
+    "Adjust grip on container",
+    "Adjust hand position",
+    "Adjust item on shelf",
+    "Adjust lantern shape",
+    "Adjust lantern string",
+    "Adjust paper",
+    "Adjust paper strip",
+    "Adjust perspective",
+    "Adjust placement on shelf",
+    "Adjust position",
+    "Adjust pot position",
+    "Adjust puzzle piece",
+    "Adjust red button",
+    "Adjust red button in row",
+    "Adjust red button position",
+    "Adjust retail item position",
+    "Adjust retail items on shelf",
+    "Adjust ruler position",
+    "Adjust smartphone and sort pieces",
+    "Adjust snack package",
+    "Adjust tile row alignment",
+    "Adjust vacuum cleaner position",
+    "Adjusting a puzzle piece",
+    "Adjusting canned goods on shelf",
+    "Adjusting fabric for cutting",
+    "Adjusting fabric position",
+    "Adjusting puzzle piece",
+    "Align Mahjong tiles",
+    "Align and place retail item",
+    "Align blue strip",
+    "Align button",
+    "Align button in row",
+    "Align button row",
+    "Align buttons",
+    "Align canned food on shelf",
+    "Align canned goods on shelf",
+    "Align cardboard piece",
+    "Align cardboard strip",
+    "Align charging cable",
+    "Align edges of paper lantern",
+    "Align foam piece",
+    "Align foam strip",
+    "Align paper lantern edges",
+    "Align paper strip",
+    "Align plastic containers",
+    "Align red button in row",
+    "Align red buttons",
+    "Align ruler",
+    "Align ruler and mark cardboard",
+    "Align ruler on cardboard",
+    "Align ruler with crease",
+    "Align scissors",
+    "Apply adhesive tape to lantern",
+    "Approach boxes",
+    "Approach desk",
+    "Approach packing area",
+    "Approach restocking supplies",
+    "Approach table",
+    "Approach work table",
+    "Approach workstation",
+    "Approaching and pressing the door switch",
+    "Approaching the table",
+    "Approaching work table",
+    "Arrange Mahjong tiles",
+    "Arrange beads by color",
+    "Arrange black buttons",
+    "Arrange button cluster",
+    "Arrange buttons",
+    "Arrange buttons in a line",
+    "Arrange buttons in row",
+    "Arrange buttons on table",
+    "Arrange buttons on the table",
+    "Arrange canned products on shelf",
+    "Arrange cans in box",
+    "Arrange cans on shelf",
+    "Arrange cardboard",
+    "Arrange cardboard piece",
+    "Arrange cardboard pieces",
+    "Arrange cardboard squares",
+    "Arrange container on shelf",
+    "Arrange items on shelf",
+    "Arrange orange buttons",
+    "Arrange paper stars",
+    "Arrange paper strips",
+    "Arrange plastic containers",
+    "Arrange red buttons",
+    "Arrange small buttons",
+    "Arrange star beads",
+    "Arrange star beads for counting",
+    "Arrange star-shaped beads",
+    "Arrange tiles into row",
+    "Arrive at a different workstation",
+    "Assemble cardboard pieces",
+    "Assemble foam strips",
+    "Assess shelf arrangement",
+    "Attach foam strip",
+    "Attach material to paper strip",
+    "Attempt to fit puzzle piece",
+    "Begin folding paper strip",
+    "Begin rolling quilling strip",
+    "Bend and manipulate plastic strip",
+    "Browse and interact with phone interface",
+    "Browse mobile phone",
+    "Browse smartphone screen",
+    "Browsing mobile phone",
+    "Browsing smartphone content",
+    "Bundle display hooks",
+    "Cap marker",
+    "Carry cardboard piece",
+    "Carry cereal boxes",
+    "Carry cereal towards aisle",
+    "Carry container",
+    "Carry crate of cans",
+    "Carry item to shelf",
+    "Carry pasta box towards aisle",
+    "Carry plastic container",
+    "Carry stool to next shelf",
+    "Check phone",
+    "Check smart watch",
+    "Check watch",
+    "Clean shelf",
+    "Close cardboard box",
+    "Closing the door",
+    "Combine bead piles",
+    "Complete the cut",
+    "Connect cable to device",
+    "Continue cutting fabric",
+    "Continue cutting newspaper",
+    "Continue folding paper strip",
+    "Count and arrange paper stars",
+    "Count and record paper stars",
+    "Count paper stars",
+    "Counting and organizing beads",
+    "Counting star beads",
+    "Curve foam strip into loop",
+    "Cut along the edge of the newspaper",
+    "Cut along the line",
+    "Cut along the marked line",
+    "Cut along the newspaper edge",
+    "Cut cardboard",
+    "Cut cardboard along line",
+    "Cut cardboard grid",
+    "Cut cardboard into triangles",
+    "Cut cardboard pattern",
+    "Cut cardboard piece",
+    "Cut cardboard piece with scissors",
+    "Cut cardboard pieces with scissors",
+    "Cut cardboard shape",
+    "Cut cardboard sheet",
+    "Cut cardboard sheet with scissors",
+    "Cut cardboard square",
+    "Cut cardboard strip",
+    "Cut cardboard strip with scissors",
+    "Cut cardboard strip with utility knife",
+    "Cut cardboard triangle",
+    "Cut cardboard tube",
+    "Cut cardboard with scissors",
+    "Cut cardboard with utility knife",
+    "Cut fabric with scissors",
+    "Cut light green fabric",
+    "Cut newspaper",
+    "Cut newspaper with scissors",
+    "Cut out cardboard pattern",
+    "Cut section from newspaper",
+    "Cutting fabric",
+    "Deposit beads into box",
+    "Deposit cardboard squares",
+    "Discard item into bin",
+    "Discard paper towel",
+    "Draw grid line",
+    "Draw grid line with pen",
+    "Draw line",
+    "Draw line along ruler",
+    "Draw line on cardboard",
+    "Draw line with marker",
+    "Draw line with pen",
+    "Draw lines on cardboard",
+    "Draw lines with pen and ruler",
+    "Draw lines with ruler",
+    "Draw straight line",
+    "Draw straight lines on cardboard",
+    "Drawing grid line",
+    "Drawing grid line with pen and ruler",
+    "Drawing grid line with ruler",
+    "Drawing lines on cardboard",
+    "Drop cardboard square into box",
+    "Dry hands",
+    "Enter the room",
+    "Enter workspace",
+    "Entering the VR training room",
+    "Examine canned goods",
+    "Examine item",
+    "Examine labels",
+    "Examine product",
+    "Expand paper lantern",
+    "Extract wire hangers from box",
+    "Finish placing cardboard cutouts",
+    "Finish washing hands",
+    "Finish wiping and inspect jar",
+    "Finishing coil",
+    "Fold and manipulate ribbon",
+    "Fold and organize paper strips",
+    "Fold blue strip",
+    "Fold cardboard",
+    "Fold cardboard edge",
+    "Fold cardboard shape",
+    "Fold cardboard sheet",
+    "Fold cut cardboard",
+    "Fold foam piece",
+    "Fold lucky star",
+    "Fold newspaper",
+    "Fold paper lantern",
+    "Fold paper star",
+    "Fold paper strip",
+    "Fold paper strip into a star",
+    "Fold paper strip into knot",
+    "Fold paper strip into lucky star",
+    "Fold paper strip into star",
+    "Fold plastic strip",
+    "Fold purple paper",
+    "Fold purple paper strip",
+    "Fold ribbon",
+    "Folding cardboard",
+    "Folding paper strip",
+    "Forming quilled paper shape",
+    "Gather cardboard pieces",
+    "Gather pieces",
+    "Gather pieces into box",
+    "Gather star beads",
+    "Gathering colored beads",
+    "Gathering items",
+    "Gathering star beads",
+    "Gesturing",
+    "Grasp and retrieve item",
+    "Grasp cardboard sheet",
+    "Grasp cleaning bottle",
+    "Grasp door handle",
+    "Grasp electronic object",
+    "Grasp item",
+    "Grasp lantern",
+    "Grasp lantern component",
+    "Grasp next item",
+    "Grasp origami stars",
+    "Grasp package",
+    "Grasp paper strip",
+    "Grasp plastic bag on shelf",
+    "Grasp product from box",
+    "Grasp product from shelf",
+    "Grasp retail item",
+    "Grasp shopping bag",
+    "Grasp snack package",
+    "Grasping cleaning cloth",
+    "Greeting/acknowledging participants",
+    "Guide utility knife along ruler",
+    "Handle paper lantern component",
+    "Hold and align cardboard",
+    "Hold and align newspaper",
+    "Hold and align paper strip",
+    "Hold and bend paper strip",
+    "Hold and bend plastic strip",
+    "Hold and crease purple paper",
+    "Hold and examine item",
+    "Hold and inspect can",
+    "Hold and manipulate paper strip",
+    "Hold and mark cardboard piece",
+    "Hold and rotate paper strip",
+    "Hold and view phone",
+    "Hold and wipe product",
+    "Hold beads",
+    "Hold bin and move through aisle",
+    "Hold blue product box",
+    "Hold blue strip",
+    "Hold canned food",
+    "Hold cardboard",
+    "Hold cardboard piece",
+    "Hold cardboard pieces",
+    "Hold cardboard strip",
+    "Hold cardboard with ruler",
+    "Hold charger",
+    "Hold charger and cable",
+    "Hold charging cable",
+    "Hold cleaning cloth",
+    "Hold container",
+    "Hold container lid",
+    "Hold container of canned food",
+    "Hold craft tool",
+    "Hold device and cable",
+    "Hold earbud case",
+    "Hold electronic accessory",
+    "Hold electronic item",
+    "Hold empty container",
+    "Hold foam pieces",
+    "Hold instructional sign",
+    "Hold item",
+    "Hold item and adjust posture",
+    "Hold items",
+    "Hold items and inspect shelf",
+    "Hold items in hand",
+    "Hold newspaper",
+    "Hold paper lantern",
+    "Hold paper strip",
+    "Hold pen and paper",
+    "Hold phone",
+    "Hold pickle jar",
+    "Hold portable charger",
+    "Hold power adapter",
+    "Hold power bank and cable",
+    "Hold product",
+    "Hold product labels",
+    "Hold product package",
+    "Hold quilled paper coil",
+    "Hold quilled paper piece",
+    "Hold quilling paper",
+    "Hold recording sheet and pen",
+    "Hold ruler",
+    "Hold ruler and draw line",
+    "Hold ruler and mark cardboard",
+    "Hold ruler and marker",
+    "Hold ruler and pen steady",
+    "Hold ruler on cardboard",
+    "Hold ruler steady",
+    "Hold scissors",
+    "Hold small cardboard pieces",
+    "Hold small object",
+    "Hold small piece of ribbon",
+    "Hold small product bag",
+    "Hold small white box",
+    "Hold smartphone",
+    "Hold smartphone box",
+    "Hold snack package",
+    "Hold snack packages",
+    "Hold supplement bottle",
+    "Hold tray of canned goods",
+    "Hold utility knife",
+    "Hold water bottle",
+    "Holding marker",
+    "Identify next cardboard piece",
+    "Inflate paper star",
+    "Initiate star folding",
+    "Insert charging cable",
+    "Insert charging cable into power bank",
+    "Insert plug into power adapter",
+    "Inspect Dior gift box",
+    "Inspect almond package",
+    "Inspect and place item on shelf",
+    "Inspect bottle",
+    "Inspect cardboard piece",
+    "Inspect cardboard strip",
+    "Inspect charging case",
+    "Inspect electronic item",
+    "Inspect jar",
+    "Inspect product",
+    "Inspect product lid",
+    "Inspect shelf",
+    "Inspect shelf and organize stock",
+    "Inspect shelf condition",
+    "Inspect smartphone box",
+    "Inspect strip",
+    "Inspect supplement bottle",
+    "Interact with colleagues",
+    "Interact with phone",
+    "Interact with smartphone",
+    "Interact with smartphone screen",
+    "Interacting with phone screen",
+    "Interaction with coworker",
+    "Interlock paper strips",
+    "Labeling cardboard piece",
+    "Labeling cardboard square",
+    "Labeling cardboard squares",
+    "Lift blue strip",
+    "Lift pen and shift ruler",
+    "Lift pot lid",
+    "Lift utility knife",
+    "Lock phone",
+    "Look around the table",
+    "Look away",
+    "Manipulate adhesive strip",
+    "Manipulate and inspect colorful pieces",
+    "Manipulate bead",
+    "Manipulate beads",
+    "Manipulate cardboard piece",
+    "Manipulate cardboard shape",
+    "Manipulate cardboard sheet",
+    "Manipulate colorful pieces",
+    "Manipulate component",
+    "Manipulate component on strip",
+    "Manipulate craft paper strips",
+    "Manipulate craft piece",
+    "Manipulate folded paper star",
+    "Manipulate light blue strip",
+    "Manipulate material",
+    "Manipulate paper decoration",
+    "Manipulate paper edge",
+    "Manipulate paper piece",
+    "Manipulate paper quilling piece",
+    "Manipulate paper star",
+    "Manipulate paper stars",
+    "Manipulate paper strip",
+    "Manipulate paper strips",
+    "Manipulate plastic strip",
+    "Manipulate plastic strips",
+    "Manipulate power cable plug",
+    "Manipulate puzzle piece",
+    "Manipulate puzzle pieces",
+    "Manipulate quilled paper",
+    "Manipulate quilled paper shape",
+    "Manipulate quilled paper strip",
+    "Manipulate quilled paper strips",
+    "Manipulate quilling paper",
+    "Manipulate quilling strip",
+    "Manipulate ribbon knot",
+    "Manipulate ribbon piece",
+    "Manipulate small component",
+    "Manipulate small object",
+    "Manipulate small paper segment",
+    "Manipulate star",
+    "Manipulate yellow strip",
+    "Manipulating paper strips",
+    "Mark cardboard",
+    "Mark cardboard piece",
+    "Mark cardboard strip with pen",
+    "Mark cardboard with marker",
+    "Mark cardboard with pen",
+    "Mark cardboard with pen and ruler",
+    "Mark cardboard with ruler",
+    "Mark cardboard with ruler and pen",
+    "Mark fabric",
+    "Mark fabric with pen",
+    "Mark fabric with pen and ruler",
+    "Mark line on cardboard",
+    "Mark lines on cardboard",
+    "Mark lines with marker",
+    "Mark lines with pen along ruler",
+    "Mark list with pen",
+    "Mark paper list",
+    "Mark straight line",
+    "Marking cardboard piece",
+    "Marking cardboard with pen",
+    "Marking lines on cardboard",
+    "Measure and mark cardboard",
+    "Measure cardboard with ruler",
+    "Move Mahjong tile",
+    "Move along shelf",
+    "Move along the shelf",
+    "Move along the shelves",
+    "Move along the supermarket aisle",
+    "Move and place black buttons",
+    "Move away from collection box",
+    "Move away from desk",
+    "Move away from shelf",
+    "Move away from table",
+    "Move away from workstation",
+    "Move bin",
+    "Move bin to shelf area",
+    "Move black button",
+    "Move blue beads",
+    "Move box to next position",
+    "Move button to line",
+    "Move camera over surface",
+    "Move can towards shelf",
+    "Move canned goods container",
+    "Move cardboard",
+    "Move cardboard box",
+    "Move cardboard piece",
+    "Move cardboard sheet",
+    "Move cardboard to pile",
+    "Move container toward shelf",
+    "Move dustpan to side",
+    "Move hand",
+    "Move hand away",
+    "Move hand away from shelf",
+    "Move hand away from workspace",
+    "Move hand back to box",
+    "Move hand over button pile",
+    "Move hand to paper stars",
+    "Move hand toward craft materials",
+    "Move item to bag",
+    "Move marker and adjust hand",
+    "Move marker and ruler",
+    "Move marker away",
+    "Move orange buttons",
+    "Move origami stars",
+    "Move pen",
+    "Move pen aside",
+    "Move pen away",
+    "Move phone",
+    "Move piece to pile",
+    "Move pieces into box",
+    "Move pineapple chips",
+    "Move plastic storage bin",
+    "Move plush toy",
+    "Move pot",
+    "Move product to box",
+    "Move product to shelf",
+    "Move product towards shelf",
+    "Move puzzle piece",
+    "Move ruler",
+    "Move ruler and tools",
+    "Move scissors away",
+    "Move small blue foam piece towards the strip",
+    "Move smartphone",
+    "Move storage bin",
+    "Move through aisle",
+    "Move through the training room",
+    "Move to box",
+    "Move to desk",
+    "Move to next section",
+    "Move to shelf",
+    "Move to shelf base",
+    "Move to stock products",
+    "Move towards aisle",
+    "Move towards box",
+    "Move towards kitchen area",
+    "Move towards shelf",
+    "Move towards table",
+    "Move towards the stove",
+    "Move tray towards packing area",
+    "Move utility knife along ruler",
+    "Move vacuum cleaner",
+    "Move vacuum cleaner hose",
+    "Moving cardboard square",
+    "Moving hand",
+    "Moving hand towards cardboard stack",
+    "Moving ruler",
+    "Observe and pause",
+    "Observe and walk through store",
+    "Observe colleague and workspace",
+    "Observe craft layout",
+    "Observe desktop layout",
+    "Observe paper and count objects",
+    "Observe paper quilling station",
+    "Observe puzzle progress",
+    "Observe room",
+    "Observe shelf",
+    "Observe shelf status",
+    "Observe sorting progress",
+    "Observe stocking",
+    "Observe surroundings",
+    "Observe workspace",
+    "Open cardboard box",
+    "Open door",
+    "Open earbud case",
+    "Open folded paper lantern",
+    "Open paper lantern",
+    "Open paper lantern component",
+    "Open small case",
+    "Open stove pot lid",
+    "Open supplement bottle",
+    "Operate smartphone",
+    "Organize bag contents",
+    "Organize cardboard pieces",
+    "Organize item on shelf",
+    "Organize products",
+    "Organize snacks in box",
+    "Organize tools and materials",
+    "Pack beads into box",
+    "Peel blue strip",
+    "Peel foam strip",
+    "Pick up Dior gift box",
+    "Pick up Mahjong tile",
+    "Pick up accessory",
+    "Pick up and sort cardboard",
+    "Pick up another bottle",
+    "Pick up another canned item",
+    "Pick up another item",
+    "Pick up beads",
+    "Pick up black button",
+    "Pick up blue foam piece",
+    "Pick up blue paper strip",
+    "Pick up bottle",
+    "Pick up bottled sauce",
+    "Pick up button",
+    "Pick up can",
+    "Pick up canned food",
+    "Pick up canned good",
+    "Pick up canned goods",
+    "Pick up canned item",
+    "Pick up canned product",
+    "Pick up cardboard",
+    "Pick up cardboard cutout",
+    "Pick up cardboard piece",
+    "Pick up cardboard square",
+    "Pick up cardboard stack",
+    "Pick up cardboard strip",
+    "Pick up cardboard tray",
+    "Pick up cereal boxes",
+    "Pick up charging cable",
+    "Pick up charging case",
+    "Pick up cleaning cloth",
+    "Pick up colored tile",
+    "Pick up container",
+    "Pick up container from box",
+    "Pick up craft material",
+    "Pick up cut cardboard piece",
+    "Pick up dustpan",
+    "Pick up electronic accessory",
+    "Pick up electronic accessory from box",
+    "Pick up electronic device",
+    "Pick up electronic item",
+    "Pick up electronic product",
+    "Pick up food item",
+    "Pick up gift box",
+    "Pick up grocery item",
+    "Pick up item",
+    "Pick up item from bin",
+    "Pick up item from box",
+    "Pick up item from shelf",
+    "Pick up items from the shopping bag",
+    "Pick up jar",
+    "Pick up light blue strip",
+    "Pick up marker",
+    "Pick up metal ruler",
+    "Pick up new cardboard piece",
+    "Pick up new electronic product",
+    "Pick up new product from box",
+    "Pick up next gift box",
+    "Pick up next item from bin",
+    "Pick up next product from bin",
+    "Pick up nut bar box",
+    "Pick up object",
+    "Pick up oil bottle",
+    "Pick up orange button",
+    "Pick up pack from shelf",
+    "Pick up packaged paper lantern component",
+    "Pick up paper star",
+    "Pick up paper strip",
+    "Pick up paper towel",
+    "Pick up pasta box",
+    "Pick up pen",
+    "Pick up phone",
+    "Pick up pickle jar",
+    "Pick up pink water bottle",
+    "Pick up plastic bin",
+    "Pick up plastic container",
+    "Pick up plush toy",
+    "Pick up portable charger",
+    "Pick up power bank",
+    "Pick up product",
+    "Pick up product box",
+    "Pick up product from bin",
+    "Pick up product from box",
+    "Pick up product from shelf",
+    "Pick up puzzle piece",
+    "Pick up red button",
+    "Pick up retail item",
+    "Pick up sauce bottle",
+    "Pick up scissors",
+    "Pick up shopping bag",
+    "Pick up small cardboard piece",
+    "Pick up small item",
+    "Pick up small object",
+    "Pick up small piece of material",
+    "Pick up smartphone",
+    "Pick up snack package",
+    "Pick up snack packages",
+    "Pick up snack packs",
+    "Pick up snack pouch",
+    "Pick up spice jar",
+    "Pick up stapler",
+    "Pick up star",
+    "Pick up star bead",
+    "Pick up star-shaped bead",
+    "Pick up storage container",
+    "Pick up supplement bottle",
+    "Pick up supplies from box",
+    "Pick up tin can",
+    "Pick up tool",
+    "Pick up utility knife",
+    "Pick up water bottle",
+    "Pick up yellow item",
+    "Pick up yellow paper strip",
+    "Picking up bottle",
+    "Picking up crafting material",
+    "Picking up stock",
+    "Place Mahjong tile on stack",
+    "Place Mahjong tile on the stack",
+    "Place accessory box",
+    "Place accessory into box",
+    "Place accessory on shelf",
+    "Place and align button",
+    "Place and count bead",
+    "Place another canned food on shelf",
+    "Place back Dior gift box",
+    "Place bead on table",
+    "Place bottle back on shelf",
+    "Place box on shelf",
+    "Place button",
+    "Place button in group",
+    "Place button in row",
+    "Place can on shelf",
+    "Place canned food in bin",
+    "Place canned food in container",
+    "Place canned food on shelf",
+    "Place canned good on shelf",
+    "Place canned goods in container",
+    "Place canned product on shelf",
+    "Place cans into box",
+    "Place cardboard",
+    "Place cardboard piece",
+    "Place cardboard piece on stack",
+    "Place cardboard square",
+    "Place cardboard square on stack",
+    "Place cardboard strip",
+    "Place charger on table",
+    "Place charging case down",
+    "Place cloth on floor",
+    "Place colored tile",
+    "Place container in bin",
+    "Place container on floor",
+    "Place container on shelf",
+    "Place controller on table",
+    "Place crate on floor",
+    "Place device on lap",
+    "Place down paper pieces",
+    "Place down paper segment",
+    "Place down pen",
+    "Place down pink water bottle",
+    "Place down ruler and pen",
+    "Place down scissors",
+    "Place down strip",
+    "Place finished star on table",
+    "Place gift box into bin",
+    "Place gift box on shelf",
+    "Place hand on table",
+    "Place item back",
+    "Place item back on shelf",
+    "Place item in bag",
+    "Place item in container",
+    "Place item in shopping bag",
+    "Place item into bag",
+    "Place item into shopping bag",
+    "Place item on shelf",
+    "Place item on table",
+    "Place items on shelf",
+    "Place items on table",
+    "Place items on the shelf",
+    "Place jar in box",
+    "Place jar into shelf box",
+    "Place jar on shelf",
+    "Place ketchup bottle on shelf",
+    "Place knife down",
+    "Place lid back",
+    "Place marked piece down",
+    "Place marker down",
+    "Place material",
+    "Place oil in container",
+    "Place paper star",
+    "Place paper star in row",
+    "Place pen on cardboard",
+    "Place pen on table",
+    "Place phone down",
+    "Place phone on desk",
+    "Place phone on shelf",
+    "Place phone on table",
+    "Place pickle jar in box",
+    "Place piece into puzzle",
+    "Place plush toy into bag",
+    "Place plush toy on shelf",
+    "Place product in box",
+    "Place product on shelf",
+    "Place puzzle piece",
+    "Place quilled paper shape",
+    "Place red button",
+    "Place ribbon onto project",
+    "Place ruler on cardboard",
+    "Place sauce bottle on shelf",
+    "Place sauce in container",
+    "Place scissors aside",
+    "Place scissors down",
+    "Place scissors on table",
+    "Place smartphone down",
+    "Place smartphone on cardboard",
+    "Place smartphone on desk",
+    "Place smartphone on stand",
+    "Place smartphone on table",
+    "Place snack in box",
+    "Place snack on shelf",
+    "Place snack package in box",
+    "Place snack package on shelf",
+    "Place snack packages on shelf",
+    "Place snack pouch in container",
+    "Place snack pouch on shelf",
+    "Place spice jar in container",
+    "Place star",
+    "Place star in row",
+    "Place star on table",
+    "Place stars in container",
+    "Place stool on floor",
+    "Place storage container on floor",
+    "Place strip on table",
+    "Place supplement bottle in container",
+    "Place tool on table",
+    "Place towel",
+    "Place water bottle on table",
+    "Place white box on table",
+    "Placing labeled cardboard square",
+    "Placing labeled square",
+    "Placing paper strip",
+    "Placing pen on table",
+    "Placing phone down",
+    "Placing piece on stack",
+    "Placing stock on shelf",
+    "Plug cable into portable charger",
+    "Position cardboard for cutting",
+    "Position cardboard piece",
+    "Position cardboard strip",
+    "Position cardboard tray",
+    "Position cardboard tube",
+    "Position container near shelf",
+    "Position container on shelf",
+    "Position hands for work",
+    "Position ribbon piece",
+    "Position ruler and mark cardboard",
+    "Position ruler on cardboard",
+    "Position scissors",
+    "Position scissors for next cut",
+    "Position scissors to cut cardboard",
+    "Position shelving divider",
+    "Position the ruler",
+    "Position tray",
+    "Position utility knife",
+    "Position utility knife on cardboard",
+    "Positioning cardboard on workspace",
+    "Positioning paper strip",
+    "Positioning puzzle piece",
+    "Positioning ruler on cardboard",
+    "Prepare paper strip",
+    "Prepare to cut cardboard",
+    "Prepare to draw lines",
+    "Prepare to pick up item",
+    "Prepare to place bottle on shelf",
+    "Prepare to place cardboard",
+    "Prepare to place item in bag",
+    "Prepare to place product",
+    "Prepare to resume cutting",
+    "Prepare to sort beads",
+    "Preparing to craft",
+    "Press fold",
+    "Pull back hand",
+    "Pull paper strip",
+    "Push vacuum cleaner",
+    "Put down phone",
+    "Put down scissors",
+    "Put down smartphone",
+    "Put down utility knife",
+    "Put down water bottle",
+    "Putting away smartphone",
+    "Reach and sort buttons",
+    "Reach for Mahjong tiles",
+    "Reach for additional items",
+    "Reach for and examine canned goods",
+    "Reach for and pick up smartphone",
+    "Reach for another container",
+    "Reach for another item",
+    "Reach for beads",
+    "Reach for black button",
+    "Reach for button",
+    "Reach for can",
+    "Reach for canned food",
+    "Reach for canned goods",
+    "Reach for cardboard box",
+    "Reach for cardboard piece",
+    "Reach for cleaning supplies",
+    "Reach for container",
+    "Reach for craft items",
+    "Reach for empty shelf space",
+    "Reach for item",
+    "Reach for item in box",
+    "Reach for item on shelf",
+    "Reach for items",
+    "Reach for items in box",
+    "Reach for more pieces",
+    "Reach for next can",
+    "Reach for next canned food",
+    "Reach for next canned food item",
+    "Reach for next canned product",
+    "Reach for next item",
+    "Reach for next piece",
+    "Reach for next product",
+    "Reach for object",
+    "Reach for paper strip",
+    "Reach for paper strips",
+    "Reach for phone",
+    "Reach for product",
+    "Reach for product labels",
+    "Reach for product on shelf",
+    "Reach for puzzle piece",
+    "Reach for retail item",
+    "Reach for shelf",
+    "Reach for shelving divider",
+    "Reach for snack package",
+    "Reach for snack pouch",
+    "Reach for star",
+    "Reach for stars",
+    "Reach for utility knife",
+    "Reach for water bottle",
+    "Reach for wire hangers",
+    "Reach into bag",
+    "Reach into box",
+    "Reach towards shelf",
+    "Reaching for beads",
+    "Realign Mahjong tiles",
+    "Rearrange Mahjong tile",
+    "Rearrange Mahjong tiles",
+    "Rearrange shelf item",
+    "Record count",
+    "Record count on notepad",
+    "Record star count",
+    "Record star count on paper",
+    "Release and prepare new strip",
+    "Release bottle",
+    "Release cardboard",
+    "Release cardboard piece",
+    "Release cardboard piece and gesture",
+    "Release cardboard shape",
+    "Release container",
+    "Release folded paper",
+    "Release food item",
+    "Release hook",
+    "Release label",
+    "Release lantern",
+    "Release paper",
+    "Release paper coil",
+    "Release paper star",
+    "Release paper strip",
+    "Release pickle jar",
+    "Release product on shelf",
+    "Release puzzle piece",
+    "Release quilling strip",
+    "Release scissors",
+    "Release smartphone",
+    "Remove cardboard flap",
+    "Remove cardboard pattern",
+    "Remove cardboard pattern piece",
+    "Remove cleaning bottle",
+    "Remove item from bag",
+    "Remove item from shelf",
+    "Remove lid from container",
+    "Remove paper lantern part from packaging",
+    "Remove plastic container from shelf",
+    "Remove plastic container from storage box",
+    "Remove plastic packaging",
+    "Remove ruler",
+    "Remove ruler and marker",
+    "Remove shelf label",
+    "Remove storage bin from shelf",
+    "Reorganize bin contents",
+    "Reposition and cut",
+    "Reposition cardboard for cutting",
+    "Reposition hand",
+    "Reposition hands",
+    "Reposition hands and ruler",
+    "Reposition marker",
+    "Reposition newspaper",
+    "Reposition pen and prepare for next line",
+    "Reposition ruler",
+    "Reposition ruler and pen",
+    "Reposition scissors",
+    "Reposition sign and organize beads",
+    "Reposition tools",
+    "Reposition utility knife",
+    "Repositioning ruler",
+    "Repositioning ruler and cardboard",
+    "Resume counting stars",
+    "Resume observation",
+    "Resume sorting blue beads",
+    "Resume writing on paper",
+    "Retract camera/reposition view",
+    "Retract hand",
+    "Retract hand from bag",
+    "Retrieve another container",
+    "Retrieve canned food from box",
+    "Retrieve hand to table",
+    "Retrieve items from bag",
+    "Retrieve next canned food item",
+    "Retrieve paper strip",
+    "Retrieve paper strips",
+    "Retrieve snack from container",
+    "Retrieve star",
+    "Retrieving more beads",
+    "Return to sorting",
+    "Reviewing count record",
+    "Rinse cloth in sink",
+    "Roll quilling paper",
+    "Rolling paper strip",
+    "Rub hands together",
+    "Scan for next piece",
+    "Scan supermarket shelves",
+    "Score cardboard",
+    "Scroll on smartphone",
+    "Scroll smartphone screen",
+    "Scroll through photo gallery",
+    "Scrolling and viewing content on phone",
+    "Scrolling or navigating on phone",
+    "Search for puzzle piece",
+    "Secure paper edges with adhesive",
+    "Secure ribbon with needle",
+    "Securing paper structure",
+    "Select a bottle",
+    "Select and pick up a canned item",
+    "Select another item",
+    "Select paper strip",
+    "Select product from box",
+    "Selecting new paper strip",
+    "Separate cardboard piece",
+    "Set down scissors and pick up power bank",
+    "Set down utility knife",
+    "Slide utility knife along ruler",
+    "Sort Mahjong tiles",
+    "Sort and adjust button line",
+    "Sort and arrange buttons",
+    "Sort and arrange cardboard pieces",
+    "Sort and count beads",
+    "Sort and place buttons",
+    "Sort and place paper star",
+    "Sort and stack cardboard pieces",
+    "Sort beads",
+    "Sort beads and write count",
+    "Sort beads by color",
+    "Sort beads by hand",
+    "Sort beads on table",
+    "Sort beads on the table",
+    "Sort blue beads",
+    "Sort blue star-shaped pieces",
+    "Sort button",
+    "Sort button by color",
+    "Sort buttons",
+    "Sort buttons by color",
+    "Sort canned goods in tray",
+    "Sort colored tiles",
+    "Sort colorful pieces",
+    "Sort craft items",
+    "Sort cut cardboard",
+    "Sort light blue origami stars",
+    "Sort orange button",
+    "Sort orange buttons",
+    "Sort origami stars",
+    "Sort origami stars by color",
+    "Sort paper star",
+    "Sort paper stars",
+    "Sort plastic pieces",
+    "Sort purple beads",
+    "Sort purple star-shaped objects",
+    "Sort puzzle pieces",
+    "Sort quilled paper pieces",
+    "Sort small colorful pieces",
+    "Sort small craft pieces",
+    "Sort small objects",
+    "Sort small plastic pieces",
+    "Sort star-shaped beads",
+    "Sort star-shaped objects",
+    "Sort star-shaped objects by color",
+    "Sort tiles",
+    "Sort tiles by color",
+    "Sort yellow star-shaped objects",
+    "Sorting buttons",
+    "Sorting colorful paper pieces",
+    "Sorting paper stars",
+    "Stabilize cardboard",
+    "Stabilize ruler",
+    "Stack cardboard pieces",
+    "Stack cardboard square",
+    "Stack cardboard squares",
+    "Stacking cardboard pieces",
+    "Stacking cardboard square",
+    "Stacking cardboard squares",
+    "Stand up and walk away",
+    "Start cutting",
+    "Start folding paper strip",
+    "Starting to label next square",
+    "Stir contents",
+    "Stop measuring and put down tools",
+    "Stop sorting stars",
+    "Sweep debris",
+    "Sweep floor debris",
+    "Switch to scissors",
+    "Switching marker",
+    "Tap smartphone screen",
+    "Tapping on smartphone screen",
+    "Tapping smartphone screen",
+    "Tear newspaper",
+    "Tear off cardboard segment",
+    "Touch canned goods",
+    "Touch pieces in box",
+    "Touch shelf edge",
+    "Trace pattern on cardboard",
+    "Transition to cutting",
+    "Transition to standing position",
+    "Trim cardboard",
+    "Trim cardboard piece",
+    "Type on smartphone",
+    "Typing message on smartphone",
+    "Typing on phone",
+    "Typing on smartphone",
+    "Update paper record",
+    "Use phone",
+    "Use phone to check instructions",
+    "Use phone to check stock",
+    "Use phone while crafting",
+    "Use smartphone",
+    "Vacuum edge of carpet",
+    "Vacuum the carpet",
+    "Vacuuming along the wall edge",
+    "Vacuuming carpet corner",
+    "Vacuuming carpet edge",
+    "Vacuuming the carpet edge",
+    "View content on smartphone",
+    "View phone screen",
+    "Viewing phone screen",
+    "Walk across office",
+    "Walk across room",
+    "Walk across the room",
+    "Walk away",
+    "Walk in hallway",
+    "Walk through corridor",
+    "Walk through doorway",
+    "Walk through hallway",
+    "Walk through office",
+    "Walk through store",
+    "Walk through workspace",
+    "Walk towards aisle",
+    "Walk towards desk",
+    "Walk towards next aisle",
+    "Walk towards other aisles",
+    "Walk towards room",
+    "Walk towards shelf",
+    "Walk towards shelves",
+    "Walk towards storage area",
+    "Walk towards table",
+    "Walk towards workspace",
+    "Walk with cardboard",
+    "Walk with cardboard cutout",
+    "Walk with marker",
+    "Walk with shopping bag",
+    "Walking across the room",
+    "Walking along the aisle",
+    "Walking in the hallway",
+    "Walking in the workspace",
+    "Walking through classroom",
+    "Walking through office hallway",
+    "Walking through the office",
+    "Walking to sink",
+    "Walking towards door",
+    "Walking towards workstation",
+    "Washing hands",
+    "Washing hands in sink",
+    "Wipe down shelf",
+    "Wipe electronic item",
+    "Wipe food product",
+    "Wipe grocery shelf",
+    "Wipe item",
+    "Wipe jar",
+    "Wipe ketchup bottle",
+    "Wipe kitchen counter",
+    "Wipe product",
+    "Wipe retail item",
+    "Wipe shelf",
+    "Wipe shelf surface",
+    "Wipe the plastic jar",
+    "Wipe the product jar",
+    "Wipe the shelf",
+    "Wiping countertop",
+    "Withdraw hand",
+    "Write count on paper",
+    "Write on notepad",
+    "Write on paper",
+    "Write on paper record",
+    "Writing on notepad",
+    "fold purple ribbon",
+    "sort craft materials",
+    "{\"action\": \"Pour liquid into bowl\", \"contact\": \"yes\", \"evidence_window\": {\"end_frame\": 19, \"start_frame\": 0}, \"next_action\": \"Pour liquid into bowl\", \"objects\": [\"bottle\", \"bowl\", \"kitchen counter\", \"kitchen sink\", \"kitchen tap\", \"kitchen cabinet\", \"kitchen shelf\", \"kitchen window\", \"kitchen utensils\", \"kitchen appliances\", \"kitchen utensil rack\", \"kitchen towel\", \"kitchen cabinet door\", \"kitchen cabinet drawer\", \"kitchen cabinet shelf\", \"kitchen cabinet handle\", \"kitchen cabinet knob\", \"kitchen cabinet latch\", \"kitchen cabinet hinge\", \"kitchen cabinet panel\", \"kitchen cabinet frame\", \"kitchen cabinet base\", \"kitchen cabinet top\", \"kitchen cabinet side\", \"kitchen cabinet back\", \"kitchen cabinet door panel\", \"kitchen cabinet door frame\", \"kitchen cabinet door handle\", \"kitchen cabinet door knob\", \"kitchen cabinet door latch\", \"kitchen cabinet door hinge\", \"kitchen cabinet door panel frame\", \"kitchen cabinet door panel handle\", \"kitchen cabinet door panel knob\", \"kitchen cabinet door panel latch\", \"kitchen cabinet door panel hinge\", \"kitchen cabinet door panel frame handle\", \"kitchen cabinet door panel frame knob\", \"kitchen cabinet door panel frame latch\", \"kitchen cabinet door panel frame hinge\", \"kitchen cabinet door panel frame panel\", \"kitchen cabinet door panel frame panel handle\", \"kitchen cabinet door panel frame panel knob\", \"kitchen cabinet door panel frame panel latch\", \"kitchen cabinet door panel frame panel hinge\", \"kitchen cabinet door panel frame panel frame\", \"kitchen cabinet door panel frame panel frame handle\", \"kitchen cabinet door panel frame panel frame knob\", \"kitchen cabinet door panel frame panel frame latch\", \"kitchen cabinet door panel frame panel frame hinge\", \"kitchen cabinet door panel frame panel frame panel\", \"kitchen cabinet door panel frame panel frame panel handle\", \"kitchen cabinet door panel frame panel 
frame panel knob\", \"kitchen cabinet door panel frame panel frame panel latch\", \"kitchen cabinet door panel frame panel frame panel hinge\", \"kitchen cabinet door panel frame panel frame panel frame\", \"kitchen cabinet door panel frame panel frame panel frame handle\", \"kitchen cabinet door panel frame panel frame panel frame knob\", \"kitchen cabinet door panel frame panel frame panel frame latch\", \"kitchen cabinet door panel frame panel frame panel frame hinge\", \"kitchen cabinet door panel frame panel frame panel frame panel\", \"kitchen cabinet door panel frame panel frame panel frame panel handle\", \"kitchen cabinet door panel frame panel frame panel frame panel knob\", \"kitchen cabinet door panel frame panel frame panel frame panel latch\", \"kitchen cabinet door panel frame panel frame panel frame panel hinge\", \"kitchen cabinet door panel frame panel frame panel frame panel frame\", \"kitchen cabinet door panel frame panel frame panel frame panel frame handle\", \"kitchen cabinet door panel frame panel frame panel frame panel frame knob\", \"kitchen cabinet door panel frame panel frame panel frame panel frame latch\", \"kitchen cabinet door panel frame panel frame panel frame panel frame hinge\", \"kitchen cabinet door panel frame panel frame panel frame panel frame panel\", \"kitchen cabinet door panel frame panel frame panel frame panel frame panel handle\", \"kitchen cabinet door panel frame panel frame panel frame panel frame panel knob\", \"kitchen cabinet door panel frame panel frame panel frame panel frame panel latch\", \"kitchen cabinet door panel frame panel frame panel frame panel frame panel hinge\", \"kitchen cabinet door panel frame panel frame panel frame panel frame panel frame\", \"kitchen cabinet door panel frame panel frame panel frame panel frame panel frame handle\", \"kitchen cabinet door panel frame panel frame panel frame panel frame panel frame knob\", \"kitchen cabinet door panel frame panel frame panel frame panel 
frame panel frame latch\", \"kitchen cabinet door panel frame panel frame panel frame panel frame panel frame hinge\", \"kitchen cabinet door panel frame panel frame panel frame panel frame panel frame panel\", \"kitchen cabinet door panel frame panel frame panel frame panel frame panel frame panel handle\", \"kitchen cabinet door panel frame panel frame panel frame panel frame panel frame panel knob\", \"kitchen cabinet door panel frame panel frame panel frame panel frame panel frame panel latch\", \"kitchen cabinet door panel frame panel frame panel frame panel frame panel frame panel hinge\", \"kitchen cabinet door panel frame panel frame panel frame panel frame panel frame panel frame\", \"kitchen cabinet door panel frame panel frame panel frame panel frame panel frame panel frame handle\", \"kitchen cabinet door panel frame panel frame panel frame panel frame panel frame panel frame knob\", \"kitchen cabinet door panel frame panel frame panel frame panel frame panel frame panel frame latch\", \"kitchen cabinet door panel frame panel frame panel frame panel frame panel frame panel frame hinge\", \"kitchen cabinet",
+    "Close the door",
+    "{\"action\": \"Close door\", \"contact\": \"yes\", \"evidence_window\": {\"end_frame\": 2539, \"start_frame\": 2520}, \"next_action\": \"Close door\", \"objects\": [\"door\", \"door handle\", \"plastic bag\", \"blue bucket\", \"red bucket\", \"blue container\", \"red container\", \"white container\", \"white box\", \"white bag\", \"white cloth\", \"white plastic bag\", \"white plastic container\", \"white cardboard box\", \"white paper\", \"white paper bag\", \"white paper sheet\", \"white paper strip\", \"white plastic sheet\", \"white plastic bag with red label\", \"white plastic container with red label\", \"white cardboard tray\", \"white cardboard piece\", \"white cardboard square\", \"white cardboard shape\", \"white paper lantern\", \"white paper lantern component\", \"white paper star\", \"white paper star shape\", \"white paper lantern piece\", \"white paper lantern part\", \"white paper decoration\", \"white paper cutout\", \"white paper shape\", \"white paper segment\", \"white paper coil\", \"white paper strip with red label\", \"white paper strip with blue label\", \"white paper strip with yellow label\", \"white paper strip with green label\", \"white paper strip with purple label\", \"white paper strip with orange label\", \"white paper strip with black label\", \"white paper strip with pink label\", \"white paper strip with brown label\", \"white paper strip with grey label\", \"white paper strip with turquoise label\", \"white paper strip with maroon label\", \"white paper strip with lavender label\", \"white paper strip with olive green label\", \"white paper strip with coral label\", \"white paper strip with navy blue label\", \"white paper strip with light blue label\", \"white paper strip with dark blue label\", \"white paper strip with light green label\", \"white paper strip with dark green label\", \"white paper strip with yellow-green label\", \"white paper strip with lime green label\", \"white paper strip with forest green label\", 
\"white paper strip with emerald green label\", \"white paper strip with teal label\", \"white paper strip with turquoise blue label\", \"white paper strip with sky blue label\", \"white paper strip with baby blue label\", \"white paper strip with periwinkle label\", \"white paper strip with lavender blue label\", \"white paper strip with violet label\", \"white paper strip with purple blue label\", \"white paper strip with indigo label\", \"white paper strip with dark purple label\", \"white paper strip with magenta label\", \"white paper strip with pink purple label\", \"white paper strip with rose pink label\", \"white paper strip with coral pink label\", \"white paper strip with salmon pink label\", \"white paper strip with peach label\", \"white paper strip with apricot label\", \"white paper strip with orange yellow label\", \"white paper strip with golden yellow label\", \"white paper strip with lemon yellow label\", \"white paper strip with yellow green label\", \"white paper strip with chartreuse label\", \"white paper strip with olive label\", \"white paper strip with khaki label\", \"white paper strip with tan label\", \"white paper strip with beige label\", \"white paper strip with cream label\", \"white paper strip with off white label\", \"white paper strip with light grey label\", \"white paper strip with grey label\", \"white paper strip with dark grey label\", \"white paper strip with black label\", \"white paper strip with brown label\", \"white paper strip with dark brown label\", \"white paper strip with chocolate label\", \"white paper strip with coffee label\", \"white paper strip with taupe label\", \"white paper strip with maroon label\", \"white paper strip with burgundy label\", \"white paper strip with red label\", \"white paper strip with dark red label\", \"white paper strip with crimson label\", \"white paper strip with cherry red label\", \"white paper strip with pink label\", \"white paper strip with light pink label\", \"white paper 
strip with pale pink label\", \"white paper strip with rose label\", \"white paper strip with blush label\", \"white paper strip with coral label\", \"white paper strip with salmon label\", \"white paper strip with peach label\", \"white paper strip with apricot label\", \"white paper strip with orange label\", \"white paper strip with light orange label\", \"white paper strip with pale orange label\", \"white paper strip with tangerine label\", \"white paper strip with mango label\", \"white paper strip with yellow label\", \"white paper strip with light yellow label\", \"white paper strip with pale yellow label\", \"white paper strip with lemon label\", \"white paper strip with lime label\", \"white paper strip with green label\", \"white paper strip with light green label\", \"white paper strip with pale green label\", \"white paper strip with mint label\", \"white paper strip with sage label\", \"white paper strip with olive label\", \"white paper strip with khaki",
+    "{\"action\": \"Close the door\", \"contact\": \"yes\", \"evidence_window\": {\"end_frame\": 2559, \"start_frame\": 2540}, \"next_action\": \"Close the door\", \"objects\": [\"door\", \"door handle\", \"door knob\", \"door frame\", \"door hinge\", \"door latch\", \"door lock\", \"door panel\", \"door post\", \"door stop\", \"door weather stripping\", \"doorway\", \"garage door\", \"garage door opener\", \"garage door track\", \"garage door wheel\", \"garage door window\", \"garage door opener button\", \"garage door opener remote\", \"garage door opener receiver\", \"garage door opener motor\", \"garage door opener chain\", \"garage door opener belt\", \"garage door opener rail\", \"garage door opener sensor\", \"garage door opener control panel\", \"garage door opener light\", \"garage door opener emergency release\", \"garage door opener remote control\", \"garage door opener wall switch\", \"garage door opener keypad\", \"garage door opener smartphone app\", \"garage door opener voice control\", \"garage door opener timer\", \"garage door opener remote control\", \"garage door opener wall switch\", \"garage door opener keypad\", \"garage door opener smartphone app\", \"garage door opener voice control\", \"garage door opener timer\", \"garage door opener emergency release\", \"garage door opener sensor\", \"garage door opener control panel\", \"garage door opener light\", \"garage door opener motor\", \"garage door opener chain\", \"garage door opener belt\", \"garage door opener rail\", \"garage door opener remote\", \"garage door opener receiver\", \"garage door opener button\", \"garage door window\", \"garage door track\", \"garage door wheel\", \"garage door\", \"garage door opener\", \"garage door opener button\", \"garage door opener remote\", \"garage door opener receiver\", \"garage door opener motor\", \"garage door opener chain\", \"garage door opener belt\", \"garage door opener rail\", \"garage door opener sensor\", \"garage door opener control 
panel\", \"garage door opener light\", \"garage door opener emergency release\", \"garage door opener remote control\", \"garage door opener wall switch\", \"garage door opener keypad\", \"garage door opener smartphone app\", \"garage door opener voice control\", \"garage door opener timer\", \"garage door opener remote control\", \"garage door opener wall switch\", \"garage door opener keypad\", \"garage door opener smartphone app\", \"garage door opener voice control\", \"garage door opener timer\", \"garage door opener emergency release\", \"garage door opener sensor\", \"garage door opener control panel\", \"garage door opener light\", \"garage door opener motor\", \"garage door opener chain\", \"garage door opener belt\", \"garage door opener rail\", \"garage door opener remote\", \"garage door opener receiver\", \"garage door opener button\", \"garage door window\", \"garage door track\", \"garage door wheel\", \"garage door\", \"garage door opener\", \"garage door opener button\", \"garage door opener remote\", \"garage door opener receiver\", \"garage door opener motor\", \"garage door opener chain\", \"garage door opener belt\", \"garage door opener rail\", \"garage door opener sensor\", \"garage door opener control panel\", \"garage door opener light\", \"garage door opener emergency release\", \"garage door opener remote control\", \"garage door opener wall switch\", \"garage door opener keypad\", \"garage door opener smartphone app\", \"garage door opener voice control\", \"garage door opener timer\", \"garage door opener remote control\", \"garage door opener wall switch\", \"garage door opener keypad\", \"garage door opener smartphone app\", \"garage door opener voice control\", \"garage door opener timer\", \"garage door opener emergency release\", \"garage door opener sensor\", \"garage door opener control panel\", \"garage door opener light\", \"garage door opener motor\", \"garage door opener chain\", \"garage door opener belt\", \"garage door 
opener rail\", \"garage door opener remote\", \"garage door opener receiver\", \"garage door opener button\", \"garage door window\", \"garage door track\", \"garage door wheel\", \"garage door\", \"garage door opener\", \"garage door opener button\", \"garage door opener remote\", \"garage door opener receiver\", \"garage door opener motor\", \"garage door opener chain\", \"garage door opener belt\", \"garage door opener rail\", \"garage door opener sensor\", \"garage",
+    "{\"action\": \"Close cabinet door\", \"contact\": \"yes\", \"evidence_window\": {\"end_frame\": 3419, \"start_frame\": 3400}, \"next_action\": \"Close cabinet door\", \"objects\": [\"cabinet\", \"cabinet door\", \"cabinet handle\", \"cabinet shelf\", \"cabinet drawer\", \"cabinet drawer handle\", \"cabinet drawer knob\", \"cabinet drawer pull\", \"cabinet drawer slide\", \"cabinet drawer slide rail\", \"cabinet drawer slide track\", \"cabinet drawer slide roller\", \"cabinet drawer slide bracket\", \"cabinet drawer slide support\", \"cabinet drawer slide guide\", \"cabinet drawer slide stop\", \"cabinet drawer slide clip\", \"cabinet drawer slide clip holder\", \"cabinet drawer slide clip bracket\", \"cabinet drawer slide clip support\", \"cabinet drawer slide clip guide\", \"cabinet drawer slide clip stop\", \"cabinet drawer slide clip holder bracket\", \"cabinet drawer slide clip holder support\", \"cabinet drawer slide clip holder guide\", \"cabinet drawer slide clip holder stop\", \"cabinet drawer slide clip holder clip\", \"cabinet drawer slide clip holder clip holder\", \"cabinet drawer slide clip holder clip bracket\", \"cabinet drawer slide clip holder clip support\", \"cabinet drawer slide clip holder clip guide\", \"cabinet drawer slide clip holder clip stop\", \"cabinet drawer slide clip holder clip holder bracket\", \"cabinet drawer slide clip holder clip holder support\", \"cabinet drawer slide clip holder clip holder guide\", \"cabinet drawer slide clip holder clip holder stop\", \"cabinet drawer slide clip holder clip holder clip\", \"cabinet drawer slide clip holder clip holder clip holder\", \"cabinet drawer slide clip holder clip holder clip bracket\", \"cabinet drawer slide clip holder clip holder clip support\", \"cabinet drawer slide clip holder clip holder clip guide\", \"cabinet drawer slide clip holder clip holder clip stop\", \"cabinet drawer slide clip holder clip holder clip holder bracket\", \"cabinet drawer slide clip holder clip 
holder clip holder support\", \"cabinet drawer slide clip holder clip holder clip holder guide\", \"cabinet drawer slide clip holder clip holder clip holder stop\", \"cabinet drawer slide clip holder clip holder clip holder clip\", \"cabinet drawer slide clip holder clip holder clip holder clip holder\", \"cabinet drawer slide clip holder clip holder clip holder clip bracket\", \"cabinet drawer slide clip holder clip holder clip holder clip support\", \"cabinet drawer slide clip holder clip holder clip holder clip guide\", \"cabinet drawer slide clip holder clip holder clip holder clip stop\", \"cabinet drawer slide clip holder clip holder clip holder clip holder bracket\", \"cabinet drawer slide clip holder clip holder clip holder clip holder support\", \"cabinet drawer slide clip holder clip holder clip holder clip holder guide\", \"cabinet drawer slide clip holder clip holder clip holder clip holder stop\", \"cabinet drawer slide clip holder clip holder clip holder clip holder clip\", \"cabinet drawer slide clip holder clip holder clip holder clip holder clip holder\", \"cabinet drawer slide clip holder clip holder clip holder clip holder clip bracket\", \"cabinet drawer slide clip holder clip holder clip holder clip holder clip support\", \"cabinet drawer slide clip holder clip holder clip holder clip holder clip guide\", \"cabinet drawer slide clip holder clip holder clip holder clip holder clip stop\", \"cabinet drawer slide clip holder clip holder clip holder clip holder clip holder bracket\", \"cabinet drawer slide clip holder clip holder clip holder clip holder clip holder support\", \"cabinet drawer slide clip holder clip holder clip holder clip holder clip holder guide\", \"cabinet drawer slide clip holder clip holder clip holder clip holder clip holder stop\", \"cabinet drawer slide clip holder clip holder clip holder clip holder clip holder clip\", \"cabinet drawer slide clip holder clip holder clip holder clip holder clip holder clip holder\", 
\"cabinet drawer slide clip holder clip holder clip holder clip holder clip holder clip bracket\", \"cabinet drawer slide clip holder clip holder clip holder clip holder clip holder clip support\", \"cabinet drawer slide clip holder clip holder clip holder clip holder clip holder clip guide\", \"cabinet drawer slide clip holder clip holder clip holder clip holder clip holder clip stop\", \"cabinet drawer slide clip holder clip holder clip holder clip holder clip holder clip holder bracket\", \"cabinet drawer slide clip holder clip holder clip holder clip holder clip holder clip holder support\", \"cabinet drawer slide clip holder clip holder clip holder clip holder clip holder clip holder guide\", \"cabinet drawer slide clip holder clip holder clip holder clip holder clip holder clip holder stop\", \"cabinet drawer slide clip holder clip holder clip holder clip holder clip holder clip holder clip\", \"cabinet drawer slide clip holder clip holder clip holder clip holder clip holder clip holder clip holder\", \"cabinet drawer slide clip holder clip holder clip holder clip holder clip holder clip holder clip bracket\", \"cabinet drawer slide clip holder clip holder clip holder clip holder clip holder clip holder clip support",
+    "Close cabinet",
+    "Close cabinet door",
+    "Wipe hands",
+    "Hold paper star",
+    "Place pot on the counter",
+    "Rinse pot",
+    "Place marker on table",
+    "Carry snack package",
+    "Place paper strip on table",
+    "Reach for jar",
+    "Hold jar",
+    "Place beads on table",
+    "{\"action\": \"Hold paper star\", \"contact\": \"yes\", \"evidence_window\": {\"end_frame\": 1959, \"start_frame\": 1940}, \"next_action\": \"Hold paper star\", \"objects\": [\"paper star\", \"table\", \"scissors\", \"phone\", \"mouse\", \"beads\", \"pen\", \"light blue paper\", \"yellow paper\", \"purple paper\", \"cardboard\", \"cardboard pieces\", \"cardboard shapes\", \"cardboard square\", \"cardboard tray\", \"cardboard tube\", \"cardboard strip\", \"utility knife\", \"ruler\", \"marker\", \"jar\", \"canned food\", \"tin can\", \"plastic container\", \"plastic packaging\", \"box\", \"shopping bag\", \"gift box\", \"product\", \"product box\", \"product package\", \"supplement bottle\", \"bottle\", \"sauce bottle\", \"pickle jar\", \"water bottle\", \"pink water bottle\", \"cleaning bottle\", \"earbud case\", \"charging case\", \"portable charger\", \"power bank\", \"charger\", \"charging cable\", \"power adapter\", \"smartphone\", \"smartphone box\", \"computer mouse\", \"pen and paper\", \"paper\", \"paper pieces\", \"paper segments\", \"paper lantern\", \"paper lantern component\", \"blue product box\", \"blue strip\", \"light blue strip\", \"yellow item\", \"yellow paper strip\", \"purple paper strip\", \"adhesive tape\", \"ribbon\", \"ribbon piece\", \"plastic strip\", \"foam piece\", \"foam strips\", \"cardboard pattern\", \"cardboard pattern piece\", \"stapler\", \"cleaning cloth\", \"dustpan\", \"box of beads\", \"container\", \"container of canned food\", \"small cardboard piece\", \"small piece of material\", \"small object\", \"small product bag\", \"small white box\", \"bean\", \"bean pile\", \"button\", \"black button\", \"orange button\", \"red button\", \"small button\", \"button cluster\", \"button row\", \"star\", \"star bead\", \"star-shaped bead\", \"purple bead\", \"light blue origami star\", \"yellow star-shaped object\", \"purple star-shaped object\", \"colored tile\", \"colored pieces\", \"jewel\", \"jewel piece\", \"Mahjong tile\", 
\"Mahjong tiles\", \"puzzle piece\", \"quilled paper\", \"quilled paper piece\", \"quilled paper shape\", \"quilling paper\", \"quilling paper strip\", \"paper quilling piece\", \"paper quilling strip\", \"paper star\", \"lucky star\", \"folded paper\", \"folded paper star\", \"paper strip\", \"paper decoration\", \"colored paper\", \"colorful pieces\", \"cardboard sheet\", \"cardboard square\", \"cardboard pieces\", \"cardboard shapes\", \"cardboard cutout\", \"cardboard tray\", \"cardboard tube\", \"cardboard strip\", \"newspaper\", \"newspaper piece\", \"plastic container\", \"plastic storage bin\", \"tin can\", \"jar\", \"canned good\", \"canned product\", \"packaged paper lantern component\", \"snack package\", \"snack pouch\", \"container of snack\", \"gift box\", \"product package\", \"supplement bottle\", \"bottle\", \"ketchup bottle\", \"sauce bottle\", \"pickle jar\", \"water bottle\", \"pink water bottle\", \"cleaning bottle\", \"earbud case\", \"charging case\", \"portable charger\", \"power bank\", \"charger\", \"charging cable\", \"power adapter\", \"smartphone\", \"smartphone box\", \"computer mouse\", \"pen\", \"notepad\", \"paper\", \"paper pieces\", \"paper segments\", \"pieces\", \"lantern\", \"lantern component\", \"blue product box\", \"blue strip\", \"light blue strip\", \"yellow item\", \"yellow paper strip\", \"purple paper strip\", \"adhesive strip\", \"ribbon knot\", \"small paper segment\", \"foam piece\", \"foam strips\", \"cardboard pattern\", \"cardboard pattern piece\", \"stapler\", \"cleaning cloth\", \"dustpan\", \"box of beads\", \"container\", \"small cardboard piece\", \"small piece of material\", \"small object\", \"small product bag\", \"small white box\", \"bean\", \"bean pile\", \"button\", \"black button\", \"orange button\", \"red button\", \"small button\", \"buttons\", \"button cluster\", \"button row\", \"star\", \"star bead\", \"star-shaped bead\", \"purple bead\", \"light blue origami star\", \"yellow star-shaped 
object\", \"purple star-shaped object\", \"colored tile\", \"colored pieces\", \"jewel\", \"jewel piece\", \"Mahjong tile\", \"Mahjong tiles\", \"puzzle piece\", \"quilled paper\", \"qu",
+    "Place earbud case on table",
+    "Hold red button",
+    "Adjust beads by color",
+    "Approach workspace",
+    "{\"action\": \"Approach workspace\", \"contact\": \"no\", \"evidence_window\": {\"end_frame\": 39, \"start_frame\": 20}, \"next_action\": \"Approach workspace\", \"objects\": [\"cardboard\", \"cardboard pieces\", \"cardboard stack\", \"cardboard tray\", \"cardboard tube\", \"cardboard pieces\", \"cardboard shapes\", \"cardboard squares\", \"cardboard sheet\", \"cardboard cutouts\", \"cardboard shapes\", \"cardboard pieces\", \"cardboard shapes\", \"cardboard pieces\", \"cardboard shapes\", \"cardboard pieces\", \"cardboard shapes\", \"cardboard pieces\", \"cardboard shapes\", \"cardboard pieces\", \"cardboard shapes\", \"cardboard pieces\", \"cardboard shapes\", \"cardboard pieces\", \"cardboard shapes\", \"cardboard pieces\", \"cardboard shapes\", \"cardboard pieces\", \"cardboard shapes\", \"cardboard pieces\", \"cardboard shapes\", \"cardboard pieces\", \"cardboard shapes\", \"cardboard pieces\", \"cardboard shapes\", \"cardboard pieces\", \"cardboard shapes\", \"cardboard pieces\", \"cardboard shapes\", \"cardboard pieces\", \"cardboard shapes\", \"cardboard pieces\", \"cardboard shapes\", \"cardboard pieces\", \"cardboard shapes\", \"cardboard pieces\", \"cardboard shapes\", \"cardboard pieces\", \"cardboard shapes\", \"cardboard pieces\", \"cardboard shapes\", \"cardboard pieces\", \"cardboard shapes\", \"cardboard pieces\", \"cardboard shapes\", \"cardboard pieces\", \"cardboard shapes\", \"cardboard pieces\", \"cardboard shapes\", \"cardboard pieces\", \"cardboard shapes\", \"cardboard pieces\", \"cardboard shapes\", \"cardboard pieces\", \"cardboard shapes\", \"cardboard pieces\", \"cardboard shapes\", \"cardboard pieces\", \"cardboard shapes\", \"cardboard pieces\", \"cardboard shapes\", \"cardboard pieces\", \"cardboard shapes\", \"cardboard pieces\", \"cardboard shapes\", \"cardboard pieces\", \"cardboard shapes\", \"cardboard pieces\", \"cardboard shapes\", \"cardboard pieces\", \"cardboard shapes\", \"cardboard pieces\", \"cardboard shapes\", 
\"cardboard pieces\", \"cardboard shapes\", \"cardboard pieces\", \"cardboard shapes\", \"cardboard pieces\", \"cardboard shapes\", \"cardboard pieces\", \"cardboard shapes\", \"cardboard pieces\", \"cardboard shapes\", \"cardboard pieces\", \"cardboard shapes\", \"cardboard pieces\", \"cardboard shapes\", \"cardboard pieces\", \"cardboard shapes\", \"cardboard pieces\", \"cardboard shapes\", \"cardboard pieces\", \"cardboard shapes\", \"cardboard pieces\", \"cardboard shapes\", \"cardboard pieces\", \"cardboard shapes\", \"cardboard pieces\", \"cardboard shapes\", \"cardboard pieces\", \"cardboard shapes\", \"cardboard pieces\", \"cardboard shapes\", \"cardboard pieces\", \"cardboard shapes\", \"cardboard pieces\", \"cardboard shapes\", \"cardboard pieces\", \"cardboard shapes\", \"cardboard pieces\", \"cardboard shapes\", \"cardboard pieces\", \"cardboard shapes\", \"cardboard pieces\", \"cardboard shapes\", \"cardboard pieces\", \"cardboard shapes\", \"cardboard pieces\", \"cardboard shapes\", \"cardboard pieces\", \"cardboard shapes\", \"cardboard pieces\", \"cardboard shapes\", \"cardboard pieces\", \"cardboard shapes\", \"cardboard pieces\", \"cardboard shapes\", \"cardboard pieces\", \"cardboard shapes\", \"cardboard pieces\", \"cardboard shapes\", \"cardboard pieces\", \"cardboard shapes\", \"cardboard pieces\", \"cardboard shapes\", \"cardboard pieces\", \"cardboard shapes\", \"cardboard pieces\", \"cardboard shapes\", \"cardboard pieces\", \"cardboard shapes\", \"cardboard pieces\", \"cardboard shapes\", \"cardboard pieces\", \"cardboard shapes\", \"cardboard pieces\", \"cardboard shapes\", \"cardboard pieces\", \"cardboard shapes\", \"cardboard pieces\", \"cardboard shapes\", \"cardboard pieces\", \"cardboard shapes\", \"cardboard pieces\", \"cardboard shapes\", \"cardboard pieces\", \"cardboard shapes\", \"cardboard pieces\", \"cardboard shapes\", \"cardboard pieces\", \"cardboard shapes\", \"cardboard pieces\", \"cardboard shapes\", \"cardboard 
pieces\", \"cardboard shapes\", \"cardboard pieces\", \"cardboard shapes\", \"cardboard pieces\", \"cardboard shapes\", \"cardboard pieces\", \"cardboard shapes\", \"cardboard pieces\", \"cardboard shapes\", \"cardboard pieces\", \"cardboard shapes\", \"cardboard pieces\", \"cardboard shapes\", \"cardboard pieces\", \"cardboard shapes\", \"cardboard pieces\", \"cardboard shapes\", \"cardboard pieces\", \"cardboard shapes\", \"cardboard pieces\", \"cardboard shapes\", \"card"
+  ],
+  "model_id": "<workspace-parent>/modelscope_models/Qwen__Qwen3-Omni-30B-A3B-Instruct",
+  "adapter_dir": "<project>/checkpoints/xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_lora/adapter_lora",
+  "dataset_jsonl": "<project>/results/omni_finetune/xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_dataset/dataset.jsonl",
+  "eval_split": "test",
+  "train_split": "train",
+  "sample_offset": 0,
+  "sample_stride": 1,
+  "num_eval_episodes": 14,
+  "held_out_episode_count": 14,
+  "unseen_eval_labels": [
+    "Adjust canned food on shelf",
+    "Adjust lantern shape",
+    "Adjust lantern string",
+    "Adjust paper",
+    "Adjust pot position",
+    "Align canned food on shelf",
+    "Align edges of paper lantern",
+    "Align paper lantern edges",
+    "Apply adhesive tape to lantern",
+    "Approach boxes",
+    "Approaching and pressing the door switch",
+    "Approaching the table",
+    "Arrange buttons in a line",
+    "Arrange star beads",
+    "Arrange star beads for counting",
+    "Attempt to fit puzzle piece",
+    "Bend and manipulate plastic strip",
+    "Browse smartphone screen",
+    "Bundle display hooks",
+    "Closing the door",
+    "Counting and organizing beads",
+    "Counting star beads",
+    "Cut along the marked line",
+    "Entering the VR training room",
+    "Expand paper lantern",
+    "Extract wire hangers from box",
+    "Fold paper lantern",
+    "Fold plastic strip",
+    "Gather star beads",
+    "Gesturing",
+    "Grasp cleaning bottle",
+    "Grasp lantern",
+    "Grasp lantern component",
+    "Grasping cleaning cloth",
+    "Greeting/acknowledging participants",
+    "Handle paper lantern component",
+    "Hold and bend plastic strip",
+    "Hold and manipulate paper strip",
+    "Hold canned food",
+    "Hold container lid",
+    "Hold earbud case",
+    "Hold paper lantern",
+    "Identify next cardboard piece",
+    "Inspect shelf condition",
+    "Lift pot lid",
+    "Manipulate adhesive strip",
+    "Manipulate bead",
+    "Manipulate beads",
+    "Manipulate craft paper strips",
+    "Manipulate craft piece",
+    "Manipulate material",
+    "Manipulate paper decoration",
+    "Manipulate paper edge",
+    "Manipulate plastic strip",
+    "Manipulate plastic strips",
+    "Manipulate puzzle piece",
+    "Manipulate yellow strip",
+    "Manipulating paper strips",
+    "Move dustpan to side",
+    "Move hand away",
+    "Move hand away from shelf",
+    "Move marker and adjust hand",
+    "Move pot",
+    "Move through aisle",
+    "Move through the training room",
+    "Move towards kitchen area",
+    "Move towards the stove",
+    "Observe and pause",
+    "Observe and walk through store",
+    "Observe colleague and workspace",
+    "Observe puzzle progress",
+    "Open earbud case",
+    "Open folded paper lantern",
+    "Open paper lantern",
+    "Open paper lantern component",
+    "Open stove pot lid",
+    "Operate smartphone",
+    "Pick up dustpan",
+    "Pick up items from the shopping bag",
+    "Pick up packaged paper lantern component",
+    "Pick up puzzle piece",
+    "Pick up small piece of material",
+    "Pick up star bead",
+    "Picking up bottle",
+    "Picking up crafting material",
+    "Place and count bead",
+    "Place another canned food on shelf",
+    "Place cloth on floor",
+    "Place hand on table",
+    "Place items on the shelf",
+    "Place lid back",
+    "Place marked piece down",
+    "Place material",
+    "Place piece into puzzle",
+    "Place smartphone down",
+    "Place smartphone on stand",
+    "Place towel",
+    "Placing paper strip",
+    "Preparing to craft",
+    "Put down smartphone",
+    "Reach for cleaning supplies",
+    "Reach for craft items",
+    "Reach for next can",
+    "Reach for next canned food",
+    "Reach for puzzle piece",
+    "Reach for wire hangers",
+    "Record count",
+    "Release cardboard piece and gesture",
+    "Release hook",
+    "Release lantern",
+    "Release smartphone",
+    "Remove cleaning bottle",
+    "Remove paper lantern part from packaging",
+    "Remove plastic packaging",
+    "Reposition hand",
+    "Resume observation",
+    "Retrieve canned food from box",
+    "Retrieve next canned food item",
+    "Retrieving more beads",
+    "Rinse cloth in sink",
+    "Scroll smartphone screen",
+    "Secure paper edges with adhesive",
+    "Securing paper structure",
+    "Sort and adjust button line",
+    "Sort and arrange buttons",
+    "Sort and count beads",
+    "Sort and place buttons",
+    "Sort beads and write count",
+    "Sort button",
+    "Sort buttons",
+    "Sort craft items",
+    "Sort puzzle pieces",
+    "Sort small craft pieces",
+    "Start cutting",
+    "Stir contents",
+    "Use phone while crafting",
+    "Walk towards other aisles",
+    "Walking across the room",
+    "Walking in the hallway",
+    "Walking towards door",
+    "Washing hands in sink",
+    "Wipe kitchen counter",
+    "Wiping countertop",
+    "sort craft materials"
+  ],
+  "num_unseen_label_samples": 317,
+  "seen_label_accuracy": 0.04580152671755725,
+  "unseen_label_accuracy": 0.015772870662460567,
+  "eval_label_counts": {
+    "Manipulate paper strip": 14,
+    "Move phone": 7,
+    "Cut cardboard": 7,
+    "Cut along the marked line": 7,
+    "Use smartphone": 6,
+    "Pick up utility knife": 6,
+    "Manipulate paper decoration": 5,
+    "Manipulate paper edge": 5,
+    "Manipulate adhesive strip": 5,
+    "Hold smartphone": 4,
+    "Interact with smartphone": 4,
+    "Manipulate craft piece": 4,
+    "Manipulate craft paper strips": 4,
+    "Operate smartphone": 4,
+    "Release paper strip": 4,
+    "Arrange buttons": 4,
+    "Arrange buttons in a line": 4,
+    "Approaching and pressing the door switch": 4,
+    "Bend and manipulate plastic strip": 4,
+    "Placing paper strip": 4,
+    "Securing paper structure": 4,
+    "Secure paper edges with adhesive": 4,
+    "Reposition hand": 3,
+    "Mark cardboard piece": 3,
+    "Hold cardboard piece": 3,
+    "Cut cardboard piece": 3,
+    "Marking cardboard piece": 3,
+    "Move marker and adjust hand": 3,
+    "Identify next cardboard piece": 3,
+    "sort craft materials": 3,
+    "Release scissors": 3,
+    "Release smartphone": 3,
+    "Sort small craft pieces": 3,
+    "Reach for craft items": 3,
+    "Sort craft items": 3,
+    "Move smartphone": 3,
+    "Place hand on table": 3,
+    "Browse smartphone screen": 3,
+    "Scroll smartphone screen": 3,
+    "Put down smartphone": 3,
+    "Place smartphone down": 3,
+    "Pick up puzzle piece": 3,
+    "Place piece into puzzle": 3,
+    "Manipulate puzzle piece": 3,
+    "Manipulate puzzle pieces": 3,
+    "Place puzzle piece": 3,
+    "Observe puzzle progress": 3,
+    "Adjust puzzle piece": 3,
+    "Attempt to fit puzzle piece": 3,
+    "Sort buttons": 3,
+    "Sort and arrange buttons": 3,
+    "Pick up button": 3,
+    "Place button": 3,
+    "Sort button": 3,
+    "Sort and adjust button line": 3,
+    "Sort and place buttons": 3,
+    "Walking in the hallway": 3,
+    "Entering the VR training room": 3,
+    "Greeting/acknowledging participants": 3,
+    "Move through the training room": 3,
+    "Manipulate plastic strips": 3,
+    "Manipulate plastic strip": 3,
+    "Hold and bend plastic strip": 3,
+    "Fold plastic strip": 3,
+    "Hold container lid": 2,
+    "Closing the door": 2,
+    "Grasp cleaning bottle": 2,
+    "Grasping cleaning cloth": 2,
+    "Adjust pot position": 2,
+    "Start cutting": 2,
+    "Observe and walk through store": 2,
+    "Move to shelf": 2,
+    "Inspect shelf condition": 2,
+    "Observe colleague and workspace": 2,
+    "Observe workspace": 2,
+    "Approach boxes": 2,
+    "Extract wire hangers from box": 2,
+    "Bundle display hooks": 2,
+    "Move through aisle": 2,
+    "Pick up items from the shopping bag": 2,
+    "Place items on the shelf": 2,
+    "Place item on shelf": 2,
+    "Adjust item on shelf": 2,
+    "Place marked piece down": 2,
+    "Pick up new cardboard piece": 2,
+    "Organize cardboard pieces": 2,
+    "Release cardboard piece and gesture": 2,
+    "Observe and pause": 2,
+    "Gesturing": 2,
+    "Resume observation": 2,
+    "Pick up can": 2,
+    "Hold canned food": 2,
+    "Pick up canned food": 2,
+    "Align canned food on shelf": 2,
+    "Place another canned food on shelf": 2,
+    "Adjust canned food on shelf": 2,
+    "Move hand away from shelf": 2,
+    "Place can on shelf": 2,
+    "Move hand away": 2,
+    "Hold earbud case": 2,
+    "Open earbud case": 2,
+    "Pick up smartphone": 2,
+    "Open paper lantern": 2,
+    "Fold paper lantern": 2,
+    "Grasp lantern": 2,
+    "Grasp lantern component": 2,
+    "Align paper lantern edges": 2,
+    "Adjust lantern string": 2,
+    "Handle paper lantern component": 2,
+    "Open folded paper lantern": 2,
+    "Adjust lantern shape": 2,
+    "Hold paper lantern": 2,
+    "Apply adhesive tape to lantern": 2,
+    "Open paper lantern component": 2,
+    "Expand paper lantern": 2,
+    "Align edges of paper lantern": 2,
+    "Search for puzzle piece": 2,
+    "Release puzzle piece": 2,
+    "Reach for puzzle piece": 2,
+    "Sort puzzle pieces": 2,
+    "Approaching the table": 2,
+    "Preparing to craft": 2,
+    "Picking up crafting material": 2,
+    "Use phone": 2,
+    "Pick up small piece of material": 2,
+    "Manipulate material": 2,
+    "Place material": 2,
+    "Manipulate yellow strip": 2,
+    "Manipulating paper strips": 2,
+    "Manipulate bead": 2,
+    "Manipulate beads": 2,
+    "Place phone down": 2,
+    "Hold and manipulate paper strip": 2,
+    "Sort beads": 2,
+    "Record count": 2,
+    "Counting and organizing beads": 2,
+    "Pick up star bead": 2,
+    "Place and count bead": 2,
+    "Hold beads": 2,
+    "Arrange star beads": 2,
+    "Pick up pen": 2,
+    "Counting star beads": 2,
+    "Retrieving more beads": 2,
+    "Adjust paper": 2,
+    "Gather star beads": 2,
+    "Arrange star beads for counting": 2,
+    "Sort and count beads": 2,
+    "Place smartphone on stand": 1,
+    "Pick up dustpan": 1,
+    "Move dustpan to side": 1,
+    "Move towards the stove": 1,
+    "Open stove pot lid": 1,
+    "Walking towards door": 1,
+    "Picking up bottle": 1,
+    "Wipe kitchen counter": 1,
+    "Rinse cloth in sink": 1,
+    "Move towards kitchen area": 1,
+    "Place cloth on floor": 1,
+    "Reach for cleaning supplies": 1,
+    "Remove cleaning bottle": 1,
+    "Washing hands in sink": 1,
+    "Wiping countertop": 1,
+    "Lift pot lid": 1,
+    "Stir contents": 1,
+    "Place lid back": 1,
+    "Move pot": 1,
+    "Place towel": 1,
+    "Walk towards shelves": 1,
+    "Reach for wire hangers": 1,
+    "Release hook": 1,
+    "Walk towards other aisles": 1,
+    "Reach for another item": 1,
+    "Reach for next item": 1,
+    "Place canned food on shelf": 1,
+    "Reach for next can": 1,
+    "Reach into box": 1,
+    "Retrieve next canned food item": 1,
+    "Reach for next canned food": 1,
+    "Retrieve canned food from box": 1,
+    "Release lantern": 1,
+    "Pick up packaged paper lantern component": 1,
+    "Remove paper lantern part from packaging": 1,
+    "Remove plastic packaging": 1,
+    "Walking across the room": 1,
+    "Use phone while crafting": 1,
+    "Sort beads and write count": 1,
+    "Sort star-shaped beads": 1,
+    "Write count on paper": 1,
+    "Write on paper": 1
+  },
+  "json_validity_rate": 0.875,
+  "action_macro_f1": 0.0026621494447581404,
+  "subtask_accuracy": 0.006696428571428571,
+  "transition_accuracy": 0.8504464285714286,
+  "next_action_accuracy": 0.024553571428571428,
+  "contact_accuracy": 0.6450892857142857,
+  "object_micro_f1": 0.22299431459254582,
+  "caption_window_grounding": {
+    "mrr": null,
+    "recall_at_5": null,
+    "note": "Grounding ranking requires a retrieval candidate set; JSON evidence_window is stored for later scoring."
+  }
+}

results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval/eval/per_class_metrics.csv ADDED Viewed

	@@ -0,0 +1,1211 @@

+class_name,support,predicted,precision,recall,f1
+Adjust Mahjong tile,0,9,0.0,0.0,0.0
+Adjust Mahjong tile alignment,0,0,0.0,0.0,0.0
+Adjust Mahjong tile on the stack,0,0,0.0,0.0,0.0
+Adjust Mahjong tiles,0,0,0.0,0.0,0.0
+Adjust bead piles,0,0,0.0,0.0,0.0
+Adjust canned food on shelf,2,5,0.4,1.0,0.5714285714285715
+Adjust cans in bin,0,0,0.0,0.0,0.0
+Adjust cans in container,0,0,0.0,0.0,0.0
+Adjust cans in tray,0,0,0.0,0.0,0.0
+Adjust cardboard,0,1,0.0,0.0,0.0
+Adjust cardboard divider,0,0,0.0,0.0,0.0
+Adjust cardboard position,0,0,0.0,0.0,0.0
+Adjust container on shelf,0,0,0.0,0.0,0.0
+Adjust container position,0,0,0.0,0.0,0.0
+Adjust containers on shelf,0,0,0.0,0.0,0.0
+Adjust foam strip,0,0,0.0,0.0,0.0
+Adjust grip,0,0,0.0,0.0,0.0
+Adjust grip on container,0,0,0.0,0.0,0.0
+Adjust hand position,0,0,0.0,0.0,0.0
+Adjust item on shelf,2,0,0.0,0.0,0.0
+Adjust lantern shape,2,3,0.0,0.0,0.0
+Adjust lantern string,2,3,0.3333333333333333,0.5,0.4
+Adjust paper,2,0,0.0,0.0,0.0
+Adjust paper strip,0,2,0.0,0.0,0.0
+Adjust perspective,0,0,0.0,0.0,0.0
+Adjust placement on shelf,0,0,0.0,0.0,0.0
+Adjust position,0,0,0.0,0.0,0.0
+Adjust pot position,2,0,0.0,0.0,0.0
+Adjust puzzle piece,3,0,0.0,0.0,0.0
+Adjust red button,0,4,0.0,0.0,0.0
+Adjust red button in row,0,0,0.0,0.0,0.0
+Adjust red button position,0,0,0.0,0.0,0.0
+Adjust retail item position,0,0,0.0,0.0,0.0
+Adjust retail items on shelf,0,0,0.0,0.0,0.0
+Adjust ruler position,0,0,0.0,0.0,0.0
+Adjust smartphone and sort pieces,0,3,0.0,0.0,0.0
+Adjust snack package,0,0,0.0,0.0,0.0
+Adjust tile row alignment,0,0,0.0,0.0,0.0
+Adjust vacuum cleaner position,0,0,0.0,0.0,0.0
+Adjusting a puzzle piece,0,0,0.0,0.0,0.0
+Adjusting canned goods on shelf,0,0,0.0,0.0,0.0
+Adjusting fabric for cutting,0,0,0.0,0.0,0.0
+Adjusting fabric position,0,0,0.0,0.0,0.0
+Adjusting puzzle piece,0,0,0.0,0.0,0.0
+Align Mahjong tiles,0,0,0.0,0.0,0.0
+Align and place retail item,0,0,0.0,0.0,0.0
+Align blue strip,0,0,0.0,0.0,0.0
+Align button,0,0,0.0,0.0,0.0
+Align button in row,0,0,0.0,0.0,0.0
+Align button row,0,0,0.0,0.0,0.0
+Align buttons,0,0,0.0,0.0,0.0
+Align canned food on shelf,2,0,0.0,0.0,0.0
+Align canned goods on shelf,0,0,0.0,0.0,0.0
+Align cardboard piece,0,0,0.0,0.0,0.0
+Align cardboard strip,0,0,0.0,0.0,0.0
+Align charging cable,0,0,0.0,0.0,0.0
+Align edges of paper lantern,2,0,0.0,0.0,0.0
+Align foam piece,0,0,0.0,0.0,0.0
+Align foam strip,0,0,0.0,0.0,0.0
+Align paper lantern edges,2,0,0.0,0.0,0.0
+Align paper strip,0,0,0.0,0.0,0.0
+Align plastic containers,0,0,0.0,0.0,0.0
+Align red button in row,0,0,0.0,0.0,0.0
+Align red buttons,0,0,0.0,0.0,0.0
+Align ruler,0,0,0.0,0.0,0.0
+Align ruler and mark cardboard,0,0,0.0,0.0,0.0
+Align ruler on cardboard,0,0,0.0,0.0,0.0
+Align ruler with crease,0,0,0.0,0.0,0.0
+Align scissors,0,0,0.0,0.0,0.0
+Apply adhesive tape to lantern,2,0,0.0,0.0,0.0
+Approach boxes,2,0,0.0,0.0,0.0
+Approach desk,0,0,0.0,0.0,0.0
+Approach packing area,0,4,0.0,0.0,0.0
+Approach restocking supplies,0,0,0.0,0.0,0.0
+Approach table,0,7,0.0,0.0,0.0
+Approach work table,0,0,0.0,0.0,0.0
+Approach workstation,0,1,0.0,0.0,0.0
+Approaching and pressing the door switch,4,0,0.0,0.0,0.0
+Approaching the table,2,0,0.0,0.0,0.0
+Approaching work table,0,0,0.0,0.0,0.0
+Arrange Mahjong tiles,0,0,0.0,0.0,0.0
+Arrange beads by color,0,0,0.0,0.0,0.0
+Arrange black buttons,0,0,0.0,0.0,0.0
+Arrange button cluster,0,0,0.0,0.0,0.0
+Arrange buttons,4,0,0.0,0.0,0.0
+Arrange buttons in a line,4,0,0.0,0.0,0.0
+Arrange buttons in row,0,0,0.0,0.0,0.0
+Arrange buttons on table,0,0,0.0,0.0,0.0
+Arrange buttons on the table,0,0,0.0,0.0,0.0
+Arrange canned products on shelf,0,0,0.0,0.0,0.0
+Arrange cans in box,0,0,0.0,0.0,0.0
+Arrange cans on shelf,0,0,0.0,0.0,0.0
+Arrange cardboard,0,0,0.0,0.0,0.0
+Arrange cardboard piece,0,0,0.0,0.0,0.0
+Arrange cardboard pieces,0,0,0.0,0.0,0.0
+Arrange cardboard squares,0,0,0.0,0.0,0.0
+Arrange container on shelf,0,0,0.0,0.0,0.0
+Arrange items on shelf,0,0,0.0,0.0,0.0
+Arrange orange buttons,0,0,0.0,0.0,0.0
+Arrange paper stars,0,0,0.0,0.0,0.0
+Arrange paper strips,0,0,0.0,0.0,0.0
+Arrange plastic containers,0,0,0.0,0.0,0.0
+Arrange red buttons,0,0,0.0,0.0,0.0
+Arrange small buttons,0,0,0.0,0.0,0.0
+Arrange star beads,2,0,0.0,0.0,0.0
+Arrange star beads for counting,2,0,0.0,0.0,0.0
+Arrange star-shaped beads,0,0,0.0,0.0,0.0
+Arrange tiles into row,0,0,0.0,0.0,0.0
+Arrive at a different workstation,0,0,0.0,0.0,0.0
+Assemble cardboard pieces,0,0,0.0,0.0,0.0
+Assemble foam strips,0,0,0.0,0.0,0.0
+Assess shelf arrangement,0,0,0.0,0.0,0.0
+Attach foam strip,0,0,0.0,0.0,0.0
+Attach material to paper strip,0,0,0.0,0.0,0.0
+Attempt to fit puzzle piece,3,0,0.0,0.0,0.0
+Begin folding paper strip,0,0,0.0,0.0,0.0
+Begin rolling quilling strip,0,0,0.0,0.0,0.0
+Bend and manipulate plastic strip,4,0,0.0,0.0,0.0
+Browse and interact with phone interface,0,0,0.0,0.0,0.0
+Browse mobile phone,0,0,0.0,0.0,0.0
+Browse smartphone screen,3,4,0.0,0.0,0.0
+Browsing mobile phone,0,0,0.0,0.0,0.0
+Browsing smartphone content,0,0,0.0,0.0,0.0
+Bundle display hooks,2,0,0.0,0.0,0.0
+Cap marker,0,0,0.0,0.0,0.0
+Carry cardboard piece,0,2,0.0,0.0,0.0
+Carry cereal boxes,0,4,0.0,0.0,0.0
+Carry cereal towards aisle,0,0,0.0,0.0,0.0
+Carry container,0,3,0.0,0.0,0.0
+Carry crate of cans,0,0,0.0,0.0,0.0
+Carry item to shelf,0,0,0.0,0.0,0.0
+Carry pasta box towards aisle,0,0,0.0,0.0,0.0
+Carry plastic container,0,0,0.0,0.0,0.0
+Carry stool to next shelf,0,0,0.0,0.0,0.0
+Check phone,0,2,0.0,0.0,0.0
+Check smart watch,0,1,0.0,0.0,0.0
+Check watch,0,0,0.0,0.0,0.0
+Clean shelf,0,0,0.0,0.0,0.0
+Close cardboard box,0,0,0.0,0.0,0.0
+Closing the door,2,0,0.0,0.0,0.0
+Combine bead piles,0,0,0.0,0.0,0.0
+Complete the cut,0,0,0.0,0.0,0.0
+Connect cable to device,0,0,0.0,0.0,0.0
+Continue cutting fabric,0,0,0.0,0.0,0.0
+Continue cutting newspaper,0,0,0.0,0.0,0.0
+Continue folding paper strip,0,0,0.0,0.0,0.0
+Count and arrange paper stars,0,0,0.0,0.0,0.0
+Count and record paper stars,0,0,0.0,0.0,0.0
+Count paper stars,0,0,0.0,0.0,0.0
+Counting and organizing beads,2,0,0.0,0.0,0.0
+Counting star beads,2,0,0.0,0.0,0.0
+Curve foam strip into loop,0,0,0.0,0.0,0.0
+Cut along the edge of the newspaper,0,0,0.0,0.0,0.0
+Cut along the line,0,0,0.0,0.0,0.0
+Cut along the marked line,7,0,0.0,0.0,0.0
+Cut along the newspaper edge,0,0,0.0,0.0,0.0
+Cut cardboard,7,6,0.0,0.0,0.0
+Cut cardboard along line,0,0,0.0,0.0,0.0
+Cut cardboard grid,0,0,0.0,0.0,0.0
+Cut cardboard into triangles,0,0,0.0,0.0,0.0
+Cut cardboard pattern,0,0,0.0,0.0,0.0
+Cut cardboard piece,3,0,0.0,0.0,0.0
+Cut cardboard piece with scissors,0,0,0.0,0.0,0.0
+Cut cardboard pieces with scissors,0,0,0.0,0.0,0.0
+Cut cardboard shape,0,3,0.0,0.0,0.0
+Cut cardboard sheet,0,0,0.0,0.0,0.0
+Cut cardboard sheet with scissors,0,0,0.0,0.0,0.0
+Cut cardboard square,0,0,0.0,0.0,0.0
+Cut cardboard strip,0,0,0.0,0.0,0.0
+Cut cardboard strip with scissors,0,0,0.0,0.0,0.0
+Cut cardboard strip with utility knife,0,0,0.0,0.0,0.0
+Cut cardboard triangle,0,0,0.0,0.0,0.0
+Cut cardboard tube,0,0,0.0,0.0,0.0
+Cut cardboard with scissors,0,1,0.0,0.0,0.0
+Cut cardboard with utility knife,0,12,0.0,0.0,0.0
+Cut fabric with scissors,0,0,0.0,0.0,0.0
+Cut light green fabric,0,0,0.0,0.0,0.0
+Cut newspaper,0,0,0.0,0.0,0.0
+Cut newspaper with scissors,0,0,0.0,0.0,0.0
+Cut out cardboard pattern,0,0,0.0,0.0,0.0
+Cut section from newspaper,0,0,0.0,0.0,0.0
+Cutting fabric,0,0,0.0,0.0,0.0
+Deposit beads into box,0,0,0.0,0.0,0.0
+Deposit cardboard squares,0,0,0.0,0.0,0.0
+Discard item into bin,0,0,0.0,0.0,0.0
+Discard paper towel,0,0,0.0,0.0,0.0
+Draw grid line,0,0,0.0,0.0,0.0
+Draw grid line with pen,0,0,0.0,0.0,0.0
+Draw line,0,0,0.0,0.0,0.0
+Draw line along ruler,0,0,0.0,0.0,0.0
+Draw line on cardboard,0,0,0.0,0.0,0.0
+Draw line with marker,0,0,0.0,0.0,0.0
+Draw line with pen,0,8,0.0,0.0,0.0
+Draw lines on cardboard,0,0,0.0,0.0,0.0
+Draw lines with pen and ruler,0,0,0.0,0.0,0.0
+Draw lines with ruler,0,0,0.0,0.0,0.0
+Draw straight line,0,0,0.0,0.0,0.0
+Draw straight lines on cardboard,0,0,0.0,0.0,0.0
+Drawing grid line,0,0,0.0,0.0,0.0
+Drawing grid line with pen and ruler,0,0,0.0,0.0,0.0
+Drawing grid line with ruler,0,0,0.0,0.0,0.0
+Drawing lines on cardboard,0,0,0.0,0.0,0.0
+Drop cardboard square into box,0,0,0.0,0.0,0.0
+Dry hands,0,0,0.0,0.0,0.0
+Enter the room,0,2,0.0,0.0,0.0
+Enter workspace,0,0,0.0,0.0,0.0
+Entering the VR training room,3,0,0.0,0.0,0.0
+Examine canned goods,0,0,0.0,0.0,0.0
+Examine item,0,0,0.0,0.0,0.0
+Examine labels,0,0,0.0,0.0,0.0
+Examine product,0,0,0.0,0.0,0.0
+Expand paper lantern,2,2,0.0,0.0,0.0
+Extract wire hangers from box,2,0,0.0,0.0,0.0
+Finish placing cardboard cutouts,0,0,0.0,0.0,0.0
+Finish washing hands,0,0,0.0,0.0,0.0
+Finish wiping and inspect jar,0,0,0.0,0.0,0.0
+Finishing coil,0,0,0.0,0.0,0.0
+Fold and manipulate ribbon,0,0,0.0,0.0,0.0
+Fold and organize paper strips,0,0,0.0,0.0,0.0
+Fold blue strip,0,0,0.0,0.0,0.0
+Fold cardboard,0,0,0.0,0.0,0.0
+Fold cardboard edge,0,0,0.0,0.0,0.0
+Fold cardboard shape,0,0,0.0,0.0,0.0
+Fold cardboard sheet,0,0,0.0,0.0,0.0
+Fold cut cardboard,0,0,0.0,0.0,0.0
+Fold foam piece,0,0,0.0,0.0,0.0
+Fold lucky star,0,0,0.0,0.0,0.0
+Fold newspaper,0,0,0.0,0.0,0.0
+Fold paper lantern,2,1,0.0,0.0,0.0
+Fold paper star,0,1,0.0,0.0,0.0
+Fold paper strip,0,5,0.0,0.0,0.0
+Fold paper strip into a star,0,0,0.0,0.0,0.0
+Fold paper strip into knot,0,0,0.0,0.0,0.0
+Fold paper strip into lucky star,0,0,0.0,0.0,0.0
+Fold paper strip into star,0,0,0.0,0.0,0.0
+Fold plastic strip,3,0,0.0,0.0,0.0
+Fold purple paper,0,0,0.0,0.0,0.0
+Fold purple paper strip,0,0,0.0,0.0,0.0
+Fold ribbon,0,0,0.0,0.0,0.0
+Folding cardboard,0,0,0.0,0.0,0.0
+Folding paper strip,0,0,0.0,0.0,0.0
+Forming quilled paper shape,0,0,0.0,0.0,0.0
+Gather cardboard pieces,0,0,0.0,0.0,0.0
+Gather pieces,0,0,0.0,0.0,0.0
+Gather pieces into box,0,0,0.0,0.0,0.0
+Gather star beads,2,0,0.0,0.0,0.0
+Gathering colored beads,0,0,0.0,0.0,0.0
+Gathering items,0,0,0.0,0.0,0.0
+Gathering star beads,0,0,0.0,0.0,0.0
+Gesturing,2,1,0.0,0.0,0.0
+Grasp and retrieve item,0,0,0.0,0.0,0.0
+Grasp cardboard sheet,0,0,0.0,0.0,0.0
+Grasp cleaning bottle,2,0,0.0,0.0,0.0
+Grasp door handle,0,0,0.0,0.0,0.0
+Grasp electronic object,0,0,0.0,0.0,0.0
+Grasp item,0,0,0.0,0.0,0.0
+Grasp lantern,2,0,0.0,0.0,0.0
+Grasp lantern component,2,0,0.0,0.0,0.0
+Grasp next item,0,0,0.0,0.0,0.0
+Grasp origami stars,0,0,0.0,0.0,0.0
+Grasp package,0,0,0.0,0.0,0.0
+Grasp paper strip,0,0,0.0,0.0,0.0
+Grasp plastic bag on shelf,0,0,0.0,0.0,0.0
+Grasp product from box,0,0,0.0,0.0,0.0
+Grasp product from shelf,0,0,0.0,0.0,0.0
+Grasp retail item,0,0,0.0,0.0,0.0
+Grasp shopping bag,0,0,0.0,0.0,0.0
+Grasp snack package,0,0,0.0,0.0,0.0
+Grasping cleaning cloth,2,0,0.0,0.0,0.0
+Greeting/acknowledging participants,3,0,0.0,0.0,0.0
+Guide utility knife along ruler,0,0,0.0,0.0,0.0
+Handle paper lantern component,2,0,0.0,0.0,0.0
+Hold and align cardboard,0,0,0.0,0.0,0.0
+Hold and align newspaper,0,0,0.0,0.0,0.0
+Hold and align paper strip,0,0,0.0,0.0,0.0
+Hold and bend paper strip,0,0,0.0,0.0,0.0
+Hold and bend plastic strip,3,0,0.0,0.0,0.0
+Hold and crease purple paper,0,0,0.0,0.0,0.0
+Hold and examine item,0,0,0.0,0.0,0.0
+Hold and inspect can,0,0,0.0,0.0,0.0
+Hold and manipulate paper strip,2,0,0.0,0.0,0.0
+Hold and mark cardboard piece,0,0,0.0,0.0,0.0
+Hold and rotate paper strip,0,0,0.0,0.0,0.0
+Hold and view phone,0,0,0.0,0.0,0.0
+Hold and wipe product,0,0,0.0,0.0,0.0
+Hold beads,2,0,0.0,0.0,0.0
+Hold bin and move through aisle,0,0,0.0,0.0,0.0
+Hold blue product box,0,0,0.0,0.0,0.0
+Hold blue strip,0,0,0.0,0.0,0.0
+Hold canned food,2,0,0.0,0.0,0.0
+Hold cardboard,0,0,0.0,0.0,0.0
+Hold cardboard piece,3,0,0.0,0.0,0.0
+Hold cardboard pieces,0,0,0.0,0.0,0.0
+Hold cardboard strip,0,0,0.0,0.0,0.0
+Hold cardboard with ruler,0,0,0.0,0.0,0.0
+Hold charger,0,0,0.0,0.0,0.0
+Hold charger and cable,0,0,0.0,0.0,0.0
+Hold charging cable,0,0,0.0,0.0,0.0
+Hold cleaning cloth,0,0,0.0,0.0,0.0
+Hold container,0,0,0.0,0.0,0.0
+Hold container lid,2,0,0.0,0.0,0.0
+Hold container of canned food,0,0,0.0,0.0,0.0
+Hold craft tool,0,0,0.0,0.0,0.0
+Hold device and cable,0,0,0.0,0.0,0.0
+Hold earbud case,2,0,0.0,0.0,0.0
+Hold electronic accessory,0,0,0.0,0.0,0.0
+Hold electronic item,0,0,0.0,0.0,0.0
+Hold empty container,0,0,0.0,0.0,0.0
+Hold foam pieces,0,0,0.0,0.0,0.0
+Hold instructional sign,0,0,0.0,0.0,0.0
+Hold item,0,0,0.0,0.0,0.0
+Hold item and adjust posture,0,0,0.0,0.0,0.0
+Hold items,0,0,0.0,0.0,0.0
+Hold items and inspect shelf,0,0,0.0,0.0,0.0
+Hold items in hand,0,0,0.0,0.0,0.0
+Hold newspaper,0,0,0.0,0.0,0.0
+Hold paper lantern,2,5,0.2,0.5,0.28571428571428575
+Hold paper strip,0,24,0.0,0.0,0.0
+Hold pen and paper,0,0,0.0,0.0,0.0
+Hold phone,0,0,0.0,0.0,0.0
+Hold pickle jar,0,0,0.0,0.0,0.0
+Hold portable charger,0,0,0.0,0.0,0.0
+Hold power adapter,0,0,0.0,0.0,0.0
+Hold power bank and cable,0,0,0.0,0.0,0.0
+Hold product,0,0,0.0,0.0,0.0
+Hold product labels,0,0,0.0,0.0,0.0
+Hold product package,0,0,0.0,0.0,0.0
+Hold quilled paper coil,0,0,0.0,0.0,0.0
+Hold quilled paper piece,0,0,0.0,0.0,0.0
+Hold quilling paper,0,0,0.0,0.0,0.0
+Hold recording sheet and pen,0,0,0.0,0.0,0.0
+Hold ruler,0,0,0.0,0.0,0.0
+Hold ruler and draw line,0,0,0.0,0.0,0.0
+Hold ruler and mark cardboard,0,0,0.0,0.0,0.0
+Hold ruler and marker,0,0,0.0,0.0,0.0
+Hold ruler and pen steady,0,0,0.0,0.0,0.0
+Hold ruler on cardboard,0,0,0.0,0.0,0.0
+Hold ruler steady,0,0,0.0,0.0,0.0
+Hold scissors,0,0,0.0,0.0,0.0
+Hold small cardboard pieces,0,0,0.0,0.0,0.0
+Hold small object,0,0,0.0,0.0,0.0
+Hold small piece of ribbon,0,0,0.0,0.0,0.0
+Hold small product bag,0,0,0.0,0.0,0.0
+Hold small white box,0,0,0.0,0.0,0.0
+Hold smartphone,4,42,0.07142857142857142,0.75,0.13043478260869565
+Hold smartphone box,0,0,0.0,0.0,0.0
+Hold snack package,0,0,0.0,0.0,0.0
+Hold snack packages,0,0,0.0,0.0,0.0
+Hold supplement bottle,0,0,0.0,0.0,0.0
+Hold tray of canned goods,0,0,0.0,0.0,0.0
+Hold utility knife,0,0,0.0,0.0,0.0
+Hold water bottle,0,0,0.0,0.0,0.0
+Holding marker,0,1,0.0,0.0,0.0
+Identify next cardboard piece,3,0,0.0,0.0,0.0
+Inflate paper star,0,0,0.0,0.0,0.0
+Initiate star folding,0,0,0.0,0.0,0.0
+Insert charging cable,0,0,0.0,0.0,0.0
+Insert charging cable into power bank,0,0,0.0,0.0,0.0
+Insert plug into power adapter,0,0,0.0,0.0,0.0
+Inspect Dior gift box,0,0,0.0,0.0,0.0
+Inspect almond package,0,0,0.0,0.0,0.0
+Inspect and place item on shelf,0,0,0.0,0.0,0.0
+Inspect bottle,0,0,0.0,0.0,0.0
+Inspect cardboard piece,0,0,0.0,0.0,0.0
+Inspect cardboard strip,0,0,0.0,0.0,0.0
+Inspect charging case,0,0,0.0,0.0,0.0
+Inspect electronic item,0,0,0.0,0.0,0.0
+Inspect jar,0,0,0.0,0.0,0.0
+Inspect product,0,0,0.0,0.0,0.0
+Inspect product lid,0,0,0.0,0.0,0.0
+Inspect shelf,0,0,0.0,0.0,0.0
+Inspect shelf and organize stock,0,0,0.0,0.0,0.0
+Inspect shelf condition,2,0,0.0,0.0,0.0
+Inspect smartphone box,0,0,0.0,0.0,0.0
+Inspect strip,0,0,0.0,0.0,0.0
+Inspect supplement bottle,0,0,0.0,0.0,0.0
+Interact with colleagues,0,0,0.0,0.0,0.0
+Interact with phone,0,0,0.0,0.0,0.0
+Interact with smartphone,4,0,0.0,0.0,0.0
+Interact with smartphone screen,0,0,0.0,0.0,0.0
+Interacting with phone screen,0,0,0.0,0.0,0.0
+Interaction with coworker,0,0,0.0,0.0,0.0
+Interlock paper strips,0,0,0.0,0.0,0.0
+Labeling cardboard piece,0,0,0.0,0.0,0.0
+Labeling cardboard square,0,0,0.0,0.0,0.0
+Labeling cardboard squares,0,0,0.0,0.0,0.0
+Lift blue strip,0,0,0.0,0.0,0.0
+Lift pen and shift ruler,0,0,0.0,0.0,0.0
+Lift pot lid,1,1,1.0,1.0,1.0
+Lift utility knife,0,0,0.0,0.0,0.0
+Lock phone,0,0,0.0,0.0,0.0
+Look around the table,0,0,0.0,0.0,0.0
+Look away,0,0,0.0,0.0,0.0
+Manipulate adhesive strip,5,0,0.0,0.0,0.0
+Manipulate and inspect colorful pieces,0,0,0.0,0.0,0.0
+Manipulate bead,2,0,0.0,0.0,0.0
+Manipulate beads,2,0,0.0,0.0,0.0
+Manipulate cardboard piece,0,0,0.0,0.0,0.0
+Manipulate cardboard shape,0,0,0.0,0.0,0.0
+Manipulate cardboard sheet,0,0,0.0,0.0,0.0
+Manipulate colorful pieces,0,0,0.0,0.0,0.0
+Manipulate component,0,0,0.0,0.0,0.0
+Manipulate component on strip,0,0,0.0,0.0,0.0
+Manipulate craft paper strips,4,0,0.0,0.0,0.0
+Manipulate craft piece,4,0,0.0,0.0,0.0
+Manipulate folded paper star,0,0,0.0,0.0,0.0
+Manipulate light blue strip,0,0,0.0,0.0,0.0
+Manipulate material,2,0,0.0,0.0,0.0
+Manipulate paper decoration,5,0,0.0,0.0,0.0
+Manipulate paper edge,5,0,0.0,0.0,0.0
+Manipulate paper piece,0,0,0.0,0.0,0.0
+Manipulate paper quilling piece,0,0,0.0,0.0,0.0
+Manipulate paper star,0,3,0.0,0.0,0.0
+Manipulate paper stars,0,0,0.0,0.0,0.0
+Manipulate paper strip,14,9,0.1111111111111111,0.07142857142857142,0.08695652173913043
+Manipulate paper strips,0,0,0.0,0.0,0.0
+Manipulate plastic strip,3,0,0.0,0.0,0.0
+Manipulate plastic strips,3,0,0.0,0.0,0.0
+Manipulate power cable plug,0,0,0.0,0.0,0.0
+Manipulate puzzle piece,3,0,0.0,0.0,0.0
+Manipulate puzzle pieces,3,0,0.0,0.0,0.0
+Manipulate quilled paper,0,0,0.0,0.0,0.0
+Manipulate quilled paper shape,0,0,0.0,0.0,0.0
+Manipulate quilled paper strip,0,0,0.0,0.0,0.0
+Manipulate quilled paper strips,0,0,0.0,0.0,0.0
+Manipulate quilling paper,0,0,0.0,0.0,0.0
+Manipulate quilling strip,0,0,0.0,0.0,0.0
+Manipulate ribbon knot,0,0,0.0,0.0,0.0
+Manipulate ribbon piece,0,0,0.0,0.0,0.0
+Manipulate small component,0,0,0.0,0.0,0.0
+Manipulate small object,0,0,0.0,0.0,0.0
+Manipulate small paper segment,0,0,0.0,0.0,0.0
+Manipulate star,0,0,0.0,0.0,0.0
+Manipulate yellow strip,2,0,0.0,0.0,0.0
+Manipulating paper strips,2,0,0.0,0.0,0.0
+Mark cardboard,0,2,0.0,0.0,0.0
+Mark cardboard piece,3,0,0.0,0.0,0.0
+Mark cardboard strip with pen,0,0,0.0,0.0,0.0
+Mark cardboard with marker,0,3,0.0,0.0,0.0
+Mark cardboard with pen,0,0,0.0,0.0,0.0
+Mark cardboard with pen and ruler,0,0,0.0,0.0,0.0
+Mark cardboard with ruler,0,0,0.0,0.0,0.0
+Mark cardboard with ruler and pen,0,0,0.0,0.0,0.0
+Mark fabric,0,0,0.0,0.0,0.0
+Mark fabric with pen,0,0,0.0,0.0,0.0
+Mark fabric with pen and ruler,0,0,0.0,0.0,0.0
+Mark line on cardboard,0,0,0.0,0.0,0.0
+Mark lines on cardboard,0,0,0.0,0.0,0.0
+Mark lines with marker,0,0,0.0,0.0,0.0
+Mark lines with pen along ruler,0,0,0.0,0.0,0.0
+Mark list with pen,0,0,0.0,0.0,0.0
+Mark paper list,0,0,0.0,0.0,0.0
+Mark straight line,0,0,0.0,0.0,0.0
+Marking cardboard piece,3,0,0.0,0.0,0.0
+Marking cardboard with pen,0,0,0.0,0.0,0.0
+Marking lines on cardboard,0,0,0.0,0.0,0.0
+Measure and mark cardboard,0,0,0.0,0.0,0.0
+Measure cardboard with ruler,0,0,0.0,0.0,0.0
+Move Mahjong tile,0,0,0.0,0.0,0.0
+Move along shelf,0,0,0.0,0.0,0.0
+Move along the shelf,0,0,0.0,0.0,0.0
+Move along the shelves,0,0,0.0,0.0,0.0
+Move along the supermarket aisle,0,0,0.0,0.0,0.0
+Move and place black buttons,0,0,0.0,0.0,0.0
+Move away from collection box,0,0,0.0,0.0,0.0
+Move away from desk,0,0,0.0,0.0,0.0
+Move away from shelf,0,0,0.0,0.0,0.0
+Move away from table,0,0,0.0,0.0,0.0
+Move away from workstation,0,0,0.0,0.0,0.0
+Move bin,0,0,0.0,0.0,0.0
+Move bin to shelf area,0,0,0.0,0.0,0.0
+Move black button,0,0,0.0,0.0,0.0
+Move blue beads,0,0,0.0,0.0,0.0
+Move box to next position,0,0,0.0,0.0,0.0
+Move button to line,0,0,0.0,0.0,0.0
+Move camera over surface,0,0,0.0,0.0,0.0
+Move can towards shelf,0,0,0.0,0.0,0.0
+Move canned goods container,0,0,0.0,0.0,0.0
+Move cardboard,0,0,0.0,0.0,0.0
+Move cardboard box,0,0,0.0,0.0,0.0
+Move cardboard piece,0,0,0.0,0.0,0.0
+Move cardboard sheet,0,0,0.0,0.0,0.0
+Move cardboard to pile,0,0,0.0,0.0,0.0
+Move container toward shelf,0,0,0.0,0.0,0.0
+Move dustpan to side,1,0,0.0,0.0,0.0
+Move hand,0,0,0.0,0.0,0.0
+Move hand away,2,0,0.0,0.0,0.0
+Move hand away from shelf,2,0,0.0,0.0,0.0
+Move hand away from workspace,0,0,0.0,0.0,0.0
+Move hand back to box,0,0,0.0,0.0,0.0
+Move hand over button pile,0,0,0.0,0.0,0.0
+Move hand to paper stars,0,0,0.0,0.0,0.0
+Move hand toward craft materials,0,0,0.0,0.0,0.0
+Move item to bag,0,0,0.0,0.0,0.0
+Move marker and adjust hand,3,0,0.0,0.0,0.0
+Move marker and ruler,0,0,0.0,0.0,0.0
+Move marker away,0,0,0.0,0.0,0.0
+Move orange buttons,0,0,0.0,0.0,0.0
+Move origami stars,0,0,0.0,0.0,0.0
+Move pen,0,0,0.0,0.0,0.0
+Move pen aside,0,0,0.0,0.0,0.0
+Move pen away,0,0,0.0,0.0,0.0
+Move phone,7,0,0.0,0.0,0.0
+Move piece to pile,0,0,0.0,0.0,0.0
+Move pieces into box,0,0,0.0,0.0,0.0
+Move pineapple chips,0,0,0.0,0.0,0.0
+Move plastic storage bin,0,0,0.0,0.0,0.0
+Move plush toy,0,0,0.0,0.0,0.0
+Move pot,1,0,0.0,0.0,0.0
+Move product to box,0,0,0.0,0.0,0.0
+Move product to shelf,0,0,0.0,0.0,0.0
+Move product towards shelf,0,0,0.0,0.0,0.0
+Move puzzle piece,0,0,0.0,0.0,0.0
+Move ruler,0,0,0.0,0.0,0.0
+Move ruler and tools,0,0,0.0,0.0,0.0
+Move scissors away,0,0,0.0,0.0,0.0
+Move small blue foam piece towards the strip,0,0,0.0,0.0,0.0
+Move smartphone,3,0,0.0,0.0,0.0
+Move storage bin,0,0,0.0,0.0,0.0
+Move through aisle,2,0,0.0,0.0,0.0
+Move through the training room,3,0,0.0,0.0,0.0
+Move to box,0,0,0.0,0.0,0.0
+Move to desk,0,0,0.0,0.0,0.0
+Move to next section,0,0,0.0,0.0,0.0
+Move to shelf,2,0,0.0,0.0,0.0
+Move to shelf base,0,0,0.0,0.0,0.0
+Move to stock products,0,0,0.0,0.0,0.0
+Move towards aisle,0,0,0.0,0.0,0.0
+Move towards box,0,0,0.0,0.0,0.0
+Move towards kitchen area,1,0,0.0,0.0,0.0
+Move towards shelf,0,0,0.0,0.0,0.0
+Move towards table,0,0,0.0,0.0,0.0
+Move towards the stove,1,0,0.0,0.0,0.0
+Move tray towards packing area,0,0,0.0,0.0,0.0
+Move utility knife along ruler,0,0,0.0,0.0,0.0
+Move vacuum cleaner,0,0,0.0,0.0,0.0
+Move vacuum cleaner hose,0,0,0.0,0.0,0.0
+Moving cardboard square,0,0,0.0,0.0,0.0
+Moving hand,0,0,0.0,0.0,0.0
+Moving hand towards cardboard stack,0,0,0.0,0.0,0.0
+Moving ruler,0,0,0.0,0.0,0.0
+Observe and pause,2,1,0.0,0.0,0.0
+Observe and walk through store,2,3,0.0,0.0,0.0
+Observe colleague and workspace,2,0,0.0,0.0,0.0
+Observe craft layout,0,0,0.0,0.0,0.0
+Observe desktop layout,0,0,0.0,0.0,0.0
+Observe paper and count objects,0,0,0.0,0.0,0.0
+Observe paper quilling station,0,0,0.0,0.0,0.0
+Observe puzzle progress,3,0,0.0,0.0,0.0
+Observe room,0,1,0.0,0.0,0.0
+Observe shelf,0,0,0.0,0.0,0.0
+Observe shelf status,0,0,0.0,0.0,0.0
+Observe sorting progress,0,0,0.0,0.0,0.0
+Observe stocking,0,0,0.0,0.0,0.0
+Observe surroundings,0,2,0.0,0.0,0.0
+Observe workspace,2,1,0.0,0.0,0.0
+Open cardboard box,0,0,0.0,0.0,0.0
+Open door,0,0,0.0,0.0,0.0
+Open earbud case,2,0,0.0,0.0,0.0
+Open folded paper lantern,2,0,0.0,0.0,0.0
+Open paper lantern,2,0,0.0,0.0,0.0
+Open paper lantern component,2,0,0.0,0.0,0.0
+Open small case,0,0,0.0,0.0,0.0
+Open stove pot lid,1,0,0.0,0.0,0.0
+Open supplement bottle,0,0,0.0,0.0,0.0
+Operate smartphone,4,0,0.0,0.0,0.0
+Organize bag contents,0,0,0.0,0.0,0.0
+Organize cardboard pieces,2,0,0.0,0.0,0.0
+Organize item on shelf,0,0,0.0,0.0,0.0
+Organize products,0,0,0.0,0.0,0.0
+Organize snacks in box,0,0,0.0,0.0,0.0
+Organize tools and materials,0,0,0.0,0.0,0.0
+Pack beads into box,0,0,0.0,0.0,0.0
+Peel blue strip,0,0,0.0,0.0,0.0
+Peel foam strip,0,0,0.0,0.0,0.0
+Pick up Dior gift box,0,0,0.0,0.0,0.0
+Pick up Mahjong tile,0,0,0.0,0.0,0.0
+Pick up accessory,0,0,0.0,0.0,0.0
+Pick up and sort cardboard,0,0,0.0,0.0,0.0
+Pick up another bottle,0,0,0.0,0.0,0.0
+Pick up another canned item,0,0,0.0,0.0,0.0
+Pick up another item,0,0,0.0,0.0,0.0
+Pick up beads,0,0,0.0,0.0,0.0
+Pick up black button,0,0,0.0,0.0,0.0
+Pick up blue foam piece,0,0,0.0,0.0,0.0
+Pick up blue paper strip,0,0,0.0,0.0,0.0
+Pick up bottle,0,0,0.0,0.0,0.0
+Pick up bottled sauce,0,0,0.0,0.0,0.0
+Pick up button,3,0,0.0,0.0,0.0
+Pick up can,2,0,0.0,0.0,0.0
+Pick up canned food,2,1,0.0,0.0,0.0
+Pick up canned good,0,0,0.0,0.0,0.0
+Pick up canned goods,0,0,0.0,0.0,0.0
+Pick up canned item,0,0,0.0,0.0,0.0
+Pick up canned product,0,0,0.0,0.0,0.0
+Pick up cardboard,0,0,0.0,0.0,0.0
+Pick up cardboard cutout,0,0,0.0,0.0,0.0
+Pick up cardboard piece,0,1,0.0,0.0,0.0
+Pick up cardboard square,0,0,0.0,0.0,0.0
+Pick up cardboard stack,0,0,0.0,0.0,0.0
+Pick up cardboard strip,0,0,0.0,0.0,0.0
+Pick up cardboard tray,0,0,0.0,0.0,0.0
+Pick up cereal boxes,0,0,0.0,0.0,0.0
+Pick up charging cable,0,0,0.0,0.0,0.0
+Pick up charging case,0,0,0.0,0.0,0.0
+Pick up cleaning cloth,0,0,0.0,0.0,0.0
+Pick up colored tile,0,0,0.0,0.0,0.0
+Pick up container,0,0,0.0,0.0,0.0
+Pick up container from box,0,0,0.0,0.0,0.0
+Pick up craft material,0,0,0.0,0.0,0.0
+Pick up cut cardboard piece,0,0,0.0,0.0,0.0
+Pick up dustpan,1,0,0.0,0.0,0.0
+Pick up electronic accessory,0,0,0.0,0.0,0.0
+Pick up electronic accessory from box,0,0,0.0,0.0,0.0
+Pick up electronic device,0,0,0.0,0.0,0.0
+Pick up electronic item,0,0,0.0,0.0,0.0
+Pick up electronic product,0,0,0.0,0.0,0.0
+Pick up food item,0,0,0.0,0.0,0.0
+Pick up gift box,0,0,0.0,0.0,0.0
+Pick up grocery item,0,0,0.0,0.0,0.0
+Pick up item,0,0,0.0,0.0,0.0
+Pick up item from bin,0,0,0.0,0.0,0.0
+Pick up item from box,0,0,0.0,0.0,0.0
+Pick up item from shelf,0,0,0.0,0.0,0.0
+Pick up items from the shopping bag,2,0,0.0,0.0,0.0
+Pick up jar,0,4,0.0,0.0,0.0
+Pick up light blue strip,0,0,0.0,0.0,0.0
+Pick up marker,0,0,0.0,0.0,0.0
+Pick up metal ruler,0,0,0.0,0.0,0.0
+Pick up new cardboard piece,2,0,0.0,0.0,0.0
+Pick up new electronic product,0,0,0.0,0.0,0.0
+Pick up new product from box,0,0,0.0,0.0,0.0
+Pick up next gift box,0,0,0.0,0.0,0.0
+Pick up next item from bin,0,0,0.0,0.0,0.0
+Pick up next product from bin,0,0,0.0,0.0,0.0
+Pick up nut bar box,0,0,0.0,0.0,0.0
+Pick up object,0,0,0.0,0.0,0.0
+Pick up oil bottle,0,0,0.0,0.0,0.0
+Pick up orange button,0,0,0.0,0.0,0.0
+Pick up pack from shelf,0,0,0.0,0.0,0.0
+Pick up packaged paper lantern component,1,0,0.0,0.0,0.0
+Pick up paper star,0,2,0.0,0.0,0.0
+Pick up paper strip,0,0,0.0,0.0,0.0
+Pick up paper towel,0,0,0.0,0.0,0.0
+Pick up pasta box,0,0,0.0,0.0,0.0
+Pick up pen,2,0,0.0,0.0,0.0
+Pick up phone,0,0,0.0,0.0,0.0
+Pick up pickle jar,0,0,0.0,0.0,0.0
+Pick up pink water bottle,0,0,0.0,0.0,0.0
+Pick up plastic bin,0,0,0.0,0.0,0.0
+Pick up plastic container,0,0,0.0,0.0,0.0
+Pick up plush toy,0,0,0.0,0.0,0.0
+Pick up portable charger,0,0,0.0,0.0,0.0
+Pick up power bank,0,0,0.0,0.0,0.0
+Pick up product,0,0,0.0,0.0,0.0
+Pick up product box,0,0,0.0,0.0,0.0
+Pick up product from bin,0,0,0.0,0.0,0.0
+Pick up product from box,0,0,0.0,0.0,0.0
+Pick up product from shelf,0,0,0.0,0.0,0.0
+Pick up puzzle piece,3,0,0.0,0.0,0.0
+Pick up red button,0,0,0.0,0.0,0.0
+Pick up retail item,0,0,0.0,0.0,0.0
+Pick up sauce bottle,0,0,0.0,0.0,0.0
+Pick up scissors,0,0,0.0,0.0,0.0
+Pick up shopping bag,0,0,0.0,0.0,0.0
+Pick up small cardboard piece,0,0,0.0,0.0,0.0
+Pick up small item,0,0,0.0,0.0,0.0
+Pick up small object,0,0,0.0,0.0,0.0
+Pick up small piece of material,2,0,0.0,0.0,0.0
+Pick up smartphone,2,0,0.0,0.0,0.0
+Pick up snack package,0,0,0.0,0.0,0.0
+Pick up snack packages,0,0,0.0,0.0,0.0
+Pick up snack packs,0,0,0.0,0.0,0.0
+Pick up snack pouch,0,0,0.0,0.0,0.0
+Pick up spice jar,0,0,0.0,0.0,0.0
+Pick up stapler,0,0,0.0,0.0,0.0
+Pick up star,0,0,0.0,0.0,0.0
+Pick up star bead,2,0,0.0,0.0,0.0
+Pick up star-shaped bead,0,0,0.0,0.0,0.0
+Pick up storage container,0,0,0.0,0.0,0.0
+Pick up supplement bottle,0,0,0.0,0.0,0.0
+Pick up supplies from box,0,0,0.0,0.0,0.0
+Pick up tin can,0,0,0.0,0.0,0.0
+Pick up tool,0,0,0.0,0.0,0.0
+Pick up utility knife,6,0,0.0,0.0,0.0
+Pick up water bottle,0,0,0.0,0.0,0.0
+Pick up yellow item,0,0,0.0,0.0,0.0
+Pick up yellow paper strip,0,0,0.0,0.0,0.0
+Picking up bottle,1,0,0.0,0.0,0.0
+Picking up crafting material,2,0,0.0,0.0,0.0
+Picking up stock,0,0,0.0,0.0,0.0
+Place Mahjong tile on stack,0,0,0.0,0.0,0.0
+Place Mahjong tile on the stack,0,0,0.0,0.0,0.0
+Place accessory box,0,0,0.0,0.0,0.0
+Place accessory into box,0,0,0.0,0.0,0.0
+Place accessory on shelf,0,0,0.0,0.0,0.0
+Place and align button,0,0,0.0,0.0,0.0
+Place and count bead,2,0,0.0,0.0,0.0
+Place another canned food on shelf,2,0,0.0,0.0,0.0
+Place back Dior gift box,0,0,0.0,0.0,0.0
+Place bead on table,0,0,0.0,0.0,0.0
+Place bottle back on shelf,0,0,0.0,0.0,0.0
+Place box on shelf,0,0,0.0,0.0,0.0
+Place button,3,0,0.0,0.0,0.0
+Place button in group,0,0,0.0,0.0,0.0
+Place button in row,0,0,0.0,0.0,0.0
+Place can on shelf,2,1,1.0,0.5,0.6666666666666666
+Place canned food in bin,0,0,0.0,0.0,0.0
+Place canned food in container,0,0,0.0,0.0,0.0
+Place canned food on shelf,1,0,0.0,0.0,0.0
+Place canned good on shelf,0,0,0.0,0.0,0.0
+Place canned goods in container,0,0,0.0,0.0,0.0
+Place canned product on shelf,0,0,0.0,0.0,0.0
+Place cans into box,0,0,0.0,0.0,0.0
+Place cardboard,0,0,0.0,0.0,0.0
+Place cardboard piece,0,0,0.0,0.0,0.0
+Place cardboard piece on stack,0,6,0.0,0.0,0.0
+Place cardboard square,0,0,0.0,0.0,0.0
+Place cardboard square on stack,0,0,0.0,0.0,0.0
+Place cardboard strip,0,0,0.0,0.0,0.0
+Place charger on table,0,0,0.0,0.0,0.0
+Place charging case down,0,0,0.0,0.0,0.0
+Place cloth on floor,1,0,0.0,0.0,0.0
+Place colored tile,0,0,0.0,0.0,0.0
+Place container in bin,0,0,0.0,0.0,0.0
+Place container on floor,0,0,0.0,0.0,0.0
+Place container on shelf,0,0,0.0,0.0,0.0
+Place controller on table,0,0,0.0,0.0,0.0
+Place crate on floor,0,0,0.0,0.0,0.0
+Place device on lap,0,0,0.0,0.0,0.0
+Place down paper pieces,0,0,0.0,0.0,0.0
+Place down paper segment,0,0,0.0,0.0,0.0
+Place down pen,0,0,0.0,0.0,0.0
+Place down pink water bottle,0,0,0.0,0.0,0.0
+Place down ruler and pen,0,0,0.0,0.0,0.0
+Place down scissors,0,0,0.0,0.0,0.0
+Place down strip,0,0,0.0,0.0,0.0
+Place finished star on table,0,0,0.0,0.0,0.0
+Place gift box into bin,0,0,0.0,0.0,0.0
+Place gift box on shelf,0,0,0.0,0.0,0.0
+Place hand on table,3,0,0.0,0.0,0.0
+Place item back,0,0,0.0,0.0,0.0
+Place item back on shelf,0,0,0.0,0.0,0.0
+Place item in bag,0,0,0.0,0.0,0.0
+Place item in container,0,0,0.0,0.0,0.0
+Place item in shopping bag,0,0,0.0,0.0,0.0
+Place item into bag,0,0,0.0,0.0,0.0
+Place item into shopping bag,0,0,0.0,0.0,0.0
+Place item on shelf,2,0,0.0,0.0,0.0
+Place item on table,0,0,0.0,0.0,0.0
+Place items on shelf,0,2,0.0,0.0,0.0
+Place items on table,0,0,0.0,0.0,0.0
+Place items on the shelf,2,0,0.0,0.0,0.0
+Place jar in box,0,0,0.0,0.0,0.0
+Place jar into shelf box,0,0,0.0,0.0,0.0
+Place jar on shelf,0,22,0.0,0.0,0.0
+Place ketchup bottle on shelf,0,0,0.0,0.0,0.0
+Place knife down,0,0,0.0,0.0,0.0
+Place lid back,1,0,0.0,0.0,0.0
+Place marked piece down,2,0,0.0,0.0,0.0
+Place marker down,0,0,0.0,0.0,0.0
+Place material,2,0,0.0,0.0,0.0
+Place oil in container,0,0,0.0,0.0,0.0
+Place paper star,0,40,0.0,0.0,0.0
+Place paper star in row,0,0,0.0,0.0,0.0
+Place pen on cardboard,0,0,0.0,0.0,0.0
+Place pen on table,0,0,0.0,0.0,0.0
+Place phone down,2,0,0.0,0.0,0.0
+Place phone on desk,0,0,0.0,0.0,0.0
+Place phone on shelf,0,0,0.0,0.0,0.0
+Place phone on table,0,0,0.0,0.0,0.0
+Place pickle jar in box,0,0,0.0,0.0,0.0
+Place piece into puzzle,3,0,0.0,0.0,0.0
+Place plush toy into bag,0,0,0.0,0.0,0.0
+Place plush toy on shelf,0,0,0.0,0.0,0.0
+Place product in box,0,0,0.0,0.0,0.0
+Place product on shelf,0,0,0.0,0.0,0.0
+Place puzzle piece,3,0,0.0,0.0,0.0
+Place quilled paper shape,0,0,0.0,0.0,0.0
+Place red button,0,2,0.0,0.0,0.0
+Place ribbon onto project,0,0,0.0,0.0,0.0
+Place ruler on cardboard,0,0,0.0,0.0,0.0
+Place sauce bottle on shelf,0,0,0.0,0.0,0.0
+Place sauce in container,0,0,0.0,0.0,0.0
+Place scissors aside,0,0,0.0,0.0,0.0
+Place scissors down,0,0,0.0,0.0,0.0
+Place scissors on table,0,0,0.0,0.0,0.0
+Place smartphone down,3,0,0.0,0.0,0.0
+Place smartphone on cardboard,0,0,0.0,0.0,0.0
+Place smartphone on desk,0,0,0.0,0.0,0.0
+Place smartphone on stand,1,0,0.0,0.0,0.0
+Place smartphone on table,0,3,0.0,0.0,0.0
+Place snack in box,0,0,0.0,0.0,0.0
+Place snack on shelf,0,0,0.0,0.0,0.0
+Place snack package in box,0,0,0.0,0.0,0.0
+Place snack package on shelf,0,2,0.0,0.0,0.0
+Place snack packages on shelf,0,0,0.0,0.0,0.0
+Place snack pouch in container,0,0,0.0,0.0,0.0
+Place snack pouch on shelf,0,0,0.0,0.0,0.0
+Place spice jar in container,0,0,0.0,0.0,0.0
+Place star,0,0,0.0,0.0,0.0
+Place star in row,0,0,0.0,0.0,0.0
+Place star on table,0,0,0.0,0.0,0.0
+Place stars in container,0,0,0.0,0.0,0.0
+Place stool on floor,0,0,0.0,0.0,0.0
+Place storage container on floor,0,0,0.0,0.0,0.0
+Place strip on table,0,0,0.0,0.0,0.0
+Place supplement bottle in container,0,0,0.0,0.0,0.0
+Place tool on table,0,0,0.0,0.0,0.0
+Place towel,1,0,0.0,0.0,0.0
+Place water bottle on table,0,0,0.0,0.0,0.0
+Place white box on table,0,0,0.0,0.0,0.0
+Placing labeled cardboard square,0,0,0.0,0.0,0.0
+Placing labeled square,0,0,0.0,0.0,0.0
+Placing paper strip,4,0,0.0,0.0,0.0
+Placing pen on table,0,0,0.0,0.0,0.0
+Placing phone down,0,0,0.0,0.0,0.0
+Placing piece on stack,0,0,0.0,0.0,0.0
+Placing stock on shelf,0,0,0.0,0.0,0.0
+Plug cable into portable charger,0,0,0.0,0.0,0.0
+Position cardboard for cutting,0,0,0.0,0.0,0.0
+Position cardboard piece,0,0,0.0,0.0,0.0
+Position cardboard strip,0,0,0.0,0.0,0.0
+Position cardboard tray,0,0,0.0,0.0,0.0
+Position cardboard tube,0,0,0.0,0.0,0.0
+Position container near shelf,0,0,0.0,0.0,0.0
+Position container on shelf,0,0,0.0,0.0,0.0
+Position hands for work,0,0,0.0,0.0,0.0
+Position ribbon piece,0,0,0.0,0.0,0.0
+Position ruler and mark cardboard,0,0,0.0,0.0,0.0
+Position ruler on cardboard,0,0,0.0,0.0,0.0
+Position scissors,0,0,0.0,0.0,0.0
+Position scissors for next cut,0,0,0.0,0.0,0.0
+Position scissors to cut cardboard,0,0,0.0,0.0,0.0
+Position shelving divider,0,0,0.0,0.0,0.0
+Position the ruler,0,0,0.0,0.0,0.0
+Position tray,0,0,0.0,0.0,0.0
+Position utility knife,0,0,0.0,0.0,0.0
+Position utility knife on cardboard,0,0,0.0,0.0,0.0
+Positioning cardboard on workspace,0,0,0.0,0.0,0.0
+Positioning paper strip,0,0,0.0,0.0,0.0
+Positioning puzzle piece,0,0,0.0,0.0,0.0
+Positioning ruler on cardboard,0,0,0.0,0.0,0.0
+Prepare paper strip,0,0,0.0,0.0,0.0
+Prepare to cut cardboard,0,0,0.0,0.0,0.0
+Prepare to draw lines,0,0,0.0,0.0,0.0
+Prepare to pick up item,0,0,0.0,0.0,0.0
+Prepare to place bottle on shelf,0,0,0.0,0.0,0.0
+Prepare to place cardboard,0,0,0.0,0.0,0.0
+Prepare to place item in bag,0,0,0.0,0.0,0.0
+Prepare to place product,0,0,0.0,0.0,0.0
+Prepare to resume cutting,0,0,0.0,0.0,0.0
+Prepare to sort beads,0,0,0.0,0.0,0.0
+Preparing to craft,2,0,0.0,0.0,0.0
+Press fold,0,0,0.0,0.0,0.0
+Pull back hand,0,0,0.0,0.0,0.0
+Pull paper strip,0,0,0.0,0.0,0.0
+Push vacuum cleaner,0,0,0.0,0.0,0.0
+Put down phone,0,0,0.0,0.0,0.0
+Put down scissors,0,0,0.0,0.0,0.0
+Put down smartphone,3,0,0.0,0.0,0.0
+Put down utility knife,0,0,0.0,0.0,0.0
+Put down water bottle,0,0,0.0,0.0,0.0
+Putting away smartphone,0,0,0.0,0.0,0.0
+Reach and sort buttons,0,0,0.0,0.0,0.0
+Reach for Mahjong tiles,0,0,0.0,0.0,0.0
+Reach for additional items,0,0,0.0,0.0,0.0
+Reach for and examine canned goods,0,0,0.0,0.0,0.0
+Reach for and pick up smartphone,0,3,0.0,0.0,0.0
+Reach for another container,0,0,0.0,0.0,0.0
+Reach for another item,1,0,0.0,0.0,0.0
+Reach for beads,0,0,0.0,0.0,0.0
+Reach for black button,0,0,0.0,0.0,0.0
+Reach for button,0,0,0.0,0.0,0.0
+Reach for can,0,0,0.0,0.0,0.0
+Reach for canned food,0,0,0.0,0.0,0.0
+Reach for canned goods,0,0,0.0,0.0,0.0
+Reach for cardboard box,0,0,0.0,0.0,0.0
+Reach for cardboard piece,0,3,0.0,0.0,0.0
+Reach for cleaning supplies,1,0,0.0,0.0,0.0
+Reach for container,0,0,0.0,0.0,0.0
+Reach for craft items,3,0,0.0,0.0,0.0
+Reach for empty shelf space,0,0,0.0,0.0,0.0
+Reach for item,0,0,0.0,0.0,0.0
+Reach for item in box,0,0,0.0,0.0,0.0
+Reach for item on shelf,0,0,0.0,0.0,0.0
+Reach for items,0,0,0.0,0.0,0.0
+Reach for items in box,0,0,0.0,0.0,0.0
+Reach for more pieces,0,0,0.0,0.0,0.0
+Reach for next can,1,0,0.0,0.0,0.0
+Reach for next canned food,1,0,0.0,0.0,0.0
+Reach for next canned food item,0,0,0.0,0.0,0.0
+Reach for next canned product,0,0,0.0,0.0,0.0
+Reach for next item,1,0,0.0,0.0,0.0
+Reach for next piece,0,0,0.0,0.0,0.0
+Reach for next product,0,0,0.0,0.0,0.0
+Reach for object,0,0,0.0,0.0,0.0
+Reach for paper strip,0,6,0.0,0.0,0.0
+Reach for paper strips,0,0,0.0,0.0,0.0
+Reach for phone,0,0,0.0,0.0,0.0
+Reach for product,0,0,0.0,0.0,0.0
+Reach for product labels,0,0,0.0,0.0,0.0
+Reach for product on shelf,0,0,0.0,0.0,0.0
+Reach for puzzle piece,2,0,0.0,0.0,0.0
+Reach for retail item,0,0,0.0,0.0,0.0
+Reach for shelf,0,0,0.0,0.0,0.0
+Reach for shelving divider,0,0,0.0,0.0,0.0
+Reach for snack package,0,1,0.0,0.0,0.0
+Reach for snack pouch,0,0,0.0,0.0,0.0
+Reach for star,0,0,0.0,0.0,0.0
+Reach for stars,0,0,0.0,0.0,0.0
+Reach for utility knife,0,0,0.0,0.0,0.0
+Reach for water bottle,0,0,0.0,0.0,0.0
+Reach for wire hangers,1,0,0.0,0.0,0.0
+Reach into bag,0,0,0.0,0.0,0.0
+Reach into box,1,0,0.0,0.0,0.0
+Reach towards shelf,0,0,0.0,0.0,0.0
+Reaching for beads,0,0,0.0,0.0,0.0
+Realign Mahjong tiles,0,0,0.0,0.0,0.0
+Rearrange Mahjong tile,0,0,0.0,0.0,0.0
+Rearrange Mahjong tiles,0,0,0.0,0.0,0.0
+Rearrange shelf item,0,0,0.0,0.0,0.0
+Record count,2,0,0.0,0.0,0.0
+Record count on notepad,0,0,0.0,0.0,0.0
+Record star count,0,0,0.0,0.0,0.0
+Record star count on paper,0,0,0.0,0.0,0.0
+Release and prepare new strip,0,0,0.0,0.0,0.0
+Release bottle,0,0,0.0,0.0,0.0
+Release cardboard,0,0,0.0,0.0,0.0
+Release cardboard piece,0,0,0.0,0.0,0.0
+Release cardboard piece and gesture,2,0,0.0,0.0,0.0
+Release cardboard shape,0,0,0.0,0.0,0.0
+Release container,0,0,0.0,0.0,0.0
+Release folded paper,0,0,0.0,0.0,0.0
+Release food item,0,0,0.0,0.0,0.0
+Release hook,1,0,0.0,0.0,0.0
+Release label,0,0,0.0,0.0,0.0
+Release lantern,1,0,0.0,0.0,0.0
+Release paper,0,0,0.0,0.0,0.0
+Release paper coil,0,0,0.0,0.0,0.0
+Release paper star,0,0,0.0,0.0,0.0
+Release paper strip,4,0,0.0,0.0,0.0
+Release pickle jar,0,0,0.0,0.0,0.0
+Release product on shelf,0,0,0.0,0.0,0.0
+Release puzzle piece,2,0,0.0,0.0,0.0
+Release quilling strip,0,0,0.0,0.0,0.0
+Release scissors,3,0,0.0,0.0,0.0
+Release smartphone,3,0,0.0,0.0,0.0
+Remove cardboard flap,0,0,0.0,0.0,0.0
+Remove cardboard pattern,0,0,0.0,0.0,0.0
+Remove cardboard pattern piece,0,0,0.0,0.0,0.0
+Remove cleaning bottle,1,0,0.0,0.0,0.0
+Remove item from bag,0,0,0.0,0.0,0.0
+Remove item from shelf,0,0,0.0,0.0,0.0
+Remove lid from container,0,0,0.0,0.0,0.0
+Remove paper lantern part from packaging,1,0,0.0,0.0,0.0
+Remove plastic container from shelf,0,0,0.0,0.0,0.0
+Remove plastic container from storage box,0,0,0.0,0.0,0.0
+Remove plastic packaging,1,0,0.0,0.0,0.0
+Remove ruler,0,0,0.0,0.0,0.0
+Remove ruler and marker,0,0,0.0,0.0,0.0
+Remove shelf label,0,0,0.0,0.0,0.0
+Remove storage bin from shelf,0,0,0.0,0.0,0.0
+Reorganize bin contents,0,0,0.0,0.0,0.0
+Reposition and cut,0,0,0.0,0.0,0.0
+Reposition cardboard for cutting,0,0,0.0,0.0,0.0
+Reposition hand,3,0,0.0,0.0,0.0
+Reposition hands,0,0,0.0,0.0,0.0
+Reposition hands and ruler,0,0,0.0,0.0,0.0
+Reposition marker,0,0,0.0,0.0,0.0
+Reposition newspaper,0,0,0.0,0.0,0.0
+Reposition pen and prepare for next line,0,0,0.0,0.0,0.0
+Reposition ruler,0,0,0.0,0.0,0.0
+Reposition ruler and pen,0,0,0.0,0.0,0.0
+Reposition scissors,0,0,0.0,0.0,0.0
+Reposition sign and organize beads,0,0,0.0,0.0,0.0
+Reposition tools,0,0,0.0,0.0,0.0
+Reposition utility knife,0,0,0.0,0.0,0.0
+Repositioning ruler,0,0,0.0,0.0,0.0
+Repositioning ruler and cardboard,0,0,0.0,0.0,0.0
+Resume counting stars,0,0,0.0,0.0,0.0
+Resume observation,2,0,0.0,0.0,0.0
+Resume sorting blue beads,0,0,0.0,0.0,0.0
+Resume writing on paper,0,0,0.0,0.0,0.0
+Retract camera/reposition view,0,0,0.0,0.0,0.0
+Retract hand,0,0,0.0,0.0,0.0
+Retract hand from bag,0,0,0.0,0.0,0.0
+Retrieve another container,0,0,0.0,0.0,0.0
+Retrieve canned food from box,1,0,0.0,0.0,0.0
+Retrieve hand to table,0,0,0.0,0.0,0.0
+Retrieve items from bag,0,0,0.0,0.0,0.0
+Retrieve next canned food item,1,0,0.0,0.0,0.0
+Retrieve paper strip,0,1,0.0,0.0,0.0
+Retrieve paper strips,0,0,0.0,0.0,0.0
+Retrieve snack from container,0,0,0.0,0.0,0.0
+Retrieve star,0,0,0.0,0.0,0.0
+Retrieving more beads,2,0,0.0,0.0,0.0
+Return to sorting,0,0,0.0,0.0,0.0
+Reviewing count record,0,0,0.0,0.0,0.0
+Rinse cloth in sink,1,0,0.0,0.0,0.0
+Roll quilling paper,0,0,0.0,0.0,0.0
+Rolling paper strip,0,0,0.0,0.0,0.0
+Rub hands together,0,0,0.0,0.0,0.0
+Scan for next piece,0,0,0.0,0.0,0.0
+Scan supermarket shelves,0,0,0.0,0.0,0.0
+Score cardboard,0,0,0.0,0.0,0.0
+Scroll on smartphone,0,0,0.0,0.0,0.0
+Scroll smartphone screen,3,0,0.0,0.0,0.0
+Scroll through photo gallery,0,0,0.0,0.0,0.0
+Scrolling and viewing content on phone,0,0,0.0,0.0,0.0
+Scrolling or navigating on phone,0,0,0.0,0.0,0.0
+Search for puzzle piece,2,0,0.0,0.0,0.0
+Secure paper edges with adhesive,4,0,0.0,0.0,0.0
+Secure ribbon with needle,0,0,0.0,0.0,0.0
+Securing paper structure,4,0,0.0,0.0,0.0
+Select a bottle,0,0,0.0,0.0,0.0
+Select and pick up a canned item,0,0,0.0,0.0,0.0
+Select another item,0,0,0.0,0.0,0.0
+Select paper strip,0,0,0.0,0.0,0.0
+Select product from box,0,0,0.0,0.0,0.0
+Selecting new paper strip,0,0,0.0,0.0,0.0
+Separate cardboard piece,0,0,0.0,0.0,0.0
+Set down scissors and pick up power bank,0,0,0.0,0.0,0.0
+Set down utility knife,0,0,0.0,0.0,0.0
+Slide utility knife along ruler,0,0,0.0,0.0,0.0
+Sort Mahjong tiles,0,0,0.0,0.0,0.0
+Sort and adjust button line,3,0,0.0,0.0,0.0
+Sort and arrange buttons,3,0,0.0,0.0,0.0
+Sort and arrange cardboard pieces,0,0,0.0,0.0,0.0
+Sort and count beads,2,0,0.0,0.0,0.0
+Sort and place buttons,3,0,0.0,0.0,0.0
+Sort and place paper star,0,0,0.0,0.0,0.0
+Sort and stack cardboard pieces,0,0,0.0,0.0,0.0
+Sort beads,2,0,0.0,0.0,0.0
+Sort beads and write count,1,0,0.0,0.0,0.0
+Sort beads by color,0,39,0.0,0.0,0.0
+Sort beads by hand,0,0,0.0,0.0,0.0
+Sort beads on table,0,0,0.0,0.0,0.0
+Sort beads on the table,0,6,0.0,0.0,0.0
+Sort blue beads,0,0,0.0,0.0,0.0
+Sort blue star-shaped pieces,0,0,0.0,0.0,0.0
+Sort button,3,0,0.0,0.0,0.0
+Sort button by color,0,0,0.0,0.0,0.0
+Sort buttons,3,0,0.0,0.0,0.0
+Sort buttons by color,0,0,0.0,0.0,0.0
+Sort canned goods in tray,0,0,0.0,0.0,0.0
+Sort colored tiles,0,0,0.0,0.0,0.0
+Sort colorful pieces,0,0,0.0,0.0,0.0
+Sort craft items,3,0,0.0,0.0,0.0
+Sort cut cardboard,0,0,0.0,0.0,0.0
+Sort light blue origami stars,0,0,0.0,0.0,0.0
+Sort orange button,0,0,0.0,0.0,0.0
+Sort orange buttons,0,0,0.0,0.0,0.0
+Sort origami stars,0,0,0.0,0.0,0.0
+Sort origami stars by color,0,0,0.0,0.0,0.0
+Sort paper star,0,0,0.0,0.0,0.0
+Sort paper stars,0,0,0.0,0.0,0.0
+Sort plastic pieces,0,0,0.0,0.0,0.0
+Sort purple beads,0,0,0.0,0.0,0.0
+Sort purple star-shaped objects,0,0,0.0,0.0,0.0
+Sort puzzle pieces,2,0,0.0,0.0,0.0
+Sort quilled paper pieces,0,0,0.0,0.0,0.0
+Sort small colorful pieces,0,0,0.0,0.0,0.0
+Sort small craft pieces,3,0,0.0,0.0,0.0
+Sort small objects,0,0,0.0,0.0,0.0
+Sort small plastic pieces,0,0,0.0,0.0,0.0
+Sort star-shaped beads,1,0,0.0,0.0,0.0
+Sort star-shaped objects,0,0,0.0,0.0,0.0
+Sort star-shaped objects by color,0,0,0.0,0.0,0.0
+Sort tiles,0,0,0.0,0.0,0.0
+Sort tiles by color,0,0,0.0,0.0,0.0
+Sort yellow star-shaped objects,0,1,0.0,0.0,0.0
+Sorting buttons,0,0,0.0,0.0,0.0
+Sorting colorful paper pieces,0,0,0.0,0.0,0.0
+Sorting paper stars,0,0,0.0,0.0,0.0
+Stabilize cardboard,0,0,0.0,0.0,0.0
+Stabilize ruler,0,0,0.0,0.0,0.0
+Stack cardboard pieces,0,0,0.0,0.0,0.0
+Stack cardboard square,0,0,0.0,0.0,0.0
+Stack cardboard squares,0,1,0.0,0.0,0.0
+Stacking cardboard pieces,0,0,0.0,0.0,0.0
+Stacking cardboard square,0,0,0.0,0.0,0.0
+Stacking cardboard squares,0,0,0.0,0.0,0.0
+Stand up and walk away,0,0,0.0,0.0,0.0
+Start cutting,2,0,0.0,0.0,0.0
+Start folding paper strip,0,0,0.0,0.0,0.0
+Starting to label next square,0,0,0.0,0.0,0.0
+Stir contents,1,0,0.0,0.0,0.0
+Stop measuring and put down tools,0,0,0.0,0.0,0.0
+Stop sorting stars,0,0,0.0,0.0,0.0
+Sweep debris,0,0,0.0,0.0,0.0
+Sweep floor debris,0,0,0.0,0.0,0.0
+Switch to scissors,0,0,0.0,0.0,0.0
+Switching marker,0,0,0.0,0.0,0.0
+Tap smartphone screen,0,0,0.0,0.0,0.0
+Tapping on smartphone screen,0,0,0.0,0.0,0.0
+Tapping smartphone screen,0,0,0.0,0.0,0.0
+Tear newspaper,0,0,0.0,0.0,0.0
+Tear off cardboard segment,0,0,0.0,0.0,0.0
+Touch canned goods,0,0,0.0,0.0,0.0
+Touch pieces in box,0,0,0.0,0.0,0.0
+Touch shelf edge,0,0,0.0,0.0,0.0
+Trace pattern on cardboard,0,0,0.0,0.0,0.0
+Transition to cutting,0,0,0.0,0.0,0.0
+Transition to standing position,0,0,0.0,0.0,0.0
+Trim cardboard,0,0,0.0,0.0,0.0
+Trim cardboard piece,0,0,0.0,0.0,0.0
+Type on smartphone,0,0,0.0,0.0,0.0
+Typing message on smartphone,0,0,0.0,0.0,0.0
+Typing on phone,0,0,0.0,0.0,0.0
+Typing on smartphone,0,0,0.0,0.0,0.0
+Update paper record,0,0,0.0,0.0,0.0
+Use phone,2,0,0.0,0.0,0.0
+Use phone to check instructions,0,0,0.0,0.0,0.0
+Use phone to check stock,0,0,0.0,0.0,0.0
+Use phone while crafting,1,0,0.0,0.0,0.0
+Use smartphone,6,0,0.0,0.0,0.0
+Vacuum edge of carpet,0,0,0.0,0.0,0.0
+Vacuum the carpet,0,0,0.0,0.0,0.0
+Vacuuming along the wall edge,0,0,0.0,0.0,0.0
+Vacuuming carpet corner,0,0,0.0,0.0,0.0
+Vacuuming carpet edge,0,0,0.0,0.0,0.0
+Vacuuming the carpet edge,0,0,0.0,0.0,0.0
+View content on smartphone,0,0,0.0,0.0,0.0
+View phone screen,0,0,0.0,0.0,0.0
+Viewing phone screen,0,0,0.0,0.0,0.0
+Walk across office,0,0,0.0,0.0,0.0
+Walk across room,0,0,0.0,0.0,0.0
+Walk across the room,0,0,0.0,0.0,0.0
+Walk away,0,0,0.0,0.0,0.0
+Walk in hallway,0,0,0.0,0.0,0.0
+Walk through corridor,0,0,0.0,0.0,0.0
+Walk through doorway,0,0,0.0,0.0,0.0
+Walk through hallway,0,0,0.0,0.0,0.0
+Walk through office,0,0,0.0,0.0,0.0
+Walk through store,0,0,0.0,0.0,0.0
+Walk through workspace,0,17,0.0,0.0,0.0
+Walk towards aisle,0,0,0.0,0.0,0.0
+Walk towards desk,0,0,0.0,0.0,0.0
+Walk towards next aisle,0,0,0.0,0.0,0.0
+Walk towards other aisles,1,0,0.0,0.0,0.0
+Walk towards room,0,0,0.0,0.0,0.0
+Walk towards shelf,0,0,0.0,0.0,0.0
+Walk towards shelves,1,0,0.0,0.0,0.0
+Walk towards storage area,0,0,0.0,0.0,0.0
+Walk towards table,0,0,0.0,0.0,0.0
+Walk towards workspace,0,0,0.0,0.0,0.0
+Walk with cardboard,0,0,0.0,0.0,0.0
+Walk with cardboard cutout,0,0,0.0,0.0,0.0
+Walk with marker,0,0,0.0,0.0,0.0
+Walk with shopping bag,0,0,0.0,0.0,0.0
+Walking across the room,1,0,0.0,0.0,0.0
+Walking along the aisle,0,0,0.0,0.0,0.0
+Walking in the hallway,3,0,0.0,0.0,0.0
+Walking in the workspace,0,0,0.0,0.0,0.0
+Walking through classroom,0,0,0.0,0.0,0.0
+Walking through office hallway,0,0,0.0,0.0,0.0
+Walking through the office,0,0,0.0,0.0,0.0
+Walking to sink,0,0,0.0,0.0,0.0
+Walking towards door,1,0,0.0,0.0,0.0
+Walking towards workstation,0,0,0.0,0.0,0.0
+Washing hands,0,0,0.0,0.0,0.0
+Washing hands in sink,1,0,0.0,0.0,0.0
+Wipe down shelf,0,0,0.0,0.0,0.0
+Wipe electronic item,0,0,0.0,0.0,0.0
+Wipe food product,0,0,0.0,0.0,0.0
+Wipe grocery shelf,0,0,0.0,0.0,0.0
+Wipe item,0,0,0.0,0.0,0.0
+Wipe jar,0,0,0.0,0.0,0.0
+Wipe ketchup bottle,0,0,0.0,0.0,0.0
+Wipe kitchen counter,1,0,0.0,0.0,0.0
+Wipe product,0,0,0.0,0.0,0.0
+Wipe retail item,0,0,0.0,0.0,0.0
+Wipe shelf,0,0,0.0,0.0,0.0
+Wipe shelf surface,0,0,0.0,0.0,0.0
+Wipe the plastic jar,0,0,0.0,0.0,0.0
+Wipe the product jar,0,0,0.0,0.0,0.0
+Wipe the shelf,0,0,0.0,0.0,0.0
+Wiping countertop,1,0,0.0,0.0,0.0
+Withdraw hand,0,0,0.0,0.0,0.0
+Write count on paper,1,0,0.0,0.0,0.0
+Write on notepad,0,0,0.0,0.0,0.0
+Write on paper,1,24,0.041666666666666664,1.0,0.07999999999999999
+Write on paper record,0,0,0.0,0.0,0.0
+Writing on notepad,0,0,0.0,0.0,0.0
+fold purple ribbon,0,0,0.0,0.0,0.0
+sort craft materials,3,0,0.0,0.0,0.0
+"{""action"": ""Pour liquid into bowl"", ""contact"": ""yes"", ""evidence_window"": {""end_frame"": 19, ""start_frame"": 0}, ""next_action"": ""Pour liquid into bowl"", ""objects"": [""bottle"", ""bowl"", ""kitchen counter"", ""kitchen sink"", ""kitchen tap"", ""kitchen cabinet"", ""kitchen shelf"", ""kitchen window"", ""kitchen utensils"", ""kitchen appliances"", ""kitchen utensil rack"", ""kitchen towel"", ""kitchen cabinet door"", ""kitchen cabinet drawer"", ""kitchen cabinet shelf"", ""kitchen cabinet handle"", ""kitchen cabinet knob"", ""kitchen cabinet latch"", ""kitchen cabinet hinge"", ""kitchen cabinet panel"", ""kitchen cabinet frame"", ""kitchen cabinet base"", ""kitchen cabinet top"", ""kitchen cabinet side"", ""kitchen cabinet back"", ""kitchen cabinet door panel"", ""kitchen cabinet door frame"", ""kitchen cabinet door handle"", ""kitchen cabinet door knob"", ""kitchen cabinet door latch"", ""kitchen cabinet door hinge"", ""kitchen cabinet door panel frame"", ""kitchen cabinet door panel handle"", ""kitchen cabinet door panel knob"", ""kitchen cabinet door panel latch"", ""kitchen cabinet door panel hinge"", ""kitchen cabinet door panel frame handle"", ""kitchen cabinet door panel frame knob"", ""kitchen cabinet door panel frame latch"", ""kitchen cabinet door panel frame hinge"", ""kitchen cabinet door panel frame panel"", ""kitchen cabinet door panel frame panel handle"", ""kitchen cabinet door panel frame panel knob"", ""kitchen cabinet door panel frame panel latch"", ""kitchen cabinet door panel frame panel hinge"", ""kitchen cabinet door panel frame panel frame"", ""kitchen cabinet door panel frame panel frame handle"", ""kitchen cabinet door panel frame panel frame knob"", ""kitchen cabinet door panel frame panel frame latch"", ""kitchen cabinet door panel frame panel frame hinge"", ""kitchen cabinet door panel frame panel frame panel"", ""kitchen cabinet door panel frame panel frame panel handle"", ""kitchen cabinet door panel frame panel 
frame panel knob"", ""kitchen cabinet door panel frame panel frame panel latch"", ""kitchen cabinet door panel frame panel frame panel hinge"", ""kitchen cabinet door panel frame panel frame panel frame"", ""kitchen cabinet door panel frame panel frame panel frame handle"", ""kitchen cabinet door panel frame panel frame panel frame knob"", ""kitchen cabinet door panel frame panel frame panel frame latch"", ""kitchen cabinet door panel frame panel frame panel frame hinge"", ""kitchen cabinet door panel frame panel frame panel frame panel"", ""kitchen cabinet door panel frame panel frame panel frame panel handle"", ""kitchen cabinet door panel frame panel frame panel frame panel knob"", ""kitchen cabinet door panel frame panel frame panel frame panel latch"", ""kitchen cabinet door panel frame panel frame panel frame panel hinge"", ""kitchen cabinet door panel frame panel frame panel frame panel frame"", ""kitchen cabinet door panel frame panel frame panel frame panel frame handle"", ""kitchen cabinet door panel frame panel frame panel frame panel frame knob"", ""kitchen cabinet door panel frame panel frame panel frame panel frame latch"", ""kitchen cabinet door panel frame panel frame panel frame panel frame hinge"", ""kitchen cabinet door panel frame panel frame panel frame panel frame panel"", ""kitchen cabinet door panel frame panel frame panel frame panel frame panel handle"", ""kitchen cabinet door panel frame panel frame panel frame panel frame panel knob"", ""kitchen cabinet door panel frame panel frame panel frame panel frame panel latch"", ""kitchen cabinet door panel frame panel frame panel frame panel frame panel hinge"", ""kitchen cabinet door panel frame panel frame panel frame panel frame panel frame"", ""kitchen cabinet door panel frame panel frame panel frame panel frame panel frame handle"", ""kitchen cabinet door panel frame panel frame panel frame panel frame panel frame knob"", ""kitchen cabinet door panel frame panel frame panel frame panel 
frame panel frame latch"", ""kitchen cabinet door panel frame panel frame panel frame panel frame panel frame hinge"", ""kitchen cabinet door panel frame panel frame panel frame panel frame panel frame panel"", ""kitchen cabinet door panel frame panel frame panel frame panel frame panel frame panel handle"", ""kitchen cabinet door panel frame panel frame panel frame panel frame panel frame panel knob"", ""kitchen cabinet door panel frame panel frame panel frame panel frame panel frame panel latch"", ""kitchen cabinet door panel frame panel frame panel frame panel frame panel frame panel hinge"", ""kitchen cabinet door panel frame panel frame panel frame panel frame panel frame panel frame"", ""kitchen cabinet door panel frame panel frame panel frame panel frame panel frame panel frame handle"", ""kitchen cabinet door panel frame panel frame panel frame panel frame panel frame panel frame knob"", ""kitchen cabinet door panel frame panel frame panel frame panel frame panel frame panel frame latch"", ""kitchen cabinet door panel frame panel frame panel frame panel frame panel frame panel frame hinge"", ""kitchen cabinet",0,1,0.0,0.0,0.0
+Close the door,0,2,0.0,0.0,0.0
+"{""action"": ""Close door"", ""contact"": ""yes"", ""evidence_window"": {""end_frame"": 2539, ""start_frame"": 2520}, ""next_action"": ""Close door"", ""objects"": [""door"", ""door handle"", ""plastic bag"", ""blue bucket"", ""red bucket"", ""blue container"", ""red container"", ""white container"", ""white box"", ""white bag"", ""white cloth"", ""white plastic bag"", ""white plastic container"", ""white cardboard box"", ""white paper"", ""white paper bag"", ""white paper sheet"", ""white paper strip"", ""white plastic sheet"", ""white plastic bag with red label"", ""white plastic container with red label"", ""white cardboard tray"", ""white cardboard piece"", ""white cardboard square"", ""white cardboard shape"", ""white paper lantern"", ""white paper lantern component"", ""white paper star"", ""white paper star shape"", ""white paper lantern piece"", ""white paper lantern part"", ""white paper decoration"", ""white paper cutout"", ""white paper shape"", ""white paper segment"", ""white paper coil"", ""white paper strip with red label"", ""white paper strip with blue label"", ""white paper strip with yellow label"", ""white paper strip with green label"", ""white paper strip with purple label"", ""white paper strip with orange label"", ""white paper strip with black label"", ""white paper strip with pink label"", ""white paper strip with brown label"", ""white paper strip with grey label"", ""white paper strip with turquoise label"", ""white paper strip with maroon label"", ""white paper strip with lavender label"", ""white paper strip with olive green label"", ""white paper strip with coral label"", ""white paper strip with navy blue label"", ""white paper strip with light blue label"", ""white paper strip with dark blue label"", ""white paper strip with light green label"", ""white paper strip with dark green label"", ""white paper strip with yellow-green label"", ""white paper strip with lime green label"", ""white paper strip with forest green label"", 
""white paper strip with emerald green label"", ""white paper strip with teal label"", ""white paper strip with turquoise blue label"", ""white paper strip with sky blue label"", ""white paper strip with baby blue label"", ""white paper strip with periwinkle label"", ""white paper strip with lavender blue label"", ""white paper strip with violet label"", ""white paper strip with purple blue label"", ""white paper strip with indigo label"", ""white paper strip with dark purple label"", ""white paper strip with magenta label"", ""white paper strip with pink purple label"", ""white paper strip with rose pink label"", ""white paper strip with coral pink label"", ""white paper strip with salmon pink label"", ""white paper strip with peach label"", ""white paper strip with apricot label"", ""white paper strip with orange yellow label"", ""white paper strip with golden yellow label"", ""white paper strip with lemon yellow label"", ""white paper strip with yellow green label"", ""white paper strip with chartreuse label"", ""white paper strip with olive label"", ""white paper strip with khaki label"", ""white paper strip with tan label"", ""white paper strip with beige label"", ""white paper strip with cream label"", ""white paper strip with off white label"", ""white paper strip with light grey label"", ""white paper strip with grey label"", ""white paper strip with dark grey label"", ""white paper strip with black label"", ""white paper strip with brown label"", ""white paper strip with dark brown label"", ""white paper strip with chocolate label"", ""white paper strip with coffee label"", ""white paper strip with taupe label"", ""white paper strip with maroon label"", ""white paper strip with burgundy label"", ""white paper strip with red label"", ""white paper strip with dark red label"", ""white paper strip with crimson label"", ""white paper strip with cherry red label"", ""white paper strip with pink label"", ""white paper strip with light pink label"", ""white paper 
strip with pale pink label"", ""white paper strip with rose label"", ""white paper strip with blush label"", ""white paper strip with coral label"", ""white paper strip with salmon label"", ""white paper strip with peach label"", ""white paper strip with apricot label"", ""white paper strip with orange label"", ""white paper strip with light orange label"", ""white paper strip with pale orange label"", ""white paper strip with tangerine label"", ""white paper strip with mango label"", ""white paper strip with yellow label"", ""white paper strip with light yellow label"", ""white paper strip with pale yellow label"", ""white paper strip with lemon label"", ""white paper strip with lime label"", ""white paper strip with green label"", ""white paper strip with light green label"", ""white paper strip with pale green label"", ""white paper strip with mint label"", ""white paper strip with sage label"", ""white paper strip with olive label"", ""white paper strip with khaki",0,1,0.0,0.0,0.0
+"{""action"": ""Close the door"", ""contact"": ""yes"", ""evidence_window"": {""end_frame"": 2559, ""start_frame"": 2540}, ""next_action"": ""Close the door"", ""objects"": [""door"", ""door handle"", ""door knob"", ""door frame"", ""door hinge"", ""door latch"", ""door lock"", ""door panel"", ""door post"", ""door stop"", ""door weather stripping"", ""doorway"", ""garage door"", ""garage door opener"", ""garage door track"", ""garage door wheel"", ""garage door window"", ""garage door opener button"", ""garage door opener remote"", ""garage door opener receiver"", ""garage door opener motor"", ""garage door opener chain"", ""garage door opener belt"", ""garage door opener rail"", ""garage door opener sensor"", ""garage door opener control panel"", ""garage door opener light"", ""garage door opener emergency release"", ""garage door opener remote control"", ""garage door opener wall switch"", ""garage door opener keypad"", ""garage door opener smartphone app"", ""garage door opener voice control"", ""garage door opener timer"", ""garage door opener remote control"", ""garage door opener wall switch"", ""garage door opener keypad"", ""garage door opener smartphone app"", ""garage door opener voice control"", ""garage door opener timer"", ""garage door opener emergency release"", ""garage door opener sensor"", ""garage door opener control panel"", ""garage door opener light"", ""garage door opener motor"", ""garage door opener chain"", ""garage door opener belt"", ""garage door opener rail"", ""garage door opener remote"", ""garage door opener receiver"", ""garage door opener button"", ""garage door window"", ""garage door track"", ""garage door wheel"", ""garage door"", ""garage door opener"", ""garage door opener button"", ""garage door opener remote"", ""garage door opener receiver"", ""garage door opener motor"", ""garage door opener chain"", ""garage door opener belt"", ""garage door opener rail"", ""garage door opener sensor"", ""garage door opener control 
panel"", ""garage door opener light"", ""garage door opener emergency release"", ""garage door opener remote control"", ""garage door opener wall switch"", ""garage door opener keypad"", ""garage door opener smartphone app"", ""garage door opener voice control"", ""garage door opener timer"", ""garage door opener remote control"", ""garage door opener wall switch"", ""garage door opener keypad"", ""garage door opener smartphone app"", ""garage door opener voice control"", ""garage door opener timer"", ""garage door opener emergency release"", ""garage door opener sensor"", ""garage door opener control panel"", ""garage door opener light"", ""garage door opener motor"", ""garage door opener chain"", ""garage door opener belt"", ""garage door opener rail"", ""garage door opener remote"", ""garage door opener receiver"", ""garage door opener button"", ""garage door window"", ""garage door track"", ""garage door wheel"", ""garage door"", ""garage door opener"", ""garage door opener button"", ""garage door opener remote"", ""garage door opener receiver"", ""garage door opener motor"", ""garage door opener chain"", ""garage door opener belt"", ""garage door opener rail"", ""garage door opener sensor"", ""garage door opener control panel"", ""garage door opener light"", ""garage door opener emergency release"", ""garage door opener remote control"", ""garage door opener wall switch"", ""garage door opener keypad"", ""garage door opener smartphone app"", ""garage door opener voice control"", ""garage door opener timer"", ""garage door opener remote control"", ""garage door opener wall switch"", ""garage door opener keypad"", ""garage door opener smartphone app"", ""garage door opener voice control"", ""garage door opener timer"", ""garage door opener emergency release"", ""garage door opener sensor"", ""garage door opener control panel"", ""garage door opener light"", ""garage door opener motor"", ""garage door opener chain"", ""garage door opener belt"", ""garage door 
opener rail"", ""garage door opener remote"", ""garage door opener receiver"", ""garage door opener button"", ""garage door window"", ""garage door track"", ""garage door wheel"", ""garage door"", ""garage door opener"", ""garage door opener button"", ""garage door opener remote"", ""garage door opener receiver"", ""garage door opener motor"", ""garage door opener chain"", ""garage door opener belt"", ""garage door opener rail"", ""garage door opener sensor"", ""garage",0,1,0.0,0.0,0.0
+"{""action"": ""Close cabinet door"", ""contact"": ""yes"", ""evidence_window"": {""end_frame"": 3419, ""start_frame"": 3400}, ""next_action"": ""Close cabinet door"", ""objects"": [""cabinet"", ""cabinet door"", ""cabinet handle"", ""cabinet shelf"", ""cabinet drawer"", ""cabinet drawer handle"", ""cabinet drawer knob"", ""cabinet drawer pull"", ""cabinet drawer slide"", ""cabinet drawer slide rail"", ""cabinet drawer slide track"", ""cabinet drawer slide roller"", ""cabinet drawer slide bracket"", ""cabinet drawer slide support"", ""cabinet drawer slide guide"", ""cabinet drawer slide stop"", ""cabinet drawer slide clip"", ""cabinet drawer slide clip holder"", ""cabinet drawer slide clip bracket"", ""cabinet drawer slide clip support"", ""cabinet drawer slide clip guide"", ""cabinet drawer slide clip stop"", ""cabinet drawer slide clip holder bracket"", ""cabinet drawer slide clip holder support"", ""cabinet drawer slide clip holder guide"", ""cabinet drawer slide clip holder stop"", ""cabinet drawer slide clip holder clip"", ""cabinet drawer slide clip holder clip holder"", ""cabinet drawer slide clip holder clip bracket"", ""cabinet drawer slide clip holder clip support"", ""cabinet drawer slide clip holder clip guide"", ""cabinet drawer slide clip holder clip stop"", ""cabinet drawer slide clip holder clip holder bracket"", ""cabinet drawer slide clip holder clip holder support"", ""cabinet drawer slide clip holder clip holder guide"", ""cabinet drawer slide clip holder clip holder stop"", ""cabinet drawer slide clip holder clip holder clip"", ""cabinet drawer slide clip holder clip holder clip holder"", ""cabinet drawer slide clip holder clip holder clip bracket"", ""cabinet drawer slide clip holder clip holder clip support"", ""cabinet drawer slide clip holder clip holder clip guide"", ""cabinet drawer slide clip holder clip holder clip stop"", ""cabinet drawer slide clip holder clip holder clip holder bracket"", ""cabinet drawer slide clip holder clip 
holder clip holder support"", ""cabinet drawer slide clip holder clip holder clip holder guide"", ""cabinet drawer slide clip holder clip holder clip holder stop"", ""cabinet drawer slide clip holder clip holder clip holder clip"", ""cabinet drawer slide clip holder clip holder clip holder clip holder"", ""cabinet drawer slide clip holder clip holder clip holder clip bracket"", ""cabinet drawer slide clip holder clip holder clip holder clip support"", ""cabinet drawer slide clip holder clip holder clip holder clip guide"", ""cabinet drawer slide clip holder clip holder clip holder clip stop"", ""cabinet drawer slide clip holder clip holder clip holder clip holder bracket"", ""cabinet drawer slide clip holder clip holder clip holder clip holder support"", ""cabinet drawer slide clip holder clip holder clip holder clip holder guide"", ""cabinet drawer slide clip holder clip holder clip holder clip holder stop"", ""cabinet drawer slide clip holder clip holder clip holder clip holder clip"", ""cabinet drawer slide clip holder clip holder clip holder clip holder clip holder"", ""cabinet drawer slide clip holder clip holder clip holder clip holder clip bracket"", ""cabinet drawer slide clip holder clip holder clip holder clip holder clip support"", ""cabinet drawer slide clip holder clip holder clip holder clip holder clip guide"", ""cabinet drawer slide clip holder clip holder clip holder clip holder clip stop"", ""cabinet drawer slide clip holder clip holder clip holder clip holder clip holder bracket"", ""cabinet drawer slide clip holder clip holder clip holder clip holder clip holder support"", ""cabinet drawer slide clip holder clip holder clip holder clip holder clip holder guide"", ""cabinet drawer slide clip holder clip holder clip holder clip holder clip holder stop"", ""cabinet drawer slide clip holder clip holder clip holder clip holder clip holder clip"", ""cabinet drawer slide clip holder clip holder clip holder clip holder clip holder clip holder"", 
""cabinet drawer slide clip holder clip holder clip holder clip holder clip holder clip bracket"", ""cabinet drawer slide clip holder clip holder clip holder clip holder clip holder clip support"", ""cabinet drawer slide clip holder clip holder clip holder clip holder clip holder clip guide"", ""cabinet drawer slide clip holder clip holder clip holder clip holder clip holder clip stop"", ""cabinet drawer slide clip holder clip holder clip holder clip holder clip holder clip holder bracket"", ""cabinet drawer slide clip holder clip holder clip holder clip holder clip holder clip holder support"", ""cabinet drawer slide clip holder clip holder clip holder clip holder clip holder clip holder guide"", ""cabinet drawer slide clip holder clip holder clip holder clip holder clip holder clip holder stop"", ""cabinet drawer slide clip holder clip holder clip holder clip holder clip holder clip holder clip"", ""cabinet drawer slide clip holder clip holder clip holder clip holder clip holder clip holder clip holder"", ""cabinet drawer slide clip holder clip holder clip holder clip holder clip holder clip holder clip bracket"", ""cabinet drawer slide clip holder clip holder clip holder clip holder clip holder clip holder clip support",0,1,0.0,0.0,0.0
+Close cabinet,0,1,0.0,0.0,0.0
+Close cabinet door,0,1,0.0,0.0,0.0
+Wipe hands,0,1,0.0,0.0,0.0
+Hold paper star,0,8,0.0,0.0,0.0
+Place pot on the counter,0,1,0.0,0.0,0.0
+Rinse pot,0,1,0.0,0.0,0.0
+Place marker on table,0,2,0.0,0.0,0.0
+Carry snack package,0,1,0.0,0.0,0.0
+Place paper strip on table,0,12,0.0,0.0,0.0
+Reach for jar,0,4,0.0,0.0,0.0
+Hold jar,0,1,0.0,0.0,0.0
+Place beads on table,0,10,0.0,0.0,0.0
+"{""action"": ""Hold paper star"", ""contact"": ""yes"", ""evidence_window"": {""end_frame"": 1959, ""start_frame"": 1940}, ""next_action"": ""Hold paper star"", ""objects"": [""paper star"", ""table"", ""scissors"", ""phone"", ""mouse"", ""beads"", ""pen"", ""light blue paper"", ""yellow paper"", ""purple paper"", ""cardboard"", ""cardboard pieces"", ""cardboard shapes"", ""cardboard square"", ""cardboard tray"", ""cardboard tube"", ""cardboard strip"", ""utility knife"", ""ruler"", ""marker"", ""jar"", ""canned food"", ""tin can"", ""plastic container"", ""plastic packaging"", ""box"", ""shopping bag"", ""gift box"", ""product"", ""product box"", ""product package"", ""supplement bottle"", ""bottle"", ""sauce bottle"", ""pickle jar"", ""water bottle"", ""pink water bottle"", ""cleaning bottle"", ""earbud case"", ""charging case"", ""portable charger"", ""power bank"", ""charger"", ""charging cable"", ""power adapter"", ""smartphone"", ""smartphone box"", ""computer mouse"", ""pen and paper"", ""paper"", ""paper pieces"", ""paper segments"", ""paper lantern"", ""paper lantern component"", ""blue product box"", ""blue strip"", ""light blue strip"", ""yellow item"", ""yellow paper strip"", ""purple paper strip"", ""adhesive tape"", ""ribbon"", ""ribbon piece"", ""plastic strip"", ""foam piece"", ""foam strips"", ""cardboard pattern"", ""cardboard pattern piece"", ""stapler"", ""cleaning cloth"", ""dustpan"", ""box of beads"", ""container"", ""container of canned food"", ""small cardboard piece"", ""small piece of material"", ""small object"", ""small product bag"", ""small white box"", ""bean"", ""bean pile"", ""button"", ""black button"", ""orange button"", ""red button"", ""small button"", ""button cluster"", ""button row"", ""star"", ""star bead"", ""star-shaped bead"", ""purple bead"", ""light blue origami star"", ""yellow star-shaped object"", ""purple star-shaped object"", ""colored tile"", ""colored pieces"", ""jewel"", ""jewel piece"", ""Mahjong tile"", 
""Mahjong tiles"", ""puzzle piece"", ""quilled paper"", ""quilled paper piece"", ""quilled paper shape"", ""quilling paper"", ""quilling paper strip"", ""paper quilling piece"", ""paper quilling strip"", ""paper star"", ""lucky star"", ""folded paper"", ""folded paper star"", ""paper strip"", ""paper decoration"", ""colored paper"", ""colorful pieces"", ""cardboard sheet"", ""cardboard square"", ""cardboard pieces"", ""cardboard shapes"", ""cardboard cutout"", ""cardboard tray"", ""cardboard tube"", ""cardboard strip"", ""newspaper"", ""newspaper piece"", ""plastic container"", ""plastic storage bin"", ""tin can"", ""jar"", ""canned good"", ""canned product"", ""packaged paper lantern component"", ""snack package"", ""snack pouch"", ""container of snack"", ""gift box"", ""product package"", ""supplement bottle"", ""bottle"", ""ketchup bottle"", ""sauce bottle"", ""pickle jar"", ""water bottle"", ""pink water bottle"", ""cleaning bottle"", ""earbud case"", ""charging case"", ""portable charger"", ""power bank"", ""charger"", ""charging cable"", ""power adapter"", ""smartphone"", ""smartphone box"", ""computer mouse"", ""pen"", ""notepad"", ""paper"", ""paper pieces"", ""paper segments"", ""pieces"", ""lantern"", ""lantern component"", ""blue product box"", ""blue strip"", ""light blue strip"", ""yellow item"", ""yellow paper strip"", ""purple paper strip"", ""adhesive strip"", ""ribbon knot"", ""small paper segment"", ""foam piece"", ""foam strips"", ""cardboard pattern"", ""cardboard pattern piece"", ""stapler"", ""cleaning cloth"", ""dustpan"", ""box of beads"", ""container"", ""small cardboard piece"", ""small piece of material"", ""small object"", ""small product bag"", ""small white box"", ""bean"", ""bean pile"", ""button"", ""black button"", ""orange button"", ""red button"", ""small button"", ""buttons"", ""button cluster"", ""button row"", ""star"", ""star bead"", ""star-shaped bead"", ""purple bead"", ""light blue origami star"", ""yellow star-shaped 
object"", ""purple star-shaped object"", ""colored tile"", ""colored pieces"", ""jewel"", ""jewel piece"", ""Mahjong tile"", ""Mahjong tiles"", ""puzzle piece"", ""quilled paper"", ""qu",0,1,0.0,0.0,0.0
+Place earbud case on table,0,2,0.0,0.0,0.0
+Hold red button,0,1,0.0,0.0,0.0
+Adjust beads by color,0,5,0.0,0.0,0.0
+Approach workspace,0,3,0.0,0.0,0.0
+"{""action"": ""Approach workspace"", ""contact"": ""no"", ""evidence_window"": {""end_frame"": 39, ""start_frame"": 20}, ""next_action"": ""Approach workspace"", ""objects"": [""cardboard"", ""cardboard pieces"", ""cardboard stack"", ""cardboard tray"", ""cardboard tube"", ""cardboard pieces"", ""cardboard shapes"", ""cardboard squares"", ""cardboard sheet"", ""cardboard cutouts"", ""cardboard shapes"", ""cardboard pieces"", ""cardboard shapes"", ""cardboard pieces"", ""cardboard shapes"", ""cardboard pieces"", ""cardboard shapes"", ""cardboard pieces"", ""cardboard shapes"", ""cardboard pieces"", ""cardboard shapes"", ""cardboard pieces"", ""cardboard shapes"", ""cardboard pieces"", ""cardboard shapes"", ""cardboard pieces"", ""cardboard shapes"", ""cardboard pieces"", ""cardboard shapes"", ""cardboard pieces"", ""cardboard shapes"", ""cardboard pieces"", ""cardboard shapes"", ""cardboard pieces"", ""cardboard shapes"", ""cardboard pieces"", ""cardboard shapes"", ""cardboard pieces"", ""cardboard shapes"", ""cardboard pieces"", ""cardboard shapes"", ""cardboard pieces"", ""cardboard shapes"", ""cardboard pieces"", ""cardboard shapes"", ""cardboard pieces"", ""cardboard shapes"", ""cardboard pieces"", ""cardboard shapes"", ""cardboard pieces"", ""cardboard shapes"", ""cardboard pieces"", ""cardboard shapes"", ""cardboard pieces"", ""cardboard shapes"", ""cardboard pieces"", ""cardboard shapes"", ""cardboard pieces"", ""cardboard shapes"", ""cardboard pieces"", ""cardboard shapes"", ""cardboard pieces"", ""cardboard shapes"", ""cardboard pieces"", ""cardboard shapes"", ""cardboard pieces"", ""cardboard shapes"", ""cardboard pieces"", ""cardboard shapes"", ""cardboard pieces"", ""cardboard shapes"", ""cardboard pieces"", ""cardboard shapes"", ""cardboard pieces"", ""cardboard shapes"", ""cardboard pieces"", ""cardboard shapes"", ""cardboard pieces"", ""cardboard shapes"", ""cardboard pieces"", ""cardboard shapes"", ""cardboard pieces"", ""cardboard shapes"", 
""cardboard pieces"", ""cardboard shapes"", ""cardboard pieces"", ""cardboard shapes"", ""cardboard pieces"", ""cardboard shapes"", ""cardboard pieces"", ""cardboard shapes"", ""cardboard pieces"", ""cardboard shapes"", ""cardboard pieces"", ""cardboard shapes"", ""cardboard pieces"", ""cardboard shapes"", ""cardboard pieces"", ""cardboard shapes"", ""cardboard pieces"", ""cardboard shapes"", ""cardboard pieces"", ""cardboard shapes"", ""cardboard pieces"", ""cardboard shapes"", ""cardboard pieces"", ""cardboard shapes"", ""cardboard pieces"", ""cardboard shapes"", ""cardboard pieces"", ""cardboard shapes"", ""cardboard pieces"", ""cardboard shapes"", ""cardboard pieces"", ""cardboard shapes"", ""cardboard pieces"", ""cardboard shapes"", ""cardboard pieces"", ""cardboard shapes"", ""cardboard pieces"", ""cardboard shapes"", ""cardboard pieces"", ""cardboard shapes"", ""cardboard pieces"", ""cardboard shapes"", ""cardboard pieces"", ""cardboard shapes"", ""cardboard pieces"", ""cardboard shapes"", ""cardboard pieces"", ""cardboard shapes"", ""cardboard pieces"", ""cardboard shapes"", ""cardboard pieces"", ""cardboard shapes"", ""cardboard pieces"", ""cardboard shapes"", ""cardboard pieces"", ""cardboard shapes"", ""cardboard pieces"", ""cardboard shapes"", ""cardboard pieces"", ""cardboard shapes"", ""cardboard pieces"", ""cardboard shapes"", ""cardboard pieces"", ""cardboard shapes"", ""cardboard pieces"", ""cardboard shapes"", ""cardboard pieces"", ""cardboard shapes"", ""cardboard pieces"", ""cardboard shapes"", ""cardboard pieces"", ""cardboard shapes"", ""cardboard pieces"", ""cardboard shapes"", ""cardboard pieces"", ""cardboard shapes"", ""cardboard pieces"", ""cardboard shapes"", ""cardboard pieces"", ""cardboard shapes"", ""cardboard pieces"", ""cardboard shapes"", ""cardboard pieces"", ""cardboard shapes"", ""cardboard pieces"", ""cardboard shapes"", ""cardboard pieces"", ""cardboard shapes"", ""cardboard pieces"", ""cardboard shapes"", ""cardboard 
pieces"", ""cardboard shapes"", ""cardboard pieces"", ""cardboard shapes"", ""cardboard pieces"", ""cardboard shapes"", ""cardboard pieces"", ""cardboard shapes"", ""cardboard pieces"", ""cardboard shapes"", ""cardboard pieces"", ""cardboard shapes"", ""cardboard pieces"", ""cardboard shapes"", ""cardboard pieces"", ""cardboard shapes"", ""cardboard pieces"", ""cardboard shapes"", ""cardboard pieces"", ""cardboard shapes"", ""cardboard pieces"", ""cardboard shapes"", ""card",0,1,0.0,0.0,0.0

results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval/eval/predictions.csv ADDED Viewed

The diff for this file is too large to render. See raw diff

results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval/eval/predictions.jsonl ADDED Viewed

The diff for this file is too large to render. See raw diff

results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval/package_audit.json ADDED Viewed

	@@ -0,0 +1,24 @@

+{
+  "status": "pass",
+  "package_dir": "results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval",
+  "backbone": "qwen3_omni_lora",
+  "required_eval_files": [
+    "metrics.json",
+    "predictions.jsonl",
+    "predictions.csv",
+    "per_class_metrics.csv",
+    "confusion_matrix.csv",
+    "RUN_REPORT.md"
+  ],
+  "primary_metrics": [
+    "action_macro_f1",
+    "contact_accuracy",
+    "held_out_episode_count",
+    "json_validity_rate",
+    "next_action_accuracy",
+    "object_micro_f1",
+    "subtask_accuracy",
+    "transition_accuracy"
+  ],
+  "issues": []
+}

results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval/training/progress.jsonl ADDED Viewed

	@@ -0,0 +1,47 @@

+{"event": "setup_done", "run_id": "xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_lora", "dataset_jsonl": "<project>/results/omni_finetune/xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_dataset/dataset.jsonl", "num_processes": 8, "num_train_samples": 2848, "num_val_samples": 512, "rank0_samples_per_epoch": 356, "backbone_id": "qwen3_omni_lora", "dataset_contract": "xperience10m_episode_json_qa_v1", "training_objective": "structured_episode_understanding_json_qa", "loss_mode": "answer_token_ce", "loss_logit_tail_only": true, "timestamp": 1780646466.9641912}
+{"event": "model_load_start", "run_id": "xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_lora", "model_id": "<workspace-parent>/modelscope_models/Qwen__Qwen3-Omni-30B-A3B-Instruct", "backbone_id": "qwen3_omni_lora", "device_map": "none", "dtype": "bfloat16", "timestamp": 1780646466.9644935}
+{"event": "model_load_done", "run_id": "xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_lora", "timestamp": 1780646529.778516}
+{"event": "accelerator_prepare_start", "run_id": "xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_lora", "timestamp": 1780646529.7835758}
+{"event": "accelerator_prepare_done", "run_id": "xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_lora", "timestamp": 1780647258.9977164}
+{"event": "train_loop_start", "run_id": "xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_lora", "model_id": "<workspace-parent>/modelscope_models/Qwen__Qwen3-Omni-30B-A3B-Instruct", "dataset_jsonl": "<project>/results/omni_finetune/xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_dataset/dataset.jsonl", "num_processes": 8, "num_train_samples": 2848, "num_val_samples": 512, "rank_samples_per_epoch": 356, "epochs": 1, "timestamp": 1780647259.0075266}
+{"event": "train_step", "epoch": 1, "global_step": 10, "rank0_seen": 10, "rank0_samples_per_epoch": 356, "rank0_batch_loss": 1.230179786682129, "timestamp": 1780647366.8140962}
+{"event": "train_step", "epoch": 1, "global_step": 20, "rank0_seen": 20, "rank0_samples_per_epoch": 356, "rank0_batch_loss": 1.2016743421554565, "timestamp": 1780647469.5297396}
+{"event": "train_step", "epoch": 1, "global_step": 30, "rank0_seen": 30, "rank0_samples_per_epoch": 356, "rank0_batch_loss": 1.1469742059707642, "timestamp": 1780647572.59419}
+{"event": "train_step", "epoch": 1, "global_step": 40, "rank0_seen": 40, "rank0_samples_per_epoch": 356, "rank0_batch_loss": 1.0939034223556519, "timestamp": 1780647677.915944}
+{"event": "train_step", "epoch": 1, "global_step": 50, "rank0_seen": 50, "rank0_samples_per_epoch": 356, "rank0_batch_loss": 1.0159685611724854, "timestamp": 1780647782.0573242}
+{"event": "train_step", "epoch": 1, "global_step": 60, "rank0_seen": 60, "rank0_samples_per_epoch": 356, "rank0_batch_loss": 0.9657138586044312, "timestamp": 1780647884.5396533}
+{"event": "train_step", "epoch": 1, "global_step": 70, "rank0_seen": 70, "rank0_samples_per_epoch": 356, "rank0_batch_loss": 0.9127762913703918, "timestamp": 1780647988.1080294}
+{"event": "train_step", "epoch": 1, "global_step": 80, "rank0_seen": 80, "rank0_samples_per_epoch": 356, "rank0_batch_loss": 0.8944218754768372, "timestamp": 1780648093.5427563}
+{"event": "train_step", "epoch": 1, "global_step": 90, "rank0_seen": 90, "rank0_samples_per_epoch": 356, "rank0_batch_loss": 0.8086783289909363, "timestamp": 1780648199.2200294}
+{"event": "train_step", "epoch": 1, "global_step": 100, "rank0_seen": 100, "rank0_samples_per_epoch": 356, "rank0_batch_loss": 0.7389833331108093, "timestamp": 1780648302.853592}
+{"event": "train_step", "epoch": 1, "global_step": 110, "rank0_seen": 110, "rank0_samples_per_epoch": 356, "rank0_batch_loss": 0.6932628750801086, "timestamp": 1780648406.7458248}
+{"event": "train_step", "epoch": 1, "global_step": 120, "rank0_seen": 120, "rank0_samples_per_epoch": 356, "rank0_batch_loss": 0.6040449738502502, "timestamp": 1780648513.4280822}
+{"event": "train_step", "epoch": 1, "global_step": 130, "rank0_seen": 130, "rank0_samples_per_epoch": 356, "rank0_batch_loss": 0.4854508638381958, "timestamp": 1780648618.7809823}
+{"event": "train_step", "epoch": 1, "global_step": 140, "rank0_seen": 140, "rank0_samples_per_epoch": 356, "rank0_batch_loss": 0.44700533151626587, "timestamp": 1780648722.3871136}
+{"event": "train_step", "epoch": 1, "global_step": 150, "rank0_seen": 150, "rank0_samples_per_epoch": 356, "rank0_batch_loss": 0.3768823444843292, "timestamp": 1780648826.5495484}
+{"event": "train_step", "epoch": 1, "global_step": 160, "rank0_seen": 160, "rank0_samples_per_epoch": 356, "rank0_batch_loss": 0.3188228905200958, "timestamp": 1780648933.4630425}
+{"event": "train_step", "epoch": 1, "global_step": 170, "rank0_seen": 170, "rank0_samples_per_epoch": 356, "rank0_batch_loss": 0.21572333574295044, "timestamp": 1780649040.920719}
+{"event": "train_step", "epoch": 1, "global_step": 180, "rank0_seen": 180, "rank0_samples_per_epoch": 356, "rank0_batch_loss": 0.15064814686775208, "timestamp": 1780649145.727296}
+{"event": "train_step", "epoch": 1, "global_step": 190, "rank0_seen": 190, "rank0_samples_per_epoch": 356, "rank0_batch_loss": 0.10380999743938446, "timestamp": 1780649250.9975677}
+{"event": "train_step", "epoch": 1, "global_step": 200, "rank0_seen": 200, "rank0_samples_per_epoch": 356, "rank0_batch_loss": 0.08768776804208755, "timestamp": 1780649358.2915566}
+{"event": "train_step", "epoch": 1, "global_step": 210, "rank0_seen": 210, "rank0_samples_per_epoch": 356, "rank0_batch_loss": 0.04660193994641304, "timestamp": 1780649465.2272158}
+{"event": "train_step", "epoch": 1, "global_step": 220, "rank0_seen": 220, "rank0_samples_per_epoch": 356, "rank0_batch_loss": 0.04696264863014221, "timestamp": 1780649570.7967262}
+{"event": "train_step", "epoch": 1, "global_step": 230, "rank0_seen": 230, "rank0_samples_per_epoch": 356, "rank0_batch_loss": 0.06465131789445877, "timestamp": 1780649675.9557557}
+{"event": "train_step", "epoch": 1, "global_step": 240, "rank0_seen": 240, "rank0_samples_per_epoch": 356, "rank0_batch_loss": 0.030367165803909302, "timestamp": 1780649782.9315662}
+{"event": "train_step", "epoch": 1, "global_step": 250, "rank0_seen": 250, "rank0_samples_per_epoch": 356, "rank0_batch_loss": 0.044979315251111984, "timestamp": 1780649890.433016}
+{"event": "train_step", "epoch": 1, "global_step": 260, "rank0_seen": 260, "rank0_samples_per_epoch": 356, "rank0_batch_loss": 0.06045611947774887, "timestamp": 1780649995.4567258}
+{"event": "train_step", "epoch": 1, "global_step": 270, "rank0_seen": 270, "rank0_samples_per_epoch": 356, "rank0_batch_loss": 0.04012652114033699, "timestamp": 1780650099.2143335}
+{"event": "train_step", "epoch": 1, "global_step": 280, "rank0_seen": 280, "rank0_samples_per_epoch": 356, "rank0_batch_loss": 0.05115583539009094, "timestamp": 1780650206.8328776}
+{"event": "train_step", "epoch": 1, "global_step": 290, "rank0_seen": 290, "rank0_samples_per_epoch": 356, "rank0_batch_loss": 0.011530512943863869, "timestamp": 1780650313.893128}
+{"event": "train_step", "epoch": 1, "global_step": 300, "rank0_seen": 300, "rank0_samples_per_epoch": 356, "rank0_batch_loss": 0.055682338774204254, "timestamp": 1780650418.009996}
+{"event": "train_step", "epoch": 1, "global_step": 310, "rank0_seen": 310, "rank0_samples_per_epoch": 356, "rank0_batch_loss": 0.03179597854614258, "timestamp": 1780650521.7131195}
+{"event": "train_step", "epoch": 1, "global_step": 320, "rank0_seen": 320, "rank0_samples_per_epoch": 356, "rank0_batch_loss": 0.018152443692088127, "timestamp": 1780650628.7296107}
+{"event": "train_step", "epoch": 1, "global_step": 330, "rank0_seen": 330, "rank0_samples_per_epoch": 356, "rank0_batch_loss": 0.05016171559691429, "timestamp": 1780650734.1827128}
+{"event": "train_step", "epoch": 1, "global_step": 340, "rank0_seen": 340, "rank0_samples_per_epoch": 356, "rank0_batch_loss": 0.014182890765368938, "timestamp": 1780650837.4421883}
+{"event": "train_step", "epoch": 1, "global_step": 350, "rank0_seen": 350, "rank0_samples_per_epoch": 356, "rank0_batch_loss": 0.024329334497451782, "timestamp": 1780650940.6010504}
+{"event": "train_step", "epoch": 1, "global_step": 356, "rank0_seen": 356, "rank0_samples_per_epoch": 356, "rank0_batch_loss": 0.035586968064308167, "timestamp": 1780651004.4774256}
+{"event": "epoch_end", "epoch": 1, "train_loss": 0.41304643672440994, "val_loss": 0.0330660454928875, "global_step": 356, "timestamp": 1780651173.976799}
+{"event": "save_start", "checkpoint_dir": "<project>/checkpoints/xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_lora/adapter_lora", "save_mode": "trainable_lora_state_dict", "timestamp": 1780651173.9771187}
+{"event": "save_state_dict_built", "checkpoint_dir": "<project>/checkpoints/xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_lora/adapter_lora", "state_source": "accelerator_full_state_dict", "trainable_tensors": 576, "trainable_bytes": 69206016, "shape_summary": {"adapter_tensors": 576, "adapter_bytes": 69206016, "prefixes": {"audio_tower": {"tensors": 192, "numel": 3932160}, "model": {"tensors": 384, "numel": 13369344}}}, "timestamp": 1780651307.4472642}
+{"event": "save_done", "checkpoint_dir": "<project>/checkpoints/xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_lora/adapter_lora", "timestamp": 1780651307.977997}
+{"event": "complete", "checkpoint_dir": "<project>/checkpoints/xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_lora/adapter_lora", "timestamp": 1780651307.9785495}

results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval/training/training_metadata.json ADDED Viewed

	@@ -0,0 +1,79 @@

+{
+  "run_id": "xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_lora",
+  "model_id": "<workspace-parent>/modelscope_models/Qwen__Qwen3-Omni-30B-A3B-Instruct",
+  "backbone": {
+    "id": "qwen3_omni_lora",
+    "display_name": "Qwen3-Omni LoRA",
+    "status": "implemented",
+    "model_family": "Qwen3-Omni",
+    "dataset_contract": "xperience10m_episode_json_qa_v1",
+    "training_objective": "structured_episode_understanding_json_qa",
+    "split_policy": {
+      "unit": "episode",
+      "default_counts": {
+        "train": 96,
+        "val": 16,
+        "test": 16
+      },
+      "leakage_guard": "episode_id and path-qualified session split checks"
+    },
+    "modalities": {
+      "direct_inputs": [
+        "multi-camera rendered mosaic video",
+        "AAC audio extracted from fisheye_cam0 MP4",
+        "language prompt and label options"
+      ],
+      "bridged_inputs": [
+        "depth and confidence",
+        "pose and SLAM trajectory",
+        "body and hand mocap",
+        "IMU acceleration and gyro",
+        "object/contact summary features"
+      ],
+      "excluded_inputs": [
+        "visualization.rrd"
+      ]
+    },
+    "primary_metrics": [
+      "json_validity_rate",
+      "action_macro_f1",
+      "subtask_accuracy",
+      "transition_accuracy",
+      "next_action_accuracy",
+      "contact_accuracy",
+      "object_micro_f1",
+      "held_out_episode_count"
+    ],
+    "config_path": "<project>/configs/omni_backbones/qwen3_omni_lora.json"
+  },
+  "dataset_jsonl": "<project>/results/omni_finetune/xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_dataset/dataset.jsonl",
+  "checkpoint_dir": "<project>/checkpoints/xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_lora/adapter_lora",
+  "num_processes": 8,
+  "num_train_samples": 2848,
+  "num_val_samples": 512,
+  "history": [
+    {
+      "epoch": 1,
+      "train_loss": 0.41304643672440994,
+      "val_loss": 0.0330660454928875,
+      "global_step": 356
+    }
+  ],
+  "lora": {
+    "r": 16,
+    "alpha": 32,
+    "dropout": 0.05,
+    "target_modules": [
+      "q_proj",
+      "k_proj",
+      "v_proj",
+      "o_proj",
+      "gate_proj",
+      "up_proj",
+      "down_proj"
+    ]
+  },
+  "use_audio_in_video": false,
+  "loss_mode": "answer_token_ce",
+  "loss_logit_tail_only": true
+}

results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval/validation/eval.json ADDED Viewed

	@@ -0,0 +1,81 @@

+{
+  "status": "pass",
+  "summary": {
+    "run_id": "xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605",
+    "dataset_run_id": "xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605",
+    "train_run_id": "xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605",
+    "eval_run_id": "xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605",
+    "backbone": "qwen3_omni_lora",
+    "backbone_status": "implemented",
+    "checkpoint_gate": "lora_safetensors_shape_check",
+    "required_stage": "eval",
+    "workspace": "<project>",
+    "manifest": {
+      "path": "<project>/results/omni_finetune/xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605/episode_manifest.json",
+      "episode_count": 128,
+      "split_counts": {
+        "test": 16,
+        "train": 96,
+        "val": 16
+      },
+      "session_leakage": []
+    },
+    "dataset": {
+      "dataset_dir": "<project>/results/omni_finetune/xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_dataset",
+      "manifest_path": "<project>/results/omni_finetune/xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_dataset/dataset_manifest.json",
+      "dataset_path": "<project>/results/omni_finetune/xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_dataset/dataset.jsonl",
+      "manifest_num_samples": 3808,
+      "row_count": 3808,
+      "sample_split_counts": {
+        "train": 2848,
+        "val": 512,
+        "test": 448
+      },
+      "episode_split_counts": {
+        "train": 89,
+        "val": 16,
+        "test": 14
+      },
+      "skipped_episodes": 9
+    },
+    "training": {
+      "train_dir": "<project>/results/omni_finetune/xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_lora",
+      "checkpoint_candidates": [
+        "<project>/checkpoints/xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605/adapter_lora",
+        "<project>/checkpoints/xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_lora/adapter_lora"
+      ],
+      "checkpoint_gate": "lora_safetensors_shape_check",
+      "required_training_files": [
+        "training_metadata.json",
+        "progress.jsonl",
+        "adapter_config.json",
+        "adapter_model.safetensors"
+      ],
+      "checkpoint_dir": "<project>/checkpoints/xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_lora/adapter_lora",
+      "num_processes": 8,
+      "num_train_samples": 2848,
+      "num_val_samples": 512,
+      "history_len": 1,
+      "checkpoint_dir_recorded": "<project>/checkpoints/xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_lora/adapter_lora"
+    },
+    "eval": {
+      "eval_dir": "<project>/results/omni_finetune/xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval",
+      "required_eval_files": [
+        "metrics.json",
+        "predictions.jsonl",
+        "predictions.csv",
+        "per_class_metrics.csv",
+        "confusion_matrix.csv",
+        "RUN_REPORT.md"
+      ],
+      "eval_split": "test",
+      "num_eval_episodes": 14,
+      "held_out_episode_count": 14,
+      "json_validity_rate": 0.875,
+      "action_macro_f1": 0.0026621494447581404,
+      "prediction_file": "predictions.jsonl",
+      "prediction_rows": 448
+    }
+  },
+  "issues": []
+}

results/omni_finetune/verified_public/xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval/verified_result_summary.json ADDED Viewed

	@@ -0,0 +1,168 @@

+{
+  "status": "verified",
+  "backbone": "qwen3_omni_lora",
+  "backbone_display_name": "Qwen3-Omni LoRA",
+  "dataset_contract": "xperience10m_episode_json_qa_v1",
+  "training_objective": "structured_episode_understanding_json_qa",
+  "dataset_run_id": "xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605",
+  "train_run_id": "xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_lora",
+  "eval_run_id": "xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval",
+  "dataset": {
+    "num_samples": 3808,
+    "num_episodes": 119,
+    "split_counts": {
+      "train": 2848,
+      "val": 512,
+      "test": 448
+    },
+    "skipped_episodes": 9
+  },
+  "training": {
+    "num_processes": 8,
+    "num_train_samples": 2848,
+    "num_val_samples": 512,
+    "history": [
+      {
+        "epoch": 1,
+        "train_loss": 0.41304643672440994,
+        "val_loss": 0.0330660454928875,
+        "global_step": 356
+      }
+    ]
+  },
+  "eval": {
+    "eval_split": "test",
+    "num_samples": 448,
+    "prediction_file": "predictions.jsonl",
+    "prediction_rows": 448,
+    "num_eval_episodes": 14,
+    "held_out_episode_count": 14,
+    "primary_metrics": {
+      "json_validity_rate": 0.875,
+      "action_macro_f1": 0.0026621494447581404,
+      "subtask_accuracy": 0.006696428571428571,
+      "transition_accuracy": 0.8504464285714286,
+      "next_action_accuracy": 0.024553571428571428,
+      "contact_accuracy": 0.6450892857142857,
+      "object_micro_f1": 0.22299431459254582,
+      "held_out_episode_count": 14
+    }
+  },
+  "validation_summary": {
+    "run_id": "xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605",
+    "dataset_run_id": "xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605",
+    "train_run_id": "xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605",
+    "eval_run_id": "xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605",
+    "backbone": "qwen3_omni_lora",
+    "backbone_status": "implemented",
+    "checkpoint_gate": "lora_safetensors_shape_check",
+    "required_stage": "eval",
+    "workspace": "<project>",
+    "manifest": {
+      "path": "<project>/results/omni_finetune/xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605/episode_manifest.json",
+      "episode_count": 128,
+      "split_counts": {
+        "test": 16,
+        "train": 96,
+        "val": 16
+      },
+      "session_leakage": []
+    },
+    "dataset": {
+      "dataset_dir": "<project>/results/omni_finetune/xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_dataset",
+      "manifest_path": "<project>/results/omni_finetune/xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_dataset/dataset_manifest.json",
+      "dataset_path": "<project>/results/omni_finetune/xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_dataset/dataset.jsonl",
+      "manifest_num_samples": 3808,
+      "row_count": 3808,
+      "sample_split_counts": {
+        "train": 2848,
+        "val": 512,
+        "test": 448
+      },
+      "episode_split_counts": {
+        "train": 89,
+        "val": 16,
+        "test": 14
+      },
+      "skipped_episodes": 9
+    },
+    "training": {
+      "train_dir": "<project>/results/omni_finetune/xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_lora",
+      "checkpoint_candidates": [
+        "<project>/checkpoints/xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605/adapter_lora",
+        "<project>/checkpoints/xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_lora/adapter_lora"
+      ],
+      "checkpoint_gate": "lora_safetensors_shape_check",
+      "required_training_files": [
+        "training_metadata.json",
+        "progress.jsonl",
+        "adapter_config.json",
+        "adapter_model.safetensors"
+      ],
+      "checkpoint_dir": "<project>/checkpoints/xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_lora/adapter_lora",
+      "num_processes": 8,
+      "num_train_samples": 2848,
+      "num_val_samples": 512,
+      "history_len": 1,
+      "checkpoint_dir_recorded": "<project>/checkpoints/xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_lora/adapter_lora"
+    },
+    "eval": {
+      "eval_dir": "<project>/results/omni_finetune/xperience10m_qwen3_omni_128ep_96train_16val_16test_valmon_20260605_eval",
+      "required_eval_files": [
+        "metrics.json",
+        "predictions.jsonl",
+        "predictions.csv",
+        "per_class_metrics.csv",
+        "confusion_matrix.csv",
+        "RUN_REPORT.md"
+      ],
+      "eval_split": "test",
+      "num_eval_episodes": 14,
+      "held_out_episode_count": 14,
+      "json_validity_rate": 0.875,
+      "action_macro_f1": 0.0026621494447581404,
+      "prediction_file": "predictions.jsonl",
+      "prediction_rows": 448
+    }
+  },
+  "included_files": [
+    "dataset/dataset_manifest.json",
+    "dataset/episode_manifest.json",
+    "eval/RUN_REPORT.md",
+    "eval/confusion_matrix.csv",
+    "eval/metrics.json",
+    "eval/per_class_metrics.csv",
+    "eval/predictions.csv",
+    "eval/predictions.jsonl",
+    "training/progress.jsonl",
+    "training/training_metadata.json",
+    "validation/eval.json"
+  ],
+  "required_eval_files": [
+    "metrics.json",
+    "predictions.jsonl",
+    "predictions.csv",
+    "per_class_metrics.csv",
+    "confusion_matrix.csv",
+    "RUN_REPORT.md"
+  ],
+  "public_package_allowed": [
+    "metrics",
+    "predictions",
+    "confusion matrices",
+    "run reports",
+    "episode and dataset manifests",
+    "training metadata",
+    "validation summaries"
+  ],
+  "public_package_forbidden": [
+    "raw MP4",
+    "annotation HDF5",
+    "Rerun RRD",
+    "base-model weights",
+    "LoRA adapter weights",
+    "full checkpoints",
+    "large archives"
+  ],
+  "excluded_policy": "Raw Xperience-10M files, base-model weights, adapter or checkpoint weights, full checkpoints, and large archives are not included."
+}