AdrianLlopart committed on
Commit
ab01599
·
verified ·
1 Parent(s): 0e22d27

Remove stale pre-reproduction eval/metaworld.json (superseded by metaworld_mt50.json); issue #50

Browse files
Files changed (1) hide show
  1. eval/metaworld.json +0 -45
eval/metaworld.json DELETED
@@ -1,45 +0,0 @@
1
- {
2
- "_comment": "MetaWorld MT50 eval results for lerobot/smolvla_metaworld. Status: pending local verification. Numbers from the SmolVLA paper (arxiv:2506.01844 Table 2) will be filled in once a reproduction run is completed. Do NOT add numbers here without a locally-verified eval run or a precise paper citation with table reference.",
3
- "schema_version": "0.1",
4
- "source": {
5
- "paper": "SmolVLA: A vision-language-action model for affordable and efficient robotics",
6
- "arxiv": "https://arxiv.org/abs/2506.01844",
7
- "table": "Table 2 \u2014 Simulation benchmarks (LIBERO and Meta-World)",
8
- "model_variant": "SmolVLA (0.45B)",
9
- "evaluated_by": "upstream authors",
10
- "reproduced_locally": false,
11
- "reproduction_planned": "Pending \u2014 MetaWorld MT50 environment setup not yet validated in OpenRAL. Requires uv sync --group metaworld (TBD) and checkpoint inspection for camera/state spec.",
12
- "reproduction_cli": {
13
- "description": "Re-run this benchmark inside the OpenRAL repo. ADR-0009 PR D: openral benchmark run is the canonical producer of RSkillEvalResult JSONs with reproduced_locally=true.",
14
- "command": "openral benchmark run --suite metaworld_mt50 --rskill rskill://smolvla-metaworld",
15
- "status": "catalogue YAML benchmarks/metaworld_mt50.yaml has not yet landed (ADR-0009 PR E follow-up)",
16
- "notes": [
17
- "The 'results' block on this file is still the paper snapshot (reproduced_locally=false); re-running the command above and overwriting the file with its output flips that flag.",
18
- "MetaWorld MT50 covers 50 tasks; confirm --env.task argument format before running.",
19
- "State dim and camera names must be inspected from the checkpoint before this CLI is valid."
20
- ]
21
- },
22
- "status": "pending"
23
- },
24
- "benchmark": {
25
- "name": "MetaWorld MT50",
26
- "dataset": "lerobot/metaworld_mt50",
27
- "protocol": "TBD \u2014 success rate per task, multi-task training",
28
- "n_tasks": 50,
29
- "robot": "Franka Panda (7-DOF)",
30
- "simulator": "MetaWorld (MuJoCo-based)"
31
- },
32
- "eval_config": {
33
- "action_chunk_size": "TBD",
34
- "image_size": "TBD",
35
- "cameras": "TBD",
36
- "state_dim": "TBD",
37
- "inference_mode": "TBD"
38
- },
39
- "results": {
40
- "status": "pending",
41
- "source": "upstream_paper_pending",
42
- "metaworld_mt50_avg_success_rate": null,
43
- "note": "Fill in after running lerobot-eval against MetaWorld MT50 or after extracting the exact value from arxiv:2506.01844 Table 2 with task-level breakdown."
44
- }
45
- }