AlexWortega committed on
Commit
3ab4691
·
verified ·
1 Parent(s): 05e1a70

Upload physics_serialize.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. physics_serialize.py +56 -0
physics_serialize.py ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Shared serialization for the tiny-vocab physics MoE.
2
+
3
+ Reuses physics_core.fmt_header / fmt_frame, but reduces every frame's
4
+ free-text description to a tiny controlled keyword set so the learned vocab
5
+ stays simulation-only.
6
+
7
+ Controlled description set (after the `Frame N:` token):
8
+ - "at rest" <- "All objects are at rest."
9
+ - "in motion" <- "All objects are in motion."
10
+ - "settling" <- "K of N objects are moving." (partial motion)
11
+ Anything else -> dropped (description omitted; frame still emitted).
12
+ """
13
+ from __future__ import annotations
14
+ import re
15
+ import physics_core as pc
16
+
17
_AT_REST = re.compile(r"all objects are at rest", re.I)
_IN_MOTION = re.compile(r"all objects are in motion", re.I)
_PARTIAL = re.compile(r"\d+\s+of\s+\d+\s+objects are moving", re.I)

# Ordered (pattern, keyword) rules: the first pattern that matches wins, so
# the "all objects ..." phrasings are tested before the partial-motion one.
_KEYWORD_RULES = (
    (_AT_REST, "at rest"),
    (_IN_MOTION, "in motion"),
    (_PARTIAL, "settling"),
)


def reduce_desc(raw: str) -> str:
    """Map a frame's free-text description to a controlled keyword (or '').

    Args:
        raw: The frame's free-text description. Matching is case-insensitive
            and succeeds anywhere inside the string. A value that is not a
            string (for example ``None`` stored under a frame's
            ``"description"`` key) is treated as "no description".

    Returns:
        ``"at rest"``, ``"in motion"`` or ``"settling"`` when the matching
        phrase is found, otherwise the empty string.
    """
    # A missing description may arrive as None rather than ""; feeding that
    # to Pattern.search would raise TypeError, so bail out early.
    if not isinstance(raw, str):
        return ""
    for pattern, keyword in _KEYWORD_RULES:
        if pattern.search(raw):
            return keyword
    return ""
31
+
32
+
33
def fmt_frame_reduced(fr: dict) -> str:
    """Format one frame via pc.fmt_frame, swapping its description for a keyword.

    The caller's frame dict is not modified: a shallow copy is made whose
    ``"description"`` entry holds the result of ``reduce_desc`` applied to the
    original description (``""`` when the key is absent).
    """
    keyword = reduce_desc(fr.get("description", ""))
    return pc.fmt_frame(dict(fr, description=keyword))
38
+
39
+
40
def fmt_header_reduced(header: dict) -> str:
    """Format the header via pc.fmt_header with its description emptied.

    A shallow copy of ``header`` is formatted, so the caller's dict is left
    untouched. Only the ``"description"`` entry is overwritten (with ``""``);
    every other header field is handed to ``pc.fmt_header`` unchanged, so the
    structural lines (Gravity / Timestep / Type / Difficulty / Static /
    Constraints) and the categorical `Type:` token are kept while the
    `Scene:` line carries no English prose -> vocab stays sim-only.
    """
    return pc.fmt_header(dict(header, description=""))
50
+
51
+
52
def serialize_scene(header: dict, frames: list) -> str:
    """Return the scene text: the reduced header followed by each reduced frame.

    Pieces are concatenated with no separator, in the order the frames are
    given; no BOS/EOS markers are added here.
    """
    pieces = [fmt_header_reduced(header)]
    pieces.extend(fmt_frame_reduced(frame) for frame in frames)
    return "".join(pieces)