hikewa committed on
Commit
9aeb3eb
·
verified ·
1 Parent(s): 4a757b3

Upload exported/student_inference.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. exported/student_inference.py +144 -0
exported/student_inference.py ADDED
@@ -0,0 +1,144 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """Standalone student model inference — zero external dependencies.
2
+
3
+ Generated by eisv_lumen.distillation.export_student.
4
+ Loads JSON-exported RandomForest classifiers and runs inference
5
+ using only Python stdlib.
6
+
7
+ Usage:
8
+ from student_inference import StudentInference
9
+
10
+ student = StudentInference("path/to/exported/")
11
+ result = student.predict("settled_presence", {
12
+ "mean_E": 0.7, "mean_I": 0.6, "mean_S": 0.2, "mean_V": 0.05,
13
+ "dE": 0.0, "dI": 0.0, "dS": 0.0, "dV": 0.0,
14
+ "d2E": 0.0, "d2I": 0.0, "d2S": 0.0, "d2V": 0.0,
15
+ })
16
+ # result has keys "pattern", "token_1", "token_2" and "eisv_tokens", e.g. {"pattern": "SINGLE", ..., "eisv_tokens": ["~stillness~"]}
17
+ """
18
+
19
+ from __future__ import annotations
20
+
21
+ import json
22
+ import os
23
+ from typing import Any, Dict, List, Optional
24
+
25
+
26
class StudentInference:
    """Zero-dependency student model inference.

    Loads five JSON files from ``model_dir`` (three forests, a scaler and a
    mappings table) and uses them to predict a pattern plus up to two tokens
    for a given trajectory shape and set of numeric features.
    """

    def __init__(self, model_dir: str):
        """Remember ``model_dir`` and eagerly load every model file from it."""
        self._model_dir = model_dir
        self._load_models()

    def _load_models(self) -> None:
        """Load JSON-exported models.

        Reads ``pattern_forest.json``, ``token1_forest.json``,
        ``token2_forest.json``, ``scaler.json`` and ``mappings.json`` from
        the model directory. Errors from ``open`` / ``json.load`` propagate.
        """

        def _load(name: str) -> Any:
            path = os.path.join(self._model_dir, name)
            # JSON is UTF-8 by specification; do not depend on the platform's
            # locale encoding, which would garble non-ASCII tokens.
            with open(path, encoding="utf-8") as f:
                return json.load(f)

        self._pattern_forest = _load("pattern_forest.json")
        self._token1_forest = _load("token1_forest.json")
        self._token2_forest = _load("token2_forest.json")
        self._scaler = _load("scaler.json")
        self._mappings = _load("mappings.json")

    def _scale_features(self, numeric: List[float]) -> List[float]:
        """Apply StandardScaler normalization: ``(value - mean) / scale``.

        ``mean`` and ``scale`` come from the loaded scaler and are paired
        positionally with ``numeric``. A scale entry of zero is treated as
        1.0 (the convention sklearn's StandardScaler uses for zero-variance
        features) instead of raising ``ZeroDivisionError``.
        """
        means = self._scaler["mean"]
        scales = self._scaler["scale"]
        return [
            (value - mean) / (scale if scale != 0 else 1.0)
            for value, mean, scale in zip(numeric, means, scales)
        ]

    def _build_features(self, shape: str, features: Dict[str, float]) -> List[float]:
        """Build feature vector from shape + numeric features.

        The vector is the scaled numeric features (in the order given by
        ``mappings["numeric_features"]``) followed by a one-hot encoding of
        ``shape`` over ``mappings["shapes"]``. Missing numeric features
        default to 0.0 before scaling; a shape that is not listed yields an
        all-zero one-hot segment.
        """
        # Numeric features (scaled)
        numeric = [features.get(name, 0.0) for name in self._mappings["numeric_features"]]
        scaled = self._scale_features(numeric)

        # Shape one-hot
        shape_onehot = [1.0 if known == shape else 0.0 for known in self._mappings["shapes"]]

        return scaled + shape_onehot

    def _predict_tree(self, tree: Dict, features: List[float]) -> List[float]:
        """Walk a single decision tree to get class probabilities.

        Internal nodes carry ``feature``, ``threshold``, ``left`` and
        ``right``; the walk goes left when ``features[feature] <= threshold``.
        A node whose ``leaf`` entry is truthy ends the walk and its ``probs``
        list is returned.
        """
        node = tree
        while not node.get("leaf", False):
            go_left = features[node["feature"]] <= node["threshold"]
            node = node["left"] if go_left else node["right"]
        return node["probs"]

    def _predict_forest(self, forest: List[Dict], features: List[float]) -> int:
        """Return the index of the class with the highest total probability.

        Per-class probabilities are summed across all trees. The argmax of
        the sum equals the argmax of the mean, so no division is performed.
        Ties resolve to the lowest class index.

        Raises
        ------
        ValueError
            If ``forest`` contains no trees.
        """
        if not forest:
            raise ValueError("cannot predict with an empty forest")

        all_probs = [self._predict_tree(tree, features) for tree in forest]
        n_classes = len(all_probs[0])
        totals = [0.0] * n_classes
        for probs in all_probs:
            for i in range(n_classes):
                totals[i] += probs[i]

        # max() returns the first maximal element, i.e. the lowest index on ties.
        return max(range(n_classes), key=totals.__getitem__)

    def predict(
        self,
        shape: str,
        features: Dict[str, float],
    ) -> Dict[str, Any]:
        """Run student inference.

        Parameters
        ----------
        shape : str
            Trajectory shape name.
        features : dict
            Numeric features with keys: mean_E, mean_I, mean_S, mean_V,
            dE, dI, dS, dV, d2E, d2I, d2S, d2V.

        Returns
        -------
        dict with keys: pattern, token_1, token_2, eisv_tokens.
        """
        X = self._build_features(shape, features)

        # Predict pattern
        pattern_idx = self._predict_forest(self._pattern_forest, X)
        pattern = self._mappings["patterns"][pattern_idx]

        # Predict token-1
        token1_idx = self._predict_forest(self._token1_forest, X)
        token_1 = self._mappings["tokens"][token1_idx]

        # Predict token-2 (add token_1 index as extra feature)
        X_t2 = X + [float(token1_idx)]
        token2_idx = self._predict_forest(self._token2_forest, X_t2)
        token_2 = self._mappings["tokens_with_none"][token2_idx]

        # Build token list based on pattern.
        # NOTE(review): only two tokens are ever predicted, so "TRIPLE" yields
        # at most two tokens, exactly like "PAIR"/"QUESTION" - confirm intended.
        if pattern == "REPETITION":
            eisv_tokens = [token_1, token_1]
        elif pattern in ("PAIR", "QUESTION", "TRIPLE") and token_2 != "none":
            eisv_tokens = [token_1, token_2]
        else:
            # "SINGLE", unrecognised patterns, or no usable second token.
            eisv_tokens = [token_1]

        return {
            "pattern": pattern,
            "token_1": token_1,
            "token_2": token_2,
            "eisv_tokens": eisv_tokens,
        }