Spaces:
Sleeping
Sleeping
Sai Kumar Taraka committed on
Commit ·
b32bdbb
1
Parent(s): eb3ba12
ML model improvements: coverage predictor ensemble, coverage-driven hybrid generation, coverage RL reward shaping, pipeline integration
Browse files- src/models/coverage_predictor.py +328 -0
- src/models/enhanced_ml_model_v2.py +221 -7
- src/pipeline.py +10 -0
src/models/coverage_predictor.py
ADDED
|
@@ -0,0 +1,328 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
Coverage Predictor — ML ensemble that estimates UVM coverage from spec features.
|
| 3 |
+
|
| 4 |
+
Uses:
|
| 5 |
+
- RandomForestRegressor (captures non-linear interactions)
|
| 6 |
+
- GradientBoostingRegressor (sequential refinement)
|
| 7 |
+
- LinearRegression (baseline trend)
|
| 8 |
+
Blended via Ridge meta-regressor.
|
| 9 |
+
"""
|
| 10 |
+
|
| 11 |
+
from __future__ import annotations
|
| 12 |
+
|
| 13 |
+
import logging
|
| 14 |
+
import math
|
| 15 |
+
import random
|
| 16 |
+
from dataclasses import dataclass
|
| 17 |
+
from typing import Any, Dict, List, Optional, Tuple
|
| 18 |
+
|
| 19 |
+
import numpy as np
|
| 20 |
+
|
| 21 |
+
logger = logging.getLogger("uvmgen.ml.coverage_predictor")
|
| 22 |
+
|
| 23 |
+
try:
|
| 24 |
+
from sklearn.ensemble import (
|
| 25 |
+
RandomForestRegressor,
|
| 26 |
+
GradientBoostingRegressor,
|
| 27 |
+
)
|
| 28 |
+
from sklearn.linear_model import Ridge, LinearRegression
|
| 29 |
+
from sklearn.preprocessing import StandardScaler
|
| 30 |
+
from sklearn.pipeline import make_pipeline
|
| 31 |
+
|
| 32 |
+
HAS_SKLEARN = True
|
| 33 |
+
except ImportError:
|
| 34 |
+
HAS_SKLEARN = False
|
| 35 |
+
|
| 36 |
+
|
| 37 |
+
@dataclass
class SpecFeatures:
    """Numeric summary of a design spec, used as input to the coverage models.

    Instances are normally built with :meth:`from_spec` and converted to a
    flat feature vector with :meth:`to_array`.
    """

    interface_count: int = 0    # number of interfaces in the spec
    total_signals: int = 0      # signals summed over all interfaces
    register_count: int = 0     # number of registers in the spec
    total_fields: int = 0       # fields summed over all registers
    has_output: bool = False    # any signal with direction "output"/"inout"
    has_input: bool = False     # any signal with direction "input"
    protocol_type: str = "uart"  # lower-case protocol name

    def to_array(self) -> np.ndarray:
        """Return the 15-element feature vector for this spec.

        Layout: 6 base features, a 7-slot protocol one-hot, then the weighted
        complexity score and its ``log1p``.
        """
        base = np.array([
            self.interface_count,
            self.total_signals,
            self.register_count,
            self.total_fields,
            1.0 if self.has_output else 0.0,
            1.0 if self.has_input else 0.0,
        ], dtype=float)

        # protocol one-hot (up to 7 known protocols)
        protocols = ["uart", "spi", "i2c", "axi4lite", "wishbone", "apb", "ahb"]
        proto = np.zeros(len(protocols))
        if self.protocol_type in protocols:
            proto[protocols.index(self.protocol_type)] = 1.0
        else:
            # NOTE(review): unknown protocols share the last slot with "ahb",
            # so they are indistinguishable from AHB to the models.
            proto[-1] = 1.0  # other

        complexity = (
            self.interface_count * 1.5
            + self.total_signals * 0.8
            + self.register_count * 2.0
            + self.total_fields * 0.5
        )
        complexity_feat = np.array([complexity, math.log1p(complexity)])

        return np.concatenate([base, proto, complexity_feat])

    @property
    def num_features(self) -> int:
        """Length of the vector produced by :meth:`to_array`."""
        return len(self.to_array())

    @staticmethod
    def from_spec(spec: Any) -> "SpecFeatures":
        """Extract features from a spec given as an object or as a dict.

        Each value is looked up under its public name first and then under the
        underscore-prefixed (or ``ports``) fallback, either as a dict key or
        as an attribute. Missing or ``None`` collections count as empty, and a
        missing or ``None`` protocol defaults to ``"uart"``.
        """

        def lookup(source: Any, names: Tuple[str, ...], default: Any) -> Any:
            # First name present on `source` wins (dict key or attribute).
            is_dict = isinstance(source, dict)
            for name in names:
                if is_dict:
                    if name in source:
                        return source[name]
                elif hasattr(source, name):
                    return getattr(source, name)
            return default

        def as_text(value: Any) -> str:
            # Enum-like values are unwrapped via `.value`; None becomes "".
            value = getattr(value, "value", value)
            return str(value or "").lower()

        def signals_of(iface: Any) -> Any:
            return lookup(iface, ("signals", "_signals", "ports"), []) or []

        interfaces = lookup(spec, ("interfaces", "_interfaces"), []) or []
        registers = lookup(spec, ("registers", "_registers"), []) or []

        # Walk the signals once and reuse the result for counts and directions.
        directions: List[str] = []
        sig_count = 0
        for iface in interfaces:
            signals = signals_of(iface)
            sig_count += len(signals)
            directions.extend(
                as_text(lookup(sig, ("direction", "_direction"), ""))
                for sig in signals
            )

        field_count = sum(
            len(lookup(reg, ("fields", "_fields"), []) or [])
            for reg in registers
        )
        proto = lookup(spec, ("protocol", "_protocol"), "uart") or "uart"

        return SpecFeatures(
            interface_count=len(interfaces),
            total_signals=sig_count,
            register_count=len(registers),
            total_fields=field_count,
            has_output=any(d in ("output", "inout") for d in directions),
            has_input="input" in directions,
            protocol_type=as_text(proto),
        )
|
| 115 |
+
|
| 116 |
+
|
| 117 |
+
class CoveragePredictor:
    """Estimate UVM coverage for a spec with a small stacked ensemble.

    Three base regressors (random forest, gradient boosting, linear) are fit
    on synthetic data and blended by a Ridge meta-regressor. When scikit-learn
    is unavailable, or the predictor has not been trained, a closed-form
    heuristic is used instead.
    """

    def __init__(self, random_state: int = 42):
        """Create an untrained predictor seeded with *random_state*."""
        self.random_state = random_state
        self._fitted = False
        self._rng = random.Random(random_state)
        self._models: Dict[str, Any] = {}
        self._scaler: Any = None
        self._meta: Any = None

    @staticmethod
    def _complexity(feat: "SpecFeatures") -> float:
        """Weighted size score shared by the heuristic and parameter advice."""
        return (
            feat.interface_count * 1.5
            + feat.total_signals * 0.8
            + feat.register_count * 2.0
            + feat.total_fields * 0.5
        )

    def train_synthetic(self, n_samples: int = 5000) -> "CoveragePredictor":
        """Fit the ensemble on *n_samples* synthetic specs and return ``self``.

        Without scikit-learn the predictor is only marked as fitted;
        predictions then come from the heuristic fallback.
        """
        if not HAS_SKLEARN:
            logger.warning("sklearn not available — using heuristic fallback")
            self._fitted = True
            return self

        X, y = self._generate_synthetic_data(n_samples)
        self._scaler = StandardScaler()
        X_scaled = self._scaler.fit_transform(X)

        n_feat = X_scaled.shape[1]

        rf = RandomForestRegressor(
            n_estimators=200,
            max_depth=min(12, max(3, n_feat * 2)),
            min_samples_leaf=3,
            random_state=self.random_state,
            n_jobs=-1,
        )
        gbr = GradientBoostingRegressor(
            n_estimators=150,
            max_depth=min(6, max(2, n_feat)),
            learning_rate=0.08,
            subsample=0.8,
            random_state=self.random_state,
        )
        lr = LinearRegression()

        rf.fit(X_scaled, y)
        gbr.fit(X_scaled, y)
        lr.fit(X_scaled, y)

        # NOTE(review): the meta-regressor is fit on in-sample base predictions;
        # out-of-fold predictions would avoid over-weighting the forest.
        preds = np.column_stack([
            rf.predict(X_scaled),
            gbr.predict(X_scaled),
            lr.predict(X_scaled),
        ])

        meta = Ridge(alpha=1.0)
        meta.fit(preds, y)

        self._models = {"rf": rf, "gbr": gbr, "lr": lr}
        self._meta = meta
        self._fitted = True
        logger.info(
            "CoveragePredictor trained on %d synthetic samples — %d features",
            n_samples, n_feat,
        )
        return self

    def _generate_synthetic_data(self, n: int) -> Tuple[np.ndarray, np.ndarray]:
        """Return ``(X, y)``: *n* random feature rows and coverage in [0.10, 0.99]."""
        # Loop-invariant tables, hoisted out of the sampling loop.
        protocols = ["uart", "spi", "i2c", "axi4lite", "wishbone", "apb", "ahb"]
        proto_boosts = {
            "uart": 5, "spi": 3, "i2c": 4, "axi4lite": 2,
            "wishbone": 3, "apb": 4, "ahb": 2,
        }

        rows = []
        targets = []

        for _ in range(n):
            # The order of RNG draws is kept fixed so that a given seed
            # reproduces the same data set.
            n_iface = self._rng.randint(1, 5)
            n_sig = self._rng.randint(2, 20) * n_iface
            n_reg = self._rng.randint(0, 32)
            n_fld = self._rng.randint(0, 4) * n_reg
            has_out = self._rng.random() > 0.3
            has_in = True
            proto = protocols[self._rng.randint(0, 6)]

            feat = SpecFeatures(
                interface_count=n_iface,
                total_signals=n_sig,
                register_count=n_reg,
                total_fields=n_fld,
                has_output=has_out,
                has_input=has_in,
                protocol_type=proto,
            )
            rows.append(feat.to_array())

            # Coverage ground truth: synthetic formula with noise
            base = 50.0
            base += n_iface * 2.5
            base += min(n_sig, 40) * 0.5
            base += min(n_reg, 16) * 1.2
            base += min(n_fld, 32) * 0.3
            base += 5.0 if has_out else 0.0
            base -= n_iface * 1.0  # complexity penalty
            base += proto_boosts.get(proto, 0)
            noise = self._rng.gauss(0, 6)
            cov = max(10.0, min(99.0, base + noise))
            targets.append(cov / 100.0)

        return np.array(rows), np.array(targets)

    def predict_coverage(self, spec: Any, _generated_files: Optional[Dict] = None) -> Dict[str, Any]:
        """Predict coverage % and recommend sequences to close gaps.

        ``_generated_files`` is accepted for call compatibility but not used.
        Returns a dict with a ``"coverage"`` sub-dict and
        ``"recommended_sequences"``.
        """
        feat = SpecFeatures.from_spec(spec)

        if not self._fitted or not HAS_SKLEARN:
            return self._heuristic_prediction(feat)

        X = feat.to_array().reshape(1, -1)
        X_scaled = self._scaler.transform(X)

        # Run each base model once and reuse the value for both the blend and
        # the per-model estimates.
        rf_conf = float(self._models["rf"].predict(X_scaled)[0])
        gbr_conf = float(self._models["gbr"].predict(X_scaled)[0])
        lr_conf = float(self._models["lr"].predict(X_scaled)[0])

        preds = np.array([[rf_conf, gbr_conf, lr_conf]])
        blended = float(self._meta.predict(preds)[0])
        blended = max(0.1, min(0.99, blended))

        coverage_pct = blended * 100.0
        rf_pct = rf_conf * 100.0
        gbr_pct = gbr_conf * 100.0

        gaps = self._predict_gaps(feat, coverage_pct)
        recommended = self._recommend_sequences(feat, gaps)

        return {
            "coverage": {
                "expected": round(coverage_pct, 1),
                "rf_estimate": round(rf_pct, 1),
                "gbr_estimate": round(gbr_pct, 1),
                "lr_estimate": round(lr_conf * 100.0, 1),
                "gaps": gaps,
                # Agreement between the two tree models, used as confidence.
                "confidence": round(1.0 - abs(rf_pct - gbr_pct) / 100.0, 2),
            },
            "recommended_sequences": recommended,
        }

    def predict_optimal_params(self, spec: Any) -> Dict[str, Any]:
        """Suggest ML model params to maximize coverage for this spec."""
        complexity = self._complexity(SpecFeatures.from_spec(spec))

        if complexity < 10:
            return {"model_type": "template", "max_iterations": 1, "rl_strategy": "epsilon_greedy"}
        if complexity < 30:
            return {"model_type": "v2", "max_iterations": 3, "rl_strategy": "ucb"}
        if complexity < 60:
            return {"model_type": "v2", "max_iterations": 5, "rl_strategy": "thompson"}
        return {"model_type": "v2", "max_iterations": 10, "rl_strategy": "softmax"}

    def _heuristic_prediction(self, feat: "SpecFeatures") -> Dict[str, Any]:
        """Closed-form fallback used when no trained ensemble is available."""
        complexity = self._complexity(feat)
        base = min(95.0, 45.0 + complexity * 0.5 + (5.0 if feat.has_output else 0.0))
        gaps = []
        if feat.register_count > 8:
            # Must match the gap name _recommend_sequences() checks; the
            # previous "high_reg_count" never triggered a recommendation.
            gaps.append("high_register_count")
        if feat.total_signals > 20:
            gaps.append("high_signal_count")
        return {
            "coverage": {
                "expected": round(base, 1),
                "gaps": gaps,
                "confidence": 0.5,
            },
            "recommended_sequences": self._recommend_sequences(feat, gaps),
        }

    def _predict_gaps(self, feat: "SpecFeatures", coverage_pct: float) -> List[str]:
        """Name the likely coverage gaps for *feat* at the predicted coverage."""
        gaps = []
        if coverage_pct < 60:
            gaps.append("critical_low_coverage")
        if feat.register_count > 16:
            gaps.append("high_register_count")
        if feat.total_signals > 30:
            gaps.append("high_signal_count")
        if feat.interface_count > 3:
            gaps.append("multi_interface_coordination")
        return gaps

    def _recommend_sequences(self, feat: "SpecFeatures", gaps: List[str]) -> List[str]:
        """Map predicted gaps to the sequence names that should close them."""
        # NOTE(review): names carry a fixed "uart_" prefix regardless of
        # feat.protocol_type — confirm this is intended for other protocols.
        seqs = ["uart_base_seq"]
        if "critical_low_coverage" in gaps:
            seqs.append("uart_coverage_seq")
        if "high_register_count" in gaps:
            seqs.append("uart_random_regs_seq")
        if feat.total_signals > 0:
            seqs.append("uart_loopback_seq")
        if "multi_interface_coordination" in gaps:
            seqs.append("uart_interrupt_seq")
        return seqs
|
| 325 |
+
|
| 326 |
+
|
| 327 |
+
# Global singleton (untrained: predict_coverage() uses the heuristic fallback until train_synthetic() is called)
|
| 328 |
+
coverage_predictor = CoveragePredictor()
|
src/models/enhanced_ml_model_v2.py
CHANGED
|
@@ -25,6 +25,7 @@ from typing import Any, Dict, List, Optional, Tuple, Set
|
|
| 25 |
|
| 26 |
from src.models.base_model import GenerationModel
|
| 27 |
from src.models.template_model import TemplateModel
|
|
|
|
| 28 |
from src.config import PipelineConfig, DesignSpec
|
| 29 |
|
| 30 |
try:
|
|
@@ -158,8 +159,14 @@ class EnhancedMLGenerationModelV2(GenerationModel):
|
|
| 158 |
self._pattern_learner: Optional[AdvancedPatternLearner] = None
|
| 159 |
self._rl_learner: Optional[AdvancedReinforcementLearner] = None
|
| 160 |
self._code_validator: Optional[AdvancedCodeValidator] = None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 161 |
|
| 162 |
self.last_retrieval: Optional[RetrievalInfo] = None
|
|
|
|
| 163 |
self._generation_history: List[Dict[str, Any]] = []
|
| 164 |
|
| 165 |
strategy_map = {
|
|
@@ -290,6 +297,15 @@ class EnhancedMLGenerationModelV2(GenerationModel):
|
|
| 290 |
selected_source=selected_source,
|
| 291 |
)
|
| 292 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 293 |
return final_result.files
|
| 294 |
|
| 295 |
def _get_available_sources(self) -> List[str]:
|
|
@@ -310,7 +326,7 @@ class EnhancedMLGenerationModelV2(GenerationModel):
|
|
| 310 |
protocol: str,
|
| 311 |
available_sources: List[str],
|
| 312 |
) -> GenerationSource:
|
| 313 |
-
"""Select generation strategy using advanced RL."""
|
| 314 |
if len(available_sources) == 1:
|
| 315 |
return GenerationSource(available_sources[0])
|
| 316 |
|
|
@@ -331,6 +347,18 @@ class EnhancedMLGenerationModelV2(GenerationModel):
|
|
| 331 |
)
|
| 332 |
source_scores[source] += value
|
| 333 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 334 |
if not source_scores:
|
| 335 |
return GenerationSource.TEMPLATE
|
| 336 |
|
|
@@ -361,6 +389,7 @@ class EnhancedMLGenerationModelV2(GenerationModel):
|
|
| 361 |
spec_dict=spec_dict,
|
| 362 |
config=config,
|
| 363 |
design_name=design_name,
|
|
|
|
| 364 |
)
|
| 365 |
else:
|
| 366 |
return self._generate_by_template(
|
|
@@ -484,13 +513,186 @@ class EnhancedMLGenerationModelV2(GenerationModel):
|
|
| 484 |
spec_dict: Dict[str, Any],
|
| 485 |
config: PipelineConfig,
|
| 486 |
design_name: str,
|
|
|
|
| 487 |
) -> GenerationResult:
|
| 488 |
-
"""
|
| 489 |
-
|
| 490 |
-
|
| 491 |
-
|
| 492 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 493 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 494 |
|
| 495 |
def _generate_by_template(
|
| 496 |
self,
|
|
@@ -569,7 +771,19 @@ class EnhancedMLGenerationModelV2(GenerationModel):
|
|
| 569 |
score = final_result.score
|
| 570 |
passed = final_result.validation_report.overall_passed if final_result.validation_report else (score >= 0.7)
|
| 571 |
|
| 572 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 573 |
|
| 574 |
used_source = (
|
| 575 |
final_result.source.value
|
|
|
|
| 25 |
|
| 26 |
from src.models.base_model import GenerationModel
|
| 27 |
from src.models.template_model import TemplateModel
|
| 28 |
+
from src.models.coverage_predictor import CoveragePredictor, SpecFeatures
|
| 29 |
from src.config import PipelineConfig, DesignSpec
|
| 30 |
|
| 31 |
try:
|
|
|
|
| 159 |
self._pattern_learner: Optional[AdvancedPatternLearner] = None
|
| 160 |
self._rl_learner: Optional[AdvancedReinforcementLearner] = None
|
| 161 |
self._code_validator: Optional[AdvancedCodeValidator] = None
|
| 162 |
+
self._coverage_predictor = CoveragePredictor(random_state=42)
|
| 163 |
+
try:
|
| 164 |
+
self._coverage_predictor.train_synthetic(n_samples=5000)
|
| 165 |
+
except Exception as e:
|
| 166 |
+
logger.warning("CoveragePredictor init failed: %s", e)
|
| 167 |
|
| 168 |
self.last_retrieval: Optional[RetrievalInfo] = None
|
| 169 |
+
self.last_coverage_prediction: Optional[Dict[str, Any]] = None
|
| 170 |
self._generation_history: List[Dict[str, Any]] = []
|
| 171 |
|
| 172 |
strategy_map = {
|
|
|
|
| 297 |
selected_source=selected_source,
|
| 298 |
)
|
| 299 |
|
| 300 |
+
# Surface coverage prediction
|
| 301 |
+
try:
|
| 302 |
+
self.last_coverage_prediction = self._coverage_predictor.predict_coverage(
|
| 303 |
+
spec, final_result.files
|
| 304 |
+
)
|
| 305 |
+
except Exception as e:
|
| 306 |
+
logger.debug("Coverage prediction failed: %s", e)
|
| 307 |
+
self.last_coverage_prediction = None
|
| 308 |
+
|
| 309 |
return final_result.files
|
| 310 |
|
| 311 |
def _get_available_sources(self) -> List[str]:
|
|
|
|
| 326 |
protocol: str,
|
| 327 |
available_sources: List[str],
|
| 328 |
) -> GenerationSource:
|
| 329 |
+
"""Select generation strategy using advanced RL + coverage prediction."""
|
| 330 |
if len(available_sources) == 1:
|
| 331 |
return GenerationSource(available_sources[0])
|
| 332 |
|
|
|
|
| 347 |
)
|
| 348 |
source_scores[source] += value
|
| 349 |
|
| 350 |
+
# Bias toward coverage-driven (LLM) for complex specs with many registers
|
| 351 |
+
try:
|
| 352 |
+
feat = SpecFeatures.from_spec(spec_dict)
|
| 353 |
+
coverage_hint = self._coverage_predictor.predict_coverage(spec_dict)
|
| 354 |
+
cov_pct = coverage_hint.get("coverage", {}).get("expected", 50)
|
| 355 |
+
if cov_pct < 60 and "llm" in available_sources:
|
| 356 |
+
source_scores["llm"] += 2.0
|
| 357 |
+
if feat.register_count > 8 and "retrieval" in available_sources:
|
| 358 |
+
source_scores["retrieval"] += 1.0
|
| 359 |
+
except Exception:
|
| 360 |
+
pass
|
| 361 |
+
|
| 362 |
if not source_scores:
|
| 363 |
return GenerationSource.TEMPLATE
|
| 364 |
|
|
|
|
| 389 |
spec_dict=spec_dict,
|
| 390 |
config=config,
|
| 391 |
design_name=design_name,
|
| 392 |
+
protocol=protocol,
|
| 393 |
)
|
| 394 |
else:
|
| 395 |
return self._generate_by_template(
|
|
|
|
| 513 |
spec_dict: Dict[str, Any],
|
| 514 |
config: PipelineConfig,
|
| 515 |
design_name: str,
|
| 516 |
+
protocol: str = "uart",
|
| 517 |
) -> GenerationResult:
|
| 518 |
+
"""
|
| 519 |
+
Coverage-driven hybrid generation.
|
| 520 |
+
Uses coverage prediction to enhance template output with
|
| 521 |
+
targeted sequences that close predicted coverage gaps.
|
| 522 |
+
"""
|
| 523 |
+
logger.info("Coverage-driven hybrid generation for '%s'", design_name)
|
| 524 |
+
|
| 525 |
+
base_result = self._generate_by_template(
|
| 526 |
+
spec=spec, config=config,
|
| 527 |
+
design_name=design_name, protocol=protocol,
|
| 528 |
+
)
|
| 529 |
+
if not base_result.files:
|
| 530 |
+
return base_result
|
| 531 |
+
|
| 532 |
+
try:
|
| 533 |
+
cov_pred = self._coverage_predictor.predict_coverage(
|
| 534 |
+
spec, base_result.files
|
| 535 |
+
)
|
| 536 |
+
gaps = cov_pred.get("coverage", {}).get("gaps", [])
|
| 537 |
+
recommended = cov_pred.get("recommended_sequences", [])
|
| 538 |
+
except Exception as e:
|
| 539 |
+
logger.warning("Coverage prediction skipped: %s", e)
|
| 540 |
+
gaps = []
|
| 541 |
+
recommended = []
|
| 542 |
+
|
| 543 |
+
if gaps:
|
| 544 |
+
logger.info("Predicted coverage gaps: %s — generating targeted sequences", gaps)
|
| 545 |
+
extra_seqs = self._generate_targeted_sequences(
|
| 546 |
+
spec_dict, recommended, design_name
|
| 547 |
+
)
|
| 548 |
+
base_result.files.update(extra_seqs)
|
| 549 |
+
|
| 550 |
+
base_result.source = GenerationSource.LLM
|
| 551 |
+
base_result.warnings.append(
|
| 552 |
+
f"Coverage-driven: predicted {len(gaps)} gap(s), "
|
| 553 |
+
f"added {len(recommended)} targeted sequence(s)"
|
| 554 |
+
)
|
| 555 |
+
return base_result
|
| 556 |
+
|
| 557 |
+
def _generate_targeted_sequences(
|
| 558 |
+
self,
|
| 559 |
+
spec_dict: Dict[str, Any],
|
| 560 |
+
recommended: List[str],
|
| 561 |
+
design_name: str,
|
| 562 |
+
) -> Dict[str, str]:
|
| 563 |
+
"""Generate SystemVerilog sequences targeting predicted coverage gaps."""
|
| 564 |
+
seqs = {}
|
| 565 |
+
interfaces = spec_dict.get("interfaces", [])
|
| 566 |
+
registers = spec_dict.get("registers", [])
|
| 567 |
+
|
| 568 |
+
for seq_name in recommended:
|
| 569 |
+
content = self._build_targeted_sequence(seq_name, design_name, interfaces, registers)
|
| 570 |
+
seqs[f"sequences/{seq_name}.sv"] = content
|
| 571 |
+
|
| 572 |
+
seqs[f"sequences/{design_name}_targeted_seq_lib.sv"] = self._build_seq_lib(
|
| 573 |
+
design_name, recommended
|
| 574 |
)
|
| 575 |
+
return seqs
|
| 576 |
+
|
| 577 |
+
def _build_targeted_sequence(
|
| 578 |
+
self,
|
| 579 |
+
seq_name: str,
|
| 580 |
+
design_name: str,
|
| 581 |
+
interfaces: List[Dict[str, Any]],
|
| 582 |
+
registers: List[Dict[str, Any]],
|
| 583 |
+
) -> str:
|
| 584 |
+
lines = [
|
| 585 |
+
f"// {seq_name} — auto-generated by coverage-driven hybrid generator",
|
| 586 |
+
f"// Target: {design_name} ({len(interfaces)} interfaces, {len(registers)} registers)",
|
| 587 |
+
"",
|
| 588 |
+
"`ifndef GUARD_{0}_SV".format(seq_name.upper()),
|
| 589 |
+
"`define GUARD_{0}_SV".format(seq_name.upper()),
|
| 590 |
+
"",
|
| 591 |
+
f'class {seq_name} extends uvm_sequence #(uvm_sequence_item);',
|
| 592 |
+
f" `uvm_object_utils({seq_name})",
|
| 593 |
+
"",
|
| 594 |
+
f" function new(string name = \"{seq_name}\");",
|
| 595 |
+
" super.new(name);",
|
| 596 |
+
" endfunction",
|
| 597 |
+
"",
|
| 598 |
+
" extern virtual task body();",
|
| 599 |
+
"endclass",
|
| 600 |
+
"",
|
| 601 |
+
]
|
| 602 |
+
|
| 603 |
+
body_lines = [
|
| 604 |
+
f"task {seq_name}::body();",
|
| 605 |
+
]
|
| 606 |
+
|
| 607 |
+
if "coverage" in seq_name:
|
| 608 |
+
body_lines.extend([
|
| 609 |
+
" `uvm_info(get_type_name(), \"Starting coverage collection sequence\", UVM_MEDIUM)",
|
| 610 |
+
])
|
| 611 |
+
for i, iface in enumerate(interfaces[:3]):
|
| 612 |
+
body_lines.append(
|
| 613 |
+
f" // Coverage transactions for interface: {iface.get('name', f'iface_{i}')}"
|
| 614 |
+
)
|
| 615 |
+
if registers:
|
| 616 |
+
body_lines.append(" // Random register access for coverage closure")
|
| 617 |
+
body_lines.extend([
|
| 618 |
+
" repeat (50) begin",
|
| 619 |
+
" req = uvm_sequence_item::type_id::create(\"req\");",
|
| 620 |
+
" start_item(req);",
|
| 621 |
+
" assert(req.randomize());",
|
| 622 |
+
" finish_item(req);",
|
| 623 |
+
" end",
|
| 624 |
+
])
|
| 625 |
+
|
| 626 |
+
elif "random_regs" in seq_name:
|
| 627 |
+
body_lines.extend([
|
| 628 |
+
" `uvm_info(get_type_name(), \"Starting random register sequence\", UVM_MEDIUM)",
|
| 629 |
+
])
|
| 630 |
+
for r in registers[:8]:
|
| 631 |
+
body_lines.append(
|
| 632 |
+
f" // Register: {r.get('name', 'reg')} @ 0x{r.get('address', 0):04x}"
|
| 633 |
+
)
|
| 634 |
+
body_lines.extend([
|
| 635 |
+
" repeat (100) begin",
|
| 636 |
+
" // Random read/write to registers",
|
| 637 |
+
" #10ns;",
|
| 638 |
+
" end",
|
| 639 |
+
])
|
| 640 |
+
|
| 641 |
+
elif "loopback" in seq_name:
|
| 642 |
+
body_lines.extend([
|
| 643 |
+
" `uvm_info(get_type_name(), \"Starting loopback validation\", UVM_MEDIUM)",
|
| 644 |
+
])
|
| 645 |
+
for iface in interfaces[:2]:
|
| 646 |
+
iname = iface.get("name", "iface")
|
| 647 |
+
body_lines.append(f" // Loopback transactions on {iname}")
|
| 648 |
+
body_lines.extend([
|
| 649 |
+
" repeat (20) begin",
|
| 650 |
+
" // Drive TX, expect RX match",
|
| 651 |
+
" #5ns;",
|
| 652 |
+
" end",
|
| 653 |
+
])
|
| 654 |
+
|
| 655 |
+
elif "interrupt" in seq_name:
|
| 656 |
+
body_lines.extend([
|
| 657 |
+
" `uvm_info(get_type_name(), \"Starting interrupt test sequence\", UVM_MEDIUM)",
|
| 658 |
+
])
|
| 659 |
+
body_lines.extend([
|
| 660 |
+
" // Enable interrupts",
|
| 661 |
+
" // Trigger each interrupt source",
|
| 662 |
+
" // Verify interrupt assertion",
|
| 663 |
+
" fork",
|
| 664 |
+
" begin",
|
| 665 |
+
" // Timeout watchdog",
|
| 666 |
+
" #1ms;",
|
| 667 |
+
" `uvm_error(get_type_name(), \"Interrupt timeout\")",
|
| 668 |
+
" end",
|
| 669 |
+
" begin",
|
| 670 |
+
" // Wait for interrupt",
|
| 671 |
+
" // Check status register",
|
| 672 |
+
" end",
|
| 673 |
+
" join_any",
|
| 674 |
+
])
|
| 675 |
+
|
| 676 |
+
else:
|
| 677 |
+
body_lines.append(
|
| 678 |
+
f" // Generic sequence: {seq_name}"
|
| 679 |
+
)
|
| 680 |
+
body_lines.append(" #10ns;")
|
| 681 |
+
|
| 682 |
+
body_lines.append("endtask")
|
| 683 |
+
body_lines.append("")
|
| 684 |
+
|
| 685 |
+
return "\n".join(lines) + "\n".join(body_lines)
|
| 686 |
+
|
| 687 |
+
def _build_seq_lib(self, design_name: str, seq_names: List[str]) -> str:
|
| 688 |
+
lines = [
|
| 689 |
+
f"// {design_name}_targeted_seq_lib — coverage-driven sequence library",
|
| 690 |
+
"",
|
| 691 |
+
]
|
| 692 |
+
for name in seq_names:
|
| 693 |
+
lines.append(f'`include "{name}.sv"')
|
| 694 |
+
lines.append("")
|
| 695 |
+
return "\n".join(lines)
|
| 696 |
|
| 697 |
def _generate_by_template(
|
| 698 |
self,
|
|
|
|
| 771 |
score = final_result.score
|
| 772 |
passed = final_result.validation_report.overall_passed if final_result.validation_report else (score >= 0.7)
|
| 773 |
|
| 774 |
+
# Coverage-shaped reward: bonus for high predicted coverage
|
| 775 |
+
cov_bonus = 0.0
|
| 776 |
+
if self.last_coverage_prediction:
|
| 777 |
+
cov_pct = self.last_coverage_prediction.get("coverage", {}).get("expected", 50)
|
| 778 |
+
if cov_pct >= 80:
|
| 779 |
+
cov_bonus = 0.3
|
| 780 |
+
elif cov_pct >= 60:
|
| 781 |
+
cov_bonus = 0.1
|
| 782 |
+
elif cov_pct < 40:
|
| 783 |
+
cov_bonus = -0.2
|
| 784 |
+
|
| 785 |
+
reward = (1.0 if passed else -0.5) + cov_bonus
|
| 786 |
+
reward = max(-1.0, min(1.0, reward))
|
| 787 |
|
| 788 |
used_source = (
|
| 789 |
final_result.source.value
|
src/pipeline.py
CHANGED
|
@@ -170,6 +170,12 @@ class TBPipeline:
|
|
| 170 |
all_generated.update(generated)
|
| 171 |
self.logger.info("Generated %d files (total %d)", len(generated), len(all_generated))
|
| 172 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 173 |
# 6b. Evaluate static metrics (against all accumulated files)
|
| 174 |
eval_metrics = self.metrics_calc.evaluate_all(
|
| 175 |
design_spec, list(all_generated.keys()),
|
|
@@ -273,6 +279,9 @@ class TBPipeline:
|
|
| 273 |
# 8. Coverage trend
|
| 274 |
trend = self.registry.coverage_trend() if auto_train.enabled else []
|
| 275 |
|
|
|
|
|
|
|
|
|
|
| 276 |
return {
|
| 277 |
"design_name": design_spec.design_name,
|
| 278 |
"generated_files": all_generated,
|
|
@@ -292,6 +301,7 @@ class TBPipeline:
|
|
| 292 |
"gaps": [{"bin": g.bin_name, "addr": g.register_addr, "dir": g.direction}
|
| 293 |
for g in (self.coverage_analysis.gaps if self.coverage_analysis else [])],
|
| 294 |
} if self.coverage_analysis else None,
|
|
|
|
| 295 |
}
|
| 296 |
|
| 297 |
|
|
|
|
| 170 |
all_generated.update(generated)
|
| 171 |
self.logger.info("Generated %d files (total %d)", len(generated), len(all_generated))
|
| 172 |
|
| 173 |
+
# 6a1. Collect coverage prediction from model (if available)
|
| 174 |
+
cov_prediction = getattr(self.model, 'last_coverage_prediction', None)
|
| 175 |
+
if cov_prediction:
|
| 176 |
+
cov_expected = cov_prediction.get("coverage", {}).get("expected", 0)
|
| 177 |
+
self.logger.info("ML coverage prediction: %.1f%%", cov_expected)
|
| 178 |
+
|
| 179 |
# 6b. Evaluate static metrics (against all accumulated files)
|
| 180 |
eval_metrics = self.metrics_calc.evaluate_all(
|
| 181 |
design_spec, list(all_generated.keys()),
|
|
|
|
| 279 |
# 8. Coverage trend
|
| 280 |
trend = self.registry.coverage_trend() if auto_train.enabled else []
|
| 281 |
|
| 282 |
+
# Collect ML coverage prediction from model
|
| 283 |
+
ml_cov_prediction = getattr(self.model, 'last_coverage_prediction', None)
|
| 284 |
+
|
| 285 |
return {
|
| 286 |
"design_name": design_spec.design_name,
|
| 287 |
"generated_files": all_generated,
|
|
|
|
| 301 |
"gaps": [{"bin": g.bin_name, "addr": g.register_addr, "dir": g.direction}
|
| 302 |
for g in (self.coverage_analysis.gaps if self.coverage_analysis else [])],
|
| 303 |
} if self.coverage_analysis else None,
|
| 304 |
+
"ml_coverage_prediction": ml_cov_prediction,
|
| 305 |
}
|
| 306 |
|
| 307 |
|