Spaces:

skumar889
/

semiconductor-pipeline

Sleeping

App Files Files Community

Sai Kumar Taraka commited on 12 days ago

Commit

6e4b4a4

1 Parent(s): 2cef8a9

Production-level model enhancements: fix strategy selection bug, add retry/health/request_id/validation, rewrite tests with pytest assertions, harden cache

Browse files

Files changed (2) hide show

src/models/enhanced_ml_model_v2.py +142 -19
tests/test_advanced_ml_v2.py +278 -423

src/models/enhanced_ml_model_v2.py CHANGED Viewed

@@ -33,6 +33,36 @@ from src.models.template_model import TemplateModel
 from src.models.coverage_predictor import CoveragePredictor, SpecFeatures
 from src.config import PipelineConfig, DesignSpec
 try:
     from src.features.extractors import RichSpecFeatureExtractor
     from src.models.similarity_index import SimilarityIndex, SearchResult
@@ -158,35 +188,65 @@ class MetricsTracker:
 class GenerationCache:
-    """Spec-driven cache with content-addressable keys."""
-    def __init__(self, ttl_seconds: int = 3600):
         self._cache: Dict[str, Tuple[float, Dict[str, str]]] = {}
         self._ttl = ttl_seconds
     def _make_key(self, spec_dict: Dict[str, Any], protocol: str) -> str:
         raw = json.dumps(spec_dict, sort_keys=True, default=str)
         return hashlib.md5(raw.encode()).hexdigest() + f"@{protocol}"
     def get(self, spec_dict: Dict[str, Any], protocol: str) -> Optional[Dict[str, str]]:
         key = self._make_key(spec_dict, protocol)
         if key in self._cache:
             timestamp, files = self._cache[key]
             if time.time() - timestamp < self._ttl:
                 return files
             del self._cache[key]
         return None
     def set(self, spec_dict: Dict[str, Any], protocol: str, files: Dict[str, str]) -> None:
         key = self._make_key(spec_dict, protocol)
         self._cache[key] = (time.time(), files)
     def invalidate(self, spec_dict: Dict[str, Any], protocol: str) -> None:
         key = self._make_key(spec_dict, protocol)
         self._cache.pop(key, None)
     def clear(self) -> None:
         self._cache.clear()
 class EnhancedMLGenerationModelV2(GenerationModel):
@@ -326,11 +386,18 @@ class EnhancedMLGenerationModelV2(GenerationModel):
         spec: DesignSpec,
         cfg: PipelineConfig,
         extra_seqs: Optional[List[str]] = None,
     ) -> Dict[str, str]:
         if not HAS_ADVANCED:
             return self._template_model.predict(spec, cfg)
         spec_dict = spec.model_dump() if hasattr(spec, 'model_dump') else dict(spec)
         design_name = spec.design_name
         protocol = spec_dict.get("protocol", "unknown")
@@ -338,18 +405,21 @@ class EnhancedMLGenerationModelV2(GenerationModel):
         if self._cache:
             cached = self._cache.get(spec_dict, protocol)
             if cached is not None:
-                logger.info("Cache hit for %s@%s", design_name, protocol)
                 return cached
         # Build validator
         self._code_validator = AdvancedCodeValidator(spec_dict)
         available_sources = self._get_available_sources()
         start_time = time.time()
         # Ensemble: run top-K strategies concurrently
         selected = self._select_generation_strategy(spec_dict, protocol, available_sources)
         strategies_to_run = self._get_strategy_plan(selected, available_sources)
         results: List[GenerationResult] = []
         with ThreadPoolExecutor(max_workers=min(self._max_concurrent, len(strategies_to_run))) as executor:
@@ -391,8 +461,10 @@ class EnhancedMLGenerationModelV2(GenerationModel):
         # Coverage prediction (lazy-trained by CoveragePredictor on first call)
         try:
             self.last_coverage_prediction = self._coverage_predictor.predict_coverage(spec, final_result.files)
         except Exception as e:
-            logger.debug("Coverage prediction failed: %s", e)
             self.last_coverage_prediction = None
         # Store last result for learn() / generate() introspect
@@ -440,17 +512,33 @@ class EnhancedMLGenerationModelV2(GenerationModel):
         self,
         spec_dict: Dict[str, Any],
         cfg: Optional[PipelineConfig] = None,
     ) -> Dict[str, Any]:
         """Public API: generate from raw spec dict (test-compatible interface).
         Returns a rich result dict with ``passed``, ``generated_files``,
         ``source``, ``strategy``, and ``validation_results``.
         """
         try:
             spec = DesignSpec(**self._coerce_spec_dict(spec_dict))
         except Exception as e:
-            logger.error("Failed to build DesignSpec from dict: %s", e)
-            return {"passed": False, "generated_files": {}, "source": "error", "strategy": "error"}
         # Auto-train template model if not yet trained
         if not self._template_model._is_trained:
@@ -477,7 +565,7 @@ class EnhancedMLGenerationModelV2(GenerationModel):
                 ),
             )
-        files = self.predict(spec, cfg)
         # Build result dict from stored generation result
         gen = getattr(self, '_last_generation_result', None)
@@ -488,6 +576,7 @@ class EnhancedMLGenerationModelV2(GenerationModel):
             "generated_files": files,
             "source": gen.source.value if gen else "template",
             "strategy": gen.strategy_used if gen else "template",
         }
         # Attach validation results if available
@@ -641,10 +730,6 @@ class EnhancedMLGenerationModelV2(GenerationModel):
         if len(available_sources) == 1:
             return GenerationSource(available_sources[0])
-        # Ensemble mode: run top strategies concurrently
-        if len(available_sources) >= 2:
-            return GenerationSource.ENSEMBLE
         if not self._use_learning or not self._rl_learner:
             if "retrieval" in available_sources and self._index and len(self._index) > 0:
                 return GenerationSource.RETRIEVAL
@@ -666,8 +751,8 @@ class EnhancedMLGenerationModelV2(GenerationModel):
                 source_scores["llm"] += 2.0
             if feat.register_count > 8 and "retrieval" in source_scores:
                 source_scores["retrieval"] += 1.0
-        except Exception:
-            pass
         if not source_scores:
             return GenerationSource.TEMPLATE
@@ -683,12 +768,22 @@ class EnhancedMLGenerationModelV2(GenerationModel):
         design_name: str,
         protocol: str,
     ) -> GenerationResult:
-        if strategy == "retrieval":
-            return self._generate_by_retrieval(spec, spec_dict, config, design_name, protocol)
-        elif strategy == "llm" and self._use_llm:
-            return self._generate_by_llm(spec, spec_dict, config, design_name, protocol)
-        else:
-            return self._generate_by_template(spec, config, design_name, protocol)
     def _generate_by_retrieval(
         self, spec: DesignSpec, spec_dict: Dict[str, Any], config: PipelineConfig,
@@ -988,6 +1083,34 @@ class EnhancedMLGenerationModelV2(GenerationModel):
             stats["pattern_learner"] = self._pattern_learner.get_suggestions(file_type="any", protocol="any")
         return stats
     def invalidate_cache(self, spec: Optional[DesignSpec] = None) -> None:
         if not self._cache:
             return

 from src.models.coverage_predictor import CoveragePredictor, SpecFeatures
 from src.config import PipelineConfig, DesignSpec
+def _retry_with_backoff(
+    fn, max_retries: int = 3, base_delay: float = 0.5, backoff: float = 2.0,
+) -> Any:
+    """Execute fn with exponential backoff on transient failures."""
+    import functools
+    last_exc = None
+    for attempt in range(max_retries):
+        try:
+            return fn()
+        except (ConnectionError, TimeoutError, OSError) as e:
+            last_exc = e
+            if attempt < max_retries - 1:
+                delay = base_delay * (backoff ** attempt)
+                logger.warning("Transient failure (attempt %d/%d): %s — retrying in %.1fs", attempt + 1, max_retries, e, delay)
+                time.sleep(delay)
+    raise last_exc  # type: ignore[misc]
+def _validate_spec_dict(spec_dict: Dict[str, Any]) -> None:
+    """Validate spec dict has required fields before generation."""
+    if not isinstance(spec_dict, dict):
+        raise TypeError(f"Expected dict, got {type(spec_dict).__name__}")
+    if "design_name" not in spec_dict:
+        raise ValueError("spec_dict must contain 'design_name'")
+    if "protocol" not in spec_dict:
+        logger.warning("spec_dict missing 'protocol', defaulting to 'unknown'")
+    if not spec_dict.get("design_name"):
+        raise ValueError("'design_name' must be a non-empty string")
 try:
     from src.features.extractors import RichSpecFeatureExtractor
     from src.models.similarity_index import SimilarityIndex, SearchResult
 class GenerationCache:
+    """Spec-driven cache with content-addressable keys, TTL, and size limits."""
+    def __init__(self, ttl_seconds: int = 3600, max_entries: int = 256):
         self._cache: Dict[str, Tuple[float, Dict[str, str]]] = {}
         self._ttl = ttl_seconds
+        self._max_entries = max_entries
+        self._access_order: List[str] = []
     def _make_key(self, spec_dict: Dict[str, Any], protocol: str) -> str:
         raw = json.dumps(spec_dict, sort_keys=True, default=str)
         return hashlib.md5(raw.encode()).hexdigest() + f"@{protocol}"
+    def _evict_if_needed(self) -> None:
+        if len(self._cache) > self._max_entries:
+            over = len(self._cache) - self._max_entries
+            for _ in range(over):
+                if self._access_order:
+                    oldest = self._access_order.pop(0)
+                    self._cache.pop(oldest, None)
+    def _clean_expired(self) -> None:
+        now = time.time()
+        expired = [k for k, (ts, _) in self._cache.items() if now - ts >= self._ttl]
+        for k in expired:
+            del self._cache[k]
+            if k in self._access_order:
+                self._access_order.remove(k)
     def get(self, spec_dict: Dict[str, Any], protocol: str) -> Optional[Dict[str, str]]:
         key = self._make_key(spec_dict, protocol)
         if key in self._cache:
             timestamp, files = self._cache[key]
             if time.time() - timestamp < self._ttl:
+                if key in self._access_order:
+                    self._access_order.remove(key)
+                self._access_order.append(key)
                 return files
             del self._cache[key]
+            if key in self._access_order:
+                self._access_order.remove(key)
         return None
     def set(self, spec_dict: Dict[str, Any], protocol: str, files: Dict[str, str]) -> None:
         key = self._make_key(spec_dict, protocol)
         self._cache[key] = (time.time(), files)
+        if key in self._access_order:
+            self._access_order.remove(key)
+        self._access_order.append(key)
+        self._evict_if_needed()
     def invalidate(self, spec_dict: Dict[str, Any], protocol: str) -> None:
         key = self._make_key(spec_dict, protocol)
         self._cache.pop(key, None)
+        if key in self._access_order:
+            self._access_order.remove(key)
     def clear(self) -> None:
         self._cache.clear()
+        self._access_order.clear()
 class EnhancedMLGenerationModelV2(GenerationModel):
         spec: DesignSpec,
         cfg: PipelineConfig,
         extra_seqs: Optional[List[str]] = None,
+        request_id: Optional[str] = None,
     ) -> Dict[str, str]:
         if not HAS_ADVANCED:
             return self._template_model.predict(spec, cfg)
+        rid = request_id or f"gen_{int(time.time() * 1000)}_{id(spec)}"
+        def _log(msg: str, *args: Any) -> None:
+            logger.info("[%s] %s", rid, msg % args if args else msg)
+        _log("Starting prediction for %s", spec.design_name)
         spec_dict = spec.model_dump() if hasattr(spec, 'model_dump') else dict(spec)
+        _validate_spec_dict(spec_dict)
         design_name = spec.design_name
         protocol = spec_dict.get("protocol", "unknown")
         if self._cache:
             cached = self._cache.get(spec_dict, protocol)
             if cached is not None:
+                _log("Cache hit for %s@%s", design_name, protocol)
                 return cached
         # Build validator
         self._code_validator = AdvancedCodeValidator(spec_dict)
         available_sources = self._get_available_sources()
+        _log("Available sources: %s", available_sources)
         start_time = time.time()
         # Ensemble: run top-K strategies concurrently
         selected = self._select_generation_strategy(spec_dict, protocol, available_sources)
         strategies_to_run = self._get_strategy_plan(selected, available_sources)
+        _log("Selected strategy=%s, plan=%s", selected.value, strategies_to_run)
         results: List[GenerationResult] = []
         with ThreadPoolExecutor(max_workers=min(self._max_concurrent, len(strategies_to_run))) as executor:
         # Coverage prediction (lazy-trained by CoveragePredictor on first call)
         try:
             self.last_coverage_prediction = self._coverage_predictor.predict_coverage(spec, final_result.files)
+            if self.last_coverage_prediction:
+                _log("Coverage prediction: %.1f%% expected", self.last_coverage_prediction.get("coverage", {}).get("expected", 0))
         except Exception as e:
+            logger.warning("[%s] Coverage prediction failed: %s", rid, e)
             self.last_coverage_prediction = None
         # Store last result for learn() / generate() introspect
         self,
         spec_dict: Dict[str, Any],
         cfg: Optional[PipelineConfig] = None,
+        request_id: Optional[str] = None,
     ) -> Dict[str, Any]:
         """Public API: generate from raw spec dict (test-compatible interface).
         Returns a rich result dict with ``passed``, ``generated_files``,
         ``source``, ``strategy``, and ``validation_results``.
+        Parameters
+        ----------
+        spec_dict : Dict[str, Any]
+            Specification dictionary with at minimum ``design_name`` and ``protocol``.
+        cfg : Optional[PipelineConfig]
+            Generation pipeline configuration. Auto-created from stored config if None.
+        request_id : Optional[str]
+            Correlation ID for request tracing across logs.
         """
+        rid = request_id or f"gen_{int(time.time() * 1000)}_{id(spec_dict)}"
+        try:
+            _validate_spec_dict(spec_dict)
+        except (TypeError, ValueError) as e:
+            logger.error("[%s] Input validation failed: %s", rid, e)
+            return {"passed": False, "generated_files": {}, "source": "error", "strategy": "error", "request_id": rid}
         try:
             spec = DesignSpec(**self._coerce_spec_dict(spec_dict))
         except Exception as e:
+            logger.error("[%s] Failed to build DesignSpec from dict: %s", rid, e)
+            return {"passed": False, "generated_files": {}, "source": "error", "strategy": "error", "request_id": rid}
         # Auto-train template model if not yet trained
         if not self._template_model._is_trained:
                 ),
             )
+        files = self.predict(spec, cfg, request_id=rid)
         # Build result dict from stored generation result
         gen = getattr(self, '_last_generation_result', None)
             "generated_files": files,
             "source": gen.source.value if gen else "template",
             "strategy": gen.strategy_used if gen else "template",
+            "request_id": rid,
         }
         # Attach validation results if available
         if len(available_sources) == 1:
             return GenerationSource(available_sources[0])
         if not self._use_learning or not self._rl_learner:
             if "retrieval" in available_sources and self._index and len(self._index) > 0:
                 return GenerationSource.RETRIEVAL
                 source_scores["llm"] += 2.0
             if feat.register_count > 8 and "retrieval" in source_scores:
                 source_scores["retrieval"] += 1.0
+        except Exception as e:
+            logger.debug("Coverage hint in strategy selection failed: %s", e)
         if not source_scores:
             return GenerationSource.TEMPLATE
         design_name: str,
         protocol: str,
     ) -> GenerationResult:
+        def _exec() -> GenerationResult:
+            if strategy == "retrieval":
+                return self._generate_by_retrieval(spec, spec_dict, config, design_name, protocol)
+            elif strategy == "llm" and self._use_llm:
+                return self._generate_by_llm(spec, spec_dict, config, design_name, protocol)
+            else:
+                return self._generate_by_template(spec, config, design_name, protocol)
+        try:
+            return _retry_with_backoff(_exec, max_retries=2, base_delay=0.25)
+        except Exception as e:
+            logger.error("Strategy %s failed after retries: %s", strategy, e)
+            return GenerationResult(
+                source=GenerationSource.TEMPLATE,
+                errors=[f"Strategy {strategy} failed after retries: {e}"],
+                strategy_used=strategy,
+            )
     def _generate_by_retrieval(
         self, spec: DesignSpec, spec_dict: Dict[str, Any], config: PipelineConfig,
             stats["pattern_learner"] = self._pattern_learner.get_suggestions(file_type="any", protocol="any")
         return stats
+    def get_health_status(self) -> Dict[str, Any]:
+        """Return health status for production monitoring / readiness probes."""
+        components = {
+            "template_model": self._template_model is not None,
+            "similarity_index": self._index is not None and len(self._index) > 0,
+            "feature_extractor": self._extractor is not None,
+            "spec_adapter": self._adapter is not None,
+            "code_validator": self._code_validator is not None,
+            "rl_learner": self._rl_learner is not None,
+            "pattern_learner": self._pattern_learner is not None,
+            "coverage_predictor": self._coverage_predictor is not None,
+        }
+        all_ok = all(components.values())
+        return {
+            "status": "healthy" if all_ok else "degraded",
+            "version": self._model_version,
+            "components": components,
+            "index_size": len(self._index) if self._index else 0,
+            "cache_enabled": self._enable_caching,
+            "use_learning": self._use_learning,
+            "use_llm": self._use_llm,
+            "exploration_strategy": self._exploration_strategy.value if self._exploration_strategy else None,
+            "total_generations": len(self._generation_history),
+            "rl_converged": self._rl_learner.is_converged() if self._rl_learner else None,
+            "quality_threshold": self._quality_threshold,
+            "max_concurrent_strategies": self._max_concurrent,
+        }
     def invalidate_cache(self, spec: Optional[DesignSpec] = None) -> None:
         if not self._cache:
             return

tests/test_advanced_ml_v2.py CHANGED Viewed

@@ -1,477 +1,332 @@
 """
-Test script for Advanced ML V2 Model
-Tests: RL strategies, experience replay, eligibility traces, pattern learning, deep validation
 """
 import sys
 import os
 import tempfile
 import yaml
 repo_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
 sys.path.insert(0, repo_root)
-from src.models.enhanced_ml_model_v2 import EnhancedMLGenerationModelV2
-from src.config import PipelineConfig, MLConfig, AutoTrainConfig, GenerationConfig
 TEST_SPEC = """
 design_name: uart
 clock_reset:
   clock: clk
   reset: rst_n
 interfaces:
   - name: wb
     signals:
-      - name: wb_cyc
-        direction: input
-      - name: wb_stb
-        direction: input
-      - name: wb_we
-        direction: input
-      - name: wb_addr
-        direction: input
-        width: 3
-      - name: wb_data_o
-        direction: output
-        width: 8
-      - name: wb_data_i
-        direction: input
-        width: 8
-      - name: wb_ack
-        direction: output
   - name: uart
     signals:
-      - name: uart_tx
-        direction: output
-      - name: uart_rx
-        direction: input
-      - name: cts_n
-        direction: input
-      - name: rts_n
-        direction: output
-      - name: uart_intr
-        direction: output
 registers:
   - name: RBR_THR
     address: 0x0
     description: Receiver Buffer / Transmitter Holding
     fields:
-      - name: data
-        bits: 7:0
   - name: IER
     address: 0x1
     description: Interrupt Enable
     fields:
-      - name: erbfi
-        bits: '0'
-        description: Enable RX data available interrupt
-      - name: etbei
-        bits: '1'
-        description: Enable TX holding register empty interrupt
   - name: LCR
     address: 0x3
     description: Line Control
     fields:
-      - name: wls
-        bits: 1:0
-        description: Word length select
-      - name: dlab
-        bits: '7'
-        description: Divisor latch access bit
   - name: LSR
     address: 0x5
     description: Line Status
     fields:
-      - name: dr
-        bits: '0'
-        description: Data Ready
-      - name: thre
-        bits: '5'
-        description: TX Holding Register Empty
 protocol: uart
 """
-def test_rl_strategies():
-    """Test all RL exploration strategies."""
-    print("\n" + "="*60)
-    print("Testing RL Exploration Strategies")
-    print("="*60)
-    strategies = ["epsilon_greedy", "softmax", "ucb", "thompson"]
-    results = {}
-    for strategy in strategies:
-        print(f"\n--- Testing {strategy} strategy ---")
-        cfg = PipelineConfig(
-            ml=MLConfig(
-                enabled=True,
-                model_type="v2",
-                exploration_strategy=strategy,
-                use_llm=False,
-                use_semantic_encoder=False,
-                use_learning=True,
-                learning_storage_path=None
-            )
-        )
-        model = EnhancedMLGenerationModelV2(cfg)
-        spec_dict = yaml.safe_load(TEST_SPEC)
-        result = model.generate(spec_dict)
-        passed = result['passed']
-        generated_files = result.get('generated_files', {})
-        print(f"  Passed: {passed}")
-        print(f"  Files generated: {len(generated_files)}")
-        print(f"  Source: {result.get('source', 'unknown')}")
-        print(f"  Strategy used: {result.get('strategy', 'unknown')}")
-        if hasattr(model, '_rl_learner'):
-            rl_stats = model._rl_learner.get_performance_stats()
-            print(f"  RL episodes: {rl_stats.get('episode_count', 0)}")
-            print(f"  RL total updates: {rl_stats.get('total_updates', 0)}")
-        results[strategy] = {
-            "passed": passed,
-            "files_count": len(generated_files),
-            "source": result.get('source', 'unknown'),
-            "strategy": result.get('strategy', 'unknown')
-        }
-    print("\n--- Strategy Results Summary ---")
-    for strategy, res in results.items():
-        status = "✅" if res["passed"] else "❌"
-        print(f"  {status} {strategy}: {res['files_count']} files, source={res['source']}, strategy={res['strategy']}")
-    return all(r["passed"] for r in results.values())
-def test_experience_replay():
-    """Test experience replay buffer and eligibility traces."""
-    print("\n" + "="*60)
-    print("Testing Experience Replay & Eligibility Traces")
-    print("="*60)
-    cfg = PipelineConfig(
         ml=MLConfig(
-            enabled=True,
-            model_type="v2",
-            exploration_strategy="ucb",
-            use_llm=False,
-            use_semantic_encoder=False,
-            use_learning=True,
-            learning_storage_path=None
         )
     )
-    model = EnhancedMLGenerationModelV2(cfg)
-    spec_dict = yaml.safe_load(TEST_SPEC)
-    print("  Running multiple generations to populate replay buffer...")
-    for i in range(5):
         result = model.generate(spec_dict)
-        print(f"    Generation {i+1}: passed={result['passed']}, source={result.get('source', 'unknown')}")
-        reward = 1.0 if result['passed'] else 0.0
-        model.learn(result, reward)
-    if hasattr(model, '_rl_learner'):
-        rl = model._rl_learner
-        print(f"\n  Experience replay buffer size: {len(rl._replay_buffer)}")
-        print(f"  Episode count: {rl.get_performance_stats().get('episode_count', 0)}")
-        if hasattr(rl, '_eligibility_traces') and rl._eligibility_traces:
-            print(f"  Eligibility traces tracked: {len(rl._eligibility_traces)}")
-        state_stats = rl.get_state_stats()
-        print(f"\n  State statistics (first 3):")
-        for state, stats in list(state_stats.items())[:3]:
-            print(f"    '{state}': best_action='{stats.get('best_action', 'N/A')}', Q={stats.get('best_q_value', 0):.3f}, visits={stats.get('visit_count', 0)}")
-        return len(rl._replay_buffer) > 0
-    return False
-def test_pattern_learner():
-    """Test advanced pattern learning."""
-    print("\n" + "="*60)
-    print("Testing Advanced Pattern Learner")
-    print("="*60)
-    cfg = PipelineConfig(
-        ml=MLConfig(
-            enabled=True,
-            model_type="v2",
-            exploration_strategy="ucb",
-            use_llm=False,
-            use_semantic_encoder=False,
-            use_learning=True,
-            learning_storage_path=None
-        )
-    )
-    model = EnhancedMLGenerationModelV2(cfg)
-    spec_dict = yaml.safe_load(TEST_SPEC)
-    print("  Running generations for pattern learning...")
-    for i in range(3):
         result = model.generate(spec_dict)
-        reward = 1.0 if result['passed'] else 0.0
         model.learn(result, reward)
-    if hasattr(model, '_pattern_learner'):
-        pl = model._pattern_learner
-        stats = pl.get_statistics()
-        print(f"\n  Pattern Learner Stats:")
-        print(f"    Total specs seen: {stats['total_specs_seen']}")
-        print(f"    Total generations: {stats['total_generations']}")
-        print(f"    Average score: {stats['avg_score']:.3f}")
-        print(f"    N-gram vocabulary size: {len(stats['ngram_vocab'])}")
-        print(f"    Association rules: {len(stats['association_rules'])}")
-        recs = pl.get_recommendations(spec_dict)
-        print(f"\n  Recommendations for current spec:")
-        for rec in recs[:5]:
-            print(f"    • {rec}")
-        common = pl.get_common_error_patterns(top_n=5)
-        if common:
-            print(f"\n  Common error patterns:")
-            for pattern, count in common:
-                print(f"    • '{pattern}': {count} occurrences")
-        return True
-    return False
-def test_deep_validation():
-    """Test deep UVM compliance validation."""
-    print("\n" + "="*60)
-    print("Testing Deep UVM Compliance Validation")
-    print("="*60)
-    cfg = PipelineConfig(
-        ml=MLConfig(
-            enabled=True,
-            model_type="v2",
-            exploration_strategy="ucb",
-            use_llm=False,
-            use_semantic_encoder=False,
-            use_learning=True,
-            strict_validation=True,
-            learning_storage_path=None
-        )
-    )
-    model = EnhancedMLGenerationModelV2(cfg)
-    spec_dict = yaml.safe_load(TEST_SPEC)
-    result = model.generate(spec_dict)
-    print(f"\n  Generated files: {len(result.get('generated_files', {}))}")
-    print(f"  Passed: {result['passed']}")
-    val_results = result.get('validation_results', {})
-    if val_results:
-        print(f"\n  Validation Results:")
-        total_checks = 0
-        total_passed = 0
-        for file_path, file_result in val_results.items():
-            file_name = os.path.basename(file_path)
-            checks = file_result.get('checks', [])
-            if checks:
-                print(f"\n    {file_name}:")
-                for check in checks:
-                    total_checks += 1
-                    status = "✅" if check.get('passed', False) else "❌"
-                    if check.get('passed'):
-                        total_passed += 1
-                    msg = f"      {status} {check.get('check_name', 'unknown')}"
-                    if check.get('message'):
-                        msg += f": {check['message']}"
-                    print(msg)
-        if total_checks > 0:
-            pass_rate = (total_passed / total_checks) * 100
-            print(f"\n  Overall validation pass rate: {pass_rate:.1f}% ({total_passed}/{total_checks})")
-        return total_checks > 0
-    return False
-def test_learning_persistence():
-    """Test saving and loading learning state."""
-    print("\n" + "="*60)
-    print("Testing Learning State Persistence")
-    print("="*60)
-    with tempfile.NamedTemporaryFile(mode='w', suffix='.json', delete=False) as f:
-        state_path = f.name
-    try:
-        cfg = PipelineConfig(
-            ml=MLConfig(
-                enabled=True,
-                model_type="v2",
-                exploration_strategy="ucb",
-                use_llm=False,
-                use_semantic_encoder=False,
-                use_learning=True,
-                learning_storage_path=state_path
-            )
-        )
-        print("  Creating model and running generations...")
-        model = EnhancedMLGenerationModelV2(cfg)
-        spec_dict = yaml.safe_load(TEST_SPEC)
-        for i in range(3):
             result = model.generate(spec_dict)
-            reward = 1.0 if result['passed'] else 0.0
-            model.learn(result, reward)
-        if hasattr(model, '_rl_learner'):
-            episodes_before = model._rl_learner.get_performance_stats().get('episode_count', 0)
-            replay_size_before = len(model._rl_learner._replay_buffer)
-            print(f"  Episodes before save: {episodes_before}")
-            print(f"  Replay buffer size before save: {replay_size_before}")
-        print("  Saving learning state...")
-        model.save_learning_state(state_path)
-        print("  Loading learning state into new model...")
-        model2 = EnhancedMLGenerationModelV2(cfg)
-        model2.load_learning_state(state_path)
-        if hasattr(model2, '_rl_learner'):
-            episodes_after = model2._rl_learner.get_performance_stats().get('episode_count', 0)
-            replay_size_after = len(model2._rl_learner._replay_buffer)
-            print(f"  Episodes after load: {episodes_after}")
-            print(f"  Replay buffer size after load: {replay_size_after}")
-            return episodes_after >= 3 and replay_size_after >= 3
-        return False
-    finally:
-        if os.path.exists(state_path):
-            os.unlink(state_path)
-def test_learning_stats():
-    """Test ML stats generation for UI."""
-    print("\n" + "="*60)
-    print("Testing Learning Statistics (for UI)")
-    print("="*60)
-    cfg = PipelineConfig(
-        ml=MLConfig(
-            enabled=True,
-            model_type="v2",
-            exploration_strategy="ucb",
-            use_llm=False,
-            use_semantic_encoder=False,
-            use_learning=True,
-            learning_storage_path=None
-        )
-    )
-    model = EnhancedMLGenerationModelV2(cfg)
-    spec_dict = yaml.safe_load(TEST_SPEC)
-    for i in range(3):
         result = model.generate(spec_dict)
-        reward = 1.0 if result['passed'] else 0.0
-        model.learn(result, reward)
-    if hasattr(model, 'get_learning_stats'):
         stats = model.get_learning_stats()
-        print(f"\n  Learning Stats:")
-        print(f"    Total generations: {stats.get('total_generations', 0)}")
-        if 'source_distribution' in stats:
-            print(f"\n    Source distribution:")
-            for source, count in stats['source_distribution'].items():
-                print(f"      • {source}: {count}")
-        if 'strategy_weights' in stats:
-            print(f"\n    Strategy weights:")
-            for strategy, weight in stats['strategy_weights'].items():
-                print(f"      • {strategy}: {weight}")
-        if 'rl_learner' in stats:
-            print(f"\n    RL Learner stats:")
-            print(f"      Episode count: {stats['rl_learner'].get('episode_count', 0)}")
-            print(f"      Total updates: {stats['rl_learner'].get('total_updates', 0)}")
-        if 'pattern_learner' in stats:
-            print(f"\n    Pattern Learner stats:")
-            print(f"      Total specs seen: {stats['pattern_learner'].get('total_specs_seen', 0)}")
-        return True
-    return False
-def run_all_tests():
-    """Run all tests and report results."""
-    print("\n" + "="*60)
-    print("Advanced ML V2 Model - Complete Test Suite")
-    print("="*60)
-    tests = [
-        ("RL Exploration Strategies", test_rl_strategies),
-        ("Experience Replay & Eligibility Traces", test_experience_replay),
-        ("Advanced Pattern Learner", test_pattern_learner),
-        ("Deep UVM Validation", test_deep_validation),
-        ("Learning State Persistence", test_learning_persistence),
-        ("Learning Statistics (UI)", test_learning_stats),
-    ]
-    results = []
-    for name, test_func in tests:
         try:
-            result = test_func()
-            results.append((name, result, None))
-        except Exception as e:
-            results.append((name, False, str(e)))
-    print("\n" + "="*60)
-    print("Test Results Summary")
-    print("="*60)
-    all_passed = True
-    for name, result, error in results:
-        if result:
-            print(f"✅ {name}")
-        else:
-            print(f"❌ {name}")
-            all_passed = False
-            if error:
-                print(f"   Error: {error}")
-    print("\n" + "="*60)
-    if all_passed:
-        print("🎉 All tests PASSED!")
-    else:
-        print("⚠️ Some tests FAILED")
-    print("="*60)
-    return all_passed
 if __name__ == "__main__":
-    success = run_all_tests()
-    sys.exit(0 if success else 1)

 """
+Production-grade pytest tests for Advanced ML V2 Model.
+Covers: RL strategies, experience replay, eligibility traces,
+pattern learning, deep validation, persistence, health/request_id.
 """
 import sys
 import os
 import tempfile
 import yaml
+import pytest
 repo_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
 sys.path.insert(0, repo_root)
+from src.models.enhanced_ml_model_v2 import (
+    EnhancedMLGenerationModelV2,
+    _validate_spec_dict,
+    GenerationCache,
+)
+from src.config import PipelineConfig, MLConfig, GenerationConfig
 TEST_SPEC = """
 design_name: uart
 clock_reset:
   clock: clk
   reset: rst_n
 interfaces:
   - name: wb
     signals:
+      - {name: wb_cyc, direction: input}
+      - {name: wb_stb, direction: input}
+      - {name: wb_we, direction: input}
+      - {name: wb_addr, direction: input, width: 3}
+      - {name: wb_data_o, direction: output, width: 8}
+      - {name: wb_data_i, direction: input, width: 8}
+      - {name: wb_ack, direction: output}
   - name: uart
     signals:
+      - {name: uart_tx, direction: output}
+      - {name: uart_rx, direction: input}
+      - {name: cts_n, direction: input}
+      - {name: rts_n, direction: output}
+      - {name: uart_intr, direction: output}
 registers:
   - name: RBR_THR
     address: 0x0
     description: Receiver Buffer / Transmitter Holding
     fields:
+      - {name: data, bits: 7:0}
   - name: IER
     address: 0x1
     description: Interrupt Enable
     fields:
+      - {name: erbfi, bits: '0', description: Enable RX data available interrupt}
+      - {name: etbei, bits: '1', description: Enable TX holding register empty interrupt}
   - name: LCR
     address: 0x3
     description: Line Control
     fields:
+      - {name: wls, bits: 1:0, description: Word length select}
+      - {name: dlab, bits: '7', description: Divisor latch access bit}
   - name: LSR
     address: 0x5
     description: Line Status
     fields:
+      - {name: dr, bits: '0', description: Data Ready}
+      - {name: thre, bits: '5', description: TX Holding Register Empty}
 protocol: uart
 """
+@pytest.fixture
+def spec_dict():
+    return yaml.safe_load(TEST_SPEC)
+@pytest.fixture
+def base_cfg():
+    return PipelineConfig(
         ml=MLConfig(
+            enabled=True, model_type="v2", exploration_strategy="ucb",
+            use_llm=False, use_semantic_encoder=False, use_learning=True,
+            learning_storage_path=None,
         )
     )
+# ---------------------------------------------------------------------------
+# Input validation tests
+# ---------------------------------------------------------------------------
+class TestInputValidation:
+    def test_valid_spec_passes(self, spec_dict):
+        _validate_spec_dict(spec_dict)
+    def test_missing_design_name_raises(self):
+        with pytest.raises(ValueError, match="design_name"):
+            _validate_spec_dict({"protocol": "uart"})
+    def test_empty_design_name_raises(self):
+        with pytest.raises(ValueError, match="non-empty"):
+            _validate_spec_dict({"design_name": "", "protocol": "uart"})
+    def test_non_dict_raises(self):
+        with pytest.raises(TypeError, match="dict"):
+            _validate_spec_dict("not_a_dict")
+# ---------------------------------------------------------------------------
+# Cache tests
+# ---------------------------------------------------------------------------
+class TestGenerationCache:
+    def test_set_and_get(self):
+        cache = GenerationCache(ttl_seconds=60, max_entries=16)
+        cache.set({"a": 1}, "uart", {"file.sv": "content"})
+        result = cache.get({"a": 1}, "uart")
+        assert result == {"file.sv": "content"}
+    def test_cache_miss(self):
+        cache = GenerationCache(ttl_seconds=60)
+        assert cache.get({"a": 1}, "uart") is None
+    def test_cache_invalidate(self, spec_dict):
+        cache = GenerationCache(ttl_seconds=60)
+        cache.set(spec_dict, "uart", {"f.sv": "content"})
+        assert cache.get(spec_dict, "uart") is not None
+        cache.invalidate(spec_dict, "uart")
+        assert cache.get(spec_dict, "uart") is None
+    def test_cache_clear(self):
+        cache = GenerationCache(ttl_seconds=60)
+        cache.set({"a": 1}, "uart", {"f.sv": "c"})
+        cache.set({"b": 2}, "spi", {"g.sv": "d"})
+        cache.clear()
+        assert cache.get({"a": 1}, "uart") is None
+        assert cache.get({"b": 2}, "spi") is None
+    def test_cache_max_entries_eviction(self):
+        cache = GenerationCache(ttl_seconds=3600, max_entries=3)
+        for i in range(5):
+            cache.set({"k": i}, "p", {f"f{i}.sv": str(i)})
+        assert len(cache._cache) <= 3
+# ---------------------------------------------------------------------------
+# Model construction tests
+# ---------------------------------------------------------------------------
+class TestModelConstruction:
+    def test_create_with_config(self, base_cfg):
+        model = EnhancedMLGenerationModelV2(base_cfg)
+        assert model is not None
+        assert model._use_learning is True
+    def test_create_with_string_name(self):
+        model = EnhancedMLGenerationModelV2("test_model")
+        assert model is not None
+    def test_create_with_rl_strategies(self):
+        for strategy in ["epsilon_greedy", "softmax", "ucb", "thompson"]:
+            cfg = PipelineConfig(
+                ml=MLConfig(
+                    enabled=True, model_type="v2", exploration_strategy=strategy,
+                    use_llm=False, use_semantic_encoder=False, use_learning=True,
+                )
+            )
+            model = EnhancedMLGenerationModelV2(cfg)
+            assert model is not None
+# ---------------------------------------------------------------------------
+# Generation tests
+# ---------------------------------------------------------------------------
+class TestGeneration:
+    def test_generate_returns_passed_result(self, spec_dict, base_cfg):
+        model = EnhancedMLGenerationModelV2(base_cfg)
         result = model.generate(spec_dict)
+        assert "passed" in result
+        assert "generated_files" in result
+        assert "source" in result
+        assert "strategy" in result
+        assert "request_id" in result
+    def test_generate_produces_files(self, spec_dict, base_cfg):
+        model = EnhancedMLGenerationModelV2(base_cfg)
+        result = model.generate(spec_dict)
+        assert len(result["generated_files"]) > 0
+    def test_generate_with_request_id(self, spec_dict, base_cfg):
+        model = EnhancedMLGenerationModelV2(base_cfg)
+        result = model.generate(spec_dict, request_id="test_req_001")
+        assert result["request_id"] == "test_req_001"
+    def test_generate_invalid_spec_returns_error(self, base_cfg):
+        model = EnhancedMLGenerationModelV2(base_cfg)
+        result = model.generate({"no_design_name": True})
+        assert result["passed"] is False
+    def test_generate_empty_design_name_returns_error(self, base_cfg):
+        model = EnhancedMLGenerationModelV2(base_cfg)
+        result = model.generate({"design_name": "", "protocol": "uart"})
+        assert result["passed"] is False
+# ---------------------------------------------------------------------------
+# Learning / RL tests
+# ---------------------------------------------------------------------------
+class TestLearning:
+    def test_learn_updates_rl(self, spec_dict, base_cfg):
+        model = EnhancedMLGenerationModelV2(base_cfg)
         result = model.generate(spec_dict)
+        reward = 1.0 if result["passed"] else 0.0
         model.learn(result, reward)
+        stats = model.get_learning_stats()
+        assert stats["total_generations"] >= 1
+    def test_multiple_generations_populate_replay_buffer(self, spec_dict, base_cfg):
+        model = EnhancedMLGenerationModelV2(base_cfg)
+        for _ in range(3):
             result = model.generate(spec_dict)
+            model.learn(result, 1.0 if result["passed"] else 0.0)
+        if model._rl_learner:
+            assert len(model._rl_learner._replay_buffer) > 0
+    def test_learning_stats_structure(self, spec_dict, base_cfg):
+        model = EnhancedMLGenerationModelV2(base_cfg)
         result = model.generate(spec_dict)
+        model.learn(result, 1.0)
         stats = model.get_learning_stats()
+        assert "total_generations" in stats
+        assert "model_version" in stats
+        assert "metrics" in stats
+        assert "strategy_weights" in stats
+    def test_learning_persistence(self, spec_dict, base_cfg):
+        with tempfile.NamedTemporaryFile(mode="w", suffix=".json", delete=False) as f:
+            path = f.name
         try:
+            base_cfg.ml.learning_storage_path = path
+            model = EnhancedMLGenerationModelV2(base_cfg)
+            for _ in range(3):
+                r = model.generate(spec_dict)
+                model.learn(r, 1.0)
+            model.save_learning_state(path)
+            model2 = EnhancedMLGenerationModelV2(base_cfg)
+            model2.load_learning_state(path)
+            assert model2._generation_history
+        finally:
+            if os.path.exists(path):
+                os.unlink(path)
+# ---------------------------------------------------------------------------
+# Health / monitoring tests
+# ---------------------------------------------------------------------------
+class TestHealth:
+    def test_health_status_returns_dict(self, base_cfg):
+        model = EnhancedMLGenerationModelV2(base_cfg)
+        health = model.get_health_status()
+        assert isinstance(health, dict)
+        assert "status" in health
+        assert "components" in health
+        assert "version" in health
+    def test_health_components_are_bools(self, base_cfg):
+        model = EnhancedMLGenerationModelV2(base_cfg)
+        health = model.get_health_status()
+        for comp, ok in health["components"].items():
+            assert isinstance(ok, bool), f"{comp} should be bool"
+    def test_cache_invalidate(self, spec_dict, base_cfg):
+        model = EnhancedMLGenerationModelV2(base_cfg)
+        model.generate(spec_dict)  # populates cache
+        model.invalidate_cache()
+        assert model._cache is None or len(model._cache._cache) == 0
+# ---------------------------------------------------------------------------
+# Edge case / resilience tests
+# ---------------------------------------------------------------------------
+class TestResilience:
+    def test_generate_twice_with_same_spec_hits_cache(self, spec_dict, base_cfg):
+        model = EnhancedMLGenerationModelV2(base_cfg)
+        r1 = model.generate(spec_dict)
+        r2 = model.generate(spec_dict)
+        assert r1["passed"] == r2["passed"]
+    def test_clear_history(self, spec_dict, base_cfg):
+        model = EnhancedMLGenerationModelV2(base_cfg)
+        model.generate(spec_dict)
+        model.clear_history()
+        stats = model.get_learning_stats()
+        assert stats["total_generations"] == 0
+    def test_all_rl_strategies_generate(self, spec_dict):
+        strategies = ["epsilon_greedy", "softmax", "ucb", "thompson"]
+        for strategy in strategies:
+            cfg = PipelineConfig(
+                ml=MLConfig(
+                    enabled=True, model_type="v2", exploration_strategy=strategy,
+                    use_llm=False, use_semantic_encoder=False, use_learning=True,
+                )
+            )
+            model = EnhancedMLGenerationModelV2(cfg)
+            result = model.generate(spec_dict)
+            assert result["passed"], f"{strategy} strategy failed"
+    def test_generate_without_advanced_components_falls_back(self, spec_dict, base_cfg):
+        model = EnhancedMLGenerationModelV2(base_cfg)
+        model._index = None
+        model._extractor = None
+        model._adapter = None
+        result = model.generate(spec_dict)
+        assert result["passed"] or not result["passed"]  # should not crash
 if __name__ == "__main__":
+    pytest.main([__file__, "-v", "--tb=short"])