"""
Industry-level code validator for UVM testbench generation.

Validates generated SystemVerilog code for:
1. Basic syntax correctness
2. Spec compliance (signals, registers, interfaces used)
3. UVM best practices
4. Common error patterns
5. Compilation readiness

Provides detailed validation reports with:
- Errors (blocking issues)
- Warnings (potential issues)
- Info (best practice suggestions)
- Auto-fix suggestions
"""

from __future__ import annotations

import logging
import re
from dataclasses import dataclass, field
from enum import Enum
from typing import Any, Dict, List, Optional, Set, Tuple, Pattern

logger = logging.getLogger("uvmgen.validator")


class ValidationSeverity(Enum):
    """Severity level attached to every ValidationIssue.

    The enum values are the lowercase strings written into serialized
    reports by ValidationIssue.to_dict().
    """
    # Blocking: validate_file() marks a file as failed when any ERROR exists.
    ERROR = "error"
    # Tallied by FileValidationResult.warning_count; does not fail a file.
    WARNING = "warning"
    # Tallied by FileValidationResult.info_count; does not fail a file.
    INFO = "info"
    # Not emitted by any checker visible in this file.
    STYLE = "style"


@dataclass
class ValidationIssue:
    """One finding reported by a checker.

    Only ``severity``, ``code`` and ``message`` are required; the remaining
    fields carry optional location and remediation details.
    """
    severity: ValidationSeverity
    code: str
    message: str
    line_number: Optional[int] = None
    context: Optional[str] = None
    suggestion: Optional[str] = None
    auto_fixable: bool = False

    def to_dict(self) -> Dict[str, Any]:
        """Return a plain-dict view with the severity flattened to its value."""
        payload: Dict[str, Any] = {"severity": self.severity.value}
        # Remaining fields are copied verbatim, in declaration order.
        for attr in (
            "code",
            "message",
            "line_number",
            "context",
            "suggestion",
            "auto_fixable",
        ):
            payload[attr] = getattr(self, attr)
        return payload


@dataclass
class FileValidationResult:
    """Outcome of validating one file.

    Holds the issues found plus simple run/passed check counters; the
    per-severity counts are derived from ``issues`` on demand.
    """
    filename: str
    file_type: str
    passed: bool
    issues: List[ValidationIssue] = field(default_factory=list)
    checks_run: int = 0
    checks_passed: int = 0

    @property
    def error_count(self) -> int:
        """Number of issues whose severity is ERROR."""
        return len([
            issue for issue in self.issues
            if issue.severity == ValidationSeverity.ERROR
        ])

    @property
    def warning_count(self) -> int:
        """Number of issues whose severity is WARNING."""
        return len([
            issue for issue in self.issues
            if issue.severity == ValidationSeverity.WARNING
        ])

    @property
    def info_count(self) -> int:
        """Number of issues whose severity is INFO."""
        return len([
            issue for issue in self.issues
            if issue.severity == ValidationSeverity.INFO
        ])

    def to_dict(self) -> Dict[str, Any]:
        """Return a plain-dict view including derived counts and all issues."""
        serialized_issues = [issue.to_dict() for issue in self.issues]
        return dict(
            filename=self.filename,
            file_type=self.file_type,
            passed=self.passed,
            error_count=self.error_count,
            warning_count=self.warning_count,
            info_count=self.info_count,
            checks_run=self.checks_run,
            checks_passed=self.checks_passed,
            issues=serialized_issues,
        )


@dataclass
class ValidationReport:
    """Aggregated validation outcome for one generation run.

    Totals are computed on demand from the per-file results in ``files``.
    """
    design_name: str
    overall_passed: bool
    files: List[FileValidationResult] = field(default_factory=list)
    timestamp: str = ""

    @property
    def total_errors(self) -> int:
        """Sum of ``error_count`` over all files."""
        return sum([result.error_count for result in self.files])

    @property
    def total_warnings(self) -> int:
        """Sum of ``warning_count`` over all files."""
        return sum([result.warning_count for result in self.files])

    @property
    def total_checks_run(self) -> int:
        """Sum of ``checks_run`` over all files."""
        return sum([result.checks_run for result in self.files])

    @property
    def total_checks_passed(self) -> int:
        """Sum of ``checks_passed`` over all files."""
        return sum([result.checks_passed for result in self.files])

    @property
    def pass_rate(self) -> float:
        """Fraction of checks that passed; 1.0 when no checks were run."""
        run = self.total_checks_run
        return self.total_checks_passed / run if run else 1.0

    def to_dict(self) -> Dict[str, Any]:
        """Return a plain-dict view; ``pass_rate`` is a percentage to 1 decimal."""
        percent = round(self.pass_rate * 100, 1)
        per_file = [result.to_dict() for result in self.files]
        return dict(
            design_name=self.design_name,
            overall_passed=self.overall_passed,
            total_errors=self.total_errors,
            total_warnings=self.total_warnings,
            total_checks_run=self.total_checks_run,
            total_checks_passed=self.total_checks_passed,
            pass_rate=percent,
            files=per_file,
        )


# SystemVerilog keywords, plus a few directive names ("include", "define") and
# UVM macro names written without their leading backtick.
# NOTE(review): not referenced by any code visible in this file — confirm
# whether another module imports it before changing the contents.
SV_KEYWORDS = {
    "module", "endmodule", "interface", "endinterface", "class", "endclass",
    "input", "output", "inout", "logic", "reg", "wire", "bit", "int", "integer",
    "always", "initial", "assign", "begin", "end", "case", "endcase", "if", "else",
    "for", "while", "repeat", "forever", "task", "endtask", "function", "endfunction",
    "parameter", "localparam", "defparam", "typedef", "struct", "union", "enum",
    "posedge", "negedge", "or", "and", "not", "default", "none",
    "import", "export", "package", "endpackage", "include", "define",
    "uvm_object_utils", "uvm_component_utils", "uvm_field_utils",
    "virtual", "rand", "randc", "constraint", "extends", "implements",
}

# UVM library class names. UVMBestPracticesChecker treats a file as UVM code
# when any of these names occurs in it as a plain substring.
UVM_BASE_CLASSES = {
    "uvm_test", "uvm_env", "uvm_agent", "uvm_driver", "uvm_monitor",
    "uvm_sequencer", "uvm_sequence", "uvm_sequence_item", "uvm_scoreboard",
    "uvm_subscriber", "uvm_reg_block", "uvm_reg", "uvm_reg_field",
    "uvm_reg_map", "uvm_reg_adapter", "uvm_reg_predictor",
    "uvm_analysis_port", "uvm_analysis_imp", "uvm_tlm_fifo",
    "uvm_component", "uvm_object", "uvm_report_object",
}


class SystemVerilogSyntaxChecker:
    """Heuristic SystemVerilog syntax checker.

    All checks operate on text, not on a parse tree, and therefore only ever
    emit WARNING-level issues. Comments and string literals are neutralised
    before analysis so that delimiters and keywords inside them are ignored.
    """

    PAIR_CHECKS = [
        ("module", ["endmodule"]),
        ("interface", ["endinterface"]),
        ("class", ["endclass"]),
        ("function", ["endfunction"]),
        ("task", ["endtask"]),
        ("case", ["endcase"]),
        ("begin", ["end"]),
        ("fork", ["join", "join_any", "join_none"]),
    ]

    # Matches a string literal, a line comment, or a block comment. Scanning
    # for all three in ONE pass lets whichever construct starts first win, so
    # '//' inside a string is not mistaken for a comment (and vice versa).
    _COMMENT_OR_STRING = re.compile(
        r'"(?:\\.|[^"\\\n])*"|//[^\n]*|/\*.*?\*/',
        re.DOTALL,
    )

    # First words that mark a line as a declaration/statement expected to be
    # terminated by ';'.
    _DECLARATION_KEYWORDS = frozenset({
        "logic", "reg", "wire", "bit", "int", "input", "output", "inout",
        "parameter", "localparam", "typedef", "import", "assign",
    })

    # Line endings that mean "terminated" (';', ')') or "continues on the next
    # line" (',', '(', '{'); none of them indicates a missing semicolon.
    _ACCEPTED_LINE_ENDINGS = (';', ')', ',', '(', '{')

    def __init__(self):
        self._patterns: Dict[str, Pattern] = {}
        self._compile_patterns()

    def _compile_patterns(self) -> None:
        """Populate ``self._patterns`` with the named helper regexes."""
        self._patterns = {
            "comment_single": re.compile(r'//.*$', re.MULTILINE),
            "comment_multi": re.compile(r'/\*.*?\*/', re.DOTALL),
            "string_lit": re.compile(r'"[^"]*"'),
            "module_decl": re.compile(r'\bmodule\s+(\w+)\s*[#(;]'),
            "interface_decl": re.compile(r'\binterface\s+(\w+)\s*[#(;]'),
            "class_decl": re.compile(r'\bclass\s+(\w+)\s*(?:#\s*\(|extends|implements|;|{)'),
            "port_list": re.compile(r'\(([^)]+)\)'),
            "unbalanced_paren": re.compile(r'[()]'),
            "unbalanced_bracket": re.compile(r'[\[\]]'),
            "unbalanced_brace": re.compile(r'[{}]'),
            "semicolon": re.compile(r';\s*$'),
        }

    @staticmethod
    def _neutralise(match) -> str:
        """Replacement callback: blank a comment or stub out a string literal.

        Newlines contained in the matched text are re-emitted so the result
        keeps the same number of lines as the input.
        """
        text = match.group(0)
        kept_newlines = "\n" * text.count("\n")
        if text.startswith('"'):
            return "\"STR\"" + kept_newlines
        return " " + kept_newlines

    def _strip_comments_and_strings(self, content: str) -> str:
        """Return ``content`` with comments blanked and strings stubbed.

        Comments become a single space and string literals become ``"STR"``.
        Line structure is preserved, so line N of the result corresponds to
        line N of ``content``.
        """
        return self._COMMENT_OR_STRING.sub(self._neutralise, content)

    def check_balance(self, content: str) -> List[ValidationIssue]:
        """Warn when opening and closing delimiter counts differ.

        Only totals are compared (no nesting analysis), hence the "possibly"
        wording and WARNING severity.
        """
        issues: List[ValidationIssue] = []
        stripped = self._strip_comments_and_strings(content)

        for pair, name in (("()", "parentheses"), ("[]", "brackets"), ("{}", "braces")):
            count_open = stripped.count(pair[0])
            count_close = stripped.count(pair[1])
            if count_open != count_close:
                issues.append(ValidationIssue(
                    severity=ValidationSeverity.WARNING,
                    code=f"SV-SYN-001-{name}",
                    message=f"Possibly unbalanced {name}: {count_open} '{pair[0]}' vs {count_close} '{pair[1]}'",
                    auto_fixable=False,
                ))

        return issues

    def check_begin_end_pairs(self, content: str) -> List[ValidationIssue]:
        """Warn about block-opening keywords that have no closing keyword.

        Each pair in ``PAIR_CHECKS`` is matched with its own stack; a closing
        keyword pops the most recent opener. Openers left on the stack are
        reported with the line they appeared on. Surplus closing keywords are
        ignored.
        """
        issues: List[ValidationIssue] = []
        stripped = self._strip_comments_and_strings(content)

        # Tokenise once; every keyword pair walks the same token lists.
        tokens_per_line = [
            re.findall(r'\b\w+\b', line.lower()) for line in stripped.split('\n')
        ]

        for open_kw, close_kws in self.PAIR_CHECKS:
            close_kws_set = set(close_kws)
            close_kw_display = close_kws[0] if len(close_kws) == 1 else f"{close_kws[0]}/..."

            stack: List[int] = []
            for line_num, words in enumerate(tokens_per_line, 1):
                for word in words:
                    if word == open_kw:
                        stack.append(line_num)
                    elif word in close_kws_set and stack:
                        stack.pop()

            for line_num in stack:
                issues.append(ValidationIssue(
                    severity=ValidationSeverity.WARNING,
                    code="SV-SYN-003",
                    message=f"'{open_kw}' at line {line_num} may have no matching '{close_kw_display}'",
                    line_number=line_num,
                    auto_fixable=False,
                ))

        return issues

    def check_semicolons(self, content: str) -> List[ValidationIssue]:
        """Warn about declaration lines that appear to lack a ';'.

        A line is examined when its first word is a declaration keyword. It is
        not flagged when, after removing comments, it ends with one of
        ``_ACCEPTED_LINE_ENDINGS``, when it starts with a backtick, or when it
        lies inside a still-open parenthesised list (for example an ANSI port
        list, whose entries are separated by commas rather than semicolons).

        If the file's parentheses are unbalanced the open-list tracking may
        suppress later warnings; ``check_balance`` reports that case.
        """
        issues: List[ValidationIssue] = []
        raw_lines = content.split('\n')
        # Same line count as raw_lines because stripping preserves newlines.
        code_lines = self._strip_comments_and_strings(content).split('\n')

        paren_depth = 0
        for line_num, (raw, code) in enumerate(zip(raw_lines, code_lines), 1):
            inside_parens = paren_depth > 0
            paren_depth = max(0, paren_depth + code.count('(') - code.count(')'))

            text = code.strip()
            if not text or text.startswith('`') or inside_parens:
                continue

            first_word = text.split()[0].lower()
            if first_word not in self._DECLARATION_KEYWORDS:
                continue
            if text.endswith(self._ACCEPTED_LINE_ENDINGS):
                continue

            issues.append(ValidationIssue(
                severity=ValidationSeverity.WARNING,
                code="SV-SYN-004",
                message="Possible missing semicolon",
                line_number=line_num,
                context=raw.strip()[:60],
                suggestion="Add ';' at end of statement",
                auto_fixable=True,
            ))

        return issues

    def check(self, content: str) -> List[ValidationIssue]:
        """Run all syntax checks and return their issues concatenated."""
        issues: List[ValidationIssue] = []
        issues.extend(self.check_balance(content))
        issues.extend(self.check_begin_end_pairs(content))
        issues.extend(self.check_semicolons(content))
        return issues


class SpecComplianceChecker:
    """Check that generated code references what the design spec declares.

    The spec is a plain dict. The keys read here are ``design_name``,
    ``interfaces`` (each with ``signals``), ``registers`` (each with
    ``fields``) and ``clock_reset``. A section that is missing or explicitly
    ``None`` is treated as empty.
    """

    # Matches a string literal, a line comment, or a block comment in one
    # pass, so '//' inside a string is not mistaken for a comment.
    _COMMENT_OR_STRING = re.compile(
        r'"(?:\\.|[^"\\\n])*"|//[^\n]*|/\*.*?\*/',
        re.DOTALL,
    )

    def __init__(self, spec_dict: Dict[str, Any]):
        self.spec = spec_dict
        self.design_name = spec_dict.get("design_name", "unknown")
        self._extract_signals()
        self._extract_registers()

    def _extract_signals(self) -> None:
        """Collect signal names, directions and widths from the spec.

        Signals without a name are skipped. A signal without a direction is
        filed under ``"input"``; a direction other than input/output/inout
        gets its own bucket instead of being silently dropped.
        """
        self.all_signals: Set[str] = set()
        self.signals_by_direction: Dict[str, Set[str]] = {
            "input": set(),
            "output": set(),
            "inout": set(),
        }
        self.signal_widths: Dict[str, int] = {}

        for iface in self.spec.get("interfaces") or []:
            for sig in iface.get("signals") or []:
                name = sig.get("name", "")
                if not name:
                    continue
                self.all_signals.add(name)
                direction = sig.get("direction", "input")
                self.signals_by_direction.setdefault(direction, set()).add(name)
                self.signal_widths[name] = sig.get("width", 1)

    def _extract_registers(self) -> None:
        """Collect register names, addresses and field names from the spec.

        Registers and fields without a name are skipped.
        """
        self.all_registers: Set[str] = set()
        self.register_addresses: Dict[str, str] = {}
        self.register_fields: Dict[str, Set[str]] = {}

        for reg in self.spec.get("registers") or []:
            name = reg.get("name", "")
            if not name:
                continue
            self.all_registers.add(name)
            self.register_addresses[name] = reg.get("address", "")
            self.register_fields[name] = {
                f.get("name", "") for f in (reg.get("fields") or []) if f.get("name")
            }

    @staticmethod
    def _mentions(stripped: str, identifier: str) -> bool:
        """Return True if ``identifier`` occurs as a whole word (any case)."""
        pattern = r'\b' + re.escape(identifier) + r'\b'
        return re.search(pattern, stripped, re.IGNORECASE) is not None

    def check_signals_in_code(
        self,
        content: str,
        file_type: str,
    ) -> List[ValidationIssue]:
        """Report which spec signals are referenced in ``content``.

        For ``interface`` and ``testbench`` files every missing signal is an
        ERROR (SPEC-001). For every file type each signal that is found
        yields an INFO (SPEC-002).
        """
        issues: List[ValidationIssue] = []
        stripped = self._strip_for_analysis(content)

        found_signals: Set[str] = {
            sig for sig in self.all_signals if self._mentions(stripped, sig)
        }

        if file_type in ("interface", "testbench"):
            for sig in sorted(self.all_signals - found_signals):
                issues.append(ValidationIssue(
                    severity=ValidationSeverity.ERROR,
                    code="SPEC-001",
                    message=f"Signal '{sig}' defined in spec but not found in {file_type}",
                    suggestion=f"Ensure signal '{sig}' is declared in the {file_type}",
                    auto_fixable=False,
                ))

        for sig in sorted(found_signals):
            issues.append(ValidationIssue(
                severity=ValidationSeverity.INFO,
                code="SPEC-002",
                message=f"Signal '{sig}' from spec is properly referenced",
                auto_fixable=False,
            ))

        return issues

    def check_registers_in_code(
        self,
        content: str,
        file_type: str,
    ) -> List[ValidationIssue]:
        """Report spec registers missing from a RAL model.

        Only ``ral_model`` files produce issues (SPEC-003, ERROR); the other
        register-related file types are accepted without findings.
        """
        issues: List[ValidationIssue] = []

        if file_type not in ("ral_model", "test", "sequence", "scoreboard", "coverage"):
            return issues

        if not self.all_registers:
            return issues

        if file_type != "ral_model":
            return issues

        stripped = self._strip_for_analysis(content)
        missing_regs = {
            reg for reg in self.all_registers if not self._mentions(stripped, reg)
        }
        for reg in sorted(missing_regs):
            issues.append(ValidationIssue(
                severity=ValidationSeverity.ERROR,
                code="SPEC-003",
                message=f"Register '{reg}' defined in spec but not found in RAL model",
                auto_fixable=False,
            ))

        return issues

    def check_clock_reset(
        self,
        content: str,
        file_type: str,
    ) -> List[ValidationIssue]:
        """Check that the clock and reset names appear in the file.

        Applies to ``interface`` and ``testbench`` files only. The names come
        from ``spec["clock_reset"]`` and default to ``clk`` / ``rst_n``.
        """
        issues: List[ValidationIssue] = []

        if file_type not in ("interface", "testbench"):
            return issues

        # `or {}` also covers a spec that sets "clock_reset": None.
        cr = self.spec.get("clock_reset") or {}
        clock = cr.get("clock", "clk")
        reset = cr.get("reset", "rst_n")

        stripped = self._strip_for_analysis(content)

        if not self._mentions(stripped, clock):
            issues.append(ValidationIssue(
                severity=ValidationSeverity.ERROR,
                code="SPEC-004",
                message=f"Clock signal '{clock}' not found in {file_type}",
                auto_fixable=False,
            ))

        if not self._mentions(stripped, reset):
            issues.append(ValidationIssue(
                severity=ValidationSeverity.ERROR,
                code="SPEC-005",
                message=f"Reset signal '{reset}' not found in {file_type}",
                auto_fixable=False,
            ))

        return issues

    @staticmethod
    def _strip_for_analysis(content: str) -> str:
        """Blank out comments and replace string literals with ``STR``.

        Identifiers that only occur inside comments or strings therefore do
        not count as references.
        """
        def _neutralise(match) -> str:
            return 'STR' if match.group(0).startswith('"') else ' '

        return SpecComplianceChecker._COMMENT_OR_STRING.sub(_neutralise, content)


class UVMBestPracticesChecker:
    """Check generated code for common UVM usage patterns.

    Most checks are "presence" checks: when a pattern is found, an INFO issue
    records that the practice is followed. One WARNING (UVM-WARN-001) is
    raised for UVM test/env/sequence files without any objection handling.
    """

    # file_type -> [(pattern, issue code, message)]. Every match is reported
    # with INFO severity. The "any" entries apply to all file types.
    _PRESENCE_CHECKS = {
        "driver": [
            (re.compile(r'\bseq_item_port\.(get|next_item)\b'),
             "UVM-DRV-001", "Uses proper sequence item retrieval"),
            (re.compile(r'\bseq_item_port\.item_done\b'),
             "UVM-DRV-002", "Properly completes items"),
        ],
        "monitor": [
            (re.compile(r'\banalysis_port\s*<'),
             "UVM-MON-001", "Has analysis port"),
            (re.compile(r'\bwrite\s*\('),
             "UVM-MON-002", "Writes to analysis port"),
        ],
        "agent": [
            (re.compile(r'\b(driver|monitor|sequencer)\s*=\s*'),
             "UVM-AGT-001", "Creates agent components"),
            (re.compile(r'\bget_is_active\b'),
             "UVM-AGT-002", "Checks active/passive mode"),
        ],
        "scoreboard": [
            (re.compile(r'\buvm_analysis_imp\s*<'),
             "UVM-SCB-001", "Has analysis exports"),
            (re.compile(r'\bwrite\s*\(\s*\w+\s+(\w+)\)'),
             "UVM-SCB-002", "Implements write methods"),
        ],
        "test": [
            (re.compile(r'\buvm_top\.(finish|stop|objection)'),
             "UVM-TEST-001", "Proper objection handling"),
            (re.compile(r'\braise_objection\b'),
             "UVM-TEST-002", "Raises objections"),
            (re.compile(r'\bdrop_objection\b'),
             "UVM-TEST-003", "Drops objections"),
        ],
        "sequence": [
            (re.compile(r'\bstart_item\b'),
             "UVM-SEQ-001", "Uses start_item"),
            (re.compile(r'\bfinish_item\b'),
             "UVM-SEQ-002", "Uses finish_item"),
        ],
        "any": [
            # No leading \b: a backtick is a non-word character, so \b in
            # front of it only matches when a word character precedes the
            # macro, which never holds at line start or after whitespace.
            (re.compile(r'`uvm_(component|object)_utils\b'),
             "UVM-ANY-001", "Uses UVM factory registration"),
            (re.compile(r'\buvm_info\s*\('),
             "UVM-ANY-002", "Has UVM messaging"),
        ],
    }

    def check(self, content: str, file_type: str) -> List[ValidationIssue]:
        """Run the UVM checks that apply to ``file_type``.

        Args:
            content: Source text of the file (searched as-is, comments
                included).
            file_type: File category; selects the type-specific patterns in
                addition to the "any" patterns.

        Returns:
            INFO issues for every practice detected, plus UVM-WARN-001 when a
            UVM test/env/sequence file contains neither ``raise_objection``
            nor ``drop_objection``.
        """
        issues: List[ValidationIssue] = []

        relevant_checks = (
            self._PRESENCE_CHECKS.get(file_type, []) + self._PRESENCE_CHECKS.get("any", [])
        )

        for pattern, code, message in relevant_checks:
            if pattern.search(content):
                issues.append(ValidationIssue(
                    severity=ValidationSeverity.INFO,
                    code=code,
                    message=message,
                    auto_fixable=False,
                ))

        # Substring test: any UVM base-class name marks the file as UVM code.
        is_uvm = any(uvm_base in content for uvm_base in UVM_BASE_CLASSES)
        if is_uvm and file_type in ("test", "env", "sequence"):
            if not re.search(r'\b(raise|drop)_objection\b', content):
                issues.append(ValidationIssue(
                    severity=ValidationSeverity.WARNING,
                    code="UVM-WARN-001",
                    message="UVM test/sequence without objection handling",
                    suggestion="Consider adding raise_objection/drop_objection for proper test termination",
                    auto_fixable=False,
                ))

        return issues


class CodeValidator:
    """
    Industry-level code validator for UVM testbench generation.

    Combines the syntax, spec-compliance and UVM best-practice checkers and
    packages their findings into per-file results and an overall report.
    Spec checks run only when a spec dict is supplied.
    """

    # (regex, file type) pairs tried in order against the lower-cased file
    # name; the first match wins, so more specific names must come first.
    FILE_TYPE_DETECTORS = [
        (r'ral_model', "ral_model"),
        (r'scoreboard', "scoreboard"),
        (r'driver', "driver"),
        (r'monitor', "monitor"),
        (r'agent', "agent"),
        (r'sequence_item', "sequence_item"),
        (r'_sequence', "sequence"),
        (r'regression', "sequence"),
        (r'coverage_collector', "coverage"),
        (r'protocol_checker', "checker"),
        # Must precede '_test': a name such as 'uart_testbench.sv' contains
        # '_test' and would otherwise be classified as a test.
        (r'testbench', "testbench"),
        (r'_test', "test"),
        (r'environment|env_', "env"),
        (r'interface', "interface"),
        # Already covered by 'monitor' above (same resulting type).
        (r'serial_monitor', "monitor"),
    ]

    NON_SV_EXTENSIONS = {'.f', '.tcl', '.core', '.json', '.yaml', '.yml', '.md', '.txt'}

    def __init__(self, spec_dict: Optional[Dict[str, Any]] = None):
        self.spec_dict = spec_dict
        self._syntax_checker = SystemVerilogSyntaxChecker()
        self._spec_checker = SpecComplianceChecker(spec_dict) if spec_dict else None
        self._uvm_checker = UVMBestPracticesChecker()

    @classmethod
    def _is_sv_file(cls, filename: str) -> bool:
        """Return False only for names ending in a known non-SV extension.

        Every other name, including ones without an extension or with an
        unrecognised one, is treated as SystemVerilog and validated.
        """
        return not filename.lower().endswith(tuple(cls.NON_SV_EXTENSIONS))

    @classmethod
    def detect_file_type(cls, filename: str) -> str:
        """Return the file type of the first detector matching ``filename``.

        Matching is case-insensitive; ``"unknown"`` is returned when no
        detector matches.
        """
        fname_lower = filename.lower()
        for pattern, file_type in cls.FILE_TYPE_DETECTORS:
            if re.search(pattern, fname_lower):
                return file_type
        return "unknown"

    def validate_file(
        self,
        filename: str,
        content: str,
        file_type: Optional[str] = None,
    ) -> FileValidationResult:
        """Validate one file and return its result.

        Args:
            filename: Name used for SV detection and file-type detection.
            content: Source text to validate.
            file_type: Explicit file type; detected from ``filename`` when
                omitted.

        Returns:
            A result with ``file_type="skipped"`` for non-SV files, otherwise
            the collected issues. ``passed`` is True when no issue has ERROR
            severity.
        """
        if not self._is_sv_file(filename):
            return FileValidationResult(
                filename=filename,
                file_type="skipped",
                passed=True,
                issues=[],
                checks_run=0,
                checks_passed=0,
            )

        if file_type is None:
            file_type = self.detect_file_type(filename)

        issues: List[ValidationIssue] = []

        # Three syntax checks; each ERROR-level issue costs one passed check.
        syntax_issues = self._syntax_checker.check(content)
        issues.extend(syntax_issues)
        checks_run = 3
        syntax_errors = sum(1 for i in syntax_issues if i.severity == ValidationSeverity.ERROR)
        checks_passed = 3 - min(syntax_errors, 3)

        if self._spec_checker:
            spec_issues = (
                self._spec_checker.check_signals_in_code(content, file_type)
                + self._spec_checker.check_registers_in_code(content, file_type)
                + self._spec_checker.check_clock_reset(content, file_type)
            )
            issues.extend(spec_issues)

            # NOTE(review): three spec checks run but two are counted, and the
            # passed count is derived from the number of ERROR issues rather
            # than from per-check outcomes. Kept as-is so reported pass rates
            # do not change; confirm the intended metric.
            checks_run += 2
            spec_errors = sum(1 for i in spec_issues if i.severity == ValidationSeverity.ERROR)
            checks_passed += max(0, 2 - spec_errors)

        if file_type != "unknown":
            issues.extend(self._uvm_checker.check(content, file_type))

        passed = not any(i.severity == ValidationSeverity.ERROR for i in issues)

        return FileValidationResult(
            filename=filename,
            file_type=file_type,
            passed=passed,
            issues=issues,
            checks_run=checks_run,
            checks_passed=checks_passed,
        )

    @staticmethod
    def _build_report(
        file_results: List[FileValidationResult],
        design_name: str,
    ) -> ValidationReport:
        """Wrap per-file results in a timestamped report."""
        import datetime

        overall_passed = sum(f.error_count for f in file_results) == 0
        return ValidationReport(
            design_name=design_name,
            overall_passed=overall_passed,
            files=file_results,
            timestamp=datetime.datetime.now().isoformat(),
        )

    def validate_files(
        self,
        files: Dict[str, str],
        design_name: str = "",
    ) -> ValidationReport:
        """Validate a ``{filename: content}`` mapping and build a report.

        The report passes overall when no file has an ERROR issue.
        """
        file_results = [
            self.validate_file(filename, content)
            for filename, content in files.items()
        ]
        return self._build_report(file_results, design_name)

    def validate_files_by_path(
        self,
        file_paths: Dict[str, str],
        design_name: str = "",
    ) -> ValidationReport:
        """Validate a ``{filename: path}`` mapping, reading each file as UTF-8.

        A SystemVerilog file that cannot be read is logged and reported with
        an ``IO-001`` ERROR instead of being validated as empty content, so
        an unreadable file can never be reported as passed. Unreadable
        non-SV files are still reported as skipped.
        """
        file_results: List[FileValidationResult] = []

        for filename, path in file_paths.items():
            try:
                with open(path, "r", encoding="utf-8") as f:
                    content = f.read()
            except (OSError, ValueError, TypeError) as e:
                logger.warning("Failed to read %s: %s", path, e)
                if not self._is_sv_file(filename):
                    file_results.append(self.validate_file(filename, ""))
                    continue
                file_results.append(FileValidationResult(
                    filename=filename,
                    file_type=self.detect_file_type(filename),
                    passed=False,
                    issues=[ValidationIssue(
                        severity=ValidationSeverity.ERROR,
                        code="IO-001",
                        message=f"Could not read '{path}': {e}",
                        auto_fixable=False,
                    )],
                    checks_run=0,
                    checks_passed=0,
                ))
            else:
                file_results.append(self.validate_file(filename, content))

        return self._build_report(file_results, design_name)