import re
from flask import jsonify
import logging
import os

def clean_result(value):
    """Return *value* tidied up for display, or "Not Available" if nothing is left.

    Runs of ``-``, ``_`` and ``:`` and runs of non-ASCII characters are each
    replaced by a space; whitespace is then collapsed to single spaces and
    trimmed from both ends.

    Args:
        value: The raw string to clean. ``None`` and ``""`` are accepted.

    Returns:
        The cleaned string, or the literal ``"Not Available"`` when the input
        is empty or contains nothing but separators/whitespace.
    """
    if not value:
        return "Not Available"

    value = re.sub(r"[-_:]+", " ", value)
    value = re.sub(r"[^\x00-\x7F]+", " ", value)
    # Collapse whitespace last: the substitutions above introduce spaces of
    # their own, so collapsing first would leave runs like "a   b" behind.
    value = re.sub(r"\s+", " ", value).strip()

    # A separator-only input (e.g. "---") is now empty rather than " ".
    return value or "Not Available"

def extract_patient_name(text, qa_pipeline):
    """Ask a question-answering pipeline for the patient's name.

    Several phrasings of the question are tried in order and the first
    answer longer than one character (after stripping) wins.

    Args:
        text: Document text passed to the pipeline as ``context``.
        qa_pipeline: Callable invoked as ``qa_pipeline(question=..., context=...)``;
            its result is read with ``.get("answer", "")``.

    Returns:
        The stripped answer string, or ``None`` when either argument is
        falsy, no phrasing yields a usable answer, or the pipeline raises
        (the error is logged).
    """
    if not (text and qa_pipeline):
        return None

    prompts = (
        "What is the patient's name?",
        "Who is the patient?",
        "What is the name of the person this document is about?",
        "What is the full name of the patient?",
        "What is the name of the person in this medical document?",
    )

    try:
        for prompt in prompts:
            response = qa_pipeline(question=prompt, context=text)
            candidate = response.get("answer", "").strip()
            # A single character is treated as noise, not a name.
            if len(candidate) > 1:
                return candidate
    except Exception as e:
        logging.error(f"Error extracting patient name: {str(e)}")

    return None

def normalize_name(name):
    """Return a canonical lowercase form of *name* for comparison.

    The name is lowercased, underscores are treated as word separators,
    salutations and label words (``mr``, ``dr``, ``patient``, ``name``,
    ``of``, ...) are removed wherever they occur as whole words, remaining
    punctuation is deleted, and whitespace is collapsed.

    Args:
        name: The raw name; ``None`` and ``""`` are accepted.

    Returns:
        The normalized name, or ``""`` when *name* is falsy or nothing
        survives normalization.
    """
    if not name:
        return ""

    name = name.lower().strip()

    # "_" counts as a word character for both \b and \w, so without this a
    # value like "dr_john_doe" would keep its title and never split into parts.
    name = name.replace("_", " ")

    # One whole-word pass covers leading and embedded titles alike.
    name = re.sub(
        r"\b(?:mr|mrs|ms|dr|prof|sir|madam|patient|name|of)\b\s*", "", name
    )

    # Drop punctuation (e.g. "o'brien" -> "obrien"), then collapse whitespace.
    name = re.sub(r"[^\w\s]", "", name)
    return " ".join(name.split())

def extract_name_from_filename(filename):
    """Derive a candidate patient name from a document's filename.

    Any directory part and the file extension are dropped, every run of
    punctuation, underscores or whitespace becomes a single space, and one
    leading title (``mr``, ``mrs``, ``ms``, ``dr``, ``prof``, ``sir``,
    ``madam``) is removed. Letter case is preserved.

    Args:
        filename: The file name (or path); ``None`` and ``""`` are accepted.

    Returns:
        The cleaned name string (possibly ``""`` if nothing is left), or
        ``None`` when *filename* is falsy.
    """
    if not filename:
        return None

    # Only the final path component can carry the name.
    stem = os.path.splitext(os.path.basename(filename))[0]

    # Normalize separators first. "_" must be listed explicitly because \W
    # does not match it, and it is the most common separator in file names.
    stem = re.sub(r"[\W_]+", " ", stem).strip()

    # With separators normalized, \b reliably delimits a leading title
    # (e.g. "Dr_John" has become "Dr John").
    stem = re.sub(
        r"^(mr|mrs|ms|dr|prof|sir|madam)\b\s*", "", stem, flags=re.IGNORECASE
    )

    return stem.strip()

# Label patterns tried, in order, when the QA pipeline yields no name.
# The capture must start with a letter and may only continue with letters,
# spaces and tabs, so it stops at the end of the line instead of running on
# into the next field (e.g. "Name: John Doe\nAge: 30").
_NAME_LABEL_PATTERNS = [
    r"Patient:\s*([A-Za-z][A-Za-z \t]*)",
    r"Name:\s*([A-Za-z][A-Za-z \t]*)",
    r"Patient Name:\s*([A-Za-z][A-Za-z \t]*)",
    r"Name of Patient:\s*([A-Za-z][A-Za-z \t]*)",
    r"Patient's Name:\s*([A-Za-z][A-Za-z \t]*)",
    r"Patient Name is:\s*([A-Za-z][A-Za-z \t]*)",
    r"Name of the Patient:\s*([A-Za-z][A-Za-z \t]*)",
]


def _find_labelled_name(text):
    """Return the first name found after a known label in *text*, or None."""
    for pattern in _NAME_LABEL_PATTERNS:
        match = re.search(pattern, text, re.IGNORECASE)
        if match:
            candidate = match.group(1).strip()
            if candidate:
                return candidate
    return None


def validate_patient_name(extracted_text, patient_name, filename, qa_pipeline):
    """Check that a document's detected patient name matches the registered one.

    The name is looked for, in order, via the QA pipeline, via label
    patterns in the text, and finally in the filename. The first source whose
    result is non-empty after normalization is used. Two names match when one
    normalized name contains the other, or when at least half of the words of
    the shorter name also occur in the other.

    Args:
        extracted_text: Text extracted from the document.
        patient_name: The registered patient name to compare against.
        filename: The document's file name (used as a last resort and in
            error messages).
        qa_pipeline: Question-answering callable passed through to
            ``extract_patient_name``.

    Returns:
        ``None`` when the names match; otherwise a ``(response, 400)`` tuple
        whose response is ``jsonify({"error": ...})``.
    """
    if not extracted_text or not patient_name:
        return (
            jsonify({"error": "Missing required information for validation"}),
            400,
        )

    normalized_patient_name = normalize_name(patient_name)
    if not normalized_patient_name:
        # A registered name made only of titles/punctuation cannot be
        # compared; without this guard the containment test below
        # ('"" in x') would accept every document.
        return (
            jsonify({"error": "Missing required information for validation"}),
            400,
        )

    # Sources are wrapped in lambdas so later (cheaper but less reliable)
    # fallbacks only run when the earlier ones produce nothing usable.
    name_sources = (
        lambda: extract_patient_name(extracted_text, qa_pipeline),
        lambda: _find_labelled_name(extracted_text),
        lambda: extract_name_from_filename(filename),
    )
    detected_name = None
    normalized_detected_name = ""
    for source in name_sources:
        candidate = source()
        normalized_candidate = normalize_name(candidate)
        # Skip answers such as "Patient" that normalize to nothing: an empty
        # string is a substring of every name and would always "match".
        if normalized_candidate:
            detected_name = candidate
            normalized_detected_name = normalized_candidate
            break

    if not normalized_detected_name:
        return (
            jsonify({"error": f"Could not determine patient name from {filename}"}),
            400,
        )

    # Check if either name is contained within the other.
    # NOTE(review): this is a character-level substring test, so a short
    # detected name can match inside a longer unrelated word — confirm that
    # this leniency is intended.
    if (
        normalized_detected_name in normalized_patient_name
        or normalized_patient_name in normalized_detected_name
    ):
        return None

    # Accept when at least half of the shorter name's words are shared.
    # Both sets are non-empty here, so the threshold is never zero.
    detected_parts = set(normalized_detected_name.split())
    patient_parts = set(normalized_patient_name.split())
    shared_parts = detected_parts & patient_parts
    if len(shared_parts) >= min(len(detected_parts), len(patient_parts)) * 0.5:
        return None

    return (
        jsonify(
            {
                "error": f"Document '{filename}' does not belong to {patient_name}. Found: {detected_name}"
            }
        ),
        400,
    )