import gradio as gr
import torch
from transformers import AutoTokenizer, AutoConfig
from configuration_stacked import ImpressoConfig
from modeling_stacked import ExtendedMultitaskTimeModelForTokenClassification
import numpy as np

# Define the model name
MODEL_NAME = "impresso-project/ner-stacked-bert-multilingual-v1.1.0"

print("Loading tokenizer...")
ner_tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, trust_remote_code=True)

print("Loading model configuration...")
config = ImpressoConfig.from_pretrained(MODEL_NAME, trust_remote_code=True)

print("Loading model...")
model = ExtendedMultitaskTimeModelForTokenClassification.from_pretrained(
    MODEL_NAME,
    config=config,
    trust_remote_code=True,
)
model.eval()

# Load label maps
label_map = config.label_map
print(f"Label map: {label_map}")

# Create reverse label maps for decoding
id2label_coarse = {v: k for k, v in label_map["NE-COARSE-LIT"].items()}
id2label_fine = {v: k for k, v in label_map["NE-FINE-COMP"].items()}

print(f"Model loaded successfully!")
print(f"Using model: {MODEL_NAME}")
print(f"Using tokenizer: {MODEL_NAME}")


def format_entities_as_html(entities):
    excluded_keys = {"start", "end", "index"}  # Keys to exclude from the output
    html_output = "<div>"

    for entity in entities:
        html_output += (
            "<div style='margin-bottom: 10px;'>"  # Each entity in a separate div
        )

        # Dynamically add all fields except the excluded ones
        for key, value in entity.items():
            if key not in excluded_keys:
                if isinstance(value, float):  # Format score if it's a float
                    html_output += (
                        f"<strong>{key.capitalize()}:</strong> {value:.2f}<br>"
                    )
                else:
                    html_output += f"<strong>{key.capitalize()}:</strong> {value}<br>"

        html_output += "</div>"

    html_output += "</div>"
    return html_output


def predict_entities(text):
    """Run NER prediction on text and return entities in the expected format."""
    # Tokenize input
    inputs = ner_tokenizer(
        text,
        return_tensors="pt",
        truncation=True,
        max_length=512,
        return_offsets_mapping=True,
    )
    
    offset_mapping = inputs.pop("offset_mapping")[0]
    
    # Run inference
    with torch.no_grad():
        outputs = model(**inputs)
    
    # Get predictions for both tasks
    logits_coarse = outputs.logits["NE-COARSE-LIT"]
    logits_fine = outputs.logits["NE-FINE-COMP"]
    
    predictions_coarse = torch.argmax(logits_coarse, dim=-1)[0]
    predictions_fine = torch.argmax(logits_fine, dim=-1)[0]
    
    # Get scores (confidence)
    scores_coarse = torch.softmax(logits_coarse, dim=-1)[0]
    scores_fine = torch.softmax(logits_fine, dim=-1)[0]
    
    # Convert predictions to labels
    entities = []
    current_entity = None
    
    for idx, (pred_coarse, pred_fine, score_c, score_f) in enumerate(
        zip(predictions_coarse, predictions_fine, scores_coarse, scores_fine)
    ):
        label_coarse = id2label_coarse[pred_coarse.item()]
        label_fine = id2label_fine[pred_fine.item()]
        
        # Skip special tokens and 'O' tags
        if idx == 0 or idx >= len(offset_mapping):
            continue
            
        start, end = offset_mapping[idx]
        if start == end:  # Skip special tokens
            continue
        
        # Process coarse-grained entities (loc, pers, org)
        if label_coarse.startswith("B-"):
            # Save previous entity
            if current_entity:
                entities.append(current_entity)
            
            entity_type = label_coarse[2:]  # Remove 'B-' prefix
            current_entity = {
                "type": entity_type,
                "surface": text[start:end],
                "lOffset": start.item(),
                "rOffset": end.item(),
                "confidence_ner": float(score_c[pred_coarse].item() * 100),
            }
            
            # Add fine-grained info if available
            if not label_fine.startswith("O"):
                fine_type = label_fine[2:] if label_fine.startswith(("B-", "I-")) else label_fine
                fine_parts = fine_type.split(".")
                if len(fine_parts) == 2:
                    attr_name = fine_parts[1]  # e.g., 'name', 'title', 'function'
                    current_entity[attr_name] = text[start:end]
                    
        elif label_coarse.startswith("I-") and current_entity:
            # Continue current entity
            entity_type = label_coarse[2:]
            if entity_type == current_entity["type"]:
                current_entity["rOffset"] = end.item()
                current_entity["surface"] = text[current_entity["lOffset"]:end]
                
                # Extend fine-grained attributes
                if not label_fine.startswith("O"):
                    fine_type = label_fine[2:] if label_fine.startswith(("B-", "I-")) else label_fine
                    fine_parts = fine_type.split(".")
                    if len(fine_parts) == 2:
                        attr_name = fine_parts[1]
                        if attr_name in current_entity:
                            current_entity[attr_name] = text[current_entity["lOffset"]:end]
                        else:
                            current_entity[attr_name] = text[start:end]
        else:
            # End of entity
            if current_entity:
                entities.append(current_entity)
                current_entity = None
    
    # Don't forget the last entity
    if current_entity:
        entities.append(current_entity)
    
    return entities


# Function to process the sentence and extract entities
def extract_entities(sentence):
    if not sentence or not sentence.strip():
        return {"text": "", "entities": []}
    
    results = predict_entities(sentence)

    # Debugging the result format
    print(f"NER results: {results}")

    entities = []
    seen_spans = set()  # Track the spans we have already added to avoid overlaps

    for entity in results:
        entity_span = (entity["lOffset"], entity["rOffset"])

        # Only add non-overlapping entities
        if entity_span not in seen_spans:
            seen_spans.add(entity_span)
            label = f"{entity['type']}"
            if "title" in entity:
                label += f" - Title: {entity['title']}"
            if "name" in entity:
                label += f" - Name: {entity['name']}"
            if "function" in entity:
                label += f" - Function: {entity['function']}"
            entity["entity"] = label
            entity["start"] = entity["lOffset"]
            entity["end"] = entity["rOffset"]
            entities.append(entity)
    
    print(f"Entities: {entities}")
    return {"text": sentence, "entities": entities}


# Create Gradio interface
def ner_app_interface():
    input_sentence = gr.Textbox(
        lines=5, label="Input Sentence", placeholder="Enter a sentence for NER:"
    )
    output_entities = gr.HTML(label="Extracted Entities")

    # Interface definition
    interface = gr.Interface(
        fn=extract_entities,
        inputs=input_sentence,
        outputs=[gr.HighlightedText(label="Text with  mentions")],
        # outputs=output_entities,
        title="Named Entity Recognition",
        description="Enter a sentence to extract named entities using the NER model from the Impresso project.",
        examples=[
            [
                "Des chercheurs de l'Université de Cambridge ont développé une nouvelle technique de calcul quantique qui promet d'augmenter exponentiellement les vitesses de calcul."
            ],
            [
                "Le rapport complet sur ces découvertes a été publié dans la prestigieuse revue 'Nature Physics'. (Reuters)"
            ],
            ["In the year 1789, the Estates-General was convened in France."],
            [
                "The event was held at the Palace of Versailles, a symbol of French monarchy."
            ],
            [
                "At Versailles, Marie Antoinette, the Queen of France, was involved in discussions."
            ],
            [
                "Maximilien Robespierre, a leading member of the National Assembly, also participated."
            ],
            [
                "Jean-Jacques Rousseau, the famous philosopher, was a significant figure in the debate."
            ],
            [
                "Another important participant was Charles de Talleyrand, the Bishop of Autun."
            ],
            [
                "Meanwhile, across the Atlantic, George Washington, the first President of the United States, was shaping policies."
            ],
            [
                "Thomas Jefferson, the nation's Secretary of State, played a key role in drafting policies for the new American government."
            ],
        ],
        live=False,
    )

    interface.launch(share=True)


# Run the app
if __name__ == "__main__":
    ner_app_interface()