"""
Drug Repurposing Engine
Identifies novel therapeutic uses for existing drugs using biological network analysis.

Core Concept:
- Model biomedical ecosystem as network graph: Drugs → Proteins → Pathways → Diseases
- Use network proximity and shortest path analysis to discover repurposing opportunities
"""

import networkx as nx
import pandas as pd
import numpy as np
from typing import Dict, List, Tuple, Optional, Set
import asyncio
import httpx
from collections import defaultdict
import streamlit as st


class DrugRepurposingEngine:
    """
    Graph-based drug repurposing engine that analyzes drug-protein-disease networks
    to identify novel therapeutic indications for existing drugs.
    """
    
    def __init__(self, api_client, cache_manager):
        self.api_client = api_client
        self.cache = cache_manager
        self.graph = nx.Graph()
        self.drug_to_proteins = {}  # drug_name -> [uniprot_ids]
        self.protein_to_diseases = {}  # uniprot_id -> [disease_names]
        self.protein_to_pathways = {}  # uniprot_id -> [pathway_names]
        self.disease_to_proteins = defaultdict(list)  # disease_name -> [uniprot_ids]
        
    async def fetch_drug_targets(self, drug_name: str, drugbank_id: Optional[str] = None) -> Dict:
        """
        Fetch protein targets for a given drug from DrugBank and ChEMBL.
        
        Args:
            drug_name: Name of the drug
            drugbank_id: Optional DrugBank ID
            
        Returns:
            Dictionary with drug info and list of target proteins
        """
        cache_key = f"drug_targets_{drug_name.lower()}_{drugbank_id or ''}"
        cached = self.cache.get(cache_key)
        if cached:
            return cached
        
        targets = []
        drug_info = {
            "name": drug_name,
            "drugbank_id": drugbank_id,
            "targets": []
        }
        
        try:
            async with httpx.AsyncClient(timeout=30.0) as client:
                # Try DrugBank API (if ID provided)
                if drugbank_id:
                    try:
                        # DrugBank public API endpoint
                        drugbank_url = f"https://go.drugbank.com/drugs/{drugbank_id}.json"
                        # Note: DrugBank requires authentication for API access
                        # For now, we'll use ChEMBL as primary source
                    except:
                        pass
                
                # Primary source: ChEMBL
                # Search for drug by name
                chembl_search_url = "https://www.ebi.ac.uk/chembl/api/data/molecule/search.json"
                search_params = {
                    "q": drug_name,
                    "max_phase": 4,  # FDA approved
                    "limit": 5  # Get more results
                }
                
                search_response = await client.get(chembl_search_url, params=search_params)
                if search_response.status_code == 200:
                    search_data = search_response.json()
                    molecules = search_data.get("molecules", [])
                    
                    # Try to find exact match first
                    exact_match = None
                    for mol in molecules:
                        pref_name = mol.get("pref_name", "").lower()
                        synonyms = [s.lower() for s in mol.get("synonyms", [])]
                        if (drug_name.lower() in pref_name or 
                            drug_name.lower() in synonyms or
                            pref_name in drug_name.lower()):
                            exact_match = mol
                            break
                    
                    molecule = exact_match or (molecules[0] if molecules else None)
                    
                    if molecule:
                        chembl_id = molecule.get("molecule_chembl_id")
                        
                        # Get targets for this molecule
                        target_url = "https://www.ebi.ac.uk/chembl/api/data/mechanism.json"
                        target_params = {
                            "molecule_chembl_id": chembl_id,
                            "format": "json"
                        }
                        
                        target_response = await client.get(target_url, params=target_params)
                        if target_response.status_code == 200:
                            target_data = target_response.json()
                            
                            for mechanism in target_data.get("mechanisms", []):
                                target_chembl_id = mechanism.get("target_chembl_id")
                                action_type = mechanism.get("action_type", "N/A")
                                
                                # Get target details
                                target_detail_url = f"https://www.ebi.ac.uk/chembl/api/data/target/{target_chembl_id}.json"
                                target_detail_response = await client.get(target_detail_url)
                                
                                if target_detail_response.status_code == 200:
                                    target_detail = target_detail_response.json()
                                    
                                    # Extract UniProt IDs
                                    target_components = target_detail.get("target_components", [])
                                    for component in target_components:
                                        for accession in self._iter_uniprot_accessions(component):
                                            # Avoid duplicates
                                            if not any(t["uniprot_id"] == accession for t in targets):
                                                targets.append({
                                                    "uniprot_id": accession,
                                                    "target_name": target_detail.get("pref_name", "Unknown"),
                                                    "action_type": action_type,
                                                    "chembl_target_id": target_chembl_id
                                                })
                        
                        # Also try activity data as fallback
                        if not targets:
                            activity_url = "https://www.ebi.ac.uk/chembl/api/data/activity.json"
                            activity_params = {
                                "molecule_chembl_id": chembl_id,
                                "target_organism": "Homo sapiens",
                                "format": "json",
                                "limit": 10
                            }
                            
                            activity_response = await client.get(activity_url, params=activity_params)
                            if activity_response.status_code == 200:
                                activity_data = activity_response.json()
                                seen_targets = set()
                                
                                for activity in activity_data.get("activities", []):
                                    target_chembl_id = activity.get("target_chembl_id")
                                    if target_chembl_id and target_chembl_id not in seen_targets:
                                        seen_targets.add(target_chembl_id)
                                        
                                        # Get target details
                                        target_detail_url = f"https://www.ebi.ac.uk/chembl/api/data/target/{target_chembl_id}.json"
                                        target_detail_response = await client.get(target_detail_url)
                                        
                                        if target_detail_response.status_code == 200:
                                            target_detail = target_detail_response.json()
                                            target_components = target_detail.get("target_components", [])
                                            
                                            for component in target_components:
                                                for accession in self._iter_uniprot_accessions(component):
                                                    if not any(t["uniprot_id"] == accession for t in targets):
                                                        targets.append({
                                                            "uniprot_id": accession,
                                                            "target_name": target_detail.get("pref_name", "Unknown"),
                                                            "action_type": "Activity",
                                                            "chembl_target_id": target_chembl_id
                                                        })
                
                # Alternative: Search UniProt for drug name (less reliable)
                if not targets:
                    uniprot_search_url = "https://rest.uniprot.org/uniprotkb/search"
                    uniprot_params = {
                        "query": f"{drug_name} AND (reviewed:true) AND (organism_id:9606)",
                        "format": "json",
                        "size": 5
                    }
                    
                    uniprot_response = await client.get(uniprot_search_url, params=uniprot_params)
                    if uniprot_response.status_code == 200:
                        uniprot_data = uniprot_response.json()
                        # This is a fallback - UniProt doesn't directly link drugs
                        # but we can try to find proteins mentioned with drug name
                        pass
                
                # Fallback: Use curated drug-target database for common drugs
                if not targets:
                    targets = self._get_curated_drug_targets(drug_name)
                
                drug_info["targets"] = targets
                self.cache.set(cache_key, drug_info)
                return drug_info
                
        except Exception as e:
            st.warning(f"Error fetching drug targets: {str(e)}")
            # Try curated database as fallback
            targets = self._get_curated_drug_targets(drug_name)
            drug_info["targets"] = targets
            return drug_info
    
    def _get_curated_drug_targets(self, drug_name: str) -> List[Dict]:
        """
        Curated drug-target associations for common FDA-approved drugs.
        Used as fallback when API data is unavailable.
        """
        drug_name_lower = drug_name.lower()
        
        curated_targets = {
            "metformin": [
                {"uniprot_id": "Q9Y478", "target_name": "AMPK", "action_type": "Activator"},
                {"uniprot_id": "P42345", "target_name": "mTOR", "action_type": "Inhibitor"},
            ],
            "aspirin": [
                {"uniprot_id": "P23219", "target_name": "PTGS1 (COX-1)", "action_type": "Inhibitor"},
                {"uniprot_id": "P35354", "target_name": "PTGS2 (COX-2)", "action_type": "Inhibitor"},
            ],
            "ibuprofen": [
                {"uniprot_id": "P23219", "target_name": "PTGS1 (COX-1)", "action_type": "Inhibitor"},
                {"uniprot_id": "P35354", "target_name": "PTGS2 (COX-2)", "action_type": "Inhibitor"},
            ],
            "erlotinib": [
                {"uniprot_id": "P00533", "target_name": "EGFR", "action_type": "Inhibitor"},
            ],
            "gefitinib": [
                {"uniprot_id": "P00533", "target_name": "EGFR", "action_type": "Inhibitor"},
            ],
            "cetuximab": [
                {"uniprot_id": "P00533", "target_name": "EGFR", "action_type": "Antibody"},
            ],
            "olaparib": [
                {"uniprot_id": "P38398", "target_name": "BRCA1", "action_type": "PARP Inhibitor"},
                {"uniprot_id": "P51587", "target_name": "BRCA2", "action_type": "PARP Inhibitor"},
            ],
            "imatinib": [
                {"uniprot_id": "P00519", "target_name": "ABL1", "action_type": "Inhibitor"},
                {"uniprot_id": "P16234", "target_name": "PDGFR", "action_type": "Inhibitor"},
            ],
            "atorvastatin": [
                {"uniprot_id": "P04035", "target_name": "HMGCR", "action_type": "Inhibitor"},
            ],
            "simvastatin": [
                {"uniprot_id": "P04035", "target_name": "HMGCR", "action_type": "Inhibitor"},
            ],
        }
        
        # Try exact match first
        if drug_name_lower in curated_targets:
            return curated_targets[drug_name_lower]
        
        # Try partial match
        for drug_key, targets_list in curated_targets.items():
            if drug_key in drug_name_lower or drug_name_lower in drug_key:
                return targets_list
        
        return []

    @staticmethod
    def _iter_uniprot_accessions(component: Dict) -> List[str]:
        """Normalize ChEMBL target component accessions to UniProt-like IDs."""
        raw_accessions = component.get("accession", [])
        if isinstance(raw_accessions, str):
            raw_accessions = [raw_accessions]
        elif raw_accessions is None:
            raw_accessions = []

        accessions = []
        for accession in raw_accessions:
            accession_text = str(accession).strip()
            if (
                len(accession_text) == 6
                and accession_text[0] in {"O", "P", "Q"}
                and accession_text[-1].isdigit()
            ):
                accessions.append(accession_text)
        return accessions
    
    async def fetch_disease_protein_associations(self, uniprot_ids: List[str]) -> Dict:
        """
        Fetch disease-protein associations from DisGeNET and OpenTargets.
        
        Args:
            uniprot_ids: List of UniProt IDs
            
        Returns:
            Dictionary mapping uniprot_id -> list of diseases with scores
        """
        cache_key = f"disease_proteins_{hash(tuple(sorted(uniprot_ids)))}"
        cached = self.cache.get(cache_key)
        if cached:
            return cached
        
        associations = defaultdict(list)
        
        try:
            async with httpx.AsyncClient(timeout=30.0) as client:
                # Use curated disease-protein associations
                # In production, integrate with DisGeNET/OpenTargets APIs
                curated = self._get_curated_disease_associations_detailed(uniprot_ids)
                
                for uniprot_id, diseases in curated.items():
                    associations[uniprot_id].extend(diseases)
                
        except Exception as e:
            st.warning(f"Error fetching disease associations: {str(e)}")
        
        result = dict(associations)
        self.cache.set(cache_key, result)
        return result
    
    def _get_curated_disease_associations_detailed(self, uniprot_ids: List[str]) -> Dict:
        """
        Detailed curated disease-protein associations with confidence scores.
        Based on known literature and database associations.
        """
        curated = {
            # EGFR - Epidermal Growth Factor Receptor
            "P00533": [
                {"disease_name": "Non-small cell lung cancer", "score": 0.95, "evidence": "Strong"},
                {"disease_name": "Colorectal cancer", "score": 0.85, "evidence": "Strong"},
                {"disease_name": "Head and neck cancer", "score": 0.80, "evidence": "Moderate"},
                {"disease_name": "Glioblastoma", "score": 0.75, "evidence": "Moderate"},
                {"disease_name": "Breast cancer", "score": 0.70, "evidence": "Moderate"},
            ],
            # TP53 - Tumor Protein p53
            "P04637": [
                {"disease_name": "Li-Fraumeni syndrome", "score": 0.98, "evidence": "Strong"},
                {"disease_name": "Ovarian cancer", "score": 0.90, "evidence": "Strong"},
                {"disease_name": "Colorectal cancer", "score": 0.88, "evidence": "Strong"},
                {"disease_name": "Breast cancer", "score": 0.85, "evidence": "Strong"},
                {"disease_name": "Lung cancer", "score": 0.82, "evidence": "Moderate"},
                {"disease_name": "Pancreatic cancer", "score": 0.80, "evidence": "Moderate"},
            ],
            # BRCA1 - Breast Cancer 1
            "P38398": [
                {"disease_name": "Hereditary breast and ovarian cancer", "score": 0.98, "evidence": "Strong"},
                {"disease_name": "Breast cancer", "score": 0.95, "evidence": "Strong"},
                {"disease_name": "Ovarian cancer", "score": 0.92, "evidence": "Strong"},
                {"disease_name": "Prostate cancer", "score": 0.70, "evidence": "Moderate"},
            ],
            # BRCA2 - Breast Cancer 2
            "P51587": [
                {"disease_name": "Hereditary breast and ovarian cancer", "score": 0.98, "evidence": "Strong"},
                {"disease_name": "Breast cancer", "score": 0.95, "evidence": "Strong"},
                {"disease_name": "Ovarian cancer", "score": 0.92, "evidence": "Strong"},
                {"disease_name": "Pancreatic cancer", "score": 0.75, "evidence": "Moderate"},
            ],
            # INS - Insulin
            "P01308": [
                {"disease_name": "Type 1 diabetes", "score": 0.95, "evidence": "Strong"},
                {"disease_name": "Type 2 diabetes", "score": 0.90, "evidence": "Strong"},
                {"disease_name": "Diabetes mellitus", "score": 0.88, "evidence": "Strong"},
                {"disease_name": "Metabolic syndrome", "score": 0.70, "evidence": "Moderate"},
            ],
            # ALB - Albumin
            "P02768": [
                {"disease_name": "Hypoalbuminemia", "score": 0.95, "evidence": "Strong"},
                {"disease_name": "Nephrotic syndrome", "score": 0.85, "evidence": "Strong"},
                {"disease_name": "Liver disease", "score": 0.75, "evidence": "Moderate"},
                {"disease_name": "Malnutrition", "score": 0.70, "evidence": "Moderate"},
            ],
            # ABCB1 - P-glycoprotein (MDR1)
            "P08183": [
                {"disease_name": "Drug resistance", "score": 0.90, "evidence": "Strong"},
                {"disease_name": "Cancer", "score": 0.75, "evidence": "Moderate"},
                {"disease_name": "Epilepsy", "score": 0.65, "evidence": "Moderate"},
            ],
            # PTGS2 - COX-2
            "P35354": [
                {"disease_name": "Inflammation", "score": 0.90, "evidence": "Strong"},
                {"disease_name": "Pain", "score": 0.85, "evidence": "Strong"},
                {"disease_name": "Arthritis", "score": 0.80, "evidence": "Strong"},
                {"disease_name": "Colorectal cancer", "score": 0.70, "evidence": "Moderate"},
            ],
            # PTGS1 - COX-1
            "P23219": [
                {"disease_name": "Inflammation", "score": 0.88, "evidence": "Strong"},
                {"disease_name": "Pain", "score": 0.85, "evidence": "Strong"},
                {"disease_name": "Cardiovascular disease", "score": 0.75, "evidence": "Moderate"},
            ],
            # APP - Amyloid Beta Precursor Protein
            "P05067": [
                {"disease_name": "Alzheimer's Disease", "score": 0.95, "evidence": "Strong"},
                {"disease_name": "Dementia", "score": 0.85, "evidence": "Moderate"},
            ],
            # SNCA - Alpha-synuclein
            "P37840": [
                {"disease_name": "Parkinson's Disease", "score": 0.95, "evidence": "Strong"},
                {"disease_name": "Dementia", "score": 0.80, "evidence": "Moderate"},
            ],
            # HTT - Huntingtin
            "P42858": [
                {"disease_name": "Huntington's Disease", "score": 0.98, "evidence": "Strong"},
            ],
            # CFTR - Cystic Fibrosis Transmembrane Conductance Regulator
            "P13569": [
                {"disease_name": "Cystic fibrosis", "score": 0.98, "evidence": "Strong"},
            ],
        }
        
        result = {}
        for uniprot_id in uniprot_ids:
            if uniprot_id in curated:
                result[uniprot_id] = curated[uniprot_id]
        
        return result
    
    def build_network_graph(self, drug_name: str, drug_targets: List[Dict], 
                           ppi_data: Dict, disease_associations: Dict,
                           pathway_data: Dict) -> nx.Graph:
        """
        Build a network graph connecting drugs, proteins, pathways, and diseases.
        
        Args:
            drug_name: Name of the drug
            drug_targets: List of target proteins
            ppi_data: Protein-protein interaction data
            disease_associations: Disease-protein associations
            pathway_data: Pathway-protein associations
            
        Returns:
            NetworkX graph
        """
        G = nx.Graph()
        
        # Add drug node
        G.add_node(drug_name, node_type="drug")
        
        # Add direct target proteins
        for target in drug_targets:
            uniprot_id = target.get("uniprot_id")
            target_name = target.get("target_name", uniprot_id)
            
            if uniprot_id:
                G.add_node(uniprot_id, node_type="protein", name=target_name)
                G.add_edge(drug_name, uniprot_id, 
                          edge_type="drug_target", 
                          action=target.get("action_type", "unknown"))
        
        # Add PPI network (indirect targets)
        if ppi_data and ppi_data.get("available"):
            interactions = ppi_data.get("interactions", [])
            for interaction in interactions:
                partner_id = interaction.get("partner_id")
                partner_name = interaction.get("partner_name")
                score = interaction.get("combined_score", 0)
                
                # Only add high-confidence interactions
                if score >= 400:  # Medium confidence threshold
                    # Check if this partner is a direct target
                    is_direct_target = any(
                        t.get("uniprot_id") == partner_id for t in drug_targets
                    )
                    
                    if not is_direct_target:
                        G.add_node(partner_id, node_type="protein", name=partner_name)
                        # Link to direct targets (if they exist in graph)
                        for target in drug_targets:
                            target_id = target.get("uniprot_id")
                            if target_id in G:
                                G.add_edge(target_id, partner_id, 
                                          edge_type="ppi", 
                                          score=score,
                                          confidence=interaction.get("confidence", "Medium"))
        
        # Add pathway nodes and connections
        if pathway_data and pathway_data.get("available"):
            pathways = pathway_data.get("pathways", [])
            for pathway in pathways[:10]:  # Limit pathways
                pathway_id = pathway.get("pathway_id", "")
                pathway_name = pathway.get("pathway_name", "")
                
                if pathway_id:
                    G.add_node(pathway_id, node_type="pathway", name=pathway_name)
                    
                    # Connect proteins to pathways
                    for target in drug_targets:
                        target_id = target.get("uniprot_id")
                        if target_id in G:
                            G.add_edge(target_id, pathway_id, edge_type="protein_pathway")
        
        # Add disease nodes and connections
        for uniprot_id, diseases in disease_associations.items():
            if uniprot_id in G:
                for disease_info in diseases:
                    disease_name = disease_info.get("disease_name", "")
                    score = disease_info.get("score", 0)
                    
                    if disease_name and score > 0.3:  # Confidence threshold
                        G.add_node(disease_name, node_type="disease")
                        G.add_edge(uniprot_id, disease_name, 
                                  edge_type="protein_disease", 
                                  score=score)
        
        return G
    
    def calculate_network_proximity(self, graph: nx.Graph, drug_name: str, 
                                   disease_name: str) -> Dict:
        """
        Calculate network proximity between drug and disease.
        Uses shortest path analysis and network distance metrics.
        
        Args:
            graph: NetworkX graph
            drug_name: Name of the drug node
            disease_name: Name of the disease node
            
        Returns:
            Dictionary with proximity metrics
        """
        if drug_name not in graph or disease_name not in graph:
            return {
                "distance": float('inf'),
                "shortest_path": [],
                "proximity_score": 0.0,
                "pathway_count": 0,
                "intermediate_proteins": []
            }
        
        try:
            # Calculate shortest path
            if nx.has_path(graph, drug_name, disease_name):
                shortest_path = nx.shortest_path(graph, drug_name, disease_name)
                distance = len(shortest_path) - 1  # Number of edges
                
                # Extract intermediate nodes
                intermediate_proteins = [
                    node for node in shortest_path[1:-1] 
                    if graph.nodes[node].get("node_type") == "protein"
                ]
                
                # Count pathways in path
                pathway_count = sum(
                    1 for node in shortest_path 
                    if graph.nodes[node].get("node_type") == "pathway"
                )
                
                # Calculate proximity score (inverse of distance, normalized)
                # Shorter paths = higher score
                max_distance = 10  # Maximum expected path length
                proximity_score = max(0, 1 - (distance / max_distance))
                
                # Boost score if pathways are involved
                if pathway_count > 0:
                    proximity_score *= (1 + 0.2 * pathway_count)
                    proximity_score = min(1.0, proximity_score)
                
                return {
                    "distance": distance,
                    "shortest_path": shortest_path,
                    "proximity_score": proximity_score,
                    "pathway_count": pathway_count,
                    "intermediate_proteins": intermediate_proteins,
                    "path_length": len(shortest_path)
                }
            else:
                return {
                    "distance": float('inf'),
                    "shortest_path": [],
                    "proximity_score": 0.0,
                    "pathway_count": 0,
                    "intermediate_proteins": []
                }
                
        except Exception as e:
            st.warning(f"Error calculating proximity: {str(e)}")
            return {
                "distance": float('inf'),
                "shortest_path": [],
                "proximity_score": 0.0,
                "pathway_count": 0,
                "intermediate_proteins": []
            }
    
    def calculate_confidence_score(self, proximity_metrics: Dict, 
                                  disease_associations: Dict,
                                  pathway_count: int) -> float:
        """
        Calculate confidence score for a repurposing prediction.
        Combines multiple factors:
        - Network proximity
        - Disease association strength
        - Pathway involvement
        - Number of connecting paths
        
        Args:
            proximity_metrics: Results from calculate_network_proximity
            disease_associations: Disease-protein association scores
            pathway_count: Number of pathways involved
            
        Returns:
            Confidence score (0-100)
        """
        base_score = 0.0
        
        # Factor 1: Network proximity (40% weight)
        proximity_score = proximity_metrics.get("proximity_score", 0.0)
        distance = proximity_metrics.get("distance", float('inf'))
        
        if distance == 1:
            # Direct connection (drug -> protein -> disease)
            base_score += 40.0
        elif distance == 2:
            # One intermediate (drug -> protein -> protein -> disease)
            base_score += 30.0
        elif distance == 3:
            # Two intermediates
            base_score += 20.0
        elif distance <= 5:
            # Short path
            base_score += 10.0
        
        # Factor 2: Disease association strength (30% weight)
        max_disease_score = 0.0
        for uniprot_id, diseases in disease_associations.items():
            for disease_info in diseases:
                score = disease_info.get("score", 0.0)
                max_disease_score = max(max_disease_score, score)
        
        base_score += max_disease_score * 30.0
        
        # Factor 3: Pathway involvement (20% weight)
        if pathway_count > 0:
            pathway_bonus = min(20.0, pathway_count * 5.0)
            base_score += pathway_bonus
        
        # Factor 4: Number of connecting proteins (10% weight)
        intermediate_count = len(proximity_metrics.get("intermediate_proteins", []))
        if intermediate_count > 0:
            protein_bonus = min(10.0, intermediate_count * 2.0)
            base_score += protein_bonus
        
        # Normalize to 0-100 scale
        confidence = min(100.0, max(0.0, base_score))
        
        return round(confidence, 1)
    
    def generate_explanation(self, drug_name: str, disease_name: str,
                            proximity_metrics: Dict, graph: nx.Graph) -> str:
        """
        Generate human-readable explanation for repurposing prediction.
        
        Args:
            drug_name: Name of the drug
            disease_name: Name of the disease
            proximity_metrics: Proximity analysis results
            graph: Network graph
            
        Returns:
            Explanation string
        """
        distance = proximity_metrics.get("distance", float('inf'))
        shortest_path = proximity_metrics.get("shortest_path", [])
        intermediate_proteins = proximity_metrics.get("intermediate_proteins", [])
        pathway_count = proximity_metrics.get("pathway_count", 0)
        
        if distance == float('inf'):
            return f"No direct or indirect network connection found between {drug_name} and {disease_name}."
        
        explanation_parts = []
        
        # Direct target mechanism
        if distance == 1:
            explanation_parts.append(
                f"{drug_name} directly targets proteins associated with {disease_name}."
            )
        elif distance == 2:
            explanation_parts.append(
                f"{drug_name} targets proteins that interact with disease-associated proteins in {disease_name}."
            )
        else:
            explanation_parts.append(
                f"{drug_name} influences {disease_name} through a network of {distance-1} protein interactions."
            )
        
        # Pathway involvement
        if pathway_count > 0:
            pathway_nodes = [
                graph.nodes[node].get("name", node) 
                for node in shortest_path 
                if graph.nodes[node].get("node_type") == "pathway"
            ]
            if pathway_nodes:
                explanation_parts.append(
                    f"Mechanism involves {', '.join(pathway_nodes[:2])} pathways."
                )
        
        # Intermediate proteins
        if intermediate_proteins:
            protein_names = []
            for protein_id in intermediate_proteins[:3]:
                name = graph.nodes[protein_id].get("name", protein_id)
                protein_names.append(name)
            
            if protein_names:
                explanation_parts.append(
                    f"Key intermediate proteins: {', '.join(protein_names)}."
                )
        
        return " ".join(explanation_parts)
    
    async def predict_repurposing_opportunities(self, drug_name: str, 
                                                drugbank_id: Optional[str] = None,
                                                max_results: int = 10) -> List[Dict]:
        """
        Main function to predict drug repurposing opportunities.
        
        Args:
            drug_name: Name of the drug
            drugbank_id: Optional DrugBank ID
            max_results: Maximum number of predictions to return
            
        Returns:
            List of repurposing predictions with scores and explanations
        """
        # Step 1: Fetch drug targets
        drug_targets_data = await self.fetch_drug_targets(drug_name, drugbank_id)
        drug_targets = drug_targets_data.get("targets", [])
        
        if not drug_targets:
            return [{
                "disease_name": "No targets found",
                "confidence": 0.0,
                "explanation": f"Could not identify protein targets for {drug_name}. Please verify the drug name or DrugBank ID.",
                "affected_proteins": [],
                "pathways": []
            }]
        
        # Step 2: Fetch PPI data for targets
        all_ppi_data = {}
        all_uniprot_ids = [t.get("uniprot_id") for t in drug_targets if t.get("uniprot_id")]
        
        # Fetch PPI for each target (limit to first 3 to avoid too many API calls)
        for target in drug_targets[:3]:
            uniprot_id = target.get("uniprot_id")
            if uniprot_id:
                try:
                    # Get gene name from UniProt
                    uniprot_data = await self.api_client.fetch_uniprot_data(uniprot_id)
                    gene_name = uniprot_data.get("gene_name", "")
                    
                    if gene_name:
                        ppi_data = await self.api_client.fetch_string_ppi(gene_name, uniprot_id, limit=15)
                        if ppi_data and ppi_data.get("available"):
                            all_ppi_data[uniprot_id] = ppi_data
                except Exception as e:
                    st.warning(f"Could not fetch PPI data for {uniprot_id}: {str(e)}")
                    continue
        
        # Step 3: Fetch pathway data
        pathway_data = {}
        if all_uniprot_ids:
            try:
                first_target = drug_targets[0]
                uniprot_id = first_target.get("uniprot_id")
                uniprot_data = await self.api_client.fetch_uniprot_data(uniprot_id)
                gene_name = uniprot_data.get("gene_name", "")
                
                if gene_name:
                    pathway_data = await self.api_client.fetch_kegg_pathways(gene_name, uniprot_id)
            except Exception as e:
                st.warning(f"Could not fetch pathway data: {str(e)}")
                pathway_data = {}
        
        # Step 4: Fetch disease associations
        disease_associations = await self.fetch_disease_protein_associations(all_uniprot_ids)
        
        # Step 5: Build network graph
        graph = self.build_network_graph(
            drug_name, drug_targets, 
            all_ppi_data.get(all_uniprot_ids[0] if all_uniprot_ids else "", {}),
            disease_associations,
            pathway_data
        )
        
        # Step 6: Find all diseases in graph
        diseases_in_graph = [
            node for node in graph.nodes() 
            if graph.nodes[node].get("node_type") == "disease"
        ]
        
        if not diseases_in_graph:
            # Fallback: Use curated disease-protein associations
            diseases_in_graph = self._get_curated_disease_associations(all_uniprot_ids)
        
        # Step 7: Calculate repurposing scores for each disease
        predictions = []
        
        for disease_name in diseases_in_graph[:50]:  # Limit to avoid too many calculations
            # Calculate network proximity
            proximity_metrics = self.calculate_network_proximity(graph, drug_name, disease_name)
            
            # Calculate confidence score
            confidence = self.calculate_confidence_score(
                proximity_metrics, disease_associations, 
                proximity_metrics.get("pathway_count", 0)
            )
            
            # Generate explanation
            explanation = self.generate_explanation(
                drug_name, disease_name, proximity_metrics, graph
            )
            
            # Extract affected proteins and pathways
            affected_proteins = proximity_metrics.get("intermediate_proteins", [])
            if proximity_metrics.get("shortest_path"):
                # Get protein names from path
                protein_names = [
                    graph.nodes[node].get("name", node)
                    for node in proximity_metrics["shortest_path"]
                    if graph.nodes[node].get("node_type") == "protein"
                ]
                affected_proteins = list(set(affected_proteins + protein_names))
            
            pathway_names = [
                graph.nodes[node].get("name", node)
                for node in proximity_metrics.get("shortest_path", [])
                if graph.nodes[node].get("node_type") == "pathway"
            ]
            
            predictions.append({
                "disease_name": disease_name,
                "confidence": confidence,
                "explanation": explanation,
                "affected_proteins": affected_proteins[:5],  # Limit to top 5
                "pathways": pathway_names,
                "distance": proximity_metrics.get("distance", float('inf')),
                "proximity_score": proximity_metrics.get("proximity_score", 0.0)
            })
        
        # Step 8: Sort by confidence and return top results
        predictions.sort(key=lambda x: x["confidence"], reverse=True)
        
        return predictions[:max_results]
    
    def _get_curated_disease_associations(self, uniprot_ids: List[str]) -> List[str]:
        """
        Curated disease-protein associations for common proteins.
        Used as fallback when API data is unavailable.
        """
        detailed = self._get_curated_disease_associations_detailed(uniprot_ids)
        diseases = set()
        for uniprot_id, disease_list in detailed.items():
            for disease_info in disease_list:
                diseases.add(disease_info["disease_name"])
        return list(diseases)