# mypy: enable-error-code=var-annotated import plotly.graph_objects as go import plotly.express as px from typing import Any, Dict, List, Optional, TypedDict import pandas as pd import numpy as np from collections import defaultdict from functools import wraps import matplotlib matplotlib.use('Agg') # Use non-interactive backend for Streamlit import matplotlib.pyplot as plt from io import StringIO, BytesIO import base64 import html as html_lib # visualizations.py - Plotly chart generation class ProteinVisualizer: """Creates interactive Plotly visualizations for protein data""" COLORWAY = [ "#2563eb", "#16a34a", "#f59e0b", "#dc2626", "#7c3aed", "#0891b2", "#db2777", "#64748b", ] QUALITY_COLORS = { "excellent": "#15803d", "good": "#65a30d", "moderate": "#d97706", "poor": "#dc2626", "neutral": "#64748b", } DOMAIN_PALETTES = { "alpha_fold_confidence": { "very_high": "#0053D6", "high": "#65CBF3", "low": "#FFDB13", "very_low": "#FF7D45", }, "atom_element": { "carbon": "#3f3f46", "oxygen": "#ef4444", "nitrogen": "#2563eb", "sulfur": "#f59e0b", "phosphorus": "#fb923c", "hydrogen": "#f8fafc", }, "protein_chain": { "chain_a": "#1f9d55", "chain_b": "#2563eb", "chain_c": "#7c3aed", "chain_d": "#dc2626", }, "docking_strength": { "strong": "#15803d", "moderate": "#d97706", "weak": "#dc2626", "unavailable": "#64748b", }, "clinical_risk": { "low": "#16a34a", "medium": "#f59e0b", "high": "#dc2626", "unknown": "#64748b", }, } @staticmethod def _empty_figure(message: str, height: int = 360, title: Optional[str] = None) -> go.Figure: """Return a consistently styled no-data figure.""" fig = go.Figure() fig.add_annotation( text=message, xref="paper", yref="paper", x=0.5, y=0.5, showarrow=False, align="center", font=dict(size=14, color="#64748b"), ) fig.update_layout(height=height, title=title) return fig @staticmethod def _style_figure(fig: go.Figure) -> go.Figure: """Apply OmniBiMol defaults without overwriting chart-specific encodings.""" default_margin = {"l": 70, "r": 36, "t": 64, "b": 64} existing_margin = fig.layout.margin.to_plotly_json() if fig.layout.margin else {} margin = { side: existing_margin[side] if existing_margin.get(side) is not None else value for side, value in default_margin.items() } fig.update_layout( template="plotly_white", colorway=ProteinVisualizer.COLORWAY, font=dict(family="Inter, Arial, sans-serif", size=13, color="#1f2937"), title=dict(font=dict(size=18, color="#111827"), x=0.02, xanchor="left"), margin=margin, hoverlabel=dict( bgcolor="white", bordercolor="#cbd5e1", font=dict(color="#111827", size=12), ), legend=dict( orientation="h", yanchor="bottom", y=1.02, xanchor="right", x=1, bgcolor="rgba(255,255,255,0.85)", ), ) for axis_name in ("xaxis", "xaxis2", "yaxis", "yaxis2"): axis = getattr(fig.layout, axis_name, None) if axis is None: continue if axis.showgrid is None and axis.overlaying is None: axis.showgrid = True if axis.gridcolor is None: axis.gridcolor = "#e5e7eb" if axis.zerolinecolor is None: axis.zerolinecolor = "#94a3b8" if axis.linecolor is None: axis.linecolor = "#cbd5e1" if axis.ticks is None: axis.ticks = "outside" return fig @staticmethod def _affinity_semantics(predictions: List[Dict]) -> Dict[str, Any]: units = [ str(p.get("prediction", {}).get("binding_affinity_units", "")).strip() for p in predictions ] unit_label = next((unit for unit in units if unit), "kcal/mol") normalized_units = unit_label.lower() higher_is_better = any( marker in normalized_units for marker in ("paffinity", "-log10", "pkd", "pic50", "pec50") ) return { "unit_label": unit_label, "higher_is_better": higher_is_better, "axis_hint": "higher is stronger" if higher_is_better else "lower is stronger", } @staticmethod def _affinity_colors(affinities: List[float], higher_is_better: bool) -> List[str]: colors = [] for affinity in affinities: if higher_is_better: if affinity >= 8: colors.append(ProteinVisualizer.QUALITY_COLORS["excellent"]) elif affinity >= 7: colors.append(ProteinVisualizer.QUALITY_COLORS["good"]) elif affinity >= 6: colors.append(ProteinVisualizer.QUALITY_COLORS["moderate"]) else: colors.append(ProteinVisualizer.QUALITY_COLORS["poor"]) else: if affinity <= -8: colors.append(ProteinVisualizer.QUALITY_COLORS["excellent"]) elif affinity <= -6: colors.append(ProteinVisualizer.QUALITY_COLORS["good"]) elif affinity <= -4: colors.append(ProteinVisualizer.QUALITY_COLORS["moderate"]) else: colors.append(ProteinVisualizer.QUALITY_COLORS["poor"]) return colors @staticmethod def create_tissue_expression_chart(df: pd.DataFrame) -> go.Figure: """ Create horizontal bar chart for tissue expression levels Color-coded by expression level (High=red, Medium=orange, Low=yellow, None=gray) """ if df.empty: fig = go.Figure() fig.add_annotation( text="No tissue expression data available", xref="paper", yref="paper", x=0.5, y=0.5, showarrow=False, font=dict(size=16, color="gray") ) fig.update_layout(height=400) return fig # Color mapping color_map = { "High": "#d62728", "Medium": "#ff7f0e", "Low": "#ffdd57", "Not detected": "#d3d3d3" } colors = [color_map.get(level, "#d3d3d3") for level in df["level"]] fig = go.Figure(go.Bar( x=df["level_numeric"], y=df["tissue"], orientation='h', marker=dict( color=colors, line=dict(color='rgb(8,48,107)', width=1) ), text=df["level"], textposition='auto', hovertemplate='%{y}
Expression: %{text}' )) fig.update_layout( title="Tissue Expression Levels (Top 20)", xaxis_title="Expression Level", yaxis_title="Tissue", height=max(400, len(df) * 20), xaxis=dict( tickmode='array', tickvals=[0, 1, 2, 3], ticktext=['Not detected', 'Low', 'Medium', 'High'] ), showlegend=False, template="plotly_white", margin=dict(l=150, r=50, t=50, b=50) ) return fig @staticmethod def create_subcellular_heatmap(df: pd.DataFrame) -> go.Figure: """ Create heatmap for subcellular locations with reliability scores """ if df.empty: fig = go.Figure() fig.add_annotation( text="No subcellular location data available", xref="paper", yref="paper", x=0.5, y=0.5, showarrow=False, font=dict(size=16, color="gray") ) fig.update_layout(height=300) return fig # Create matrix format z_data = [[val] for val in df["reliability_numeric"]] fig = go.Figure(data=go.Heatmap( z=z_data, x=["Reliability"], y=df["location"], colorscale=[[0, '#f0f0f0'], [0.33, '#ffdd57'], [0.66, '#ff7f0e'], [1, '#2ca02c']], text=[[rel] for rel in df["reliability"]], texttemplate='%{text}', textfont={"size": 10}, colorbar=dict( title="Reliability", tickmode='array', tickvals=[0, 1, 2, 3], ticktext=['Uncertain', 'Approved', 'Supported', 'Enhanced'] ), hovertemplate='%{y}
Reliability: %{text}' )) fig.update_layout( title="Subcellular Localization", height=max(300, len(df) * 30), template="plotly_white", margin=dict(l=150, r=150, t=50, b=50) ) return fig @staticmethod def create_go_terms_chart(go_terms: Dict) -> go.Figure: """ Create stacked bar chart showing GO term counts by category """ categories = [] counts = [] for category, terms in go_terms.items(): if terms: categories.append(category) counts.append(len(terms)) if not categories: fig = go.Figure() fig.add_annotation( text="No GO terms available", xref="paper", yref="paper", x=0.5, y=0.5, showarrow=False, font=dict(size=16, color="gray") ) fig.update_layout(height=250) return fig fig = go.Figure(go.Bar( x=categories, y=counts, marker=dict( color=['#1f77b4', '#ff7f0e', '#2ca02c'], line=dict(color='rgb(8,48,107)', width=1) ), text=counts, textposition='auto', hovertemplate='%{x}
Count: %{y}' )) fig.update_layout( title="Gene Ontology Term Distribution", xaxis_title="GO Category", yaxis_title="Number of Terms", height=300, template="plotly_white", showlegend=False ) return fig @staticmethod def _viewer_message(message: str, tone: str = "neutral") -> str: colors = { "neutral": ("#f8fafc", "#cbd5e1", "#334155"), "warning": ("#fff7ed", "#fed7aa", "#9a3412"), "error": ("#fef2f2", "#fecaca", "#991b1b"), } bg, border, text = colors.get(tone, colors["neutral"]) return f"""
{html_lib.escape(message)}
""" @staticmethod def _molstar_shell( *, title: str, body_script: str, height: int = 560, legend_html: str = "", status: str = "", show_side_panels: bool = True, ) -> str: status_html = ( f'
{html_lib.escape(status)}
' if status else "" ) return f"""
{html_lib.escape(title)}{status_html}
{legend_html}
""" @staticmethod def create_molstar_structure_viewer( structure_data: Dict, structure_type: str = "alphafold" ) -> str: """Create a Mol*-based protein structure viewer.""" import json if structure_type == "alphafold" and structure_data.get("available"): pdb_url = structure_data.get("pdb_url") alphafold_accession = str(structure_data.get("uniprot_id") or "").strip().upper() title = f"AlphaFold Prediction - {alphafold_accession or 'Unknown'}" status = "Colored by AlphaFold pLDDT confidence" legend_html = """
Very high (pLDDT > 90)
Confident (70-90)
Low (50-70)
Very low (< 50)
""" elif structure_type == "pdb" and structure_data.get("available"): structures = structure_data.get("structures") or [] first_structure = structures[0] if structures else {} pdb_url = first_structure.get("pdb_url") or structure_data.get("pdb_url") pdb_text = str(structure_data.get("pdb_text") or "") title = f"Experimental Structure - {first_structure.get('pdb_id') or structure_data.get('structure_id') or 'PDB'}" ligand_summary = first_structure.get("ligand_summary") if ligand_summary is None: if "bound_components" in first_structure: bound_components = first_structure.get("bound_components") or [] ligand_summary = ( ", ".join(str(component) for component in bound_components) if bound_components else "None annotated" ) else: ligand_summary = "Not fetched" elif not ligand_summary: bound_components = first_structure.get("bound_components") or [] ligand_summary = ( ", ".join(str(component) for component in bound_components) if bound_components else "None annotated" ) status = f"Mol* default structure rendering; bound components: {ligand_summary}" legend_html = "" else: return ProteinVisualizer._viewer_message( str(structure_data.get("error", "No structure available")), "warning" ) pdb_text = str(structure_data.get("pdb_text") or "") if structure_type != "alphafold" else "" if not pdb_url and not pdb_text: return ProteinVisualizer._viewer_message("Invalid structure URL", "error") source_url = json.dumps(str(pdb_url)) source_text = json.dumps(pdb_text) if structure_type == "alphafold": accession_json = json.dumps(alphafold_accession) body_script = f""" const alphaFoldAccession = {accession_json}; const sourceUrl = {source_url}; try {{ await viewer.loadAlphaFoldDb(alphaFoldAccession); }} catch (alphaFoldError) {{ console.warn("AlphaFold DB loader failed; falling back to the exact PDB URL.", alphaFoldError); if (!sourceUrl) throw alphaFoldError; await viewer.loadStructureFromUrl(sourceUrl, "pdb", false, {{ representationParams: {{ theme: {{ globalName: "plddt-confidence", focus: {{name: "plddt-confidence"}} }} }} }}); }} """ else: body_script = f""" const sourceUrl = {source_url}; const sourceText = {source_text}; if (sourceText && sourceText.length > 0) {{ await viewer.loadStructureFromData(sourceText, "pdb"); }} else {{ await viewer.loadStructureFromUrl(sourceUrl, "pdb"); }} """ return ProteinVisualizer._molstar_shell( title=title, status=status, legend_html=legend_html, body_script=body_script, height=560, ) @staticmethod def create_structure_viewer(structure_data: Dict, structure_type: str = "alphafold") -> str: """ Create HTML for a Mol*-based 3D protein structure viewer. Kept as a compatibility wrapper for existing Streamlit call sites. """ return ProteinVisualizer.create_molstar_structure_viewer(structure_data, structure_type) if structure_type == "alphafold" and structure_data.get("available"): pdb_url = structure_data.get("pdb_url") uniprot_id = structure_data.get("uniprot_id") title = f"AlphaFold Prediction - {uniprot_id}" color_scheme = "bfactor" # Color by confidence elif structure_type == "pdb" and structure_data.get("available"): pdb_url = structure_data["structures"][0].get("pdb_url") pdb_id = structure_data["structures"][0].get("pdb_id") title = f"Experimental Structure - {pdb_id}" color_scheme = "chainindex" # Color by chain else: error_msg = structure_data.get("error", "No structure available") return f"

{error_msg}

" # Validate URL if not pdb_url or pdb_url == "": return "

Error: Invalid structure URL

" html = f"""
Loading 3D structure from:
{pdb_url}
{title}
""" return html @staticmethod def create_confidence_plot( uniprot_id: str, entry_id: Optional[str] = None, pdb_url: Optional[str] = None, ) -> go.Figure: """ Create plot showing AlphaFold confidence scores along sequence pLDDT scores: >90=very high, 70-90=confident, 50-70=low, <50=very low """ import httpx try: # Use entry_id if provided, otherwise construct it if not entry_id: entry_id = f"AF-{uniprot_id}-F1" # Prefer the exact model URL returned by AlphaFold metadata. The # version candidates remain for compatibility with older callers. urls_to_try = [url for url in [pdb_url] if url] urls_to_try.extend([ f"https://alphafold.ebi.ac.uk/files/{entry_id}-model_v6.pdb", f"https://alphafold.ebi.ac.uk/files/{entry_id}-model_v4.pdb", f"https://alphafold.ebi.ac.uk/files/{entry_id}-model_v3.pdb", f"https://alphafold.ebi.ac.uk/files/{entry_id}-model_v2.pdb", ]) urls_to_try = list(dict.fromkeys(urls_to_try)) pdb_content = None for pdb_url in urls_to_try: try: response = httpx.get(pdb_url, timeout=30.0, follow_redirects=True) if response.status_code == 200: pdb_content = response.text break except: continue if not pdb_content: raise Exception(f"No AlphaFold structure found for {uniprot_id}") # Parse pLDDT scores from B-factor column in PDB file residues = [] plddt_scores = [] for line in pdb_content.split('\n'): if line.startswith('ATOM') and line[13:15].strip() == 'CA': # Only CA atoms try: residue_num = int(line[22:26].strip()) bfactor = float(line[60:66].strip()) residues.append(residue_num) plddt_scores.append(bfactor) except: continue if not residues: raise Exception("No confidence data found in PDB file") # Create color mapping for scatter plot colors = [] for score in plddt_scores: if score > 90: colors.append('#0053D6') # Very high - dark blue elif score > 70: colors.append('#65CBF3') # Confident - light blue elif score > 50: colors.append('#FFDB13') # Low - yellow else: colors.append('#FF7D45') # Very low - orange fig = go.Figure() fig.add_trace(go.Scatter( x=residues, y=plddt_scores, mode='lines', line=dict(color='#1f77b4', width=2), fill='tozeroy', fillcolor='rgba(31, 119, 180, 0.3)', name='pLDDT Score', hovertemplate='Residue: %{x}
Confidence: %{y:.1f}' )) # Add confidence level zones fig.add_hrect(y0=90, y1=100, fillcolor="rgba(0, 83, 214, 0.1)", line_width=0, annotation_text="Very High", annotation_position="right") fig.add_hrect(y0=70, y1=90, fillcolor="rgba(101, 203, 243, 0.1)", line_width=0, annotation_text="Confident", annotation_position="right") fig.add_hrect(y0=50, y1=70, fillcolor="rgba(255, 219, 19, 0.1)", line_width=0, annotation_text="Low", annotation_position="right") fig.add_hrect(y0=0, y1=50, fillcolor="rgba(255, 125, 69, 0.1)", line_width=0, annotation_text="Very Low", annotation_position="right") fig.update_layout( title="AlphaFold Confidence Score (pLDDT) per Residue", xaxis_title="Residue Position", yaxis_title="Confidence Score (pLDDT)", yaxis=dict(range=[0, 100]), height=350, template="plotly_white", hovermode='x unified' ) return fig except Exception as e: # Return empty figure with error message fig = go.Figure() fig.add_annotation( text=f"AlphaFold structure not available for {uniprot_id}

This protein may not be in the AlphaFold database.", xref="paper", yref="paper", x=0.5, y=0.5, showarrow=False, font=dict(size=14, color="gray") ) fig.update_layout(height=350, template="plotly_white") return fig @staticmethod def create_pathway_network(pathways: list) -> go.Figure: """ Create network visualization showing pathway relationships Groups pathways by class for better organization """ if not pathways: fig = go.Figure() fig.add_annotation( text="No pathway data available", xref="paper", yref="paper", x=0.5, y=0.5, showarrow=False, font=dict(size=16, color="gray") ) fig.update_layout(height=400) return fig # Group pathways by class pathway_classes: dict[str, list[Dict]] = {} for pathway in pathways: pathway_class = pathway.get('pathway_class', 'Other') # Extract main class (before semicolon) if ';' in pathway_class: pathway_class = pathway_class.split(';')[0].strip() if pathway_class not in pathway_classes: pathway_classes[pathway_class] = [] pathway_classes[pathway_class].append(pathway) # Create sunburst chart showing pathway hierarchy labels = ["Pathways"] parents = [""] values = [len(pathways)] colors = ["#1f77b4"] hover_texts = [f"Total Pathways: {len(pathways)}"] # Color palette for classes class_colors = ['#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf'] for idx, (pathway_class, class_pathways) in enumerate(pathway_classes.items()): # Add class level class_label = pathway_class if pathway_class else "Unclassified" labels.append(class_label) parents.append("Pathways") values.append(len(class_pathways)) colors.append(class_colors[idx % len(class_colors)]) hover_texts.append(f"{class_label}
Count: {len(class_pathways)}") # Add individual pathways for pathway in class_pathways[:10]: # Limit to first 10 per class for readability labels.append(pathway['pathway_name'][:30]) # Truncate long names parents.append(class_label) values.append(1) colors.append(class_colors[idx % len(class_colors)]) hover_texts.append(f"{pathway['pathway_name']}
ID: {pathway['pathway_id']}") fig = go.Figure(go.Sunburst( labels=labels, parents=parents, values=values, marker=dict(colors=colors), text=hover_texts, hovertemplate='%{text}', branchvalues="total" )) fig.update_layout( title="KEGG Pathway Classification", height=500, margin=dict(t=50, l=0, r=0, b=0) ) return fig @staticmethod def create_pathway_table_html(pathways: list) -> str: """ Create formatted HTML table for pathways with clickable links """ if not pathways: return "

No pathways found

" html = """ """ for pathway in pathways: pathway_name = pathway.get('pathway_name', 'Unknown') pathway_id = pathway.get('pathway_id', '') pathway_class = pathway.get('pathway_class', 'N/A') highlight_url = pathway.get('highlight_url', pathway.get('kegg_url', '#')) html += f""" """ html += """
Pathway Name Pathway ID Classification
{pathway_name} {pathway_id} {pathway_class}
""" return html @staticmethod def analyze_sequence_composition(sequence: str) -> Dict: """ Analyze amino acid composition of the sequence """ if not sequence: return {} # Count amino acids aa_counts: dict[str, int] = {} for aa in sequence: aa_counts[aa] = aa_counts.get(aa, 0) + 1 total = len(sequence) # Calculate percentages aa_composition = {aa: (count / total) * 100 for aa, count in aa_counts.items()} # Group by properties hydrophobic = ['A', 'V', 'I', 'L', 'M', 'F', 'W', 'P'] polar = ['S', 'T', 'Y', 'N', 'Q', 'C'] charged = ['K', 'R', 'H', 'D', 'E'] hydrophobic_percent = sum(aa_composition.get(aa, 0) for aa in hydrophobic) polar_percent = sum(aa_composition.get(aa, 0) for aa in polar) charged_percent = sum(aa_composition.get(aa, 0) for aa in charged) return { "aa_composition": aa_composition, "hydrophobic_percent": hydrophobic_percent, "polar_percent": polar_percent, "charged_percent": charged_percent, "length": total } @staticmethod def create_sequence_composition_chart(composition_data: Dict) -> go.Figure: """ Create bar chart showing amino acid composition """ if not composition_data: fig = go.Figure() fig.add_annotation( text="No sequence data available", xref="paper", yref="paper", x=0.5, y=0.5, showarrow=False, font=dict(size=16, color="gray") ) fig.update_layout(height=400) return fig aa_comp = composition_data.get('aa_composition', {}) # Sort by percentage sorted_aa = sorted(aa_comp.items(), key=lambda x: x[1], reverse=True) amino_acids = [aa for aa, _ in sorted_aa] percentages = [pct for _, pct in sorted_aa] # Color by property colors = [] for aa in amino_acids: if aa in ['A', 'V', 'I', 'L', 'M', 'F', 'W', 'P']: colors.append('#ff7f0e') # Hydrophobic - orange elif aa in ['S', 'T', 'Y', 'N', 'Q', 'C']: colors.append('#2ca02c') # Polar - green elif aa in ['K', 'R', 'H', 'D', 'E']: colors.append('#d62728') # Charged - red else: colors.append('#7f7f7f') # Other - gray fig = go.Figure(go.Bar( x=amino_acids, y=percentages, marker=dict(color=colors, line=dict(color='rgb(8,48,107)', width=1)), text=[f"{p:.1f}%" for p in percentages], textposition='outside', hovertemplate='%{x}
Percentage: %{y:.2f}%' )) fig.update_layout( title="Amino Acid Composition", xaxis_title="Amino Acid", yaxis_title="Percentage (%)", height=400, template="plotly_white", showlegend=False ) return fig @staticmethod def create_blast_results_table_html(blast_hits: list) -> str: """ Create formatted HTML table for BLAST results Displays all new fields: similarity, gaps, coverage, query range """ if not blast_hits: return "

No BLAST results available

" html = """ """ for idx, hit in enumerate(blast_hits, 1): accession_raw = str(hit.get('accession', 'N/A')) title_raw = str(hit.get('title', 'Unknown')) organism_raw = str(hit.get('organism', 'Unknown')) identity = hit.get('identity_percent', 0) similarity = hit.get('similarity_percent', 0) coverage = hit.get('coverage_percent', 0) gaps = hit.get('gap_percent', 0) e_value = hit.get('e_value', 1.0) bit_score = hit.get('bit_score', 0) query_range = hit.get('query_range', 'N/A') ncbi_url_raw = str( hit.get('ncbi_url', f'https://www.ncbi.nlm.nih.gov/protein/{accession_raw}') ) # Truncate description short_title_raw = title_raw[:80] + ('...' if len(title_raw) > 80 else '') accession = html_lib.escape(accession_raw) title = html_lib.escape(title_raw, quote=True) short_title = html_lib.escape(short_title_raw) organism = html_lib.escape(organism_raw) ncbi_url = html_lib.escape(ncbi_url_raw, quote=True) # Color code identity if identity >= 80: identity_class = "identity-high" elif identity >= 40: identity_class = "identity-medium" else: identity_class = "identity-low" # Format e-value if e_value == 0: e_value_str = "0.0" elif e_value < 1e-100: e_value_str = "< 1e-100" elif e_value < 0.0001: e_value_str = f"{e_value:.2e}" else: e_value_str = f"{e_value:.4f}" html += f""" """ html += """
# Accession Description Organism Identity Similarity Coverage Gaps E-value Score
{idx} {accession} {short_title} {organism} {identity:.1f}% {similarity:.1f}% {coverage:.1f}% {gaps:.1f}% {e_value_str} {bit_score:.0f}
""" return html @staticmethod def create_feature_map(features: list, sequence_length: int) -> go.Figure: """ Create visual map of protein features (domains, regions, sites) """ if not features: return ProteinVisualizer._empty_figure("No feature annotations available", height=300) try: sequence_length = max(1, int(sequence_length)) except (TypeError, ValueError): sequence_length = 1 # Group features by type feature_types: dict[str, list[Dict]] = {} for feature in features: ftype = feature.get('type', 'Other') if ftype not in feature_types: feature_types[ftype] = [] feature_types[ftype].append(feature) # Create figure fig = go.Figure() # Color palette for different feature types colors = { 'DOMAIN': '#1f77b4', 'REGION': '#ff7f0e', 'BINDING': '#2ca02c', 'SITE': '#d62728', 'MOTIF': '#9467bd', 'TRANSMEM': '#8c564b', 'SIGNAL': '#e377c2', 'Other': '#7f7f7f' } y_position = 0 legend_seen = set() for ftype, feats in feature_types.items(): for feat in feats: try: start = int(feat.get('start', 0) or 0) end = int(feat.get('end', 0) or 0) except (TypeError, ValueError): continue start = max(1, min(start, sequence_length)) end = max(1, min(end, sequence_length)) if end < start: start, end = end, start if start == end: end = min(sequence_length, start + 1) description = feat.get('description', ftype) color = colors.get(ftype, colors['Other']) show_legend = ftype not in legend_seen legend_seen.add(ftype) # Add rectangle for feature fig.add_trace(go.Scatter( x=[start, end, end, start, start], y=[y_position, y_position, y_position + 0.8, y_position + 0.8, y_position], fill='toself', fillcolor=color, line=dict(color=color, width=2), hovertemplate=f'{ftype}
{description}
Position: {start}-{end}
Length: {end-start+1} aa', name=ftype, showlegend=show_legend, legendgroup=ftype )) y_position += 1 # Add full-length protein bar at bottom fig.add_trace(go.Scatter( x=[1, sequence_length], y=[-1, -1], mode='lines', line=dict(color='black', width=3), showlegend=False, hoverinfo='skip' )) fig.update_layout( title="Protein Feature Map", xaxis_title="Amino Acid Position", xaxis=dict(range=[1, max(1, sequence_length)]), yaxis=dict( showticklabels=False, range=[-2, y_position + 1] ), height=max(300, y_position * 30 + 100), template="plotly_white", hovermode='closest' ) return fig @staticmethod def create_alignment_visualization(alignment_data: Dict) -> str: """ Create HTML visualization for pairwise alignment """ if not alignment_data.get('available'): return "

No alignment data available

" identity = alignment_data.get('identity', 0) similarity = alignment_data.get('similarity', 0) gaps = alignment_data.get('gaps', 0) score = alignment_data.get('score', 0) alignment_text = html_lib.escape(str(alignment_data.get('alignment_display', ''))) # Determine quality color if identity >= 70: quality_color = "#28a745" quality_text = "High" elif identity >= 40: quality_color = "#ffc107" quality_text = "Moderate" else: quality_color = "#dc3545" quality_text = "Low" html = f"""
Alignment Quality: {quality_text}
{identity:.1f}%
Identity
{similarity:.1f}%
Similarity
{gaps:.1f}%
Gaps
{score:.1f}
Alignment Score

Alignment Details:

{alignment_text}
""" return html @staticmethod def create_ligand_table_html(ligands: list) -> str: """ Create formatted HTML table for known ligands """ if not ligands: return "

No known ligands found

" html = """ """ for ligand in ligands: chembl_id = ligand.get('chembl_id', 'N/A') name = ligand.get('name') # Use ChEMBL ID if name is None or empty if not name: name = chembl_id activity_type = ligand.get('activity_type', 'N/A') activity_value = ligand.get('activity_value', 0) activity_units = ligand.get('activity_units', 'nM') mw = ligand.get('molecular_weight', 'N/A') chembl_url = ligand.get('chembl_url', '#') # Format activity value if activity_value < 100: activity_class = "activity-strong" activity_label = "Strong" elif activity_value < 1000: activity_class = "activity-moderate" activity_label = "Moderate" else: activity_class = "activity-weak" activity_label = "Weak" # Structure image from ChEMBL img_url = f"https://www.ebi.ac.uk/chembl/api/data/image/{chembl_id}.svg" # Escape single quotes in name for JavaScript name_escaped = name.replace("'", "\\'") html += f""" """ html += """
Structure Compound Name ChEMBL ID Activity Type Value MW (Da) Action
{name} {name} {chembl_id} {activity_type} {activity_value:.1f} {activity_units}
{activity_label}
{mw if isinstance(mw, str) else f"{mw:.1f}"}
""" return html @staticmethod def create_docking_results_chart(docking_results: Dict) -> go.Figure: """ Create bar chart showing binding affinities for different binding modes """ if not docking_results.get('available'): return ProteinVisualizer._empty_figure("No docking results available", height=400) modes = docking_results.get('modes', []) if not modes: return ProteinVisualizer._empty_figure("No docking modes available", height=400) valid_modes = [] for mode in modes: try: affinity = float(mode.get("affinity")) except (TypeError, ValueError): continue if np.isfinite(affinity): valid_modes.append((mode, affinity)) if not valid_modes: return ProteinVisualizer._empty_figure("No valid docking scores available", height=400) modes = [mode for mode, _ in valid_modes] mode_numbers = [m.get('mode', idx + 1) for idx, m in enumerate(modes)] affinities = [affinity for _, affinity in valid_modes] best_affinity = min(affinities) # Color code by affinity strength colors = [] for aff in affinities: if aff <= -8: colors.append(ProteinVisualizer.QUALITY_COLORS["excellent"]) elif aff <= -6: colors.append(ProteinVisualizer.QUALITY_COLORS["good"]) elif aff <= -4: colors.append(ProteinVisualizer.QUALITY_COLORS["moderate"]) else: colors.append(ProteinVisualizer.QUALITY_COLORS["poor"]) fig = go.Figure(go.Bar( x=mode_numbers, y=affinities, marker=dict(color=colors, line=dict(color="#334155", width=0.8)), text=[f"{a:.1f}" for a in affinities], textposition='outside', customdata=[ [ "Best pose" if aff == best_affinity else "Alternative pose", mode.get("rmsd_lb"), mode.get("rmsd_ub"), ] for mode, aff in zip(modes, affinities) ], hovertemplate=( 'Mode %{x}
Affinity: %{y:.2f} kcal/mol' '
%{customdata[0]}' '
Vina RMSD lower: %{customdata[1]} Å' '
Vina RMSD upper: %{customdata[2]} Å' ) )) fig.update_layout( title="Predicted Binding Modes", xaxis_title="Binding Mode", yaxis_title="Binding Affinity (kcal/mol, lower is stronger)", yaxis=dict(zeroline=True), height=400, template="plotly_white", showlegend=False ) # Add reference lines fig.add_hline(y=-7, line_dash="dash", line_color=ProteinVisualizer.QUALITY_COLORS["excellent"], annotation_text="Strong binding", annotation_position="right") fig.add_hline(y=-5, line_dash="dash", line_color=ProteinVisualizer.QUALITY_COLORS["moderate"], annotation_text="Moderate binding", annotation_position="right") return fig @staticmethod def create_ppi_network_chart(interactions: list, query_protein: str) -> go.Figure: """ Create network graph for protein-protein interactions """ if not interactions: fig = go.Figure() fig.add_annotation( text="No protein interactions available", xref="paper", yref="paper", x=0.5, y=0.5, showarrow=False, font=dict(size=16, color="gray") ) fig.update_layout(height=400) return fig import math # Create circular layout n = len(interactions) + 1 # +1 for query protein # Query protein at center node_x: list[float] = [0.0] node_y: list[float] = [0.0] node_names = [query_protein] node_colors = ['#d62728'] # Red for query node_sizes: list[float] = [30.0] # Partner proteins in circle for i, interaction in enumerate(interactions): angle = 2 * math.pi * i / len(interactions) x = math.cos(angle) y = math.sin(angle) node_x.append(x) node_y.append(y) node_names.append(interaction['partner_name']) # Color by confidence if interaction['confidence'] == 'Highest': node_colors.append('#1f77b4') # Dark blue for highest elif interaction['confidence'] == 'High': node_colors.append('#2ca02c') # Green for high elif interaction['confidence'] == 'Medium': node_colors.append('#ff7f0e') # Orange for medium else: node_colors.append('#7f7f7f') # Gray for low # Size by score size = 10 + (interaction['combined_score'] / 1000) * 20 node_sizes.append(size) # Create edges grouped by confidence level edge_groups: dict[str, dict[str, Any]] = { 'Highest': {'x': [], 'y': [], 'color': 'rgba(31, 119, 180, 0.6)'}, 'High': {'x': [], 'y': [], 'color': 'rgba(44, 160, 44, 0.5)'}, 'Medium': {'x': [], 'y': [], 'color': 'rgba(255, 127, 14, 0.5)'}, 'Low': {'x': [], 'y': [], 'color': 'rgba(127, 127, 127, 0.3)'} } for i in range(len(interactions)): conf = interactions[i]['confidence'] # Line from center to partner edge_groups[conf]['x'].extend([0, node_x[i+1], None]) edge_groups[conf]['y'].extend([0, node_y[i+1], None]) # Create figure fig = go.Figure() # Add edges as separate traces for each confidence level for conf_level, edges in edge_groups.items(): if edges['x']: # Only add if there are edges for this confidence fig.add_trace(go.Scatter( x=edges['x'], y=edges['y'], mode='lines', line=dict(color=edges['color'], width=2), hoverinfo='none', name=f"{conf_level} confidence", showlegend=False )) # Add nodes hover_text = [] for i, name in enumerate(node_names): if i == 0: hover_text.append(f"{name}
Query Protein") else: interaction = interactions[i-1] hover_text.append( f"{name}
" f"Score: {interaction['combined_score']}/1000
" f"Confidence: {interaction['confidence']}
" f"Evidence: {interaction['evidence_types']}" ) fig.add_trace(go.Scatter( x=node_x, y=node_y, mode='markers+text', marker=dict( size=node_sizes, color=node_colors, line=dict(color='white', width=2) ), text=node_names, textposition='top center', textfont=dict(size=10), hovertext=hover_text, hoverinfo='text', showlegend=False )) fig.update_layout( title="Protein-Protein Interaction Network", height=600, showlegend=False, hovermode='closest', xaxis=dict(showgrid=False, zeroline=False, showticklabels=False), yaxis=dict(showgrid=False, zeroline=False, showticklabels=False), plot_bgcolor='white', margin=dict(l=20, r=20, t=50, b=20) ) return fig @staticmethod def create_ppi_table_html(interactions: list) -> str: """ Create formatted HTML table for PPI data """ if not interactions: return "

No interactions found

" html = """ """ for interaction in interactions: partner = html_lib.escape(str(interaction.get("partner_name", "Unknown"))) score = html_lib.escape(str(interaction.get("combined_score", "N/A"))) confidence = html_lib.escape(str(interaction.get("confidence", "Unknown"))) evidence = html_lib.escape(str(interaction.get("evidence_types", "N/A"))) if confidence == "Highest": conf_class = "confidence-highest" elif confidence == "High": conf_class = "confidence-high" elif confidence == "Medium": conf_class = "confidence-medium" else: conf_class = "confidence-low" html += f""" """ html += """
Partner Protein Combined Score Confidence Evidence Types
{partner} {score}/1000 {confidence} {evidence}
""" return html @staticmethod def create_molstar_docking_viewer( protein_structure: Dict, ligand_data: Dict, docking_result: Dict, ligand_name: str, view_mode: str = "Cartoon (Ribbon)", selected_mode: Optional[int] = None, ) -> str: """Create a Mol*-based viewer for an actual protein-ligand pose.""" import json if not protein_structure.get("available"): return ProteinVisualizer._viewer_message("No protein structure available", "warning") pdb_text = str(protein_structure.get("pdb_text") or "") pdb_url = str(protein_structure.get("pdb_url") or "") if pdb_url.startswith("data:"): pdb_url = "" if not pdb_text and not pdb_url: return ProteinVisualizer._viewer_message( "No protein coordinate data is available for this docking view.", "warning" ) pose_blocks = docking_result.get("pose_pdb_by_mode") or {} best_mode = docking_result.get("best_mode") or {} mode_number = selected_mode or best_mode.get("mode") or 1 pose_pdb = ( pose_blocks.get(mode_number) or pose_blocks.get(str(mode_number)) or next(iter(pose_blocks.values()), "") ) pose_source = str(docking_result.get("pose_source") or "unavailable") selected_pocket = docking_result.get("selected_pocket") or {} grid = selected_pocket.get("grid") or docking_result.get("grid") or docking_result.get("docking_box") if not pose_pdb: if docking_result.get("simulated") or pose_source == "simulation": return ProteinVisualizer._viewer_message( "Simulated docking contains score and center metadata only; no ligand pose coordinates are rendered.", "warning", ) return ProteinVisualizer._viewer_message( "Ligand pose unavailable. Run real docking with Vina pose output to render the complex.", "warning", ) affinity = docking_result.get("binding_affinity") affinity_text = f"{float(affinity):.2f} kcal/mol" if affinity is not None else "N/A" ligand_label = ligand_name or ligand_data.get("name") or "Ligand pose" mode_options = "".join( f'' for mode in pose_blocks.keys() ) selector_html = ( f'' if len(pose_blocks) > 1 else "" ) legend_html = f"""
Protein
{html_lib.escape(str(ligand_label))}
Oxygen
Nitrogen
Docking grid
{selector_html}
""" protein_text_json = json.dumps(pdb_text) protein_url_json = json.dumps(pdb_url) pose_blocks_json = json.dumps({str(k): v for k, v in pose_blocks.items()}) selected_mode_json = json.dumps(str(mode_number)) view_mode_json = json.dumps(view_mode) grid_json = json.dumps(grid or {}) body_script = f""" const proteinText = {protein_text_json}; const proteinUrl = {protein_url_json}; const poseBlocks = {pose_blocks_json}; let selectedMode = {selected_mode_json}; const viewMode = {view_mode_json}; const dockingGrid = {grid_json}; function gridAsPdb(grid) {{ if (!grid || !grid.center || !grid.size) return ""; const c = grid.center; const h = {{x: grid.size.x / 2, y: grid.size.y / 2, z: grid.size.z / 2}}; const corners = [ [-1,-1,-1], [1,-1,-1], [1,1,-1], [-1,1,-1], [-1,-1,1], [1,-1,1], [1,1,1], [-1,1,1] ]; const lines = corners.map((sign, index) => {{ const x = c.x + sign[0] * h.x; const y = c.y + sign[1] * h.y; const z = c.z + sign[2] * h.z; return `HETATM${{String(index + 1).padStart(5)}} C${{index + 1}} GRD G 1 ${{x.toFixed(3).padStart(8)}}${{y.toFixed(3).padStart(8)}}${{z.toFixed(3).padStart(8)}} 0.25 0.00 C`; }}); const edges = [[1,2],[2,3],[3,4],[4,1],[5,6],[6,7],[7,8],[8,5],[1,5],[2,6],[3,7],[4,8]]; edges.forEach(edge => lines.push(`CONECT${{String(edge[0]).padStart(5)}}${{String(edge[1]).padStart(5)}}`)); lines.push("END"); return lines.join("\\n"); }} async function loadProtein() {{ if (proteinText && proteinText.length > 0) {{ await viewer.loadStructureFromData(proteinText, "pdb"); }} else {{ await viewer.loadStructureFromUrl(proteinUrl, "pdb"); }} }} async function loadPose(mode) {{ const poseData = poseBlocks[mode] || poseBlocks[Object.keys(poseBlocks)[0]]; if (!poseData) throw new Error("No ligand pose coordinates are available."); await viewer.loadStructureFromData(poseData, "pdb"); }} await loadProtein(); await loadPose(selectedMode); const gridBoxData = gridAsPdb(dockingGrid); if (gridBoxData) await viewer.loadStructureFromData(gridBoxData, "pdb"); const poseSelect = document.getElementById("pose-mode-select"); if (poseSelect) {{ poseSelect.addEventListener("change", async function(event) {{ await viewer.plugin.clear(); selectedMode = event.target.value; await loadProtein(); await loadPose(selectedMode); if (gridBoxData) await viewer.loadStructureFromData(gridBoxData, "pdb"); if (viewer.plugin && viewer.plugin.managers && viewer.plugin.managers.camera) {{ viewer.plugin.managers.camera.reset(); }} }}); }} """ return ProteinVisualizer._molstar_shell( title=f"Protein-Ligand Complex - {ligand_label}", status=f"Pose source: {pose_source}; affinity: {affinity_text}", legend_html=legend_html, body_script=body_script, height=620, show_side_panels=False, ) @staticmethod def create_docking_3d_viewer(protein_structure: Dict, ligand_data: Dict, docking_result: Dict, ligand_name: str, view_mode: str = "Cartoon (Ribbon)") -> str: """ Create a Mol*-based 3D viewer showing protein with docked ligand. Kept as a compatibility wrapper for existing Streamlit call sites. Features: - Automatically clears and resets viewer for each render (prevents stale structures) - Removes all previous models before loading new ones - Centers and zooms the protein-ligand complex using zoomTo() - Applies fallback zoom if initial zoom is insufficient - Uses unique viewer instance per render (Streamlit-compatible) """ return ProteinVisualizer.create_molstar_docking_viewer( protein_structure, ligand_data, docking_result, ligand_name, view_mode=view_mode, ) if not protein_structure.get('available'): return "

No protein structure available

" # Get PDB data - prefer pdb_text over pdb_url pdb_text = protein_structure.get('pdb_text', '') pdb_url = protein_structure.get('pdb_url', '') # Validate pdb_url - skip if it's a data URI if pdb_url and pdb_url.startswith('data:'): pdb_url = '' # Critical validation: ensure we have actual PDB data if not pdb_text and not pdb_url: return """

⚠️ No Protein Structure Data Available

Please predict the protein structure in the Protein Structure Prediction tab first.

The docking visualization requires a predicted 3D structure.

""" ligand_smiles = ligand_data.get('smiles', '') # Get binding affinity for display affinity = docking_result.get('binding_affinity', 0) # Build a simple ligand PDB from docking center (visualization only) best_center = docking_result.get("best_mode", {}).get("center", {}) try: center_x = float(best_center.get("x", 0.0) or 0.0) center_y = float(best_center.get("y", 0.0) or 0.0) center_z = float(best_center.get("z", 0.0) or 0.0) except (TypeError, ValueError): center_x, center_y, center_z = 0.0, 0.0, 0.0 def _format_hetatm(serial, name, resn, chain, resi, x, y, z, element): return ( f"HETATM{serial:5d} {name:<4}{resn:>3} {chain}{resi:4d} " f"{x:8.3f}{y:8.3f}{z:8.3f} 1.00 20.00 {element:>2}" ) ligand_atoms = [ ("C1", center_x, center_y, center_z, "C"), ("O1", center_x + 1.2, center_y, center_z, "O"), ("N1", center_x - 1.2, center_y, center_z, "N"), ("S1", center_x, center_y + 1.2, center_z, "S"), ] ligand_pdb_lines = [ _format_hetatm(idx + 1, atom[0], "LIG", "A", 1, atom[1], atom[2], atom[3], atom[4]) for idx, atom in enumerate(ligand_atoms) ] ligand_pdb_lines.append("END") ligand_pdb = "\n".join(ligand_pdb_lines) # Properly escape ligand_name and pdb_text for JavaScript import json ligand_name_escaped = json.dumps(ligand_name) pdb_text_escaped = json.dumps(pdb_text if pdb_text else '') pdb_url_escaped = json.dumps(pdb_url if pdb_url else '') ligand_pdb_escaped = json.dumps(ligand_pdb) view_mode_escaped = json.dumps(view_mode) # Generate unique ID for this viewer instance (prevents reuse/caching) import uuid viewer_id = f"viewer_{uuid.uuid4().hex[:8]}" html = f"""
Loading protein-ligand complex...
Binding Affinity: {affinity} kcal/mol
Ligand:
Protein
Ligand
Binding Site
Protein-Ligand Docking Complex (Simulated Pose)
""" return html @staticmethod def predict_best_ligand(ligands: list, protein_data: Dict) -> Dict: """ Predict which known ligand should bind best based on multiple factors Scoring criteria: 1. Experimental activity data (IC50/Ki) 2. Molecular properties (MW, LogP) 3. Drug-likeness (Lipinski's Rule of Five) Returns top predicted ligand with explanation """ if not ligands: return {"available": False, "message": "No ligands to analyze"} predictions = [] for ligand in ligands: score = 0 reasons = [] # Factor 1: Activity value (most important) activity_value = ligand.get('activity_value', float('inf')) activity_type = ligand.get('activity_type', '') if activity_value < 10: # Very potent score += 50 reasons.append(f"Very potent {activity_type}: {activity_value:.2f} nM") elif activity_value < 100: # Potent score += 35 reasons.append(f"Potent {activity_type}: {activity_value:.2f} nM") elif activity_value < 1000: # Moderate score += 20 reasons.append(f"Moderate {activity_type}: {activity_value:.2f} nM") else: # Weak score += 5 reasons.append(f"Weak activity: {activity_value:.2f} nM") # Factor 2: Molecular weight (drug-like range) mw = ligand.get('molecular_weight', 0) # Convert to float if it's a string if isinstance(mw, str): try: mw = float(mw) except (ValueError, TypeError): mw = 0 if mw and 160 <= mw <= 500: # Optimal drug-like range score += 15 reasons.append(f"Optimal MW: {mw:.1f} Da") elif mw and mw <= 160: score += 5 reasons.append(f"Low MW: {mw:.1f} Da") elif mw and mw > 500: score += 8 reasons.append(f"High MW: {mw:.1f} Da") # Factor 3: SMILES availability (for structure-based predictions) if ligand.get('smiles'): score += 10 reasons.append("Structure available for docking") # Factor 4: Name indicates known drug name = ligand.get('name', '').lower() drug_indicators = ['inhibitor', 'mab', 'nib', 'tinib', 'zumab', 'ciclib'] if any(indicator in name for indicator in drug_indicators): score += 10 reasons.append("Known drug or inhibitor class") predictions.append({ "ligand": ligand, "score": score, "reasons": reasons, "confidence": "High" if score >= 70 else ("Medium" if score >= 50 else "Low") }) # Sort by score predictions = sorted(predictions, key=lambda x: x['score'], reverse=True) best = predictions[0] return { "available": True, "best_ligand": best['ligand'], "score": best['score'], "confidence": best['confidence'], "reasons": best['reasons'], "all_predictions": predictions[:5] # Top 5 } @staticmethod def advanced_binding_prediction(known_ligands: list, protein_data: Dict, novel_compounds: Optional[list] = None) -> Dict: """ Advanced ML-based binding prediction for both known and unknown ligands Prediction features: 1. Molecular descriptors (MW, LogP, HBD, HBA, TPSA) 2. Structural fingerprints (if SMILES available) 3. Protein-ligand interaction fingerprints 4. Pharmacophore matching 5. QSAR model predictions Returns comprehensive predictions with confidence scores """ import math predictions: dict[str, Any] = { "known_ligands": [], "novel_candidates": [], "binding_rules": {}, "recommendations": [] } binding_rules: dict[str, Any] = {} # Analyze known ligands to extract binding rules if known_ligands: binding_rules = ProteinVisualizer.extract_binding_rules(known_ligands) predictions["binding_rules"] = binding_rules # Predict for known ligands for ligand in known_ligands: pred = ProteinVisualizer.predict_binding_score(ligand, binding_rules, protein_data, is_known=True) predictions["known_ligands"].append(pred) # Sort by predicted binding predictions["known_ligands"] = sorted( predictions["known_ligands"], key=lambda x: x["predicted_score"], reverse=True ) # Predict for novel/unknown compounds if novel_compounds: for compound in novel_compounds: pred = ProteinVisualizer.predict_binding_score(compound, binding_rules if known_ligands else {}, protein_data, is_known=False) predictions["novel_candidates"].append(pred) # Sort by predicted binding predictions["novel_candidates"] = sorted( predictions["novel_candidates"], key=lambda x: x["predicted_score"], reverse=True ) # Generate recommendations predictions["recommendations"] = ProteinVisualizer.generate_recommendations( predictions["known_ligands"], predictions["novel_candidates"], binding_rules if known_ligands else {} ) return predictions @staticmethod def extract_binding_rules(known_ligands: list) -> Dict: """ Extract SAR (Structure-Activity Relationship) rules from known ligands """ rules: dict[str, Any] = { "optimal_mw_range": [0.0, 0.0], "activity_threshold": {}, "pharmacophore": [], "property_ranges": {} } # Extract activity data activities: list[float] = [] mw_values: list[float] = [] for lig in known_ligands: activity = lig.get('activity_value', 0) mw = lig.get('molecular_weight', 0) # Convert activity to float if it's a string if isinstance(activity, str): try: activity = float(activity) except (ValueError, TypeError): activity = 0 # Convert mw to float if it's a string if isinstance(mw, str): try: mw = float(mw) except (ValueError, TypeError): mw = 0 if activity > 0: activities.append(activity) if mw > 0: mw_values.append(mw) if activities: # Define potent threshold (bottom 25th percentile) activities_sorted = sorted(activities) potent_threshold = activities_sorted[len(activities_sorted) // 4] if len(activities_sorted) > 4 else activities_sorted[0] rules["activity_threshold"] = { "potent": potent_threshold, "moderate": potent_threshold * 10, "weak": potent_threshold * 100 } if mw_values: # Optimal MW range (mean ± 1 std dev) import statistics mean_mw = statistics.mean(mw_values) std_mw = statistics.stdev(mw_values) if len(mw_values) > 1 else 50 rules["optimal_mw_range"] = [ max(150, mean_mw - std_mw), min(600, mean_mw + std_mw) ] # Lipinski's Rule of Five compliance from known actives rules["lipinski_compliance"] = True return rules @staticmethod def predict_binding_score(compound: Dict, binding_rules: Dict, protein_data: Dict, is_known: bool = False) -> Dict: """ Predict binding affinity score for a compound Returns score 0-100 with confidence level """ import math score = 0 confidence_factors = [] reasons = [] warnings = [] # Factor 1: Experimental activity (only for known ligands) if is_known and compound.get('activity_value'): activity = compound['activity_value'] activity_type = compound.get('activity_type', 'IC50') # Convert activity to float if it's a string if isinstance(activity, str): try: activity = float(activity) except (ValueError, TypeError): activity = None if activity is not None: thresholds = binding_rules.get('activity_threshold', {}) if activity <= thresholds.get('potent', 10): score += 50 confidence_factors.append(0.95) reasons.append(f"Very potent {activity_type}: {activity:.2f} nM (experimental)") elif activity <= thresholds.get('moderate', 100): score += 35 confidence_factors.append(0.85) reasons.append(f"Moderate {activity_type}: {activity:.2f} nM (experimental)") else: score += 15 confidence_factors.append(0.70) reasons.append(f"Weak activity: {activity:.2f} nM (experimental)") # Factor 2: Molecular weight (drug-likeness) mw = compound.get('molecular_weight', 0) # Convert mw to float if it's a string if isinstance(mw, str): try: mw = float(mw) except (ValueError, TypeError): mw = 0 optimal_range = binding_rules.get('optimal_mw_range', [160, 500]) if mw: if optimal_range[0] <= mw <= optimal_range[1]: score += 15 confidence_factors.append(0.80) reasons.append(f"Optimal MW: {mw:.1f} Da (within active range)") elif 150 <= mw <= 600: # Lipinski range score += 10 confidence_factors.append(0.70) reasons.append(f"Acceptable MW: {mw:.1f} Da (drug-like)") if mw > 500: warnings.append("MW >500 Da may reduce oral bioavailability") else: score += 3 confidence_factors.append(0.50) warnings.append(f"MW {mw:.1f} Da outside optimal range") # Factor 3: Lipinski's Rule of Five compliance lipinski_violations = ProteinVisualizer.calculate_lipinski_violations(compound) if lipinski_violations == 0: score += 15 confidence_factors.append(0.85) reasons.append("Passes Lipinski's Rule of Five (drug-like)") elif lipinski_violations == 1: score += 10 confidence_factors.append(0.75) reasons.append("1 Lipinski violation (acceptable)") warnings.append("Minor drug-likeness concern") else: score += 5 confidence_factors.append(0.60) warnings.append(f"{lipinski_violations} Lipinski violations (poor drug-likeness)") # Factor 4: Chemical structure availability if compound.get('smiles'): score += 10 confidence_factors.append(0.90) reasons.append("Structure available for computational docking") # Factor 5: Known drug status name = compound.get('name', '').lower() source = compound.get('source', '') if 'fda' in source.lower() or 'approved' in str(compound.get('status', '')).lower(): score += 15 confidence_factors.append(0.95) reasons.append("FDA-approved drug (validated safety profile)") elif any(x in name for x in ['inhibitor', 'mab', 'nib', 'tinib', 'zumab']): score += 12 confidence_factors.append(0.85) reasons.append("Known inhibitor/drug class") # Factor 6: Literature evidence if compound.get('pmid') or 'literature' in source.lower(): score += 8 confidence_factors.append(0.75) reasons.append("Literature evidence of activity") # Factor 7: Target class match (for repurposing) if compound.get('target_class'): score += 10 confidence_factors.append(0.80) reasons.append(f"Target class match: {compound['target_class']}") # Calculate confidence (average of all factors) confidence = sum(confidence_factors) / len(confidence_factors) if confidence_factors else 0.5 # Confidence level if confidence >= 0.85: confidence_level = "High" confidence_color = "#28a745" elif confidence >= 0.70: confidence_level = "Medium" confidence_color = "#ffc107" else: confidence_level = "Low" confidence_color = "#dc3545" # Predicted binding affinity (simplified QSAR) if is_known and compound.get('activity_value'): # Convert IC50 to approximate binding affinity ic50 = compound['activity_value'] predicted_affinity = -math.log10(ic50 / 1e9) * 1.36 # kcal/mol predicted_affinity = max(-12, min(-4, predicted_affinity)) else: # For unknowns: estimate based on score predicted_affinity = -4 - (score / 100) * 6 # Range: -4 to -10 return { "compound": compound, "predicted_score": min(100, score), "confidence": round(confidence, 2), "confidence_level": confidence_level, "confidence_color": confidence_color, "predicted_affinity": round(predicted_affinity, 2), "reasons": reasons, "warnings": warnings, "is_known": is_known, "recommendation": "Highly recommended" if score >= 75 else ("Worth testing" if score >= 50 else "Low priority") } @staticmethod def calculate_lipinski_violations(compound: Dict) -> int: """ Calculate Lipinski's Rule of Five violations Rules: MW ≤500, LogP ≤5, HBD ≤5, HBA ≤10 """ violations: float = 0.0 mw = compound.get('molecular_weight', 0) # Convert mw to float if it's a string if isinstance(mw, str): try: mw = float(mw) except (ValueError, TypeError): mw = 0 if mw > 500: violations += 1 # Note: Would need to calculate LogP, HBD, HBA from SMILES # For now, estimate based on MW if mw > 450: # Rough proxy for LogP violations violations += 0.5 return int(violations) @staticmethod def generate_recommendations(known_predictions: list, novel_predictions: list, binding_rules: Dict) -> list: """ Generate actionable recommendations for drug discovery """ recommendations = [] # Recommendation 1: Best known binder if known_predictions: best_known = known_predictions[0] recommendations.append({ "type": "Best Known Binder", "compound": best_known["compound"]["name"], "score": best_known["predicted_score"], "action": f"Use as positive control in experiments (predicted affinity: {best_known['predicted_affinity']:.1f} kcal/mol)", "priority": "High" }) # Recommendation 2: Top novel candidate if novel_predictions: best_novel = novel_predictions[0] if best_novel["predicted_score"] >= 60: recommendations.append({ "type": "Novel Candidate", "compound": best_novel["compound"]["name"], "score": best_novel["predicted_score"], "action": f"Priority for experimental validation (confidence: {best_novel['confidence_level']})", "priority": "High" if best_novel["predicted_score"] >= 75 else "Medium" }) # Recommendation 3: Repurposing opportunities repurposing = [p for p in (novel_predictions or []) if p["compound"].get("source") == "Drug Repurposing"] if repurposing: recommendations.append({ "type": "Drug Repurposing", "compound": f"{len(repurposing)} FDA-approved drug(s)", "score": max([p["predicted_score"] for p in repurposing]), "action": "Consider for off-label use or clinical trials (safety already established)", "priority": "High" }) # Recommendation 4: Structure optimization if known_predictions and len(known_predictions) >= 3: mw_list = [] for p in known_predictions[:3]: mw = p["compound"].get("molecular_weight", 0) # Convert to float if it's a string if isinstance(mw, str): try: mw = float(mw) except (ValueError, TypeError): mw = 0 mw_list.append(mw) top3_avg_mw = sum(mw_list) / 3 if mw_list else 0 recommendations.append({ "type": "Structure Optimization", "compound": "New derivatives", "score": 0, "action": f"Design analogs around MW ~{top3_avg_mw:.0f} Da based on top binders", "priority": "Medium" }) return recommendations @staticmethod def create_risk_calculator_ui() -> str: """ Create interactive risk calculator HTML form Returns HTML with JavaScript for real-time calculation """ html = """

🔬 Predictive Risk Calculator

Calculate your disease risk based on biomarker levels and personal factors

""" return html @staticmethod def create_drug_target_visualization(drug_data: Dict) -> go.Figure: """ Create visualization of drugs by development phase """ if not drug_data.get('available'): fig = go.Figure() fig.add_annotation( text="No drug-target data available", xref="paper", yref="paper", x=0.5, y=0.5, showarrow=False, font=dict(size=16, color="gray") ) fig.update_layout(height=400) return fig categories = ['FDA Approved', 'Clinical Trials', 'Investigational'] counts = [ drug_data.get('total_fda', 0), drug_data.get('total_trials', 0), drug_data.get('total_investigational', 0) ] colors = ['#28a745', '#ffc107', '#17a2b8'] fig = go.Figure(go.Bar( x=categories, y=counts, marker=dict(color=colors, line=dict(color='black', width=1.5)), text=counts, textposition='outside', hovertemplate='%{x}
Count: %{y}' )) fig.update_layout( title="Drug Development Pipeline", yaxis_title="Number of Drugs", height=350, template="plotly_white", showlegend=False ) return fig @staticmethod def create_binding_affinity_chart(predictions: List[Dict]) -> go.Figure: """ Create bar chart showing binding affinity for multiple molecules Args: predictions: List of prediction dictionaries Returns: Plotly figure """ valid_predictions = [p for p in predictions if p.get("is_valid", False)] if not valid_predictions: return ProteinVisualizer._empty_figure("No valid predictions available", height=400) # Extract data molecule_names = [p.get("molecule_name", f"Molecule {i+1}") for i, p in enumerate(valid_predictions)] affinities = [ float(p.get("prediction", {}).get("binding_affinity", 0) or 0) for p in valid_predictions ] semantics = ProteinVisualizer._affinity_semantics(valid_predictions) unit_label = semantics["unit_label"] axis_hint = semantics["axis_hint"] higher_is_better = bool(semantics["higher_is_better"]) colors = ProteinVisualizer._affinity_colors(affinities, higher_is_better) yaxis_range = None if higher_is_better: yaxis_range = [0, max(10.0, max(affinities) * 1.12)] fig = go.Figure(go.Bar( x=molecule_names, y=affinities, marker=dict( color=colors, line=dict(color="#334155", width=0.8) ), text=[f"{aff:.2f}" for aff in affinities], textposition='outside', customdata=[unit_label] * len(affinities), hovertemplate=( "%{x}
Binding affinity: %{y:.2f} %{customdata}" f"
Interpretation: {axis_hint}" ) )) fig.update_layout( title="Predicted Binding Affinity", xaxis_title="Molecule", yaxis_title=f"Binding Affinity ({unit_label}, {axis_hint})", yaxis=dict(range=yaxis_range), height=max(400, len(valid_predictions) * 40), template="plotly_white", showlegend=False, xaxis=dict(tickangle=-45) ) return fig @staticmethod def create_binding_likelihood_chart(predictions: List[Dict]) -> go.Figure: """ Create bar chart showing binding likelihood (probability) for multiple molecules Args: predictions: List of prediction dictionaries Returns: Plotly figure """ valid_predictions = [p for p in predictions if p.get("is_valid", False)] if not valid_predictions: fig = go.Figure() fig.add_annotation( text="No valid predictions available", xref="paper", yref="paper", x=0.5, y=0.5, showarrow=False, font=dict(size=16, color="gray") ) fig.update_layout(height=400) return fig # Extract data molecule_names = [p.get("molecule_name", f"Molecule {i+1}") for i, p in enumerate(valid_predictions)] likelihoods = [p.get("prediction", {}).get("binding_likelihood", 0) for p in valid_predictions] # Color based on likelihood colors = [] for lik in likelihoods: if lik >= 70: colors.append("#28a745") # Green - high elif lik >= 50: colors.append("#ffc107") # Yellow - medium elif lik >= 30: colors.append("#ff9800") # Orange - low else: colors.append("#dc3545") # Red - very low fig = go.Figure(go.Bar( x=molecule_names, y=likelihoods, marker=dict( color=colors, line=dict(color='rgb(8,48,107)', width=1) ), text=[f"{lik:.1f}%" for lik in likelihoods], textposition='outside', hovertemplate='%{x}
Binding Likelihood: %{y:.1f}%' )) fig.update_layout( title="Predicted Binding Likelihood", xaxis_title="Molecule", yaxis_title="Binding Likelihood (%)", yaxis=dict(range=[0, 100]), height=max(400, len(valid_predictions) * 40), template="plotly_white", showlegend=False, xaxis=dict(tickangle=-45) ) return fig @staticmethod def create_binding_ranking_chart(ranked_molecules: List[Dict], top_n: int = 10) -> go.Figure: """ Create scatter plot ranking molecules by affinity and likelihood Args: ranked_molecules: List of ranked prediction dictionaries top_n: Number of top molecules to display Returns: Plotly figure """ if not ranked_molecules: return ProteinVisualizer._empty_figure("No ranked molecules available", height=400) # Take top N top_molecules = ranked_molecules[:top_n] # Extract data molecule_names = [m.get("molecule_name", f"Molecule {m.get('rank', i+1)}") for i, m in enumerate(top_molecules)] affinities = [ float(m.get("prediction", {}).get("binding_affinity", 0) or 0) for m in top_molecules ] likelihoods = [ float(m.get("prediction", {}).get("binding_likelihood", 0) or 0) for m in top_molecules ] ranks = [m.get("rank", i+1) for i, m in enumerate(top_molecules)] semantics = ProteinVisualizer._affinity_semantics(top_molecules) unit_label = semantics["unit_label"] axis_hint = semantics["axis_hint"] higher_is_better = bool(semantics["higher_is_better"]) # Size based on rank (higher rank = smaller) sizes = [max(10, 30 - r) for r in ranks] fig = go.Figure() # Add scatter points fig.add_trace(go.Scatter( x=affinities, y=likelihoods, mode='markers+text', marker=dict( size=sizes, color=likelihoods, colorscale='RdYlGn', cmin=0, cmax=100, showscale=True, colorbar=dict(title="Likelihood (%)"), line=dict(width=1.5, color="#111827") ), text=[str(r) for r in ranks], textposition="middle center", textfont=dict(size=10, color='white', family='Arial Black'), customdata=list(zip(molecule_names, ranks, [unit_label] * len(top_molecules))), hovertemplate=( "%{customdata[0]}
Rank: #%{customdata[1]}" "
Affinity: %{x:.2f} %{customdata[2]}" "
Likelihood: %{y:.1f}%" ) )) fig.update_layout( title=f"Top {len(top_molecules)} Ranked Drug Candidates", xaxis_title=f"Binding Affinity ({unit_label}, {axis_hint})", yaxis_title="Binding Likelihood (%)", xaxis=dict(autorange=None if higher_is_better else "reversed"), yaxis=dict(range=[0, 100]), height=500, template="plotly_white", showlegend=False ) return fig @staticmethod def render_phylogenetic_tree(newick_str: str, num_taxa: int = 2) -> bytes: """ Render a phylogenetic tree from Newick format to PNG bytes. Uses BioPython and matplotlib to create a visual tree. Args: newick_str: Newick format tree string num_taxa: Number of taxa (used for figure sizing) Returns: PNG image bytes """ fig = None try: from Bio import Phylo # Parse the Newick string tree_handle = StringIO(newick_str) tree = Phylo.read(tree_handle, "newick") # Calculate figure size based on taxa count # Minimum height of 4, scale up with more taxa fig_height = max(4, num_taxa * 0.5) fig_width = max(8, num_taxa * 0.6) # Create figure fig, ax = plt.subplots(figsize=(fig_width, fig_height)) # Draw the tree Phylo.draw(tree, axes=ax, do_show=False, show_confidence=False) # Improve styling ax.set_xlabel("Branch Length", fontsize=10) ax.set_title("Phylogenetic Tree", fontsize=12, fontweight='bold', pad=15) ax.spines['top'].set_visible(False) ax.spines['right'].set_visible(False) ax.spines['left'].set_visible(False) ax.tick_params(left=False, labelleft=True) # Adjust layout to prevent label clipping plt.tight_layout() # Convert to PNG bytes for native Streamlit rendering buf = BytesIO() fig.savefig(buf, format='png', dpi=150, bbox_inches='tight', facecolor='white') return buf.getvalue() except Exception as e: raise ValueError(f"Unable to render phylogenetic tree: {e}") from e finally: if fig is not None: plt.close(fig) @staticmethod def create_phylogenetic_tree_visualization(newick_string: str, metadata: Dict) -> str: """ Create an interactive phylogenetic tree visualization with rendered tree image. Parses Newick format and displays actual tree structure with branches and labels. Args: newick_string: Newick format tree string metadata: Dictionary with method, num_taxa, tree_length Returns: HTML string with embedded tree visualization """ # Get number of taxa for sizing num_taxa = metadata.get('num_taxa', 2) # Render the tree to base64 image tree_png = ProteinVisualizer.render_phylogenetic_tree(newick_string, num_taxa) tree_image = "data:image/png;base64," + base64.b64encode(tree_png).decode("ascii") escaped_newick = html_lib.escape(newick_string) # Create HTML visualization html = f"""

📊 Phylogenetic Tree

Tree Visualization:

Phylogenetic Tree
📄 Raw Newick Format
{escaped_newick}
📝 How to read: Branch lengths represent evolutionary distance between taxa. Longer branches indicate greater evolutionary divergence.
🔹 Method: Tree constructed using {metadata.get('method', 'N/A').upper()} algorithm with {num_taxa} taxa.
""" return html @staticmethod def create_phylogenetic_dendrogram(newick_string: str, metadata: Dict) -> go.Figure: """Create an interactive dendrogram visualization like ClustalW""" try: from Bio import Phylo from io import StringIO tree = Phylo.read(StringIO(newick_string), "newick") terminals = tree.get_terminals() terminal_names = [t.name if t.name else f"Seq{i}" for i, t in enumerate(terminals)] fig = go.Figure() if terminal_names: fig.add_trace( go.Scatter( x=list(range(len(terminal_names))), y=[0] * len(terminal_names), mode="markers+text", text=terminal_names, textposition="middle right", marker=dict(size=8, color="#1f77b4"), hovertemplate="%{text}", showlegend=False, ) ) fig.update_layout( title=f"Phylogenetic Tree ({metadata.get('method', 'N/A').upper()}) - {metadata.get('num_taxa', 0)} Taxa", xaxis_title="Terminal order", yaxis=dict(showticklabels=False, zeroline=False), height=max(400, len(terminal_names) * 30), width=1000, showlegend=False, template="plotly_white", hovermode="closest", margin=dict(l=150, r=100, t=80, b=50), ) return fig except Exception: fig = go.Figure() fig.add_annotation( text="Dendrogram visualization unavailable
Showing Newick format instead", xref="paper", yref="paper", x=0.5, y=0.5, showarrow=False, font=dict(size=14, color="gray") ) fig.update_layout(height=300, title="Phylogenetic Tree") return fig @staticmethod def create_variant_impact_distribution_chart(annotated_variants: List[Dict]) -> go.Figure: """Create impact-class distribution chart for annotated variants.""" if not annotated_variants: fig = go.Figure() fig.add_annotation( text="No annotated variants available", xref="paper", yref="paper", x=0.5, y=0.5, showarrow=False, font=dict(size=14, color="gray"), ) fig.update_layout(height=320, template="plotly_white") return fig counts: Dict[str, int] = defaultdict(int) for row in annotated_variants: counts[str(row.get("predicted_effect_class", "unknown")).lower()] += 1 order = ["high", "moderate", "low", "unknown"] x_vals = [label.title() for label in order] y_vals = [counts.get(label, 0) for label in order] colors = ["#d62728", "#ff7f0e", "#2ca02c", "#9e9e9e"] fig = go.Figure( data=[ go.Bar( x=x_vals, y=y_vals, marker_color=colors, text=y_vals, textposition="auto", ) ] ) fig.update_layout( title="Variant Impact Distribution", xaxis_title="Impact class", yaxis_title="Variant count", template="plotly_white", height=340, showlegend=False, ) return fig @staticmethod def create_top_gene_impact_chart(gene_impact: Dict, top_n: int = 10) -> go.Figure: """Create bar chart of top impacted genes.""" genes = list(gene_impact.get("genes", {}).values())[:top_n] if not genes: fig = go.Figure() fig.add_annotation( text="No gene impact data available", xref="paper", yref="paper", x=0.5, y=0.5, showarrow=False, font=dict(size=14, color="gray"), ) fig.update_layout(height=320, template="plotly_white") return fig names = [g.get("gene", "UNK") for g in genes] scores = [float(g.get("score", 0.0)) for g in genes] fig = go.Figure( data=[go.Bar(x=names, y=scores, marker_color="#1f77b4", text=[f"{s:.1f}" for s in scores], textposition="auto")] ) fig.update_layout( title="Top Gene Impact Scores", xaxis_title="Gene", yaxis_title="Impact score (0-100)", template="plotly_white", height=360, showlegend=False, ) return fig @staticmethod def create_pathway_perturbation_chart(pathway_impact: Dict, top_n: int = 10) -> go.Figure: """Create horizontal bar chart of pathway perturbation.""" pathways = pathway_impact.get("pathways", [])[:top_n] if not pathways: fig = go.Figure() fig.add_annotation( text="No pathway impact data available", xref="paper", yref="paper", x=0.5, y=0.5, showarrow=False, font=dict(size=14, color="gray"), ) fig.update_layout(height=320, template="plotly_white") return fig names = [p.get("pathway_name", "Unknown") for p in pathways][::-1] scores = [float(p.get("impact_score", 0.0)) for p in pathways][::-1] confidences = [p.get("confidence", "Low") for p in pathways][::-1] fig = go.Figure( data=[ go.Bar( x=scores, y=names, orientation="h", text=[f"{s:.1f}" for s in scores], textposition="auto", customdata=confidences, hovertemplate="%{y}
Impact: %{x:.1f}
Confidence: %{customdata}", marker_color="#6a3d9a", ) ] ) fig.update_layout( title="Pathway Perturbation Scores", xaxis_title="Pathway score (0-100)", yaxis_title="Pathway", template="plotly_white", height=max(360, len(pathways) * 28), showlegend=False, ) return fig @staticmethod def create_therapy_contribution_chart(ranked_candidates: List[Dict], top_n: int = 8) -> go.Figure: """Create stacked contribution chart for therapy ranking components.""" top = ranked_candidates[:top_n] if not top: fig = go.Figure() fig.add_annotation( text="No therapy candidates available", xref="paper", yref="paper", x=0.5, y=0.5, showarrow=False, font=dict(size=14, color="gray"), ) fig.update_layout(height=320, template="plotly_white") return fig labels = [c.get("drug_name", "Unknown") for c in top] components = { "Target match": [float(c.get("target_gene_match", 0.0)) * 100.0 for c in top], "Pathway relevance": [float(c.get("pathway_relevance", 0.0)) * 100.0 for c in top], "Evidence quality": [float(c.get("evidence_quality", 0.0)) * 100.0 for c in top], "Clinical maturity": [float(c.get("clinical_maturity", 0.0)) * 100.0 for c in top], "Safety penalty": [-float(c.get("safety_risk_penalty", 0.0)) * 100.0 for c in top], } fig = go.Figure() palette = ["#1f77b4", "#9467bd", "#2ca02c", "#ff7f0e", "#d62728"] for idx, (label, values) in enumerate(components.items()): fig.add_trace(go.Bar(x=labels, y=values, name=label, marker_color=palette[idx])) fig.update_layout( barmode="relative", title="Therapy Candidate Score Contributions", xaxis_title="Drug candidate", yaxis_title="Component contribution (scaled)", template="plotly_white", height=420, legend_title="Components", ) return fig @staticmethod def create_confidence_completeness_chart(ranked_candidates: List[Dict], top_n: int = 8) -> go.Figure: """Create bubble chart of confidence vs completeness for top therapies.""" top = ranked_candidates[:top_n] if not top: fig = go.Figure() fig.add_annotation( text="No confidence/completeness data available", xref="paper", yref="paper", x=0.5, y=0.5, showarrow=False, font=dict(size=14, color="gray"), ) fig.update_layout(height=320, template="plotly_white") return fig x_vals = [float(c.get("completeness_pct", 0.0)) for c in top] y_vals = [float(c.get("ranking_confidence", 0.0)) for c in top] names = [c.get("drug_name", "Unknown") for c in top] scores = [float(c.get("composite_score", 0.0)) for c in top] fig = go.Figure( data=[ go.Scatter( x=x_vals, y=y_vals, mode="markers+text", text=names, textposition="top center", marker=dict( size=[max(10.0, min(40.0, s / 2.5)) for s in scores], color=scores, colorscale="Blues", showscale=True, colorbar=dict(title="Composite"), ), hovertemplate="%{text}
Completeness=%{x:.1f}%
Confidence=%{y:.1f}", ) ] ) fig.update_layout( title="Confidence vs Data Completeness", xaxis_title="Completeness (%)", yaxis_title="Confidence (0-100)", template="plotly_white", height=380, showlegend=False, ) return fig @staticmethod def create_assay_ranking_comparison_chart(assays: List[Dict], top_n: int = 8) -> go.Figure: """Create ranked assay score chart with confidence coloring.""" top = assays[:top_n] if not top: fig = go.Figure() fig.add_annotation( text="No assay ranking data available", xref="paper", yref="paper", x=0.5, y=0.5, showarrow=False, font=dict(size=14, color="gray"), ) fig.update_layout(height=320, template="plotly_white") return fig names = [row.get("assay_name", "Assay") for row in top][::-1] scores = [float(row.get("rank_score", 0.0)) for row in top][::-1] confidence = [row.get("confidence", "Low") for row in top][::-1] color_map = {"High": "#2ca02c", "Med": "#ff7f0e", "Low": "#d62728"} colors = [color_map.get(c, "#636efa") for c in confidence] fig = go.Figure( data=[ go.Bar( x=scores, y=names, orientation="h", marker_color=colors, text=[f"{s:.1f}" for s in scores], textposition="auto", customdata=confidence, hovertemplate="%{y}
Score=%{x:.1f}
Confidence=%{customdata}", ) ] ) fig.update_layout( title="Assay Ranking Comparison", xaxis_title="Heuristic score (0-100)", yaxis_title="Assay", template="plotly_white", height=max(340, len(top) * 36), showlegend=False, ) return fig @staticmethod def create_crispr_candidate_score_plot(candidates: List[Dict], top_n: int = 30) -> go.Figure: """Create position-vs-score scatter for CRISPR candidates.""" top = candidates[:top_n] if not top: fig = go.Figure() fig.add_annotation( text="No CRISPR candidate scores available", xref="paper", yref="paper", x=0.5, y=0.5, showarrow=False, font=dict(size=14, color="gray"), ) fig.update_layout(height=320, template="plotly_white") return fig risk_color = {"Low": "#2ca02c", "Medium": "#ff7f0e", "High": "#d62728"} x_vals = [int(row.get("target_position", 0)) for row in top] y_vals = [float(row.get("heuristic_score", 0.0)) for row in top] labels = [row.get("off_target_risk_level", "Medium") for row in top] colors = [risk_color.get(lbl, "#636efa") for lbl in labels] text = [row.get("spacer_sequence", "")[:12] + "..." for row in top] fig = go.Figure( data=[ go.Scatter( x=x_vals, y=y_vals, mode="markers", marker=dict(size=10, color=colors, line=dict(width=0.5, color="#333333")), text=text, customdata=labels, hovertemplate="Position=%{x}
Score=%{y:.1f}
Risk=%{customdata}
Spacer=%{text}", ) ] ) fig.update_layout( title="CRISPR Candidate Score Plot", xaxis_title="Target position", yaxis_title="Heuristic score (0-100)", template="plotly_white", height=360, showlegend=False, ) return fig @staticmethod def create_primer_quality_comparison_chart(primers: List[Dict], top_n: int = 10) -> go.Figure: """Create grouped chart for primer quality and amplicon size.""" top = primers[:top_n] if not top: fig = go.Figure() fig.add_annotation( text="No primer quality data available", xref="paper", yref="paper", x=0.5, y=0.5, showarrow=False, font=dict(size=14, color="gray"), ) fig.update_layout(height=320, template="plotly_white") return fig labels = [f"Pair {idx+1}" for idx in range(len(top))] quality = [float(row.get("quality_score", 0.0)) for row in top] amplicon = [int(row.get("expected_amplicon_size", 0)) for row in top] fig = go.Figure() fig.add_trace(go.Bar(x=labels, y=quality, name="Quality score", marker_color="#1f77b4")) fig.add_trace(go.Scatter(x=labels, y=amplicon, name="Amplicon size (bp)", yaxis="y2", mode="lines+markers", marker_color="#ff7f0e")) fig.update_layout( title="Primer Quality Comparison", xaxis_title="Primer pair", yaxis=dict(title="Quality score (0-100)"), yaxis2=dict(title="Amplicon size (bp)", overlaying="y", side="right"), template="plotly_white", height=380, legend=dict(orientation="h"), ) return fig @staticmethod def create_portfolio_funnel(stage_distribution: List[Dict]) -> go.Figure: """Create a stage distribution funnel for portfolio visibility.""" if not stage_distribution: fig = go.Figure() fig.add_annotation(text="No projects available", x=0.5, y=0.5, xref="paper", yref="paper", showarrow=False) fig.update_layout(template="plotly_white", height=320) return fig df = pd.DataFrame(stage_distribution) fig = go.Figure(go.Funnel(y=df["stage"], x=df["count"], textinfo="value+percent initial")) fig.update_layout(title="Portfolio Funnel by Stage", template="plotly_white", height=360) return fig @staticmethod def create_milestone_burndown(milestones: List[Dict]) -> go.Figure: """Show milestone status and due-date completion trend.""" if not milestones: fig = go.Figure() fig.add_annotation(text="No milestones yet", x=0.5, y=0.5, xref="paper", yref="paper", showarrow=False) fig.update_layout(template="plotly_white", height=320) return fig df = pd.DataFrame(milestones) status_counts = df["status"].value_counts().to_dict() if "status" in df.columns else {} fig = go.Figure( data=[ go.Bar( x=list(status_counts.keys()), y=list(status_counts.values()), marker_color=["#9ca3af", "#3b82f6", "#ef4444", "#10b981"][: max(1, len(status_counts))], ) ] ) fig.update_layout(title="Milestone Completion / Blockers", yaxis_title="Count", template="plotly_white", height=340) return fig def _install_plotly_theme_wrapper() -> None: """Theme every public ProteinVisualizer figure without changing call sites.""" for name, attr in list(vars(ProteinVisualizer).items()): if name.startswith("_") or not isinstance(attr, staticmethod): continue func = attr.__func__ @wraps(func) def themed(*args: Any, __func=func, **kwargs: Any) -> Any: result = __func(*args, **kwargs) if isinstance(result, go.Figure): return ProteinVisualizer._style_figure(result) return result setattr(ProteinVisualizer, name, staticmethod(themed)) _install_plotly_theme_wrapper()