"""
return html
@staticmethod
def create_ligand_table_html(ligands: list) -> str:
"""
Create formatted HTML table for known ligands.

Args:
ligands: Ligand records (dicts). The keys read here are ``chembl_id``,
``name``, ``activity_type``, ``activity_value``, ``activity_units``,
``molecular_weight`` and ``chembl_url``; each has a fallback default.

Returns:
The accumulated table markup, or a "No known ligands found" message
when ``ligands`` is empty.
"""
if not ligands:
return "
No known ligands found
"
html = """
Structure
Compound Name
ChEMBL ID
Activity Type
Value
MW (Da)
Action
"""
for ligand in ligands:
chembl_id = ligand.get('chembl_id', 'N/A')
name = ligand.get('name')
# Use ChEMBL ID if name is None or empty
if not name:
name = chembl_id
activity_type = ligand.get('activity_type', 'N/A')
activity_value = ligand.get('activity_value', 0)
activity_units = ligand.get('activity_units', 'nM')
mw = ligand.get('molecular_weight', 'N/A')
chembl_url = ligand.get('chembl_url', '#')
# Bucket the activity value: <100 strong, <1000 moderate, otherwise weak.
# The thresholds are applied to the raw number regardless of activity_units.
# NOTE(review): the value is compared without coercion, so a None or string
# activity_value would raise TypeError here - confirm callers always pass a number.
if activity_value < 100:
activity_class = "activity-strong"
activity_label = "Strong"
elif activity_value < 1000:
activity_class = "activity-moderate"
activity_label = "Moderate"
else:
activity_class = "activity-weak"
activity_label = "Weak"
# Structure image from ChEMBL
img_url = f"https://www.ebi.ac.uk/chembl/api/data/image/{chembl_id}.svg"
# Escape single quotes in name for JavaScript
name_escaped = name.replace("'", "\\'")
html += f"""
"""
return html
@staticmethod
def create_molstar_docking_viewer(
protein_structure: Dict,
ligand_data: Dict,
docking_result: Dict,
ligand_name: str,
view_mode: str = "Cartoon (Ribbon)",
selected_mode: Optional[int] = None,
) -> str:
"""Create a Mol*-based viewer for an actual protein-ligand pose.

Args:
protein_structure: Dict read for ``available``, ``pdb_text`` and ``pdb_url``.
ligand_data: Only its ``name`` is read, as a fallback label.
docking_result: Dict read for ``pose_pdb_by_mode``, ``best_mode``,
``pose_source``, ``selected_pocket``, ``grid``, ``docking_box``,
``simulated`` and ``binding_affinity``.
ligand_name: Preferred label for the ligand; falls back to
``ligand_data["name"]`` and then to "Ligand pose".
view_mode: Serialized into the script as ``viewMode``.
selected_mode: Pose mode to show first. A falsy value (including 0)
falls back to ``best_mode["mode"]`` and then to 1.

Returns:
The markup produced by ``ProteinVisualizer._molstar_shell``, or the
result of ``ProteinVisualizer._viewer_message`` (level "warning") when
the structure, its coordinates, or the ligand pose are missing.
"""
import json
# Bail out early when the caller has no usable protein structure.
if not protein_structure.get("available"):
return ProteinVisualizer._viewer_message("No protein structure available", "warning")
pdb_text = str(protein_structure.get("pdb_text") or "")
pdb_url = str(protein_structure.get("pdb_url") or "")
# data: URIs are discarded, so only inline text or a non-data URL counts as coordinates.
if pdb_url.startswith("data:"):
pdb_url = ""
if not pdb_text and not pdb_url:
return ProteinVisualizer._viewer_message(
"No protein coordinate data is available for this docking view.", "warning"
)
pose_blocks = docking_result.get("pose_pdb_by_mode") or {}
best_mode = docking_result.get("best_mode") or {}
mode_number = selected_mode or best_mode.get("mode") or 1
# Look the pose up by the key as given, then by its string form, then take the first pose.
pose_pdb = (
pose_blocks.get(mode_number)
or pose_blocks.get(str(mode_number))
or next(iter(pose_blocks.values()), "")
)
pose_source = str(docking_result.get("pose_source") or "unavailable")
selected_pocket = docking_result.get("selected_pocket") or {}
# Grid precedence: selected pocket's grid, then the result's "grid", then "docking_box".
grid = selected_pocket.get("grid") or docking_result.get("grid") or docking_result.get("docking_box")
# Without pose coordinates nothing can be rendered; pick the message that matches the cause.
if not pose_pdb:
if docking_result.get("simulated") or pose_source == "simulation":
return ProteinVisualizer._viewer_message(
"Simulated docking contains score and center metadata only; no ligand pose coordinates are rendered.",
"warning",
)
return ProteinVisualizer._viewer_message(
"Ligand pose unavailable. Run real docking with Vina pose output to render the complex.",
"warning",
)
affinity = docking_result.get("binding_affinity")
affinity_text = f"{float(affinity):.2f} kcal/mol" if affinity is not None else "N/A"
ligand_label = ligand_name or ligand_data.get("name") or "Ligand pose"
# NOTE(review): the option/selector templates below are empty strings as written here,
# yet the script looks up an element with id "pose-mode-select" - confirm the markup.
mode_options = "".join(
f''
for mode in pose_blocks.keys()
)
selector_html = (
f''
if len(pose_blocks) > 1
else ""
)
legend_html = f"""
Protein
{html_lib.escape(str(ligand_label))}
Oxygen
Nitrogen
Docking grid
{selector_html}
"""
# json.dumps turns each Python value into a literal that is spliced into the script below.
protein_text_json = json.dumps(pdb_text)
protein_url_json = json.dumps(pdb_url)
# Pose keys are stringified so the lookup by selectedMode (also a string) matches.
pose_blocks_json = json.dumps({str(k): v for k, v in pose_blocks.items()})
selected_mode_json = json.dumps(str(mode_number))
view_mode_json = json.dumps(view_mode)
grid_json = json.dumps(grid or {})
# JavaScript body handed to _molstar_shell. Literal JS braces are doubled because this
# is an f-string. The script uses a `viewer` object it does not define - presumably
# supplied by the shell; confirm in _molstar_shell.
body_script = f"""
const proteinText = {protein_text_json};
const proteinUrl = {protein_url_json};
const poseBlocks = {pose_blocks_json};
let selectedMode = {selected_mode_json};
const viewMode = {view_mode_json};
const dockingGrid = {grid_json};
function gridAsPdb(grid) {{
if (!grid || !grid.center || !grid.size) return "";
const c = grid.center;
const h = {{x: grid.size.x / 2, y: grid.size.y / 2, z: grid.size.z / 2}};
const corners = [
[-1,-1,-1], [1,-1,-1], [1,1,-1], [-1,1,-1],
[-1,-1,1], [1,-1,1], [1,1,1], [-1,1,1]
];
const lines = corners.map((sign, index) => {{
const x = c.x + sign[0] * h.x;
const y = c.y + sign[1] * h.y;
const z = c.z + sign[2] * h.z;
return `HETATM${{String(index + 1).padStart(5)}} C${{index + 1}} GRD G 1 ${{x.toFixed(3).padStart(8)}}${{y.toFixed(3).padStart(8)}}${{z.toFixed(3).padStart(8)}} 0.25 0.00 C`;
}});
const edges = [[1,2],[2,3],[3,4],[4,1],[5,6],[6,7],[7,8],[8,5],[1,5],[2,6],[3,7],[4,8]];
edges.forEach(edge => lines.push(`CONECT${{String(edge[0]).padStart(5)}}${{String(edge[1]).padStart(5)}}`));
lines.push("END");
return lines.join("\\n");
}}
async function loadProtein() {{
if (proteinText && proteinText.length > 0) {{
await viewer.loadStructureFromData(proteinText, "pdb");
}} else {{
await viewer.loadStructureFromUrl(proteinUrl, "pdb");
}}
}}
async function loadPose(mode) {{
const poseData = poseBlocks[mode] || poseBlocks[Object.keys(poseBlocks)[0]];
if (!poseData) throw new Error("No ligand pose coordinates are available.");
await viewer.loadStructureFromData(poseData, "pdb");
}}
await loadProtein();
await loadPose(selectedMode);
const gridBoxData = gridAsPdb(dockingGrid);
if (gridBoxData) await viewer.loadStructureFromData(gridBoxData, "pdb");
const poseSelect = document.getElementById("pose-mode-select");
if (poseSelect) {{
poseSelect.addEventListener("change", async function(event) {{
await viewer.plugin.clear();
selectedMode = event.target.value;
await loadProtein();
await loadPose(selectedMode);
if (gridBoxData) await viewer.loadStructureFromData(gridBoxData, "pdb");
if (viewer.plugin && viewer.plugin.managers && viewer.plugin.managers.camera) {{
viewer.plugin.managers.camera.reset();
}}
}});
}}
"""
return ProteinVisualizer._molstar_shell(
title=f"Protein-Ligand Complex - {ligand_label}",
status=f"Pose source: {pose_source}; affinity: {affinity_text}",
legend_html=legend_html,
body_script=body_script,
height=620,
show_side_panels=False,
)
@staticmethod
def create_docking_3d_viewer(protein_structure: Dict, ligand_data: Dict,
                             docking_result: Dict, ligand_name: str,
                             view_mode: str = "Cartoon (Ribbon)") -> str:
    """
    Create a Mol*-based 3D viewer showing protein with docked ligand.

    Kept as a compatibility wrapper for existing Streamlit call sites: every
    argument is forwarded unchanged to
    ``ProteinVisualizer.create_molstar_docking_viewer`` and its result is
    returned as-is. No pose mode is passed, so that method's default applies.

    The previous inline implementation (a placeholder ligand built from the
    docking centre) sat after the unconditional return and could never run;
    it has been removed.

    Args:
        protein_structure: Protein structure dict, forwarded as-is.
        ligand_data: Ligand dict, forwarded as-is.
        docking_result: Docking result dict, forwarded as-is.
        ligand_name: Display name of the ligand, forwarded as-is.
        view_mode: Representation name, forwarded as ``view_mode``.

    Returns:
        Whatever ``create_molstar_docking_viewer`` returns for these inputs.
    """
    return ProteinVisualizer.create_molstar_docking_viewer(
        protein_structure,
        ligand_data,
        docking_result,
        ligand_name,
        view_mode=view_mode,
    )
@staticmethod
def predict_best_ligand(ligands: list, protein_data: Dict) -> Dict:
    """
    Rank known ligands with an additive heuristic and return the best one.

    Points are summed per ligand:
    1. Activity value: <10 -> 50, <100 -> 35, <1000 -> 20, otherwise 5.
       A missing, ``None`` or unparseable value is treated as infinitely weak.
    2. Molecular weight: 160-500 -> 15, below 160 -> 5, above 500 -> 8.
    3. SMILES present -> 10.
    4. Name contains a drug/inhibitor marker (e.g. "nib", "mab") -> 10.

    Args:
        ligands: Ligand records (dicts). Keys read: ``activity_value``,
            ``activity_type``, ``molecular_weight``, ``smiles``, ``name``.
            Numeric fields may be numbers or numeric strings.
        protein_data: Accepted for interface compatibility; not used here.

    Returns:
        ``{"available": False, "message": ...}`` when ``ligands`` is empty.
        Otherwise a dict with ``available``, ``best_ligand``, ``score``,
        ``confidence`` ("High" >= 70, "Medium" >= 50, else "Low"),
        ``reasons`` and ``all_predictions`` (the five highest-scoring entries).
    """
    if not ligands:
        return {"available": False, "message": "No ligands to analyze"}

    def _as_number(value, default):
        """Return value as a number, or default when it is missing or unparseable."""
        if value is None:
            return default
        if isinstance(value, (int, float)):
            return value
        try:
            return float(value)
        except (TypeError, ValueError):
            return default

    # Hoisted out of the loop: the marker list never changes between ligands.
    drug_indicators = ('inhibitor', 'mab', 'nib', 'tinib', 'zumab', 'ciclib')

    predictions = []
    for ligand in ligands:
        score = 0
        reasons = []

        # Factor 1: Activity value (most important).
        # Coerced first so None / string values from upstream data cannot
        # raise TypeError in the comparisons below.
        activity_value = _as_number(ligand.get('activity_value'), float('inf'))
        activity_type = ligand.get('activity_type', '')
        if activity_value < 10:  # Very potent
            score += 50
            reasons.append(f"Very potent {activity_type}: {activity_value:.2f} nM")
        elif activity_value < 100:  # Potent
            score += 35
            reasons.append(f"Potent {activity_type}: {activity_value:.2f} nM")
        elif activity_value < 1000:  # Moderate
            score += 20
            reasons.append(f"Moderate {activity_type}: {activity_value:.2f} nM")
        else:  # Weak
            score += 5
            reasons.append(f"Weak activity: {activity_value:.2f} nM")

        # Factor 2: Molecular weight (drug-like range); 0 means "unknown".
        mw = _as_number(ligand.get('molecular_weight'), 0)
        if mw and 160 <= mw <= 500:  # Optimal drug-like range
            score += 15
            reasons.append(f"Optimal MW: {mw:.1f} Da")
        elif mw and mw <= 160:
            score += 5
            reasons.append(f"Low MW: {mw:.1f} Da")
        elif mw and mw > 500:
            score += 8
            reasons.append(f"High MW: {mw:.1f} Da")

        # Factor 3: SMILES availability (for structure-based predictions)
        if ligand.get('smiles'):
            score += 10
            reasons.append("Structure available for docking")

        # Factor 4: Name indicates known drug. The name may be None in
        # upstream records, so fall back to an empty string before lowering.
        name = str(ligand.get('name') or '').lower()
        if any(indicator in name for indicator in drug_indicators):
            score += 10
            reasons.append("Known drug or inhibitor class")

        predictions.append({
            "ligand": ligand,
            "score": score,
            "reasons": reasons,
            "confidence": "High" if score >= 70 else ("Medium" if score >= 50 else "Low")
        })

    # Highest score first; ties keep their input order (stable sort).
    predictions.sort(key=lambda entry: entry['score'], reverse=True)
    best = predictions[0]
    return {
        "available": True,
        "best_ligand": best['ligand'],
        "score": best['score'],
        "confidence": best['confidence'],
        "reasons": best['reasons'],
        "all_predictions": predictions[:5]  # Top 5
    }
@staticmethod
def advanced_binding_prediction(known_ligands: list, protein_data: Dict,
                                novel_compounds: Optional[list] = None) -> Dict:
    """
    Score known ligands and optional novel compounds against one protein.

    Binding rules are derived from ``known_ligands`` with
    ``extract_binding_rules`` (an empty dict when there are none). Every
    compound is then scored with ``predict_binding_score``, each group is
    sorted by ``predicted_score`` in descending order, and
    ``generate_recommendations`` summarizes the outcome.

    Args:
        known_ligands: Ligand records with experimental data; may be empty.
        protein_data: Passed through to ``predict_binding_score``.
        novel_compounds: Optional compounds without experimental data.

    Returns:
        Dict with keys ``known_ligands``, ``novel_candidates``,
        ``binding_rules`` and ``recommendations``.
    """
    def _by_score(entry):
        return entry["predicted_score"]

    rules = {}
    known_scored = []
    novel_scored = []

    if known_ligands:
        # SAR rules come only from ligands with known activity.
        rules = ProteinVisualizer.extract_binding_rules(known_ligands)
        known_scored = sorted(
            (
                ProteinVisualizer.predict_binding_score(
                    ligand, rules, protein_data, is_known=True
                )
                for ligand in known_ligands
            ),
            key=_by_score,
            reverse=True,
        )

    if novel_compounds:
        # `rules` is still the empty dict when no known ligands were supplied.
        novel_scored = sorted(
            (
                ProteinVisualizer.predict_binding_score(
                    candidate, rules, protein_data, is_known=False
                )
                for candidate in novel_compounds
            ),
            key=_by_score,
            reverse=True,
        )

    return {
        "known_ligands": known_scored,
        "novel_candidates": novel_scored,
        "binding_rules": rules,
        "recommendations": ProteinVisualizer.generate_recommendations(
            known_scored, novel_scored, rules
        ),
    }
@staticmethod
def extract_binding_rules(known_ligands: list) -> Dict:
    """
    Extract SAR (Structure-Activity Relationship) rules from known ligands.

    Args:
        known_ligands: Ligand records (dicts). ``activity_value`` and
            ``molecular_weight`` are read; each may be a number, a numeric
            string, ``None`` or absent. Missing, unparseable and
            non-positive values are ignored.

    Returns:
        Dict with:
        - ``optimal_mw_range``: ``[low, high]`` = mean +/- one standard
          deviation of the molecular weights, clamped to [150, 600]
          (a spread of 50 is used for a single value); ``[0.0, 0.0]``
          when no weight is available.
        - ``activity_threshold``: ``potent`` / ``moderate`` / ``weak``
          cut-offs (potent, 10x, 100x); ``{}`` when no activity is available.
        - ``pharmacophore``: always ``[]`` here.
        - ``property_ranges``: always ``{}`` here.
        - ``lipinski_compliance``: always ``True`` here.
    """
    import statistics

    def _numeric(value):
        """Return value as a number; None and unparseable values become 0."""
        if value is None:
            return 0
        if isinstance(value, (int, float)):
            return value
        try:
            return float(value)
        except (ValueError, TypeError):
            return 0

    rules = {
        "optimal_mw_range": [0.0, 0.0],
        "activity_threshold": {},
        "pharmacophore": [],
        "property_ranges": {}
    }

    # Coerce before comparing: upstream records may carry None or strings,
    # and `None > 0` would raise TypeError.
    activities = []
    mw_values = []
    for lig in known_ligands:
        activity = _numeric(lig.get('activity_value', 0))
        mw = _numeric(lig.get('molecular_weight', 0))
        if activity > 0:
            activities.append(activity)
        if mw > 0:
            mw_values.append(mw)

    if activities:
        # Potent threshold: the value at the quarter position of the sorted
        # activities, or the smallest value when there are four or fewer.
        activities_sorted = sorted(activities)
        if len(activities_sorted) > 4:
            potent_threshold = activities_sorted[len(activities_sorted) // 4]
        else:
            potent_threshold = activities_sorted[0]
        rules["activity_threshold"] = {
            "potent": potent_threshold,
            "moderate": potent_threshold * 10,
            "weak": potent_threshold * 100
        }

    if mw_values:
        # Optimal MW range (mean +/- 1 std dev); stdev needs two samples,
        # so a single value uses a fixed spread of 50.
        mean_mw = statistics.mean(mw_values)
        std_mw = statistics.stdev(mw_values) if len(mw_values) > 1 else 50
        rules["optimal_mw_range"] = [
            max(150, mean_mw - std_mw),
            min(600, mean_mw + std_mw)
        ]

    # Lipinski's Rule of Five compliance from known actives
    rules["lipinski_compliance"] = True
    return rules
@staticmethod
def predict_binding_score(compound: Dict, binding_rules: Dict,
                          protein_data: Dict, is_known: bool = False) -> Dict:
    """
    Predict a heuristic binding score for a compound.

    The score is the sum of up to seven factors (experimental activity,
    molecular weight, Lipinski violations, SMILES availability, drug
    status, literature evidence, target class). Each contributing factor
    also adds a confidence weight; the reported confidence is their mean
    (0.5 when no factor contributed).

    Args:
        compound: Compound record (dict). Keys read: ``activity_value``,
            ``activity_type``, ``molecular_weight``, ``smiles``, ``name``,
            ``source``, ``status``, ``pmid``, ``target_class``.
        binding_rules: Rules dict; ``activity_threshold`` and
            ``optimal_mw_range`` are read, with defaults when absent.
        protein_data: Accepted for interface compatibility; not used here.
        is_known: When True, the experimental ``activity_value`` is scored
            and used for the affinity estimate.

    Returns:
        Dict with ``compound``, ``predicted_score`` (capped at 100),
        ``confidence``, ``confidence_level``, ``confidence_color``,
        ``predicted_affinity`` (kcal/mol, negative), ``reasons``,
        ``warnings``, ``is_known`` and ``recommendation``.
    """
    import math

    score = 0
    confidence_factors = []
    reasons = []
    warnings = []

    # Factor 1: Experimental activity (only for known ligands).
    # `activity` stays None unless a usable numeric value is found; the same
    # coerced value is reused for the affinity estimate further down.
    activity = None
    if is_known and compound.get('activity_value'):
        activity = compound['activity_value']
        activity_type = compound.get('activity_type', 'IC50')
        # Convert activity to float if it's a string
        if isinstance(activity, str):
            try:
                activity = float(activity)
            except (ValueError, TypeError):
                activity = None
        if activity is not None:
            thresholds = binding_rules.get('activity_threshold', {})
            if activity <= thresholds.get('potent', 10):
                score += 50
                confidence_factors.append(0.95)
                reasons.append(f"Very potent {activity_type}: {activity:.2f} nM (experimental)")
            elif activity <= thresholds.get('moderate', 100):
                score += 35
                confidence_factors.append(0.85)
                reasons.append(f"Moderate {activity_type}: {activity:.2f} nM (experimental)")
            else:
                score += 15
                confidence_factors.append(0.70)
                reasons.append(f"Weak activity: {activity:.2f} nM (experimental)")

    # Factor 2: Molecular weight (drug-likeness)
    mw = compound.get('molecular_weight', 0)
    # Convert mw to float if it's a string
    if isinstance(mw, str):
        try:
            mw = float(mw)
        except (ValueError, TypeError):
            mw = 0
    optimal_range = binding_rules.get('optimal_mw_range', [160, 500])
    if mw:
        if optimal_range[0] <= mw <= optimal_range[1]:
            score += 15
            confidence_factors.append(0.80)
            reasons.append(f"Optimal MW: {mw:.1f} Da (within active range)")
        elif 150 <= mw <= 600:  # Lipinski range
            score += 10
            confidence_factors.append(0.70)
            reasons.append(f"Acceptable MW: {mw:.1f} Da (drug-like)")
            if mw > 500:
                warnings.append("MW >500 Da may reduce oral bioavailability")
        else:
            score += 3
            confidence_factors.append(0.50)
            warnings.append(f"MW {mw:.1f} Da outside optimal range")

    # Factor 3: Lipinski's Rule of Five compliance
    lipinski_violations = ProteinVisualizer.calculate_lipinski_violations(compound)
    if lipinski_violations == 0:
        score += 15
        confidence_factors.append(0.85)
        reasons.append("Passes Lipinski's Rule of Five (drug-like)")
    elif lipinski_violations == 1:
        score += 10
        confidence_factors.append(0.75)
        reasons.append("1 Lipinski violation (acceptable)")
        warnings.append("Minor drug-likeness concern")
    else:
        score += 5
        confidence_factors.append(0.60)
        warnings.append(f"{lipinski_violations} Lipinski violations (poor drug-likeness)")

    # Factor 4: Chemical structure availability
    if compound.get('smiles'):
        score += 10
        confidence_factors.append(0.90)
        reasons.append("Structure available for computational docking")

    # Factor 5: Known drug status. Name and source may be None in upstream
    # records, so normalize to lower-cased strings once.
    name = str(compound.get('name') or '').lower()
    source = str(compound.get('source') or '').lower()
    if 'fda' in source or 'approved' in str(compound.get('status', '')).lower():
        score += 15
        confidence_factors.append(0.95)
        reasons.append("FDA-approved drug (validated safety profile)")
    elif any(x in name for x in ['inhibitor', 'mab', 'nib', 'tinib', 'zumab']):
        score += 12
        confidence_factors.append(0.85)
        reasons.append("Known inhibitor/drug class")

    # Factor 6: Literature evidence
    if compound.get('pmid') or 'literature' in source:
        score += 8
        confidence_factors.append(0.75)
        reasons.append("Literature evidence of activity")

    # Factor 7: Target class match (for repurposing)
    if compound.get('target_class'):
        score += 10
        confidence_factors.append(0.80)
        reasons.append(f"Target class match: {compound['target_class']}")

    # Calculate confidence (average of all factors)
    confidence = sum(confidence_factors) / len(confidence_factors) if confidence_factors else 0.5

    # Confidence level
    if confidence >= 0.85:
        confidence_level = "High"
        confidence_color = "#28a745"
    elif confidence >= 0.70:
        confidence_level = "Medium"
        confidence_color = "#ffc107"
    else:
        confidence_level = "Low"
        confidence_color = "#dc3545"

    # Predicted binding affinity (simplified QSAR)
    if activity is not None and activity > 0:
        # Treat the activity (nM) as a dissociation constant:
        # dG = RT ln(Kd) ~= 1.36 * log10(Kd in M) kcal/mol near room temperature.
        # The result is negative for sub-molar values. The previous leading
        # minus sign made it positive, so the clamp always returned -4.
        predicted_affinity = 1.36 * math.log10(activity / 1e9)  # kcal/mol
        predicted_affinity = max(-12, min(-4, predicted_affinity))
    else:
        # For unknowns (or unusable activity values): estimate based on score
        predicted_affinity = -4 - (score / 100) * 6  # -4 at score 0, -10 at score 100

    return {
        "compound": compound,
        "predicted_score": min(100, score),
        "confidence": round(confidence, 2),
        "confidence_level": confidence_level,
        "confidence_color": confidence_color,
        "predicted_affinity": round(predicted_affinity, 2),
        "reasons": reasons,
        "warnings": warnings,
        "is_known": is_known,
        "recommendation": "Highly recommended" if score >= 75 else ("Worth testing" if score >= 50 else "Low priority")
    }
@staticmethod
def calculate_lipinski_violations(compound: Dict) -> int:
    """
    Calculate Lipinski's Rule of Five violations.

    Rules: MW <= 500, LogP <= 5, HBD <= 5, HBA <= 10. Only the molecular
    weight is available here, so only the MW rule is actually checked.

    Args:
        compound: Compound record (dict); ``molecular_weight`` may be a
            number, a numeric string, ``None`` or absent. Missing or
            unparseable values count as 0 (no violation).

    Returns:
        The violation count truncated to an int: 1 when MW > 500, else 0.
    """
    violations: float = 0.0
    mw = compound.get('molecular_weight', 0)
    # Normalize before comparing: None or a non-numeric string would
    # otherwise raise TypeError in the checks below.
    if mw is None:
        mw = 0
    elif isinstance(mw, str):
        try:
            mw = float(mw)
        except (ValueError, TypeError):
            mw = 0
    if mw > 500:
        violations += 1
    # Note: Would need to calculate LogP, HBD, HBA from SMILES
    # For now, estimate based on MW
    if mw > 450:  # Rough proxy for LogP violations
        # Half a violation; int() below truncates it, so on its own it
        # never changes the returned count.
        violations += 0.5
    return int(violations)
@staticmethod
def generate_recommendations(known_predictions: list, novel_predictions: list,
                             binding_rules: Dict) -> list:
    """
    Generate actionable recommendations for drug discovery.

    Both prediction lists are expected to be sorted best-first, because
    only their first entries are treated as "best".

    Args:
        known_predictions: Prediction dicts for known ligands; each has
            ``compound``, ``predicted_score`` and ``predicted_affinity``.
        novel_predictions: Prediction dicts for novel compounds (may be
            empty or ``None``); each has ``compound``, ``predicted_score``
            and ``confidence_level``.
        binding_rules: Accepted for interface compatibility; not used here.

    Returns:
        List of recommendation dicts (``type``, ``compound``, ``score``,
        ``action``, ``priority``) in this order when applicable: best
        known binder, top novel candidate (score >= 60), drug repurposing,
        structure optimization (needs at least three known predictions).
    """
    def _display_name(compound):
        """Return a printable compound name, falling back to the ChEMBL id."""
        return compound.get("name") or compound.get("chembl_id") or "N/A"

    def _mw_or_zero(value):
        """Return the molecular weight as a number; unusable values become 0."""
        if value is None:
            return 0
        if isinstance(value, (int, float)):
            return value
        try:
            return float(value)
        except (ValueError, TypeError):
            return 0

    recommendations = []

    # Recommendation 1: Best known binder
    if known_predictions:
        best_known = known_predictions[0]
        recommendations.append({
            "type": "Best Known Binder",
            "compound": _display_name(best_known["compound"]),
            "score": best_known["predicted_score"],
            "action": f"Use as positive control in experiments (predicted affinity: {best_known['predicted_affinity']:.1f} kcal/mol)",
            "priority": "High"
        })

    # Recommendation 2: Top novel candidate
    if novel_predictions:
        best_novel = novel_predictions[0]
        if best_novel["predicted_score"] >= 60:
            recommendations.append({
                "type": "Novel Candidate",
                "compound": _display_name(best_novel["compound"]),
                "score": best_novel["predicted_score"],
                "action": f"Priority for experimental validation (confidence: {best_novel['confidence_level']})",
                "priority": "High" if best_novel["predicted_score"] >= 75 else "Medium"
            })

    # Recommendation 3: Repurposing opportunities
    repurposing = [p for p in (novel_predictions or [])
                   if p["compound"].get("source") == "Drug Repurposing"]
    if repurposing:
        recommendations.append({
            "type": "Drug Repurposing",
            "compound": f"{len(repurposing)} FDA-approved drug(s)",
            "score": max(p["predicted_score"] for p in repurposing),
            "action": "Consider for off-label use or clinical trials (safety already established)",
            "priority": "High"
        })

    # Recommendation 4: Structure optimization
    if known_predictions and len(known_predictions) >= 3:
        # Average over the top three; an unknown weight counts as 0 rather
        # than breaking the sum.
        top_three = known_predictions[:3]
        mw_list = [_mw_or_zero(p["compound"].get("molecular_weight", 0)) for p in top_three]
        top3_avg_mw = sum(mw_list) / 3 if mw_list else 0
        recommendations.append({
            "type": "Structure Optimization",
            "compound": "New derivatives",
            "score": 0,
            "action": f"Design analogs around MW ~{top3_avg_mw:.0f} Da based on top binders",
            "priority": "Medium"
        })

    return recommendations
@staticmethod
def create_risk_calculator_ui() -> str:
    """
    Return the static markup for the predictive risk calculator panel.

    Takes no input and always returns the same string.
    NOTE(review): the template as it appears here holds only the heading
    and the description text - confirm whether form/script markup is
    expected to be part of it.
    """
    return """
🔬 Predictive Risk Calculator
Calculate your disease risk based on biomarker levels and personal factors
"""
@staticmethod
def create_drug_target_visualization(drug_data: Dict) -> go.Figure:
    """
    Build a bar chart of drug counts per development phase.

    Args:
        drug_data: Dict with an ``available`` flag and the counters
            ``total_fda``, ``total_trials`` and ``total_investigational``
            (each defaults to 0 when absent).

    Returns:
        A Plotly figure with one bar per phase, or a figure holding only a
        "No drug-target data available" annotation when ``available`` is falsy.
    """
    if not drug_data.get('available'):
        placeholder = go.Figure()
        placeholder.add_annotation(
            text="No drug-target data available",
            xref="paper", yref="paper",
            x=0.5, y=0.5, showarrow=False,
            font=dict(size=16, color="gray")
        )
        placeholder.update_layout(height=400)
        return placeholder

    # One row per phase: (axis label, key in drug_data, bar colour).
    pipeline = (
        ('FDA Approved', 'total_fda', '#28a745'),
        ('Clinical Trials', 'total_trials', '#ffc107'),
        ('Investigational', 'total_investigational', '#17a2b8'),
    )
    phase_labels = [label for label, _, _ in pipeline]
    phase_counts = [drug_data.get(key, 0) for _, key, _ in pipeline]
    phase_colors = [color for _, _, color in pipeline]

    bars = go.Bar(
        x=phase_labels,
        y=phase_counts,
        marker=dict(color=phase_colors, line=dict(color='black', width=1.5)),
        text=phase_counts,
        textposition='outside',
        hovertemplate='%{x} Count: %{y}'
    )
    chart = go.Figure(bars)
    chart.update_layout(
        title="Drug Development Pipeline",
        yaxis_title="Number of Drugs",
        height=350,
        template="plotly_white",
        showlegend=False
    )
    return chart
@staticmethod
def create_binding_affinity_chart(predictions: List[Dict]) -> go.Figure:
    """
    Create a bar chart of the predicted binding affinity per molecule.

    Only predictions whose ``is_valid`` flag is truthy are plotted. Units,
    axis hint and colour direction come from
    ``ProteinVisualizer._affinity_semantics`` / ``_affinity_colors``.

    Args:
        predictions: List of prediction dictionaries; ``molecule_name`` and
            ``prediction["binding_affinity"]`` are read from each.

    Returns:
        Plotly figure, or the ``_empty_figure`` placeholder when no
        prediction is valid.
    """
    usable = [entry for entry in predictions if entry.get("is_valid", False)]
    if not usable:
        return ProteinVisualizer._empty_figure("No valid predictions available", height=400)

    # Labels default to a 1-based "Molecule N"; missing/None affinities become 0.0.
    labels = [
        entry.get("molecule_name", f"Molecule {position}")
        for position, entry in enumerate(usable, start=1)
    ]
    values = [
        float(entry.get("prediction", {}).get("binding_affinity", 0) or 0)
        for entry in usable
    ]

    semantics = ProteinVisualizer._affinity_semantics(usable)
    unit_label = semantics["unit_label"]
    axis_hint = semantics["axis_hint"]
    higher_is_better = bool(semantics["higher_is_better"])
    bar_colors = ProteinVisualizer._affinity_colors(values, higher_is_better)

    # Pin the axis at zero only when larger values are better; otherwise
    # the range stays None.
    y_range = [0, max(10.0, max(values) * 1.12)] if higher_is_better else None

    hover_text = (
        "%{x} Binding affinity: %{y:.2f} %{customdata}"
        f" Interpretation: {axis_hint}"
    )
    bars = go.Bar(
        x=labels,
        y=values,
        marker=dict(
            color=bar_colors,
            line=dict(color="#334155", width=0.8)
        ),
        text=[f"{value:.2f}" for value in values],
        textposition='outside',
        customdata=[unit_label] * len(values),
        hovertemplate=hover_text
    )
    chart = go.Figure(bars)
    chart.update_layout(
        title="Predicted Binding Affinity",
        xaxis_title="Molecule",
        yaxis_title=f"Binding Affinity ({unit_label}, {axis_hint})",
        yaxis=dict(range=y_range),
        height=max(400, len(usable) * 40),
        template="plotly_white",
        showlegend=False,
        xaxis=dict(tickangle=-45)
    )
    return chart
@staticmethod
def create_binding_likelihood_chart(predictions: List[Dict]) -> go.Figure:
    """
    Create bar chart showing binding likelihood (probability) for multiple molecules.

    Only predictions whose ``is_valid`` flag is truthy are plotted.
    Likelihoods are coerced with ``float(... or 0)``, the same way the
    ranking chart does, so a ``None`` or numeric-string value no longer
    breaks the colour thresholds or the ``:.1f`` label formatting.

    Args:
        predictions: List of prediction dictionaries; ``molecule_name`` and
            ``prediction["binding_likelihood"]`` are read from each.

    Returns:
        Plotly figure, or a figure holding only a "No valid predictions
        available" annotation when nothing is valid.
    """
    valid_predictions = [p for p in predictions if p.get("is_valid", False)]
    if not valid_predictions:
        fig = go.Figure()
        fig.add_annotation(
            text="No valid predictions available",
            xref="paper", yref="paper",
            x=0.5, y=0.5, showarrow=False,
            font=dict(size=16, color="gray")
        )
        fig.update_layout(height=400)
        return fig

    def _likelihood_color(likelihood):
        """Map a likelihood to its traffic-light colour."""
        if likelihood >= 70:
            return "#28a745"  # Green - high
        if likelihood >= 50:
            return "#ffc107"  # Yellow - medium
        if likelihood >= 30:
            return "#ff9800"  # Orange - low
        return "#dc3545"  # Red - very low

    # Extract data
    molecule_names = [
        p.get("molecule_name", f"Molecule {i + 1}")
        for i, p in enumerate(valid_predictions)
    ]
    likelihoods = [
        float(p.get("prediction", {}).get("binding_likelihood", 0) or 0)
        for p in valid_predictions
    ]
    colors = [_likelihood_color(lik) for lik in likelihoods]

    fig = go.Figure(go.Bar(
        x=molecule_names,
        y=likelihoods,
        marker=dict(
            color=colors,
            line=dict(color='rgb(8,48,107)', width=1)
        ),
        text=[f"{lik:.1f}%" for lik in likelihoods],
        textposition='outside',
        hovertemplate='%{x} Binding Likelihood: %{y:.1f}%'
    ))
    fig.update_layout(
        title="Predicted Binding Likelihood",
        xaxis_title="Molecule",
        yaxis_title="Binding Likelihood (%)",
        yaxis=dict(range=[0, 100]),
        height=max(400, len(valid_predictions) * 40),
        template="plotly_white",
        showlegend=False,
        xaxis=dict(tickangle=-45)
    )
    return fig
@staticmethod
def create_binding_ranking_chart(ranked_molecules: List[Dict], top_n: int = 10) -> go.Figure:
    """
    Create a scatter plot of the top-ranked molecules (affinity vs. likelihood).

    Each marker shows the molecule's rank as text, is coloured by
    likelihood, and gets smaller as the rank number grows (never below
    size 10). The x axis is reversed unless
    ``ProteinVisualizer._affinity_semantics`` reports that higher is better.

    Args:
        ranked_molecules: List of ranked prediction dictionaries;
            ``molecule_name``, ``rank`` and ``prediction`` are read.
        top_n: Number of leading entries to display.

    Returns:
        Plotly figure, or the ``_empty_figure`` placeholder when the list
        is empty.
    """
    if not ranked_molecules:
        return ProteinVisualizer._empty_figure("No ranked molecules available", height=400)

    shown = ranked_molecules[:top_n]

    def _prediction_value(entry, field):
        # Missing or None values are plotted as 0.0.
        return float(entry.get("prediction", {}).get(field, 0) or 0)

    # A missing rank falls back to the 1-based position in the slice; the
    # same number feeds the default "Molecule N" label.
    ranks = [entry.get("rank", position) for position, entry in enumerate(shown, start=1)]
    names = [
        entry.get("molecule_name", f"Molecule {rank}")
        for entry, rank in zip(shown, ranks)
    ]
    affinity_values = [_prediction_value(entry, "binding_affinity") for entry in shown]
    likelihood_values = [_prediction_value(entry, "binding_likelihood") for entry in shown]

    semantics = ProteinVisualizer._affinity_semantics(shown)
    unit_label = semantics["unit_label"]
    axis_hint = semantics["axis_hint"]
    higher_is_better = bool(semantics["higher_is_better"])

    # Size based on rank (higher rank = smaller)
    marker_sizes = [max(10, 30 - rank) for rank in ranks]

    hover_text = (
        "%{customdata[0]} Rank: #%{customdata[1]}"
        " Affinity: %{x:.2f} %{customdata[2]}"
        " Likelihood: %{y:.1f}%"
    )
    points = go.Scatter(
        x=affinity_values,
        y=likelihood_values,
        mode='markers+text',
        marker=dict(
            size=marker_sizes,
            color=likelihood_values,
            colorscale='RdYlGn',
            cmin=0,
            cmax=100,
            showscale=True,
            colorbar=dict(title="Likelihood (%)"),
            line=dict(width=1.5, color="#111827")
        ),
        text=[str(rank) for rank in ranks],
        textposition="middle center",
        textfont=dict(size=10, color='white', family='Arial Black'),
        customdata=[(name, rank, unit_label) for name, rank in zip(names, ranks)],
        hovertemplate=hover_text
    )

    chart = go.Figure()
    chart.add_trace(points)
    chart.update_layout(
        title=f"Top {len(shown)} Ranked Drug Candidates",
        xaxis_title=f"Binding Affinity ({unit_label}, {axis_hint})",
        yaxis_title="Binding Likelihood (%)",
        xaxis=dict(autorange=None if higher_is_better else "reversed"),
        yaxis=dict(range=[0, 100]),
        height=500,
        template="plotly_white",
        showlegend=False
    )
    return chart
@staticmethod
def render_phylogenetic_tree(newick_str: str, num_taxa: int = 2) -> bytes:
    """
    Render a Newick tree to PNG bytes with BioPython and matplotlib.

    Args:
        newick_str: Newick format tree string.
        num_taxa: Number of taxa; scales the figure (at least 8 wide, 4 high).

    Returns:
        PNG image bytes.

    Raises:
        ValueError: If anything fails while importing, parsing or drawing;
            the original exception is chained as the cause.
    """
    figure = None
    try:
        # Imported inside the try so a missing BioPython is also reported
        # as ValueError.
        from Bio import Phylo

        parsed_tree = Phylo.read(StringIO(newick_str), "newick")

        # Grow the canvas with the number of taxa.
        height = max(4, num_taxa * 0.5)
        width = max(8, num_taxa * 0.6)
        figure, axes = plt.subplots(figsize=(width, height))

        Phylo.draw(parsed_tree, axes=axes, do_show=False, show_confidence=False)

        # Styling: labelled x axis, title, no frame except the bottom spine.
        axes.set_xlabel("Branch Length", fontsize=10)
        axes.set_title("Phylogenetic Tree", fontsize=12, fontweight='bold', pad=15)
        for side in ('top', 'right', 'left'):
            axes.spines[side].set_visible(False)
        axes.tick_params(left=False, labelleft=True)

        # Adjust layout to prevent label clipping
        plt.tight_layout()

        png_buffer = BytesIO()
        figure.savefig(png_buffer, format='png', dpi=150, bbox_inches='tight', facecolor='white')
        return png_buffer.getvalue()
    except Exception as e:
        raise ValueError(f"Unable to render phylogenetic tree: {e}") from e
    finally:
        # Close the figure on success and on failure alike.
        if figure is not None:
            plt.close(figure)
@staticmethod
def create_phylogenetic_tree_visualization(newick_string: str, metadata: Dict) -> str:
"""
Create a phylogenetic tree visualization with a rendered tree image.

The tree is rendered to PNG by ``render_phylogenetic_tree`` and encoded as
a base64 ``data:`` URI; the raw Newick text is HTML-escaped before it is
placed in the template.

Args:
newick_string: Newick format tree string
metadata: Dictionary; ``num_taxa`` (default 2) and ``method``
(default 'N/A') are read here

Returns:
HTML string with the tree section

Raises:
ValueError: propagated from ``render_phylogenetic_tree`` when the tree
cannot be rendered.
"""
# Get number of taxa for sizing
num_taxa = metadata.get('num_taxa', 2)
# Render the tree to base64 image
tree_png = ProteinVisualizer.render_phylogenetic_tree(newick_string, num_taxa)
tree_image = "data:image/png;base64," + base64.b64encode(tree_png).decode("ascii")
# Escape the raw Newick text because it is shown verbatim in the output.
escaped_newick = html_lib.escape(newick_string)
# Create HTML visualization
# NOTE(review): tree_image is not referenced in the template as it appears here -
# confirm the image element is present in the real markup.
# NOTE(review): .upper() is called on metadata['method'] directly, so an explicit
# None value would raise AttributeError - confirm callers always pass a string.
html = f"""
📊 Phylogenetic Tree
Tree Visualization:
📄 Raw Newick Format
{escaped_newick}
📝 How to read: Branch lengths represent evolutionary distance between taxa. Longer branches indicate greater evolutionary divergence.
🔹 Method: Tree constructed using {metadata.get('method', 'N/A').upper()} algorithm with {num_taxa} taxa.
"""
return html
@staticmethod
def create_phylogenetic_dendrogram(newick_string: str, metadata: Dict) -> go.Figure:
    """
    Plot the terminal taxa of a Newick tree as labelled markers.

    The terminals are placed along the x axis in the order BioPython
    returns them; unnamed terminals are labelled ``Seq<index>``. Any
    failure (import, parsing, plotting) yields a placeholder figure
    instead of an exception.

    Args:
        newick_string: Newick format tree string.
        metadata: Dictionary; ``method`` and ``num_taxa`` are used in the title.

    Returns:
        Plotly figure with the terminal markers, or the placeholder figure.
    """
    try:
        from Bio import Phylo
        from io import StringIO

        parsed_tree = Phylo.read(StringIO(newick_string), "newick")
        labels = [
            clade.name or f"Seq{position}"
            for position, clade in enumerate(parsed_tree.get_terminals())
        ]

        chart = go.Figure()
        if labels:
            markers = go.Scatter(
                x=list(range(len(labels))),
                y=[0] * len(labels),
                mode="markers+text",
                text=labels,
                textposition="middle right",
                marker=dict(size=8, color="#1f77b4"),
                hovertemplate="%{text}",
                showlegend=False,
            )
            chart.add_trace(markers)

        method_name = metadata.get('method', 'N/A').upper()
        taxa_count = metadata.get('num_taxa', 0)
        chart.update_layout(
            title=f"Phylogenetic Tree ({method_name}) - {taxa_count} Taxa",
            xaxis_title="Terminal order",
            yaxis=dict(showticklabels=False, zeroline=False),
            height=max(400, len(labels) * 30),
            width=1000,
            showlegend=False,
            template="plotly_white",
            hovermode="closest",
            margin=dict(l=150, r=100, t=80, b=50),
        )
        return chart
    except Exception:
        # Deliberate best-effort: show a placeholder rather than fail the page.
        placeholder = go.Figure()
        placeholder.add_annotation(
            text="Dendrogram visualization unavailable Showing Newick format instead",
            xref="paper", yref="paper",
            x=0.5, y=0.5, showarrow=False,
            font=dict(size=14, color="gray")
        )
        placeholder.update_layout(height=300, title="Phylogenetic Tree")
        return placeholder
@staticmethod
def create_variant_impact_distribution_chart(annotated_variants: List[Dict]) -> go.Figure:
    """Create impact-class distribution chart for annotated variants.

    Every variant is counted exactly once. ``predicted_effect_class`` is
    matched case-insensitively (ignoring surrounding whitespace) against
    high / moderate / low / unknown. A missing, ``None`` or unrecognised value
    is counted under "Unknown", so the bars always add up to
    ``len(annotated_variants)`` instead of silently dropping rows.

    Args:
        annotated_variants: Variant rows; only ``predicted_effect_class`` is read.
    Returns:
        A four-bar chart (High, Moderate, Low, Unknown), or a placeholder
        figure with an annotation when the input is empty.
    """
    if not annotated_variants:
        fig = go.Figure()
        fig.add_annotation(
            text="No annotated variants available",
            xref="paper", yref="paper",
            x=0.5, y=0.5, showarrow=False,
            font=dict(size=14, color="gray"),
        )
        fig.update_layout(height=320, template="plotly_white")
        return fig

    order = ["high", "moderate", "low", "unknown"]
    # Pre-seed every displayed class so absent classes render as zero bars.
    counts: Dict[str, int] = dict.fromkeys(order, 0)
    for row in annotated_variants:
        raw_class = row.get("predicted_effect_class")
        label = "unknown" if raw_class is None else str(raw_class).strip().lower()
        if label not in counts:
            # Classes the chart has no bar for are still counted, not dropped.
            label = "unknown"
        counts[label] += 1

    x_vals = [label.title() for label in order]
    y_vals = [counts[label] for label in order]
    colors = ["#d62728", "#ff7f0e", "#2ca02c", "#9e9e9e"]
    fig = go.Figure(
        data=[
            go.Bar(
                x=x_vals,
                y=y_vals,
                marker_color=colors,
                text=y_vals,
                textposition="auto",
            )
        ]
    )
    fig.update_layout(
        title="Variant Impact Distribution",
        xaxis_title="Impact class",
        yaxis_title="Variant count",
        template="plotly_white",
        height=340,
        showlegend=False,
    )
    return fig
@staticmethod
def create_top_gene_impact_chart(gene_impact: Dict, top_n: int = 10) -> go.Figure:
    """
    Draw a bar chart of gene impact scores.

    Takes the first ``top_n`` values of ``gene_impact["genes"]`` in their
    existing order (no re-sorting is done here) and plots each entry's
    ``score`` against its ``gene`` label.

    Args:
        gene_impact: Dictionary whose ``genes`` entry maps to per-gene dicts
        top_n: Maximum number of genes to plot
    Returns:
        The bar chart, or a placeholder figure when there are no genes.
    """
    selected = list(gene_impact.get("genes", {}).values())[:top_n]

    if not selected:
        placeholder = go.Figure()
        placeholder.add_annotation(
            text="No gene impact data available",
            xref="paper",
            yref="paper",
            x=0.5,
            y=0.5,
            showarrow=False,
            font={"size": 14, "color": "gray"},
        )
        placeholder.update_layout(height=320, template="plotly_white")
        return placeholder

    gene_labels = [entry.get("gene", "UNK") for entry in selected]
    impact_values = [float(entry.get("score", 0.0)) for entry in selected]
    # Bar annotations show the score rounded to one decimal place.
    bar_text = [f"{value:.1f}" for value in impact_values]

    chart = go.Figure()
    chart.add_trace(
        go.Bar(
            x=gene_labels,
            y=impact_values,
            marker_color="#1f77b4",
            text=bar_text,
            textposition="auto",
        )
    )
    chart.update_layout(
        title="Top Gene Impact Scores",
        xaxis_title="Gene",
        yaxis_title="Impact score (0-100)",
        template="plotly_white",
        height=360,
        showlegend=False,
    )
    return chart
@staticmethod
def create_pathway_perturbation_chart(pathway_impact: Dict, top_n: int = 10) -> go.Figure:
    """
    Draw a horizontal bar chart of pathway impact scores.

    The first ``top_n`` entries of ``pathway_impact["pathways"]`` are plotted
    in reversed list order; each entry's ``confidence`` is attached as hover
    data.

    Args:
        pathway_impact: Dictionary whose ``pathways`` entry is a list of dicts
        top_n: Maximum number of pathways to plot
    Returns:
        The bar chart, or a placeholder figure when there are no pathways.
    """
    selected = pathway_impact.get("pathways", [])[:top_n]

    if not selected:
        placeholder = go.Figure()
        placeholder.add_annotation(
            text="No pathway impact data available",
            xref="paper",
            yref="paper",
            x=0.5,
            y=0.5,
            showarrow=False,
            font={"size": 14, "color": "gray"},
        )
        placeholder.update_layout(height=320, template="plotly_white")
        return placeholder

    pathway_labels = [item.get("pathway_name", "Unknown") for item in selected]
    impact_values = [float(item.get("impact_score", 0.0)) for item in selected]
    confidence_tags = [item.get("confidence", "Low") for item in selected]
    # Flip all three series together so labels, scores and hover data stay aligned.
    for series in (pathway_labels, impact_values, confidence_tags):
        series.reverse()

    bars = go.Bar(
        x=impact_values,
        y=pathway_labels,
        orientation="h",
        text=[f"{value:.1f}" for value in impact_values],
        textposition="auto",
        customdata=confidence_tags,
        hovertemplate="%{y} Impact: %{x:.1f} Confidence: %{customdata}",
        marker_color="#6a3d9a",
    )
    chart = go.Figure(data=[bars])
    chart.update_layout(
        title="Pathway Perturbation Scores",
        xaxis_title="Pathway score (0-100)",
        yaxis_title="Pathway",
        template="plotly_white",
        # 28px per bar, with a 360px floor.
        height=max(360, len(selected) * 28),
        showlegend=False,
    )
    return chart
@staticmethod
def create_therapy_contribution_chart(ranked_candidates: List[Dict], top_n: int = 8) -> go.Figure:
    """
    Draw a stacked bar chart of per-component therapy score contributions.

    For the first ``top_n`` candidates, four components are multiplied by 100
    and stacked; ``safety_risk_penalty`` is multiplied by 100 and negated so
    it stacks in the opposite direction (``barmode="relative"``).

    Args:
        ranked_candidates: Candidate dicts, taken in their existing order
        top_n: Maximum number of candidates to plot
    Returns:
        The stacked chart, or a placeholder figure when there are no candidates.
    """
    shortlist = ranked_candidates[:top_n]

    if not shortlist:
        placeholder = go.Figure()
        placeholder.add_annotation(
            text="No therapy candidates available",
            xref="paper",
            yref="paper",
            x=0.5,
            y=0.5,
            showarrow=False,
            font={"size": 14, "color": "gray"},
        )
        placeholder.update_layout(height=320, template="plotly_white")
        return placeholder

    def scaled(field: str) -> List[float]:
        """Return the named field of every shortlisted candidate times 100."""
        return [float(candidate.get(field, 0.0)) * 100.0 for candidate in shortlist]

    drug_labels = [candidate.get("drug_name", "Unknown") for candidate in shortlist]
    # Insertion order here is the stacking and legend order.
    series_by_label = {
        "Target match": scaled("target_gene_match"),
        "Pathway relevance": scaled("pathway_relevance"),
        "Evidence quality": scaled("evidence_quality"),
        "Clinical maturity": scaled("clinical_maturity"),
        "Safety penalty": [-value for value in scaled("safety_risk_penalty")],
    }
    series_colors = ["#1f77b4", "#9467bd", "#2ca02c", "#ff7f0e", "#d62728"]

    chart = go.Figure()
    for (series_name, series_values), color in zip(series_by_label.items(), series_colors):
        chart.add_trace(
            go.Bar(x=drug_labels, y=series_values, name=series_name, marker_color=color)
        )
    chart.update_layout(
        barmode="relative",
        title="Therapy Candidate Score Contributions",
        xaxis_title="Drug candidate",
        yaxis_title="Component contribution (scaled)",
        template="plotly_white",
        height=420,
        legend_title="Components",
    )
    return chart
@staticmethod
def create_confidence_completeness_chart(ranked_candidates: List[Dict], top_n: int = 8) -> go.Figure:
    """
    Draw a bubble chart of ranking confidence against data completeness.

    Each of the first ``top_n`` candidates becomes one labelled bubble:
    x is ``completeness_pct``, y is ``ranking_confidence``, and both bubble
    size and colour come from ``composite_score``.

    Args:
        ranked_candidates: Candidate dicts, taken in their existing order
        top_n: Maximum number of candidates to plot
    Returns:
        The bubble chart, or a placeholder figure when there are no candidates.
    """
    shortlist = ranked_candidates[:top_n]

    if not shortlist:
        placeholder = go.Figure()
        placeholder.add_annotation(
            text="No confidence/completeness data available",
            xref="paper",
            yref="paper",
            x=0.5,
            y=0.5,
            showarrow=False,
            font={"size": 14, "color": "gray"},
        )
        placeholder.update_layout(height=320, template="plotly_white")
        return placeholder

    completeness = [float(item.get("completeness_pct", 0.0)) for item in shortlist]
    confidence = [float(item.get("ranking_confidence", 0.0)) for item in shortlist]
    drug_labels = [item.get("drug_name", "Unknown") for item in shortlist]
    composite = [float(item.get("composite_score", 0.0)) for item in shortlist]

    # Bubble diameter is score / 2.5, clamped to the 10-40 range.
    bubble_sizes = [max(10.0, min(40.0, value / 2.5)) for value in composite]
    bubble_style = {
        "size": bubble_sizes,
        "color": composite,
        "colorscale": "Blues",
        "showscale": True,
        "colorbar": {"title": "Composite"},
    }

    chart = go.Figure()
    chart.add_trace(
        go.Scatter(
            x=completeness,
            y=confidence,
            mode="markers+text",
            text=drug_labels,
            textposition="top center",
            marker=bubble_style,
            hovertemplate="%{text} Completeness=%{x:.1f}% Confidence=%{y:.1f}",
        )
    )
    chart.update_layout(
        title="Confidence vs Data Completeness",
        xaxis_title="Completeness (%)",
        yaxis_title="Confidence (0-100)",
        template="plotly_white",
        height=380,
        showlegend=False,
    )
    return chart
@staticmethod
def create_assay_ranking_comparison_chart(assays: List[Dict], top_n: int = 8) -> go.Figure:
    """
    Draw a horizontal bar chart of assay rank scores coloured by confidence.

    The first ``top_n`` assays are plotted in reversed list order. Bars are
    coloured by the ``confidence`` value ("High", "Med" or "Low"); any other
    value gets a fallback colour.

    Args:
        assays: Assay dicts, taken in their existing order
        top_n: Maximum number of assays to plot
    Returns:
        The bar chart, or a placeholder figure when there are no assays.
    """
    shortlist = assays[:top_n]

    if not shortlist:
        placeholder = go.Figure()
        placeholder.add_annotation(
            text="No assay ranking data available",
            xref="paper",
            yref="paper",
            x=0.5,
            y=0.5,
            showarrow=False,
            font={"size": 14, "color": "gray"},
        )
        placeholder.update_layout(height=320, template="plotly_white")
        return placeholder

    assay_labels = [entry.get("assay_name", "Assay") for entry in shortlist]
    rank_values = [float(entry.get("rank_score", 0.0)) for entry in shortlist]
    confidence_tags = [entry.get("confidence", "Low") for entry in shortlist]
    # Flip all three series together so they stay aligned bar-for-bar.
    for series in (assay_labels, rank_values, confidence_tags):
        series.reverse()

    confidence_palette = {"High": "#2ca02c", "Med": "#ff7f0e", "Low": "#d62728"}
    bar_colors = [confidence_palette.get(tag, "#636efa") for tag in confidence_tags]

    bars = go.Bar(
        x=rank_values,
        y=assay_labels,
        orientation="h",
        marker_color=bar_colors,
        text=[f"{value:.1f}" for value in rank_values],
        textposition="auto",
        customdata=confidence_tags,
        hovertemplate="%{y} Score=%{x:.1f} Confidence=%{customdata}",
    )
    chart = go.Figure(data=[bars])
    chart.update_layout(
        title="Assay Ranking Comparison",
        xaxis_title="Heuristic score (0-100)",
        yaxis_title="Assay",
        template="plotly_white",
        # 36px per bar, with a 340px floor.
        height=max(340, len(shortlist) * 36),
        showlegend=False,
    )
    return chart
@staticmethod
def create_crispr_candidate_score_plot(candidates: List[Dict], top_n: int = 30) -> go.Figure:
    """Create position-vs-score scatter for CRISPR candidates.

    Each of the first ``top_n`` candidates becomes one marker at
    (``target_position``, ``heuristic_score``), coloured by
    ``off_target_risk_level``. The hover text carries a spacer preview: the
    first 12 characters followed by ``...`` only when the spacer is actually
    longer than that, so short or missing spacers are not shown as truncated.

    Args:
        candidates: Candidate dicts, taken in their existing order.
        top_n: Maximum number of candidates to plot.
    Returns:
        The scatter figure, or a placeholder figure with an annotation when
        there are no candidates.
    """
    top = candidates[:top_n]
    if not top:
        fig = go.Figure()
        fig.add_annotation(
            text="No CRISPR candidate scores available",
            xref="paper", yref="paper",
            x=0.5, y=0.5, showarrow=False,
            font=dict(size=14, color="gray"),
        )
        fig.update_layout(height=320, template="plotly_white")
        return fig

    preview_len = 12
    risk_color = {"Low": "#2ca02c", "Medium": "#ff7f0e", "High": "#d62728"}
    x_vals = [int(row.get("target_position", 0)) for row in top]
    y_vals = [float(row.get("heuristic_score", 0.0)) for row in top]
    labels = [row.get("off_target_risk_level", "Medium") for row in top]
    # Risk levels outside the palette fall back to a neutral colour.
    colors = [risk_color.get(lbl, "#636efa") for lbl in labels]

    # A None or absent spacer is treated as empty instead of raising.
    spacers = [str(row.get("spacer_sequence") or "") for row in top]
    text = [
        spacer[:preview_len] + "..." if len(spacer) > preview_len else spacer
        for spacer in spacers
    ]

    fig = go.Figure(
        data=[
            go.Scatter(
                x=x_vals,
                y=y_vals,
                mode="markers",
                marker=dict(size=10, color=colors, line=dict(width=0.5, color="#333333")),
                text=text,
                customdata=labels,
                hovertemplate="Position=%{x} Score=%{y:.1f} Risk=%{customdata} Spacer=%{text}",
            )
        ]
    )
    fig.update_layout(
        title="CRISPR Candidate Score Plot",
        xaxis_title="Target position",
        yaxis_title="Heuristic score (0-100)",
        template="plotly_white",
        height=360,
        showlegend=False,
    )
    return fig
@staticmethod
def create_primer_quality_comparison_chart(primers: List[Dict], top_n: int = 10) -> go.Figure:
    """
    Draw primer quality bars with an amplicon-size line on a second axis.

    The first ``top_n`` primer rows are labelled "Pair 1", "Pair 2", ... in
    list order. ``quality_score`` is drawn as bars on the left axis and
    ``expected_amplicon_size`` as a line on an overlaid right axis.

    Args:
        primers: Primer-pair dicts, taken in their existing order
        top_n: Maximum number of pairs to plot
    Returns:
        The combined chart, or a placeholder figure when there are no primers.
    """
    shortlist = primers[:top_n]

    if not shortlist:
        placeholder = go.Figure()
        placeholder.add_annotation(
            text="No primer quality data available",
            xref="paper",
            yref="paper",
            x=0.5,
            y=0.5,
            showarrow=False,
            font={"size": 14, "color": "gray"},
        )
        placeholder.update_layout(height=320, template="plotly_white")
        return placeholder

    pair_labels = [f"Pair {number}" for number in range(1, len(shortlist) + 1)]
    quality_values = [float(entry.get("quality_score", 0.0)) for entry in shortlist]
    amplicon_sizes = [int(entry.get("expected_amplicon_size", 0)) for entry in shortlist]

    quality_bars = go.Bar(
        x=pair_labels,
        y=quality_values,
        name="Quality score",
        marker_color="#1f77b4",
    )
    # yaxis="y2" binds this trace to the secondary axis declared in the layout.
    amplicon_line = go.Scatter(
        x=pair_labels,
        y=amplicon_sizes,
        name="Amplicon size (bp)",
        yaxis="y2",
        mode="lines+markers",
        marker_color="#ff7f0e",
    )

    chart = go.Figure()
    chart.add_trace(quality_bars)
    chart.add_trace(amplicon_line)
    chart.update_layout(
        title="Primer Quality Comparison",
        xaxis_title="Primer pair",
        yaxis={"title": "Quality score (0-100)"},
        yaxis2={"title": "Amplicon size (bp)", "overlaying": "y", "side": "right"},
        template="plotly_white",
        height=380,
        legend={"orientation": "h"},
    )
    return chart
@staticmethod
def create_portfolio_funnel(stage_distribution: List[Dict]) -> go.Figure:
    """
    Draw a funnel chart of project counts per stage.

    Args:
        stage_distribution: Rows loaded into a DataFrame; the ``stage`` and
            ``count`` columns are read
    Returns:
        The funnel figure, or a placeholder figure when the input is empty.
    """
    if not stage_distribution:
        placeholder = go.Figure()
        placeholder.add_annotation(
            text="No projects available",
            x=0.5,
            y=0.5,
            xref="paper",
            yref="paper",
            showarrow=False,
        )
        placeholder.update_layout(template="plotly_white", height=320)
        return placeholder

    stage_frame = pd.DataFrame(stage_distribution)
    funnel_trace = go.Funnel(
        y=stage_frame["stage"],
        x=stage_frame["count"],
        textinfo="value+percent initial",
    )
    chart = go.Figure(funnel_trace)
    chart.update_layout(
        title="Portfolio Funnel by Stage",
        template="plotly_white",
        height=360,
    )
    return chart
@staticmethod
def create_milestone_burndown(milestones: List[Dict]) -> go.Figure:
    """
    Draw a bar chart of how many milestones are in each status.

    Statuses are tallied with ``value_counts`` on the ``status`` column; when
    that column is absent the chart has no bars. Bar colours are taken
    positionally from a four-colour palette, not keyed by status name.

    Args:
        milestones: Milestone rows loaded into a DataFrame
    Returns:
        The bar chart, or a placeholder figure when the input is empty.
    """
    if not milestones:
        placeholder = go.Figure()
        placeholder.add_annotation(
            text="No milestones yet",
            x=0.5,
            y=0.5,
            xref="paper",
            yref="paper",
            showarrow=False,
        )
        placeholder.update_layout(template="plotly_white", height=320)
        return placeholder

    milestone_frame = pd.DataFrame(milestones)
    if "status" in milestone_frame.columns:
        tally = milestone_frame["status"].value_counts().to_dict()
    else:
        tally = {}

    status_palette = ["#9ca3af", "#3b82f6", "#ef4444", "#10b981"]
    # Keep at least one colour even when there are no statuses to plot.
    bar_colors = status_palette[: max(1, len(tally))]

    status_bars = go.Bar(
        x=list(tally.keys()),
        y=list(tally.values()),
        marker_color=bar_colors,
    )
    chart = go.Figure(data=[status_bars])
    chart.update_layout(
        title="Milestone Completion / Blockers",
        yaxis_title="Count",
        template="plotly_white",
        height=340,
    )
    return chart
def _install_plotly_theme_wrapper() -> None:
    """
    Re-register every public static method of ProteinVisualizer behind a theming wrapper.

    Attributes whose name starts with an underscore, and attributes that are
    not ``staticmethod`` objects, are left untouched. Each wrapper calls the
    original function and, when the result is a ``go.Figure``, returns it via
    ``ProteinVisualizer._style_figure``; any other result is returned as is.
    """
    # Snapshot the targets first so the class is not mutated while it is scanned.
    public_static = [
        (attr_name, attr.__func__)
        for attr_name, attr in vars(ProteinVisualizer).items()
        if not attr_name.startswith("_") and isinstance(attr, staticmethod)
    ]

    for attr_name, func in public_static:

        @wraps(func)
        def themed(*args: Any, __func=func, **kwargs: Any) -> Any:
            # __func is bound as a default so each wrapper keeps its own
            # target rather than the loop's final value.
            outcome = __func(*args, **kwargs)
            if not isinstance(outcome, go.Figure):
                return outcome
            return ProteinVisualizer._style_figure(outcome)

        setattr(ProteinVisualizer, attr_name, staticmethod(themed))
# Install the theming wrappers as soon as this statement runs, so the public
# ProteinVisualizer chart builders return themed figures without any change
# at their call sites.
_install_plotly_theme_wrapper()