import html as _html
from typing import Dict, List, Tuple, Optional
from utils.analyze import compute_properties
def predicted_confidence(row: Dict) -> Optional[float]:
    """
    Return the probability of the label that was actually predicted for a row.

    The row's "Confidence" value is read as the AMP probability. It is returned
    as-is when "Prediction" equals "AMP"; for any other prediction the
    complement (1 - AMP probability) is returned.

    Returns None when the row is empty/falsy, or when "Confidence" is missing
    or cannot be converted to float.
    """
    if not row:
        return None

    label = row.get("Prediction")
    raw_probability = row.get("Confidence")
    if raw_probability is None:
        return None

    try:
        amp_probability = float(raw_probability)
    except (TypeError, ValueError):
        return None

    # Anything not labelled "AMP" carries the complementary probability.
    return amp_probability if label == "AMP" else 1.0 - amp_probability
def format_conf_percent(conf_prob: float, digits: int = 1) -> str:
    """
    Render a probability as a percentage string, e.g. 0.5 -> "50.0%".

    The value is scaled by 100 and passed through round() with ``digits``
    decimal places; the rounded number is then stringified and suffixed
    with "%".
    """
    percent = round(conf_prob * 100, digits)
    return str(percent) + "%"
def heuristic_reason_for_profile(charge: float, hydro_fraction: float) -> str:
    """
    Return a short human-readable justification for a candidate.

    A charge above 2 takes priority; otherwise a hydrophobic fraction within
    [0.3, 0.6] is reported as balanced; anything else gets a generic message.
    """
    if charge > 2:
        reason = "High positive charge supports membrane disruption"
    elif 0.3 <= hydro_fraction <= 0.6:
        reason = "Balanced hydrophobicity"
    else:
        reason = "Favorable predicted profile"
    return reason
def choose_top_candidate(predictions: List[Dict]) -> Optional[Dict]:
    """
    Pick the most promising prediction row and describe it.

    Rows whose "Prediction" is "AMP" are preferred; when there are none, every
    row is considered. Among the considered rows, the one with the highest
    predicted_confidence() wins (the first one on ties). Rows with no
    "Sequence" or without a usable confidence are skipped, so a single
    malformed row cannot hide an otherwise valid candidate.

    Returns None when ``predictions`` is empty or no considered row is usable.
    Otherwise returns a dict with these keys:
    - "Sequence": the winning row's sequence
    - "Prediction": the winning row's label
    - "predicted_confidence": AMP probability for AMP rows, else the
      Non-AMP probability
    - "Reason": heuristic text from heuristic_reason_for_profile()
    - "Charge": net charge read from compute_properties()
    - "Hydrophobic Fraction": hydrophobic fraction read from
      compute_properties()
    """
    if not predictions:
        return None

    # Prefer AMP predictions; otherwise pick highest confidence overall.
    amp_rows = [r for r in predictions if r.get("Prediction") == "AMP"]
    candidates = amp_rows or predictions

    best_row = None
    # Sentinel below any expected confidence (presumably values lie in [0, 1]).
    best_conf = -1.0
    for row in candidates:
        # A row without a sequence can never be reported, so it must not win.
        if not row.get("Sequence"):
            continue
        conf = predicted_confidence(row)
        if conf is not None and conf > best_conf:
            best_conf = conf
            best_row = row

    if best_row is None:
        return None

    seq = best_row["Sequence"]
    props = compute_properties(seq)
    # Accept both spellings of the charge key, as the other helpers here do.
    charge = props.get("Net Charge (approx.)", props.get("Net charge", 0))
    hydro = props.get("Hydrophobic Fraction", 0)

    return {
        "Sequence": seq,
        "Prediction": best_row.get("Prediction"),
        "predicted_confidence": best_conf,
        "Reason": heuristic_reason_for_profile(charge, hydro),
        "Charge": charge,
        "Hydrophobic Fraction": hydro,
    }
def mutation_heatmap_html(original: str, final: str) -> str:
    """
    Build an HTML snippet comparing two sequences position by position.

    For each position the residue from ``final`` is shown, falling back to the
    residue from ``original`` when ``final`` is shorter. A position counts as
    changed when the two residues differ (including when one sequence has no
    residue there); changed residues are wrapped in a red ``<span>``.
    Residues are HTML-escaped before being embedded.

    ``None`` or empty inputs are treated as empty sequences. The result is a
    single ``<div>`` styled with a monospace font so positions line up.
    """
    orig = original or ""
    fin = final or ""
    max_len = max(len(orig), len(fin))

    # Display in a fixed-width monospace container.
    out: List[str] = [
        "<div style='font-family: monospace; white-space: pre-wrap; "
        "word-break: break-all;'>"
    ]

    for i in range(max_len):
        o = orig[i] if i < len(orig) else ""
        f = fin[i] if i < len(fin) else ""
        # At least one sequence reaches index i, so the residue is never empty.
        residue_escaped = _html.escape(f if f else o)
        if o != f:
            out.append(
                f"<span style='color: red; font-weight: bold;'>{residue_escaped}</span>"
            )
        else:
            out.append(residue_escaped)

    out.append("</div>")
    return "".join(out)
def mutation_diff_table(original: str, final: str) -> List[Dict]:
    """
    Return one row per position describing how two sequences differ.

    Each row holds the 1-based "Position", the "Original" and "Final" residues
    (an empty string where a sequence is too short), and "Changed" set to
    "Yes" or "No". ``None`` or empty inputs are treated as empty sequences.
    """
    before = original or ""
    after = final or ""
    n_before, n_after = len(before), len(after)

    def _row(index: int) -> Dict:
        old = before[index] if index < n_before else ""
        new = after[index] if index < n_after else ""
        return {
            "Position": index + 1,
            "Original": old,
            "Final": new,
            "Changed": "No" if old == new else "Yes",
        }

    return [_row(index) for index in range(max(n_before, n_after))]
def _ideal_distance_to_interval(value: float, low: float, high: float) -> float:
    """
    Return how far ``value`` lies outside the closed interval [low, high].

    The result is 0.0 when the value is inside the interval, ``low - value``
    when it is below, and ``value - high`` otherwise.
    """
    if value < low:
        return low - value
    # Checked second so anything neither below nor inside falls through.
    if low <= value <= high:
        return 0.0
    return value - high
def optimization_summary(orig_seq: str, orig_conf: float, final_seq: str, final_conf: float) -> Dict:
    """
    Summarise how an optimized sequence compares with the original one.

    Returns a dict with:
    - "delta_conf_pct": (final_conf - orig_conf) expressed in percentage points
    - "charge_orig" / "charge_final": net charge of each sequence
    - "charge_change": "Increased", "Decreased" or "Same"
    - "hydro_orig" / "hydro_final": hydrophobic fraction of each sequence
    - "hydro_change": "Improved balance", "Less optimal" or "Same", judged by
      the distance of the hydrophobic fraction to the interval [0.4, 0.5]

    Empty or None sequences are not passed to compute_properties(); their
    charge and hydrophobic fraction default to 0.
    """
    before_seq = orig_seq or ""
    after_seq = final_seq or ""

    # Compute properties only if non-empty.
    before_props = compute_properties(before_seq) if before_seq else {}
    after_props = compute_properties(after_seq) if after_seq else {}

    def _charge_of(props: Dict):
        return props.get("Net Charge (approx.)", props.get("Net charge", 0))

    charge_before = _charge_of(before_props)
    charge_after = _charge_of(after_props)
    hydro_before = before_props.get("Hydrophobic Fraction", 0)
    hydro_after = after_props.get("Hydrophobic Fraction", 0)

    confidence_gain = (float(final_conf) - float(orig_conf)) * 100.0

    if charge_after == charge_before:
        charge_trend = "Same"
    else:
        charge_trend = (
            "Increased"
            if charge_after > charge_before
            else "Decreased" if charge_after < charge_before else "Same"
        )

    # Target window for the hydrophobic fraction.
    window_low, window_high = 0.4, 0.5
    gap_before = _ideal_distance_to_interval(float(hydro_before), window_low, window_high)
    gap_after = _ideal_distance_to_interval(float(hydro_after), window_low, window_high)
    if gap_after < gap_before:
        hydro_trend = "Improved balance"
    elif gap_after > gap_before:
        hydro_trend = "Less optimal"
    else:
        hydro_trend = "Same"

    return {
        "delta_conf_pct": confidence_gain,
        "charge_orig": charge_before,
        "charge_final": charge_after,
        "charge_change": charge_trend,
        "hydro_orig": hydro_before,
        "hydro_final": hydro_after,
        "hydro_change": hydro_trend,
    }
def sequence_length_warning(seq: str) -> Optional[str]:
    """
    Return a warning about an unusual sequence length, or None if there is none.

    Sequences shorter than 8 residues or longer than 50 residues produce a
    warning; empty or None input produces no warning.
    """
    if seq:
        residue_count = len(seq)
        if residue_count > 50:
            return "Unusually long sequence"
        if residue_count < 8:
            return "Too short for typical AMP"
    return None
def sequence_health_label(conf_prob: float, charge: float, hydro_fraction: float) -> Tuple[str, str]:
    """
    Classify a sequence and return ``(label, color_css)``.

    - "Strong AMP candidate" (green) when the confidence is at least 0.9, or
      when it exceeds 0.75 with charge >= 2 and a hydrophobic fraction
      within [0.3, 0.6].
    - "Moderate potential" (orange) when the confidence exceeds 0.5.
    - "Unlikely AMP" (red) otherwise.
    """
    # Very high confidence wins outright, before the charge/hydrophobicity
    # heuristics are even consulted (prevents "moderate" at ~99%).
    if conf_prob >= 0.9 or (
        conf_prob > 0.75 and charge >= 2 and 0.3 <= hydro_fraction <= 0.6
    ):
        return "Strong AMP candidate", "#2ca02c"

    if conf_prob <= 0.5:
        return "Unlikely AMP", "#d62728"
    if conf_prob > 0.5:
        return "Moderate potential", "#ff9800"
    return "Unlikely AMP", "#d62728"
def build_analysis_summary_text(
    sequence: str,
    prediction: str,
    confidence_display: str,
    props: Dict,
    analysis_lines: List[str],
) -> str:
    """
    Assemble a plain-text report for one analysed sequence.

    The report lists the sequence, prediction, confidence, length, net charge
    and hydrophobic fraction, followed by a "Summary:" section with one
    "- "-prefixed bullet per entry of ``analysis_lines`` (None is treated as
    no entries). Length falls back to ``len(sequence)``; charge and
    hydrophobic fraction fall back to the alternate keys "Net charge" and
    "Hydrophobic", then to 0.
    """
    length = props.get("Length", len(sequence))
    charge = props.get("Net Charge (approx.)", props.get("Net charge", 0))
    hydro = props.get("Hydrophobic Fraction", props.get("Hydrophobic", 0))

    bullets = [f"- {line}" for line in (analysis_lines or [])]
    analysis_block = "\n".join(bullets)

    pieces = [
        f"Sequence: {sequence}\n",
        f"Prediction: {prediction}\n",
        f"Confidence: {confidence_display}\n",
        f"Length: {length}\n",
        f"Net Charge (approx.): {charge}\n",
        f"Hydrophobic Fraction: {hydro}\n\n",
        f"Summary:\n{analysis_block}\n",
    ]
    return "".join(pieces)