PeptideAI / StreamlitApp /utils /peptide_extras.py
m0ksh's picture
Sync from GitHub (preserve manual model files)
cccf8bd verified
Raw
History Blame
15.4 kB
"""
Optional peptide UI helpers: 3D approximation (py3Dmol), known-AMP similarity, residue highlighting.
Does not modify model loading or prediction logic.
"""
from __future__ import annotations
import csv
import math
import pathlib
from typing import List, Optional, Tuple, Any
import numpy as np
# Fallback if `Data/ampData.csv` is missing (e.g. local dev without Data/).
# The CSV loader below also returns a copy of this tuple when the file has no
# `sequence` column, cannot be read/parsed, or yields no AMP-labeled rows.
_FALLBACK_KNOWN_AMPS: Tuple[str, ...] = (
    "KWKLFKKIGAVLKVL",
    "GIGKFLHSAKKFGKAFVGEIMNS",
    "LLGDFFRKSKEKIGKEFKRIVQRIKDFLRNLV",
    "KLFKKILKYL",
    "FLPLLAGLAANFLPKIFCKITRKC",
)
def _amp_data_csv_path() -> pathlib.Path:
# StreamlitApp/utils/peptide_extras.py -> repo root is parents[2]
return pathlib.Path(__file__).resolve().parents[2] / "Data" / "ampData.csv"
def _load_known_amps_from_csv() -> List[str]:
    """
    Load unique sequences labeled as AMP (label == 1) from Data/ampData.csv.

    Sequences are uppercased for consistent similarity matching and returned in
    first-seen order (duplicates after uppercasing are dropped).

    Returns:
        The AMP sequences found in the CSV, or a copy of the built-in fallback
        list when the file is missing, has no ``sequence`` column, cannot be
        read/parsed, or contains no usable AMP-labeled rows.
    """
    path = _amp_data_csv_path()
    if not path.exists():
        return list(_FALLBACK_KNOWN_AMPS)

    seen: set[str] = set()
    amps: List[str] = []
    try:
        # "utf-8-sig" strips a leading byte-order mark if one is present and is
        # otherwise identical to "utf-8". Without it, a BOM-prefixed CSV exposes
        # its first header as "\ufeffsequence", the column check below fails and
        # the real data is silently replaced by the fallback list.
        with path.open(newline="", encoding="utf-8-sig") as f:
            reader = csv.DictReader(f)
            if not reader.fieldnames or "sequence" not in reader.fieldnames:
                return list(_FALLBACK_KNOWN_AMPS)
            for row in reader:
                # Only rows whose label cell is exactly "1" count as AMPs.
                label = str(row.get("label", "")).strip()
                if label != "1":
                    continue
                # Short rows yield None for missing cells; treat as empty.
                raw = (row.get("sequence") or "").strip()
                if not raw:
                    continue
                seq = raw.upper()
                if seq in seen:
                    continue
                seen.add(seq)
                amps.append(seq)
    except Exception:
        # Deliberate best-effort: this runs at import time, so any read/parse
        # problem degrades to the fallback pool instead of breaking the app.
        return list(_FALLBACK_KNOWN_AMPS)
    return amps if amps else list(_FALLBACK_KNOWN_AMPS)
# Known AMP pool for similarity search (from ampData.csv label=1, or fallback list).
# Built once, when this module is imported; `find_most_similar` scans it linearly.
KNOWN_AMPS: List[str] = _load_known_amps_from_csv()
# py3Dmol viewer: skip very long sequences (labels + sticks scale with length).
# `render_3d_structure` returns False for cleaned sequences longer than this.
MAX_3D_SEQUENCE_LENGTH: int = 60
# Markdown legend for the 3D view; the residue classes listed here mirror the
# letter groups used by `get_residue_color`.
STRUCTURE_3D_LEGEND_MARKDOWN: str = """
**Color legend**
- **Blue:** Positively charged residues (K, R, H)
- **Red:** Negatively charged residues (D, E)
- **Green:** Hydrophobic residues (A, V, I, L, M, F, W, Y)
- **Gray:** Other / polar or unclassified residues
"""
# Markdown caveat text explaining that the 3D view is an approximation and how
# to read the color patterns.
STRUCTURE_3D_INTERPRETATION_MARKDOWN: str = """
**Structural interpretation (approximation only)**
This is a **simplified helical CA trace** used to visualize how residue chemistry is arranged in 3D space — **not** an experimentally determined fold.
- **Clusters of green** often correspond to membrane-facing / hydrophobic patches.
- **Blue regions** highlight cationic residues that can promote binding to anionic bacterial surfaces.
- **Spatial separation** between hydrophobic and charged segments can suggest **amphipathic** character, common among many AMPs.
Together, these cues help discuss whether a sequence has motifs frequently associated with antimicrobial peptides — **wet-lab validation is still required**.
"""
# One-letter -> three-letter (for minimal PDB lines for py3Dmol).
# Covers the 20 standard amino acids; `generate_helix_pdb` writes "UNK" for any
# character that is not a key here.
_ONE_TO_THREE = {
    "A": "ALA",
    "R": "ARG",
    "N": "ASN",
    "D": "ASP",
    "C": "CYS",
    "Q": "GLN",
    "E": "GLU",
    "G": "GLY",
    "H": "HIS",
    "I": "ILE",
    "L": "LEU",
    "K": "LYS",
    "M": "MET",
    "F": "PHE",
    "P": "PRO",
    "S": "SER",
    "T": "THR",
    "W": "TRP",
    "Y": "TYR",
    "V": "VAL",
}
def sequence_similarity(seq1: str, seq2: str) -> float:
    """
    Fraction of aligned positions that hold the same character.

    The two strings are compared index by index (no gaps, no shifting) and the
    match count is divided by the length of the longer string. Comparison is
    case-sensitive. Returns 0.0 when either input is empty.
    """
    if not (seq1 and seq2):
        return 0.0
    longest = max(len(seq1), len(seq2))
    same = 0
    # zip stops at the shorter string; the overhang counts as mismatches via
    # the max-length denominator.
    for left, right in zip(seq1, seq2):
        if left == right:
            same += 1
    return same / longest
def find_most_similar(sequence: str) -> Tuple[Optional[str], float]:
    """
    Return the entry of ``KNOWN_AMPS`` most similar to *sequence* and its score.

    The query is uppercased and stripped of all whitespace before scoring with
    ``sequence_similarity``. On ties the earliest entry in ``KNOWN_AMPS`` wins.
    Returns ``(None, 0.0)`` when the query is empty (before or after cleaning)
    or the pool is empty.
    """
    if not (sequence and KNOWN_AMPS):
        return None, 0.0
    query = "".join(ch for ch in sequence.upper() if not ch.isspace())
    if not query:
        return None, 0.0
    scored = [(sequence_similarity(query, amp), amp) for amp in KNOWN_AMPS]
    # max() keeps the first of several equal maxima, i.e. the earliest pool entry.
    top_score, winner = max(scored, key=lambda pair: pair[0])
    return winner, top_score
def get_residue_color(aa: str) -> str:
    """
    Return the py3Dmol color name for a one-letter residue code.

    Lookup is case-insensitive. K/R/H -> "blue", D/E -> "red",
    A/V/I/L/M/F/W/Y -> "green"; anything else (including empty or
    multi-character input) -> "gray".
    """
    code = aa.upper() if aa else ""
    color_by_residue = {
        **dict.fromkeys(("K", "R", "H"), "blue"),
        **dict.fromkeys(("D", "E"), "red"),
        **dict.fromkeys(("A", "V", "I", "L", "M", "F", "W", "Y"), "green"),
    }
    return color_by_residue.get(code, "gray")
def residue_color_mpl(aa: str) -> str:
    """
    Return a matplotlib-ready hex color for a one-letter residue code.

    The residue is first classified by ``get_residue_color``; the resulting
    category name is then translated to a high-contrast hex value. Unknown
    categories fall back to the gray hex.
    """
    gray_hex = "#57534E"
    hex_by_category = {
        "blue": "#1D4ED8",
        "red": "#DC2626",
        "green": "#16A34A",
        "gray": gray_hex,
    }
    category = get_residue_color(aa)
    return hex_by_category.get(category, gray_hex)
# Long-form Markdown legend for the helical wheel plot; the 100° step quoted
# here is the same step `plot_helical_wheel` uses to place residues.
HELIX_WHEEL_LEGEND_MARKDOWN: str = """
**Helical wheel readout**
- **Blue wedge:** cationic (K, R, H) — often important for initial membrane association.
- **Red wedge:** anionic (D, E).
- **Green wedge:** hydrophobic — often grouped on one face in amphipathic helices (membrane-facing).
- **Gray:** polar / other — may participate in solubility or hydrogen bonding.
Residues are placed using a **100° step** per position (common α-helical wheel convention). This is a **2D projection**, not a solved 3D structure.
"""
# Short blurbs for compact UI expanders (Visualize Peptide page)
# One blurb each for the 3D view, the helical wheel, and the sequence map.
COMPACT_3D_LEGEND: str = """
**How to read this 3D view**
- **Blue:** positively charged residues (K, R, H)
- **Red:** negatively charged residues (D, E)
- **Green:** hydrophobic residues (A, V, I, L, M, F, W, Y)
- **Gray:** other / polar residues
- Backbone is a **helix-like approximation**, not an experimental structure.
"""
COMPACT_WHEEL_LEGEND: str = """
**How to read this helical wheel**
- **Radial spokes:** residue positions around the helix (100 degrees per residue)
- **Black connectors:** sequence order (`i -> i+1`) across the wheel
- **Colored circles:** residue chemistry classes
- Color mapping matches the 3D view (**blue / red / green / gray**)
"""
COMPACT_MAP_LEGEND: str = """
**How to read this sequence map**
- Uses the same residue color mapping as 3D and helical wheel
- Highlights where charged vs hydrophobic residues cluster along the sequence
- Useful for quick amphipathic pattern checks
"""
def plot_helical_wheel(sequence: str, figsize: Tuple[float, float] = (6.2, 6.2)) -> Any:
    """
    Draw a helical-wheel diagram of *sequence* on a matplotlib polar axis.

    Each residue is placed on a ring at 100° increments (clockwise, starting at
    the top). The figure layers, from back to front: faint radial spokes, black
    connectors between consecutive residues (i→i+1), colored residue disks using
    ``residue_color_mpl``, and the one-letter labels with a white outline.

    Whitespace is removed and the sequence uppercased first; ``None`` or an
    all-whitespace sequence yields an empty, titled figure.

    Returns:
        The matplotlib figure (not shown or saved here).
    """
    import matplotlib.pyplot as plt
    from matplotlib import patheffects as pe

    residues = "".join(ch for ch in (sequence or "").upper() if not ch.isspace())
    count = len(residues)

    fig, ax = plt.subplots(figsize=figsize, subplot_kw={"projection": "polar"})
    fig.patch.set_facecolor("white")
    ax.set_facecolor("#ffffff")
    if not count:
        ax.set_title("Helical wheel (empty sequence)", pad=12)
        return fig

    # Angular position of every residue: 100° per step, wrapped into [0, 360).
    theta_deg = np.mod(np.arange(count, dtype=float) * 100.0, 360.0)
    theta = np.deg2rad(theta_deg)

    r_inner, r_ring = 0.06, 0.88
    # Shrink labels and markers as the sequence grows, within fixed bounds.
    label_size = min(11, int(220 / max(count, 1)))
    label_size = max(7, label_size)
    marker_area = float(np.clip(8000.0 / max(count, 1), 130.0, 420.0))

    # 0° at the top, angles increasing clockwise.
    ax.set_theta_zero_location("N")
    ax.set_theta_direction(-1)

    # Radial spokes (residue positions)
    spoke_radii = [r_inner, r_ring]
    for spoke_angle in theta:
        ax.plot(
            [spoke_angle, spoke_angle],
            spoke_radii,
            color="#1a1a1a",
            linewidth=0.65,
            alpha=0.45,
            zorder=1,
        )

    # Sequence-order connections between consecutive residues on the ring.
    # NOTE(review): both endpoints share the ring radius; on a polar axis
    # matplotlib may draw these as arcs rather than straight chords — confirm
    # visually.
    ring_radii = [r_ring, r_ring]
    for start_angle, stop_angle in zip(theta[:-1], theta[1:]):
        ax.plot(
            [start_angle, stop_angle],
            ring_radii,
            color="#0a0a0a",
            linewidth=1.05,
            solid_capstyle="round",
            zorder=2,
        )

    # Residue disks, colored by chemistry class.
    disk_colors = [residue_color_mpl(letter) for letter in residues]
    ax.scatter(
        theta,
        np.full(count, r_ring),
        s=marker_area,
        c=disk_colors,
        edgecolors="#111111",
        linewidths=1.2,
        zorder=4,
    )

    # One-letter labels on top of the disks; white stroke keeps them legible.
    outline = [pe.withStroke(linewidth=2.2, foreground="white")]
    for position, letter in enumerate(residues):
        label = ax.text(
            theta[position],
            r_ring,
            letter,
            ha="center",
            va="center",
            fontsize=label_size,
            color="#0a0a0a",
            fontweight="bold",
            zorder=5,
        )
        label.set_path_effects(outline)

    # Strip axis decorations so only the wheel remains.
    ax.set_ylim(0, 1.0)
    ax.set_yticklabels([])
    ax.set_xticklabels([])
    ax.grid(False)
    ax.set_title(
        "Helical wheel (α-helix, 100°/residue) — spokes + sequence connectors",
        pad=14,
        fontsize=11,
        color="#111111",
    )
    return fig
def get_residue_style(aa: str) -> str:
    """
    Inline CSS for one residue in the sequence map.

    Background colors follow the same chemistry classes as the wheel / 3D view
    (high contrast): K/R/H blue, D/E red, A/V/I/L/M/F/W/Y green, everything
    else gray. Text is always white.

    The residue code is matched case-insensitively, like ``get_residue_color``,
    so a lowercase letter gets the same class color here as in the other views.
    Empty or ``None`` input falls into the gray class.
    """
    code = aa.upper() if aa else ""
    if code in ("K", "R", "H"):
        background = "#1D4ED8"
    elif code in ("D", "E"):
        background = "#DC2626"
    elif code in ("A", "V", "I", "L", "M", "F", "W", "Y"):
        background = "#16A34A"
    else:
        background = "#57534E"
    return (
        f"background-color: {background}; color: #ffffff; "
        "padding: 2px 3px; border-radius: 2px;"
    )
def build_importance_map_html(sequence: str) -> str:
    """
    Build HTML for residue importance highlighting (escape non-AA safely).

    Each non-whitespace character is uppercased, wrapped in a ``<span>`` styled
    by ``get_residue_style``, and HTML-escaped so arbitrary input characters
    cannot inject markup.

    Returns:
        The concatenated spans, or an empty string when *sequence* is empty,
        all whitespace, or ``None`` (tolerated the same way as in
        ``plot_helical_wheel`` and ``render_3d_structure``).
    """
    import html as html_mod

    parts: List[str] = []
    for ch in sequence or "":
        if ch.isspace():
            continue
        aa = ch.upper()
        style = get_residue_style(aa)
        parts.append(f'<span style="{style}">{html_mod.escape(aa)}</span>')
    return "".join(parts)
def generate_helix_pdb(sequence: str, smooth: bool = False) -> str:
    """
    Generate a minimal PDB string (helix-like CA trace).

    One ``ATOM`` record (a single CA atom, chain ``A``) is written per residue.
    Residues are placed on a helix with a 100° turn and 1.45 rise per residue
    and a radius of about 5 with a small sinusoidal wobble. This is a drawing
    aid, not a physical model.

    Args:
        sequence: One-letter residue string. It is uppercased and whitespace is
            removed; ``None`` is treated as empty. Characters missing from
            ``_ONE_TO_THREE`` are written with residue name ``UNK``.
        smooth: When True (and there are at least 3 residues), apply two passes
            of a [0.2, 0.6, 0.2] moving average to the coordinates, pinning the
            first and last points, for a softer backbone path.

    Returns:
        Newline-joined ATOM records, or ``""`` for an empty sequence.
    """
    clean = "".join(c for c in (sequence or "").upper() if not c.isspace())
    n = len(clean)
    if n == 0:
        return ""

    theta_step = 100.0 * math.pi / 180.0  # ~α-helix angular step on the wheel
    rise = 1.45
    coords: List[Tuple[float, float, float]] = []
    for i in range(n):
        angle = i * theta_step
        r = 5.0 + 0.12 * math.sin(i * 0.4)
        coords.append((math.cos(angle) * r, math.sin(angle) * r, i * rise))

    if smooth and n >= 3:
        xs = np.array([c[0] for c in coords], dtype=float)
        ys = np.array([c[1] for c in coords], dtype=float)
        zs = np.array([c[2] for c in coords], dtype=float)
        k = np.array([0.2, 0.6, 0.2])
        for _ in range(2):
            xs = np.convolve(xs, k, mode="same")
            ys = np.convolve(ys, k, mode="same")
            zs = np.convolve(zs, k, mode="same")
            # mode="same" zero-pads the ends, which would drag the termini
            # toward the origin; restore them after every pass.
            xs[0], xs[-1] = coords[0][0], coords[-1][0]
            ys[0], ys[-1] = coords[0][1], coords[-1][1]
            zs[0], zs[-1] = coords[0][2], coords[-1][2]
        coords = list(zip(xs.tolist(), ys.tolist(), zs.tolist()))

    pdb_lines: List[str] = []
    for serial, (aa, (x, y, z)) in enumerate(zip(clean, coords), start=1):
        res_name = _ONE_TO_THREE.get(aa, "UNK")
        # PDB ATOM records are fixed-column, so every field must start at its
        # prescribed column or readers will misparse names and coordinates.
        # With one CA per residue, the atom serial equals the residue number.
        pdb_lines.append(
            f"{'ATOM':<6}"        # cols  1-6   record name
            f"{serial:5d}"        # cols  7-11  atom serial
            " "                   # col   12
            f"{' CA ':4}"         # cols 13-16  atom name
            " "                   # col   17    altLoc
            f"{res_name:>3}"      # cols 18-20  residue name
            " "                   # col   21
            "A"                   # col   22    chain ID
            f"{serial:4d}"        # cols 23-26  residue sequence number
            f"{'':4}"             # cols 27-30  iCode + padding
            f"{x:8.3f}{y:8.3f}{z:8.3f}"  # cols 31-54 coordinates
            f"{1.0:6.2f}"         # cols 55-60  occupancy
            f"{0.0:6.2f}"         # cols 61-66  temperature factor
            f"{'':10}"            # cols 67-76
            f"{'C':>2}"           # cols 77-78  element symbol
        )
    return "\n".join(pdb_lines)
def render_3d_structure(
    sequence: str,
    width: int = 500,
    height: int = 400,
    iframe_height: int = 420,
    *,
    enhanced: bool = False,
    spin: bool = False,
) -> bool:
    """
    Render py3Dmol view: gray stick backbone + colored spheres per residue (CA-only PDB).
    When enhanced=True: smoother helix path, slightly larger spheres, more labels.
    When spin=True: enable viewer spin (off by default).
    Not a real folded structure — helix-like CA trace only.

    The viewer HTML is embedded into the page with ``streamlit.components.v1.html``.

    Args:
        sequence: One-letter residue string; uppercased and stripped of
            whitespace before use. ``None`` is treated as empty.
        width: Width passed to ``py3Dmol.view``.
        height: Height passed to ``py3Dmol.view``.
        iframe_height: Height passed to ``components.html`` for the embedding frame.
        enhanced: Also switches to a darker background, thicker sticks, a
            translucent cartoon overlay, and a larger label font.
        spin: Ask the viewer to spin.

    Returns:
        True once the HTML has been handed to Streamlit. False when the cleaned
        sequence is empty or longer than ``MAX_3D_SEQUENCE_LENGTH``, when
        py3Dmol cannot be imported, or when building/embedding the view raises.
    """
    import streamlit.components.v1 as components
    clean = "".join(c for c in (sequence or "").upper() if not c.isspace())
    if not clean:
        return False
    if len(clean) > MAX_3D_SEQUENCE_LENGTH:
        return False
    # py3Dmol is optional: report "not rendered" instead of failing the page.
    try:
        import py3Dmol  # type: ignore
    except Exception:
        return False
    # Everything below is best-effort; any unexpected failure returns False.
    try:
        pdb_data = generate_helix_pdb(clean, smooth=enhanced)
        view = py3Dmol.view(width=width, height=height)
        view.addModel(pdb_data, "pdb")
        try:
            view.setBackgroundColor("#0f0f12" if enhanced else "#1e1e1e")
        except Exception:
            pass
        # NOTE(review): py3Dmol views may synthesize unknown method names
        # dynamically; if so this lookup is always callable and the setStyle
        # fallback in the loop below is never taken — confirm against the
        # installed py3Dmol version.
        add_style = getattr(view, "addStyle", None)
        stick_radius = 0.14 if enhanced else 0.12
        sphere_radius = 0.48 if enhanced else 0.42
        # Base style for every atom: thin gray sticks.
        try:
            view.setStyle({}, {"stick": {"radius": stick_radius, "color": "#9e9e9e"}})
        except Exception:
            pass
        # Per-residue colored sphere; residue numbers in the PDB are 1-based.
        for i, aa in enumerate(clean):
            color = get_residue_color(aa)
            sel = {"resi": i + 1}
            sphere_style = {"sphere": {"radius": sphere_radius, "color": color}}
            if callable(add_style):
                add_style(sel, sphere_style)
            else:
                # setStyle replaces rather than adds, so this path would drop
                # the stick style for the selected residue.
                view.setStyle(sel, sphere_style)
        if enhanced:
            try:
                if callable(add_style):
                    add_style({}, {"cartoon": {"color": "#dddddd", "opacity": 0.25}})
            except Exception:
                pass
        # Cap the number of labels: label every k-th residue, where k is
        # ceil(len(clean) / max_labels).
        max_labels = 60 if enhanced else 40
        label_every = max(1, (len(clean) + max_labels - 1) // max_labels)
        fs = 10 if enhanced else 9
        for i, aa in enumerate(clean):
            if i % label_every != 0:
                continue
            try:
                view.addLabel(
                    aa,
                    {
                        "position": {"resi": i + 1, "atom": "CA"},
                        "backgroundColor": "#1a1a1a",
                        "fontColor": "#ffffff",
                        "fontSize": fs,
                    },
                )
            except Exception:
                pass
        view.zoomTo()
        if spin:
            # Try spin(True) first, then a bare spin() if that raises.
            try:
                view.spin(True)
            except Exception:
                try:
                    sp = getattr(view, "spin", None)
                    if callable(sp):
                        sp()
                except Exception:
                    pass
        # Prefer the private HTML builder when present.
        # NOTE(review): confirm that `view.write()` returns an HTML string on
        # py3Dmol versions lacking `_make_html`; otherwise this fallback ends
        # in the outer `except` and the function returns False.
        if hasattr(view, "_make_html"):
            html = view._make_html()
        else:
            html = view.write()
        components.html(html, height=iframe_height)
        return True
    except Exception:
        return False