"""
Optional peptide UI helpers: 3D approximation (py3Dmol), known-AMP similarity, residue highlighting.

Does not modify model loading or prediction logic.
"""

from __future__ import annotations

import csv
import math
import pathlib
from typing import List, Optional, Tuple, Any

import numpy as np

# Fallback if `Data/ampData.csv` is missing (e.g. local dev without Data/).
_FALLBACK_KNOWN_AMPS: Tuple[str, ...] = (
    "KWKLFKKIGAVLKVL",
    "GIGKFLHSAKKFGKAFVGEIMNS",
    "LLGDFFRKSKEKIGKEFKRIVQRIKDFLRNLV",
    "KLFKKILKYL",
    "FLPLLAGLAANFLPKIFCKITRKC",
)


def _amp_data_csv_path() -> pathlib.Path:
    """Return the expected location of ``Data/ampData.csv`` under the repo root."""
    # StreamlitApp/utils/peptide_extras.py -> repo root is parents[2]
    return pathlib.Path(__file__).resolve().parents[2] / "Data" / "ampData.csv"


def _load_known_amps_from_csv(path: Optional[pathlib.Path] = None) -> List[str]:
    """
    Load unique sequences labeled as AMP (label == 1) from an AMP data CSV.

    Sequences are stripped and uppercased for consistent similarity matching,
    and returned in first-seen order without duplicates.

    Args:
        path: CSV file to read. Defaults to ``Data/ampData.csv`` as resolved
            by ``_amp_data_csv_path()``.

    Returns:
        The AMP sequences found, or a copy of ``_FALLBACK_KNOWN_AMPS`` when
        the file is missing or unreadable, has no ``sequence`` column, or
        yields no AMP rows.
    """
    fallback = list(_FALLBACK_KNOWN_AMPS)

    if path is None:
        try:
            path = _amp_data_csv_path()
        except IndexError:
            # Module sits fewer than three levels deep, so there is no
            # parents[2]; this runs at import time and must not crash.
            return fallback

    amps: List[str] = []
    seen = set()
    try:
        # utf-8-sig strips a leading BOM (common in spreadsheet exports) that
        # would otherwise turn the first header into "\ufeffsequence".
        with pathlib.Path(path).open(newline="", encoding="utf-8-sig") as f:
            reader = csv.DictReader(f)
            if not reader.fieldnames or "sequence" not in reader.fieldnames:
                return fallback
            for row in reader:
                if str(row.get("label", "")).strip() != "1":
                    continue
                seq = (row.get("sequence") or "").strip().upper()
                if seq and seq not in seen:
                    seen.add(seq)
                    amps.append(seq)
    except (OSError, ValueError, csv.Error):
        # Missing/unreadable file, undecodable bytes (UnicodeDecodeError is a
        # ValueError) or malformed CSV: degrade to the built-in list.
        return fallback

    return amps or fallback


# Known AMP pool for similarity search (from ampData.csv label=1, or fallback list).
KNOWN_AMPS: List[str] = _load_known_amps_from_csv()

# py3Dmol viewer: skip very long sequences (labels + sticks scale with length).
MAX_3D_SEQUENCE_LENGTH: int = 60

# Markdown shown next to the 3D viewer; list items must sit on their own
# lines for the bullets to render.
STRUCTURE_3D_LEGEND_MARKDOWN: str = """
**Color legend**

- **Blue:** Positively charged residues (K, R, H)
- **Red:** Negatively charged residues (D, E)
- **Green:** Hydrophobic residues (A, V, I, L, M, F, W, Y)
- **Gray:** Other / polar or unclassified residues
"""

STRUCTURE_3D_INTERPRETATION_MARKDOWN: str = """
**Structural interpretation (approximation only)**

This is a **simplified helical CA trace** used to visualize how residue chemistry is arranged in 3D space — **not** an experimentally determined fold.

- **Clusters of green** often correspond to membrane-facing / hydrophobic patches.
- **Blue regions** highlight cationic residues that can promote binding to anionic bacterial surfaces.
- **Spatial separation** between hydrophobic and charged segments can suggest **amphipathic** character, common among many AMPs.

Together, these cues help discuss whether a sequence has motifs frequently associated with antimicrobial peptides — **wet-lab validation is still required**.
"""

# One-letter -> three-letter (for minimal PDB lines for py3Dmol).
_ONE_TO_THREE = {
    "A": "ALA",
    "R": "ARG",
    "N": "ASN",
    "D": "ASP",
    "C": "CYS",
    "Q": "GLN",
    "E": "GLU",
    "G": "GLY",
    "H": "HIS",
    "I": "ILE",
    "L": "LEU",
    "K": "LYS",
    "M": "MET",
    "F": "PHE",
    "P": "PRO",
    "S": "SER",
    "T": "THR",
    "W": "TRP",
    "Y": "TYR",
    "V": "VAL",
}

# Residue chemistry classes shared by the 3D view, helical wheel and sequence
# map. Anything not listed here is drawn gray.
_RESIDUE_CLASS_COLORS = {
    **dict.fromkeys("KRH", "blue"),  # positively charged
    **dict.fromkeys("DE", "red"),  # negatively charged
    **dict.fromkeys("AVILMFWY", "green"),  # hydrophobic
}

# High-contrast hex equivalents of the color names above.
_CLASS_HEX_COLORS = {
    "blue": "#1D4ED8",
    "red": "#DC2626",
    "green": "#16A34A",
    "gray": "#57534E",
}


def _clean_sequence(sequence: Optional[str]) -> str:
    """Uppercase *sequence* and drop all whitespace; ``None`` becomes ``""``."""
    return "".join(c for c in (sequence or "").upper() if not c.isspace())


def sequence_similarity(seq1: str, seq2: str) -> float:
    """Position-wise match rate normalized by max length (as specified).

    Characters are compared index by index over the shorter sequence; the
    count of matches is divided by the longer length. Returns 0.0 when either
    sequence is empty.
    """
    if not seq1 or not seq2:
        return 0.0
    matches = sum(a == b for a, b in zip(seq1, seq2))
    return matches / max(len(seq1), len(seq2))


def find_most_similar(
    sequence: str, known_amps: Optional[List[str]] = None
) -> Tuple[Optional[str], float]:
    """Find the known AMP most similar to *sequence*.

    Args:
        sequence: Query peptide; it is uppercased and whitespace is removed.
        known_amps: Candidate pool to search. Defaults to the module-level
            ``KNOWN_AMPS``.

    Returns:
        ``(best_sequence, score)`` using ``sequence_similarity``; the first
        candidate wins ties. ``(None, 0.0)`` when the query or pool is empty.
    """
    pool = KNOWN_AMPS if known_amps is None else known_amps
    if not sequence or not pool:
        return None, 0.0
    seq = _clean_sequence(sequence)
    if not seq:
        return None, 0.0

    best_seq = pool[0]
    best_score = sequence_similarity(seq, best_seq)
    for amp in pool[1:]:
        score = sequence_similarity(seq, amp)
        if score > best_score:  # strict: earlier candidates win ties
            best_seq, best_score = amp, score
    return best_seq, best_score


def get_residue_color(aa: str) -> str:
    """Map one-letter residue to a py3Dmol color name (single-letter, uppercased).

    Returns "blue", "red", "green" or, for anything unclassified (including
    empty/``None`` input), "gray".
    """
    ch = aa.upper() if aa else ""
    return _RESIDUE_CLASS_COLORS.get(ch, "gray")


def residue_color_mpl(aa: str) -> str:
    """Matplotlib-compatible hex colors matching `get_residue_color` categories (high-contrast for plots)."""
    return _CLASS_HEX_COLORS.get(get_residue_color(aa), "#57534E")


HELIX_WHEEL_LEGEND_MARKDOWN: str = """
**Helical wheel readout**

- **Blue wedge:** cationic (K, R, H) — often important for initial membrane association.
- **Red wedge:** anionic (D, E).
- **Green wedge:** hydrophobic — often grouped on one face in amphipathic helices (membrane-facing).
- **Gray:** polar / other — may participate in solubility or hydrogen bonding.

Residues are placed using a **100° step** per position (common α-helical wheel convention). This is a **2D projection**, not a solved 3D structure.
"""

# Short blurbs for compact UI expanders (Visualize Peptide page)
COMPACT_3D_LEGEND: str = """
**How to read this 3D view**

- **Blue:** positively charged residues (K, R, H)
- **Red:** negatively charged residues (D, E)
- **Green:** hydrophobic residues (A, V, I, L, M, F, W, Y)
- **Gray:** other / polar residues
- Backbone is a **helix-like approximation**, not an experimental structure.
"""

COMPACT_WHEEL_LEGEND: str = """
**How to read this helical wheel**

- **Radial spokes:** residue positions around the helix (100 degrees per residue)
- **Black connectors:** sequence order (`i -> i+1`) across the wheel
- **Colored circles:** residue chemistry classes
- Color mapping matches the 3D view (**blue / red / green / gray**)
"""

COMPACT_MAP_LEGEND: str = """
**How to read this sequence map**

- Uses the same residue color mapping as 3D and helical wheel
- Highlights where charged vs hydrophobic residues cluster along the sequence
- Useful for quick amphipathic pattern checks
"""


def plot_helical_wheel(sequence: str, figsize: Tuple[float, float] = (6.2, 6.2)) -> Any:
    """
    Detailed helical wheel (matplotlib polar): radial spokes, sequence-order connectors (i→i+1),
    and colored residue disks — same chemistry classes as 3D / HTML maps (high-contrast colors).

    Args:
        sequence: Peptide sequence; uppercased, whitespace removed.
        figsize: Matplotlib figure size in inches.

    Returns:
        The matplotlib figure. An empty sequence yields a titled, empty wheel.
    """
    import matplotlib.pyplot as plt
    from matplotlib import patheffects as pe

    clean = _clean_sequence(sequence)
    n = len(clean)

    fig, ax = plt.subplots(figsize=figsize, subplot_kw={"projection": "polar"})
    fig.patch.set_facecolor("white")
    ax.set_facecolor("#ffffff")
    if n == 0:
        ax.set_title("Helical wheel (empty sequence)", pad=12)
        return fig

    # 100 degrees per residue, wrapped onto the circle.
    angles_rad = np.deg2rad((np.arange(n) * 100.0) % 360.0)
    r_inner, r_ring = 0.06, 0.88
    # Shrink letters and disks as the sequence grows, within fixed bounds.
    fs = max(7, min(11, int(220 / n)))
    pt_size = float(np.clip(8000.0 / n, 130.0, 420.0))

    # First residue at 12 o'clock, proceeding clockwise.
    ax.set_theta_zero_location("N")
    ax.set_theta_direction(-1)

    # Radial spokes (residue positions)
    for th in angles_rad:
        ax.plot(
            [th, th],
            [r_inner, r_ring],
            color="#1a1a1a",
            linewidth=0.65,
            alpha=0.45,
            zorder=1,
        )

    # Sequence-order connections (straight chords in the plane — classic wheel “star”)
    for th_from, th_to in zip(angles_rad[:-1], angles_rad[1:]):
        ax.plot(
            [th_from, th_to],
            [r_ring, r_ring],
            color="#0a0a0a",
            linewidth=1.05,
            solid_capstyle="round",
            zorder=2,
        )

    ax.scatter(
        angles_rad,
        np.full(n, r_ring),
        s=pt_size,
        c=[residue_color_mpl(aa) for aa in clean],
        edgecolors="#111111",
        linewidths=1.2,
        zorder=4,
    )

    for th, aa in zip(angles_rad, clean):
        label = ax.text(
            th,
            r_ring,
            aa,
            ha="center",
            va="center",
            fontsize=fs,
            color="#0a0a0a",
            fontweight="bold",
            zorder=5,
        )
        # White halo keeps the letter legible on the colored disk.
        label.set_path_effects([pe.withStroke(linewidth=2.2, foreground="white")])

    ax.set_ylim(0, 1.0)
    ax.set_yticklabels([])
    ax.set_xticklabels([])
    ax.grid(False)
    ax.set_title(
        "Helical wheel (α-helix, 100°/residue) — spokes + sequence connectors",
        pad=14,
        fontsize=11,
        color="#111111",
    )
    return fig


def get_residue_style(aa: str) -> str:
    """Inline styles for sequence map — colors aligned with wheel / 3D categories (high contrast).

    The background comes from `residue_color_mpl`, so the residue is
    classified case-insensitively, exactly as in the wheel and 3D views.
    """
    return (
        f"background-color: {residue_color_mpl(aa)}; color: #ffffff; "
        "padding: 2px 3px; border-radius: 2px;"
    )


def build_importance_map_html(sequence: str) -> str:
    """Build HTML for residue importance highlighting (escape non-AA safely).

    Each non-whitespace character is uppercased, HTML-escaped and wrapped in a
    ``<span>`` carrying the inline style from `get_residue_style`.
    """
    import html as html_mod

    parts: List[str] = []
    for ch in sequence or "":
        if ch.isspace():
            continue
        aa = ch.upper()
        style = get_residue_style(aa)
        parts.append(f'<span style="{style}">{html_mod.escape(aa)}</span>')
    return "".join(parts)


def _format_ca_atom_line(serial: int, res_name: str, x: float, y: float, z: float) -> str:
    """Format one fixed-column PDB ATOM record for a CA atom on chain A."""
    return "".join(
        (
            "ATOM  ",  # cols 1-6: record name
            f"{serial:5d}",  # cols 7-11: atom serial
            " ",  # col 12
            " CA ",  # cols 13-16: atom name
            " ",  # col 17: altLoc
            f"{res_name:>3s}",  # cols 18-20: residue name
            " ",  # col 21
            "A",  # col 22: chain id
            f"{serial:4d}",  # cols 23-26: residue number (one atom per residue)
            " " * 4,  # cols 27-30: iCode + padding
            f"{x:8.3f}{y:8.3f}{z:8.3f}",  # cols 31-54: coordinates
            f"{1.0:6.2f}{0.0:6.2f}",  # cols 55-66: occupancy, B-factor
            " " * 10,  # cols 67-76
            " C",  # cols 77-78: element, right-justified
        )
    )


def generate_helix_pdb(sequence: str, smooth: bool = False) -> str:
    """
    Generate a minimal PDB string (helix-like CA trace).

    One CA atom is emitted per residue on chain A, numbered from 1; residues
    missing from ``_ONE_TO_THREE`` are written as ``UNK``.

    When smooth=True, apply light coordinate smoothing for a softer backbone path.

    Args:
        sequence: Peptide sequence; uppercased, whitespace removed.
        smooth: Apply two passes of a 3-point kernel (needs >= 3 residues).

    Returns:
        Newline-joined ATOM records, or ``""`` for an empty sequence.
    """
    clean = _clean_sequence(sequence)
    n = len(clean)
    if n == 0:
        return ""

    theta_step = 100.0 * math.pi / 180.0  # ~α-helix angular step on the wheel
    rise = 1.45
    coords: List[Tuple[float, float, float]] = []
    for i in range(n):
        angle = i * theta_step
        r = 5.0 + 0.12 * math.sin(i * 0.4)
        coords.append((math.cos(angle) * r, math.sin(angle) * r, i * rise))

    if smooth and n >= 3:
        xs, ys, zs = (np.array(axis, dtype=float) for axis in zip(*coords))
        kernel = np.array([0.2, 0.6, 0.2])
        (x0, y0, z0), (x1, y1, z1) = coords[0], coords[-1]
        for _ in range(2):
            xs = np.convolve(xs, kernel, mode="same")
            ys = np.convolve(ys, kernel, mode="same")
            zs = np.convolve(zs, kernel, mode="same")
            # mode="same" zero-pads the edges; pin both endpoints after every
            # pass so that distortion does not leak into the next pass.
            xs[0], xs[-1] = x0, x1
            ys[0], ys[-1] = y0, y1
            zs[0], zs[-1] = z0, z1
        coords = list(zip(xs.tolist(), ys.tolist(), zs.tolist()))

    return "\n".join(
        _format_ca_atom_line(i + 1, _ONE_TO_THREE.get(aa, "UNK"), *coords[i])
        for i, aa in enumerate(clean)
    )


def render_3d_structure(
    sequence: str,
    width: int = 500,
    height: int = 400,
    iframe_height: int = 420,
    *,
    enhanced: bool = False,
    spin: bool = False,
) -> bool:
    """
    Render py3Dmol view: gray stick backbone + colored spheres per residue (CA-only PDB).

    When enhanced=True: smoother helix path, slightly larger spheres, more labels.
    When spin=True: enable viewer spin (off by default).
    Not a real folded structure — helix-like CA trace only.

    Args:
        sequence: Peptide sequence; uppercased, whitespace removed.
        width: Viewer width passed to ``py3Dmol.view``.
        height: Viewer height passed to ``py3Dmol.view``.
        iframe_height: Height of the Streamlit HTML component.
        enhanced: Use the smoothed trace and richer styling.
        spin: Start the viewer spinning.

    Returns:
        True once the viewer HTML has been handed to Streamlit. False when the
        sequence is empty or longer than ``MAX_3D_SEQUENCE_LENGTH``, py3Dmol
        cannot be imported, or rendering fails.
    """
    import streamlit.components.v1 as components

    clean = _clean_sequence(sequence)
    if not clean or len(clean) > MAX_3D_SEQUENCE_LENGTH:
        return False

    try:
        import py3Dmol  # type: ignore
    except Exception:
        return False

    # The viewer is an optional extra: every cosmetic step below is
    # best-effort, and any hard failure is reported to the caller as False.
    try:
        pdb_data = generate_helix_pdb(clean, smooth=enhanced)
        view = py3Dmol.view(width=width, height=height)
        view.addModel(pdb_data, "pdb")

        try:
            view.setBackgroundColor("#0f0f12" if enhanced else "#1e1e1e")
        except Exception:
            pass

        add_style = getattr(view, "addStyle", None)
        stick_radius = 0.14 if enhanced else 0.12
        sphere_radius = 0.48 if enhanced else 0.42

        try:
            view.setStyle({}, {"stick": {"radius": stick_radius, "color": "#9e9e9e"}})
        except Exception:
            pass

        for i, aa in enumerate(clean):
            sel = {"resi": i + 1}
            sphere_style = {
                "sphere": {"radius": sphere_radius, "color": get_residue_color(aa)}
            }
            if callable(add_style):
                add_style(sel, sphere_style)
            else:
                view.setStyle(sel, sphere_style)

        if enhanced:
            try:
                if callable(add_style):
                    add_style({}, {"cartoon": {"color": "#dddddd", "opacity": 0.25}})
            except Exception:
                pass

        # Thin the labels out so at most `max_labels` are drawn.
        max_labels = 60 if enhanced else 40
        label_every = max(1, (len(clean) + max_labels - 1) // max_labels)
        fs = 10 if enhanced else 9
        label_style = {
            "backgroundColor": "#1a1a1a",
            "fontColor": "#ffffff",
            "fontSize": fs,
        }
        for i, aa in enumerate(clean):
            if i % label_every != 0:
                continue
            try:
                # The label is anchored via the selection argument; the
                # `position` option expects x/y/z coordinates, not a selection.
                view.addLabel(aa, dict(label_style), {"resi": i + 1, "atom": "CA"})
            except Exception:
                pass

        view.zoomTo()

        if spin:
            try:
                view.spin(True)
            except Exception:
                try:
                    sp = getattr(view, "spin", None)
                    if callable(sp):
                        sp()
                except Exception:
                    pass

        if hasattr(view, "_make_html"):
            html = view._make_html()
        else:
            html = view.write()
        components.html(html, height=iframe_height)
        return True
    except Exception:
        return False