File size: 15,362 Bytes
e118453
 
 
 
 
 
 
69969dc
e118453
69969dc
bd4c024
 
 
e118453
69969dc
 
e118453
 
 
 
 
69969dc
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e118453
b74b496
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e118453
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
69969dc
 
 
e118453
69969dc
e118453
69969dc
e118453
 
 
 
 
 
 
b74b496
 
e118453
 
 
b74b496
e118453
b74b496
e118453
b74b496
e118453
 
 
 
bd4c024
ea9947a
bd4c024
 
ea9947a
 
 
 
 
bd4c024
 
 
 
 
 
 
 
 
 
 
 
4da8903
cccf8bd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4da8903
bd4c024
 
 
ea9947a
 
bd4c024
 
4da8903
bd4c024
 
 
 
4da8903
bd4c024
ea9947a
bd4c024
 
 
ea9947a
4da8903
bd4c024
 
ea9947a
 
 
 
 
 
 
 
 
bd4c024
ea9947a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4da8903
ea9947a
 
bd4c024
 
 
4da8903
ea9947a
bd4c024
ea9947a
bd4c024
ea9947a
bd4c024
ea9947a
bd4c024
 
 
ea9947a
 
 
 
 
 
bd4c024
 
 
e118453
ea9947a
e118453
 
 
 
ea9947a
e118453
ea9947a
e118453
ea9947a
 
e118453
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
bd4c024
 
 
 
 
e118453
 
 
bd4c024
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e118453
 
bd4c024
e118453
 
 
 
 
 
 
 
 
b74b496
 
 
 
 
bd4c024
 
4da8903
b74b496
e118453
4020906
4da8903
 
bd4c024
e118453
 
 
 
 
 
b74b496
 
e118453
 
 
 
 
 
bd4c024
e118453
 
b74b496
 
bd4c024
b74b496
 
 
4020906
 
bd4c024
 
4020906
bd4c024
4020906
 
b74b496
e118453
 
b74b496
bd4c024
b74b496
4020906
b74b496
4020906
b74b496
bd4c024
 
 
 
 
 
 
 
4020906
bd4c024
b74b496
4020906
 
b74b496
 
 
 
 
bd4c024
4020906
bd4c024
b74b496
 
 
 
 
e118453
b74b496
4da8903
bd4c024
 
 
 
 
 
 
 
 
 
e118453
 
 
 
b74b496
e118453
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
"""
Optional peptide UI helpers: 3D approximation (py3Dmol), known-AMP similarity, residue highlighting.

Does not modify model loading or prediction logic.
"""
from __future__ import annotations

import csv
import math
import pathlib
from typing import List, Optional, Tuple, Any

import numpy as np

# Built-in pool of known AMP sequences (all uppercase one-letter codes).
# `_load_known_amps_from_csv` returns a copy of this tuple whenever
# `Data/ampData.csv` is missing (e.g. local dev without Data/), has no
# `sequence` column, cannot be read, or yields no rows labelled "1".
_FALLBACK_KNOWN_AMPS: Tuple[str, ...] = (
    "KWKLFKKIGAVLKVL",
    "GIGKFLHSAKKFGKAFVGEIMNS",
    "LLGDFFRKSKEKIGKEFKRIVQRIKDFLRNLV",
    "KLFKKILKYL",
    "FLPLLAGLAANFLPKIFCKITRKC",
)


def _amp_data_csv_path() -> pathlib.Path:
    """Return the expected location of ``Data/ampData.csv`` under the repository root."""
    # This module lives at StreamlitApp/utils/peptide_extras.py, so the third
    # ancestor of the resolved file path (parents[2]) is the repository root.
    this_file = pathlib.Path(__file__).resolve()
    repo_root = this_file.parents[2]
    return repo_root.joinpath("Data", "ampData.csv")


def _load_known_amps_from_csv() -> List[str]:
    """
    Load the known-AMP pool from ``Data/ampData.csv``.

    Only rows whose ``label`` column equals ``"1"`` (after stripping whitespace)
    are kept. Sequences are stripped, uppercased for consistent similarity
    matching, and de-duplicated while preserving first-seen order.

    The file is decoded as ``utf-8-sig`` so that a leading UTF-8 byte-order
    mark does not get glued onto the first header name and hide the
    ``sequence`` column; files without a BOM decode exactly as plain UTF-8.

    Returns:
        A new list of sequences. A copy of ``_FALLBACK_KNOWN_AMPS`` is returned
        instead when the file is missing, has no ``sequence`` column, cannot be
        read or parsed, or contains no usable label-1 rows.
    """
    fallback = list(_FALLBACK_KNOWN_AMPS)
    path = _amp_data_csv_path()
    if not path.exists():
        return fallback

    # Dict keys keep insertion order, which gives order-preserving de-duplication.
    unique: dict = {}
    try:
        with path.open(newline="", encoding="utf-8-sig") as f:
            reader = csv.DictReader(f)
            if not reader.fieldnames or "sequence" not in reader.fieldnames:
                return fallback
            for row in reader:
                if str(row.get("label", "")).strip() != "1":
                    continue
                seq = (row.get("sequence") or "").strip().upper()
                if seq:
                    unique.setdefault(seq, None)
    except Exception:
        # Deliberately broad: the result feeds a module-level constant, so an
        # unreadable or malformed data file must degrade to the fallback pool
        # rather than break the import of this module.
        return fallback

    return list(unique) if unique else fallback


# Known AMP pool for similarity search (from ampData.csv label=1, or fallback list).
# Populated once, when this module is imported; `find_most_similar` scans it linearly.
KNOWN_AMPS: List[str] = _load_known_amps_from_csv()

# py3Dmol viewer: skip very long sequences (labels + sticks scale with length).
# `render_3d_structure` returns False without drawing for anything longer than this.
MAX_3D_SEQUENCE_LENGTH: int = 60

# Markdown legend for the 3D view. The residue classes listed here mirror the
# letter sets used by `get_residue_color`. The two trailing spaces on the bullet
# lines are Markdown hard line breaks and are part of the string.
STRUCTURE_3D_LEGEND_MARKDOWN: str = """
**Color legend**
- **Blue:** Positively charged residues (K, R, H)  
- **Red:** Negatively charged residues (D, E)  
- **Green:** Hydrophobic residues (A, V, I, L, M, F, W, Y)  
- **Gray:** Other / polar or unclassified residues  
"""

# Markdown caveat/interpretation text for the 3D view: explains that the model
# is a helix-like CA trace, not an experimentally determined structure.
STRUCTURE_3D_INTERPRETATION_MARKDOWN: str = """
**Structural interpretation (approximation only)**

This is a **simplified helical CA trace** used to visualize how residue chemistry is arranged in 3D space — **not** an experimentally determined fold.

- **Clusters of green** often correspond to membrane-facing / hydrophobic patches.  
- **Blue regions** highlight cationic residues that can promote binding to anionic bacterial surfaces.  
- **Spatial separation** between hydrophobic and charged segments can suggest **amphipathic** character, common among many AMPs.  

Together, these cues help discuss whether a sequence has motifs frequently associated with antimicrobial peptides — **wet-lab validation is still required**.
"""

# One-letter -> three-letter (for minimal PDB lines for py3Dmol).
# Covers the 20 letters listed below; `generate_helix_pdb` writes "UNK" as the
# residue name for any character that is not a key of this table.
_ONE_TO_THREE = {
    "A": "ALA",
    "R": "ARG",
    "N": "ASN",
    "D": "ASP",
    "C": "CYS",
    "Q": "GLN",
    "E": "GLU",
    "G": "GLY",
    "H": "HIS",
    "I": "ILE",
    "L": "LEU",
    "K": "LYS",
    "M": "MET",
    "F": "PHE",
    "P": "PRO",
    "S": "SER",
    "T": "THR",
    "W": "TRP",
    "Y": "TYR",
    "V": "VAL",
}


def sequence_similarity(seq1: str, seq2: str) -> float:
    """
    Return the fraction of aligned positions at which the two strings agree.

    Characters are compared index by index (no gaps, case-sensitive) over the
    shorter string, and the match count is divided by the length of the longer
    one. If either string is empty the score is 0.0.
    """
    if not (seq1 and seq2):
        return 0.0
    longest = max(len(seq1), len(seq2))
    identical = 0
    for left, right in zip(seq1, seq2):
        if left == right:
            identical += 1
    return identical / longest


def find_most_similar(sequence: str) -> Tuple[Optional[str], float]:
    """
    Find the entry of ``KNOWN_AMPS`` that best matches ``sequence``.

    The query is uppercased and stripped of all whitespace, then scored against
    every known AMP with ``sequence_similarity``. Returns ``(best_amp, score)``;
    ties go to the entry that appears first in ``KNOWN_AMPS``. Returns
    ``(None, 0.0)`` when the query is empty (or whitespace only) or the pool is
    empty.
    """
    if not (sequence and KNOWN_AMPS):
        return None, 0.0
    query = "".join(ch for ch in sequence.upper() if not ch.isspace())
    if not query:
        return None, 0.0
    # max() returns the first of several equally scored items, which matches a
    # left-to-right scan that only replaces the best on a strictly higher score.
    scored = [(sequence_similarity(query, amp), amp) for amp in KNOWN_AMPS]
    top_score, top_amp = max(scored, key=lambda pair: pair[0])
    return top_amp, top_score


def get_residue_color(aa: str) -> str:
    """
    Classify a one-letter residue code and return its py3Dmol color name.

    The input is uppercased first. K/R/H -> "blue", D/E -> "red",
    A/V/I/L/M/F/W/Y -> "green"; anything else (including an empty value or a
    multi-character string) -> "gray".
    """
    classes = {
        "blue": ("K", "R", "H"),
        "red": ("D", "E"),
        "green": ("A", "V", "I", "L", "M", "F", "W", "Y"),
    }
    letter = aa.upper() if aa else ""
    for color_name, members in classes.items():
        if letter in members:
            return color_name
    return "gray"


def residue_color_mpl(aa: str) -> str:
    """
    Return the hex color used in matplotlib plots for a residue.

    The residue is first classified by ``get_residue_color``; the resulting
    category name is then translated to a high-contrast hex code. Unrecognized
    categories fall back to the gray hex value.
    """
    gray_hex = "#57534E"
    hex_by_category = {
        "blue": "#1D4ED8",
        "red": "#DC2626",
        "green": "#16A34A",
        "gray": gray_hex,
    }
    category = get_residue_color(aa)
    if category in hex_by_category:
        return hex_by_category[category]
    return gray_hex


# Markdown legend for the helical wheel plot. The color classes match
# `get_residue_color`, and the 100° step matches the angle used by
# `plot_helical_wheel`. The two trailing spaces on the bullet lines are
# Markdown hard line breaks and are part of the string.
HELIX_WHEEL_LEGEND_MARKDOWN: str = """
**Helical wheel readout**
- **Blue wedge:** cationic (K, R, H) — often important for initial membrane association.  
- **Red wedge:** anionic (D, E).  
- **Green wedge:** hydrophobic — often grouped on one face in amphipathic helices (membrane-facing).  
- **Gray:** polar / other — may participate in solubility or hydrogen bonding.  

Residues are placed using a **100° step** per position (common α-helical wheel convention). This is a **2D projection**, not a solved 3D structure.
"""

# Short blurbs for compact UI expanders (Visualize Peptide page)
# One blurb per visualization: 3D view, helical wheel, and sequence map.
COMPACT_3D_LEGEND: str = """
**How to read this 3D view**
- **Blue:** positively charged residues (K, R, H)
- **Red:** negatively charged residues (D, E)
- **Green:** hydrophobic residues (A, V, I, L, M, F, W, Y)
- **Gray:** other / polar residues
- Backbone is a **helix-like approximation**, not an experimental structure.
"""
COMPACT_WHEEL_LEGEND: str = """
**How to read this helical wheel**
- **Radial spokes:** residue positions around the helix (100 degrees per residue)
- **Black connectors:** sequence order (`i -> i+1`) across the wheel
- **Colored circles:** residue chemistry classes
- Color mapping matches the 3D view (**blue / red / green / gray**)
"""
COMPACT_MAP_LEGEND: str = """
**How to read this sequence map**
- Uses the same residue color mapping as 3D and helical wheel
- Highlights where charged vs hydrophobic residues cluster along the sequence
- Useful for quick amphipathic pattern checks
"""


def plot_helical_wheel(sequence: str, figsize: Tuple[float, float] = (6.2, 6.2)) -> Any:
    """
    Render a peptide as a helical wheel on a matplotlib polar axis and return the figure.

    Whitespace is dropped and letters are uppercased. Residue ``i`` is placed at
    ``(100 * i) mod 360`` degrees, measured clockwise from the top of the plot.
    Three layers are drawn: faint radial spokes, dark connectors joining residue
    i to i+1, and colored disks carrying the residue letter (colors come from
    ``residue_color_mpl``). An empty sequence yields a titled, otherwise blank figure.
    """
    import matplotlib.pyplot as plt
    from matplotlib import patheffects as pe

    residues = "".join(ch for ch in (sequence or "").upper() if not ch.isspace())
    count = len(residues)

    fig, ax = plt.subplots(figsize=figsize, subplot_kw={"projection": "polar"})
    fig.patch.set_facecolor("white")
    ax.set_facecolor("#ffffff")
    if not count:
        ax.set_title("Helical wheel (empty sequence)", pad=12)
        return fig

    # One angle per residue: 100 degrees per step, wrapped into [0, 360).
    theta = np.deg2rad((np.arange(count, dtype=float) * 100.0) % 360.0)
    hub_radius, ring_radius = 0.06, 0.88
    # Shrink letters and disks as the sequence grows, within fixed bounds.
    label_size = max(7, min(11, int(220 / count)))
    disk_area = float(np.clip(8000.0 / count, 130.0, 420.0))

    ax.set_theta_zero_location("N")
    ax.set_theta_direction(-1)

    # Layer 1: a spoke from the hub to the ring for every residue position.
    spoke_style = dict(color="#1a1a1a", linewidth=0.65, alpha=0.45, zorder=1)
    for th in theta:
        ax.plot([th, th], [hub_radius, ring_radius], **spoke_style)

    # Layer 2: connectors between consecutive residues (sequence order).
    chord_style = dict(color="#0a0a0a", linewidth=1.05, solid_capstyle="round", zorder=2)
    for start, stop in zip(theta[:-1], theta[1:]):
        ax.plot([start, stop], [ring_radius, ring_radius], **chord_style)

    # Layer 3: colored disks on the ring.
    ax.scatter(
        theta,
        np.full(count, ring_radius),
        s=disk_area,
        c=[residue_color_mpl(res) for res in residues],
        edgecolors="#111111",
        linewidths=1.2,
        zorder=4,
    )

    # Layer 4: residue letters with a white outline so they stay readable on any disk color.
    for th, res in zip(theta, residues):
        label = ax.text(
            th,
            ring_radius,
            res,
            ha="center",
            va="center",
            fontsize=label_size,
            color="#0a0a0a",
            fontweight="bold",
            zorder=5,
        )
        label.set_path_effects([pe.withStroke(linewidth=2.2, foreground="white")])

    ax.set_ylim(0, 1.0)
    ax.set_yticklabels([])
    ax.set_xticklabels([])
    ax.grid(False)
    ax.set_title(
        "Helical wheel (α-helix, 100°/residue) — spokes + sequence connectors",
        pad=14,
        fontsize=11,
        color="#111111",
    )
    return fig


def get_residue_style(aa: str) -> str:
    """
    Return the inline CSS used to render one residue in the sequence map.

    The residue is uppercased before classification, so lowercase input gets
    the same color as in ``get_residue_color``. K/R/H get a blue background,
    D/E red, A/V/I/L/M/F/W/Y green, and anything else (including an empty
    value or a multi-character string) gray. Text color, padding and corner
    radius are the same for every class.
    """
    letter = aa.upper() if aa else ""
    if letter in ("K", "R", "H"):
        background = "#1D4ED8"
    elif letter in ("D", "E"):
        background = "#DC2626"
    elif letter in ("A", "V", "I", "L", "M", "F", "W", "Y"):
        background = "#16A34A"
    else:
        background = "#57534E"
    return f"background-color: {background}; color: #ffffff; padding: 2px 3px; border-radius: 2px;"


def build_importance_map_html(sequence: str) -> str:
    """
    Build the HTML fragment for the colored sequence map.

    Each non-whitespace character is uppercased and wrapped in a ``<span>``
    whose inline style comes from ``get_residue_style``; the character itself
    is HTML-escaped so non-amino-acid input cannot inject markup. Spans are
    concatenated without separators.
    """
    import html as html_mod

    def _span(residue: str) -> str:
        # One styled, escaped <span> for a single (already uppercased) residue.
        return f'<span style="{get_residue_style(residue)}">{html_mod.escape(residue)}</span>'

    return "".join(_span(ch.upper()) for ch in sequence if not ch.isspace())


def generate_helix_pdb(sequence: str, smooth: bool = False) -> str:
    """
    Generate a minimal PDB string (helix-like CA trace), one ATOM record per residue.

    The sequence is uppercased and whitespace is removed. Residue ``i`` (0-based)
    is placed at angle ``100° * i`` on a circle of radius ``5.0 + 0.12*sin(0.4*i)``
    and at height ``1.45 * i``. Letters missing from ``_ONE_TO_THREE`` are written
    with residue name ``UNK``. All atoms are ``CA`` on chain ``A``; atom serial and
    residue number both start at 1.

    Args:
        sequence: One-letter peptide sequence; ``None`` or empty yields ``""``.
        smooth: When True (and there are at least 3 residues), apply two passes
            of a ``[0.2, 0.6, 0.2]`` moving average to the coordinates for a
            softer backbone path. The first and last residue keep their
            unsmoothed positions.

    Returns:
        Newline-joined ATOM records, or ``""`` if no residues remain.
    """
    clean = "".join(c for c in (sequence or "").upper() if not c.isspace())
    n = len(clean)
    if n == 0:
        return ""

    theta_step = 100.0 * math.pi / 180.0  # ~α-helix angular step on the wheel
    rise = 1.45
    coords: List[Tuple[float, float, float]] = []
    for i in range(n):
        angle = i * theta_step
        radius = 5.0 + 0.12 * math.sin(i * 0.4)
        coords.append((math.cos(angle) * radius, math.sin(angle) * radius, i * rise))

    if smooth and n >= 3:
        kernel = np.array([0.2, 0.6, 0.2])
        raw = np.array(coords, dtype=float)  # shape (n, 3): columns are x, y, z
        smoothed = raw.copy()
        for _ in range(2):
            for axis in range(3):
                smoothed[:, axis] = np.convolve(smoothed[:, axis], kernel, mode="same")
            # mode="same" zero-pads, which drags both end values toward 0. Re-pin
            # the ends after EVERY pass so the next pass does not smear that
            # artifact into the neighbouring residues.
            smoothed[0] = raw[0]
            smoothed[-1] = raw[-1]
        coords = [tuple(row) for row in smoothed.tolist()]

    pdb_lines: List[str] = []
    for serial, (aa, (x, y, z)) in enumerate(zip(clean, coords), start=1):
        res_name = _ONE_TO_THREE.get(aa, "UNK")
        # Fixed-width PDB ATOM record: the spacing inside the literals positions
        # each field in its column, so it must not be reformatted.
        pdb_lines.append(
            f"ATOM  {serial:5d}  CA  {res_name:3s} A{serial:4d}    "
            f"{x:8.3f}{y:8.3f}{z:8.3f}  1.00  0.00           C"
        )
    return "\n".join(pdb_lines)


def render_3d_structure(
    sequence: str,
    width: int = 500,
    height: int = 400,
    iframe_height: int = 420,
    *,
    enhanced: bool = False,
    spin: bool = False,
) -> bool:
    """
    Render py3Dmol view: gray stick backbone + colored spheres per residue (CA-only PDB).
    When enhanced=True: smoother helix path, slightly larger spheres, more labels.
    When spin=True: enable viewer spin (off by default).
    Not a real folded structure — helix-like CA trace only.

    Args:
        sequence: Peptide sequence; uppercased and stripped of whitespace before use.
        width: Width passed to ``py3Dmol.view``.
        height: Height passed to ``py3Dmol.view``.
        iframe_height: Height passed to ``components.html`` for the embedded viewer.
        enhanced: Switches on coordinate smoothing, a darker background, thicker
            sticks, larger spheres, a translucent cartoon style, and a larger
            label budget/font.
        spin: Ask the viewer to spin.

    Returns:
        True once the viewer HTML has been passed to ``components.html``.
        False when the cleaned sequence is empty or longer than
        ``MAX_3D_SEQUENCE_LENGTH``, when ``py3Dmol`` cannot be imported, or when
        any step that is not individually guarded raises.
    """
    # Imported here rather than at module top, so importing this module does
    # not itself require streamlit.
    import streamlit.components.v1 as components

    clean = "".join(c for c in (sequence or "").upper() if not c.isspace())
    if not clean:
        return False
    if len(clean) > MAX_3D_SEQUENCE_LENGTH:
        return False
    # py3Dmol is optional: report failure instead of raising when it is absent.
    try:
        import py3Dmol  # type: ignore
    except Exception:
        return False

    try:
        pdb_data = generate_helix_pdb(clean, smooth=enhanced)
        view = py3Dmol.view(width=width, height=height)
        view.addModel(pdb_data, "pdb")

        # Cosmetic calls below are individually guarded so that a failure in
        # one of them does not abort the whole render.
        try:
            view.setBackgroundColor("#0f0f12" if enhanced else "#1e1e1e")
        except Exception:
            pass

        # Not every view object is assumed to expose addStyle; None means
        # "fall back to setStyle" in the per-residue loop below.
        add_style = getattr(view, "addStyle", None)

        stick_radius = 0.14 if enhanced else 0.12
        sphere_radius = 0.48 if enhanced else 0.42
        # Base style for all atoms (empty selection): thin gray sticks.
        try:
            view.setStyle({}, {"stick": {"radius": stick_radius, "color": "#9e9e9e"}})
        except Exception:
            pass

        # One sphere per residue, colored by chemistry class. Residue numbers
        # in the generated PDB are 1-based, hence i + 1.
        for i, aa in enumerate(clean):
            color = get_residue_color(aa)
            sel = {"resi": i + 1}
            sphere_style = {"sphere": {"radius": sphere_radius, "color": color}}
            if callable(add_style):
                add_style(sel, sphere_style)
            else:
                # NOTE(review): setStyle presumably replaces the stick style for
                # this residue rather than layering on top — confirm against py3Dmol.
                view.setStyle(sel, sphere_style)

        if enhanced:
            try:
                if callable(add_style):
                    add_style({}, {"cartoon": {"color": "#dddddd", "opacity": 0.25}})
            except Exception:
                pass

        # Label every k-th residue, where k = ceil(len / max_labels), so the
        # number of labels never exceeds max_labels.
        max_labels = 60 if enhanced else 40
        label_every = max(1, (len(clean) + max_labels - 1) // max_labels)
        fs = 10 if enhanced else 9
        for i, aa in enumerate(clean):
            if i % label_every != 0:
                continue
            try:
                view.addLabel(
                    aa,
                    {
                        "position": {"resi": i + 1, "atom": "CA"},
                        "backgroundColor": "#1a1a1a",
                        "fontColor": "#ffffff",
                        "fontSize": fs,
                    },
                )
            except Exception:
                pass

        view.zoomTo()

        if spin:
            # Try spin(True) first; if that raises, retry with no argument.
            # Spinning is optional, so a second failure is ignored.
            try:
                view.spin(True)
            except Exception:
                try:
                    sp = getattr(view, "spin", None)
                    if callable(sp):
                        sp()
                except Exception:
                    pass

        # Prefer the private _make_html() when the view has it; otherwise use write().
        if hasattr(view, "_make_html"):
            html = view._make_html()
        else:
            html = view.write()
        components.html(html, height=iframe_height)
        return True
    except Exception:
        # Any unguarded failure above (model creation, styling, HTML export,
        # embedding) is reported to the caller as "not rendered".
        return False