"""Post-generation safety: enforce citations + override banned chemicals. Any answer that recommends a banned chemical or contradicts a HARD_OVERRIDE gets rewritten before reaching the user. Cheap, deterministic, no LLM call. """ from __future__ import annotations import re from typing import List, Tuple from .prompt import BANNED_CHEMICALS, HARD_OVERRIDES, REQUIRES_CITATION _CITATION_RE = re.compile(r"\[(\d+)\]") def has_citations(answer: str) -> bool: return bool(_CITATION_RE.search(answer)) _NEGATION_WINDOW = 30 # fallback window for inline negations _SENTENCE_SPLIT = re.compile(r"[.!?\n]+") _NEGATIONS = ( "do not use", "don't use", "never use", "avoid", "banned", "no use of", "not recommended", "instead of", "rather than", "not ", "non-", "no ", "without ", ) def _is_negated(answer_lower: str, chemical: str) -> bool: """True if every occurrence of `chemical` sits inside a negated sentence or is preceded (within 30 chars) by a negation phrase. Why a sentence scan, not just a 30-char window: list-style safety advice such as "Banned chemicals (Endosulfan, Monocrotophos, phorate, phosphamidon)" keeps the negation cue ("banned") far from the 4th item, so the small window misses it. Splitting on sentence terminators catches the whole list. """ idx = answer_lower.find(chemical) while idx != -1: # 1. Whole-sentence scan — find sentence boundaries around `idx`. sent_start = max((m.end() for m in _SENTENCE_SPLIT.finditer(answer_lower, 0, idx)), default=0) next_term = _SENTENCE_SPLIT.search(answer_lower, idx) sent_end = next_term.start() if next_term else len(answer_lower) sentence = answer_lower[sent_start:sent_end] # 2. Inline window — covers cross-sentence cues like "Use X, not Y". window = answer_lower[max(0, idx - _NEGATION_WINDOW):idx] if any(neg in sentence for neg in _NEGATIONS) or any(neg in window for neg in _NEGATIONS): idx = answer_lower.find(chemical, idx + len(chemical)) continue return False return True # all occurrences are negated def banned_chemical_check(answer: str) -> List[str]: """Return list of banned chemicals mentioned positively (not negated).""" a = answer.lower() return [b for b in BANNED_CHEMICALS if b in a and not _is_negated(a, b)] def hard_override_violations(answer: str) -> List[str]: """Return replacement notes if the answer hit any HARD_OVERRIDE rule. Only fires when both trigger and banned chemical are present AND the chemical mention is not negated (e.g. "do NOT use Imidacloprid"). """ a = answer.lower() out = [] for trigger, banned, replacement in HARD_OVERRIDES: if trigger in a and banned in a and not _is_negated(a, banned): out.append(replacement) return out def review(answer: str, *, problem_type: str) -> Tuple[str, List[str]]: """Apply post-generation review. Returns (possibly-rewritten answer, list of warnings shown to the user). Never raises — best-effort filter. """ warnings: List[str] = [] # 1. Banned chemicals — strikethrough + warn. banned = banned_chemical_check(answer) if banned: warnings.append( f"⚠️ Answer mentioned banned chemical(s): {', '.join(banned)}. " "These have been struck through; do NOT use them." ) for b in banned: answer = re.sub(re.escape(b), f"~~{b}~~", answer, flags=re.IGNORECASE) # 2. Hard overrides — prepend the corrected advice. overrides = hard_override_violations(answer) if overrides: for repl in overrides: answer = f"❗ CORRECTION: {repl}\n\n---\n\n{answer}" warnings.append("Answer was overridden by a safety rule.") # 3. Citation requirement. if problem_type in REQUIRES_CITATION and not has_citations(answer): warnings.append( "Answer did not cite sources from the retrieved Q&A. " "Treat as a general suggestion, not a verified protocol." ) return answer, warnings