Spaces:

minhtudragon
/

headroom

Running

App Files Files Community

chopratejas committed on Apr 25

Commit

18d80ca

2 Parent(s): df66610 42196be

Merge pull request #262 from gglucass/fix/traffic-learner-error-recovery

Browse files

Files changed (3) hide show

CHANGELOG.md +17 -0
headroom/memory/traffic_learner.py +247 -16
tests/test_memory/test_traffic_learner.py +772 -4

CHANGELOG.md CHANGED Viewed

@@ -8,6 +8,23 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ## [Unreleased]
 ### Fixed
 - **`headroom unwrap codex` now actually undoes `headroom wrap codex`** —
   previously there was no `unwrap codex` subcommand at all, so the injected
   `model_provider = "headroom"` / `[model_providers.headroom]` block stayed

 ## [Unreleased]
 ### Fixed
+- **`Learned: error recovery` section in MEMORY.md no longer bloats with
+  stale or contradictory entries.** The dedup key for error-recovery
+  patterns was the literal rendered bullet text, so near-duplicate
+  recoveries (same intent, different `| tail -N` count, same error path
+  guessed against different successors) each created a new row. There was
+  also no TTL or re-validation, so wrong-today entries lingered. Fixed by:
+  (1) normalizing the hash on recovery intent — Read recoveries key on
+  `(basename(error_path), basename(success_path))`; Bash recoveries strip
+  volatile suffixes and hash only the primary command before the first
+  `|`/`&&`; (2) stamping `first_seen_at` / `last_seen_at` on every pattern
+  and bumping them in `_bump_persisted_evidence` via `json_set`; (3)
+  refining at render time — drop rows not re-observed in 21 days,
+  re-validate Read success paths against the filesystem, collapse
+  same-error_path-with-multiple-targets into one "use Glob/Grep first"
+  bullet, rank by `evidence_count * 0.5 ** (days/5)`, cap the section at
+  15. Other `Learned: …` categories (environment, preference,
+  architecture) are untouched.
 - **`headroom unwrap codex` now actually undoes `headroom wrap codex`** —
   previously there was no `unwrap codex` subcommand at all, so the injected
   `model_provider = "headroom"` / `[model_providers.headroom]` block stayed

headroom/memory/traffic_learner.py CHANGED Viewed

@@ -22,10 +22,12 @@ import asyncio
 import hashlib
 import json
 import logging
 import re
 import sqlite3
 import time
 from dataclasses import dataclass, field
 from enum import Enum
 from pathlib import Path
 from typing import TYPE_CHECKING, Any
@@ -45,6 +47,20 @@ FLUSH_DEBOUNCE_SECONDS = 10.0
 # Matches POSIX paths (starts with /) and common Windows drive paths.
 _ABS_PATH_RE = re.compile(r"(?:[A-Za-z]:[\\/]|/)[\w./\\\-]+")
 # =============================================================================
 # Pattern Categories
@@ -87,10 +103,60 @@ class ExtractedPattern:
     entity_refs: list[str] = field(default_factory=list)
     metadata: dict[str, Any] = field(default_factory=dict)
     content_hash: str = ""
     def __post_init__(self) -> None:
         if not self.content_hash:
-            self.content_hash = hashlib.sha256(self.content.encode()).hexdigest()[:16]
 # =============================================================================
@@ -389,6 +455,7 @@ class TrafficLearner:
         Evidence counts are summed across duplicates.
         """
         by_hash: dict[str, ExtractedPattern] = {}
         # Persisted rows from memory.db
         db_path = _resolve_backend_db_path(self._backend)
@@ -404,11 +471,15 @@ class TrafficLearner:
                 else:
                     by_hash[p.content_hash] = p
-        # In-memory accumulator (patterns not yet persisted)
         for pattern, count in self._pattern_counts.values():
             h = pattern.content_hash
             if h in by_hash:
-                by_hash[h].evidence_count += count
             else:
                 by_hash[h] = ExtractedPattern(
                     category=pattern.category,
@@ -418,6 +489,8 @@ class TrafficLearner:
                     entity_refs=list(pattern.entity_refs),
                     metadata=dict(pattern.metadata),
                     content_hash=pattern.content_hash,
                 )
         return list(by_hash.values())
@@ -578,7 +651,12 @@ class TrafficLearner:
                     content=content,
                     importance=0.7,
                     entity_refs=[success_path],
-                    metadata={"error_category": error_cat},
                 )
         elif tool in ("Grep", "Glob"):
             error_pattern = error_entry["input"].get("pattern", "")
@@ -635,7 +713,12 @@ class TrafficLearner:
             content=content,
             importance=importance,
             entity_refs=entities,
-            metadata={"error_category": error_cat, "failed_cmd": failed_short},
         )
     def _extract_environment(self, entry: dict[str, Any]) -> list[ExtractedPattern]:
@@ -762,6 +845,7 @@ class TrafficLearner:
                 if self._backend is None:
                     continue
                 memory = await self._backend.save_memory(
                     content=pattern.content,
                     user_id=self._user_id,
@@ -770,6 +854,8 @@ class TrafficLearner:
                         "source": "traffic_learner",
                         "category": pattern.category.value,
                         "evidence_count": pattern.evidence_count,
                         **pattern.metadata,
                     },
                 )
@@ -796,7 +882,7 @@ class TrafficLearner:
         if db_path is None or not db_path.exists():
             return
-        def _read() -> list[tuple[str, str]]:
             uri = f"file:{db_path}?mode=ro"
             try:
                 conn = sqlite3.connect(uri, uri=True)
@@ -804,7 +890,7 @@ class TrafficLearner:
                 return []
             try:
                 rows = conn.execute(
-                    "SELECT id, content FROM memories "
                     "WHERE json_extract(metadata, '$.source') = 'traffic_learner'"
                 ).fetchall()
             except sqlite3.DatabaseError:
@@ -814,7 +900,7 @@ class TrafficLearner:
                     conn.close()
                 except Exception:
                     pass
-            return [(row[0], row[1] or "") for row in rows]
         try:
             rows = await asyncio.to_thread(_read)
@@ -822,10 +908,24 @@ class TrafficLearner:
             logger.debug("Traffic learner hydrate failed: %s", e)
             return
-        for memory_id, content in rows:
             if not content:
                 continue
-            h = hashlib.sha256(content.encode()).hexdigest()[:16]
             self._saved_hashes.add(h)
             # If multiple rows share the same content (legacy duplicates),
             # last-wins — we only need one id to target the bump.
@@ -837,15 +937,18 @@ class TrafficLearner:
         if db_path is None or not db_path.exists():
             return
         def _bump() -> None:
             conn = sqlite3.connect(str(db_path))
             try:
                 conn.execute(
                     "UPDATE memories SET metadata = json_set("
                     "metadata, '$.evidence_count', "
-                    "COALESCE(json_extract(metadata, '$.evidence_count'), 0) + 1"
                     ") WHERE id = ?",
-                    (memory_id,),
                 )
                 conn.commit()
             finally:
@@ -1007,7 +1110,7 @@ def _load_persisted_patterns_from_sqlite(db_path: Path) -> list[ExtractedPattern
     try:
         conn.row_factory = sqlite3.Row
         rows = conn.execute(
-            "SELECT content, metadata, entity_refs, importance "
             "FROM memories "
             "WHERE json_extract(metadata, '$.source') = 'traffic_learner'"
         ).fetchall()
@@ -1045,12 +1148,24 @@ def _load_persisted_patterns_from_sqlite(db_path: Path) -> list[ExtractedPattern
         except (TypeError, ValueError):
             importance = 0.5
-        h = hashlib.sha256(content.encode()).hexdigest()[:16]
         if h in patterns:
             existing = patterns[h]
             existing.evidence_count += evidence
             if importance > existing.importance:
                 existing.importance = importance
         else:
             patterns[h] = ExtractedPattern(
                 category=category,
@@ -1060,11 +1175,26 @@ def _load_persisted_patterns_from_sqlite(db_path: Path) -> list[ExtractedPattern
                 entity_refs=list(entity_refs),
                 metadata=meta,
                 content_hash=h,
             )
     return list(patterns.values())
 def _patterns_to_recommendations(patterns: list[ExtractedPattern]) -> list:
     """Group patterns by category into one Recommendation per category.
@@ -1086,8 +1216,13 @@ def _patterns_to_recommendations(patterns: list[ExtractedPattern]) -> list:
             if target_str == "context_file"
             else RecommendationTarget.MEMORY_FILE
         )
-        # Sort by evidence_count desc so the most-supported rules appear first.
-        items.sort(key=lambda p: p.evidence_count, reverse=True)
         bullets = "\n".join(f"- {p.content}" for p in items)
         recs.append(
             Recommendation(
@@ -1099,3 +1234,99 @@ def _patterns_to_recommendations(patterns: list[ExtractedPattern]) -> list:
             )
         )
     return recs

 import hashlib
 import json
 import logging
+import os
 import re
 import sqlite3
 import time
 from dataclasses import dataclass, field
+from datetime import datetime, timezone
 from enum import Enum
 from pathlib import Path
 from typing import TYPE_CHECKING, Any
 # Matches POSIX paths (starts with /) and common Windows drive paths.
 _ABS_PATH_RE = re.compile(r"(?:[A-Za-z]:[\\/]|/)[\w./\\\-]+")
+# Error-recovery refinement: the Learned: error recovery section is capped,
+# decayed, and re-validated at render time. Other categories are untouched.
+# Maximum number of error-recovery bullets kept after ranking.
+_ERROR_RECOVERY_SECTION_CAP = 15
+# Half-life, in days, of the recency decay applied to evidence_count when
+# ranking (the score halves for every this-many days since last sighting).
+_ERROR_RECOVERY_HALF_LIFE_DAYS = 5.0
+# Patterns last seen more than this many days ago are dropped before ranking.
+_ERROR_RECOVERY_HARD_FLOOR_DAYS = 21
+# Suffixes that vary between otherwise-identical Bash recoveries. Stripping
+# them before hashing collapses near-duplicates.
+# The pattern is anchored at the end of the command and matches one or more
+# consecutive occurrences of: `| head -N` / `| tail -n N`, the line-context
+# flags `-A N` / `-B N` / `-C N`, and the redirections `2>&1` / `2>/dev/null`.
+_BASH_VOLATILE_SUFFIX_RE = re.compile(
+    r"(?:\s*\|\s*(?:head|tail)\s+-n?\s*\d+"
+    r"|\s+-A\s*\d+|\s+-B\s*\d+|\s+-C\s*\d+"
+    r"|\s+2>&1|\s+2>/dev/null)+\s*$"
+)
 # =============================================================================
 # Pattern Categories
     entity_refs: list[str] = field(default_factory=list)
     metadata: dict[str, Any] = field(default_factory=dict)
     content_hash: str = ""
+    first_seen_at: datetime | None = None
+    last_seen_at: datetime | None = None
     def __post_init__(self) -> None:
         if not self.content_hash:
+            key = _normalize_hash_key(self.category, self.content, self.metadata)
+            self.content_hash = hashlib.sha256(key.encode()).hexdigest()[:16]
+def _normalize_hash_key(
+    category: PatternCategory,
+    content: str,
+    metadata: dict[str, Any],
+) -> str:
+    """Return the text that is hashed to produce a pattern's content_hash.
+    Only error-recovery patterns tagged with a recognized ``tool`` get a
+    normalized key, so that trivially different invocations collapse:
+    * ``Read``  -> basenames of ``error_path`` and ``success_path``.
+    * ``Bash``  -> normalized ``failed_cmd`` and ``success_cmd``.
+    Every other case (a different category, or an error-recovery row whose
+    ``tool`` is missing or unrecognized) returns ``content`` unchanged, which
+    keeps hashes of previously stored rows stable.
+    """
+    if category is PatternCategory.ERROR_RECOVERY:
+        tool = metadata.get("tool")
+        if tool == "Read":
+            bad_name = os.path.basename(metadata.get("error_path", ""))
+            good_name = os.path.basename(metadata.get("success_path", ""))
+            return f"error_recovery|Read|{bad_name}|{good_name}"
+        if tool == "Bash":
+            failed_key = _normalize_bash_for_hash(metadata.get("failed_cmd", ""))
+            success_key = _normalize_bash_for_hash(metadata.get("success_cmd", ""))
+            return f"error_recovery|Bash|{failed_key}|{success_key}"
+    # Fallback: hash the literal rendered text.
+    return content
+def _normalize_bash_for_hash(cmd: str) -> str:
+    """Reduce a Bash command to its primary command for hashing.
+    Trailing volatile suffixes (paging, line-context flags, stderr
+    redirections) are stripped first, then the command is truncated at the
+    earliest ``" | "`` or ``" && "`` boundary, whichever comes first in the
+    string. Returns ``""`` for an empty command.
+    """
+    if not cmd:
+        return ""
+    # Drop paging, line-context flags, and redirections that vary between runs.
+    trimmed = _BASH_VOLATILE_SUFFIX_RE.sub("", cmd).strip()
+    # Cut at the EARLIEST separator by position. Checking " | " before " && "
+    # and stopping at the first hit would leave "a && b" intact for
+    # "a && b | c" while reducing plain "a && b" to "a", so the same primary
+    # command would hash to two different keys.
+    cut_points = [
+        idx for idx in (trimmed.find(sep) for sep in (" | ", " && ")) if idx != -1
+    ]
+    if cut_points:
+        trimmed = trimmed[: min(cut_points)].rstrip()
+    return trimmed
 # =============================================================================
         Evidence counts are summed across duplicates.
         """
         by_hash: dict[str, ExtractedPattern] = {}
+        now = datetime.now(timezone.utc)
         # Persisted rows from memory.db
         db_path = _resolve_backend_db_path(self._backend)
                 else:
                     by_hash[p.content_hash] = p
+        # In-memory accumulator (patterns not yet persisted). Re-sightings in
+        # this session bump last_seen_at to "now" on top of the persisted
+        # timestamp so recency ranking reflects live activity.
         for pattern, count in self._pattern_counts.values():
             h = pattern.content_hash
             if h in by_hash:
+                existing = by_hash[h]
+                existing.evidence_count += count
+                existing.last_seen_at = now
             else:
                 by_hash[h] = ExtractedPattern(
                     category=pattern.category,
                     entity_refs=list(pattern.entity_refs),
                     metadata=dict(pattern.metadata),
                     content_hash=pattern.content_hash,
+                    first_seen_at=now,
+                    last_seen_at=now,
                 )
         return list(by_hash.values())
                     content=content,
                     importance=0.7,
                     entity_refs=[success_path],
+                    metadata={
+                        "error_category": error_cat,
+                        "tool": "Read",
+                        "error_path": error_path,
+                        "success_path": success_path,
+                    },
                 )
         elif tool in ("Grep", "Glob"):
             error_pattern = error_entry["input"].get("pattern", "")
             content=content,
             importance=importance,
             entity_refs=entities,
+            metadata={
+                "error_category": error_cat,
+                "tool": "Bash",
+                "failed_cmd": failed_short,
+                "success_cmd": success_short,
+            },
         )
     def _extract_environment(self, entry: dict[str, Any]) -> list[ExtractedPattern]:
                 if self._backend is None:
                     continue
+                now_iso = datetime.now(timezone.utc).isoformat()
                 memory = await self._backend.save_memory(
                     content=pattern.content,
                     user_id=self._user_id,
                         "source": "traffic_learner",
                         "category": pattern.category.value,
                         "evidence_count": pattern.evidence_count,
+                        "first_seen_at": now_iso,
+                        "last_seen_at": now_iso,
                         **pattern.metadata,
                     },
                 )
         if db_path is None or not db_path.exists():
             return
+        def _read() -> list[tuple[str, str, str]]:
             uri = f"file:{db_path}?mode=ro"
             try:
                 conn = sqlite3.connect(uri, uri=True)
                 return []
             try:
                 rows = conn.execute(
+                    "SELECT id, content, metadata FROM memories "
                     "WHERE json_extract(metadata, '$.source') = 'traffic_learner'"
                 ).fetchall()
             except sqlite3.DatabaseError:
                     conn.close()
                 except Exception:
                     pass
+            return [(row[0], row[1] or "", row[2] or "{}") for row in rows]
         try:
             rows = await asyncio.to_thread(_read)
             logger.debug("Traffic learner hydrate failed: %s", e)
             return
+        for memory_id, content, metadata_json in rows:
             if not content:
                 continue
+            try:
+                metadata = json.loads(metadata_json) if metadata_json else {}
+            except json.JSONDecodeError:
+                metadata = {}
+            category_value = metadata.get("category")
+            try:
+                category = PatternCategory(category_value) if category_value else None
+            except ValueError:
+                category = None
+            if category is None:
+                # Legacy row without category — fall back to literal hash.
+                key = content
+            else:
+                key = _normalize_hash_key(category, content, metadata)
+            h = hashlib.sha256(key.encode()).hexdigest()[:16]
             self._saved_hashes.add(h)
             # If multiple rows share the same hash key (legacy duplicates or
             # near-duplicate recoveries), last-wins — we only need one id to
             # target the bump.
         if db_path is None or not db_path.exists():
             return
+        now_iso = datetime.now(timezone.utc).isoformat()
         def _bump() -> None:
             conn = sqlite3.connect(str(db_path))
             try:
                 conn.execute(
                     "UPDATE memories SET metadata = json_set("
                     "metadata, '$.evidence_count', "
+                    "COALESCE(json_extract(metadata, '$.evidence_count'), 0) + 1, "
+                    "'$.last_seen_at', ?"
                     ") WHERE id = ?",
+                    (now_iso, memory_id),
                 )
                 conn.commit()
             finally:
     try:
         conn.row_factory = sqlite3.Row
         rows = conn.execute(
+            "SELECT content, metadata, entity_refs, importance, created_at "
             "FROM memories "
             "WHERE json_extract(metadata, '$.source') = 'traffic_learner'"
         ).fetchall()
         except (TypeError, ValueError):
             importance = 0.5
+        first_seen = _parse_iso_timestamp(meta.get("first_seen_at")) or _parse_iso_timestamp(
+            row["created_at"]
+        )
+        last_seen = _parse_iso_timestamp(meta.get("last_seen_at")) or first_seen
+        key = _normalize_hash_key(category, content, meta)
+        h = hashlib.sha256(key.encode()).hexdigest()[:16]
         if h in patterns:
             existing = patterns[h]
             existing.evidence_count += evidence
             if importance > existing.importance:
                 existing.importance = importance
+            if last_seen and (existing.last_seen_at is None or last_seen > existing.last_seen_at):
+                existing.last_seen_at = last_seen
+            if first_seen and (
+                existing.first_seen_at is None or first_seen < existing.first_seen_at
+            ):
+                existing.first_seen_at = first_seen
         else:
             patterns[h] = ExtractedPattern(
                 category=category,
                 entity_refs=list(entity_refs),
                 metadata=meta,
                 content_hash=h,
+                first_seen_at=first_seen,
+                last_seen_at=last_seen,
             )
     return list(patterns.values())
+def _parse_iso_timestamp(value: Any) -> datetime | None:
+    """Parse an ISO-8601 timestamp stored as TEXT into an aware datetime.
+    Accepts anything ``datetime.fromisoformat`` accepts, plus a trailing
+    ``Z``/``z`` UTC designator and surrounding whitespace. Timestamps without
+    an offset are assumed to be UTC. Returns ``None`` for non-string, empty,
+    or unparseable input instead of raising.
+    """
+    if not value or not isinstance(value, str):
+        return None
+    text = value.strip()
+    # ``datetime.fromisoformat`` only understands a trailing "Z" on Python
+    # 3.11+; rewrite it as an explicit UTC offset so older interpreters parse
+    # it too instead of silently losing the timestamp.
+    if text[-1:] in ("Z", "z"):
+        text = f"{text[:-1]}+00:00"
+    try:
+        parsed = datetime.fromisoformat(text)
+    except ValueError:
+        return None
+    if parsed.tzinfo is None:
+        # Naive value: treat as UTC so it can be compared with aware datetimes.
+        parsed = parsed.replace(tzinfo=timezone.utc)
+    return parsed
 def _patterns_to_recommendations(patterns: list[ExtractedPattern]) -> list:
     """Group patterns by category into one Recommendation per category.
             if target_str == "context_file"
             else RecommendationTarget.MEMORY_FILE
         )
+        if category is PatternCategory.ERROR_RECOVERY:
+            items = _refine_error_recovery(items)
+        else:
+            # Sort by evidence_count desc so the most-supported rules appear first.
+            items.sort(key=lambda p: p.evidence_count, reverse=True)
+        if not items:
+            continue
         bullets = "\n".join(f"- {p.content}" for p in items)
         recs.append(
             Recommendation(
             )
         )
     return recs
+def _refine_error_recovery(patterns: list[ExtractedPattern]) -> list[ExtractedPattern]:
+    """Filter, merge, rank, and cap error_recovery patterns for rendering.
+    Stages, in order:
+    1. Drop patterns last seen more than _ERROR_RECOVERY_HARD_FLOOR_DAYS ago.
+    2. Drop Read recoveries whose success_path is missing from the filesystem.
+    3. Merge Read recoveries that share an error_path but point at two or
+       more distinct success_paths into a single "search first" bullet.
+    4. Order by evidence_count decayed with a half-life of
+       _ERROR_RECOVERY_HALF_LIFE_DAYS.
+    5. Keep the top _ERROR_RECOVERY_SECTION_CAP entries.
+    The input list is not modified; a new list is returned.
+    """
+    now = datetime.now(timezone.utc)
+    seconds_per_day = 86400.0
+    def _within_floor(p: ExtractedPattern) -> bool:
+        # A pattern with no timestamp at all cannot be aged, so it is kept.
+        stamp = p.last_seen_at or p.first_seen_at
+        if stamp is None:
+            return True
+        elapsed_days = (now - stamp).total_seconds() / seconds_per_day
+        return elapsed_days <= _ERROR_RECOVERY_HARD_FLOOR_DAYS
+    def _target_present(p: ExtractedPattern) -> bool:
+        # Only Read recoveries that recorded a success_path are checked.
+        if p.metadata.get("tool") != "Read":
+            return True
+        target = p.metadata.get("success_path")
+        if not target:
+            return True
+        try:
+            return Path(target).exists()
+        except OSError:
+            # The check itself failed (permissions, etc.) — keep the row
+            # rather than drop it on a possibly transient error.
+            return True
+    def _decayed_evidence(p: ExtractedPattern) -> float:
+        stamp = p.last_seen_at or p.first_seen_at or now
+        elapsed_days = max(0.0, (now - stamp).total_seconds() / seconds_per_day)
+        decay = float(0.5 ** (elapsed_days / _ERROR_RECOVERY_HALF_LIFE_DAYS))
+        return float(p.evidence_count) * decay
+    # Stages 1 and 2 run as two separate passes, age filter first.
+    fresh = [p for p in patterns if _within_floor(p)]
+    validated = [p for p in fresh if _target_present(p)]
+    # Stage 3: bucket Read recoveries by the exact error_path they failed on;
+    # everything else goes straight into the result.
+    by_error_path: dict[str, list[ExtractedPattern]] = {}
+    merged: list[ExtractedPattern] = []
+    for p in validated:
+        key = p.metadata.get("error_path") if p.metadata.get("tool") == "Read" else None
+        if key:
+            by_error_path.setdefault(key, []).append(p)
+        else:
+            merged.append(p)
+    for error_path, group in by_error_path.items():
+        targets = {g.metadata.get("success_path") for g in group} - {None}
+        if len(group) < 2 or len(targets) < 2:
+            # Unambiguous: keep the individual lessons as they are.
+            merged.extend(group)
+            continue
+        # Several different successors for one failing path is an ambiguity
+        # signal, so the group is replaced by one synthesized hint.
+        basename = os.path.basename(error_path) or error_path
+        newest = max((g.last_seen_at for g in group if g.last_seen_at), default=now)
+        oldest = min((g.first_seen_at for g in group if g.first_seen_at), default=newest)
+        merged.append(
+            ExtractedPattern(
+                category=PatternCategory.ERROR_RECOVERY,
+                content=(
+                    f"Path `{basename}` has been guessed wrong repeatedly — "
+                    f"use Glob/Grep to locate before reading."
+                ),
+                importance=max(g.importance for g in group),
+                evidence_count=sum(g.evidence_count for g in group),
+                metadata={
+                    "tool": "Read",
+                    "error_path": error_path,
+                    "collapsed": True,
+                },
+                last_seen_at=newest,
+                first_seen_at=oldest,
+            )
+        )
+    # Stages 4 and 5: highest recency-weighted evidence first, then cap.
+    ranked = sorted(merged, key=_decayed_evidence, reverse=True)
+    return ranked[:_ERROR_RECOVERY_SECTION_CAP]

tests/test_memory/test_traffic_learner.py CHANGED Viewed

@@ -6,6 +6,8 @@ a real memory backend.
 from __future__ import annotations
 import pytest
 from headroom.memory.traffic_learner import (
@@ -15,10 +17,15 @@ from headroom.memory.traffic_learner import (
     _classify_error,
     _is_error,
     _load_persisted_patterns_from_sqlite,
     _patterns_to_recommendations,
     _project_for_pattern,
 )
 # =============================================================================
 # Error Classification Tests
 # =============================================================================
@@ -361,18 +368,21 @@ class TestLoadPersistedPatterns:
             "id TEXT PRIMARY KEY, content TEXT NOT NULL, "
             "metadata TEXT NOT NULL DEFAULT '{}', "
             "entity_refs TEXT NOT NULL DEFAULT '[]', "
-            "importance REAL NOT NULL DEFAULT 0.5)"
         )
         for i, r in enumerate(rows):
             conn.execute(
-                "INSERT INTO memories (id, content, metadata, entity_refs, importance) "
-                "VALUES (?,?,?,?,?)",
                 (
                     str(i),
                     r["content"],
                     _json.dumps(r.get("metadata", {})),
                     _json.dumps(r.get("entity_refs", [])),
                     r.get("importance", 0.5),
                 ),
             )
         conn.commit()
@@ -612,7 +622,8 @@ def _init_db(path):
         "id TEXT PRIMARY KEY, content TEXT NOT NULL, "
         "metadata TEXT NOT NULL DEFAULT '{}', "
         "entity_refs TEXT NOT NULL DEFAULT '[]', "
-        "importance REAL NOT NULL DEFAULT 0.5)"
     )
     conn.commit()
     conn.close()
@@ -1076,3 +1087,760 @@ class TestStopCancels:
         assert learner._flush_task is not None and not learner._flush_task.done()
         await learner.stop()
         assert learner._flush_task is None or learner._flush_task.done()

 from __future__ import annotations
+from datetime import datetime, timedelta, timezone
 import pytest
 from headroom.memory.traffic_learner import (
     _classify_error,
     _is_error,
     _load_persisted_patterns_from_sqlite,
+    _normalize_bash_for_hash,
+    _parse_iso_timestamp,
     _patterns_to_recommendations,
     _project_for_pattern,
+    _refine_error_recovery,
 )
+UTC = timezone.utc
 # =============================================================================
 # Error Classification Tests
 # =============================================================================
             "id TEXT PRIMARY KEY, content TEXT NOT NULL, "
             "metadata TEXT NOT NULL DEFAULT '{}', "
             "entity_refs TEXT NOT NULL DEFAULT '[]', "
+            "importance REAL NOT NULL DEFAULT 0.5, "
+            "created_at TEXT)"
         )
         for i, r in enumerate(rows):
             conn.execute(
+                "INSERT INTO memories "
+                "(id, content, metadata, entity_refs, importance, created_at) "
+                "VALUES (?,?,?,?,?,?)",
                 (
                     str(i),
                     r["content"],
                     _json.dumps(r.get("metadata", {})),
                     _json.dumps(r.get("entity_refs", [])),
                     r.get("importance", 0.5),
+                    r.get("created_at"),
                 ),
             )
         conn.commit()
         "id TEXT PRIMARY KEY, content TEXT NOT NULL, "
         "metadata TEXT NOT NULL DEFAULT '{}', "
         "entity_refs TEXT NOT NULL DEFAULT '[]', "
+        "importance REAL NOT NULL DEFAULT 0.5, "
+        "created_at TEXT)"
     )
     conn.commit()
     conn.close()
         assert learner._flush_task is not None and not learner._flush_task.done()
         await learner.stop()
         assert learner._flush_task is None or learner._flush_task.done()
+class TestNormalizedHash:
+    """Error-recovery hashing follows the recovery intent, not the rendered bullet text."""
+    def _mk(self, **meta) -> ExtractedPattern:
+        # Placeholder content is synthesized from the metadata (tool name and key count).
+        synthetic = f"content-{meta.get('tool', 'none')}-{len(meta)}"
+        return ExtractedPattern(
+            metadata=meta,
+            importance=0.7,
+            content=synthetic,
+            category=PatternCategory.ERROR_RECOVERY,
+        )
+    @staticmethod
+    def _hash_of(**fields):
+        # Build a pattern at the importance used throughout this class and return its hash.
+        return ExtractedPattern(importance=0.7, **fields).content_hash
+    def test_read_recovery_basename_hash(self):
+        # Identical basenames under different directories are expected to hash alike.
+        under_a = self._hash_of(
+            category=PatternCategory.ERROR_RECOVERY,
+            content="File `/a/state.rs` does not exist. The correct path is `/a/lib.rs`.",
+            metadata={"tool": "Read", "error_path": "/a/state.rs", "success_path": "/a/lib.rs"},
+        )
+        under_b = self._hash_of(
+            category=PatternCategory.ERROR_RECOVERY,
+            content="File `/b/state.rs` does not exist. The correct path is `/b/lib.rs`.",
+            metadata={"tool": "Read", "error_path": "/b/state.rs", "success_path": "/b/lib.rs"},
+        )
+        assert under_a == under_b
+    def test_bash_recovery_tail_count_collapse(self):
+        # Only the `| tail -N` count differs between the two recoveries.
+        tail_ten_meta = {
+            "tool": "Bash",
+            "failed_cmd": "cargo check",
+            "success_cmd": "cargo check --manifest-path src-tauri/Cargo.toml | tail -10",
+        }
+        tail_fifty_meta = {
+            "tool": "Bash",
+            "failed_cmd": "cargo check",
+            "success_cmd": "cargo check --manifest-path src-tauri/Cargo.toml | tail -50",
+        }
+        tail_ten = self._hash_of(
+            category=PatternCategory.ERROR_RECOVERY,
+            content="Command `cargo check` fails. Use `cargo check --manifest-path src-tauri/Cargo.toml | tail -10` instead.",
+            metadata=tail_ten_meta,
+        )
+        tail_fifty = self._hash_of(
+            category=PatternCategory.ERROR_RECOVERY,
+            content="Command `cargo check` fails. Use `cargo check --manifest-path src-tauri/Cargo.toml | tail -50` instead.",
+            metadata=tail_fifty_meta,
+        )
+        assert tail_ten == tail_fifty
+    def test_bash_recovery_pipe_boundary(self):
+        # The recoveries share the command before the pipe and differ only after it.
+        piped_to_head = self._hash_of(
+            category=PatternCategory.ERROR_RECOVERY,
+            content="x",
+            metadata={
+                "tool": "Bash",
+                "failed_cmd": "grep foo bar.txt",
+                "success_cmd": "grep -n foo bar.txt | head -5",
+            },
+        )
+        piped_to_wc = self._hash_of(
+            category=PatternCategory.ERROR_RECOVERY,
+            content="y",
+            metadata={
+                "tool": "Bash",
+                "failed_cmd": "grep foo bar.txt",
+                "success_cmd": "grep -n foo bar.txt | wc -l",
+            },
+        )
+        assert piped_to_head == piped_to_wc
+    def test_bash_recovery_different_primary_cmd_different_hash(self):
+        # Different primary success commands must not share a hash.
+        via_build = self._hash_of(
+            category=PatternCategory.ERROR_RECOVERY,
+            content="x",
+            metadata={
+                "tool": "Bash",
+                "failed_cmd": "cargo check",
+                "success_cmd": "cargo build",
+            },
+        )
+        via_test = self._hash_of(
+            category=PatternCategory.ERROR_RECOVERY,
+            content="y",
+            metadata={
+                "tool": "Bash",
+                "failed_cmd": "cargo check",
+                "success_cmd": "cargo test",
+            },
+        )
+        assert via_build != via_test
+    def test_non_error_recovery_unchanged(self):
+        # Environment patterns with different content keep different hashes.
+        usr_python = self._hash_of(
+            category=PatternCategory.ENVIRONMENT,
+            content="Use /usr/bin/python3.",
+        )
+        opt_python = self._hash_of(
+            category=PatternCategory.ENVIRONMENT,
+            content="Use /opt/bin/python3.",
+        )
+        assert usr_python != opt_python
+    def test_error_recovery_without_tool_falls_back_to_content(self):
+        """Error-recovery rows that carry no `tool` metadata hash equal for equal content."""
+        first = self._hash_of(
+            category=PatternCategory.ERROR_RECOVERY,
+            content="Some legacy bullet.",
+        )
+        second = self._hash_of(
+            category=PatternCategory.ERROR_RECOVERY,
+            content="Some legacy bullet.",
+        )
+        assert first == second
+class TestRefineErrorRecovery:
+    """Render-time refinement checks: staleness floor, re-validation, collapse, ranking, cap."""
+    def _mk_read(
+        self,
+        *,
+        error_path: str,
+        success_path: str,
+        evidence: int = 1,
+        last_seen: datetime | None = None,
+    ) -> ExtractedPattern:
+        # One timestamp serves as both first and last sighting; it defaults to the current UTC time.
+        stamp = last_seen or datetime.now(UTC)
+        bullet = f"File `{error_path}` does not exist. The correct path is `{success_path}`."
+        read_meta = {
+            "tool": "Read",
+            "error_path": error_path,
+            "success_path": success_path,
+        }
+        return ExtractedPattern(
+            category=PatternCategory.ERROR_RECOVERY,
+            content=bullet,
+            importance=0.7,
+            evidence_count=evidence,
+            metadata=read_meta,
+            last_seen_at=stamp,
+            first_seen_at=stamp,
+        )
+    def test_drops_patterns_beyond_hard_floor(self, tmp_path):
+        # A row last seen 22 days ago is dropped; a freshly stamped row is kept.
+        existing = tmp_path / "lib.rs"
+        existing.write_text("pub fn x() {}")
+        long_ago = datetime.now(UTC) - timedelta(days=22)
+        stale_row = self._mk_read(
+            error_path=str(tmp_path / "state.rs"),
+            success_path=str(existing),
+            last_seen=long_ago,
+        )
+        recent_row = self._mk_read(
+            error_path=str(tmp_path / "other.rs"),
+            success_path=str(existing),
+        )
+        survivors = _refine_error_recovery([stale_row, recent_row])
+        assert recent_row in survivors
+        assert stale_row not in survivors
+    def test_revalidates_read_success_path(self, tmp_path):
+        # Only the row whose success path exists on disk is kept.
+        on_disk = tmp_path / "present.rs"
+        on_disk.write_text("x")
+        valid_row = self._mk_read(
+            error_path=str(tmp_path / "miss.rs"),
+            success_path=str(on_disk),
+        )
+        dangling_row = self._mk_read(
+            error_path=str(tmp_path / "other.rs"),
+            success_path=str(tmp_path / "gone.rs"),
+        )
+        survivors = _refine_error_recovery([valid_row, dangling_row])
+        assert valid_row in survivors
+        assert dangling_row not in survivors
+    def test_collapses_ambiguous_error_path(self, tmp_path):
+        # Three existing targets recorded against one error path collapse into a single row.
+        targets = []
+        for file_name, body in (("a.rs", "x"), ("b.rs", "y"), ("c.rs", "z")):
+            target = tmp_path / file_name
+            target.write_text(body)
+            targets.append(target)
+        error_path = str(tmp_path / "ambiguous.rs")
+        group = [
+            self._mk_read(error_path=error_path, success_path=str(target), evidence=weight)
+            for target, weight in zip(targets, (3, 2, 1))
+        ]
+        refined = _refine_error_recovery(group)
+        assert len(refined) == 1
+        merged_row = refined[0]
+        assert merged_row.metadata.get("collapsed") is True
+        # Evidence of the collapsed row is the sum 3 + 2 + 1.
+        assert merged_row.evidence_count == 6
+        assert "ambiguous.rs" in merged_row.content
+        assert "Glob/Grep" in merged_row.content
+    def test_single_success_path_not_collapsed(self, tmp_path):
+        only_target = tmp_path / "a.rs"
+        only_target.write_text("x")
+        error_path = str(tmp_path / "only-one-target.rs")
+        rows = [
+            self._mk_read(error_path=error_path, success_path=str(only_target), evidence=weight)
+            for weight in (3, 2)
+        ]
+        refined = _refine_error_recovery(rows)
+        # With a single distinct success_path nothing is marked as collapsed.
+        assert not any(row.metadata.get("collapsed") is True for row in refined)
+        assert len(refined) == 2
+    def test_recency_ranking_prefers_fresh_over_stale_heavy(self, tmp_path):
+        existing = tmp_path / "lib.rs"
+        existing.write_text("x")
+        ten_days_ago = datetime.now(UTC) - timedelta(days=10)
+        # evidence=10 seen 10 days ago → score ~10 * 0.5**2 = 2.5
+        heavy_old = self._mk_read(
+            error_path=str(tmp_path / "old.rs"),
+            success_path=str(existing),
+            evidence=10,
+            last_seen=ten_days_ago,
+        )
+        # evidence=3 seen now → score ~3
+        light_fresh = self._mk_read(
+            error_path=str(tmp_path / "fresh.rs"),
+            success_path=str(existing),
+            evidence=3,
+        )
+        ranked = _refine_error_recovery([heavy_old, light_fresh])
+        assert ranked[0] is light_fresh
+        assert ranked[1] is heavy_old
+    def test_section_cap_enforced(self, tmp_path):
+        existing = tmp_path / "lib.rs"
+        existing.write_text("x")
+        candidates = []
+        for i in range(25):
+            candidates.append(
+                self._mk_read(
+                    error_path=str(tmp_path / f"miss_{i}.rs"),
+                    success_path=str(existing),
+                    evidence=i + 1,
+                )
+            )
+        refined = _refine_error_recovery(candidates)
+        assert len(refined) == 15
+        # All rows are equally fresh, so the top 15 of evidence 1..25 start at 11.
+        lowest_kept = min(row.evidence_count for row in refined)
+        assert lowest_kept >= 11
+    def test_read_recovery_without_success_path_not_revalidated(self):
+        """A Read pattern whose metadata has no `success_path` is still returned."""
+        legacy_read = ExtractedPattern(
+            category=PatternCategory.ERROR_RECOVERY,
+            content="Some legacy Read bullet",
+            importance=0.7,
+            metadata={"tool": "Read", "error_path": "/something.rs"},
+            last_seen_at=datetime.now(UTC),
+        )
+        assert legacy_read in _refine_error_recovery([legacy_read])
+    def test_bash_recoveries_not_revalidated(self, tmp_path):
+        """A Bash pattern is returned by refinement unchanged in membership."""
+        bash_meta = {
+            "tool": "Bash",
+            "failed_cmd": "x",
+            "success_cmd": "y",
+        }
+        bash_row = ExtractedPattern(
+            category=PatternCategory.ERROR_RECOVERY,
+            content="Command `x` fails. Use `y` instead.",
+            importance=0.7,
+            evidence_count=1,
+            metadata=bash_meta,
+            last_seen_at=datetime.now(UTC),
+        )
+        assert bash_row in _refine_error_recovery([bash_row])
+    def test_empty_input_returns_empty(self):
+        nothing = _refine_error_recovery([])
+        assert nothing == []
+    def test_missing_timestamps_survive_one_render(self):
+        """A pattern with neither timestamp set is kept by refinement."""
+        undated = ExtractedPattern(
+            category=PatternCategory.ERROR_RECOVERY,
+            content="legacy bullet",
+            importance=0.7,
+        )
+        assert undated.first_seen_at is None
+        assert undated.last_seen_at is None
+        assert undated in _refine_error_recovery([undated])
+    def test_refined_empty_skips_section_in_recommendations(self, tmp_path):
+        """No recommendation is produced when the only pattern fails re-validation."""
+        # The lone Read recovery points at a success_path that does not exist.
+        missing_target = str(tmp_path / "does-not-exist.rs")
+        stale = ExtractedPattern(
+            category=PatternCategory.ERROR_RECOVERY,
+            content="File `/a.rs` does not exist. The correct path is `/gone.rs`.",
+            importance=0.7,
+            metadata={
+                "tool": "Read",
+                "error_path": "/a.rs",
+                "success_path": missing_target,
+            },
+            last_seen_at=datetime.now(UTC),
+        )
+        # The whole section is expected to be omitted.
+        assert _patterns_to_recommendations([stale]) == []
+    def test_oserror_during_revalidation_keeps_row(self, monkeypatch):
+        """An OSError raised by the path existence check does not drop the row."""
+        row = ExtractedPattern(
+            category=PatternCategory.ERROR_RECOVERY,
+            content="File `/a.rs` does not exist. The correct path is `/b.rs`.",
+            importance=0.7,
+            metadata={"tool": "Read", "error_path": "/a.rs", "success_path": "/b.rs"},
+            last_seen_at=datetime.now(UTC),
+        )
+        def _failing_exists(self):
+            raise OSError("simulated permission error")
+        # Every Path.exists call now raises for the duration of this test.
+        monkeypatch.setattr("pathlib.Path.exists", _failing_exists)
+        assert row in _refine_error_recovery([row])
+class TestNormalizeBashForHash:
+    """Normalization applied to Bash commands before they are hashed."""
+    def test_empty_string_returns_empty(self):
+        normalized = _normalize_bash_for_hash("")
+        assert normalized == ""
+    def test_no_volatile_suffix_unchanged(self):
+        normalized = _normalize_bash_for_hash("cargo check")
+        assert normalized == "cargo check"
+    def test_strips_head_suffix(self):
+        normalized = _normalize_bash_for_hash("grep foo bar | head -20")
+        assert normalized == "grep foo bar"
+    def test_strips_tail_suffix(self):
+        normalized = _normalize_bash_for_hash("cargo check | tail -5")
+        assert normalized == "cargo check"
+    def test_strips_trailing_context_flags(self):
+        # Context flags are only stripped when trailing: the regex is anchored to end-of-string.
+        normalized = _normalize_bash_for_hash("grep foo bar -A 3")
+        assert normalized == "grep foo bar"
+    def test_strips_stderr_redirect(self):
+        normalized = _normalize_bash_for_hash("cargo check 2>&1")
+        assert normalized == "cargo check"
+    def test_cuts_at_first_chain(self):
+        # Everything from the `&&` onward is discarded, leaving the primary command.
+        normalized = _normalize_bash_for_hash("cd /tmp && ls")
+        assert normalized == "cd /tmp"
+class TestParseIsoTimestamp:
+    """Edge cases for _parse_iso_timestamp: missing, malformed, naive and aware inputs."""
+    def test_none_returns_none(self):
+        result = _parse_iso_timestamp(None)
+        assert result is None
+    def test_empty_string_returns_none(self):
+        result = _parse_iso_timestamp("")
+        assert result is None
+    def test_non_string_returns_none(self):
+        # Neither an int nor a float is accepted as a timestamp.
+        for bogus in (12345, 3.14):
+            assert _parse_iso_timestamp(bogus) is None
+    def test_invalid_format_returns_none(self):
+        result = _parse_iso_timestamp("not an iso string")
+        assert result is None
+    def test_naive_timestamp_assumed_utc(self):
+        # The input has no offset; the result is expected to carry UTC.
+        result = _parse_iso_timestamp("2026-04-20T12:00:00")
+        assert result is not None
+        assert result.tzinfo == UTC
+    def test_aware_timestamp_preserved(self):
+        # The input already has an explicit offset; the result must stay timezone-aware.
+        result = _parse_iso_timestamp("2026-04-20T12:00:00+00:00")
+        assert result is not None
+        assert result.tzinfo is not None
+class TestLoadPersistedPatternsTimestamps:
+    """Loading from sqlite populates first_seen_at / last_seen_at on the returned patterns."""
+    def _make_db(self, tmp_path, rows: list[dict]):
+        # Creates `memory.db` under tmp_path, inserts one row per dict (ids are the
+        # list indexes as strings) and returns the database path.
+        import json as _json
+        import sqlite3 as _sql
+        db = tmp_path / "memory.db"
+        conn = _sql.connect(db)
+        conn.execute(
+            "CREATE TABLE memories ("
+            "id TEXT PRIMARY KEY, content TEXT NOT NULL, "
+            "metadata TEXT NOT NULL DEFAULT '{}', "
+            "entity_refs TEXT NOT NULL DEFAULT '[]', "
+            "importance REAL NOT NULL DEFAULT 0.5, "
+            "created_at TEXT)"
+        )
+        params = [
+            (
+                str(index),
+                spec["content"],
+                _json.dumps(spec.get("metadata", {})),
+                _json.dumps(spec.get("entity_refs", [])),
+                spec.get("importance", 0.5),
+                spec.get("created_at"),
+            )
+            for index, spec in enumerate(rows)
+        ]
+        conn.executemany(
+            "INSERT INTO memories "
+            "(id, content, metadata, entity_refs, importance, created_at) "
+            "VALUES (?,?,?,?,?,?)",
+            params,
+        )
+        conn.commit()
+        conn.close()
+        return db
+    def test_reads_timestamps_from_metadata(self, tmp_path):
+        # Both timestamps are supplied inside the metadata JSON.
+        stamped_meta = {
+            "source": "traffic_learner",
+            "category": "environment",
+            "evidence_count": 3,
+            "first_seen_at": "2026-04-10T10:00:00+00:00",
+            "last_seen_at": "2026-04-20T15:00:00+00:00",
+        }
+        db = self._make_db(tmp_path, [{"content": "env bullet", "metadata": stamped_meta}])
+        loaded = _load_persisted_patterns_from_sqlite(db)
+        assert len(loaded) == 1
+        only = loaded[0]
+        assert only.first_seen_at is not None
+        assert (only.first_seen_at.year, only.first_seen_at.month) == (2026, 4)
+        assert only.last_seen_at is not None
+        assert only.last_seen_at.day == 20
+    def test_falls_back_to_created_at(self, tmp_path):
+        """With no timestamps in metadata, the row's `created_at` supplies them."""
+        unstamped_meta = {
+            "source": "traffic_learner",
+            "category": "environment",
+            "evidence_count": 1,
+        }
+        row_spec = {
+            "content": "env bullet",
+            "metadata": unstamped_meta,
+            "created_at": "2026-03-01T09:00:00+00:00",
+        }
+        db = self._make_db(tmp_path, [row_spec])
+        loaded = _load_persisted_patterns_from_sqlite(db)
+        assert len(loaded) == 1
+        only = loaded[0]
+        assert only.first_seen_at is not None
+        assert only.first_seen_at.month == 3
+        # Without either metadata timestamp, last_seen equals first_seen.
+        assert only.last_seen_at == only.first_seen_at
+    def test_collision_merges_timestamps_max_last_min_first(self, tmp_path):
+        """Rows sharing one hash merge into the widest first/last timestamp range."""
+        narrower = {
+            "content": "dup bullet",
+            "importance": 0.4,
+            "metadata": {
+                "source": "traffic_learner",
+                "category": "preference",
+                "evidence_count": 2,
+                "first_seen_at": "2026-04-10T00:00:00+00:00",
+                "last_seen_at": "2026-04-15T00:00:00+00:00",
+            },
+        }
+        wider = {
+            "content": "dup bullet",
+            "importance": 0.9,
+            "metadata": {
+                "source": "traffic_learner",
+                "category": "preference",
+                "evidence_count": 3,
+                "first_seen_at": "2026-04-01T00:00:00+00:00",
+                "last_seen_at": "2026-04-20T00:00:00+00:00",
+            },
+        }
+        db = self._make_db(tmp_path, [narrower, wider])
+        loaded = _load_persisted_patterns_from_sqlite(db)
+        assert len(loaded) == 1
+        merged = loaded[0]
+        # Evidence counts add up: 2 + 3.
+        assert merged.evidence_count == 5
+        # The larger of the two importance values is kept.
+        assert merged.importance == 0.9
+        assert merged.first_seen_at is not None
+        assert merged.first_seen_at.day == 1
+        assert merged.last_seen_at is not None
+        assert merged.last_seen_at.day == 20
+    def test_non_numeric_importance_falls_back_to_default(self, tmp_path):
+        """An importance value that cannot be parsed as a number loads as 0.5."""
+        import json as _json
+        import sqlite3 as _sql
+        db = tmp_path / "memory.db"
+        meta_json = _json.dumps(
+            {
+                "source": "traffic_learner",
+                "category": "environment",
+                "evidence_count": 1,
+            }
+        )
+        conn = _sql.connect(db)
+        # The importance column is declared TEXT here so a non-numeric value can be stored.
+        conn.execute(
+            "CREATE TABLE memories ("
+            "id TEXT PRIMARY KEY, content TEXT NOT NULL, "
+            "metadata TEXT NOT NULL DEFAULT '{}', "
+            "entity_refs TEXT NOT NULL DEFAULT '[]', "
+            "importance TEXT, "
+            "created_at TEXT)"
+        )
+        conn.execute(
+            "INSERT INTO memories (id, content, metadata, importance) VALUES (?,?,?,?)",
+            ("0", "bullet", meta_json, "not-a-number"),
+        )
+        conn.commit()
+        conn.close()
+        loaded = _load_persisted_patterns_from_sqlite(db)
+        assert len(loaded) == 1
+        assert loaded[0].importance == 0.5
+    def test_malformed_metadata_json_skipped_gracefully(self, tmp_path):
+        """A row whose metadata is not valid JSON is skipped without raising."""
+        import sqlite3 as _sql
+        db = tmp_path / "memory.db"
+        broken_row = ("0", "bullet", "{not json", "[]", 0.5, None)
+        conn = _sql.connect(db)
+        conn.execute(
+            "CREATE TABLE memories ("
+            "id TEXT PRIMARY KEY, content TEXT NOT NULL, "
+            "metadata TEXT NOT NULL DEFAULT '{}', "
+            "entity_refs TEXT NOT NULL DEFAULT '[]', "
+            "importance REAL NOT NULL DEFAULT 0.5, "
+            "created_at TEXT)"
+        )
+        # The third column holds text that is not parseable JSON.
+        conn.execute("INSERT INTO memories VALUES (?,?,?,?,?,?)", broken_row)
+        conn.commit()
+        conn.close()
+        # Loading must not raise, and the unreadable row yields no pattern.
+        loaded = _load_persisted_patterns_from_sqlite(db)
+        assert loaded == []
+class TestBumpPersistsLastSeenAt:
+    """Bumping persisted evidence also records $.last_seen_at in the row metadata."""
+    @pytest.mark.asyncio
+    async def test_bump_sets_last_seen_at_in_metadata(self, tmp_path):
+        import json as _json
+        import sqlite3 as _sql
+        db = tmp_path / "memory.db"
+        _init_db(db)
+        # The seeded traffic_learner row deliberately has no last_seen_at.
+        seed_meta = {
+            "source": "traffic_learner",
+            "category": "environment",
+            "evidence_count": 1,
+        }
+        seeding = _sql.connect(db)
+        seeding.execute(
+            "INSERT INTO memories (id, content, metadata) VALUES (?,?,?)",
+            ("row-1", "bullet", _json.dumps(seed_meta)),
+        )
+        seeding.commit()
+        seeding.close()
+        learner = TrafficLearner(backend=_FakeBackend(db), min_evidence=1)
+        await learner._bump_persisted_evidence("row-1")
+        reading = _sql.connect(db)
+        fetched = reading.execute("SELECT metadata FROM memories WHERE id='row-1'").fetchone()
+        reading.close()
+        stored_meta = _json.loads(fetched[0])
+        assert stored_meta["evidence_count"] == 2
+        assert "last_seen_at" in stored_meta
+        # The stored value has to round-trip through the timestamp parser.
+        assert _parse_iso_timestamp(stored_meta["last_seen_at"]) is not None
+class TestHydrateLegacyRow:
+    """Hydration of rows whose metadata lacks a usable `category`."""
+    @pytest.mark.asyncio
+    async def test_hydrate_legacy_row_without_category(self, tmp_path):
+        import hashlib as _h
+        import json as _json
+        import sqlite3 as _sql
+        db = tmp_path / "memory.db"
+        _init_db(db)
+        # The metadata has no `category` key; the row must hydrate regardless.
+        legacy_meta = _json.dumps({"source": "traffic_learner"})
+        conn = _sql.connect(db)
+        conn.execute(
+            "INSERT INTO memories (id, content, metadata) VALUES (?,?,?)",
+            ("legacy-1", "legacy bullet", legacy_meta),
+        )
+        conn.commit()
+        conn.close()
+        learner = TrafficLearner(backend=_FakeBackend(db), min_evidence=1)
+        await learner._hydrate_persisted_state()
+        # The expected key is the first 16 hex chars of sha256 over the content.
+        content_key = _h.sha256(b"legacy bullet").hexdigest()[:16]
+        assert content_key in learner._saved_hashes
+        assert learner._persisted_ids[content_key] == "legacy-1"
+    @pytest.mark.asyncio
+    async def test_hydrate_skips_empty_content(self, tmp_path):
+        """Hydration ignores a row whose content is the empty string."""
+        import json as _json
+        import sqlite3 as _sql
+        db = tmp_path / "memory.db"
+        _init_db(db)
+        seed_rows = (
+            ("empty", "", _json.dumps({"source": "traffic_learner"})),
+            (
+                "ok",
+                "normal bullet",
+                _json.dumps({"source": "traffic_learner", "category": "environment"}),
+            ),
+        )
+        conn = _sql.connect(db)
+        for seed in seed_rows:
+            conn.execute(
+                "INSERT INTO memories (id, content, metadata) VALUES (?,?,?)",
+                seed,
+            )
+        conn.commit()
+        conn.close()
+        learner = TrafficLearner(backend=_FakeBackend(db), min_evidence=1)
+        await learner._hydrate_persisted_state()
+        hydrated_ids = learner._persisted_ids.values()
+        assert "empty" not in hydrated_ids
+        assert "ok" in hydrated_ids
+    @pytest.mark.asyncio
+    async def test_hydrate_invalid_category_falls_back(self, tmp_path):
+        """A row with an unrecognized category value hydrates without raising."""
+        import json as _json
+        import sqlite3 as _sql
+        db = tmp_path / "memory.db"
+        _init_db(db)
+        odd_meta = _json.dumps({"source": "traffic_learner", "category": "mystery_type"})
+        conn = _sql.connect(db)
+        conn.execute(
+            "INSERT INTO memories (id, content, metadata) VALUES (?,?,?)",
+            ("bad-cat", "mystery bullet", odd_meta),
+        )
+        conn.commit()
+        conn.close()
+        learner = TrafficLearner(backend=_FakeBackend(db), min_evidence=1)
+        # The only expectation is that this call completes without an exception.
+        await learner._hydrate_persisted_state()
+class TestCollectAllPatternsTimestamps:
+    """_collect_all_patterns bumps last_seen_at on in-session re-sightings."""
+    @pytest.mark.asyncio
+    async def test_re_sighting_bumps_last_seen_at(self, tmp_path):
+        """A persisted pattern re-observed in this session gets last_seen_at=now."""
+        import json as _json
+        import sqlite3 as _sql
+        db = tmp_path / "memory.db"
+        _init_db(db)
+        # Seed one persisted row whose first/last sighting are both an old timestamp.
+        old_last_seen = "2026-01-01T00:00:00+00:00"
+        conn = _sql.connect(db)
+        conn.execute(
+            "INSERT INTO memories (id, content, metadata) VALUES (?,?,?)",
+            (
+                "seed-1",
+                "some env bullet",
+                _json.dumps(
+                    {
+                        "source": "traffic_learner",
+                        "category": "environment",
+                        "evidence_count": 1,
+                        "first_seen_at": old_last_seen,
+                        "last_seen_at": old_last_seen,
+                    }
+                ),
+            ),
+        )
+        conn.commit()
+        conn.close()
+        backend = _FakeBackend(db)
+        learner = TrafficLearner(backend=backend, min_evidence=1)
+        # Simulate in-session accumulation of the same pattern.
+        pattern = ExtractedPattern(
+            category=PatternCategory.ENVIRONMENT,
+            content="some env bullet",
+            importance=0.7,
+        )
+        learner._pattern_counts[pattern.content_hash] = (pattern, 1)
+        merged = learner._collect_all_patterns()
+        assert len(merged) == 1
+        m = merged[0]
+        assert m.last_seen_at is not None
+        # last_seen_at should be bumped past the stale seeded timestamp.
+        assert m.last_seen_at > _parse_iso_timestamp(old_last_seen)
+        # Check "now" against the wall clock with a tolerance rather than by calendar
+        # year: a year match accepts any timestamp from the current year and can
+        # fail spuriously when the test runs across a New Year boundary.
+        assert abs(datetime.now(UTC) - m.last_seen_at) < timedelta(minutes=5)