Spaces:

minhtudragon
/

headroom

Build error

App Files Community

JerrettDavis Copilot committed on Apr 22

Commit

e4d7c78

2 Parent(s): a74cd94 68245cd

Merge upstream/main into feat/canonical-pipeline

Browse files

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

Files changed (21) hide show

.claude-plugin/marketplace.json +2 -2
.github/plugin/marketplace.json +2 -2
CHANGELOG.md +19 -0
codecov.yml +19 -0
headroom/cli/wrap.py +0 -0
headroom/copilot_auth.py +138 -0
headroom/dashboard/templates/dashboard.html +54 -12
headroom/learn/writer.py +72 -12
headroom/proxy/handlers/openai.py +0 -0
headroom/proxy/handlers/streaming.py +35 -0
headroom/proxy/helpers.py +53 -12
headroom/proxy/server.py +45 -4
plugins/headroom-agent-hooks/.claude-plugin/plugin.json +1 -1
plugins/headroom-agent-hooks/.github/plugin/plugin.json +1 -1
tests/test_cli/test_wrap_copilot.py +335 -212
tests/test_cli/test_wrap_persistent.py +33 -17
tests/test_copilot_auth.py +261 -11
tests/test_learn/test_writer.py +135 -9
tests/test_proxy_copilot_auth_hooks.py +47 -23
tests/test_proxy_dashboard_stats_cache.py +144 -0
tests/test_proxy_streaming_request_logger.py +174 -0

.claude-plugin/marketplace.json CHANGED Viewed

@@ -5,14 +5,14 @@
   },
   "metadata": {
     "description": "Headroom marketplace for Claude Code and GitHub Copilot CLI plugins.",
-    "version": "0.10.0"
   },
   "plugins": [
     {
       "name": "headroom",
       "source": "./plugins/headroom-agent-hooks",
       "description": "Headroom startup hooks for Claude Code and GitHub Copilot CLI.",
-      "version": "0.10.0",
       "author": {
         "name": "Headroom Contributors",
         "url": "https://github.com/chopratejas/headroom"

   },
   "metadata": {
     "description": "Headroom marketplace for Claude Code and GitHub Copilot CLI plugins.",
+    "version": "0.11.0"
   },
   "plugins": [
     {
       "name": "headroom",
       "source": "./plugins/headroom-agent-hooks",
       "description": "Headroom startup hooks for Claude Code and GitHub Copilot CLI.",
+      "version": "0.11.0",
       "author": {
         "name": "Headroom Contributors",
         "url": "https://github.com/chopratejas/headroom"

.github/plugin/marketplace.json CHANGED Viewed

@@ -5,14 +5,14 @@
   },
   "metadata": {
     "description": "Headroom marketplace for Claude Code and GitHub Copilot CLI plugins.",
-    "version": "0.10.0"
   },
   "plugins": [
     {
       "name": "headroom",
       "source": "./plugins/headroom-agent-hooks",
       "description": "Headroom startup hooks for Claude Code and GitHub Copilot CLI.",
-      "version": "0.10.0",
       "author": {
         "name": "Headroom Contributors",
         "url": "https://github.com/chopratejas/headroom"

   },
   "metadata": {
     "description": "Headroom marketplace for Claude Code and GitHub Copilot CLI plugins.",
+    "version": "0.11.0"
   },
   "plugins": [
     {
       "name": "headroom",
       "source": "./plugins/headroom-agent-hooks",
       "description": "Headroom startup hooks for Claude Code and GitHub Copilot CLI.",
+      "version": "0.11.0",
       "author": {
         "name": "Headroom Contributors",
         "url": "https://github.com/chopratejas/headroom"

CHANGELOG.md CHANGED Viewed

@@ -7,6 +7,14 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 ## [Unreleased]
 ### Added
 - **Telemetry stack & install-mode identity fields** — anonymous beacon now
   reports `headroom_stack` (how Headroom is invoked: `proxy`, `wrap_claude`,
@@ -39,6 +47,17 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
   hourly/daily/weekly/monthly rollups. Responses now include a
   `history_summary` block describing stored versus returned points.
 ## [0.5.22] - 2026-04-11
 ### Added

 ## [Unreleased]
+### Fixed
+- **`headroom learn` no longer clobbers prior recommendations on re-run** —
+  the marker block in `CLAUDE.md` / `MEMORY.md` is now merged with the
+  prior block instead of wholesale-replaced. Sections re-surfaced by the
+  new run win; sections not re-surfaced are carried forward so learnings
+  accumulate across runs instead of disappearing. To fully rebuild the
+  block, delete it manually and re-run. (#231)
 ### Added
 - **Telemetry stack & install-mode identity fields** — anonymous beacon now
   reports `headroom_stack` (how Headroom is invoked: `proxy`, `wrap_claude`,
   hourly/daily/weekly/monthly rollups. Responses now include a
   `history_summary` block describing stored versus returned points.
+### Fixed
+- **Streaming Anthropic requests are now visible to `/stats.recent_requests`
+  and `/transformations/feed`** — `_finalize_stream_response` did not call
+  `self.logger.log(...)`, so the entire streaming Anthropic code path (the
+  one Claude Code uses) silently bypassed the request logger. Only the
+  non-streaming Anthropic path and the Bedrock streaming path were logged.
+  As a consequence, `--log-messages` had no observable effect on the live
+  transformations feed for typical traffic. The streaming finalizer now
+  emits the same `RequestLog` shape the other paths do, including
+  `request_messages` when `log_full_messages` is enabled.
 ## [0.5.22] - 2026-04-11
 ### Added

codecov.yml ADDED Viewed

@@ -0,0 +1,19 @@

+codecov:
+  require_ci_to_pass: true
+coverage:
+  status:
+    project:
+      default:
+        target: auto
+    patch:
+      default:
+        target: auto
+ignore:
+  - "tests/**"
+  - "scripts/tests/**"
+  - ".github/**"
+  - ".claude-plugin/**"
+  - "plugins/headroom-agent-hooks/.claude-plugin/**"
+  - "plugins/headroom-agent-hooks/.github/**"

headroom/cli/wrap.py CHANGED Viewed

The diff for this file is too large to render. See raw diff

headroom/copilot_auth.py CHANGED Viewed

@@ -3,10 +3,13 @@
 from __future__ import annotations
 import asyncio
 import json
 import logging
 import os
 import time
 from dataclasses import dataclass
 from datetime import datetime
 from pathlib import Path
@@ -67,6 +70,11 @@ def _token_exchange_url() -> str:
     return os.environ.get("GITHUB_COPILOT_TOKEN_EXCHANGE_URL", DEFAULT_TOKEN_EXCHANGE_URL).strip()
 def _resolve_token_file_paths() -> list[Path]:
     override = os.environ.get("GITHUB_COPILOT_TOKEN_FILE", "").strip()
     if override:
@@ -83,6 +91,108 @@ def _resolve_token_file_paths() -> list[Path]:
     return paths
 def _parse_expiry(value: Any) -> float | None:
     if value in (None, ""):
         return None
@@ -157,6 +267,14 @@ def read_cached_oauth_token() -> str | None:
         if token:
             return token
     host = _github_host()
     for path in _resolve_token_file_paths():
         try:
@@ -203,6 +321,16 @@ def is_copilot_api_url(url: str | None) -> bool:
     return "githubcopilot.com" in host
 class CopilotTokenProvider:
     """Resolve and cache short-lived Copilot API tokens."""
@@ -233,6 +361,16 @@ class CopilotTokenProvider:
             if not oauth_token:
                 raise RuntimeError("No GitHub Copilot OAuth token is available.")
             exchanged = await self._exchange_token(oauth_token)
             self._cached = exchanged
             return exchanged

 from __future__ import annotations
 import asyncio
+import ctypes
 import json
 import logging
 import os
+import subprocess
 import time
+from ctypes import wintypes
 from dataclasses import dataclass
 from datetime import datetime
 from pathlib import Path
     return os.environ.get("GITHUB_COPILOT_TOKEN_EXCHANGE_URL", DEFAULT_TOKEN_EXCHANGE_URL).strip()
+def _should_exchange_oauth_token() -> bool:
+    raw = os.environ.get("GITHUB_COPILOT_USE_TOKEN_EXCHANGE", "").strip().lower()
+    return raw in {"1", "true", "yes", "on"}
 def _resolve_token_file_paths() -> list[Path]:
     override = os.environ.get("GITHUB_COPILOT_TOKEN_FILE", "").strip()
     if override:
     return paths
+def _read_gh_cli_oauth_token() -> str | None:
+    gh_bin = os.environ.get("GH_PATH", "").strip() or "gh"
+    command = [gh_bin, "auth", "token"]
+    host = _github_host()
+    if host and host != DEFAULT_GITHUB_HOST:
+        command.extend(["--hostname", host])
+    try:
+        result = subprocess.run(
+            command,
+            capture_output=True,
+            text=True,
+            encoding="utf-8",
+            errors="replace",
+            check=False,
+        )
+    except OSError as exc:
+        logger.debug("Unable to invoke GitHub CLI for Copilot auth discovery: %s", exc)
+        return None
+    if result.returncode != 0:
+        logger.debug("GitHub CLI auth token lookup failed with exit code %s", result.returncode)
+        return None
+    token = result.stdout.strip()
+    return token or None
+def _read_windows_copilot_cli_oauth_token() -> str | None:
+    if os.name != "nt":
+        return None
+    class FILETIME(ctypes.Structure):
+        _fields_ = [
+            ("dwLowDateTime", wintypes.DWORD),
+            ("dwHighDateTime", wintypes.DWORD),
+        ]
+    class CREDENTIAL(ctypes.Structure):
+        _fields_ = [
+            ("Flags", wintypes.DWORD),
+            ("Type", wintypes.DWORD),
+            ("TargetName", wintypes.LPWSTR),
+            ("Comment", wintypes.LPWSTR),
+            ("LastWritten", FILETIME),
+            ("CredentialBlobSize", wintypes.DWORD),
+            ("CredentialBlob", ctypes.POINTER(ctypes.c_ubyte)),
+            ("Persist", wintypes.DWORD),
+            ("AttributeCount", wintypes.DWORD),
+            ("Attributes", wintypes.LPVOID),
+            ("TargetAlias", wintypes.LPWSTR),
+            ("UserName", wintypes.LPWSTR),
+        ]
+    cred_ptr = ctypes.POINTER(CREDENTIAL)
+    credentials = ctypes.POINTER(cred_ptr)()
+    count = wintypes.DWORD()
+    win_dll = getattr(ctypes, "WinDLL", None)
+    if win_dll is None:
+        return None
+    advapi32 = win_dll("Advapi32.dll")
+    advapi32.CredEnumerateW.argtypes = [
+        wintypes.LPCWSTR,
+        wintypes.DWORD,
+        ctypes.POINTER(wintypes.DWORD),
+        ctypes.POINTER(ctypes.POINTER(cred_ptr)),
+    ]
+    advapi32.CredEnumerateW.restype = wintypes.BOOL
+    advapi32.CredFree.argtypes = [wintypes.LPVOID]
+    try:
+        if not advapi32.CredEnumerateW(None, 0, ctypes.byref(count), ctypes.byref(credentials)):
+            return None
+    except OSError as exc:
+        logger.debug("Unable to enumerate Windows credentials for Copilot auth discovery: %s", exc)
+        return None
+    host = _github_host().lower()
+    service_prefixes = [f"copilot-cli/{host}:"]
+    if "://" not in host:
+        service_prefixes.append(f"copilot-cli/https://{host}:")
+    try:
+        for idx in range(count.value):
+            credential = credentials[idx].contents
+            target = (credential.TargetName or "").strip().lower()
+            if not any(target.startswith(prefix) for prefix in service_prefixes):
+                continue
+            if credential.CredentialBlobSize <= 0 or not credential.CredentialBlob:
+                continue
+            blob = ctypes.string_at(credential.CredentialBlob, credential.CredentialBlobSize)
+            token = blob.decode("utf-8", errors="replace").strip()
+            if token:
+                return token
+    finally:
+        if credentials:
+            advapi32.CredFree(credentials)
+    return None
 def _parse_expiry(value: Any) -> float | None:
     if value in (None, ""):
         return None
         if token:
             return token
+    windows_copilot_token = _read_windows_copilot_cli_oauth_token()
+    if windows_copilot_token:
+        return windows_copilot_token
+    gh_token = _read_gh_cli_oauth_token()
+    if gh_token:
+        return gh_token
     host = _github_host()
     for path in _resolve_token_file_paths():
         try:
     return "githubcopilot.com" in host
+def build_copilot_upstream_url(base_url: str, path: str) -> str:
+    """Build an upstream URL, normalizing GitHub Copilot's non-/v1 path layout."""
+    normalized_base = base_url.rstrip("/")
+    normalized_path = path if path.startswith("/") else f"/{path}"
+    if is_copilot_api_url(normalized_base) and normalized_path.startswith("/v1/"):
+        normalized_path = normalized_path[3:]
+    return f"{normalized_base}{normalized_path}"
 class CopilotTokenProvider:
     """Resolve and cache short-lived Copilot API tokens."""
             if not oauth_token:
                 raise RuntimeError("No GitHub Copilot OAuth token is available.")
+            if not _should_exchange_oauth_token():
+                direct_token = CopilotAPIToken(
+                    token=oauth_token,
+                    expires_at=time.time() + 3600,
+                    api_url=os.environ.get("GITHUB_COPILOT_API_URL", DEFAULT_API_URL).strip()
+                    or DEFAULT_API_URL,
+                )
+                self._cached = direct_token
+                return direct_token
             exchanged = await self._exchange_token(oauth_token)
             self._cached = exchanged
             return exchanged

headroom/dashboard/templates/dashboard.html CHANGED Viewed

@@ -55,12 +55,12 @@
             <div class="inline-flex rounded-lg border border-border bg-surface p-1">
                 <button class="px-3 py-1.5 text-sm rounded-md transition-colors"
                         :class="viewMode === 'session' ? 'bg-accent text-black' : 'text-gray-400 hover:text-gray-200'"
-                        @click="viewMode = 'session'">
                     Session
                 </button>
                 <button class="px-3 py-1.5 text-sm rounded-md transition-colors"
                         :class="viewMode === 'history' ? 'bg-accent text-black' : 'text-gray-400 hover:text-gray-200'"
-                        @click="viewMode = 'history'">
                     Historical
                 </button>
             </div>
@@ -89,7 +89,7 @@
             </div>
             <button x-show="log_full_messages" id="feed-toggle"
                     class="px-3 py-1.5 text-sm rounded-md border border-border bg-surface text-gray-300 hover:text-white transition-colors"
-                    @click="feedOpen = !feedOpen"
                     :class="feedOpen ? 'bg-accent text-black' : ''">
                 Live Feed
             </button>
@@ -1256,6 +1256,11 @@
                 savingsHistory: [],
                 expandedRows: {},
                 pollInterval: null,
                 feedOpen: false,
                 transformations: [],
                 feedScrolled_: false,
@@ -1267,31 +1272,55 @@
                 async init() {
                     await this.fetchStats();
-                    await this.fetchTransformations();
                     this.pollInterval = setInterval(() => {
-                        this.fetchStats();
-                        this.fetchTransformations();
-                    }, 3000);
                     // Keyboard shortcuts
                     document.addEventListener('keydown', (e) => {
                         if (e.key === 'r' || e.key === 'R') {
-                            this.fetchStats();
                         }
                     });
                 },
                 async fetchStats() {
                     try {
-                        const [statsRes, historyRes, healthRes] = await Promise.all([
-                            fetch('/stats'),
-                            fetch('/stats-history'),
                             fetch('/health')
                         ]);
                         this.stats = await statsRes.json();
-                        this.historyStats = await historyRes.json();
                         const health = await healthRes.json();
                         this.healthy = health.status === 'healthy';
                         this.version = health.version || '0.3.0';
@@ -1312,6 +1341,18 @@
                     }
                 },
                 async fetchTransformations() {
                     try {
                         const prevLen = this.transformations.length;
@@ -1324,6 +1365,7 @@
                             }
                             this.transformations = data.transformations || [];
                             this.log_full_messages = data.log_full_messages ?? this.log_full_messages;
                             this.renderTransformations();
                         }
                     } catch (e) {

             <div class="inline-flex rounded-lg border border-border bg-surface p-1">
                 <button class="px-3 py-1.5 text-sm rounded-md transition-colors"
                         :class="viewMode === 'session' ? 'bg-accent text-black' : 'text-gray-400 hover:text-gray-200'"
+                        @click="setViewMode('session')">
                     Session
                 </button>
                 <button class="px-3 py-1.5 text-sm rounded-md transition-colors"
                         :class="viewMode === 'history' ? 'bg-accent text-black' : 'text-gray-400 hover:text-gray-200'"
+                        @click="setViewMode('history')">
                     Historical
                 </button>
             </div>
             </div>
             <button x-show="log_full_messages" id="feed-toggle"
                     class="px-3 py-1.5 text-sm rounded-md border border-border bg-surface text-gray-300 hover:text-white transition-colors"
+                    @click="toggleFeed()"
                     :class="feedOpen ? 'bg-accent text-black' : ''">
                 Live Feed
             </button>
                 savingsHistory: [],
                 expandedRows: {},
                 pollInterval: null,
+                statsPollMs: 5000,
+                historyPollMs: 30000,
+                feedPollMs: 5000,
+                lastHistoryFetchMs: 0,
+                lastFeedFetchMs: 0,
                 feedOpen: false,
                 transformations: [],
                 feedScrolled_: false,
                 async init() {
                     await this.fetchStats();
                     this.pollInterval = setInterval(() => {
+                        this.pollDashboard();
+                    }, this.statsPollMs);
                     // Keyboard shortcuts
                     document.addEventListener('keydown', (e) => {
                         if (e.key === 'r' || e.key === 'R') {
+                            this.pollDashboard(true);
                         }
                     });
                 },
+                async pollDashboard(force = false) {
+                    if (!force && document.hidden) return;
+                    await this.fetchStats();
+                    const now = Date.now();
+                    if (this.viewMode === 'history' && (force || now - this.lastHistoryFetchMs >= this.historyPollMs)) {
+                        await this.fetchHistoryStats();
+                    }
+                    if (this.feedOpen && (force || now - this.lastFeedFetchMs >= this.feedPollMs)) {
+                        await this.fetchTransformations();
+                    }
+                },
+                async setViewMode(mode) {
+                    this.viewMode = mode;
+                    if (mode === 'history') {
+                        await this.fetchHistoryStats();
+                    }
+                },
+                async toggleFeed() {
+                    this.feedOpen = !this.feedOpen;
+                    if (this.feedOpen) {
+                        await this.fetchTransformations();
+                    }
+                },
                 async fetchStats() {
                     try {
+                        const [statsRes, healthRes] = await Promise.all([
+                            fetch('/stats?cached=1'),
                             fetch('/health')
                         ]);
                         this.stats = await statsRes.json();
                         const health = await healthRes.json();
                         this.healthy = health.status === 'healthy';
                         this.version = health.version || '0.3.0';
                     }
                 },
+                async fetchHistoryStats() {
+                    try {
+                        const response = await fetch('/stats-history');
+                        if (response.ok) {
+                            this.historyStats = await response.json();
+                            this.lastHistoryFetchMs = Date.now();
+                        }
+                    } catch (e) {
+                        console.error('Failed to fetch history stats:', e);
+                    }
+                },
                 async fetchTransformations() {
                     try {
                         const prevLen = this.transformations.length;
                             }
                             this.transformations = data.transformations || [];
                             this.log_full_messages = data.log_full_messages ?? this.log_full_messages;
+                            this.lastFeedFetchMs = Date.now();
                             this.renderTransformations();
                         }
                     } catch (e) {

headroom/learn/writer.py CHANGED Viewed

@@ -87,8 +87,73 @@ def _build_section(recommendations: list[Recommendation]) -> str:
     return "\n".join(lines)
-def _merge_into_file(file_path: Path, section: str) -> str:
-    """Merge the section into an existing file, replacing any prior section."""
     if file_path.exists():
         existing = file_path.read_text()
         if _MARKER_START in existing:
@@ -119,8 +184,7 @@ class ClaudeCodeWriter(ContextWriter):
         if context_recs:
             claude_md_path = self._resolve_context_path(project)
-            section_content = _build_section(context_recs)
-            full_content = _merge_into_file(claude_md_path, section_content)
             result.add(claude_md_path, full_content)
             if not dry_run:
                 claude_md_path.parent.mkdir(parents=True, exist_ok=True)
@@ -128,8 +192,7 @@ class ClaudeCodeWriter(ContextWriter):
         if memory_recs:
             memory_path = self._resolve_memory_path(project)
-            section_content = _build_section(memory_recs)
-            full_content = _merge_into_file(memory_path, section_content)
             result.add(memory_path, full_content)
             if not dry_run:
                 memory_path.parent.mkdir(parents=True, exist_ok=True)
@@ -174,8 +237,7 @@ class CodexWriter(ContextWriter):
         if context_recs:
             agents_md = project.context_file or (project.project_path / "AGENTS.md")
-            section_content = _build_section(context_recs)
-            full_content = _merge_into_file(agents_md, section_content)
             result.add(agents_md, full_content)
             if not dry_run:
                 agents_md.parent.mkdir(parents=True, exist_ok=True)
@@ -183,8 +245,7 @@ class CodexWriter(ContextWriter):
         if memory_recs:
             instructions_md = project.memory_file or (project.data_path.parent / "instructions.md")
-            section_content = _build_section(memory_recs)
-            full_content = _merge_into_file(instructions_md, section_content)
             result.add(instructions_md, full_content)
             if not dry_run:
                 instructions_md.parent.mkdir(parents=True, exist_ok=True)
@@ -214,8 +275,7 @@ class GeminiWriter(ContextWriter):
             return result
         gemini_md = project.context_file or (project.project_path / "GEMINI.md")
-        section_content = _build_section(recommendations)
-        full_content = _merge_into_file(gemini_md, section_content)
         result.add(gemini_md, full_content)
         if not dry_run:
             gemini_md.parent.mkdir(parents=True, exist_ok=True)

     return "\n".join(lines)
+# Matches the "*~N tokens/session saved*" annotation emitted by _build_section.
+_TOKENS_ANNOTATION_PATTERN = re.compile(r"\*~([\d,]+) tokens/session saved\*\n?")
+def _parse_prior_recommendations(existing: str) -> list[Recommendation]:
+    """Parse recommendations out of a prior marker block.
+    Returns [] if no marker block is present or it contains no sections.
+    The returned Recommendation objects are round-trip compatible with
+    _build_section — target is set to a placeholder since the marker block
+    itself doesn't record it (blocks are always per-file and per-target).
+    """
+    match = _MARKER_PATTERN.search(existing)
+    if not match:
+        return []
+    inner = match.group(0)[len(_MARKER_START) : -len(_MARKER_END)]
+    recs: list[Recommendation] = []
+    for part in re.split(r"\n### ", "\n" + inner)[1:]:
+        heading_line, _, body = part.partition("\n")
+        heading = heading_line.strip()
+        if not heading:
+            continue
+        tokens_saved = 0
+        tokens_match = _TOKENS_ANNOTATION_PATTERN.match(body)
+        if tokens_match:
+            tokens_saved = int(tokens_match.group(1).replace(",", ""))
+            body = body[tokens_match.end() :]
+        recs.append(
+            Recommendation(
+                target=RecommendationTarget.CONTEXT_FILE,
+                section=heading,
+                content=body.rstrip(),
+                estimated_tokens_saved=tokens_saved,
+            )
+        )
+    return recs
+def _merge_recommendations(
+    file_path: Path,
+    new_recommendations: list[Recommendation],
+) -> list[Recommendation]:
+    """Union new recommendations with prior ones whose section is not re-surfaced.
+    Sections produced by the current run take precedence over same-named
+    prior sections — the latest analysis is authoritative. Prior sections
+    whose headings do not reappear in the new run are carried forward so
+    a re-run doesn't silently drop accumulated learnings. To fully rebuild
+    the block, delete it manually and re-run.
+    """
+    if not file_path.exists():
+        return new_recommendations
+    prior = _parse_prior_recommendations(file_path.read_text())
+    if not prior:
+        return new_recommendations
+    new_sections = {r.section for r in new_recommendations}
+    carried = [p for p in prior if p.section not in new_sections]
+    return list(new_recommendations) + carried
+def _merge_into_file(file_path: Path, new_recommendations: list[Recommendation]) -> str:
+    """Merge new recommendations with any existing marker block and rebuild the file."""
+    merged = _merge_recommendations(file_path, new_recommendations)
+    section = _build_section(merged)
     if file_path.exists():
         existing = file_path.read_text()
         if _MARKER_START in existing:
         if context_recs:
             claude_md_path = self._resolve_context_path(project)
+            full_content = _merge_into_file(claude_md_path, context_recs)
             result.add(claude_md_path, full_content)
             if not dry_run:
                 claude_md_path.parent.mkdir(parents=True, exist_ok=True)
         if memory_recs:
             memory_path = self._resolve_memory_path(project)
+            full_content = _merge_into_file(memory_path, memory_recs)
             result.add(memory_path, full_content)
             if not dry_run:
                 memory_path.parent.mkdir(parents=True, exist_ok=True)
         if context_recs:
             agents_md = project.context_file or (project.project_path / "AGENTS.md")
+            full_content = _merge_into_file(agents_md, context_recs)
             result.add(agents_md, full_content)
             if not dry_run:
                 agents_md.parent.mkdir(parents=True, exist_ok=True)
         if memory_recs:
             instructions_md = project.memory_file or (project.data_path.parent / "instructions.md")
+            full_content = _merge_into_file(instructions_md, memory_recs)
             result.add(instructions_md, full_content)
             if not dry_run:
                 instructions_md.parent.mkdir(parents=True, exist_ok=True)
             return result
         gemini_md = project.context_file or (project.project_path / "GEMINI.md")
+        full_content = _merge_into_file(gemini_md, recommendations)
         result.add(gemini_md, full_content)
         if not dry_run:
             gemini_md.parent.mkdir(parents=True, exist_ok=True)

headroom/proxy/handlers/openai.py CHANGED Viewed

The diff for this file is too large to render. See raw diff

headroom/proxy/handlers/streaming.py CHANGED Viewed

@@ -451,6 +451,7 @@ class StreamingMixin:
         optimization_latency: float,
         stream_state: dict[str, Any],
         start_time: float,
         pipeline_timing: dict[str, float] | None = None,
         prefix_tracker: Any | None = None,
         original_messages: list[dict] | None = None,
@@ -547,6 +548,38 @@ class StreamingMixin:
                 uncached_input_tokens=uncached_input_tokens,
             )
     async def _stream_response(
         self,
         url: str,
@@ -702,6 +735,7 @@ class StreamingMixin:
                 optimization_latency=optimization_latency,
                 stream_state=stream_state,
                 start_time=start_time,
                 pipeline_timing=pipeline_timing,
                 prefix_tracker=prefix_tracker,
                 original_messages=original_messages,
@@ -875,6 +909,7 @@ class StreamingMixin:
                     optimization_latency=optimization_latency,
                     stream_state=stream_state,
                     start_time=start_time,
                     pipeline_timing=pipeline_timing,
                     prefix_tracker=prefix_tracker,
                     original_messages=original_messages,

         optimization_latency: float,
         stream_state: dict[str, Any],
         start_time: float,
+        tags: dict[str, str] | None = None,
         pipeline_timing: dict[str, float] | None = None,
         prefix_tracker: Any | None = None,
         original_messages: list[dict] | None = None,
                 uncached_input_tokens=uncached_input_tokens,
             )
+        # Log the request to the in-memory request logger so it shows up in
+        # /stats `recent_requests` and `/transformations/feed`. Without this
+        # the streaming Anthropic path (which is what Claude Code uses) is
+        # invisible to both surfaces — only Bedrock streaming and the
+        # non-streaming Anthropic path were logged previously.
+        if getattr(self, "logger", None) is not None:
+            from headroom.proxy.models import RequestLog
+            self.logger.log(
+                RequestLog(
+                    request_id=request_id,
+                    timestamp=datetime.now().isoformat(),
+                    provider=provider,
+                    model=model,
+                    input_tokens_original=original_tokens,
+                    input_tokens_optimized=optimized_tokens,
+                    output_tokens=output_tokens,
+                    tokens_saved=tokens_saved,
+                    savings_percent=(tokens_saved / original_tokens * 100)
+                    if original_tokens > 0
+                    else 0,
+                    optimization_latency_ms=optimization_latency,
+                    total_latency_ms=total_latency,
+                    tags=tags or {},
+                    cache_hit=False,
+                    transforms_applied=transforms_applied,
+                    request_messages=body.get("messages")
+                    if getattr(self.config, "log_full_messages", False)
+                    else None,
+                )
+            )
     async def _stream_response(
         self,
         url: str,
                 optimization_latency=optimization_latency,
                 stream_state=stream_state,
                 start_time=start_time,
+                tags=tags,
                 pipeline_timing=pipeline_timing,
                 prefix_tracker=prefix_tracker,
                 original_messages=original_messages,
                     optimization_latency=optimization_latency,
                     stream_state=stream_state,
                     start_time=start_time,
+                    tags=tags,
                     pipeline_timing=pipeline_timing,
                     prefix_tracker=prefix_tracker,
                     original_messages=original_messages,

headroom/proxy/helpers.py CHANGED Viewed

@@ -11,8 +11,10 @@ from __future__ import annotations
 import json
 import logging
 import random
 from pathlib import Path
-from typing import TYPE_CHECKING, Any
 from headroom import paths as _paths
@@ -21,6 +23,14 @@ if TYPE_CHECKING:
 logger = logging.getLogger("headroom.proxy")
 # Maximum request body size (100MB - increased to support image-heavy requests)
 MAX_REQUEST_BODY_SIZE = 100 * 1024 * 1024
@@ -114,11 +124,18 @@ def _get_rtk_stats() -> dict[str, Any] | None:
     """Get rtk (Rust Token Killer) savings stats if rtk is installed.
     Reads from rtk's tracking database via `rtk gain --format json`.
-    Returns None if rtk is not installed.
     """
     import shutil
     import subprocess as _sp
     rtk_bin = shutil.which("rtk")
     if not rtk_bin:
         # Check headroom-managed install. Preserve the historical Unix-name
@@ -128,7 +145,16 @@ def _get_rtk_stats() -> dict[str, Any] | None:
         if rtk_managed.exists():
             rtk_bin = str(rtk_managed)
         else:
-            return None
     try:
         result = _sp.run(
@@ -140,21 +166,36 @@ def _get_rtk_stats() -> dict[str, Any] | None:
         if result.returncode == 0 and result.stdout.strip():
             data = json.loads(result.stdout)
             summary = data.get("summary", {})
-            return {
                 "installed": True,
                 "total_commands": summary.get("total_commands", 0),
                 "tokens_saved": summary.get("total_saved", 0),
                 "avg_savings_pct": summary.get("avg_savings_pct", 0.0),
             }
     except Exception:
-        pass
-    return {
-        "installed": True,
-        "total_commands": 0,
-        "tokens_saved": 0,
-        "avg_savings_pct": 0.0,
-    }
 def is_anthropic_auth(headers: dict[str, str]) -> bool:

 import json
 import logging
 import random
+import threading
+import time
 from pathlib import Path
+from typing import TYPE_CHECKING, Any, cast
 from headroom import paths as _paths
 logger = logging.getLogger("headroom.proxy")
+RTK_STATS_CACHE_TTL_SECONDS = 5.0
+_rtk_stats_cache_lock = threading.Lock()
+_rtk_stats_cache: dict[str, Any] = {
+    "expires_at": 0.0,
+    "has_value": False,
+    "value": None,
+}
 # Maximum request body size (100MB - increased to support image-heavy requests)
 MAX_REQUEST_BODY_SIZE = 100 * 1024 * 1024
     """Get rtk (Rust Token Killer) savings stats if rtk is installed.
     Reads from rtk's tracking database via `rtk gain --format json`.
+    Results are memoized briefly so dashboard polling does not spawn a new
+    subprocess on every refresh.
     """
     import shutil
     import subprocess as _sp
+    now = time.monotonic()
+    with _rtk_stats_cache_lock:
+        if _rtk_stats_cache["has_value"] and now < float(_rtk_stats_cache["expires_at"]):
+            return cast(dict[str, Any] | None, _rtk_stats_cache["value"])
+    payload: dict[str, Any] | None
     rtk_bin = shutil.which("rtk")
     if not rtk_bin:
         # Check headroom-managed install. Preserve the historical Unix-name
         if rtk_managed.exists():
             rtk_bin = str(rtk_managed)
         else:
+            payload = None
+            with _rtk_stats_cache_lock:
+                _rtk_stats_cache.update(
+                    {
+                        "expires_at": time.monotonic() + RTK_STATS_CACHE_TTL_SECONDS,
+                        "has_value": True,
+                        "value": payload,
+                    }
+                )
+            return payload
     try:
         result = _sp.run(
         if result.returncode == 0 and result.stdout.strip():
             data = json.loads(result.stdout)
             summary = data.get("summary", {})
+            payload = {
                 "installed": True,
                 "total_commands": summary.get("total_commands", 0),
                 "tokens_saved": summary.get("total_saved", 0),
                 "avg_savings_pct": summary.get("avg_savings_pct", 0.0),
             }
+        else:
+            payload = {
+                "installed": True,
+                "total_commands": 0,
+                "tokens_saved": 0,
+                "avg_savings_pct": 0.0,
+            }
     except Exception:
+        payload = {
+            "installed": True,
+            "total_commands": 0,
+            "tokens_saved": 0,
+            "avg_savings_pct": 0.0,
+        }
+    with _rtk_stats_cache_lock:
+        _rtk_stats_cache.update(
+            {
+                "expires_at": time.monotonic() + RTK_STATS_CACHE_TTL_SECONDS,
+                "has_value": True,
+                "value": payload,
+            }
+        )
+    return payload
 def is_anthropic_auth(headers: dict[str, str]) -> bool:

headroom/proxy/server.py CHANGED Viewed

@@ -32,7 +32,7 @@ import sys
 import time
 from datetime import datetime, timezone
 from pathlib import Path
-from typing import TYPE_CHECKING, Any, Literal
 if TYPE_CHECKING:
     from ..backends.base import Backend
@@ -1403,9 +1403,12 @@ def create_app(config: ProxyConfig | None = None) -> FastAPI:
         """Serve the Headroom dashboard UI."""
         return get_dashboard_html()
-    @app.get("/stats")
-    async def stats():
-        """Get comprehensive proxy statistics.
         This is the main stats endpoint - it aggregates data from all subsystems:
         - Request metrics (total, cached, failed, by model/provider)
@@ -1634,6 +1637,44 @@ def create_app(config: ProxyConfig | None = None) -> FastAPI:
             **get_quota_registry().get_all_stats(),
         }
     @app.get("/stats-history")
     async def stats_history(
         format: Literal["json", "csv"] = "json",

 import time
 from datetime import datetime, timezone
 from pathlib import Path
+from typing import TYPE_CHECKING, Any, Literal, cast
 if TYPE_CHECKING:
     from ..backends.base import Backend
         """Serve the Headroom dashboard UI."""
         return get_dashboard_html()
+    DASHBOARD_STATS_CACHE_TTL_SECONDS = 5.0
+    _stats_snapshot_lock = asyncio.Lock()
+    _stats_snapshot: dict[str, Any] = {"expires_at": 0.0, "value": None}
+    async def _build_stats_payload() -> dict[str, Any]:
+        """Build the full `/stats` response payload.
         This is the main stats endpoint - it aggregates data from all subsystems:
         - Request metrics (total, cached, failed, by model/provider)
             **get_quota_registry().get_all_stats(),
         }
+    async def _get_cached_stats_payload() -> dict[str, Any]:
+        """Return a short-TTL cached `/stats` snapshot for dashboard polling."""
+        now = time.monotonic()
+        cached_payload = cast(dict[str, Any] | None, _stats_snapshot.get("value"))
+        if cached_payload is not None and now < float(_stats_snapshot["expires_at"]):
+            return cached_payload
+        async with _stats_snapshot_lock:
+            now = time.monotonic()
+            cached_payload = cast(dict[str, Any] | None, _stats_snapshot.get("value"))
+            if cached_payload is not None and now < float(_stats_snapshot["expires_at"]):
+                return cached_payload
+            payload = await _build_stats_payload()
+            _stats_snapshot["value"] = payload
+            _stats_snapshot["expires_at"] = time.monotonic() + DASHBOARD_STATS_CACHE_TTL_SECONDS
+            return payload
+    @app.get("/stats")
+    async def stats(cached: bool = False):
+        """Get comprehensive proxy statistics.
+        This is the main stats endpoint - it aggregates data from all subsystems:
+        - Request metrics (total, cached, failed, by model/provider)
+        - Token usage and savings
+        - Cost tracking
+        - Canonical persisted display_session metrics for downstream dashboards
+        - Compression (CCR) statistics
+        - Telemetry/TOIN (data flywheel) statistics
+        - Cache and rate limiter stats
+        Use ``?cached=1`` for the dashboard fast path. That returns a short-TTL
+        snapshot to avoid rebuilding the full payload on every UI poll.
+        """
+        if cached:
+            return await _get_cached_stats_payload()
+        return await _build_stats_payload()
     @app.get("/stats-history")
     async def stats_history(
         format: Literal["json", "csv"] = "json",

plugins/headroom-agent-hooks/.claude-plugin/plugin.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "headroom",
-  "version": "0.10.0",
   "description": "Headroom startup hooks for Claude Code and GitHub Copilot CLI.",
   "author": {
     "name": "Headroom Contributors",

 {
   "name": "headroom",
+  "version": "0.11.0",
   "description": "Headroom startup hooks for Claude Code and GitHub Copilot CLI.",
   "author": {
     "name": "Headroom Contributors",

plugins/headroom-agent-hooks/.github/plugin/plugin.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "headroom",
-  "version": "0.10.0",
   "description": "Headroom startup hooks for Claude Code and GitHub Copilot CLI.",
   "author": {
     "name": "Headroom Contributors",

 {
   "name": "headroom",
+  "version": "0.11.0",
   "description": "Headroom startup hooks for Claude Code and GitHub Copilot CLI.",
   "author": {
     "name": "Headroom Contributors",

tests/test_cli/test_wrap_copilot.py CHANGED Viewed

@@ -1,212 +1,335 @@
-"""Tests for `headroom wrap copilot` command."""
-from __future__ import annotations
-import sys
-import types
-from pathlib import Path
-from unittest.mock import patch
-import pytest
-from click.testing import CliRunner
-if "fastapi" not in sys.modules:
-    fastapi_mod = types.ModuleType("fastapi")
-    fastapi_mod.FastAPI = type("FastAPI", (), {})
-    fastapi_mod.Request = type("Request", (), {})
-    fastapi_mod.WebSocket = type("WebSocket", (), {})
-    sys.modules["fastapi"] = fastapi_mod
-if "fastapi.responses" not in sys.modules:
-    responses_mod = types.ModuleType("fastapi.responses")
-    responses_mod.Response = type("Response", (), {})
-    sys.modules["fastapi.responses"] = responses_mod
-from headroom.cli import wrap as wrap_cli
-from headroom.cli.main import main
-@pytest.fixture
-def runner() -> CliRunner:
-    return CliRunner()
-def test_wrap_copilot_auto_anthropic_injects_instructions(
-    runner: CliRunner, tmp_path: Path, monkeypatch: pytest.MonkeyPatch
-) -> None:
-    monkeypatch.chdir(tmp_path)
-    monkeypatch.setenv("ANTHROPIC_API_KEY", "sk-test-dummy")
-    captured: dict[str, object] = {}
-    def fake_launch_tool(**kwargs):  # noqa: ANN003
-        captured.update(kwargs)
-    with patch("headroom.cli.wrap.shutil.which", return_value="copilot"):
-        with patch("headroom.cli.wrap._ensure_rtk_binary", return_value=Path("/tmp/rtk")):
-            with patch("headroom.cli.wrap._launch_tool", side_effect=fake_launch_tool):
-                result = runner.invoke(
-                    main,
-                    ["wrap", "copilot", "--", "--model", "claude-sonnet-4-20250514"],
-                )
-    assert result.exit_code == 0, result.output
-    instructions = tmp_path / ".github" / "copilot-instructions.md"
-    assert instructions.exists()
-    content = instructions.read_text()
-    assert wrap_cli._RTK_MARKER in content
-    assert "RTK (Rust Token Killer)" in content
-    env = captured["env"]
-    assert isinstance(env, dict)
-    assert env["COPILOT_PROVIDER_TYPE"] == "anthropic"
-    assert env["COPILOT_PROVIDER_BASE_URL"] == "http://127.0.0.1:8787"
-    assert "COPILOT_PROVIDER_WIRE_API" not in env
-    assert captured["agent_type"] == "copilot"
-    assert captured["tool_label"] == "COPILOT"
-    assert captured["args"] == ("--model", "claude-sonnet-4-20250514")
-def test_wrap_copilot_openai_backend_sets_completions_env(
-    runner: CliRunner, monkeypatch: pytest.MonkeyPatch
-) -> None:
-    monkeypatch.setenv("OPENAI_API_KEY", "sk-test-dummy")
-    captured: dict[str, object] = {}
-    def fake_launch_tool(**kwargs):  # noqa: ANN003
-        captured.update(kwargs)
-    with patch("headroom.cli.wrap.shutil.which", return_value="copilot"):
-        with patch("headroom.cli.wrap._check_proxy", return_value=False):
-            with patch("headroom.cli.wrap._launch_tool", side_effect=fake_launch_tool):
-                result = runner.invoke(
-                    main,
-                    [
-                        "wrap",
-                        "copilot",
-                        "--no-rtk",
-                        "--backend",
-                        "anyllm",
-                        "--anyllm-provider",
-                        "groq",
-                        "--region",
-                        "us-central1",
-                        "--",
-                        "--model",
-                        "gpt-4o",
-                    ],
-                )
-    assert result.exit_code == 0, result.output
-    env = captured["env"]
-    assert isinstance(env, dict)
-    assert env["COPILOT_PROVIDER_TYPE"] == "openai"
-    assert env["COPILOT_PROVIDER_BASE_URL"] == "http://127.0.0.1:8787/v1"
-    assert env["COPILOT_PROVIDER_WIRE_API"] == "completions"
-    assert captured["backend"] == "anyllm"
-    assert captured["anyllm_provider"] == "groq"
-    assert captured["region"] == "us-central1"
-    assert captured["args"] == ("--model", "gpt-4o")
-def test_wrap_copilot_auto_detects_running_proxy_backend(
-    runner: CliRunner, monkeypatch: pytest.MonkeyPatch
-) -> None:
-    monkeypatch.setenv("OPENAI_API_KEY", "sk-test-dummy")
-    captured: dict[str, object] = {}
-    def fake_launch_tool(**kwargs):  # noqa: ANN003
-        captured.update(kwargs)
-    with patch("headroom.cli.wrap.shutil.which", return_value="copilot"):
-        with patch("headroom.cli.wrap._check_proxy", return_value=True):
-            with patch("headroom.cli.wrap._detect_running_proxy_backend", return_value="anyllm"):
-                with patch("headroom.cli.wrap._launch_tool", side_effect=fake_launch_tool):
-                    result = runner.invoke(
-                        main,
-                        ["wrap", "copilot", "--no-rtk", "--", "--model", "gpt-4o"],
-                    )
-    assert result.exit_code == 0, result.output
-    env = captured["env"]
-    assert isinstance(env, dict)
-    assert env["COPILOT_PROVIDER_TYPE"] == "openai"
-    assert env["COPILOT_PROVIDER_BASE_URL"] == "http://127.0.0.1:8787/v1"
-    assert env["COPILOT_PROVIDER_WIRE_API"] == "completions"
-def test_wrap_copilot_rejects_wire_api_for_anthropic_provider(runner: CliRunner) -> None:
-    with patch("headroom.cli.wrap.shutil.which", return_value="copilot"):
-        result = runner.invoke(
-            main,
-            [
-                "wrap",
-                "copilot",
-                "--wire-api",
-                "responses",
-                "--",
-                "--model",
-                "claude-sonnet-4-20250514",
-            ],
-        )
-    assert result.exit_code != 0
-    assert "--wire-api is only valid" in result.output
-def test_wrap_copilot_rejects_responses_for_translated_backends(runner: CliRunner) -> None:
-    with patch("headroom.cli.wrap.shutil.which", return_value="copilot"):
-        with patch("headroom.cli.wrap._check_proxy", return_value=False):
-            result = runner.invoke(
-                main,
-                [
-                    "wrap",
-                    "copilot",
-                    "--backend",
-                    "anyllm",
-                    "--wire-api",
-                    "responses",
-                    "--",
-                    "--model",
-                    "gpt-4o",
-                ],
-            )
-    assert result.exit_code != 0
-    assert "not supported with translated backends" in result.output
-def test_wrap_copilot_clears_stale_wire_api_in_anthropic_mode(
-    runner: CliRunner, monkeypatch: pytest.MonkeyPatch
-) -> None:
-    monkeypatch.setenv("ANTHROPIC_API_KEY", "sk-test-dummy")
-    captured: dict[str, object] = {}
-    def fake_launch_tool(**kwargs):  # noqa: ANN003
-        captured.update(kwargs)
-    with patch("headroom.cli.wrap.shutil.which", return_value="copilot"):
-        with patch("headroom.cli.wrap._launch_tool", side_effect=fake_launch_tool):
-            result = runner.invoke(
-                main,
-                ["wrap", "copilot", "--no-rtk", "--", "--model", "claude-sonnet-4-20250514"],
-                env={
-                    "COPILOT_PROVIDER_WIRE_API": "responses",
-                    "ANTHROPIC_API_KEY": "sk-test-dummy",
-                },
-            )
-    assert result.exit_code == 0, result.output
-    env = captured["env"]
-    assert isinstance(env, dict)
-    assert env["COPILOT_PROVIDER_TYPE"] == "anthropic"
-    assert "COPILOT_PROVIDER_WIRE_API" not in env
-def test_wrap_copilot_fails_when_binary_missing(runner: CliRunner) -> None:
-    with patch("headroom.cli.wrap.shutil.which", return_value=None):
-        result = runner.invoke(main, ["wrap", "copilot", "--", "--model", "gpt-4o"])
-    assert result.exit_code == 1
-    assert "'copilot' not found in PATH" in result.output
-    assert "Install GitHub Copilot CLI" in result.output

+"""Tests for `headroom wrap copilot` command."""
+from __future__ import annotations
+import importlib
+import sys
+import types
+from pathlib import Path
+from unittest.mock import patch
+import click
+import pytest
+from click.testing import CliRunner
+from headroom.copilot_auth import DEFAULT_API_URL
+@pytest.fixture
+def runner() -> CliRunner:
+    return CliRunner()
+@pytest.fixture
+def wrap_modules(monkeypatch: pytest.MonkeyPatch) -> tuple[types.ModuleType, click.Group]:
+    headroom_pkg = sys.modules.get("headroom")
+    saved_headroom_cli_attr = (
+        headroom_pkg.cli if headroom_pkg is not None and hasattr(headroom_pkg, "cli") else None
+    )
+    saved_modules = {
+        name: sys.modules.get(name)
+        for name in ("headroom.cli", "headroom.cli.main", "headroom.cli.wrap")
+    }
+    fake_main_module = types.ModuleType("headroom.cli.main")
+    fake_main_module.main = click.Group()
+    sys.modules["headroom.cli.main"] = fake_main_module
+    sys.modules.pop("headroom.cli", None)
+    sys.modules.pop("headroom.cli.wrap", None)
+    wrap_cli = importlib.import_module("headroom.cli.wrap")
+    monkeypatch.setattr(wrap_cli, "_check_proxy", lambda _port: False)
+    try:
+        yield wrap_cli, fake_main_module.main
+    finally:
+        for name in ("headroom.cli.wrap", "headroom.cli.main", "headroom.cli"):
+            sys.modules.pop(name, None)
+        for name, module in saved_modules.items():
+            if module is not None:
+                sys.modules[name] = module
+        if saved_modules["headroom.cli"] is not None:
+            cli_pkg = saved_modules["headroom.cli"]
+            if saved_modules["headroom.cli.main"] is not None:
+                cli_pkg.main = saved_modules["headroom.cli.main"]
+            if saved_modules["headroom.cli.wrap"] is not None:
+                cli_pkg.wrap = saved_modules["headroom.cli.wrap"]
+        if headroom_pkg is not None:
+            if saved_headroom_cli_attr is None:
+                if hasattr(headroom_pkg, "cli"):
+                    delattr(headroom_pkg, "cli")
+            else:
+                headroom_pkg.cli = saved_headroom_cli_attr
+def test_wrap_copilot_auto_anthropic_injects_instructions(
+    runner: CliRunner,
+    wrap_modules: tuple[types.ModuleType, click.Group],
+    tmp_path: Path,
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    wrap_cli, main = wrap_modules
+    monkeypatch.chdir(tmp_path)
+    monkeypatch.setenv("ANTHROPIC_API_KEY", "sk-test-dummy")
+    captured: dict[str, object] = {}
+    def fake_launch_tool(**kwargs):  # noqa: ANN003
+        captured.update(kwargs)
+    with (
+        patch("headroom.cli.wrap.shutil.which", return_value="copilot"),
+        patch("headroom.cli.wrap.has_oauth_auth", return_value=False),
+        patch("headroom.cli.wrap._ensure_rtk_binary", return_value=Path("/tmp/rtk")),
+        patch("headroom.cli.wrap._launch_tool", side_effect=fake_launch_tool),
+    ):
+        result = runner.invoke(
+            main,
+            ["wrap", "copilot", "--", "--model", "claude-sonnet-4-20250514"],
+        )
+    assert result.exit_code == 0, result.output
+    instructions = tmp_path / ".github" / "copilot-instructions.md"
+    assert instructions.exists()
+    content = instructions.read_text()
+    assert wrap_cli._RTK_MARKER in content
+    assert "RTK (Rust Token Killer)" in content
+    env = captured["env"]
+    assert isinstance(env, dict)
+    assert env["COPILOT_PROVIDER_TYPE"] == "anthropic"
+    assert env["COPILOT_PROVIDER_BASE_URL"] == "http://127.0.0.1:8787"
+    assert "COPILOT_PROVIDER_WIRE_API" not in env
+    assert captured["agent_type"] == "copilot"
+    assert captured["tool_label"] == "COPILOT"
+    assert captured["args"] == ("--model", "claude-sonnet-4-20250514")
+def test_wrap_copilot_openai_backend_sets_completions_env(
+    runner: CliRunner,
+    wrap_modules: tuple[types.ModuleType, click.Group],
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    _wrap_cli, main = wrap_modules
+    monkeypatch.setenv("OPENAI_API_KEY", "sk-test-dummy")
+    captured: dict[str, object] = {}
+    def fake_launch_tool(**kwargs):  # noqa: ANN003
+        captured.update(kwargs)
+    with (
+        patch("headroom.cli.wrap.shutil.which", return_value="copilot"),
+        patch("headroom.cli.wrap.has_oauth_auth", return_value=False),
+        patch("headroom.cli.wrap._launch_tool", side_effect=fake_launch_tool),
+    ):
+        result = runner.invoke(
+            main,
+            [
+                "wrap",
+                "copilot",
+                "--no-rtk",
+                "--backend",
+                "anyllm",
+                "--anyllm-provider",
+                "groq",
+                "--region",
+                "us-central1",
+                "--",
+                "--model",
+                "gpt-4o",
+            ],
+        )
+    assert result.exit_code == 0, result.output
+    env = captured["env"]
+    assert isinstance(env, dict)
+    assert env["COPILOT_PROVIDER_TYPE"] == "openai"
+    assert env["COPILOT_PROVIDER_BASE_URL"] == "http://127.0.0.1:8787/v1"
+    assert env["COPILOT_PROVIDER_WIRE_API"] == "completions"
+    assert captured["backend"] == "anyllm"
+    assert captured["anyllm_provider"] == "groq"
+    assert captured["region"] == "us-central1"
+    assert captured["args"] == ("--model", "gpt-4o")
+def test_wrap_copilot_auto_detects_running_proxy_backend(
+    runner: CliRunner,
+    wrap_modules: tuple[types.ModuleType, click.Group],
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    _wrap_cli, main = wrap_modules
+    monkeypatch.setenv("OPENAI_API_KEY", "sk-test-dummy")
+    captured: dict[str, object] = {}
+    def fake_launch_tool(**kwargs):  # noqa: ANN003
+        captured.update(kwargs)
+    with (
+        patch("headroom.cli.wrap.shutil.which", return_value="copilot"),
+        patch("headroom.cli.wrap.has_oauth_auth", return_value=False),
+        patch("headroom.cli.wrap._check_proxy", return_value=True),
+        patch("headroom.cli.wrap._detect_running_proxy_backend", return_value="anyllm"),
+        patch("headroom.cli.wrap._launch_tool", side_effect=fake_launch_tool),
+    ):
+        result = runner.invoke(
+            main,
+            ["wrap", "copilot", "--no-rtk", "--", "--model", "gpt-4o"],
+        )
+    assert result.exit_code == 0, result.output
+    env = captured["env"]
+    assert isinstance(env, dict)
+    assert env["COPILOT_PROVIDER_TYPE"] == "openai"
+    assert env["COPILOT_PROVIDER_BASE_URL"] == "http://127.0.0.1:8787/v1"
+    assert env["COPILOT_PROVIDER_WIRE_API"] == "completions"
+def test_wrap_copilot_prefers_existing_oauth_session(
+    runner: CliRunner,
+    wrap_modules: tuple[types.ModuleType, click.Group],
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    _wrap_cli, main = wrap_modules
+    monkeypatch.setenv("ANTHROPIC_API_KEY", "sk-test-dummy")
+    captured: dict[str, object] = {}
+    def fake_launch_tool(**kwargs):  # noqa: ANN003
+        captured.update(kwargs)
+    with patch("headroom.cli.wrap.shutil.which", return_value="copilot"):
+        with patch("headroom.cli.wrap.resolve_client_bearer_token", return_value="gho-existing"):
+            with patch("headroom.cli.wrap.has_oauth_auth", return_value=True):
+                with patch("headroom.cli.wrap._launch_tool", side_effect=fake_launch_tool):
+                    result = runner.invoke(
+                        main,
+                        ["wrap", "copilot", "--no-rtk", "--", "--model", "claude-sonnet-4.6"],
+                    )
+    assert result.exit_code == 0, result.output
+    env = captured["env"]
+    assert isinstance(env, dict)
+    assert env["COPILOT_PROVIDER_TYPE"] == "openai"
+    assert env["COPILOT_PROVIDER_BASE_URL"] == "http://127.0.0.1:8787/v1"
+    assert env["COPILOT_PROVIDER_WIRE_API"] == "completions"
+    assert env["COPILOT_PROVIDER_BEARER_TOKEN"] == "gho-existing"
+    assert "COPILOT_PROVIDER_API_KEY" not in env
+    assert captured["openai_api_url"] == DEFAULT_API_URL
+def test_wrap_copilot_translated_backend_still_requires_byok(
+    runner: CliRunner,
+    wrap_modules: tuple[types.ModuleType, click.Group],
+) -> None:
+    _wrap_cli, main = wrap_modules
+    with patch("headroom.cli.wrap.shutil.which", return_value="copilot"):
+        with patch("headroom.cli.wrap.has_oauth_auth", return_value=True):
+            result = runner.invoke(
+                main,
+                [
+                    "wrap",
+                    "copilot",
+                    "--no-rtk",
+                    "--backend",
+                    "anyllm",
+                    "--",
+                    "--model",
+                    "gpt-4o",
+                ],
+            )
+    assert result.exit_code == 1
+    assert "Copilot BYOK mode requires a provider API key" in result.output
+def test_wrap_copilot_rejects_wire_api_for_anthropic_provider(
+    runner: CliRunner,
+    wrap_modules: tuple[types.ModuleType, click.Group],
+) -> None:
+    _wrap_cli, main = wrap_modules
+    with patch("headroom.cli.wrap.shutil.which", return_value="copilot"):
+        result = runner.invoke(
+            main,
+            [
+                "wrap",
+                "copilot",
+                "--wire-api",
+                "responses",
+                "--",
+                "--model",
+                "claude-sonnet-4-20250514",
+            ],
+        )
+    assert result.exit_code != 0
+    assert "--wire-api is only valid" in result.output
+def test_wrap_copilot_rejects_responses_for_translated_backends(
+    runner: CliRunner,
+    wrap_modules: tuple[types.ModuleType, click.Group],
+) -> None:
+    _wrap_cli, main = wrap_modules
+    with patch("headroom.cli.wrap.shutil.which", return_value="copilot"):
+        result = runner.invoke(
+            main,
+            [
+                "wrap",
+                "copilot",
+                "--backend",
+                "anyllm",
+                "--wire-api",
+                "responses",
+                "--",
+                "--model",
+                "gpt-4o",
+            ],
+        )
+    assert result.exit_code != 0
+    assert "not supported with translated backends" in result.output
+def test_wrap_copilot_clears_stale_wire_api_in_anthropic_mode(
+    runner: CliRunner,
+    wrap_modules: tuple[types.ModuleType, click.Group],
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    _wrap_cli, main = wrap_modules
+    monkeypatch.setenv("ANTHROPIC_API_KEY", "sk-test-dummy")
+    captured: dict[str, object] = {}
+    def fake_launch_tool(**kwargs):  # noqa: ANN003
+        captured.update(kwargs)
+    with (
+        patch("headroom.cli.wrap.shutil.which", return_value="copilot"),
+        patch("headroom.cli.wrap.has_oauth_auth", return_value=False),
+        patch("headroom.cli.wrap._launch_tool", side_effect=fake_launch_tool),
+    ):
+        result = runner.invoke(
+            main,
+            ["wrap", "copilot", "--no-rtk", "--", "--model", "claude-sonnet-4-20250514"],
+            env={
+                "COPILOT_PROVIDER_WIRE_API": "responses",
+                "ANTHROPIC_API_KEY": "sk-test-dummy",
+            },
+        )
+    assert result.exit_code == 0, result.output
+    env = captured["env"]
+    assert isinstance(env, dict)
+    assert env["COPILOT_PROVIDER_TYPE"] == "anthropic"
+    assert "COPILOT_PROVIDER_WIRE_API" not in env
+def test_wrap_copilot_fails_when_binary_missing(
+    runner: CliRunner,
+    wrap_modules: tuple[types.ModuleType, click.Group],
+) -> None:
+    _wrap_cli, main = wrap_modules
+    with patch("headroom.cli.wrap.shutil.which", return_value=None):
+        result = runner.invoke(main, ["wrap", "copilot", "--", "--model", "gpt-4o"])
+    assert result.exit_code == 1
+    assert "'copilot' not found in PATH" in result.output
+    assert "Install GitHub Copilot CLI" in result.output

tests/test_cli/test_wrap_persistent.py CHANGED Viewed

@@ -2,7 +2,7 @@ from __future__ import annotations
 import click
-from headroom.cli.wrap import _ensure_proxy, _find_persistent_manifest, _recover_persistent_proxy
 class _Manifest:
@@ -15,8 +15,8 @@ class _Manifest:
 def test_ensure_proxy_recovers_matching_persistent_deployment(monkeypatch) -> None:
     calls: list[str] = []
-    monkeypatch.setattr("headroom.cli.wrap._check_proxy", lambda port: False)
-    monkeypatch.setattr("headroom.cli.wrap._find_persistent_manifest", lambda port: _Manifest())
     monkeypatch.setattr("headroom.install.health.probe_ready", lambda url: False)
     monkeypatch.setattr(
         "headroom.install.supervisors.start_supervisor",
@@ -26,13 +26,14 @@ def test_ensure_proxy_recovers_matching_persistent_deployment(monkeypatch) -> No
         "headroom.install.runtime.wait_ready", lambda manifest, timeout_seconds=45: True
     )
     monkeypatch.setattr(
-        "headroom.cli.wrap._start_proxy",
         lambda *args, **kwargs: (_ for _ in ()).throw(
             AssertionError("ephemeral proxy should not start")
         ),
     )
-    result = _ensure_proxy(8787, False)
     assert result is None
     assert calls == ["start:default"]
@@ -41,8 +42,8 @@ def test_ensure_proxy_recovers_matching_persistent_deployment(monkeypatch) -> No
 def test_ensure_proxy_recovers_persistent_deployment_when_socket_is_bound(monkeypatch) -> None:
     calls: list[str] = []
-    monkeypatch.setattr("headroom.cli.wrap._check_proxy", lambda port: True)
-    monkeypatch.setattr("headroom.cli.wrap._find_persistent_manifest", lambda port: _Manifest())
     monkeypatch.setattr("headroom.install.health.probe_ready", lambda url: False)
     monkeypatch.setattr(
         "headroom.install.supervisors.start_supervisor",
@@ -52,26 +53,41 @@ def test_ensure_proxy_recovers_persistent_deployment_when_socket_is_bound(monkey
         "headroom.install.runtime.wait_ready", lambda manifest, timeout_seconds=45: True
     )
-    result = _ensure_proxy(8787, False)
     assert result is None
     assert calls == ["start:default"]
 def test_ensure_proxy_rejects_unhealthy_persistent_deployment(monkeypatch) -> None:
-    monkeypatch.setattr("headroom.cli.wrap._check_proxy", lambda port: True)
-    monkeypatch.setattr("headroom.cli.wrap._find_persistent_manifest", lambda port: _Manifest())
     monkeypatch.setattr("headroom.install.health.probe_ready", lambda url: False)
-    monkeypatch.setattr("headroom.cli.wrap._recover_persistent_proxy", lambda port: False)
     try:
-        _ensure_proxy(8787, False)
     except click.ClickException as exc:
         assert "is not healthy" in str(exc)
     else:
         raise AssertionError("expected unhealthy persistent deployment to raise")
 def test_find_persistent_manifest_prefers_default_profile(monkeypatch) -> None:
     class DefaultManifest:
         profile = "default"
@@ -86,23 +102,23 @@ def test_find_persistent_manifest_prefers_default_profile(monkeypatch) -> None:
         lambda: [OtherManifest(), DefaultManifest()],
     )
-    manifest = _find_persistent_manifest(8787)
     assert manifest.profile == "default"
 def test_recover_persistent_proxy_reuses_healthy_deployment(monkeypatch) -> None:
-    monkeypatch.setattr("headroom.cli.wrap._find_persistent_manifest", lambda port: _Manifest())
     monkeypatch.setattr("headroom.install.health.probe_ready", lambda url: True)
-    assert _recover_persistent_proxy(8787) is True
 def test_recover_persistent_proxy_warns_for_task_deployment(monkeypatch) -> None:
     class TaskManifest(_Manifest):
         supervisor_kind = "task"
-    monkeypatch.setattr("headroom.cli.wrap._find_persistent_manifest", lambda port: TaskManifest())
     monkeypatch.setattr("headroom.install.health.probe_ready", lambda url: False)
-    assert _recover_persistent_proxy(8787) is False

 import click
+import headroom.cli.wrap as wrap_cli
 class _Manifest:
 def test_ensure_proxy_recovers_matching_persistent_deployment(monkeypatch) -> None:
     calls: list[str] = []
+    monkeypatch.setattr(wrap_cli, "_check_proxy", lambda port: False)
+    monkeypatch.setattr(wrap_cli, "_find_persistent_manifest", lambda port: _Manifest())
     monkeypatch.setattr("headroom.install.health.probe_ready", lambda url: False)
     monkeypatch.setattr(
         "headroom.install.supervisors.start_supervisor",
         "headroom.install.runtime.wait_ready", lambda manifest, timeout_seconds=45: True
     )
     monkeypatch.setattr(
+        wrap_cli,
+        "_start_proxy",
         lambda *args, **kwargs: (_ for _ in ()).throw(
             AssertionError("ephemeral proxy should not start")
         ),
     )
+    result = wrap_cli._ensure_proxy(8787, False)
     assert result is None
     assert calls == ["start:default"]
 def test_ensure_proxy_recovers_persistent_deployment_when_socket_is_bound(monkeypatch) -> None:
     calls: list[str] = []
+    monkeypatch.setattr(wrap_cli, "_check_proxy", lambda port: True)
+    monkeypatch.setattr(wrap_cli, "_find_persistent_manifest", lambda port: _Manifest())
     monkeypatch.setattr("headroom.install.health.probe_ready", lambda url: False)
     monkeypatch.setattr(
         "headroom.install.supervisors.start_supervisor",
         "headroom.install.runtime.wait_ready", lambda manifest, timeout_seconds=45: True
     )
+    result = wrap_cli._ensure_proxy(8787, False)
     assert result is None
     assert calls == ["start:default"]
 def test_ensure_proxy_rejects_unhealthy_persistent_deployment(monkeypatch) -> None:
+    monkeypatch.setattr(wrap_cli, "_check_proxy", lambda port: True)
+    monkeypatch.setattr(wrap_cli, "_find_persistent_manifest", lambda port: _Manifest())
     monkeypatch.setattr("headroom.install.health.probe_ready", lambda url: False)
+    monkeypatch.setattr(wrap_cli, "_recover_persistent_proxy", lambda port: False)
     try:
+        wrap_cli._ensure_proxy(8787, False)
     except click.ClickException as exc:
         assert "is not healthy" in str(exc)
     else:
         raise AssertionError("expected unhealthy persistent deployment to raise")
+def test_ensure_proxy_falls_back_when_persistent_manifest_is_stale(monkeypatch) -> None:
+    calls: list[str] = []
+    monkeypatch.setattr(wrap_cli, "_check_proxy", lambda port: False)
+    monkeypatch.setattr(wrap_cli, "_find_persistent_manifest", lambda port: _Manifest())
+    monkeypatch.setattr("headroom.install.health.probe_ready", lambda url: False)
+    monkeypatch.setattr(wrap_cli, "_recover_persistent_proxy", lambda port: False)
+    monkeypatch.setattr(wrap_cli, "_start_proxy", lambda *args, **kwargs: calls.append("start"))
+    result = wrap_cli._ensure_proxy(8787, False)
+    assert result is None
+    assert calls == ["start"]
 def test_find_persistent_manifest_prefers_default_profile(monkeypatch) -> None:
     class DefaultManifest:
         profile = "default"
         lambda: [OtherManifest(), DefaultManifest()],
     )
+    manifest = wrap_cli._find_persistent_manifest(8787)
     assert manifest.profile == "default"
 def test_recover_persistent_proxy_reuses_healthy_deployment(monkeypatch) -> None:
+    monkeypatch.setattr(wrap_cli, "_find_persistent_manifest", lambda port: _Manifest())
     monkeypatch.setattr("headroom.install.health.probe_ready", lambda url: True)
+    assert wrap_cli._recover_persistent_proxy(8787) is True
 def test_recover_persistent_proxy_warns_for_task_deployment(monkeypatch) -> None:
     class TaskManifest(_Manifest):
         supervisor_kind = "task"
+    monkeypatch.setattr(wrap_cli, "_find_persistent_manifest", lambda port: TaskManifest())
     monkeypatch.setattr("headroom.install.health.probe_ready", lambda url: False)
+    assert wrap_cli._recover_persistent_proxy(8787) is False

tests/test_copilot_auth.py CHANGED Viewed

@@ -1,8 +1,11 @@
 from __future__ import annotations
 import json
 import time
 from pathlib import Path
 import pytest
@@ -14,6 +17,68 @@ def test_read_cached_oauth_token_prefers_env(monkeypatch: pytest.MonkeyPatch) ->
     assert copilot_auth.read_cached_oauth_token() == "gho-env"
 def test_read_cached_oauth_token_reads_hosts_file(
     monkeypatch: pytest.MonkeyPatch, tmp_path: Path
 ) -> None:
@@ -31,6 +96,8 @@ def test_read_cached_oauth_token_reads_hosts_file(
     )
     monkeypatch.delenv("GITHUB_COPILOT_TOKEN", raising=False)
     monkeypatch.setenv("GITHUB_COPILOT_TOKEN_FILE", str(hosts))
     assert copilot_auth.read_cached_oauth_token() == "gho-file"
@@ -44,10 +111,68 @@ def test_read_cached_oauth_token_skips_expired_entries(
         encoding="utf-8",
     )
     monkeypatch.setenv("GITHUB_COPILOT_TOKEN_FILE", str(hosts))
     assert copilot_auth.read_cached_oauth_token() is None
 def test_resolve_client_bearer_token_prefers_api_token(monkeypatch: pytest.MonkeyPatch) -> None:
     monkeypatch.setenv("GITHUB_COPILOT_API_TOKEN", "copilot-api")
     monkeypatch.setenv("GITHUB_COPILOT_TOKEN", "gho-oauth")
@@ -55,16 +180,36 @@ def test_resolve_client_bearer_token_prefers_api_token(monkeypatch: pytest.Monke
     assert copilot_auth.resolve_client_bearer_token() == "copilot-api"
 def test_is_copilot_api_url_matches_expected_hosts() -> None:
     assert copilot_auth.is_copilot_api_url("https://api.githubcopilot.com/v1/chat/completions")
     assert copilot_auth.is_copilot_api_url("wss://api.githubcopilot.com/v1/responses")
     assert not copilot_auth.is_copilot_api_url("https://api.openai.com/v1/chat/completions")
-@pytest.mark.asyncio
-async def test_apply_copilot_api_auth_replaces_authorization(
-    monkeypatch: pytest.MonkeyPatch,
-) -> None:
     async def fake_get_api_token() -> copilot_auth.CopilotAPIToken:
         return copilot_auth.CopilotAPIToken(
             token="copilot-session",
@@ -78,18 +223,48 @@ async def test_apply_copilot_api_auth_replaces_authorization(
         fake_get_api_token,
     )
-    headers = await copilot_auth.apply_copilot_api_auth(
-        {"authorization": "Bearer downstream-token"},
-        url="https://api.githubcopilot.com/v1/chat/completions",
     )
     assert headers["Authorization"] == "Bearer copilot-session"
     assert "authorization" not in headers
-@pytest.mark.asyncio
-async def test_token_provider_exchanges_and_caches(monkeypatch: pytest.MonkeyPatch) -> None:
     monkeypatch.setenv("GITHUB_COPILOT_TOKEN", "gho-oauth")
     provider = copilot_auth.CopilotTokenProvider()
     calls = {"count": 0}
@@ -106,9 +281,84 @@ async def test_token_provider_exchanges_and_caches(monkeypatch: pytest.MonkeyPat
     monkeypatch.setattr(provider, "_exchange_token_sync", staticmethod(fake_exchange))
-    first = await provider.get_api_token()
-    second = await provider.get_api_token()
     assert first.token == "copilot-api"
     assert second.token == "copilot-api"
     assert calls["count"] == 1

 from __future__ import annotations
+import asyncio
 import json
 import time
 from pathlib import Path
+from types import SimpleNamespace
+from urllib import error as urllib_error
 import pytest
     assert copilot_auth.read_cached_oauth_token() == "gho-env"
+def test_should_exchange_oauth_token_supports_truthy_values(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    for raw in ("1", "true", "YES", "On"):
+        monkeypatch.setenv("GITHUB_COPILOT_USE_TOKEN_EXCHANGE", raw)
+        assert copilot_auth._should_exchange_oauth_token() is True
+    monkeypatch.setenv("GITHUB_COPILOT_USE_TOKEN_EXCHANGE", "off")
+    assert copilot_auth._should_exchange_oauth_token() is False
+def test_resolve_token_file_paths_prefers_override(monkeypatch: pytest.MonkeyPatch) -> None:
+    monkeypatch.setenv("GITHUB_COPILOT_TOKEN_FILE", "~/custom-token.json")
+    paths = copilot_auth._resolve_token_file_paths()
+    assert paths == [Path("~/custom-token.json").expanduser()]
+def test_resolve_token_file_paths_includes_localappdata_and_config(
+    monkeypatch: pytest.MonkeyPatch, tmp_path: Path
+) -> None:
+    monkeypatch.delenv("GITHUB_COPILOT_TOKEN_FILE", raising=False)
+    monkeypatch.setenv("LOCALAPPDATA", str(tmp_path / "local"))
+    monkeypatch.setattr(copilot_auth.Path, "home", staticmethod(lambda: tmp_path / "home"))
+    paths = copilot_auth._resolve_token_file_paths()
+    assert paths == [
+        tmp_path / "local" / "github-copilot" / "apps.json",
+        tmp_path / "local" / "github-copilot" / "hosts.json",
+        tmp_path / "home" / ".config" / "github-copilot" / "apps.json",
+        tmp_path / "home" / ".config" / "github-copilot" / "hosts.json",
+    ]
+def test_read_cached_oauth_token_falls_back_to_gh_cli(monkeypatch: pytest.MonkeyPatch) -> None:
+    monkeypatch.delenv("GITHUB_COPILOT_GITHUB_TOKEN", raising=False)
+    monkeypatch.delenv("GITHUB_COPILOT_TOKEN", raising=False)
+    monkeypatch.delenv("GITHUB_TOKEN", raising=False)
+    monkeypatch.delenv("COPILOT_GITHUB_TOKEN", raising=False)
+    monkeypatch.setattr(copilot_auth, "_read_windows_copilot_cli_oauth_token", lambda: None)
+    monkeypatch.setattr(copilot_auth, "_read_gh_cli_oauth_token", lambda: "gho-gh-cli")
+    assert copilot_auth.read_cached_oauth_token() == "gho-gh-cli"
+def test_read_cached_oauth_token_prefers_copilot_cli_windows_token(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    monkeypatch.delenv("GITHUB_COPILOT_GITHUB_TOKEN", raising=False)
+    monkeypatch.delenv("GITHUB_COPILOT_TOKEN", raising=False)
+    monkeypatch.delenv("GITHUB_TOKEN", raising=False)
+    monkeypatch.delenv("COPILOT_GITHUB_TOKEN", raising=False)
+    monkeypatch.setattr(
+        copilot_auth, "_read_windows_copilot_cli_oauth_token", lambda: "gho-copilot"
+    )
+    monkeypatch.setattr(copilot_auth, "_read_gh_cli_oauth_token", lambda: "gho-gh-cli")
+    assert copilot_auth.read_cached_oauth_token() == "gho-copilot"
 def test_read_cached_oauth_token_reads_hosts_file(
     monkeypatch: pytest.MonkeyPatch, tmp_path: Path
 ) -> None:
     )
     monkeypatch.delenv("GITHUB_COPILOT_TOKEN", raising=False)
     monkeypatch.setenv("GITHUB_COPILOT_TOKEN_FILE", str(hosts))
+    monkeypatch.setattr(copilot_auth, "_read_windows_copilot_cli_oauth_token", lambda: None)
+    monkeypatch.setattr(copilot_auth, "_read_gh_cli_oauth_token", lambda: None)
     assert copilot_auth.read_cached_oauth_token() == "gho-file"
         encoding="utf-8",
     )
     monkeypatch.setenv("GITHUB_COPILOT_TOKEN_FILE", str(hosts))
+    monkeypatch.setattr(copilot_auth, "_read_windows_copilot_cli_oauth_token", lambda: None)
+    monkeypatch.setattr(copilot_auth, "_read_gh_cli_oauth_token", lambda: None)
     assert copilot_auth.read_cached_oauth_token() is None
+def test_read_gh_cli_oauth_token_uses_hostname(monkeypatch: pytest.MonkeyPatch) -> None:
+    calls: list[list[str]] = []
+    class CompletedProcess:
+        def __init__(self) -> None:
+            self.returncode = 0
+            self.stdout = "gho-gh-cli\n"
+    def fake_run(*args: object, **kwargs: object) -> CompletedProcess:
+        calls.append(list(args[0]))
+        assert kwargs["capture_output"] is True
+        assert kwargs["check"] is False
+        return CompletedProcess()
+    monkeypatch.setenv("GITHUB_COPILOT_HOST", "example.ghe.com")
+    monkeypatch.setattr(copilot_auth.subprocess, "run", fake_run)
+    assert copilot_auth._read_gh_cli_oauth_token() == "gho-gh-cli"
+    assert calls == [["gh", "auth", "token", "--hostname", "example.ghe.com"]]
+def test_read_gh_cli_oauth_token_returns_none_when_invocation_fails(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    def fake_run(*args: object, **kwargs: object) -> None:  # noqa: ANN002, ANN003
+        raise OSError("gh missing")
+    monkeypatch.setattr(copilot_auth.subprocess, "run", fake_run)
+    assert copilot_auth._read_gh_cli_oauth_token() is None
+def test_read_gh_cli_oauth_token_returns_none_for_nonzero_exit(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    monkeypatch.setattr(
+        copilot_auth.subprocess,
+        "run",
+        lambda *args, **kwargs: SimpleNamespace(returncode=1, stdout="ignored"),
+    )
+    assert copilot_auth._read_gh_cli_oauth_token() is None
+def test_read_gh_cli_oauth_token_returns_none_for_blank_stdout(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    monkeypatch.setattr(
+        copilot_auth.subprocess,
+        "run",
+        lambda *args, **kwargs: SimpleNamespace(returncode=0, stdout=" \n"),
+    )
+    assert copilot_auth._read_gh_cli_oauth_token() is None
 def test_resolve_client_bearer_token_prefers_api_token(monkeypatch: pytest.MonkeyPatch) -> None:
     monkeypatch.setenv("GITHUB_COPILOT_API_TOKEN", "copilot-api")
     monkeypatch.setenv("GITHUB_COPILOT_TOKEN", "gho-oauth")
     assert copilot_auth.resolve_client_bearer_token() == "copilot-api"
+def test_has_oauth_auth_false_when_no_tokens(monkeypatch: pytest.MonkeyPatch) -> None:
+    monkeypatch.setattr(copilot_auth, "resolve_client_bearer_token", lambda: None)
+    assert copilot_auth.has_oauth_auth() is False
 def test_is_copilot_api_url_matches_expected_hosts() -> None:
     assert copilot_auth.is_copilot_api_url("https://api.githubcopilot.com/v1/chat/completions")
     assert copilot_auth.is_copilot_api_url("wss://api.githubcopilot.com/v1/responses")
     assert not copilot_auth.is_copilot_api_url("https://api.openai.com/v1/chat/completions")
+def test_build_copilot_upstream_url_strips_v1_only_for_copilot_hosts() -> None:
+    assert (
+        copilot_auth.build_copilot_upstream_url(
+            "https://api.githubcopilot.com",
+            "/v1/chat/completions",
+        )
+        == "https://api.githubcopilot.com/chat/completions"
+    )
+    assert (
+        copilot_auth.build_copilot_upstream_url(
+            "https://api.openai.com",
+            "/v1/chat/completions",
+        )
+        == "https://api.openai.com/v1/chat/completions"
+    )
+def test_apply_copilot_api_auth_replaces_authorization(monkeypatch: pytest.MonkeyPatch) -> None:
     async def fake_get_api_token() -> copilot_auth.CopilotAPIToken:
         return copilot_auth.CopilotAPIToken(
             token="copilot-session",
         fake_get_api_token,
     )
+    headers = asyncio.run(
+        copilot_auth.apply_copilot_api_auth(
+            {"authorization": "Bearer downstream-token"},
+            url="https://api.githubcopilot.com/v1/chat/completions",
+        )
     )
     assert headers["Authorization"] == "Bearer copilot-session"
     assert "authorization" not in headers
+def test_token_provider_reuses_oauth_token_without_exchange(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    monkeypatch.setenv("GITHUB_COPILOT_TOKEN", "gho-oauth")
+    provider = copilot_auth.CopilotTokenProvider()
+    calls = {"count": 0}
+    def fake_exchange(headers: dict[str, str]) -> dict[str, object]:
+        calls["count"] += 1
+        return {
+            "token": "copilot-api",
+            "expires_at": int(time.time()) + 3600,
+            "refresh_in": 1200,
+            "endpoints": {"api": "https://api.githubcopilot.com"},
+            "sku": "copilot_individual",
+        }
+    monkeypatch.setattr(provider, "_exchange_token_sync", staticmethod(fake_exchange))
+    first = asyncio.run(provider.get_api_token())
+    second = asyncio.run(provider.get_api_token())
+    assert first.token == "gho-oauth"
+    assert second.token == "gho-oauth"
+    assert calls["count"] == 0
+def test_token_provider_can_exchange_when_enabled(monkeypatch: pytest.MonkeyPatch) -> None:
     monkeypatch.setenv("GITHUB_COPILOT_TOKEN", "gho-oauth")
+    monkeypatch.setenv("GITHUB_COPILOT_USE_TOKEN_EXCHANGE", "true")
     provider = copilot_auth.CopilotTokenProvider()
     calls = {"count": 0}
     monkeypatch.setattr(provider, "_exchange_token_sync", staticmethod(fake_exchange))
+    first = asyncio.run(provider.get_api_token())
+    second = asyncio.run(provider.get_api_token())
     assert first.token == "copilot-api"
     assert second.token == "copilot-api"
     assert calls["count"] == 1
+def test_token_provider_prefers_explicit_api_token(monkeypatch: pytest.MonkeyPatch) -> None:
+    monkeypatch.setenv("GITHUB_COPILOT_API_TOKEN", "copilot-api")
+    monkeypatch.setenv("GITHUB_COPILOT_API_URL", "https://api.githubcopilot.com")
+    token = asyncio.run(copilot_auth.CopilotTokenProvider().get_api_token())
+    assert token.token == "copilot-api"
+    assert token.api_url == "https://api.githubcopilot.com"
+def test_token_provider_raises_without_oauth_token(monkeypatch: pytest.MonkeyPatch) -> None:
+    monkeypatch.delenv("GITHUB_COPILOT_API_TOKEN", raising=False)
+    monkeypatch.setattr(copilot_auth, "read_cached_oauth_token", lambda: None)
+    with pytest.raises(RuntimeError, match="No GitHub Copilot OAuth token"):
+        asyncio.run(copilot_auth.CopilotTokenProvider().get_api_token())
+def test_exchange_token_raises_when_exchange_returns_empty_token(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    provider = copilot_auth.CopilotTokenProvider()
+    monkeypatch.setattr(
+        provider,
+        "_exchange_token_sync",
+        staticmethod(lambda headers: {"token": "", "expires_at": int(time.time()) + 1}),
+    )
+    with pytest.raises(RuntimeError, match="empty token"):
+        asyncio.run(provider._exchange_token("gho-oauth"))
+def test_exchange_token_sync_raises_for_http_error(monkeypatch: pytest.MonkeyPatch) -> None:
+    class DummyResponse:
+        def read(self) -> bytes:
+            return b'{"message":"Not Found"}'
+        def close(self) -> None:
+            return None
+    def fake_urlopen(request, timeout: float):  # noqa: ANN001, ANN202
+        raise urllib_error.HTTPError(
+            url=request.full_url,
+            code=404,
+            msg="Not Found",
+            hdrs=None,
+            fp=DummyResponse(),
+        )
+    monkeypatch.setattr(copilot_auth.urllib_request, "urlopen", fake_urlopen)
+    with pytest.raises(RuntimeError, match="HTTP 404"):
+        copilot_auth.CopilotTokenProvider._exchange_token_sync({"Authorization": "token test"})
+def test_apply_copilot_api_auth_returns_original_headers_for_non_copilot_url() -> None:
+    headers = asyncio.run(
+        copilot_auth.apply_copilot_api_auth(
+            {"authorization": "Bearer downstream-token"},
+            url="https://api.openai.com/v1/chat/completions",
+        )
+    )
+    assert headers == {"authorization": "Bearer downstream-token"}
+def test_read_windows_copilot_cli_oauth_token_returns_none_without_windll(
+    monkeypatch: pytest.MonkeyPatch,
+) -> None:
+    monkeypatch.setattr(copilot_auth.os, "name", "nt")
+    monkeypatch.delattr(copilot_auth.ctypes, "WinDLL", raising=False)
+    assert copilot_auth._read_windows_copilot_cli_oauth_token() is None

tests/test_learn/test_writer.py CHANGED Viewed

@@ -3,7 +3,12 @@
 from pathlib import Path
 from headroom.learn.models import ProjectInfo, Recommendation, RecommendationTarget
-from headroom.learn.writer import _MARKER_END, _MARKER_START, ClaudeCodeWriter
 def _project(tmp_path: Path) -> ProjectInfo:
@@ -80,26 +85,114 @@ class TestClaudeCodeWriter:
         assert "Existing instructions here" in content
         assert "Use uv" in content
-    def test_replaces_existing_headroom_section(self, tmp_path):
         proj = _project(tmp_path)
         claude_md = proj.project_path / "CLAUDE.md"
-        old_section = (
-            f"# My Project\n\n{_MARKER_START}\n## Old Patterns\nold stuff\n{_MARKER_END}\n"
         )
-        claude_md.write_text(old_section)
         writer = ClaudeCodeWriter()
-        recs = [_rec(RecommendationTarget.CONTEXT_FILE, "Environment", "- New stuff")]
         writer.write(recs, proj, dry_run=False)
         content = claude_md.read_text()
-        assert "old stuff" not in content
-        assert "New stuff" in content
         assert "My Project" in content
-        # Should have exactly one marker pair
         assert content.count(_MARKER_START) == 1
         assert content.count(_MARKER_END) == 1
     def test_appends_to_existing_memory_md(self, tmp_path):
         proj = _project(tmp_path)
         memory_md = proj.data_path / "memory" / "MEMORY.md"
@@ -113,3 +206,36 @@ class TestClaudeCodeWriter:
         assert "Existing Memory" in content
         assert "Some facts" in content
         assert "New pattern" in content

 from pathlib import Path
 from headroom.learn.models import ProjectInfo, Recommendation, RecommendationTarget
+from headroom.learn.writer import (
+    _MARKER_END,
+    _MARKER_START,
+    ClaudeCodeWriter,
+    _parse_prior_recommendations,
+)
 def _project(tmp_path: Path) -> ProjectInfo:
         assert "Existing instructions here" in content
         assert "Use uv" in content
+    def test_carries_forward_prior_sections_not_resurfaced(self, tmp_path):
+        """Re-running learn must not drop prior sections that the new run didn't re-surface."""
         proj = _project(tmp_path)
         claude_md = proj.project_path / "CLAUDE.md"
+        prior_block = (
+            f"# My Project\n\n{_MARKER_START}\n"
+            "## Headroom Learned Patterns\n"
+            "*Auto-generated by `headroom learn` on 2026-01-01 — do not edit manually*\n\n"
+            "### Large Files\n"
+            "*~15,000 tokens/session saved*\n"
+            "- src/App.tsx is huge\n\n"
+            "### Build Commands\n"
+            "- cargo check from src-tauri/\n\n"
+            f"{_MARKER_END}\n"
         )
+        claude_md.write_text(prior_block)
         writer = ClaudeCodeWriter()
+        recs = [_rec(RecommendationTarget.CONTEXT_FILE, "Environment", "- Use uv")]
         writer.write(recs, proj, dry_run=False)
         content = claude_md.read_text()
         assert "My Project" in content
+        # New section present
+        assert "Use uv" in content
+        # Prior sections preserved (neither heading re-surfaced by the new run)
+        assert "### Large Files" in content
+        assert "src/App.tsx is huge" in content
+        assert "### Build Commands" in content
+        assert "cargo check from src-tauri/" in content
+        # Tokens annotation round-tripped
+        assert "*~15,000 tokens/session saved*" in content
+        # Still exactly one marker pair
         assert content.count(_MARKER_START) == 1
         assert content.count(_MARKER_END) == 1
+    def test_new_run_overrides_same_named_prior_section(self, tmp_path):
+        """When a section appears in both prior and new, the new run wins."""
+        proj = _project(tmp_path)
+        claude_md = proj.project_path / "CLAUDE.md"
+        prior_block = (
+            f"{_MARKER_START}\n"
+            "## Headroom Learned Patterns\n"
+            "*Auto-generated by `headroom learn` on 2026-01-01 — do not edit manually*\n\n"
+            "### Environment\n"
+            "- old stale environment note\n\n"
+            f"{_MARKER_END}\n"
+        )
+        claude_md.write_text(prior_block)
+        writer = ClaudeCodeWriter()
+        recs = [_rec(RecommendationTarget.CONTEXT_FILE, "Environment", "- fresh environment note")]
+        writer.write(recs, proj, dry_run=False)
+        content = claude_md.read_text()
+        assert "fresh environment note" in content
+        assert "old stale environment note" not in content
+        # Only one Environment section in the final block
+        assert content.count("### Environment") == 1
+    def test_memory_md_carry_forward(self, tmp_path):
+        """Carry-forward also works for MEMORY.md."""
+        proj = _project(tmp_path)
+        memory_md = proj.data_path / "memory" / "MEMORY.md"
+        memory_md.write_text(
+            f"{_MARKER_START}\n"
+            "## Headroom Learned Patterns\n"
+            "*Auto-generated by `headroom learn` on 2026-01-01 — do not edit manually*\n\n"
+            "### User Workflow Preferences\n"
+            "- User rejects sleep-based polling\n\n"
+            f"{_MARKER_END}\n"
+        )
+        writer = ClaudeCodeWriter()
+        recs = [
+            _rec(RecommendationTarget.MEMORY_FILE, "Related Codebases", "- web app at ~/Code/web")
+        ]
+        writer.write(recs, proj, dry_run=False)
+        content = memory_md.read_text()
+        assert "User rejects sleep-based polling" in content
+        assert "web app at ~/Code/web" in content
+    def test_section_without_tokens_annotation_round_trips(self, tmp_path):
+        """Prior sections emitted without a tokens annotation must still carry forward cleanly."""
+        proj = _project(tmp_path)
+        claude_md = proj.project_path / "CLAUDE.md"
+        claude_md.write_text(
+            f"{_MARKER_START}\n"
+            "## Headroom Learned Patterns\n"
+            "*Auto-generated by `headroom learn` on 2026-01-01 — do not edit manually*\n\n"
+            "### Misc\n"
+            "- one-liner pattern\n\n"
+            f"{_MARKER_END}\n"
+        )
+        writer = ClaudeCodeWriter()
+        recs = [_rec(RecommendationTarget.CONTEXT_FILE, "Other", "- new one")]
+        writer.write(recs, proj, dry_run=False)
+        content = claude_md.read_text()
+        assert "### Misc" in content
+        assert "one-liner pattern" in content
+        # No spurious tokens annotation injected for a prior that didn't have one
+        misc_idx = content.index("### Misc")
+        after_misc = content[misc_idx : misc_idx + 200]
+        assert "tokens/session saved" not in after_misc
     def test_appends_to_existing_memory_md(self, tmp_path):
         proj = _project(tmp_path)
         memory_md = proj.data_path / "memory" / "MEMORY.md"
         assert "Existing Memory" in content
         assert "Some facts" in content
         assert "New pattern" in content
+class TestParsePriorRecommendations:
+    """Direct coverage for _parse_prior_recommendations edge cases."""
+    def test_no_marker_block_returns_empty(self):
+        """A file without any marker block yields no prior recommendations."""
+        assert _parse_prior_recommendations("# Project\n\nJust a regular README.\n") == []
+    def test_empty_marker_block_yields_no_recs(self):
+        """A marker block with nothing between the markers yields no recs."""
+        content = f"prefix\n{_MARKER_START}\n{_MARKER_END}\nsuffix\n"
+        assert _parse_prior_recommendations(content) == []
+    def test_marker_block_with_empty_heading_is_skipped(self):
+        """A stray `### ` (empty heading) inside the block is skipped, not raised."""
+        # Leading `### ` with no heading text, followed by a real section.
+        content = (
+            f"{_MARKER_START}\n"
+            "## Headroom Learned Patterns\n"
+            "### \n"
+            "some orphan content\n"
+            "\n"
+            "### Real Section\n"
+            "- real bullet\n"
+            "\n"
+            f"{_MARKER_END}\n"
+        )
+        recs = _parse_prior_recommendations(content)
+        # Only the real section is parsed; the empty-heading entry is dropped.
+        assert len(recs) == 1
+        assert recs[0].section == "Real Section"
+        assert "real bullet" in recs[0].content

tests/test_proxy_copilot_auth_hooks.py CHANGED Viewed

@@ -1,5 +1,6 @@
 from __future__ import annotations
 import importlib.util
 import sys
 import types
@@ -9,6 +10,27 @@ from types import SimpleNamespace
 import pytest
 ROOT = Path(__file__).resolve().parents[1]
 def _load_handler_module(monkeypatch: pytest.MonkeyPatch, module_name: str, relative_path: str):
@@ -54,8 +76,7 @@ def _load_handler_module(monkeypatch: pytest.MonkeyPatch, module_name: str, rela
     return module
-@pytest.mark.asyncio
-async def test_openai_passthrough_applies_copilot_auth(monkeypatch: pytest.MonkeyPatch) -> None:
     openai_mod = _load_handler_module(
         monkeypatch,
         "tests.headroom_proxy_handlers_openai",
@@ -100,20 +121,21 @@ async def test_openai_passthrough_applies_copilot_auth(monkeypatch: pytest.Monke
     request.body = body
     handler = Dummy()
-    response = await handler.handle_passthrough(
-        request,
-        "https://api.githubcopilot.com",
-        "models",
-        "openai",
     )
-    assert seen["url"] == "https://api.githubcopilot.com/v1/models"
     assert seen["request_kwargs"]["headers"] == {"Authorization": "Bearer upstream-token"}
     assert response.status_code == 200
-@pytest.mark.asyncio
-async def test_streaming_response_applies_copilot_auth(monkeypatch: pytest.MonkeyPatch) -> None:
     streaming_mod = _load_handler_module(
         monkeypatch,
         "tests.headroom_proxy_handlers_streaming",
@@ -155,19 +177,21 @@ async def test_streaming_response_applies_copilot_auth(monkeypatch: pytest.Monke
             return SimpleNamespace(headers={}, status_code=200)
     handler = Dummy()
-    response = await handler._stream_response(
-        url="https://api.githubcopilot.com/v1/responses",
-        headers={"authorization": "Bearer downstream"},
-        body={"model": "gpt-4o"},
-        provider="openai",
-        model="gpt-4o",
-        request_id="req-test",
-        original_tokens=0,
-        optimized_tokens=0,
-        tokens_saved=0,
-        transforms_applied=[],
-        tags={},
-        optimization_latency=0.0,
     )
     assert seen["url"] == "https://api.githubcopilot.com/v1/responses"

 from __future__ import annotations
+import asyncio
 import importlib.util
 import sys
 import types
 import pytest
 ROOT = Path(__file__).resolve().parents[1]
+_ISOLATED_MODULE_NAMES = (  # sys.modules keys that restore_isolated_modules snapshots before, and restores after, each test
+    "headroom.proxy",
+    "headroom.proxy.handlers",
+    "httpx",
+    "fastapi.responses",
+    "tests.headroom_proxy_handlers_openai",  # module_name passed to _load_handler_module by the passthrough test
+    "tests.headroom_proxy_handlers_streaming",  # module_name passed to _load_handler_module by the streaming test
+)
+@pytest.fixture(autouse=True)
+def restore_isolated_modules() -> None:  # Autouse fixture: put the sys.modules entries named in _ISOLATED_MODULE_NAMES back after every test.
+    saved_modules = {name: sys.modules.get(name) for name in _ISOLATED_MODULE_NAMES}  # None records "not imported before the test"
+    try:
+        yield  # NOTE(review): generator fixture annotated `-> None`; Iterator[None] would be the accurate hint
+    finally:
+        for name in _ISOLATED_MODULE_NAMES:
+            sys.modules.pop(name, None)  # drop whatever the test left registered under these names
+        for name, module in saved_modules.items():
+            if module is not None:  # names that were absent before the test stay absent
+                sys.modules[name] = module
 def _load_handler_module(monkeypatch: pytest.MonkeyPatch, module_name: str, relative_path: str):
     return module
+def test_openai_passthrough_applies_copilot_auth(monkeypatch: pytest.MonkeyPatch) -> None:
     openai_mod = _load_handler_module(
         monkeypatch,
         "tests.headroom_proxy_handlers_openai",
     request.body = body
     handler = Dummy()
+    response = asyncio.run(
+        handler.handle_passthrough(
+            request,
+            "https://api.githubcopilot.com",
+            "models",
+            "openai",
+        )
     )
+    assert seen["url"] == "https://api.githubcopilot.com/models"
     assert seen["request_kwargs"]["headers"] == {"Authorization": "Bearer upstream-token"}
     assert response.status_code == 200
+def test_streaming_response_applies_copilot_auth(monkeypatch: pytest.MonkeyPatch) -> None:
     streaming_mod = _load_handler_module(
         monkeypatch,
         "tests.headroom_proxy_handlers_streaming",
             return SimpleNamespace(headers={}, status_code=200)
     handler = Dummy()
+    response = asyncio.run(
+        handler._stream_response(
+            url="https://api.githubcopilot.com/v1/responses",
+            headers={"authorization": "Bearer downstream"},
+            body={"model": "gpt-4o"},
+            provider="openai",
+            model="gpt-4o",
+            request_id="req-test",
+            original_tokens=0,
+            optimized_tokens=0,
+            tokens_saved=0,
+            transforms_applied=[],
+            tags={},
+            optimization_latency=0.0,
+        )
     )
     assert seen["url"] == "https://api.githubcopilot.com/v1/responses"

tests/test_proxy_dashboard_stats_cache.py ADDED Viewed

	@@ -0,0 +1,144 @@

+from __future__ import annotations
+import json
+import shutil
+import subprocess
+from types import SimpleNamespace
+import pytest
+from headroom.dashboard import get_dashboard_html
+from headroom.proxy import helpers as proxy_helpers
+class _StatsStub:  # Test double: get_stats() bumps a counter in a shared dict and returns a canned payload.
+    def __init__(self, calls: dict[str, int], key: str, payload: dict):
+        self._calls = calls  # shared counter dict; mutated in place by get_stats()
+        self._key = key  # which counter in `calls` this stub increments
+        self._payload = payload
+    def get_stats(self) -> dict:
+        self._calls[self._key] += 1  # expects `key` to already be present in `calls`
+        return dict(self._payload)  # shallow copy, so the stored dict itself is never handed out
+class _ToinStub:  # Stand-in returned by the patched server.get_toin in the /stats test; exposes only get_stats().
+    def get_stats(self) -> dict:
+        return {"patterns": 0}  # constant payload; the tests in this file never assert on it
+@pytest.fixture(autouse=True)
+def _reset_rtk_stats_cache() -> None:  # Autouse fixture: reset proxy_helpers._rtk_stats_cache before each test (no teardown step).
+    proxy_helpers._rtk_stats_cache.update({"expires_at": 0.0, "has_value": False, "value": None})  # in-place update keeps the same dict object
+def test_get_rtk_stats_memoizes_subprocess_calls(monkeypatch: pytest.MonkeyPatch) -> None:  # _get_rtk_stats() must reuse its result until RTK_STATS_CACHE_TTL_SECONDS elapses.
+    now = {"value": 100.0}  # mutable fake clock read by the patched proxy_helpers.time.monotonic
+    calls = {"run": 0}  # counts invocations of the fake subprocess.run
+    def _fake_run(*args, **kwargs):
+        calls["run"] += 1
+        return SimpleNamespace(
+            returncode=0,
+            stdout=json.dumps({"summary": {"total_commands": 7, "total_saved": 1234}}),
+        )
+    monkeypatch.setattr(proxy_helpers.time, "monotonic", lambda: now["value"])
+    monkeypatch.setattr(shutil, "which", lambda name: "/usr/bin/rtk")  # presumably how _get_rtk_stats detects an installed rtk; verify
+    monkeypatch.setattr(subprocess, "run", _fake_run)
+    first = proxy_helpers._get_rtk_stats()
+    second = proxy_helpers._get_rtk_stats()  # same clock value as `first`
+    assert first == second
+    assert first == {
+        "installed": True,
+        "total_commands": 7,
+        "tokens_saved": 1234,
+        "avg_savings_pct": 0.0,  # the fake payload supplies no average; 0.0 is the expected value
+    }
+    assert calls["run"] == 1  # the second call did not reach subprocess.run
+    now["value"] += proxy_helpers.RTK_STATS_CACHE_TTL_SECONDS + 0.1  # step the fake clock just past the TTL
+    third = proxy_helpers._get_rtk_stats()
+    assert third == first
+    assert calls["run"] == 2  # exactly one fresh subprocess call after expiry
+def test_stats_cached_query_reuses_short_ttl_snapshot(monkeypatch: pytest.MonkeyPatch) -> None:  # GET /stats?cached=1 should reuse a snapshot; plain GET /stats should recompute.
+    pytest.importorskip("fastapi")  # skip the test when fastapi is not installed
+    from fastapi.testclient import TestClient
+    import headroom.proxy.server as server
+    from headroom.proxy.server import ProxyConfig, create_app
+    calls = {"store": 0, "telemetry": 0, "feedback": 0, "rtk": 0}  # one counter per patched stats source
+    now = {"value": 100.0}  # mutable fake clock read by the patched server.time.monotonic
+    monkeypatch.setattr(server.time, "monotonic", lambda: now["value"])
+    monkeypatch.setattr(
+        server,
+        "get_compression_store",
+        lambda: _StatsStub(calls, "store", {"entry_count": 1, "max_entries": 100}),
+    )
+    monkeypatch.setattr(
+        server,
+        "get_telemetry_collector",
+        lambda: _StatsStub(calls, "telemetry", {"enabled": True}),
+    )
+    monkeypatch.setattr(
+        server,
+        "get_compression_feedback",
+        lambda: _StatsStub(calls, "feedback", {}),
+    )
+    def _fake_rtk_stats() -> dict[str, int | bool | float]:
+        calls["rtk"] += 1
+        return {
+            "installed": True,
+            "total_commands": 1,
+            "tokens_saved": 5,
+            "avg_savings_pct": 10.0,
+        }
+    monkeypatch.setattr(server, "_get_rtk_stats", _fake_rtk_stats)
+    monkeypatch.setattr(server, "get_toin", lambda: _ToinStub())
+    app = create_app(
+        ProxyConfig(
+            optimize=False,
+            cache_enabled=False,
+            rate_limit_enabled=False,
+            cost_tracking_enabled=False,
+            log_requests=False,
+            ccr_inject_tool=False,
+            ccr_handle_responses=False,
+            ccr_context_tracking=False,
+        )
+    )
+    with TestClient(app) as client:
+        first = client.get("/stats?cached=1")
+        second = client.get("/stats?cached=1")  # same clock value as `first`
+        now["value"] += 5.1  # NOTE(review): presumably just past the server's cached-stats TTL; confirm against server.py
+        third = client.get("/stats?cached=1")
+        uncached = client.get("/stats")
+    assert first.status_code == 200
+    assert second.status_code == 200
+    assert third.status_code == 200
+    assert uncached.status_code == 200
+    assert calls == {"store": 3, "telemetry": 3, "feedback": 3, "rtk": 3}  # 4 requests, 3 recomputations: `second` is expected to reuse the snapshot
+    assert first.json()["cli_filtering"]["tokens_saved"] == 5  # value supplied by _fake_rtk_stats
+def test_dashboard_uses_cached_stats_and_lazy_history_feed_polling() -> None:  # Substring checks on the rendered dashboard HTML; no browser or server involved.
+    html = get_dashboard_html()
+    assert "fetch('/stats?cached=1')" in html  # dashboard requests the cached stats variant
+    assert "@click=\"setViewMode('history')\"" in html
+    assert '@click="toggleFeed()"' in html
+    assert "this.viewMode === 'history'" in html  # presumably the guard that makes history polling lazy; verify in the template
+    assert "this.feedOpen" in html  # presumably the guard that makes feed polling lazy; verify in the template

tests/test_proxy_streaming_request_logger.py ADDED Viewed

	@@ -0,0 +1,174 @@

+"""Tests that the Anthropic streaming finalizer logs requests for the feed.
+Without this, the streaming Anthropic path (which is what Claude Code uses)
+silently bypassed the request logger, leaving `/stats.recent_requests` and
+`/transformations/feed` permanently empty even when `--log-messages` was set.
+The non-streaming Anthropic path and the Bedrock streaming path were the
+only ones that called `self.logger.log(...)`.
+"""
+from unittest.mock import AsyncMock, MagicMock
+import httpx
+import pytest
+from headroom.proxy.request_logger import RequestLogger
+from headroom.proxy.server import HeadroomProxy
+def _build_proxy_with_real_logger(*, log_full_messages: bool) -> HeadroomProxy:
+    """Build a HeadroomProxy with mocks for everything except the request logger,
+    so we can assert what actually gets recorded."""
+    proxy = object.__new__(HeadroomProxy)  # bypass HeadroomProxy.__init__; every attribute is assigned by hand below
+    proxy.http_client = MagicMock(spec=httpx.AsyncClient)
+    proxy.metrics = MagicMock()
+    proxy.metrics.record_request = AsyncMock(return_value=None)  # AsyncMock so the call can be awaited
+    proxy.cost_tracker = MagicMock()
+    proxy.cost_tracker.record_tokens.return_value = None
+    proxy.memory_manager = None
+    proxy.memory_handler = None
+    proxy._config = MagicMock()
+    proxy._config.log_full_messages = log_full_messages
+    proxy._config.ccr_inject_tool = False
+    proxy.config = proxy._config  # same object exposed under both attribute names
+    proxy.logger = RequestLogger(log_file=None, log_full_messages=log_full_messages)  # the only real (non-mock) collaborator
+    return proxy
+def _stream_state(output_tokens: int = 42) -> dict:  # Build the stream_state dict passed to _finalize_stream_response; only output_tokens varies.
+    return {
+        "output_tokens": output_tokens,
+        "total_bytes": 200,
+        "ttfb_ms": 35.0,
+        "input_tokens": 1000,
+        "cache_read_input_tokens": 0,
+        "cache_creation_input_tokens": 0,
+        "cache_creation_ephemeral_5m_input_tokens": 0,
+        "cache_creation_ephemeral_1h_input_tokens": 0,
+        "sse_buffer": "",
+    }
+@pytest.mark.asyncio
+async def test_finalize_stream_response_logs_request_for_feed():  # One finalized stream must yield exactly one logger entry with the request's metadata.
+    proxy = _build_proxy_with_real_logger(log_full_messages=False)
+    await proxy._finalize_stream_response(
+        body={"messages": [{"role": "user", "content": "hi"}]},
+        provider="anthropic",
+        model="claude-sonnet-4-6",
+        request_id="req-stream-1",
+        original_tokens=1000,
+        optimized_tokens=600,
+        tokens_saved=400,
+        transforms_applied=["smart_crusher"],
+        optimization_latency=12.0,
+        stream_state=_stream_state(),
+        start_time=0.0,
+        tags={"stack": "wrap_claude"},
+    )
+    entries = proxy.logger.get_recent(10)
+    assert len(entries) == 1, "streaming finalizer must log exactly one entry per request"
+    entry = entries[0]
+    assert entry["request_id"] == "req-stream-1"
+    assert entry["provider"] == "anthropic"
+    assert entry["model"] == "claude-sonnet-4-6"
+    assert entry["input_tokens_original"] == 1000
+    assert entry["input_tokens_optimized"] == 600
+    assert entry["tokens_saved"] == 400
+    assert entry["savings_percent"] == pytest.approx(40.0)  # consistent with 400 saved out of 1000 original
+    assert entry["transforms_applied"] == ["smart_crusher"]
+    assert entry["tags"] == {"stack": "wrap_claude"}
+    assert entry["cache_hit"] is False
+@pytest.mark.asyncio
+async def test_finalize_stream_response_includes_messages_when_log_full_messages_enabled():  # With log_full_messages=True the logged entry carries the request messages.
+    proxy = _build_proxy_with_real_logger(log_full_messages=True)
+    body = {"messages": [{"role": "user", "content": "hello"}]}
+    await proxy._finalize_stream_response(
+        body=body,
+        provider="anthropic",
+        model="claude-sonnet-4-6",
+        request_id="req-stream-2",
+        original_tokens=10,
+        optimized_tokens=8,
+        tokens_saved=2,
+        transforms_applied=[],
+        optimization_latency=1.0,
+        stream_state=_stream_state(output_tokens=5),
+        start_time=0.0,
+    )  # `tags` is not passed in this call
+    entries = proxy.logger.get_recent_with_messages(10)
+    assert len(entries) == 1
+    assert entries[0]["request_messages"] == body["messages"]
+@pytest.mark.asyncio
+async def test_finalize_stream_response_omits_messages_when_log_full_messages_disabled():  # With log_full_messages=False the entry is still logged, but without messages.
+    proxy = _build_proxy_with_real_logger(log_full_messages=False)
+    await proxy._finalize_stream_response(
+        body={"messages": [{"role": "user", "content": "hello"}]},
+        provider="anthropic",
+        model="claude-sonnet-4-6",
+        request_id="req-stream-3",
+        original_tokens=10,
+        optimized_tokens=8,
+        tokens_saved=2,
+        transforms_applied=[],
+        optimization_latency=1.0,
+        stream_state=_stream_state(output_tokens=5),
+        start_time=0.0,
+    )
+    entries = proxy.logger.get_recent_with_messages(10)
+    assert len(entries) == 1  # the request itself is still recorded
+    assert entries[0]["request_messages"] is None  # message content is withheld
+@pytest.mark.asyncio
+async def test_finalize_stream_response_handles_zero_original_tokens():  # Edge case: original_tokens=0 must still log one entry with savings_percent == 0.
+    proxy = _build_proxy_with_real_logger(log_full_messages=False)
+    await proxy._finalize_stream_response(
+        body={"messages": []},
+        provider="anthropic",
+        model="claude-sonnet-4-6",
+        request_id="req-stream-4",
+        original_tokens=0,
+        optimized_tokens=0,
+        tokens_saved=0,
+        transforms_applied=[],
+        optimization_latency=0.0,
+        stream_state=_stream_state(output_tokens=0),
+        start_time=0.0,
+    )
+    entries = proxy.logger.get_recent(10)
+    assert len(entries) == 1
+    assert entries[0]["savings_percent"] == 0  # presumably guards a divide-by-zero in the percentage; verify in the finalizer
+@pytest.mark.asyncio
+async def test_finalize_stream_response_no_op_when_logger_disabled():  # With proxy.logger set to None the finalizer must complete without raising.
+    proxy = _build_proxy_with_real_logger(log_full_messages=False)
+    proxy.logger = None  # `--no-log-requests` would put us here
+    # Should not raise.
+    await proxy._finalize_stream_response(
+        body={"messages": []},
+        provider="anthropic",
+        model="claude-sonnet-4-6",
+        request_id="req-stream-5",
+        original_tokens=10,
+        optimized_tokens=8,
+        tokens_saved=2,
+        transforms_applied=[],
+        optimization_latency=1.0,
+        stream_state=_stream_state(),
+        start_time=0.0,
+    )  # no assertions follow in the visible code: completing the await is the pass condition