"""Tests for the ASR-backed journal pipeline. These tests cover the fast/skip-model path and the metadata path so the rest of the journal pipeline (scoring, recap, persistence) keeps working even when no audio model is available. Run with: CITYQUEST_FAST_TEST=1 python test_asr.py To exercise the real model, run without the skip env var on a GPU box. """ from __future__ import annotations import json import os import sys import tempfile import uuid from pathlib import Path # ── Fast-test guard ─────────────────────────────────────────────────────── if os.environ.get("CITYQUEST_FAST_TEST") or not os.environ.get("CITYQUEST_RUN_ASR"): os.environ.setdefault("CITYQUEST_SKIP_MODEL", "1") passed = 0 failed = 0 errors: list[str] = [] def check(label: str, condition: bool, detail: str = "") -> None: global passed, failed if condition: passed += 1 print(f" ✓ PASS: {label}") else: failed += 1 msg = f"✗ FAIL: {label} — {detail}" if detail else f"✗ FAIL: {label}" errors.append(msg) print(f" {msg}") def main() -> int: global passed, failed print("=" * 80) print("ASR JOURNAL PIPELINE — TESTS") print("=" * 80) # ── T1: ASR module imports & constants ────────────────────────────── print("\n" + "-" * 80) print("T1: ASR MODULE IMPORTS") print("-" * 80) try: from app.services.asr import ( MODEL_ID, SUPPORTED_LANGUAGES, DEFAULT_LANGUAGE, transcribe, transcribe_text, ) check("ASR module imports", True) check("Model id is Cohere Transcribe", "cohere-transcribe" in MODEL_ID, f"Got {MODEL_ID}") check("Supports 14 languages", len(SUPPORTED_LANGUAGES) == 14, f"Got {len(SUPPORTED_LANGUAGES)}") check("Default language is English", DEFAULT_LANGUAGE == "en") except Exception as e: check("ASR module imports", False, str(e)) _summary() return 1 # ── T2: transcribe() returns structured result ────────────────────── print("\n" + "-" * 80) print("T2: transcribe() CONTRACT") print("-" * 80) try: # Use a fake file path — skipped mode should never reach the disk result = transcribe("/nonexistent/audio.wav", language="en") check("transcribe() returns dict", isinstance(result, dict)) for key in ("transcript", "language", "model", "status", "error"): check(f"transcribe() has '{key}'", key in result) check("status is 'ok' | 'skipped' | 'error'", result["status"] in ("ok", "skipped", "error"), f"Got {result['status']}") check("transcript is a string", isinstance(result["transcript"], str)) check("model is reported", bool(result["model"])) except Exception as e: check("transcribe() contract", False, str(e)) # ── T3: Skipped mode is silent and empty ──────────────────────────── print("\n" + "-" * 80) print("T3: SKIP-MODE BEHAVIOR") print("-" * 80) try: os.environ["CITYQUEST_SKIP_MODEL"] = "1" result = transcribe("/tmp/fake.wav", language="en") check("Skip mode returns empty transcript", result["transcript"] == "") check("Skip mode reports status=skipped", result["status"] == "skipped") # Restore to be safe del os.environ["CITYQUEST_SKIP_MODEL"] except Exception as e: check("Skip-mode behavior", False, str(e)) # ── T4: transcribe_journal() returns empty on skip ────────────────── print("\n" + "-" * 80) print("T4: JOURNAL.transcribe_journal() FALLBACK") print("-" * 80) try: os.environ["CITYQUEST_SKIP_MODEL"] = "1" from app.services.journal import transcribe_journal # Create a tiny valid wav so path checks pass with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fh: wav_path = fh.name # Minimal RIFF header — not a real wav, but enough for the # existence check in transcribe_journal() fh.write(b"RIFF\x24\x00\x00\x00WAVEfmt ") try: text = transcribe_journal(wav_path, language="en") check("transcribe_journal returns str", isinstance(text, str)) check("transcribe_journal empty when ASR skipped", text == "", f"Got {text!r}") finally: try: os.unlink(wav_path) except OSError: pass except Exception as e: check("Journal.transcribe_journal fallback", False, str(e)) # ── T5: create_journal_entry() supports ASR metadata ─────────────── print("\n" + "-" * 80) print("T5: JOURNAL ENTRY METADATA") print("-" * 80) try: from app.services.journal import create_journal_entry, save_journal_entry, load_journal_entries asr_meta = { "model": "CohereLabs/cohere-transcribe-03-2026", "language": "en", "status": "ok", "error": None, } entry = create_journal_entry( transcript="We just found the mural near the canal, amazing!", session_id="test-asr", team_id="team-a", task_id="t1", location_note="Canal area", audio_ref="/tmp/clip.wav", asr_metadata=asr_meta, transcript_source="asr", ) check("Entry has transcript_source", entry.get("transcript_source") == "asr") check("Entry has audio_ref", entry.get("audio_ref") == "/tmp/clip.wav") check("Entry has asr metadata", isinstance(entry.get("asr"), dict)) check("ASR metadata has model", entry["asr"]["model"] == asr_meta["model"]) check("ASR metadata has language", entry["asr"]["language"] == "en") check("ASR metadata has status", entry["asr"]["status"] == "ok") # Schema validation (optional — only if jsonschema is installed) try: import jsonschema # type: ignore schema_path = Path("app/schemas/journal_schema.json") schema = json.loads(schema_path.read_text()) jsonschema.validate(instance=entry, schema=schema) check("Entry validates against journal_schema.json", True) except ImportError: check("Entry validates against journal_schema.json (skipped, no jsonschema)", True) # Persistence round-trip save_journal_entry(entry) loaded = load_journal_entries(session_id="test-asr") check("Entry persisted with ASR metadata", len(loaded) >= 1) if loaded: check("Loaded entry has transcript_source", loaded[-1].get("transcript_source") == "asr") check("Loaded entry has asr block", isinstance(loaded[-1].get("asr"), dict)) except Exception as e: check("Journal entry metadata", False, str(e)) # ── T6: app.record_journal() handles voice path ───────────────────── print("\n" + "-" * 80) print("T6: app.record_journal() VOICE PATH (skip-mode)") print("-" * 80) try: os.environ["CITYQUEST_SKIP_MODEL"] = "1" # We can't import the whole app (it boots Gradio), so test the # logic by directly exercising the journal functions. from app.services.journal import transcribe_journal, create_journal_entry from app.services.asr import transcribe as _asr_transcribe with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fh: wav_path = fh.name fh.write(b"RIFF\x24\x00\x00\x00WAVEfmt ") try: asr_result = _asr_transcribe(wav_path, language="en") check("ASR returns skipped status", asr_result["status"] == "skipped") check("ASR transcript is empty", asr_result["transcript"] == "") # When ASR produces no text, journal creation should still # be possible via the typed/manual correction path. entry = create_journal_entry( transcript="Typed correction (ASR was unavailable).", session_id="test-asr-voice", audio_ref=wav_path, asr_metadata={ "model": asr_result["model"], "language": asr_result["language"], "status": asr_result["status"], "error": asr_result["error"], }, transcript_source="typed", ) check("Hybrid entry created with asr metadata", entry.get("asr", {}).get("status") == "skipped") finally: try: os.unlink(wav_path) except OSError: pass except Exception as e: check("Voice path in skip-mode", False, str(e)) _summary() return 0 if failed == 0 else 1 def _summary() -> None: total = passed + failed print("\n" + "=" * 80) if failed == 0: print(f"RESULTS: {passed}/{total} tests passed — ALL CLEAR 🎉") else: print(f"RESULTS: {passed}/{total} tests passed — {failed} FAILED") print("=" * 80) if errors: print("\nFailed tests:") for e in errors: print(f" {e}") if __name__ == "__main__": sys.exit(main())