"""
tests/test_transcription.py
=============================
Unit tests for the Whisper transcription service.
"""

import os
import json
import pytest
from unittest.mock import patch, MagicMock

from backend.services.whisper_transcriber import WhisperTranscriber
from backend.utils.helper import seconds_to_timestamp


# ── Fixtures ──────────────────────────────────────────────────────────────────

# Canned transcription payload shared by the tests below. It is passed straight
# to ``_parse_result`` and is also what the mocked model's ``transcribe``
# returns in the save-to-disk test.
SAMPLE_WHISPER_OUTPUT = {
    "text": "Hello world. This is a test transcript.",
    "language": "en",
    "segments": [
        # Segment 0: includes word-level timings.
        {
            "id": 0,
            "start": 0.0,
            "end": 2.5,
            "text": "Hello world.",
            "words": [
                {"word": "Hello", "start": 0.0, "end": 0.5},
                {"word": "world", "start": 0.6, "end": 1.0},
            ],
        },
        # Segment 1: same keys, but with an empty word list.
        {"id": 1, "start": 2.6, "end": 5.0, "text": "This is a test transcript.", "words": []},
    ],
}


@pytest.fixture
def transcriber(tmp_path):
    """Provide a WhisperTranscriber whose ``output_dir`` is pytest's ``tmp_path``.

    The directory is stored as a plain string, overriding whatever value the
    constructor assigned.
    """
    instance = WhisperTranscriber()
    # Redirect output to the per-test temporary directory.
    instance.output_dir = str(tmp_path)
    return instance


# ── Tests ─────────────────────────────────────────────────────────────────────

def test_parse_result_structure(transcriber):
    """The parsed payload exposes the expected top-level keys and value types."""
    parsed = transcriber._parse_result(SAMPLE_WHISPER_OUTPUT)

    # Every top-level key must be present before value checks are made.
    for key in ("text", "segments", "language", "duration"):
        assert key in parsed

    assert isinstance(parsed["segments"], list)
    assert parsed["language"] == "en"


def test_parse_result_timestamps(transcriber):
    """The first parsed segment carries timestamp strings for its start and end.

    Expected values come from ``seconds_to_timestamp`` applied to the sample
    segment's raw start (0.0) and end (2.5) seconds.
    """
    first_segment = transcriber._parse_result(SAMPLE_WHISPER_OUTPUT)["segments"][0]

    for field, seconds in (("start_ts", 0.0), ("end_ts", 2.5)):
        assert first_segment[field] == seconds_to_timestamp(seconds)


def test_parse_result_full_text(transcriber):
    """The parsed full text contains wording from both sample segments."""
    full_text = transcriber._parse_result(SAMPLE_WHISPER_OUTPUT)["text"]

    for fragment in ("Hello world", "test transcript"):
        assert fragment in full_text


def test_parse_result_empty_segments(transcriber):
    """A payload with no segments parses to empty text, zero duration, no segments."""
    payload = dict(text="", language="en", segments=[])
    parsed = transcriber._parse_result(payload)

    expected_fields = {"text": "", "duration": 0, "segments": []}
    for key, expected in expected_fields.items():
        assert parsed[key] == expected


@patch("backend.services.whisper_transcriber.WhisperTranscriber._get_model")
def test_transcribe_saves_json(mock_model, transcriber, tmp_path):
    """transcribe() should save ``<job_id>.json`` in the output directory.

    ``WhisperTranscriber._get_model`` is patched so that it returns a
    ``MagicMock`` whose ``transcribe`` yields ``SAMPLE_WHISPER_OUTPUT``. The
    test then checks only the on-disk result: the JSON file exists under
    ``tmp_path`` and contains the ``text`` and ``segments`` keys.
    """
    # Stand-in model handed back by the patched _get_model.
    mock_whisper = MagicMock()
    mock_whisper.transcribe.return_value = SAMPLE_WHISPER_OUTPUT
    mock_model.return_value = mock_whisper

    # Placeholder audio file (100 zero bytes) so a real path exists on disk.
    audio_path = tmp_path / "test_audio.wav"
    audio_path.write_bytes(b"\x00" * 100)

    # The return value is deliberately not bound: this test asserts on the
    # file that transcribe() writes, not on what it returns.
    transcriber.transcribe(str(audio_path), "test_job_001")

    json_path = tmp_path / "test_job_001.json"
    assert json_path.exists(), "Transcript JSON was not saved"
    # Explicit encoding keeps the read independent of the platform locale.
    with open(json_path, encoding="utf-8") as f:
        data = json.load(f)
    assert "text" in data
    assert "segments" in data


def test_load_transcript_returns_none_if_missing(transcriber):
    """Looking up a job_id with no saved transcript yields None."""
    assert transcriber.load_transcript("nonexistent_job_999") is None


def test_helper_seconds_to_timestamp():
    """seconds_to_timestamp renders whole seconds as zero-padded HH:MM:SS."""
    expectations = (
        (0, "00:00:00"),
        (61, "00:01:01"),
        (3661, "01:01:01"),
        (86399, "23:59:59"),
    )
    for seconds, formatted in expectations:
        assert seconds_to_timestamp(seconds) == formatted