from _core.llm import LLMClient
from _core.ui import build_agent_app
from agent import EvalHarness


def run_fn(llm: LLMClient, _user_input: str):
    return EvalHarness(llm=llm).run()


demo = build_agent_app(
    title="Evals & LLM-as-judge",
    description=(
        "How do you know your agent is any good? This harness asks the model a fixed "
        "Q&A set, then uses an LLM judge to score each answer against a reference. "
        "Press Run (the input box is ignored). Bring your own OpenRouter key."
    ),
    input_label="(ignored) press Run to evaluate the built-in dataset",
    input_placeholder="",
    run_fn=run_fn,
)

if __name__ == "__main__":
    demo.launch()