# File size: 9,315 Bytes
# 81e3ca2
"""Professor agent planning.

The model plans one short teaching beat at a time. The Gradio orchestrator owns
all state mutation and validates every requested tool before executing it.
"""

from __future__ import annotations

import json
import re
from dataclasses import dataclass, field
from typing import Any, Literal

from openai import OpenAI

from .config import CONFIG

# What caused a beat to be planned: the lecture advancing ("continue") or a
# student asking something ("question").
Trigger = Literal["continue", "question"]
# The closed set of tool names an action may carry. _validate_actions skips
# any requested tool that is not one of these.
ActionName = Literal[
    # Slide navigation tools.
    "goto_slide",
    "next_slide",
    "prev_slide",
    # Whiteboard tools.
    "write_note",
    "write_latex",
    "clear_whiteboard",
]

# System prompt sent as the first message of every planning request. It fixes
# the JSON shape (narration / actions / continue_lecture) that
# plan_teaching_beat parses, and states the same limits that _validate_actions
# enforces (1-based slide indices, one navigation action, two whiteboard
# actions). The text is runtime behavior: edit it deliberately.
_PLANNER_SYSTEM = """You are the planning brain of AI Prof, a live professor.
Plan exactly one short teaching beat at a time.

You receive a compact index of the entire lecture, the full reading of the current
slide, recent conversation, and the current whiteboard. Decide what the student
should hear next and whether a slide or whiteboard action would help.

Return JSON only:
{
  "narration": "Natural spoken explanation, usually 1-3 short paragraphs.",
  "actions": [
    {"tool": "goto_slide", "args": {"index": 4}},
    {"tool": "write_note", "args": {"title": "Kernel", "body": "A small matrix of weights."}},
    {"tool": "write_latex", "args": {"expression": "g(x,y)=sum_i sum_j h(i,j)f(x-i,y-j)"}},
    {"tool": "clear_whiteboard", "args": {}}
  ],
  "continue_lecture": true
}

Rules:
- Slide indices are 1-based and must exist in the supplied deck index.
- Use at most one navigation action and at most two whiteboard actions.
- Stay on the current slide unless another indexed slide is clearly more useful.
- For a student question, answer it directly. Navigate only when another slide
  materially improves the answer.
- Use the whiteboard only when a compact note or equation improves understanding.
- Clear it when old material would be confusing.
- Do not call a tool just to demonstrate agency.
- Keep narration conversational and suitable for text-to-speech.
- Use no markdown headings and do not mention tool names."""


@dataclass(frozen=True)
class AgentAction:
    """One tool invocation requested by the planner.

    The dataclass is frozen, so ``tool`` and ``args`` cannot be reassigned after
    construction; the ``args`` dict itself is still a regular mutable dict.
    """

    # Name of the tool to run; one of the ActionName literals.
    tool: ActionName
    # Keyword arguments for the tool. Defaults to a fresh empty dict per instance.
    args: dict[str, Any] = field(default_factory=dict)


@dataclass(frozen=True)
class TeachingBeat:
    """The result of planning one teaching step: narration plus optional actions."""

    # Text for the professor to say.
    narration: str
    # Tool invocations accompanying the narration; empty by default.
    actions: tuple[AgentAction, ...] = ()
    # Flag returned to the caller; presumably tells the orchestrator whether to
    # keep lecturing after this beat -- confirm against the caller.
    continue_lecture: bool = True


def _client() -> OpenAI:
    """Create an OpenAI client from the brain settings in ``CONFIG``."""
    brain = CONFIG.brain
    return OpenAI(base_url=brain.openai_base_url, api_key=brain.api_key)


def _extract_json(text: str) -> dict[str, Any]:
    text = text.strip()
    if text.startswith("```"):
        text = re.sub(r"^```(?:json)?\s*", "", text)
        text = re.sub(r"\s*```$", "", text)
    try:
        value = json.loads(text)
    except json.JSONDecodeError:
        start = text.find("{")
        end = text.rfind("}")
        if start < 0 or end <= start:
            raise
        value = json.loads(text[start : end + 1])
    if not isinstance(value, dict):
        raise ValueError("agent response must be a JSON object")
    return value


def _repair_json(text: str) -> str | None:
    """Have the brain rewrite malformed planner output as valid JSON.

    Returns the model's raw reply, or ``None`` when the brain is not live or
    the request fails (the failure is printed, not raised).
    """
    if not CONFIG.brain.is_live:
        return None

    instruction = (
        "Repair the supplied malformed JSON. Preserve its intended values, "
        "return one valid JSON object only, and add no commentary."
    )
    messages = [
        {"role": "system", "content": instruction},
        {"role": "user", "content": text},
    ]

    try:
        completion = _client().chat.completions.create(
            model=CONFIG.brain.model,
            messages=messages,
            temperature=0,
            max_tokens=700,
            response_format={"type": "json_object"},
        )
        repaired = completion.choices[0].message.content
    except Exception as exc:
        print(f"[agent] JSON repair error: {exc}")
        return None
    return repaired


def _validate_actions(raw_actions: Any, total_slides: int) -> tuple[AgentAction, ...]:
    """Reduce model-requested actions to the ones that are safe to execute.

    Malformed entries are skipped rather than raising, so one bad action never
    discards the rest of the beat.

    Args:
        raw_actions: The ``actions`` value from the planner JSON. Anything
            other than a list yields no actions.
        total_slides: Number of slides in the deck; ``goto_slide`` indices must
            fall in ``1..total_slides``.

    Returns:
        The accepted actions in their original order: at most one navigation
        action and at most two whiteboard actions, with arguments normalized
        (unknown keys dropped, text stripped and truncated).
    """
    if not isinstance(raw_actions, list):
        return ()

    max_whiteboard_actions = 2

    def clean_text(value: Any, limit: int) -> str:
        # JSON null must become "", not the literal string "None".
        return "" if value is None else str(value).strip()[:limit]

    valid: list[AgentAction] = []
    navigated = False
    whiteboard_count = 0
    for raw in raw_actions:
        if not isinstance(raw, dict):
            continue
        tool = raw.get("tool")
        # A non-string tool (for example a list) is unhashable and would make
        # the set membership tests below raise TypeError.
        if not isinstance(tool, str):
            continue
        raw_args = raw.get("args")
        args: dict[str, Any] = raw_args if isinstance(raw_args, dict) else {}

        if tool in {"goto_slide", "next_slide", "prev_slide"}:
            if navigated:
                continue
            if tool == "goto_slide":
                index = args.get("index")
                # bool is a subclass of int; True must not pass as slide 1.
                if isinstance(index, bool) or not isinstance(index, int):
                    continue
                if not 1 <= index <= total_slides:
                    continue
                args = {"index": index}
            else:
                args = {}
            navigated = True
        elif tool in {"write_note", "write_latex", "clear_whiteboard"}:
            if whiteboard_count >= max_whiteboard_actions:
                continue
            if tool == "write_note":
                title = clean_text(args.get("title"), 80)
                body = clean_text(args.get("body"), 500)
                if not title and not body:
                    continue
                args = {"title": title, "body": body}
            elif tool == "write_latex":
                expression = clean_text(args.get("expression"), 500)
                if not expression:
                    continue
                args = {"expression": expression}
            else:
                args = {}
            whiteboard_count += 1
        else:
            continue
        valid.append(AgentAction(tool=tool, args=args))
    return tuple(valid)


def _fallback_beat(
    *,
    trigger: Trigger,
    current_slide: int,
    total_slides: int,
    current_reading: str,
    question: str | None,
) -> TeachingBeat:
    """Build a canned teaching beat without calling the brain.

    Used when the brain is not live or when planning fails.

    Args:
        trigger: ``"question"`` produces an answer-style beat that does not
            continue the lecture; anything else produces a generic beat.
        current_slide: 1-based number of the slide being shown.
        total_slides: Number of slides in the deck.
        current_reading: Slide reading text; a line starting with ``TITLE:``
            (case-insensitive) supplies the slide title.
        question: The student's question, if any; echoed in the narration.

    Returns:
        A beat with no actions. For the generic beat, ``continue_lecture`` is
        true only while slides remain after the current one.
    """
    # Use the first TITLE line that actually has text; an empty title would
    # leave a gap in the spoken sentence, so fall back to the slide number.
    title = f"slide {current_slide}"
    for line in current_reading.splitlines():
        if line.upper().startswith("TITLE:"):
            candidate = line.split(":", 1)[1].strip()
            if candidate:
                title = candidate
                break

    if trigger == "question":
        asked = (question or "").strip()
        parts = [f"Let’s connect that question to {title}."]
        # Only echo the question when there is one, so the sentences stay
        # separated by a single space.
        if asked:
            parts.append(asked)
        parts.append(
            "The important idea is how the details on this slide support the concept."
        )
        return TeachingBeat(narration=" ".join(parts), continue_lecture=False)
    return TeachingBeat(
        narration=f"Let’s work through {title} and focus on the main idea.",
        continue_lecture=current_slide < total_slides,
    )


def _coerce_continue_flag(value: Any) -> bool:
    """Interpret the planner's ``continue_lecture`` value as a real boolean.

    Plain ``bool("false")`` is ``True``, so common negative spellings of a
    string-typed flag are mapped to ``False`` explicitly.
    """
    if isinstance(value, str):
        return value.strip().lower() not in {"", "false", "0", "no", "off"}
    return bool(value)


def _planner_messages(
    *,
    trigger: Trigger,
    deck_index: str,
    current_slide: int,
    total_slides: int,
    current_reading: str,
    whiteboard_state: list[dict[str, str]] | None,
    history: list[dict] | None,
    question: str | None,
) -> list[dict[str, str]]:
    """Assemble the chat messages for one planning request."""
    messages: list[dict[str, str]] = [
        {"role": "system", "content": _PLANNER_SYSTEM}
    ]
    # Only the six most recent turns are forwarded.
    for turn in (history or [])[-6:]:
        # Skip malformed entries instead of failing on ``.get``.
        if not isinstance(turn, dict):
            continue
        role = turn.get("role")
        content = turn.get("content")
        if role in {"user", "assistant"} and isinstance(content, str) and content:
            messages.append({"role": role, "content": content})
    messages.append(
        {
            "role": "user",
            "content": (
                f"Trigger: {trigger}\n"
                f"Student question: {question or '(none)'}\n"
                f"Current slide: {current_slide} of {total_slides}\n\n"
                f"Complete deck index:\n{deck_index}\n\n"
                f"Current slide reading:\n{current_reading}\n\n"
                "Current whiteboard JSON:\n"
                f"{json.dumps(whiteboard_state or [], ensure_ascii=True)}"
            ),
        }
    )
    return messages


def plan_teaching_beat(
    *,
    trigger: Trigger,
    deck_index: str,
    current_slide: int,
    total_slides: int,
    current_reading: str,
    whiteboard_state: list[dict[str, str]] | None = None,
    history: list[dict] | None = None,
    question: str | None = None,
) -> TeachingBeat:
    """Return one validated teaching beat from the professor agent.

    When the brain is not live, or when the request, parsing, or validation
    fails for any reason, a canned fallback beat is returned instead of
    raising (the failure is printed).

    Args:
        trigger: Why a beat is being planned (``"continue"`` or ``"question"``).
        deck_index: Compact index of the whole deck, passed to the model as-is.
        current_slide: 1-based number of the slide being shown.
        total_slides: Number of slides in the deck; used to validate navigation.
        current_reading: Full reading of the current slide.
        whiteboard_state: Current whiteboard entries, serialized as JSON for
            the model; ``None`` is sent as an empty list.
        history: Recent conversation turns; only the last six ``user`` /
            ``assistant`` turns with non-empty string content are forwarded.
        question: The student's question, if any.

    Returns:
        A ``TeachingBeat`` whose actions have passed ``_validate_actions``.
    """

    def fallback() -> TeachingBeat:
        return _fallback_beat(
            trigger=trigger,
            current_slide=current_slide,
            total_slides=total_slides,
            current_reading=current_reading,
            question=question,
        )

    if not CONFIG.brain.is_live:
        return fallback()

    messages = _planner_messages(
        trigger=trigger,
        deck_index=deck_index,
        current_slide=current_slide,
        total_slides=total_slides,
        current_reading=current_reading,
        whiteboard_state=whiteboard_state,
        history=history,
        question=question,
    )

    try:
        response = _client().chat.completions.create(
            model=CONFIG.brain.model,
            messages=messages,
            temperature=0.25,
            max_tokens=700,
            response_format={"type": "json_object"},
        )
        raw = response.choices[0].message.content or ""
        try:
            data = _extract_json(raw)
        except (json.JSONDecodeError, ValueError):
            # One repair attempt; if it yields nothing, surface the original
            # parse error so the outer handler falls back.
            repaired = _repair_json(raw)
            if not repaired:
                raise
            data = _extract_json(repaired)

        # A JSON null narration must count as missing, not become "None".
        raw_narration = data.get("narration")
        narration = "" if raw_narration is None else str(raw_narration).strip()
        if not narration:
            raise ValueError("agent returned empty narration")

        return TeachingBeat(
            narration=narration,
            actions=_validate_actions(data.get("actions"), total_slides),
            # When the model omits the flag, keep lecturing only for
            # "continue" triggers.
            continue_lecture=_coerce_continue_flag(
                data.get("continue_lecture", trigger == "continue")
            ),
        )
    except Exception as exc:
        # Deliberate best-effort boundary: the lecture must keep going even
        # when the brain misbehaves.
        print(f"[agent] planning error: {exc}")
        return fallback()