File size: 7,210 Bytes
849ee7b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c8055f7
 
8457788
849ee7b
 
 
 
 
 
 
 
 
 
 
 
c8055f7
 
8457788
849ee7b
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
"""Shared data structures and codebook constants for Trace Field Notes."""

from __future__ import annotations

from dataclasses import asdict, dataclass, field
from typing import Any, Literal


# Closed set of agent labels; used as the type of ``AnalysisResult.agent_type_guess``.
AgentType = Literal["codex", "claude_code", "pi", "unknown"]


# Codebook of difficulty types: maps each label to a one-sentence description.
# "unknown" is the entry for evidence too weak to classify.
# NOTE(review): presumably these are the labels stored in
# ``DifficultyEpisode.difficulty_type`` -- confirm against callers.
DIFFICULTY_TYPES: dict[str, str] = {
    "requirement_uncertainty": "Requirements, specification, or user intent are unclear.",
    "localization_difficulty": "The relevant module, file, function, or root cause is unclear.",
    "architecture_complexity": "The system structure, dependencies, or shared surfaces are more complex than expected.",
    "implementation_difficulty": "The direction is known, but the implementation is non-trivial.",
    "compatibility_risk": "A change may break existing behavior or nearby surfaces.",
    "verification_difficulty": "It is unclear how to verify that the work is correct.",
    "environment_blocker": "Dependencies, permissions, network, runtime, or local environment block progress.",
    "insufficient_context": "The agent reports that more context is needed.",
    "conflicting_assumptions": "A prior assumption conflicts with new evidence.",
    "unknown": "The evidence is too weak to classify.",
}

# Codebook of appraisals: maps each label to a one-sentence description of how
# the agent frames the situation. "unknown" covers evidence too weak to classify.
# NOTE(review): presumably these are the labels stored in
# ``DifficultyEpisode.appraisal`` -- confirm against callers.
APPRAISALS: dict[str, str] = {
    "local_fix_possible": "The agent frames the issue as locally fixable.",
    "needs_more_context": "The agent says it needs more information.",
    "initial_hypothesis_wrong": "The agent revises an earlier hypothesis.",
    "risk_is_higher_than_expected": "The agent recognizes higher side-effect or regression risk.",
    "scope_too_large": "The agent decides the original scope is too broad.",
    "needs_alternative_path": "The agent seeks a different route.",
    "cannot_reliably_verify": "The agent says verification is not reliable yet.",
    "task_boundary_unclear": "The agent sees the task boundary as unclear.",
    "unknown": "The evidence is too weak to classify.",
}

# Codebook of detour types: maps each label to a one-sentence description of
# how the agent's strategy changes (or does not change). "unknown" covers
# evidence too weak to classify.
# NOTE(review): presumably these are the labels stored in
# ``DifficultyEpisode.detour_type`` -- confirm against callers.
DETOUR_TYPES: dict[str, str] = {
    "direct_continuation": "The agent continues the original strategy.",
    "decomposition": "The agent breaks the problem down.",
    "scope_narrowing": "The agent narrows the scope.",
    "alternative_path": "The agent switches route.",
    "workaround": "The agent works around the issue without resolving the root cause.",
    "rollback_or_reversal": "The agent abandons or reverses a prior direction.",
    "hypothesis_switch": "The agent changes its problem hypothesis.",
    "verification_shift": "The agent changes verification strategy.",
    "ask_or_defer": "The agent asks for input or defers judgment.",
    "premature_closure": "The agent closes before the difficulty is resolved.",
    "unknown": "The evidence is too weak to classify.",
}

# Codebook of resolution modes: maps each label to a one-sentence description
# of how an episode gets resolved. "unknown" covers evidence too weak to classify.
# NOTE(review): presumably these are the labels stored in
# ``DifficultyEpisode.resolution_mode`` -- confirm against callers.
RESOLUTION_MODES: dict[str, str] = {
    "information_gathering": "The episode resolves through additional context.",
    "problem_reframing": "The agent reframes the problem.",
    "minimal_patch": "The agent applies a focused patch.",
    "structural_change": "The agent makes or proposes a structural change.",
    "defensive_handling": "The agent adds guards, validation, or explicit handling.",
    "alternative_implementation": "The agent changes implementation approach.",
    "goal_reduction": "The agent lowers the goal or solves a subset.",
    "explicit_limitation": "The agent explicitly states a limitation.",
    "narrative_rationalization": "The agent smooths over unresolved evidence in prose.",
    "unknown": "The evidence is too weak to classify.",
}

# Codebook of recovery patterns: maps each label to a one-sentence description
# of how (or whether) the agent recovers. "unknown" covers evidence too weak
# to classify.
# NOTE(review): presumably these are the labels stored in
# ``DifficultyEpisode.recovery_pattern`` -- confirm against callers.
RECOVERY_PATTERNS: dict[str, str] = {
    "smooth_recovery": "The agent quickly understands the issue and moves forward.",
    "iterative_recovery": "The agent recovers through repeated attempts.",
    "detour_recovery": "The agent recovers after a route change.",
    "partial_recovery": "The agent solves part of the issue while preserving caveats.",
    "failed_recovery": "The episode does not recover.",
    "avoidant_recovery": "The agent bypasses the difficulty by doing adjacent work.",
    "overconfident_recovery": "The agent claims success without enough visible support.",
    "reflective_recovery": "The agent identifies a wrong assumption and corrects course.",
    "unknown": "The evidence is too weak to classify.",
}

# Codebook of outcome claims: maps each label to a one-sentence description of
# what the agent says about the result. "unknown" covers evidence too weak to
# classify.
# NOTE(review): presumably these are the labels stored in
# ``DifficultyEpisode.outcome_claim`` -- confirm against callers.
OUTCOME_CLAIMS: dict[str, str] = {
    "resolved_with_confidence": "The agent clearly claims completion.",
    "resolved_with_caveat": "The agent claims completion with a caveat.",
    "partially_resolved": "The agent says only part of the work is complete.",
    "not_resolved": "The agent says the issue remains unresolved.",
    "needs_verification": "The agent says more testing or confirmation is needed.",
    "uncertain_but_proceeding": "The agent proceeds despite uncertainty.",
    "premature_success_claim": "The agent claims success with weak visible evidence.",
    "unknown": "The evidence is too weak to classify.",
}


@dataclass(slots=True)
class NarrativeMessage:
    """A visible user or assistant message extracted from an agent trace."""

    index: int
    role: Literal["assistant", "user"]
    text: str
    timestamp: str | None = None
    source: str = "unknown"

    def to_dict(self) -> dict[str, Any]:
        return asdict(self)


@dataclass(slots=True)
class MessageSpan:
    start_index: int
    end_index: int
    start_time: str | None = None
    end_time: str | None = None
    duration_label: str = "unknown"

    def to_dict(self) -> dict[str, Any]:
        return asdict(self)


@dataclass(slots=True)
class DifficultyEpisode:
    episode_id: str
    title: str
    message_span: MessageSpan
    initial_intention: str
    reported_difficulty: str
    difficulty_type: str
    appraisal: str
    strategy_before: str
    strategy_after: str
    detour_type: str
    resolution_mode: str
    recovery_pattern: str
    outcome_claim: str
    productive_detour: Literal["yes", "no", "mixed", "unknown"]
    evidence_quotes: list[str] = field(default_factory=list)
    analyst_memo: str = ""

    def to_dict(self) -> dict[str, Any]:
        data = asdict(self)
        data["message_span"] = self.message_span.to_dict()
        return data


@dataclass(slots=True)
class AnalysisResult:
    trace_title: str
    agent_type_guess: AgentType
    analysis_scope: str
    privacy_notes: list[str]
    episodes: list[DifficultyEpisode]
    overall_patterns: dict[str, str]
    narrative_message_count: int
    redaction_count: int = 0
    engine: str = "deterministic-codebook"
    model_notes: list[str] = field(default_factory=list)
    model_memo: dict[str, Any] = field(default_factory=dict)
    session_verdict: dict[str, Any] = field(default_factory=dict)

    def to_dict(self) -> dict[str, Any]:
        return {
            "trace_title": self.trace_title,
            "agent_type_guess": self.agent_type_guess,
            "analysis_scope": self.analysis_scope,
            "privacy_notes": self.privacy_notes,
            "episodes": [episode.to_dict() for episode in self.episodes],
            "overall_patterns": self.overall_patterns,
            "narrative_message_count": self.narrative_message_count,
            "redaction_count": self.redaction_count,
            "engine": self.engine,
            "model_notes": self.model_notes,
            "model_memo": self.model_memo,
            "session_verdict": self.session_verdict,
        }