""" Sentiment Analysis Pydantic Models for MongoDB Author: AI Generated Created: 2025-11-24 Purpose: Define schemas for sentiment analysis results """ from pydantic import BaseModel, Field from typing import List, Optional, Dict from datetime import datetime from bson import ObjectId class PyObjectId(ObjectId): """Custom ObjectId type for Pydantic v2""" @classmethod def __get_pydantic_core_schema__(cls, source_type, handler): from pydantic_core import core_schema return core_schema.union_schema([ core_schema.is_instance_schema(ObjectId), core_schema.chain_schema([ core_schema.str_schema(), core_schema.no_info_plain_validator_function(cls.validate), ]) ], serialization=core_schema.plain_serializer_function_ser_schema( lambda x: str(x) )) @classmethod def validate(cls, v): if isinstance(v, ObjectId): return v if isinstance(v, str): if not ObjectId.is_valid(v): raise ValueError(f"Invalid ObjectId: {v}") return ObjectId(v) raise ValueError(f"Expected ObjectId or string, got {type(v)}") class SentimentAnalysisResult(BaseModel): """Individual sentiment analysis result for a comment/feedback""" id: Optional[PyObjectId] = Field(default=None, alias="_id") source_id: PyObjectId = Field(..., description="ID of the original comment/post") source_type: str = Field(default="UserCommentPost", description="Type of source") # NEW: Event context event_code: str = Field(..., description="Event identifier this comment belongs to") sentiment_label: str = Field(..., description="Positive, Negative, or Neutral") confidence_score: float = Field(..., ge=0.0, le=1.0, description="Model confidence (0-1)") key_phrases: List[str] = Field( default_factory=list, description="Extracted keywords/phrases from the text" ) analyzed_at: datetime = Field(default_factory=datetime.utcnow) class Config: populate_by_name = True arbitrary_types_allowed = True json_encoders = {ObjectId: str} class EventInsightReport(BaseModel): """ High-level insights for an event, generated by LLM. Includes Top 5 issues, NPS prediction, and improvement suggestions. """ id: Optional[PyObjectId] = Field(default=None, alias="_id") event_code: str = Field(..., description="Reference to EventVersion.EventCode") report_date: datetime = Field(default_factory=datetime.utcnow) total_comments: int = Field(0, description="Total number of comments analyzed") sentiment_breakdown: Dict[str, int] = Field( default_factory=dict, description="Count by sentiment: { 'Positive': 50, 'Negative': 10, 'Neutral': 20 }" ) predicted_nps: Optional[float] = Field(None, description="Predicted NPS score (0-100)") top_issues: List[str] = Field( default_factory=list, description="Top 5 recurring issues, e.g., ['Check-in slow', 'Sound quality poor']" ) improvement_suggestions: List[str] = Field( default_factory=list, description="AI-generated suggestions for improvement" ) class Config: populate_by_name = True arbitrary_types_allowed = True json_encoders = {ObjectId: str}