Spaces:

minhvtt
/

Aus_F

Sleeping

File size: 3,523 Bytes

"""

Sentiment Analysis Pydantic Models for MongoDB  

Author: AI Generated

Created: 2025-11-24

Purpose: Define schemas for sentiment analysis results and per-event insight reports

"""

from datetime import datetime, timezone
from typing import Dict, List, Optional

from bson import ObjectId
from pydantic import BaseModel, ConfigDict, Field


class PyObjectId(ObjectId):
    """ObjectId subclass that describes itself to Pydantic v2.

    Validation accepts either an existing ``ObjectId`` (passed through) or a
    string (checked with ``ObjectId.is_valid`` and converted). Serialization
    emits ``str(value)``.
    """

    @classmethod
    def validate(cls, v):
        """Coerce *v* into an ``ObjectId``.

        Returns:
            *v* unchanged when it is already an ``ObjectId``; otherwise a
            plain ``ObjectId`` built from the string.

        Raises:
            ValueError: if *v* is a string that ``ObjectId.is_valid``
                rejects, or is neither an ``ObjectId`` nor a string.
        """
        if isinstance(v, ObjectId):
            return v
        if not isinstance(v, str):
            raise ValueError(f"Expected ObjectId or string, got {type(v)}")
        if ObjectId.is_valid(v):
            return ObjectId(v)
        raise ValueError(f"Invalid ObjectId: {v}")

    @classmethod
    def __get_pydantic_core_schema__(cls, source_type, handler):
        """Build the core schema Pydantic uses for fields of this type."""
        from pydantic_core import core_schema

        # Branch 1: the value is already an ObjectId instance.
        passthrough = core_schema.is_instance_schema(ObjectId)

        # Branch 2: the value must first validate as a string, then is
        # handed to ``validate`` for conversion.
        from_string = core_schema.chain_schema([
            core_schema.str_schema(),
            core_schema.no_info_plain_validator_function(cls.validate),
        ])

        # Whichever branch matched, serialize via ``str``.
        to_string = core_schema.plain_serializer_function_ser_schema(
            lambda oid: str(oid)
        )

        return core_schema.union_schema(
            [passthrough, from_string],
            serialization=to_string,
        )



class SentimentAnalysisResult(BaseModel):
    """Individual sentiment analysis result for a comment/feedback.

    Fields may be populated by name or by alias, so the identifier can be
    supplied as either ``id`` or ``_id``.
    """

    # Pydantic v2 configuration. This replaces the v1-style inner
    # ``class Config``, which v2 accepts only with a deprecation warning.
    # The option values are unchanged.
    model_config = ConfigDict(
        populate_by_name=True,
        arbitrary_types_allowed=True,
        json_encoders={ObjectId: str},
    )

    # Document identifier; aliased to "_id" and absent (None) by default.
    id: Optional[PyObjectId] = Field(default=None, alias="_id")
    source_id: PyObjectId = Field(..., description="ID of the original comment/post")
    source_type: str = Field(default="UserCommentPost", description="Type of source")

    # Event context
    event_code: str = Field(..., description="Event identifier this comment belongs to")

    # The label is a free-form string; only the confidence score is
    # range-checked (0.0 <= score <= 1.0).
    sentiment_label: str = Field(..., description="Positive, Negative, or Neutral")
    confidence_score: float = Field(..., ge=0.0, le=1.0, description="Model confidence (0-1)")

    key_phrases: List[str] = Field(
        default_factory=list,
        description="Extracted keywords/phrases from the text"
    )

    # Naive UTC timestamp taken when the model instance is created. Built
    # from an aware ``now()`` and stripped of tzinfo so the value matches
    # what ``datetime.utcnow()`` returned, without calling that function
    # (deprecated since Python 3.12).
    analyzed_at: datetime = Field(
        default_factory=lambda: datetime.now(timezone.utc).replace(tzinfo=None)
    )


class EventInsightReport(BaseModel):
    """High-level insights for an event, generated by LLM.

    Includes Top 5 issues, NPS prediction, and improvement suggestions.
    Fields may be populated by name or by alias, so the identifier can be
    supplied as either ``id`` or ``_id``.
    """

    # Pydantic v2 configuration. This replaces the v1-style inner
    # ``class Config``, which v2 accepts only with a deprecation warning.
    # The option values are unchanged.
    model_config = ConfigDict(
        populate_by_name=True,
        arbitrary_types_allowed=True,
        json_encoders={ObjectId: str},
    )

    # Document identifier; aliased to "_id" and absent (None) by default.
    id: Optional[PyObjectId] = Field(default=None, alias="_id")
    event_code: str = Field(..., description="Reference to EventVersion.EventCode")

    # Naive UTC timestamp taken when the model instance is created. Built
    # from an aware ``now()`` and stripped of tzinfo so the value matches
    # what ``datetime.utcnow()`` returned, without calling that function
    # (deprecated since Python 3.12).
    report_date: datetime = Field(
        default_factory=lambda: datetime.now(timezone.utc).replace(tzinfo=None)
    )

    total_comments: int = Field(0, description="Total number of comments analyzed")
    sentiment_breakdown: Dict[str, int] = Field(
        default_factory=dict,
        description="Count by sentiment: { 'Positive': 50, 'Negative': 10, 'Neutral': 20 }"
    )

    # Optional: stays None when no prediction is supplied. No range
    # constraint is enforced on the value.
    predicted_nps: Optional[float] = Field(None, description="Predicted NPS score (0-100)")

    # Neither list is length-limited by the model; "Top 5" is descriptive only.
    top_issues: List[str] = Field(
        default_factory=list,
        description="Top 5 recurring issues, e.g., ['Check-in slow', 'Sound quality poor']"
    )
    improvement_suggestions: List[str] = Field(
        default_factory=list,
        description="AI-generated suggestions for improvement"
    )