| """
|
| MongoDB Models for Audience Segmentation
|
| Author: AI Generated
|
| Created: 2025-11-24
|
| Purpose: Define data models for storing audience segmentation results
|
| """
|
|
|
| from datetime import datetime
|
| from typing import Optional, List, Dict
|
| from pydantic import BaseModel, Field
|
| from bson import ObjectId
|
|
|
|
|
class PyObjectId(ObjectId):
    """ObjectId subclass that Pydantic v2 can validate and serialize.

    Accepts either an existing ``ObjectId`` instance or a string that
    ``ObjectId.is_valid`` accepts; serializes to the string form.
    """

    @classmethod
    def __get_pydantic_core_schema__(cls, source_type, handler):
        """Build the pydantic-core schema used for fields of this type.

        The schema is a union of two alternatives: pass an ``ObjectId``
        instance through unchanged, or take a string and run it through
        :meth:`validate`. Values are serialized with ``str``.
        """
        from pydantic_core import core_schema

        # Alternative 1: value is already an ObjectId instance.
        instance_branch = core_schema.is_instance_schema(ObjectId)

        # Alternative 2: value is a string, converted by ``validate``.
        string_branch = core_schema.chain_schema([
            core_schema.str_schema(),
            core_schema.no_info_plain_validator_function(cls.validate),
        ])

        to_string = core_schema.plain_serializer_function_ser_schema(
            lambda value: str(value)
        )

        return core_schema.union_schema(
            [instance_branch, string_branch],
            serialization=to_string,
        )

    @classmethod
    def validate(cls, v):
        """Return ``v`` as an ``ObjectId``.

        Args:
            v: An ``ObjectId`` instance or a string representation of one.

        Returns:
            ``v`` itself when it is already an ``ObjectId``; otherwise a new
            ``ObjectId`` built from the string.

        Raises:
            ValueError: If ``v`` is neither an ``ObjectId`` nor a string, or
                is a string that ``ObjectId.is_valid`` rejects.
        """
        if isinstance(v, ObjectId):
            return v
        if not isinstance(v, str):
            raise ValueError(f"Expected ObjectId or string, got {type(v)}")
        if ObjectId.is_valid(v):
            return ObjectId(v)
        raise ValueError(f"Invalid ObjectId: {v}")
|
|
|
|
|
class AudienceSegment(BaseModel):
    """
    Defines the characteristics of an audience segment.

    This is the result of K-Means clustering on user behavior data.

    The ``id`` field is exposed under the alias ``_id``. Because
    ``populate_by_name`` is enabled, instances can be built with either
    ``_id=...`` or ``id=...``.
    """

    # Pydantic v2 configuration. The v1-style ``class Config`` with
    # ``allow_population_by_field_name`` is not honored by Pydantic v2 (the
    # key was renamed to ``populate_by_name``), which made ``id=...`` get
    # silently dropped in favor of a freshly generated id.
    model_config = {
        "populate_by_name": True,
        "arbitrary_types_allowed": True,
        "json_encoders": {ObjectId: str},
    }

    # A new id is generated when none is supplied.
    id: Optional[PyObjectId] = Field(default_factory=PyObjectId, alias="_id")
    segment_name: str = Field(
        ...,
        description="Human-readable segment name, e.g., 'Big Spenders', 'Music Lovers'",
    )
    description: Optional[str] = Field(
        None,
        description="Detailed description of this segment",
    )
    criteria: Dict = Field(
        default_factory=dict,
        description="Statistical criteria: min_spend, max_spend, top_categories, etc.",
    )
    user_count: int = Field(0, description="Number of users in this segment")
    # NOTE(review): ``datetime.utcnow`` yields a naive datetime; kept as-is so
    # stored/compared values do not change. Confirm before moving to aware
    # datetimes.
    last_updated: datetime = Field(default_factory=datetime.utcnow)

    marketing_content: Optional[Dict] = Field(
        None,
        description="AI-generated marketing content: { 'email_subject': str, 'email_body': str }",
    )
|
|
|
|
|
class UserSegmentAssignment(BaseModel):
    """
    Links a user to their assigned segment.

    Many-to-one relationship: many users belong to one segment.

    The ``id`` field is exposed under the alias ``_id``. Because
    ``populate_by_name`` is enabled, instances can be built with either
    ``_id=...`` or ``id=...``.
    """

    # Pydantic v2 configuration. The v1-style ``class Config`` with
    # ``allow_population_by_field_name`` is not honored by Pydantic v2 (the
    # key was renamed to ``populate_by_name``), which made ``id=...`` get
    # silently dropped in favor of a freshly generated id.
    model_config = {
        "populate_by_name": True,
        "arbitrary_types_allowed": True,
        "json_encoders": {ObjectId: str},
    }

    # A new id is generated when none is supplied.
    id: Optional[PyObjectId] = Field(default_factory=PyObjectId, alias="_id")
    user_id: PyObjectId = Field(..., description="Reference to User._id")
    segment_id: PyObjectId = Field(
        ...,
        description="Reference to AudienceSegment._id",
    )
    confidence_score: float = Field(
        ...,
        description="Distance to cluster center (lower is better)",
    )
    # NOTE(review): ``datetime.utcnow`` yields a naive datetime; kept as-is so
    # stored/compared values do not change. Confirm before moving to aware
    # datetimes.
    assigned_at: datetime = Field(default_factory=datetime.utcnow)
|
|
|