# app/utils/token_utils.py
"""
Token Utilities - Character-to-Token Estimation and Smart Truncation.

Used to prevent context window overflow before hitting the LLM API.
"""
import re
from typing import List, Dict, Any


def estimate_tokens(text: str) -> int:
    """
    Heuristically estimate the token count of ``text``.

    Standard rule of thumb: 1 token ~= 4 characters in English.
    For Hinglish/mixed content, we use a safer ratio of 1:3.

    Args:
        text: The text to measure. Any falsy value (empty string, ``None``)
            counts as zero tokens.

    Returns:
        ``0`` for falsy input, otherwise ``len(text) // 3 + 1``.
    """
    if not text:
        return 0
    # The +1 rounds up so that short, non-empty text never estimates to zero.
    return len(text) // 3 + 1


def smart_truncate(text: str, max_chars: int = 4000) -> str:
    """
    Truncate ``text`` to a character limit, preferring a sentence boundary.

    If the text already fits, it is returned unchanged. Otherwise the text is
    cut at ``max_chars`` and, when a sentence terminator (``.``, ``!``, ``?``
    or a newline) occurs in the last 20% of the kept part, the cut is moved
    back to just after that terminator.

    Args:
        text: The text to truncate.
        max_chars: Maximum number of characters of ``text`` to keep. Negative
            values are treated as ``0``.

    Returns:
        The original text when it fits; otherwise the kept prefix followed by
        a truncation marker (``" [TRUNCATED]"`` after a sentence boundary,
        ``"... [TRUNCATED]"`` after a hard cut). The marker is appended on top
        of the kept prefix, so the result can be longer than ``max_chars``.
    """
    # A negative limit would make text[:max_chars] slice from the END of the
    # string (keeping almost everything); clamp so it means "keep nothing".
    max_chars = max(max_chars, 0)

    if len(text) <= max_chars:
        return text

    # Attempt to cut at the last punctuation before the limit.
    truncated = text[:max_chars]
    last_punct = max(
        truncated.rfind('.'),
        truncated.rfind('!'),
        truncated.rfind('?'),
        truncated.rfind('\n'),
    )

    # Only cut at punctuation if it's not too much wasted space. When no
    # terminator exists, last_punct is -1 and this check fails as intended.
    if last_punct > max_chars * 0.8:
        return truncated[:last_punct + 1] + " [TRUNCATED]"

    return truncated + "... [TRUNCATED]"


def calculate_payload_tokens(messages: List[Dict[str, str]]) -> int:
    """
    Estimate total tokens for a standard chat message list.

    Args:
        messages: Chat messages as dicts; only the ``"content"`` value of each
            message is measured. A missing ``"content"`` key counts as empty.

    Returns:
        The sum of ``estimate_tokens`` over every message's content, plus 4
        tokens of overhead per message and 2 tokens for the final prefix.
    """
    total = 0
    for msg in messages:
        total += estimate_tokens(msg.get("content", ""))
        total += 4  # Overhead for role/format tags
    return total + 2  # Final assistant prefix overhead