"""In-memory FMCG product catalog with alias lookup and HSN → GST rate matching."""

from __future__ import annotations

import json
import re
from pathlib import Path
from typing import Dict, Optional, Set

from models import CatalogEntry

_CATALOG_PATH = Path(__file__).parent / "data" / "fmcg_catalog.json"

# HSN prefix → GST rate (percent) mappings (prefix-match, longest wins).
GST_LOOKUP: Dict[str, float] = {
    "0401": 0.0,   # liquid milk
    "0402": 12.0,  # milk powder / flavoured milk
    "0403": 5.0,   # curd, buttermilk
    "0405": 12.0,  # butter, ghee, fats
    "0406": 12.0,  # cheese (block/slices); paneer is 5% but shares prefix
    "0901": 5.0,   # coffee beans / roasted
    "0902": 5.0,   # tea
    "1101": 5.0,   # wheat flour / atta
    "1512": 5.0,   # sunflower, safflower oil
    "1513": 5.0,   # coconut oil, palm oil
    "1901": 18.0,  # malt-based health drinks
    "1902": 18.0,  # pasta, noodles
    "1905": 18.0,  # biscuits, bread, rusks
    "2009": 12.0,  # fruit juices (100%)
    "2101": 5.0,   # instant coffee / tea extracts
    "2201": 18.0,  # packaged drinking water
    # aerated drinks (cola, soda). NOTE: no fruit-drink override is defined in
    # this table; a longer "2202…" prefix would have to be added for one.
    "2202": 28.0,
    "3305": 18.0,  # hair products (shampoo, oil)
    "3306": 18.0,  # oral care (toothpaste)
    "3401": 18.0,  # soap
    "3402": 18.0,  # detergents / dishwash
    "2501": 0.0,   # salt
    "3304": 18.0,  # skin-care preparations
    "3307": 18.0,  # deodorants, room fresheners
    "3808": 18.0,  # insecticides / toilet cleaners
}


def _normalise_key(text: str) -> str:
    """Lower-case *text*, trim it, and collapse whitespace runs to one space."""
    return re.sub(r"\s+", " ", text.lower().strip())


class FMCGCatalog:
    """Product catalog indexed by product id and by normalised name/alias."""

    def __init__(self) -> None:
        self._by_id: Dict[str, CatalogEntry] = {}
        self._by_alias: Dict[str, str] = {}  # normalised alias → product_id
        self._all_ids: Set[str] = set()

    def load(self, path: Path = _CATALOG_PATH) -> "FMCGCatalog":
        """Read the JSON catalog at *path* and add its entries to the indexes.

        Each element of the decoded JSON is passed as keyword arguments to
        ``CatalogEntry``. Entries are added on top of whatever is already
        loaded; nothing is cleared first.

        Collision policy: a canonical name always claims its key (overwriting
        any earlier mapping), while an alias only claims a key that is still
        free. Names or aliases that normalise to the empty string are not
        indexed, so a blank lookup can never resolve to a product.

        Returns:
            ``self``, so calls can be chained.

        Raises:
            OSError: if *path* cannot be read.
            json.JSONDecodeError: if the file is not valid JSON.
        """
        raw = json.loads(path.read_text(encoding="utf-8"))
        for entry in raw:
            e = CatalogEntry(**entry)
            self._by_id[e.product_id] = e
            self._all_ids.add(e.product_id)

            # Index canonical name (skipping blank names).
            canonical_key = _normalise_key(e.canonical_name)
            if canonical_key:
                self._by_alias[canonical_key] = e.product_id

            # Index all aliases; first writer wins on collision.
            for alias in e.common_aliases:
                key = _normalise_key(alias)
                if key:
                    self._by_alias.setdefault(key, e.product_id)
        return self

    # ── Public API ────────────────────────────────────────────────────────────

    def get_by_id(self, product_id: str) -> Optional[CatalogEntry]:
        """Return the entry stored under *product_id*, or ``None`` if unknown."""
        return self._by_id.get(product_id)

    def lookup_alias(self, text: str) -> Optional[str]:
        """Return product_id for an exact alias match (case-insensitive, whitespace-normalised)."""
        return self._by_alias.get(_normalise_key(text))

    def all_product_ids(self) -> Set[str]:
        """Return a copy of the set of loaded product ids."""
        return set(self._all_ids)

    def get_gst_rate(self, hsn_code: str) -> Optional[float]:
        """Prefix-match hsn_code against GST_LOOKUP; longest prefix wins.

        Surrounding whitespace in *hsn_code* is ignored. Returns ``None`` for
        an empty code or when no prefix in the table matches.
        """
        if not hsn_code:
            return None
        code = hsn_code.strip()
        # Empty prefixes are excluded so they can never act as a catch-all.
        best_prefix = max(
            (prefix for prefix in GST_LOOKUP if prefix and code.startswith(prefix)),
            key=len,
            default=None,
        )
        if best_prefix is None:
            return None
        return GST_LOOKUP[best_prefix]

    def __len__(self) -> int:
        """Return the number of distinct product ids loaded."""
        return len(self._by_id)


# ── Module-level singleton ────────────────────────────────────────────────────

_catalog_instance: Optional[FMCGCatalog] = None


def get_catalog() -> FMCGCatalog:
    """Return the shared catalog, loading it from the default path on first use.

    The instance is only cached after ``load()`` succeeds, so a failed load is
    retried on the next call.
    """
    global _catalog_instance
    if _catalog_instance is None:
        _catalog_instance = FMCGCatalog().load()
    return _catalog_instance