# File size: 3,983 Bytes (7b5611f)
from __future__ import annotations
import json
import re
from pathlib import Path
from typing import Dict, Optional, Set
from models import CatalogEntry
# Default location of the catalog JSON: <this module's directory>/data/fmcg_catalog.json
_CATALOG_PATH = Path(__file__).parent.joinpath("data", "fmcg_catalog.json")

# 25 HSN prefix -> GST rate (percent) mappings.
# Intended for prefix matching against an HSN code; the longest matching prefix wins.
GST_LOOKUP: Dict[str, float] = {
    # Chapter 04 - dairy
    "0401": 0.0,   # liquid milk
    "0402": 12.0,  # milk powder / flavoured milk
    "0403": 5.0,   # curd, buttermilk
    "0405": 12.0,  # butter, ghee, fats
    "0406": 12.0,  # cheese (block/slices); paneer is 5% but shares prefix

    # Chapter 09 - coffee and tea
    "0901": 5.0,   # coffee beans / roasted
    "0902": 5.0,   # tea

    # Chapters 11 and 15 - flour and edible oils
    "1101": 5.0,   # wheat flour / atta
    "1512": 5.0,   # sunflower, safflower oil
    "1513": 5.0,   # coconut oil, palm oil

    # Chapters 19-22 - prepared foods and beverages
    "1901": 18.0,  # malt-based health drinks
    "1902": 18.0,  # pasta, noodles
    "1905": 18.0,  # biscuits, bread, rusks
    "2009": 12.0,  # fruit juices (100%)
    "2101": 5.0,   # instant coffee / tea extracts
    "2201": 18.0,  # packaged drinking water
    # NOTE(review): the original comment mentions a fruit-drink override,
    # but no such override is visible in this table - confirm where it lives.
    "2202": 28.0,  # aerated drinks (cola, soda)

    # Chapters 33-34 - personal care and cleaning
    "3305": 18.0,  # hair products (shampoo, oil)
    "3306": 18.0,  # oral care (toothpaste)
    "3401": 18.0,  # soap
    "3402": 18.0,  # detergents / dishwash

    # Later additions (kept in their original insertion order)
    "2501": 0.0,   # salt
    "3304": 18.0,  # skin-care preparations
    "3307": 18.0,  # deodorants, room fresheners
    "3808": 18.0,  # insecticides / toilet cleaners
}
def _normalise_key(text: str) -> str:
return re.sub(r"\s+", " ", text.lower().strip())
class FMCGCatalog:
    """In-memory index of catalog entries.

    Entries are stored by ``product_id`` and are also reachable through a
    normalised-text index built from each entry's canonical name and its
    common aliases.
    """

    def __init__(self) -> None:
        """Create an empty catalog; call :meth:`load` to populate it."""
        self._by_id: Dict[str, CatalogEntry] = {}
        self._by_alias: Dict[str, str] = {}  # normalised alias -> product_id
        self._all_ids: Set[str] = set()

    def load(self, path: Path = _CATALOG_PATH) -> "FMCGCatalog":
        """Read the catalog JSON at *path* and index every entry.

        The file is read as UTF-8 and the parsed JSON is iterated; each item
        is expanded as keyword arguments into ``CatalogEntry``. Entries are
        added to whatever is already indexed, so existing state is kept.

        Collision policy for the alias index:

        * a canonical name always claims its key, replacing any earlier
          mapping for the same normalised text;
        * an alias only claims a key nobody holds yet (first writer wins).

        Returns:
            ``self``, so the call can be chained after construction.
        """
        records = json.loads(path.read_text(encoding="utf-8"))
        for record in records:
            entry = CatalogEntry(**record)
            product_id = entry.product_id

            self._by_id[product_id] = entry
            self._all_ids.add(product_id)

            # Canonical name: unconditional write.
            self._by_alias[_normalise_key(entry.canonical_name)] = product_id

            # Aliases: keep the existing owner if the key is already taken.
            for alias in entry.common_aliases:
                self._by_alias.setdefault(_normalise_key(alias), product_id)
        return self

    # -- Public API -----------------------------------------------------------

    def get_by_id(self, product_id: str) -> Optional[CatalogEntry]:
        """Return the entry stored under *product_id*, or ``None`` if unknown."""
        return self._by_id.get(product_id)

    def lookup_alias(self, text: str) -> Optional[str]:
        """Return product_id for an exact alias match (case-insensitive, whitespace-normalised)."""
        return self._by_alias.get(_normalise_key(text))

    def all_product_ids(self) -> Set[str]:
        """Return a copy of the known product ids; mutating it does not affect the catalog."""
        return set(self._all_ids)

    def get_gst_rate(self, hsn_code: str) -> Optional[float]:
        """Prefix-match hsn_code against GST_LOOKUP; longest prefix wins.

        Leading and trailing whitespace in *hsn_code* is ignored.

        Returns:
            The rate mapped to the longest matching prefix, or ``None`` when
            *hsn_code* is empty or no prefix matches.
        """
        if not hsn_code:
            return None
        code = hsn_code.strip()
        # An empty prefix is skipped so it can never act as a catch-all match.
        best_prefix = max(
            (prefix for prefix in GST_LOOKUP if prefix and code.startswith(prefix)),
            key=len,
            default=None,
        )
        if best_prefix is None:
            return None
        return GST_LOOKUP[best_prefix]

    def __len__(self) -> int:
        """Return the number of entries indexed by product id."""
        return len(self._by_id)
# -- Module-level singleton ---------------------------------------------------
# Cached catalog shared by all callers of get_catalog(); None until first use.
_catalog_instance: Optional[FMCGCatalog] = None


def get_catalog() -> FMCGCatalog:
    """Return the shared catalog, building and loading it on the first call.

    The first call constructs an ``FMCGCatalog`` and calls ``load()`` with its
    default path; later calls return the same instance.
    """
    global _catalog_instance
    if _catalog_instance is None:
        # The global is assigned only after load() returns, so a load that
        # raises leaves the cache empty and the next call tries again.
        _catalog_instance = FMCGCatalog().load()
    return _catalog_instance
|