"""
Task 0.1 — Fine-tune YOLO26n on merged Indian grocery datasets.

Run on Modal (A10G GPU, ~1-2 hours):
    modal run finetune/train_yolo26n.py

Publishes: naazimsnh02/yolo26n-indian-fmcg-detection
    yolo26n_fmcg.onnx
    class_names.json
    model card

Datasets merged (all downloaded as yolov8 format, NOT openai format):
    1. agentsk47/indian-grocery-object-detection-mfsnx v1 (~10 classes, small)
    2. iit-patna-qg1jh/grocery_items-7i2em v45 (20 classes, 6,695 images)
    3. project-c5ho0/indian-market-qieug v2 (2 classes, 4,694 images)

Prerequisites:
    ROBOFLOW_API_KEY in env (for dataset download)
    HF_TOKEN in env (for HF Hub publish)
    modal token set
"""

import json
import os

import modal

app = modal.App("kirana-yolo26n-finetune")

IMAGE = (
    modal.Image.debian_slim(python_version="3.11")
    .apt_install("libgl1-mesa-glx", "libglib2.0-0")
    .pip_install(
        "ultralytics>=8.4.0",
        "roboflow>=1.1.0",
        "huggingface_hub>=0.30.0",
        "onnx>=1.16.0",
        "onnxruntime>=1.18.0",
        "pyyaml>=6.0",
    )
)

# Modal secrets; train() reads ROBOFLOW_API_KEY and HF_TOKEN from os.environ.
ROBOFLOW_API_KEY = modal.Secret.from_name("roboflow-secret")
HF_SECRET = modal.Secret.from_name("hf-secret")

# All three Roboflow datasets — downloaded as "yolov8" format (NOT "openai")
DATASETS = [
    {
        "workspace": "agentsk47",
        "project": "indian-grocery-object-detection-mfsnx",
        "version": 1,
        "location": "/data/ds_agentsk47",
    },
    {
        "workspace": "iit-patna-qg1jh",
        "project": "grocery_items-7i2em",
        "version": 45,
        "location": "/data/ds_iitpatna",
    },
    {
        "workspace": "project-c5ho0",
        "project": "indian-market-qieug",
        "version": 2,
        "location": "/data/ds_indianmarket",
    },
]

MERGED_DIR = "/output/merged_dataset"  # persisted on volume → skip re-download on resume
HF_REPO = "naazimsnh02/yolo26n-indian-fmcg-detection"
YOLO_BASE_MODEL = "yolo26n.pt"
YOLO_FALLBACK = "yolo11n.pt"
EPOCHS = 100
IMG_SIZE = 640
BATCH = 16


# ── Dataset merge helpers ──────────────────────────────────────────────────────

def _read_class_names(data_yaml_path: str) -> list[str]:
    """Return the class names declared under ``names`` in a YOLO data.yaml.

    ``names`` may be a list or an ``{id: name}`` mapping; a mapping is
    flattened in ascending key order. An empty file or a missing/null
    ``names`` entry yields an empty list.
    """
    import yaml

    with open(data_yaml_path, encoding="utf-8") as f:
        cfg = yaml.safe_load(f) or {}  # safe_load returns None for an empty file
    names = cfg.get("names") or []
    if isinstance(names, dict):
        names = [names[key] for key in sorted(names)]
    return names


def _remap_label_file(src: str, dst: str, id_map: dict[int, int]) -> None:
    """Copy a YOLO label file, remapping class IDs via id_map.

    Blank lines are dropped. The first token of each remaining line is parsed
    as the class ID; everything after it is re-joined with single spaces.
    IDs that are absent from ``id_map`` are written through unchanged.
    """
    from pathlib import Path

    Path(dst).parent.mkdir(parents=True, exist_ok=True)
    lines_out = []
    with open(src, encoding="utf-8") as f:
        for line in f:
            parts = line.split()
            if not parts:
                continue
            old_id = int(parts[0])
            new_id = id_map.get(old_id, old_id)
            lines_out.append(f"{new_id} {' '.join(parts[1:])}")
    with open(dst, "w", encoding="utf-8") as f:
        f.write("\n".join(lines_out))


def merge_yolo_datasets(dataset_locations: list[str], output_dir: str) -> str:
    """
    Merge N YOLOv8 datasets into one directory with unified class IDs.

    Class names are deduplicated across datasets in first-seen order, every
    label file is rewritten with the unified IDs, and copied files are
    prefixed with ``ds<index>_`` so equal filenames from different datasets
    cannot collide.

    Returns the path to the merged data.yaml.
    """
    import shutil
    from pathlib import Path

    import yaml

    # 1. Build unified class list (insertion-order dedup across all datasets).
    #    A dict gives O(1) name → unified-id lookups and keeps insertion order.
    class_index: dict[str, int] = {}
    per_ds_classes: list[list[str]] = []
    for loc in dataset_locations:
        yaml_path = Path(loc) / "data.yaml"
        if not yaml_path.exists():
            # Try one level deeper (Roboflow sometimes nests); sorted so the
            # pick is deterministic when several candidates exist.
            candidates = sorted(Path(loc).rglob("data.yaml"))
            if candidates:
                yaml_path = candidates[0]
        names = _read_class_names(str(yaml_path))
        per_ds_classes.append(names)
        for name in names:
            class_index.setdefault(name, len(class_index))
    unified_classes = list(class_index)

    print(f"Unified class list ({len(unified_classes)} classes): {unified_classes}")

    # 2. Build per-dataset old_id → new_id maps
    id_maps: list[dict[int, int]] = [
        {old_id: class_index[name] for old_id, name in enumerate(names)}
        for names in per_ds_classes
    ]

    # 3. Copy images + remapped labels for each split
    splits = ["train", "valid", "test"]
    out_root = Path(output_dir)

    for ds_idx, loc in enumerate(dataset_locations):
        ds_root = Path(loc)
        # Roboflow may nest under a subdirectory matching the project name
        if not (ds_root / "train").exists():
            subdirs = [
                d for d in ds_root.iterdir()
                if d.is_dir() and (d / "train").exists()
            ]
            if subdirs:
                ds_root = subdirs[0]

        id_map = id_maps[ds_idx]
        ds_tag = f"ds{ds_idx}"

        for split in splits:
            img_src = ds_root / split / "images"
            lbl_src = ds_root / split / "labels"
            if not img_src.exists():
                continue

            img_dst = out_root / split / "images"
            lbl_dst = out_root / split / "labels"
            img_dst.mkdir(parents=True, exist_ok=True)
            lbl_dst.mkdir(parents=True, exist_ok=True)

            for img_file in img_src.iterdir():
                if not img_file.is_file():
                    # shutil.copy cannot copy a directory; skip stray entries.
                    continue
                # Prefix filename with dataset tag to avoid collisions
                new_name = f"{ds_tag}_{img_file.name}"
                shutil.copy(str(img_file), str(img_dst / new_name))

                stem = img_file.stem
                lbl_file = lbl_src / f"{stem}.txt"
                if lbl_file.exists():
                    _remap_label_file(
                        str(lbl_file),
                        str(lbl_dst / f"{ds_tag}_{stem}.txt"),
                        id_map,
                    )

    # 4. Write merged data.yaml. It is written last, so its presence marks a
    #    merge that ran to completion (train() uses it as the cache check).
    merged_yaml = out_root / "data.yaml"
    cfg = {
        "path": str(out_root),
        "train": "train/images",
        "val": "valid/images",
        "test": "test/images",
        "nc": len(unified_classes),
        "names": unified_classes,
    }
    with open(merged_yaml, "w", encoding="utf-8") as f:
        yaml.dump(cfg, f, allow_unicode=True, default_flow_style=False)

    # Count merged images
    for split in splits:
        split_images = out_root / split / "images"
        n = len(list(split_images.glob("*"))) if split_images.exists() else 0
        print(f" {split}: {n} images")

    return str(merged_yaml)


def _build_model_card(class_names: list[str]) -> str:
    """Render the Hugging Face README.md (model card) for the published model."""
    return f"""---
license: apache-2.0
base_model: yolo26n
language:
- en
tags:
- object-detection
- yolo
- indian-fmcg
- onnx
- ultralytics
pipeline_tag: object-detection
datasets:
- agentsk47/indian-grocery-object-detection-mfsnx
- iit-patna-qg1jh/grocery_items-7i2em
- project-c5ho0/indian-market-qieug
---

# YOLO26n — Indian FMCG Product Detection

Fine-tuned [YOLO26n](https://docs.ultralytics.com) on a **merged dataset of three Indian grocery sources** from Roboflow Universe.

Part of the **Kirana Detective** project — an AI system for small Indian grocery stores to visually count and reconcile inventory from shelf/counter photos.

## Training Datasets

| Dataset | Workspace | Version | Images | Classes |
|---|---|---|---|---|
| [Indian Grocery Object Detection](https://universe.roboflow.com/agentsk47/indian-grocery-object-detection-mfsnx) | agentsk47 | v1 | ~400 | 10 |
| [Grocery Items](https://universe.roboflow.com/iit-patna-qg1jh/grocery_items-7i2em) | IIT Patna | v45 | 6,695 | 20 |
| [Indian Market](https://universe.roboflow.com/project-c5ho0/indian-market-qieug) | project-c5ho0 | v2 | 4,694 | 2 |

All three datasets were downloaded in **YOLOv8 format**, class IDs remapped to a unified list, and merged before training.

## Classes ({len(class_names)} total)

{chr(10).join(f"- {name}" for name in class_names)}

## How to Use

### Python (ONNX Runtime)

```python
import json
import numpy as np
import onnxruntime as ort
from PIL import Image

session = ort.InferenceSession("yolo26n_fmcg.onnx", providers=["CPUExecutionProvider"])
class_names = json.load(open("class_names.json"))

def preprocess(image_path, size=640):
    img = Image.open(image_path).convert("RGB").resize((size, size))
    arr = np.array(img, dtype=np.float32) / 255.0
    return arr.transpose(2, 0, 1)[None]  # BCHW

input_name = session.get_inputs()[0].name
outputs = session.run(None, {{input_name: preprocess("shelf.jpg")}})
# outputs[0]: (1, 300, 6) — [x1, y1, x2, y2, confidence, class_id]
```

### Ultralytics (PyTorch)

```python
from ultralytics import YOLO

model = YOLO("yolo26n_fmcg.onnx", task="detect")
results = model.predict("shelf.jpg", imgsz=640, conf=0.25)
results[0].show()
```

## Training Details

| Parameter | Value |
|---|---|
| Base model | YOLO26n |
| Input size | 640 × 640 |
| Epochs (scheduled) | {EPOCHS} |
| Batch size | {BATCH} |
| Early stopping patience | 20 |
| Export format | ONNX opset 12 |
| Hardware | NVIDIA A10G (Modal) |

## Citation

```bibtex
@misc{{kirana-detective-yolo-2026,
  title = {{Kirana Detective: YOLO26n Indian FMCG Product Detector}},
  author = {{Naazim}},
  year = {{2026}},
  url = {{https://huggingface.co/naazimsnh02/yolo26n-indian-fmcg-detection}}
}}
```
"""


# ── Modal function ─────────────────────────────────────────────────────────────

@app.function(
    image=IMAGE,
    gpu="A10G",
    timeout=28800,
    secrets=[ROBOFLOW_API_KEY, HF_SECRET],
    volumes={"/output": modal.Volume.from_name("kirana-yolo-output", create_if_missing=True)},
)
def train():
    """Download + merge the datasets, train (or resume), export ONNX, publish.

    Steps:
      1. Reuse the merged dataset under MERGED_DIR if its data.yaml exists,
         otherwise download every entry of DATASETS from Roboflow and merge.
      2. If a last.pt checkpoint exists, resume from it (or skip training when
         it is already finished); otherwise train from the base model.
      3. Export best.pt to ONNX and write the unified class names as JSON.
      4. Upload the ONNX file, class_names.json and a README to HF_REPO.

    Returns:
        dict with ``class_count`` and ``onnx_path``.
    """
    import shutil
    from pathlib import Path

    from huggingface_hub import HfApi
    from roboflow import Roboflow
    from ultralytics import YOLO

    weights_dir = Path("/output/runs/yolo26n_fmcg/weights")
    last_pt = weights_dir / "last.pt"
    merged_yaml = Path(MERGED_DIR) / "data.yaml"

    # --- Dataset: skip download+merge if already cached on the volume ---
    if merged_yaml.exists():
        print(f"Merged dataset found at {merged_yaml}, skipping download.")
        data_yaml = str(merged_yaml)
    else:
        rf = Roboflow(api_key=os.environ["ROBOFLOW_API_KEY"])
        locations = []
        for ds in DATASETS:
            print(f"Downloading {ds['workspace']}/{ds['project']} v{ds['version']}...")
            project = rf.workspace(ds["workspace"]).project(ds["project"])
            result = project.version(ds["version"]).download("yolov8", location=ds["location"])
            locations.append(ds["location"])
            print(f" -> {result.location}")

        print("Merging datasets...")
        data_yaml = merge_yolo_datasets(locations, MERGED_DIR)
        print(f"Merged data.yaml: {data_yaml}")

    # Arguments shared by the fresh run and the weights-only fallback run.
    common_train_args = dict(
        data=data_yaml,
        imgsz=IMG_SIZE,
        batch=BATCH,
        project="/output/runs",
        name="yolo26n_fmcg",
        exist_ok=True,
        device=0,
        patience=20,
        save=True,
        plots=True,
    )

    results = None  # stays None when training is skipped

    # --- Resume from checkpoint if one exists, otherwise start fresh ---
    if last_pt.exists():
        import torch

        # NOTE: weights_only=False unpickles arbitrary objects. Acceptable here
        # only because the checkpoint was written by this job to its own volume.
        ckpt = torch.load(str(last_pt), map_location="cpu", weights_only=False)
        done_epoch = ckpt.get("epoch", -1)  # 0-indexed epoch that finished
        if done_epoch is None:
            done_epoch = -1

        if done_epoch < 0:
            # When a run ends (all epochs or early stopping) Ultralytics strips
            # the optimizer from last.pt and stores epoch = -1. Such a
            # checkpoint cannot be resumed: train(resume=True) raises
            # AssertionError ("nothing to resume"), so treat it as finished.
            remaining = 0
            print("Checkpoint found — final (optimizer-stripped) weights from a finished run.")
        else:
            remaining = EPOCHS - (done_epoch + 1)
            print(f"Checkpoint found — epoch {done_epoch + 1}/{EPOCHS}, {remaining} epochs remaining.")

        if remaining <= 0:
            print("Training already complete, skipping to export.")
        else:
            try:
                model = YOLO(str(last_pt))
                results = model.train(resume=True)
            except (ValueError, RuntimeError) as exc:
                # Optimizer state mismatch (e.g. after env/package upgrade).
                # Fall back: load weights, continue for remaining epochs with a
                # lower LR so we don't disturb the already-converged parameters.
                print(f"Full resume failed ({exc}).")
                print(f"Falling back to weights-only resume: {remaining} more epochs.")
                model = YOLO(str(last_pt))
                results = model.train(
                    epochs=remaining,
                    lr0=0.0005,  # reduced: weights already partially trained
                    lrf=0.01,
                    **common_train_args,
                )
    else:
        try:
            model = YOLO(YOLO_BASE_MODEL)
            print(f"Loaded base model: {YOLO_BASE_MODEL}")
        except Exception as exc:
            # Deliberate best-effort fallback; report the cause instead of
            # hiding it so an unrelated failure is still visible in the logs.
            print(f"YOLO26n not found ({exc}), falling back to {YOLO_FALLBACK}")
            model = YOLO(YOLO_FALLBACK)

        results = model.train(epochs=EPOCHS, **common_train_args)

    # results is None when training was skipped; report 'N/A' in that case.
    metrics = getattr(results, "results_dict", None) or {}
    print(f"Training complete. Best mAP50: {metrics.get('metrics/mAP50(B)', 'N/A')}")

    best_pt = weights_dir / "best.pt"

    # --- Export to ONNX ---
    export_model = YOLO(str(best_pt))
    onnx_path = export_model.export(format="onnx", imgsz=IMG_SIZE, opset=12, simplify=True)
    shutil.copy(str(onnx_path), "/output/yolo26n_fmcg.onnx")
    print(f"Exported ONNX to {onnx_path}")

    # --- Save unified class names ---
    class_names = _read_class_names(data_yaml)
    with open("/output/class_names.json", "w", encoding="utf-8") as f:
        json.dump(class_names, f, indent=2, ensure_ascii=False)
    print(f"Saved {len(class_names)} unified class names")

    # --- Publish to HF Hub ---
    api = HfApi(token=os.environ["HF_TOKEN"])
    api.create_repo(HF_REPO, repo_type="model", exist_ok=True)

    api.upload_file(
        path_or_fileobj="/output/yolo26n_fmcg.onnx",
        path_in_repo="yolo26n_fmcg.onnx",
        repo_id=HF_REPO,
    )
    api.upload_file(
        path_or_fileobj="/output/class_names.json",
        path_in_repo="class_names.json",
        repo_id=HF_REPO,
    )

    model_card = _build_model_card(class_names)
    api.upload_file(
        path_or_fileobj=model_card.encode(),
        path_in_repo="README.md",
        repo_id=HF_REPO,
    )

    print(f"Published to {HF_REPO}")
    return {"class_count": len(class_names), "onnx_path": "/output/yolo26n_fmcg.onnx"}


@app.local_entrypoint()
def main():
    """Local entrypoint: run train() remotely on Modal and print its result."""
    result = train.remote()
    print(f"Done: {result}")