"""
Push the trained MiniCPM-V LoRA adapter from Modal volume to HuggingFace Hub.

Usage:
    modal run finetune/push_minicpm_v_to_hf.py

Reads from Modal volume: kirana-minicpm-v-output  (/output/minicpm-v-lora)
Pushes to: naazimsnh02/minicpm-v-4-6-indian-invoice-extraction
"""

import os
import modal

# Modal application under which the functions in this file are registered.
app = modal.App("kirana-push-minicpm-v")

# Container image: Debian slim with Python 3.11 and the HuggingFace Hub client
# installed -- the only third-party package the remote function imports.
IMAGE = modal.Image.debian_slim(python_version="3.11").pip_install(
    "huggingface_hub>=0.30.0"
)

# Named Modal secret attached to the push function, which reads HF_TOKEN from
# the environment (presumably supplied by this secret -- confirm in Modal).
HF_SECRET = modal.Secret.from_name("hf-secret")

# Identifiers shared by the upload code and the model card. The card below is
# rendered from these constants, so changing one here updates every mention.
HF_REPO = "naazimsnh02/minicpm-v-4-6-indian-invoice-extraction"
BASE_MODEL = "openbmb/MiniCPM-V-4.6"
HF_DATASET_REPO = "build-small-hackathon/kirana-invoice-train-data"

# Model card (README.md) template. The tokens __HF_REPO__, __BASE_MODEL__ and
# __HF_DATASET_REPO__ are substituted below. Plain str.replace is used instead
# of an f-string / str.format because the card embeds JSON and Python snippets
# full of literal braces.
_MODEL_CARD_TEMPLATE = """\
---
license: apache-2.0
base_model: __BASE_MODEL__
datasets:
  - __HF_DATASET_REPO__
language:
  - en
tags:
  - invoice-extraction
  - indian-fmcg
  - minicpm-v
  - ocr
  - qlora
  - peft
  - kirana
  - vision-language
pipeline_tag: image-text-to-text
---

# MiniCPM-V 4.6 — Indian Invoice Extraction (LoRA Adapter)

Fine-tuned [`__BASE_MODEL__`](https://huggingface.co/__BASE_MODEL__) for structured JSON extraction from Indian distributor invoices.
Part of the **Kirana Detective** project — an AI audit pipeline for small Indian grocery (kirana) stores.

> **This is a PEFT LoRA adapter** — you need the base model + this adapter to run inference.

## Training Results

| Epoch | Train Loss | Eval Loss |
|-------|-----------|-----------|
| 1 | ~6.08 | 0.2901 |
| 2 | ~3.95 | 0.2281 |
| 3 | ~3.33 | **0.212** |

**Training summary** (3 epochs, 87 steps, ~52 min on A10G):
- Total average train loss: 4.774
- Best eval loss: **0.212** (epoch 3, loaded as final checkpoint)
- Trainable parameters: 9,486,336 / 1,309,914,352 (0.72%)
- Dataset: 450 train + 50 eval synthetic invoices

## Usage

```python
from peft import PeftModel, PeftConfig
from transformers import AutoModel, AutoProcessor
import torch

# Load adapter config to get base model id
config = PeftConfig.from_pretrained("__HF_REPO__")

base_model = AutoModel.from_pretrained(
    config.base_model_name_or_path,
    trust_remote_code=True,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)

model = PeftModel.from_pretrained(base_model, "__HF_REPO__")
processor = AutoProcessor.from_pretrained("__HF_REPO__", trust_remote_code=True)
```

### Inference Example

```python
from PIL import Image

image = Image.open("invoice.jpg")
messages = [
    {
        "role": "system",
        "content": "You are an invoice extraction assistant. Given an invoice image, extract all fields as valid JSON. Return ONLY the JSON object, no explanation."
    },
    {
        "role": "user",
        "content": [
            {"type": "image", "image": image},
            {"type": "text", "text": "Extract all invoice fields as JSON."}
        ]
    }
]

inputs = processor(messages, return_tensors="pt").to(model.device)
with torch.no_grad():
    output = model.generate(**inputs, max_new_tokens=512)
result_json = processor.decode(output[0], skip_special_tokens=True)
```

## Output Schema

```json
{
  "invoice_number": "INV-2024-001",
  "supplier": "Hindustan Unilever Ltd.",
  "date": "2026-06-10",
  "items": [
    {
      "product_raw": "SURF XL 1KG",
      "quantity": 12,
      "unit_price": 95.00,
      "gst_rate": 18,
      "line_total": 1140.00
    }
  ],
  "grand_total": 9650.00,
  "extraction_warnings": []
}
```

## Supported Invoice Formats

- Printed GST invoices (Tally-style, thermal-print)
- Tally PDF exports
- WhatsApp screenshot invoices
- Handwritten bills

## Training Details

| Parameter | Value |
|-----------|-------|
| Base model | __BASE_MODEL__ |
| Model class | MiniCPMV4_6ForConditionalGeneration |
| Fine-tuning method | QLoRA (4-bit + LoRA) |
| LoRA rank | 16 |
| Quantization | bitsandbytes 4-bit (nf4) |
| Batch size | 1 (grad accum × 16 = effective 16) |
| Learning rate | 1e-4 (cosine decay, warmup 10 steps) |
| Epochs | 3 |
| Total steps | 87 |
| Hardware | NVIDIA A10G (22 GB VRAM) |
| Training time | ~52 minutes |
| Orchestration | Modal (serverless GPU) |
| Framework | Transformers ≥ 5.7.0 + PEFT |

## Citation

```bibtex
@misc{kirana-detector-minicpm-v-2026,
  title  = {Kirana Detective: MiniCPM-V 4.6 Indian Invoice Extraction},
  author = {Syed Naazim Hussain},
  year   = {2026},
  url    = {https://huggingface.co/__HF_REPO__}
}
```

## License

Apache 2.0 (same as base model __BASE_MODEL__)
"""

# Rendered model card uploaded as README.md. None of the substituted values
# contains another placeholder token, so the replacement order is irrelevant.
MODEL_CARD = (
    _MODEL_CARD_TEMPLATE
    .replace("__HF_DATASET_REPO__", HF_DATASET_REPO)
    .replace("__BASE_MODEL__", BASE_MODEL)
    .replace("__HF_REPO__", HF_REPO)
)


@app.function(
    image=IMAGE,
    timeout=600,
    secrets=[HF_SECRET],
    volumes={
        "/output": modal.Volume.from_name("kirana-minicpm-v-output", create_if_missing=False)
    },
)
def push_to_hub():
    """Publish the trained LoRA adapter and the model card to the HuggingFace Hub.

    Reads the top-level files of ``/output/minicpm-v-lora`` (the mounted Modal
    volume), creates ``HF_REPO`` as a public model repo if it does not exist,
    and uploads the adapter files together with ``MODEL_CARD`` (as
    ``README.md``) in a single commit.

    Subdirectories of the adapter directory are not uploaded. A ``README.md``
    found in the adapter directory is not uploaded either, because the
    generated model card takes its place.

    Raises:
        KeyError: if the ``HF_TOKEN`` environment variable is not set.
        FileNotFoundError: if the adapter directory is missing or contains no
            uploadable files.
    """
    from huggingface_hub import CommitOperationAdd, HfApi
    from pathlib import Path

    token = os.environ["HF_TOKEN"]

    # Validate the local artefacts BEFORE touching the Hub, so a failed or
    # missing training run does not leave an empty public repository behind.
    adapter_dir = Path("/output/minicpm-v-lora")
    if not adapter_dir.is_dir():
        raise FileNotFoundError(
            f"Adapter not found at {adapter_dir}. "
            "Did the training job complete successfully?"
        )

    # Sorted for deterministic log output and commit contents.
    entries = sorted(adapter_dir.iterdir())
    files = [
        entry for entry in entries
        # The model card is uploaded as README.md; a second operation on the
        # same path in one commit would be ambiguous, so skip any local copy.
        if entry.is_file() and entry.name != "README.md"
    ]
    skipped = [entry for entry in entries if entry not in files]

    if not files:
        raise FileNotFoundError(
            f"No adapter files found in {adapter_dir}. "
            "Did the training job complete successfully?"
        )

    print(f"Found {len(files)} files in {adapter_dir}:")
    for f in files:
        print(f"  {f.name} ({f.stat().st_size / 1024:.1f} KB)")
    for entry in skipped:
        print(f"  Skipping {entry.name} (directory or replaced by model card)")

    api = HfApi(token=token)

    print(f"Creating repo: {HF_REPO}")
    api.create_repo(HF_REPO, repo_type="model", exist_ok=True, private=False)

    # One commit for everything: either the whole adapter plus its card is
    # published, or nothing is -- no half-uploaded repo on a mid-run failure.
    operations = [
        CommitOperationAdd(path_in_repo=f.name, path_or_fileobj=str(f))
        for f in files
    ]
    operations.append(
        CommitOperationAdd(
            path_in_repo="README.md",
            path_or_fileobj=MODEL_CARD.encode("utf-8"),
        )
    )

    print(f"Uploading {len(files)} adapter files and README.md (model card)...")
    api.create_commit(
        repo_id=HF_REPO,
        repo_type="model",
        operations=operations,
        commit_message="Upload MiniCPM-V LoRA adapter and model card",
    )

    print(f"\nDone! Model published at: https://huggingface.co/{HF_REPO}")


@app.local_entrypoint()
def main() -> None:
    """Local entrypoint for ``modal run``: invoke ``push_to_hub`` remotely on Modal."""
    push_to_hub.remote()