Commit ·
a2c4e53
1
Parent(s): 8dc382f
Model loading fix for vision model
Browse files
- agents/product_matcher.py +4 -1
- app.py +5 -21
agents/product_matcher.py
CHANGED
|
@@ -16,7 +16,7 @@ AGENT_NAME = "Product_Matcher"
|
|
| 16 |
AGENT_VERSION = "1.0.0"
|
| 17 |
MODEL_REPO = "build-small-hackathon/minicpm5-1b-indian-fmcg-normalizer"
|
| 18 |
|
| 19 |
-
_TIMEOUT_SECONDS =
|
| 20 |
|
| 21 |
_SYSTEM_PROMPT = (
|
| 22 |
"You are an Indian FMCG product name normalizer. "
|
|
@@ -66,6 +66,9 @@ class ProductMatcherAgent:
|
|
| 66 |
temperature=0.0,
|
| 67 |
)
|
| 68 |
text = response["choices"][0]["message"]["content"].strip()
|
|
|
|
|
|
|
|
|
|
| 69 |
data = json.loads(text)
|
| 70 |
returned_id = data.get("product_id")
|
| 71 |
except Exception as e:
|
|
|
|
| 16 |
AGENT_VERSION = "1.0.0"
|
| 17 |
MODEL_REPO = "build-small-hackathon/minicpm5-1b-indian-fmcg-normalizer"
|
| 18 |
|
| 19 |
+
_TIMEOUT_SECONDS = 60
|
| 20 |
|
| 21 |
_SYSTEM_PROMPT = (
|
| 22 |
"You are an Indian FMCG product name normalizer. "
|
|
|
|
| 66 |
temperature=0.0,
|
| 67 |
)
|
| 68 |
text = response["choices"][0]["message"]["content"].strip()
|
| 69 |
+
if not text:
|
| 70 |
+
logger.warning("ProductMatcher LLM returned empty response for %r", product_raw)
|
| 71 |
+
return None, None
|
| 72 |
data = json.loads(text)
|
| 73 |
returned_id = data.get("product_id")
|
| 74 |
except Exception as e:
|
app.py
CHANGED
|
@@ -64,8 +64,6 @@ def load_models() -> None:
|
|
| 64 |
import os
|
| 65 |
import torch
|
| 66 |
from transformers import AutoProcessor
|
| 67 |
-
from huggingface_hub import snapshot_download
|
| 68 |
-
from safetensors.torch import load_file as safetensors_load
|
| 69 |
|
| 70 |
_BASE_REPO = "openbmb/MiniCPM-V-4.6"
|
| 71 |
_MERGED_REPO = "build-small-hackathon/minicpm-v-4-6-indian-invoice-extraction-merged"
|
|
@@ -77,9 +75,9 @@ def load_models() -> None:
|
|
| 77 |
except ImportError:
|
| 78 |
from transformers import AutoModelForMultimodalLM as _VisionModel
|
| 79 |
|
| 80 |
-
#
|
| 81 |
-
#
|
| 82 |
-
logger.info("Loading
|
| 83 |
_model_kwargs = {
|
| 84 |
"trust_remote_code": True,
|
| 85 |
"torch_dtype": _dtype,
|
|
@@ -88,29 +86,15 @@ def load_models() -> None:
|
|
| 88 |
_model_kwargs["token"] = _HF_TOKEN
|
| 89 |
if torch.cuda.is_available():
|
| 90 |
_model_kwargs["device_map"] = "auto"
|
| 91 |
-
_vision_model = _VisionModel.from_pretrained(
|
| 92 |
if not torch.cuda.is_available():
|
| 93 |
_vision_model.to(_device)
|
| 94 |
|
| 95 |
-
logger.info("Loading fine-tuned weights from %s …", _MERGED_REPO)
|
| 96 |
-
merged_local = snapshot_download(_MERGED_REPO, token=_HF_TOKEN or None)
|
| 97 |
-
shard_files = sorted(
|
| 98 |
-
f for f in os.listdir(merged_local) if f.endswith(".safetensors")
|
| 99 |
-
)
|
| 100 |
-
if not shard_files:
|
| 101 |
-
raise RuntimeError(f"No .safetensors files found in {_MERGED_REPO}")
|
| 102 |
-
state_dict = {}
|
| 103 |
-
for sf in shard_files:
|
| 104 |
-
state_dict.update(safetensors_load(os.path.join(merged_local, sf), device="cpu"))
|
| 105 |
-
missing, unexpected = _vision_model.load_state_dict(state_dict, strict=False)
|
| 106 |
-
if unexpected:
|
| 107 |
-
logger.warning("Unexpected keys loading fine-tuned weights: %s", unexpected[:5])
|
| 108 |
-
logger.info("Fine-tuned weights loaded (%d keys, %d missing)", len(state_dict), len(missing))
|
| 109 |
-
|
| 110 |
_vision_model.eval()
|
| 111 |
_processor_kwargs = {"trust_remote_code": True}
|
| 112 |
if _HF_TOKEN:
|
| 113 |
_processor_kwargs["token"] = _HF_TOKEN
|
|
|
|
| 114 |
_vision_processor = AutoProcessor.from_pretrained(_BASE_REPO, **_processor_kwargs)
|
| 115 |
vision_llm = (_vision_model, _vision_processor)
|
| 116 |
logger.info("Vision LLM ready (device=%s dtype=%s)", _device, _dtype)
|
|
|
|
| 64 |
import os
|
| 65 |
import torch
|
| 66 |
from transformers import AutoProcessor
|
|
|
|
|
|
|
| 67 |
|
| 68 |
_BASE_REPO = "openbmb/MiniCPM-V-4.6"
|
| 69 |
_MERGED_REPO = "build-small-hackathon/minicpm-v-4-6-indian-invoice-extraction-merged"
|
|
|
|
| 75 |
except ImportError:
|
| 76 |
from transformers import AutoModelForMultimodalLM as _VisionModel
|
| 77 |
|
| 78 |
+
# The merged repo is a fully-merged model (not a LoRA delta) — load it directly.
|
| 79 |
+
# Loading base + overlaying weights fails because the repos use different param naming.
|
| 80 |
+
logger.info("Loading merged vision model from %s …", _MERGED_REPO)
|
| 81 |
_model_kwargs = {
|
| 82 |
"trust_remote_code": True,
|
| 83 |
"torch_dtype": _dtype,
|
|
|
|
| 86 |
_model_kwargs["token"] = _HF_TOKEN
|
| 87 |
if torch.cuda.is_available():
|
| 88 |
_model_kwargs["device_map"] = "auto"
|
| 89 |
+
_vision_model = _VisionModel.from_pretrained(_MERGED_REPO, **_model_kwargs)
|
| 90 |
if not torch.cuda.is_available():
|
| 91 |
_vision_model.to(_device)
|
| 92 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 93 |
_vision_model.eval()
|
| 94 |
_processor_kwargs = {"trust_remote_code": True}
|
| 95 |
if _HF_TOKEN:
|
| 96 |
_processor_kwargs["token"] = _HF_TOKEN
|
| 97 |
+
# Load processor from base repo — has complete preprocessor/chat-template configs.
|
| 98 |
_vision_processor = AutoProcessor.from_pretrained(_BASE_REPO, **_processor_kwargs)
|
| 99 |
vision_llm = (_vision_model, _vision_processor)
|
| 100 |
logger.info("Vision LLM ready (device=%s dtype=%s)", _device, _dtype)
|