"""Upload generation_config.json to the merged MiniCPM-V model repo.

Fixes runaway generation: the merged model has no generation_config.json and its
config.json sets eos_token_id to <|endoftext|> (248044), which the chat format
never emits, so generate() ran to max_new_tokens. This config stops on <|im_end|>.

Usage (needs a write-scoped HF_TOKEN in env or .env):
    python finetune/push_generation_config.py
"""
| from __future__ import annotations |
|
|
| import os |
| from pathlib import Path |
|
|
| from huggingface_hub import HfApi |
|
|
# Hub model repository that receives the uploaded file.
REPO_ID = "build-small-hackathon/minicpm-v-4-6-indian-invoice-extraction-merged"
# The config to upload sits next to this script, whatever the working directory.
LOCAL_FILE = Path(__file__).with_name("generation_config.json")
|
|
|
|
def _parse_dotenv_token(text: str) -> str:
    """Return the HF_TOKEN value found in dotenv-style *text*, or "" if absent.

    The first ``HF_TOKEN`` assignment wins. An optional leading ``export``,
    whitespace around ``=``, and one pair of matching surrounding quotes are
    tolerated. Comment lines never match because their key is not exactly
    ``HF_TOKEN``.
    """
    for raw_line in text.splitlines():
        key, sep, value = raw_line.partition("=")
        if not sep:
            continue
        key_parts = key.split()
        # Accept the shell-style "export HF_TOKEN=..." spelling as well.
        if key_parts[:1] == ["export"]:
            key_parts = key_parts[1:]
        if key_parts != ["HF_TOKEN"]:
            continue
        value = value.strip()
        # Drop one pair of matching quotes so HF_TOKEN="hf_..." yields the bare
        # token instead of a string that still carries the quote characters.
        if len(value) >= 2 and value[0] == value[-1] and value[0] in "'\"":
            value = value[1:-1].strip()
        return value
    return ""


def _load_token() -> str:
    """Return the Hugging Face token from the environment or the project .env.

    The ``HF_TOKEN`` environment variable takes precedence. When it is unset
    or blank, ``.env`` one directory above this script's directory is parsed
    for an ``HF_TOKEN`` assignment.

    Returns:
        The token with surrounding whitespace (and .env quotes) removed.

    Raises:
        SystemExit: if no non-empty token is found in either place.
    """
    # Strip so a value pasted with a trailing newline/space is still usable.
    token = os.getenv("HF_TOKEN", "").strip()
    if not token:
        env_path = Path(__file__).parent.parent / ".env"
        if env_path.is_file():
            token = _parse_dotenv_token(env_path.read_text(encoding="utf-8"))
    if not token:
        raise SystemExit("HF_TOKEN not set (env or .env). Needs write permission.")
    return token
|
|
|
|
def main() -> None:
    """Check the local generation_config.json and upload it to REPO_ID.

    The file at LOCAL_FILE is parsed before any network call so that a
    missing or malformed config is never pushed to the model repo.

    Raises:
        SystemExit: if no token is available, or if LOCAL_FILE cannot be
            read, is not valid JSON, or is not a JSON object.
    """
    import json  # function-scope: only needed for the pre-upload sanity check

    token = _load_token()

    # Pre-flight: fail locally with a clear message rather than mid-upload.
    try:
        config = json.loads(LOCAL_FILE.read_text(encoding="utf-8"))
    except OSError as err:
        raise SystemExit(f"Cannot read {LOCAL_FILE}: {err}") from err
    except ValueError as err:  # JSONDecodeError and UnicodeDecodeError
        raise SystemExit(f"{LOCAL_FILE} is not valid JSON: {err}") from err
    if not isinstance(config, dict):
        raise SystemExit(f"{LOCAL_FILE} must contain a JSON object.")

    api = HfApi(token=token)
    api.upload_file(
        path_or_fileobj=str(LOCAL_FILE),
        path_in_repo="generation_config.json",
        repo_id=REPO_ID,
        repo_type="model",
        commit_message="Add generation_config.json (stop on <|im_end|>)",
    )
    print(f"Uploaded generation_config.json -> {REPO_ID}")


# Run the upload only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|
|