kirana-detective / finetune /push_generation_config.py
naazimsnh02's picture
Upload generation_config.json
e4a9cee
Raw
History Blame
1.58 kB
"""Upload generation_config.json to the merged MiniCPM-V model repo.
Fixes runaway generation: the merged model has no generation_config.json and its
config.json sets eos_token_id to <|endoftext|> (248044), which the chat format
never emits, so generate() ran to max_new_tokens. This config stops on <|im_end|>.
Usage (needs a write-scoped HF_TOKEN in env or .env):
python finetune/push_generation_config.py
"""
from __future__ import annotations
import os
from pathlib import Path
from huggingface_hub import HfApi
# Hub repository that receives the file; passed to upload_file() as repo_id
# with repo_type="model".
REPO_ID = "build-small-hackathon/minicpm-v-4-6-indian-invoice-extraction-merged"
# The generation_config.json that sits in the same directory as this script.
LOCAL_FILE = Path(__file__).parent / "generation_config.json"
def _load_token() -> str:
    """Return the Hugging Face token from the environment or the project ``.env``.

    Lookup order:

    1. The ``HF_TOKEN`` environment variable (surrounding whitespace removed).
    2. The first ``HF_TOKEN`` assignment in the ``.env`` file located one
       directory above this script's directory. An optional leading
       ``export``, spaces around ``=``, and one pair of matching surrounding
       quotes are tolerated.

    Returns:
        The non-empty token string.

    Raises:
        SystemExit: If neither source yields a non-empty token.
    """
    # A stray newline/space (common when exporting from a file) would otherwise
    # become part of the token.
    token = os.getenv("HF_TOKEN", "").strip()
    if token:
        return token

    env_file = Path(__file__).parent.parent / ".env"
    # is_file() rather than exists(): a directory named ".env" must not reach
    # read_text().
    if env_file.is_file():
        for raw_line in env_file.read_text(encoding="utf-8").splitlines():
            line = raw_line.strip()
            if line.startswith("export "):
                line = line[len("export "):].lstrip()
            key, sep, value = line.partition("=")
            if not sep or key.strip() != "HF_TOKEN":
                continue
            value = value.strip()
            # Drop one pair of matching quotes: HF_TOKEN="hf_..." / 'hf_...'.
            if len(value) >= 2 and value[0] == value[-1] and value[0] in "'\"":
                value = value[1:-1].strip()
            token = value
            # Only the first HF_TOKEN assignment is considered.
            break

    if not token:
        raise SystemExit("HF_TOKEN not set (env or .env). Needs write permission.")
    return token
def main() -> None:
    """Upload the local ``generation_config.json`` to the model repo ``REPO_ID``.

    The file at ``LOCAL_FILE`` is pushed to the repo root as
    ``generation_config.json`` in a single commit, then a confirmation line is
    printed.

    Raises:
        SystemExit: If ``LOCAL_FILE`` does not exist, or if no token can be
            found (raised by ``_load_token``).
    """
    # Fail with a clear message before any network work if there is nothing
    # to upload.
    if not LOCAL_FILE.is_file():
        raise SystemExit(f"{LOCAL_FILE} not found; nothing to upload.")

    api = HfApi(token=_load_token())
    api.upload_file(
        path_or_fileobj=str(LOCAL_FILE),
        path_in_repo="generation_config.json",
        repo_id=REPO_ID,
        repo_type="model",
        commit_message="Add generation_config.json (stop on <|im_end|>)",
    )
    print(f"Uploaded generation_config.json -> {REPO_ID}")
# Run the upload only when executed as a script, not when imported.
if __name__ == "__main__":
    main()