HyperscaleAI bigshanedogg committed on
Commit
a03175b
·
1 Parent(s): ddcb423

Migration to Native transformers Support (>= 5.9.0) (#12)

Browse files

- Migration to Native transformers Support (>= 5.9.0) (76f1e8140d2f2658fb681fa07de08155096cce06)


Co-authored-by: bigshane <bigshanedogg@users.noreply.huggingface.co>

Files changed (4) hide show
  1. README.md +42 -306
  2. chat_template.jinja +108 -0
  3. generation_config.json +5 -2
  4. tokenizer_config.json +1 -1
README.md CHANGED
@@ -369,7 +369,7 @@ For all later turns, the reasoning (think) content from previous turns is not ad
369
 
370
  ## **Huggingface Usage Example**
371
 
372
- After downloading the model binaries, including the configuration files, to a local path(`/path/to/hyperclova-x-seed-think-14b`), you can run the following in a Python environment with the [Huggingface library](https://huggingface.co/docs/transformers/installation)(verified to work with version >= 4.53.0) and [timm(pytorch-image-models)](https://github.com/huggingface/pytorch-image-models) installed.
373
 
374
  You can use the `apply_chat_template` parameter to explicitly enable or disable the reasoning feature.
375
 
@@ -390,70 +390,16 @@ inputs = tokenizer.apply_chat_template(chat, add_generation_prompt=True, force_r
390
  inputs = tokenizer.apply_chat_template(chat, add_generation_prompt=True, skip_reasoning=True, return_dict=True, return_tensors="pt")
391
  ```
392
 
393
- ### Non-think Example Code
394
- ```python
395
- from transformers import AutoModelForCausalLM, AutoTokenizer
396
-
397
- model_name = "naver-hyperclovax/HyperCLOVAX-SEED-Think-14B"
398
- model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True, device_map="auto")
399
- tokenizer = AutoTokenizer.from_pretrained(model_name)
400
-
401
- chat = [
402
- {"role": "system", "content": "- In this environment, various tools can be used to answer users' questions.\n- You are \"CLOVA X,\" an AI language model developed by NAVER.\n- Begin by creating a plan for solving the problem, and then utilize the tools accordingly to address the problem.\n- The current date is Monday, July 21, 2025.\n- Latest information such as news, stock prices, and shopping is retrieved through the tool_list.\n- If external tools are required, the assistant should not answer directly but must first obtain the necessary information via the assistant -> tool/function_call role, and then respond."},
403
- {"role": "user", "content": "Explain in as much detail as possible the relationship between the Schrödinger equation and quantum mechanics."},
404
- ]
405
-
406
- # By adding skip_reasoning=True, the model is forced to always answer directly without reasoning
407
- inputs = tokenizer.apply_chat_template(chat, add_generation_prompt=True, skip_reasoning=True, return_dict=True, return_tensors="pt")
408
- inputs = inputs.to("cuda")
409
-
410
- output_ids = model.generate(
411
- **inputs,
412
- max_length=1024,
413
- stop_strings=["<|endofturn|>", "<|stop|>"],
414
- temperature=0.5,
415
- top_p=0.6,
416
- repetition_penalty=1.05,
417
- tokenizer=tokenizer
418
- )
419
- print(tokenizer.batch_decode(output_ids))
420
- ```
421
-
422
- ### Think Example Code
423
- ```python
424
- from transformers import AutoModelForCausalLM, AutoTokenizer
425
-
426
- model_name = "naver-hyperclovax/HyperCLOVAX-SEED-Think-14B"
427
- model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True, device_map="auto")
428
- tokenizer = AutoTokenizer.from_pretrained(model_name)
429
-
430
- chat = [
431
- {"role": "system", "content": "- In this environment, various tools can be used to answer users' questions.\n- You are \"CLOVA X,\" an AI language model developed by NAVER.\n- Begin by creating a plan for solving the problem, and then utilize the tools accordingly to address the problem.\n- The current date is Monday, July 21, 2025.\n- Latest information such as news, stock prices, and shopping is retrieved through the tool_list.\n- If external tools are required, the assistant should not answer directly but must first obtain the necessary information via the assistant -> tool/function_call role, and then respond."},
432
- {"role": "user", "content": "Explain in as much detail as possible the relationship between the Schrödinger equation and quantum mechanics."},
433
- ]
434
 
435
- # By adding force_reasoning=True, the model is forced to always reason before responding
436
- inputs = tokenizer.apply_chat_template(chat, add_generation_prompt=True, force_reasoning=True, return_dict=True, return_tensors="pt")
437
- inputs = inputs.to("cuda")
438
 
439
- output_ids = model.generate(
440
- **inputs,
441
- max_length=1024,
442
- stop_strings=["<|endofturn|>", "<|stop|>"],
443
- temperature=0.5,
444
- top_p=0.6,
445
- repetition_penalty=1.05,
446
- tokenizer=tokenizer
447
- )
448
- print(tokenizer.batch_decode(output_ids))
449
- ```
450
-
451
- ### Hybrid(the model decides whether to use think or non-think mode) Example Code
452
  ```python
453
  from transformers import AutoModelForCausalLM, AutoTokenizer
454
 
455
  model_name = "naver-hyperclovax/HyperCLOVAX-SEED-Think-14B"
456
- model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True, device_map="auto")
457
  tokenizer = AutoTokenizer.from_pretrained(model_name)
458
 
459
  chat = [
@@ -461,8 +407,10 @@ chat = [
461
  {"role": "user", "content": "Explain in as much detail as possible the relationship between the Schrödinger equation and quantum mechanics."},
462
  ]
463
 
464
- # The model decides whether to answer after reasoning or to respond immediately without reasoning
465
  inputs = tokenizer.apply_chat_template(chat, add_generation_prompt=True, return_dict=True, return_tensors="pt")
 
 
466
  inputs = inputs.to("cuda")
467
 
468
  output_ids = model.generate(
@@ -485,7 +433,7 @@ import json
485
  from transformers import AutoModelForCausalLM, AutoTokenizer
486
 
487
  model_name = "naver-hyperclovax/HyperCLOVAX-SEED-Think-14B"
488
- model = AutoModelForCausalLM.from_pretrained(model_name, trust_remote_code=True, device_map="auto")
489
  tokenizer = AutoTokenizer.from_pretrained(model_name)
490
 
491
  # 1) The name of the tool should be written as function_call.{{ name }}.
@@ -547,265 +495,53 @@ print(tokenizer.batch_decode(output_ids))
547
 
548
  ## **vLLM Usage Example**
549
 
550
- The HyperCLOVA X SEED Think model is built on a custom LLM architecture based on the LLaMA architecture, incorporating μP and Peri-LN techniques. For convenient use with vLLM, it is available as a dedicated vLLM plugin that can be installed and used with ease once vLLM is set up.
551
-
552
- 1. Download vLLM plugin source code
553
 
554
- ```bash
555
- git clone https://github.com/NAVER-Cloud-HyperCLOVA-X/hcx-vllm-plugin
556
- ```
557
-
558
- 2. vLLM Plugin Build & Installation: While keeping the NAVER-Cloud-HyperCLOVA-X/hcx-vllm-plugin path downloaded in step 1, refer to the commands below.
559
 
560
- ```bash
561
- pip install -e .
562
- ```
563
 
564
- After downloading the model checkpoint to a local path (`/path/to/hyperclova-x-seed-think-14b`), you can perform text inference by running the following commands on a GPU environment with A100 or higher.
 
565
 
566
  ```bash
567
- python -m vllm.entrypoints.openai.api_server --model=/path/to/hyperclova-x-seed-think-14b --trust_remote_code --port=8000
568
-
569
- curl http://localhost:8000/v1/completions \
570
  -H "Content-Type: application/json" \
571
  -d '{
572
- "prompt": "<|im_start|>tool_list\n<|im_end|>\n<|im_start|>system\n- The AI language model is named \"CLOVA X\" and was developed by NAVER.\n- Today is Friday, July 18, 2025.<|im_end|>\n<|im_start|>user\nExplain in as much detail as possible the relationship between the Schrödinger equation and quantum mechanics.<|im_end|>\n<|im_start|>assistant/think\n",
573
- "top_k":-1,
574
- "temperature":0.5,
575
- "top_p":0.6,
576
- "repetition_penalty":1.05,
577
- "stop":["<|im_end|><|endofturn|>", "<|im_end|><|stop|>"],
578
- "max_tokens":8192,
579
- "skip_special_tokens":false
 
 
 
 
580
  }'
581
  ```
582
 
583
- ### Chat Completions Usage Example
584
-
585
- 1. Using the Chat completions endpoint
586
-
587
- <!-- end list -->
588
-
589
- - Basic serving script (same as completions)
590
-
591
- `vllm serve naver-hyperclovax/HyperCLOVAX-SEED-Think-14B --trust_remote_code`
592
-
593
- - Sampling parameters such as `top_k`, `temperature`, `top_p`, `repetition_penalty`, and `max_tokens` can be set freely.
594
-
595
- - However, the `skip_special_tokens` and `stop` options must be set as below for vLLM to recognize the model's token generation stop signal and cease generation.
596
-
597
- - request example
598
-
599
- ```bash
600
- curl -X POST http://localhost:8000/v1/chat/completions \
601
- -H "Content-Type: application/json" \
602
- -d '{
603
- "model": "naver-hyperclovax/HyperCLOVAX-SEED-Think-14B",
604
- "messages": [
605
- {"role": "system", "content": "- The AI language model is named \"CLOVA X\" and was developed by NAVER.\n- Today is Friday, July 18, 2025."},
606
- {"role": "user", "content": "Explain in as much detail as possible the relationship between the Schrödinger equation and quantum mechanics."}
607
- ],
608
- "skip_special_tokens":false,
609
- "stop": [
610
- "<|im_end|><|endofturn|>",
611
- "<|im_end|><|stop|>"
612
- ],
613
- "top_k": -1,
614
- "temperature": 0.5,
615
- "top_p": 0.6,
616
- "repetition_penalty": 1.05,
617
- "max_tokens": 8192
618
- }'
619
- ```
620
-
621
- <!-- end list -->
622
-
623
- 2. tool call usage example
624
-
625
- <!-- end list -->
626
-
627
- - Serving script
628
-
629
- - You need to add `--enable-auto-tool-choice --tool-call-parser hcx` to the existing script.
630
-
631
- `vllm serve naver-hyperclovax/HyperCLOVAX-SEED-Think-14B --trust_remote_code --enable-auto-tool-choice --tool-call-parser hcx`
632
-
633
- - request example
634
-
635
- - If you put the available tools in `tools`, they will be applied to the `tool_list` part passed to the model.
636
-
637
- ```bash
638
- curl -X POST http://localhost:8000/v1/chat/completions \
639
- -H "Content-Type: application/json" \
640
- -d '{
641
- "model": "naver-hyperclovax/HyperCLOVAX-SEED-Think-14B",
642
- "messages": [
643
- {"role": "user", "content": "Could you please tell me the current weather conditions in Boston, MA in Celsius?"}
644
- ],
645
- "stop": ["<|im_end|><|stop|>", "<|im_end|><|endofturn|>"],
646
- "max_tokens": 8192,
647
- "skip_special_tokens": false,
648
- "tools": [
649
- {
650
- "type": "function",
651
- "function": {
652
- "name": "get_current_weather",
653
- "description": "Retrieves the current weather conditions for a specified city and state.",
654
- "parameters": {
655
- "type": "object",
656
- "required": ["location"],
657
- "properties": {
658
- "location": {
659
- "type": "string",
660
- "description": "The location for which to get the weather, in the format of \'\\'City, State\\'', such as \'\\'San Francisco, CA\\'' if State for the city exists. \'\\'City, Country\\'' if State for the city does not exist. Use short form for state."
661
- },
662
- "unit": {
663
- "type": "string",
664
- "description": "The unit of temperature for the weather report.",
665
- "enum": ["celsius", "fahrenheit"],
666
- "default": "fahrenheit"
667
- }
668
- }
669
- }
670
- }
671
- }
672
- ]
673
- }'
674
- ```
675
-
676
- - response example
677
-
678
- - Parsed tool calls are returned in the `tool_calls` field.
679
- - If there is a response generated by the model other than the tool call, it is returned in the `content` field. Otherwise, `null` is returned.
680
- ```bash
681
- {
682
- "id": "chatcmpl-b9aad45639464c0ebf71861df13b4eb2",
683
- "object": "chat.completion",
684
- "created": 1753358351,
685
- "model": "naver-hyperclovax/HyperCLOVAX-SEED-Think-14B",
686
- "choices": [
687
- {
688
- "index": 0,
689
- "message": {
690
- "role": "assistant",
691
- "reasoning_content": null,
692
- "content": null,
693
- "tool_calls": [
694
- {
695
- "id": "chatcmpl-tool-b9513352e4c64065a315e495b2613753",
696
- "type": "function",
697
- "function": {
698
- "name": "get_current_weather",
699
- "arguments": "{\"location\": \"Boston, MA\", \"unit\": \"celsius\"}"
700
- }
701
- }
702
- ]
703
- },
704
- "logprobs": null,
705
- "finish_reason": "tool_calls",
706
- "stop_reason": "<|im_end|><|stop|>"
707
- }
708
- ],
709
- "usage": {
710
- "prompt_tokens": 189,
711
- "total_tokens": 224,
712
- "completion_tokens": 35,
713
- "prompt_tokens_details": null
714
- },
715
- "prompt_logprobs": null,
716
- "kv_transfer_params": null
717
- }
718
- ```
719
-
720
- <!-- end list -->
721
-
722
- 3. reasoning usage example
723
-
724
- <!-- end list -->
725
-
726
- - Serving script
727
- - You need to add `--enable-reasoning --reasoning-parser hcx` to the existing script.
728
-
729
- `vllm serve naver-hyperclovax/HyperCLOVAX-SEED-Think-14B --trust_remote_code --enable-reasoning --reasoning-parser hcx`
730
-
731
- - The `--enable-reasoning` option has been deprecated since vLLM v0.9.0.
732
-
733
- - If you are using vLLM v0.9.0 or higher, you only need to add `--reasoning-parser hcx` without `--enable-reasoning`.
734
-
735
- - The reasoning parser extracts the reasoning content from responses generated in reasoning mode. This option does not always make the model operate in reasoning mode, nor does excluding the parser necessarily force non-reasoning operation.
736
-
737
- - request example
738
-
739
- - `"chat_template_kwargs": {"force_reasoning": true}` forces reasoning.
740
- - `"chat_template_kwargs": {"skip_reasoning": true}` forces non-reasoning.
741
- - If both are set to `true`, `force_reasoning: true` has higher priority.
742
- - If neither is given, the model decides whether to reason or not.
743
-
744
- <!-- end list -->
745
-
746
- ```bash
747
- curl -X POST http://localhost:8000/v1/chat/completions \
748
- -H "Content-Type: application/json" \
749
- -d '{
750
- "model": "naver-hyperclovax/HyperCLOVAX-SEED-Think-14B",
751
- "messages": [
752
- {"role": "system", "content": "You are a helpful assistant."},
753
- {"role": "user", "content": "Tell me the prime number closest to 1000."}
754
- ],
755
- "stop": ["<|im_end|><|stop|>", "<|im_end|><|endofturn|>"],
756
- "max_tokens": 8192,
757
- "skip_special_tokens": false,
758
- "chat_template_kwargs": {"force_reasoning": true}
759
- }'
760
- ```
761
-
762
- - response example
763
-
764
- - The reasoning part is returned in the `reasoning_content` field, and the assistant's final response is returned in the `content` field separately.
765
-
766
- <!-- end list -->
767
-
768
- ```bash
769
- {
770
- "id": "chatcmpl-157d282ebaca4333a9f04b1bdfa7eb8b",
771
- "object": "chat.completion",
772
- "created": 1753361336,
773
- "model": "naver-hyperclovax/HyperCLOVAX-SEED-Think-14B",
774
- "choices": [
775
- {
776
- "index": 0,
777
- "message": {
778
- "role": "assistant",
779
- "reasoning_content": "Okay, so I need to find the prime number closest to 1000. Hmm, let's start by recalling what a prime number is. ... (중략) ... But in this case, at least for the nearest, 3 versus9, which gives997 as the answer.\n\nTherefore, the correct answer would be997.",
780
- "content": "\nThe prime number closest to 1000 is **997**. \n\nHere's the reasoning:\n1. **Check if 1000 is prime:** It's even, so not prime.\n2. **Find primes near 1000:**\n - **Below 1000:** Check numbers downward. \n - Upon verifying, 997 is prime (no divisors up to its square root).\n - **Above 1000:** Next prime after 997 is 1009, which is 9 units away from 1000.\n3. **Compare distances:**\n - \\( |1000 - 997| = 3 \\)\n - \\( |1009 - 1000| = 9 \\)\n \nSince 3 < 9, **997** is the closest prime. \n\n\\boxed{997}",
781
- "tool_calls": []
782
- },
783
- "logprobs": null,
784
- "finish_reason": "stop",
785
- "stop_reason": "<|im_end|><|endofturn|>"
786
- }
787
- ],
788
- "usage": {
789
- "prompt_tokens": 38,
790
- "total_tokens": 3254,
791
- "completion_tokens": 3216,
792
- "prompt_tokens_details": null
793
- },
794
- "prompt_logprobs": null,
795
- "kv_transfer_params": null
796
- }
797
- ```
798
 
799
- <!-- end list -->
800
 
801
- 4. reasoning + tool call usage example
 
 
 
802
 
803
- <!-- end list -->
804
 
805
- - Serving script
806
- - If you want to use both the reasoning parser and the tool call parser, you can combine the reasoning serving script and the tool call serving script.
 
807
 
808
- `vllm serve naver-hyperclovax/HyperCLOVAX-SEED-Think-14B --trust_remote_code --enable-auto-tool-choice --tool-call-parser hcx --enable-reasoning --reasoning-parser hcx`
809
 
810
 
811
  ## License
@@ -828,4 +564,4 @@ The model is licensed under [HyperCLOVA X SEED Model License Agreement](./LICENS
828
 
829
  ## Questions
830
 
831
- For any other questions, please feel free to contact us at [dl_hcxopensource@navercorp.com](mailto:dl_hcxopensource@navercorp.com).
 
369
 
370
  ## **Huggingface Usage Example**
371
 
372
+ After downloading the model binaries, including the configuration files, to a local path (`/path/to/hyperclova-x-seed-think-14b`), you can run the following in a Python environment with the [Hugging Face Transformers library](https://huggingface.co/docs/transformers/installation) (verified to work with version >= 5.9.0) installed.
373
 
374
  You can pass the `force_reasoning` or `skip_reasoning` keyword argument to `apply_chat_template` to explicitly enable or disable the reasoning feature.
375
 
 
390
  inputs = tokenizer.apply_chat_template(chat, add_generation_prompt=True, skip_reasoning=True, return_dict=True, return_tensors="pt")
391
  ```
392
 
393
+ ### Basic Example Code
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
394
 
395
+ The example below runs the model in **hybrid mode** (the model decides whether to reason).
396
+ To force a specific mode, replace the `inputs = ...` line with one of the commented alternatives.
 
397
 
 
 
 
 
 
 
 
 
 
 
 
 
 
398
  ```python
399
  from transformers import AutoModelForCausalLM, AutoTokenizer
400
 
401
  model_name = "naver-hyperclovax/HyperCLOVAX-SEED-Think-14B"
402
+ model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto")
403
  tokenizer = AutoTokenizer.from_pretrained(model_name)
404
 
405
  chat = [
 
407
  {"role": "user", "content": "Explain in as much detail as possible the relationship between the Schrödinger equation and quantum mechanics."},
408
  ]
409
 
410
+ # Hybrid (default): the model decides whether to reason before answering.
411
  inputs = tokenizer.apply_chat_template(chat, add_generation_prompt=True, return_dict=True, return_tensors="pt")
412
+ # Think mode: inputs = tokenizer.apply_chat_template(chat, add_generation_prompt=True, force_reasoning=True, return_dict=True, return_tensors="pt")
413
+ # Non-think mode: inputs = tokenizer.apply_chat_template(chat, add_generation_prompt=True, skip_reasoning=True, return_dict=True, return_tensors="pt")
414
  inputs = inputs.to("cuda")
415
 
416
  output_ids = model.generate(
 
433
  from transformers import AutoModelForCausalLM, AutoTokenizer
434
 
435
  model_name = "naver-hyperclovax/HyperCLOVAX-SEED-Think-14B"
436
+ model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto")
437
  tokenizer = AutoTokenizer.from_pretrained(model_name)
438
 
439
  # 1) The name of the tool should be written as function_call.{{ name }}.
 
495
 
496
  ## **vLLM Usage Example**
497
 
498
+ The HyperCLOVA X SEED Think model is natively supported by vLLM. After installing vLLM, you can serve the model directly.
 
 
499
 
500
+ ```bash
501
+ pip install vllm
502
+ vllm serve naver-hyperclovax/HyperCLOVAX-SEED-Think-14B
503
+ ```
 
504
 
505
+ ### Chat Completions Request
 
 
506
 
507
+ - Sampling parameters such as `top_k`, `temperature`, `top_p`, `repetition_penalty`, and `max_tokens` can be set freely.
508
+ - However, the `skip_special_tokens` and `stop` options must be set as below for vLLM to recognize the model's token generation stop signal and cease generation.
509
 
510
  ```bash
511
+ curl -X POST http://localhost:8000/v1/chat/completions \
 
 
512
  -H "Content-Type: application/json" \
513
  -d '{
514
+ "model": "naver-hyperclovax/HyperCLOVAX-SEED-Think-14B",
515
+ "messages": [
516
+ {"role": "system", "content": "- The AI language model is named \"CLOVA X\" and was developed by NAVER.\n- Today is Friday, July 18, 2025."},
517
+ {"role": "user", "content": "Explain in as much detail as possible the relationship between the Schrödinger equation and quantum mechanics."}
518
+ ],
519
+ "skip_special_tokens": false,
520
+ "stop": ["<|im_end|><|endofturn|>", "<|im_end|><|stop|>"],
521
+ "top_k": -1,
522
+ "temperature": 0.5,
523
+ "top_p": 0.6,
524
+ "repetition_penalty": 1.05,
525
+ "max_tokens": 8192
526
  }'
527
  ```
528
 
529
+ ### Controlling Reasoning Mode
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
530
 
531
+ Use `chat_template_kwargs` in the request body to control reasoning behavior:
532
 
533
+ - `{"force_reasoning": true}` — Always reason before answering.
534
+ - `{"skip_reasoning": true}` — Always answer directly without reasoning.
535
+ - If neither is given, the model decides on its own.
536
+ - If both are set to `true`, `force_reasoning` takes priority.
537
 
538
+ Example:
539
 
540
+ ```json
541
+ "chat_template_kwargs": {"force_reasoning": true}
542
+ ```
543
 
544
+ > **Note:** Native `--reasoning-parser` and `--tool-call-parser` support for HyperCLOVA X is not yet available in upstream vLLM. To extract `reasoning_content` and `tool_calls` as structured response fields, please refer to the [hcx-vllm-plugin](https://github.com/NAVER-Cloud-HyperCLOVA-X/hcx-vllm-plugin) repository.
545
 
546
 
547
  ## License
 
564
 
565
  ## Questions
566
 
567
+ For any other questions, please feel free to contact us at [dl_hcxopensource@navercorp.com](mailto:dl_hcxopensource@navercorp.com).
chat_template.jinja ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {% if tools is not defined or tools is none %}
2
+ {{- '<|im_start|>tool_list\n<|im_end|>\n' }}
3
+ {%- else %}
4
+ {{- '<|im_start|>tool_list\n[' }}
5
+ {%- for tool in tools %}
6
+ {{- '{"name": "' }}
7
+ {{- tool.function.name }}
8
+ {{- '", ' }}
9
+ {{- '"description": "' }}
10
+ {{- tool.function.description }}
11
+ {{- '"' }}
12
+ {%- if tool.function.parameters is defined %}
13
+ {{- ', "parameters": ' }}
14
+ {{- tool.function.parameters | tojson }}
15
+ {%- endif %}
16
+ {{- '}' }}
17
+ {%- if not loop.last %}
18
+ {{- ', ' }}
19
+ {%- endif %}
20
+ {%- endfor %}
21
+ {{- ']<|im_end|>\n' }}
22
+ {%- endif %}
23
+
24
+ {%- set ns = namespace(is_searching=true, last_query_index=-1) %}
25
+ {%- for message in messages[::-1] %}
26
+ {%- set index = (messages|length - 1) - loop.index0 %}
27
+ {%- if ns.is_searching and (message.role == 'user' or message.role == 'tool') %}
28
+ {%- set ns.last_query_index = index %}
29
+ {%- set ns.is_searching = false %}
30
+ {%- endif %}
31
+ {%- endfor %}
32
+
33
+ {%- for message in messages %}
34
+ {%- if loop.index0 == 0 and message.role != 'system' %}
35
+ {{- '<|im_start|>system\n<|im_end|>\n' }}
36
+ {%- endif %}
37
+
38
+ {%- if message.content is string %}
39
+ {%- set content = message.content %}
40
+ {%- elif message.content is iterable and message.content is not none %}
41
+ {%- set ns_content = namespace(text='') %}
42
+ {%- for part in message.content %}
43
+ {%- if part.type is defined and part.type == 'text' and part.text is defined %}
44
+ {%- set ns_content.text = ns_content.text + part.text %}
45
+ {%- endif %}
46
+ {%- endfor %}
47
+ {%- set content = ns_content.text %}
48
+ {%- else %}
49
+ {%- set content = '' %}
50
+ {%- endif %}
51
+
52
+ {%- set reasoning_content = '' %}
53
+ {%- if message.reasoning_content is defined and message.reasoning_content is not none %}
54
+ {%- set reasoning_content = message.reasoning_content %}
55
+ {%- endif %}
56
+ {%- if message.role == "assistant" %}
57
+ {%- if loop.index0 > ns.last_query_index %}
58
+ {%- if reasoning_content %}
59
+ {{- '<|im_start|>assistant/think\n' + reasoning_content.strip('\n') + '<|im_end|>\n' }}
60
+ {%- endif %}
61
+ {%- endif %}
62
+
63
+ {%- if content %}
64
+ {{- '<|im_start|>assistant\n' + content.strip('\n') + '<|im_end|>' }}
65
+ {%- if message.tool_calls %}
66
+ {{- '\n' }}
67
+ {%- else %}
68
+ {{- '<|endofturn|>\n' }}
69
+ {%- endif %}
70
+ {%- endif %}
71
+
72
+ {%- if message.tool_calls %}
73
+ {{- '<|im_start|>assistant -> tool/function_call\n[' }}
74
+ {%- for tool_call in message.tool_calls %}
75
+ {%- if not loop.first %}
76
+ {{- ', ' }}
77
+ {%- endif %}
78
+ {%- if tool_call.function %}
79
+ {%- set tool_call = tool_call.function %}
80
+ {%- endif %}
81
+ {{- '{"name": "' }}
82
+ {{- tool_call.name }}
83
+ {{- '", "arguments": ' }}
84
+ {%- if tool_call.arguments is string %}
85
+ {{- tool_call.arguments }}
86
+ {%- else %}
87
+ {{- tool_call.arguments | tojson }}
88
+ {%- endif %}
89
+ {{- '}' }}
90
+ {%- endfor %}
91
+ {{- ']<|im_end|><|stop|>\n' }}
92
+
93
+ {%- endif %}
94
+ {%- elif message.role == "tool" %}
95
+ {{- '<|im_start|>tool/function_call\n' + content + '<|im_end|>\n' }}
96
+ {%- else %}
97
+ {{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>\n' }}
98
+ {%- endif %}
99
+ {%- endfor %}
100
+ {%- if add_generation_prompt %}
101
+ {%- if force_reasoning is defined and force_reasoning is true %}
102
+ {{- '<|im_start|>assistant/think\n' }}
103
+ {%- elif skip_reasoning is defined and skip_reasoning is true %}
104
+ {{- '<|im_start|>assistant/think\n<|im_end|>\n<|im_start|>assistant\n' }}
105
+ {%- else %}
106
+ {{- '<|im_start|>assistant' }}
107
+ {%- endif %}
108
+ {%- endif %}
generation_config.json CHANGED
@@ -1,8 +1,11 @@
1
  {
2
- "_from_model_config": true,
3
  "bos_token_id": 100257,
4
  "eos_token_id": 100257,
 
5
  "pad_token_id": 100257,
6
- "transformers_version": "4.52.4",
 
 
 
7
  "use_cache": false
8
  }
 
1
  {
 
2
  "bos_token_id": 100257,
3
  "eos_token_id": 100257,
4
+ "max_new_tokens": 256,
5
  "pad_token_id": 100257,
6
+ "stop_strings": [
7
+ "<|endofturn|>",
8
+ "<|stop|>"
9
+ ],
10
  "use_cache": false
11
  }
tokenizer_config.json CHANGED
@@ -491,9 +491,9 @@
491
  "<PASSWORD>"
492
  ],
493
  "bos_token": "<|endoftext|>",
494
- "chat_template": "{% if tools is not defined or tools is none %}\n {{- '<|im_start|>tool_list\\n<|im_end|>\\n' }}\n{%- else %}\n {{- '<|im_start|>tool_list\\n[' }}\n {%- for tool in tools %}\n {{- '{\"name\": \"' }}\n {{- tool.function.name }}\n {{- '\", ' }}\n {{- '\"description\": \"' }}\n {{- tool.function.description }}\n {{- '\"' }}\n {%- if tool.function.parameters is defined %}\n {{- ', \"parameters\": ' }}\n {{- tool.function.parameters | tojson }}\n {%- endif %}\n {{- '}' }}\n {%- if not loop.last %}\n {{- ', ' }}\n {%- endif %}\n {%- endfor %}\n{{- ']<|im_end|>\\n' }}\n{%- endif %}\n\n{%- set ns = namespace(is_searching=true, last_query_index=messages|length - 1) %}\n{%- for message in messages[::-1] %}\n {%- set index = (messages|length - 1) - loop.index0 %}\n {%- if ns.is_searching and (message.role == 'user' or message.role == 'tool') %}\n {%- set ns.last_query_index = index %}\n {%- set ns.is_searching = false %}\n {%- endif %}\n{%- endfor %}\n\n{%- for message in messages %}\n {%- if loop.index0 == 0 and message.role != 'system' %}\n {{- '<|im_start|>system\\n<|im_end|>\\n' }}\n {%- endif %}\n\n {%- if message.content is string %}\n {%- set content = message.content %}\n {%- else %}\n {%- set content = '' %}\n {%- endif %}\n\n {%- set reasoning_content = '' %}\n {%- if message.reasoning_content is defined and message.reasoning_content is not none %}\n {%- set reasoning_content = message.reasoning_content %} \n {%- endif %}\n {%- if message.role == \"assistant\" %}\n {%- if loop.index0 > ns.last_query_index %}\n {%- if reasoning_content %}\n {{- '<|im_start|>assistant/think\\n' + reasoning_content.strip('\\n') + '<|im_end|>\\n' }}\n {%- endif %}\n {%- endif %}\n\n {%- if content %}\n {{- '<|im_start|>assistant\\n' + content.strip('\\n') + '<|im_end|>' }}\n {%- if message.tool_calls %}\n {{- '\\n' }}\n {%- else %}\n {{- '<|endofturn|>\\n' }}\n {%- endif %}\n {%- endif %}\n\n {%- if message.tool_calls %}\n {{- '<|im_start|>assistant -> 
tool/function_call\\n[' }}\n {%- for tool_call in message.tool_calls %}\n {%- if not loop.first %}\n {{- ', ' }}\n {%- endif %}\n {%- if tool_call.function %}\n {%- set tool_call = tool_call.function %}\n {%- endif %}\n {{- '{\"name\": \"' }}\n {{- tool_call.name }}\n {{- '\", \"arguments\": ' }}\n {%- if tool_call.arguments is string %}\n {{- tool_call.arguments }}\n {%- else %}\n {{- tool_call.arguments | tojson }}\n {%- endif %}\n {{- '}' }}\n {%- endfor %}\n {{- ']<|im_end|><|stop|>\\n' }}\n\n {%- endif %}\n {%- elif message.role == \"tool\" %}\n {{- '<|im_start|>tool/function_call\\n' + content + '<|im_end|>\\n' }}\n {%- else %}\n {{- '<|im_start|>' + message.role + '\\n' + content + '<|im_end|>\\n' }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {%- if force_reasoning is defined and force_reasoning is true %}\n {{- '<|im_start|>assistant/think\\n' }}\n {%- elif skip_reasoning is defined and skip_reasoning is true %}\n {{- '<|im_start|>assistant\\n' }}\n {%- else %}\n {{- '<|im_start|>assistant' }}\n {%- endif %}\n{%- endif %}",
495
  "clean_up_tokenization_spaces": true,
496
  "eos_token": "<|endoftext|>",
 
497
  "model_max_length": 1000000000000000019884624838656,
498
  "pad_token": "<|endoftext|>",
499
  "tokenizer_class": "GPT2Tokenizer",
 
491
  "<PASSWORD>"
492
  ],
493
  "bos_token": "<|endoftext|>",
 
494
  "clean_up_tokenization_spaces": true,
495
  "eos_token": "<|endoftext|>",
496
+ "extra_special_tokens": {},
497
  "model_max_length": 1000000000000000019884624838656,
498
  "pad_token": "<|endoftext|>",
499
  "tokenizer_class": "GPT2Tokenizer",