yuchenxie committed on
Commit
3574365
·
verified ·
1 Parent(s): 4fcf285

Upload folder using huggingface_hub

Browse files
chat_template.jinja ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {#- ChatML-style chat template. First emit the system turn: with tools, the optional leading system message is followed by the tool-calling instructions and one JSON signature per tool; without tools, only a leading system message (if any) is emitted. -#}{%- if tools %}
2
+ {{- '<|im_start|>system\n' }}
3
+ {%- if messages[0].role == 'system' %}
4
+ {{- messages[0].content + '\n\n' }}
5
+ {%- endif %}
6
+ {{- "# Tools\n\nYou are ArlowGPT, a helpful AI assistant, built by Yuchen Xie. You may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
7
+ {%- for tool in tools %}
8
+ {{- "\n" }}
9
+ {{- tool | tojson }}
10
+ {%- endfor %}
11
+ {{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
12
+ {%- else %}
13
+ {%- if messages[0].role == 'system' %}
14
+ {{- '<|im_start|>system\n' + messages[0].content + '<|im_end|>\n' }}
15
+ {%- endif %}
16
+ {%- endif %}
17
+ {#- Render every message as its own turn. Non-string content is rendered as an empty string. -#}{%- for message in messages %}
18
+ {%- if message.content is string %}
19
+ {%- set content = message.content %}
20
+ {%- else %}
21
+ {%- set content = '' %}
22
+ {%- endif %}
23
+ {#- A system message in first position was already emitted above, so only later system messages are rendered here. -#}{%- if (message.role == "user") or (message.role == "system" and not loop.first) %}
24
+ {{- '<|im_start|>' + message.role + '\n' + content + '<|im_end|>' + '\n' }}
25
+ {%- elif message.role == "assistant" %}
26
+ {{- '<|im_start|>' + message.role + '\n' + content }}
27
+ {%- if message.tool_calls %}
28
+ {%- for tool_call in message.tool_calls %}
29
+ {#- Separate tool calls from preceding text and from each other with a newline. -#}{%- if (loop.first and content) or (not loop.first) %}
30
+ {{- '\n' }}
31
+ {%- endif %}
32
+ {#- Accept both the wrapped form (tool_call.function) and the flat form. -#}{%- if tool_call.function %}
33
+ {%- set tool_call = tool_call.function %}
34
+ {%- endif %}
35
+ {{- '<tool_call>\n{"name": "' }}
36
+ {{- tool_call.name }}
37
+ {{- '", "arguments": ' }}
38
+ {#- String arguments are emitted verbatim; any other value is serialized with tojson. -#}{%- if tool_call.arguments is string %}
39
+ {{- tool_call.arguments }}
40
+ {%- else %}
41
+ {{- tool_call.arguments | tojson }}
42
+ {%- endif %}
43
+ {{- '}\n</tool_call>' }}
44
+ {%- endfor %}
45
+ {%- endif %}
46
+ {{- '<|im_end|>\n' }}
47
+ {%- elif message.role == "tool" %}
48
+ {#- Consecutive tool messages share one user turn: open it on the first of a run, close it after the last. -#}{%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
49
+ {{- '<|im_start|>user' }}
50
+ {%- endif %}
51
+ {{- '\n<tool_response>\n' }}
52
+ {{- content }}
53
+ {{- '\n</tool_response>' }}
54
+ {%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
55
+ {{- '<|im_end|>\n' }}
56
+ {%- endif %}
57
+ {%- endif %}
58
+ {%- endfor %}
59
+ {#- Optionally open an assistant turn so generation continues from it. -#}{%- if add_generation_prompt %}
60
+ {{- '<|im_start|>assistant\n' }}
61
+ {%- endif %}
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
special_tokens_map.json ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ {
2
+ "eos_token": "<|endoftext|>",
3
+ "unk_token": "<|endoftext|>",
4
+ "pad_token": "<|endoftext|>"
5
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<|im_start|>",
4
+ "<|im_end|>",
5
+ "<|vision_start|>",
6
+ "<|vision_end|>",
7
+ "<|vision_pad|>",
8
+ "<|image_pad|>",
9
+ "<|video_pad|>",
10
+ "<|object_ref_start|>",
11
+ "<|object_ref_end|>",
12
+ "<|box_start|>",
13
+ "<|box_end|>",
14
+ "<|quad_start|>",
15
+ "<|quad_end|>",
16
+ "<tool_call>",
17
+ "</tool_call>",
18
+ "<tool_response>",
19
+ "</tool_response>",
20
+ "<|fim_prefix|>",
21
+ "<|fim_middle|>",
22
+ "<|fim_suffix|>",
23
+ "<|fim_pad|>",
24
+ "<|repo_name|>",
25
+ "<|file_sep|>",
26
+ "<think>",
27
+ "</think>"
28
+ ],
29
+ "backend": "tokenizers",
30
+ "bos_token": null,
31
+ "eos_token": "<|endoftext|>",
32
+ "extra_special_tokens": [
33
+ "<image>",
34
+ "<video>",
35
+ "<|vision_start|>",
36
+ "<|vision_end|>"
37
+ ],
38
+ "image_token_id": 131072,
39
+ "model_max_length": 131072,
40
+ "pad_token": "<|endoftext|>",
41
+ "tokenizer_class": "ArlowTokenizer",
42
+ "unk_token": "<|endoftext|>",
43
+ "video_token_id": 131073,
44
+ "vision_end_token_id": 4,
45
+ "vision_start_token_id": 3
46
+ }
vocab.json ADDED
The diff for this file is too large to render. See raw diff