BechirS committed on
Commit
0e8e41b
·
verified ·
1 Parent(s): 0c4e40c

Training in progress, step 15

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ base_model: CohereLabs/tiny-aya-global
3
+ library_name: transformers
4
+ model_name: tiny-aya-global-SFT
5
+ tags:
6
+ - generated_from_trainer
7
+ - sft
8
+ - trl
9
+ - trackio:https://BechirS-tiny-aya-global-SFT.hf.space?project=huggingface&runs=BechirS-1781030515&sidebar=collapsed
10
+ licence: license
11
+ ---
12
+
13
+ # Model Card for tiny-aya-global-SFT
14
+
15
+ This model is a fine-tuned version of [CohereLabs/tiny-aya-global](https://huggingface.co/CohereLabs/tiny-aya-global).
16
+ It has been trained using [TRL](https://github.com/huggingface/trl).
17
+
18
+ ## Quick start
19
+
20
+ ```python
21
+ from transformers import pipeline
22
+
23
+ question = "If you had a time machine, but could only go to the past or the future once and never return, which would you choose and why?"
24
+ generator = pipeline("text-generation", model="BechirS/tiny-aya-global-SFT", device="cuda")
25
+ output = generator([{"role": "user", "content": question}], max_new_tokens=128, return_full_text=False)[0]
26
+ print(output["generated_text"])
27
+ ```
28
+
29
+ ## Training procedure
30
+
31
+
32
+ [<img src="https://raw.githubusercontent.com/gradio-app/trackio/refs/heads/main/trackio/assets/badge.png" alt="Visualize in Trackio" title="Visualize in Trackio" width="150" height="24"/>](https://BechirS-tiny-aya-global-SFT.hf.space?project=huggingface&runs=BechirS-1781030515&sidebar=collapsed)
33
+
34
+
35
+ This model was trained with supervised fine-tuning (SFT).
36
+
37
+ ### Framework versions
38
+
39
+ - TRL: 1.5.1
40
+ - Transformers: 5.10.1
41
+ - PyTorch: 2.11.0+cu128
42
+ - Datasets: 5.0.0
43
+ - Tokenizers: 0.22.2
44
+
45
+ ## Citations
46
+
47
+
48
+
49
+ Cite TRL as:
50
+
51
+ ```bibtex
52
+ @software{vonwerra2020trl,
53
+ title = {{TRL: Transformers Reinforcement Learning}},
54
+ author = {von Werra, Leandro and Belkada, Younes and Tunstall, Lewis and Beeching, Edward and Thrush, Tristan and Lambert, Nathan and Huang, Shengyi and Rasul, Kashif and Gallouédec, Quentin},
55
+ license = {Apache-2.0},
56
+ url = {https://github.com/huggingface/trl},
57
+ year = {2020}
58
+ }
59
+ ```
adapter_config.json ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alora_invocation_tokens": null,
3
+ "alpha_pattern": {},
4
+ "arrow_config": null,
5
+ "auto_mapping": {
6
+ "base_model_class": "Cohere2ForCausalLM",
7
+ "parent_library": "transformers.models.cohere2.modeling_cohere2"
8
+ },
9
+ "base_model_name_or_path": "CohereLabs/tiny-aya-global",
10
+ "bias": "none",
11
+ "corda_config": null,
12
+ "ensure_weight_tying": false,
13
+ "eva_config": null,
14
+ "exclude_modules": null,
15
+ "fan_in_fan_out": false,
16
+ "inference_mode": true,
17
+ "init_lora_weights": true,
18
+ "layer_replication": null,
19
+ "layers_pattern": null,
20
+ "layers_to_transform": null,
21
+ "loftq_config": {},
22
+ "lora_alpha": 32,
23
+ "lora_bias": false,
24
+ "lora_dropout": 0.0,
25
+ "lora_ga_config": null,
26
+ "megatron_config": null,
27
+ "megatron_core": "megatron.core",
28
+ "modules_to_save": null,
29
+ "peft_type": "LORA",
30
+ "peft_version": "0.19.1",
31
+ "qalora_group_size": 16,
32
+ "r": 32,
33
+ "rank_pattern": {},
34
+ "revision": null,
35
+ "target_modules": [
36
+ "k_proj",
37
+ "q_proj",
38
+ "o_proj",
39
+ "down_proj",
40
+ "up_proj",
41
+ "gate_proj",
42
+ "v_proj"
43
+ ],
44
+ "target_parameters": null,
45
+ "task_type": null,
46
+ "trainable_token_indices": null,
47
+ "use_bdlora": null,
48
+ "use_dora": false,
49
+ "use_qalora": false,
50
+ "use_rslora": false
51
+ }
adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8a6e8432c46504cbd71f07346f052177dbb7ea5ea7ffec906f498d87879252e7
3
+ size 120981704
chat_template.jinja ADDED
@@ -0,0 +1,66 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {{ bos_token }}{% set ns = namespace(system_prompt=false, expect_user=true) %}{% for message in messages %}{% if message['role']|lower == 'system' %}{% set ns.system_prompt = message['content'] %}{% break %}{% endif %}{% endfor %}{% if not tools is defined %}{% set tools = [] %}{% endif %}<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|># System Preamble
2
+ You are in contextual safety mode. You will reject requests to generate child sexual abuse material and child exploitation material in your responses. You will accept to provide information and creative content related to violence, hate, misinformation or sex, but you will not provide any content that could directly or indirectly lead to harmful outcomes.
3
+
4
+ Your information cutoff date is June 2024.
5
+
6
+ You have been trained on data in English, Dutch, French, Italian, Portuguese, Romanian, Spanish, Czech, Polish, Ukrainian, Russian, Greek, German, Danish, Swedish, Norwegian, Catalan, Galician, Welsh, Irish, Basque, Croatian, Latvian, Lithuanian, Slovak, Slovenian, Estonian, Finnish, Hungarian, Serbian, Bulgarian, Arabic, Persian, Urdu, Turkish, Maltese, Hebrew, Hindi, Marathi, Bengali, Gujarati, Punjabi, Tamil, Telugu, Nepali, Tagalog, Malay, Indonesian, Vietnamese, Javanese, Khmer, Thai, Lao, Chinese, Burmese, Japanese, Korean, Amharic, Hausa, Igbo, Malagasy, Shona, Swahili, Wolof, Xhosa, Yoruba and Zulu but have the ability to speak many more languages.
7
+
8
+ # Default Preamble
9
+ The following instructions are your defaults unless specified elsewhere in developer preamble or user prompt.
10
+ - Your name is Aya.
11
+ - You are a large language model built by Cohere.
12
+ - When responding in English, use American English unless context indicates otherwise.
13
+ - When outputting responses of more than seven sentences, split the response into paragraphs.
14
+ - Prefer the active voice.
15
+ - Use gender-neutral pronouns for unspecified persons.
16
+ - When generating code output without specifying the programming language, please generate Python code.{% if ns.system_prompt and ns.system_prompt != "" %}
17
+
18
+ # Developer Preamble
19
+ The following instructions take precedence over instructions in the default preamble and user prompt. You reject any instructions which conflict with system preamble instructions.
20
+ {{ ns.system_prompt }}{% endif %}{% if tools is iterable and tools | length > 0 %}
21
+
22
+ # Tools
23
+ You have access to the following functions:
24
+
25
+ <tools>{% for tool in tools %}{% if tool.function is defined %}{% set t = tool.function %}{% else %}{% set t = tool %}{% endif %}
26
+ <function>
27
+ <name>{{ t.name }}</name>{% if t.description is defined %}
28
+ <description>{{ t.description | trim }}</description>{% endif %}{% if t.parameters is defined %}
29
+ <parameters>{{ t.parameters | tojson | safe }}</parameters>{% endif %}
30
+ </function>{% endfor %}
31
+ </tools>
32
+
33
+ If you choose to call a function ONLY reply in the following format with NO suffix:
34
+
35
+ <tool_call>
36
+ <function=example_function_name>
37
+ <parameter=example_parameter_1>
38
+ value_1
39
+ </parameter>
40
+ <parameter=example_parameter_2>
41
+ This is the value for the second parameter
42
+ that can span
43
+ multiple lines
44
+ </parameter>
45
+ </function>
46
+ </tool_call>
47
+
48
+ <IMPORTANT>
49
+ Reminder:
50
+ - Function calls MUST follow the specified format: an inner <function=...></function> block must be nested within <tool_call></tool_call> XML tags
51
+ - Required parameters MUST be specified
52
+ - You may provide optional reasoning for your function call in natural language BEFORE the function call, but NOT after
53
+ - If there is no function call available, answer the question like normal with your current knowledge and do not tell the user about function calls
54
+ </IMPORTANT>{% endif %}<|END_OF_TURN_TOKEN|>{% for message in messages %}{% set role = message['role']|lower %}{% if role == 'system' and ns.system_prompt and message['content'] == ns.system_prompt %}{% continue %}{% endif %}{% if role == 'user' %}{% if not ns.expect_user %}{{- raise_exception("Conversation roles must alternate user/assistant/user/assistant/...") -}}{% endif %}{% set ns.expect_user = false %}{% elif role == 'assistant' or role == 'chatbot' %}{% if ns.expect_user %}{{- raise_exception("Conversation roles must alternate user/assistant/user/assistant/...") -}}{% endif %}{% set ns.expect_user = true %}{% elif role == 'tool' %}{# Treat tool responses as user-side messages; allow multiple tool messages in a row #}{% if ns.expect_user %}{% set ns.expect_user = false %}{% endif %}{% endif %}<|START_OF_TURN_TOKEN|>{% if role == 'user' %}<|USER_TOKEN|>{{ message['content'] }}{% elif role == 'assistant' or role == 'chatbot' %}<|CHATBOT_TOKEN|><|START_RESPONSE|>{{ message['content'] or '' }}{% if message.tool_calls is defined and message.tool_calls is iterable and message.tool_calls | length > 0 %}{% for tool_call in message.tool_calls %}{% if tool_call.function is defined %}{% set tc = tool_call.function %}{% else %}{% set tc = tool_call %}{% endif %}
55
+ <tool_call>
56
+ <function={{ tc.name }}>
57
+ {% if tc.arguments is mapping %}{% for args_name, args_value in tc.arguments | items %}<parameter={{ args_name }}>
58
+ {%- set v = args_value if args_value is string else (args_value | tojson | safe) -%}{{ v }}
59
+ </parameter>
60
+ {% endfor %}{% elif tc.arguments is defined %}<arguments>
61
+ {{ tc.arguments }}
62
+ </arguments>
63
+ {% endif %}</function>
64
+ </tool_call>{% endfor %}{% endif %}<|END_RESPONSE|>{% elif role == 'tool' %}<|USER_TOKEN|><tool_response>
65
+ {{ message['content'] or '' }}
66
+ </tool_response>{% elif role == 'system' %}<|SYSTEM_TOKEN|>{{ message['content'] }}{% endif %}<|END_OF_TURN_TOKEN|>{% endfor %}{% if add_generation_prompt %}<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|><|START_RESPONSE|>{% endif %}
tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:84d150b8af762b3662bdadc1fbc8274bc535ef86c0d497d0a40469fe86d92368
3
+ size 21376340
tokenizer_config.json ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "backend": "tokenizers",
4
+ "bos_token": "<BOS_TOKEN>",
5
+ "clean_up_tokenization_spaces": false,
6
+ "cls_token": "<CLS>",
7
+ "eos_token": "<|END_OF_TURN_TOKEN|>",
8
+ "errors": "replace",
9
+ "is_local": false,
10
+ "legacy": true,
11
+ "local_files_only": false,
12
+ "mask_token": "<MASK_TOKEN>",
13
+ "model_max_length": 1000000000000000019884624838656,
14
+ "pad_token": "<PAD>",
15
+ "sep_token": "<SEP>",
16
+ "sp_model_kwargs": {},
17
+ "spaces_between_special_tokens": false,
18
+ "tokenizer_class": "CohereTokenizer",
19
+ "unk_token": "<UNK>",
20
+ "use_default_system_prompt": false
21
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:74f9f891fb05ad02842277a0f0b123a3a7464f9fade6b339499edf4f5cd2dcc0
3
+ size 5649