Artyomorax committed on
Commit
288c286
·
1 Parent(s): d20b3ce

Add wav2vec fine-tuned model files

Browse files
added_tokens.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "</s>": 30,
3
+ "<s>": 29
4
+ }
checkpoint-350/added_tokens.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "</s>": 30,
3
+ "<s>": 29
4
+ }
checkpoint-350/config.json ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "activation_dropout": 0.0,
3
+ "adapter_attn_dim": null,
4
+ "adapter_kernel_size": 3,
5
+ "adapter_stride": 2,
6
+ "add_adapter": false,
7
+ "apply_spec_augment": true,
8
+ "architectures": [
9
+ "Wav2Vec2ForCTC"
10
+ ],
11
+ "attention_dropout": 0.0,
12
+ "bos_token_id": 1,
13
+ "classifier_proj_size": 256,
14
+ "codevector_dim": 768,
15
+ "contrastive_logits_temperature": 0.1,
16
+ "conv_bias": true,
17
+ "conv_dim": [
18
+ 512,
19
+ 512,
20
+ 512,
21
+ 512,
22
+ 512,
23
+ 512,
24
+ 512
25
+ ],
26
+ "conv_kernel": [
27
+ 10,
28
+ 3,
29
+ 3,
30
+ 3,
31
+ 3,
32
+ 2,
33
+ 2
34
+ ],
35
+ "conv_stride": [
36
+ 5,
37
+ 2,
38
+ 2,
39
+ 2,
40
+ 2,
41
+ 2,
42
+ 2
43
+ ],
44
+ "ctc_loss_reduction": "mean",
45
+ "ctc_zero_infinity": false,
46
+ "diversity_loss_weight": 0.1,
47
+ "do_stable_layer_norm": true,
48
+ "eos_token_id": 2,
49
+ "feat_extract_activation": "gelu",
50
+ "feat_extract_dropout": 0.0,
51
+ "feat_extract_norm": "layer",
52
+ "feat_proj_dropout": 0.0,
53
+ "feat_quantizer_dropout": 0.0,
54
+ "final_dropout": 0.0,
55
+ "gradient_checkpointing": false,
56
+ "hidden_act": "gelu",
57
+ "hidden_dropout": 0.0,
58
+ "hidden_size": 1024,
59
+ "initializer_range": 0.02,
60
+ "intermediate_size": 4096,
61
+ "layer_norm_eps": 1e-05,
62
+ "layerdrop": 0.0,
63
+ "mask_feature_length": 10,
64
+ "mask_feature_min_masks": 0,
65
+ "mask_feature_prob": 0.0,
66
+ "mask_time_length": 10,
67
+ "mask_time_min_masks": 2,
68
+ "mask_time_prob": 0.0,
69
+ "model_type": "wav2vec2",
70
+ "num_adapter_layers": 3,
71
+ "num_attention_heads": 16,
72
+ "num_codevector_groups": 2,
73
+ "num_codevectors_per_group": 320,
74
+ "num_conv_pos_embedding_groups": 16,
75
+ "num_conv_pos_embeddings": 128,
76
+ "num_feat_extract_layers": 7,
77
+ "num_hidden_layers": 24,
78
+ "num_negatives": 100,
79
+ "output_hidden_size": 1024,
80
+ "pad_token_id": 28,
81
+ "proj_codevector_dim": 768,
82
+ "tdnn_dilation": [
83
+ 1,
84
+ 2,
85
+ 3,
86
+ 1,
87
+ 1
88
+ ],
89
+ "tdnn_dim": [
90
+ 512,
91
+ 512,
92
+ 512,
93
+ 512,
94
+ 1500
95
+ ],
96
+ "tdnn_kernel": [
97
+ 5,
98
+ 3,
99
+ 3,
100
+ 1,
101
+ 1
102
+ ],
103
+ "torch_dtype": "float32",
104
+ "transformers_version": "4.55.4",
105
+ "use_weighted_layer_sum": false,
106
+ "vocab_size": 31,
107
+ "xvector_output_dim": 512
108
+ }
checkpoint-350/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ed5f427487a4f75146b4e9dc480494f8cfaa16c8507284e695d3b23b353eb34c
3
+ size 1261934580
checkpoint-350/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a8f716e20a540ad57a89a97cf60f2059a70665e1d6504d27ca6d95bdde5765f5
3
+ size 806521803
checkpoint-350/preprocessor_config.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_normalize": true,
3
+ "feature_extractor_type": "Wav2Vec2FeatureExtractor",
4
+ "feature_size": 1,
5
+ "padding_side": "right",
6
+ "padding_value": 0.0,
7
+ "processor_class": "Wav2Vec2Processor",
8
+ "return_attention_mask": true,
9
+ "return_tensors": "np",
10
+ "sampling_rate": 16000
11
+ }
checkpoint-350/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:123848c4e5af199b0a3023db0dc50b82d8d7f7a2d2e95b352af9861d5d70da79
3
+ size 14645
checkpoint-350/scaler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eca071cf113770e6722dad4a3d7b2eab7ca077ae7e6cfa66af1ff8bc547d5284
3
+ size 1383
checkpoint-350/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e07e2030005258fea5b5de0d579feff99fe4606f17cd41417eb090d35fca8f99
3
+ size 1465
checkpoint-350/special_tokens_map.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ {
4
+ "content": "<s>",
5
+ "lstrip": false,
6
+ "normalized": true,
7
+ "rstrip": false,
8
+ "single_word": false
9
+ },
10
+ {
11
+ "content": "</s>",
12
+ "lstrip": false,
13
+ "normalized": true,
14
+ "rstrip": false,
15
+ "single_word": false
16
+ }
17
+ ],
18
+ "bos_token": "<s>",
19
+ "eos_token": {
20
+ "content": "</s>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false
25
+ },
26
+ "pad_token": "[PAD]",
27
+ "unk_token": "[UNK]"
28
+ }
checkpoint-350/tokenizer_config.json ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "27": {
4
+ "content": "[UNK]",
5
+ "lstrip": true,
6
+ "normalized": false,
7
+ "rstrip": true,
8
+ "single_word": false,
9
+ "special": false
10
+ },
11
+ "28": {
12
+ "content": "[PAD]",
13
+ "lstrip": true,
14
+ "normalized": false,
15
+ "rstrip": true,
16
+ "single_word": false,
17
+ "special": false
18
+ },
19
+ "29": {
20
+ "content": "<s>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "30": {
28
+ "content": "</s>",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ }
35
+ },
36
+ "additional_special_tokens": [
37
+ "<s>",
38
+ "</s>"
39
+ ],
40
+ "bos_token": "<s>",
41
+ "clean_up_tokenization_spaces": false,
42
+ "do_lower_case": false,
43
+ "eos_token": "</s>",
44
+ "extra_special_tokens": {},
45
+ "model_max_length": 1000000000000000019884624838656,
46
+ "pad_token": "[PAD]",
47
+ "processor_class": "Wav2Vec2Processor",
48
+ "replace_word_delimiter_char": " ",
49
+ "target_lang": null,
50
+ "tokenizer_class": "Wav2Vec2CTCTokenizer",
51
+ "unk_token": "[UNK]",
52
+ "word_delimiter_token": "|"
53
+ }
checkpoint-350/trainer_state.json ADDED
@@ -0,0 +1,279 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": null,
3
+ "best_metric": null,
4
+ "best_model_checkpoint": null,
5
+ "epoch": 14.0,
6
+ "eval_steps": 500,
7
+ "global_step": 350,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.4032258064516129,
14
+ "grad_norm": 15.945989608764648,
15
+ "learning_rate": 5.4e-07,
16
+ "loss": 2.942,
17
+ "step": 10
18
+ },
19
+ {
20
+ "epoch": 0.8064516129032258,
21
+ "grad_norm": 15.050374984741211,
22
+ "learning_rate": 1.14e-06,
23
+ "loss": 3.345,
24
+ "step": 20
25
+ },
26
+ {
27
+ "epoch": 1.2016129032258065,
28
+ "grad_norm": 9.487156867980957,
29
+ "learning_rate": 1.74e-06,
30
+ "loss": 2.6443,
31
+ "step": 30
32
+ },
33
+ {
34
+ "epoch": 1.6048387096774195,
35
+ "grad_norm": 14.825271606445312,
36
+ "learning_rate": 2.34e-06,
37
+ "loss": 3.0528,
38
+ "step": 40
39
+ },
40
+ {
41
+ "epoch": 2.0,
42
+ "grad_norm": 15.298517227172852,
43
+ "learning_rate": 2.9400000000000002e-06,
44
+ "loss": 2.2951,
45
+ "step": 50
46
+ },
47
+ {
48
+ "epoch": 2.403225806451613,
49
+ "grad_norm": 13.529952049255371,
50
+ "learning_rate": 3.54e-06,
51
+ "loss": 2.3061,
52
+ "step": 60
53
+ },
54
+ {
55
+ "epoch": 2.806451612903226,
56
+ "grad_norm": 12.780255317687988,
57
+ "learning_rate": 4.14e-06,
58
+ "loss": 1.828,
59
+ "step": 70
60
+ },
61
+ {
62
+ "epoch": 3.2016129032258065,
63
+ "grad_norm": 4.392550468444824,
64
+ "learning_rate": 4.74e-06,
65
+ "loss": 1.7883,
66
+ "step": 80
67
+ },
68
+ {
69
+ "epoch": 3.6048387096774195,
70
+ "grad_norm": 6.868764400482178,
71
+ "learning_rate": 5.34e-06,
72
+ "loss": 1.3883,
73
+ "step": 90
74
+ },
75
+ {
76
+ "epoch": 4.0,
77
+ "grad_norm": 11.529573440551758,
78
+ "learning_rate": 5.940000000000001e-06,
79
+ "loss": 1.2804,
80
+ "step": 100
81
+ },
82
+ {
83
+ "epoch": 4.403225806451613,
84
+ "grad_norm": 3.888104200363159,
85
+ "learning_rate": 6.54e-06,
86
+ "loss": 1.1184,
87
+ "step": 110
88
+ },
89
+ {
90
+ "epoch": 4.806451612903226,
91
+ "grad_norm": 5.543831825256348,
92
+ "learning_rate": 7.14e-06,
93
+ "loss": 0.8838,
94
+ "step": 120
95
+ },
96
+ {
97
+ "epoch": 5.201612903225806,
98
+ "grad_norm": 3.6435844898223877,
99
+ "learning_rate": 7.74e-06,
100
+ "loss": 0.7359,
101
+ "step": 130
102
+ },
103
+ {
104
+ "epoch": 5.604838709677419,
105
+ "grad_norm": 3.873931407928467,
106
+ "learning_rate": 8.340000000000001e-06,
107
+ "loss": 0.6677,
108
+ "step": 140
109
+ },
110
+ {
111
+ "epoch": 6.0,
112
+ "grad_norm": 6.526176929473877,
113
+ "learning_rate": 8.939999999999999e-06,
114
+ "loss": 0.5343,
115
+ "step": 150
116
+ },
117
+ {
118
+ "epoch": 6.403225806451613,
119
+ "grad_norm": 2.3237783908843994,
120
+ "learning_rate": 9.54e-06,
121
+ "loss": 0.4596,
122
+ "step": 160
123
+ },
124
+ {
125
+ "epoch": 6.806451612903226,
126
+ "grad_norm": 3.2708520889282227,
127
+ "learning_rate": 1.0140000000000001e-05,
128
+ "loss": 0.371,
129
+ "step": 170
130
+ },
131
+ {
132
+ "epoch": 7.201612903225806,
133
+ "grad_norm": 1.420652985572815,
134
+ "learning_rate": 1.074e-05,
135
+ "loss": 0.397,
136
+ "step": 180
137
+ },
138
+ {
139
+ "epoch": 7.604838709677419,
140
+ "grad_norm": 2.8168821334838867,
141
+ "learning_rate": 1.134e-05,
142
+ "loss": 0.2335,
143
+ "step": 190
144
+ },
145
+ {
146
+ "epoch": 8.0,
147
+ "grad_norm": 0.872466504573822,
148
+ "learning_rate": 1.1940000000000001e-05,
149
+ "loss": 0.2535,
150
+ "step": 200
151
+ },
152
+ {
153
+ "epoch": 8.403225806451612,
154
+ "grad_norm": 1.6869771480560303,
155
+ "learning_rate": 1.254e-05,
156
+ "loss": 0.1642,
157
+ "step": 210
158
+ },
159
+ {
160
+ "epoch": 8.806451612903226,
161
+ "grad_norm": 8.301424026489258,
162
+ "learning_rate": 1.314e-05,
163
+ "loss": 0.112,
164
+ "step": 220
165
+ },
166
+ {
167
+ "epoch": 9.201612903225806,
168
+ "grad_norm": 0.7436397671699524,
169
+ "learning_rate": 1.374e-05,
170
+ "loss": 0.1257,
171
+ "step": 230
172
+ },
173
+ {
174
+ "epoch": 9.60483870967742,
175
+ "grad_norm": 1.891860842704773,
176
+ "learning_rate": 1.434e-05,
177
+ "loss": 0.0623,
178
+ "step": 240
179
+ },
180
+ {
181
+ "epoch": 10.0,
182
+ "grad_norm": 0.5202131867408752,
183
+ "learning_rate": 1.4940000000000001e-05,
184
+ "loss": 0.0572,
185
+ "step": 250
186
+ },
187
+ {
188
+ "epoch": 10.403225806451612,
189
+ "grad_norm": 0.0677497610449791,
190
+ "learning_rate": 1.554e-05,
191
+ "loss": 0.0212,
192
+ "step": 260
193
+ },
194
+ {
195
+ "epoch": 10.806451612903226,
196
+ "grad_norm": 0.07801195234060287,
197
+ "learning_rate": 1.614e-05,
198
+ "loss": 0.0243,
199
+ "step": 270
200
+ },
201
+ {
202
+ "epoch": 11.201612903225806,
203
+ "grad_norm": 0.11515898257493973,
204
+ "learning_rate": 1.6740000000000002e-05,
205
+ "loss": 0.0321,
206
+ "step": 280
207
+ },
208
+ {
209
+ "epoch": 11.60483870967742,
210
+ "grad_norm": 0.11826858669519424,
211
+ "learning_rate": 1.734e-05,
212
+ "loss": 0.0254,
213
+ "step": 290
214
+ },
215
+ {
216
+ "epoch": 12.0,
217
+ "grad_norm": 0.13558819890022278,
218
+ "learning_rate": 1.794e-05,
219
+ "loss": 0.014,
220
+ "step": 300
221
+ },
222
+ {
223
+ "epoch": 12.403225806451612,
224
+ "grad_norm": 0.03777763620018959,
225
+ "learning_rate": 1.854e-05,
226
+ "loss": 0.0301,
227
+ "step": 310
228
+ },
229
+ {
230
+ "epoch": 12.806451612903226,
231
+ "grad_norm": 0.0405518114566803,
232
+ "learning_rate": 1.914e-05,
233
+ "loss": 0.0027,
234
+ "step": 320
235
+ },
236
+ {
237
+ "epoch": 13.201612903225806,
238
+ "grad_norm": 0.03762541711330414,
239
+ "learning_rate": 1.974e-05,
240
+ "loss": 0.0026,
241
+ "step": 330
242
+ },
243
+ {
244
+ "epoch": 13.60483870967742,
245
+ "grad_norm": 0.06563286483287811,
246
+ "learning_rate": 2.0340000000000002e-05,
247
+ "loss": 0.0279,
248
+ "step": 340
249
+ },
250
+ {
251
+ "epoch": 14.0,
252
+ "grad_norm": 0.12380703538656235,
253
+ "learning_rate": 2.094e-05,
254
+ "loss": 0.0063,
255
+ "step": 350
256
+ }
257
+ ],
258
+ "logging_steps": 10,
259
+ "max_steps": 375,
260
+ "num_input_tokens_seen": 0,
261
+ "num_train_epochs": 15,
262
+ "save_steps": 50,
263
+ "stateful_callbacks": {
264
+ "TrainerControl": {
265
+ "args": {
266
+ "should_epoch_stop": false,
267
+ "should_evaluate": false,
268
+ "should_log": false,
269
+ "should_save": true,
270
+ "should_training_stop": false
271
+ },
272
+ "attributes": {}
273
+ }
274
+ },
275
+ "total_flos": 1.651986705536446e+17,
276
+ "train_batch_size": 4,
277
+ "trial_name": null,
278
+ "trial_params": null
279
+ }
checkpoint-350/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:65214c45db9799ff8e38f62a8a12f53de739d750564cf70464decc61c84f64ab
3
+ size 5777
checkpoint-350/vocab.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "[PAD]": 28,
3
+ "[UNK]": 27,
4
+ "a": 1,
5
+ "b": 2,
6
+ "c": 3,
7
+ "d": 4,
8
+ "e": 5,
9
+ "f": 6,
10
+ "g": 7,
11
+ "h": 8,
12
+ "i": 9,
13
+ "j": 10,
14
+ "k": 11,
15
+ "l": 12,
16
+ "m": 13,
17
+ "n": 14,
18
+ "o": 15,
19
+ "p": 16,
20
+ "q": 17,
21
+ "r": 18,
22
+ "s": 19,
23
+ "t": 20,
24
+ "u": 21,
25
+ "v": 22,
26
+ "w": 23,
27
+ "x": 24,
28
+ "y": 25,
29
+ "z": 26,
30
+ "|": 0
31
+ }
checkpoint-375/added_tokens.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "</s>": 30,
3
+ "<s>": 29
4
+ }
checkpoint-375/config.json ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "activation_dropout": 0.0,
3
+ "adapter_attn_dim": null,
4
+ "adapter_kernel_size": 3,
5
+ "adapter_stride": 2,
6
+ "add_adapter": false,
7
+ "apply_spec_augment": true,
8
+ "architectures": [
9
+ "Wav2Vec2ForCTC"
10
+ ],
11
+ "attention_dropout": 0.0,
12
+ "bos_token_id": 1,
13
+ "classifier_proj_size": 256,
14
+ "codevector_dim": 768,
15
+ "contrastive_logits_temperature": 0.1,
16
+ "conv_bias": true,
17
+ "conv_dim": [
18
+ 512,
19
+ 512,
20
+ 512,
21
+ 512,
22
+ 512,
23
+ 512,
24
+ 512
25
+ ],
26
+ "conv_kernel": [
27
+ 10,
28
+ 3,
29
+ 3,
30
+ 3,
31
+ 3,
32
+ 2,
33
+ 2
34
+ ],
35
+ "conv_stride": [
36
+ 5,
37
+ 2,
38
+ 2,
39
+ 2,
40
+ 2,
41
+ 2,
42
+ 2
43
+ ],
44
+ "ctc_loss_reduction": "mean",
45
+ "ctc_zero_infinity": false,
46
+ "diversity_loss_weight": 0.1,
47
+ "do_stable_layer_norm": true,
48
+ "eos_token_id": 2,
49
+ "feat_extract_activation": "gelu",
50
+ "feat_extract_dropout": 0.0,
51
+ "feat_extract_norm": "layer",
52
+ "feat_proj_dropout": 0.0,
53
+ "feat_quantizer_dropout": 0.0,
54
+ "final_dropout": 0.0,
55
+ "gradient_checkpointing": false,
56
+ "hidden_act": "gelu",
57
+ "hidden_dropout": 0.0,
58
+ "hidden_size": 1024,
59
+ "initializer_range": 0.02,
60
+ "intermediate_size": 4096,
61
+ "layer_norm_eps": 1e-05,
62
+ "layerdrop": 0.0,
63
+ "mask_feature_length": 10,
64
+ "mask_feature_min_masks": 0,
65
+ "mask_feature_prob": 0.0,
66
+ "mask_time_length": 10,
67
+ "mask_time_min_masks": 2,
68
+ "mask_time_prob": 0.0,
69
+ "model_type": "wav2vec2",
70
+ "num_adapter_layers": 3,
71
+ "num_attention_heads": 16,
72
+ "num_codevector_groups": 2,
73
+ "num_codevectors_per_group": 320,
74
+ "num_conv_pos_embedding_groups": 16,
75
+ "num_conv_pos_embeddings": 128,
76
+ "num_feat_extract_layers": 7,
77
+ "num_hidden_layers": 24,
78
+ "num_negatives": 100,
79
+ "output_hidden_size": 1024,
80
+ "pad_token_id": 28,
81
+ "proj_codevector_dim": 768,
82
+ "tdnn_dilation": [
83
+ 1,
84
+ 2,
85
+ 3,
86
+ 1,
87
+ 1
88
+ ],
89
+ "tdnn_dim": [
90
+ 512,
91
+ 512,
92
+ 512,
93
+ 512,
94
+ 1500
95
+ ],
96
+ "tdnn_kernel": [
97
+ 5,
98
+ 3,
99
+ 3,
100
+ 1,
101
+ 1
102
+ ],
103
+ "torch_dtype": "float32",
104
+ "transformers_version": "4.55.4",
105
+ "use_weighted_layer_sum": false,
106
+ "vocab_size": 31,
107
+ "xvector_output_dim": 512
108
+ }
checkpoint-375/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4a78453f5c1780262440dd4fd6465ec500764cf4eccb2e7bad9a7e7f5ffcc2e5
3
+ size 1261934580
checkpoint-375/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d46c229febe3e84451b6b8ab287e9ba97b56be3c714e4eddacaf6087128d1b55
3
+ size 806521803
checkpoint-375/preprocessor_config.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_normalize": true,
3
+ "feature_extractor_type": "Wav2Vec2FeatureExtractor",
4
+ "feature_size": 1,
5
+ "padding_side": "right",
6
+ "padding_value": 0.0,
7
+ "processor_class": "Wav2Vec2Processor",
8
+ "return_attention_mask": true,
9
+ "return_tensors": "np",
10
+ "sampling_rate": 16000
11
+ }
checkpoint-375/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:214119e423d32a6bd19a61d9c6c73a5bb02729dfeacd793faf03518656d409c4
3
+ size 14645
checkpoint-375/scaler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ffa100fb89452a105fbe870f5976239d27ca580780da5887426b11aec30bf36e
3
+ size 1383
checkpoint-375/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f0d44bba9ff495f2f261cd3e092af3e4a799899ba7c44ee5b3bc6534485b82ea
3
+ size 1465
checkpoint-375/special_tokens_map.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ {
4
+ "content": "<s>",
5
+ "lstrip": false,
6
+ "normalized": true,
7
+ "rstrip": false,
8
+ "single_word": false
9
+ },
10
+ {
11
+ "content": "</s>",
12
+ "lstrip": false,
13
+ "normalized": true,
14
+ "rstrip": false,
15
+ "single_word": false
16
+ }
17
+ ],
18
+ "bos_token": "<s>",
19
+ "eos_token": {
20
+ "content": "</s>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false
25
+ },
26
+ "pad_token": "[PAD]",
27
+ "unk_token": "[UNK]"
28
+ }
checkpoint-375/tokenizer_config.json ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "27": {
4
+ "content": "[UNK]",
5
+ "lstrip": true,
6
+ "normalized": false,
7
+ "rstrip": true,
8
+ "single_word": false,
9
+ "special": false
10
+ },
11
+ "28": {
12
+ "content": "[PAD]",
13
+ "lstrip": true,
14
+ "normalized": false,
15
+ "rstrip": true,
16
+ "single_word": false,
17
+ "special": false
18
+ },
19
+ "29": {
20
+ "content": "<s>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "30": {
28
+ "content": "</s>",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ }
35
+ },
36
+ "additional_special_tokens": [
37
+ "<s>",
38
+ "</s>"
39
+ ],
40
+ "bos_token": "<s>",
41
+ "clean_up_tokenization_spaces": false,
42
+ "do_lower_case": false,
43
+ "eos_token": "</s>",
44
+ "extra_special_tokens": {},
45
+ "model_max_length": 1000000000000000019884624838656,
46
+ "pad_token": "[PAD]",
47
+ "processor_class": "Wav2Vec2Processor",
48
+ "replace_word_delimiter_char": " ",
49
+ "target_lang": null,
50
+ "tokenizer_class": "Wav2Vec2CTCTokenizer",
51
+ "unk_token": "[UNK]",
52
+ "word_delimiter_token": "|"
53
+ }
checkpoint-375/trainer_state.json ADDED
@@ -0,0 +1,293 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": null,
3
+ "best_metric": null,
4
+ "best_model_checkpoint": null,
5
+ "epoch": 15.0,
6
+ "eval_steps": 500,
7
+ "global_step": 375,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 0.4032258064516129,
14
+ "grad_norm": 15.945989608764648,
15
+ "learning_rate": 5.4e-07,
16
+ "loss": 2.942,
17
+ "step": 10
18
+ },
19
+ {
20
+ "epoch": 0.8064516129032258,
21
+ "grad_norm": 15.050374984741211,
22
+ "learning_rate": 1.14e-06,
23
+ "loss": 3.345,
24
+ "step": 20
25
+ },
26
+ {
27
+ "epoch": 1.2016129032258065,
28
+ "grad_norm": 9.487156867980957,
29
+ "learning_rate": 1.74e-06,
30
+ "loss": 2.6443,
31
+ "step": 30
32
+ },
33
+ {
34
+ "epoch": 1.6048387096774195,
35
+ "grad_norm": 14.825271606445312,
36
+ "learning_rate": 2.34e-06,
37
+ "loss": 3.0528,
38
+ "step": 40
39
+ },
40
+ {
41
+ "epoch": 2.0,
42
+ "grad_norm": 15.298517227172852,
43
+ "learning_rate": 2.9400000000000002e-06,
44
+ "loss": 2.2951,
45
+ "step": 50
46
+ },
47
+ {
48
+ "epoch": 2.403225806451613,
49
+ "grad_norm": 13.529952049255371,
50
+ "learning_rate": 3.54e-06,
51
+ "loss": 2.3061,
52
+ "step": 60
53
+ },
54
+ {
55
+ "epoch": 2.806451612903226,
56
+ "grad_norm": 12.780255317687988,
57
+ "learning_rate": 4.14e-06,
58
+ "loss": 1.828,
59
+ "step": 70
60
+ },
61
+ {
62
+ "epoch": 3.2016129032258065,
63
+ "grad_norm": 4.392550468444824,
64
+ "learning_rate": 4.74e-06,
65
+ "loss": 1.7883,
66
+ "step": 80
67
+ },
68
+ {
69
+ "epoch": 3.6048387096774195,
70
+ "grad_norm": 6.868764400482178,
71
+ "learning_rate": 5.34e-06,
72
+ "loss": 1.3883,
73
+ "step": 90
74
+ },
75
+ {
76
+ "epoch": 4.0,
77
+ "grad_norm": 11.529573440551758,
78
+ "learning_rate": 5.940000000000001e-06,
79
+ "loss": 1.2804,
80
+ "step": 100
81
+ },
82
+ {
83
+ "epoch": 4.403225806451613,
84
+ "grad_norm": 3.888104200363159,
85
+ "learning_rate": 6.54e-06,
86
+ "loss": 1.1184,
87
+ "step": 110
88
+ },
89
+ {
90
+ "epoch": 4.806451612903226,
91
+ "grad_norm": 5.543831825256348,
92
+ "learning_rate": 7.14e-06,
93
+ "loss": 0.8838,
94
+ "step": 120
95
+ },
96
+ {
97
+ "epoch": 5.201612903225806,
98
+ "grad_norm": 3.6435844898223877,
99
+ "learning_rate": 7.74e-06,
100
+ "loss": 0.7359,
101
+ "step": 130
102
+ },
103
+ {
104
+ "epoch": 5.604838709677419,
105
+ "grad_norm": 3.873931407928467,
106
+ "learning_rate": 8.340000000000001e-06,
107
+ "loss": 0.6677,
108
+ "step": 140
109
+ },
110
+ {
111
+ "epoch": 6.0,
112
+ "grad_norm": 6.526176929473877,
113
+ "learning_rate": 8.939999999999999e-06,
114
+ "loss": 0.5343,
115
+ "step": 150
116
+ },
117
+ {
118
+ "epoch": 6.403225806451613,
119
+ "grad_norm": 2.3237783908843994,
120
+ "learning_rate": 9.54e-06,
121
+ "loss": 0.4596,
122
+ "step": 160
123
+ },
124
+ {
125
+ "epoch": 6.806451612903226,
126
+ "grad_norm": 3.2708520889282227,
127
+ "learning_rate": 1.0140000000000001e-05,
128
+ "loss": 0.371,
129
+ "step": 170
130
+ },
131
+ {
132
+ "epoch": 7.201612903225806,
133
+ "grad_norm": 1.420652985572815,
134
+ "learning_rate": 1.074e-05,
135
+ "loss": 0.397,
136
+ "step": 180
137
+ },
138
+ {
139
+ "epoch": 7.604838709677419,
140
+ "grad_norm": 2.8168821334838867,
141
+ "learning_rate": 1.134e-05,
142
+ "loss": 0.2335,
143
+ "step": 190
144
+ },
145
+ {
146
+ "epoch": 8.0,
147
+ "grad_norm": 0.872466504573822,
148
+ "learning_rate": 1.1940000000000001e-05,
149
+ "loss": 0.2535,
150
+ "step": 200
151
+ },
152
+ {
153
+ "epoch": 8.403225806451612,
154
+ "grad_norm": 1.6869771480560303,
155
+ "learning_rate": 1.254e-05,
156
+ "loss": 0.1642,
157
+ "step": 210
158
+ },
159
+ {
160
+ "epoch": 8.806451612903226,
161
+ "grad_norm": 8.301424026489258,
162
+ "learning_rate": 1.314e-05,
163
+ "loss": 0.112,
164
+ "step": 220
165
+ },
166
+ {
167
+ "epoch": 9.201612903225806,
168
+ "grad_norm": 0.7436397671699524,
169
+ "learning_rate": 1.374e-05,
170
+ "loss": 0.1257,
171
+ "step": 230
172
+ },
173
+ {
174
+ "epoch": 9.60483870967742,
175
+ "grad_norm": 1.891860842704773,
176
+ "learning_rate": 1.434e-05,
177
+ "loss": 0.0623,
178
+ "step": 240
179
+ },
180
+ {
181
+ "epoch": 10.0,
182
+ "grad_norm": 0.5202131867408752,
183
+ "learning_rate": 1.4940000000000001e-05,
184
+ "loss": 0.0572,
185
+ "step": 250
186
+ },
187
+ {
188
+ "epoch": 10.403225806451612,
189
+ "grad_norm": 0.0677497610449791,
190
+ "learning_rate": 1.554e-05,
191
+ "loss": 0.0212,
192
+ "step": 260
193
+ },
194
+ {
195
+ "epoch": 10.806451612903226,
196
+ "grad_norm": 0.07801195234060287,
197
+ "learning_rate": 1.614e-05,
198
+ "loss": 0.0243,
199
+ "step": 270
200
+ },
201
+ {
202
+ "epoch": 11.201612903225806,
203
+ "grad_norm": 0.11515898257493973,
204
+ "learning_rate": 1.6740000000000002e-05,
205
+ "loss": 0.0321,
206
+ "step": 280
207
+ },
208
+ {
209
+ "epoch": 11.60483870967742,
210
+ "grad_norm": 0.11826858669519424,
211
+ "learning_rate": 1.734e-05,
212
+ "loss": 0.0254,
213
+ "step": 290
214
+ },
215
+ {
216
+ "epoch": 12.0,
217
+ "grad_norm": 0.13558819890022278,
218
+ "learning_rate": 1.794e-05,
219
+ "loss": 0.014,
220
+ "step": 300
221
+ },
222
+ {
223
+ "epoch": 12.403225806451612,
224
+ "grad_norm": 0.03777763620018959,
225
+ "learning_rate": 1.854e-05,
226
+ "loss": 0.0301,
227
+ "step": 310
228
+ },
229
+ {
230
+ "epoch": 12.806451612903226,
231
+ "grad_norm": 0.0405518114566803,
232
+ "learning_rate": 1.914e-05,
233
+ "loss": 0.0027,
234
+ "step": 320
235
+ },
236
+ {
237
+ "epoch": 13.201612903225806,
238
+ "grad_norm": 0.03762541711330414,
239
+ "learning_rate": 1.974e-05,
240
+ "loss": 0.0026,
241
+ "step": 330
242
+ },
243
+ {
244
+ "epoch": 13.60483870967742,
245
+ "grad_norm": 0.06563286483287811,
246
+ "learning_rate": 2.0340000000000002e-05,
247
+ "loss": 0.0279,
248
+ "step": 340
249
+ },
250
+ {
251
+ "epoch": 14.0,
252
+ "grad_norm": 0.12380703538656235,
253
+ "learning_rate": 2.094e-05,
254
+ "loss": 0.0063,
255
+ "step": 350
256
+ },
257
+ {
258
+ "epoch": 14.403225806451612,
259
+ "grad_norm": 0.015481448732316494,
260
+ "learning_rate": 2.154e-05,
261
+ "loss": 0.0021,
262
+ "step": 360
263
+ },
264
+ {
265
+ "epoch": 14.806451612903226,
266
+ "grad_norm": 0.036212582141160965,
267
+ "learning_rate": 2.214e-05,
268
+ "loss": 0.0019,
269
+ "step": 370
270
+ }
271
+ ],
272
+ "logging_steps": 10,
273
+ "max_steps": 375,
274
+ "num_input_tokens_seen": 0,
275
+ "num_train_epochs": 15,
276
+ "save_steps": 50,
277
+ "stateful_callbacks": {
278
+ "TrainerControl": {
279
+ "args": {
280
+ "should_epoch_stop": false,
281
+ "should_evaluate": false,
282
+ "should_log": false,
283
+ "should_save": true,
284
+ "should_training_stop": true
285
+ },
286
+ "attributes": {}
287
+ }
288
+ },
289
+ "total_flos": 1.770072662194541e+17,
290
+ "train_batch_size": 4,
291
+ "trial_name": null,
292
+ "trial_params": null
293
+ }
checkpoint-375/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:65214c45db9799ff8e38f62a8a12f53de739d750564cf70464decc61c84f64ab
3
+ size 5777
checkpoint-375/vocab.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "[PAD]": 28,
3
+ "[UNK]": 27,
4
+ "a": 1,
5
+ "b": 2,
6
+ "c": 3,
7
+ "d": 4,
8
+ "e": 5,
9
+ "f": 6,
10
+ "g": 7,
11
+ "h": 8,
12
+ "i": 9,
13
+ "j": 10,
14
+ "k": 11,
15
+ "l": 12,
16
+ "m": 13,
17
+ "n": 14,
18
+ "o": 15,
19
+ "p": 16,
20
+ "q": 17,
21
+ "r": 18,
22
+ "s": 19,
23
+ "t": 20,
24
+ "u": 21,
25
+ "v": 22,
26
+ "w": 23,
27
+ "x": 24,
28
+ "y": 25,
29
+ "z": 26,
30
+ "|": 0
31
+ }
config.json ADDED
@@ -0,0 +1,108 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "activation_dropout": 0.0,
3
+ "adapter_attn_dim": null,
4
+ "adapter_kernel_size": 3,
5
+ "adapter_stride": 2,
6
+ "add_adapter": false,
7
+ "apply_spec_augment": true,
8
+ "architectures": [
9
+ "Wav2Vec2ForCTC"
10
+ ],
11
+ "attention_dropout": 0.0,
12
+ "bos_token_id": 1,
13
+ "classifier_proj_size": 256,
14
+ "codevector_dim": 768,
15
+ "contrastive_logits_temperature": 0.1,
16
+ "conv_bias": true,
17
+ "conv_dim": [
18
+ 512,
19
+ 512,
20
+ 512,
21
+ 512,
22
+ 512,
23
+ 512,
24
+ 512
25
+ ],
26
+ "conv_kernel": [
27
+ 10,
28
+ 3,
29
+ 3,
30
+ 3,
31
+ 3,
32
+ 2,
33
+ 2
34
+ ],
35
+ "conv_stride": [
36
+ 5,
37
+ 2,
38
+ 2,
39
+ 2,
40
+ 2,
41
+ 2,
42
+ 2
43
+ ],
44
+ "ctc_loss_reduction": "mean",
45
+ "ctc_zero_infinity": false,
46
+ "diversity_loss_weight": 0.1,
47
+ "do_stable_layer_norm": true,
48
+ "eos_token_id": 2,
49
+ "feat_extract_activation": "gelu",
50
+ "feat_extract_dropout": 0.0,
51
+ "feat_extract_norm": "layer",
52
+ "feat_proj_dropout": 0.0,
53
+ "feat_quantizer_dropout": 0.0,
54
+ "final_dropout": 0.0,
55
+ "gradient_checkpointing": false,
56
+ "hidden_act": "gelu",
57
+ "hidden_dropout": 0.0,
58
+ "hidden_size": 1024,
59
+ "initializer_range": 0.02,
60
+ "intermediate_size": 4096,
61
+ "layer_norm_eps": 1e-05,
62
+ "layerdrop": 0.0,
63
+ "mask_feature_length": 10,
64
+ "mask_feature_min_masks": 0,
65
+ "mask_feature_prob": 0.0,
66
+ "mask_time_length": 10,
67
+ "mask_time_min_masks": 2,
68
+ "mask_time_prob": 0.0,
69
+ "model_type": "wav2vec2",
70
+ "num_adapter_layers": 3,
71
+ "num_attention_heads": 16,
72
+ "num_codevector_groups": 2,
73
+ "num_codevectors_per_group": 320,
74
+ "num_conv_pos_embedding_groups": 16,
75
+ "num_conv_pos_embeddings": 128,
76
+ "num_feat_extract_layers": 7,
77
+ "num_hidden_layers": 24,
78
+ "num_negatives": 100,
79
+ "output_hidden_size": 1024,
80
+ "pad_token_id": 28,
81
+ "proj_codevector_dim": 768,
82
+ "tdnn_dilation": [
83
+ 1,
84
+ 2,
85
+ 3,
86
+ 1,
87
+ 1
88
+ ],
89
+ "tdnn_dim": [
90
+ 512,
91
+ 512,
92
+ 512,
93
+ 512,
94
+ 1500
95
+ ],
96
+ "tdnn_kernel": [
97
+ 5,
98
+ 3,
99
+ 3,
100
+ 1,
101
+ 1
102
+ ],
103
+ "torch_dtype": "float32",
104
+ "transformers_version": "4.55.4",
105
+ "use_weighted_layer_sum": false,
106
+ "vocab_size": 31,
107
+ "xvector_output_dim": 512
108
+ }
evaluation_results.csv ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Reference,Prediction,WER,CER
2
+ wi,,1.0,1.0
3
+ wi,,1.0,1.0
4
+ wi,,1.0,1.0
5
+ wi,,1.0,1.0
6
+ wi,,1.0,1.0
7
+ wi,<unk>,1.0,2.5
8
+ wi,,1.0,1.0
9
+ wi,,1.0,1.0
10
+ wi,,1.0,1.0
11
+ wo,,1.0,1.0
12
+ wo,,1.0,1.0
13
+ wo,,1.0,1.0
14
+ wo,,1.0,1.0
15
+ wo,,1.0,1.0
16
+ wo,,1.0,1.0
17
+ wo,,1.0,1.0
18
+ wo,,1.0,1.0
19
+ wo,,1.0,1.0
20
+ wo,,1.0,1.0
21
+ wo,,1.0,1.0
22
+ wu,,1.0,1.0
23
+ wu,,1.0,1.0
24
+ wu,,1.0,1.0
25
+ wu,,1.0,1.0
26
+ wu,,1.0,1.0
27
+ wu,,1.0,1.0
28
+ wu,,1.0,1.0
29
+ wu,,1.0,1.0
30
+ wu,,1.0,1.0
31
+ ya,,1.0,1.0
32
+ ya,,1.0,1.0
33
+ ya,,1.0,1.0
34
+ ya,,1.0,1.0
35
+ ya,,1.0,1.0
36
+ ya,,1.0,1.0
37
+ ya,,1.0,1.0
38
+ ye,,1.0,1.0
39
+ ye,,1.0,1.0
40
+ ye,,1.0,1.0
41
+ ye,,1.0,1.0
42
+ ye,,1.0,1.0
43
+ yi,,1.0,1.0
44
+ yi,,1.0,1.0
45
+ yi,,1.0,1.0
46
+ yi,,1.0,1.0
47
+ yi,,1.0,1.0
48
+ yo,,1.0,1.0
49
+ yo,,1.0,1.0
50
+ yo,,1.0,1.0
51
+ yo,</s>,1.0,2.0
52
+ yo,,1.0,1.0
53
+ yu,,1.0,1.0
54
+ yu,,1.0,1.0
55
+ yu,,1.0,1.0
56
+ yu,,1.0,1.0
57
+ yu,,1.0,1.0
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4a78453f5c1780262440dd4fd6465ec500764cf4eccb2e7bad9a7e7f5ffcc2e5
3
+ size 1261934580
preprocessor_config.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_normalize": true,
3
+ "feature_extractor_type": "Wav2Vec2FeatureExtractor",
4
+ "feature_size": 1,
5
+ "padding_side": "right",
6
+ "padding_value": 0.0,
7
+ "processor_class": "Wav2Vec2Processor",
8
+ "return_attention_mask": true,
9
+ "return_tensors": "np",
10
+ "sampling_rate": 16000
11
+ }
special_tokens_map.json ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ {
4
+ "content": "<s>",
5
+ "lstrip": false,
6
+ "normalized": true,
7
+ "rstrip": false,
8
+ "single_word": false
9
+ },
10
+ {
11
+ "content": "</s>",
12
+ "lstrip": false,
13
+ "normalized": true,
14
+ "rstrip": false,
15
+ "single_word": false
16
+ }
17
+ ],
18
+ "bos_token": "<s>",
19
+ "eos_token": {
20
+ "content": "</s>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false
25
+ },
26
+ "pad_token": "[PAD]",
27
+ "unk_token": "[UNK]"
28
+ }
tokenizer_config.json ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "27": {
4
+ "content": "[UNK]",
5
+ "lstrip": true,
6
+ "normalized": false,
7
+ "rstrip": true,
8
+ "single_word": false,
9
+ "special": false
10
+ },
11
+ "28": {
12
+ "content": "[PAD]",
13
+ "lstrip": true,
14
+ "normalized": false,
15
+ "rstrip": true,
16
+ "single_word": false,
17
+ "special": false
18
+ },
19
+ "29": {
20
+ "content": "<s>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "30": {
28
+ "content": "</s>",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ }
35
+ },
36
+ "additional_special_tokens": [
37
+ "<s>",
38
+ "</s>"
39
+ ],
40
+ "bos_token": "<s>",
41
+ "clean_up_tokenization_spaces": false,
42
+ "do_lower_case": false,
43
+ "eos_token": "</s>",
44
+ "extra_special_tokens": {},
45
+ "model_max_length": 1000000000000000019884624838656,
46
+ "pad_token": "[PAD]",
47
+ "processor_class": "Wav2Vec2Processor",
48
+ "replace_word_delimiter_char": " ",
49
+ "target_lang": null,
50
+ "tokenizer_class": "Wav2Vec2CTCTokenizer",
51
+ "unk_token": "[UNK]",
52
+ "word_delimiter_token": "|"
53
+ }
training_log.tsv ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ timestamp step epoch train_loss grad_norm learning_rate
2
+ 2025-09-21T21:41:21.703893 10 0.32 903.355000 614.498596 1.00e-07
3
+ 2025-09-21T21:41:27.856246 20 0.65 902.448600 617.974670 5.00e-07
4
+ 2025-09-21T21:41:33.781063 30 0.97 886.335200 578.719543 1.00e-06
5
+ 2025-09-21T21:41:39.650969 40 1.29 941.081400 763.962463 1.50e-06
6
+ 2025-09-21T21:41:45.448703 50 1.61 878.512000 745.441284 2.00e-06
7
+ 2025-09-21T21:41:46.303569 50 1.61 NA NA NA
8
+ 2025-09-21T21:41:53.272236 60 1.94 870.239400 805.721558 2.50e-06
9
+ 2025-09-21T21:41:58.923912 70 2.26 883.066600 810.874512 3.00e-06
10
+ 2025-09-21T21:42:04.647459 80 2.58 858.037100 876.961365 3.50e-06
11
+ 2025-09-21T21:42:10.439181 90 2.90 879.551700 879.198303 4.00e-06
12
+ 2025-09-21T21:42:16.088489 100 3.23 838.324500 1011.899841 4.50e-06
13
+ 2025-09-21T21:42:16.862886 100 3.23 NA NA NA
14
+ 2025-09-21T21:42:24.087341 110 3.55 789.837500 1295.429565 5.00e-06
15
+ 2025-09-21T21:42:29.817688 120 3.87 745.176600 1823.199463 5.50e-06
16
+ 2025-09-21T21:42:35.473868 130 4.19 701.951900 1844.962524 6.00e-06
17
+ 2025-09-21T21:42:41.363224 140 4.52 661.328600 1836.961670 6.50e-06
18
+ 2025-09-21T21:42:47.037840 150 4.84 558.101000 1857.315308 7.00e-06
19
+ 2025-09-21T21:42:47.769415 150 4.84 NA NA NA
20
+ 2025-09-21T21:42:54.652980 160 5.16 499.938000 2131.156982 7.50e-06
21
+ 2025-09-21T21:43:00.440291 170 5.48 450.506500 1810.863647 8.00e-06
22
+ 2025-09-21T21:43:06.233803 180 5.81 390.488400 1685.968994 8.50e-06
23
+ 2025-09-21T21:43:12.164436 190 6.13 324.997900 1826.587402 9.00e-06
24
+ 2025-09-21T21:43:17.924886 200 6.45 280.306800 1618.513672 9.50e-06
25
+ 2025-09-21T21:43:18.676823 200 6.45 NA NA NA
26
+ 2025-09-21T21:43:20.259029 200 6.45 NA NA NA
vocab.json ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "[PAD]": 28,
3
+ "[UNK]": 27,
4
+ "a": 1,
5
+ "b": 2,
6
+ "c": 3,
7
+ "d": 4,
8
+ "e": 5,
9
+ "f": 6,
10
+ "g": 7,
11
+ "h": 8,
12
+ "i": 9,
13
+ "j": 10,
14
+ "k": 11,
15
+ "l": 12,
16
+ "m": 13,
17
+ "n": 14,
18
+ "o": 15,
19
+ "p": 16,
20
+ "q": 17,
21
+ "r": 18,
22
+ "s": 19,
23
+ "t": 20,
24
+ "u": 21,
25
+ "v": 22,
26
+ "w": 23,
27
+ "x": 24,
28
+ "y": 25,
29
+ "z": 26,
30
+ "|": 0
31
+ }