Artyomorax committed on Sep 27, 2025

Commit

288c286

1 Parent(s): d20b3ce

Add wav2vec fine-tuned model files

Browse files

Files changed (35) hide show

added_tokens.json +4 -0
checkpoint-350/added_tokens.json +4 -0
checkpoint-350/config.json +108 -0
checkpoint-350/model.safetensors +3 -0
checkpoint-350/optimizer.pt +3 -0
checkpoint-350/preprocessor_config.json +11 -0
checkpoint-350/rng_state.pth +3 -0
checkpoint-350/scaler.pt +3 -0
checkpoint-350/scheduler.pt +3 -0
checkpoint-350/special_tokens_map.json +28 -0
checkpoint-350/tokenizer_config.json +53 -0
checkpoint-350/trainer_state.json +279 -0
checkpoint-350/training_args.bin +3 -0
checkpoint-350/vocab.json +31 -0
checkpoint-375/added_tokens.json +4 -0
checkpoint-375/config.json +108 -0
checkpoint-375/model.safetensors +3 -0
checkpoint-375/optimizer.pt +3 -0
checkpoint-375/preprocessor_config.json +11 -0
checkpoint-375/rng_state.pth +3 -0
checkpoint-375/scaler.pt +3 -0
checkpoint-375/scheduler.pt +3 -0
checkpoint-375/special_tokens_map.json +28 -0
checkpoint-375/tokenizer_config.json +53 -0
checkpoint-375/trainer_state.json +293 -0
checkpoint-375/training_args.bin +3 -0
checkpoint-375/vocab.json +31 -0
config.json +108 -0
evaluation_results.csv +57 -0
model.safetensors +3 -0
preprocessor_config.json +11 -0
special_tokens_map.json +28 -0
tokenizer_config.json +53 -0
training_log.tsv +26 -0
vocab.json +31 -0

added_tokens.json ADDED Viewed

	@@ -0,0 +1,4 @@

+{
+  "</s>": 30,
+  "<s>": 29
+}

checkpoint-350/added_tokens.json ADDED Viewed

	@@ -0,0 +1,4 @@

+{
+  "</s>": 30,
+  "<s>": 29
+}

checkpoint-350/config.json ADDED Viewed

	@@ -0,0 +1,108 @@

+{
+  "activation_dropout": 0.0,
+  "adapter_attn_dim": null,
+  "adapter_kernel_size": 3,
+  "adapter_stride": 2,
+  "add_adapter": false,
+  "apply_spec_augment": true,
+  "architectures": [
+    "Wav2Vec2ForCTC"
+  ],
+  "attention_dropout": 0.0,
+  "bos_token_id": 1,
+  "classifier_proj_size": 256,
+  "codevector_dim": 768,
+  "contrastive_logits_temperature": 0.1,
+  "conv_bias": true,
+  "conv_dim": [
+    512,
+    512,
+    512,
+    512,
+    512,
+    512,
+    512
+  ],
+  "conv_kernel": [
+    10,
+    3,
+    3,
+    3,
+    3,
+    2,
+    2
+  ],
+  "conv_stride": [
+    5,
+    2,
+    2,
+    2,
+    2,
+    2,
+    2
+  ],
+  "ctc_loss_reduction": "mean",
+  "ctc_zero_infinity": false,
+  "diversity_loss_weight": 0.1,
+  "do_stable_layer_norm": true,
+  "eos_token_id": 2,
+  "feat_extract_activation": "gelu",
+  "feat_extract_dropout": 0.0,
+  "feat_extract_norm": "layer",
+  "feat_proj_dropout": 0.0,
+  "feat_quantizer_dropout": 0.0,
+  "final_dropout": 0.0,
+  "gradient_checkpointing": false,
+  "hidden_act": "gelu",
+  "hidden_dropout": 0.0,
+  "hidden_size": 1024,
+  "initializer_range": 0.02,
+  "intermediate_size": 4096,
+  "layer_norm_eps": 1e-05,
+  "layerdrop": 0.0,
+  "mask_feature_length": 10,
+  "mask_feature_min_masks": 0,
+  "mask_feature_prob": 0.0,
+  "mask_time_length": 10,
+  "mask_time_min_masks": 2,
+  "mask_time_prob": 0.0,
+  "model_type": "wav2vec2",
+  "num_adapter_layers": 3,
+  "num_attention_heads": 16,
+  "num_codevector_groups": 2,
+  "num_codevectors_per_group": 320,
+  "num_conv_pos_embedding_groups": 16,
+  "num_conv_pos_embeddings": 128,
+  "num_feat_extract_layers": 7,
+  "num_hidden_layers": 24,
+  "num_negatives": 100,
+  "output_hidden_size": 1024,
+  "pad_token_id": 28,
+  "proj_codevector_dim": 768,
+  "tdnn_dilation": [
+    1,
+    2,
+    3,
+    1,
+    1
+  ],
+  "tdnn_dim": [
+    512,
+    512,
+    512,
+    512,
+    1500
+  ],
+  "tdnn_kernel": [
+    5,
+    3,
+    3,
+    1,
+    1
+  ],
+  "torch_dtype": "float32",
+  "transformers_version": "4.55.4",
+  "use_weighted_layer_sum": false,
+  "vocab_size": 31,
+  "xvector_output_dim": 512
+}

checkpoint-350/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ed5f427487a4f75146b4e9dc480494f8cfaa16c8507284e695d3b23b353eb34c
+size 1261934580

checkpoint-350/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a8f716e20a540ad57a89a97cf60f2059a70665e1d6504d27ca6d95bdde5765f5
+size 806521803

checkpoint-350/preprocessor_config.json ADDED Viewed

	@@ -0,0 +1,11 @@

+{
+  "do_normalize": true,
+  "feature_extractor_type": "Wav2Vec2FeatureExtractor",
+  "feature_size": 1,
+  "padding_side": "right",
+  "padding_value": 0.0,
+  "processor_class": "Wav2Vec2Processor",
+  "return_attention_mask": true,
+  "return_tensors": "np",
+  "sampling_rate": 16000
+}

checkpoint-350/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:123848c4e5af199b0a3023db0dc50b82d8d7f7a2d2e95b352af9861d5d70da79
+size 14645

checkpoint-350/scaler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:eca071cf113770e6722dad4a3d7b2eab7ca077ae7e6cfa66af1ff8bc547d5284
+size 1383

checkpoint-350/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:e07e2030005258fea5b5de0d579feff99fe4606f17cd41417eb090d35fca8f99
+size 1465

checkpoint-350/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,28 @@

+{
+  "additional_special_tokens": [
+    {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false
+    },
+    {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false
+    }
+  ],
+  "bos_token": "<s>",
+  "eos_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": "[PAD]",
+  "unk_token": "[UNK]"
+}

checkpoint-350/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,53 @@

+{
+  "added_tokens_decoder": {
+    "27": {
+      "content": "[UNK]",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": false
+    },
+    "28": {
+      "content": "[PAD]",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": false
+    },
+    "29": {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "30": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "additional_special_tokens": [
+    "<s>",
+    "</s>"
+  ],
+  "bos_token": "<s>",
+  "clean_up_tokenization_spaces": false,
+  "do_lower_case": false,
+  "eos_token": "</s>",
+  "extra_special_tokens": {},
+  "model_max_length": 1000000000000000019884624838656,
+  "pad_token": "[PAD]",
+  "processor_class": "Wav2Vec2Processor",
+  "replace_word_delimiter_char": " ",
+  "target_lang": null,
+  "tokenizer_class": "Wav2Vec2CTCTokenizer",
+  "unk_token": "[UNK]",
+  "word_delimiter_token": "|"
+}

checkpoint-350/trainer_state.json ADDED Viewed

	@@ -0,0 +1,279 @@

+{
+  "best_global_step": null,
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 14.0,
+  "eval_steps": 500,
+  "global_step": 350,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.4032258064516129,
+      "grad_norm": 15.945989608764648,
+      "learning_rate": 5.4e-07,
+      "loss": 2.942,
+      "step": 10
+    },
+    {
+      "epoch": 0.8064516129032258,
+      "grad_norm": 15.050374984741211,
+      "learning_rate": 1.14e-06,
+      "loss": 3.345,
+      "step": 20
+    },
+    {
+      "epoch": 1.2016129032258065,
+      "grad_norm": 9.487156867980957,
+      "learning_rate": 1.74e-06,
+      "loss": 2.6443,
+      "step": 30
+    },
+    {
+      "epoch": 1.6048387096774195,
+      "grad_norm": 14.825271606445312,
+      "learning_rate": 2.34e-06,
+      "loss": 3.0528,
+      "step": 40
+    },
+    {
+      "epoch": 2.0,
+      "grad_norm": 15.298517227172852,
+      "learning_rate": 2.9400000000000002e-06,
+      "loss": 2.2951,
+      "step": 50
+    },
+    {
+      "epoch": 2.403225806451613,
+      "grad_norm": 13.529952049255371,
+      "learning_rate": 3.54e-06,
+      "loss": 2.3061,
+      "step": 60
+    },
+    {
+      "epoch": 2.806451612903226,
+      "grad_norm": 12.780255317687988,
+      "learning_rate": 4.14e-06,
+      "loss": 1.828,
+      "step": 70
+    },
+    {
+      "epoch": 3.2016129032258065,
+      "grad_norm": 4.392550468444824,
+      "learning_rate": 4.74e-06,
+      "loss": 1.7883,
+      "step": 80
+    },
+    {
+      "epoch": 3.6048387096774195,
+      "grad_norm": 6.868764400482178,
+      "learning_rate": 5.34e-06,
+      "loss": 1.3883,
+      "step": 90
+    },
+    {
+      "epoch": 4.0,
+      "grad_norm": 11.529573440551758,
+      "learning_rate": 5.940000000000001e-06,
+      "loss": 1.2804,
+      "step": 100
+    },
+    {
+      "epoch": 4.403225806451613,
+      "grad_norm": 3.888104200363159,
+      "learning_rate": 6.54e-06,
+      "loss": 1.1184,
+      "step": 110
+    },
+    {
+      "epoch": 4.806451612903226,
+      "grad_norm": 5.543831825256348,
+      "learning_rate": 7.14e-06,
+      "loss": 0.8838,
+      "step": 120
+    },
+    {
+      "epoch": 5.201612903225806,
+      "grad_norm": 3.6435844898223877,
+      "learning_rate": 7.74e-06,
+      "loss": 0.7359,
+      "step": 130
+    },
+    {
+      "epoch": 5.604838709677419,
+      "grad_norm": 3.873931407928467,
+      "learning_rate": 8.340000000000001e-06,
+      "loss": 0.6677,
+      "step": 140
+    },
+    {
+      "epoch": 6.0,
+      "grad_norm": 6.526176929473877,
+      "learning_rate": 8.939999999999999e-06,
+      "loss": 0.5343,
+      "step": 150
+    },
+    {
+      "epoch": 6.403225806451613,
+      "grad_norm": 2.3237783908843994,
+      "learning_rate": 9.54e-06,
+      "loss": 0.4596,
+      "step": 160
+    },
+    {
+      "epoch": 6.806451612903226,
+      "grad_norm": 3.2708520889282227,
+      "learning_rate": 1.0140000000000001e-05,
+      "loss": 0.371,
+      "step": 170
+    },
+    {
+      "epoch": 7.201612903225806,
+      "grad_norm": 1.420652985572815,
+      "learning_rate": 1.074e-05,
+      "loss": 0.397,
+      "step": 180
+    },
+    {
+      "epoch": 7.604838709677419,
+      "grad_norm": 2.8168821334838867,
+      "learning_rate": 1.134e-05,
+      "loss": 0.2335,
+      "step": 190
+    },
+    {
+      "epoch": 8.0,
+      "grad_norm": 0.872466504573822,
+      "learning_rate": 1.1940000000000001e-05,
+      "loss": 0.2535,
+      "step": 200
+    },
+    {
+      "epoch": 8.403225806451612,
+      "grad_norm": 1.6869771480560303,
+      "learning_rate": 1.254e-05,
+      "loss": 0.1642,
+      "step": 210
+    },
+    {
+      "epoch": 8.806451612903226,
+      "grad_norm": 8.301424026489258,
+      "learning_rate": 1.314e-05,
+      "loss": 0.112,
+      "step": 220
+    },
+    {
+      "epoch": 9.201612903225806,
+      "grad_norm": 0.7436397671699524,
+      "learning_rate": 1.374e-05,
+      "loss": 0.1257,
+      "step": 230
+    },
+    {
+      "epoch": 9.60483870967742,
+      "grad_norm": 1.891860842704773,
+      "learning_rate": 1.434e-05,
+      "loss": 0.0623,
+      "step": 240
+    },
+    {
+      "epoch": 10.0,
+      "grad_norm": 0.5202131867408752,
+      "learning_rate": 1.4940000000000001e-05,
+      "loss": 0.0572,
+      "step": 250
+    },
+    {
+      "epoch": 10.403225806451612,
+      "grad_norm": 0.0677497610449791,
+      "learning_rate": 1.554e-05,
+      "loss": 0.0212,
+      "step": 260
+    },
+    {
+      "epoch": 10.806451612903226,
+      "grad_norm": 0.07801195234060287,
+      "learning_rate": 1.614e-05,
+      "loss": 0.0243,
+      "step": 270
+    },
+    {
+      "epoch": 11.201612903225806,
+      "grad_norm": 0.11515898257493973,
+      "learning_rate": 1.6740000000000002e-05,
+      "loss": 0.0321,
+      "step": 280
+    },
+    {
+      "epoch": 11.60483870967742,
+      "grad_norm": 0.11826858669519424,
+      "learning_rate": 1.734e-05,
+      "loss": 0.0254,
+      "step": 290
+    },
+    {
+      "epoch": 12.0,
+      "grad_norm": 0.13558819890022278,
+      "learning_rate": 1.794e-05,
+      "loss": 0.014,
+      "step": 300
+    },
+    {
+      "epoch": 12.403225806451612,
+      "grad_norm": 0.03777763620018959,
+      "learning_rate": 1.854e-05,
+      "loss": 0.0301,
+      "step": 310
+    },
+    {
+      "epoch": 12.806451612903226,
+      "grad_norm": 0.0405518114566803,
+      "learning_rate": 1.914e-05,
+      "loss": 0.0027,
+      "step": 320
+    },
+    {
+      "epoch": 13.201612903225806,
+      "grad_norm": 0.03762541711330414,
+      "learning_rate": 1.974e-05,
+      "loss": 0.0026,
+      "step": 330
+    },
+    {
+      "epoch": 13.60483870967742,
+      "grad_norm": 0.06563286483287811,
+      "learning_rate": 2.0340000000000002e-05,
+      "loss": 0.0279,
+      "step": 340
+    },
+    {
+      "epoch": 14.0,
+      "grad_norm": 0.12380703538656235,
+      "learning_rate": 2.094e-05,
+      "loss": 0.0063,
+      "step": 350
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 375,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 15,
+  "save_steps": 50,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 1.651986705536446e+17,
+  "train_batch_size": 4,
+  "trial_name": null,
+  "trial_params": null
+}

checkpoint-350/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:65214c45db9799ff8e38f62a8a12f53de739d750564cf70464decc61c84f64ab
+size 5777

checkpoint-350/vocab.json ADDED Viewed

	@@ -0,0 +1,31 @@

+{
+  "[PAD]": 28,
+  "[UNK]": 27,
+  "a": 1,
+  "b": 2,
+  "c": 3,
+  "d": 4,
+  "e": 5,
+  "f": 6,
+  "g": 7,
+  "h": 8,
+  "i": 9,
+  "j": 10,
+  "k": 11,
+  "l": 12,
+  "m": 13,
+  "n": 14,
+  "o": 15,
+  "p": 16,
+  "q": 17,
+  "r": 18,
+  "s": 19,
+  "t": 20,
+  "u": 21,
+  "v": 22,
+  "w": 23,
+  "x": 24,
+  "y": 25,
+  "z": 26,
+  "|": 0
+}

checkpoint-375/added_tokens.json ADDED Viewed

	@@ -0,0 +1,4 @@

+{
+  "</s>": 30,
+  "<s>": 29
+}

checkpoint-375/config.json ADDED Viewed

	@@ -0,0 +1,108 @@

+{
+  "activation_dropout": 0.0,
+  "adapter_attn_dim": null,
+  "adapter_kernel_size": 3,
+  "adapter_stride": 2,
+  "add_adapter": false,
+  "apply_spec_augment": true,
+  "architectures": [
+    "Wav2Vec2ForCTC"
+  ],
+  "attention_dropout": 0.0,
+  "bos_token_id": 1,
+  "classifier_proj_size": 256,
+  "codevector_dim": 768,
+  "contrastive_logits_temperature": 0.1,
+  "conv_bias": true,
+  "conv_dim": [
+    512,
+    512,
+    512,
+    512,
+    512,
+    512,
+    512
+  ],
+  "conv_kernel": [
+    10,
+    3,
+    3,
+    3,
+    3,
+    2,
+    2
+  ],
+  "conv_stride": [
+    5,
+    2,
+    2,
+    2,
+    2,
+    2,
+    2
+  ],
+  "ctc_loss_reduction": "mean",
+  "ctc_zero_infinity": false,
+  "diversity_loss_weight": 0.1,
+  "do_stable_layer_norm": true,
+  "eos_token_id": 2,
+  "feat_extract_activation": "gelu",
+  "feat_extract_dropout": 0.0,
+  "feat_extract_norm": "layer",
+  "feat_proj_dropout": 0.0,
+  "feat_quantizer_dropout": 0.0,
+  "final_dropout": 0.0,
+  "gradient_checkpointing": false,
+  "hidden_act": "gelu",
+  "hidden_dropout": 0.0,
+  "hidden_size": 1024,
+  "initializer_range": 0.02,
+  "intermediate_size": 4096,
+  "layer_norm_eps": 1e-05,
+  "layerdrop": 0.0,
+  "mask_feature_length": 10,
+  "mask_feature_min_masks": 0,
+  "mask_feature_prob": 0.0,
+  "mask_time_length": 10,
+  "mask_time_min_masks": 2,
+  "mask_time_prob": 0.0,
+  "model_type": "wav2vec2",
+  "num_adapter_layers": 3,
+  "num_attention_heads": 16,
+  "num_codevector_groups": 2,
+  "num_codevectors_per_group": 320,
+  "num_conv_pos_embedding_groups": 16,
+  "num_conv_pos_embeddings": 128,
+  "num_feat_extract_layers": 7,
+  "num_hidden_layers": 24,
+  "num_negatives": 100,
+  "output_hidden_size": 1024,
+  "pad_token_id": 28,
+  "proj_codevector_dim": 768,
+  "tdnn_dilation": [
+    1,
+    2,
+    3,
+    1,
+    1
+  ],
+  "tdnn_dim": [
+    512,
+    512,
+    512,
+    512,
+    1500
+  ],
+  "tdnn_kernel": [
+    5,
+    3,
+    3,
+    1,
+    1
+  ],
+  "torch_dtype": "float32",
+  "transformers_version": "4.55.4",
+  "use_weighted_layer_sum": false,
+  "vocab_size": 31,
+  "xvector_output_dim": 512
+}

checkpoint-375/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4a78453f5c1780262440dd4fd6465ec500764cf4eccb2e7bad9a7e7f5ffcc2e5
+size 1261934580

checkpoint-375/optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d46c229febe3e84451b6b8ab287e9ba97b56be3c714e4eddacaf6087128d1b55
+size 806521803

checkpoint-375/preprocessor_config.json ADDED Viewed

	@@ -0,0 +1,11 @@

+{
+  "do_normalize": true,
+  "feature_extractor_type": "Wav2Vec2FeatureExtractor",
+  "feature_size": 1,
+  "padding_side": "right",
+  "padding_value": 0.0,
+  "processor_class": "Wav2Vec2Processor",
+  "return_attention_mask": true,
+  "return_tensors": "np",
+  "sampling_rate": 16000
+}

checkpoint-375/rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:214119e423d32a6bd19a61d9c6c73a5bb02729dfeacd793faf03518656d409c4
+size 14645

checkpoint-375/scaler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ffa100fb89452a105fbe870f5976239d27ca580780da5887426b11aec30bf36e
+size 1383

checkpoint-375/scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f0d44bba9ff495f2f261cd3e092af3e4a799899ba7c44ee5b3bc6534485b82ea
+size 1465

checkpoint-375/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,28 @@

+{
+  "additional_special_tokens": [
+    {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false
+    },
+    {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false
+    }
+  ],
+  "bos_token": "<s>",
+  "eos_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": "[PAD]",
+  "unk_token": "[UNK]"
+}

checkpoint-375/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,53 @@

+{
+  "added_tokens_decoder": {
+    "27": {
+      "content": "[UNK]",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": false
+    },
+    "28": {
+      "content": "[PAD]",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": false
+    },
+    "29": {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "30": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "additional_special_tokens": [
+    "<s>",
+    "</s>"
+  ],
+  "bos_token": "<s>",
+  "clean_up_tokenization_spaces": false,
+  "do_lower_case": false,
+  "eos_token": "</s>",
+  "extra_special_tokens": {},
+  "model_max_length": 1000000000000000019884624838656,
+  "pad_token": "[PAD]",
+  "processor_class": "Wav2Vec2Processor",
+  "replace_word_delimiter_char": " ",
+  "target_lang": null,
+  "tokenizer_class": "Wav2Vec2CTCTokenizer",
+  "unk_token": "[UNK]",
+  "word_delimiter_token": "|"
+}

checkpoint-375/trainer_state.json ADDED Viewed

	@@ -0,0 +1,293 @@

+{
+  "best_global_step": null,
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 15.0,
+  "eval_steps": 500,
+  "global_step": 375,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.4032258064516129,
+      "grad_norm": 15.945989608764648,
+      "learning_rate": 5.4e-07,
+      "loss": 2.942,
+      "step": 10
+    },
+    {
+      "epoch": 0.8064516129032258,
+      "grad_norm": 15.050374984741211,
+      "learning_rate": 1.14e-06,
+      "loss": 3.345,
+      "step": 20
+    },
+    {
+      "epoch": 1.2016129032258065,
+      "grad_norm": 9.487156867980957,
+      "learning_rate": 1.74e-06,
+      "loss": 2.6443,
+      "step": 30
+    },
+    {
+      "epoch": 1.6048387096774195,
+      "grad_norm": 14.825271606445312,
+      "learning_rate": 2.34e-06,
+      "loss": 3.0528,
+      "step": 40
+    },
+    {
+      "epoch": 2.0,
+      "grad_norm": 15.298517227172852,
+      "learning_rate": 2.9400000000000002e-06,
+      "loss": 2.2951,
+      "step": 50
+    },
+    {
+      "epoch": 2.403225806451613,
+      "grad_norm": 13.529952049255371,
+      "learning_rate": 3.54e-06,
+      "loss": 2.3061,
+      "step": 60
+    },
+    {
+      "epoch": 2.806451612903226,
+      "grad_norm": 12.780255317687988,
+      "learning_rate": 4.14e-06,
+      "loss": 1.828,
+      "step": 70
+    },
+    {
+      "epoch": 3.2016129032258065,
+      "grad_norm": 4.392550468444824,
+      "learning_rate": 4.74e-06,
+      "loss": 1.7883,
+      "step": 80
+    },
+    {
+      "epoch": 3.6048387096774195,
+      "grad_norm": 6.868764400482178,
+      "learning_rate": 5.34e-06,
+      "loss": 1.3883,
+      "step": 90
+    },
+    {
+      "epoch": 4.0,
+      "grad_norm": 11.529573440551758,
+      "learning_rate": 5.940000000000001e-06,
+      "loss": 1.2804,
+      "step": 100
+    },
+    {
+      "epoch": 4.403225806451613,
+      "grad_norm": 3.888104200363159,
+      "learning_rate": 6.54e-06,
+      "loss": 1.1184,
+      "step": 110
+    },
+    {
+      "epoch": 4.806451612903226,
+      "grad_norm": 5.543831825256348,
+      "learning_rate": 7.14e-06,
+      "loss": 0.8838,
+      "step": 120
+    },
+    {
+      "epoch": 5.201612903225806,
+      "grad_norm": 3.6435844898223877,
+      "learning_rate": 7.74e-06,
+      "loss": 0.7359,
+      "step": 130
+    },
+    {
+      "epoch": 5.604838709677419,
+      "grad_norm": 3.873931407928467,
+      "learning_rate": 8.340000000000001e-06,
+      "loss": 0.6677,
+      "step": 140
+    },
+    {
+      "epoch": 6.0,
+      "grad_norm": 6.526176929473877,
+      "learning_rate": 8.939999999999999e-06,
+      "loss": 0.5343,
+      "step": 150
+    },
+    {
+      "epoch": 6.403225806451613,
+      "grad_norm": 2.3237783908843994,
+      "learning_rate": 9.54e-06,
+      "loss": 0.4596,
+      "step": 160
+    },
+    {
+      "epoch": 6.806451612903226,
+      "grad_norm": 3.2708520889282227,
+      "learning_rate": 1.0140000000000001e-05,
+      "loss": 0.371,
+      "step": 170
+    },
+    {
+      "epoch": 7.201612903225806,
+      "grad_norm": 1.420652985572815,
+      "learning_rate": 1.074e-05,
+      "loss": 0.397,
+      "step": 180
+    },
+    {
+      "epoch": 7.604838709677419,
+      "grad_norm": 2.8168821334838867,
+      "learning_rate": 1.134e-05,
+      "loss": 0.2335,
+      "step": 190
+    },
+    {
+      "epoch": 8.0,
+      "grad_norm": 0.872466504573822,
+      "learning_rate": 1.1940000000000001e-05,
+      "loss": 0.2535,
+      "step": 200
+    },
+    {
+      "epoch": 8.403225806451612,
+      "grad_norm": 1.6869771480560303,
+      "learning_rate": 1.254e-05,
+      "loss": 0.1642,
+      "step": 210
+    },
+    {
+      "epoch": 8.806451612903226,
+      "grad_norm": 8.301424026489258,
+      "learning_rate": 1.314e-05,
+      "loss": 0.112,
+      "step": 220
+    },
+    {
+      "epoch": 9.201612903225806,
+      "grad_norm": 0.7436397671699524,
+      "learning_rate": 1.374e-05,
+      "loss": 0.1257,
+      "step": 230
+    },
+    {
+      "epoch": 9.60483870967742,
+      "grad_norm": 1.891860842704773,
+      "learning_rate": 1.434e-05,
+      "loss": 0.0623,
+      "step": 240
+    },
+    {
+      "epoch": 10.0,
+      "grad_norm": 0.5202131867408752,
+      "learning_rate": 1.4940000000000001e-05,
+      "loss": 0.0572,
+      "step": 250
+    },
+    {
+      "epoch": 10.403225806451612,
+      "grad_norm": 0.0677497610449791,
+      "learning_rate": 1.554e-05,
+      "loss": 0.0212,
+      "step": 260
+    },
+    {
+      "epoch": 10.806451612903226,
+      "grad_norm": 0.07801195234060287,
+      "learning_rate": 1.614e-05,
+      "loss": 0.0243,
+      "step": 270
+    },
+    {
+      "epoch": 11.201612903225806,
+      "grad_norm": 0.11515898257493973,
+      "learning_rate": 1.6740000000000002e-05,
+      "loss": 0.0321,
+      "step": 280
+    },
+    {
+      "epoch": 11.60483870967742,
+      "grad_norm": 0.11826858669519424,
+      "learning_rate": 1.734e-05,
+      "loss": 0.0254,
+      "step": 290
+    },
+    {
+      "epoch": 12.0,
+      "grad_norm": 0.13558819890022278,
+      "learning_rate": 1.794e-05,
+      "loss": 0.014,
+      "step": 300
+    },
+    {
+      "epoch": 12.403225806451612,
+      "grad_norm": 0.03777763620018959,
+      "learning_rate": 1.854e-05,
+      "loss": 0.0301,
+      "step": 310
+    },
+    {
+      "epoch": 12.806451612903226,
+      "grad_norm": 0.0405518114566803,
+      "learning_rate": 1.914e-05,
+      "loss": 0.0027,
+      "step": 320
+    },
+    {
+      "epoch": 13.201612903225806,
+      "grad_norm": 0.03762541711330414,
+      "learning_rate": 1.974e-05,
+      "loss": 0.0026,
+      "step": 330
+    },
+    {
+      "epoch": 13.60483870967742,
+      "grad_norm": 0.06563286483287811,
+      "learning_rate": 2.0340000000000002e-05,
+      "loss": 0.0279,
+      "step": 340
+    },
+    {
+      "epoch": 14.0,
+      "grad_norm": 0.12380703538656235,
+      "learning_rate": 2.094e-05,
+      "loss": 0.0063,
+      "step": 350
+    },
+    {
+      "epoch": 14.403225806451612,
+      "grad_norm": 0.015481448732316494,
+      "learning_rate": 2.154e-05,
+      "loss": 0.0021,
+      "step": 360
+    },
+    {
+      "epoch": 14.806451612903226,
+      "grad_norm": 0.036212582141160965,
+      "learning_rate": 2.214e-05,
+      "loss": 0.0019,
+      "step": 370
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 375,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 15,
+  "save_steps": 50,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 1.770072662194541e+17,
+  "train_batch_size": 4,
+  "trial_name": null,
+  "trial_params": null
+}

checkpoint-375/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:65214c45db9799ff8e38f62a8a12f53de739d750564cf70464decc61c84f64ab
+size 5777

checkpoint-375/vocab.json ADDED Viewed

	@@ -0,0 +1,31 @@

+{
+  "[PAD]": 28,
+  "[UNK]": 27,
+  "a": 1,
+  "b": 2,
+  "c": 3,
+  "d": 4,
+  "e": 5,
+  "f": 6,
+  "g": 7,
+  "h": 8,
+  "i": 9,
+  "j": 10,
+  "k": 11,
+  "l": 12,
+  "m": 13,
+  "n": 14,
+  "o": 15,
+  "p": 16,
+  "q": 17,
+  "r": 18,
+  "s": 19,
+  "t": 20,
+  "u": 21,
+  "v": 22,
+  "w": 23,
+  "x": 24,
+  "y": 25,
+  "z": 26,
+  "|": 0
+}

config.json ADDED Viewed

	@@ -0,0 +1,108 @@

+{
+  "activation_dropout": 0.0,
+  "adapter_attn_dim": null,
+  "adapter_kernel_size": 3,
+  "adapter_stride": 2,
+  "add_adapter": false,
+  "apply_spec_augment": true,
+  "architectures": [
+    "Wav2Vec2ForCTC"
+  ],
+  "attention_dropout": 0.0,
+  "bos_token_id": 1,
+  "classifier_proj_size": 256,
+  "codevector_dim": 768,
+  "contrastive_logits_temperature": 0.1,
+  "conv_bias": true,
+  "conv_dim": [
+    512,
+    512,
+    512,
+    512,
+    512,
+    512,
+    512
+  ],
+  "conv_kernel": [
+    10,
+    3,
+    3,
+    3,
+    3,
+    2,
+    2
+  ],
+  "conv_stride": [
+    5,
+    2,
+    2,
+    2,
+    2,
+    2,
+    2
+  ],
+  "ctc_loss_reduction": "mean",
+  "ctc_zero_infinity": false,
+  "diversity_loss_weight": 0.1,
+  "do_stable_layer_norm": true,
+  "eos_token_id": 2,
+  "feat_extract_activation": "gelu",
+  "feat_extract_dropout": 0.0,
+  "feat_extract_norm": "layer",
+  "feat_proj_dropout": 0.0,
+  "feat_quantizer_dropout": 0.0,
+  "final_dropout": 0.0,
+  "gradient_checkpointing": false,
+  "hidden_act": "gelu",
+  "hidden_dropout": 0.0,
+  "hidden_size": 1024,
+  "initializer_range": 0.02,
+  "intermediate_size": 4096,
+  "layer_norm_eps": 1e-05,
+  "layerdrop": 0.0,
+  "mask_feature_length": 10,
+  "mask_feature_min_masks": 0,
+  "mask_feature_prob": 0.0,
+  "mask_time_length": 10,
+  "mask_time_min_masks": 2,
+  "mask_time_prob": 0.0,
+  "model_type": "wav2vec2",
+  "num_adapter_layers": 3,
+  "num_attention_heads": 16,
+  "num_codevector_groups": 2,
+  "num_codevectors_per_group": 320,
+  "num_conv_pos_embedding_groups": 16,
+  "num_conv_pos_embeddings": 128,
+  "num_feat_extract_layers": 7,
+  "num_hidden_layers": 24,
+  "num_negatives": 100,
+  "output_hidden_size": 1024,
+  "pad_token_id": 28,
+  "proj_codevector_dim": 768,
+  "tdnn_dilation": [
+    1,
+    2,
+    3,
+    1,
+    1
+  ],
+  "tdnn_dim": [
+    512,
+    512,
+    512,
+    512,
+    1500
+  ],
+  "tdnn_kernel": [
+    5,
+    3,
+    3,
+    1,
+    1
+  ],
+  "torch_dtype": "float32",
+  "transformers_version": "4.55.4",
+  "use_weighted_layer_sum": false,
+  "vocab_size": 31,
+  "xvector_output_dim": 512
+}

evaluation_results.csv ADDED Viewed

	@@ -0,0 +1,57 @@

+Reference,Prediction,WER,CER
+wi,,1.0,1.0
+wi,,1.0,1.0
+wi,,1.0,1.0
+wi,,1.0,1.0
+wi,,1.0,1.0
+wi,<unk>,1.0,2.5
+wi,,1.0,1.0
+wi,,1.0,1.0
+wi,,1.0,1.0
+wo,,1.0,1.0
+wo,,1.0,1.0
+wo,,1.0,1.0
+wo,,1.0,1.0
+wo,,1.0,1.0
+wo,,1.0,1.0
+wo,,1.0,1.0
+wo,,1.0,1.0
+wo,,1.0,1.0
+wo,,1.0,1.0
+wo,,1.0,1.0
+wu,,1.0,1.0
+wu,,1.0,1.0
+wu,,1.0,1.0
+wu,,1.0,1.0
+wu,,1.0,1.0
+wu,,1.0,1.0
+wu,,1.0,1.0
+wu,,1.0,1.0
+wu,,1.0,1.0
+ya,,1.0,1.0
+ya,,1.0,1.0
+ya,,1.0,1.0
+ya,,1.0,1.0
+ya,,1.0,1.0
+ya,,1.0,1.0
+ya,,1.0,1.0
+ye,,1.0,1.0
+ye,,1.0,1.0
+ye,,1.0,1.0
+ye,,1.0,1.0
+ye,,1.0,1.0
+yi,,1.0,1.0
+yi,,1.0,1.0
+yi,,1.0,1.0
+yi,,1.0,1.0
+yi,,1.0,1.0
+yo,,1.0,1.0
+yo,,1.0,1.0
+yo,,1.0,1.0
+yo,</s>,1.0,2.0
+yo,,1.0,1.0
+yu,,1.0,1.0
+yu,,1.0,1.0
+yu,,1.0,1.0
+yu,,1.0,1.0
+yu,,1.0,1.0

model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:4a78453f5c1780262440dd4fd6465ec500764cf4eccb2e7bad9a7e7f5ffcc2e5
+size 1261934580

preprocessor_config.json ADDED Viewed

	@@ -0,0 +1,11 @@

+{
+  "do_normalize": true,
+  "feature_extractor_type": "Wav2Vec2FeatureExtractor",
+  "feature_size": 1,
+  "padding_side": "right",
+  "padding_value": 0.0,
+  "processor_class": "Wav2Vec2Processor",
+  "return_attention_mask": true,
+  "return_tensors": "np",
+  "sampling_rate": 16000
+}

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,28 @@

+{
+  "additional_special_tokens": [
+    {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false
+    },
+    {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false
+    }
+  ],
+  "bos_token": "<s>",
+  "eos_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": "[PAD]",
+  "unk_token": "[UNK]"
+}

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,53 @@

+{
+  "added_tokens_decoder": {
+    "27": {
+      "content": "[UNK]",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": false
+    },
+    "28": {
+      "content": "[PAD]",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": true,
+      "single_word": false,
+      "special": false
+    },
+    "29": {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "30": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "additional_special_tokens": [
+    "<s>",
+    "</s>"
+  ],
+  "bos_token": "<s>",
+  "clean_up_tokenization_spaces": false,
+  "do_lower_case": false,
+  "eos_token": "</s>",
+  "extra_special_tokens": {},
+  "model_max_length": 1000000000000000019884624838656,
+  "pad_token": "[PAD]",
+  "processor_class": "Wav2Vec2Processor",
+  "replace_word_delimiter_char": " ",
+  "target_lang": null,
+  "tokenizer_class": "Wav2Vec2CTCTokenizer",
+  "unk_token": "[UNK]",
+  "word_delimiter_token": "|"
+}

training_log.tsv ADDED Viewed

	@@ -0,0 +1,26 @@

+timestamp	step	epoch	train_loss	grad_norm	learning_rate
+2025-09-21T21:41:21.703893	10	0.32	903.355000	614.498596	1.00e-07
+2025-09-21T21:41:27.856246	20	0.65	902.448600	617.974670	5.00e-07
+2025-09-21T21:41:33.781063	30	0.97	886.335200	578.719543	1.00e-06
+2025-09-21T21:41:39.650969	40	1.29	941.081400	763.962463	1.50e-06
+2025-09-21T21:41:45.448703	50	1.61	878.512000	745.441284	2.00e-06
+2025-09-21T21:41:46.303569	50	1.61	NA	NA	NA
+2025-09-21T21:41:53.272236	60	1.94	870.239400	805.721558	2.50e-06
+2025-09-21T21:41:58.923912	70	2.26	883.066600	810.874512	3.00e-06
+2025-09-21T21:42:04.647459	80	2.58	858.037100	876.961365	3.50e-06
+2025-09-21T21:42:10.439181	90	2.90	879.551700	879.198303	4.00e-06
+2025-09-21T21:42:16.088489	100	3.23	838.324500	1011.899841	4.50e-06
+2025-09-21T21:42:16.862886	100	3.23	NA	NA	NA
+2025-09-21T21:42:24.087341	110	3.55	789.837500	1295.429565	5.00e-06
+2025-09-21T21:42:29.817688	120	3.87	745.176600	1823.199463	5.50e-06
+2025-09-21T21:42:35.473868	130	4.19	701.951900	1844.962524	6.00e-06
+2025-09-21T21:42:41.363224	140	4.52	661.328600	1836.961670	6.50e-06
+2025-09-21T21:42:47.037840	150	4.84	558.101000	1857.315308	7.00e-06
+2025-09-21T21:42:47.769415	150	4.84	NA	NA	NA
+2025-09-21T21:42:54.652980	160	5.16	499.938000	2131.156982	7.50e-06
+2025-09-21T21:43:00.440291	170	5.48	450.506500	1810.863647	8.00e-06
+2025-09-21T21:43:06.233803	180	5.81	390.488400	1685.968994	8.50e-06
+2025-09-21T21:43:12.164436	190	6.13	324.997900	1826.587402	9.00e-06
+2025-09-21T21:43:17.924886	200	6.45	280.306800	1618.513672	9.50e-06
+2025-09-21T21:43:18.676823	200	6.45	NA	NA	NA
+2025-09-21T21:43:20.259029	200	6.45	NA	NA	NA

vocab.json ADDED Viewed

	@@ -0,0 +1,31 @@

+{
+  "[PAD]": 28,
+  "[UNK]": 27,
+  "a": 1,
+  "b": 2,
+  "c": 3,
+  "d": 4,
+  "e": 5,
+  "f": 6,
+  "g": 7,
+  "h": 8,
+  "i": 9,
+  "j": 10,
+  "k": 11,
+  "l": 12,
+  "m": 13,
+  "n": 14,
+  "o": 15,
+  "p": 16,
+  "q": 17,
+  "r": 18,
+  "s": 19,
+  "t": 20,
+  "u": 21,
+  "v": 22,
+  "w": 23,
+  "x": 24,
+  "y": 25,
+  "z": 26,
+  "|": 0
+}