Praha-Labs committed on
Commit
4f93701
·
verified ·
1 Parent(s): 634a715

Upload folder using huggingface_hub

Browse files
README.md ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: apache-2.0
3
+ language:
4
+ - ml
5
+ base_model: ResembleAI/chatterbox
6
+ tags:
7
+ - text-to-speech
8
+ - tts
9
+ - malayalam
10
+ - chatterbox
11
+ - lora
12
+ ---
13
+
14
+ # PrahaTTS-ML
15
+
16
+ Malayalam LoRA adapter for ResembleAI Chatterbox non-turbo TTS.
17
+
18
+ This repository contains the selected 17k-step adapter checkpoint, chosen by listening quality rather than lowest training loss.
19
+
20
+ ## Contents
21
+
22
+ - `adapter_config.json`
23
+ - `adapter_model.safetensors`
24
+ - `tokenizer_indic.json`
25
+ - `tokenizer_indic.json.manifest.json`
26
+ - `config_indic.py`
27
+
28
+ This repository does not contain a merged full model. Load the adapter on top of the base Chatterbox non-turbo model (`ResembleAI/chatterbox`) and use the included Indic tokenizer (`tokenizer_indic.json`).
adapter_config.json ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alpha_pattern": {},
3
+ "auto_mapping": {
4
+ "base_model_class": "T3",
5
+ "parent_library": "src.chatterbox_.models.t3.t3"
6
+ },
7
+ "base_model_name_or_path": null,
8
+ "bias": "none",
9
+ "corda_config": null,
10
+ "eva_config": null,
11
+ "exclude_modules": null,
12
+ "fan_in_fan_out": false,
13
+ "inference_mode": true,
14
+ "init_lora_weights": true,
15
+ "layer_replication": null,
16
+ "layers_pattern": null,
17
+ "layers_to_transform": null,
18
+ "loftq_config": {},
19
+ "lora_alpha": 256,
20
+ "lora_bias": false,
21
+ "lora_dropout": 0.05,
22
+ "megatron_config": null,
23
+ "megatron_core": "megatron.core",
24
+ "modules_to_save": [
25
+ "text_emb",
26
+ "text_head"
27
+ ],
28
+ "peft_type": "LORA",
29
+ "qalora_group_size": 16,
30
+ "r": 128,
31
+ "rank_pattern": {},
32
+ "revision": null,
33
+ "target_modules": [
34
+ "o_proj",
35
+ "down_proj",
36
+ "v_proj",
37
+ "k_proj",
38
+ "q_proj",
39
+ "gate_proj",
40
+ "up_proj",
41
+ "spkr_enc"
42
+ ],
43
+ "target_parameters": null,
44
+ "task_type": null,
45
+ "trainable_token_indices": null,
46
+ "use_dora": false,
47
+ "use_qalora": false,
48
+ "use_rslora": false
49
+ }
adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:48e25755212b840d2ba40a187126a7fbd49fd02f0b7c9de2a58b4e1b33bde1d8
3
+ size 383549136
config_indic.py ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
from dataclasses import dataclass, field
from typing import List, Optional


@dataclass
class IndicTrainConfig:
    """Default settings for fine-tuning Chatterbox on Indic (Malayalam) data.

    Every field is a plain default that callers may override at construction
    time, e.g. ``IndicTrainConfig(batch_size=8)``. All filesystem paths are
    relative, so scripts using this config are expected to run from the
    repository root.
    """

    # Base Chatterbox model files downloaded by setup.py.
    model_dir: str = "./pretrained_models"

    # Dataset layout. Mixed-language metadata should include a language column:
    # filename|raw_text|normalized_text|language_id
    csv_path: str = "./IndicFinetuning/datasets/MalayalamDataset/metadata.csv"
    metadata_path: str = "./IndicFinetuning/datasets/metadata.json"
    wav_dir: str = "./IndicFinetuning/datasets/MalayalamDataset/wavs"
    preprocessed_dir: str = "./IndicFinetuning/datasets/MalayalamDataset/preprocess"
    output_dir: str = "./IndicFinetuning/outputs"
    tokenizer_path: str = "./IndicFinetuning/tokenizer/tokenizer_indic.json"

    # Model selection.
    is_turbo: bool = False
    is_lora: bool = True

    # Toggle languages here. For single-language Malayalam training, keep ["ml"].
    target_languages: List[str] = field(default_factory=lambda: ["ml"])
    default_language: str = "ml"
    # Zero-based index 3 matches the `language_id` column of the layout above.
    metadata_language_column: Optional[int] = 3
    add_language_tag: bool = True
    # NOTE(review): presumably a unicodedata.normalize form name - confirm.
    normalize_unicode: str = "NFC"

    # Dataset format.
    ljspeech: bool = True
    json_format: bool = False
    preprocess: bool = True

    # Inference smoke test.
    is_inference: bool = False
    inference_language: str = "ml"
    # Kept relative (under the same directory as `output_dir`) like every other
    # path in this config, instead of a machine-specific absolute path.
    inference_prompt_path: str = "./IndicFinetuning/outputs/reference_trimmed.wav"
    inference_test_text: str = "പ്രണവേ എനിക്ക് നിന്നെ കാണാൻ really തോന്നുന്നു ഇന്ന് whole day mind full of thoughts ആയിരുന്നു നീ എവിടെയാ, എന്താ doing എന്ന് constantly ഓർമ്മ വരുന്നു just come back once, എനിക്ക് സംസാരിക്കണം നിന്നോട്"

    # Vocabulary. Update after building the Indic tokenizer.
    new_vocab_size: int = 2573

    # LoRA.
    lora_r: int = 128
    lora_alpha: int = 256
    lora_target_modules: List[str] = field(
        default_factory=lambda: [
            "q_proj",
            "k_proj",
            "v_proj",
            "o_proj",
            "gate_proj",
            "up_proj",
            "down_proj",
            "spkr_enc",
        ]
    )
    # NOTE(review): presumably used instead of `lora_target_modules` when
    # `is_turbo` is True - confirm against the training script.
    turbo_lora_target_modules: List[str] = field(
        default_factory=lambda: ["c_attn", "c_proj", "c_fc", "spkr_enc"]
    )
    lora_modules_to_save: List[str] = field(
        default_factory=lambda: ["text_emb", "text_head"]
    )

    # Training.
    batch_size: int = 16
    grad_accum: int = 1
    learning_rate: float = 1e-4
    num_epochs: int = 10
    save_steps: int = 500
    save_total_limit: int = 5
    dataloader_num_workers: int = 8

    # Sequence constraints.
    # NOTE(review): token ids presumably follow the base Chatterbox text
    # tokenizer; the units of the length limits are not visible here - confirm.
    start_text_token: int = 255
    stop_text_token: int = 0
    max_text_len: int = 256
    max_speech_len: int = 850
    prompt_duration: float = 3.0
tokenizer_indic.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_indic.json.manifest.json ADDED
@@ -0,0 +1,128 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "languages": [
3
+ "ml"
4
+ ],
5
+ "added_token_count": 119,
6
+ "final_vocab_size": 2573,
7
+ "added_tokens": [
8
+ "[ml]",
9
+ "ഀ",
10
+ "ഁ",
11
+ "ം",
12
+ "ഃ",
13
+ "ഄ",
14
+ "അ",
15
+ "ആ",
16
+ "ഇ",
17
+ "ഈ",
18
+ "ഉ",
19
+ "ഊ",
20
+ "ഋ",
21
+ "ഌ",
22
+ "എ",
23
+ "ഏ",
24
+ "ഐ",
25
+ "ഒ",
26
+ "ഓ",
27
+ "ഔ",
28
+ "ക",
29
+ "ഖ",
30
+ "ഗ",
31
+ "ഘ",
32
+ "ങ",
33
+ "ച",
34
+ "ഛ",
35
+ "ജ",
36
+ "ഝ",
37
+ "ഞ",
38
+ "ട",
39
+ "ഠ",
40
+ "ഡ",
41
+ "ഢ",
42
+ "ണ",
43
+ "ത",
44
+ "ഥ",
45
+ "ദ",
46
+ "ധ",
47
+ "ന",
48
+ "ഩ",
49
+ "പ",
50
+ "ഫ",
51
+ "ബ",
52
+ "ഭ",
53
+ "മ",
54
+ "യ",
55
+ "ര",
56
+ "റ",
57
+ "ല",
58
+ "ള",
59
+ "ഴ",
60
+ "വ",
61
+ "ശ",
62
+ "ഷ",
63
+ "സ",
64
+ "ഹ",
65
+ "ഺ",
66
+ "഻",
67
+ "഼",
68
+ "ഽ",
69
+ "ാ",
70
+ "ി",
71
+ "ീ",
72
+ "ു",
73
+ "ൂ",
74
+ "ൃ",
75
+ "ൄ",
76
+ "െ",
77
+ "േ",
78
+ "ൈ",
79
+ "ൊ",
80
+ "ോ",
81
+ "ൌ",
82
+ "്",
83
+ "ൎ",
84
+ "൏",
85
+ "ൔ",
86
+ "ൕ",
87
+ "ൖ",
88
+ "ൗ",
89
+ "൘",
90
+ "൙",
91
+ "൚",
92
+ "൛",
93
+ "൜",
94
+ "൝",
95
+ "൞",
96
+ "ൟ",
97
+ "ൠ",
98
+ "ൡ",
99
+ "ൢ",
100
+ "ൣ",
101
+ "൦",
102
+ "൧",
103
+ "൨",
104
+ "൩",
105
+ "൪",
106
+ "൫",
107
+ "൬",
108
+ "൭",
109
+ "൮",
110
+ "൯",
111
+ "൰",
112
+ "൱",
113
+ "൲",
114
+ "൳",
115
+ "൴",
116
+ "൵",
117
+ "൶",
118
+ "൷",
119
+ "൸",
120
+ "൹",
121
+ "ൺ",
122
+ "ൻ",
123
+ "ർ",
124
+ "ൽ",
125
+ "ൾ",
126
+ "ൿ"
127
+ ]
128
+ }