Upload folder using huggingface_hub

Browse files

Files changed (6) hide show

README.md +28 -0
adapter_config.json +49 -0
adapter_model.safetensors +3 -0
config_indic.py +65 -0
tokenizer_indic.json +0 -0
tokenizer_indic.json.manifest.json +128 -0

README.md ADDED Viewed

	@@ -0,0 +1,28 @@

+---
+license: apache-2.0
+language:
+- ml
+base_model: ResembleAI/chatterbox
+tags:
+- text-to-speech
+- tts
+- malayalam
+- chatterbox
+- lora
+---
+# PrahaTTS-ML
+Malayalam LoRA adapter for ResembleAI Chatterbox non-turbo TTS.
+This repository contains the selected 17k-step adapter checkpoint, chosen by listening quality rather than lowest training loss.
+## Contents
+- `adapter_config.json`
+- `adapter_model.safetensors`
+- `tokenizer_indic.json`
+- `tokenizer_indic.json.manifest.json`
+- `config_indic.py`
+
+This is not a merged full model. Use it with the base Chatterbox non-turbo model and the included Indic tokenizer.

adapter_config.json ADDED Viewed

	@@ -0,0 +1,49 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": {
+    "base_model_class": "T3",
+    "parent_library": "src.chatterbox_.models.t3.t3"
+  },
+  "base_model_name_or_path": null,
+  "bias": "none",
+  "corda_config": null,
+  "eva_config": null,
+  "exclude_modules": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 256,
+  "lora_bias": false,
+  "lora_dropout": 0.05,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": [
+    "text_emb",
+    "text_head"
+  ],
+  "peft_type": "LORA",
+  "qalora_group_size": 16,
+  "r": 128,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "o_proj",
+    "down_proj",
+    "v_proj",
+    "k_proj",
+    "q_proj",
+    "gate_proj",
+    "up_proj",
+    "spkr_enc"
+  ],
+  "target_parameters": null,
+  "task_type": null,
+  "trainable_token_indices": null,
+  "use_dora": false,
+  "use_qalora": false,
+  "use_rslora": false
+}

adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:48e25755212b840d2ba40a187126a7fbd49fd02f0b7c9de2a58b4e1b33bde1d8
+size 383549136

config_indic.py ADDED Viewed

	@@ -0,0 +1,65 @@

+from dataclasses import dataclass, field
+from typing import List, Optional
+@dataclass
+class IndicTrainConfig:
+    # Base Chatterbox model files downloaded by setup.py.
+    model_dir: str = "./pretrained_models"
+    # Dataset layout. Mixed-language metadata should include a language column:
+    # filename|raw_text|normalized_text|language_id
+    csv_path: str = "./IndicFinetuning/datasets/MalayalamDataset/metadata.csv"
+    metadata_path: str = "./IndicFinetuning/datasets/metadata.json"
+    wav_dir: str = "./IndicFinetuning/datasets/MalayalamDataset/wavs"
+    preprocessed_dir: str = "./IndicFinetuning/datasets/MalayalamDataset/preprocess"
+    output_dir: str = "./IndicFinetuning/outputs"
+    tokenizer_path: str = "./IndicFinetuning/tokenizer/tokenizer_indic.json"
+    # Model selection.
+    is_turbo: bool = False
+    is_lora: bool = True
+    # Toggle languages here. For single-language Malayalam training, keep ["ml"].
+    target_languages: List[str] = field(default_factory=lambda: ["ml"])
+    default_language: str = "ml"
+    metadata_language_column: Optional[int] = 3
+    add_language_tag: bool = True
+    normalize_unicode: str = "NFC"
+    # Dataset format.
+    ljspeech: bool = True
+    json_format: bool = False
+    preprocess: bool = True
+    # Inference smoke test.
+    is_inference: bool = False
+    inference_language: str = "ml"
+    inference_prompt_path: str = "/workspace/Indic-ChatterBox/IndicFinetuning/outputs/reference_trimmed.wav"
+    inference_test_text: str = "പ്രണവേ എനിക്ക് നിന്നെ കാണാൻ really തോന്നുന്നു ഇന്ന് whole day mind full of thoughts ആയിരുന്നു നീ എവിടെയാ, എന്താ doing എന്ന് constantly ഓർമ്മ വരുന്നു just come back once, എനിക്ക് സംസാരിക്കണം നിന്നോട്"
+    # Vocabulary. Update after building the Indic tokenizer.
+    new_vocab_size: int = 2573
+    # LoRA.
+    lora_r: int = 128
+    lora_alpha: int = 256
+    lora_target_modules: List[str] = field(default_factory=lambda: ["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj", "spkr_enc"])
+    turbo_lora_target_modules: List[str] = field(default_factory=lambda: ["c_attn", "c_proj", "c_fc", "spkr_enc"])
+    lora_modules_to_save: List[str] = field(default_factory=lambda: ["text_emb", "text_head"])
+    # Training.
+    batch_size: int = 16
+    grad_accum: int = 1
+    learning_rate: float = 1e-4
+    num_epochs: int = 10
+    save_steps: int = 500
+    save_total_limit: int = 5
+    dataloader_num_workers: int = 8
+    # Sequence constraints.
+    start_text_token: int = 255
+    stop_text_token: int = 0
+    max_text_len: int = 256
+    max_speech_len: int = 850
+    prompt_duration: float = 3.0

tokenizer_indic.json ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer_indic.json.manifest.json ADDED Viewed

	@@ -0,0 +1,128 @@

+{
+  "languages": [
+    "ml"
+  ],
+  "added_token_count": 119,
+  "final_vocab_size": 2573,
+  "added_tokens": [
+    "[ml]",
+    "ഀ",
+    "ഁ",
+    "ം",
+    "ഃ",
+    "ഄ",
+    "അ",
+    "ആ",
+    "ഇ",
+    "ഈ",
+    "ഉ",
+    "ഊ",
+    "ഋ",
+    "ഌ",
+    "എ",
+    "ഏ",
+    "ഐ",
+    "ഒ",
+    "ഓ",
+    "ഔ",
+    "ക",
+    "ഖ",
+    "ഗ",
+    "ഘ",
+    "ങ",
+    "ച",
+    "ഛ",
+    "ജ",
+    "ഝ",
+    "ഞ",
+    "ട",
+    "ഠ",
+    "ഡ",
+    "ഢ",
+    "ണ",
+    "ത",
+    "ഥ",
+    "ദ",
+    "ധ",
+    "ന",
+    "ഩ",
+    "പ",
+    "ഫ",
+    "ബ",
+    "ഭ",
+    "മ",
+    "യ",
+    "ര",
+    "റ",
+    "ല",
+    "ള",
+    "ഴ",
+    "വ",
+    "ശ",
+    "ഷ",
+    "സ",
+    "ഹ",
+    "ഺ",
+    "഻",
+    "഼",
+    "ഽ",
+    "ാ",
+    "ി",
+    "ീ",
+    "ു",
+    "ൂ",
+    "ൃ",
+    "ൄ",
+    "െ",
+    "േ",
+    "ൈ",
+    "ൊ",
+    "ോ",
+    "ൌ",
+    "്",
+    "ൎ",
+    "൏",
+    "ൔ",
+    "ൕ",
+    "ൖ",
+    "ൗ",
+    "൘",
+    "൙",
+    "൚",
+    "൛",
+    "൜",
+    "൝",
+    "൞",
+    "ൟ",
+    "ൠ",
+    "ൡ",
+    "ൢ",
+    "ൣ",
+    "൦",
+    "൧",
+    "൨",
+    "൩",
+    "൪",
+    "൫",
+    "൬",
+    "൭",
+    "൮",
+    "൯",
+    "൰",
+    "൱",
+    "൲",
+    "൳",
+    "൴",
+    "൵",
+    "൶",
+    "൷",
+    "൸",
+    "൹",
+    "ൺ",
+    "ൻ",
+    "ർ",
+    "ൽ",
+    "ൾ",
+    "ൿ"
+  ]
+}