Spaces:

mamungtai-sat
/

character-studio

Running on Zero

App Files Files Community

pormungtai committed 29 days ago

Commit

a19330a

verified ·

1 Parent(s): 5800fd3

Add Thai->English auto-translate (NLLB-200 + Typhoon 2 selectable)

Browse files

Files changed (2) hide show

app.py +21 -3
pipeline_manager.py +66 -0

app.py CHANGED Viewed

@@ -79,7 +79,8 @@ def modes_for(models, model_id):
 # ---------------------------------------------------------------------------
 @spaces.GPU(duration=120)
 def generate(model_id, mode, prompt, negative_prompt, ref_image,
-             steps, guidance, denoise, ip_scale, width, height, seed, randomize):
     models = load_models()
     cfg = pm.get_model(models, model_id)
     if cfg is None:
@@ -88,6 +89,14 @@ def generate(model_id, mode, prompt, negative_prompt, ref_image,
     if randomize or seed is None or int(seed) < 0:
         seed = random.randint(0, MAX_SEED)
     try:
         img = pm.run_generation(
             cfg=cfg, mode=mode, prompt=prompt, negative_prompt=negative_prompt,
@@ -98,7 +107,7 @@ def generate(model_id, mode, prompt, negative_prompt, ref_image,
         traceback.print_exc()
         raise gr.Error(str(e))
-    status = f"✅ {cfg['label']} · {pm.MODE_LABELS.get(mode, mode)} · seed {seed}"
     return img, seed, status
@@ -189,6 +198,14 @@ with gr.Blocks(css=CSS, theme=gr.themes.Soft(primary_hue="blue"),
                 label="โหมดรูปต้นแบบ / Input mode",
             )
         # ---- right: output ----
         with gr.Column(scale=1):
             output = gr.Image(label="Generated Image", height=560, elem_classes="card")
@@ -227,7 +244,8 @@ with gr.Blocks(css=CSS, theme=gr.themes.Soft(primary_hue="blue"),
     )
     gen_inputs = [selected_id, mode_radio, prompt, negative_prompt, ref_image,
-                  steps, guidance, denoise, ip_scale, width, height, seed, randomize]
     gen_btn.click(generate, inputs=gen_inputs, outputs=[output, seed, status])
     prompt.submit(generate, inputs=gen_inputs, outputs=[output, seed, status])

 # ---------------------------------------------------------------------------
 @spaces.GPU(duration=120)
 def generate(model_id, mode, prompt, negative_prompt, ref_image,
+             steps, guidance, denoise, ip_scale, width, height, seed, randomize,
+             translator):
     models = load_models()
     cfg = pm.get_model(models, model_id)
     if cfg is None:
     if randomize or seed is None or int(seed) < 0:
         seed = random.randint(0, MAX_SEED)
+    # Thai → English so the (English) text encoders understand the prompt.
+    note = ""
+    orig_prompt = prompt
+    prompt = pm.translate_prompt(prompt, translator)
+    negative_prompt = pm.translate_prompt(negative_prompt, translator)
+    if prompt != orig_prompt:
+        note = f"  ·  🌐 {translator}: _{prompt[:120]}_"
     try:
         img = pm.run_generation(
             cfg=cfg, mode=mode, prompt=prompt, negative_prompt=negative_prompt,
         traceback.print_exc()
         raise gr.Error(str(e))
+    status = f"✅ {cfg['label']} · {pm.MODE_LABELS.get(mode, mode)} · seed {seed}{note}"
     return img, seed, status
                 label="โหมดรูปต้นแบบ / Input mode",
             )
+            translator = gr.Radio(
+                choices=[("ปิด / Off", "off"),
+                         ("NLLB-200 (เร็ว)", "nllb"),
+                         ("Typhoon 2 (ไทยแน่น)", "typhoon")],
+                value="nllb",
+                label="แปลไทย→อังกฤษ / Auto-translate (พิมพ์ไทยได้เลย)",
+            )
         # ---- right: output ----
         with gr.Column(scale=1):
             output = gr.Image(label="Generated Image", height=560, elem_classes="card")
     )
     gen_inputs = [selected_id, mode_radio, prompt, negative_prompt, ref_image,
+                  steps, guidance, denoise, ip_scale, width, height, seed, randomize,
+                  translator]
     gen_btn.click(generate, inputs=gen_inputs, outputs=[output, seed, status])
     prompt.submit(generate, inputs=gen_inputs, outputs=[output, seed, status])

pipeline_manager.py CHANGED Viewed

@@ -68,6 +68,72 @@ def get_model(models, model_id):
     return None
 # ---------------------------------------------------------------------------
 # Download helpers (Civitai / arbitrary URL → local cache)
 # ---------------------------------------------------------------------------

     return None
+# ---------------------------------------------------------------------------
+# Thai → English prompt translation (the SD/SDXL/FLUX text encoders are English;
+# Thai prompts otherwise produce unrelated images). Runs on the Space, no API key.
+# ---------------------------------------------------------------------------
+# Translator key (the value sent by the UI radio: "nllb" / "typhoon") mapped to
+# the Hugging Face model id that is handed to from_pretrained().
+TRANSLATORS = {
+    "nllb": "facebook/nllb-200-distilled-600M",
+    "typhoon": "scb10x/llama3.2-typhoon2-3b-instruct",
+}
+# Module-level cache: translator key -> (tokenizer, model) tuple, so each model
+# is instantiated at most once per process.
+_TRANSLATOR_CACHE = {}
+def has_thai(text):
+    """Return True if *text* contains at least one Thai-block character.
+
+    The Thai Unicode block spans U+0E00..U+0E7F (inclusive). ``None`` and the
+    empty string are treated as "no Thai" and yield False.
+    """
+    # Explicit escapes rather than raw characters: both range bounds are
+    # unassigned code points that render as blank/tofu glyphs and are easily
+    # mangled by copy/paste or re-encoding of the source file.
+    return any("\u0e00" <= ch <= "\u0e7f" for ch in (text or ""))
+def _load_translator(engine):
+    """Return the ``(tokenizer, model)`` pair for *engine*, loading it on first use.
+
+    Loaded pairs are stored in ``_TRANSLATOR_CACHE`` and returned from there on
+    later calls. ``"nllb"`` is loaded as a seq2seq model; any other key present
+    in ``TRANSLATORS`` is loaded as a causal LM. An *engine* that is not a key
+    of ``TRANSLATORS`` raises ``KeyError``. The model is put in eval mode; it is
+    not moved to any device here.
+    """
+    cached = _TRANSLATOR_CACHE.get(engine)
+    if cached is not None:
+        return cached
+    repo_id = TRANSLATORS[engine]
+    # Function-scope imports: these statements only run when a translator is
+    # actually being loaded.
+    if engine == "nllb":
+        from transformers import AutoModelForSeq2SeqLM as model_cls
+    else:  # typhoon (causal LM)
+        from transformers import AutoModelForCausalLM as model_cls
+    from transformers import AutoTokenizer
+    tokenizer = AutoTokenizer.from_pretrained(repo_id)
+    net = model_cls.from_pretrained(repo_id, torch_dtype=DTYPE_SD)
+    net.eval()
+    pair = (tokenizer, net)
+    _TRANSLATOR_CACHE[engine] = pair
+    return pair
+def translate_prompt(text, engine):
+    """Translate a Thai prompt to English; pass anything else through unchanged.
+
+    MUST be called inside the @spaces.GPU context (uses CUDA when available):
+    the model and its inputs are moved to the module-level ``DEVICE``.
+
+    Args:
+        text: The prompt. Returned as-is when it is empty/None or contains no
+            Thai characters (see ``has_thai``).
+        engine: Translator key. ``None`` or ``"off"`` disables translation;
+            ``"nllb"`` uses the seq2seq path; any other value takes the
+            chat-LLM (Typhoon) path.
+
+    Returns:
+        The English prompt, or the original *text* when translation is
+        skipped, fails, or produces an empty string.
+    """
+    if not text or engine in (None, "off") or not has_thai(text):
+        return text
+    try:
+        tok, model = _load_translator(engine)
+        model = model.to(DEVICE)
+        if engine == "nllb":
+            tok.src_lang = "tha_Thai"
+            inputs = tok(text, return_tensors="pt", truncation=True,
+                         max_length=400).to(DEVICE)
+            # NLLB selects the target language via the first generated token.
+            bos = tok.convert_tokens_to_ids("eng_Latn")
+            out = model.generate(**inputs, forced_bos_token_id=bos,
+                                 max_new_tokens=256, num_beams=4)
+            translated = tok.batch_decode(out, skip_special_tokens=True)[0].strip()
+        else:
+            # typhoon: ask the LLM to rewrite as a clean English image prompt
+            msgs = [
+                {"role": "system", "content": "You convert Thai text-to-image prompts "
+                 "into a single concise, vivid English prompt for Stable Diffusion. "
+                 "Keep the described subject, clothing, pose, and scene. Output ONLY the "
+                 "English prompt as a comma-separated phrase — no quotes, no explanation."},
+                {"role": "user", "content": text},
+            ]
+            # return_dict=True yields input_ids AND attention_mask, so generate()
+            # does not have to guess the mask, and the return type is explicit
+            # rather than relying on the library default.
+            enc = tok.apply_chat_template(msgs, add_generation_prompt=True,
+                                          return_tensors="pt",
+                                          return_dict=True).to(DEVICE)
+            out = model.generate(**enc, max_new_tokens=256, do_sample=False,
+                                 pad_token_id=tok.eos_token_id)
+            # A causal LM echoes the prompt; keep only the newly generated tail.
+            prompt_len = enc["input_ids"].shape[1]
+            translated = tok.decode(out[0][prompt_len:],
+                                    skip_special_tokens=True).strip()
+    except Exception as e:  # noqa
+        # Deliberate best-effort: a broken translator must not block generation.
+        print(f"[translate] {engine} failed, using original text: {e}")
+        return text
+    # An empty translation would silently wipe the user's prompt; keep the original.
+    return translated or text
 # ---------------------------------------------------------------------------
 # Download helpers (Civitai / arbitrary URL → local cache)
 # ---------------------------------------------------------------------------