Spaces:

mamungtai-sat
/

character-studio

Running on Zero

App Files Community

pormungtai committed 28 days ago

Commit

cd243ab

verified ·

1 Parent(s): 817ab19

majicMIX photoreal pass: drop art tokens (masterpiece/best quality), add photographic style_prefix + sd-vae-ft-mse VAE, photo-focused Typhoon prompt, CFG 5.0

Browse files

Files changed (3) hide show

app.py +3 -2
models.json +5 -3
pipeline_manager.py +14 -6

app.py CHANGED Viewed

@@ -166,8 +166,9 @@ def build_prompt(subject, age, ethnicity, body, hair, eyes, outfit, pose,
         if v and str(v).strip():
             parts.append(str(v).strip())
     thai = ", ".join(parts)
-    # English quality suffix (kept in English; the model's negative handles the rest).
-    return thai + ", photorealistic, masterpiece, best quality, ultra-detailed, sharp focus"
 # ---------------------------------------------------------------------------

         if v and str(v).strip():
             parts.append(str(v).strip())
     thai = ", ".join(parts)
+    # Photographic suffix (NOT "masterpiece/best quality" — those art tokens push the model
+    # toward an illustrated/CG look. The model's style_prefix + negative do the heavy lifting).
+    return thai + ", realistic candid photograph, natural skin texture, soft natural light"
 # ---------------------------------------------------------------------------

models.json CHANGED Viewed

@@ -14,12 +14,14 @@
       "default_width": 512,
       "default_height": 768,
       "sampler": "dpmpp_2m_karras",
       "hires": { "scale": 1.3, "denoise": 0.3, "steps": 12 },
       "neg_embeddings": ["easynegative"],
       "recommended_prompt": "RAW photo, (photorealistic:1.2), portrait of a beautiful woman, detailed skin texture, visible skin pores, natural skin, subsurface scattering, film grain, soft cinematic light, 85mm, depth of field, ultra detailed, analog photo",
-      "negative_prompt": "EasyNegative, (worst quality, low quality:1.4), (extra limbs, extra legs, three legs, extra arms, missing limbs, fused limbs:1.4), (mutated hands, missing fingers, extra fingers, fused fingers:1.3), (bad anatomy, deformed, mutated, disfigured:1.2), (plastic skin, airbrushed, doll, 3d, render:1.2), bad feet, extra toes, lowres, watermark, text",
-      "default_steps": 28,
-      "default_guidance": 5.5,
       "enabled": true
     },
     {

       "default_width": 512,
       "default_height": 768,
       "sampler": "dpmpp_2m_karras",
+      "vae": "stabilityai/sd-vae-ft-mse",
+      "style_prefix": "RAW photo, analog film photograph, film grain, detailed skin texture, skin pores, fine hair strands, detailed eyes, catchlight, natural nails",
       "hires": { "scale": 1.3, "denoise": 0.3, "steps": 12 },
       "neg_embeddings": ["easynegative"],
       "recommended_prompt": "RAW photo, (photorealistic:1.2), portrait of a beautiful woman, detailed skin texture, visible skin pores, natural skin, subsurface scattering, film grain, soft cinematic light, 85mm, depth of field, ultra detailed, analog photo",
+      "negative_prompt": "EasyNegative, (worst quality, low quality:1.4), (extra limbs, extra legs, three legs, extra arms, missing limbs, fused limbs:1.4), (mutated hands, missing fingers, extra fingers, fused fingers:1.3), (bad anatomy, deformed, mutated, disfigured:1.2), (plastic skin, airbrushed, doll, smooth skin, 3d, render, illustration, painting:1.2), bad feet, extra toes, lowres, watermark, text",
+      "default_steps": 30,
+      "default_guidance": 5.0,
       "enabled": true
     },
     {

pipeline_manager.py CHANGED Viewed

@@ -150,9 +150,14 @@ def translate_prompt(text, engine):
         # typhoon: ask the LLM to rewrite as a clean English image prompt
         msgs = [
             {"role": "system", "content": "You convert Thai text-to-image prompts "
-             "into a single concise, vivid English prompt for Stable Diffusion. "
-             "Keep the described subject, clothing, pose, and scene. Output ONLY the "
-             "English prompt as a comma-separated phrase — no quotes, no explanation."},
             {"role": "user", "content": text},
         ]
         chat = tok.apply_chat_template(msgs, add_generation_prompt=True, tokenize=False)
@@ -461,9 +466,12 @@ def run_generation(cfg, mode, prompt, negative_prompt, ref_image,
     if seed is not None and int(seed) >= 0:
         generator = torch.Generator(device=DEVICE).manual_seed(int(seed))
-    full_prompt = prompt
-    if cfg.get("trigger"):
-        full_prompt = f"{cfg['trigger']}, {prompt}".strip(", ")
     call = dict(
         prompt=full_prompt,

         # typhoon: ask the LLM to rewrite as a clean English image prompt
         msgs = [
             {"role": "system", "content": "You convert Thai text-to-image prompts "
+             "into a single concise, vivid English prompt for a PHOTOREALISTIC Stable "
+             "Diffusion model. Describe it as a real candid photograph: keep the subject, "
+             "clothing, pose, and scene, and add realistic photographic detail (natural "
+             "skin texture and pores, real hair strands, lifelike eyes, soft natural "
+             "light). NEVER use illustration/painting/anime/CG words such as 'masterpiece', "
+             "'best quality', 'artstation', 'render', '3d', 'anime' or 'painting'. "
+             "Output ONLY the English prompt as a comma-separated phrase — no quotes, "
+             "no explanation."},
             {"role": "user", "content": text},
         ]
         chat = tok.apply_chat_template(msgs, add_generation_prompt=True, tokenize=False)
     if seed is not None and int(seed) >= 0:
         generator = torch.Generator(device=DEVICE).manual_seed(int(seed))
+    # Front-load a per-model photographic style prefix (e.g. "RAW photo, film grain")
+    # so the strongest realism cue survives CLIP's 77-token truncation. Applied AFTER
+    # translation (run_generation gets the already-English prompt), so it reaches the
+    # model verbatim regardless of the translator. Then trigger, then the user prompt.
+    _parts = [cfg.get("style_prefix"), cfg.get("trigger"), prompt]
+    full_prompt = ", ".join(p.strip() for p in _parts if p and str(p).strip()).strip(", ")
     call = dict(
         prompt=full_prompt,