Spaces:
Running on Zero
majicMIX photoreal pass: drop art tokens (masterpiece/best quality), add photographic style_prefix + sd-vae-ft-mse VAE, photo-focused Typhoon prompt, CFG 5.0
Browse files
- app.py +3 -2
- models.json +5 -3
- pipeline_manager.py +14 -6
app.py
CHANGED
|
@@ -166,8 +166,9 @@ def build_prompt(subject, age, ethnicity, body, hair, eyes, outfit, pose,
|
|
| 166 |
if v and str(v).strip():
|
| 167 |
parts.append(str(v).strip())
|
| 168 |
thai = ", ".join(parts)
|
| 169 |
-
#
|
| 170 |
-
|
|
|
|
| 171 |
|
| 172 |
|
| 173 |
# ---------------------------------------------------------------------------
|
|
|
|
| 166 |
if v and str(v).strip():
|
| 167 |
parts.append(str(v).strip())
|
| 168 |
thai = ", ".join(parts)
|
| 169 |
+
# Photographic suffix (NOT "masterpiece/best quality" — those art tokens push the model
|
| 170 |
+
# toward an illustrated/CG look. The model's style_prefix + negative do the heavy lifting).
|
| 171 |
+
return thai + ", realistic candid photograph, natural skin texture, soft natural light"
|
| 172 |
|
| 173 |
|
| 174 |
# ---------------------------------------------------------------------------
|
models.json
CHANGED
|
@@ -14,12 +14,14 @@
|
|
| 14 |
"default_width": 512,
|
| 15 |
"default_height": 768,
|
| 16 |
"sampler": "dpmpp_2m_karras",
|
|
|
|
|
|
|
| 17 |
"hires": { "scale": 1.3, "denoise": 0.3, "steps": 12 },
|
| 18 |
"neg_embeddings": ["easynegative"],
|
| 19 |
"recommended_prompt": "RAW photo, (photorealistic:1.2), portrait of a beautiful woman, detailed skin texture, visible skin pores, natural skin, subsurface scattering, film grain, soft cinematic light, 85mm, depth of field, ultra detailed, analog photo",
|
| 20 |
-
"negative_prompt": "EasyNegative, (worst quality, low quality:1.4), (extra limbs, extra legs, three legs, extra arms, missing limbs, fused limbs:1.4), (mutated hands, missing fingers, extra fingers, fused fingers:1.3), (bad anatomy, deformed, mutated, disfigured:1.2), (plastic skin, airbrushed, doll, 3d, render:1.2), bad feet, extra toes, lowres, watermark, text",
|
| 21 |
-
"default_steps":
|
| 22 |
-
"default_guidance": 5.
|
| 23 |
"enabled": true
|
| 24 |
},
|
| 25 |
{
|
|
|
|
| 14 |
"default_width": 512,
|
| 15 |
"default_height": 768,
|
| 16 |
"sampler": "dpmpp_2m_karras",
|
| 17 |
+
"vae": "stabilityai/sd-vae-ft-mse",
|
| 18 |
+
"style_prefix": "RAW photo, analog film photograph, film grain, detailed skin texture, skin pores, fine hair strands, detailed eyes, catchlight, natural nails",
|
| 19 |
"hires": { "scale": 1.3, "denoise": 0.3, "steps": 12 },
|
| 20 |
"neg_embeddings": ["easynegative"],
|
| 21 |
"recommended_prompt": "RAW photo, (photorealistic:1.2), portrait of a beautiful woman, detailed skin texture, visible skin pores, natural skin, subsurface scattering, film grain, soft cinematic light, 85mm, depth of field, ultra detailed, analog photo",
|
| 22 |
+
"negative_prompt": "EasyNegative, (worst quality, low quality:1.4), (extra limbs, extra legs, three legs, extra arms, missing limbs, fused limbs:1.4), (mutated hands, missing fingers, extra fingers, fused fingers:1.3), (bad anatomy, deformed, mutated, disfigured:1.2), (plastic skin, airbrushed, doll, smooth skin, 3d, render, illustration, painting:1.2), bad feet, extra toes, lowres, watermark, text",
|
| 23 |
+
"default_steps": 30,
|
| 24 |
+
"default_guidance": 5.0,
|
| 25 |
"enabled": true
|
| 26 |
},
|
| 27 |
{
|
pipeline_manager.py
CHANGED
|
@@ -150,9 +150,14 @@ def translate_prompt(text, engine):
|
|
| 150 |
# typhoon: ask the LLM to rewrite as a clean English image prompt
|
| 151 |
msgs = [
|
| 152 |
{"role": "system", "content": "You convert Thai text-to-image prompts "
|
| 153 |
-
"into a single concise, vivid English prompt for
|
| 154 |
-
"
|
| 155 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 156 |
{"role": "user", "content": text},
|
| 157 |
]
|
| 158 |
chat = tok.apply_chat_template(msgs, add_generation_prompt=True, tokenize=False)
|
|
@@ -461,9 +466,12 @@ def run_generation(cfg, mode, prompt, negative_prompt, ref_image,
|
|
| 461 |
if seed is not None and int(seed) >= 0:
|
| 462 |
generator = torch.Generator(device=DEVICE).manual_seed(int(seed))
|
| 463 |
|
| 464 |
-
|
| 465 |
-
|
| 466 |
-
|
|
|
|
|
|
|
|
|
|
| 467 |
|
| 468 |
call = dict(
|
| 469 |
prompt=full_prompt,
|
|
|
|
| 150 |
# typhoon: ask the LLM to rewrite as a clean English image prompt
|
| 151 |
msgs = [
|
| 152 |
{"role": "system", "content": "You convert Thai text-to-image prompts "
|
| 153 |
+
"into a single concise, vivid English prompt for a PHOTOREALISTIC Stable "
|
| 154 |
+
"Diffusion model. Describe it as a real candid photograph: keep the subject, "
|
| 155 |
+
"clothing, pose, and scene, and add realistic photographic detail (natural "
|
| 156 |
+
"skin texture and pores, real hair strands, lifelike eyes, soft natural "
|
| 157 |
+
"light). NEVER use illustration/painting/anime/CG words such as 'masterpiece', "
|
| 158 |
+
"'best quality', 'artstation', 'render', '3d', 'anime' or 'painting'. "
|
| 159 |
+
"Output ONLY the English prompt as a comma-separated phrase — no quotes, "
|
| 160 |
+
"no explanation."},
|
| 161 |
{"role": "user", "content": text},
|
| 162 |
]
|
| 163 |
chat = tok.apply_chat_template(msgs, add_generation_prompt=True, tokenize=False)
|
|
|
|
| 466 |
if seed is not None and int(seed) >= 0:
|
| 467 |
generator = torch.Generator(device=DEVICE).manual_seed(int(seed))
|
| 468 |
|
| 469 |
+
# Front-load a per-model photographic style prefix (e.g. "RAW photo, film grain")
|
| 470 |
+
# so the strongest realism cue survives CLIP's 77-token truncation. Applied AFTER
|
| 471 |
+
# translation (run_generation gets the already-English prompt), so it reaches the
|
| 472 |
+
# model verbatim regardless of the translator. Then trigger, then the user prompt.
|
| 473 |
+
_parts = [cfg.get("style_prefix"), cfg.get("trigger"), prompt]
|
| 474 |
+
full_prompt = ", ".join(p.strip() for p in _parts if p and str(p).strip()).strip(", ")
|
| 475 |
|
| 476 |
call = dict(
|
| 477 |
prompt=full_prompt,
|