Spaces:
Running on Zero
Running on Zero
Commit ·
a1a4f4b
1
Parent(s): 2d2d472
Full-body fix: shot field first, shorter style_prefix (free CLIP tokens), auto-tall canvas 512x832 on full-body intent (sd15) (#32)
Browse files- Full-body fix: shot field first, shorter style_prefix (free CLIP tokens), auto-tall canvas 512x832 on full-body intent (sd15) (eedbb2557e07ec27d04f6d6ed8f6877ed297862c)
Co-authored-by: pormungtailaw <pormungtai@users.noreply.huggingface.co>
- app.py +5 -1
- models.json +1 -1
- pipeline_manager.py +18 -0
app.py
CHANGED
|
@@ -156,6 +156,10 @@ def build_prompt(subject, age, ethnicity, skin, face, body, hair, eyes, outfit,
|
|
| 156 |
"""Assemble form fields into a prompt. Thai values are fine — the auto-translator
|
| 157 |
turns the whole thing into English at generate time."""
|
| 158 |
parts = []
|
|
|
|
|
|
|
|
|
|
|
|
|
| 159 |
who = (subject or "ผู้หญิง").strip()
|
| 160 |
if ethnicity and ethnicity.strip():
|
| 161 |
who = f"{ethnicity.strip()} {who}"
|
|
@@ -163,7 +167,7 @@ def build_prompt(subject, age, ethnicity, skin, face, body, hair, eyes, outfit,
|
|
| 163 |
who = f"{who} อายุ {str(age).strip()} ปี"
|
| 164 |
parts.append(who)
|
| 165 |
# skin tone + face shape sit right after "who" (core identity), then the rest.
|
| 166 |
-
for v in (skin, face, body, hair, eyes, outfit, pose, expression, scene, lighting
|
| 167 |
if v and str(v).strip():
|
| 168 |
parts.append(str(v).strip())
|
| 169 |
thai = ", ".join(parts)
|
|
|
|
| 156 |
"""Assemble form fields into a prompt. Thai values are fine — the auto-translator
|
| 157 |
turns the whole thing into English at generate time."""
|
| 158 |
parts = []
|
| 159 |
+
# Framing/shot goes FIRST so "full body / head to toe" survives CLIP's 77-token cut
|
| 160 |
+
# (it's the key compositional cue; if truncated, SD1.5 defaults to a portrait crop).
|
| 161 |
+
if shot and str(shot).strip():
|
| 162 |
+
parts.append(str(shot).strip())
|
| 163 |
who = (subject or "ผู้หญิง").strip()
|
| 164 |
if ethnicity and ethnicity.strip():
|
| 165 |
who = f"{ethnicity.strip()} {who}"
|
|
|
|
| 167 |
who = f"{who} อายุ {str(age).strip()} ปี"
|
| 168 |
parts.append(who)
|
| 169 |
# skin tone + face shape sit right after "who" (core identity), then the rest.
|
| 170 |
+
for v in (skin, face, body, hair, eyes, outfit, pose, expression, scene, lighting):
|
| 171 |
if v and str(v).strip():
|
| 172 |
parts.append(str(v).strip())
|
| 173 |
thai = ", ".join(parts)
|
models.json
CHANGED
|
@@ -15,7 +15,7 @@
|
|
| 15 |
"default_height": 768,
|
| 16 |
"sampler": "dpmpp_2m_karras",
|
| 17 |
"vae": "stabilityai/sd-vae-ft-mse",
|
| 18 |
-
"style_prefix": "RAW photo, analog film
|
| 19 |
"hires": { "scale": 1.3, "denoise": 0.3, "steps": 12 },
|
| 20 |
"neg_embeddings": ["easynegative"],
|
| 21 |
"recommended_prompt": "RAW photo, (photorealistic:1.2), portrait of a beautiful woman, detailed skin texture, visible skin pores, natural skin, subsurface scattering, film grain, soft cinematic light, 85mm, depth of field, ultra detailed, analog photo",
|
|
|
|
| 15 |
"default_height": 768,
|
| 16 |
"sampler": "dpmpp_2m_karras",
|
| 17 |
"vae": "stabilityai/sd-vae-ft-mse",
|
| 18 |
+
"style_prefix": "RAW photo, analog film grain, detailed skin texture, skin pores, real hair strands",
|
| 19 |
"hires": { "scale": 1.3, "denoise": 0.3, "steps": 12 },
|
| 20 |
"neg_embeddings": ["easynegative"],
|
| 21 |
"recommended_prompt": "RAW photo, (photorealistic:1.2), portrait of a beautiful woman, detailed skin texture, visible skin pores, natural skin, subsurface scattering, film grain, soft cinematic light, 85mm, depth of field, ultra detailed, analog photo",
|
pipeline_manager.py
CHANGED
|
@@ -114,6 +114,19 @@ def has_thai(text):
|
|
| 114 |
return any("\u0e00" <= ch <= "\u0e7f" for ch in (text or ""))
|
| 115 |
|
| 116 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 117 |
def _load_translator(engine):
|
| 118 |
if engine in _TRANSLATOR_CACHE:
|
| 119 |
return _TRANSLATOR_CACHE[engine]
|
|
@@ -473,6 +486,11 @@ def run_generation(cfg, mode, prompt, negative_prompt, ref_image,
|
|
| 473 |
_parts = [cfg.get("style_prefix"), cfg.get("trigger"), prompt]
|
| 474 |
full_prompt = ", ".join(p.strip() for p in _parts if p and str(p).strip()).strip(", ")
|
| 475 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 476 |
call = dict(
|
| 477 |
prompt=full_prompt,
|
| 478 |
num_inference_steps=int(steps),
|
|
|
|
| 114 |
return any("\u0e00" <= ch <= "\u0e7f" for ch in (text or ""))
|
| 115 |
|
| 116 |
|
| 117 |
+
# Full-body framing cues — if present, SD1.5's 512x768 canvas crops to a portrait,
|
| 118 |
+
# so we give the canvas more vertical room (see run_generation). Checked on the
|
| 119 |
+
# already-translated English prompt; Thai เต็มตัว/ทั้งตัว included as a safety net.
|
| 120 |
+
_FULL_BODY_CUES = ("full body", "full-body", "head to toe", "head-to-toe",
|
| 121 |
+
"full length", "full-length", "full shot", "entire body",
|
| 122 |
+
"whole body", "standing", "เต็มตัว", "ทั้งตัว", "เห็นเท้า")
|
| 123 |
+
|
| 124 |
+
|
| 125 |
+
def wants_full_body(text):
|
| 126 |
+
t = (text or "").lower()
|
| 127 |
+
return any(c in t for c in _FULL_BODY_CUES)
|
| 128 |
+
|
| 129 |
+
|
| 130 |
def _load_translator(engine):
|
| 131 |
if engine in _TRANSLATOR_CACHE:
|
| 132 |
return _TRANSLATOR_CACHE[engine]
|
|
|
|
| 486 |
_parts = [cfg.get("style_prefix"), cfg.get("trigger"), prompt]
|
| 487 |
full_prompt = ", ".join(p.strip() for p in _parts if p and str(p).strip()).strip(", ")
|
| 488 |
|
| 489 |
+
# Full-body framing fix: on SD1.5 a 512x768 canvas crops standing/seated subjects
|
| 490 |
+
# to a portrait even when "full body" is requested. Give it more vertical room.
|
| 491 |
+
if base == "sd15" and mode == "txt2img" and wants_full_body(prompt):
|
| 492 |
+
height = max(int(height), 832)
|
| 493 |
+
|
| 494 |
call = dict(
|
| 495 |
prompt=full_prompt,
|
| 496 |
num_inference_steps=int(steps),
|