Spaces:
Running on Zero
Running on Zero
Commit 0446524 · 1 Parent(s): a1a4f4b
Fix scene truncation: front-load scene/lighting/composition in build_prompt + tighter compact-tag Typhoon output (fewer tokens, keep location) (#33)
Browse files
- Fix scene truncation: front-load scene/lighting/composition in build_prompt + tighter compact-tag Typhoon output (fewer tokens, keep location) (84c945dd6150562415f83c5db317982bf6c1a838)
Co-authored-by: pormungtailaw <pormungtai@users.noreply.huggingface.co>
- app.py +4 -2
- pipeline_manager.py +11 -9
app.py
CHANGED
|
@@ -166,8 +166,10 @@ def build_prompt(subject, age, ethnicity, skin, face, body, hair, eyes, outfit,
|
|
| 166 |
if age and str(age).strip():
|
| 167 |
who = f"{who} อายุ {str(age).strip()} ปี"
|
| 168 |
parts.append(who)
|
| 169 |
-
#
|
| 170 |
-
|
|
|
|
|
|
|
| 171 |
if v and str(v).strip():
|
| 172 |
parts.append(str(v).strip())
|
| 173 |
thai = ", ".join(parts)
|
|
|
|
| 166 |
if age and str(age).strip():
|
| 167 |
who = f"{who} อายุ {str(age).strip()} ปี"
|
| 168 |
parts.append(who)
|
| 169 |
+
# Priority order for CLIP's 77-token budget: compositional anchors first
|
| 170 |
+
# (location/lighting/outfit/pose), fine appearance details last (least harmful
|
| 171 |
+
# if truncated). Skin texture realism is carried by the model's style_prefix anyway.
|
| 172 |
+
for v in (scene, lighting, outfit, pose, expression, body, hair, skin, face, eyes):
|
| 173 |
if v and str(v).strip():
|
| 174 |
parts.append(str(v).strip())
|
| 175 |
thai = ", ".join(parts)
|
pipeline_manager.py
CHANGED
|
@@ -162,15 +162,17 @@ def translate_prompt(text, engine):
|
|
| 162 |
return tok.batch_decode(out, skip_special_tokens=True)[0].strip()
|
| 163 |
# typhoon: ask the LLM to rewrite as a clean English image prompt
|
| 164 |
msgs = [
|
| 165 |
-
{"role": "system", "content": "You convert Thai text-to-image prompts "
|
| 166 |
-
"
|
| 167 |
-
"
|
| 168 |
-
"
|
| 169 |
-
"
|
| 170 |
-
"
|
| 171 |
-
"
|
| 172 |
-
"
|
| 173 |
-
"
|
|
|
|
|
|
|
| 174 |
{"role": "user", "content": text},
|
| 175 |
]
|
| 176 |
chat = tok.apply_chat_template(msgs, add_generation_prompt=True, tokenize=False)
|
|
|
|
| 162 |
return tok.batch_decode(out, skip_special_tokens=True)[0].strip()
|
| 163 |
# typhoon: ask the LLM to rewrite as a clean English image prompt
|
| 164 |
msgs = [
|
| 165 |
+
{"role": "system", "content": "You convert Thai text-to-image prompts into "
|
| 166 |
+
"an English prompt for a PHOTOREALISTIC Stable Diffusion model. Output a "
|
| 167 |
+
"COMPACT comma-separated list of English tags / short phrases (booru-tag "
|
| 168 |
+
"style) — NOT full sentences. Omit articles and filler words (a, an, the, "
|
| 169 |
+
"with, that is). Keep it short to fit a 77-token limit, but INCLUDE EVERY "
|
| 170 |
+
"detail from the input — especially the location/scene, camera framing "
|
| 171 |
+
"(e.g. full body), clothing and pose; never drop the setting. Treat it as a "
|
| 172 |
+
"real candid photograph (natural skin texture, real hair, lifelike eyes, "
|
| 173 |
+
"natural light). NEVER use illustration/painting/anime/CG words such as "
|
| 174 |
+
"'masterpiece', 'best quality', 'render', '3d', 'anime' or 'painting'. "
|
| 175 |
+
"Output ONLY the comma-separated tags — no quotes, no explanation."},
|
| 176 |
{"role": "user", "content": text},
|
| 177 |
]
|
| 178 |
chat = tok.apply_chat_template(msgs, add_generation_prompt=True, tokenize=False)
|