pormungtai committed on
Commit
cd243ab
·
verified ·
1 Parent(s): 817ab19

majicMIX photoreal pass: drop art tokens (masterpiece/best quality), add photographic style_prefix + sd-vae-ft-mse VAE, photo-focused Typhoon prompt, CFG 5.0

Browse files
Files changed (3) hide show
  1. app.py +3 -2
  2. models.json +5 -3
  3. pipeline_manager.py +14 -6
app.py CHANGED
@@ -166,8 +166,9 @@ def build_prompt(subject, age, ethnicity, body, hair, eyes, outfit, pose,
166
  if v and str(v).strip():
167
  parts.append(str(v).strip())
168
  thai = ", ".join(parts)
169
- # English quality suffix (kept in English; the model's negative handles the rest).
170
- return thai + ", photorealistic, masterpiece, best quality, ultra-detailed, sharp focus"
 
171
 
172
 
173
  # ---------------------------------------------------------------------------
 
166
  if v and str(v).strip():
167
  parts.append(str(v).strip())
168
  thai = ", ".join(parts)
169
+ # Photographic suffix (NOT "masterpiece/best quality" — those art tokens push the model
170
+ # toward an illustrated/CG look. The model's style_prefix + negative do the heavy lifting).
171
+ return thai + ", realistic candid photograph, natural skin texture, soft natural light"
172
 
173
 
174
  # ---------------------------------------------------------------------------
models.json CHANGED
@@ -14,12 +14,14 @@
14
  "default_width": 512,
15
  "default_height": 768,
16
  "sampler": "dpmpp_2m_karras",
 
 
17
  "hires": { "scale": 1.3, "denoise": 0.3, "steps": 12 },
18
  "neg_embeddings": ["easynegative"],
19
  "recommended_prompt": "RAW photo, (photorealistic:1.2), portrait of a beautiful woman, detailed skin texture, visible skin pores, natural skin, subsurface scattering, film grain, soft cinematic light, 85mm, depth of field, ultra detailed, analog photo",
20
- "negative_prompt": "EasyNegative, (worst quality, low quality:1.4), (extra limbs, extra legs, three legs, extra arms, missing limbs, fused limbs:1.4), (mutated hands, missing fingers, extra fingers, fused fingers:1.3), (bad anatomy, deformed, mutated, disfigured:1.2), (plastic skin, airbrushed, doll, 3d, render:1.2), bad feet, extra toes, lowres, watermark, text",
21
- "default_steps": 28,
22
- "default_guidance": 5.5,
23
  "enabled": true
24
  },
25
  {
 
14
  "default_width": 512,
15
  "default_height": 768,
16
  "sampler": "dpmpp_2m_karras",
17
+ "vae": "stabilityai/sd-vae-ft-mse",
18
+ "style_prefix": "RAW photo, analog film photograph, film grain, detailed skin texture, skin pores, fine hair strands, detailed eyes, catchlight, natural nails",
19
  "hires": { "scale": 1.3, "denoise": 0.3, "steps": 12 },
20
  "neg_embeddings": ["easynegative"],
21
  "recommended_prompt": "RAW photo, (photorealistic:1.2), portrait of a beautiful woman, detailed skin texture, visible skin pores, natural skin, subsurface scattering, film grain, soft cinematic light, 85mm, depth of field, ultra detailed, analog photo",
22
+ "negative_prompt": "EasyNegative, (worst quality, low quality:1.4), (extra limbs, extra legs, three legs, extra arms, missing limbs, fused limbs:1.4), (mutated hands, missing fingers, extra fingers, fused fingers:1.3), (bad anatomy, deformed, mutated, disfigured:1.2), (plastic skin, airbrushed, doll, smooth skin, 3d, render, illustration, painting:1.2), bad feet, extra toes, lowres, watermark, text",
23
+ "default_steps": 30,
24
+ "default_guidance": 5.0,
25
  "enabled": true
26
  },
27
  {
pipeline_manager.py CHANGED
@@ -150,9 +150,14 @@ def translate_prompt(text, engine):
150
  # typhoon: ask the LLM to rewrite as a clean English image prompt
151
  msgs = [
152
  {"role": "system", "content": "You convert Thai text-to-image prompts "
153
- "into a single concise, vivid English prompt for Stable Diffusion. "
154
- "Keep the described subject, clothing, pose, and scene. Output ONLY the "
155
- "English prompt as a comma-separated phrase no quotes, no explanation."},
 
 
 
 
 
156
  {"role": "user", "content": text},
157
  ]
158
  chat = tok.apply_chat_template(msgs, add_generation_prompt=True, tokenize=False)
@@ -461,9 +466,12 @@ def run_generation(cfg, mode, prompt, negative_prompt, ref_image,
461
  if seed is not None and int(seed) >= 0:
462
  generator = torch.Generator(device=DEVICE).manual_seed(int(seed))
463
 
464
- full_prompt = prompt
465
- if cfg.get("trigger"):
466
- full_prompt = f"{cfg['trigger']}, {prompt}".strip(", ")
 
 
 
467
 
468
  call = dict(
469
  prompt=full_prompt,
 
150
  # typhoon: ask the LLM to rewrite as a clean English image prompt
151
  msgs = [
152
  {"role": "system", "content": "You convert Thai text-to-image prompts "
153
+ "into a single concise, vivid English prompt for a PHOTOREALISTIC Stable "
154
+ "Diffusion model. Describe it as a real candid photograph: keep the subject, "
155
+ "clothing, pose, and scene, and add realistic photographic detail (natural "
156
+ "skin texture and pores, real hair strands, lifelike eyes, soft natural "
157
+ "light). NEVER use illustration/painting/anime/CG words such as 'masterpiece', "
158
+ "'best quality', 'artstation', 'render', '3d', 'anime' or 'painting'. "
159
+ "Output ONLY the English prompt as a comma-separated phrase — no quotes, "
160
+ "no explanation."},
161
  {"role": "user", "content": text},
162
  ]
163
  chat = tok.apply_chat_template(msgs, add_generation_prompt=True, tokenize=False)
 
466
  if seed is not None and int(seed) >= 0:
467
  generator = torch.Generator(device=DEVICE).manual_seed(int(seed))
468
 
469
+ # Front-load a per-model photographic style prefix (e.g. "RAW photo, film grain")
470
+ # so the strongest realism cue survives CLIP's 77-token truncation. Applied AFTER
471
+ # translation (run_generation gets the already-English prompt), so it reaches the
472
+ # model verbatim regardless of the translator. Then trigger, then the user prompt.
473
+ _parts = [cfg.get("style_prefix"), cfg.get("trigger"), prompt]
474
+ full_prompt = ", ".join(p.strip() for p in _parts if p and str(p).strip()).strip(", ")
475
 
476
  call = dict(
477
  prompt=full_prompt,