Malaji71 committed on
Commit
5dbe6d2
·
verified ·
1 Parent(s): df0c639

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +417 -84
app.py CHANGED
@@ -3,6 +3,15 @@ import torch
3
  from transformers import BlipProcessor, BlipForConditionalGeneration, pipeline
4
  from PIL import Image
5
  import random
 
 
 
 
 
 
 
 
 
6
 
7
  # Check GPU availability
8
  use_gpu = torch.cuda.is_available()
@@ -13,7 +22,7 @@ processor, model, zephyr_generator = None, None, None
13
  def load_models():
14
  """Load models only when needed"""
15
  global processor, model, zephyr_generator
16
- if processor is None or model is None or zephyr_generator is None:
17
  print("Loading BLIP model...")
18
  processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-large")
19
  model = BlipForConditionalGeneration.from_pretrained(
@@ -22,14 +31,91 @@ def load_models():
22
  )
23
  print("✅ BLIP model loaded successfully!")
24
 
25
- print("Loading SARA-Zephyr fine-tuned model...")
26
- zephyr_generator = pipeline(
27
- "text-generation",
28
- model="Malaji71/SARA-Zephyr", # Cambiado al modelo fine-tuned
29
- torch_dtype=torch.float32, # Use float32 for CPU
30
- device_map="auto" if use_gpu else None # Use auto device mapping if GPU available
31
- )
32
- print("✅ SARA-Zephyr fine-tuned model loaded successfully!")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
 
34
  # Universal Video Prompting Guide combining Gen-4 + SARA
35
  unified_instructions = """
@@ -68,7 +154,12 @@ def analyze_image_with_zephyr(image):
68
  return "Please upload an image first.", {}
69
  try:
70
  # Lazy load models
71
- load_models()
 
 
 
 
 
72
 
73
  # Convert to PIL if needed
74
  if not isinstance(image, Image.Image):
@@ -115,11 +206,32 @@ def analyze_image_with_zephyr(image):
115
  }
116
  return analysis, scene_info
117
  except Exception as e:
 
118
  return f"Error analyzing image: {str(e)}", {}
119
 
120
  def analyze_scene_with_zephyr(basic_caption, aspect_ratio, composition):
121
- """Use SARA-Zephyr for advanced scene analysis"""
122
- analysis_prompt = f"""<|system|>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
123
  You are a video prompt engineering expert specializing in the SARA framework. Analyze this image description for video creation potential.
124
  <|user|>
125
  Image description: "{basic_caption}"
@@ -132,34 +244,82 @@ Please provide:
132
  4. Best prompting approach (SARA vs Gen-4)
133
  Be concise and practical.
134
  <|assistant|>"""
135
- response = zephyr_generator(
136
- analysis_prompt,
137
- max_new_tokens=200,
138
- do_sample=True,
139
- temperature=0.7,
140
- pad_token_id=zephyr_generator.tokenizer.eos_token_id
141
- )
142
- ai_analysis = response[0]['generated_text'].split("<|assistant|>")[-1].strip()
143
- lines = ai_analysis.split('\n')
144
- motion_insights = []
145
- recommended_approach = "SARA framework recommended for precise control"
146
- for line in lines:
147
- if line.strip():
148
- if any(keyword in line.lower() for keyword in ['motion', 'movement', 'camera', 'lighting']):
149
- motion_insights.append(line.strip('- ').strip())
150
- elif 'sara' in line.lower() or 'gen-4' in line.lower():
151
- recommended_approach = line.strip('- ').strip()
152
- return {
153
- 'scene_interpretation': ai_analysis.split('\n')[0] if ai_analysis else "Scene analysis completed",
154
- 'motion_insights': motion_insights[:6],
155
- 'recommended_approach': recommended_approach
156
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
157
 
158
  def generate_sample_prompts_with_zephyr(scene_info=None):
159
- """Generate sample prompts using SARA-Zephyr"""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
160
  if scene_info and scene_info.get('basic_description'):
161
- # Use Zephyr to generate contextual prompts
162
- context_prompt = f"""<|system|>
 
163
  Generate 3 professional video prompts using the SARA framework based on this image analysis.
164
  <|user|>
165
  Image description: {scene_info['basic_description']}
@@ -168,20 +328,33 @@ Aspect Ratio: {scene_info.get('aspect_ratio', 'N/A'):.2f}
168
  Remember the SARA framework: Subject + Action + Reference + Atmosphere
169
  <|assistant|>"""
170
 
171
- response = zephyr_generator(
172
- context_prompt,
173
- max_new_tokens=200,
174
- do_sample=True,
175
- temperature=0.8,
176
- pad_token_id=zephyr_generator.tokenizer.eos_token_id
177
- )
 
178
 
179
- # Extract and clean prompts
180
- prompts_text = response[0]['generated_text'].split("<|assistant|>")[-1].strip()
181
- prompts = [p.strip('123.-• ') for p in prompts_text.split('\n') if p.strip()]
182
- # Return first 3 clean prompts
183
- if len(prompts) >= 3:
184
- return prompts[:3]
 
 
 
 
 
 
 
 
 
 
 
 
185
 
186
  # Fallback prompts if Zephyr fails or no scene info
187
  base_prompts = [
@@ -192,17 +365,30 @@ Remember the SARA framework: Subject + Action + Reference + Atmosphere
192
  return base_prompts
193
 
194
  def optimize_user_prompt_with_zephyr(user_idea, scene_info=None):
195
- """Optimize user's prompt idea using SARA-Zephyr while respecting SARA/Gen-4 structure"""
196
  if not user_idea.strip():
197
- return "Please enter your idea first."
 
 
 
 
 
 
 
 
 
 
 
 
198
 
199
  # Create context from scene if available
200
  context = ""
201
  if scene_info and scene_info.get('basic_description'):
202
  context = f"Image context: {scene_info['basic_description']}"
203
 
204
- # Enforce structure based on approach
205
- optimization_prompt = f"""<|system|>
 
206
  You are an expert in video prompting, specializing in the SARA framework. Transform user ideas into professional prompts compatible with AI video models like Sora, Gen-4, Pika, Runway, and Luma.
207
  Key principles:
208
  - Focus on MOTION, not static description
@@ -216,30 +402,139 @@ User's idea: "{user_idea}"
216
  Please create an optimized video prompt using the SARA framework. Respond with just the prompt.
217
  <|assistant|>"""
218
 
219
- response = zephyr_generator(
220
- optimization_prompt,
221
- max_new_tokens=100,
222
- do_sample=True,
223
- temperature=0.7,
224
- pad_token_id=zephyr_generator.tokenizer.eos_token_id
225
- )
 
226
 
227
- # Extract optimized prompt
228
- optimized = response[0]['generated_text'].split("<|assistant|>")[-1].strip()
229
- return optimized
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
230
 
231
  def refine_prompt_with_zephyr(current_prompt, feedback, chat_history, scene_info=None):
232
- """Refine a prompt based on user feedback using SARA-Zephyr"""
233
  if not feedback.strip():
234
  return current_prompt, chat_history
 
 
 
 
 
 
 
 
 
 
 
235
 
236
  # Create refinement context
237
  context = ""
238
  if scene_info and scene_info.get('basic_description'):
239
  context = f"Image context: {scene_info['basic_description']}"
240
 
241
- # Construct Zephyr refinement prompt
242
- refinement_prompt = f"""<|system|>
 
243
  You are an expert in refining video prompts using the SARA framework. Based on the user's feedback, improve the current prompt while maintaining its core structure.
244
  Key principles:
245
  - Focus on MOTION, not static description
@@ -254,20 +549,34 @@ Feedback: "{feedback}"
254
  Please refine the prompt while keeping it under 100 words. Respond with just the refined prompt.
255
  <|assistant|>"""
256
 
257
- response = zephyr_generator(
258
- refinement_prompt,
259
- max_new_tokens=100,
260
- do_sample=True,
261
- temperature=0.7,
262
- pad_token_id=zephyr_generator.tokenizer.eos_token_id
263
- )
264
-
265
- # Extract refined prompt
266
- refined = response[0]['generated_text'].split("<|assistant|>")[-1].strip()
267
 
268
- # Update chat history
269
- new_chat_history = chat_history + [[feedback, refined]]
270
- return refined, new_chat_history
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
271
 
272
  def generate_gen4_prompts(scene_info, foundation=""):
273
  """Generate Gen-4 style prompts iteratively"""
@@ -371,9 +680,16 @@ def build_custom_prompt(foundation, subject_motion, scene_motion, camera_motion,
371
  # Create the Gradio interface
372
  def create_interface():
373
  """Create the Gradio interface"""
 
 
 
 
 
 
 
374
  with gr.Blocks(theme=gr.themes.Soft(), title="AI Video Prompt Generator") as demo:
375
  # Header
376
- gr.Markdown("# 🎬 AI Video Prompt Generator - 🤖 SARA-Zephyr AI Powered")
377
  gr.Markdown("*Professional prompts for Sora, Gen-4, Pika, Luma, Runway and more*")
378
 
379
  # State variables
@@ -434,6 +750,13 @@ def create_interface():
434
  lines=3
435
  )
436
  optimize_btn = gr.Button("🚀 Generate Optimized Prompt", variant="primary")
 
 
 
 
 
 
 
437
  optimized_prompt = gr.Textbox(
438
  label="AI-Optimized Video Prompt",
439
  lines=4,
@@ -526,7 +849,12 @@ def create_interface():
526
  optimize_btn.click(
527
  fn=optimize_user_prompt_with_zephyr,
528
  inputs=[user_idea, scene_state],
529
- outputs=[optimized_prompt]
 
 
 
 
 
530
  )
531
  refine_btn.click(
532
  fn=refine_prompt_with_zephyr,
@@ -553,7 +881,7 @@ def create_interface():
553
 
554
  # Launch the app
555
  if __name__ == "__main__":
556
- print("🎬 Starting AI Video Prompt Generator with SARA-Zephyr...")
557
  print(f"📊 Status: {'GPU' if use_gpu else 'CPU'} Mode Enabled")
558
  print("🔧 Loading models (this may take a few minutes)...")
559
  try:
@@ -571,10 +899,15 @@ if __name__ == "__main__":
571
  print(f"❌ Error launching app: {e}")
572
  print("🔧 Make sure you have sufficient CPU resources and all dependencies installed.")
573
  print("📦 Required packages:")
574
- print(" pip install torch transformers gradio pillow accelerate bitsandbytes")
575
  # Alternative launch attempt
576
  print("\n🔄 Attempting alternative launch...")
577
  try:
 
 
 
 
 
578
  demo = create_interface()
579
  demo.launch(
580
  share=False,
 
3
  from transformers import BlipProcessor, BlipForConditionalGeneration, pipeline
4
  from PIL import Image
5
  import random
6
+ import os
7
+
8
# Install required dependencies if they are not already present.
try:
    import peft
except ImportError:
    import importlib
    import subprocess
    import sys

    print("Instalando peft...")
    # Invoke pip through the running interpreter so the package is installed
    # into this environment rather than whichever `pip` is first on PATH.
    subprocess.run(
        [sys.executable, "-m", "pip", "install", "-q", "peft"],
        check=False,
    )
    # Make the import system notice packages installed after startup.
    importlib.invalidate_caches()
    # Still raises ImportError if the installation did not succeed.
    import peft
15
 
16
  # Check GPU availability
17
  use_gpu = torch.cuda.is_available()
 
22
  def load_models():
23
  """Load models only when needed"""
24
  global processor, model, zephyr_generator
25
+ try:
26
  print("Loading BLIP model...")
27
  processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-large")
28
  model = BlipForConditionalGeneration.from_pretrained(
 
31
  )
32
  print("✅ BLIP model loaded successfully!")
33
 
34
+ print("Loading SARA-Zephyr adapter model...")
35
+ try:
36
+ # Cargar el modelo base Zephyr primero
37
+ from transformers import AutoModelForCausalLM, AutoTokenizer
38
+ from peft import PeftModel, PeftConfig
39
+
40
+ # Cargar tokenizer del modelo base
41
+ tokenizer = AutoTokenizer.from_pretrained("HuggingFaceH4/zephyr-7b-beta")
42
+
43
+ # Cargar modelo base
44
+ base_model = AutoModelForCausalLM.from_pretrained(
45
+ "HuggingFaceH4/zephyr-7b-beta",
46
+ torch_dtype=torch.float32,
47
+ device_map="auto" if use_gpu else None
48
+ )
49
+
50
+ # Cargar configuración del adaptador
51
+ try:
52
+ # Si está usando un repositorio en HuggingFace
53
+ adapter_config = PeftConfig.from_pretrained("Malaji71/SARA-Zephyr")
54
+
55
+ # Cargar el adaptador sobre el modelo base
56
+ peft_model = PeftModel.from_pretrained(
57
+ base_model,
58
+ "Malaji71/SARA-Zephyr"
59
+ )
60
+
61
+ print("✅ PEFT adapter loaded from HuggingFace!")
62
+
63
+ except Exception as e:
64
+ print(f"Error loading from HuggingFace: {str(e)}")
65
+ print("Trying to load adapter locally...")
66
+
67
+ # Intentar cargar localmente si está disponible
68
+ local_adapter_path = "./SARA-Zephyr" # Ajustar según sea necesario
69
+
70
+ try:
71
+ adapter_config = PeftConfig.from_pretrained(local_adapter_path)
72
+ peft_model = PeftModel.from_pretrained(
73
+ base_model,
74
+ local_adapter_path
75
+ )
76
+ print("✅ PEFT adapter loaded locally!")
77
+ except Exception as e2:
78
+ print(f"Error loading adapter locally: {str(e2)}")
79
+ print("Falling back to base model...")
80
+ peft_model = base_model
81
+
82
+ # Crear pipeline con el modelo adaptado
83
+ zephyr_generator = pipeline(
84
+ "text-generation",
85
+ model=peft_model,
86
+ tokenizer=tokenizer,
87
+ torch_dtype=torch.float32
88
+ )
89
+
90
+ # Verificar que el pipeline se haya creado correctamente
91
+ if zephyr_generator is None or not hasattr(zephyr_generator, 'tokenizer'):
92
+ raise ValueError("Pipeline creation failed or doesn't have tokenizer attribute")
93
+
94
+ print("✅ SARA-Zephyr adapter model loaded successfully!")
95
+ return True
96
+
97
+ except Exception as e:
98
+ print(f"Error loading SARA-Zephyr adapter: {str(e)}")
99
+ print("Falling back to standard Zephyr model...")
100
+
101
+ # Modelo de respaldo en caso de error
102
+ zephyr_generator = pipeline(
103
+ "text-generation",
104
+ model="HuggingFaceH4/zephyr-7b-beta",
105
+ torch_dtype=torch.float32,
106
+ device_map="auto" if use_gpu else None
107
+ )
108
+
109
+ # Verificar que el pipeline de respaldo se haya creado correctamente
110
+ if zephyr_generator is None or not hasattr(zephyr_generator, 'tokenizer'):
111
+ raise ValueError("Fallback pipeline creation failed or doesn't have tokenizer attribute")
112
+
113
+ print("✅ Fallback Zephyr model loaded successfully!")
114
+ return True
115
+
116
+ except Exception as e:
117
+ print(f"❌ Critical error loading models: {str(e)}")
118
+ return False
119
 
120
  # Universal Video Prompting Guide combining Gen-4 + SARA
121
  unified_instructions = """
 
154
  return "Please upload an image first.", {}
155
  try:
156
  # Lazy load models
157
+ load_success = load_models()
158
+ if not load_success:
159
+ return "Error: Model loading failed. Please try again later.", {}
160
+
161
+ if processor is None or model is None:
162
+ return "Error: Image analysis model failed to load. Please try again.", {}
163
 
164
  # Convert to PIL if needed
165
  if not isinstance(image, Image.Image):
 
206
  }
207
  return analysis, scene_info
208
  except Exception as e:
209
+ print(f"Error in analyze_image_with_zephyr: {str(e)}")
210
  return f"Error analyzing image: {str(e)}", {}
211
 
212
  def analyze_scene_with_zephyr(basic_caption, aspect_ratio, composition):
213
+ """Use Zephyr with SARA framework for advanced scene analysis"""
214
+ # Verificar que el modelo está cargado
215
+ if zephyr_generator is None:
216
+ # Intenta cargar los modelos si no están cargados
217
+ success = load_models()
218
+ if not success:
219
+ return {
220
+ 'scene_interpretation': "Error: Unable to load text generation model.",
221
+ 'motion_insights': ["Model loading failed. Please try again."],
222
+ 'recommended_approach': "Unable to determine approach due to model loading error."
223
+ }
224
+
225
+ # Verificar que zephyr_generator tiene el atributo tokenizer
226
+ if not hasattr(zephyr_generator, 'tokenizer'):
227
+ return {
228
+ 'scene_interpretation': "Error: Text generation model is not properly initialized.",
229
+ 'motion_insights': ["Model initialization failed. Please restart the application."],
230
+ 'recommended_approach': "Unable to determine approach due to model initialization error."
231
+ }
232
+
233
+ try:
234
+ analysis_prompt = f"""<|system|>
235
  You are a video prompt engineering expert specializing in the SARA framework. Analyze this image description for video creation potential.
236
  <|user|>
237
  Image description: "{basic_caption}"
 
244
  4. Best prompting approach (SARA vs Gen-4)
245
  Be concise and practical.
246
  <|assistant|>"""
247
+
248
+ response = zephyr_generator(
249
+ analysis_prompt,
250
+ max_new_tokens=200,
251
+ do_sample=True,
252
+ temperature=0.7,
253
+ top_k=50,
254
+ top_p=0.95
255
+ )
256
+
257
+ # Extract generated text
258
+ if isinstance(response, list) and len(response) > 0 and "generated_text" in response[0]:
259
+ generated_text = response[0]["generated_text"]
260
+ # Extraer solo la respuesta del asistente
261
+ if "<|assistant|>" in generated_text:
262
+ ai_analysis = generated_text.split("<|assistant|>")[-1].strip()
263
+ else:
264
+ # Intentar extraer la última parte del texto si no encontramos la etiqueta
265
+ ai_analysis = generated_text.split(analysis_prompt)[-1].strip()
266
+
267
+ lines = ai_analysis.split('\n')
268
+ motion_insights = []
269
+ recommended_approach = "SARA framework recommended for precise control"
270
+
271
+ for line in lines:
272
+ if line.strip():
273
+ if any(keyword in line.lower() for keyword in ['motion', 'movement', 'camera', 'lighting']):
274
+ motion_insights.append(line.strip('- ').strip())
275
+ elif 'sara' in line.lower() or 'gen-4' in line.lower():
276
+ recommended_approach = line.strip('- ').strip()
277
+
278
+ return {
279
+ 'scene_interpretation': ai_analysis.split('\n')[0] if ai_analysis else "Scene analysis completed",
280
+ 'motion_insights': motion_insights[:6] if motion_insights else ["Smooth cinematic movement", "Steady camera tracking", "Natural lighting transitions"],
281
+ 'recommended_approach': recommended_approach
282
+ }
283
+ else:
284
+ return {
285
+ 'scene_interpretation': "Unable to generate analysis with current model.",
286
+ 'motion_insights': ["Default: Smooth motion", "Default: Stable camera work", "Default: Natural lighting"],
287
+ 'recommended_approach': "SARA framework recommended as default"
288
+ }
289
+
290
+ except Exception as e:
291
+ print(f"Error in analyze_scene_with_zephyr: {str(e)}")
292
+ return {
293
+ 'scene_interpretation': f"Error analyzing scene: {str(e)}",
294
+ 'motion_insights': ["Error occurred during analysis", "Using default recommendations", "Try simplifying the image"],
295
+ 'recommended_approach': "SARA framework recommended (default)"
296
+ }
297
 
298
  def generate_sample_prompts_with_zephyr(scene_info=None):
299
+ """Generate sample prompts using Zephyr with SARA framework"""
300
+ # Verificar que el modelo está cargado
301
+ if zephyr_generator is None:
302
+ # Intenta cargar los modelos si no están cargados
303
+ success = load_models()
304
+ if not success:
305
+ return [
306
+ "Error: Unable to load text generation model. Please try again.",
307
+ "Default prompt: The subject walks forward smoothly while the background remains steady, cinematic atmosphere.",
308
+ "Default prompt: A dramatic close-up captures the subject's expression as they speak directly to the camera."
309
+ ]
310
+
311
+ # Verificar que zephyr_generator tiene el atributo tokenizer
312
+ if not hasattr(zephyr_generator, 'tokenizer'):
313
+ return [
314
+ "Error: Text generation model is not properly initialized. Please restart the application.",
315
+ "Default prompt: The subject walks forward smoothly while the background remains steady, cinematic atmosphere.",
316
+ "Default prompt: A dramatic close-up captures the subject's expression as they speak directly to the camera."
317
+ ]
318
+
319
  if scene_info and scene_info.get('basic_description'):
320
+ try:
321
+ # Use Zephyr to generate contextual prompts
322
+ context_prompt = f"""<|system|>
323
  Generate 3 professional video prompts using the SARA framework based on this image analysis.
324
  <|user|>
325
  Image description: {scene_info['basic_description']}
 
328
  Remember the SARA framework: Subject + Action + Reference + Atmosphere
329
  <|assistant|>"""
330
 
331
+ response = zephyr_generator(
332
+ context_prompt,
333
+ max_new_tokens=200,
334
+ do_sample=True,
335
+ temperature=0.8,
336
+ top_k=50,
337
+ top_p=0.95
338
+ )
339
 
340
+ # Extract generated text
341
+ if isinstance(response, list) and len(response) > 0 and "generated_text" in response[0]:
342
+ generated_text = response[0]["generated_text"]
343
+ # Extraer solo la respuesta del asistente
344
+ if "<|assistant|>" in generated_text:
345
+ prompts_text = generated_text.split("<|assistant|>")[-1].strip()
346
+ else:
347
+ # Intentar extraer la última parte del texto si no encontramos la etiqueta
348
+ prompts_text = generated_text.split(context_prompt)[-1].strip()
349
+
350
+ # Extract and clean prompts
351
+ prompts = [p.strip('123.-• ') for p in prompts_text.split('\n') if p.strip()]
352
+ # Return first 3 clean prompts
353
+ if len(prompts) >= 3:
354
+ return prompts[:3]
355
+ except Exception as e:
356
+ print(f"Error in generate_sample_prompts_with_zephyr: {str(e)}")
357
+ # Continue to fallback prompts if there's an error
358
 
359
  # Fallback prompts if Zephyr fails or no scene info
360
  base_prompts = [
 
365
  return base_prompts
366
 
367
  def optimize_user_prompt_with_zephyr(user_idea, scene_info=None):
368
+ """Optimize user's prompt idea using SARA framework with Zephyr model"""
369
  if not user_idea.strip():
370
+ return "Please enter your idea first.", "No input provided"
371
+
372
+ # Verificar que el modelo está cargado
373
+ if zephyr_generator is None:
374
+ # Intenta cargar los modelos si no están cargados
375
+ success = load_models()
376
+ if not success:
377
+ return "Error: Unable to load text generation model. Please try again or use Retry button.", "Model loading failed"
378
+
379
+ # Verificar que zephyr_generator tiene el atributo tokenizer
380
+ if not hasattr(zephyr_generator, 'tokenizer'):
381
+ return ("Error: Text generation model is not properly initialized. Please restart the application or use Retry button.",
382
+ "Model initialization failed")
383
 
384
  # Create context from scene if available
385
  context = ""
386
  if scene_info and scene_info.get('basic_description'):
387
  context = f"Image context: {scene_info['basic_description']}"
388
 
389
+ try:
390
+ # Enforce structure based on approach
391
+ optimization_prompt = f"""<|system|>
392
  You are an expert in video prompting, specializing in the SARA framework. Transform user ideas into professional prompts compatible with AI video models like Sora, Gen-4, Pika, Runway, and Luma.
393
  Key principles:
394
  - Focus on MOTION, not static description
 
402
  Please create an optimized video prompt using the SARA framework. Respond with just the prompt.
403
  <|assistant|>"""
404
 
405
+ response = zephyr_generator(
406
+ optimization_prompt,
407
+ max_new_tokens=100,
408
+ do_sample=True,
409
+ temperature=0.7,
410
+ top_k=50,
411
+ top_p=0.95
412
+ )
413
 
414
+ # Extract optimized prompt
415
+ if isinstance(response, list) and len(response) > 0 and "generated_text" in response[0]:
416
+ generated_text = response[0]["generated_text"]
417
+ # Extraer solo la respuesta del asistente
418
+ if "<|assistant|>" in generated_text:
419
+ optimized = generated_text.split("<|assistant|>")[-1].strip()
420
+ else:
421
+ # Intentar extraer la última parte del texto si no encontramos la etiqueta
422
+ optimized = generated_text.split(optimization_prompt)[-1].strip()
423
+ return optimized, "SARA-Zephyr model used successfully"
424
+ else:
425
+ return ("Error processing your idea. Please try again with a different description or use Retry button.",
426
+ "Invalid model response format")
427
+ except Exception as e:
428
+ print(f"Error in optimize_user_prompt_with_zephyr: {str(e)}")
429
+ return (f"Error generating prompt: {str(e)}. Please try again with a simpler description or use Retry button.",
430
+ f"Error: {str(e)}")
431
+
432
def _template_prompt(user_idea):
    """Build a rule-based SARA-style prompt from keywords found in *user_idea*.

    Used as a last resort when no text-generation model can be run.
    """
    generic = "The subject moves naturally while camera remains steady, cinematic atmosphere."
    words = user_idea.strip().split()
    # Very short ideas carry too little information to pick a subject/action.
    if len(words) <= 2:
        return generic

    # Normalize tokens so "Woman," and "woman" are treated the same.
    tokens = [w.strip(".,;:!?\"'()[]").lower() for w in words]
    tokens = [t for t in tokens if t]
    token_set = set(tokens)

    subject = "The subject"
    if token_set & {"man", "boy"}:
        subject = "The man"
    elif token_set & {"woman", "girl"}:
        subject = "The woman"
    elif token_set & {"child", "kid"}:
        subject = "The child"

    action = "moves naturally"
    for verb in ("walk", "run", "jump", "sit", "stand", "dance", "move", "turn"):
        # Prefix match keeps inflections ("walking", "runs") while avoiding
        # accidental hits inside unrelated words ("visit" -> "sit").
        if any(token.startswith(verb) for token in tokens):
            action = f"{verb}s smoothly"
            break

    return f"{subject} {action} while camera remains steady, cinematic atmosphere."


def fallback_generate_prompt(user_idea, scene_info=None):
    """Generate a video prompt with a backup model when the main model fails.

    A fresh ``HuggingFaceH4/zephyr-7b-beta`` text-generation pipeline is
    created on every call. If creating or running it raises, a rule-based
    template prompt is returned instead, so this function never raises for
    string input.

    Args:
        user_idea: Free-text idea typed by the user; ``None`` or blank input
            returns a message asking for an idea.
        scene_info: Optional dict; when it has a truthy ``'basic_description'``
            entry, that text is added to the model prompt as image context.

    Returns:
        A single string: the generated prompt, a canned error/template
        message, or the rule-based fallback prompt.
    """
    if not user_idea or not user_idea.strip():
        return "Please enter your idea first."

    try:
        # Create a fallback generator dedicated to this call.
        # NOTE(review): this loads the full model on each invocation.
        from transformers import pipeline
        import torch

        fallback_generator = pipeline(
            "text-generation",
            model="HuggingFaceH4/zephyr-7b-beta",
            torch_dtype=torch.float32,
            device_map="auto" if torch.cuda.is_available() else None
        )

        # Create context from scene if available
        context = ""
        if scene_info and scene_info.get('basic_description'):
            context = f"Image context: {scene_info['basic_description']}"

        # Enforce structure based on approach
        optimization_prompt = f"""<|system|>
You are an expert in video prompting, specializing in the SARA framework. Transform user ideas into professional prompts compatible with AI video models like Sora, Gen-4, Pika, Runway, and Luma.
Key principles:
- Focus on MOTION, not static description
- Use positive phrasing
- Be specific about camera work
- Include lighting/atmosphere details
- Follow the SARA structure: Subject + Action + Reference + Atmosphere
<|user|>
User's idea: "{user_idea}"
{context}
Please create an optimized video prompt using the SARA framework. Respond with just the prompt.
<|assistant|>"""

        response = fallback_generator(
            optimization_prompt,
            max_new_tokens=100,
            do_sample=True,
            temperature=0.7,
            top_k=50,
            top_p=0.95
        )

        # Extract optimized prompt
        if isinstance(response, list) and response and "generated_text" in response[0]:
            generated_text = response[0]["generated_text"]
            # Keep only the assistant's reply
            if "<|assistant|>" in generated_text:
                return generated_text.split("<|assistant|>")[-1].strip()
            # Tag not found: fall back to whatever follows the prompt text
            return generated_text.split(optimization_prompt)[-1].strip()

        return "Error processing your idea with the fallback model. Here's a template: Subject walks smoothly while camera remains steady, cinematic atmosphere."

    except Exception as e:
        print(f"Error in fallback_generate_prompt: {str(e)}")
        # Manual generation as a last resort when the model is unusable
        return _template_prompt(user_idea)
513
 
514
  def refine_prompt_with_zephyr(current_prompt, feedback, chat_history, scene_info=None):
515
+ """Refine a prompt based on user feedback using Zephyr with SARA framework"""
516
  if not feedback.strip():
517
  return current_prompt, chat_history
518
+
519
+ # Verificar que el modelo está cargado
520
+ if zephyr_generator is None:
521
+ # Intenta cargar los modelos si no están cargados
522
+ success = load_models()
523
+ if not success:
524
+ return "Error: Unable to load text generation model. Please try again.", chat_history
525
+
526
+ # Verificar que zephyr_generator tiene el atributo tokenizer
527
+ if not hasattr(zephyr_generator, 'tokenizer'):
528
+ return "Error: Text generation model is not properly initialized. Please restart the application.", chat_history
529
 
530
  # Create refinement context
531
  context = ""
532
  if scene_info and scene_info.get('basic_description'):
533
  context = f"Image context: {scene_info['basic_description']}"
534
 
535
+ try:
536
+ # Construct Zephyr refinement prompt
537
+ refinement_prompt = f"""<|system|>
538
  You are an expert in refining video prompts using the SARA framework. Based on the user's feedback, improve the current prompt while maintaining its core structure.
539
  Key principles:
540
  - Focus on MOTION, not static description
 
549
  Please refine the prompt while keeping it under 100 words. Respond with just the refined prompt.
550
  <|assistant|>"""
551
 
552
+ response = zephyr_generator(
553
+ refinement_prompt,
554
+ max_new_tokens=100,
555
+ do_sample=True,
556
+ temperature=0.7,
557
+ top_k=50,
558
+ top_p=0.95
559
+ )
 
 
560
 
561
+ # Extract refined prompt
562
+ if isinstance(response, list) and len(response) > 0 and "generated_text" in response[0]:
563
+ generated_text = response[0]["generated_text"]
564
+ # Extraer solo la respuesta del asistente
565
+ if "<|assistant|>" in generated_text:
566
+ refined = generated_text.split("<|assistant|>")[-1].strip()
567
+ else:
568
+ # Intentar extraer la última parte del texto si no encontramos la etiqueta
569
+ refined = generated_text.split(refinement_prompt)[-1].strip()
570
+
571
+ # Update chat history
572
+ new_chat_history = chat_history + [[feedback, refined]]
573
+ return refined, new_chat_history
574
+ else:
575
+ return current_prompt, chat_history
576
+
577
+ except Exception as e:
578
+ print(f"Error in refine_prompt_with_zephyr: {str(e)}")
579
+ return f"Error refining prompt: {str(e)}. Please try again with a simpler request.", chat_history
580
 
581
  def generate_gen4_prompts(scene_info, foundation=""):
582
  """Generate Gen-4 style prompts iteratively"""
 
680
  # Create the Gradio interface
681
  def create_interface():
682
  """Create the Gradio interface"""
683
+ # Asegúrate de cargar los modelos antes de crear la interfaz
684
+ try:
685
+ load_models()
686
+ except Exception as e:
687
+ print(f"⚠️ Warning: Initial model loading failed: {str(e)}")
688
+ print("Models will be loaded on demand.")
689
+
690
  with gr.Blocks(theme=gr.themes.Soft(), title="AI Video Prompt Generator") as demo:
691
  # Header
692
+ gr.Markdown("# 🎬 AI Video Prompt Generator - 🤖 SARA Framework Powered")
693
  gr.Markdown("*Professional prompts for Sora, Gen-4, Pika, Luma, Runway and more*")
694
 
695
  # State variables
 
750
  lines=3
751
  )
752
  optimize_btn = gr.Button("🚀 Generate Optimized Prompt", variant="primary")
753
+ with gr.Row():
754
+ retry_btn = gr.Button("🔄 Retry with Default Model", variant="secondary")
755
+ model_status = gr.Textbox(
756
+ label="Model Status",
757
+ value="",
758
+ interactive=False
759
+ )
760
  optimized_prompt = gr.Textbox(
761
  label="AI-Optimized Video Prompt",
762
  lines=4,
 
849
  optimize_btn.click(
850
  fn=optimize_user_prompt_with_zephyr,
851
  inputs=[user_idea, scene_state],
852
+ outputs=[optimized_prompt, model_status]
853
+ )
854
+ retry_btn.click(
855
+ fn=lambda idea, scene_info: (fallback_generate_prompt(idea, scene_info), "Using default model"),
856
+ inputs=[user_idea, scene_state],
857
+ outputs=[optimized_prompt, model_status]
858
  )
859
  refine_btn.click(
860
  fn=refine_prompt_with_zephyr,
 
881
 
882
  # Launch the app
883
  if __name__ == "__main__":
884
+ print("🎬 Starting AI Video Prompt Generator with SARA LORA Adapter...")
885
  print(f"📊 Status: {'GPU' if use_gpu else 'CPU'} Mode Enabled")
886
  print("🔧 Loading models (this may take a few minutes)...")
887
  try:
 
899
  print(f"❌ Error launching app: {e}")
900
  print("🔧 Make sure you have sufficient CPU resources and all dependencies installed.")
901
  print("📦 Required packages:")
902
+ print(" pip install torch transformers gradio pillow accelerate bitsandbytes peft")
903
  # Alternative launch attempt
904
  print("\n🔄 Attempting alternative launch...")
905
  try:
906
+ # Intenta instalar las dependencias necesarias
907
+ import subprocess
908
+ print("🔄 Installing/updating necessary dependencies...")
909
+ subprocess.call(["pip", "install", "-U", "transformers", "accelerate", "peft", "huggingface_hub"])
910
+
911
  demo = create_interface()
912
  demo.launch(
913
  share=False,