Malaji71 committed on
Commit
42b0843
·
verified ·
1 Parent(s): cc94d83

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +391 -109
app.py CHANGED
@@ -2,15 +2,15 @@ import os
2
  import sys
3
  import logging
4
 
5
- # Logging setup
6
  logging.basicConfig(level=logging.INFO,
7
  format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
8
  handlers=[logging.StreamHandler(sys.stdout)])
9
  logger = logging.getLogger(__name__)
10
 
11
- # Auto-install dependencies
12
  def install_dependencies():
13
- logger.info("Checking and installing required dependencies...")
14
  try:
15
  # Try to import peft
16
  try:
@@ -29,15 +29,15 @@ def install_dependencies():
29
  os.system("pip install -q bitsandbytes>=0.41.0")
30
 
31
  # Ensure other dependencies are installed
32
- logger.info("Checking other dependencies...")
33
  os.system("pip install -q torch transformers>=4.30.0 accelerate>=0.20.0 gradio pillow psutil")
34
 
35
- logger.info("All dependencies installed successfully")
36
 
37
  # Re-import peft to verify
38
  import peft
39
  from peft import PeftModel, PeftConfig
40
- logger.info(f"PEFT imported successfully, version: {peft.__version__}")
41
 
42
  return True
43
  except Exception as e:
@@ -47,9 +47,9 @@ def install_dependencies():
47
  # Install dependencies before importing
48
  success = install_dependencies()
49
  if not success:
50
- logger.error("Failed to install required dependencies. The application may not work properly.")
51
 
52
- # Now that we have dependencies, import modules
53
  import torch
54
  from transformers import BlipProcessor, BlipForConditionalGeneration, AutoModelForCausalLM, AutoTokenizer
55
  from peft import PeftModel, PeftConfig
@@ -66,15 +66,15 @@ if use_gpu:
66
  logger.info(f"Total GPU memory: {torch.cuda.get_device_properties(0).total_memory / 1024**3:.2f} GB")
67
  logger.info(f"Available GPU memory: {torch.cuda.memory_reserved(0) / 1024**3:.2f} GB")
68
  except:
69
- logger.info("Could not get detailed GPU information")
70
 
71
  # Lazy loading of models
72
  processor, model = None, None
73
  peft_model, tokenizer = None, None
74
 
75
- # Custom function to generate text with PEFT model
76
  def generate_with_peft_model(prompt, max_new_tokens=100, temperature=0.7, top_p=0.95):
77
- """Generate text directly using PEFT model without pipeline"""
78
  global peft_model, tokenizer
79
 
80
  if peft_model is None or tokenizer is None:
@@ -105,7 +105,7 @@ def generate_with_peft_model(prompt, max_new_tokens=100, temperature=0.7, top_p=
105
  response = output_text.split("<|assistant|>")[-1].strip()
106
  return response
107
 
108
- # If we can't extract assistant response, remove original prompt
109
  if prompt in output_text:
110
  response = output_text[len(prompt):].strip()
111
  return response
@@ -198,13 +198,13 @@ def load_models():
198
  base_model,
199
  local_adapter_path
200
  )
201
- logger.info("✅ LORA adapter loaded successfully from local path")
202
 
203
  return True
204
 
205
  except Exception as e2:
206
  logger.error(f"Error loading LORA adapter locally: {str(e2)}")
207
- logger.error("Could not load LORA adapter. The application may not work properly.")
208
  return False
209
 
210
  except Exception as e:
@@ -213,29 +213,17 @@ def load_models():
213
  logger.error(traceback.format_exc())
214
  return False
215
 
216
- # Universal Video Prompting Guide combining Gen-4 + SARA
217
  unified_instructions = """
218
  # 🎬 Universal Video Prompting Guide
219
- *Compatible with Gen-4, Sora, Pika, Luma, Runway and all diffusion-based video models*
220
- ## Core Principles (Universal)
221
  ✅ **Focus on MOTION, not static description**
222
  ✅ **Use positive phrasing exclusively**
223
  ✅ **Start simple, iterate progressively**
224
  ✅ **Refer to subjects in general terms** ("the subject," "the woman")
225
  ✅ **Keep prompts direct and easily understood**
226
- ## Two Complementary Approaches
227
- ### 🚀 **Gen-4 Official Method** (Recommended for beginners)
228
- **Structure**: Simple iterative building
229
- 1. Start with essential motion only
230
- 2. Add one element at a time: Subject Motion → Camera Motion → Scene Motion → Style Descriptors
231
- 3. Use general terms and avoid complex descriptions
232
- **Example**:
233
- - Basic: "The subject walks forward"
234
- - + Camera: "The subject walks forward. Handheld camera follows"
235
- - + Scene: "The subject walks forward. Handheld camera follows. Dust trails behind"
236
- - + Style: "The subject walks forward. Handheld camera follows. Dust trails behind. Cinematic."
237
- ### 🎯 **SARA Framework** (Advanced precision)
238
- **Structure**: [Subject] + [Action] + [Reference] + [Atmosphere]
239
  - **Subject (S)**: Main element to control
240
  - **Action (A)**: Movement/transformation ([verb] + [adverb])
241
  - **Reference (R)**: Spatial anchors ("while X remains steady")
@@ -324,7 +312,7 @@ def analyze_scene_with_zephyr(basic_caption, aspect_ratio, composition):
324
  """Use PEFT model for advanced scene analysis"""
325
  logger.info("Starting scene analysis...")
326
 
327
- # Verify model is loaded
328
  if peft_model is None or tokenizer is None:
329
  logger.error("PEFT model not available")
330
  return {
@@ -344,8 +332,8 @@ Please provide:
344
  1. Type of motion that would work best
345
  2. Recommended camera movements
346
  3. Emotional tone/style suggestions
347
- 4. Best prompting approach (SARA vs Gen-4)
348
- Be concise and practical. Keep your response in English.
349
  <|assistant|>"""
350
 
351
  logger.info("Generating analysis with PEFT model...")
@@ -356,7 +344,7 @@ Be concise and practical. Keep your response in English.
356
  top_p=0.95
357
  )
358
 
359
- logger.info(f"Generated analysis: {generated_text[:100]}...")
360
 
361
  lines = generated_text.split('\n')
362
  motion_insights = []
@@ -382,7 +370,7 @@ Be concise and practical. Keep your response in English.
382
  import traceback
383
  logger.error(traceback.format_exc())
384
  return {
385
- 'scene_interpretation': f"Error in analysis: {str(e)}",
386
  'motion_insights': ["Error during analysis", "Try with another image"],
387
  'recommended_approach': "SARA framework (default)"
388
  }
@@ -391,7 +379,7 @@ def generate_sample_prompts_with_zephyr(scene_info=None):
391
  """Generate sample prompts using PEFT model"""
392
  logger.info("Generating sample prompts...")
393
 
394
- # Verify model is loaded
395
  if peft_model is None or tokenizer is None:
396
  logger.error("PEFT model not available")
397
  return [
@@ -404,14 +392,12 @@ def generate_sample_prompts_with_zephyr(scene_info=None):
404
  try:
405
  # Use PEFT model to generate contextual prompts
406
  context_prompt = f"""<|system|>
407
- Generate 3 professional video prompts using the SARA framework based on this image analysis.
408
- Each prompt should follow the structure: Subject + Action + Reference + Atmosphere.
409
- Ensure the prompts are in English, emphasize motion, and are compatible with AI video models.
410
  <|user|>
411
  Image description: {scene_info['basic_description']}
412
  Composition: {scene_info.get('composition', 'Balanced')}
413
  Aspect Ratio: {scene_info.get('aspect_ratio', 'N/A'):.2f}
414
- Create three unique and expressive prompts following the SARA framework.
415
  <|assistant|>"""
416
 
417
  logger.info("Generating prompts for the scene...")
@@ -453,7 +439,7 @@ def optimize_user_prompt_with_zephyr(user_idea, scene_info=None):
453
  if not user_idea.strip():
454
  return "Please enter your idea first.", "No input provided"
455
 
456
- # Verify model is loaded
457
  if peft_model is None or tokenizer is None:
458
  logger.error("PEFT model not available")
459
  return "Error: Model not available. Try reloading the application.", "Model not loaded"
@@ -467,23 +453,41 @@ def optimize_user_prompt_with_zephyr(user_idea, scene_info=None):
467
  try:
468
  # Enforce structure based on approach
469
  logger.info("Preparing prompt for optimization...")
470
- optimization_prompt = f"""<|system|>
471
- You are an expert in video prompting, specializing in the SARA framework for advanced video prompt creation.
472
- Your task is to transform user ideas into professional, optimized prompts for AI video models.
473
-
 
 
 
 
 
 
 
474
  Key principles:
475
  - Focus on MOTION, not static description
476
- - Use positive phrasing only
477
  - Be specific about camera work
478
  - Include lighting/atmosphere details
479
- - Always follow the SARA structure: Subject + Action + Reference + Atmosphere
480
- - Always write prompts in English, regardless of the language of the input
481
-
482
- For any input in any language, create an optimized English video prompt following the SARA framework.
 
 
 
 
 
 
 
 
 
 
 
483
  <|user|>
484
  User's idea: "{user_idea}"
485
  {context}
486
- Create a professional video prompt using the SARA framework. Respond with just the prompt in English.
487
  <|assistant|>"""
488
 
489
  logger.info("Generating optimized prompt...")
@@ -495,6 +499,8 @@ Create a professional video prompt using the SARA framework. Respond with just t
495
  )
496
 
497
  logger.info(f"Optimized prompt: {optimized}")
 
 
498
  return optimized, "SARA-Zephyr LORA used successfully"
499
 
500
  except Exception as e:
@@ -505,7 +511,7 @@ Create a professional video prompt using the SARA framework. Respond with just t
505
  f"Error: {str(e)}")
506
 
507
  def fallback_generate_prompt(user_idea, scene_info=None):
508
- """Fallback function to generate prompts manually if model fails"""
509
  logger.info(f"Using fallback generation for: {user_idea}")
510
 
511
  if not user_idea.strip():
@@ -539,7 +545,7 @@ def refine_prompt_with_zephyr(current_prompt, feedback, chat_history, scene_info
539
  if not feedback.strip():
540
  return current_prompt, chat_history
541
 
542
- # Verify model is loaded
543
  if peft_model is None or tokenizer is None:
544
  logger.error("PEFT model not available")
545
  return "Error: Model not available. Try reloading the application.", chat_history
@@ -550,26 +556,32 @@ def refine_prompt_with_zephyr(current_prompt, feedback, chat_history, scene_info
550
  context = f"Image context: {scene_info['basic_description']}"
551
 
552
  try:
 
 
 
 
 
 
 
 
 
 
 
553
  # Construct refinement prompt
554
  refinement_prompt = f"""<|system|>
555
- You are an expert in refining video prompts using the SARA framework. You are given an existing prompt and specific feedback on how to improve it.
556
- Your task is to create an enhanced version of the prompt that incorporates the feedback while maintaining the SARA structure.
557
-
558
  Key principles:
559
  - Focus on MOTION, not static description
560
- - Use positive phrasing only
561
  - Be specific about camera work
562
  - Include lighting/atmosphere details
563
- - Always follow the SARA structure: Subject + Action + Reference + Atmosphere
564
- - Always keep the prompt in English
565
- - Apply the requested changes precisely as mentioned in the feedback
566
-
567
- Produce only the refined prompt text, nothing else.
568
  <|user|>
569
  Current prompt: "{current_prompt}"
570
  Feedback: "{feedback}"
571
  {context}
572
- Please refine the prompt based on this feedback. Keep it under 100 words. Return only the refined prompt.
573
  <|assistant|>"""
574
 
575
  logger.info("Generating refined prompt...")
@@ -592,49 +604,33 @@ Please refine the prompt based on this feedback. Keep it under 100 words. Return
592
  logger.error(traceback.format_exc())
593
  return f"Error refining prompt: {str(e)}. Try with a simpler request.", chat_history
594
 
595
- def build_custom_prompt(foundation, subject_motion, scene_motion, camera_motion, style, approach="SARA"):
596
- """Build custom prompt using selected approach"""
597
- if approach == "SARA":
598
- # SARA Structure: [Subject] [Action] while [Reference], [Atmosphere]
599
- parts = []
600
- if foundation:
601
- parts.append(foundation)
602
-
603
- # Add motion elements
604
- motion_parts = []
605
- if subject_motion:
606
- motion_parts.extend(subject_motion)
607
- if scene_motion:
608
- motion_parts.extend(scene_motion)
609
- if motion_parts:
610
- parts.append(", ".join(motion_parts))
611
-
612
- # Reference (camera stability)
613
- if camera_motion:
614
- parts.append(f"while {camera_motion}")
615
- else:
616
- parts.append("while background remains steady")
617
-
618
- # Atmosphere
619
- if style:
620
- parts.append(style)
621
-
622
- return " ".join(parts)
623
- else: # Gen-4 style
624
- # Gen-4 Structure: Simple iterative building
625
- parts = []
626
- if foundation:
627
- parts.append(foundation)
628
- if subject_motion:
629
- parts.extend(subject_motion)
630
- if camera_motion:
631
- parts.append(camera_motion)
632
- if scene_motion:
633
- parts.extend(scene_motion)
634
- if style:
635
- parts.append(style)
636
-
637
- return ". ".join(parts) if parts else "The subject moves naturally"
638
 
639
  def test_basic_generation():
640
  """Test basic generation with PEFT model"""
@@ -687,7 +683,7 @@ def get_debug_info():
687
  if peft_model is not None:
688
  info.append(f"PEFT model type: {type(peft_model).__name__}")
689
 
690
- # More info about PEFT model
691
  if hasattr(peft_model, 'base_model'):
692
  base_model_type = type(peft_model.base_model).__name__
693
  info.append(f"Base model type: {base_model_type}")
@@ -713,5 +709,291 @@ def get_debug_info():
713
  info.append(f"GPU available: {torch.cuda.is_available()}")
714
  if torch.cuda.is_available():
715
  info.append(f"GPU device: {torch.cuda.get_device_name(0)}")
716
- info.append(f"Memory allocated: {torch.cuda.memory_allocated(0) / (1024**3):.2f} GB")
717
- info.append(f"Memory reserved: {torch.cuda.memory_reserved(0) / (1024**3):.2f} GB")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  import sys
3
  import logging
4
 
5
+ # Logging configuration
6
  logging.basicConfig(level=logging.INFO,
7
  format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
8
  handlers=[logging.StreamHandler(sys.stdout)])
9
  logger = logging.getLogger(__name__)
10
 
11
+ # Install required dependencies automatically
12
  def install_dependencies():
13
+ logger.info("Verifying and installing required dependencies...")
14
  try:
15
  # Try to import peft
16
  try:
 
29
  os.system("pip install -q bitsandbytes>=0.41.0")
30
 
31
  # Ensure other dependencies are installed
32
+ logger.info("Verifying other dependencies...")
33
  os.system("pip install -q torch transformers>=4.30.0 accelerate>=0.20.0 gradio pillow psutil")
34
 
35
+ logger.info("All dependencies successfully installed")
36
 
37
  # Re-import peft to verify
38
  import peft
39
  from peft import PeftModel, PeftConfig
40
+ logger.info(f"PEFT correctly imported, version: {peft.__version__}")
41
 
42
  return True
43
  except Exception as e:
 
47
  # Install dependencies before importing
48
  success = install_dependencies()
49
  if not success:
50
+ logger.error("Failed to install required dependencies. The application may not function properly.")
51
 
52
+ # Now that we have the dependencies, we import the modules
53
  import torch
54
  from transformers import BlipProcessor, BlipForConditionalGeneration, AutoModelForCausalLM, AutoTokenizer
55
  from peft import PeftModel, PeftConfig
 
66
  logger.info(f"Total GPU memory: {torch.cuda.get_device_properties(0).total_memory / 1024**3:.2f} GB")
67
  logger.info(f"Available GPU memory: {torch.cuda.memory_reserved(0) / 1024**3:.2f} GB")
68
  except:
69
+ logger.info("Could not retrieve detailed GPU information")
70
 
71
  # Lazy loading of models
72
  processor, model = None, None
73
  peft_model, tokenizer = None, None
74
 
75
+ # Custom function to generate text with the PEFT model
76
  def generate_with_peft_model(prompt, max_new_tokens=100, temperature=0.7, top_p=0.95):
77
+ """Generates text using the PEFT model directly without pipeline"""
78
  global peft_model, tokenizer
79
 
80
  if peft_model is None or tokenizer is None:
 
105
  response = output_text.split("<|assistant|>")[-1].strip()
106
  return response
107
 
108
+ # If we can't extract assistant response, remove the original prompt
109
  if prompt in output_text:
110
  response = output_text[len(prompt):].strip()
111
  return response
 
198
  base_model,
199
  local_adapter_path
200
  )
201
+ logger.info("✅ LORA adapter loaded successfully from local storage")
202
 
203
  return True
204
 
205
  except Exception as e2:
206
  logger.error(f"Error loading LORA adapter locally: {str(e2)}")
207
+ logger.error("Could not load LORA adapter. The application will not function properly.")
208
  return False
209
 
210
  except Exception as e:
 
213
  logger.error(traceback.format_exc())
214
  return False
215
 
216
+ # Universal Video Prompting Guide combining SARA framework
217
  unified_instructions = """
218
  # 🎬 Universal Video Prompting Guide
219
+ *Compatible with Sora, Gen-4, Pika, Luma, Runway and all diffusion-based video models*
220
+ ## Core Principles
221
  ✅ **Focus on MOTION, not static description**
222
  ✅ **Use positive phrasing exclusively**
223
  ✅ **Start simple, iterate progressively**
224
  ✅ **Refer to subjects in general terms** ("the subject," "the woman")
225
  ✅ **Keep prompts direct and easily understood**
226
+ ## SARA Framework (Subject + Action + Reference + Atmosphere)
 
 
 
 
 
 
 
 
 
 
 
 
227
  - **Subject (S)**: Main element to control
228
  - **Action (A)**: Movement/transformation ([verb] + [adverb])
229
  - **Reference (R)**: Spatial anchors ("while X remains steady")
 
312
  """Use PEFT model for advanced scene analysis"""
313
  logger.info("Starting scene analysis...")
314
 
315
+ # Verify that the model is loaded
316
  if peft_model is None or tokenizer is None:
317
  logger.error("PEFT model not available")
318
  return {
 
332
  1. Type of motion that would work best
333
  2. Recommended camera movements
334
  3. Emotional tone/style suggestions
335
+ 4. Best prompting approach (SARA framework)
336
+ Be concise and practical.
337
  <|assistant|>"""
338
 
339
  logger.info("Generating analysis with PEFT model...")
 
344
  top_p=0.95
345
  )
346
 
347
+ logger.info(f"Analysis generated: {generated_text[:100]}...")
348
 
349
  lines = generated_text.split('\n')
350
  motion_insights = []
 
370
  import traceback
371
  logger.error(traceback.format_exc())
372
  return {
373
+ 'scene_interpretation': f"Analysis error: {str(e)}",
374
  'motion_insights': ["Error during analysis", "Try with another image"],
375
  'recommended_approach': "SARA framework (default)"
376
  }
 
379
  """Generate sample prompts using PEFT model"""
380
  logger.info("Generating sample prompts...")
381
 
382
+ # Verify that the model is loaded
383
  if peft_model is None or tokenizer is None:
384
  logger.error("PEFT model not available")
385
  return [
 
392
  try:
393
  # Use PEFT model to generate contextual prompts
394
  context_prompt = f"""<|system|>
395
+ Generate 3 professional video prompts using the SARA framework based on this image analysis.
 
 
396
  <|user|>
397
  Image description: {scene_info['basic_description']}
398
  Composition: {scene_info.get('composition', 'Balanced')}
399
  Aspect Ratio: {scene_info.get('aspect_ratio', 'N/A'):.2f}
400
+ Remember the SARA framework: Subject + Action + Reference + Atmosphere
401
  <|assistant|>"""
402
 
403
  logger.info("Generating prompts for the scene...")
 
439
  if not user_idea.strip():
440
  return "Please enter your idea first.", "No input provided"
441
 
442
+ # Verify that the model is loaded
443
  if peft_model is None or tokenizer is None:
444
  logger.error("PEFT model not available")
445
  return "Error: Model not available. Try reloading the application.", "Model not loaded"
 
453
  try:
454
  # Enforce structure based on approach
455
  logger.info("Preparing prompt for optimization...")
456
+
457
+ # Detect language and adjust system prompt accordingly
458
+ import re
459
+ non_english_pattern = re.compile(r'[^\x00-\x7F]+')
460
+ has_non_english = bool(non_english_pattern.search(user_idea))
461
+
462
+ if has_non_english:
463
+ logger.info("Detected non-English input")
464
+ optimization_prompt = f"""<|system|>
465
+ You are an expert in video prompting, specializing in the SARA framework. Transform user ideas into professional prompts compatible with AI video models like Sora, Gen-4, Pika, Runway, and Luma.
466
+ IMPORTANT: Preserve the original language of the user's idea in your response. For example, if they write in Spanish, your response should be in Spanish.
467
  Key principles:
468
  - Focus on MOTION, not static description
469
+ - Use positive phrasing
470
  - Be specific about camera work
471
  - Include lighting/atmosphere details
472
+ - Follow the SARA structure: Subject + Action + Reference + Atmosphere
473
+ <|user|>
474
+ User's idea: "{user_idea}"
475
+ {context}
476
+ Please create an optimized video prompt using the SARA framework. Respond with just the prompt in the same language as the user's input.
477
+ <|assistant|>"""
478
+ else:
479
+ optimization_prompt = f"""<|system|>
480
+ You are an expert in video prompting, specializing in the SARA framework. Transform user ideas into professional prompts compatible with AI video models like Sora, Gen-4, Pika, Runway, and Luma.
481
+ Key principles:
482
+ - Focus on MOTION, not static description
483
+ - Use positive phrasing
484
+ - Be specific about camera work
485
+ - Include lighting/atmosphere details
486
+ - Follow the SARA structure: Subject + Action + Reference + Atmosphere
487
  <|user|>
488
  User's idea: "{user_idea}"
489
  {context}
490
+ Please create an optimized video prompt using the SARA framework. Respond with just the prompt.
491
  <|assistant|>"""
492
 
493
  logger.info("Generating optimized prompt...")
 
499
  )
500
 
501
  logger.info(f"Optimized prompt: {optimized}")
502
+
503
+ # Status message in English regardless of input language
504
  return optimized, "SARA-Zephyr LORA used successfully"
505
 
506
  except Exception as e:
 
511
  f"Error: {str(e)}")
512
 
513
  def fallback_generate_prompt(user_idea, scene_info=None):
514
+ """Fallback function to generate prompts manually if the model fails"""
515
  logger.info(f"Using fallback generation for: {user_idea}")
516
 
517
  if not user_idea.strip():
 
545
  if not feedback.strip():
546
  return current_prompt, chat_history
547
 
548
+ # Verify that the model is loaded
549
  if peft_model is None or tokenizer is None:
550
  logger.error("PEFT model not available")
551
  return "Error: Model not available. Try reloading the application.", chat_history
 
556
  context = f"Image context: {scene_info['basic_description']}"
557
 
558
  try:
559
+ # Detect language of current prompt and feedback
560
+ import re
561
+ non_english_pattern = re.compile(r'[^\x00-\x7F]+')
562
+ has_non_english_prompt = bool(non_english_pattern.search(current_prompt))
563
+ has_non_english_feedback = bool(non_english_pattern.search(feedback))
564
+
565
+ # Determine response language
566
+ preserve_language_instruction = ""
567
+ if has_non_english_prompt or has_non_english_feedback:
568
+ preserve_language_instruction = "IMPORTANT: Preserve the original language of the prompt in your response. For example, if the prompt is in Spanish, your refined prompt should be in Spanish."
569
+
570
  # Construct refinement prompt
571
  refinement_prompt = f"""<|system|>
572
+ You are an expert in refining video prompts using the SARA framework. Based on the user's feedback, improve the current prompt while maintaining its core structure.
573
+ {preserve_language_instruction}
 
574
  Key principles:
575
  - Focus on MOTION, not static description
576
+ - Use positive phrasing
577
  - Be specific about camera work
578
  - Include lighting/atmosphere details
579
+ - Follow the SARA structure: Subject + Action + Reference + Atmosphere
 
 
 
 
580
  <|user|>
581
  Current prompt: "{current_prompt}"
582
  Feedback: "{feedback}"
583
  {context}
584
+ Please refine the prompt while keeping it under 100 words. Respond with just the refined prompt.
585
  <|assistant|>"""
586
 
587
  logger.info("Generating refined prompt...")
 
604
  logger.error(traceback.format_exc())
605
  return f"Error refining prompt: {str(e)}. Try with a simpler request.", chat_history
606
 
607
+ def build_custom_prompt(foundation, subject_motion, scene_motion, camera_motion, style):
608
+ """Build custom prompt using SARA framework"""
609
+ # SARA Structure: [Subject] [Action] while [Reference], [Atmosphere]
610
+ parts = []
611
+ if foundation:
612
+ parts.append(foundation)
613
+
614
+ # Add motion elements
615
+ motion_parts = []
616
+ if subject_motion:
617
+ motion_parts.extend(subject_motion)
618
+ if scene_motion:
619
+ motion_parts.extend(scene_motion)
620
+ if motion_parts:
621
+ parts.append(", ".join(motion_parts))
622
+
623
+ # Reference (camera stability)
624
+ if camera_motion:
625
+ parts.append(f"while {camera_motion}")
626
+ else:
627
+ parts.append("while background remains steady")
628
+
629
+ # Atmosphere
630
+ if style:
631
+ parts.append(style)
632
+
633
+ return " ".join(parts)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
634
 
635
  def test_basic_generation():
636
  """Test basic generation with PEFT model"""
 
683
  if peft_model is not None:
684
  info.append(f"PEFT model type: {type(peft_model).__name__}")
685
 
686
+ # More information about PEFT model
687
  if hasattr(peft_model, 'base_model'):
688
  base_model_type = type(peft_model.base_model).__name__
689
  info.append(f"Base model type: {base_model_type}")
 
709
  info.append(f"GPU available: {torch.cuda.is_available()}")
710
  if torch.cuda.is_available():
711
  info.append(f"GPU device: {torch.cuda.get_device_name(0)}")
712
+ info.append(f"Allocated memory: {torch.cuda.memory_allocated(0) / (1024**3):.2f} GB")
713
+ info.append(f"Reserved memory: {torch.cuda.memory_reserved(0) / (1024**3):.2f} GB")
714
+
715
+ # System memory information
716
+ try:
717
+ import psutil
718
+ vm = psutil.virtual_memory()
719
+ info.append(f"Total RAM: {vm.total / (1024**3):.2f} GB")
720
+ info.append(f"Available RAM: {vm.available / (1024**3):.2f} GB")
721
+ info.append(f"RAM usage percentage: {vm.percent}%")
722
+ except ImportError:
723
+ info.append("psutil not available for system memory information")
724
+
725
+ return "\n".join(info)
726
+ except Exception as e:
727
+ logger.error(f"Error generating debug info: {str(e)}")
728
+ return f"Error: {str(e)}"
729
+
730
+ # Create the Gradio interface
731
+ def create_interface():
732
+ """Create the Gradio interface"""
733
+ # Pre-load models
734
+ try:
735
+ logger.info("Pre-loading models...")
736
+ load_models()
737
+ except Exception as e:
738
+ logger.error(f"Error during preloading: {str(e)}")
739
+ logger.info("Models will be loaded on demand")
740
+
741
+ logger.info("Creating Gradio interface...")
742
+
743
+ with gr.Blocks(title="AI Video Prompt Generator") as demo:
744
+ # Header
745
+ gr.Markdown("# 🎬 AI Video Prompt Generator - 🤖 SARA Framework")
746
+ gr.Markdown("*Professional prompts for Sora, Gen-4, Pika, Luma, Runway and more*")
747
+
748
+ # State variables
749
+ scene_state = gr.State({})
750
+ chat_history_state = gr.State([])
751
+
752
+ with gr.Tabs():
753
+ # Tab 1: Learning Guide
754
+ with gr.Tab("📚 Prompting Guide"):
755
+ gr.Markdown(unified_instructions)
756
+ # Advanced tips
757
+ with gr.Accordion("🎯 Advanced Tips", open=False):
758
+ gr.Markdown("""
759
+ ## Advanced Prompting Strategies
760
+ ### 🎨 Style Integration
761
+ - **Cinematography**: "Dutch angle," "Extreme close-up," "Bird's eye view"
762
+ - **Lighting**: "Golden hour," "Neon glow," "Harsh shadows," "Soft diffused light"
763
+ - **Movement Quality**: "Fluid motion," "Mechanical precision," "Organic flow"
764
+ ### ⚡ Motion Types
765
+ - **Subject Motion**: Walking, running, dancing, gesturing
766
+ - **Camera Motion**: Pan, tilt, dolly, zoom, orbit, tracking
767
+ - **Environmental**: Wind, water flow, particle effects, lighting changes
768
+ """)
769
+
770
+ # Tab 2: Image Analysis
771
+ with gr.Tab("📷 Image Analysis"):
772
+ with gr.Row():
773
+ with gr.Column(scale=1):
774
+ image_input = gr.Image(
775
+ label="Upload Image for Analysis",
776
+ type="pil"
777
+ )
778
+ analyze_btn = gr.Button("🔍 Analyze Image", variant="primary")
779
+ with gr.Column(scale=2):
780
+ analysis_output = gr.Markdown(label="AI Analysis Results")
781
+
782
+ # Sample prompts section
783
+ with gr.Group():
784
+ gr.Markdown("### 💡 Sample Prompts")
785
+ sample_btn = gr.Button("🎲 Generate Sample Prompts")
786
+ sample_prompts = [
787
+ gr.Textbox(
788
+ label=f"Sample {i+1}",
789
+ lines=2,
790
+ interactive=False,
791
+ show_copy_button=True
792
+ )
793
+ for i in range(3)
794
+ ]
795
+
796
+ # Tab 3: AI Prompt Generator
797
+ with gr.Tab("🤖 AI Prompt Generator"):
798
+ with gr.Row():
799
+ with gr.Column():
800
+ user_idea = gr.Textbox(
801
+ label="Your Video Idea (any language)",
802
+ placeholder="e.g., 'el personaje camina lentamente' or 'character walks slowly'",
803
+ lines=3
804
+ )
805
+ optimize_btn = gr.Button("🚀 Generate Optimized Prompt", variant="primary")
806
+ with gr.Row():
807
+ retry_btn = gr.Button("🔄 Manual Generation Fallback", variant="secondary")
808
+ model_status = gr.Textbox(
809
+ label="Model Status",
810
+ value="",
811
+ interactive=False
812
+ )
813
+ optimized_prompt = gr.Textbox(
814
+ label="AI-Optimized Video Prompt",
815
+ lines=4,
816
+ interactive=True,
817
+ show_copy_button=True
818
+ )
819
+ # Basic test button
820
+ test_btn = gr.Button("🔬 Test Basic Generation", variant="secondary")
821
+ test_output = gr.Textbox(
822
+ label="Basic Generation Test",
823
+ lines=2,
824
+ interactive=False
825
+ )
826
+ with gr.Column():
827
+ gr.Markdown("### 🔄 Refine Your Prompt")
828
+ feedback_input = gr.Textbox(
829
+ label="Feedback/Changes",
830
+ placeholder="e.g., 'make it more dramatic' or 'add camera movement'",
831
+ lines=2
832
+ )
833
+ refine_btn = gr.Button("🔄 Refine Prompt")
834
+ # Chat history
835
+ with gr.Accordion("💬 Refinement History", open=False):
836
+ chat_display = gr.Chatbot(height=300, type='messages')
837
+
838
+ # Model status and debug info
839
+ with gr.Accordion("🔧 Debug Info", open=False):
840
+ debug_info = gr.Textbox(
841
+ label="Debug Information",
842
+ value="Click 'Get Debug Info' to see model status",
843
+ lines=8,
844
+ interactive=False
845
+ )
846
+ debug_btn = gr.Button("Get Debug Info")
847
+
848
+ # Tab 4: Custom Builder
849
+ with gr.Tab("🛠️ Custom Builder"):
850
+ gr.Markdown("## Build Your Custom Prompt")
851
+ with gr.Row():
852
+ custom_foundation = gr.Textbox(
853
+ label="Foundation",
854
+ placeholder="The subject...",
855
+ lines=1
856
+ )
857
+ with gr.Row():
858
+ subject_motion = gr.CheckboxGroup(
859
+ choices=[
860
+ "walks smoothly", "speaks clearly", "gestures naturally",
861
+ "moves gracefully", "turns slowly", "smiles confidently",
862
+ "dances rhythmically", "stands firmly", "runs energetically",
863
+ "sits relaxed", "laughs joyfully", "looks curiously"
864
+ ],
865
+ label="Subject Motion"
866
+ )
867
+ scene_motion = gr.CheckboxGroup(
868
+ choices=[
869
+ "dust swirls", "lighting changes", "wind effects",
870
+ "water movement", "atmosphere shifts", "leaves flutter",
871
+ "shadows elongate", "fog rolls in", "sunlight filters through",
872
+ "rain falls gently", "snow drifts", "crowds bustle"
873
+ ],
874
+ label="Scene Motion"
875
+ )
876
+ with gr.Row():
877
+ camera_motion = gr.Dropdown(
878
+ choices=[
879
+ "camera remains steady", "handheld camera follows",
880
+ "camera pans left", "camera pans right",
881
+ "camera tracks forward", "camera zooms in slowly",
882
+ "camera pulls back", "camera orbits subject",
883
+ "drone shot from above", "camera tilts upward",
884
+ "camera moves from low angle", "camera shifts focus"
885
+ ],
886
+ label="Camera Motion",
887
+ value="camera remains steady"
888
+ )
889
+ style_motion = gr.Dropdown(
890
+ choices=[
891
+ "cinematic atmosphere", "documentary style", "live-action feel",
892
+ "dramatic lighting", "peaceful ambiance", "energetic mood",
893
+ "professional setting", "nostalgic tone", "futuristic environment",
894
+ "golden hour warmth", "neon-lit urban setting", "minimalist aesthetic",
895
+ "high-contrast look", "soft-focused dreamlike quality"
896
+ ],
897
+ label="Style/Atmosphere",
898
+ value="cinematic atmosphere"
899
+ )
900
+ build_custom_btn = gr.Button("πŸ”¨ Build Custom Prompt", variant="secondary")
901
+ custom_output = gr.Textbox(
902
+ label="Your Custom Prompt",
903
+ lines=3,
904
+ interactive=True,
905
+ show_copy_button=True
906
+ )
907
+
908
+ # Event handlers
909
+ analyze_btn.click(
910
+ fn=analyze_image_with_zephyr,
911
+ inputs=[image_input],
912
+ outputs=[analysis_output, scene_state]
913
+ )
914
+ sample_btn.click(
915
+ fn=generate_sample_prompts_with_zephyr,
916
+ inputs=[scene_state],
917
+ outputs=sample_prompts
918
+ )
919
+ optimize_btn.click(
920
+ fn=optimize_user_prompt_with_zephyr,
921
+ inputs=[user_idea, scene_state],
922
+ outputs=[optimized_prompt, model_status]
923
+ )
924
+ retry_btn.click(
925
+ fn=fallback_generate_prompt,
926
+ inputs=[user_idea, scene_state],
927
+ outputs=[optimized_prompt, model_status]
928
+ )
929
+ test_btn.click(
930
+ fn=test_basic_generation,
931
+ inputs=[],
932
+ outputs=[test_output]
933
+ )
934
+ debug_btn.click(
935
+ fn=get_debug_info,
936
+ inputs=[],
937
+ outputs=[debug_info]
938
+ )
939
+ refine_btn.click(
940
+ fn=refine_prompt_with_zephyr,
941
+ inputs=[optimized_prompt, feedback_input, chat_history_state, scene_state],
942
+ outputs=[optimized_prompt, chat_history_state]
943
+ )
944
+ # Update chat display when history changes
945
+ chat_history_state.change(
946
+ fn=lambda history: history,
947
+ inputs=[chat_history_state],
948
+ outputs=[chat_display]
949
+ )
950
+ build_custom_btn.click(
951
+ fn=build_custom_prompt,
952
+ inputs=[custom_foundation, subject_motion, scene_motion, camera_motion, style_motion],
953
+ outputs=[custom_output]
954
+ )
955
+ return demo
956
+
957
# Launch the app
if __name__ == "__main__":
    # Script entry point: build the Gradio interface and serve it. If the
    # primary launch fails for any reason, print install hints, try to
    # refresh the ML dependencies, and retry once with a conservative,
    # local-only configuration.
    print("🎬 Starting AI Video Prompt Generator with SARA LORA Adapter...")
    # `use_gpu` is the module-level flag defined earlier in this file.
    print(f"📊 Status: {'GPU' if use_gpu else 'CPU'} Mode Enabled")
    print("🔧 Loading models (this may take a few minutes)...")
    try:
        demo = create_interface()
        print("✅ Interface created successfully!")
        print("🚀 Launching application...")
        # Primary launch: bind on all interfaces and request a share link.
        demo.launch(
            share=True,
            server_name="0.0.0.0",
            server_port=7860,
            debug=True,
            show_error=True,
        )
    except Exception as e:
        # Top-level boundary: report the failure and fall through to the
        # fallback launch instead of crashing with a bare traceback.
        print(f"❌ Error launching app: {e}")
        print("🔧 Make sure you have sufficient CPU resources and all dependencies installed.")
        print("📦 Required packages:")
        print("   pip install torch transformers gradio pillow accelerate bitsandbytes peft>=0.6.0")
        # Alternative launch attempt
        print("\n🔄 Attempting alternative launch...")
        try:
            # Try to install necessary dependencies
            import subprocess

            print("🔄 Installing/updating necessary dependencies...")
            # Invoke pip through the running interpreter so the packages are
            # installed into the environment this process actually uses,
            # rather than whichever `pip` executable happens to be on PATH.
            pip_status = subprocess.call([
                sys.executable, "-m", "pip", "install", "-U",
                "transformers", "accelerate", "peft>=0.6.0",
                "huggingface_hub", "bitsandbytes",
            ])
            if pip_status != 0:
                # Best effort: a failed upgrade should not prevent the retry,
                # but it must not go unnoticed either.
                print(f"⚠️ pip exited with status {pip_status}; continuing with the installed packages.")
            # NOTE: modules that were already imported keep their old version
            # until the process is restarted; the upgrade only affects
            # modules imported from this point on.

            demo = create_interface()
            # Fallback launch: loopback only, no share link, no debug mode.
            demo.launch(
                share=False,
                server_name="127.0.0.1",
                server_port=7860,
                debug=False,
            )
        except Exception as e2:
            print(f"❌ Alternative launch failed: {e2}")
            print("\n💡 Troubleshooting tips:")
            print("1. Ensure CPU resources are sufficient.")
            print("2. Check CPU usage: top or htop")
            print("3. Try reducing model precision: set torch_dtype=torch.float16")
            print("4. Monitor memory usage: free -h")