Ai_Video_Production_Delta

Paused

App Files Files Community

Malaji71 committed on May 20, 2025

Commit

5dbe6d2

verified ·

1 Parent(s): df0c639

Update app.py

Browse files

Files changed (1) hide show

app.py +417 -84

app.py CHANGED Viewed

@@ -3,6 +3,15 @@ import torch
 from transformers import BlipProcessor, BlipForConditionalGeneration, pipeline
 from PIL import Image
 import random
 # Check GPU availability
 use_gpu = torch.cuda.is_available()
@@ -13,7 +22,7 @@ processor, model, zephyr_generator = None, None, None
 def load_models():
     """Load models only when needed"""
     global processor, model, zephyr_generator
-    if processor is None or model is None or zephyr_generator is None:
         print("Loading BLIP model...")
         processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-large")
         model = BlipForConditionalGeneration.from_pretrained(
@@ -22,14 +31,91 @@ def load_models():
         )
         print("✅ BLIP model loaded successfully!")
-        print("Loading SARA-Zephyr fine-tuned model...")
-        zephyr_generator = pipeline(
-            "text-generation",
-            model="Malaji71/SARA-Zephyr",  # Cambiado al modelo fine-tuned
-            torch_dtype=torch.float32,  # Use float32 for CPU
-            device_map="auto" if use_gpu else None  # Use auto device mapping if GPU available
-        )
-        print("✅ SARA-Zephyr fine-tuned model loaded successfully!")
 # Universal Video Prompting Guide combining Gen-4 + SARA
 unified_instructions = """
@@ -68,7 +154,12 @@ def analyze_image_with_zephyr(image):
         return "Please upload an image first.", {}
     try:
         # Lazy load models
-        load_models()
         # Convert to PIL if needed
         if not isinstance(image, Image.Image):
@@ -115,11 +206,32 @@ def analyze_image_with_zephyr(image):
         }
         return analysis, scene_info
     except Exception as e:
         return f"Error analyzing image: {str(e)}", {}
 def analyze_scene_with_zephyr(basic_caption, aspect_ratio, composition):
-    """Use SARA-Zephyr for advanced scene analysis"""
-    analysis_prompt = f"""<|system|>
 You are a video prompt engineering expert specializing in the SARA framework. Analyze this image description for video creation potential.
 <|user|>
 Image description: "{basic_caption}"
@@ -132,34 +244,82 @@ Please provide:
 4. Best prompting approach (SARA vs Gen-4)
 Be concise and practical.
 <|assistant|>"""
-    response = zephyr_generator(
-        analysis_prompt,
-        max_new_tokens=200,
-        do_sample=True,
-        temperature=0.7,
-        pad_token_id=zephyr_generator.tokenizer.eos_token_id
-    )
-    ai_analysis = response[0]['generated_text'].split("<|assistant|>")[-1].strip()
-    lines = ai_analysis.split('\n')
-    motion_insights = []
-    recommended_approach = "SARA framework recommended for precise control"
-    for line in lines:
-        if line.strip():
-            if any(keyword in line.lower() for keyword in ['motion', 'movement', 'camera', 'lighting']):
-                motion_insights.append(line.strip('- ').strip())
-            elif 'sara' in line.lower() or 'gen-4' in line.lower():
-                recommended_approach = line.strip('- ').strip()
-    return {
-        'scene_interpretation': ai_analysis.split('\n')[0] if ai_analysis else "Scene analysis completed",
-        'motion_insights': motion_insights[:6],
-        'recommended_approach': recommended_approach
-    }
 def generate_sample_prompts_with_zephyr(scene_info=None):
-    """Generate sample prompts using SARA-Zephyr"""
     if scene_info and scene_info.get('basic_description'):
-        # Use Zephyr to generate contextual prompts
-        context_prompt = f"""<|system|>
 Generate 3 professional video prompts using the SARA framework based on this image analysis.
 <|user|>
 Image description: {scene_info['basic_description']}
@@ -168,20 +328,33 @@ Aspect Ratio: {scene_info.get('aspect_ratio', 'N/A'):.2f}
 Remember the SARA framework: Subject + Action + Reference + Atmosphere
 <|assistant|>"""
-        response = zephyr_generator(
-            context_prompt,
-            max_new_tokens=200,
-            do_sample=True,
-            temperature=0.8,
-            pad_token_id=zephyr_generator.tokenizer.eos_token_id
-        )
-        # Extract and clean prompts
-        prompts_text = response[0]['generated_text'].split("<|assistant|>")[-1].strip()
-        prompts = [p.strip('123.-• ') for p in prompts_text.split('\n') if p.strip()]
-        # Return first 3 clean prompts
-        if len(prompts) >= 3:
-            return prompts[:3]
     # Fallback prompts if Zephyr fails or no scene info
     base_prompts = [
@@ -192,17 +365,30 @@ Remember the SARA framework: Subject + Action + Reference + Atmosphere
     return base_prompts
 def optimize_user_prompt_with_zephyr(user_idea, scene_info=None):
-    """Optimize user's prompt idea using SARA-Zephyr while respecting SARA/Gen-4 structure"""
     if not user_idea.strip():
-        return "Please enter your idea first."
     # Create context from scene if available
     context = ""
     if scene_info and scene_info.get('basic_description'):
         context = f"Image context: {scene_info['basic_description']}"
-    # Enforce structure based on approach
-    optimization_prompt = f"""<|system|>
 You are an expert in video prompting, specializing in the SARA framework. Transform user ideas into professional prompts compatible with AI video models like Sora, Gen-4, Pika, Runway, and Luma.
 Key principles:
 - Focus on MOTION, not static description
@@ -216,30 +402,139 @@ User's idea: "{user_idea}"
 Please create an optimized video prompt using the SARA framework. Respond with just the prompt.
 <|assistant|>"""
-    response = zephyr_generator(
-        optimization_prompt,
-        max_new_tokens=100,
-        do_sample=True,
-        temperature=0.7,
-        pad_token_id=zephyr_generator.tokenizer.eos_token_id
-    )
-    # Extract optimized prompt
-    optimized = response[0]['generated_text'].split("<|assistant|>")[-1].strip()
-    return optimized
 def refine_prompt_with_zephyr(current_prompt, feedback, chat_history, scene_info=None):
-    """Refine a prompt based on user feedback using SARA-Zephyr"""
     if not feedback.strip():
         return current_prompt, chat_history
     # Create refinement context
     context = ""
     if scene_info and scene_info.get('basic_description'):
         context = f"Image context: {scene_info['basic_description']}"
-    # Construct Zephyr refinement prompt
-    refinement_prompt = f"""<|system|>
 You are an expert in refining video prompts using the SARA framework. Based on the user's feedback, improve the current prompt while maintaining its core structure.
 Key principles:
 - Focus on MOTION, not static description
@@ -254,20 +549,34 @@ Feedback: "{feedback}"
 Please refine the prompt while keeping it under 100 words. Respond with just the refined prompt.
 <|assistant|>"""
-    response = zephyr_generator(
-        refinement_prompt,
-        max_new_tokens=100,
-        do_sample=True,
-        temperature=0.7,
-        pad_token_id=zephyr_generator.tokenizer.eos_token_id
-    )
-    # Extract refined prompt
-    refined = response[0]['generated_text'].split("<|assistant|>")[-1].strip()
-    # Update chat history
-    new_chat_history = chat_history + [[feedback, refined]]
-    return refined, new_chat_history
 def generate_gen4_prompts(scene_info, foundation=""):
     """Generate Gen-4 style prompts iteratively"""
@@ -371,9 +680,16 @@ def build_custom_prompt(foundation, subject_motion, scene_motion, camera_motion,
 # Create the Gradio interface
 def create_interface():
     """Create the Gradio interface"""
     with gr.Blocks(theme=gr.themes.Soft(), title="AI Video Prompt Generator") as demo:
         # Header
-        gr.Markdown("# 🎬 AI Video Prompt Generator - 🤖 SARA-Zephyr AI Powered")
         gr.Markdown("*Professional prompts for Sora, Gen-4, Pika, Luma, Runway and more*")
         # State variables
@@ -434,6 +750,13 @@ def create_interface():
                             lines=3
                         )
                         optimize_btn = gr.Button("🚀 Generate Optimized Prompt", variant="primary")
                         optimized_prompt = gr.Textbox(
                             label="AI-Optimized Video Prompt",
                             lines=4,
@@ -526,7 +849,12 @@ def create_interface():
         optimize_btn.click(
             fn=optimize_user_prompt_with_zephyr,
             inputs=[user_idea, scene_state],
-            outputs=[optimized_prompt]
         )
         refine_btn.click(
             fn=refine_prompt_with_zephyr,
@@ -553,7 +881,7 @@ def create_interface():
 # Launch the app
 if __name__ == "__main__":
-    print("🎬 Starting AI Video Prompt Generator with SARA-Zephyr...")
     print(f"📊 Status: {'GPU' if use_gpu else 'CPU'} Mode Enabled")
     print("🔧 Loading models (this may take a few minutes)...")
     try:
@@ -571,10 +899,15 @@ if __name__ == "__main__":
         print(f"❌ Error launching app: {e}")
         print("🔧 Make sure you have sufficient CPU resources and all dependencies installed.")
         print("📦 Required packages:")
-        print("   pip install torch transformers gradio pillow accelerate bitsandbytes")
         # Alternative launch attempt
         print("\n🔄 Attempting alternative launch...")
         try:
             demo = create_interface()
             demo.launch(
                 share=False,

 from transformers import BlipProcessor, BlipForConditionalGeneration, pipeline
 from PIL import Image
 import random
+import os
+# Install required dependencies if they are not already present.
+# NOTE(review): os.system() runs whichever `pip` is first on PATH and its exit
+# status is not checked here, so a failed install only surfaces as an
+# ImportError from the second `import peft` below.
+try:
+    import peft
+except ImportError:
+    print("Instalando peft...")
+    os.system("pip install -q peft")
+    import peft
 # Check GPU availability
 use_gpu = torch.cuda.is_available()
 def load_models():
     """Load models only when needed"""
     global processor, model, zephyr_generator
+    try:
         print("Loading BLIP model...")
         processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-large")
         model = BlipForConditionalGeneration.from_pretrained(
         )
         print("✅ BLIP model loaded successfully!")
+        print("Loading SARA-Zephyr adapter model...")
+        try:
+            # Cargar el modelo base Zephyr primero
+            from transformers import AutoModelForCausalLM, AutoTokenizer
+            from peft import PeftModel, PeftConfig
+            # Cargar tokenizer del modelo base
+            tokenizer = AutoTokenizer.from_pretrained("HuggingFaceH4/zephyr-7b-beta")
+            # Cargar modelo base
+            base_model = AutoModelForCausalLM.from_pretrained(
+                "HuggingFaceH4/zephyr-7b-beta",
+                torch_dtype=torch.float32,
+                device_map="auto" if use_gpu else None
+            )
+            # Cargar configuración del adaptador
+            try:
+                # Si está usando un repositorio en HuggingFace
+                adapter_config = PeftConfig.from_pretrained("Malaji71/SARA-Zephyr")
+                # Cargar el adaptador sobre el modelo base
+                peft_model = PeftModel.from_pretrained(
+                    base_model,
+                    "Malaji71/SARA-Zephyr"
+                )
+                print("✅ PEFT adapter loaded from HuggingFace!")
+            except Exception as e:
+                print(f"Error loading from HuggingFace: {str(e)}")
+                print("Trying to load adapter locally...")
+                # Intentar cargar localmente si está disponible
+                local_adapter_path = "./SARA-Zephyr"  # Ajustar según sea necesario
+                try:
+                    adapter_config = PeftConfig.from_pretrained(local_adapter_path)
+                    peft_model = PeftModel.from_pretrained(
+                        base_model,
+                        local_adapter_path
+                    )
+                    print("✅ PEFT adapter loaded locally!")
+                except Exception as e2:
+                    print(f"Error loading adapter locally: {str(e2)}")
+                    print("Falling back to base model...")
+                    peft_model = base_model
+            # Crear pipeline con el modelo adaptado
+            zephyr_generator = pipeline(
+                "text-generation",
+                model=peft_model,
+                tokenizer=tokenizer,
+                torch_dtype=torch.float32
+            )
+            # Verificar que el pipeline se haya creado correctamente
+            if zephyr_generator is None or not hasattr(zephyr_generator, 'tokenizer'):
+                raise ValueError("Pipeline creation failed or doesn't have tokenizer attribute")
+            print("✅ SARA-Zephyr adapter model loaded successfully!")
+            return True
+        except Exception as e:
+            print(f"Error loading SARA-Zephyr adapter: {str(e)}")
+            print("Falling back to standard Zephyr model...")
+            # Modelo de respaldo en caso de error
+            zephyr_generator = pipeline(
+                "text-generation",
+                model="HuggingFaceH4/zephyr-7b-beta",
+                torch_dtype=torch.float32,
+                device_map="auto" if use_gpu else None
+            )
+            # Verificar que el pipeline de respaldo se haya creado correctamente
+            if zephyr_generator is None or not hasattr(zephyr_generator, 'tokenizer'):
+                raise ValueError("Fallback pipeline creation failed or doesn't have tokenizer attribute")
+            print("✅ Fallback Zephyr model loaded successfully!")
+            return True
+    except Exception as e:
+        print(f"❌ Critical error loading models: {str(e)}")
+        return False
 # Universal Video Prompting Guide combining Gen-4 + SARA
 unified_instructions = """
         return "Please upload an image first.", {}
     try:
         # Lazy load models
+        load_success = load_models()
+        if not load_success:
+            return "Error: Model loading failed. Please try again later.", {}
+        if processor is None or model is None:
+            return "Error: Image analysis model failed to load. Please try again.", {}
         # Convert to PIL if needed
         if not isinstance(image, Image.Image):
         }
         return analysis, scene_info
     except Exception as e:
+        print(f"Error in analyze_image_with_zephyr: {str(e)}")
         return f"Error analyzing image: {str(e)}", {}
 def analyze_scene_with_zephyr(basic_caption, aspect_ratio, composition):
+    """Use Zephyr with SARA framework for advanced scene analysis"""
+    # Verificar que el modelo está cargado
+    if zephyr_generator is None:
+        # Intenta cargar los modelos si no están cargados
+        success = load_models()
+        if not success:
+            return {
+                'scene_interpretation': "Error: Unable to load text generation model.",
+                'motion_insights': ["Model loading failed. Please try again."],
+                'recommended_approach': "Unable to determine approach due to model loading error."
+            }
+    # Verificar que zephyr_generator tiene el atributo tokenizer
+    if not hasattr(zephyr_generator, 'tokenizer'):
+        return {
+            'scene_interpretation': "Error: Text generation model is not properly initialized.",
+            'motion_insights': ["Model initialization failed. Please restart the application."],
+            'recommended_approach': "Unable to determine approach due to model initialization error."
+        }
+    try:
+        analysis_prompt = f"""<|system|>
 You are a video prompt engineering expert specializing in the SARA framework. Analyze this image description for video creation potential.
 <|user|>
 Image description: "{basic_caption}"
 4. Best prompting approach (SARA vs Gen-4)
 Be concise and practical.
 <|assistant|>"""
+        response = zephyr_generator(
+            analysis_prompt,
+            max_new_tokens=200,
+            do_sample=True,
+            temperature=0.7,
+            top_k=50,
+            top_p=0.95
+        )
+        # Extract generated text
+        if isinstance(response, list) and len(response) > 0 and "generated_text" in response[0]:
+            generated_text = response[0]["generated_text"]
+            # Extraer solo la respuesta del asistente
+            if "<|assistant|>" in generated_text:
+                ai_analysis = generated_text.split("<|assistant|>")[-1].strip()
+            else:
+                # Intentar extraer la última parte del texto si no encontramos la etiqueta
+                ai_analysis = generated_text.split(analysis_prompt)[-1].strip()
+            lines = ai_analysis.split('\n')
+            motion_insights = []
+            recommended_approach = "SARA framework recommended for precise control"
+            for line in lines:
+                if line.strip():
+                    if any(keyword in line.lower() for keyword in ['motion', 'movement', 'camera', 'lighting']):
+                        motion_insights.append(line.strip('- ').strip())
+                    elif 'sara' in line.lower() or 'gen-4' in line.lower():
+                        recommended_approach = line.strip('- ').strip()
+            return {
+                'scene_interpretation': ai_analysis.split('\n')[0] if ai_analysis else "Scene analysis completed",
+                'motion_insights': motion_insights[:6] if motion_insights else ["Smooth cinematic movement", "Steady camera tracking", "Natural lighting transitions"],
+                'recommended_approach': recommended_approach
+            }
+        else:
+            return {
+                'scene_interpretation': "Unable to generate analysis with current model.",
+                'motion_insights': ["Default: Smooth motion", "Default: Stable camera work", "Default: Natural lighting"],
+                'recommended_approach': "SARA framework recommended as default"
+            }
+    except Exception as e:
+        print(f"Error in analyze_scene_with_zephyr: {str(e)}")
+        return {
+            'scene_interpretation': f"Error analyzing scene: {str(e)}",
+            'motion_insights': ["Error occurred during analysis", "Using default recommendations", "Try simplifying the image"],
+            'recommended_approach': "SARA framework recommended (default)"
+        }
 def generate_sample_prompts_with_zephyr(scene_info=None):
+    """Generate sample prompts using Zephyr with SARA framework"""
+    # Verificar que el modelo está cargado
+    if zephyr_generator is None:
+        # Intenta cargar los modelos si no están cargados
+        success = load_models()
+        if not success:
+            return [
+                "Error: Unable to load text generation model. Please try again.",
+                "Default prompt: The subject walks forward smoothly while the background remains steady, cinematic atmosphere.",
+                "Default prompt: A dramatic close-up captures the subject's expression as they speak directly to the camera."
+            ]
+    # Verificar que zephyr_generator tiene el atributo tokenizer
+    if not hasattr(zephyr_generator, 'tokenizer'):
+        return [
+            "Error: Text generation model is not properly initialized. Please restart the application.",
+            "Default prompt: The subject walks forward smoothly while the background remains steady, cinematic atmosphere.",
+            "Default prompt: A dramatic close-up captures the subject's expression as they speak directly to the camera."
+        ]
     if scene_info and scene_info.get('basic_description'):
+        try:
+            # Use Zephyr to generate contextual prompts
+            context_prompt = f"""<|system|>
 Generate 3 professional video prompts using the SARA framework based on this image analysis.
 <|user|>
 Image description: {scene_info['basic_description']}
 Remember the SARA framework: Subject + Action + Reference + Atmosphere
 <|assistant|>"""
+            response = zephyr_generator(
+                context_prompt,
+                max_new_tokens=200,
+                do_sample=True,
+                temperature=0.8,
+                top_k=50,
+                top_p=0.95
+            )
+            # Extract generated text
+            if isinstance(response, list) and len(response) > 0 and "generated_text" in response[0]:
+                generated_text = response[0]["generated_text"]
+                # Extraer solo la respuesta del asistente
+                if "<|assistant|>" in generated_text:
+                    prompts_text = generated_text.split("<|assistant|>")[-1].strip()
+                else:
+                    # Intentar extraer la última parte del texto si no encontramos la etiqueta
+                    prompts_text = generated_text.split(context_prompt)[-1].strip()
+                # Extract and clean prompts
+                prompts = [p.strip('123.-• ') for p in prompts_text.split('\n') if p.strip()]
+                # Return first 3 clean prompts
+                if len(prompts) >= 3:
+                    return prompts[:3]
+        except Exception as e:
+            print(f"Error in generate_sample_prompts_with_zephyr: {str(e)}")
+            # Continue to fallback prompts if there's an error
     # Fallback prompts if Zephyr fails or no scene info
     base_prompts = [
     return base_prompts
 def optimize_user_prompt_with_zephyr(user_idea, scene_info=None):
+    """Optimize user's prompt idea using SARA framework with Zephyr model"""
     if not user_idea.strip():
+        return "Please enter your idea first.", "No input provided"
+    # Verificar que el modelo está cargado
+    if zephyr_generator is None:
+        # Intenta cargar los modelos si no están cargados
+        success = load_models()
+        if not success:
+            return "Error: Unable to load text generation model. Please try again or use Retry button.", "Model loading failed"
+    # Verificar que zephyr_generator tiene el atributo tokenizer
+    if not hasattr(zephyr_generator, 'tokenizer'):
+        return ("Error: Text generation model is not properly initialized. Please restart the application or use Retry button.",
+                "Model initialization failed")
     # Create context from scene if available
     context = ""
     if scene_info and scene_info.get('basic_description'):
         context = f"Image context: {scene_info['basic_description']}"
+    try:
+        # Enforce structure based on approach
+        optimization_prompt = f"""<|system|>
 You are an expert in video prompting, specializing in the SARA framework. Transform user ideas into professional prompts compatible with AI video models like Sora, Gen-4, Pika, Runway, and Luma.
 Key principles:
 - Focus on MOTION, not static description
 Please create an optimized video prompt using the SARA framework. Respond with just the prompt.
 <|assistant|>"""
+        response = zephyr_generator(
+            optimization_prompt,
+            max_new_tokens=100,
+            do_sample=True,
+            temperature=0.7,
+            top_k=50,
+            top_p=0.95
+        )
+        # Extract optimized prompt
+        if isinstance(response, list) and len(response) > 0 and "generated_text" in response[0]:
+            generated_text = response[0]["generated_text"]
+            # Extraer solo la respuesta del asistente
+            if "<|assistant|>" in generated_text:
+                optimized = generated_text.split("<|assistant|>")[-1].strip()
+            else:
+                # Intentar extraer la última parte del texto si no encontramos la etiqueta
+                optimized = generated_text.split(optimization_prompt)[-1].strip()
+            return optimized, "SARA-Zephyr model used successfully"
+        else:
+            return ("Error processing your idea. Please try again with a different description or use Retry button.",
+                    "Invalid model response format")
+    except Exception as e:
+        print(f"Error in optimize_user_prompt_with_zephyr: {str(e)}")
+        return (f"Error generating prompt: {str(e)}. Please try again with a simpler description or use Retry button.",
+                f"Error: {str(e)}")
+def fallback_generate_prompt(user_idea, scene_info=None):
+    """Backup prompt generator for when the main model fails.
+    Builds a fresh text-generation pipeline for "HuggingFaceH4/zephyr-7b-beta"
+    on every call and asks it to rewrite ``user_idea`` as a SARA-style prompt.
+    Args:
+        user_idea: Free-text idea; if it is empty after ``strip()`` the
+            message "Please enter your idea first." is returned.
+        scene_info: Optional mapping; a truthy ``'basic_description'`` entry
+            is added to the prompt as image context.
+    Returns:
+        A string: the model's answer, a fixed template sentence when the
+        pipeline response has an unexpected shape, or a keyword-based
+        sentence when anything in the model path raises.
+    """
+    if not user_idea.strip():
+        return "Please enter your idea first."
+    try:
+        # Create a dedicated fallback generator just for this function.
+        # NOTE(review): the pipeline is rebuilt on every call; nothing here caches it.
+        from transformers import pipeline
+        import torch
+        fallback_generator = pipeline(
+            "text-generation",
+            model="HuggingFaceH4/zephyr-7b-beta",
+            torch_dtype=torch.float32,
+            device_map="auto" if torch.cuda.is_available() else None
+        )
+        # Create context from scene if available
+        context = ""
+        if scene_info and scene_info.get('basic_description'):
+            context = f"Image context: {scene_info['basic_description']}"
+        # Enforce structure based on approach
+        optimization_prompt = f"""<|system|>
+You are an expert in video prompting, specializing in the SARA framework. Transform user ideas into professional prompts compatible with AI video models like Sora, Gen-4, Pika, Runway, and Luma.
+Key principles:
+- Focus on MOTION, not static description
+- Use positive phrasing
+- Be specific about camera work
+- Include lighting/atmosphere details
+- Follow the SARA structure: Subject + Action + Reference + Atmosphere
+<|user|>
+User's idea: "{user_idea}"
+{context}
+Please create an optimized video prompt using the SARA framework. Respond with just the prompt.
+<|assistant|>"""
+        response = fallback_generator(
+            optimization_prompt,
+            max_new_tokens=100,
+            do_sample=True,
+            temperature=0.7,
+            top_k=50,
+            top_p=0.95
+        )
+        # Extract optimized prompt
+        if isinstance(response, list) and len(response) > 0 and "generated_text" in response[0]:
+            generated_text = response[0]["generated_text"]
+            # Keep only the assistant's reply (text after the last tag)
+            if "<|assistant|>" in generated_text:
+                optimized = generated_text.split("<|assistant|>")[-1].strip()
+            else:
+                # Tag not found: fall back to the text that follows the prompt itself
+                optimized = generated_text.split(optimization_prompt)[-1].strip()
+            return optimized
+        else:
+            return "Error processing your idea with the fallback model. Here's a template: Subject walks smoothly while camera remains steady, cinematic atmosphere."
+    except Exception as e:
+        print(f"Error in fallback_generate_prompt: {str(e)}")
+        # Rule-based fallback when the model path fails entirely
+        words = user_idea.strip().split()
+        if len(words) > 2:
+            subject = "The subject"
+            # NOTE(review): subject keywords are compared case-sensitively against
+            # raw whitespace-split tokens, so "Man" or "man," does not match.
+            if "man" in words or "boy" in words:
+                subject = "The man"
+            elif "woman" in words or "girl" in words:
+                subject = "The woman"
+            elif "child" in words or "kid" in words:
+                subject = "The child"
+            action = "moves naturally"
+            # NOTE(review): verbs are matched as lowercase substrings of each token,
+            # so e.g. "return" also matches "turn"; the first verb in list order wins.
+            for verb in ["walk", "run", "jump", "sit", "stand", "dance", "move", "turn"]:
+                if any(verb in word.lower() for word in words):
+                    action = verb + "s smoothly"
+                    break
+            return f"{subject} {action} while camera remains steady, cinematic atmosphere."
+        else:
+            return "The subject moves naturally while camera remains steady, cinematic atmosphere."
 def refine_prompt_with_zephyr(current_prompt, feedback, chat_history, scene_info=None):
+    """Refine a prompt based on user feedback using Zephyr with SARA framework"""
     if not feedback.strip():
         return current_prompt, chat_history
+    # Verificar que el modelo está cargado
+    if zephyr_generator is None:
+        # Intenta cargar los modelos si no están cargados
+        success = load_models()
+        if not success:
+            return "Error: Unable to load text generation model. Please try again.", chat_history
+    # Verificar que zephyr_generator tiene el atributo tokenizer
+    if not hasattr(zephyr_generator, 'tokenizer'):
+        return "Error: Text generation model is not properly initialized. Please restart the application.", chat_history
     # Create refinement context
     context = ""
     if scene_info and scene_info.get('basic_description'):
         context = f"Image context: {scene_info['basic_description']}"
+    try:
+        # Construct Zephyr refinement prompt
+        refinement_prompt = f"""<|system|>
 You are an expert in refining video prompts using the SARA framework. Based on the user's feedback, improve the current prompt while maintaining its core structure.
 Key principles:
 - Focus on MOTION, not static description
 Please refine the prompt while keeping it under 100 words. Respond with just the refined prompt.
 <|assistant|>"""
+        response = zephyr_generator(
+            refinement_prompt,
+            max_new_tokens=100,
+            do_sample=True,
+            temperature=0.7,
+            top_k=50,
+            top_p=0.95
+        )
+        # Extract refined prompt
+        if isinstance(response, list) and len(response) > 0 and "generated_text" in response[0]:
+            generated_text = response[0]["generated_text"]
+            # Extraer solo la respuesta del asistente
+            if "<|assistant|>" in generated_text:
+                refined = generated_text.split("<|assistant|>")[-1].strip()
+            else:
+                # Intentar extraer la última parte del texto si no encontramos la etiqueta
+                refined = generated_text.split(refinement_prompt)[-1].strip()
+            # Update chat history
+            new_chat_history = chat_history + [[feedback, refined]]
+            return refined, new_chat_history
+        else:
+            return current_prompt, chat_history
+    except Exception as e:
+        print(f"Error in refine_prompt_with_zephyr: {str(e)}")
+        return f"Error refining prompt: {str(e)}. Please try again with a simpler request.", chat_history
 def generate_gen4_prompts(scene_info, foundation=""):
     """Generate Gen-4 style prompts iteratively"""
 # Create the Gradio interface
 def create_interface():
     """Create the Gradio interface"""
+    # Asegúrate de cargar los modelos antes de crear la interfaz
+    try:
+        load_models()
+    except Exception as e:
+        print(f"⚠️ Warning: Initial model loading failed: {str(e)}")
+        print("Models will be loaded on demand.")
     with gr.Blocks(theme=gr.themes.Soft(), title="AI Video Prompt Generator") as demo:
         # Header
+        gr.Markdown("# 🎬 AI Video Prompt Generator - 🤖 SARA Framework Powered")
         gr.Markdown("*Professional prompts for Sora, Gen-4, Pika, Luma, Runway and more*")
         # State variables
                             lines=3
                         )
                         optimize_btn = gr.Button("🚀 Generate Optimized Prompt", variant="primary")
+                        with gr.Row():
+                            retry_btn = gr.Button("🔄 Retry with Default Model", variant="secondary")
+                            model_status = gr.Textbox(
+                                label="Model Status",
+                                value="",
+                                interactive=False
+                            )
                         optimized_prompt = gr.Textbox(
                             label="AI-Optimized Video Prompt",
                             lines=4,
         optimize_btn.click(
             fn=optimize_user_prompt_with_zephyr,
             inputs=[user_idea, scene_state],
+            outputs=[optimized_prompt, model_status]
+        )
+        retry_btn.click(
+            fn=lambda idea, scene_info: (fallback_generate_prompt(idea, scene_info), "Using default model"),
+            inputs=[user_idea, scene_state],
+            outputs=[optimized_prompt, model_status]
         )
         refine_btn.click(
             fn=refine_prompt_with_zephyr,
 # Launch the app
 if __name__ == "__main__":
+    print("🎬 Starting AI Video Prompt Generator with SARA LORA Adapter...")
     print(f"📊 Status: {'GPU' if use_gpu else 'CPU'} Mode Enabled")
     print("🔧 Loading models (this may take a few minutes)...")
     try:
         print(f"❌ Error launching app: {e}")
         print("🔧 Make sure you have sufficient CPU resources and all dependencies installed.")
         print("📦 Required packages:")
+        print("   pip install torch transformers gradio pillow accelerate bitsandbytes peft")
         # Alternative launch attempt
         print("\n🔄 Attempting alternative launch...")
         try:
+            # Intenta instalar las dependencias necesarias
+            import subprocess
+            print("🔄 Installing/updating necessary dependencies...")
+            subprocess.call(["pip", "install", "-U", "transformers", "accelerate", "peft", "huggingface_hub"])
             demo = create_interface()
             demo.launch(
                 share=False,