Ai_Video_Production_Delta

Paused

App Files Files Community

Malaji71 committed on May 20, 2025

Commit

f987206

verified ·

1 Parent(s): 5dbe6d2

Update app.py

Browse files

Files changed (1) hide show

app.py +109 -455

app.py CHANGED Viewed

@@ -1,17 +1,7 @@
 import gradio as gr
 import torch
-from transformers import BlipProcessor, BlipForConditionalGeneration, pipeline
-from PIL import Image
-import random
-import os
-# Instalar dependencias necesarias si no están presentes
-try:
-    import peft
-except ImportError:
-    print("Instalando peft...")
-    os.system("pip install -q peft")
-    import peft
 # Check GPU availability
 use_gpu = torch.cuda.is_available()
@@ -22,7 +12,7 @@ processor, model, zephyr_generator = None, None, None
 def load_models():
     """Load models only when needed"""
     global processor, model, zephyr_generator
-    try:
         print("Loading BLIP model...")
         processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-large")
         model = BlipForConditionalGeneration.from_pretrained(
@@ -30,92 +20,40 @@ def load_models():
             torch_dtype=torch.float32  # Use float32 for CPU
         )
         print("✅ BLIP model loaded successfully!")
-        print("Loading SARA-Zephyr adapter model...")
         try:
-            # Cargar el modelo base Zephyr primero
-            from transformers import AutoModelForCausalLM, AutoTokenizer
-            from peft import PeftModel, PeftConfig
-            # Cargar tokenizer del modelo base
-            tokenizer = AutoTokenizer.from_pretrained("HuggingFaceH4/zephyr-7b-beta")
-            # Cargar modelo base
-            base_model = AutoModelForCausalLM.from_pretrained(
-                "HuggingFaceH4/zephyr-7b-beta",
-                torch_dtype=torch.float32,
-                device_map="auto" if use_gpu else None
-            )
-            # Cargar configuración del adaptador
-            try:
-                # Si está usando un repositorio en HuggingFace
-                adapter_config = PeftConfig.from_pretrained("Malaji71/SARA-Zephyr")
-                # Cargar el adaptador sobre el modelo base
-                peft_model = PeftModel.from_pretrained(
-                    base_model,
-                    "Malaji71/SARA-Zephyr"
-                )
-                print("✅ PEFT adapter loaded from HuggingFace!")
-            except Exception as e:
-                print(f"Error loading from HuggingFace: {str(e)}")
-                print("Trying to load adapter locally...")
-                # Intentar cargar localmente si está disponible
-                local_adapter_path = "./SARA-Zephyr"  # Ajustar según sea necesario
-                try:
-                    adapter_config = PeftConfig.from_pretrained(local_adapter_path)
-                    peft_model = PeftModel.from_pretrained(
-                        base_model,
-                        local_adapter_path
-                    )
-                    print("✅ PEFT adapter loaded locally!")
-                except Exception as e2:
-                    print(f"Error loading adapter locally: {str(e2)}")
-                    print("Falling back to base model...")
-                    peft_model = base_model
-            # Crear pipeline con el modelo adaptado
-            zephyr_generator = pipeline(
-                "text-generation",
-                model=peft_model,
-                tokenizer=tokenizer,
-                torch_dtype=torch.float32
-            )
-            # Verificar que el pipeline se haya creado correctamente
-            if zephyr_generator is None or not hasattr(zephyr_generator, 'tokenizer'):
-                raise ValueError("Pipeline creation failed or doesn't have tokenizer attribute")
-            print("✅ SARA-Zephyr adapter model loaded successfully!")
-            return True
         except Exception as e:
-            print(f"Error loading SARA-Zephyr adapter: {str(e)}")
-            print("Falling back to standard Zephyr model...")
-            # Modelo de respaldo en caso de error
-            zephyr_generator = pipeline(
-                "text-generation",
-                model="HuggingFaceH4/zephyr-7b-beta",
-                torch_dtype=torch.float32,
-                device_map="auto" if use_gpu else None
-            )
-            # Verificar que el pipeline de respaldo se haya creado correctamente
-            if zephyr_generator is None or not hasattr(zephyr_generator, 'tokenizer'):
-                raise ValueError("Fallback pipeline creation failed or doesn't have tokenizer attribute")
-            print("✅ Fallback Zephyr model loaded successfully!")
-            return True
-    except Exception as e:
-        print(f"❌ Critical error loading models: {str(e)}")
-        return False
 # Universal Video Prompting Guide combining Gen-4 + SARA
 unified_instructions = """
@@ -154,17 +92,10 @@ def analyze_image_with_zephyr(image):
         return "Please upload an image first.", {}
     try:
         # Lazy load models
-        load_success = load_models()
-        if not load_success:
-            return "Error: Model loading failed. Please try again later.", {}
-        if processor is None or model is None:
-            return "Error: Image analysis model failed to load. Please try again.", {}
         # Convert to PIL if needed
         if not isinstance(image, Image.Image):
             image = Image.fromarray(image)
         # Get image dimensions
         width, height = image.size
         aspect_ratio = width / height
@@ -174,15 +105,12 @@ def analyze_image_with_zephyr(image):
             composition = "Vertical portrait shot"
         else:
             composition = "Balanced composition"
         # Generate caption with BLIP
         inputs = processor(image, return_tensors="pt")
         out = model.generate(**inputs, max_length=50, num_beams=3)
         basic_caption = processor.decode(out[0], skip_special_tokens=True)
         # Use Zephyr for advanced analysis
         enhanced_analysis = analyze_scene_with_zephyr(basic_caption, aspect_ratio, composition)
         # Create comprehensive analysis
         analysis = f"""📊 **Image Analysis:**
 • **Dimensions**: {width} x {height}
@@ -196,7 +124,6 @@ def analyze_image_with_zephyr(image):
 {chr(10).join(f"• {insight}" for insight in enhanced_analysis['motion_insights'])}
 🎯 **Recommended Approach**:
 {enhanced_analysis['recommended_approach']}"""
         # Scene info for prompt generation
         scene_info = {
             'basic_description': basic_caption,
@@ -206,32 +133,11 @@ def analyze_image_with_zephyr(image):
         }
         return analysis, scene_info
     except Exception as e:
-        print(f"Error in analyze_image_with_zephyr: {str(e)}")
         return f"Error analyzing image: {str(e)}", {}
 def analyze_scene_with_zephyr(basic_caption, aspect_ratio, composition):
-    """Use Zephyr with SARA framework for advanced scene analysis"""
-    # Verificar que el modelo está cargado
-    if zephyr_generator is None:
-        # Intenta cargar los modelos si no están cargados
-        success = load_models()
-        if not success:
-            return {
-                'scene_interpretation': "Error: Unable to load text generation model.",
-                'motion_insights': ["Model loading failed. Please try again."],
-                'recommended_approach': "Unable to determine approach due to model loading error."
-            }
-    # Verificar que zephyr_generator tiene el atributo tokenizer
-    if not hasattr(zephyr_generator, 'tokenizer'):
-        return {
-            'scene_interpretation': "Error: Text generation model is not properly initialized.",
-            'motion_insights': ["Model initialization failed. Please restart the application."],
-            'recommended_approach': "Unable to determine approach due to model initialization error."
-        }
-    try:
-        analysis_prompt = f"""<|system|>
 You are a video prompt engineering expert specializing in the SARA framework. Analyze this image description for video creation potential.
 <|user|>
 Image description: "{basic_caption}"
@@ -244,82 +150,34 @@ Please provide:
 4. Best prompting approach (SARA vs Gen-4)
 Be concise and practical.
 <|assistant|>"""
-        response = zephyr_generator(
-            analysis_prompt,
-            max_new_tokens=200,
-            do_sample=True,
-            temperature=0.7,
-            top_k=50,
-            top_p=0.95
-        )
-        # Extract generated text
-        if isinstance(response, list) and len(response) > 0 and "generated_text" in response[0]:
-            generated_text = response[0]["generated_text"]
-            # Extraer solo la respuesta del asistente
-            if "<|assistant|>" in generated_text:
-                ai_analysis = generated_text.split("<|assistant|>")[-1].strip()
-            else:
-                # Intentar extraer la última parte del texto si no encontramos la etiqueta
-                ai_analysis = generated_text.split(analysis_prompt)[-1].strip()
-            lines = ai_analysis.split('\n')
-            motion_insights = []
-            recommended_approach = "SARA framework recommended for precise control"
-            for line in lines:
-                if line.strip():
-                    if any(keyword in line.lower() for keyword in ['motion', 'movement', 'camera', 'lighting']):
-                        motion_insights.append(line.strip('- ').strip())
-                    elif 'sara' in line.lower() or 'gen-4' in line.lower():
-                        recommended_approach = line.strip('- ').strip()
-            return {
-                'scene_interpretation': ai_analysis.split('\n')[0] if ai_analysis else "Scene analysis completed",
-                'motion_insights': motion_insights[:6] if motion_insights else ["Smooth cinematic movement", "Steady camera tracking", "Natural lighting transitions"],
-                'recommended_approach': recommended_approach
-            }
-        else:
-            return {
-                'scene_interpretation': "Unable to generate analysis with current model.",
-                'motion_insights': ["Default: Smooth motion", "Default: Stable camera work", "Default: Natural lighting"],
-                'recommended_approach': "SARA framework recommended as default"
-            }
-    except Exception as e:
-        print(f"Error in analyze_scene_with_zephyr: {str(e)}")
-        return {
-            'scene_interpretation': f"Error analyzing scene: {str(e)}",
-            'motion_insights': ["Error occurred during analysis", "Using default recommendations", "Try simplifying the image"],
-            'recommended_approach': "SARA framework recommended (default)"
-        }
 def generate_sample_prompts_with_zephyr(scene_info=None):
-    """Generate sample prompts using Zephyr with SARA framework"""
-    # Verificar que el modelo está cargado
-    if zephyr_generator is None:
-        # Intenta cargar los modelos si no están cargados
-        success = load_models()
-        if not success:
-            return [
-                "Error: Unable to load text generation model. Please try again.",
-                "Default prompt: The subject walks forward smoothly while the background remains steady, cinematic atmosphere.",
-                "Default prompt: A dramatic close-up captures the subject's expression as they speak directly to the camera."
-            ]
-    # Verificar que zephyr_generator tiene el atributo tokenizer
-    if not hasattr(zephyr_generator, 'tokenizer'):
-        return [
-            "Error: Text generation model is not properly initialized. Please restart the application.",
-            "Default prompt: The subject walks forward smoothly while the background remains steady, cinematic atmosphere.",
-            "Default prompt: A dramatic close-up captures the subject's expression as they speak directly to the camera."
-        ]
     if scene_info and scene_info.get('basic_description'):
-        try:
-            # Use Zephyr to generate contextual prompts
-            context_prompt = f"""<|system|>
 Generate 3 professional video prompts using the SARA framework based on this image analysis.
 <|user|>
 Image description: {scene_info['basic_description']}
@@ -327,35 +185,19 @@ Composition: {scene_info.get('composition', 'Balanced')}
 Aspect Ratio: {scene_info.get('aspect_ratio', 'N/A'):.2f}
 Remember the SARA framework: Subject + Action + Reference + Atmosphere
 <|assistant|>"""
-            response = zephyr_generator(
-                context_prompt,
-                max_new_tokens=200,
-                do_sample=True,
-                temperature=0.8,
-                top_k=50,
-                top_p=0.95
-            )
-            # Extract generated text
-            if isinstance(response, list) and len(response) > 0 and "generated_text" in response[0]:
-                generated_text = response[0]["generated_text"]
-                # Extraer solo la respuesta del asistente
-                if "<|assistant|>" in generated_text:
-                    prompts_text = generated_text.split("<|assistant|>")[-1].strip()
-                else:
-                    # Intentar extraer la última parte del texto si no encontramos la etiqueta
-                    prompts_text = generated_text.split(context_prompt)[-1].strip()
-                # Extract and clean prompts
-                prompts = [p.strip('123.-• ') for p in prompts_text.split('\n') if p.strip()]
-                # Return first 3 clean prompts
-                if len(prompts) >= 3:
-                    return prompts[:3]
-        except Exception as e:
-            print(f"Error in generate_sample_prompts_with_zephyr: {str(e)}")
-            # Continue to fallback prompts if there's an error
     # Fallback prompts if Zephyr fails or no scene info
     base_prompts = [
         "The subject walks forward smoothly while the background remains steady, cinematic atmosphere.",
@@ -365,30 +207,15 @@ Remember the SARA framework: Subject + Action + Reference + Atmosphere
     return base_prompts
 def optimize_user_prompt_with_zephyr(user_idea, scene_info=None):
-    """Optimize user's prompt idea using SARA framework with Zephyr model"""
     if not user_idea.strip():
-        return "Please enter your idea first.", "No input provided"
-    # Verificar que el modelo está cargado
-    if zephyr_generator is None:
-        # Intenta cargar los modelos si no están cargados
-        success = load_models()
-        if not success:
-            return "Error: Unable to load text generation model. Please try again or use Retry button.", "Model loading failed"
-    # Verificar que zephyr_generator tiene el atributo tokenizer
-    if not hasattr(zephyr_generator, 'tokenizer'):
-        return ("Error: Text generation model is not properly initialized. Please restart the application or use Retry button.",
-                "Model initialization failed")
     # Create context from scene if available
     context = ""
     if scene_info and scene_info.get('basic_description'):
         context = f"Image context: {scene_info['basic_description']}"
-    try:
-        # Enforce structure based on approach
-        optimization_prompt = f"""<|system|>
 You are an expert in video prompting, specializing in the SARA framework. Transform user ideas into professional prompts compatible with AI video models like Sora, Gen-4, Pika, Runway, and Luma.
 Key principles:
 - Focus on MOTION, not static description
@@ -401,140 +228,27 @@ User's idea: "{user_idea}"
 {context}
 Please create an optimized video prompt using the SARA framework. Respond with just the prompt.
 <|assistant|>"""
-        response = zephyr_generator(
-            optimization_prompt,
-            max_new_tokens=100,
-            do_sample=True,
-            temperature=0.7,
-            top_k=50,
-            top_p=0.95
-        )
-        # Extract optimized prompt
-        if isinstance(response, list) and len(response) > 0 and "generated_text" in response[0]:
-            generated_text = response[0]["generated_text"]
-            # Extraer solo la respuesta del asistente
-            if "<|assistant|>" in generated_text:
-                optimized = generated_text.split("<|assistant|>")[-1].strip()
-            else:
-                # Intentar extraer la última parte del texto si no encontramos la etiqueta
-                optimized = generated_text.split(optimization_prompt)[-1].strip()
-            return optimized, "SARA-Zephyr model used successfully"
-        else:
-            return ("Error processing your idea. Please try again with a different description or use Retry button.",
-                    "Invalid model response format")
-    except Exception as e:
-        print(f"Error in optimize_user_prompt_with_zephyr: {str(e)}")
-        return (f"Error generating prompt: {str(e)}. Please try again with a simpler description or use Retry button.",
-                f"Error: {str(e)}")
-def fallback_generate_prompt(user_idea, scene_info=None):
-    """Función de respaldo para generar prompts cuando el modelo principal falla"""
-    if not user_idea.strip():
-        return "Please enter your idea first."
-    try:
-        # Crear un generador de respaldo específico para esta función
-        from transformers import pipeline
-        import torch
-        fallback_generator = pipeline(
-            "text-generation",
-            model="HuggingFaceH4/zephyr-7b-beta",
-            torch_dtype=torch.float32,
-            device_map="auto" if torch.cuda.is_available() else None
-        )
-        # Create context from scene if available
-        context = ""
-        if scene_info and scene_info.get('basic_description'):
-            context = f"Image context: {scene_info['basic_description']}"
-        # Enforce structure based on approach
-        optimization_prompt = f"""<|system|>
-You are an expert in video prompting, specializing in the SARA framework. Transform user ideas into professional prompts compatible with AI video models like Sora, Gen-4, Pika, Runway, and Luma.
-Key principles:
-- Focus on MOTION, not static description
-- Use positive phrasing
-- Be specific about camera work
-- Include lighting/atmosphere details
-- Follow the SARA structure: Subject + Action + Reference + Atmosphere
-<|user|>
-User's idea: "{user_idea}"
-{context}
-Please create an optimized video prompt using the SARA framework. Respond with just the prompt.
-<|assistant|>"""
-        response = fallback_generator(
-            optimization_prompt,
-            max_new_tokens=100,
-            do_sample=True,
-            temperature=0.7,
-            top_k=50,
-            top_p=0.95
-        )
-        # Extract optimized prompt
-        if isinstance(response, list) and len(response) > 0 and "generated_text" in response[0]:
-            generated_text = response[0]["generated_text"]
-            # Extraer solo la respuesta del asistente
-            if "<|assistant|>" in generated_text:
-                optimized = generated_text.split("<|assistant|>")[-1].strip()
-            else:
-                # Intentar extraer la última parte del texto si no encontramos la etiqueta
-                optimized = generated_text.split(optimization_prompt)[-1].strip()
-            return optimized
-        else:
-            return "Error processing your idea with the fallback model. Here's a template: Subject walks smoothly while camera remains steady, cinematic atmosphere."
-    except Exception as e:
-        print(f"Error in fallback_generate_prompt: {str(e)}")
-        # Generación manual de respaldo en caso de error total
-        words = user_idea.strip().split()
-        if len(words) > 2:
-            subject = "The subject"
-            if "man" in words or "boy" in words:
-                subject = "The man"
-            elif "woman" in words or "girl" in words:
-                subject = "The woman"
-            elif "child" in words or "kid" in words:
-                subject = "The child"
-            action = "moves naturally"
-            for verb in ["walk", "run", "jump", "sit", "stand", "dance", "move", "turn"]:
-                if any(verb in word.lower() for word in words):
-                    action = verb + "s smoothly"
-                    break
-            return f"{subject} {action} while camera remains steady, cinematic atmosphere."
-        else:
-            return "The subject moves naturally while camera remains steady, cinematic atmosphere."
 def refine_prompt_with_zephyr(current_prompt, feedback, chat_history, scene_info=None):
-    """Refine a prompt based on user feedback using Zephyr with SARA framework"""
     if not feedback.strip():
         return current_prompt, chat_history
-    # Verificar que el modelo está cargado
-    if zephyr_generator is None:
-        # Intenta cargar los modelos si no están cargados
-        success = load_models()
-        if not success:
-            return "Error: Unable to load text generation model. Please try again.", chat_history
-    # Verificar que zephyr_generator tiene el atributo tokenizer
-    if not hasattr(zephyr_generator, 'tokenizer'):
-        return "Error: Text generation model is not properly initialized. Please restart the application.", chat_history
     # Create refinement context
     context = ""
     if scene_info and scene_info.get('basic_description'):
         context = f"Image context: {scene_info['basic_description']}"
-    try:
-        # Construct Zephyr refinement prompt
-        refinement_prompt = f"""<|system|>
 You are an expert in refining video prompts using the SARA framework. Based on the user's feedback, improve the current prompt while maintaining its core structure.
 Key principles:
 - Focus on MOTION, not static description
@@ -548,35 +262,18 @@ Feedback: "{feedback}"
 {context}
 Please refine the prompt while keeping it under 100 words. Respond with just the refined prompt.
 <|assistant|>"""
-        response = zephyr_generator(
-            refinement_prompt,
-            max_new_tokens=100,
-            do_sample=True,
-            temperature=0.7,
-            top_k=50,
-            top_p=0.95
-        )
-        # Extract refined prompt
-        if isinstance(response, list) and len(response) > 0 and "generated_text" in response[0]:
-            generated_text = response[0]["generated_text"]
-            # Extraer solo la respuesta del asistente
-            if "<|assistant|>" in generated_text:
-                refined = generated_text.split("<|assistant|>")[-1].strip()
-            else:
-                # Intentar extraer la última parte del texto si no encontramos la etiqueta
-                refined = generated_text.split(refinement_prompt)[-1].strip()
-            # Update chat history
-            new_chat_history = chat_history + [[feedback, refined]]
-            return refined, new_chat_history
-        else:
-            return current_prompt, chat_history
-    except Exception as e:
-        print(f"Error in refine_prompt_with_zephyr: {str(e)}")
-        return f"Error refining prompt: {str(e)}. Please try again with a simpler request.", chat_history
 def generate_gen4_prompts(scene_info, foundation=""):
     """Generate Gen-4 style prompts iteratively"""
@@ -592,7 +289,6 @@ def generate_gen4_prompts(scene_info, foundation=""):
                 subject = "The person"
             else:
                 subject = "The subject"
             # Generate actions based on scene
             if any(word in description.lower() for word in ['sitting', 'seated']):
                 actions = ['speaks to camera', 'gestures while seated', 'leans forward', 'adjusts posture']
@@ -600,14 +296,11 @@ def generate_gen4_prompts(scene_info, foundation=""):
                 actions = ['speaks directly', 'gestures naturally', 'shifts weight', 'looks around']
             else:
                 actions = ['moves forward', 'turns slightly', 'gestures', 'demonstrates']
             action = random.choice(actions)
             # Build Gen-4 iteratively
             basic = f"{subject} {action}"
             with_motion = f"{basic} smoothly"
             with_camera = f"{with_motion}. Camera captures steadily"
             # Add style based on composition
             composition = scene_info.get('composition', '')
             if 'Wide' in composition:
@@ -616,9 +309,7 @@ def generate_gen4_prompts(scene_info, foundation=""):
                 style_addition = "Intimate portrait lighting"
             else:
                 style_addition = "Professional documentary style"
             with_style = f"{with_camera}. {style_addition}."
             return f"""🚀 **Gen-4 Iterative Building:**
 **Basic**: {basic}
 **+ Motion**: {with_motion}
@@ -640,7 +331,6 @@ def build_custom_prompt(foundation, subject_motion, scene_motion, camera_motion,
         parts = []
         if foundation:
             parts.append(foundation)
         # Add motion elements
         motion_parts = []
         if subject_motion:
@@ -649,17 +339,14 @@ def build_custom_prompt(foundation, subject_motion, scene_motion, camera_motion,
             motion_parts.extend(scene_motion)
         if motion_parts:
             parts.append(", ".join(motion_parts))
         # Reference (camera stability)
         if camera_motion:
             parts.append(f"while {camera_motion}")
         else:
             parts.append("while background remains steady")
         # Atmosphere
         if style:
             parts.append(style)
         return " ".join(parts)
     else:  # Gen-4 style
         # Gen-4 Structure: Simple iterative building
@@ -674,28 +361,18 @@ def build_custom_prompt(foundation, subject_motion, scene_motion, camera_motion,
             parts.extend(scene_motion)
         if style:
             parts.append(style)
         return ". ".join(parts) if parts else "The subject moves naturally"
 # Create the Gradio interface
 def create_interface():
     """Create the Gradio interface"""
-    # Asegúrate de cargar los modelos antes de crear la interfaz
-    try:
-        load_models()
-    except Exception as e:
-        print(f"⚠️ Warning: Initial model loading failed: {str(e)}")
-        print("Models will be loaded on demand.")
     with gr.Blocks(theme=gr.themes.Soft(), title="AI Video Prompt Generator") as demo:
         # Header
-        gr.Markdown("# 🎬 AI Video Prompt Generator - 🤖 SARA Framework Powered")
         gr.Markdown("*Professional prompts for Sora, Gen-4, Pika, Luma, Runway and more*")
         # State variables
         scene_state = gr.State({})
         chat_history_state = gr.State([])
         with gr.Tabs():
             # Tab 1: Learning Guide
             with gr.Tab("📚 Prompting Guide"):
@@ -713,7 +390,6 @@ def create_interface():
                     - **Camera Motion**: Pan, tilt, dolly, zoom, orbit, tracking
                     - **Environmental**: Wind, water flow, particle effects, lighting changes
                     """)
             # Tab 2: Image Analysis
             with gr.Tab("📷 Image Analysis"):
                 with gr.Row():
@@ -725,7 +401,6 @@ def create_interface():
                         analyze_btn = gr.Button("🔍 Analyze Image", variant="primary")
                     with gr.Column(scale=2):
                         analysis_output = gr.Markdown(label="AI Analysis Results")
                 # Sample prompts section
                 with gr.Group():
                     gr.Markdown("### 💡 Sample Prompts")
@@ -739,7 +414,6 @@ def create_interface():
                         )
                         for i in range(3)
                     ]
             # Tab 3: AI Prompt Generator
             with gr.Tab("🤖 AI Prompt Generator"):
                 with gr.Row():
@@ -750,13 +424,6 @@ def create_interface():
                             lines=3
                         )
                         optimize_btn = gr.Button("🚀 Generate Optimized Prompt", variant="primary")
-                        with gr.Row():
-                            retry_btn = gr.Button("🔄 Retry with Default Model", variant="secondary")
-                            model_status = gr.Textbox(
-                                label="Model Status",
-                                value="",
-                                interactive=False
-                            )
                         optimized_prompt = gr.Textbox(
                             label="AI-Optimized Video Prompt",
                             lines=4,
@@ -774,7 +441,6 @@ def create_interface():
                         # Chat history
                         with gr.Accordion("💬 Refinement History", open=False):
                             chat_display = gr.Chatbot(height=300, type='messages')
             # Tab 4: Gen-4 Method
             with gr.Tab("📝 Gen-4 Official"):
                 gr.Markdown("*Official Gen-4 method: Simple → Complex building*")
@@ -791,7 +457,6 @@ def create_interface():
                     interactive=False,
                     show_copy_button=True
                 )
             # Tab 5: Custom Builder
             with gr.Tab("🛠️ Custom Builder"):
                 gr.Markdown("## Build Your Custom Prompt")
@@ -834,7 +499,6 @@ def create_interface():
                     interactive=True,
                     show_copy_button=True
                 )
         # Event handlers
         analyze_btn.click(
             fn=analyze_image_with_zephyr,
@@ -849,12 +513,7 @@ def create_interface():
         optimize_btn.click(
             fn=optimize_user_prompt_with_zephyr,
             inputs=[user_idea, scene_state],
-            outputs=[optimized_prompt, model_status]
-        )
-        retry_btn.click(
-            fn=lambda idea, scene_info: (fallback_generate_prompt(idea, scene_info), "Using default model"),
-            inputs=[user_idea, scene_state],
-            outputs=[optimized_prompt, model_status]
         )
         refine_btn.click(
             fn=refine_prompt_with_zephyr,
@@ -881,7 +540,7 @@ def create_interface():
 # Launch the app
 if __name__ == "__main__":
-    print("🎬 Starting AI Video Prompt Generator with SARA LORA Adapter...")
     print(f"📊 Status: {'GPU' if use_gpu else 'CPU'} Mode Enabled")
     print("🔧 Loading models (this may take a few minutes)...")
     try:
@@ -899,15 +558,10 @@ if __name__ == "__main__":
         print(f"❌ Error launching app: {e}")
         print("🔧 Make sure you have sufficient CPU resources and all dependencies installed.")
         print("📦 Required packages:")
-        print("   pip install torch transformers gradio pillow accelerate bitsandbytes peft")
         # Alternative launch attempt
         print("\n🔄 Attempting alternative launch...")
         try:
-            # Intenta instalar las dependencias necesarias
-            import subprocess
-            print("🔄 Installing/updating necessary dependencies...")
-            subprocess.call(["pip", "install", "-U", "transformers", "accelerate", "peft", "huggingface_hub"])
             demo = create_interface()
             demo.launch(
                 share=False,

 import gradio as gr
 import torch
+from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
+from peft import PeftModel
 # Check GPU availability
 use_gpu = torch.cuda.is_available()
 def load_models():
     """Load models only when needed"""
     global processor, model, zephyr_generator
+    if processor is None or model is None or zephyr_generator is None:
         print("Loading BLIP model...")
         processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-large")
         model = BlipForConditionalGeneration.from_pretrained(
             torch_dtype=torch.float32  # Use float32 for CPU
         )
         print("✅ BLIP model loaded successfully!")
+        print("Loading SARA-Zephyr fine-tuned model...")
+        # Load base model
+        base_model_id = "HuggingFaceH4/zephyr-7b-beta"
+        base_model = AutoModelForCausalLM.from_pretrained(
+            base_model_id,
+            torch_dtype=torch.float16 if use_gpu else torch.float32,  # Use float16 for GPU
+            device_map="auto" if use_gpu else None
+        )
+        # Apply LoRA adapters
+        lora_model_id = "Malaji71/SARA-Zephyr"
         try:
+            model_with_lora = PeftModel.from_pretrained(base_model, lora_model_id)
+            print("✅ LoRA adapters applied successfully.")
         except Exception as e:
+            print(f"❌ Error applying LoRA adapters: {str(e)}")
+            raise ValueError("Failed to apply LoRA adapters.")
+        # Load tokenizer
+        tokenizer = AutoTokenizer.from_pretrained(base_model_id)
+        # Create pipeline for text generation
+        zephyr_generator = pipeline(
+            "text-generation",
+            model=model_with_lora,
+            tokenizer=tokenizer,
+            max_new_tokens=128,
+            temperature=0.7,
+            top_p=0.95,
+            repetition_penalty=1.15,
+            device_map="auto" if use_gpu else None
+        )
+        print("✅ SARA-Zephyr fine-tuned model loaded successfully!")
 # Universal Video Prompting Guide combining Gen-4 + SARA
 unified_instructions = """
         return "Please upload an image first.", {}
     try:
         # Lazy load models
+        load_models()
         # Convert to PIL if needed
         if not isinstance(image, Image.Image):
             image = Image.fromarray(image)
         # Get image dimensions
         width, height = image.size
         aspect_ratio = width / height
             composition = "Vertical portrait shot"
         else:
             composition = "Balanced composition"
         # Generate caption with BLIP
         inputs = processor(image, return_tensors="pt")
         out = model.generate(**inputs, max_length=50, num_beams=3)
         basic_caption = processor.decode(out[0], skip_special_tokens=True)
         # Use Zephyr for advanced analysis
         enhanced_analysis = analyze_scene_with_zephyr(basic_caption, aspect_ratio, composition)
         # Create comprehensive analysis
         analysis = f"""📊 **Image Analysis:**
 • **Dimensions**: {width} x {height}
 {chr(10).join(f"• {insight}" for insight in enhanced_analysis['motion_insights'])}
 🎯 **Recommended Approach**:
 {enhanced_analysis['recommended_approach']}"""
         # Scene info for prompt generation
         scene_info = {
             'basic_description': basic_caption,
         }
         return analysis, scene_info
     except Exception as e:
         return f"Error analyzing image: {str(e)}", {}
 def analyze_scene_with_zephyr(basic_caption, aspect_ratio, composition):
+    """Use SARA-Zephyr for advanced scene analysis"""
+    analysis_prompt = f"""<|system|>
 You are a video prompt engineering expert specializing in the SARA framework. Analyze this image description for video creation potential.
 <|user|>
 Image description: "{basic_caption}"
 4. Best prompting approach (SARA vs Gen-4)
 Be concise and practical.
 <|assistant|>"""
+    response = zephyr_generator(
+        analysis_prompt,
+        max_new_tokens=200,
+        do_sample=True,
+        temperature=0.7,
+        pad_token_id=zephyr_generator.tokenizer.eos_token_id
+    )
+    ai_analysis = response[0]['generated_text'].split("<|assistant|>")[-1].strip()
+    lines = ai_analysis.split('\n')
+    motion_insights = []
+    recommended_approach = "SARA framework recommended for precise control"
+    for line in lines:
+        if line.strip():
+            if any(keyword in line.lower() for keyword in ['motion', 'movement', 'camera', 'lighting']):
+                motion_insights.append(line.strip('- ').strip())
+            elif 'sara' in line.lower() or 'gen-4' in line.lower():
+                recommended_approach = line.strip('- ').strip()
+    return {
+        'scene_interpretation': ai_analysis.split('\n')[0] if ai_analysis else "Scene analysis completed",
+        'motion_insights': motion_insights[:6],
+        'recommended_approach': recommended_approach
+    }
 def generate_sample_prompts_with_zephyr(scene_info=None):
+    """Generate sample prompts using SARA-Zephyr"""
     if scene_info and scene_info.get('basic_description'):
+        # Use Zephyr to generate contextual prompts
+        context_prompt = f"""<|system|>
 Generate 3 professional video prompts using the SARA framework based on this image analysis.
 <|user|>
 Image description: {scene_info['basic_description']}
 Aspect Ratio: {scene_info.get('aspect_ratio', 'N/A'):.2f}
 Remember the SARA framework: Subject + Action + Reference + Atmosphere
 <|assistant|>"""
+        response = zephyr_generator(
+            context_prompt,
+            max_new_tokens=200,
+            do_sample=True,
+            temperature=0.8,
+            pad_token_id=zephyr_generator.tokenizer.eos_token_id
+        )
+        # Extract and clean prompts
+        prompts_text = response[0]['generated_text'].split("<|assistant|>")[-1].strip()
+        prompts = [p.strip('123.-• ') for p in prompts_text.split('\n') if p.strip()]
+        # Return first 3 clean prompts
+        if len(prompts) >= 3:
+            return prompts[:3]
     # Fallback prompts if Zephyr fails or no scene info
     base_prompts = [
         "The subject walks forward smoothly while the background remains steady, cinematic atmosphere.",
     return base_prompts
 def optimize_user_prompt_with_zephyr(user_idea, scene_info=None):
+    """Optimize user's prompt idea using SARA-Zephyr while respecting SARA/Gen-4 structure"""
     if not user_idea.strip():
+        return "Please enter your idea first."
     # Create context from scene if available
     context = ""
     if scene_info and scene_info.get('basic_description'):
         context = f"Image context: {scene_info['basic_description']}"
+    # Enforce structure based on approach
+    optimization_prompt = f"""<|system|>
 You are an expert in video prompting, specializing in the SARA framework. Transform user ideas into professional prompts compatible with AI video models like Sora, Gen-4, Pika, Runway, and Luma.
 Key principles:
 - Focus on MOTION, not static description
 {context}
 Please create an optimized video prompt using the SARA framework. Respond with just the prompt.
 <|assistant|>"""
+    response = zephyr_generator(
+        optimization_prompt,
+        max_new_tokens=100,
+        do_sample=True,
+        temperature=0.7,
+        pad_token_id=zephyr_generator.tokenizer.eos_token_id
+    )
+    # Extract optimized prompt
+    optimized = response[0]['generated_text'].split("<|assistant|>")[-1].strip()
+    return optimized
 def refine_prompt_with_zephyr(current_prompt, feedback, chat_history, scene_info=None):
+    """Refine a prompt based on user feedback using SARA-Zephyr"""
     if not feedback.strip():
         return current_prompt, chat_history
     # Create refinement context
     context = ""
     if scene_info and scene_info.get('basic_description'):
         context = f"Image context: {scene_info['basic_description']}"
+    # Construct Zephyr refinement prompt
+    refinement_prompt = f"""<|system|>
 You are an expert in refining video prompts using the SARA framework. Based on the user's feedback, improve the current prompt while maintaining its core structure.
 Key principles:
 - Focus on MOTION, not static description
 {context}
 Please refine the prompt while keeping it under 100 words. Respond with just the refined prompt.
 <|assistant|>"""
+    response = zephyr_generator(
+        refinement_prompt,
+        max_new_tokens=100,
+        do_sample=True,
+        temperature=0.7,
+        pad_token_id=zephyr_generator.tokenizer.eos_token_id
+    )
+    # Extract refined prompt
+    refined = response[0]['generated_text'].split("<|assistant|>")[-1].strip()
+    # Update chat history
+    new_chat_history = chat_history + [[feedback, refined]]
+    return refined, new_chat_history
 def generate_gen4_prompts(scene_info, foundation=""):
     """Generate Gen-4 style prompts iteratively"""
                 subject = "The person"
             else:
                 subject = "The subject"
             # Generate actions based on scene
             if any(word in description.lower() for word in ['sitting', 'seated']):
                 actions = ['speaks to camera', 'gestures while seated', 'leans forward', 'adjusts posture']
                 actions = ['speaks directly', 'gestures naturally', 'shifts weight', 'looks around']
             else:
                 actions = ['moves forward', 'turns slightly', 'gestures', 'demonstrates']
             action = random.choice(actions)
             # Build Gen-4 iteratively
             basic = f"{subject} {action}"
             with_motion = f"{basic} smoothly"
             with_camera = f"{with_motion}. Camera captures steadily"
             # Add style based on composition
             composition = scene_info.get('composition', '')
             if 'Wide' in composition:
                 style_addition = "Intimate portrait lighting"
             else:
                 style_addition = "Professional documentary style"
             with_style = f"{with_camera}. {style_addition}."
             return f"""🚀 **Gen-4 Iterative Building:**
 **Basic**: {basic}
 **+ Motion**: {with_motion}
         parts = []
         if foundation:
             parts.append(foundation)
         # Add motion elements
         motion_parts = []
         if subject_motion:
             motion_parts.extend(scene_motion)
         if motion_parts:
             parts.append(", ".join(motion_parts))
         # Reference (camera stability)
         if camera_motion:
             parts.append(f"while {camera_motion}")
         else:
             parts.append("while background remains steady")
         # Atmosphere
         if style:
             parts.append(style)
         return " ".join(parts)
     else:  # Gen-4 style
         # Gen-4 Structure: Simple iterative building
             parts.extend(scene_motion)
         if style:
             parts.append(style)
         return ". ".join(parts) if parts else "The subject moves naturally"
 # Create the Gradio interface
 def create_interface():
     """Create the Gradio interface"""
     with gr.Blocks(theme=gr.themes.Soft(), title="AI Video Prompt Generator") as demo:
         # Header
+        gr.Markdown("# 🎬 AI Video Prompt Generator - 🤖 SARA-Zephyr AI Powered")
         gr.Markdown("*Professional prompts for Sora, Gen-4, Pika, Luma, Runway and more*")
         # State variables
         scene_state = gr.State({})
         chat_history_state = gr.State([])
         with gr.Tabs():
             # Tab 1: Learning Guide
             with gr.Tab("📚 Prompting Guide"):
                     - **Camera Motion**: Pan, tilt, dolly, zoom, orbit, tracking
                     - **Environmental**: Wind, water flow, particle effects, lighting changes
                     """)
             # Tab 2: Image Analysis
             with gr.Tab("📷 Image Analysis"):
                 with gr.Row():
                         analyze_btn = gr.Button("🔍 Analyze Image", variant="primary")
                     with gr.Column(scale=2):
                         analysis_output = gr.Markdown(label="AI Analysis Results")
                 # Sample prompts section
                 with gr.Group():
                     gr.Markdown("### 💡 Sample Prompts")
                         )
                         for i in range(3)
                     ]
             # Tab 3: AI Prompt Generator
             with gr.Tab("🤖 AI Prompt Generator"):
                 with gr.Row():
                             lines=3
                         )
                         optimize_btn = gr.Button("🚀 Generate Optimized Prompt", variant="primary")
                         optimized_prompt = gr.Textbox(
                             label="AI-Optimized Video Prompt",
                             lines=4,
                         # Chat history
                         with gr.Accordion("💬 Refinement History", open=False):
                             chat_display = gr.Chatbot(height=300, type='messages')
             # Tab 4: Gen-4 Method
             with gr.Tab("📝 Gen-4 Official"):
                 gr.Markdown("*Official Gen-4 method: Simple → Complex building*")
                     interactive=False,
                     show_copy_button=True
                 )
             # Tab 5: Custom Builder
             with gr.Tab("🛠️ Custom Builder"):
                 gr.Markdown("## Build Your Custom Prompt")
                     interactive=True,
                     show_copy_button=True
                 )
         # Event handlers
         analyze_btn.click(
             fn=analyze_image_with_zephyr,
         optimize_btn.click(
             fn=optimize_user_prompt_with_zephyr,
             inputs=[user_idea, scene_state],
+            outputs=[optimized_prompt]
         )
         refine_btn.click(
             fn=refine_prompt_with_zephyr,
 # Launch the app
 if __name__ == "__main__":
+    print("🎬 Starting AI Video Prompt Generator with SARA-Zephyr...")
     print(f"📊 Status: {'GPU' if use_gpu else 'CPU'} Mode Enabled")
     print("🔧 Loading models (this may take a few minutes)...")
     try:
         print(f"❌ Error launching app: {e}")
         print("🔧 Make sure you have sufficient CPU resources and all dependencies installed.")
         print("📦 Required packages:")
+        print("   pip install torch transformers gradio pillow accelerate bitsandbytes")
         # Alternative launch attempt
         print("\n🔄 Attempting alternative launch...")
         try:
             demo = create_interface()
             demo.launch(
                 share=False,