Spaces:

Carlosxxxxxx
/

Aduc-sdr-cinematic-video

Build error

App Files Files Community

euiia committed on Sep 4, 2025

Commit

98772e2

verified ·

1 Parent(s): f022eee

Update engineers/deformes4D.py

Browse files

Files changed (1) hide show

engineers/deformes4D.py +46 -16

engineers/deformes4D.py CHANGED Viewed

@@ -1,13 +1,13 @@
-# engineers/deformes4D_engine.py
 #
 # Copyright (C) August 4, 2025  Carlos Rodrigues dos Santos
 #
-# Version: 2.1.0
 #
 # This file contains the Deformes4D Engine, which acts as the primary "Editor" or
-# "Film Crew" specialist within the ADUC-SDR architecture. It has been refactored
-# to delegate all VAE operations to the dedicated VaeManager, cleaning up its own
-# logic and adhering to the specialist-based architecture.
 import os
 import time
@@ -22,9 +22,9 @@ import subprocess
 import gc
 import shutil
 from pathlib import Path
-from typing import List, Tuple, Generator, Dict, Any, Optional
-from aduc_types import LatentConditioningItem
 from managers.ltx_manager import ltx_manager_singleton
 from managers.latent_enhancer_manager import latent_enhancer_specialist_singleton
 from managers.vae_manager import vae_manager_singleton
@@ -35,8 +35,6 @@ from tools.video_encode_tool import video_encode_tool_singleton
 logger = logging.getLogger(__name__)
 class Deformes4DEngine:
     """
     Implements the Camera (Ψ) and Distiller (Δ) of the ADUC-SDR architecture.
@@ -59,6 +57,18 @@ class Deformes4DEngine:
         with imageio.get_writer(path, fps=fps, codec='libx264', quality=8, output_params=['-pix_fmt', 'yuv420p']) as writer:
             for frame in video_np: writer.append_data(frame)
     def _preprocess_image_for_latent_conversion(self, image: Image.Image, target_resolution: tuple) -> Image.Image:
         """Resizes and fits an image to the target resolution for VAE encoding."""
         if image.size != target_resolution:
@@ -80,9 +90,6 @@ class Deformes4DEngine:
                                 video_resolution: int, use_continuity_director: bool,
                                 guidance_scale: float, stg_scale: float, num_inference_steps: int,
                                 progress: gr.Progress = gr.Progress()):
-        """
-        Step 3: Production. Generates the original master video from keyframes.
-        """
         FPS = 24
         FRAMES_PER_LATENT_CHUNK = 8
         LATENT_PROCESSING_CHUNK_SIZE = 4
@@ -96,6 +103,7 @@ class Deformes4DEngine:
         total_frames_brutos = self._quantize_to_multiple(int(seconds_per_fragment * FPS), FRAMES_PER_LATENT_CHUNK)
         frames_a_podar = self._quantize_to_multiple(int(total_frames_brutos * (trim_percent / 100)), FRAMES_PER_LATENT_CHUNK)
         latents_a_podar = frames_a_podar // FRAMES_PER_LATENT_CHUNK
         DEJAVU_FRAME_TARGET = frames_a_podar - 1 if frames_a_podar > 0 else 0
         DESTINATION_FRAME_TARGET = total_frames_brutos - 1
@@ -122,6 +130,7 @@ class Deformes4DEngine:
             decision = gemini_manager_singleton.get_cinematic_decision(global_prompt, story_history, past_keyframe_path, start_keyframe_path, destination_keyframe_path, storyboard[i - 1] if i > 0 else "The beginning.", storyboard[i], future_story_prompt)
             transition_type, motion_prompt = decision["transition_type"], decision["motion_prompt"]
             story_history += f"\n- Act {fragment_index}: {motion_prompt}"
             conditioning_items = []
             if eco_latent_for_next_loop is None:
                img_start = self._preprocess_image_for_latent_conversion(Image.open(start_keyframe_path).convert("RGB"), target_resolution_tuple)
@@ -129,21 +138,43 @@ class Deformes4DEngine:
             else:
                conditioning_items.append(LatentConditioningItem(eco_latent_for_next_loop, 0, 1.0))
                conditioning_items.append(LatentConditioningItem(dejavu_latent_for_next_loop, DEJAVU_FRAME_TARGET, handler_strength))
-            img_dest = self._preprocess_image_for_latent_conversion(Image.open(destination_keyframe_path).convert("RGB"), target_resolution_tuple)
-            conditioning_items.append(LatentConditioningItem(self.pil_to_latent(img_dest), DESTINATION_FRAME_TARGET, destination_convergence_strength))
             current_ltx_params = {**base_ltx_params, "motion_prompt": motion_prompt}
             logger.info(f"Calling LTX to generate video latents for fragment {fragment_index} ({total_frames_brutos} frames)...")
             latents_brutos, _ = self._generate_latent_tensor_internal(conditioning_items, current_ltx_params, target_resolution_tuple, total_frames_brutos)
             num_latent_frames = latents_brutos.shape[2]
             logger.info(f"LTX responded with a latent tensor of shape {latents_brutos.shape}, representing ~{num_latent_frames * 8 + 1} video frames at {FPS} FPS.")
             last_trim = latents_brutos[:, :, -(latents_a_podar+1):, :, :].clone()
             eco_latent_for_next_loop = last_trim[:, :, :2, :, :].clone()
             dejavu_latent_for_next_loop = last_trim[:, :, -1:, :, :].clone()
             latents_video = latents_brutos[:, :, :-(latents_a_podar-1), :, :].clone()
             latents_video = latents_video[:, :, 1:, :, :]
             del last_trim, latents_brutos; gc.collect(); torch.cuda.empty_cache()
             if transition_type == "cut":
                 eco_latent_for_next_loop, dejavu_latent_for_next_loop = None, None
             cpu_latent = latents_video.cpu()
             latent_path = os.path.join(temp_latent_dir, f"latent_fragment_{i:04d}.pt")
             torch.save(cpu_latent, latent_path)
@@ -166,7 +197,6 @@ class Deformes4DEngine:
             logger.info(f"Batch {i+1} concatenated. Latent shape: {sub_group_latent.shape}")
             base_name = f"clip_{i:04d}_{run_timestamp}"
             current_clip_path = os.path.join(temp_video_clips_dir, f"{base_name}.mp4")
             pixel_tensor = vae_manager_singleton.decode(sub_group_latent)
             self.save_video_from_tensor(pixel_tensor, current_clip_path, fps=FPS)
             del pixel_tensor, sub_group_latent; gc.collect(); torch.cuda.empty_cache()
@@ -273,7 +303,7 @@ class Deformes4DEngine:
     def _generate_latent_tensor_internal(self, conditioning_items, ltx_params, target_resolution, total_frames_to_generate):
         """Internal helper to call the LTX manager."""
         final_ltx_params = {**ltx_params, 'width': target_resolution[0], 'height': target_resolution[1], 'video_total_frames': total_frames_to_generate, 'video_fps': 24, 'current_fragment_index': int(time.time()), 'conditioning_items_data': conditioning_items}
-        return self.ltx_manager_singleton.generate_latent_fragment(**final_ltx_params)
     def _quantize_to_multiple(self, n, m):
         """Helper to round n to the nearest multiple of m."""

+# engineers/deformes4D.py
 #
 # Copyright (C) August 4, 2025  Carlos Rodrigues dos Santos
 #
+# Version: 2.2.0
 #
 # This file contains the Deformes4D Engine, which acts as the primary "Editor" or
+# "Film Crew" specialist within the ADUC-SDR architecture. It implements the Camera (Ψ)
+# and Distiller (Δ) concepts. Its core responsibilities include the low-level orchestration
+# of video fragment generation, latent manipulation, and final rendering/post-production tasks.
 import os
 import time
 import gc
 import shutil
 from pathlib import Path
+from typing import List, Tuple, Generator, Dict, Any
+from aduc_types import LatentConditioningItem
 from managers.ltx_manager import ltx_manager_singleton
 from managers.latent_enhancer_manager import latent_enhancer_specialist_singleton
 from managers.vae_manager import vae_manager_singleton
 logger = logging.getLogger(__name__)
 class Deformes4DEngine:
     """
     Implements the Camera (Ψ) and Distiller (Δ) of the ADUC-SDR architecture.
         with imageio.get_writer(path, fps=fps, codec='libx264', quality=8, output_params=['-pix_fmt', 'yuv420p']) as writer:
             for frame in video_np: writer.append_data(frame)
+    def read_video_to_tensor(self, video_path: str) -> torch.Tensor:
+        """Reads a video file and converts it into a pixel-space tensor.
+
+        Args:
+            video_path: Path of the video file; it is opened with imageio's
+                'ffmpeg' reader.
+
+        Returns:
+            A float32 tensor with a leading batch axis of size 1, laid out as
+            (B, C, F, H, W) and moved to `self.device`. Values are divided by
+            255 and then mapped with `x * 2 - 1`, which gives [-1, 1] when the
+            decoded frames are 8-bit (assumption — TODO confirm).
+
+        Note:
+            All frames are collected into a Python list before stacking, so
+            the whole clip is held in memory at once.
+            NOTE(review): a clip that yields zero frames would make `np.stack`
+            fail; confirm whether callers can hit this.
+        """
+        with imageio.get_reader(video_path, 'ffmpeg') as reader:
+            frames = [frame for frame in reader]
+        # Stack frames along a new leading axis and scale by 1/255.
+        frames_np = np.stack(frames, axis=0).astype(np.float32) / 255.0
+        # (F, H, W, C) -> (C, F, H, W)
+        tensor = torch.from_numpy(frames_np).permute(3, 0, 1, 2)
+        tensor = tensor.unsqueeze(0) # (B, C, F, H, W)
+        tensor = (tensor * 2.0) - 1.0 # Normalize to [-1, 1]
+        return tensor.to(self.device)
     def _preprocess_image_for_latent_conversion(self, image: Image.Image, target_resolution: tuple) -> Image.Image:
         """Resizes and fits an image to the target resolution for VAE encoding."""
         if image.size != target_resolution:
                                 video_resolution: int, use_continuity_director: bool,
                                 guidance_scale: float, stg_scale: float, num_inference_steps: int,
                                 progress: gr.Progress = gr.Progress()):
         FPS = 24
         FRAMES_PER_LATENT_CHUNK = 8
         LATENT_PROCESSING_CHUNK_SIZE = 4
         total_frames_brutos = self._quantize_to_multiple(int(seconds_per_fragment * FPS), FRAMES_PER_LATENT_CHUNK)
         frames_a_podar = self._quantize_to_multiple(int(total_frames_brutos * (trim_percent / 100)), FRAMES_PER_LATENT_CHUNK)
         latents_a_podar = frames_a_podar // FRAMES_PER_LATENT_CHUNK
+        total_latent_frames = total_frames_brutos // FRAMES_PER_LATENT_CHUNK
         DEJAVU_FRAME_TARGET = frames_a_podar - 1 if frames_a_podar > 0 else 0
         DESTINATION_FRAME_TARGET = total_frames_brutos - 1
             decision = gemini_manager_singleton.get_cinematic_decision(global_prompt, story_history, past_keyframe_path, start_keyframe_path, destination_keyframe_path, storyboard[i - 1] if i > 0 else "The beginning.", storyboard[i], future_story_prompt)
             transition_type, motion_prompt = decision["transition_type"], decision["motion_prompt"]
             story_history += f"\n- Act {fragment_index}: {motion_prompt}"
             conditioning_items = []
             if eco_latent_for_next_loop is None:
                img_start = self._preprocess_image_for_latent_conversion(Image.open(start_keyframe_path).convert("RGB"), target_resolution_tuple)
             else:
                conditioning_items.append(LatentConditioningItem(eco_latent_for_next_loop, 0, 1.0))
                conditioning_items.append(LatentConditioningItem(dejavu_latent_for_next_loop, DEJAVU_FRAME_TARGET, handler_strength))
+            if transition_type == "cut":
+                logger.info(f"Cinematic Director chose a 'cut'. Creating FFmpeg transition bridge...")
+                bridge_duration_seconds = FRAMES_PER_LATENT_CHUNK / FPS
+                bridge_video_path = video_encode_tool_singleton.create_transition_bridge(
+                    start_image_path=start_keyframe_path, end_image_path=destination_keyframe_path,
+                    duration=bridge_duration_seconds, fps=FPS, target_resolution=target_resolution_tuple,
+                    workspace_dir=self.workspace_dir
+                )
+                bridge_pixel_tensor = self.read_video_to_tensor(bridge_video_path)
+                bridge_latent_tensor = vae_manager_singleton.encode(bridge_pixel_tensor)
+                final_fade_latent = bridge_latent_tensor[:, :, -1:, :, :]
+                conditioning_items.append(LatentConditioningItem(final_fade_latent, total_latent_frames - 1, 0.95))
+                img_dest = self._preprocess_image_for_latent_conversion(Image.open(destination_keyframe_path).convert("RGB"), target_resolution_tuple)
+                conditioning_items.append(LatentConditioningItem(self.pil_to_latent(img_dest), DESTINATION_FRAME_TARGET, destination_convergence_strength * 0.5))
+                del bridge_pixel_tensor, bridge_latent_tensor, final_fade_latent
+                if os.path.exists(bridge_video_path): os.remove(bridge_video_path)
+            else:
+               img_dest = self._preprocess_image_for_latent_conversion(Image.open(destination_keyframe_path).convert("RGB"), target_resolution_tuple)
+               conditioning_items.append(LatentConditioningItem(self.pil_to_latent(img_dest), DESTINATION_FRAME_TARGET, destination_convergence_strength))
             current_ltx_params = {**base_ltx_params, "motion_prompt": motion_prompt}
             logger.info(f"Calling LTX to generate video latents for fragment {fragment_index} ({total_frames_brutos} frames)...")
             latents_brutos, _ = self._generate_latent_tensor_internal(conditioning_items, current_ltx_params, target_resolution_tuple, total_frames_brutos)
             num_latent_frames = latents_brutos.shape[2]
             logger.info(f"LTX responded with a latent tensor of shape {latents_brutos.shape}, representing ~{num_latent_frames * 8 + 1} video frames at {FPS} FPS.")
             last_trim = latents_brutos[:, :, -(latents_a_podar+1):, :, :].clone()
             eco_latent_for_next_loop = last_trim[:, :, :2, :, :].clone()
             dejavu_latent_for_next_loop = last_trim[:, :, -1:, :, :].clone()
             latents_video = latents_brutos[:, :, :-(latents_a_podar-1), :, :].clone()
             latents_video = latents_video[:, :, 1:, :, :]
             del last_trim, latents_brutos; gc.collect(); torch.cuda.empty_cache()
             if transition_type == "cut":
                 eco_latent_for_next_loop, dejavu_latent_for_next_loop = None, None
             cpu_latent = latents_video.cpu()
             latent_path = os.path.join(temp_latent_dir, f"latent_fragment_{i:04d}.pt")
             torch.save(cpu_latent, latent_path)
             logger.info(f"Batch {i+1} concatenated. Latent shape: {sub_group_latent.shape}")
             base_name = f"clip_{i:04d}_{run_timestamp}"
             current_clip_path = os.path.join(temp_video_clips_dir, f"{base_name}.mp4")
             pixel_tensor = vae_manager_singleton.decode(sub_group_latent)
             self.save_video_from_tensor(pixel_tensor, current_clip_path, fps=FPS)
             del pixel_tensor, sub_group_latent; gc.collect(); torch.cuda.empty_cache()
    def _generate_latent_tensor_internal(self, conditioning_items, ltx_params, target_resolution, total_frames_to_generate):
        """Internal helper to call the LTX manager.

        Builds the keyword arguments for
        `ltx_manager_singleton.generate_latent_fragment` and returns whatever
        that call returns (the caller in this file unpacks it as a 2-tuple).

        Args:
            conditioning_items: Passed through as 'conditioning_items_data'.
            ltx_params: Base keyword arguments; entries here are overridden by
                the keys set explicitly below when names collide.
            target_resolution: Indexable pair; element 0 is sent as 'width'
                and element 1 as 'height'.
            total_frames_to_generate: Sent as 'video_total_frames'.
        """
        # 'video_fps' is fixed at 24 here, and 'current_fragment_index' is the
        # current Unix time in whole seconds rather than a loop counter.
        final_ltx_params = {**ltx_params, 'width': target_resolution[0], 'height': target_resolution[1], 'video_total_frames': total_frames_to_generate, 'video_fps': 24, 'current_fragment_index': int(time.time()), 'conditioning_items_data': conditioning_items}
+        return ltx_manager_singleton.generate_latent_fragment(**final_ltx_params)
     def _quantize_to_multiple(self, n, m):
         """Helper to round n to the nearest multiple of m."""