Spaces:
Build error
Build error
# aduc_framework/engineers/deformes3D.py
#
# Copyright (C) August 4, 2025 Carlos Rodrigues dos Santos
#
# Versão 5.2.0 (Clean Architecture Executor with Streaming & DNA Provenance)
#
# Esta versão implementa o executor do Diretor Autônomo, emitindo atualizações
# em tempo real para a UI e registrando a proveniência de cada keyframe
# no DNA Digital da geração.
| import os | |
| import logging | |
| import torch | |
| from PIL import Image, ImageOps | |
| import numpy as np | |
| from typing import List, Dict, Any, Callable, Optional, Generator, Tuple | |
| from .deformes2D_thinker import deformes2d_thinker_singleton | |
| from ..types import LatentConditioningItem | |
| from ..managers.ltx_manager import ltx_manager_singleton | |
| from ..managers.vae_manager import vae_manager_singleton | |
# Module-level logger named after this module; handlers and levels are left
# to the hosting application.
logger = logging.getLogger(__name__)

# Type alias for the optional progress hook. When provided it is invoked as
# callback(progress_value, message); passing None disables progress reporting.
ProgressCallback = Optional[Callable[[float, str], None]]
class Deformes3DEngine:
    """
    ADUC executor for keyframe generation.

    Operates under the orders of an autonomous AI director (the 2D thinker
    singleton) and emits the growing list of keyframes in real time, recording
    for each keyframe which conditioning inputs produced it.
    """

    # Fixed generation parameters.
    #   max_retries_per_act  - cap on "corrigir" (correct) orders honoured per act.
    #   ltx_*                - forwarded to ltx_manager_singleton.generate_latent_fragment.
    #   conditioning_weights - strength given to each conditioning item.
    #   conditioning_frames  - target frame index of each conditioning item;
    #                          32 is the last index of a 33-frame fragment.
    _DIRECTOR_PARAMS = {
        "max_retries_per_act": 3,
        "ltx_frames": 33,
        "ltx_fps": 24,
        "ltx_guidance_scale": 2.0,
        "ltx_inference_steps": 7,
        "conditioning_weights": {
            "memory_penultimate": 0.1,
            "memory_last": 0.5,
            "future_base": 0.7,
            "future_context": 0.05
        },
        "conditioning_frames": {
            "memory_penultimate": 0,
            "memory_last": 0,
            "future_anchor": 32
        }
    }

    def __init__(self):
        """Create an engine with no workspace; call initialize() before use."""
        self.workspace_dir: Optional[str] = None
        self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
        logger.info("Deformes3DEngine (Clean Executor) instanciado.")

    def initialize(self, workspace_dir: str):
        """Set the directory where keyframe images and latents are written."""
        self.workspace_dir = workspace_dir
        logger.info(f"3D Engine (Clean Executor) inicializado com workspace: {self.workspace_dir}.")

    def generate_keyframes(
        self,
        generation_state: Dict[str, Any],
        progress_callback: "ProgressCallback" = None
    ) -> Generator[List[Dict[str, Any]], None, None]:
        """
        Entry point that orchestrates the autonomous direction cycle.

        Args:
            generation_state: Generation state dictionary read by the
                directorial loop (prompt, reference media, acts, parameters).
            progress_callback: Optional hook called as
                ``progress_callback(fraction, message)`` before each decision.

        Returns:
            A generator that yields the list of keyframe metadata after every
            newly generated keyframe.

        Raises:
            RuntimeError: If initialize() has not been called.
        """
        # This is deliberately a plain function (not a generator function):
        # the initialization check runs at call time instead of being
        # deferred until the caller first iterates the result.
        if not self.workspace_dir:
            raise RuntimeError("Deformes3DEngine não foi inicializado.")
        return self._manage_directorial_loop(generation_state, progress_callback)

    def _manage_directorial_loop(
        self,
        generation_state: Dict[str, Any],
        progress_callback: "ProgressCallback"
    ) -> Generator[List[Dict[str, Any]], None, None]:
        """
        SRP: manage loop state and flow, consulting the director, delegating
        execution and emitting updates.

        For every act the director is asked for a decision whose ``"acao"``
        is one of:
          * ``"corrigir"``   - drop the previous act's keyframe (if it is the
            last one generated) and ask again, up to ``max_retries_per_act``
            times; once the retries are exhausted the act is generated anyway.
          * ``"improvisar"`` - replace the script with ``"novo_roteiro"`` and
            generate the current act.
          * anything else (default ``"avancar"``) - generate the current act.

        Yields:
            A snapshot (shallow copy) of the keyframe list after each new
            keyframe, so later corrections do not mutate lists already handed
            to the consumer.
        """
        # NOTE(review): the key is spelled "Promt_geral" (sic); it is kept
        # as-is because it presumably matches the producer of
        # generation_state - confirm.
        prompt_geral = generation_state.get("Promt_geral", "")
        midias_usuario = [media["caminho"] for media in generation_state.get("midias_referencia", [])]
        atos_iniciais = [ato["resumo_ato"] for ato in generation_state.get("Atos", [])]
        resolution = (
            generation_state.get("parametros_geracao", {})
            .get("pre_producao", {})
            .get('resolution', 512)
        )
        max_retries = self._DIRECTOR_PARAMS["max_retries_per_act"]

        all_keyframes: List[Dict[str, Any]] = []
        dynamic_script = list(atos_iniciais)
        current_act_index = 0
        retries_for_current_act = 0

        logger.info("--- INICIANDO CICLO DE DIREÇÃO AUTÔNOMA PARA GERAÇÃO DE KEYFRAMES ---")
        while current_act_index < len(dynamic_script):
            if progress_callback:
                # The loop condition guarantees a non-empty script here.
                progress = current_act_index / len(dynamic_script)
                progress_callback(progress, f"Diretor avaliando Ato {current_act_index + 1}")

            context = {
                "prompt_geral": prompt_geral, "midias_usuario": midias_usuario,
                "roteiro_completo": dynamic_script, "indice_ato_atual": current_act_index,
                "keyframes_gerados": all_keyframes, "tentativas_anteriores": retries_for_current_act
            }
            decision = deformes2d_thinker_singleton.get_directorial_decision(context)
            action = decision.get("acao", "avancar")

            if action == "corrigir" and retries_for_current_act < max_retries:
                logger.warning(f"ORDEM: Corrigir Ato {current_act_index}. Motivo: {decision.get('justificativa', 'N/A')}")
                # NOTE(review): the previous act's keyframe is discarded but
                # the act index is not rewound, so that keyframe id is never
                # regenerated - confirm this is the intended behaviour.
                if all_keyframes and all_keyframes[-1]['id'] == current_act_index - 1:
                    all_keyframes.pop()
                retries_for_current_act += 1
                continue

            if action == "improvisar":
                logger.info(f"ORDEM: Improvisar a partir do Ato {current_act_index + 1}. Motivo: {decision.get('justificativa', 'N/A')}")
                # Keep the current script when the director supplies no
                # replacement (missing key or explicit None); a None script
                # would otherwise crash the loop condition.
                novo_roteiro = decision.get("novo_roteiro")
                if novo_roteiro is not None:
                    dynamic_script = novo_roteiro

            new_keyframe = self._generate_and_save_keyframe(
                decision, current_act_index, all_keyframes, resolution
            )
            all_keyframes.append(new_keyframe)
            # Emit a copy: the working list is still appended to and popped
            # from on subsequent iterations.
            yield list(all_keyframes)

            current_act_index += 1
            retries_for_current_act = 0

        logger.info("--- GERAÇÃO DE KEYFRAMES PELO DIRETOR AUTÔNOMO CONCLUÍDA ---")

    def _generate_and_save_keyframe(self, decision: Dict, act_index: int, all_keyframes: List, resolution: int) -> Dict:
        """
        SRP: generate a single keyframe and return its complete metadata.

        Builds the conditioning items, asks the LTX manager for a latent
        fragment, keeps only the last latent frame, decodes it to pixels and
        writes both the PNG and the latent tensor into the workspace.

        Args:
            decision: Director decision; reads ``"prompt_proximo_keyframe"``,
                ``"midia_base_escolhida"``, ``"midias_contexto_escolhidas"``
                and ``"is_cut"``.
            act_index: Index of the act; used as keyframe id and in file names.
            all_keyframes: Keyframes generated so far (memory conditioning).
            resolution: Side length used for both height and width.

        Returns:
            Dict with ``id``, ``caminho_pixel``, ``caminho_latent``,
            ``prompt_keyframe``, ``is_cut_point`` and ``entradas_latentes``.
        """
        prompt = decision.get("prompt_proximo_keyframe", "Cena de transição cinematográfica.")
        base_media_path = decision.get("midia_base_escolhida")
        # "or []" also covers an explicit None value from the director.
        context_media_paths = decision.get("midias_contexto_escolhidas") or []

        conditioning_items, dna_data = self._prepare_ltx_conditioning(
            act_index, all_keyframes, base_media_path, context_media_paths, resolution
        )

        params = self._DIRECTOR_PARAMS
        generated_latents, _ = ltx_manager_singleton.generate_latent_fragment(
            height=resolution, width=resolution, conditioning_items_data=conditioning_items,
            motion_prompt=prompt, video_total_frames=params["ltx_frames"],
            video_fps=params["ltx_fps"], guidance_scale=params["ltx_guidance_scale"],
            num_inference_steps=params["ltx_inference_steps"]
        )

        # Only the final latent frame of the fragment becomes the keyframe.
        new_latent = generated_latents[:, :, -1:, :, :].clone()

        pixel_path = os.path.join(self.workspace_dir, f"keyframe_{act_index:04d}_pixel.png")
        latent_path = os.path.join(self.workspace_dir, f"keyframe_{act_index:04d}_latent.pt")

        pixel_tensor = vae_manager_singleton.decode(new_latent)
        self._save_image_from_tensor(pixel_tensor, pixel_path)
        torch.save(new_latent.cpu(), latent_path)

        return {
            "id": act_index, "caminho_pixel": pixel_path, "caminho_latent": latent_path,
            "prompt_keyframe": prompt, "is_cut_point": decision.get("is_cut", False),
            "entradas_latentes": dna_data
        }

    def _prepare_ltx_conditioning(
        self,
        act_index: int,
        keyframes: List[Dict],
        base_path: Optional[str],
        context_paths: List[str],
        res: int
    ) -> "Tuple[List[LatentConditioningItem], List[Dict]]":
        """
        SRP: build the LTX conditioning list and the matching DNA records.

        Items are added in this order: the base media (future anchor), the
        last keyframe latent, the penultimate keyframe latent, and up to two
        context media (future anchor, low weight).

        Args:
            act_index: Index of the act being generated.
            keyframes: Keyframes generated so far; ``"caminho_latent"`` is read.
            base_path: Image chosen by the director as base media. When falsy,
                a warning is logged and two empty lists are returned - no
                memory or context conditioning is added either.
            context_paths: Extra context image paths; only the first two are used.
            res: Side length of the square target resolution.

        Returns:
            ``(items, dna_data)``: the conditioning items and, for each one, a
            dict with ``caminho_origem``, ``frame_alvo`` and
            ``forca_condicionamento``.
        """
        items, dna_data = [], []
        res_tuple = (res, res)
        weights = self._DIRECTOR_PARAMS["conditioning_weights"]
        frames = self._DIRECTOR_PARAMS["conditioning_frames"]

        def to_latent_tensor(path):
            # Context manager so the underlying file is closed
            # deterministically; convert() returns an independent image.
            with Image.open(path) as source_image:
                rgb_image = source_image.convert("RGB")
            pil = self._preprocess_image_for_latent_conversion(rgb_image, res_tuple)
            tensor = self._pil_to_pixel_tensor(pil)
            return vae_manager_singleton.encode(tensor.to(self.device))

        def register(latent, origin, frame_key, weight_key):
            # Keep the conditioning item and its provenance record in lockstep.
            items.append(LatentConditioningItem(latent, frames[frame_key], weights[weight_key]))
            dna_data.append({
                "caminho_origem": origin,
                "frame_alvo": frames[frame_key],
                "forca_condicionamento": weights[weight_key]
            })

        if not base_path:
            logger.warning("Diretor não escolheu uma mídia base. A geração pode ser instável.")
            return items, dna_data

        register(to_latent_tensor(base_path), base_path, "future_anchor", "future_base")

        if act_index > 0 and keyframes:
            last_kf_path = keyframes[-1]["caminho_latent"]
            last_latent = torch.load(last_kf_path, map_location=self.device)
            register(last_latent, last_kf_path, "memory_last", "memory_last")

        if act_index > 1 and len(keyframes) >= 2:
            penultimate_kf_path = keyframes[-2]["caminho_latent"]
            penultimate_latent = torch.load(penultimate_kf_path, map_location=self.device)
            register(penultimate_latent, penultimate_kf_path, "memory_penultimate", "memory_penultimate")

        # Tolerate None so a direct caller does not crash on slicing.
        for path in (context_paths or [])[:2]:
            register(to_latent_tensor(path), path, "future_anchor", "future_context")

        return items, dna_data

    def _preprocess_image_for_latent_conversion(self, image: "Image.Image", target_resolution: tuple) -> "Image.Image":
        """Return ``image`` fitted to ``target_resolution`` with LANCZOS resampling, or unchanged if it already has that size."""
        if image.size == target_resolution:
            return image
        return ImageOps.fit(image, target_resolution, Image.Resampling.LANCZOS)

    def _pil_to_pixel_tensor(self, pil_image: "Image.Image") -> torch.Tensor:
        """
        Convert an image to a float tensor scaled to [-1, 1].

        An (H, W, C) input with 0-255 values becomes a tensor of shape
        (1, C, 1, H, W).
        """
        image_np = np.array(pil_image, dtype=np.float32) / 255.0
        channels_first = torch.from_numpy(image_np).permute(2, 0, 1)
        return (channels_first.unsqueeze(0).unsqueeze(2) * 2.0) - 1.0

    def _tensor_to_uint8_array(self, pixel_tensor: torch.Tensor) -> np.ndarray:
        """
        Convert a (1, C, 1, H, W) tensor in [-1, 1] to an (H, W, C) uint8 array.

        Values outside [-1, 1] are clamped. The result is rounded to the
        nearest integer rather than truncated, so an 8-bit image survives the
        tensor round trip without losing a level to float error.
        """
        tensor = pixel_tensor.detach().squeeze(0).squeeze(1).permute(1, 2, 0)
        tensor = (tensor.clamp(-1, 1) + 1) / 2.0
        scaled = tensor.cpu().float().numpy() * 255
        return np.rint(scaled).astype(np.uint8)

    def _save_image_from_tensor(self, pixel_tensor: torch.Tensor, path: str):
        """Write ``pixel_tensor`` (see _tensor_to_uint8_array) as an image file at ``path``."""
        Image.fromarray(self._tensor_to_uint8_array(pixel_tensor)).save(path)
# Shared module-level engine instance. It starts without a workspace:
# initialize() must be called before generate_keyframes().
deformes3d_engine_singleton = Deformes3DEngine()