# aduc_framework/engineers/deformes3D.py
#
# Copyright (C) August 4, 2025 Carlos Rodrigues dos Santos
#
# Versão 5.2.0 (Clean Architecture Executor with Streaming & DNA Provenance)
#
# Esta versão implementa o executor do Diretor Autônomo, emitindo atualizações
# em tempo real para a UI e registrando a proveniência de cada keyframe
# no DNA Digital da geração.
import os
import logging
import torch
from PIL import Image, ImageOps
import numpy as np
from typing import List, Dict, Any, Callable, Optional, Generator, Tuple
from .deformes2D_thinker import deformes2d_thinker_singleton
from ..types import LatentConditioningItem
from ..managers.ltx_manager import ltx_manager_singleton
from ..managers.vae_manager import vae_manager_singleton
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
# Optional progress hook; the engine calls it as callback(fraction, message).
ProgressCallback = Optional[Callable[[float, str], None]]
class Deformes3DEngine:
    """
    ADUC executor for keyframe generation.

    Operates under the orders of an autonomous AI director
    (``deformes2d_thinker_singleton``) and yields the keyframe list after
    every newly generated keyframe so callers can stream results.
    """

    # Fixed settings used for every keyframe request.
    # "conditioning_weights" / "conditioning_frames" are the values recorded
    # in the DNA data as "forca_condicionamento" / "frame_alvo".
    _DIRECTOR_PARAMS = {
        "max_retries_per_act": 3,
        "ltx_frames": 33,
        "ltx_fps": 24,
        "ltx_guidance_scale": 2.0,
        "ltx_inference_steps": 7,
        "conditioning_weights": {
            "memory_penultimate": 0.1,
            "memory_last": 0.5,
            "future_base": 0.7,
            "future_context": 0.05
        },
        "conditioning_frames": {
            "memory_penultimate": 0,
            "memory_last": 0,
            "future_anchor": 32
        }
    }

    def __init__(self):
        """Create an engine without a workspace; call ``initialize`` before use."""
        self.workspace_dir: Optional[str] = None
        self.device = 'cuda' if torch.cuda.is_available() else 'cpu'
        logger.info("Deformes3DEngine (Clean Executor) instanciado.")

    def initialize(self, workspace_dir: str):
        """Set the directory where keyframe images and latents are written."""
        self.workspace_dir = workspace_dir
        logger.info(f"3D Engine (Clean Executor) inicializado com workspace: {self.workspace_dir}.")

    def generate_keyframes(
        self,
        generation_state: Dict[str, Any],
        progress_callback: ProgressCallback = None
    ) -> Generator[List[Dict[str, Any]], None, None]:
        """
        Entry point that drives the autonomous direction cycle.

        Yields the list of keyframe metadata dicts after each new keyframe.

        Raises:
            RuntimeError: if ``initialize`` has not been called. Because this
                is a generator, the check runs on the first iteration, not
                when the method is called.
        """
        if not self.workspace_dir:
            raise RuntimeError("Deformes3DEngine não foi inicializado.")

        yield from self._manage_directorial_loop(generation_state, progress_callback)

    def _manage_directorial_loop(
        self,
        generation_state: Dict[str, Any],
        progress_callback: ProgressCallback
    ) -> Generator[List[Dict[str, Any]], None, None]:
        """
        Manage the state and flow of the loop: consult the director,
        delegate execution and emit updates.

        The same ``all_keyframes`` list object is yielded every time; it keeps
        growing (and may shrink on a "corrigir" order) between yields.
        """
        # NOTE(review): the key is spelled "Promt_geral" here; confirm it
        # matches whatever produces generation_state.
        prompt_geral = generation_state.get("Promt_geral", "")
        midias_usuario = [media["caminho"] for media in generation_state.get("midias_referencia", [])]
        atos_iniciais = [ato["resumo_ato"] for ato in generation_state.get("Atos", [])]
        resolution = generation_state.get("parametros_geracao", {}).get("pre_producao", {}).get('resolution', 512)

        max_retries = self._DIRECTOR_PARAMS["max_retries_per_act"]
        all_keyframes = []
        dynamic_script = list(atos_iniciais)
        current_act_index = 0
        retries_for_current_act = 0

        logger.info("--- INICIANDO CICLO DE DIREÇÃO AUTÔNOMA PARA GERAÇÃO DE KEYFRAMES ---")

        while current_act_index < len(dynamic_script):
            if progress_callback:
                # The loop condition guarantees a non-empty script here.
                progress = current_act_index / len(dynamic_script)
                progress_callback(progress, f"Diretor avaliando Ato {current_act_index + 1}")

            context = {
                "prompt_geral": prompt_geral, "midias_usuario": midias_usuario,
                "roteiro_completo": dynamic_script, "indice_ato_atual": current_act_index,
                "keyframes_gerados": all_keyframes, "tentativas_anteriores": retries_for_current_act
            }
            decision = deformes2d_thinker_singleton.get_directorial_decision(context)
            action = decision.get("acao", "avancar")

            # Once the retry budget is spent, a "corrigir" order falls through
            # and a keyframe is generated from the current decision anyway.
            if action == "corrigir" and retries_for_current_act < max_retries:
                logger.warning(f"ORDEM: Corrigir Ato {current_act_index}. Motivo: {decision.get('justificativa', 'N/A')}")
                # NOTE(review): this discards the previous act's keyframe but
                # does not step current_act_index back, so that keyframe is
                # not regenerated by this loop — confirm this is intended.
                if all_keyframes and all_keyframes[-1]['id'] == current_act_index - 1:
                    all_keyframes.pop()
                retries_for_current_act += 1
                continue

            if action == "improvisar":
                logger.info(f"ORDEM: Improvisar a partir do Ato {current_act_index + 1}. Motivo: {decision.get('justificativa', 'N/A')}")
                # Keep the current script when the director sends no script
                # (missing key or explicit null) instead of crashing on len().
                new_script = decision.get("novo_roteiro")
                if new_script is not None:
                    dynamic_script = new_script

            new_keyframe = self._generate_and_save_keyframe(
                decision, current_act_index, all_keyframes, resolution
            )
            all_keyframes.append(new_keyframe)
            yield all_keyframes

            current_act_index += 1
            retries_for_current_act = 0

        logger.info("--- GERAÇÃO DE KEYFRAMES PELO DIRETOR AUTÔNOMO CONCLUÍDA ---")

    def _generate_and_save_keyframe(self, decision: Dict, act_index: int, all_keyframes: List, resolution: int) -> Dict:
        """
        Generate a single keyframe, persist it and return its full metadata.

        Writes ``keyframe_{act_index:04d}_pixel.png`` and
        ``keyframe_{act_index:04d}_latent.pt`` into the workspace directory.

        Returns:
            Dict with the keyframe id, both file paths, the prompt used, the
            cut-point flag and the conditioning inputs ("entradas_latentes").
        """
        prompt = decision.get("prompt_proximo_keyframe", "Cena de transição cinematográfica.")
        base_media_path = decision.get("midia_base_escolhida")
        # Treat a missing key and an explicit null the same way.
        context_media_paths = decision.get("midias_contexto_escolhidas") or []

        conditioning_items, dna_data = self._prepare_ltx_conditioning(
            act_index, all_keyframes, base_media_path, context_media_paths, resolution
        )

        generated_latents, _ = ltx_manager_singleton.generate_latent_fragment(
            height=resolution, width=resolution, conditioning_items_data=conditioning_items,
            motion_prompt=prompt, video_total_frames=self._DIRECTOR_PARAMS["ltx_frames"],
            video_fps=self._DIRECTOR_PARAMS["ltx_fps"], guidance_scale=self._DIRECTOR_PARAMS["ltx_guidance_scale"],
            num_inference_steps=self._DIRECTOR_PARAMS["ltx_inference_steps"]
        )

        # Keep only the last slice along dim 2 (presumably the temporal
        # axis — TODO confirm); clone so the full fragment can be freed.
        new_latent = generated_latents[:, :, -1:, :, :].clone()

        pixel_path = os.path.join(self.workspace_dir, f"keyframe_{act_index:04d}_pixel.png")
        latent_path = os.path.join(self.workspace_dir, f"keyframe_{act_index:04d}_latent.pt")

        pixel_tensor = vae_manager_singleton.decode(new_latent)
        self._save_image_from_tensor(pixel_tensor, pixel_path)
        torch.save(new_latent.cpu(), latent_path)

        return {
            "id": act_index, "caminho_pixel": pixel_path, "caminho_latent": latent_path,
            "prompt_keyframe": prompt, "is_cut_point": decision.get("is_cut", False),
            "entradas_latentes": dna_data
        }

    def _prepare_ltx_conditioning(self, act_index: int, keyframes: List[Dict], base_path: Optional[str], context_paths: List[str], res: int) -> Tuple[List[LatentConditioningItem], List[Dict]]:
        """
        Build the LTX conditioning list and the matching DNA records.

        Order of the returned items: base media, last keyframe, penultimate
        keyframe, then at most two context media. When ``base_path`` is
        empty, both returned lists are empty.

        Returns:
            ``(items, dna_data)`` where ``dna_data[i]`` records the source
            path, target frame and conditioning strength of ``items[i]``.
        """
        items, dna_data = [], []
        res_tuple = (res, res)
        weights = self._DIRECTOR_PARAMS["conditioning_weights"]
        frames = self._DIRECTOR_PARAMS["conditioning_frames"]

        def to_latent_tensor(path):
            # Close the file handle deterministically; convert() returns a
            # loaded copy, so the image stays usable after the with-block.
            with Image.open(path) as source_image:
                rgb_image = source_image.convert("RGB")
            pil = self._preprocess_image_for_latent_conversion(rgb_image, res_tuple)
            tensor = self._pil_to_pixel_tensor(pil)
            return vae_manager_singleton.encode(tensor.to(self.device))

        if not base_path:
            logger.warning("Diretor não escolheu uma mídia base. A geração pode ser instável.")
            return items, dna_data

        items.append(LatentConditioningItem(to_latent_tensor(base_path), frames["future_anchor"], weights["future_base"]))
        dna_data.append({"caminho_origem": base_path, "frame_alvo": frames["future_anchor"], "forca_condicionamento": weights["future_base"]})

        if act_index > 0 and keyframes:
            last_kf_path = keyframes[-1]["caminho_latent"]
            last_latent = torch.load(last_kf_path, map_location=self.device)
            items.append(LatentConditioningItem(last_latent, frames["memory_last"], weights["memory_last"]))
            dna_data.append({"caminho_origem": last_kf_path, "frame_alvo": frames["memory_last"], "forca_condicionamento": weights["memory_last"]})

        if act_index > 1 and len(keyframes) >= 2:
            penultimate_kf_path = keyframes[-2]["caminho_latent"]
            penultimate_latent = torch.load(penultimate_kf_path, map_location=self.device)
            items.append(LatentConditioningItem(penultimate_latent, frames["memory_penultimate"], weights["memory_penultimate"]))
            dna_data.append({"caminho_origem": penultimate_kf_path, "frame_alvo": frames["memory_penultimate"], "forca_condicionamento": weights["memory_penultimate"]})

        # Only the first two context media are used.
        for path in context_paths[:2]:
            items.append(LatentConditioningItem(to_latent_tensor(path), frames["future_anchor"], weights["future_context"]))
            dna_data.append({"caminho_origem": path, "frame_alvo": frames["future_anchor"], "forca_condicionamento": weights["future_context"]})

        return items, dna_data

    def _preprocess_image_for_latent_conversion(self, image: Image.Image, target_resolution: tuple) -> Image.Image:
        """Return ``image`` fitted to ``target_resolution`` (unchanged if it already matches)."""
        return ImageOps.fit(image, target_resolution, Image.Resampling.LANCZOS) if image.size != target_resolution else image

    def _pil_to_pixel_tensor(self, pil_image: Image.Image) -> torch.Tensor:
        """
        Convert an H x W x C PIL image to a float tensor in [-1, 1].

        The result has shape (1, C, 1, H, W): a leading dim and a singleton
        dim 2 are inserted around the channel-first image.
        """
        image_np = np.array(pil_image, dtype=np.float32) / 255.0
        return (torch.from_numpy(image_np).permute(2, 0, 1).unsqueeze(0).unsqueeze(2) * 2.0) - 1.0

    def _save_image_from_tensor(self, pixel_tensor: torch.Tensor, path: str):
        """
        Save a pixel tensor as an image file at ``path``.

        Expects the layout produced by ``_pil_to_pixel_tensor``
        ((1, C, 1, H, W), values nominally in [-1, 1]); values outside that
        range are clamped.
        """
        tensor = pixel_tensor.squeeze(0).squeeze(1).permute(1, 2, 0)
        tensor = (tensor.clamp(-1, 1) + 1) / 2.0
        scaled = tensor.cpu().float().numpy() * 255
        # Round to nearest instead of truncating: truncation biases pixels
        # down by up to one level and makes the encode/save round trip lossy.
        image_np = np.rint(scaled).astype(np.uint8)
        Image.fromarray(image_np).save(path)
# Shared module-level engine instance; call initialize() before generating.
deformes3d_engine_singleton = Deformes3DEngine()