File size: 10,399 Bytes
99c6a62
 
 
 
6681346
99c6a62
6681346
 
 
99c6a62
 
 
 
 
041c39a
6681346
99c6a62
 
 
 
 
 
 
 
 
 
 
041c39a
5438346
99c6a62
041c39a
 
 
6a764b3
 
 
 
041c39a
6a764b3
 
 
5553c30
041c39a
 
 
5553c30
6a764b3
041c39a
 
 
99c6a62
 
041c39a
 
99c6a62
 
 
041c39a
99c6a62
041c39a
99c6a62
 
 
5438346
99c6a62
6681346
5438346
99c6a62
 
5438346
99c6a62
5438346
99c6a62
041c39a
 
 
 
5438346
041c39a
5438346
 
041c39a
 
 
 
5438346
041c39a
 
 
 
 
 
 
 
 
 
6681346
041c39a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
99c6a62
041c39a
99c6a62
5438346
 
041c39a
 
99c6a62
041c39a
99c6a62
041c39a
 
6681346
041c39a
5438346
041c39a
 
 
6681346
041c39a
 
 
 
6681346
 
 
041c39a
 
 
 
 
 
 
 
 
 
 
 
 
6681346
 
041c39a
 
6681346
 
 
041c39a
 
 
 
6681346
041c39a
 
 
 
 
5438346
6681346
5438346
6681346
 
041c39a
6681346
 
 
041c39a
6681346
 
 
 
 
041c39a
6681346
041c39a
 
6681346
 
041c39a
6681346
99c6a62
 
041c39a
99c6a62
 
041c39a
 
 
 
 
 
 
99c6a62
 
612544a
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
# aduc_framework/engineers/deformes3D.py
#
# Copyright (C) August 4, 2025  Carlos Rodrigues dos Santos
#
# Versão 5.2.0 (Clean Architecture Executor with Streaming & DNA Provenance)
#
# Esta versão implementa o executor do Diretor Autônomo, emitindo atualizações
# em tempo real para a UI e registrando a proveniência de cada keyframe
# no DNA Digital da geração.

import os
import logging
import torch
from PIL import Image, ImageOps
import numpy as np
from typing import List, Dict, Any, Callable, Optional, Generator, Tuple

from .deformes2D_thinker import deformes2d_thinker_singleton
from ..types import LatentConditioningItem
from ..managers.ltx_manager import ltx_manager_singleton
from ..managers.vae_manager import vae_manager_singleton

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Type of the optional progress hook. In this file it is invoked as
# callback(progress, message), where progress is current_act_index / len(script).
ProgressCallback = Optional[Callable[[float, str], None]]

class Deformes3DEngine:
    """
    ADUC executor for keyframe generation, operating under the orders of an
    autonomous AI Director and emitting results in real time.
    """
    
    # Fixed settings read by the methods of this class.
    _DIRECTOR_PARAMS = {
        # Maximum number of "corrigir" orders honoured by the director loop
        # before it stops retrying and generates the act anyway.
        "max_retries_per_act": 3,
        # The four ltx_* values are forwarded to
        # ltx_manager_singleton.generate_latent_fragment as video_total_frames,
        # video_fps, guidance_scale and num_inference_steps respectively.
        "ltx_frames": 33,
        "ltx_fps": 24,
        "ltx_guidance_scale": 2.0,
        "ltx_inference_steps": 7,
        # Third argument given to each LatentConditioningItem and recorded as
        # "forca_condicionamento" in the DNA entries.
        "conditioning_weights": {
            "memory_penultimate": 0.1,  # second-to-last generated keyframe
            "memory_last": 0.5,         # last generated keyframe
            "future_base": 0.7,         # base media chosen by the director
            "future_context": 0.05      # extra context media (at most two are used)
        },
        # Second argument given to each LatentConditioningItem and recorded as
        # "frame_alvo" in the DNA entries.
        "conditioning_frames": {
            "memory_penultimate": 0,
            "memory_last": 0,
            # NOTE(review): presumably the last frame index of the 33-frame
            # fragment (ltx_frames - 1) - confirm against the LTX manager.
            "future_anchor": 32
        }
    }

    def __init__(self):
        """
        Create an engine with no workspace assigned yet.

        The compute device is set to 'cuda' when torch reports CUDA as
        available and to 'cpu' otherwise. initialize() must be called before
        keyframes can be generated.
        """
        self.workspace_dir: Optional[str] = None
        if torch.cuda.is_available():
            self.device = 'cuda'
        else:
            self.device = 'cpu'
        logger.info("Deformes3DEngine (Clean Executor) instanciado.")

    def initialize(self, workspace_dir: str) -> None:
        """
        Bind the engine to the directory where keyframe files are written.

        Args:
            workspace_dir: Directory used as the parent of the keyframe image
                and latent files produced by this engine.
        """
        self.workspace_dir = workspace_dir
        logger.info(f"3D Engine (Clean Executor) inicializado com workspace: {self.workspace_dir}.")

    def generate_keyframes(
        self,
        generation_state: Dict[str, Any],
        progress_callback: ProgressCallback = None
    ) -> Generator[List[Dict[str, Any]], None, None]:
        """
        Ponto de entrada que orquestra o ciclo de direção autônoma,
        emitindo a lista de keyframes a cada nova geração.
        """
        if not self.workspace_dir:
            raise RuntimeError("Deformes3DEngine não foi inicializado.")

        yield from self._manage_directorial_loop(generation_state, progress_callback)

    def _manage_directorial_loop(
        self,
        generation_state: Dict[str, Any],
        progress_callback: ProgressCallback
    ) -> Generator[List[Dict[str, Any]], None, None]:
        """
        Run the director loop: consult the director for each act, carry out
        its order and yield the keyframe list after every generated keyframe.

        On each iteration deformes2d_thinker_singleton receives the current
        context and answers with a decision dict whose "acao" field selects
        what happens next:

        * "corrigir": the keyframe of the previous act is rejected. It is
          removed from the list and the loop steps back one act so that the
          keyframe is generated again. At most ``max_retries_per_act``
          corrections are honoured; once that budget is spent the order is
          ignored and the current act is generated with the same decision.
        * "improvisar": the script is replaced by "novo_roteiro" (when the
          decision provides one) and the current act is generated.
        * any other value (default "avancar"): the current act is generated.

        Args:
            generation_state: Dict read for "Promt_geral", "midias_referencia"
                (items with a "caminho" key), "Atos" (items with a
                "resumo_ato" key) and
                parametros_geracao -> pre_producao -> resolution (default 512).
            progress_callback: Optional hook called before each director
                consultation with (current_act_index / script length, message).

        Yields:
            The accumulated list of keyframe metadata dicts. The same list
            object is yielded every time and is mutated in place, including
            removals caused by a correction.
        """
        # NOTE(review): "Promt_geral" looks like a misspelling of "Prompt_geral";
        # it is kept verbatim because it must match whatever key the producer
        # of generation_state uses - confirm against that code.
        prompt_geral = generation_state.get("Promt_geral", "")
        midias_usuario = [media["caminho"] for media in generation_state.get("midias_referencia", [])]
        atos_iniciais = [ato["resumo_ato"] for ato in generation_state.get("Atos", [])]
        resolution = generation_state.get("parametros_geracao", {}).get("pre_producao", {}).get('resolution', 512)
        max_retries = self._DIRECTOR_PARAMS["max_retries_per_act"]

        all_keyframes: List[Dict[str, Any]] = []
        dynamic_script = list(atos_iniciais)
        current_act_index = 0
        retries_for_current_act = 0
        # Highest act index that has produced a keyframe so far. Used to tell a
        # first-time generation from a regeneration triggered by a correction.
        furthest_generated_index = -1

        logger.info("--- INICIANDO CICLO DE DIREÇÃO AUTÔNOMA PARA GERAÇÃO DE KEYFRAMES ---")

        while current_act_index < len(dynamic_script):
            if progress_callback:
                # The loop condition guarantees a non-empty script here.
                progress = current_act_index / len(dynamic_script)
                progress_callback(progress, f"Diretor avaliando Ato {current_act_index + 1}")

            context = {
                "prompt_geral": prompt_geral, "midias_usuario": midias_usuario,
                "roteiro_completo": dynamic_script, "indice_ato_atual": current_act_index,
                "keyframes_gerados": all_keyframes, "tentativas_anteriores": retries_for_current_act
            }
            decision = deformes2d_thinker_singleton.get_directorial_decision(context)
            action = decision.get("acao", "avancar")

            if action == "corrigir" and retries_for_current_act < max_retries:
                logger.warning(f"ORDEM: Corrigir Ato {current_act_index}. Motivo: {decision.get('justificativa', 'N/A')}")
                if all_keyframes and all_keyframes[-1]['id'] == current_act_index - 1:
                    all_keyframes.pop()
                    # Step back so the rejected keyframe is produced again.
                    # Without this the list would keep a permanent gap at the
                    # corrected act.
                    current_act_index -= 1
                retries_for_current_act += 1
                continue

            if action == "improvisar":
                logger.info(f"ORDEM: Improvisar a partir do Ato {current_act_index + 1}. Motivo: {decision.get('justificativa', 'N/A')}")
                # A key that is present but null must not replace the script:
                # len(None) would crash the loop condition.
                novo_roteiro = decision.get("novo_roteiro")
                if novo_roteiro is not None:
                    dynamic_script = novo_roteiro

            new_keyframe = self._generate_and_save_keyframe(
                decision, current_act_index, all_keyframes, resolution
            )
            all_keyframes.append(new_keyframe)

            yield all_keyframes

            # Reset the retry budget only when the loop moves past the furthest
            # act reached so far. A regeneration after a correction keeps the
            # count, so repeated "corrigir" orders stay bounded by max_retries.
            if current_act_index > furthest_generated_index:
                furthest_generated_index = current_act_index
                retries_for_current_act = 0
            current_act_index += 1

        logger.info("--- GERAÇÃO DE KEYFRAMES PELO DIRETOR AUTÔNOMO CONCLUÍDA ---")

    def _generate_and_save_keyframe(self, decision: Dict, act_index: int, all_keyframes: List, resolution: int) -> Dict:
        """
        Generate one keyframe from a director decision, persist it and return
        its metadata.

        Args:
            decision: Director decision; read for "prompt_proximo_keyframe",
                "midia_base_escolhida", "midias_contexto_escolhidas" and "is_cut".
            act_index: Index of the act being generated; used as the keyframe
                id and in the output file names.
            all_keyframes: Keyframes generated so far, forwarded to the
                conditioning builder.
            resolution: Used as both height and width of the generation.

        Returns:
            Dict with "id", "caminho_pixel", "caminho_latent",
            "prompt_keyframe", "is_cut_point" and "entradas_latentes".
        """
        settings = self._DIRECTOR_PARAMS
        keyframe_prompt = decision.get("prompt_proximo_keyframe", "Cena de transição cinematográfica.")
        chosen_base = decision.get("midia_base_escolhida")
        chosen_context = decision.get("midias_contexto_escolhidas", [])

        conditioning, provenance = self._prepare_ltx_conditioning(
            act_index, all_keyframes, chosen_base, chosen_context, resolution
        )

        fragment_latents, _ = ltx_manager_singleton.generate_latent_fragment(
            height=resolution,
            width=resolution,
            conditioning_items_data=conditioning,
            motion_prompt=keyframe_prompt,
            video_total_frames=settings["ltx_frames"],
            video_fps=settings["ltx_fps"],
            guidance_scale=settings["ltx_guidance_scale"],
            num_inference_steps=settings["ltx_inference_steps"],
        )
        # Keep only the final slice along dim 2 as the new keyframe latent
        # (assumes dim 2 is the temporal axis - TODO confirm with the LTX manager).
        keyframe_latent = fragment_latents[:, :, -1:, :, :].clone()

        image_file = os.path.join(self.workspace_dir, f"keyframe_{act_index:04d}_pixel.png")
        latent_file = os.path.join(self.workspace_dir, f"keyframe_{act_index:04d}_latent.pt")

        decoded_pixels = vae_manager_singleton.decode(keyframe_latent)
        self._save_image_from_tensor(decoded_pixels, image_file)
        torch.save(keyframe_latent.cpu(), latent_file)

        metadata = {
            "id": act_index,
            "caminho_pixel": image_file,
            "caminho_latent": latent_file,
            "prompt_keyframe": keyframe_prompt,
            "is_cut_point": decision.get("is_cut", False),
            "entradas_latentes": provenance,
        }
        return metadata

    def _prepare_ltx_conditioning(self, act_index: int, keyframes: List[Dict], base_path: str, context_paths: List[str], res: int) -> Tuple[List[LatentConditioningItem], List[Dict]]:
        """
        Build the LTX conditioning items and the matching provenance ("DNA") records.

        Items are added in this order, each with the frame and weight taken
        from _DIRECTOR_PARAMS:

        1. the base media chosen by the director ("future_anchor" / "future_base");
        2. the latent of the last generated keyframe, when act_index > 0 and
           at least one keyframe exists ("memory_last");
        3. the latent of the second-to-last keyframe, when act_index > 1 and
           at least two keyframes exist ("memory_penultimate");
        4. up to two context media ("future_anchor" / "future_context").

        Args:
            act_index: Index of the act being generated.
            keyframes: Metadata of the keyframes generated so far; each entry
                is read for "caminho_latent".
            base_path: Image path chosen as the base media. When falsy, a
                warning is logged and two empty lists are returned.
            context_paths: Image paths used as extra context; only the first
                two are used. ``None`` is treated as an empty list.
            res: Target side length; images are fitted to (res, res).

        Returns:
            Tuple ``(items, dna_data)`` where ``dna_data`` holds one dict per
            item with "caminho_origem", "frame_alvo" and "forca_condicionamento".
        """
        items: List[LatentConditioningItem] = []
        dna_data: List[Dict] = []
        res_tuple = (res, res)
        weights = self._DIRECTOR_PARAMS["conditioning_weights"]
        frames = self._DIRECTOR_PARAMS["conditioning_frames"]

        def to_latent_tensor(path):
            # Image.open is lazy and holds the file handle; convert() yields an
            # independent in-memory image, so the source can be closed right away.
            with Image.open(path) as source_image:
                rgb_image = source_image.convert("RGB")
            pil = self._preprocess_image_for_latent_conversion(rgb_image, res_tuple)
            tensor = self._pil_to_pixel_tensor(pil)
            return vae_manager_singleton.encode(tensor.to(self.device))

        def add_condition(latent, origin_path, frame, weight):
            # Keep the conditioning item and its provenance record in lockstep.
            items.append(LatentConditioningItem(latent, frame, weight))
            dna_data.append({"caminho_origem": origin_path, "frame_alvo": frame, "forca_condicionamento": weight})

        if not base_path:
            logger.warning("Diretor não escolheu uma mídia base. A geração pode ser instável.")
            return items, dna_data

        add_condition(to_latent_tensor(base_path), base_path, frames["future_anchor"], weights["future_base"])

        if act_index > 0 and keyframes:
            # NOTE(review): torch.load unpickles the file; these latents are
            # written by this engine itself, so they are treated as trusted.
            last_kf_path = keyframes[-1]["caminho_latent"]
            last_latent = torch.load(last_kf_path, map_location=self.device)
            add_condition(last_latent, last_kf_path, frames["memory_last"], weights["memory_last"])

            if act_index > 1 and len(keyframes) >= 2:
                penultimate_kf_path = keyframes[-2]["caminho_latent"]
                penultimate_latent = torch.load(penultimate_kf_path, map_location=self.device)
                add_condition(penultimate_latent, penultimate_kf_path, frames["memory_penultimate"], weights["memory_penultimate"])

        # The decision may carry an explicit null for the context list.
        for path in (context_paths or [])[:2]:
            add_condition(to_latent_tensor(path), path, frames["future_anchor"], weights["future_context"])

        return items, dna_data

    def _preprocess_image_for_latent_conversion(self, image: Image.Image, target_resolution: tuple) -> Image.Image:
        return ImageOps.fit(image, target_resolution, Image.Resampling.LANCZOS) if image.size != target_resolution else image
        
    def _pil_to_pixel_tensor(self, pil_image: Image.Image) -> torch.Tensor:
        image_np = np.array(pil_image, dtype=np.float32) / 255.0
        return (torch.from_numpy(image_np).permute(2, 0, 1).unsqueeze(0).unsqueeze(2) * 2.0) - 1.0

    def _save_image_from_tensor(self, pixel_tensor: torch.Tensor, path: str):
        """
        Write a pixel tensor with values in [-1, 1] to ``path`` as an image.

        Dimensions 0 and then 1 are squeezed and the remainder is permuted to
        channel-last order. Values are clamped to [-1, 1], rescaled to
        [0, 255] and cast to uint8 (the cast truncates) before saving.
        """
        channel_last = pixel_tensor.squeeze(0).squeeze(1).permute(1, 2, 0)
        unit_range = (channel_last.clamp(-1, 1) + 1) / 2.0
        host_array = unit_range.cpu().float().numpy()
        byte_pixels = (host_array * 255).astype(np.uint8)
        Image.fromarray(byte_pixels).save(path)

# Shared module-level engine instance, created when this module is imported.
# initialize() must be called on it before generate_keyframes() is iterated,
# otherwise a RuntimeError is raised.
deformes3d_engine_singleton = Deformes3DEngine()