TestingwithNegEuler

Sleeping

App Files Files Community

dagloop5 committed on May 28

Commit

c963051

verified ·

1 Parent(s): 1c287c3

Update app.py

Browse files

Files changed (1) hide show

app.py +67 -0

app.py CHANGED Viewed

@@ -95,6 +95,73 @@ try:
 except Exception as e:
     print(f"[ATTN] xformers patch FAILED: {type(e).__name__}: {e}")
 logging.getLogger().setLevel(logging.INFO)
 MAX_SEED = np.iinfo(np.int32).max

 except Exception as e:
     print(f"[ATTN] xformers patch FAILED: {type(e).__name__}: {e}")
+# Monkey-patch Attention.forward to accept pre-computed K/V; this must run after the imports in app.py.
+def _patch_attention_for_kv_cache():
+    """Monkey-patch ``Attention.forward`` to accept pre-computed K/V tensors.
+
+    The replacement ``forward`` takes two extra optional arguments,
+    ``cached_k`` and ``cached_v``. When one is supplied, the matching
+    ``to_k`` / ``to_v`` projection of ``context`` is skipped and the given
+    tensor is used instead; when both are ``None``, K and V are projected
+    from ``context`` on every call.
+
+    The patch is applied at most once. The method that was replaced stays
+    reachable as ``Attention.forward._unpatched_forward`` so the patch can
+    be inspected or undone.
+    """
+    from ltx_core.model.transformer.attention import Attention
+    if getattr(Attention.forward, "_unpatched_forward", None) is not None:
+        # Already patched: wrapping again would lose the genuine original.
+        return
+    original_forward = Attention.forward
+    def patched_forward(
+        self,
+        x: torch.Tensor,
+        context: torch.Tensor | None = None,
+        mask: torch.Tensor | None = None,
+        pe: torch.Tensor | None = None,
+        k_pe: torch.Tensor | None = None,
+        perturbation_mask: torch.Tensor | None = None,
+        all_perturbed: bool = False,
+        # NEW: pre-computed KV for cross-attention
+        cached_k: torch.Tensor | None = None,
+        cached_v: torch.Tensor | None = None,
+    ) -> torch.Tensor:
+        """Run attention, optionally reusing pre-computed key/value tensors.
+
+        Args:
+            x: Query-side input; also used as ``context`` when none is given.
+            context: Key/value-side input. Defaults to ``x``.
+            mask: Passed through to ``self.attention_function``.
+            pe: Rotary embedding applied to Q (and to K when ``k_pe`` is
+                ``None``). RoPE is skipped entirely when ``pe`` is ``None``.
+            k_pe: Rotary embedding for K; only consulted when ``pe`` is set.
+            perturbation_mask: When given, blends the attention output with
+                ``v`` as ``out * mask + v * (1 - mask)``.
+            all_perturbed: When true, attention is bypassed and ``v`` is
+                used as the pre-gating output.
+            cached_k: Replaces ``self.k_norm(self.to_k(context))``.
+            cached_v: Replaces ``self.to_v(context)``.
+
+        Returns:
+            ``self.to_out`` applied to the (optionally gated) output.
+        """
+        context = x if context is None else context
+        v = cached_v if cached_v is not None else self.to_v(context)
+        if all_perturbed:
+            # Attention is skipped; the value tensor is passed straight on.
+            out = v
+        else:
+            q = self.q_norm(self.to_q(x))
+            # cached_k is used as given: k_norm is NOT applied to it here.
+            # NOTE(review): callers presumably cache K after to_k + k_norm
+            # but before RoPE (RoPE is still applied below) - confirm.
+            k = cached_k if cached_k is not None else self.k_norm(self.to_k(context))
+            if pe is not None:
+                # NOTE(review): apply_rotary_emb is resolved from app.py's
+                # module globals at call time - confirm it is imported there.
+                q = apply_rotary_emb(q, pe, self.rope_type)
+                k = apply_rotary_emb(k, pe if k_pe is None else k_pe, self.rope_type)
+            out = self.attention_function(q, k, v, self.heads, mask)
+            if perturbation_mask is not None:
+                out = out * perturbation_mask + v * (1 - perturbation_mask)
+        # Optional per-head gating: scale each head by 2 * sigmoid(logit).
+        if self.to_gate_logits is not None:
+            gate_logits = self.to_gate_logits(x)
+            b, t, _ = out.shape
+            # Split the last dim into (heads, dim_head) so the gate can be
+            # broadcast over dim_head, then merge the heads back.
+            out = out.view(b, t, self.heads, self.dim_head)
+            gates = 2.0 * torch.sigmoid(gate_logits)
+            out = out * gates.unsqueeze(-1)
+            out = out.view(b, t, self.heads * self.dim_head)
+        return self.to_out(out)
+    # Keep the replaced method reachable; this attribute is also the marker
+    # the idempotency guard above looks for.
+    patched_forward._unpatched_forward = original_forward
+    Attention.forward = patched_forward
+_patch_attention_for_kv_cache()
 logging.getLogger().setLevel(logging.INFO)
 MAX_SEED = np.iinfo(np.int32).max