Add files using upload-large-folder tool

Browse files

Files changed (3) hide show

README.md +37 -0
config.json +41 -0
diffusion_pytorch_model.safetensors +3 -0

README.md CHANGED Viewed

@@ -1,3 +1,40 @@
 ---
 license: bsd-2-clause
 ---

 ---
 license: bsd-2-clause
+base_model:
+- LanguageBind/Open-Sora-Plan-v1.2.0
+library_name: diffusers
+tags:
+- OutDreamer
+- video-outpainting
+- diffusion-transformer
+- DiT
 ---
+# OutDreamer checkpoint for video outpainting
+This repository provides the OutDreamer checkpoint for **OutDreamer: Video Outpainting with a Diffusion Transformer**.
+OutDreamer is a DiT-based video outpainting framework designed to extend video content beyond the original frame boundaries while maintaining spatial and temporal consistency. The model introduces an efficient video control branch, a conditional outpainting branch, mask-driven self-attention, latent alignment loss, and a cross-video-clip refiner for long video outpainting.
+The method and its results are detailed in the arXiv paper: [OutDreamer: Video Outpainting with a Diffusion Transformer](https://arxiv.org/abs/2506.22298).
+## How to Use
+**Important:** This checkpoint is intended to be used with the OutDreamer codebase and is not a standalone Hugging Face pipeline.
+For project details, please refer to the OutDreamer GitHub repository: [zhongzero/OutDreamer](https://github.com/zhongzero/OutDreamer).
+For setup and inference scripts compatible with this checkpoint, please refer to the reproduction repository: [zhongzero/OutDreamer-unofficial](https://github.com/zhongzero/OutDreamer-unofficial).
+## Citation
+If you find this work helpful for your research, please cite:
+```BibTeX
+@article{zhong2026outdreamer,
+  title={{OutDreamer}: Video Outpainting with a Diffusion Transformer},
+  author={Zhong, Linhao and Li, Fan and Huang, Yi and Liu, Jianzhuang and Pei, Renjing and Song, Fenglong},
+  journal={IEEE Transactions on Image Processing},
+  year={2026},
+  publisher={IEEE}
+}
+```

config.json ADDED Viewed

	@@ -0,0 +1,41 @@

+{
+  "_class_name": "OpenSoraCNext",
+  "_diffusers_version": "0.28.0",
+  "activation_fn": "gelu-approximate",
+  "attention_bias": true,
+  "attention_head_dim": 96,
+  "attention_mode": "xformers",
+  "attention_type": "default",
+  "caption_channels": 4096,
+  "control_in_channels": 8,
+  "cross_attention_dim": 2304,
+  "double_self_attention": false,
+  "downsampler": null,
+  "dropout": 0.0,
+  "in_channels": 4,
+  "interpolation_scale_h": 1.0,
+  "interpolation_scale_t": 1.0,
+  "interpolation_scale_w": 1.0,
+  "norm_elementwise_affine": false,
+  "norm_eps": 1e-06,
+  "norm_num_groups": 32,
+  "norm_type": "ada_norm_single",
+  "num_attention_heads": 24,
+  "num_embeds_ada_norm": 1000,
+  "num_layers": 32,
+  "num_vector_embeds": null,
+  "only_cross_attention": false,
+  "out_channels": 4,
+  "patch_size": 2,
+  "patch_size_t": 1,
+  "sample_size": [
+    60,
+    80
+  ],
+  "sample_size_t": 8,
+  "upcast_attention": false,
+  "use_additional_conditions": null,
+  "use_linear_projection": false,
+  "use_rope": true,
+  "use_stable_fp32": false
+}

diffusion_pytorch_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5c873bd3fc6a5efcc70fa2e4134214285cea5537780f6d36ad23c15d3b40ecdc
+size 6278569664