---
# Weights & Biases run configuration snapshot.
# Every top-level entry other than `wandb_version` is a mapping with a
# `desc` field and a `value` field; the actual setting lives under `value`.
#
# NOTE(review): the source had lost its line breaks and indentation, so the
# nesting below is reconstructed from key order and the repeating
# `desc`/`value` pattern. Verify the depth of nested groups (for example
# `optim`/`checkpointing` under `training`, `architecture` under
# `diffusion`) against the config that generated this run.
#
# `${...}` values are unresolved interpolation placeholders stored verbatim
# (presumably OmegaConf/Hydra syntax -- confirm with the consumer).
wandb_version: 1

experiment:
  desc: null
  value:
    debug: ${debug}
    tasks:
      - training
    num_nodes: 1
    training:
      precision: 16-mixed
      compile: false
      lr: 2.0e-05
      batch_size: 64
      max_epochs: -1
      max_steps: 700000
      max_time: null
      data:
        num_workers: 4
        shuffle: true
      optim:
        accumulate_grad_batches: 1
        gradient_clip_val: 1.0
      checkpointing:
        every_n_train_steps: 2500
        every_n_epochs: null
        train_time_interval: null
        enable_version_counter: false
    validation:
      precision: 16-mixed
      compile: false
      batch_size: 4
      val_every_n_step: 2500
      val_every_n_epoch: null
      limit_batch: 1
      inference_mode: false
      data:
        num_workers: 4
        shuffle: false
    test:
      precision: 16-mixed
      compile: false
      batch_size: 1
      limit_batch: 1
      inference_mode: false
      data:
        num_workers: 4
        shuffle: false
    logging:
      metrics: null
    _name: exp_video

dataset:
  desc: null
  value:
    debug: ${debug}
    metadata: data/${dataset.name}/metadata.json
    data_mean: 0.5
    data_std: 0.5
    save_dir: /proj/cvl/users/x_fahkh2/WorldMem_Repro/datasets/minecraft/vae_features
    n_frames: 200
    context_length: 1
    resolution: 128
    observation_shape:
      - 3
      - ${dataset.resolution}
      - ${dataset.resolution}
    external_cond_dim: 0
    validation_multiplier: 1
    frame_skip: 1
    action_cond_dim: 25
    _name: video_minecraft_latent
    # Same directory as `save_dir` above.
    precomputed_feature_dir: /proj/cvl/users/x_fahkh2/WorldMem_Repro/datasets/minecraft/vae_features
    use_explicit_memory_frames: false
    n_frames_valid: 700
    angle_range: 110
    pos_range: 8
    customized_validation: true
    add_timestamp_embedding: true

algorithm:
  desc: null
  value:
    debug: ${debug}
    lr: ${experiment.training.lr}
    x_shape: ${dataset.observation_shape}
    frame_stack: 1
    frame_skip: ${dataset.frame_skip}
    data_mean: ${dataset.data_mean}
    data_std: ${dataset.data_std}
    external_cond_dim: 0
    context_frames: 1
    weight_decay: 0.002
    warmup_steps: 1000
    optimizer_beta:
      - 0.9
      - 0.99
    uncertainty_scale: 1
    guidance_scale: 0.0
    chunk_size: 1
    scheduling_matrix: autoregressive
    noise_level: random_all
    causal: true
    diffusion:
      objective: pred_v
      beta_schedule: sigmoid
      schedule_fn_kwargs: {}
      clip_noise: 20.0
      use_snr: false
      use_cum_snr: false
      use_fused_snr: true
      snr_clip: 5.0
      cum_snr_decay: 0.96
      timesteps: 1000
      sampling_timesteps: 20
      ddim_sampling_eta: 0.0
      stabilization_level: 15
      architecture:
        network_size: 64
        attn_heads: 4
        attn_dim_head: 64
        dim_mults: [1, 2, 4, 8]
        resolution: ${dataset.resolution}
        attn_resolutions: [16, 32, 64, 128]
        use_init_temporal_attn: true
        use_linear_attn: true
        time_emb_type: rotary
    n_frames: ${dataset.n_frames}
    metadata: ${dataset.metadata}
    action_cond_dim: 25
    use_plucker: true
    memory_condition_length: 0
    log_video: true
    use_compressed_causal_memory: false
    compressed_memory_dim: 256
    compressed_memory_slots: 64
    compressed_memory_topk: 4
    use_mamba_memory_pipeline: true
    training_stage: stage_a_memory_pretrain
    stage_c_memory_aux_weight: 0.1
    diff_window_size: 8
    memory_gap_aux_weight: 0.1
    memory_gap_aux_weight_init: 0.5
    memory_gap_aux_weight_final: 0.1
    memory_gap_aux_decay_fraction: 0.3
    memory_gap_aux_decay_steps: 0
    memory_gap_aux_exp_k: 5.0
    use_precomputed_features: true
    mamba_latent_channels: 16
    mamba_model_dim: 256
    mamba_depth: 4
    mamba_cond_dim: 256
    mamba_d_state: 16
    mamba_d_conv: 4
    mamba_expand: 2
    allow_mamba_fallback: false
    strict_causal_training: true
    strict_causal_evaluation: true
    use_oracle_pose_eval: false
    enable_memory_noise_curriculum: true
    # Two boundaries below, three entries in each of the two lists after it.
    curriculum_phase_boundaries:
      - 0.2
      - 0.7
    curriculum_noise_ranges:
      - [600, 1000]
      - [200, 900]
      - [0, 400]
    curriculum_horizons:
      - 8
      - 64
      - 256
    _name: df_video_mamba3stage
    require_pose_prediction: false
    use_memory_attention: false
    relative_embedding: false
    memory_retrieval_topk: 8

debug:
  desc: null
  value: false

wandb:
  desc: null
  value:
    entity: turlin
    project: worldmem
    mode: online

resume:
  desc: null
  value: null

load:
  desc: null
  value: null

name:
  desc: null
  value: train_stage_a_mamba

customized_load:
  desc: null
  value: true

# NOTE(review): key is spelled `seperate_load` (sic). It is kept verbatim
# because consumers look it up by this exact name.
seperate_load:
  desc: null
  value: true

diffusion_model_path:
  desc: null
  value: /proj/cvl/users/x_fahkh2/WorldMem_Repro/checkpoints/diffusion_only.ckpt

vae_path:
  desc: null
  value: /proj/cvl/users/x_fahkh2/WorldMem_Repro/checkpoints/vae_only.ckpt

output_dir:
  desc: null
  value: /proj/cvl/users/x_fahkh2/WorldMem_Repro/checkpoints/bimamba_stage_a/

# Run metadata recorded under `_wandb`. Version strings are quoted so they
# can never be re-typed as numbers by a parser or formatter.
_wandb:
  desc: null
  value:
    python_version: '3.10.19'
    cli_version: '0.17.9'
    framework: huggingface
    huggingface_version: '5.2.0'
    is_jupyter_run: false
    is_kaggle_kernel: true
    start_time: 1772193742
    # Numeric-keyed record; the meaning of the codes is not visible here
    # (presumably wandb telemetry -- confirm against wandb internals).
    # Entries 4, 5 and 6 repeat the three version strings above.
    t:
      1: [1, 9, 11, 29, 41, 49, 50, 51, 55, 63, 103, 105, 106]
      3: [13, 16, 23]
      4: '3.10.19'
      5: '0.17.9'
      6: '5.2.0'
      8: [2, 5]
      13: linux-x86_64