| wandb_version: 1 |
|
|
| experiment: |
| desc: null |
| value: |
| debug: ${debug} |
| tasks: |
| - training |
| num_nodes: 1 |
| training: |
| precision: 16-mixed |
| compile: false |
| lr: 2.0e-05 |
| batch_size: 64 |
| max_epochs: -1 |
| max_steps: 700000 |
| max_time: null |
| data: |
| num_workers: 4 |
| shuffle: true |
| optim: |
| accumulate_grad_batches: 1 |
| gradient_clip_val: 1.0 |
| checkpointing: |
| every_n_train_steps: 2500 |
| every_n_epochs: null |
| train_time_interval: null |
| enable_version_counter: false |
| validation: |
| precision: 16-mixed |
| compile: false |
| batch_size: 4 |
| val_every_n_step: 2500 |
| val_every_n_epoch: null |
| limit_batch: 1 |
| inference_mode: false |
| data: |
| num_workers: 4 |
| shuffle: false |
| test: |
| precision: 16-mixed |
| compile: false |
| batch_size: 1 |
| limit_batch: 1 |
| inference_mode: false |
| data: |
| num_workers: 4 |
| shuffle: false |
| logging: |
| metrics: null |
| _name: exp_video |
| dataset: |
| desc: null |
| value: |
| debug: ${debug} |
| metadata: data/${dataset.name}/metadata.json |
| data_mean: 0.5 |
| data_std: 0.5 |
| save_dir: /proj/cvl/users/x_fahkh2/WorldMem_Repro/datasets/minecraft |
| n_frames: 200 |
| context_length: 1 |
| resolution: 128 |
| observation_shape: |
| - 3 |
| - ${dataset.resolution} |
| - ${dataset.resolution} |
| external_cond_dim: 0 |
| validation_multiplier: 1 |
| frame_skip: 1 |
| action_cond_dim: 25 |
| _name: video_minecraft_latent |
| precomputed_feature_dir: /proj/cvl/users/x_fahkh2/WorldMem_Repro/datasets/minecraft/vae_features |
| use_explicit_memory_frames: false |
| n_frames_valid: 700 |
| angle_range: 110 |
| pos_range: 8 |
| customized_validation: true |
| add_timestamp_embedding: true |
| algorithm: |
| desc: null |
| value: |
| debug: ${debug} |
| lr: ${experiment.training.lr} |
| x_shape: ${dataset.observation_shape} |
| frame_stack: 1 |
| frame_skip: ${dataset.frame_skip} |
| data_mean: ${dataset.data_mean} |
| data_std: ${dataset.data_std} |
| external_cond_dim: 0 |
| context_frames: 1 |
| weight_decay: 0.002 |
| warmup_steps: 1000 |
| optimizer_beta: |
| - 0.9 |
| - 0.99 |
| uncertainty_scale: 1 |
| guidance_scale: 0.0 |
| chunk_size: 1 |
| scheduling_matrix: autoregressive |
| noise_level: random_all |
| causal: true |
| diffusion: |
| objective: pred_v |
| beta_schedule: sigmoid |
| schedule_fn_kwargs: {} |
| clip_noise: 20.0 |
| use_snr: false |
| use_cum_snr: false |
| use_fused_snr: true |
| snr_clip: 5.0 |
| cum_snr_decay: 0.96 |
| timesteps: 1000 |
| sampling_timesteps: 20 |
| ddim_sampling_eta: 0.0 |
| stabilization_level: 15 |
| architecture: |
| network_size: 64 |
| attn_heads: 4 |
| attn_dim_head: 64 |
| dim_mults: |
| - 1 |
| - 2 |
| - 4 |
| - 8 |
| resolution: ${dataset.resolution} |
| attn_resolutions: |
| - 16 |
| - 32 |
| - 64 |
| - 128 |
| use_init_temporal_attn: true |
| use_linear_attn: true |
| time_emb_type: rotary |
| n_frames: ${dataset.n_frames} |
| metadata: ${dataset.metadata} |
| action_cond_dim: 25 |
| use_plucker: true |
| memory_condition_length: 0 |
| log_video: true |
| use_compressed_causal_memory: false |
| compressed_memory_dim: 256 |
| compressed_memory_slots: 64 |
| compressed_memory_topk: 4 |
| use_mamba_memory_pipeline: true |
| training_stage: stage_a_memory_pretrain |
| stage_c_memory_aux_weight: 0.1 |
| diff_window_size: 8 |
| memory_gap_aux_weight: 0.1 |
| memory_gap_aux_weight_init: 0.5 |
| memory_gap_aux_weight_final: 0.1 |
| memory_gap_aux_decay_fraction: 0.3 |
| memory_gap_aux_decay_steps: 0 |
| memory_gap_aux_exp_k: 5.0 |
| use_precomputed_features: true |
| mamba_latent_channels: 16 |
| mamba_model_dim: 256 |
| mamba_depth: 4 |
| mamba_cond_dim: 256 |
| mamba_d_state: 16 |
| mamba_d_conv: 4 |
| mamba_expand: 2 |
| allow_mamba_fallback: false |
| strict_causal_training: true |
| strict_causal_evaluation: true |
| use_oracle_pose_eval: false |
| enable_memory_noise_curriculum: true |
| curriculum_phase_boundaries: |
| - 0.2 |
| - 0.7 |
| curriculum_noise_ranges: |
| - - 600 |
| - 1000 |
| - - 200 |
| - 900 |
| - - 0 |
| - 400 |
| curriculum_horizons: |
| - 8 |
| - 64 |
| - 256 |
| _name: df_video_mamba3stage |
| require_pose_prediction: false |
| use_memory_attention: false |
| relative_embedding: false |
| memory_retrieval_topk: 8 |
| debug: |
| desc: null |
| value: false |
| wandb: |
| desc: null |
| value: |
| entity: turlin |
| project: worldmem |
| mode: online |
| resume: |
| desc: null |
| value: null |
| load: |
| desc: null |
| value: null |
| name: |
| desc: null |
| value: train_stage_a_mamba |
| customized_load: |
| desc: null |
| value: true |
| seperate_load: |
| desc: null |
| value: true |
| diffusion_model_path: |
| desc: null |
| value: /proj/cvl/users/x_fahkh2/WorldMem_Repro/checkpoints/diffusion_only.ckpt |
| vae_path: |
| desc: null |
| value: /proj/cvl/users/x_fahkh2/WorldMem_Repro/checkpoints/vae_only.ckpt |
| output_dir: |
| desc: null |
| value: /proj/cvl/users/x_fahkh2/WorldMem_Repro/checkpoints/bimamba_stage_a/ |
| _wandb: |
| desc: null |
| value: |
| python_version: 3.10.19 |
| cli_version: 0.17.9 |
| framework: huggingface |
| huggingface_version: 5.2.0 |
| is_jupyter_run: false |
| is_kaggle_kernel: true |
| start_time: 1772196396 |
| t: |
| 1: |
| - 1 |
| - 9 |
| - 11 |
| - 29 |
| - 41 |
| - 49 |
| - 50 |
| - 51 |
| - 55 |
| - 63 |
| - 103 |
| - 105 |
| - 106 |
| 3: |
| - 13 |
| - 16 |
| - 23 |
| 4: 3.10.19 |
| 5: 0.17.9 |
| 6: 5.2.0 |
| 8: |
| - 2 |
| - 5 |
| 13: linux-x86_64 |
|
|