Add HandX diffusion checkpoints (layers 4/8/12)
Browse files
- layers12/config.yaml +88 -0
- layers12/model.pt +3 -0
- layers4/config.yaml +88 -0
- layers4/model.pt +3 -0
- layers8/config.yaml +88 -0
- layers8/model.pt +3 -0
layers12/config.yaml
ADDED
|
@@ -0,0 +1,88 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
data:
|
| 2 |
+
_target_: src.diffusion.data_loader.handx.HandXDataset
|
| 3 |
+
contact_label: false
|
| 4 |
+
data_dir: data/testdata
|
| 5 |
+
data_file_name: can_pos_all_wotextfeat.npz
|
| 6 |
+
difference: false
|
| 7 |
+
fixed_length: 60
|
| 8 |
+
normalize: true
|
| 9 |
+
num_workers: 8
|
| 10 |
+
ratio: 1.0
|
| 11 |
+
repr: joint_pos_w_scalar_rot
|
| 12 |
+
model:
|
| 13 |
+
activation: gelu
|
| 14 |
+
arch: trans_dec_treble_residual
|
| 15 |
+
cond_mask_prob: 0.1
|
| 16 |
+
cond_mode: text
|
| 17 |
+
contact_prediction: false
|
| 18 |
+
diffusion:
|
| 19 |
+
contact_loss: false
|
| 20 |
+
diffusion_steps: 1000
|
| 21 |
+
lambda_acce: 0.1
|
| 22 |
+
lambda_contact: 1.0
|
| 23 |
+
lambda_contact_predict: 1.0
|
| 24 |
+
lambda_fc: 0.0
|
| 25 |
+
lambda_ig: 0.0
|
| 26 |
+
lambda_rcxyz: 0.0
|
| 27 |
+
lambda_repr: 0.5
|
| 28 |
+
lambda_vel: 0.2
|
| 29 |
+
lambda_w_ig: 0.0
|
| 30 |
+
noise_schedule: cosine
|
| 31 |
+
repr: joint_pos_w_scalar_rot
|
| 32 |
+
sigma_small: true
|
| 33 |
+
dropout: 0.1
|
| 34 |
+
ff_size: 1024
|
| 35 |
+
latent_dim: 512
|
| 36 |
+
layers: 12
|
| 37 |
+
max_text_length: null
|
| 38 |
+
num_heads: 4
|
| 39 |
+
repr: joint_pos_w_scalar_rot
|
| 40 |
+
text_model: t5-base
|
| 41 |
+
treble_mask_prob: 1.0
|
| 42 |
+
seed: 42
|
| 43 |
+
train:
|
| 44 |
+
dataloader:
|
| 45 |
+
batch_size: 4096
|
| 46 |
+
num_workers: 16
|
| 47 |
+
shuffle: true
|
| 48 |
+
eval_cfg:
|
| 49 |
+
dataloader:
|
| 50 |
+
batch_size: 512
|
| 51 |
+
num_workers: 8
|
| 52 |
+
shuffle: false
|
| 53 |
+
eval_interval: 50000
|
| 54 |
+
num_samples_on_train: 2048
|
| 55 |
+
num_samples_on_val: 512
|
| 56 |
+
num_samples_per_condition: 16
|
| 57 |
+
eval_during_training: false
|
| 58 |
+
log_interval: 16
|
| 59 |
+
num_steps: 100000000
|
| 60 |
+
optimizer:
|
| 61 |
+
lr: 0.0001
|
| 62 |
+
weight_decay: 1.0e-05
|
| 63 |
+
overwrite: true
|
| 64 |
+
resume_checkpoint: ''
|
| 65 |
+
sample:
|
| 66 |
+
guidance_param: 2.5
|
| 67 |
+
save_dir: exps/data100_layers12
|
| 68 |
+
save_interval: 5000
|
| 69 |
+
train_platform_type: WandbPlatform
|
| 70 |
+
val_cfg:
|
| 71 |
+
dataloader:
|
| 72 |
+
batch_size: 4096
|
| 73 |
+
num_workers: 16
|
| 74 |
+
shuffle: false
|
| 75 |
+
val_interval: 2048
|
| 76 |
+
val_during_training: true
|
| 77 |
+
viz_cfg:
|
| 78 |
+
denoising_steps:
|
| 79 |
+
- 999
|
| 80 |
+
- 500
|
| 81 |
+
- 250
|
| 82 |
+
- 100
|
| 83 |
+
- 50
|
| 84 |
+
- 10
|
| 85 |
+
- 5
|
| 86 |
+
- 0
|
| 87 |
+
samples_count: 1
|
| 88 |
+
viz_during_training: true
|
layers12/model.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9247465c2277d85c40ec6de649dd26f7a94eccfc3ab356c451cdc1c32208e335
|
| 3 |
+
size 176343850
|
layers4/config.yaml
ADDED
|
@@ -0,0 +1,88 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
data:
|
| 2 |
+
_target_: src.diffusion.data_loader.handx.HandXDataset
|
| 3 |
+
contact_label: true
|
| 4 |
+
data_dir: data/testdata
|
| 5 |
+
data_file_name: can_pos_all_wotextfeat.npz
|
| 6 |
+
difference: false
|
| 7 |
+
fixed_length: 60
|
| 8 |
+
normalize: true
|
| 9 |
+
num_workers: 8
|
| 10 |
+
ratio: 1.0
|
| 11 |
+
repr: joint_pos_w_scalar_rot
|
| 12 |
+
model:
|
| 13 |
+
activation: gelu
|
| 14 |
+
arch: trans_dec_treble_residual
|
| 15 |
+
cond_mask_prob: 0.1
|
| 16 |
+
cond_mode: text
|
| 17 |
+
contact_prediction: false
|
| 18 |
+
diffusion:
|
| 19 |
+
contact_loss: false
|
| 20 |
+
diffusion_steps: 1000
|
| 21 |
+
lambda_acce: 0.1
|
| 22 |
+
lambda_contact: 1.0
|
| 23 |
+
lambda_contact_predict: 1.0
|
| 24 |
+
lambda_fc: 0.0
|
| 25 |
+
lambda_ig: 0.0
|
| 26 |
+
lambda_rcxyz: 0.0
|
| 27 |
+
lambda_repr: 0.5
|
| 28 |
+
lambda_vel: 0.2
|
| 29 |
+
lambda_w_ig: 0.0
|
| 30 |
+
noise_schedule: cosine
|
| 31 |
+
repr: joint_pos_w_scalar_rot
|
| 32 |
+
sigma_small: true
|
| 33 |
+
dropout: 0.1
|
| 34 |
+
ff_size: 1024
|
| 35 |
+
latent_dim: 256
|
| 36 |
+
layers: 4
|
| 37 |
+
max_text_length: null
|
| 38 |
+
num_heads: 4
|
| 39 |
+
repr: joint_pos_w_scalar_rot
|
| 40 |
+
text_model: t5-base
|
| 41 |
+
treble_mask_prob: 1.0
|
| 42 |
+
seed: 42
|
| 43 |
+
train:
|
| 44 |
+
dataloader:
|
| 45 |
+
batch_size: 6144
|
| 46 |
+
num_workers: 8
|
| 47 |
+
shuffle: true
|
| 48 |
+
eval_cfg:
|
| 49 |
+
dataloader:
|
| 50 |
+
batch_size: 512
|
| 51 |
+
num_workers: 8
|
| 52 |
+
shuffle: false
|
| 53 |
+
eval_interval: 50000
|
| 54 |
+
num_samples_on_train: 2048
|
| 55 |
+
num_samples_on_val: 512
|
| 56 |
+
num_samples_per_condition: 16
|
| 57 |
+
eval_during_training: false
|
| 58 |
+
log_interval: 16
|
| 59 |
+
num_steps: 100000000
|
| 60 |
+
optimizer:
|
| 61 |
+
lr: 0.0001
|
| 62 |
+
weight_decay: 1.0e-05
|
| 63 |
+
overwrite: true
|
| 64 |
+
resume_checkpoint: ''
|
| 65 |
+
sample:
|
| 66 |
+
guidance_param: 2.5
|
| 67 |
+
save_dir: exps/data100_layers4
|
| 68 |
+
save_interval: 5000
|
| 69 |
+
train_platform_type: WandbPlatform
|
| 70 |
+
val_cfg:
|
| 71 |
+
dataloader:
|
| 72 |
+
batch_size: 6144
|
| 73 |
+
num_workers: 8
|
| 74 |
+
shuffle: false
|
| 75 |
+
val_interval: 1000
|
| 76 |
+
val_during_training: true
|
| 77 |
+
viz_cfg:
|
| 78 |
+
denoising_steps:
|
| 79 |
+
- 999
|
| 80 |
+
- 500
|
| 81 |
+
- 250
|
| 82 |
+
- 100
|
| 83 |
+
- 50
|
| 84 |
+
- 10
|
| 85 |
+
- 5
|
| 86 |
+
- 0
|
| 87 |
+
samples_count: 1
|
| 88 |
+
viz_during_training: true
|
layers4/model.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:47d5fd2b55439ab6a57031951a31a95cc2243f4f3a9ad6aa5058b562a2682960
|
| 3 |
+
size 28787534
|
layers8/config.yaml
ADDED
|
@@ -0,0 +1,88 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
data:
|
| 2 |
+
_target_: src.diffusion.data_loader.handx.HandXDataset
|
| 3 |
+
contact_label: true
|
| 4 |
+
data_dir: data/testdata
|
| 5 |
+
data_file_name: can_pos_all_wotextfeat.npz
|
| 6 |
+
difference: false
|
| 7 |
+
fixed_length: 60
|
| 8 |
+
normalize: true
|
| 9 |
+
num_workers: 8
|
| 10 |
+
ratio: 1.0
|
| 11 |
+
repr: joint_pos_w_scalar_rot
|
| 12 |
+
model:
|
| 13 |
+
activation: gelu
|
| 14 |
+
arch: trans_dec_treble_residual
|
| 15 |
+
cond_mask_prob: 0.1
|
| 16 |
+
cond_mode: text
|
| 17 |
+
contact_prediction: false
|
| 18 |
+
diffusion:
|
| 19 |
+
contact_loss: false
|
| 20 |
+
diffusion_steps: 1000
|
| 21 |
+
lambda_acce: 0.1
|
| 22 |
+
lambda_contact: 1.0
|
| 23 |
+
lambda_contact_predict: 1.0
|
| 24 |
+
lambda_fc: 0.0
|
| 25 |
+
lambda_ig: 0.0
|
| 26 |
+
lambda_rcxyz: 0.0
|
| 27 |
+
lambda_repr: 0.5
|
| 28 |
+
lambda_vel: 0.2
|
| 29 |
+
lambda_w_ig: 0.0
|
| 30 |
+
noise_schedule: cosine
|
| 31 |
+
repr: joint_pos_w_scalar_rot
|
| 32 |
+
sigma_small: true
|
| 33 |
+
dropout: 0.1
|
| 34 |
+
ff_size: 1024
|
| 35 |
+
latent_dim: 512
|
| 36 |
+
layers: 8
|
| 37 |
+
max_text_length: null
|
| 38 |
+
num_heads: 4
|
| 39 |
+
repr: joint_pos_w_scalar_rot
|
| 40 |
+
text_model: t5-base
|
| 41 |
+
treble_mask_prob: 1.0
|
| 42 |
+
seed: 42
|
| 43 |
+
train:
|
| 44 |
+
dataloader:
|
| 45 |
+
batch_size: 4096
|
| 46 |
+
num_workers: 8
|
| 47 |
+
shuffle: true
|
| 48 |
+
eval_cfg:
|
| 49 |
+
dataloader:
|
| 50 |
+
batch_size: 512
|
| 51 |
+
num_workers: 8
|
| 52 |
+
shuffle: false
|
| 53 |
+
eval_interval: 50000
|
| 54 |
+
num_samples_on_train: 2048
|
| 55 |
+
num_samples_on_val: 512
|
| 56 |
+
num_samples_per_condition: 16
|
| 57 |
+
eval_during_training: false
|
| 58 |
+
log_interval: 16
|
| 59 |
+
num_steps: 100000000
|
| 60 |
+
optimizer:
|
| 61 |
+
lr: 0.0001
|
| 62 |
+
weight_decay: 1.0e-05
|
| 63 |
+
overwrite: true
|
| 64 |
+
resume_checkpoint: ''
|
| 65 |
+
sample:
|
| 66 |
+
guidance_param: 2.5
|
| 67 |
+
save_dir: exps/data100_layers8
|
| 68 |
+
save_interval: 5000
|
| 69 |
+
train_platform_type: WandbPlatform
|
| 70 |
+
val_cfg:
|
| 71 |
+
dataloader:
|
| 72 |
+
batch_size: 4096
|
| 73 |
+
num_workers: 8
|
| 74 |
+
shuffle: false
|
| 75 |
+
val_interval: 1000
|
| 76 |
+
val_during_training: true
|
| 77 |
+
viz_cfg:
|
| 78 |
+
denoising_steps:
|
| 79 |
+
- 999
|
| 80 |
+
- 500
|
| 81 |
+
- 250
|
| 82 |
+
- 100
|
| 83 |
+
- 50
|
| 84 |
+
- 10
|
| 85 |
+
- 5
|
| 86 |
+
- 0
|
| 87 |
+
samples_count: 1
|
| 88 |
+
viz_during_training: true
|
layers8/model.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:cde0968c91854a903dc88236cba809acfb3e8f860e3663db9a0e251ea1221c36
|
| 3 |
+
size 125849906
|