{ "data": { "augmentations": { "affine_p": 0.0, "blur_p": 0.5, "cut_p": 0.0, "flip_p": 0.5, "gamma_p": 1.0, "grayscale_p": 0.2, "invert_p": 0.0, "jitter_p": 1.0, "noise_pad": 1.0, "random_blur": 2.0, "random_gamma": 0.5, "random_jitter": 0.5, "random_rotation": 0.0, "random_scale": 4.0, "random_shear": 0.0, "random_translate_x": 0.04, "random_translate_y": 0.01, "rotation_p": 0.0, "scale_p": 0.0, "shape_mult": 14, "test_context": 1.0, "translate_p": 0.0 }, "crop": "garg", "data_root": "datasets", "image_shape": [ 518, 518 ], "mini": 1.0, "normalization": "imagenet", "num_frames": 1, "pair": 1, "resize_method": "contextcrop", "sampling": { "KITTI": 1.0 }, "shape_constraints": { "height_min": 15, "pixels_max": 600000.0, "pixels_min": 200000.0, "ratio_bounds": [ 0.5, 2.5 ], "sample": true, "shape_mult": 14, "width_min": 15 }, "train_datasets": [ "KITTI" ], "val_datasets": [ "KITTI" ] }, "eps": 1e-06, "generic": { "deterministic": true, "name_page": "ufish", "seed": 42 }, "model": { "camera": { "augment": true, "tau": 50000, "weak_ratio": 0.9 }, "expansion": 4, "layer_scale": 0.0001, "name": "UniK3D", "num_heads": 8, "num_steps": 100000, "pixel_decoder": { "depths": [ 2, 2, 2 ], "detach": 0.1, "dropout": 0.0, "hidden_dim": 512, "kernel_size": 3, "name": "Decoder", "num_prompt_blocks": 1, "out_dim": 64, "use_norm": false }, "pixel_encoder": { "cls_token_embed_dims": [ 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024 ], "depths": [ 6, 12, 18, 24 ], "embed_dim": 1024, "embed_dims": [ 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024 ], "freeze_norm": true, "frozen_stages": 0, "lr": 3e-06, "name": "dinov2_vitl14", "num_register_tokens": 0, "output_idx": [ 6, 12, 18, 24 ], "pretrained": null, "stacking_fn": "last", "use_norm": true, "wd": 0.1 } }, "training": { "batch_size": 8, "clipping": 1.0, "cycle_beta": true, "drop_path": 0.0, "ema": 0.9995, "f16": "f16", "ld": 1.0, "losses": { "camera": { "alpha": 1.0, "dims": [ 1, 2 ], "fn": "l1", "gamma": 1.0, "input_fn": "linear", "name": "PolarRegression", "output_fn": "sqrt", "polar_asym": 0.7, "polar_weight": 3.0, "weight": 1.0 }, "confidence": { "input_fn": "log", "name": "Confidence", "output_fn": "sqrt", "weight": 0.1 }, "scale": { "alpha": 1.0, "fn": "l1", "gamma": 1.0, "input_fn": "log", "name": "Scale", "output_fn": "sqrt", "weight": 1.0 } }, "lr": 5e-05, "lr_final": 1e-06, "lr_warmup": 1.0, "n_iters": 250000, "nsteps_accumulation_gradient": 4, "validation_interval": 2500, "warmup_iters": 75000, "wd": 0.1, "wd_final": 0.1 } }