{ "architectures": [ "DVLAForActionPrediction" ], "attention_dropout": 0.0, "auto_map": { "AutoConfig": "DAMO-NLP-SG/VideoLLaMA3-7B-Image--configuration_videollama3.Videollama3Qwen2Config", "AutoModelForCausalLM": "DAMO-NLP-SG/VideoLLaMA3-7B-Image--modeling_videollama3.Videollama3Qwen2ForCausalLM" }, "bos_token_id": 151643, "eos_token_id": 151645, "hidden_act": "silu", "hidden_size": 3584, "image_token_index": 151665, "initializer_range": 0.02, "intermediate_size": 18944, "max_position_embeddings": 32768, "max_window_layers": 28, "mm_projector_type": "mlp2x_gelu", "model_type": "dvla", "n_action_bins": 256, "norm_stats": { "bridge_orig": { "action": { "mask": [ true, true, true, true, true, true, false ], "max": [ 0.41691166162490845, 0.25864794850349426, 0.21218234300613403, 3.122201919555664, 1.8618112802505493, 6.272472858428955, 1.0 ], "mean": [ 0.00023162984871305525, 0.00013371460954658687, -0.00013210502220317721, -0.00014224917686078697, -0.0003957618318963796, 0.00025795758119784296, 0.5765169858932495 ], "min": [ -0.4007510244846344, -0.12928041815757751, -0.22553899884223938, -3.2010786533355713, -1.8618112802505493, -6.279075622558594, 0.0 ], "q01": [ -0.028744795620441438, -0.041743524745106696, -0.02612247958779335, -0.08074554242193699, -0.09273611158132553, -0.20663773745298386, 0.0 ], "q99": [ 0.028299489319324488, 0.04087499871850013, 0.040158278569579056, 0.08191050931811326, 0.07803218640387051, 0.2031656354665755, 1.0 ], "std": [ 0.009774772450327873, 0.013701396994292736, 0.012669386342167854, 0.02856358513236046, 0.03065265156328678, 0.07559732347726822, 0.4959997236728668 ] }, "num_trajectories": 48624, "num_transitions": 1728258, "proprio": { "max": [ 0.5735528469085693, 0.4034728705883026, 0.36494991183280945, 1.3517684936523438, 1.570796251296997, 3.141204357147217, 0.0, 1.1121242046356201 ], "mean": [ 0.30948513746261597, 0.030666163191199303, 0.06444736570119858, 0.0068027242086827755, -0.07731744647026062, 0.10813789069652557, 0.0, 0.7086173892021179 ], "min": [ -0.04167502000927925, -0.3945816159248352, -0.15537554025650024, -3.141592502593994, -1.4992541074752808, -3.14153790473938, 0.0, 0.04637829214334488 ], "q01": [ 0.17092366471886636, -0.17033958569169044, -0.05617850091308355, -0.3667951425909996, -0.5440364676713944, -1.3451742899417878, 0.0, 0.052190229296684265 ], "q99": [ 0.45397736728191357, 0.23543687969446145, 0.19510107412934283, 0.3818274670839303, 0.2794133839011189, 1.8331137096881818, 0.0, 1.0105689764022827 ], "std": [ 0.06067129969596863, 0.09202422946691513, 0.0516839325428009, 0.13189035654067993, 0.1705130785703659, 0.5743006467819214, 0.0, 0.3522878885269165 ] } }, "fractal20220817_data": { "action": { "mask": [ true, true, true, true, true, true, false ], "max": [ 2.9984593391418457, 22.09052848815918, 2.7507524490356445, 1.570636510848999, 1.5321086645126343, 1.5691522359848022, 1.0 ], "mean": [ 0.0070160930044949055, 0.006287453696131706, -0.012577778659760952, 0.04329700767993927, -0.005719248205423355, 0.0009455775725655258, 0.535513699054718 ], "min": [ -2.0204520225524902, -5.497899532318115, -2.031663417816162, -1.569917917251587, -1.569892168045044, -1.570419430732727, 0.0 ], "q01": [ -0.22409614741802214, -0.14817706286907195, -0.2316769856214523, -0.3531744807958603, -0.4192173588275909, -0.4368023717403412, 0.0 ], "q99": [ 0.17836674451827994, 0.14942033141851413, 0.21879618436098075, 0.5897578334808342, 0.35292886614799457, 0.4487913608551022, 1.0 ], "std": [ 0.0693204402923584, 0.060718413442373276, 0.07362836599349976, 0.15637660026550293, 0.13175784051418304, 0.146182581782341, 0.4972381293773651 ] }, "num_trajectories": 78480, "num_transitions": 3404867, "proprio": { "max": [ 1.0534898042678833, 0.48018959164619446, 1.6896663904190063, 0.9999993443489075, 0.9999874830245972, 0.9554369449615479, 0.9914546012878418, 1.0 ], "mean": [ 0.5597754120826721, -0.0832320973277092, 0.7760888934135437, -0.24704332649707794, 0.49434685707092285, 0.09261937439441681, 0.20921756327152252, 0.42606088519096375 ], "min": [ -0.4436439275741577, -0.9970501065254211, -0.006579156965017319, -0.8643477559089661, -0.7079970240592957, -0.7688722014427185, -0.4999994933605194, 0.0 ], "q01": [ 0.32485780119895935, -0.2830313414335251, 0.14108185589313507, -0.6863587307929992, -0.6807721078395843, -0.36089446604251857, -0.4545843780040741, 0.0 ], "q99": [ 0.8753369486331939, 0.21245125174522383, 1.072377111911773, 0.9382593035697933, 0.9568788063526152, 0.4598054605722425, 0.7213053238391873, 1.0 ], "std": [ 0.12438706308603287, 0.1155894473195076, 0.24558264017105103, 0.5133047699928284, 0.522326648235321, 0.16654427349567413, 0.27579590678215027, 0.4560266435146332 ] } } }, "num_attention_heads": 28, "num_hidden_layers": 28, "num_key_value_heads": 4, "pad_to_multiple_of": 8, "rms_norm_eps": 1e-06, "rope_scaling": null, "rope_theta": 1000000.0, "sliding_window": null, "tie_word_embeddings": false, "torch_dtype": "float16", "transformers_version": "4.50.3", "use_cache": false, "use_sliding_window": false, "use_token_compression": false, "vision_encoder": "videollamae3vision_encoder", "vision_encoder_config": { "_attn_implementation_autoset": false, "_name_or_path": "", "add_cross_attention": false, "architectures": null, "attention_dropout": 0.0, "bad_words_ids": null, "begin_suppress_tokens": null, "bos_token_id": null, "chunk_size_feed_forward": 0, "cross_attention_hidden_size": null, "decoder_start_token_id": null, "diversity_penalty": 0.0, "do_sample": false, "early_stopping": false, "encoder_no_repeat_ngram_size": 0, "eos_token_id": null, "exponential_decay_length_penalty": null, "finetuning_task": null, "forced_bos_token_id": null, "forced_eos_token_id": null, "hidden_act": "gelu_pytorch_tanh", "hidden_size": 1152, "id2label": { "0": "LABEL_0", "1": "LABEL_1" }, "intermediate_size": 4304, "is_decoder": false, "is_encoder_decoder": false, "label2id": { "LABEL_0": 0, "LABEL_1": 1 }, "layer_norm_eps": 1e-06, "length_penalty": 1.0, "max_length": 20, "min_length": 0, "model_type": "videollama3_vision_encoder", "no_repeat_ngram_size": 0, "num_attention_heads": 16, "num_beam_groups": 1, "num_beams": 1, "num_channels": 3, "num_hidden_layers": 27, "num_return_sequences": 1, "output_attentions": false, "output_hidden_states": false, "output_scores": false, "pad_token_id": null, "patch_size": 14, "prefix": null, "problem_type": null, "pruned_heads": {}, "remove_invalid_values": false, "repetition_penalty": 1.0, "return_dict": true, "return_dict_in_generate": false, "sep_token_id": null, "suppress_tokens": null, "task_specific_params": null, "temperature": 1.0, "tf_legacy_loss": false, "tie_encoder_decoder": false, "tie_word_embeddings": true, "tokenizer_class": null, "top_k": 50, "top_p": 1.0, "torch_dtype": null, "torchscript": false, "typical_p": 1.0, "use_bfloat16": false }, "vocab_size": 152064 }