| { |
| "architectures": [ |
| "DVLAForActionPrediction" |
| ], |
| "attention_dropout": 0.0, |
| "auto_map": { |
| "AutoConfig": "DAMO-NLP-SG/VideoLLaMA3-7B-Image--configuration_videollama3.Videollama3Qwen2Config", |
| "AutoModelForCausalLM": "DAMO-NLP-SG/VideoLLaMA3-7B-Image--modeling_videollama3.Videollama3Qwen2ForCausalLM" |
| }, |
| "bos_token_id": 151643, |
| "eos_token_id": 151645, |
| "hidden_act": "silu", |
| "hidden_size": 3584, |
| "image_token_index": 151665, |
| "initializer_range": 0.02, |
| "intermediate_size": 18944, |
| "max_position_embeddings": 32768, |
| "max_window_layers": 28, |
| "mm_projector_type": "mlp2x_gelu", |
| "model_type": "dvla", |
| "n_action_bins": 256, |
| "norm_stats": { |
| "bridge_orig": { |
| "action": { |
| "mask": [ |
| true, |
| true, |
| true, |
| true, |
| true, |
| true, |
| false |
| ], |
| "max": [ |
| 0.41691166162490845, |
| 0.25864794850349426, |
| 0.21218234300613403, |
| 3.122201919555664, |
| 1.8618112802505493, |
| 6.272472858428955, |
| 1.0 |
| ], |
| "mean": [ |
| 0.00023162984871305525, |
| 0.00013371460954658687, |
| -0.00013210502220317721, |
| -0.00014224917686078697, |
| -0.0003957618318963796, |
| 0.00025795758119784296, |
| 0.5765169858932495 |
| ], |
| "min": [ |
| -0.4007510244846344, |
| -0.12928041815757751, |
| -0.22553899884223938, |
| -3.2010786533355713, |
| -1.8618112802505493, |
| -6.279075622558594, |
| 0.0 |
| ], |
| "q01": [ |
| -0.028744795620441438, |
| -0.041743524745106696, |
| -0.02612247958779335, |
| -0.08074554242193699, |
| -0.09273611158132553, |
| -0.20663773745298386, |
| 0.0 |
| ], |
| "q99": [ |
| 0.028299489319324488, |
| 0.04087499871850013, |
| 0.040158278569579056, |
| 0.08191050931811326, |
| 0.07803218640387051, |
| 0.2031656354665755, |
| 1.0 |
| ], |
| "std": [ |
| 0.009774772450327873, |
| 0.013701396994292736, |
| 0.012669386342167854, |
| 0.02856358513236046, |
| 0.03065265156328678, |
| 0.07559732347726822, |
| 0.4959997236728668 |
| ] |
| }, |
| "num_trajectories": 48624, |
| "num_transitions": 1728258, |
| "proprio": { |
| "max": [ |
| 0.5735528469085693, |
| 0.4034728705883026, |
| 0.36494991183280945, |
| 1.3517684936523438, |
| 1.570796251296997, |
| 3.141204357147217, |
| 0.0, |
| 1.1121242046356201 |
| ], |
| "mean": [ |
| 0.30948513746261597, |
| 0.030666163191199303, |
| 0.06444736570119858, |
| 0.0068027242086827755, |
| -0.07731744647026062, |
| 0.10813789069652557, |
| 0.0, |
| 0.7086173892021179 |
| ], |
| "min": [ |
| -0.04167502000927925, |
| -0.3945816159248352, |
| -0.15537554025650024, |
| -3.141592502593994, |
| -1.4992541074752808, |
| -3.14153790473938, |
| 0.0, |
| 0.04637829214334488 |
| ], |
| "q01": [ |
| 0.17092366471886636, |
| -0.17033958569169044, |
| -0.05617850091308355, |
| -0.3667951425909996, |
| -0.5440364676713944, |
| -1.3451742899417878, |
| 0.0, |
| 0.052190229296684265 |
| ], |
| "q99": [ |
| 0.45397736728191357, |
| 0.23543687969446145, |
| 0.19510107412934283, |
| 0.3818274670839303, |
| 0.2794133839011189, |
| 1.8331137096881818, |
| 0.0, |
| 1.0105689764022827 |
| ], |
| "std": [ |
| 0.06067129969596863, |
| 0.09202422946691513, |
| 0.0516839325428009, |
| 0.13189035654067993, |
| 0.1705130785703659, |
| 0.5743006467819214, |
| 0.0, |
| 0.3522878885269165 |
| ] |
| } |
| }, |
| "fractal20220817_data": { |
| "action": { |
| "mask": [ |
| true, |
| true, |
| true, |
| true, |
| true, |
| true, |
| false |
| ], |
| "max": [ |
| 2.9984593391418457, |
| 22.09052848815918, |
| 2.7507524490356445, |
| 1.570636510848999, |
| 1.5321086645126343, |
| 1.5691522359848022, |
| 1.0 |
| ], |
| "mean": [ |
| 0.0070160930044949055, |
| 0.006287453696131706, |
| -0.012577778659760952, |
| 0.04329700767993927, |
| -0.005719248205423355, |
| 0.0009455775725655258, |
| 0.535513699054718 |
| ], |
| "min": [ |
| -2.0204520225524902, |
| -5.497899532318115, |
| -2.031663417816162, |
| -1.569917917251587, |
| -1.569892168045044, |
| -1.570419430732727, |
| 0.0 |
| ], |
| "q01": [ |
| -0.22409614741802214, |
| -0.14817706286907195, |
| -0.2316769856214523, |
| -0.3531744807958603, |
| -0.4192173588275909, |
| -0.4368023717403412, |
| 0.0 |
| ], |
| "q99": [ |
| 0.17836674451827994, |
| 0.14942033141851413, |
| 0.21879618436098075, |
| 0.5897578334808342, |
| 0.35292886614799457, |
| 0.4487913608551022, |
| 1.0 |
| ], |
| "std": [ |
| 0.0693204402923584, |
| 0.060718413442373276, |
| 0.07362836599349976, |
| 0.15637660026550293, |
| 0.13175784051418304, |
| 0.146182581782341, |
| 0.4972381293773651 |
| ] |
| }, |
| "num_trajectories": 78480, |
| "num_transitions": 3404867, |
| "proprio": { |
| "max": [ |
| 1.0534898042678833, |
| 0.48018959164619446, |
| 1.6896663904190063, |
| 0.9999993443489075, |
| 0.9999874830245972, |
| 0.9554369449615479, |
| 0.9914546012878418, |
| 1.0 |
| ], |
| "mean": [ |
| 0.5597754120826721, |
| -0.0832320973277092, |
| 0.7760888934135437, |
| -0.24704332649707794, |
| 0.49434685707092285, |
| 0.09261937439441681, |
| 0.20921756327152252, |
| 0.42606088519096375 |
| ], |
| "min": [ |
| -0.4436439275741577, |
| -0.9970501065254211, |
| -0.006579156965017319, |
| -0.8643477559089661, |
| -0.7079970240592957, |
| -0.7688722014427185, |
| -0.4999994933605194, |
| 0.0 |
| ], |
| "q01": [ |
| 0.32485780119895935, |
| -0.2830313414335251, |
| 0.14108185589313507, |
| -0.6863587307929992, |
| -0.6807721078395843, |
| -0.36089446604251857, |
| -0.4545843780040741, |
| 0.0 |
| ], |
| "q99": [ |
| 0.8753369486331939, |
| 0.21245125174522383, |
| 1.072377111911773, |
| 0.9382593035697933, |
| 0.9568788063526152, |
| 0.4598054605722425, |
| 0.7213053238391873, |
| 1.0 |
| ], |
| "std": [ |
| 0.12438706308603287, |
| 0.1155894473195076, |
| 0.24558264017105103, |
| 0.5133047699928284, |
| 0.522326648235321, |
| 0.16654427349567413, |
| 0.27579590678215027, |
| 0.4560266435146332 |
| ] |
| } |
| } |
| }, |
| "num_attention_heads": 28, |
| "num_hidden_layers": 28, |
| "num_key_value_heads": 4, |
| "pad_to_multiple_of": 8, |
| "rms_norm_eps": 1e-06, |
| "rope_scaling": null, |
| "rope_theta": 1000000.0, |
| "sliding_window": null, |
| "tie_word_embeddings": false, |
| "torch_dtype": "float16", |
| "transformers_version": "4.50.3", |
| "use_cache": false, |
| "use_sliding_window": false, |
| "use_token_compression": false, |
| "vision_encoder": "videollamae3vision_encoder", |
| "vision_encoder_config": { |
| "_attn_implementation_autoset": false, |
| "_name_or_path": "", |
| "add_cross_attention": false, |
| "architectures": null, |
| "attention_dropout": 0.0, |
| "bad_words_ids": null, |
| "begin_suppress_tokens": null, |
| "bos_token_id": null, |
| "chunk_size_feed_forward": 0, |
| "cross_attention_hidden_size": null, |
| "decoder_start_token_id": null, |
| "diversity_penalty": 0.0, |
| "do_sample": false, |
| "early_stopping": false, |
| "encoder_no_repeat_ngram_size": 0, |
| "eos_token_id": null, |
| "exponential_decay_length_penalty": null, |
| "finetuning_task": null, |
| "forced_bos_token_id": null, |
| "forced_eos_token_id": null, |
| "hidden_act": "gelu_pytorch_tanh", |
| "hidden_size": 1152, |
| "id2label": { |
| "0": "LABEL_0", |
| "1": "LABEL_1" |
| }, |
| "intermediate_size": 4304, |
| "is_decoder": false, |
| "is_encoder_decoder": false, |
| "label2id": { |
| "LABEL_0": 0, |
| "LABEL_1": 1 |
| }, |
| "layer_norm_eps": 1e-06, |
| "length_penalty": 1.0, |
| "max_length": 20, |
| "min_length": 0, |
| "model_type": "videollama3_vision_encoder", |
| "no_repeat_ngram_size": 0, |
| "num_attention_heads": 16, |
| "num_beam_groups": 1, |
| "num_beams": 1, |
| "num_channels": 3, |
| "num_hidden_layers": 27, |
| "num_return_sequences": 1, |
| "output_attentions": false, |
| "output_hidden_states": false, |
| "output_scores": false, |
| "pad_token_id": null, |
| "patch_size": 14, |
| "prefix": null, |
| "problem_type": null, |
| "pruned_heads": {}, |
| "remove_invalid_values": false, |
| "repetition_penalty": 1.0, |
| "return_dict": true, |
| "return_dict_in_generate": false, |
| "sep_token_id": null, |
| "suppress_tokens": null, |
| "task_specific_params": null, |
| "temperature": 1.0, |
| "tf_legacy_loss": false, |
| "tie_encoder_decoder": false, |
| "tie_word_embeddings": true, |
| "tokenizer_class": null, |
| "top_k": 50, |
| "top_p": 1.0, |
| "torch_dtype": null, |
| "torchscript": false, |
| "typical_p": 1.0, |
| "use_bfloat16": false |
| }, |
| "vocab_size": 152064 |
| } |
|
|