Robotics
PyTorch
Cosmos
xperience10m_task_baseline_suite
embodied-ai
multimodal
xperience-10m
baseline
evaluation
qwen3-omni
Instructions to use cy0307/ropedia-xperience-10m-task-baselines with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Cosmos
How to use cy0307/ropedia-xperience-10m-task-baselines with Cosmos:
# No code snippets available yet for this library. # To use this model, check the repository files and the library's documentation. # Want to help? PRs adding snippets are welcome at: # https://github.com/huggingface/huggingface.js
- Notebooks
- Google Colab
- Kaggle
File size: 27,893 Bytes
697d349 a8124a8 697d349 a8124a8 697d349 a8124a8 697d349 a8124a8 697d349 a8124a8 697d349 a8124a8 697d349 a8124a8 697d349 a8124a8 697d349 a8124a8 697d349 a8124a8 697d349 a8124a8 697d349 a8124a8 697d349 a8124a8 697d349 a8124a8 697d349 a8124a8 697d349 a8124a8 697d349 a8124a8 697d349 a8124a8 697d349 a8124a8 697d349 a8124a8 697d349 a8124a8 697d349 a8124a8 697d349 a8124a8 697d349 a8124a8 697d349 a8124a8 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 | task,task_display,modality_group,modality_display,status,score,primary_metric,primary_metric_value,target_variant,target_source_overlap,reason,accuracy,macro_f1,balanced_accuracy,num_classes,num_train,num_test,unseen_test_classes,unseen_test_class_count,mse,mae,r2,micro_f1,exact_match,precision,recall,num_objects,mrr,top1_accuracy,top5_accuracy,top10_accuracy,median_rank,mean_rank,num_queries
timeline_action,Current Action Recognition,all_features,All Features,computed,0.00905456968081885,macro_f1,0.00905456968081885,,false,,0.017241379310344827,0.00905456968081885,0.01720647773279352,19,813,348,Place item on table|Wait/Prepare for pouring|Pour coffee|Pour milk into coffee,4,,,,,,,,,,,,,,,
timeline_action,Current Action Recognition,video,Video,computed,0.0066280033140016575,macro_f1,0.0066280033140016575,,false,,0.011494252873563218,0.0066280033140016575,0.0036199095022624436,19,813,348,Place item on table|Wait/Prepare for pouring|Pour coffee|Pour milk into coffee,4,,,,,,,,,,,,,,,
timeline_action,Current Action Recognition,depth,Depth,computed,0.0030075187969924814,macro_f1,0.0030075187969924814,,false,,0.005747126436781609,0.0030075187969924814,0.001619433198380567,19,813,348,Place item on table|Wait/Prepare for pouring|Pour coffee|Pour milk into coffee,4,,,,,,,,,,,,,,,
timeline_action,Current Action Recognition,pose_slam,Pose + SLAM,computed,0.0,macro_f1,0.0,,false,,0.0,0.0,0.0,19,813,348,Place item on table|Wait/Prepare for pouring|Pour coffee|Pour milk into coffee,4,,,,,,,,,,,,,,,
timeline_action,Current Action Recognition,motion_capture,Motion Capture,computed,0.0055147058823529415,macro_f1,0.0055147058823529415,,false,,0.008620689655172414,0.0055147058823529415,0.0028846153846153848,19,813,348,Place item on table|Wait/Prepare for pouring|Pour coffee|Pour milk into coffee,4,,,,,,,,,,,,,,,
timeline_action,Current Action Recognition,inertial,Inertial,computed,0.003055767761650115,macro_f1,0.003055767761650115,,false,,0.005747126436781609,0.003055767761650115,0.0018099547511312218,19,813,348,Place item on table|Wait/Prepare for pouring|Pour coffee|Pour milk into coffee,4,,,,,,,,,,,,,,,
timeline_action,Current Action Recognition,audio,Audio,computed,0.006925207756232688,macro_f1,0.006925207756232688,,false,,0.014367816091954023,0.006925207756232688,0.004048582995951417,19,813,348,Place item on table|Wait/Prepare for pouring|Pour coffee|Pour milk into coffee,4,,,,,,,,,,,,,,,
timeline_action,Current Action Recognition,language,Language,computed,0.027777777777777776,macro_f1,0.027777777777777776,,false,,0.05747126436781609,0.027777777777777776,0.03615384615384616,19,813,348,Place item on table|Wait/Prepare for pouring|Pour coffee|Pour milk into coffee,4,,,,,,,,,,,,,,,
timeline_action,Current Action Recognition,no_language,All Except Language,computed,0.0043859649122807015,macro_f1,0.0043859649122807015,,false,,0.008620689655172414,0.0043859649122807015,0.0024291497975708503,19,813,348,Place item on table|Wait/Prepare for pouring|Pour coffee|Pour milk into coffee,4,,,,,,,,,,,,,,,
timeline_subtask,Current Subtask Recognition,all_features,All Features,computed,0.011256354393609296,macro_f1,0.011256354393609296,,false,,0.02586206896551724,0.011256354393609296,0.02788220551378446,15,813,348,Move bottle to coffee equipment|Prepare for pouring|Pour coffee|Pour milk into coffee,4,,,,,,,,,,,,,,,
timeline_subtask,Current Subtask Recognition,video,Video,computed,0.011740041928721174,macro_f1,0.011740041928721174,,false,,0.040229885057471264,0.011740041928721174,0.01637426900584795,15,813,348,Move bottle to coffee equipment|Prepare for pouring|Pour coffee|Pour milk into coffee,4,,,,,,,,,,,,,,,
timeline_subtask,Current Subtask Recognition,depth,Depth,computed,0.009467455621301775,macro_f1,0.009467455621301775,,false,,0.022988505747126436,0.009467455621301775,0.010796221322537112,15,813,348,Move bottle to coffee equipment|Prepare for pouring|Pour coffee|Pour milk into coffee,4,,,,,,,,,,,,,,,
timeline_subtask,Current Subtask Recognition,pose_slam,Pose + SLAM,computed,0.002331002331002331,macro_f1,0.002331002331002331,,false,,0.0028735632183908046,0.002331002331002331,0.001349527665317139,15,813,348,Move bottle to coffee equipment|Prepare for pouring|Pour coffee|Pour milk into coffee,4,,,,,,,,,,,,,,,
timeline_subtask,Current Subtask Recognition,motion_capture,Motion Capture,computed,0.006756756756756756,macro_f1,0.006756756756756756,,false,,0.008620689655172414,0.006756756756756756,0.0043859649122807015,15,813,348,Move bottle to coffee equipment|Prepare for pouring|Pour coffee|Pour milk into coffee,4,,,,,,,,,,,,,,,
timeline_subtask,Current Subtask Recognition,inertial,Inertial,computed,0.004662004662004662,macro_f1,0.004662004662004662,,false,,0.005747126436781609,0.004662004662004662,0.002699055330634278,15,813,348,Move bottle to coffee equipment|Prepare for pouring|Pour coffee|Pour milk into coffee,4,,,,,,,,,,,,,,,
timeline_subtask,Current Subtask Recognition,audio,Audio,computed,0.016194331983805668,macro_f1,0.016194331983805668,,false,,0.022988505747126436,0.016194331983805668,0.010796221322537112,15,813,348,Move bottle to coffee equipment|Prepare for pouring|Pour coffee|Pour milk into coffee,4,,,,,,,,,,,,,,,
timeline_subtask,Current Subtask Recognition,language,Language,computed,0.04828150572831424,macro_f1,0.04828150572831424,,false,,0.14655172413793102,0.04828150572831424,0.0939327485380117,15,813,348,Move bottle to coffee equipment|Prepare for pouring|Pour coffee|Pour milk into coffee,4,,,,,,,,,,,,,,,
timeline_subtask,Current Subtask Recognition,no_language,All Except Language,computed,0.007427055702917773,macro_f1,0.007427055702917773,,false,,0.020114942528735632,0.007427055702917773,0.009446693657219972,15,813,348,Move bottle to coffee equipment|Prepare for pouring|Pour coffee|Pour milk into coffee,4,,,,,,,,,,,,,,,
transition_detection,Action Transition Detection,all_features,All Features,computed,0.46213292117465227,macro_f1,0.46213292117465227,,false,,0.8591954022988506,0.46213292117465227,0.4503012048192771,2,813,348,,0,,,,,,,,,,,,,,,
transition_detection,Action Transition Detection,video,Video,computed,0.46625766871165636,macro_f1,0.46625766871165636,,false,,0.8735632183908046,0.46625766871165636,0.4578313253012048,2,813,348,,0,,,,,,,,,,,,,,,
transition_detection,Action Transition Detection,depth,Depth,computed,0.4604651162790698,macro_f1,0.4604651162790698,,false,,0.853448275862069,0.4604651162790698,0.44728915662650603,2,813,348,,0,,,,,,,,,,,,,,,
transition_detection,Action Transition Detection,pose_slam,Pose + SLAM,computed,0.48444444444444446,macro_f1,0.48444444444444446,,false,,0.9396551724137931,0.48444444444444446,0.4924698795180723,2,813,348,,0,,,,,,,,,,,,,,,
transition_detection,Action Transition Detection,motion_capture,Motion Capture,computed,0.5439056356487549,macro_f1,0.5439056356487549,,false,,0.896551724137931,0.5439056356487549,0.5591114457831325,2,813,348,,0,,,,,,,,,,,,,,,
transition_detection,Action Transition Detection,inertial,Inertial,computed,0.48520710059171596,macro_f1,0.48520710059171596,,false,,0.9425287356321839,0.48520710059171596,0.4939759036144578,2,813,348,,0,,,,,,,,,,,,,,,
transition_detection,Action Transition Detection,audio,Audio,computed,0.48444444444444446,macro_f1,0.48444444444444446,,false,,0.9396551724137931,0.48444444444444446,0.4924698795180723,2,813,348,,0,,,,,,,,,,,,,,,
transition_detection,Action Transition Detection,language,Language,computed,0.7051957831325302,macro_f1,0.7051957831325302,,false,,0.9482758620689655,0.7051957831325302,0.7051957831325302,2,813,348,,0,,,,,,,,,,,,,,,
transition_detection,Action Transition Detection,no_language,All Except Language,computed,0.45454545454545453,macro_f1,0.45454545454545453,,false,,0.8333333333333334,0.45454545454545453,0.4367469879518072,2,813,348,,0,,,,,,,,,,,,,,,
next_action,Next-Action Prediction,all_features,All Features,computed,0.004971002485501243,macro_f1,0.004971002485501243,future action label from windows.csv,false,,0.008645533141210375,0.004971002485501243,0.0027573529411764708,19,810,347,Place item on table|Wait/Prepare for pouring|Pour coffee|Pour milk into coffee,4,,,,,,,,,,,,,,,
next_action,Next-Action Prediction,video,Video,computed,0.006349206349206349,macro_f1,0.006349206349206349,future action label from windows.csv,false,,0.011527377521613832,0.006349206349206349,0.003472222222222222,19,810,347,Place item on table|Wait/Prepare for pouring|Pour coffee|Pour milk into coffee,4,,,,,,,,,,,,,,,
next_action,Next-Action Prediction,depth,Depth,computed,0.001594896331738437,macro_f1,0.001594896331738437,future action label from windows.csv,false,,0.002881844380403458,0.001594896331738437,0.0008223684210526315,19,810,347,Place item on table|Wait/Prepare for pouring|Pour coffee|Pour milk into coffee,4,,,,,,,,,,,,,,,
next_action,Next-Action Prediction,pose_slam,Pose + SLAM,computed,0.0,macro_f1,0.0,future action label from windows.csv,false,,0.0,0.0,0.0,19,810,347,Place item on table|Wait/Prepare for pouring|Pour coffee|Pour milk into coffee,4,,,,,,,,,,,,,,,
next_action,Next-Action Prediction,motion_capture,Motion Capture,computed,0.00322061191626409,macro_f1,0.00322061191626409,future action label from windows.csv,false,,0.005763688760806916,0.00322061191626409,0.001736111111111111,19,810,347,Place item on table|Wait/Prepare for pouring|Pour coffee|Pour milk into coffee,4,,,,,,,,,,,,,,,
next_action,Next-Action Prediction,inertial,Inertial,computed,0.00196078431372549,macro_f1,0.00196078431372549,future action label from windows.csv,false,,0.002881844380403458,0.00196078431372549,0.0010416666666666667,19,810,347,Place item on table|Wait/Prepare for pouring|Pour coffee|Pour milk into coffee,4,,,,,,,,,,,,,,,
next_action,Next-Action Prediction,audio,Audio,computed,0.004761904761904762,macro_f1,0.004761904761904762,future action label from windows.csv,false,,0.008645533141210375,0.004761904761904762,0.0026041666666666665,19,810,347,Place item on table|Wait/Prepare for pouring|Pour coffee|Pour milk into coffee,4,,,,,,,,,,,,,,,
next_action,Next-Action Prediction,language,Language,computed,0.04193971166448231,macro_f1,0.04193971166448231,future action label from windows.csv,false,,0.1844380403458213,0.04193971166448231,0.07142857142857142,19,810,347,Place item on table|Wait/Prepare for pouring|Pour coffee|Pour milk into coffee,4,,,,,,,,,,,,,,,
next_action,Next-Action Prediction,no_language,All Except Language,computed,0.004576659038901602,macro_f1,0.004576659038901602,future action label from windows.csv,false,,0.008645533141210375,0.004576659038901602,0.0024671052631578946,19,810,347,Place item on table|Wait/Prepare for pouring|Pour coffee|Pour milk into coffee,4,,,,,,,,,,,,,,,
hand_trajectory_forecast,Future Hand Motion Forecasting,all_features,All Features,computed,0.11919872659288855,mae,7.3893513679504395,future hand feature vector from shared_windows.npz,true,,,,,,,,,,4377.7705078125,7.3893513679504395,-4562.174276179636,,,,,,,,,,,,
hand_trajectory_forecast,Future Hand Motion Forecasting,video,Video,computed,0.4956350584748486,mae,1.0176135301589966,future hand feature vector from shared_windows.npz,false,,,,,,,,,,1.7896661758422852,1.0176135301589966,-0.8654605965108897,,,,,,,,,,,,
hand_trajectory_forecast,Future Hand Motion Forecasting,depth,Depth,computed,0.04014931629731973,mae,23.907024383544922,future hand feature vector from shared_windows.npz,false,,,,,,,,,,72553.34375,23.907024383544922,-75625.0610993949,,,,,,,,,,,,
hand_trajectory_forecast,Future Hand Motion Forecasting,pose_slam,Pose + SLAM,computed,0.5611809661721311,mae,0.7819563746452332,future hand feature vector from shared_windows.npz,false,,,,,,,,,,1.2600995302200317,0.7819563746452332,-0.3134661692106211,,,,,,,,,,,,
hand_trajectory_forecast,Future Hand Motion Forecasting,motion_capture,Motion Capture,computed,0.0839705207556719,mae,10.908941268920898,future hand feature vector from shared_windows.npz,true,,,,,,,,,,6293.8876953125,10.908941268920898,-6559.441194341517,,,,,,,,,,,,
hand_trajectory_forecast,Future Hand Motion Forecasting,inertial,Inertial,computed,0.5679183061202404,mae,0.7608166337013245,future hand feature vector from shared_windows.npz,false,,,,,,,,,,1.1916581392288208,0.7608166337013245,-0.24212624676650907,,,,,,,,,,,,
hand_trajectory_forecast,Future Hand Motion Forecasting,audio,Audio,computed,0.4677426495622507,mae,1.137927770614624,future hand feature vector from shared_windows.npz,false,,,,,,,,,,3.675895929336548,1.137927770614624,-2.831574197208222,,,,,,,,,,,,
hand_trajectory_forecast,Future Hand Motion Forecasting,language,Language,computed,0.451525705011023,mae,1.2147133350372314,future hand feature vector from shared_windows.npz,false,,,,,,,,,,2.3450045585632324,1.2147133350372314,-1.4443180759924243,,,,,,,,,,,,
hand_trajectory_forecast,Future Hand Motion Forecasting,no_language,All Except Language,computed,0.10781931019562568,mae,8.274776458740234,future hand feature vector from shared_windows.npz,true,,,,,,,,,,5327.0849609375,8.274776458740234,-5551.693117832905,,,,,,,,,,,,
contact_prediction,Contact State Prediction,all_features,All Features,computed,1.0,macro_f1,1.0,contact proxy derived from body_contacts feature block,false,,1.0,1.0,1.0,1,813,348,,0,,,,,,,,,,,,,,,
contact_prediction,Contact State Prediction,video,Video,computed,1.0,macro_f1,1.0,contact proxy derived from body_contacts feature block,false,,1.0,1.0,1.0,1,813,348,,0,,,,,,,,,,,,,,,
contact_prediction,Contact State Prediction,depth,Depth,computed,1.0,macro_f1,1.0,contact proxy derived from body_contacts feature block,false,,1.0,1.0,1.0,1,813,348,,0,,,,,,,,,,,,,,,
contact_prediction,Contact State Prediction,pose_slam,Pose + SLAM,computed,1.0,macro_f1,1.0,contact proxy derived from body_contacts feature block,false,,1.0,1.0,1.0,1,813,348,,0,,,,,,,,,,,,,,,
contact_prediction,Contact State Prediction,motion_capture,Motion Capture,computed,1.0,macro_f1,1.0,contact proxy derived from body_contacts feature block,false,,1.0,1.0,1.0,1,813,348,,0,,,,,,,,,,,,,,,
contact_prediction,Contact State Prediction,inertial,Inertial,computed,1.0,macro_f1,1.0,contact proxy derived from body_contacts feature block,false,,1.0,1.0,1.0,1,813,348,,0,,,,,,,,,,,,,,,
contact_prediction,Contact State Prediction,audio,Audio,computed,1.0,macro_f1,1.0,contact proxy derived from body_contacts feature block,false,,1.0,1.0,1.0,1,813,348,,0,,,,,,,,,,,,,,,
contact_prediction,Contact State Prediction,language,Language,computed,1.0,macro_f1,1.0,contact proxy derived from body_contacts feature block,false,,1.0,1.0,1.0,1,813,348,,0,,,,,,,,,,,,,,,
contact_prediction,Contact State Prediction,no_language,All Except Language,computed,1.0,macro_f1,1.0,contact proxy derived from body_contacts feature block,false,,1.0,1.0,1.0,1,813,348,,0,,,,,,,,,,,,,,,
object_relevance,Relevant Object Prediction,all_features,All Features,computed,0.1830870279146141,micro_f1,0.1830870279146141,object sets exported from annotation.hdf5 caption_frame_info_map,true,,,0.06426101593989779,,,813,348,,,,,,0.1830870279146141,0.031609195402298854,0.20667284522706209,0.1643330876934414,34,,,,,,,
object_relevance,Relevant Object Prediction,video,Video,computed,0.14804270462633454,micro_f1,0.14804270462633454,object sets exported from annotation.hdf5 caption_frame_info_map,false,,,0.04379950367755125,,,813,348,,,,,,0.14804270462633454,0.008620689655172414,0.14315209910529939,0.15327929255711129,34,,,,,,,
object_relevance,Relevant Object Prediction,depth,Depth,computed,0.20134228187919462,micro_f1,0.20134228187919462,object sets exported from annotation.hdf5 caption_frame_info_map,false,,,0.0649677953734521,,,813,348,,,,,,0.20134228187919462,0.011494252873563218,0.18484288354898337,0.2210759027266028,34,,,,,,,
object_relevance,Relevant Object Prediction,pose_slam,Pose + SLAM,computed,0.19528071602929212,micro_f1,0.19528071602929212,object sets exported from annotation.hdf5 caption_frame_info_map,false,,,0.05592381693865655,,,813,348,,,,,,0.19528071602929212,0.0,0.21798365122615804,0.17686072218128224,34,,,,,,,
object_relevance,Relevant Object Prediction,motion_capture,Motion Capture,computed,0.11607786589762077,micro_f1,0.11607786589762077,object sets exported from annotation.hdf5 caption_frame_info_map,false,,,0.045395437036303915,,,813,348,,,,,,0.11607786589762077,0.0028735632183908046,0.11362032462949895,0.11864406779661017,34,,,,,,,
object_relevance,Relevant Object Prediction,inertial,Inertial,computed,0.1716082659478886,micro_f1,0.1716082659478886,object sets exported from annotation.hdf5 caption_frame_info_map,false,,,0.04806995854957751,,,813,348,,,,,,0.1716082659478886,0.0,0.21979286536248563,0.14075165806927045,34,,,,,,,
object_relevance,Relevant Object Prediction,audio,Audio,computed,0.15894039735099336,micro_f1,0.15894039735099336,object sets exported from annotation.hdf5 caption_frame_info_map,false,,,0.0640376063191102,,,813,348,,,,,,0.15894039735099336,0.005747126436781609,0.16859504132231404,0.1503316138540899,34,,,,,,,
object_relevance,Relevant Object Prediction,language,Language,computed,0.23021032504780117,micro_f1,0.23021032504780117,object sets exported from annotation.hdf5 caption_frame_info_map,true,,,0.0947530205484707,,,813,348,,,,,,0.23021032504780117,0.15229885057471265,0.23926868044515104,0.22181282240235814,34,,,,,,,
object_relevance,Relevant Object Prediction,no_language,All Except Language,computed,0.15813953488372093,micro_f1,0.15813953488372093,object sets exported from annotation.hdf5 caption_frame_info_map,false,,,0.05335536055344564,,,813,348,,,,,,0.15813953488372093,0.011494252873563218,0.15368567454798332,0.16285924834193072,34,,,,,,,
caption_grounding,Language-to-Time Grounding,all_features,All Features,computed,0.20975713431835175,mrr,0.20975713431835175,,true,,,,,,,,,,,,,,,,,,0.20975713431835175,0.08908045977011494,0.32471264367816094,0.4454022988505747,13.0,23.19827651977539,348
caption_grounding,Language-to-Time Grounding,video,Video,computed,0.022670436650514603,mrr,0.022670436650514603,,false,,,,,,,,,,,,,,,,,,0.022670436650514603,0.0028735632183908046,0.02586206896551724,0.034482758620689655,162.0,161.4770050048828,348
caption_grounding,Language-to-Time Grounding,depth,Depth,computed,0.02443847246468067,mrr,0.02443847246468067,,false,,,,,,,,,,,,,,,,,,0.02443847246468067,0.0028735632183908046,0.020114942528735632,0.03735632183908046,114.0,137.90805053710938,348
caption_grounding,Language-to-Time Grounding,pose_slam,Pose + SLAM,computed,0.02946249581873417,mrr,0.02946249581873417,,false,,,,,,,,,,,,,,,,,,0.02946249581873417,0.008620689655172414,0.028735632183908046,0.04597701149425287,143.5,155.4712677001953,348
caption_grounding,Language-to-Time Grounding,motion_capture,Motion Capture,computed,0.030569594353437424,mrr,0.030569594353437424,,false,,,,,,,,,,,,,,,,,,0.030569594353437424,0.008620689655172414,0.02586206896551724,0.04885057471264368,110.5,130.32470703125,348
caption_grounding,Language-to-Time Grounding,inertial,Inertial,computed,0.02470344305038452,mrr,0.02470344305038452,,false,,,,,,,,,,,,,,,,,,0.02470344305038452,0.0028735632183908046,0.022988505747126436,0.04597701149425287,123.0,138.61207580566406,348
caption_grounding,Language-to-Time Grounding,audio,Audio,computed,0.03902389109134674,mrr,0.03902389109134674,,false,,,,,,,,,,,,,,,,,,0.03902389109134674,0.011494252873563218,0.04885057471264368,0.07758620689655173,141.0,152.14942932128906,348
caption_grounding,Language-to-Time Grounding,language,Language,computed,0.24527303874492645,mrr,0.24527303874492645,,true,,,,,,,,,,,,,,,,,,0.24527303874492645,0.12643678160919541,0.34770114942528735,0.47126436781609193,12.0,15.106322288513184,348
caption_grounding,Language-to-Time Grounding,no_language,All Except Language,computed,0.03208239749073982,mrr,0.03208239749073982,,false,,,,,,,,,,,,,,,,,,0.03208239749073982,0.0028735632183908046,0.040229885057471264,0.06896551724137931,132.0,137.30746459960938,348
cross_modal_retrieval,Cross-Modal Window Retrieval,all_features,All Features,computed,0.972757875919342,mrr,0.972757875919342,,true,,,,,,,,,,,,,,,,,,0.972757875919342,0.9683908045977011,0.9741379310344828,0.9798850574712644,1.0,2.0862069129943848,348
cross_modal_retrieval,Cross-Modal Window Retrieval,video,Video,computed,0.9701701402664185,mrr,0.9701701402664185,,true,,,,,,,,,,,,,,,,,,0.9701701402664185,0.9626436781609196,0.9798850574712644,0.9798850574712644,1.0,3.844827651977539,348
cross_modal_retrieval,Cross-Modal Window Retrieval,depth,Depth,computed,0.6656051278114319,mrr,0.6656051278114319,,true,,,,,,,,,,,,,,,,,,0.6656051278114319,0.5660919540229885,0.7902298850574713,0.8620689655172413,1.0,5.729885101318359,348
cross_modal_retrieval,Cross-Modal Window Retrieval,pose_slam,Pose + SLAM,computed,0.42622581124305725,mrr,0.42622581124305725,,false,,,,,,,,,,,,,,,,,,0.42622581124305725,0.3017241379310345,0.5488505747126436,0.6551724137931034,4.0,15.623562812805176,348
cross_modal_retrieval,Cross-Modal Window Retrieval,motion_capture,Motion Capture,computed,0.2553335726261139,mrr,0.2553335726261139,,false,,,,,,,,,,,,,,,,,,0.2553335726261139,0.15804597701149425,0.35344827586206895,0.3994252873563218,21.5,49.181034088134766,348
cross_modal_retrieval,Cross-Modal Window Retrieval,inertial,Inertial,computed,0.2840072810649872,mrr,0.2840072810649872,,false,,,,,,,,,,,,,,,,,,0.2840072810649872,0.16379310344827586,0.3735632183908046,0.5229885057471264,10.0,20.577587127685547,348
cross_modal_retrieval,Cross-Modal Window Retrieval,audio,Audio,computed,0.02334633097052574,mrr,0.02334633097052574,,false,,,,,,,,,,,,,,,,,,0.02334633097052574,0.005747126436781609,0.014367816091954023,0.031609195402298854,152.5,161.44540405273438,348
cross_modal_retrieval,Cross-Modal Window Retrieval,language,Language,computed,0.031006580218672752,mrr,0.031006580218672752,,false,,,,,,,,,,,,,,,,,,0.031006580218672752,0.005747126436781609,0.031609195402298854,0.05747126436781609,138.0,146.83045959472656,348
cross_modal_retrieval,Cross-Modal Window Retrieval,no_language,All Except Language,computed,0.9725185036659241,mrr,0.9725185036659241,,true,,,,,,,,,,,,,,,,,,0.9725185036659241,0.9683908045977011,0.9741379310344828,0.9770114942528736,1.0,2.181034564971924,348
modality_reconstruction,Sensor-to-Visual Reconstruction,all_features,All Features,computed,0.20013932390163613,mae,3.9965193271636963,,true,,,,,,,,,,4488.36962890625,3.9965193271636963,0.4789615393741845,,,,,,,,,,,,
modality_reconstruction,Sensor-to-Visual Reconstruction,video,Video,computed,0.611318891594774,mae,0.635807454586029,,true,,,,,,,,,,8679.7548828125,0.635807454586029,-0.007601057526781085,,,,,,,,,,,,
modality_reconstruction,Sensor-to-Visual Reconstruction,depth,Depth,computed,0.062215385980961393,mae,15.07319450378418,,true,,,,,,,,,,38000.71875,15.07319450378418,-3.4113648334674167,,,,,,,,,,,,
modality_reconstruction,Sensor-to-Visual Reconstruction,pose_slam,Pose + SLAM,computed,0.5359235021455191,mae,0.8659379482269287,,false,,,,,,,,,,8678.9697265625,0.8659379482269287,-0.007509963078260462,,,,,,,,,,,,
modality_reconstruction,Sensor-to-Visual Reconstruction,motion_capture,Motion Capture,computed,0.07724422027114182,mae,11.945952415466309,,false,,,,,,,,,,16462.224609375,11.945952415466309,-0.911039534454414,,,,,,,,,,,,
modality_reconstruction,Sensor-to-Visual Reconstruction,inertial,Inertial,computed,0.5185351442505587,mae,0.9285095930099487,,false,,,,,,,,,,8680.1376953125,0.9285095930099487,-0.007645498747803181,,,,,,,,,,,,
modality_reconstruction,Sensor-to-Visual Reconstruction,audio,Audio,computed,0.4237932455142579,mae,1.359641194343567,,false,,,,,,,,,,8681.8916015625,1.359641194343567,-0.007849137404613904,,,,,,,,,,,,
modality_reconstruction,Sensor-to-Visual Reconstruction,language,Language,computed,0.411308516935754,mae,1.4312649965286255,,false,,,,,,,,,,8681.390625,1.4312649965286255,-0.00779095493123938,,,,,,,,,,,,
modality_reconstruction,Sensor-to-Visual Reconstruction,no_language,All Except Language,computed,0.198028246205017,mae,4.0497846603393555,,true,,,,,,,,,,4352.54638671875,4.0497846603393555,0.49472875505192115,,,,,,,,,,,,
temporal_order,Temporal Order Verification,all_features,All Features,computed,0.5172413793103449,macro_f1,0.5172413793103449,,false,,0.5172413793103449,0.5172413793103449,0.5172413793103449,2,1624,696,,0,,,,,,,,,,,,,,,
temporal_order,Temporal Order Verification,video,Video,computed,0.5172413793103449,macro_f1,0.5172413793103449,,false,,0.5172413793103449,0.5172413793103449,0.5172413793103449,2,1624,696,,0,,,,,,,,,,,,,,,
temporal_order,Temporal Order Verification,depth,Depth,computed,0.49424869738982513,macro_f1,0.49424869738982513,,false,,0.4942528735632184,0.49424869738982513,0.49425287356321834,2,1624,696,,0,,,,,,,,,,,,,,,
temporal_order,Temporal Order Verification,pose_slam,Pose + SLAM,computed,0.5258620689655172,macro_f1,0.5258620689655172,,false,,0.5258620689655172,0.5258620689655172,0.5258620689655172,2,1624,696,,0,,,,,,,,,,,,,,,
temporal_order,Temporal Order Verification,motion_capture,Motion Capture,computed,0.4942528735632184,macro_f1,0.4942528735632184,,false,,0.4942528735632184,0.4942528735632184,0.4942528735632184,2,1624,696,,0,,,,,,,,,,,,,,,
temporal_order,Temporal Order Verification,inertial,Inertial,computed,0.5,macro_f1,0.5,,false,,0.5,0.5,0.5,2,1624,696,,0,,,,,,,,,,,,,,,
temporal_order,Temporal Order Verification,audio,Audio,computed,0.4425287356321839,macro_f1,0.4425287356321839,,false,,0.4425287356321839,0.4425287356321839,0.4425287356321839,2,1624,696,,0,,,,,,,,,,,,,,,
temporal_order,Temporal Order Verification,language,Language,computed,0.4236751152073733,macro_f1,0.4236751152073733,,false,,0.47126436781609193,0.4236751152073733,0.47126436781609193,2,1624,696,,0,,,,,,,,,,,,,,,
temporal_order,Temporal Order Verification,no_language,All Except Language,computed,0.5057471264367817,macro_f1,0.5057471264367817,,false,,0.5057471264367817,0.5057471264367817,0.5057471264367817,2,1624,696,,0,,,,,,,,,,,,,,,
misalignment_detection,Cross-Modal Misalignment Detection,all_features,All Features,computed,0.4159692457248906,macro_f1,0.4159692457248906,,false,,0.4479768786127168,0.4159692457248906,0.44797687861271673,2,1614,692,,0,,,,,,,,,,,,,,,
misalignment_detection,Cross-Modal Misalignment Detection,video,Video,computed,0.49488307322727143,macro_f1,0.49488307322727143,,false,,0.4985549132947977,0.49488307322727143,0.4985549132947977,2,1614,692,,0,,,,,,,,,,,,,,,
misalignment_detection,Cross-Modal Misalignment Detection,depth,Depth,computed,0.46659963973021656,macro_f1,0.46659963973021656,,false,,0.4682080924855491,0.46659963973021656,0.4682080924855491,2,1614,692,,0,,,,,,,,,,,,,,,
misalignment_detection,Cross-Modal Misalignment Detection,pose_slam,Pose + SLAM,computed,0.4929686094043242,macro_f1,0.4929686094043242,,false,,0.5057803468208093,0.4929686094043242,0.5057803468208092,2,1614,692,,0,,,,,,,,,,,,,,,
misalignment_detection,Cross-Modal Misalignment Detection,motion_capture,Motion Capture,computed,0.4133918268956141,macro_f1,0.4133918268956141,,false,,0.4638728323699422,0.4133918268956141,0.4638728323699422,2,1614,692,,0,,,,,,,,,,,,,,,
misalignment_detection,Cross-Modal Misalignment Detection,inertial,Inertial,computed,0.48899072503396884,macro_f1,0.48899072503396884,,false,,0.49421965317919075,0.48899072503396884,0.49421965317919075,2,1614,692,,0,,,,,,,,,,,,,,,
misalignment_detection,Cross-Modal Misalignment Detection,audio,Audio,computed,0.5152624305034046,macro_f1,0.5152624305034046,,false,,0.5187861271676301,0.5152624305034046,0.5187861271676301,2,1614,692,,0,,,,,,,,,,,,,,,
misalignment_detection,Cross-Modal Misalignment Detection,language,Language,computed,0.4942161609504254,macro_f1,0.4942161609504254,,false,,0.5,0.4942161609504254,0.5,2,1614,692,,0,,,,,,,,,,,,,,,
misalignment_detection,Cross-Modal Misalignment Detection,no_language,All Except Language,computed,0.4136154066851563,macro_f1,0.4136154066851563,,false,,0.44942196531791906,0.4136154066851563,0.4494219653179191,2,1614,692,,0,,,,,,,,,,,,,,,
|