{ "mrr": 0.01684125567132316, "median_rank": 180.5, "mean_rank": 178.382183908046, "num_queries": 348, "top1_accuracy": 0.0028735632183908046, "top5_accuracy": 0.014367816091954023, "top10_accuracy": 0.020114942528735632, "task": "caption_grounding", "input": "caption objects/interaction text query + candidate sensor windows", "split": "chronological", "num_train_windows": 813, "num_test_windows": 348, "target_dim": 896, "output": "matching time window", "model": "neural_mlp", "head": "z-score -> MLP projection/regression", "neural_epochs": 80, "neural_hidden_dim": 128, "neural_batch_size": 128, "neural_learning_rate": 0.001, "neural_weight_decay": 0.0001, "neural_dropout": 0.1, "neural_device": "cpu", "train_final_loss": 0.06317874967483723, "task_display_name": "Language Grounding" }