Spaces:

UII-AI
/

MedVidBench-Leaderboard

Running

App Files Community

MedGRPO Team committed on Jan 7

Commit

83aad2b

1 Parent(s): 4510cf8

Make validation flexible: accept both 'answer'/'response' and 'gnd'/'ground_truth' field names

Browse files

Files changed (1) hide show

app.py +31 -9

app.py CHANGED Viewed

@@ -190,8 +190,8 @@ def validate_results_file(file_path: str) -> Tuple[bool, str]:
     [
         {
             "question": "...",
-            "response": "...",
-            "ground_truth": "...",
             "qa_type": "tal/stg/next_action/dvc/vs/rc/skill_assessment/cvs_assessment",
             "metadata": {"video_id": "...", "fps": "...", ...},
             "data_source": "AVOS/CholecT50/...",
@@ -199,6 +199,11 @@ def validate_results_file(file_path: str) -> Tuple[bool, str]:
         },
         ...
     ]
     """
     try:
         with open(file_path, 'r') as f:
@@ -215,12 +220,24 @@ def validate_results_file(file_path: str) -> Tuple[bool, str]:
         if len(records) == 0:
             return False, "Empty results file"
-        # Check first record has required fields
         sample = records[0]
-        required_fields = ["question", "response", "qa_type"]
-        missing = [f for f in required_fields if f not in sample]
-        if missing:
-            return False, f"Missing required fields: {missing}"
         # Check qa_type is valid
         valid_qa_types = ["tal", "stg", "next_action", "dense_captioning", "video_summary", "region_caption",
@@ -571,8 +588,8 @@ with gr.Blocks(title="MedGRPO Leaderboard", theme=gr.themes.Soft()) as demo:
             [
               {
                 "question": "<video>\\nQuestion text...",
-                "response": "Model's answer...",
-                "ground_truth": "Correct answer...",
                 "qa_type": "tal",
                 "metadata": {"video_id": "...", "fps": "1.0", ...},
                 "data_source": "AVOS",
@@ -582,6 +599,11 @@ with gr.Blocks(title="MedGRPO Leaderboard", theme=gr.themes.Soft()) as demo:
             ]
             ```
             **Valid qa_types**: `tal`, `stg`, `next_action`, `dense_captioning`, `video_summary`, `region_caption`, `skill_assessment`, `cvs_assessment`
             #### ⚙️ Evaluation Process

     [
         {
             "question": "...",
+            "response": "..." or "answer": "...",  # Either field name accepted
+            "ground_truth": "..." or "gnd": "...",  # Either field name accepted
             "qa_type": "tal/stg/next_action/dvc/vs/rc/skill_assessment/cvs_assessment",
             "metadata": {"video_id": "...", "fps": "...", ...},
             "data_source": "AVOS/CholecT50/...",
         },
         ...
     ]
+    Notes:
+    - Accepts both 'response' and 'answer' for model output
+    - Accepts both 'ground_truth' and 'gnd' for reference answer
+    - Can be either a list or dict (dict values will be extracted)
     """
     try:
         with open(file_path, 'r') as f:
         if len(records) == 0:
             return False, "Empty results file"
+        # Check first record has required fields (flexible field names)
         sample = records[0]
+        # Check for question field
+        if "question" not in sample:
+            return False, "Missing required field: 'question'"
+        # Check for response (accept 'response' or 'answer')
+        if "response" not in sample and "answer" not in sample:
+            return False, "Missing required field: 'response' or 'answer'"
+        # Check for ground truth (accept 'ground_truth' or 'gnd')
+        if "ground_truth" not in sample and "gnd" not in sample:
+            return False, "Missing required field: 'ground_truth' or 'gnd'"
+        # Check for qa_type
+        if "qa_type" not in sample:
+            return False, "Missing required field: 'qa_type'"
         # Check qa_type is valid
         valid_qa_types = ["tal", "stg", "next_action", "dense_captioning", "video_summary", "region_caption",
             [
               {
                 "question": "<video>\\nQuestion text...",
+                "response": "Model's answer...",  // or "answer"
+                "ground_truth": "Correct answer...",  // or "gnd"
                 "qa_type": "tal",
                 "metadata": {"video_id": "...", "fps": "1.0", ...},
                 "data_source": "AVOS",
             ]
             ```
+            **Note**: Both field naming conventions are accepted:
+            - Model output: `response` or `answer`
+            - Reference answer: `ground_truth` or `gnd`
+            - Format can be list or dict (dict values will be extracted)
             **Valid qa_types**: `tal`, `stg`, `next_action`, `dense_captioning`, `video_summary`, `region_caption`, `skill_assessment`, `cvs_assessment`
             #### ⚙️ Evaluation Process