Spaces:

UII-AI
/

MedVidBench-Leaderboard

Running

App Files Community

MedGRPO Team committed on Apr 16

Commit

1db37ad

1 Parent(s): 0cba76d

update

Browse files

Files changed (4) hide show

README.md +9 -9
app.py +13 -13
test_hf_token.py +3 -3
upload_initial_data.py +1 -1

README.md CHANGED Viewed

@@ -20,7 +20,7 @@ tags:
 Interactive leaderboard for evaluating Video-Language Models on the **MedVidBench benchmark** - 8 medical video understanding tasks across 8 surgical datasets.
-🏆 **Live Demo**: [huggingface.co/spaces/UIIAmerica/MedVidBench-Leaderboard](https://huggingface.co/spaces/UIIAmerica/MedVidBench-Leaderboard)
 📄 **Paper**: [arXiv:2512.06581](https://arxiv.org/abs/2512.06581)
@@ -139,7 +139,7 @@ The leaderboard supports **two formats** for submission:
 ### 3. Upload to Leaderboard
-1. Visit the [leaderboard](https://huggingface.co/spaces/UIIAmerica/MedVidBench-Leaderboard)
 2. Go to the **Submit Results** tab
 3. Fill in:
    - **Model Name** (e.g., "Qwen2.5-VL-7B-MedVidBench")
@@ -221,11 +221,11 @@ To compute the **average score** fairly across tasks:
 ## Links
 - 📄 **Paper**: [https://arxiv.org/abs/2512.06581](https://arxiv.org/abs/2512.06581)
-- 🌐 **Project**: [https://uii-america.github.io/MedGRPO/](https://uii-america.github.io/MedGRPO/)
-- 💾 **Dataset**: [https://huggingface.co/datasets/UIIAmerica/MedVidBench](https://huggingface.co/datasets/UIIAmerica/MedVidBench)
-- 💻 **GitHub**: [https://github.com/UII-America/MedGRPO-Code](https://github.com/UII-America/MedGRPO-Code)
-- 🎮 **Demo**: [https://huggingface.co/spaces/UIIAmerica/MedGRPO-Demo](https://huggingface.co/spaces/UIIAmerica/MedGRPO-Demo)
-- 🏆 **Leaderboard**: [https://huggingface.co/spaces/UIIAmerica/MedVidBench-Leaderboard](https://huggingface.co/spaces/UIIAmerica/MedVidBench-Leaderboard)
 ## Citation
@@ -246,5 +246,5 @@ To compute the **average score** fairly across tasks:
 ## Contact
 For questions or issues:
-- Open an issue on [GitHub](https://github.com/UII-America/MedGRPO-Code)
-- Visit the [project page](https://uii-america.github.io/MedGRPO/)

 Interactive leaderboard for evaluating Video-Language Models on the **MedVidBench benchmark** - 8 medical video understanding tasks across 8 surgical datasets.
+🏆 **Live Demo**: [huggingface.co/spaces/UII-AI/MedVidBench-Leaderboard](https://huggingface.co/spaces/UII-AI/MedVidBench-Leaderboard)
 📄 **Paper**: [arXiv:2512.06581](https://arxiv.org/abs/2512.06581)
 ### 3. Upload to Leaderboard
+1. Visit the [leaderboard](https://huggingface.co/spaces/UII-AI/MedVidBench-Leaderboard)
 2. Go to the **Submit Results** tab
 3. Fill in:
    - **Model Name** (e.g., "Qwen2.5-VL-7B-MedVidBench")
 ## Links
 - 📄 **Paper**: [https://arxiv.org/abs/2512.06581](https://arxiv.org/abs/2512.06581)
+- 🌐 **Project**: [https://uii-ai.github.io/MedGRPO/](https://uii-ai.github.io/MedGRPO/)
+- 💾 **Dataset**: [https://huggingface.co/datasets/UII-AI/MedVidBench](https://huggingface.co/datasets/UII-AI/MedVidBench)
+- 💻 **GitHub**: [https://github.com/UII-AI/MedGRPO-Code](https://github.com/UII-AI/MedGRPO-Code)
+- 🎮 **Demo**: [https://huggingface.co/spaces/UII-AI/MedGRPO-Demo](https://huggingface.co/spaces/UII-AI/MedGRPO-Demo)
+- 🏆 **Leaderboard**: [https://huggingface.co/spaces/UII-AI/MedVidBench-Leaderboard](https://huggingface.co/spaces/UII-AI/MedVidBench-Leaderboard)
 ## Citation
 ## Contact
 For questions or issues:
+- Open an issue on [GitHub](https://github.com/UII-AI/MedGRPO-Code)
+- Visit the [project page](https://uii-ai.github.io/MedGRPO/)

app.py CHANGED Viewed

@@ -32,7 +32,7 @@ def load_ground_truth():
         # Download from private repository
         print("⏳ Downloading ground truth from private repository...")
         gt_file = hf_hub_download(
-            repo_id="UIIAmerica/MedVidBench-GroundTruth",
             filename="ground_truth.json",
             repo_type="dataset",
             token=token,
@@ -228,7 +228,7 @@ def load_leaderboard() -> pd.DataFrame:
             print("⏳ Downloading leaderboard from private repository...")
             try:
                 leaderboard_file = hf_hub_download(
-                    repo_id="UIIAmerica/MedVidBench-GroundTruth",
                     filename="leaderboard.json",
                     repo_type="dataset",
                     token=token,
@@ -305,7 +305,7 @@ def save_leaderboard(df: pd.DataFrame):
             return
         print("⏳ Uploading leaderboard to private repository...")
-        print(f"   Target: UIIAmerica/MedVidBench-GroundTruth/leaderboard.json")
         print(f"   Entries: {len(df)}")
         api = HfApi()
@@ -314,7 +314,7 @@ def save_leaderboard(df: pd.DataFrame):
         result = api.upload_file(
             path_or_fileobj=str(LEADERBOARD_FILE),
             path_in_repo="leaderboard.json",
-            repo_id="UIIAmerica/MedVidBench-GroundTruth",
             repo_type="dataset",
             token=token,
             commit_message=f"Update leaderboard: {len(df)} entries ({datetime.now().strftime('%Y-%m-%d %H:%M:%S')})"
@@ -334,7 +334,7 @@ def save_leaderboard(df: pd.DataFrame):
             print("   → Fix: Regenerate HF_TOKEN with write permission")
         elif "404" in error_msg or "Not Found" in error_msg:
             print("   → Issue: Repository not found")
-            print("   → Fix: Create UIIAmerica/MedVidBench-GroundTruth repo")
         elif "403" in error_msg or "Forbidden" in error_msg:
             print("   → Issue: Token lacks write permission")
             print("   → Fix: Use token with write access to dataset")
@@ -354,7 +354,7 @@ def load_official_leaderboard() -> pd.DataFrame:
         if token:
             try:
                 official_file = hf_hub_download(
-                    repo_id="UIIAmerica/MedVidBench-GroundTruth",
                     filename="official_leaderboard.json",
                     repo_type="dataset",
                     token=token,
@@ -410,7 +410,7 @@ def save_official_leaderboard(df: pd.DataFrame):
         api.upload_file(
             path_or_fileobj=str(OFFICIAL_LEADERBOARD_FILE),
             path_in_repo="official_leaderboard.json",
-            repo_id="UIIAmerica/MedVidBench-GroundTruth",
             repo_type="dataset",
             token=token,
             commit_message=f"Update official leaderboard: {len(df)} entries ({datetime.now().strftime('%Y-%m-%d %H:%M:%S')})"
@@ -600,7 +600,7 @@ def backup_results_to_repo(model_name: str, results_dir: Path):
             api.upload_file(
                 path_or_fileobj=str(eval_output),
                 path_in_repo=f"results/{model_name}/eval_output.txt",
-                repo_id="UIIAmerica/MedVidBench-GroundTruth",
                 repo_type="dataset",
                 token=token,
                 commit_message=f"Backup results for {model_name}"
@@ -612,7 +612,7 @@ def backup_results_to_repo(model_name: str, results_dir: Path):
             api.upload_file(
                 path_or_fileobj=str(input_file),
                 path_in_repo=f"results/{model_name}/input.json",
-                repo_id="UIIAmerica/MedVidBench-GroundTruth",
                 repo_type="dataset",
                 token=token,
                 commit_message=f"Backup predictions for {model_name}"
@@ -1752,7 +1752,7 @@ def run_llm_judge_evaluation(model_name: str, progress=gr.Progress()) -> str:
                     # Download the predictions file
                     predictions_path = hf_hub_download(
-                        repo_id="UIIAmerica/MedVidBench-GroundTruth",
                         filename=f"results/{model_name.replace(' ', '_')}/input.json",
                         repo_type="dataset",
                         token=token,
@@ -2000,7 +2000,7 @@ with gr.Blocks(title="MedVidBench Leaderboard", theme=gr.themes.Soft()) as demo:
     **MedVidBench** is a comprehensive benchmark for evaluating Video-Language Models on medical and surgical video understanding.
     It covers **8 tasks** across **8 surgical datasets** with **6,245 test samples**, evaluated on **10 metrics** including LLM-based caption judging.
-    📄 [Paper](https://arxiv.org/abs/2512.06581) &nbsp; 🌐 [Project Page](https://uii-america.github.io/MedGRPO/) &nbsp; 💾 [Dataset](https://huggingface.co/datasets/UIIAmerica/MedVidBench) &nbsp; 🤖 [Model](https://huggingface.co/UIIAmerica/Qwen2.5-VL-7B-MedGRPO) &nbsp; 💻 [GitHub](https://github.com/UII-America/MedGRPO-Code) &nbsp; 🎮 [Demo](https://huggingface.co/spaces/UIIAmerica/MedGRPO-Demo)
     """)
     with gr.Tabs():
@@ -2055,7 +2055,7 @@ with gr.Blocks(title="MedVidBench Leaderboard", theme=gr.themes.Soft()) as demo:
             3. **Provide model API access** so we can independently verify results
             4. Once verified, your model is added to the Official Leaderboard
-            For questions, contact us via [GitHub](https://github.com/UII-America/MedGRPO-Code).
             """)
         # Tab 2: Community Submissions
@@ -2306,7 +2306,7 @@ with gr.Blocks(title="MedVidBench Leaderboard", theme=gr.themes.Soft()) as demo:
             ### Contact
-            For questions or issues, open an issue on [GitHub](https://github.com/UII-America/MedGRPO-Code) or visit the [project page](https://uii-america.github.io/MedGRPO/).
             """)
         # Tab 5: Admin Panel (Password Protected)

         # Download from private repository
         print("⏳ Downloading ground truth from private repository...")
         gt_file = hf_hub_download(
+            repo_id="UII-AI/MedVidBench-GroundTruth",
             filename="ground_truth.json",
             repo_type="dataset",
             token=token,
             print("⏳ Downloading leaderboard from private repository...")
             try:
                 leaderboard_file = hf_hub_download(
+                    repo_id="UII-AI/MedVidBench-GroundTruth",
                     filename="leaderboard.json",
                     repo_type="dataset",
                     token=token,
             return
         print("⏳ Uploading leaderboard to private repository...")
+        print(f"   Target: UII-AI/MedVidBench-GroundTruth/leaderboard.json")
         print(f"   Entries: {len(df)}")
         api = HfApi()
         result = api.upload_file(
             path_or_fileobj=str(LEADERBOARD_FILE),
             path_in_repo="leaderboard.json",
+            repo_id="UII-AI/MedVidBench-GroundTruth",
             repo_type="dataset",
             token=token,
             commit_message=f"Update leaderboard: {len(df)} entries ({datetime.now().strftime('%Y-%m-%d %H:%M:%S')})"
             print("   → Fix: Regenerate HF_TOKEN with write permission")
         elif "404" in error_msg or "Not Found" in error_msg:
             print("   → Issue: Repository not found")
+            print("   → Fix: Create UII-AI/MedVidBench-GroundTruth repo")
         elif "403" in error_msg or "Forbidden" in error_msg:
             print("   → Issue: Token lacks write permission")
             print("   → Fix: Use token with write access to dataset")
         if token:
             try:
                 official_file = hf_hub_download(
+                    repo_id="UII-AI/MedVidBench-GroundTruth",
                     filename="official_leaderboard.json",
                     repo_type="dataset",
                     token=token,
         api.upload_file(
             path_or_fileobj=str(OFFICIAL_LEADERBOARD_FILE),
             path_in_repo="official_leaderboard.json",
+            repo_id="UII-AI/MedVidBench-GroundTruth",
             repo_type="dataset",
             token=token,
             commit_message=f"Update official leaderboard: {len(df)} entries ({datetime.now().strftime('%Y-%m-%d %H:%M:%S')})"
             api.upload_file(
                 path_or_fileobj=str(eval_output),
                 path_in_repo=f"results/{model_name}/eval_output.txt",
+                repo_id="UII-AI/MedVidBench-GroundTruth",
                 repo_type="dataset",
                 token=token,
                 commit_message=f"Backup results for {model_name}"
             api.upload_file(
                 path_or_fileobj=str(input_file),
                 path_in_repo=f"results/{model_name}/input.json",
+                repo_id="UII-AI/MedVidBench-GroundTruth",
                 repo_type="dataset",
                 token=token,
                 commit_message=f"Backup predictions for {model_name}"
                     # Download the predictions file
                     predictions_path = hf_hub_download(
+                        repo_id="UII-AI/MedVidBench-GroundTruth",
                         filename=f"results/{model_name.replace(' ', '_')}/input.json",
                         repo_type="dataset",
                         token=token,
     **MedVidBench** is a comprehensive benchmark for evaluating Video-Language Models on medical and surgical video understanding.
     It covers **8 tasks** across **8 surgical datasets** with **6,245 test samples**, evaluated on **10 metrics** including LLM-based caption judging.
+    📄 [Paper](https://arxiv.org/abs/2512.06581) &nbsp; 🌐 [Project Page](https://uii-ai.github.io/MedGRPO/) &nbsp; 💾 [Dataset](https://huggingface.co/datasets/UII-AI/MedVidBench) &nbsp; 🤖 [Model](https://huggingface.co/UII-AI/Qwen2.5-VL-7B-MedGRPO) &nbsp; 💻 [GitHub](https://github.com/UII-AI/MedGRPO-Code) &nbsp; 🎮 [Demo](https://huggingface.co/spaces/UII-AI/MedGRPO-Demo)
     """)
     with gr.Tabs():
             3. **Provide model API access** so we can independently verify results
             4. Once verified, your model is added to the Official Leaderboard
+            For questions, contact us via [GitHub](https://github.com/UII-AI/MedGRPO-Code).
             """)
         # Tab 2: Community Submissions
             ### Contact
+            For questions or issues, open an issue on [GitHub](https://github.com/UII-AI/MedGRPO-Code) or visit the [project page](https://uii-ai.github.io/MedGRPO/).
             """)
         # Tab 5: Admin Panel (Password Protected)

test_hf_token.py CHANGED Viewed

@@ -9,7 +9,7 @@ import sys
 from huggingface_hub import HfApi
 from pathlib import Path
-REPO_ID = "UIIAmerica/MedVidBench-GroundTruth"
 REPO_TYPE = "dataset"
 def test_hf_token():
@@ -80,12 +80,12 @@ def test_hf_token():
             print(f"\n→ Issue: Repository '{REPO_ID}' not found")
             print("→ Fix: Create the repository:")
             print(f"  1. Go to: https://huggingface.co/new-dataset")
-            print(f"  2. Owner: UIIAmerica")
             print(f"  3. Name: MedVidBench-GroundTruth")
             print(f"  4. Visibility: Private")
         elif "403" in error_msg or "Forbidden" in error_msg:
             print("\n→ Issue: No access to private repository")
-            print("→ Fix: Ensure you're a member of UIIAmerica organization")
         sys.exit(1)

 from huggingface_hub import HfApi
 from pathlib import Path
+REPO_ID = "UII-AI/MedVidBench-GroundTruth"
 REPO_TYPE = "dataset"
 def test_hf_token():
             print(f"\n→ Issue: Repository '{REPO_ID}' not found")
             print("→ Fix: Create the repository:")
             print(f"  1. Go to: https://huggingface.co/new-dataset")
+            print(f"  2. Owner: UII-AI")
             print(f"  3. Name: MedVidBench-GroundTruth")
             print(f"  4. Visibility: Private")
         elif "403" in error_msg or "Forbidden" in error_msg:
             print("\n→ Issue: No access to private repository")
+            print("→ Fix: Ensure you're a member of UII-AI organization")
         sys.exit(1)

upload_initial_data.py CHANGED Viewed

@@ -14,7 +14,7 @@ from pathlib import Path
 from huggingface_hub import HfApi
 # Configuration
-REPO_ID = "UIIAmerica/MedVidBench-GroundTruth"
 REPO_TYPE = "dataset"
 def create_initial_leaderboard():

 from huggingface_hub import HfApi
 # Configuration
+REPO_ID = "UII-AI/MedVidBench-GroundTruth"
 REPO_TYPE = "dataset"
 def create_initial_leaderboard():