MedGRPO Team committed on
Commit
1db37ad
·
1 Parent(s): 0cba76d
Files changed (4) hide show
  1. README.md +9 -9
  2. app.py +13 -13
  3. test_hf_token.py +3 -3
  4. upload_initial_data.py +1 -1
README.md CHANGED
@@ -20,7 +20,7 @@ tags:
20
 
21
  Interactive leaderboard for evaluating Video-Language Models on the **MedVidBench benchmark** - 8 medical video understanding tasks across 8 surgical datasets.
22
 
23
- ๐Ÿ† **Live Demo**: [huggingface.co/spaces/UIIAmerica/MedVidBench-Leaderboard](https://huggingface.co/spaces/UIIAmerica/MedVidBench-Leaderboard)
24
 
25
  📄 **Paper**: [arXiv:2512.06581](https://arxiv.org/abs/2512.06581)
26
 
@@ -139,7 +139,7 @@ The leaderboard supports **two formats** for submission:
139
 
140
  ### 3. Upload to Leaderboard
141
 
142
- 1. Visit the [leaderboard](https://huggingface.co/spaces/UIIAmerica/MedVidBench-Leaderboard)
143
  2. Go to the **Submit Results** tab
144
  3. Fill in:
145
  - **Model Name** (e.g., "Qwen2.5-VL-7B-MedVidBench")
@@ -221,11 +221,11 @@ To compute the **average score** fairly across tasks:
221
  ## Links
222
 
223
  - 📄 **Paper**: [https://arxiv.org/abs/2512.06581](https://arxiv.org/abs/2512.06581)
224
- - 🌐 **Project**: [https://uii-america.github.io/MedGRPO/](https://uii-america.github.io/MedGRPO/)
225
- - 💾 **Dataset**: [https://huggingface.co/datasets/UIIAmerica/MedVidBench](https://huggingface.co/datasets/UIIAmerica/MedVidBench)
226
- - 💻 **GitHub**: [https://github.com/UII-America/MedGRPO-Code](https://github.com/UII-America/MedGRPO-Code)
227
- - 🎮 **Demo**: [https://huggingface.co/spaces/UIIAmerica/MedGRPO-Demo](https://huggingface.co/spaces/UIIAmerica/MedGRPO-Demo)
228
- - 🏆 **Leaderboard**: [https://huggingface.co/spaces/UIIAmerica/MedVidBench-Leaderboard](https://huggingface.co/spaces/UIIAmerica/MedVidBench-Leaderboard)
229
 
230
  ## Citation
231
 
@@ -246,5 +246,5 @@ To compute the **average score** fairly across tasks:
246
  ## Contact
247
 
248
  For questions or issues:
249
- - Open an issue on [GitHub](https://github.com/UII-America/MedGRPO-Code)
250
- - Visit the [project page](https://uii-america.github.io/MedGRPO/)
 
20
 
21
  Interactive leaderboard for evaluating Video-Language Models on the **MedVidBench benchmark** - 8 medical video understanding tasks across 8 surgical datasets.
22
 
23
+ ๐Ÿ† **Live Demo**: [huggingface.co/spaces/UII-AI/MedVidBench-Leaderboard](https://huggingface.co/spaces/UII-AI/MedVidBench-Leaderboard)
24
 
25
  📄 **Paper**: [arXiv:2512.06581](https://arxiv.org/abs/2512.06581)
26
 
 
139
 
140
  ### 3. Upload to Leaderboard
141
 
142
+ 1. Visit the [leaderboard](https://huggingface.co/spaces/UII-AI/MedVidBench-Leaderboard)
143
  2. Go to the **Submit Results** tab
144
  3. Fill in:
145
  - **Model Name** (e.g., "Qwen2.5-VL-7B-MedVidBench")
 
221
  ## Links
222
 
223
  - 📄 **Paper**: [https://arxiv.org/abs/2512.06581](https://arxiv.org/abs/2512.06581)
224
+ - 🌐 **Project**: [https://uii-ai.github.io/MedGRPO/](https://uii-ai.github.io/MedGRPO/)
225
+ - 💾 **Dataset**: [https://huggingface.co/datasets/UII-AI/MedVidBench](https://huggingface.co/datasets/UII-AI/MedVidBench)
226
+ - 💻 **GitHub**: [https://github.com/UII-AI/MedGRPO-Code](https://github.com/UII-AI/MedGRPO-Code)
227
+ - 🎮 **Demo**: [https://huggingface.co/spaces/UII-AI/MedGRPO-Demo](https://huggingface.co/spaces/UII-AI/MedGRPO-Demo)
228
+ - 🏆 **Leaderboard**: [https://huggingface.co/spaces/UII-AI/MedVidBench-Leaderboard](https://huggingface.co/spaces/UII-AI/MedVidBench-Leaderboard)
229
 
230
  ## Citation
231
 
 
246
  ## Contact
247
 
248
  For questions or issues:
249
+ - Open an issue on [GitHub](https://github.com/UII-AI/MedGRPO-Code)
250
+ - Visit the [project page](https://uii-ai.github.io/MedGRPO/)
app.py CHANGED
@@ -32,7 +32,7 @@ def load_ground_truth():
32
  # Download from private repository
33
  print("โณ Downloading ground truth from private repository...")
34
  gt_file = hf_hub_download(
35
- repo_id="UIIAmerica/MedVidBench-GroundTruth",
36
  filename="ground_truth.json",
37
  repo_type="dataset",
38
  token=token,
@@ -228,7 +228,7 @@ def load_leaderboard() -> pd.DataFrame:
228
  print("โณ Downloading leaderboard from private repository...")
229
  try:
230
  leaderboard_file = hf_hub_download(
231
- repo_id="UIIAmerica/MedVidBench-GroundTruth",
232
  filename="leaderboard.json",
233
  repo_type="dataset",
234
  token=token,
@@ -305,7 +305,7 @@ def save_leaderboard(df: pd.DataFrame):
305
  return
306
 
307
  print("โณ Uploading leaderboard to private repository...")
308
- print(f" Target: UIIAmerica/MedVidBench-GroundTruth/leaderboard.json")
309
  print(f" Entries: {len(df)}")
310
 
311
  api = HfApi()
@@ -314,7 +314,7 @@ def save_leaderboard(df: pd.DataFrame):
314
  result = api.upload_file(
315
  path_or_fileobj=str(LEADERBOARD_FILE),
316
  path_in_repo="leaderboard.json",
317
- repo_id="UIIAmerica/MedVidBench-GroundTruth",
318
  repo_type="dataset",
319
  token=token,
320
  commit_message=f"Update leaderboard: {len(df)} entries ({datetime.now().strftime('%Y-%m-%d %H:%M:%S')})"
@@ -334,7 +334,7 @@ def save_leaderboard(df: pd.DataFrame):
334
  print(" โ†’ Fix: Regenerate HF_TOKEN with write permission")
335
  elif "404" in error_msg or "Not Found" in error_msg:
336
  print(" โ†’ Issue: Repository not found")
337
- print(" โ†’ Fix: Create UIIAmerica/MedVidBench-GroundTruth repo")
338
  elif "403" in error_msg or "Forbidden" in error_msg:
339
  print(" โ†’ Issue: Token lacks write permission")
340
  print(" โ†’ Fix: Use token with write access to dataset")
@@ -354,7 +354,7 @@ def load_official_leaderboard() -> pd.DataFrame:
354
  if token:
355
  try:
356
  official_file = hf_hub_download(
357
- repo_id="UIIAmerica/MedVidBench-GroundTruth",
358
  filename="official_leaderboard.json",
359
  repo_type="dataset",
360
  token=token,
@@ -410,7 +410,7 @@ def save_official_leaderboard(df: pd.DataFrame):
410
  api.upload_file(
411
  path_or_fileobj=str(OFFICIAL_LEADERBOARD_FILE),
412
  path_in_repo="official_leaderboard.json",
413
- repo_id="UIIAmerica/MedVidBench-GroundTruth",
414
  repo_type="dataset",
415
  token=token,
416
  commit_message=f"Update official leaderboard: {len(df)} entries ({datetime.now().strftime('%Y-%m-%d %H:%M:%S')})"
@@ -600,7 +600,7 @@ def backup_results_to_repo(model_name: str, results_dir: Path):
600
  api.upload_file(
601
  path_or_fileobj=str(eval_output),
602
  path_in_repo=f"results/{model_name}/eval_output.txt",
603
- repo_id="UIIAmerica/MedVidBench-GroundTruth",
604
  repo_type="dataset",
605
  token=token,
606
  commit_message=f"Backup results for {model_name}"
@@ -612,7 +612,7 @@ def backup_results_to_repo(model_name: str, results_dir: Path):
612
  api.upload_file(
613
  path_or_fileobj=str(input_file),
614
  path_in_repo=f"results/{model_name}/input.json",
615
- repo_id="UIIAmerica/MedVidBench-GroundTruth",
616
  repo_type="dataset",
617
  token=token,
618
  commit_message=f"Backup predictions for {model_name}"
@@ -1752,7 +1752,7 @@ def run_llm_judge_evaluation(model_name: str, progress=gr.Progress()) -> str:
1752
 
1753
  # Download the predictions file
1754
  predictions_path = hf_hub_download(
1755
- repo_id="UIIAmerica/MedVidBench-GroundTruth",
1756
  filename=f"results/{model_name.replace(' ', '_')}/input.json",
1757
  repo_type="dataset",
1758
  token=token,
@@ -2000,7 +2000,7 @@ with gr.Blocks(title="MedVidBench Leaderboard", theme=gr.themes.Soft()) as demo:
2000
  **MedVidBench** is a comprehensive benchmark for evaluating Video-Language Models on medical and surgical video understanding.
2001
  It covers **8 tasks** across **8 surgical datasets** with **6,245 test samples**, evaluated on **10 metrics** including LLM-based caption judging.
2002
 
2003
- ๐Ÿ“„ [Paper](https://arxiv.org/abs/2512.06581)   ๐ŸŒ [Project Page](https://uii-america.github.io/MedGRPO/)   ๐Ÿ’พ [Dataset](https://huggingface.co/datasets/UIIAmerica/MedVidBench)   ๐Ÿค– [Model](https://huggingface.co/UIIAmerica/Qwen2.5-VL-7B-MedGRPO)   ๐Ÿ’ป [GitHub](https://github.com/UII-America/MedGRPO-Code)   ๐ŸŽฎ [Demo](https://huggingface.co/spaces/UIIAmerica/MedGRPO-Demo)
2004
  """)
2005
 
2006
  with gr.Tabs():
@@ -2055,7 +2055,7 @@ with gr.Blocks(title="MedVidBench Leaderboard", theme=gr.themes.Soft()) as demo:
2055
  3. **Provide model API access** so we can independently verify results
2056
  4. Once verified, your model is added to the Official Leaderboard
2057
 
2058
- For questions, contact us via [GitHub](https://github.com/UII-America/MedGRPO-Code).
2059
  """)
2060
 
2061
  # Tab 2: Community Submissions
@@ -2306,7 +2306,7 @@ with gr.Blocks(title="MedVidBench Leaderboard", theme=gr.themes.Soft()) as demo:
2306
 
2307
  ### Contact
2308
 
2309
- For questions or issues, open an issue on [GitHub](https://github.com/UII-America/MedGRPO-Code) or visit the [project page](https://uii-america.github.io/MedGRPO/).
2310
  """)
2311
 
2312
  # Tab 5: Admin Panel (Password Protected)
 
32
  # Download from private repository
33
  print("โณ Downloading ground truth from private repository...")
34
  gt_file = hf_hub_download(
35
+ repo_id="UII-AI/MedVidBench-GroundTruth",
36
  filename="ground_truth.json",
37
  repo_type="dataset",
38
  token=token,
 
228
  print("โณ Downloading leaderboard from private repository...")
229
  try:
230
  leaderboard_file = hf_hub_download(
231
+ repo_id="UII-AI/MedVidBench-GroundTruth",
232
  filename="leaderboard.json",
233
  repo_type="dataset",
234
  token=token,
 
305
  return
306
 
307
  print("โณ Uploading leaderboard to private repository...")
308
+ print(f" Target: UII-AI/MedVidBench-GroundTruth/leaderboard.json")
309
  print(f" Entries: {len(df)}")
310
 
311
  api = HfApi()
 
314
  result = api.upload_file(
315
  path_or_fileobj=str(LEADERBOARD_FILE),
316
  path_in_repo="leaderboard.json",
317
+ repo_id="UII-AI/MedVidBench-GroundTruth",
318
  repo_type="dataset",
319
  token=token,
320
  commit_message=f"Update leaderboard: {len(df)} entries ({datetime.now().strftime('%Y-%m-%d %H:%M:%S')})"
 
334
  print(" โ†’ Fix: Regenerate HF_TOKEN with write permission")
335
  elif "404" in error_msg or "Not Found" in error_msg:
336
  print(" โ†’ Issue: Repository not found")
337
+ print(" โ†’ Fix: Create UII-AI/MedVidBench-GroundTruth repo")
338
  elif "403" in error_msg or "Forbidden" in error_msg:
339
  print(" โ†’ Issue: Token lacks write permission")
340
  print(" โ†’ Fix: Use token with write access to dataset")
 
354
  if token:
355
  try:
356
  official_file = hf_hub_download(
357
+ repo_id="UII-AI/MedVidBench-GroundTruth",
358
  filename="official_leaderboard.json",
359
  repo_type="dataset",
360
  token=token,
 
410
  api.upload_file(
411
  path_or_fileobj=str(OFFICIAL_LEADERBOARD_FILE),
412
  path_in_repo="official_leaderboard.json",
413
+ repo_id="UII-AI/MedVidBench-GroundTruth",
414
  repo_type="dataset",
415
  token=token,
416
  commit_message=f"Update official leaderboard: {len(df)} entries ({datetime.now().strftime('%Y-%m-%d %H:%M:%S')})"
 
600
  api.upload_file(
601
  path_or_fileobj=str(eval_output),
602
  path_in_repo=f"results/{model_name}/eval_output.txt",
603
+ repo_id="UII-AI/MedVidBench-GroundTruth",
604
  repo_type="dataset",
605
  token=token,
606
  commit_message=f"Backup results for {model_name}"
 
612
  api.upload_file(
613
  path_or_fileobj=str(input_file),
614
  path_in_repo=f"results/{model_name}/input.json",
615
+ repo_id="UII-AI/MedVidBench-GroundTruth",
616
  repo_type="dataset",
617
  token=token,
618
  commit_message=f"Backup predictions for {model_name}"
 
1752
 
1753
  # Download the predictions file
1754
  predictions_path = hf_hub_download(
1755
+ repo_id="UII-AI/MedVidBench-GroundTruth",
1756
  filename=f"results/{model_name.replace(' ', '_')}/input.json",
1757
  repo_type="dataset",
1758
  token=token,
 
2000
  **MedVidBench** is a comprehensive benchmark for evaluating Video-Language Models on medical and surgical video understanding.
2001
  It covers **8 tasks** across **8 surgical datasets** with **6,245 test samples**, evaluated on **10 metrics** including LLM-based caption judging.
2002
 
2003
+ ๐Ÿ“„ [Paper](https://arxiv.org/abs/2512.06581)   ๐ŸŒ [Project Page](https://uii-ai.github.io/MedGRPO/)   ๐Ÿ’พ [Dataset](https://huggingface.co/datasets/UII-AI/MedVidBench)   ๐Ÿค– [Model](https://huggingface.co/UII-AI/Qwen2.5-VL-7B-MedGRPO)   ๐Ÿ’ป [GitHub](https://github.com/UII-AI/MedGRPO-Code)   ๐ŸŽฎ [Demo](https://huggingface.co/spaces/UII-AI/MedGRPO-Demo)
2004
  """)
2005
 
2006
  with gr.Tabs():
 
2055
  3. **Provide model API access** so we can independently verify results
2056
  4. Once verified, your model is added to the Official Leaderboard
2057
 
2058
+ For questions, contact us via [GitHub](https://github.com/UII-AI/MedGRPO-Code).
2059
  """)
2060
 
2061
  # Tab 2: Community Submissions
 
2306
 
2307
  ### Contact
2308
 
2309
+ For questions or issues, open an issue on [GitHub](https://github.com/UII-AI/MedGRPO-Code) or visit the [project page](https://uii-ai.github.io/MedGRPO/).
2310
  """)
2311
 
2312
  # Tab 5: Admin Panel (Password Protected)
test_hf_token.py CHANGED
@@ -9,7 +9,7 @@ import sys
9
  from huggingface_hub import HfApi
10
  from pathlib import Path
11
 
12
- REPO_ID = "UIIAmerica/MedVidBench-GroundTruth"
13
  REPO_TYPE = "dataset"
14
 
15
  def test_hf_token():
@@ -80,12 +80,12 @@ def test_hf_token():
80
  print(f"\nโ†’ Issue: Repository '{REPO_ID}' not found")
81
  print("โ†’ Fix: Create the repository:")
82
  print(f" 1. Go to: https://huggingface.co/new-dataset")
83
- print(f" 2. Owner: UIIAmerica")
84
  print(f" 3. Name: MedVidBench-GroundTruth")
85
  print(f" 4. Visibility: Private")
86
  elif "403" in error_msg or "Forbidden" in error_msg:
87
  print("\nโ†’ Issue: No access to private repository")
88
- print("โ†’ Fix: Ensure you're a member of UIIAmerica organization")
89
 
90
  sys.exit(1)
91
 
 
9
  from huggingface_hub import HfApi
10
  from pathlib import Path
11
 
12
+ REPO_ID = "UII-AI/MedVidBench-GroundTruth"
13
  REPO_TYPE = "dataset"
14
 
15
  def test_hf_token():
 
80
  print(f"\nโ†’ Issue: Repository '{REPO_ID}' not found")
81
  print("โ†’ Fix: Create the repository:")
82
  print(f" 1. Go to: https://huggingface.co/new-dataset")
83
+ print(f" 2. Owner: UII-AI")
84
  print(f" 3. Name: MedVidBench-GroundTruth")
85
  print(f" 4. Visibility: Private")
86
  elif "403" in error_msg or "Forbidden" in error_msg:
87
  print("\nโ†’ Issue: No access to private repository")
88
+ print("โ†’ Fix: Ensure you're a member of UII-AI organization")
89
 
90
  sys.exit(1)
91
 
upload_initial_data.py CHANGED
@@ -14,7 +14,7 @@ from pathlib import Path
14
  from huggingface_hub import HfApi
15
 
16
  # Configuration
17
- REPO_ID = "UIIAmerica/MedVidBench-GroundTruth"
18
  REPO_TYPE = "dataset"
19
 
20
  def create_initial_leaderboard():
 
14
  from huggingface_hub import HfApi
15
 
16
  # Configuration
17
+ REPO_ID = "UII-AI/MedVidBench-GroundTruth"
18
  REPO_TYPE = "dataset"
19
 
20
  def create_initial_leaderboard():