MedGRPO Team committed on
Commit ·
1db37ad
1
Parent(s): 0cba76d
update
Browse files- README.md +9 -9
- app.py +13 -13
- test_hf_token.py +3 -3
- upload_initial_data.py +1 -1
README.md
CHANGED
|
@@ -20,7 +20,7 @@ tags:
|
|
| 20 |
|
| 21 |
Interactive leaderboard for evaluating Video-Language Models on the **MedVidBench benchmark** - 8 medical video understanding tasks across 8 surgical datasets.
|
| 22 |
|
| 23 |
-
๐ **Live Demo**: [huggingface.co/spaces/
|
| 24 |
|
| 25 |
๐ **Paper**: [arXiv:2512.06581](https://arxiv.org/abs/2512.06581)
|
| 26 |
|
|
@@ -139,7 +139,7 @@ The leaderboard supports **two formats** for submission:
|
|
| 139 |
|
| 140 |
### 3. Upload to Leaderboard
|
| 141 |
|
| 142 |
-
1. Visit the [leaderboard](https://huggingface.co/spaces/
|
| 143 |
2. Go to the **Submit Results** tab
|
| 144 |
3. Fill in:
|
| 145 |
- **Model Name** (e.g., "Qwen2.5-VL-7B-MedVidBench")
|
|
@@ -221,11 +221,11 @@ To compute the **average score** fairly across tasks:
|
|
| 221 |
## Links
|
| 222 |
|
| 223 |
- ๐ **Paper**: [https://arxiv.org/abs/2512.06581](https://arxiv.org/abs/2512.06581)
|
| 224 |
-
- ๐ **Project**: [https://uii-
|
| 225 |
-
- 💾 **Dataset**: [https://huggingface.co/datasets/
|
| 226 |
-
- 💻 **GitHub**: [https://github.com/UII-
|
| 227 |
-
- 🎮 **Demo**: [https://huggingface.co/spaces/
|
| 228 |
-
- ๐ **Leaderboard**: [https://huggingface.co/spaces/
|
| 229 |
|
| 230 |
## Citation
|
| 231 |
|
|
@@ -246,5 +246,5 @@ To compute the **average score** fairly across tasks:
|
|
| 246 |
## Contact
|
| 247 |
|
| 248 |
For questions or issues:
|
| 249 |
-
- Open an issue on [GitHub](https://github.com/UII-
|
| 250 |
-
- Visit the [project page](https://uii-
|
|
|
|
| 20 |
|
| 21 |
Interactive leaderboard for evaluating Video-Language Models on the **MedVidBench benchmark** - 8 medical video understanding tasks across 8 surgical datasets.
|
| 22 |
|
| 23 |
+
๐ **Live Demo**: [huggingface.co/spaces/UII-AI/MedVidBench-Leaderboard](https://huggingface.co/spaces/UII-AI/MedVidBench-Leaderboard)
|
| 24 |
|
| 25 |
๐ **Paper**: [arXiv:2512.06581](https://arxiv.org/abs/2512.06581)
|
| 26 |
|
|
|
|
| 139 |
|
| 140 |
### 3. Upload to Leaderboard
|
| 141 |
|
| 142 |
+
1. Visit the [leaderboard](https://huggingface.co/spaces/UII-AI/MedVidBench-Leaderboard)
|
| 143 |
2. Go to the **Submit Results** tab
|
| 144 |
3. Fill in:
|
| 145 |
- **Model Name** (e.g., "Qwen2.5-VL-7B-MedVidBench")
|
|
|
|
| 221 |
## Links
|
| 222 |
|
| 223 |
- ๐ **Paper**: [https://arxiv.org/abs/2512.06581](https://arxiv.org/abs/2512.06581)
|
| 224 |
+
- ๐ **Project**: [https://uii-ai.github.io/MedGRPO/](https://uii-ai.github.io/MedGRPO/)
|
| 225 |
+
- 💾 **Dataset**: [https://huggingface.co/datasets/UII-AI/MedVidBench](https://huggingface.co/datasets/UII-AI/MedVidBench)
|
| 226 |
+
- 💻 **GitHub**: [https://github.com/UII-AI/MedGRPO-Code](https://github.com/UII-AI/MedGRPO-Code)
|
| 227 |
+
- 🎮 **Demo**: [https://huggingface.co/spaces/UII-AI/MedGRPO-Demo](https://huggingface.co/spaces/UII-AI/MedGRPO-Demo)
|
| 228 |
+
- ๐ **Leaderboard**: [https://huggingface.co/spaces/UII-AI/MedVidBench-Leaderboard](https://huggingface.co/spaces/UII-AI/MedVidBench-Leaderboard)
|
| 229 |
|
| 230 |
## Citation
|
| 231 |
|
|
|
|
| 246 |
## Contact
|
| 247 |
|
| 248 |
For questions or issues:
|
| 249 |
+
- Open an issue on [GitHub](https://github.com/UII-AI/MedGRPO-Code)
|
| 250 |
+
- Visit the [project page](https://uii-ai.github.io/MedGRPO/)
|
app.py
CHANGED
|
@@ -32,7 +32,7 @@ def load_ground_truth():
|
|
| 32 |
# Download from private repository
|
| 33 |
print("⏳ Downloading ground truth from private repository...")
|
| 34 |
gt_file = hf_hub_download(
|
| 35 |
-
repo_id="
|
| 36 |
filename="ground_truth.json",
|
| 37 |
repo_type="dataset",
|
| 38 |
token=token,
|
|
@@ -228,7 +228,7 @@ def load_leaderboard() -> pd.DataFrame:
|
|
| 228 |
print("⏳ Downloading leaderboard from private repository...")
|
| 229 |
try:
|
| 230 |
leaderboard_file = hf_hub_download(
|
| 231 |
-
repo_id="
|
| 232 |
filename="leaderboard.json",
|
| 233 |
repo_type="dataset",
|
| 234 |
token=token,
|
|
@@ -305,7 +305,7 @@ def save_leaderboard(df: pd.DataFrame):
|
|
| 305 |
return
|
| 306 |
|
| 307 |
print("⏳ Uploading leaderboard to private repository...")
|
| 308 |
-
print(f" Target:
|
| 309 |
print(f" Entries: {len(df)}")
|
| 310 |
|
| 311 |
api = HfApi()
|
|
@@ -314,7 +314,7 @@ def save_leaderboard(df: pd.DataFrame):
|
|
| 314 |
result = api.upload_file(
|
| 315 |
path_or_fileobj=str(LEADERBOARD_FILE),
|
| 316 |
path_in_repo="leaderboard.json",
|
| 317 |
-
repo_id="
|
| 318 |
repo_type="dataset",
|
| 319 |
token=token,
|
| 320 |
commit_message=f"Update leaderboard: {len(df)} entries ({datetime.now().strftime('%Y-%m-%d %H:%M:%S')})"
|
|
@@ -334,7 +334,7 @@ def save_leaderboard(df: pd.DataFrame):
|
|
| 334 |
print(" โ Fix: Regenerate HF_TOKEN with write permission")
|
| 335 |
elif "404" in error_msg or "Not Found" in error_msg:
|
| 336 |
print(" โ Issue: Repository not found")
|
| 337 |
-
print(" โ Fix: Create
|
| 338 |
elif "403" in error_msg or "Forbidden" in error_msg:
|
| 339 |
print(" โ Issue: Token lacks write permission")
|
| 340 |
print(" โ Fix: Use token with write access to dataset")
|
|
@@ -354,7 +354,7 @@ def load_official_leaderboard() -> pd.DataFrame:
|
|
| 354 |
if token:
|
| 355 |
try:
|
| 356 |
official_file = hf_hub_download(
|
| 357 |
-
repo_id="
|
| 358 |
filename="official_leaderboard.json",
|
| 359 |
repo_type="dataset",
|
| 360 |
token=token,
|
|
@@ -410,7 +410,7 @@ def save_official_leaderboard(df: pd.DataFrame):
|
|
| 410 |
api.upload_file(
|
| 411 |
path_or_fileobj=str(OFFICIAL_LEADERBOARD_FILE),
|
| 412 |
path_in_repo="official_leaderboard.json",
|
| 413 |
-
repo_id="
|
| 414 |
repo_type="dataset",
|
| 415 |
token=token,
|
| 416 |
commit_message=f"Update official leaderboard: {len(df)} entries ({datetime.now().strftime('%Y-%m-%d %H:%M:%S')})"
|
|
@@ -600,7 +600,7 @@ def backup_results_to_repo(model_name: str, results_dir: Path):
|
|
| 600 |
api.upload_file(
|
| 601 |
path_or_fileobj=str(eval_output),
|
| 602 |
path_in_repo=f"results/{model_name}/eval_output.txt",
|
| 603 |
-
repo_id="
|
| 604 |
repo_type="dataset",
|
| 605 |
token=token,
|
| 606 |
commit_message=f"Backup results for {model_name}"
|
|
@@ -612,7 +612,7 @@ def backup_results_to_repo(model_name: str, results_dir: Path):
|
|
| 612 |
api.upload_file(
|
| 613 |
path_or_fileobj=str(input_file),
|
| 614 |
path_in_repo=f"results/{model_name}/input.json",
|
| 615 |
-
repo_id="
|
| 616 |
repo_type="dataset",
|
| 617 |
token=token,
|
| 618 |
commit_message=f"Backup predictions for {model_name}"
|
|
@@ -1752,7 +1752,7 @@ def run_llm_judge_evaluation(model_name: str, progress=gr.Progress()) -> str:
|
|
| 1752 |
|
| 1753 |
# Download the predictions file
|
| 1754 |
predictions_path = hf_hub_download(
|
| 1755 |
-
repo_id="
|
| 1756 |
filename=f"results/{model_name.replace(' ', '_')}/input.json",
|
| 1757 |
repo_type="dataset",
|
| 1758 |
token=token,
|
|
@@ -2000,7 +2000,7 @@ with gr.Blocks(title="MedVidBench Leaderboard", theme=gr.themes.Soft()) as demo:
|
|
| 2000 |
**MedVidBench** is a comprehensive benchmark for evaluating Video-Language Models on medical and surgical video understanding.
|
| 2001 |
It covers **8 tasks** across **8 surgical datasets** with **6,245 test samples**, evaluated on **10 metrics** including LLM-based caption judging.
|
| 2002 |
|
| 2003 |
-
๐ [Paper](https://arxiv.org/abs/2512.06581) ๐ [Project Page](https://uii-
|
| 2004 |
""")
|
| 2005 |
|
| 2006 |
with gr.Tabs():
|
|
@@ -2055,7 +2055,7 @@ with gr.Blocks(title="MedVidBench Leaderboard", theme=gr.themes.Soft()) as demo:
|
|
| 2055 |
3. **Provide model API access** so we can independently verify results
|
| 2056 |
4. Once verified, your model is added to the Official Leaderboard
|
| 2057 |
|
| 2058 |
-
For questions, contact us via [GitHub](https://github.com/UII-
|
| 2059 |
""")
|
| 2060 |
|
| 2061 |
# Tab 2: Community Submissions
|
|
@@ -2306,7 +2306,7 @@ with gr.Blocks(title="MedVidBench Leaderboard", theme=gr.themes.Soft()) as demo:
|
|
| 2306 |
|
| 2307 |
### Contact
|
| 2308 |
|
| 2309 |
-
For questions or issues, open an issue on [GitHub](https://github.com/UII-
|
| 2310 |
""")
|
| 2311 |
|
| 2312 |
# Tab 5: Admin Panel (Password Protected)
|
|
|
|
| 32 |
# Download from private repository
|
| 33 |
print("⏳ Downloading ground truth from private repository...")
|
| 34 |
gt_file = hf_hub_download(
|
| 35 |
+
repo_id="UII-AI/MedVidBench-GroundTruth",
|
| 36 |
filename="ground_truth.json",
|
| 37 |
repo_type="dataset",
|
| 38 |
token=token,
|
|
|
|
| 228 |
print("⏳ Downloading leaderboard from private repository...")
|
| 229 |
try:
|
| 230 |
leaderboard_file = hf_hub_download(
|
| 231 |
+
repo_id="UII-AI/MedVidBench-GroundTruth",
|
| 232 |
filename="leaderboard.json",
|
| 233 |
repo_type="dataset",
|
| 234 |
token=token,
|
|
|
|
| 305 |
return
|
| 306 |
|
| 307 |
print("⏳ Uploading leaderboard to private repository...")
|
| 308 |
+
print(f" Target: UII-AI/MedVidBench-GroundTruth/leaderboard.json")
|
| 309 |
print(f" Entries: {len(df)}")
|
| 310 |
|
| 311 |
api = HfApi()
|
|
|
|
| 314 |
result = api.upload_file(
|
| 315 |
path_or_fileobj=str(LEADERBOARD_FILE),
|
| 316 |
path_in_repo="leaderboard.json",
|
| 317 |
+
repo_id="UII-AI/MedVidBench-GroundTruth",
|
| 318 |
repo_type="dataset",
|
| 319 |
token=token,
|
| 320 |
commit_message=f"Update leaderboard: {len(df)} entries ({datetime.now().strftime('%Y-%m-%d %H:%M:%S')})"
|
|
|
|
| 334 |
print(" โ Fix: Regenerate HF_TOKEN with write permission")
|
| 335 |
elif "404" in error_msg or "Not Found" in error_msg:
|
| 336 |
print(" โ Issue: Repository not found")
|
| 337 |
+
print(" โ Fix: Create UII-AI/MedVidBench-GroundTruth repo")
|
| 338 |
elif "403" in error_msg or "Forbidden" in error_msg:
|
| 339 |
print(" โ Issue: Token lacks write permission")
|
| 340 |
print(" โ Fix: Use token with write access to dataset")
|
|
|
|
| 354 |
if token:
|
| 355 |
try:
|
| 356 |
official_file = hf_hub_download(
|
| 357 |
+
repo_id="UII-AI/MedVidBench-GroundTruth",
|
| 358 |
filename="official_leaderboard.json",
|
| 359 |
repo_type="dataset",
|
| 360 |
token=token,
|
|
|
|
| 410 |
api.upload_file(
|
| 411 |
path_or_fileobj=str(OFFICIAL_LEADERBOARD_FILE),
|
| 412 |
path_in_repo="official_leaderboard.json",
|
| 413 |
+
repo_id="UII-AI/MedVidBench-GroundTruth",
|
| 414 |
repo_type="dataset",
|
| 415 |
token=token,
|
| 416 |
commit_message=f"Update official leaderboard: {len(df)} entries ({datetime.now().strftime('%Y-%m-%d %H:%M:%S')})"
|
|
|
|
| 600 |
api.upload_file(
|
| 601 |
path_or_fileobj=str(eval_output),
|
| 602 |
path_in_repo=f"results/{model_name}/eval_output.txt",
|
| 603 |
+
repo_id="UII-AI/MedVidBench-GroundTruth",
|
| 604 |
repo_type="dataset",
|
| 605 |
token=token,
|
| 606 |
commit_message=f"Backup results for {model_name}"
|
|
|
|
| 612 |
api.upload_file(
|
| 613 |
path_or_fileobj=str(input_file),
|
| 614 |
path_in_repo=f"results/{model_name}/input.json",
|
| 615 |
+
repo_id="UII-AI/MedVidBench-GroundTruth",
|
| 616 |
repo_type="dataset",
|
| 617 |
token=token,
|
| 618 |
commit_message=f"Backup predictions for {model_name}"
|
|
|
|
| 1752 |
|
| 1753 |
# Download the predictions file
|
| 1754 |
predictions_path = hf_hub_download(
|
| 1755 |
+
repo_id="UII-AI/MedVidBench-GroundTruth",
|
| 1756 |
filename=f"results/{model_name.replace(' ', '_')}/input.json",
|
| 1757 |
repo_type="dataset",
|
| 1758 |
token=token,
|
|
|
|
| 2000 |
**MedVidBench** is a comprehensive benchmark for evaluating Video-Language Models on medical and surgical video understanding.
|
| 2001 |
It covers **8 tasks** across **8 surgical datasets** with **6,245 test samples**, evaluated on **10 metrics** including LLM-based caption judging.
|
| 2002 |
|
| 2003 |
+
๐ [Paper](https://arxiv.org/abs/2512.06581) ๐ [Project Page](https://uii-ai.github.io/MedGRPO/) ๐พ [Dataset](https://huggingface.co/datasets/UII-AI/MedVidBench) ๐ค [Model](https://huggingface.co/UII-AI/Qwen2.5-VL-7B-MedGRPO) ๐ป [GitHub](https://github.com/UII-AI/MedGRPO-Code) ๐ฎ [Demo](https://huggingface.co/spaces/UII-AI/MedGRPO-Demo)
|
| 2004 |
""")
|
| 2005 |
|
| 2006 |
with gr.Tabs():
|
|
|
|
| 2055 |
3. **Provide model API access** so we can independently verify results
|
| 2056 |
4. Once verified, your model is added to the Official Leaderboard
|
| 2057 |
|
| 2058 |
+
For questions, contact us via [GitHub](https://github.com/UII-AI/MedGRPO-Code).
|
| 2059 |
""")
|
| 2060 |
|
| 2061 |
# Tab 2: Community Submissions
|
|
|
|
| 2306 |
|
| 2307 |
### Contact
|
| 2308 |
|
| 2309 |
+
For questions or issues, open an issue on [GitHub](https://github.com/UII-AI/MedGRPO-Code) or visit the [project page](https://uii-ai.github.io/MedGRPO/).
|
| 2310 |
""")
|
| 2311 |
|
| 2312 |
# Tab 5: Admin Panel (Password Protected)
|
test_hf_token.py
CHANGED
|
@@ -9,7 +9,7 @@ import sys
|
|
| 9 |
from huggingface_hub import HfApi
|
| 10 |
from pathlib import Path
|
| 11 |
|
| 12 |
-
REPO_ID = "
|
| 13 |
REPO_TYPE = "dataset"
|
| 14 |
|
| 15 |
def test_hf_token():
|
|
@@ -80,12 +80,12 @@ def test_hf_token():
|
|
| 80 |
print(f"\nโ Issue: Repository '{REPO_ID}' not found")
|
| 81 |
print("โ Fix: Create the repository:")
|
| 82 |
print(f" 1. Go to: https://huggingface.co/new-dataset")
|
| 83 |
-
print(f" 2. Owner:
|
| 84 |
print(f" 3. Name: MedVidBench-GroundTruth")
|
| 85 |
print(f" 4. Visibility: Private")
|
| 86 |
elif "403" in error_msg or "Forbidden" in error_msg:
|
| 87 |
print("\nโ Issue: No access to private repository")
|
| 88 |
-
print("โ Fix: Ensure you're a member of
|
| 89 |
|
| 90 |
sys.exit(1)
|
| 91 |
|
|
|
|
| 9 |
from huggingface_hub import HfApi
|
| 10 |
from pathlib import Path
|
| 11 |
|
| 12 |
+
REPO_ID = "UII-AI/MedVidBench-GroundTruth"
|
| 13 |
REPO_TYPE = "dataset"
|
| 14 |
|
| 15 |
def test_hf_token():
|
|
|
|
| 80 |
print(f"\nโ Issue: Repository '{REPO_ID}' not found")
|
| 81 |
print("โ Fix: Create the repository:")
|
| 82 |
print(f" 1. Go to: https://huggingface.co/new-dataset")
|
| 83 |
+
print(f" 2. Owner: UII-AI")
|
| 84 |
print(f" 3. Name: MedVidBench-GroundTruth")
|
| 85 |
print(f" 4. Visibility: Private")
|
| 86 |
elif "403" in error_msg or "Forbidden" in error_msg:
|
| 87 |
print("\nโ Issue: No access to private repository")
|
| 88 |
+
print("โ Fix: Ensure you're a member of UII-AI organization")
|
| 89 |
|
| 90 |
sys.exit(1)
|
| 91 |
|
upload_initial_data.py
CHANGED
|
@@ -14,7 +14,7 @@ from pathlib import Path
|
|
| 14 |
from huggingface_hub import HfApi
|
| 15 |
|
| 16 |
# Configuration
|
| 17 |
-
REPO_ID = "
|
| 18 |
REPO_TYPE = "dataset"
|
| 19 |
|
| 20 |
def create_initial_leaderboard():
|
|
|
|
| 14 |
from huggingface_hub import HfApi
|
| 15 |
|
| 16 |
# Configuration
|
| 17 |
+
REPO_ID = "UII-AI/MedVidBench-GroundTruth"
|
| 18 |
REPO_TYPE = "dataset"
|
| 19 |
|
| 20 |
def create_initial_leaderboard():
|