MedGRPO Team committed on
Commit
2362e57
·
1 Parent(s): f0e43d6
Files changed (2) hide show
  1. app.py +212 -0
  2. cleanup_test_data.py +195 -0
app.py CHANGED
@@ -238,6 +238,97 @@ def save_leaderboard(df: pd.DataFrame):
238
  json.dump(df.to_dict('records'), f, indent=2)
239
 
240
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
241
  def detect_evaluation_output_format(file_path: str) -> Tuple[bool, str]:
242
  """
243
  Detect if uploaded file is pre-processed evaluation output with LLM judge scores.
@@ -1300,6 +1391,127 @@ with gr.Blocks(title="MedVidBench Leaderboard", theme=gr.themes.Soft()) as demo:
1300
  - Email: [Contact via GitHub](https://github.com/YuhaoSu)
1301
  """)
1302
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1303
  if __name__ == "__main__":
1304
  # Launch with queue for better concurrency
1305
  demo.queue(default_concurrency_limit=5)
 
238
  json.dump(df.to_dict('records'), f, indent=2)
239
 
240
 
241
+ # ============================================================================
242
+ # Admin Functions
243
+ # ============================================================================
244
+
245
def check_admin_password(password: str) -> bool:
    """
    Check whether the supplied password matches the configured admin password.

    The admin password is read from the ADMIN_PASSWORD environment variable on
    every call. When the variable is unset or empty the check fails closed:
    no built-in default is accepted, because a default that is hard-coded in
    the source would let anyone who can read the code unlock admin actions.

    Args:
        password: Password entered by the user. Non-string values are rejected.

    Returns:
        True only if ADMIN_PASSWORD is set and equal to ``password``.
    """
    import hmac  # local import so the block does not depend on file-level changes

    admin_password = os.environ.get('ADMIN_PASSWORD', '')
    if not admin_password or not isinstance(password, str):
        return False

    # Constant-time comparison; encode to bytes so non-ASCII passwords work.
    return hmac.compare_digest(
        password.encode('utf-8'),
        admin_password.encode('utf-8'),
    )
257
+
258
+
259
def _is_strict_subdir(base_dir, candidate) -> bool:
    """Return True if ``candidate`` resolves to a location strictly inside ``base_dir``."""
    try:
        base = base_dir.resolve()
        target = candidate.resolve()
    except OSError:
        # Unresolvable paths (e.g. symlink loops) are treated as unsafe.
        return False
    return target != base and base in target.parents


def delete_model_submission(model_name: str) -> Tuple[bool, str]:
    """
    Delete a model submission from leaderboard and cleanup associated files.

    The leaderboard entry is removed first, then the per-model directories
    under RESULTS_DIR and SUBMISSIONS_DIR are removed. A directory is only
    removed when it resolves to a strict sub-directory of its base, so a
    model name such as ".." or "." can never cause the base directory (or
    anything outside it) to be deleted. Cleanup problems are reported in the
    message but do not turn an already-saved deletion into a failure.

    Args:
        model_name: Name of the model to delete

    Returns:
        (success, message)
    """
    try:
        # Load current leaderboard
        df = load_leaderboard()

        if df.empty:
            return False, "Leaderboard is empty"

        # Check if model exists
        if model_name not in df['model_name'].values:
            return False, f"Model '{model_name}' not found in leaderboard"

        # Get model info before deletion
        model_row = df[df['model_name'] == model_name].iloc[0]
        organization = model_row.get('organization', 'Unknown')
        date = model_row.get('date', 'Unknown')

        # Remove from leaderboard
        df = df[df['model_name'] != model_name].reset_index(drop=True)
        save_leaderboard(df)

        # Cleanup associated files
        model_dir_name = model_name.replace(" ", "_")
        cleanup_info = []

        for label, base_dir in (("results", RESULTS_DIR), ("submissions", SUBMISSIONS_DIR)):
            target_dir = base_dir / model_dir_name

            # Never hand rmtree a path that escapes (or equals) the base directory.
            if not model_dir_name or not _is_strict_subdir(base_dir, target_dir):
                cleanup_info.append(f"Skipped {label}: unsafe path {target_dir}")
                continue

            if not target_dir.exists():
                continue

            try:
                shutil.rmtree(target_dir)
            except OSError as e:
                cleanup_info.append(f"Failed to remove {label}: {target_dir} ({e})")
            else:
                cleanup_info.append(f"Removed {label}: {target_dir}")

        message = f"✓ Successfully deleted model '{model_name}'\n"
        message += f" Organization: {organization}\n"
        message += f" Date: {date}\n\n"
        if cleanup_info:
            message += "Cleaned up:\n" + "\n".join(f" - {info}" for info in cleanup_info)

        return True, message

    except Exception as e:
        # UI boundary: surface the error text instead of crashing the handler.
        return False, f"Error deleting model: {str(e)}"
316
+
317
+
318
def get_leaderboard_for_admin() -> pd.DataFrame:
    """Return the leaderboard restricted to the columns shown in the admin table.

    An empty leaderboard yields an empty frame that still carries all admin
    column headers; otherwise only the admin columns actually present in the
    loaded data are returned, in the fixed admin order.
    """
    wanted = ["rank", "model_name", "organization", "date", "contact"]
    board = load_leaderboard()

    if board.empty:
        return pd.DataFrame(columns=wanted)

    present = [name for name in wanted if name in board.columns]
    return board[present]
330
+
331
+
332
  def detect_evaluation_output_format(file_path: str) -> Tuple[bool, str]:
333
  """
334
  Detect if uploaded file is pre-processed evaluation output with LLM judge scores.
 
1391
  - Email: [Contact via GitHub](https://github.com/YuhaoSu)
1392
  """)
1393
 
1394
# Tab 5: Admin Panel (Password Protected)
#
# Hiding the panel until login is only a UI convenience: the click handlers
# below remain callable by any client, so every handler that reads or changes
# admin data re-validates the password itself, and the submissions table stays
# empty until a valid password has been supplied.
with gr.Tab("🔒 Admin"):
    gr.Markdown("""
    ### Admin Panel

    This panel allows administrators to manage leaderboard submissions.

    **Features**:
    - View all submissions
    - Delete individual models
    - Cleanup test/dummy data

    **Note**: Admin password is set via `ADMIN_PASSWORD` environment variable in HuggingFace Spaces settings.
    """)

    def _empty_admin_table():
        """Return the admin table with its column headers but no rows."""
        return get_leaderboard_for_admin().iloc[0:0]

    # Password authentication
    with gr.Row():
        admin_password_input = gr.Textbox(
            label="Admin Password",
            type="password",
            placeholder="Enter admin password",
            scale=3
        )
        login_btn = gr.Button("🔓 Login", variant="primary", scale=1)

    login_status = gr.Markdown("", visible=True)

    # Admin panel (hidden by default, shown after successful login)
    with gr.Column(visible=False) as admin_panel:
        gr.Markdown("### 📋 Current Submissions")

        # Table showing all submissions; starts empty so contact details are
        # not shipped to clients that have not authenticated.
        admin_table = gr.Dataframe(
            value=_empty_admin_table(),
            interactive=False,
            label="Leaderboard Entries",
            wrap=True
        )

        with gr.Row():
            refresh_admin_btn = gr.Button("🔄 Refresh List", size="sm")
            delete_model_input = gr.Textbox(
                label="Model Name to Delete",
                placeholder="Enter exact model name",
                scale=2
            )
            delete_btn = gr.Button("🗑️ Delete Model", variant="stop", scale=1)

        delete_status = gr.Markdown("")

        gr.Markdown("""
        ---

        ### ⚠️ Deletion Instructions

        1. **Find the model** in the table above (check the `model_name` column)
        2. **Copy the exact model name** (case-sensitive)
        3. **Paste it** in the "Model Name to Delete" field
        4. **Click "Delete Model"** - this will:
           - Remove the entry from leaderboard
           - Delete all associated files (`/data/results/{model_name}/`, `/data/submissions/{model_name}/`)
           - Update rankings

        **⚠️ Warning**: Deletion is permanent and cannot be undone!

        ### 🔐 Security Notes

        - Set `ADMIN_PASSWORD` in HuggingFace Spaces → Settings → Secrets before using this panel
        - Keep the password in the field above: it is re-checked on every refresh and delete
        - Admin actions are not logged (consider adding audit trail)
        """)

    # Login handler
    def handle_login(password):
        """Unlock the panel and load the table only for a valid password."""
        if check_admin_password(password):
            return (
                "✓ Login successful! Admin panel unlocked.",
                gr.update(visible=True),  # Show admin panel
                get_leaderboard_for_admin()  # Load initial data
            )
        return (
            "❌ Invalid password. Please try again.",
            gr.update(visible=False),  # Keep admin panel hidden
            _empty_admin_table()  # Never return data on a failed login
        )

    login_btn.click(
        fn=handle_login,
        inputs=[admin_password_input],
        outputs=[login_status, admin_panel, admin_table]
    )

    # Refresh handler
    def refresh_admin_table(password=None):
        """Reload the table; returns no rows unless the password is valid."""
        if not check_admin_password(password):
            return _empty_admin_table()
        return get_leaderboard_for_admin()

    refresh_admin_btn.click(
        fn=refresh_admin_table,
        inputs=[admin_password_input],
        outputs=[admin_table]
    )

    # Delete handler
    def handle_delete(model_name, password=None):
        """Delete a model after re-validating the admin password."""
        if not check_admin_password(password):
            return (
                "❌ Not authorized. Enter the admin password and log in first.",
                _empty_admin_table()
            )

        if not model_name or not model_name.strip():
            return "❌ Please enter a model name", get_leaderboard_for_admin()

        success, message = delete_model_submission(model_name.strip())

        if success:
            # Refresh table after successful deletion
            return f"## ✓ Deletion Successful\n\n{message}", get_leaderboard_for_admin()
        return f"## ❌ Deletion Failed\n\n{message}", get_leaderboard_for_admin()

    delete_btn.click(
        fn=handle_delete,
        inputs=[delete_model_input, admin_password_input],
        outputs=[delete_status, admin_table]
    )
1514
+
1515
  if __name__ == "__main__":
1516
  # Launch with queue for better concurrency
1517
  demo.queue(default_concurrency_limit=5)
cleanup_test_data.py ADDED
@@ -0,0 +1,195 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Cleanup script to remove test/dummy submissions from leaderboard.
4
+
5
+ Usage:
6
+ python cleanup_test_data.py --model-name "TestModel"
7
+ python cleanup_test_data.py --all-test # Remove all models with "test" in name or organization
8
+ python cleanup_test_data.py --list # List all models
9
+ """
10
+
11
+ import json
12
+ import argparse
13
+ import shutil
14
+ from pathlib import Path
15
+
16
# Configuration
# Storage root: use /data when that directory exists, otherwise the current directory.
_DATA_ROOT = Path("/data")
PERSISTENT_DIR = _DATA_ROOT if _DATA_ROOT.exists() else Path(".")
LEADERBOARD_FILE = PERSISTENT_DIR.joinpath("leaderboard.json")
RESULTS_DIR = PERSISTENT_DIR.joinpath("results")
SUBMISSIONS_DIR = PERSISTENT_DIR.joinpath("submissions")
21
+
22
+
23
def load_leaderboard():
    """Read the leaderboard JSON file and return its entries as a list.

    Prints a message and returns an empty list when the file is missing.
    Any parsed JSON value that is not a list is also replaced by an empty list.
    """
    if LEADERBOARD_FILE.exists():
        parsed = json.loads(LEADERBOARD_FILE.read_text())
        if isinstance(parsed, list):
            return parsed
        return []

    print(f"❌ Leaderboard file not found: {LEADERBOARD_FILE}")
    return []
32
+
33
+
34
def save_leaderboard(data):
    """Renumber the entries and write them to the leaderboard file.

    Each entry's 'rank' is overwritten in place with its 1-based position in
    ``data`` before the list is serialized as indented JSON.
    """
    # Ranks follow list order, starting at 1
    position = 0
    for record in data:
        position += 1
        record['rank'] = position

    with open(LEADERBOARD_FILE, 'w') as out:
        out.write(json.dumps(data, indent=2))

    print(f"✓ Saved leaderboard with {len(data)} entries")
44
+
45
+
46
def list_models():
    """Print rank, name, organization and date for every leaderboard entry."""
    entries = load_leaderboard()

    if not entries:
        print("Leaderboard is empty")
        return

    rule = '=' * 80
    print(f"\n{rule}")
    print(f"LEADERBOARD MODELS ({len(entries)} total)")
    print(f"{rule}\n")

    for item in entries:
        # Missing fields fall back to a placeholder instead of raising
        print(f"#{item.get('rank', '?')}: {item.get('model_name', 'Unknown')}")
        print(f" Organization: {item.get('organization', 'Unknown')}")
        print(f" Date: {item.get('date', 'Unknown')}")
        print()
68
+
69
+
70
def _is_safe_model_dir(base_dir, target_dir):
    """Return True if ``target_dir`` resolves to a location strictly inside ``base_dir``."""
    try:
        base = base_dir.resolve()
        target = target_dir.resolve()
    except OSError:
        # Unresolvable paths (e.g. symlink loops) are treated as unsafe.
        return False
    return target != base and base in target.parents


def delete_model(model_name, dry_run=False):
    """Delete a model from leaderboard and cleanup associated files.

    The entry is removed from the leaderboard file, then the model's
    directories under RESULTS_DIR and SUBMISSIONS_DIR are removed. A directory
    is only removed when it resolves to a strict sub-directory of its base, so
    a model name such as ".." or "." cannot delete the base directory or
    anything outside it.

    Args:
        model_name: Exact model name to delete. Non-string or empty names are
            reported as not found.
        dry_run: When True, only print what would be done.

    Returns:
        True if the model was found (and deleted, unless dry_run), else False.
    """
    data = load_leaderboard()

    # Find the model (first exact match); a non-string name cannot match safely
    model_entry = None
    if isinstance(model_name, str) and model_name:
        model_entry = next(
            (entry for entry in data if entry.get('model_name') == model_name),
            None,
        )

    if not model_entry:
        print(f"❌ Model not found: {model_name}")
        return False

    print(f"\n{'='*80}")
    print(f"DELETING MODEL: {model_name}")
    print(f"{'='*80}\n")
    print(f"Organization: {model_entry.get('organization')}")
    print(f"Date: {model_entry.get('date')}")
    print(f"Rank: {model_entry.get('rank')}")

    if dry_run:
        print("\n⚠️ DRY RUN - No changes will be made")

    # Associated files/directories
    model_dir_name = model_name.replace(" ", "_")

    # Remove from leaderboard
    if not dry_run:
        data = [e for e in data if e.get('model_name') != model_name]
        save_leaderboard(data)
        print("✓ Removed from leaderboard")
    else:
        print("[DRY RUN] Would remove from leaderboard")

    # Remove results, then submissions (the latter might not exist after evaluation)
    for label, base_dir in (("results", RESULTS_DIR), ("submissions", SUBMISSIONS_DIR)):
        target_dir = base_dir / model_dir_name

        # Never hand rmtree a path that escapes (or equals) the base directory.
        if not _is_safe_model_dir(base_dir, target_dir):
            print(f"⚠️ Skipping unsafe {label} path: {target_dir}")
            continue

        if not target_dir.exists():
            continue

        if not dry_run:
            shutil.rmtree(target_dir)
            print(f"✓ Removed {label} directory: {target_dir}")
        else:
            print(f"[DRY RUN] Would remove: {target_dir}")

    print(f"\n{'='*80}")
    if dry_run:
        print("✓ DRY RUN COMPLETE - No changes made")
    else:
        print("✓ MODEL DELETED SUCCESSFULLY")
    print(f"{'='*80}\n")

    return True
132
+
133
+
134
def delete_test_models(dry_run=False):
    """Delete all models with 'test' in their name or organization (case-insensitive).

    Matching entries are listed first. Unless ``dry_run`` is set, the user
    must type 'yes' at the prompt before anything is deleted. Fields that are
    missing or not strings (e.g. null/NaN values in the JSON) never match,
    and matched entries without a usable string model name are skipped
    instead of raising.

    Args:
        dry_run: When True, skip the prompt and only report what would be deleted.
    """
    def _mentions_test(value):
        # Non-string values (None, NaN, numbers) have no .lower(); treat as no match.
        return isinstance(value, str) and 'test' in value.lower()

    data = load_leaderboard()

    test_models = [
        entry for entry in data
        if _mentions_test(entry.get('model_name'))
        or _mentions_test(entry.get('organization'))
    ]

    if not test_models:
        print("No test models found")
        return

    print(f"\n{'='*80}")
    print(f"FOUND {len(test_models)} TEST MODELS")
    print(f"{'='*80}\n")

    for entry in test_models:
        print(f"- {entry.get('model_name')} ({entry.get('organization')})")

    print()

    if not dry_run:
        confirm = input("Delete all these models? (yes/no): ")
        if confirm.lower() != 'yes':
            print("Cancelled")
            return

    for entry in test_models:
        model_name = entry.get('model_name')
        if not isinstance(model_name, str) or not model_name:
            # Matched via organization only; there is no name to delete by.
            print(f"⚠️ Skipping entry without a valid model name: {model_name!r}")
            continue
        delete_model(model_name, dry_run=dry_run)
166
+
167
+
168
def main():
    """Parse the command line and run the single requested action.

    Exactly one of --list, --model-name or --all-test must be given;
    --dry-run is forwarded to the two deleting actions.
    """
    cli = argparse.ArgumentParser(
        description="Cleanup test/dummy submissions from MedVidBench leaderboard"
    )

    # The three actions are mutually exclusive and one is mandatory
    actions = cli.add_mutually_exclusive_group(required=True)
    actions.add_argument(
        "--list",
        action="store_true",
        help="List all models in leaderboard",
    )
    actions.add_argument(
        "--model-name",
        type=str,
        help="Delete specific model by name",
    )
    actions.add_argument(
        "--all-test",
        action="store_true",
        help="Delete all models with 'test' in name",
    )

    cli.add_argument(
        "--dry-run",
        action="store_true",
        help="Show what would be deleted without actually deleting",
    )

    options = cli.parse_args()

    if options.list:
        list_models()
        return

    if options.model_name:
        delete_model(options.model_name, dry_run=options.dry_run)
        return

    if options.all_test:
        delete_test_models(dry_run=options.dry_run)


if __name__ == "__main__":
    main()