MedGRPO Team Claude Opus 4.7 (1M context) committed on
Commit
4ad37f6
·
1 Parent(s): 03adf33

Each leaderboard ranks within itself; no rank gaps

Browse files

The previous global-rank scheme (commit 03adf33) made the same model show
the same rank number in both Official and Community tables by ranking
against the union of all submissions. Side effect: the Official table
displayed ranks like 2, 3, 4, ..., 8, 10 — gaps where models exist only
in the Community table.

Revert to per-table ranking: each table sorts and ranks against its own
rows via sort_by_avg_rank. Same model in both tables can have different
rank numbers (e.g., uAI-NEXUS-MedVLM-1.0d-4B-SFT at official-4 vs
community-5 because the community table has more competitors). Both
tables now display sequential ranks 1, 2, 3, ... with no gaps.

The dominance tiebreaker added in commit 3a73928 (count of #1-metric
wins) ensures top models like 1.0b-4B-RL stay at rank 1 across both
tables when they win the most metrics outright.

Removes three now-unused helpers: _read_leaderboard_json,
_global_rank_lookup, _apply_global_rank.

Verified against actual leaderboard JSONs:
- Official (15 rows): ranks 1..15 sequential
- Community (36 rows): ranks 1..36 sequential
- 1.0b-4B-RL stays at rank 1 in both

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

Files changed (1) hide show
  1. app.py +4 -92
app.py CHANGED
@@ -216,89 +216,6 @@ TEST_SET_STATS = {
216
  }
217
 
218
 
219
- def _read_leaderboard_json(filename: str) -> List[dict]:
220
- """Read a leaderboard JSON (raw, no sort) from the private HF repo with
221
- a local file fallback. Used by apply_global_rank to build the union of
222
- official + community models so ranks are consistent across both tables.
223
- """
224
- try:
225
- token = os.environ.get('HF_TOKEN')
226
- if token:
227
- try:
228
- path = hf_hub_download(
229
- repo_id="UII-AI/MedVidBench-GroundTruth",
230
- filename=filename,
231
- repo_type="dataset",
232
- token=token,
233
- cache_dir="./cache",
234
- )
235
- with open(path) as f:
236
- return json.load(f) or []
237
- except Exception:
238
- pass
239
- except Exception:
240
- pass
241
- local = PERSISTENT_DIR / filename
242
- if local.exists():
243
- with open(local) as f:
244
- return json.load(f) or []
245
- return []
246
-
247
-
248
- def _global_rank_lookup() -> dict:
249
- """Compute a global rank for each model by ranking against the union of
250
- all submissions (official ∪ community). Returns {model_name: global_rank}.
251
-
252
- This ensures the same model gets the same rank number in either the
253
- Official or Community leaderboard view, regardless of which set of
254
- competitors is being displayed.
255
- """
256
- official_raw = _read_leaderboard_json("official_leaderboard.json")
257
- community_raw = _read_leaderboard_json("leaderboard.json")
258
-
259
- # Union by model_name; if a name is in both, prefer official entry's
260
- # numeric values (they're the maintainer-verified ones, though in
261
- # practice they're identical to community).
262
- union_by_name = {}
263
- for entry in community_raw:
264
- name = entry.get("model_name")
265
- if name:
266
- union_by_name[name] = entry
267
- for entry in official_raw:
268
- name = entry.get("model_name")
269
- if name:
270
- union_by_name[name] = entry
271
-
272
- if not union_by_name:
273
- return {}
274
-
275
- df = pd.DataFrame(list(union_by_name.values()))
276
- if 'cvs_acc' not in df.columns:
277
- return {}
278
- df = sort_by_avg_rank(df)
279
- return {row["model_name"]: int(row["rank"]) for _, row in df.iterrows()}
280
-
281
-
282
- def _apply_global_rank(df: pd.DataFrame) -> pd.DataFrame:
283
- """Re-rank the rows of df using global ranks (computed against the union
284
- of official + community models). Rows are re-sorted by global rank and
285
- the 'rank' column is reassigned to display the global rank value.
286
- """
287
- if df.empty:
288
- return df
289
- lookup = _global_rank_lookup()
290
- if not lookup:
291
- return df
292
- df = df.copy()
293
- # Fall back to a large number for any model not in the lookup (shouldn't
294
- # happen, but keeps the sort total-ordered).
295
- df["_global_rank"] = df["model_name"].map(lookup).fillna(10**9).astype(int)
296
- df = df.sort_values("_global_rank", ascending=True, kind="mergesort").reset_index(drop=True)
297
- df["rank"] = df["_global_rank"]
298
- df = df.drop(columns=["_global_rank"])
299
- return df
300
-
301
-
302
  def sort_by_avg_rank(df: pd.DataFrame) -> pd.DataFrame:
303
  """Sort the leaderboard by average rank across all metrics.
304
 
@@ -390,12 +307,11 @@ def load_leaderboard() -> pd.DataFrame:
390
  if 'average' in df.columns:
391
  df = df.drop('average', axis=1)
392
 
393
- # Sort by average rank across all metrics (lower avg rank = better).
394
- # Then re-rank against the union of official + community so the
395
- # same model gets the same rank in either view.
396
  if 'cvs_acc' in df.columns:
397
  df = sort_by_avg_rank(df)
398
- df = _apply_global_rank(df)
399
 
400
  print(f"✓ Loaded leaderboard from private repo: {len(df)} entries")
401
  return df
@@ -416,11 +332,9 @@ def load_leaderboard() -> pd.DataFrame:
416
  if 'average' in df.columns:
417
  df = df.drop('average', axis=1)
418
 
419
- # Sort by avg-rank, then apply union-based global rank so the
420
- # number shown matches the official table for the same model.
421
  if 'cvs_acc' in df.columns:
422
  df = sort_by_avg_rank(df)
423
- df = _apply_global_rank(df)
424
 
425
  print(f"✓ Loaded leaderboard from local file: {len(df)} entries")
426
  return df
@@ -516,7 +430,6 @@ def load_official_leaderboard() -> pd.DataFrame:
516
  df = pd.DataFrame(data)
517
  if 'cvs_acc' in df.columns:
518
  df = sort_by_avg_rank(df)
519
- df = _apply_global_rank(df)
520
  print(f"✓ Loaded official leaderboard from private repo: {len(df)} entries")
521
  return df
522
  except Exception as e:
@@ -532,7 +445,6 @@ def load_official_leaderboard() -> pd.DataFrame:
532
  df = pd.DataFrame(data)
533
  if 'cvs_acc' in df.columns:
534
  df = sort_by_avg_rank(df)
535
- df = _apply_global_rank(df)
536
  print(f"✓ Loaded official leaderboard from local file: {len(df)} entries")
537
  return df
538
 
 
216
  }
217
 
218
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
219
  def sort_by_avg_rank(df: pd.DataFrame) -> pd.DataFrame:
220
  """Sort the leaderboard by average rank across all metrics.
221
 
 
307
  if 'average' in df.columns:
308
  df = df.drop('average', axis=1)
309
 
310
+ # Sort by average rank within this table only — each leaderboard
311
+ # is ranked independently against its own rows so the displayed
312
+ # rank column is sequential (1, 2, 3, ... with no gaps).
313
  if 'cvs_acc' in df.columns:
314
  df = sort_by_avg_rank(df)
 
315
 
316
  print(f"✓ Loaded leaderboard from private repo: {len(df)} entries")
317
  return df
 
332
  if 'average' in df.columns:
333
  df = df.drop('average', axis=1)
334
 
335
+ # Sort by avg-rank within this table only (see note in repo loader).
 
336
  if 'cvs_acc' in df.columns:
337
  df = sort_by_avg_rank(df)
 
338
 
339
  print(f"✓ Loaded leaderboard from local file: {len(df)} entries")
340
  return df
 
430
  df = pd.DataFrame(data)
431
  if 'cvs_acc' in df.columns:
432
  df = sort_by_avg_rank(df)
 
433
  print(f"✓ Loaded official leaderboard from private repo: {len(df)} entries")
434
  return df
435
  except Exception as e:
 
445
  df = pd.DataFrame(data)
446
  if 'cvs_acc' in df.columns:
447
  df = sort_by_avg_rank(df)
 
448
  print(f"✓ Loaded official leaderboard from local file: {len(df)} entries")
449
  return df
450