Each leaderboard ranks within itself; no rank gaps
Browse files
The previous global-rank scheme (commit 03adf33) made the same model show
the same rank number in both Official and Community tables by ranking
against the union of all submissions. Side effect: the Official table
displayed ranks like 2, 3, 4, ..., 8, 10 — gaps where models exist only
in the Community table.
Revert to per-table ranking: each table sorts and ranks against its own
rows via sort_by_avg_rank. Same model in both tables can have different
rank numbers (e.g., uAI-NEXUS-MedVLM-1.0d-4B-SFT at official-4 vs
community-5 because the community table has more competitors). Both
tables now display sequential ranks 1, 2, 3, ... with no gaps.
The dominance tiebreaker added in commit 3a73928 (count of #1-metric
wins) ensures top models like 1.0b-4B-RL stay at rank 1 across both
tables when they win the most metrics outright.
Removes three now-unused helpers: _read_leaderboard_json,
_global_rank_lookup, _apply_global_rank.
Verified against actual leaderboard JSONs:
- Official (15 rows): ranks 1..15 sequential
- Community (36 rows): ranks 1..36 sequential
- 1.0b-4B-RL stays at rank 1 in both
Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
|
@@ -216,89 +216,6 @@ TEST_SET_STATS = {
|
|
| 216 |
}
|
| 217 |
|
| 218 |
|
| 219 |
-
def _read_leaderboard_json(filename: str) -> List[dict]:
|
| 220 |
-
"""Read a leaderboard JSON (raw, no sort) from the private HF repo with
|
| 221 |
-
a local file fallback. Used by apply_global_rank to build the union of
|
| 222 |
-
official + community models so ranks are consistent across both tables.
|
| 223 |
-
"""
|
| 224 |
-
try:
|
| 225 |
-
token = os.environ.get('HF_TOKEN')
|
| 226 |
-
if token:
|
| 227 |
-
try:
|
| 228 |
-
path = hf_hub_download(
|
| 229 |
-
repo_id="UII-AI/MedVidBench-GroundTruth",
|
| 230 |
-
filename=filename,
|
| 231 |
-
repo_type="dataset",
|
| 232 |
-
token=token,
|
| 233 |
-
cache_dir="./cache",
|
| 234 |
-
)
|
| 235 |
-
with open(path) as f:
|
| 236 |
-
return json.load(f) or []
|
| 237 |
-
except Exception:
|
| 238 |
-
pass
|
| 239 |
-
except Exception:
|
| 240 |
-
pass
|
| 241 |
-
local = PERSISTENT_DIR / filename
|
| 242 |
-
if local.exists():
|
| 243 |
-
with open(local) as f:
|
| 244 |
-
return json.load(f) or []
|
| 245 |
-
return []
|
| 246 |
-
|
| 247 |
-
|
| 248 |
-
def _global_rank_lookup() -> dict:
|
| 249 |
-
"""Compute a global rank for each model by ranking against the union of
|
| 250 |
-
all submissions (official ∪ community). Returns {model_name: global_rank}.
|
| 251 |
-
|
| 252 |
-
This ensures the same model gets the same rank number in either the
|
| 253 |
-
Official or Community leaderboard view, regardless of which set of
|
| 254 |
-
competitors is being displayed.
|
| 255 |
-
"""
|
| 256 |
-
official_raw = _read_leaderboard_json("official_leaderboard.json")
|
| 257 |
-
community_raw = _read_leaderboard_json("leaderboard.json")
|
| 258 |
-
|
| 259 |
-
# Union by model_name; if a name is in both, prefer official entry's
|
| 260 |
-
# numeric values (they're the maintainer-verified ones, though in
|
| 261 |
-
# practice they're identical to community).
|
| 262 |
-
union_by_name = {}
|
| 263 |
-
for entry in community_raw:
|
| 264 |
-
name = entry.get("model_name")
|
| 265 |
-
if name:
|
| 266 |
-
union_by_name[name] = entry
|
| 267 |
-
for entry in official_raw:
|
| 268 |
-
name = entry.get("model_name")
|
| 269 |
-
if name:
|
| 270 |
-
union_by_name[name] = entry
|
| 271 |
-
|
| 272 |
-
if not union_by_name:
|
| 273 |
-
return {}
|
| 274 |
-
|
| 275 |
-
df = pd.DataFrame(list(union_by_name.values()))
|
| 276 |
-
if 'cvs_acc' not in df.columns:
|
| 277 |
-
return {}
|
| 278 |
-
df = sort_by_avg_rank(df)
|
| 279 |
-
return {row["model_name"]: int(row["rank"]) for _, row in df.iterrows()}
|
| 280 |
-
|
| 281 |
-
|
| 282 |
-
def _apply_global_rank(df: pd.DataFrame) -> pd.DataFrame:
|
| 283 |
-
"""Re-rank the rows of df using global ranks (computed against the union
|
| 284 |
-
of official + community models). Rows are re-sorted by global rank and
|
| 285 |
-
the 'rank' column is reassigned to display the global rank value.
|
| 286 |
-
"""
|
| 287 |
-
if df.empty:
|
| 288 |
-
return df
|
| 289 |
-
lookup = _global_rank_lookup()
|
| 290 |
-
if not lookup:
|
| 291 |
-
return df
|
| 292 |
-
df = df.copy()
|
| 293 |
-
# Fall back to a large number for any model not in the lookup (shouldn't
|
| 294 |
-
# happen, but keeps the sort total-ordered).
|
| 295 |
-
df["_global_rank"] = df["model_name"].map(lookup).fillna(10**9).astype(int)
|
| 296 |
-
df = df.sort_values("_global_rank", ascending=True, kind="mergesort").reset_index(drop=True)
|
| 297 |
-
df["rank"] = df["_global_rank"]
|
| 298 |
-
df = df.drop(columns=["_global_rank"])
|
| 299 |
-
return df
|
| 300 |
-
|
| 301 |
-
|
| 302 |
def sort_by_avg_rank(df: pd.DataFrame) -> pd.DataFrame:
|
| 303 |
"""Sort the leaderboard by average rank across all metrics.
|
| 304 |
|
|
@@ -390,12 +307,11 @@ def load_leaderboard() -> pd.DataFrame:
|
|
| 390 |
if 'average' in df.columns:
|
| 391 |
df = df.drop('average', axis=1)
|
| 392 |
|
| 393 |
-
# Sort by average rank
|
| 394 |
-
#
|
| 395 |
-
#
|
| 396 |
if 'cvs_acc' in df.columns:
|
| 397 |
df = sort_by_avg_rank(df)
|
| 398 |
-
df = _apply_global_rank(df)
|
| 399 |
|
| 400 |
print(f"β Loaded leaderboard from private repo: {len(df)} entries")
|
| 401 |
return df
|
|
@@ -416,11 +332,9 @@ def load_leaderboard() -> pd.DataFrame:
|
|
| 416 |
if 'average' in df.columns:
|
| 417 |
df = df.drop('average', axis=1)
|
| 418 |
|
| 419 |
-
# Sort by avg-rank
|
| 420 |
-
# number shown matches the official table for the same model.
|
| 421 |
if 'cvs_acc' in df.columns:
|
| 422 |
df = sort_by_avg_rank(df)
|
| 423 |
-
df = _apply_global_rank(df)
|
| 424 |
|
| 425 |
print(f"β Loaded leaderboard from local file: {len(df)} entries")
|
| 426 |
return df
|
|
@@ -516,7 +430,6 @@ def load_official_leaderboard() -> pd.DataFrame:
|
|
| 516 |
df = pd.DataFrame(data)
|
| 517 |
if 'cvs_acc' in df.columns:
|
| 518 |
df = sort_by_avg_rank(df)
|
| 519 |
-
df = _apply_global_rank(df)
|
| 520 |
print(f"β Loaded official leaderboard from private repo: {len(df)} entries")
|
| 521 |
return df
|
| 522 |
except Exception as e:
|
|
@@ -532,7 +445,6 @@ def load_official_leaderboard() -> pd.DataFrame:
|
|
| 532 |
df = pd.DataFrame(data)
|
| 533 |
if 'cvs_acc' in df.columns:
|
| 534 |
df = sort_by_avg_rank(df)
|
| 535 |
-
df = _apply_global_rank(df)
|
| 536 |
print(f"β Loaded official leaderboard from local file: {len(df)} entries")
|
| 537 |
return df
|
| 538 |
|
|
|
|
| 216 |
}
|
| 217 |
|
| 218 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 219 |
def sort_by_avg_rank(df: pd.DataFrame) -> pd.DataFrame:
|
| 220 |
"""Sort the leaderboard by average rank across all metrics.
|
| 221 |
|
|
|
|
| 307 |
if 'average' in df.columns:
|
| 308 |
df = df.drop('average', axis=1)
|
| 309 |
|
| 310 |
+
# Sort by average rank within this table only — each leaderboard
|
| 311 |
+
# is ranked independently against its own rows so the displayed
|
| 312 |
+
# rank column is sequential (1, 2, 3, ... with no gaps).
|
| 313 |
if 'cvs_acc' in df.columns:
|
| 314 |
df = sort_by_avg_rank(df)
|
|
|
|
| 315 |
|
| 316 |
print(f"β Loaded leaderboard from private repo: {len(df)} entries")
|
| 317 |
return df
|
|
|
|
| 332 |
if 'average' in df.columns:
|
| 333 |
df = df.drop('average', axis=1)
|
| 334 |
|
| 335 |
+
# Sort by avg-rank within this table only (see note in repo loader).
|
|
|
|
| 336 |
if 'cvs_acc' in df.columns:
|
| 337 |
df = sort_by_avg_rank(df)
|
|
|
|
| 338 |
|
| 339 |
print(f"β Loaded leaderboard from local file: {len(df)} entries")
|
| 340 |
return df
|
|
|
|
| 430 |
df = pd.DataFrame(data)
|
| 431 |
if 'cvs_acc' in df.columns:
|
| 432 |
df = sort_by_avg_rank(df)
|
|
|
|
| 433 |
print(f"β Loaded official leaderboard from private repo: {len(df)} entries")
|
| 434 |
return df
|
| 435 |
except Exception as e:
|
|
|
|
| 445 |
df = pd.DataFrame(data)
|
| 446 |
if 'cvs_acc' in df.columns:
|
| 447 |
df = sort_by_avg_rank(df)
|
|
|
|
| 448 |
print(f"β Loaded official leaderboard from local file: {len(df)} entries")
|
| 449 |
return df
|
| 450 |
|