Spaces:

UII-AI
/

MedVidBench-Leaderboard

Running

MedGRPO Team Claude Opus 4.7 (1M context) committed 5 days ago

Commit

4ad37f6

1 Parent(s): 03adf33

Each leaderboard ranks within itself; no rank gaps

The previous global-rank scheme (commit 03adf33) made the same model show
the same rank number in both the Official and Community tables by ranking
against the union of all submissions. Side effect: the Official table
displayed non-consecutive ranks such as 2, 3, 4, ..., 8, 10, with gaps at
the positions held by models that exist only in the Community table.

Revert to per-table ranking: each table sorts and ranks against its own
rows via sort_by_avg_rank. The same model can therefore have different
rank numbers in the two tables (e.g., uAI-NEXUS-MedVLM-1.0d-4B-SFT is
official-4 but community-5 because the community table has more
competitors). Both tables now display sequential ranks 1, 2, 3, ... with
no gaps.

The dominance tiebreaker added in commit 3a73928 (count of #1-metric
wins) ensures top models like 1.0b-4B-RL stay at rank 1 across both
tables when they win the most metrics outright.

Removes three now-unused helpers: _read_leaderboard_json,
_global_rank_lookup, _apply_global_rank.

Verified against actual leaderboard JSONs:
- Official (15 rows): ranks 1..15 sequential
- Community (36 rows): ranks 1..36 sequential
- 1.0b-4B-RL stays at rank 1 in both

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>

Files changed (1) hide show

app.py +4 -92

app.py CHANGED Viewed

@@ -216,89 +216,6 @@ TEST_SET_STATS = {
 }
-def _read_leaderboard_json(filename: str) -> List[dict]:
-    """Read a leaderboard JSON (raw, no sort) from the private HF repo with
-    a local file fallback. Used by apply_global_rank to build the union of
-    official + community models so ranks are consistent across both tables.
-    """
-    try:
-        token = os.environ.get('HF_TOKEN')
-        if token:
-            try:
-                path = hf_hub_download(
-                    repo_id="UII-AI/MedVidBench-GroundTruth",
-                    filename=filename,
-                    repo_type="dataset",
-                    token=token,
-                    cache_dir="./cache",
-                )
-                with open(path) as f:
-                    return json.load(f) or []
-            except Exception:
-                pass
-    except Exception:
-        pass
-    local = PERSISTENT_DIR / filename
-    if local.exists():
-        with open(local) as f:
-            return json.load(f) or []
-    return []
-def _global_rank_lookup() -> dict:
-    """Compute a global rank for each model by ranking against the union of
-    all submissions (official ∪ community). Returns {model_name: global_rank}.
-    This ensures the same model gets the same rank number in either the
-    Official or Community leaderboard view, regardless of which set of
-    competitors is being displayed.
-    """
-    official_raw = _read_leaderboard_json("official_leaderboard.json")
-    community_raw = _read_leaderboard_json("leaderboard.json")
-    # Union by model_name; if a name is in both, prefer official entry's
-    # numeric values (they're the maintainer-verified ones, though in
-    # practice they're identical to community).
-    union_by_name = {}
-    for entry in community_raw:
-        name = entry.get("model_name")
-        if name:
-            union_by_name[name] = entry
-    for entry in official_raw:
-        name = entry.get("model_name")
-        if name:
-            union_by_name[name] = entry
-    if not union_by_name:
-        return {}
-    df = pd.DataFrame(list(union_by_name.values()))
-    if 'cvs_acc' not in df.columns:
-        return {}
-    df = sort_by_avg_rank(df)
-    return {row["model_name"]: int(row["rank"]) for _, row in df.iterrows()}
-def _apply_global_rank(df: pd.DataFrame) -> pd.DataFrame:
-    """Re-rank the rows of df using global ranks (computed against the union
-    of official + community models). Rows are re-sorted by global rank and
-    the 'rank' column is reassigned to display the global rank value.
-    """
-    if df.empty:
-        return df
-    lookup = _global_rank_lookup()
-    if not lookup:
-        return df
-    df = df.copy()
-    # Fall back to a large number for any model not in the lookup (shouldn't
-    # happen, but keeps the sort total-ordered).
-    df["_global_rank"] = df["model_name"].map(lookup).fillna(10**9).astype(int)
-    df = df.sort_values("_global_rank", ascending=True, kind="mergesort").reset_index(drop=True)
-    df["rank"] = df["_global_rank"]
-    df = df.drop(columns=["_global_rank"])
-    return df
 def sort_by_avg_rank(df: pd.DataFrame) -> pd.DataFrame:
     """Sort the leaderboard by average rank across all metrics.
@@ -390,12 +307,11 @@ def load_leaderboard() -> pd.DataFrame:
                     if 'average' in df.columns:
                         df = df.drop('average', axis=1)
-                    # Sort by average rank across all metrics (lower avg rank = better).
-                    # Then re-rank against the union of official + community so the
-                    # same model gets the same rank in either view.
                     if 'cvs_acc' in df.columns:
                         df = sort_by_avg_rank(df)
-                        df = _apply_global_rank(df)
                     print(f"✓ Loaded leaderboard from private repo: {len(df)} entries")
                     return df
@@ -416,11 +332,9 @@ def load_leaderboard() -> pd.DataFrame:
             if 'average' in df.columns:
                 df = df.drop('average', axis=1)
-            # Sort by avg-rank, then apply union-based global rank so the
-            # number shown matches the official table for the same model.
             if 'cvs_acc' in df.columns:
                 df = sort_by_avg_rank(df)
-                df = _apply_global_rank(df)
             print(f"✓ Loaded leaderboard from local file: {len(df)} entries")
             return df
@@ -516,7 +430,6 @@ def load_official_leaderboard() -> pd.DataFrame:
                     df = pd.DataFrame(data)
                     if 'cvs_acc' in df.columns:
                         df = sort_by_avg_rank(df)
-                        df = _apply_global_rank(df)
                     print(f"✓ Loaded official leaderboard from private repo: {len(df)} entries")
                     return df
             except Exception as e:
@@ -532,7 +445,6 @@ def load_official_leaderboard() -> pd.DataFrame:
             df = pd.DataFrame(data)
             if 'cvs_acc' in df.columns:
                 df = sort_by_avg_rank(df)
-                df = _apply_global_rank(df)
             print(f"✓ Loaded official leaderboard from local file: {len(df)} entries")
             return df

 }
 def sort_by_avg_rank(df: pd.DataFrame) -> pd.DataFrame:
     """Sort the leaderboard by average rank across all metrics.
                     if 'average' in df.columns:
                         df = df.drop('average', axis=1)
+                    # Sort by average rank within this table only — each leaderboard
+                    # is ranked independently against its own rows so the displayed
+                    # rank column is sequential (1, 2, 3, ... with no gaps).
                     if 'cvs_acc' in df.columns:
                         df = sort_by_avg_rank(df)
                     print(f"✓ Loaded leaderboard from private repo: {len(df)} entries")
                     return df
             if 'average' in df.columns:
                 df = df.drop('average', axis=1)
+            # Sort by avg-rank within this table only (see note in repo loader).
             if 'cvs_acc' in df.columns:
                 df = sort_by_avg_rank(df)
             print(f"✓ Loaded leaderboard from local file: {len(df)} entries")
             return df
                     df = pd.DataFrame(data)
                     if 'cvs_acc' in df.columns:
                         df = sort_by_avg_rank(df)
                     print(f"✓ Loaded official leaderboard from private repo: {len(df)} entries")
                     return df
             except Exception as e:
             df = pd.DataFrame(data)
             if 'cvs_acc' in df.columns:
                 df = sort_by_avg_rank(df)
             print(f"✓ Loaded official leaderboard from local file: {len(df)} entries")
             return df