Spaces:

rufasharon
/

metadata_hierarchy_tfm2026

Sleeping

App Files Files Community

RoophaSharon committed 6 days ago

Commit

ef5da89

1 Parent(s): 2b56f2e

Navigation router (branding + Demo View + collapsible Build hierarchy); full-range LoD slider (1-9, default 7); replace deprecated use_container_width

Browse files

Files changed (6) hide show

demo.py +26 -560
requirements.txt +1 -1
pages/2_Approach_1.py → views/run_approach_1.py +22 -22
pages/3_Approach_2.py → views/run_approach_2.py +16 -17
pages/1_Baseline.py → views/run_baseline.py +10 -10
views/viewer.py +562 -0

demo.py CHANGED Viewed

@@ -1,580 +1,46 @@
 """
 Metadata Hierarchy Explorer — TFM 2026
-Pre-built results viewer for Baseline, Approach 1, and Approach 2.
-Rendering faithfully replicates each app's display pipeline:
-  - Baseline    : raw tree, Greens, Sunburst + Treemap
-  - Approach 1  : raw tree, Blues,  Sunburst + Treemap + Node-link + Facets
-  - Approach 2  : compress one-child chains, Viridis, Sunburst + Treemap + Node-link
-Level-of-Detail controls (depth, leaf labels, hidden nodes, compress chains)
-match the controls in the individual apps.
 """
-from __future__ import annotations
-import json
-from collections import defaultdict
-from pathlib import Path
-import numpy as np
-import plotly.graph_objects as go
 import streamlit as st
-# ─────────────────────────────────────────────────────────────────────────────
-# PAGE CONFIG
-# ─────────────────────────────────────────────────────────────────────────────
 st.set_page_config(
     page_title="Metadata Hierarchy Explorer",
     page_icon="🌿",
     layout="wide",
 )
-ROOT = Path(__file__).parent / "outputs"
-DEFAULT_DEPTH = 7
-# ─────────────────────────────────────────────────────────────────────────────
-# PRE-BUILT OUTPUT PATHS
-# ─────────────────────────────────────────────────────────────────────────────
-PREBUILT = {
-    "Baseline": {
-        "AI-MIND": {"hierarchy": ROOT / "baseline" / "ai-mind-variable-descriptions_in__baseline_hierarchy.json"},
-        "HCP":     {"hierarchy": ROOT / "baseline" / "HCP_S1200_DataDictionary_Oct_30_2023_baseline_hierarchy.json"},
-    },
-    "Approach 1": {
-        "AI-MIND": {
-            "hierarchy": ROOT / "approach_1" / "ai-mind-variable-descriptions_in__approach1_hierarchy.json",
-            "facets":    ROOT / "approach_1" / "ai-mind-variable-descriptions_in__approach1_facets.json",
-        },
-        "HCP": {
-            "hierarchy": ROOT / "approach_1" / "HCP_S1200_DataDictionary_Oct_30_2023_approach1_hierarchy.json",
-            "facets":    ROOT / "approach_1" / "HCP_S1200_DataDictionary_Oct_30_2023_approach1_facets.json",
-        },
-    },
-    "Approach 2": {
-        "AI-MIND": {"hierarchy": ROOT / "approach_2" / "ai-mind-variable-descriptions_in__approach2_lod.json"},
-        "HCP":     {"hierarchy": ROOT / "approach_2" / "HCP_S1200_DataDictionary_Oct_30_2023_approach2_lod.json"},
-    },
-}
-# Per-approach rendering config (matches each source app)
-CONFIG = {
-    "Baseline":   {"color": "Greens",  "compress": False, "node_link": False},
-    "Approach 1": {"color": "Blues",   "compress": False, "node_link": True},
-    "Approach 2": {"color": "Viridis", "compress": True,  "node_link": True},
-}
-APPROACH_DESC = {
-    "Baseline": (
-        "Pure clustering baseline — TF-IDF representation + recursive agglomerative "
-        "(cosine) clustering, number of clusters chosen by silhouette. No external APIs, "
-        "no neural embeddings. Node labels are the most discriminative terms per cluster."
-    ),
-    "Approach 1": (
-        "Global embedding pipeline — SBERT + N×M concept-table alignment (Gonçalves 2019) "
-        "+ HiExpan refinement (Shen et al. KDD 2018) + Castanet parallel facets. Optionally "
-        "retrieves concept context from Wikidata / Wikipedia / WordNet / BioPortal."
-    ),
-    "Approach 2": (
-        "Dataset-constrained multi-aspect hierarchy — group-anchored L1/L2 → phrase-slot "
-        "mining → FASTopic semantic aspect discovery (Wu et al. NeurIPS 2024) → GMM/KMeans "
-        "clustering → deterministic 5-stage label generation. Optional local-LLM refinement."
-    ),
-}
-# ─────────────────────────────────────────────────────────────────────────────
-# TREE TRANSFORMS  (copied from approach_2.py — display-only, exact behaviour)
-# ─────────────────────────────────────────────────────────────────────────────
-def _filter_dissolved(nodes: list) -> list:
-    drop_ids = {int(n["id"]) for n in nodes
-                if n.get("type") == "dissolved" or n.get("isShown") is False}
-    if not drop_ids:
-        return nodes
-    out = []
-    for n in nodes:
-        if int(n["id"]) in drop_ids:
-            continue
-        m = dict(n)
-        m["related"] = [int(c) for c in n.get("related", []) if int(c) not in drop_ids]
-        out.append(m)
-    return out
-def compress_one_child_chains(nodes: list) -> list:
-    """Collapse chains where an aggregation node has exactly one aggregation child
-    (e.g. 'DMS → DMS Recommended Standard' becomes 'DMS / DMS Recommended Standard')."""
-    nodes = _filter_dissolved(nodes)
-    nm = {int(n["id"]): dict(n) for n in nodes}
-    def _is_chain_link(n):
-        if n.get("type") != "aggregation":
-            return False
-        children = n.get("related", [])
-        return (len(children) == 1
-                and nm.get(int(children[0]), {}).get("type") == "aggregation")
-    changed = True
-    while changed:
-        changed = False
-        for nid, n in list(nm.items()):
-            if _is_chain_link(n):
-                child_id = int(n["related"][0])
-                child = nm[child_id]
-                new_node = dict(child)
-                new_node["id"] = nid
-                new_node["name"] = f"{n['name']} / {child['name']}"
-                new_node["desc"] = f"{n.get('desc', '')} | {child.get('desc', '')}"
-                nm[nid] = new_node
-                if child_id in nm:
-                    del nm[child_id]
-                for other in nm.values():
-                    other["related"] = [nid if int(c) == child_id else int(c)
-                                        for c in other.get("related", [])]
-                changed = True
-                break
-    return list(nm.values())
-# ─────────────────────────────────────────────────────────────────────────────
-# RENDER HELPERS  (DAG-safe value map — copied from approach_2.py)
-# ─────────────────────────────────────────────────────────────────────────────
-def _leaf_ids(nodes: list, nid: int) -> list:
-    m = {int(n["id"]): n for n in nodes}
-    out = []
-    def rec(x):
-        n = m.get(int(x))
-        if not n:
-            return
-        if n.get("type") == "attribute":
-            out.append(int(x)); return
-        for c in n.get("related", []):
-            rec(int(c))
-    rec(nid)
-    return list(dict.fromkeys(out))
-def _parent_map(nodes: list) -> dict:
-    pm = {}
-    for n in nodes:
-        for c in n.get("related", []):
-            if int(c) not in pm:
-                pm[int(c)] = int(n["id"])
-    return pm
-def _tree_value_map(nodes: list, pm: dict) -> dict:
-    kids = {}
-    for child, par in pm.items():
-        kids.setdefault(int(par), []).append(int(child))
-    nodemap = {int(n["id"]): n for n in nodes}
-    memo = {}
-    def count(nid: int) -> int:
-        if nid in memo:
-            return memo[nid]
-        memo[nid] = 1
-        n = nodemap.get(nid)
-        if n is not None and n.get("type") == "attribute":
-            memo[nid] = 1
-            return 1
-        ch = kids.get(nid, [])
-        v = sum(count(c) for c in ch) if ch else 1
-        memo[nid] = max(1, v)
-        return memo[nid]
-    return {nid: count(nid) for nid in nodemap}
-def _wrap_hover(text: str, width: int = 80) -> str:
-    import textwrap as _tw
-    s = str(text or "")
-    if not s:
-        return ""
-    lines = []
-    for raw_line in s.split("\n"):
-        lines.extend(_tw.wrap(raw_line, width=width) or [""])
-    return "<br>".join(lines)
-def plot_sunburst(nodes: list, color: str, max_depth: int = DEFAULT_DEPTH):
-    nodes = _filter_dissolved(nodes)
-    pm = _parent_map(nodes)
-    vm = _tree_value_map(nodes, pm)
-    ids, labels, parents, values, hover = [], [], [], [], []
-    for n in nodes:
-        nid = int(n["id"])
-        lc = len(_leaf_ids(nodes, nid))
-        ids.append(str(nid))
-        labels.append(str(n.get("name", ""))[:40])
-        parents.append("" if nid == 0 else str(pm.get(nid, 0)))
-        values.append(vm.get(nid, 1))
-        hover.append(f"<b>{n.get('name', '')}</b><br>Type: {n.get('type', '')}<br>"
-                     f"Variables: {lc}<br><br>{_wrap_hover(n.get('desc', ''))}")
-    fig = go.Figure(go.Sunburst(
-        ids=ids, labels=labels, parents=parents, values=values,
-        branchvalues="total", hovertext=hover, hoverinfo="text",
-        maxdepth=max_depth, insidetextorientation="radial",
-        marker=dict(colorscale=color, line=dict(width=1, color="white"))))
-    fig.update_layout(height=700, margin=dict(l=10, r=10, t=40, b=10),
-                      title=dict(text="Click sector to drill down — click centre to go back",
-                                 font=dict(size=13), x=0.5))
-    return fig
-def plot_treemap(nodes: list, color: str, max_depth: int = DEFAULT_DEPTH):
-    nodes = _filter_dissolved(nodes)
-    pm = _parent_map(nodes)
-    vm = _tree_value_map(nodes, pm)
-    ids, labels, parents, values, hover = [], [], [], [], []
-    for n in nodes:
-        nid = int(n["id"])
-        lc = len(_leaf_ids(nodes, nid))
-        ids.append(str(nid))
-        labels.append(str(n.get("name", ""))[:40])
-        parents.append("" if nid == 0 else str(pm.get(nid, 0)))
-        values.append(vm.get(nid, 1))
-        hover.append(f"<b>{n.get('name', '')}</b><br>Variables: {lc}<br>"
-                     f"{_wrap_hover(n.get('desc', ''))}")
-    fig = go.Figure(go.Treemap(
-        ids=ids, labels=labels, parents=parents, values=values,
-        branchvalues="total", hovertext=hover, hoverinfo="text",
-        textinfo="label+value", maxdepth=max_depth,
-        marker=dict(colorscale=color, line=dict(width=1, color="white"))))
-    fig.update_layout(height=700, margin=dict(l=10, r=10, t=10, b=10))
-    return fig
-# ─────────────────────────────────────────────────────────────────────────────
-# NODE-LINK TREE  (Reingold-Tilford layout — copied from approach_2.py)
-# ─────────────────────────────────────────────────────────────────────────────
-def _node_color(n: dict) -> str:
-    t = n.get("type", "")
-    if t == "root":      return "#c44e52"
-    if t == "attribute": return "#4C72B0"
-    if t == "collapsed": return "#bbbbbb"
-    return "#8C8C8C"
-def _display_graph(nodes: list, max_depth: int, show_hidden: bool):
-    m = {int(n["id"]): n for n in nodes}
-    dnodes: dict = {}
-    edges: list = []
-    counter = 10 ** 9
-    def rec(nid, depth):
-        nonlocal counter
-        n = m.get(int(nid))
-        if not n:
-            return
-        if not show_hidden and n.get("isShown") is False and depth > 0:
-            return
-        dnodes[int(nid)] = n
-        if depth >= max_depth and n.get("related"):
-            counter += 1
-            cid = counter
-            n_leaves = len(_leaf_ids(nodes, nid))
-            dnodes[cid] = {"id": cid, "name": f"… {n_leaves} variables",
-                           "type": "collapsed", "related": [],
-                           "desc": f"Collapsed: {n.get('name')}"}
-            edges.append((int(nid), cid))
-            return
-        for c in n.get("related", []):
-            ch = m.get(int(c))
-            if not ch:
-                continue
-            if not show_hidden and ch.get("isShown") is False:
-                continue
-            edges.append((int(nid), int(c)))
-            rec(int(c), depth + 1)
-    rec(0, 0)
-    return list(dnodes.values()), edges
-def _positions(edges: list):
-    H_SCALE, V_SPACE = 3.0, 1.8
-    children: dict = defaultdict(list)
-    for p, c in edges:
-        children[p].append(c)
-    pos: dict = {}
-    counter = {"v": 0}
-    def rec(nid, depth):
-        ch = children.get(nid, [])
-        if not ch:
-            y_pos = counter["v"] * V_SPACE
-            counter["v"] += 1
-            pos[nid] = (depth * H_SCALE, y_pos)
-            return y_pos
-        child_ys = [rec(c, depth + 1) for c in ch]
-        y_pos = float(np.mean(child_ys))
-        pos[nid] = (depth * H_SCALE, y_pos)
-        return y_pos
-    rec(0, 0)
-    return pos
-def plot_node_link(nodes: list, max_depth: int, show_hidden: bool, show_leaf_labels: bool):
-    nodes = _filter_dissolved(nodes)
-    dnodes, edges = _display_graph(nodes, max_depth, show_hidden)
-    pos = _positions(edges)
-    ex, ey = [], []
-    for p, c in edges:
-        if p not in pos or c not in pos:
-            continue
-        x0, y0 = pos[p]
-        x1, y1 = pos[c]
-        xm = (x0 + x1) / 2
-        ex += [x0, xm, xm, x1, None]
-        ey += [y0, y0, y1, y1, None]
-    traces = [go.Scatter(x=ex, y=ey, mode="lines",
-                         line=dict(width=1, color="#c8c8c8"),
-                         hoverinfo="skip", showlegend=False)]
-    agg_x, agg_y, agg_lab, agg_col, agg_hov = [], [], [], [], []
-    lf_x, lf_y, lf_lab, lf_col, lf_hov = [], [], [], [], []
-    for n in dnodes:
-        nid = int(n["id"])
-        if nid not in pos:
-            continue
-        x, y = pos[nid]
-        lc = len(_leaf_ids(nodes, nid))
-        lab = str(n.get("name", ""))[:32]
-        hov = (f"<b>{n.get('name', '')}</b><br>Type: {n.get('type', '')}<br>"
-               f"Variables: {lc}")
-        if n.get("type") == "attribute":
-            lf_x.append(x); lf_y.append(y); lf_col.append(_node_color(n))
-            lf_lab.append(lab if show_leaf_labels else "")
-            lf_hov.append(hov)
-        else:
-            agg_x.append(x); agg_y.append(y); agg_col.append(_node_color(n))
-            agg_lab.append(lab); agg_hov.append(hov)
-    traces.append(go.Scatter(
-        x=lf_x, y=lf_y, mode="markers+text" if show_leaf_labels else "markers",
-        text=lf_lab, textposition="middle right", textfont=dict(size=9),
-        marker=dict(size=7, color=lf_col, line=dict(width=0.5, color="white")),
-        hovertext=lf_hov, hoverinfo="text", showlegend=False))
-    traces.append(go.Scatter(
-        x=agg_x, y=agg_y, mode="markers+text", text=agg_lab,
-        textposition="middle right", textfont=dict(size=10),
-        marker=dict(size=13, color=agg_col, line=dict(width=1, color="white")),
-        hovertext=agg_hov, hoverinfo="text", showlegend=False))
-    n_rows = max(len(lf_y), len(agg_y), 1)
-    fig = go.Figure(traces)
-    fig.update_layout(
-        height=max(600, n_rows * 16),
-        margin=dict(l=10, r=140, t=10, b=10),
-        xaxis=dict(visible=False), yaxis=dict(visible=False),
-        plot_bgcolor="white",
-    )
-    return fig
-# ─────────────────────────────────────────────────────────────────────────────
-# STATS / SAFE RENDERING
-# ─────────────────────────────────────────────────────────────────────────────
-def _tree_depth(nodes: list) -> int:
-    """Max depth of the rendered single-parent tree (root = depth 0)."""
-    nodes = _filter_dissolved(nodes)
-    m = {int(n["id"]): n for n in nodes}
-    best = {"d": 0}
-    def rec(nid, d):
-        best["d"] = max(best["d"], d)
-        for c in m.get(int(nid), {}).get("related", []):
-            if int(c) in m:
-                rec(int(c), d + 1)
-    rec(0, 0)
-    return best["d"]
-def safe_render_depth(nodes: list, requested: int) -> int:
-    """Plotly sunburst/treemap silently blank when asked to draw too many sectors
-    at once (large hierarchies like HCP). Cap the *initial* render depth — the
-    chart stays fully drillable by clicking, so no data is lost."""
-    n = len(_filter_dissolved(nodes))
-    if n > 400:
-        return min(requested, 3)
-    if n > 150:
-        return min(requested, 4)
-    return requested
-# ─────────────────────────────────────────────────────────────────────────────
-# IO
-# ─────────────────────────────────────────────────────────────────────────────
-@st.cache_data(show_spinner=False)
-def _load_json(path_str: str):
-    with open(path_str, encoding="utf-8") as f:
-        return json.load(f)
-def _read_bytes(path_str: str) -> bytes:
-    with open(path_str, "rb") as f:
-        return f.read()
-@st.cache_data(show_spinner=False)
-def _outputs_zip(root_str: str) -> bytes:
-    """Zip the entire bundled outputs/ folder for one-click download."""
-    import io, zipfile
-    root = Path(root_str)
-    buf = io.BytesIO()
-    with zipfile.ZipFile(buf, "w", zipfile.ZIP_DEFLATED) as zf:
-        for p in sorted(root.rglob("*")):
-            if p.is_file():
-                zf.write(p, arcname=p.relative_to(root.parent).as_posix())
-    return buf.getvalue()
-def count_nodes(nodes: list) -> tuple[int, int]:
-    nodes = _filter_dissolved(nodes)
-    leaves = sum(1 for n in nodes if n.get("type") == "attribute")
-    aggs = sum(1 for n in nodes if n.get("type") == "aggregation")
-    return leaves, aggs
-def concept_aligned_pct(nodes: list) -> float | None:
-    """% of aggregation nodes that carry a concept/provenance label (Approach 1)."""
-    aggs = [n for n in _filter_dissolved(nodes) if n.get("type") == "aggregation"]
-    if not aggs:
-        return None
-    aligned = sum(1 for n in aggs
-                  if n.get("provenance") or n.get("concept") or n.get("source_evidence"))
-    return 100.0 * aligned / len(aggs) if aligned else None
-# ─────────────────────────────────────────────────────────────────────────────
-# SIDEBAR
-# ─────────────────────────────────────────────────────────────────────────────
 with st.sidebar:
     st.title("🌿 Hierarchy Explorer")
     st.caption("TFM 2026 — Metadata hierarchy construction")
     st.markdown("---")
-    approach = st.radio("**Select Approach**",
-                        ["Baseline", "Approach 1", "Approach 2"], index=0)
-    dataset = st.radio("**Select Dataset**", ["AI-MIND", "HCP"], index=0)
-    st.markdown("---")
-    st.caption("Results are pre-built from the thesis experiments. To run on your "
-               "own data, clone the repository and run the individual apps.")
-    st.markdown("[📦 GitHub Repository]"
-                "(https://github.com/RoophaSharon/tfm_metadata_hierarchy_2026)")
-# ─────────────────────────────────────────────────────────────────────────────
-# MAIN
-# ─────────────────────────────────────────────────────────────────────────────
-cfg = CONFIG[approach]
-color = cfg["color"]
-st.title(f"📊 {approach} — {dataset} Dataset")
-st.markdown(f"> {APPROACH_DESC[approach]}")
-paths = PREBUILT[approach][dataset]
-hier_path = paths.get("hierarchy")
-if hier_path is None or not hier_path.exists():
-    st.error(f"Pre-built result not found: `{hier_path}`")
-    st.stop()
-raw_nodes = _load_json(str(hier_path))
-leaves, aggs = count_nodes(raw_nodes)
-c1, c2, c3 = st.columns(3)
-c1.metric("Leaf Variables", leaves)
-c2.metric("Aggregation Nodes", aggs)
-c3.metric("Total Nodes", leaves + aggs)
-# ── Build summary (collapsed) ────────────────────────────────────────────────
-facet_path = paths.get("facets")
-n_facets = None
-if facet_path is not None and facet_path.exists():
-    try:
-        n_facets = len(_load_json(str(facet_path)))
-    except Exception:
-        n_facets = None
-with st.expander("ℹ️ Build summary", expanded=False):
-    bs1, bs2, bs3, bs4 = st.columns(4)
-    bs1.metric("Variables", leaves)
-    bs2.metric("Internal nodes", aggs)
-    bs3.metric("Tree depth", _tree_depth(raw_nodes))
-    bs4.metric("Facets", n_facets if n_facets is not None else "—")
-    pct = concept_aligned_pct(raw_nodes)
-    if pct is not None:
-        st.caption(f"Concept-aligned aggregation nodes: **{pct:.1f}%**")
-    st.caption(
-        f"Source file: `{hier_path.name}` · "
-        f"Approach: **{approach}** · Dataset: **{dataset}**. "
-        "Tree topology and labels are reproduced exactly from the pre-built "
-        "thesis output (the algorithms are not re-run in this viewer)."
-    )
-# ── Downloads ────────────────────────────────────────────────────────────────
-d1, d2, d3 = st.columns(3)
-with d1:
-    st.download_button("⬇️ Hierarchy JSON", data=_read_bytes(str(hier_path)),
-                       file_name=hier_path.name, mime="application/json",
-                       use_container_width=True)
-with d2:
-    if facet_path is not None and facet_path.exists():
-        st.download_button("⬇️ Facets JSON", data=_read_bytes(str(facet_path)),
-                           file_name=facet_path.name, mime="application/json",
-                           use_container_width=True)
-    else:
-        st.button("⬇️ Facets JSON", disabled=True, use_container_width=True,
-                  help="This approach/dataset has no facet tree.")
-with d3:
-    st.download_button("⬇️ All outputs (ZIP)", data=_outputs_zip(str(ROOT)),
-                       file_name="metadata_hierarchy_outputs.zip",
-                       mime="application/zip", use_container_width=True)
-st.markdown("---")
-# ── Level-of-Detail controls (above chart — matches the apps) ────────────────
-view_options = ["Sunburst (drill-down)", "Treemap"]
-if cfg["node_link"]:
-    view_options.append("Node-link tree")
-if cfg["compress"]:
-    vc1, vc2, vc3, vc4, vc5 = st.columns([2.4, 2, 1, 1, 1.2])
-else:
-    vc1, vc2, vc3, vc4 = st.columns([2.4, 2, 1, 1])
-    vc5 = None
-with vc1:
-    viz_mode = st.radio("View mode", view_options, horizontal=True, index=0,
-                        help="Sunburst best for large hierarchies [Taxonomizer]. "
-                             "Node-link best for moderate-depth structure inspection.")
-with vc2:
-    depth = st.slider("Depth (Level of Detail)", 1, 8, DEFAULT_DEPTH, 1)
-with vc3:
-    show_leaf_labels = st.checkbox("Leaf labels", value=False)
-with vc4:
-    show_hidden = st.checkbox("Hidden nodes", value=False)
-if vc5 is not None:
-    with vc5:
-        compress_chains = st.checkbox("Compress chains", value=True,
-                                      help="Merge one-child aggregation chains "
-                                           '(e.g. "DMS → DMS Recommended Standard") for '
-                                           "display. Export JSON keeps original structure.")
-else:
-    compress_chains = False
-st.divider()
-display_nodes = compress_one_child_chains(raw_nodes) if compress_chains else raw_nodes
-if viz_mode == "Sunburst (drill-down)":
-    eff = safe_render_depth(display_nodes, depth)
-    if eff < depth:
-        st.caption(f"Large hierarchy — showing {eff} levels initially to render "
-                   "reliably. **Click any sector to drill deeper.**")
-    st.plotly_chart(plot_sunburst(display_nodes, color, eff), use_container_width=True)
-elif viz_mode == "Treemap":
-    eff = safe_render_depth(display_nodes, depth)
-    if eff < depth:
-        st.caption(f"Large hierarchy — showing {eff} levels initially to render "
-                   "reliably. **Click a tile to drill deeper.**")
-    st.plotly_chart(plot_treemap(display_nodes, color, eff), use_container_width=True)
-else:
-    st.plotly_chart(plot_node_link(display_nodes, depth, show_hidden, show_leaf_labels),
-                    use_container_width=True)
-# ── Facets (Approach 1 only) ─────────────────────────────────────────────────
-if facet_path is not None and facet_path.exists():
     st.markdown("---")
-    st.subheader("🔀 Parallel facets")
-    facets = _load_json(str(facet_path))
-    names = list(facets.keys())
-    if not names:
-        st.info("No facets available for this dataset.")
-    else:
-        sel = st.selectbox("Select facet", names)
-        fnodes = facets[sel]
-        ft1, ft2 = st.tabs(["Sunburst", "Treemap"])
-        with ft1:
-            st.plotly_chart(plot_sunburst(fnodes, color, depth), use_container_width=True)
-        with ft2:
-            st.plotly_chart(plot_treemap(fnodes, color), use_container_width=True)

 """
 Metadata Hierarchy Explorer — TFM 2026
+Navigation router (Streamlit st.navigation).
+Sidebar layout:
+    🌿 Hierarchy Explorer / TFM 2026      (branding, top)
+    📊 Demo View                          (pre-built results viewer)
+    … the Demo View's own controls …      (Select Approach / Dataset, etc.)
+    🛠️ Build hierarchy  (collapsible)     (upload a CSV and run an app)
+         • Baseline  • Approach 1  • Approach 2
 """
 import streamlit as st
 st.set_page_config(
     page_title="Metadata Hierarchy Explorer",
     page_icon="🌿",
     layout="wide",
 )
+# ── Pages ────────────────────────────────────────────────────────────────────
+viewer = st.Page("views/viewer.py",        title="Demo View",   icon="📊", default=True)
+base   = st.Page("views/run_baseline.py",  title="Baseline",    icon="🟢")
+appr1  = st.Page("views/run_approach_1.py", title="Approach 1", icon="🌳")
+appr2  = st.Page("views/run_approach_2.py", title="Approach 2", icon="🔬")
+# Hidden default nav — we render our own links so we control the order.
+pg = st.navigation([viewer, base, appr1, appr2], position="hidden")
+# ── Sidebar TOP: branding + Demo View link ──────────────────────────────────
 with st.sidebar:
     st.title("🌿 Hierarchy Explorer")
     st.caption("TFM 2026 — Metadata hierarchy construction")
     st.markdown("---")
+    st.page_link(viewer, label="Demo View", icon="📊")
+# ── The selected page renders here (its own sidebar controls included) ───────
+pg.run()
+# ── Sidebar BOTTOM: collapsible "Build hierarchy" group ─────────────────────
+with st.sidebar:
     st.markdown("---")
+    with st.expander("🛠️ Build hierarchy", expanded=False):
+        st.caption("Upload your own CSV and run an algorithm live.")
+        st.page_link(base,  label="Baseline",   icon="🟢")
+        st.page_link(appr1, label="Approach 1", icon="🌳")
+        st.page_link(appr2, label="Approach 2", icon="🔬")

requirements.txt CHANGED Viewed

@@ -1,4 +1,4 @@
-streamlit>=1.30
 pandas>=2.0
 numpy>=1.24
 scikit-learn>=1.3

+streamlit>=1.43
 pandas>=2.0
 numpy>=1.24
 scikit-learn>=1.3

pages/2_Approach_1.py → views/run_approach_1.py RENAMED Viewed

@@ -57,7 +57,7 @@ except Exception:
 warnings.filterwarnings('ignore')
-st.set_page_config(page_title='Metadata Hierarchy — Approach 1', page_icon='🌳', layout='wide')
 st.title('Metadata Hierarchy Builder — Approach 1')
 st.caption(
     'Automatic concept-label extraction from metadata text + HiExpan refinement + Castanet facets. '
@@ -3876,7 +3876,7 @@ if uploads:
                 if warn:
                     st.warning('Looked like raw data — columns converted to metadata rows.')
                 st.write(f'Rows: **{len(df):,}**, Columns: **{len(df.columns)}**')
-                st.dataframe(df.head(10), use_container_width=True)
         except Exception as e:
             st.error(f'Failed to load {p.name}: {e}')
@@ -3902,7 +3902,7 @@ if uploads:
                                         key=f'meta_{name}')
             prev = list(dict.fromkeys(leaf + group + text + meta))
             if prev:
-                st.dataframe(df[prev].head(6), use_container_width=True)
             configs[name] = {'leaf_cols': leaf, 'group_cols': group,
                              'text_cols': text, 'metadata_cols': meta}
@@ -4202,12 +4202,12 @@ with tabs[0]:
     if viz_mode == 'Sunburst (drill-down)':
         st.caption('Hover for concept provenance (confidence, source, alternatives). Click to drill down.')
-        st.plotly_chart(plot_sunburst(nodes, depth), use_container_width=True)
     elif viz_mode == 'Treemap':
-        st.plotly_chart(plot_treemap(nodes), use_container_width=True)
     else:
         st.plotly_chart(plot_node_link(nodes, depth, show_hidden, show_leaf_labels),
-                        use_container_width=True)
     pr  = path_rows(nodes)
     max_d = max((r['depth'] for r in pr), default=0)
     c1, c2, c3 = st.columns(3)
@@ -4225,7 +4225,7 @@ with tabs[0]:
             exp_rows = [{'Segment': seg, 'Expansion': v['expansion'],
                          'Evidence': ', '.join(v['evidence'])}
                         for seg, v in code_exp.items()]
-            st.dataframe(pd.DataFrame(exp_rows), use_container_width=True)
     # Concept label provenance for internal nodes
     prov_rows = []
@@ -4241,7 +4241,7 @@ with tabs[0]:
             })
     if prov_rows:
         with st.expander('Concept label provenance for internal nodes', expanded=False):
-            st.dataframe(pd.DataFrame(prov_rows), use_container_width=True)
 # ── Tab 1: Faceted view ───────────────────────────────────────────────────────
 with tabs[1]:
@@ -4251,11 +4251,11 @@ with tabs[1]:
         'Concept facet uses automatically assigned labels from embedding alignment.'
     )
     if facet_trees:
-        st.plotly_chart(plot_facets_parallel(facet_trees), use_container_width=True)
         st.markdown('### Per-facet detail')
         sel_facet = st.selectbox('Inspect facet tree', list(facet_trees.keys()))
         ft = facet_trees[sel_facet]
-        st.plotly_chart(plot_sunburst(ft, max_depth=3), use_container_width=True)
         n_groups = len([n for n in ft if n.get('type') == 'aggregation'])
         st.info(f'Facet **{sel_facet}**: {n_groups} groups, '
                 f'{len([n for n in ft if n.get("type")=="attribute"])} variables')
@@ -4273,11 +4273,11 @@ with tabs[2]:
         st.markdown('### Sibling coherence — before refinement (worst first)')
         before = hiexpan_report.get('coherence_before', [])
         if before:
-            st.dataframe(pd.DataFrame(before), use_container_width=True)
         st.markdown('### Sibling coherence — after refinement')
         after = hiexpan_report.get('coherence_after', [])
         if after:
-            st.dataframe(pd.DataFrame(after), use_container_width=True)
             b_mean = np.mean([r['coherence_score'] for r in before]) if before else float('nan')
             a_mean = np.mean([r['coherence_score'] for r in after])
             st.metric('Mean coherence improvement',
@@ -4324,7 +4324,7 @@ with tabs[3]:
     if can is not None:
         conflict_df = compute_conflict_table(can, nodes)
         if len(conflict_df):
-            st.dataframe(conflict_df, use_container_width=True)
         else:
             st.success('No low-confidence placements detected.')
     else:
@@ -4393,7 +4393,7 @@ with tabs[4]:
                                  'type': c.get('type'),
                                  'relation': c.get('info', {}).get('relation_label', ''),
                                  'desc': str(c.get('desc', ''))[:120]}
-                                for c in cns if c]), use_container_width=True)
 # ── Tab 5: Search ─────────────────────────────────────────────────────────────
 with tabs[5]:
@@ -4407,14 +4407,14 @@ with tabs[5]:
                          'relation': n.get('info', {}).get('relation_label', ''),
                          'n_children': len(n.get('related', [])),
                          'desc': str(n.get('desc', ''))[:200]})
-    st.dataframe(pd.DataFrame(out_), use_container_width=True)
 # ── Tab 6: Semantic map ───────────────────────────────────────────────────────
 with tabs[6]:
     if can is None or len(can) < 3:
         st.info('Semantic map available after build.')
     else:
-        st.plotly_chart(semantic_map(can), use_container_width=True)
 # ── Tab 7: Metadata ───────────────────────────────────────────────────────────
 with tabs[7]:
@@ -4422,7 +4422,7 @@ with tabs[7]:
         st.info('Available after build.')
     else:
         show_cols = [c for c in can.columns if c != '_raw']
-        st.dataframe(can[show_cols], use_container_width=True)
 # ── Tab 8: Export ─────────────────────────────────────────────────────────────
 with tabs[8]:
@@ -4438,7 +4438,7 @@ with tabs[8]:
             data=json.dumps(nodes, indent=2, ensure_ascii=False).encode('utf-8'),
             file_name=f'{_base}_approach1_hierarchy.json',
             mime='application/json',
-            use_container_width=True,
         )
     with col2:
         if facet_trees:
@@ -4447,7 +4447,7 @@ with tabs[8]:
                 data=json.dumps(facet_trees, indent=2, ensure_ascii=False).encode('utf-8'),
                 file_name=f'{_base}_approach1_facets.json',
                 mime='application/json',
-                use_container_width=True,
             )
     col3, col4 = st.columns(2)
@@ -4458,7 +4458,7 @@ with tabs[8]:
                 data=can.drop(columns=['_raw'], errors='ignore').to_csv(index=False).encode('utf-8'),
                 file_name=f'{_base}_approach1_canonical.csv',
                 mime='text/csv',
-                use_container_width=True,
             )
     with col4:
         _prov_df = st.session_state.get('prov_df', pd.DataFrame())
@@ -4468,7 +4468,7 @@ with tabs[8]:
                 data=_prov_df.to_csv(index=False).encode('utf-8'),
                 file_name=f'{_base}_approach1_concept_labels.csv',
                 mime='text/csv',
-                use_container_width=True,
             )
     st.divider()
@@ -4481,7 +4481,7 @@ with tabs[8]:
         'dataset name — convenient for `evaluate_all.py`.'
     )
     if st.button('💾 Save all to outputs/approach_1/', type='primary',
-                 use_container_width=True):
         try:
             _out_dir.mkdir(parents=True, exist_ok=True)
             saved = []

 warnings.filterwarnings('ignore')
+# set_page_config handled by the navigation router (demo.py)
 st.title('Metadata Hierarchy Builder — Approach 1')
 st.caption(
     'Automatic concept-label extraction from metadata text + HiExpan refinement + Castanet facets. '
                 if warn:
                     st.warning('Looked like raw data — columns converted to metadata rows.')
                 st.write(f'Rows: **{len(df):,}**, Columns: **{len(df.columns)}**')
+                st.dataframe(df.head(10), width='stretch')
         except Exception as e:
             st.error(f'Failed to load {p.name}: {e}')
                                         key=f'meta_{name}')
             prev = list(dict.fromkeys(leaf + group + text + meta))
             if prev:
+                st.dataframe(df[prev].head(6), width='stretch')
             configs[name] = {'leaf_cols': leaf, 'group_cols': group,
                              'text_cols': text, 'metadata_cols': meta}
     if viz_mode == 'Sunburst (drill-down)':
         st.caption('Hover for concept provenance (confidence, source, alternatives). Click to drill down.')
+        st.plotly_chart(plot_sunburst(nodes, depth), width='stretch')
     elif viz_mode == 'Treemap':
+        st.plotly_chart(plot_treemap(nodes), width='stretch')
     else:
         st.plotly_chart(plot_node_link(nodes, depth, show_hidden, show_leaf_labels),
+                        width='stretch')
     pr  = path_rows(nodes)
     max_d = max((r['depth'] for r in pr), default=0)
     c1, c2, c3 = st.columns(3)
             exp_rows = [{'Segment': seg, 'Expansion': v['expansion'],
                          'Evidence': ', '.join(v['evidence'])}
                         for seg, v in code_exp.items()]
+            st.dataframe(pd.DataFrame(exp_rows), width='stretch')
     # Concept label provenance for internal nodes
     prov_rows = []
             })
     if prov_rows:
         with st.expander('Concept label provenance for internal nodes', expanded=False):
+            st.dataframe(pd.DataFrame(prov_rows), width='stretch')
 # ── Tab 1: Faceted view ───────────────────────────────────────────────────────
 with tabs[1]:
         'Concept facet uses automatically assigned labels from embedding alignment.'
     )
     if facet_trees:
+        st.plotly_chart(plot_facets_parallel(facet_trees), width='stretch')
         st.markdown('### Per-facet detail')
         sel_facet = st.selectbox('Inspect facet tree', list(facet_trees.keys()))
         ft = facet_trees[sel_facet]
+        st.plotly_chart(plot_sunburst(ft, max_depth=3), width='stretch')
         n_groups = len([n for n in ft if n.get('type') == 'aggregation'])
         st.info(f'Facet **{sel_facet}**: {n_groups} groups, '
                 f'{len([n for n in ft if n.get("type")=="attribute"])} variables')
         st.markdown('### Sibling coherence — before refinement (worst first)')
         before = hiexpan_report.get('coherence_before', [])
         if before:
+            st.dataframe(pd.DataFrame(before), width='stretch')
         st.markdown('### Sibling coherence — after refinement')
         after = hiexpan_report.get('coherence_after', [])
         if after:
+            st.dataframe(pd.DataFrame(after), width='stretch')
             b_mean = np.mean([r['coherence_score'] for r in before]) if before else float('nan')
             a_mean = np.mean([r['coherence_score'] for r in after])
             st.metric('Mean coherence improvement',
     if can is not None:
         conflict_df = compute_conflict_table(can, nodes)
         if len(conflict_df):
+            st.dataframe(conflict_df, width='stretch')
         else:
             st.success('No low-confidence placements detected.')
     else:
                                  'type': c.get('type'),
                                  'relation': c.get('info', {}).get('relation_label', ''),
                                  'desc': str(c.get('desc', ''))[:120]}
+                                for c in cns if c]), width='stretch')
 # ── Tab 5: Search ─────────────────────────────────────────────────────────────
 with tabs[5]:
                          'relation': n.get('info', {}).get('relation_label', ''),
                          'n_children': len(n.get('related', [])),
                          'desc': str(n.get('desc', ''))[:200]})
+    st.dataframe(pd.DataFrame(out_), width='stretch')
 # ── Tab 6: Semantic map ───────────────────────────────────────────────────────
 with tabs[6]:
     if can is None or len(can) < 3:
         st.info('Semantic map available after build.')
     else:
+        st.plotly_chart(semantic_map(can), width='stretch')
 # ── Tab 7: Metadata ───────────────────────────────────────────────────────────
 with tabs[7]:
         st.info('Available after build.')
     else:
         show_cols = [c for c in can.columns if c != '_raw']
+        st.dataframe(can[show_cols], width='stretch')
 # ── Tab 8: Export ─────────────────────────────────────────────────────────────
 with tabs[8]:
             data=json.dumps(nodes, indent=2, ensure_ascii=False).encode('utf-8'),
             file_name=f'{_base}_approach1_hierarchy.json',
             mime='application/json',
+            width='stretch',
         )
     with col2:
         if facet_trees:
                 data=json.dumps(facet_trees, indent=2, ensure_ascii=False).encode('utf-8'),
                 file_name=f'{_base}_approach1_facets.json',
                 mime='application/json',
+                width='stretch',
             )
     col3, col4 = st.columns(2)
                 data=can.drop(columns=['_raw'], errors='ignore').to_csv(index=False).encode('utf-8'),
                 file_name=f'{_base}_approach1_canonical.csv',
                 mime='text/csv',
+                width='stretch',
             )
     with col4:
         _prov_df = st.session_state.get('prov_df', pd.DataFrame())
                 data=_prov_df.to_csv(index=False).encode('utf-8'),
                 file_name=f'{_base}_approach1_concept_labels.csv',
                 mime='text/csv',
+                width='stretch',
             )
     st.divider()
         'dataset name — convenient for `evaluate_all.py`.'
     )
     if st.button('💾 Save all to outputs/approach_1/', type='primary',
+                 width='stretch'):
         try:
             _out_dir.mkdir(parents=True, exist_ok=True)
             saved = []

pages/3_Approach_2.py → views/run_approach_2.py RENAMED Viewed

@@ -3467,8 +3467,7 @@ def plot_node_link(nodes: list, max_depth: int = 4,
 # ──────────────────────────────────────────────────────────────────────────────
 # STREAMLIT APP
 # ──────────────────────────────────────────────────────────────────────────────
-st.set_page_config(page_title='Approach 2 — Multi-Aspect Hierarchy', page_icon='🔬',
-                   layout='wide')
 st.title('🔬 Approach 2 — Role-Decomposed Metadata Hierarchy')
 st.caption('Group anchoring → LLM role extraction → role-nested LoD tree. '
            'Full method details and citations in the Method tab.')
@@ -3613,7 +3612,7 @@ if uploads:
             cfg_by[f.name] = detect_roles(df)
             with st.expander(f'📄 {f.name}', expanded=False):
                 st.write(f'Rows: **{len(df):,}**  Columns: **{len(df.columns)}**')
-                st.dataframe(df.head(8), use_container_width=True)
         except Exception as e:
             st.error(f'Could not load {f.name}: {e}')
@@ -3813,13 +3812,13 @@ with tabs[0]:
     if viz_mode == 'Sunburst (drill-down)':
         st.plotly_chart(plot_sunburst(display_nodes, max_depth=depth_display),
-                        use_container_width=True)
     elif viz_mode == 'Treemap':
-        st.plotly_chart(plot_treemap(display_nodes), use_container_width=True)
     else:
         st.plotly_chart(plot_node_link(display_nodes, depth_display,
                                         show_hidden, show_leaf_labels),
-                        use_container_width=True)
     n_l = len([n for n in nodes if n.get('type') == 'attribute'])
     n_i = len([n for n in nodes if n.get('type') == 'aggregation'])
@@ -3912,7 +3911,7 @@ with tabs[1]:
             W_df  = pd.DataFrame(
                 W, columns=[f'Aspect {k+1}: {alabs[k][:30]}' for k in range(W.shape[1])])
             W_df.insert(0, 'Variable', can['_label'].tolist())
-            st.dataframe(W_df.round(4), use_container_width=True)
 with tabs[2]:
     st.markdown('### Role decomposition')
@@ -3935,7 +3934,7 @@ with tabs[2]:
         if reg_rows:
             reg_df = pd.DataFrame(reg_rows).sort_values(
                 'Regularity', ascending=False, na_position='last')
-            st.dataframe(reg_df, use_container_width=True, hide_index=True)
         # ── Per-variable role table ───────────────────────────────────────────
         st.markdown('#### Per-variable role table')
@@ -3996,7 +3995,7 @@ with tabs[2]:
         if role_rows:
             role_df = pd.DataFrame(role_rows)
-            st.dataframe(role_df, use_container_width=True, hide_index=True)
             st.download_button(
                 '⬇️ Download per-variable role CSV',
                 data=role_df.to_csv(index=False).encode('utf-8'),
@@ -4021,7 +4020,7 @@ with tabs[2]:
                     'Reasons':       ', '.join(f'{k}:{v}' for k, v in
                                                 (a.get('summary', {}) or {}).items()),
                 })
-            st.dataframe(pd.DataFrame(sum_rows), use_container_width=True,
                           hide_index=True)
             # Drill-down per group
@@ -4050,7 +4049,7 @@ with tabs[2]:
                         })
                     if row_rows:
                         st.dataframe(pd.DataFrame(row_rows),
-                                      use_container_width=True, hide_index=True)
                         # Download as CSV for offline analysis
                         csv_bytes = pd.DataFrame(row_rows).to_csv(index=False).encode('utf-8')
                         st.download_button(
@@ -4129,16 +4128,16 @@ with tabs[3]:
                               & (prov_df['LLM proposed'].astype(str).str.len() > 0)]
                 if len(rej):
                     st.dataframe(rej[['Node', 'LLM proposed', 'LLM reason']],
-                                  use_container_width=True, hide_index=True)
         # ── Full provenance table ─────────────────────────────────────────────
         st.write('**Full per-node provenance**')
-        st.dataframe(prov_df, use_container_width=True, hide_index=True)
 with tabs[4]:
     if can is not None:
         st.dataframe(can.drop(columns=['_row'], errors='ignore'),
-                     use_container_width=True)
 with tabs[5]:
     # ── derive a per-CSV base name from the uploaded files ────────────────────
@@ -4169,7 +4168,7 @@ with tabs[5]:
                 data=json.dumps(nodes, indent=2, ensure_ascii=False).encode(),
                 file_name=f'{csv_basis}_approach2_lod.json',
                 mime='application/json',
-                use_container_width=True,
             )
     with col2:
         if can is not None:
@@ -4178,7 +4177,7 @@ with tabs[5]:
                 data=can.to_csv(index=False).encode('utf-8'),
                 file_name=f'{csv_basis}_approach2_canonical.csv',
                 mime='text/csv',
-                use_container_width=True,
             )
     st.divider()
@@ -4191,7 +4190,7 @@ with tabs[5]:
         'dataset name — convenient for `evaluate_all.py`.'
     )
     if st.button('💾 Save all to outputs/approach_2/', type='primary',
-                 use_container_width=True):
         try:
             _out_dir.mkdir(parents=True, exist_ok=True)
             saved = []

 # ──────────────────────────────────────────────────────────────────────────────
 # STREAMLIT APP
 # ──────────────────────────────────────────────────────────────────────────────
+# set_page_config handled by the navigation router (demo.py)
 st.title('🔬 Approach 2 — Role-Decomposed Metadata Hierarchy')
 st.caption('Group anchoring → LLM role extraction → role-nested LoD tree. '
            'Full method details and citations in the Method tab.')
             cfg_by[f.name] = detect_roles(df)
             with st.expander(f'📄 {f.name}', expanded=False):
                 st.write(f'Rows: **{len(df):,}**  Columns: **{len(df.columns)}**')
+                st.dataframe(df.head(8), width='stretch')
         except Exception as e:
             st.error(f'Could not load {f.name}: {e}')
     if viz_mode == 'Sunburst (drill-down)':
         st.plotly_chart(plot_sunburst(display_nodes, max_depth=depth_display),
+                        width='stretch')
     elif viz_mode == 'Treemap':
+        st.plotly_chart(plot_treemap(display_nodes), width='stretch')
     else:
         st.plotly_chart(plot_node_link(display_nodes, depth_display,
                                         show_hidden, show_leaf_labels),
+                        width='stretch')
     n_l = len([n for n in nodes if n.get('type') == 'attribute'])
     n_i = len([n for n in nodes if n.get('type') == 'aggregation'])
             W_df  = pd.DataFrame(
                 W, columns=[f'Aspect {k+1}: {alabs[k][:30]}' for k in range(W.shape[1])])
             W_df.insert(0, 'Variable', can['_label'].tolist())
+            st.dataframe(W_df.round(4), width='stretch')
 with tabs[2]:
     st.markdown('### Role decomposition')
         if reg_rows:
             reg_df = pd.DataFrame(reg_rows).sort_values(
                 'Regularity', ascending=False, na_position='last')
+            st.dataframe(reg_df, width='stretch', hide_index=True)
         # ── Per-variable role table ───────────────────────────────────────────
         st.markdown('#### Per-variable role table')
         if role_rows:
             role_df = pd.DataFrame(role_rows)
+            st.dataframe(role_df, width='stretch', hide_index=True)
             st.download_button(
                 '⬇️ Download per-variable role CSV',
                 data=role_df.to_csv(index=False).encode('utf-8'),
                     'Reasons':       ', '.join(f'{k}:{v}' for k, v in
                                                 (a.get('summary', {}) or {}).items()),
                 })
+            st.dataframe(pd.DataFrame(sum_rows), width='stretch',
                           hide_index=True)
             # Drill-down per group
                         })
                     if row_rows:
                         st.dataframe(pd.DataFrame(row_rows),
+                                      width='stretch', hide_index=True)
                         # Download as CSV for offline analysis
                         csv_bytes = pd.DataFrame(row_rows).to_csv(index=False).encode('utf-8')
                         st.download_button(
                               & (prov_df['LLM proposed'].astype(str).str.len() > 0)]
                 if len(rej):
                     st.dataframe(rej[['Node', 'LLM proposed', 'LLM reason']],
+                                  width='stretch', hide_index=True)
         # ── Full provenance table ─────────────────────────────────────────────
         st.write('**Full per-node provenance**')
+        st.dataframe(prov_df, width='stretch', hide_index=True)
 with tabs[4]:
     if can is not None:
         st.dataframe(can.drop(columns=['_row'], errors='ignore'),
+                     width='stretch')
 with tabs[5]:
     # ── derive a per-CSV base name from the uploaded files ────────────────────
                 data=json.dumps(nodes, indent=2, ensure_ascii=False).encode(),
                 file_name=f'{csv_basis}_approach2_lod.json',
                 mime='application/json',
+                width='stretch',
             )
     with col2:
         if can is not None:
                 data=can.to_csv(index=False).encode('utf-8'),
                 file_name=f'{csv_basis}_approach2_canonical.csv',
                 mime='text/csv',
+                width='stretch',
             )
     st.divider()
         'dataset name — convenient for `evaluate_all.py`.'
     )
     if st.button('💾 Save all to outputs/approach_2/', type='primary',
+                 width='stretch'):
         try:
             _out_dir.mkdir(parents=True, exist_ok=True)
             saved = []

pages/1_Baseline.py → views/run_baseline.py RENAMED Viewed

@@ -40,7 +40,7 @@ from sklearn.preprocessing import LabelEncoder
 warnings.filterwarnings('ignore')
-st.set_page_config(page_title='Metadata Hierarchy — Baseline', page_icon='🌿', layout='wide')
 st.title('Metadata Hierarchy Builder — Baseline (Taxonomizer)')
 st.caption(
     'Pure Taxonomizer baseline: TF-IDF text objects + recursive agglomerative '
@@ -562,11 +562,11 @@ with st.spinner('Loading file…'):
 st.subheader('Step 1 — File preview')
 with st.expander(f'📄 {uploaded.name}  ({len(df):,} rows, {len(df.columns)} columns)',
                  expanded=False):
-    st.dataframe(df.head(10), use_container_width=True)
     score_cols = [c for c in ['column', 'leaf_score', 'group_score', 'text_score', 'metadata_score']
                   if c in prof.columns]
     st.dataframe(prof[score_cols].sort_values('leaf_score', ascending=False),
-                 use_container_width=True)
 st.subheader('Step 2 — Confirm column roles')
 cols = list(df.columns)
@@ -639,11 +639,11 @@ c4.metric('Avg branching', _sm['avg_branching_factor'])
 tabs = st.tabs(['Sunburst', 'Treemap', 'Node detail', 'Canonical table', 'Export', '📊 Evaluation'])
 with tabs[0]:
-    st.plotly_chart(plot_sunburst(nodes, max_depth=display_depth), use_container_width=True)
     st.caption('Green = Baseline. Click a sector to drill down; click the centre to go back.')
 with tabs[1]:
-    st.plotly_chart(plot_treemap(nodes), use_container_width=True)
 with tabs[2]:
     nm = _nmap(nodes)
@@ -661,10 +661,10 @@ with tabs[2]:
             sub = can[can['_leaf_id'].isin(leaf_ids_set)]
             st.write(f'**{len(lids)} variables** under "{sel_node["name"]}"')
             st.dataframe(sub[['_leaf_label', '_group_path', '_text']].reset_index(drop=True),
-                         use_container_width=True)
 with tabs[3]:
-    st.dataframe(can, use_container_width=True)
 with tabs[4]:
     _base = safe_name(project_name)
@@ -675,7 +675,7 @@ with tabs[4]:
             data=json.dumps(nodes, indent=2, ensure_ascii=False).encode('utf-8'),
             file_name=f'{_base}_baseline_hierarchy.json',
             mime='application/json',
-            use_container_width=True,
         )
     with col2:
         st.download_button(
@@ -683,7 +683,7 @@ with tabs[4]:
             data=can.to_csv(index=False).encode('utf-8'),
             file_name=f'{_base}_baseline_canonical.csv',
             mime='text/csv',
-            use_container_width=True,
         )
     st.divider()
@@ -696,7 +696,7 @@ with tabs[4]:
         'dataset name — convenient for `evaluate_all.py`.'
     )
     if st.button('💾 Save all to outputs/baseline/', type='primary',
-                 use_container_width=True):
         try:
             _out_dir.mkdir(parents=True, exist_ok=True)
             (_out_dir / f'{_base}_baseline_hierarchy.json').write_text(

 warnings.filterwarnings('ignore')
+# set_page_config handled by the navigation router (demo.py)
 st.title('Metadata Hierarchy Builder — Baseline (Taxonomizer)')
 st.caption(
     'Pure Taxonomizer baseline: TF-IDF text objects + recursive agglomerative '
 st.subheader('Step 1 — File preview')
 with st.expander(f'📄 {uploaded.name}  ({len(df):,} rows, {len(df.columns)} columns)',
                  expanded=False):
+    st.dataframe(df.head(10), width='stretch')
     score_cols = [c for c in ['column', 'leaf_score', 'group_score', 'text_score', 'metadata_score']
                   if c in prof.columns]
     st.dataframe(prof[score_cols].sort_values('leaf_score', ascending=False),
+                 width='stretch')
 st.subheader('Step 2 — Confirm column roles')
 cols = list(df.columns)
 tabs = st.tabs(['Sunburst', 'Treemap', 'Node detail', 'Canonical table', 'Export', '📊 Evaluation'])
 with tabs[0]:
+    st.plotly_chart(plot_sunburst(nodes, max_depth=display_depth), width='stretch')
     st.caption('Green = Baseline. Click a sector to drill down; click the centre to go back.')
 with tabs[1]:
+    st.plotly_chart(plot_treemap(nodes), width='stretch')
 with tabs[2]:
     nm = _nmap(nodes)
             sub = can[can['_leaf_id'].isin(leaf_ids_set)]
             st.write(f'**{len(lids)} variables** under "{sel_node["name"]}"')
             st.dataframe(sub[['_leaf_label', '_group_path', '_text']].reset_index(drop=True),
+                         width='stretch')
 with tabs[3]:
+    st.dataframe(can, width='stretch')
 with tabs[4]:
     _base = safe_name(project_name)
             data=json.dumps(nodes, indent=2, ensure_ascii=False).encode('utf-8'),
             file_name=f'{_base}_baseline_hierarchy.json',
             mime='application/json',
+            width='stretch',
         )
     with col2:
         st.download_button(
             data=can.to_csv(index=False).encode('utf-8'),
             file_name=f'{_base}_baseline_canonical.csv',
             mime='text/csv',
+            width='stretch',
         )
     st.divider()
         'dataset name — convenient for `evaluate_all.py`.'
     )
     if st.button('💾 Save all to outputs/baseline/', type='primary',
+                 width='stretch'):
         try:
             _out_dir.mkdir(parents=True, exist_ok=True)
             (_out_dir / f'{_base}_baseline_hierarchy.json').write_text(

views/viewer.py ADDED Viewed

	@@ -0,0 +1,562 @@

+"""
+Metadata Hierarchy Explorer — TFM 2026
+Pre-built results viewer for Baseline, Approach 1, and Approach 2.
+Rendering faithfully replicates each app's display pipeline:
+  - Baseline    : raw tree, Greens, Sunburst + Treemap
+  - Approach 1  : raw tree, Blues,  Sunburst + Treemap + Node-link + Facets
+  - Approach 2  : compress one-child chains, Viridis, Sunburst + Treemap + Node-link
+Level-of-Detail controls (depth, leaf labels, hidden nodes, compress chains)
+match the controls in the individual apps.
+"""
+from __future__ import annotations
+import json
+from collections import defaultdict
+from pathlib import Path
+import numpy as np
+import plotly.graph_objects as go
+import streamlit as st
+# Page config is set by the navigation router (demo.py).
+ROOT = Path(__file__).resolve().parent.parent / "outputs"  # the "outputs" directory beside this module's own directory (this file is views/viewer.py)
+DEFAULT_DEPTH = 7  # default max_depth of plot_sunburst / plot_treemap
+# ─────────────────────────────────────────────────────────────────────────────
+# PRE-BUILT OUTPUT PATHS
+# ─────────────────────────────────────────────────────────────────────────────
+PREBUILT = {  # approach name -> dataset name -> artefact kind -> JSON path under ROOT
+    "Baseline": {  # one "hierarchy" file per dataset
+        "AI-MIND": {"hierarchy": ROOT / "baseline" / "ai-mind-variable-descriptions_in__baseline_hierarchy.json"},
+        "HCP":     {"hierarchy": ROOT / "baseline" / "HCP_S1200_DataDictionary_Oct_30_2023_baseline_hierarchy.json"},
+    },
+    "Approach 1": {  # the only approach that also lists a "facets" file per dataset
+        "AI-MIND": {
+            "hierarchy": ROOT / "approach_1" / "ai-mind-variable-descriptions_in__approach1_hierarchy.json",
+            "facets":    ROOT / "approach_1" / "ai-mind-variable-descriptions_in__approach1_facets.json",
+        },
+        "HCP": {
+            "hierarchy": ROOT / "approach_1" / "HCP_S1200_DataDictionary_Oct_30_2023_approach1_hierarchy.json",
+            "facets":    ROOT / "approach_1" / "HCP_S1200_DataDictionary_Oct_30_2023_approach1_facets.json",
+        },
+    },
+    "Approach 2": {  # its "hierarchy" entries point at the *_approach2_lod.json files
+        "AI-MIND": {"hierarchy": ROOT / "approach_2" / "ai-mind-variable-descriptions_in__approach2_lod.json"},
+        "HCP":     {"hierarchy": ROOT / "approach_2" / "HCP_S1200_DataDictionary_Oct_30_2023_approach2_lod.json"},
+    },
+}
+# Per-approach rendering config (matches each source app); keyed by the same approach names as PREBUILT.
+CONFIG = {  # NOTE(review): "color" is presumably the colorscale name given to the plot helpers, "compress" the switch for compress_one_child_chains — confirm at the call site
+    "Baseline":   {"color": "Greens",  "compress": False, "node_link": False},  # module docstring: raw tree, Sunburst + Treemap only
+    "Approach 1": {"color": "Blues",   "compress": False, "node_link": True},  # module docstring: raw tree, adds Node-link (+ Facets)
+    "Approach 2": {"color": "Viridis", "compress": True,  "node_link": True},  # module docstring: compress one-child chains, adds Node-link
+}
+APPROACH_DESC = {  # one human-readable method summary per approach; same keys as PREBUILT and CONFIG
+    "Baseline": (
+        "Pure clustering baseline — TF-IDF representation + recursive agglomerative "
+        "(cosine) clustering, number of clusters chosen by silhouette. No external APIs, "
+        "no neural embeddings. Node labels are the most discriminative terms per cluster."
+    ),
+    "Approach 1": (
+        "Global embedding pipeline — SBERT + N×M concept-table alignment (Gonçalves 2019) "
+        "+ HiExpan refinement (Shen et al. KDD 2018) + Castanet parallel facets. Optionally "
+        "retrieves concept context from Wikidata / Wikipedia / WordNet / BioPortal."
+    ),
+    "Approach 2": (
+        "Dataset-constrained multi-aspect hierarchy — group-anchored L1/L2 → phrase-slot "
+        "mining → FASTopic semantic aspect discovery (Wu et al. NeurIPS 2024) → GMM/KMeans "
+        "clustering → deterministic 5-stage label generation. Optional local-LLM refinement."
+    ),
+}
+# ─────────────────────────────────────────────────────────────────────────────
+# TREE TRANSFORMS  (copied from approach_2.py — display-only, exact behaviour)
+# ─────────────────────────────────────────────────────────────────────────────
+def _filter_dissolved(nodes: list) -> list:
+    """Return *nodes* without dissolved or explicitly hidden entries.
+
+    A node is removed when its ``type`` is ``"dissolved"`` or its ``isShown``
+    flag is exactly ``False``.  Every surviving node is shallow-copied and its
+    ``related`` list rebuilt as ints with the removed ids filtered out.  When
+    no node qualifies for removal the input list itself is returned.
+    """
+    def _is_hidden(node: dict) -> bool:
+        return node.get("type") == "dissolved" or node.get("isShown") is False
+
+    hidden = {int(node["id"]) for node in nodes if _is_hidden(node)}
+    if not hidden:
+        return nodes
+
+    def _pruned(node: dict) -> dict:
+        clone = dict(node)
+        clone["related"] = [cid for cid in map(int, node.get("related", []))
+                            if cid not in hidden]
+        return clone
+
+    return [_pruned(node) for node in nodes if int(node["id"]) not in hidden]
+def compress_one_child_chains(nodes: list) -> list:
+    """Merge every aggregation node with its sole aggregation child.
+
+    Dissolved/hidden nodes are removed first via ``_filter_dissolved``.  Then,
+    for as long as some ``aggregation`` node has exactly one child and that
+    child is also an ``aggregation``, the pair is fused: the result is a copy
+    of the child that keeps the parent's id, is named ``"<parent> / <child>"``
+    and described as ``"<parent desc> | <child desc>"`` (e.g.
+    'DMS → DMS Recommended Standard' becomes 'DMS / DMS Recommended Standard').
+    Every reference to the absorbed child id is redirected to the surviving id.
+    The work happens on copies; the node dicts passed in are left untouched.
+    """
+    by_id = {int(node["id"]): dict(node) for node in _filter_dissolved(nodes)}
+
+    def _only_aggregation_child(node: dict):
+        """Return the id of *node*'s single aggregation child, else None."""
+        if node.get("type") != "aggregation":
+            return None
+        offspring = node.get("related", [])
+        if len(offspring) != 1:
+            return None
+        sole_id = int(offspring[0])
+        if by_id.get(sole_id, {}).get("type") != "aggregation":
+            return None
+        return sole_id
+
+    def _merge_first_chain() -> bool:
+        """Fuse the first chain link found; report whether one was fused."""
+        for parent_id, parent in list(by_id.items()):
+            child_id = _only_aggregation_child(parent)
+            if child_id is None:
+                continue
+            child = by_id[child_id]
+            by_id[parent_id] = {
+                **child,
+                "id": parent_id,
+                "name": f"{parent['name']} / {child['name']}",
+                "desc": f"{parent.get('desc', '')} | {child.get('desc', '')}",
+            }
+            by_id.pop(child_id, None)
+            # Point every remaining reference to the absorbed child at the survivor.
+            for entry in by_id.values():
+                entry["related"] = [parent_id if int(ref) == child_id else int(ref)
+                                    for ref in entry.get("related", [])]
+            return True
+        return False
+
+    # Rescan from the start after each merge, since a merge can expose a new chain.
+    while _merge_first_chain():
+        pass
+    return list(by_id.values())
+# ─────────────────────────────────────────────────────────────────────────────
+# RENDER HELPERS  (DAG-safe value map — copied from approach_2.py)
+# ─────────────────────────────────────────────────────────────────────────────
+def _leaf_ids(nodes: list, nid: int) -> list:
+    """Return the ids of all ``attribute`` nodes reachable from *nid*.
+
+    The graph is walked depth-first along ``related`` in list order, so ids
+    come back in first-encounter order without duplicates.  Each node is
+    expanded at most once: shared subtrees are not re-walked and a cyclic
+    ``related`` graph terminates instead of exhausting the recursion limit.
+
+    Args:
+        nodes: node dicts with ``"id"`` and optional ``"type"`` / ``"related"``.
+        nid: id of the node to start from (coerced with ``int``).
+
+    Returns:
+        A list of int ids; empty when *nid* is unknown or has no attribute
+        descendants.  An ``attribute`` start node yields ``[nid]``.
+    """
+    by_id = {int(n["id"]): n for n in nodes}
+    leaves = []
+    seen = set()
+    stack = [int(nid)]
+    while stack:
+        current = stack.pop()
+        if current in seen:
+            continue
+        seen.add(current)
+        node = by_id.get(current)
+        if not node:
+            continue
+        if node.get("type") == "attribute":
+            leaves.append(current)
+            continue
+        # Push children reversed so they are popped in their original order.
+        stack.extend(int(c) for c in reversed(list(node.get("related", []))))
+    return leaves
+def _parent_map(nodes: list) -> dict:
+    """Map each child id to the id of the first node that lists it in ``related``.
+
+    Nodes are scanned in list order; once a child has a parent recorded, later
+    nodes that also reference it are ignored.  Ids are coerced to ``int``.
+    """
+    first_parent = {}
+    for node in nodes:
+        for child_id in map(int, node.get("related", [])):
+            if child_id in first_parent:
+                continue
+            first_parent[child_id] = int(node["id"])
+    return first_parent
+def _tree_value_map(nodes: list, pm: dict) -> dict:
+    """Return ``{node_id: size}`` for charts drawn with ``branchvalues="total"``.
+
+    An ``attribute`` node, or any node without children, has size 1; every
+    other node's size is the sum of its children's sizes.  Children are taken
+    from *pm* (child id -> parent id), so a node referenced by several parents
+    is counted only under the parent recorded there.
+
+    Nodes other than id 0 that have no entry in *pm* are counted as children
+    of node 0, because ``plot_sunburst`` and ``plot_treemap`` draw them there
+    (``pm.get(nid, 0)``).  Without this, node 0 would be smaller than the sum
+    of the sectors attached beneath it.
+
+    Args:
+        nodes: node dicts carrying ``"id"`` and optionally ``"type"``.
+        pm: child id -> parent id mapping, e.g. the result of ``_parent_map``.
+
+    Returns:
+        A dict with one entry per node in *nodes*; every size is >= 1.
+    """
+    nodemap = {int(n["id"]): n for n in nodes}
+    kids = {}
+    for child, par in pm.items():
+        kids.setdefault(int(par), []).append(int(child))
+    # Mirror the plot helpers' fallback parent so parent totals stay consistent.
+    parented = {int(child) for child in pm}
+    for nid in nodemap:
+        if nid != 0 and nid not in parented:
+            kids.setdefault(0, []).append(nid)
+    memo = {}
+
+    def count(nid: int) -> int:
+        if nid in memo:
+            return memo[nid]
+        # Provisional size; also stops the recursion if the graph has a cycle.
+        memo[nid] = 1
+        n = nodemap.get(nid)
+        if n is not None and n.get("type") == "attribute":
+            return 1
+        memo[nid] = max(1, sum(count(c) for c in kids.get(nid, [])))
+        return memo[nid]
+
+    return {nid: count(nid) for nid in nodemap}
+def _wrap_hover(text: str, width: int = 80) -> str:
+    """Wrap *text* to *width* columns for a hover box, joining lines with ``<br>``.
+
+    Falsy input gives ``""``.  Existing newlines are kept as line breaks and a
+    blank source line stays a blank output line.
+    """
+    from textwrap import wrap
+    content = str(text or "")
+    if content == "":
+        return ""
+    wrapped = [piece
+               for source_line in content.split("\n")
+               for piece in (wrap(source_line, width=width) or [""])]
+    return "<br>".join(wrapped)
+def plot_sunburst(nodes: list, color: str, max_depth: int = DEFAULT_DEPTH):
+    """Build the drill-down sunburst figure for a hierarchy.
+
+    Dissolved/hidden nodes are filtered out first.  Each remaining node becomes
+    one sector whose parent is taken from ``_parent_map`` (node 0 is the root;
+    a node without a recorded parent is attached to node 0) and whose size
+    comes from ``_tree_value_map``.  Labels are cut to 40 characters; the hover
+    text shows name, type, variable count and the wrapped description.
+
+    Args:
+        nodes: hierarchy node dicts.
+        color: value passed to the trace as ``marker.colorscale``.
+        max_depth: number of levels rendered at once (``maxdepth``).
+
+    Returns:
+        A ``plotly.graph_objects.Figure`` holding one Sunburst trace.
+    """
+    visible = _filter_dissolved(nodes)
+    parent_of = _parent_map(visible)
+    sizes = _tree_value_map(visible, parent_of)
+    sectors = {"ids": [], "labels": [], "parents": [], "values": [], "hovertext": []}
+    for node in visible:
+        node_id = int(node["id"])
+        n_vars = len(_leaf_ids(visible, node_id))
+        tooltip = (f"<b>{node.get('name', '')}</b><br>Type: {node.get('type', '')}<br>"
+                   f"Variables: {n_vars}<br><br>{_wrap_hover(node.get('desc', ''))}")
+        sectors["ids"].append(str(node_id))
+        sectors["labels"].append(str(node.get("name", ""))[:40])
+        sectors["parents"].append("" if node_id == 0 else str(parent_of.get(node_id, 0)))
+        sectors["values"].append(sizes.get(node_id, 1))
+        sectors["hovertext"].append(tooltip)
+    # NOTE(review): Plotly's reference says marker.colorscale only has an effect
+    # when marker.colors is a numeric array, and no colors are passed here —
+    # confirm the palette is actually applied.
+    trace = go.Sunburst(
+        branchvalues="total",
+        hoverinfo="text",
+        maxdepth=max_depth,
+        insidetextorientation="radial",
+        marker=dict(colorscale=color, line=dict(width=1, color="white")),
+        **sectors,
+    )
+    fig = go.Figure(trace)
+    heading = dict(text="Click sector to drill down — click centre to go back",
+                   font=dict(size=13), x=0.5)
+    fig.update_layout(height=700, margin=dict(l=10, r=10, t=40, b=10), title=heading)
+    return fig
+def plot_treemap(nodes: list, color: str, max_depth: int = DEFAULT_DEPTH):
+    """Build the treemap figure for a hierarchy.
+
+    Dissolved/hidden nodes are filtered out first.  Each remaining node becomes
+    one tile whose parent is taken from ``_parent_map`` (node 0 is the root; a
+    node without a recorded parent is attached to node 0) and whose size comes
+    from ``_tree_value_map``.  Labels are cut to 40 characters; the hover text
+    shows name, variable count and the wrapped description.
+
+    Args:
+        nodes: hierarchy node dicts.
+        color: value passed to the trace as ``marker.colorscale``.
+        max_depth: number of levels rendered at once (``maxdepth``).
+
+    Returns:
+        A ``plotly.graph_objects.Figure`` holding one Treemap trace.
+    """
+    visible = _filter_dissolved(nodes)
+    parent_of = _parent_map(visible)
+    sizes = _tree_value_map(visible, parent_of)
+    tiles = {"ids": [], "labels": [], "parents": [], "values": [], "hovertext": []}
+    for node in visible:
+        node_id = int(node["id"])
+        n_vars = len(_leaf_ids(visible, node_id))
+        tooltip = (f"<b>{node.get('name', '')}</b><br>Variables: {n_vars}<br>"
+                   f"{_wrap_hover(node.get('desc', ''))}")
+        tiles["ids"].append(str(node_id))
+        tiles["labels"].append(str(node.get("name", ""))[:40])
+        tiles["parents"].append("" if node_id == 0 else str(parent_of.get(node_id, 0)))
+        tiles["values"].append(sizes.get(node_id, 1))
+        tiles["hovertext"].append(tooltip)
+    # NOTE(review): Plotly's reference says marker.colorscale only has an effect
+    # when marker.colors is a numeric array, and no colors are passed here —
+    # confirm the palette is actually applied.
+    trace = go.Treemap(
+        branchvalues="total",
+        hoverinfo="text",
+        textinfo="label+value",
+        maxdepth=max_depth,
+        marker=dict(colorscale=color, line=dict(width=1, color="white")),
+        **tiles,
+    )
+    fig = go.Figure(trace)
+    fig.update_layout(height=700, margin=dict(l=10, r=10, t=10, b=10))
+    return fig
+# ─────────────────────────────────────────────────────────────────────────────
+# NODE-LINK TREE  (Reingold-Tilford layout — copied from approach_2.py)
+# ─────────────────────────────────────────────────────────────────────────────
+def _node_color(n: dict) -> str:
+    """Return the hex marker colour for node *n*, chosen by its ``type`` field.
+    Unknown or missing types fall back to mid-grey.
+    """
+    kind = n.get("type", "")
+    palette = (
+        ("root", "#c44e52"),
+        ("attribute", "#4C72B0"),
+        ("collapsed", "#bbbbbb"),
+    )
+    for known_kind, hex_code in palette:
+        if kind == known_kind:
+            return hex_code
+    return "#8C8C8C"
+def _display_graph(nodes: list, max_depth: int, show_hidden: bool):
+    """Select the nodes and edges to draw in the node-link view.
+    Walks the ``related`` child lists starting at node 0. Nodes whose
+    ``isShown`` is exactly ``False`` are skipped unless *show_hidden* is set
+    (the root is always kept). A node reached at depth >= *max_depth* that
+    still has children gets one synthetic "collapsed" child summarising its
+    leaf count instead of its real subtree.
+    Returns ``(display_nodes, edges)`` where edges are ``(parent_id, child_id)``.
+    """
+    by_id = {int(node["id"]): node for node in nodes}
+    shown: dict = {}
+    links: list = []
+    # Synthetic placeholder ids start above 10**9 (presumably to stay clear of
+    # real node ids).
+    last_synthetic = 10 ** 9
+    def suppressed(node: dict) -> bool:
+        """True when *node* is flagged hidden and hidden nodes are not requested."""
+        return (not show_hidden) and node.get("isShown") is False
+    def visit(node_id, level):
+        nonlocal last_synthetic
+        key = int(node_id)
+        node = by_id.get(key)
+        if not node or (level > 0 and suppressed(node)):
+            return
+        shown[key] = node
+        if level >= max_depth and node.get("related"):
+            last_synthetic += 1
+            stub_id = last_synthetic
+            folded = len(_leaf_ids(nodes, key))
+            shown[stub_id] = {"id": stub_id, "name": f"… {folded} variables",
+                              "type": "collapsed", "related": [],
+                              "desc": f"Collapsed: {node.get('name')}"}
+            links.append((key, stub_id))
+            return
+        for child in node.get("related", []):
+            child_key = int(child)
+            target = by_id.get(child_key)
+            if not target or suppressed(target):
+                continue
+            links.append((key, child_key))
+            visit(child_key, level + 1)
+    visit(0, 0)
+    return list(shown.values()), links
+def _positions(edges: list):
+    """Compute ``{node_id: (x, y)}`` coordinates for a tree rooted at node 0.
+    x grows with depth; leaves take consecutive y slots in traversal order and
+    every internal node sits at the mean y of its children.
+    """
+    x_step, y_step = 3.0, 1.8
+    kids: dict = defaultdict(list)
+    for parent, child in edges:
+        kids[parent].append(child)
+    coords: dict = {}
+    next_slot = 0
+    def place(node_id, level):
+        nonlocal next_slot
+        below = kids.get(node_id, [])
+        if below:
+            # Internal node: centre on its (already placed) children.
+            y = float(np.mean([place(kid, level + 1) for kid in below]))
+        else:
+            # Leaf: claim the next free row.
+            y = next_slot * y_step
+            next_slot += 1
+        coords[node_id] = (level * x_step, y)
+        return y
+    place(0, 0)
+    return coords
+def plot_node_link(nodes: list, max_depth: int, show_hidden: bool, show_leaf_labels: bool):
+    """Draw the hierarchy as a left-to-right node-link tree.
+    Nodes and edges come from ``_display_graph`` and coordinates from
+    ``_positions``. Three traces are produced in order: elbow connectors,
+    ``attribute`` (leaf) markers, and all other nodes as larger labelled
+    markers. Leaf text is only drawn when *show_leaf_labels* is true.
+    """
+    nodes = _filter_dissolved(nodes)
+    shown_nodes, links = _display_graph(nodes, max_depth, show_hidden)
+    coords = _positions(links)
+    # Elbow connectors: out to the horizontal midpoint, across vertically, then
+    # on to the child. The trailing None breaks the polyline between edges.
+    edge_x, edge_y = [], []
+    for parent, child in links:
+        if parent in coords and child in coords:
+            (px, py), (cx, cy) = coords[parent], coords[child]
+            half = (px + cx) / 2
+            edge_x.extend((px, half, half, cx, None))
+            edge_y.extend((py, py, cy, cy, None))
+    connector_trace = go.Scatter(x=edge_x, y=edge_y, mode="lines",
+                                 line=dict(width=1, color="#c8c8c8"),
+                                 hoverinfo="skip", showlegend=False)
+    leaf = {"x": [], "y": [], "label": [], "color": [], "hover": []}
+    inner = {"x": [], "y": [], "label": [], "color": [], "hover": []}
+    for node in shown_nodes:
+        node_id = int(node["id"])
+        if node_id not in coords:
+            continue
+        n_vars = len(_leaf_ids(nodes, node_id))
+        is_leaf = node.get("type") == "attribute"
+        caption = str(node.get("name", ""))[:32]
+        if is_leaf and not show_leaf_labels:
+            caption = ""
+        bucket = leaf if is_leaf else inner
+        x, y = coords[node_id]
+        bucket["x"].append(x)
+        bucket["y"].append(y)
+        bucket["color"].append(_node_color(node))
+        bucket["label"].append(caption)
+        bucket["hover"].append(
+            f"<b>{node.get('name', '')}</b><br>Type: {node.get('type', '')}<br>"
+            f"Variables: {n_vars}")
+    leaf_trace = go.Scatter(
+        x=leaf["x"], y=leaf["y"],
+        mode="markers+text" if show_leaf_labels else "markers",
+        text=leaf["label"], textposition="middle right", textfont=dict(size=9),
+        marker=dict(size=7, color=leaf["color"], line=dict(width=0.5, color="white")),
+        hovertext=leaf["hover"], hoverinfo="text", showlegend=False)
+    inner_trace = go.Scatter(
+        x=inner["x"], y=inner["y"], mode="markers+text", text=inner["label"],
+        textposition="middle right", textfont=dict(size=10),
+        marker=dict(size=13, color=inner["color"], line=dict(width=1, color="white")),
+        hovertext=inner["hover"], hoverinfo="text", showlegend=False)
+    # Figure height scales with the larger of the two marker groups (16 px per
+    # point), never below 600 px.
+    row_count = max(len(leaf["y"]), len(inner["y"]), 1)
+    fig = go.Figure([connector_trace, leaf_trace, inner_trace])
+    fig.update_layout(
+        height=max(600, row_count * 16),
+        margin=dict(l=10, r=140, t=10, b=10),
+        xaxis=dict(visible=False), yaxis=dict(visible=False),
+        plot_bgcolor="white",
+    )
+    return fig
+# ─────────────────────────────────────────────────────────────────────────────
+# STATS / SAFE RENDERING
+# ─────────────────────────────────────────────────────────────────────────────
+def _tree_depth(nodes: list) -> int:
+    """Return the maximum depth reachable from node 0 (root = depth 0).
+    Only nodes that survive ``_filter_dissolved`` are considered; child ids
+    listed in ``related`` but missing from the node list are ignored.
+    """
+    by_id = {int(node["id"]): node for node in _filter_dissolved(nodes)}
+    deepest = 0
+    def descend(node_id, level):
+        nonlocal deepest
+        if level > deepest:
+            deepest = level
+        for child in by_id.get(int(node_id), {}).get("related", []):
+            child_id = int(child)
+            if child_id in by_id:
+                descend(child_id, level + 1)
+    descend(0, 0)
+    return deepest
+def safe_render_depth(nodes: list, requested: int) -> int:
+    """Cap the *initial* sunburst/treemap depth for large hierarchies.
+    Plotly sunburst/treemap silently blank when asked to draw too many sectors
+    at once (large hierarchies like HCP). Above 400 nodes the depth is limited
+    to 3, above 150 nodes to 4; otherwise *requested* is returned unchanged.
+    The chart stays fully drillable by clicking, so no data is lost.
+    """
+    node_total = len(_filter_dissolved(nodes))
+    # (node-count threshold, depth cap), checked from largest to smallest.
+    for threshold, cap in ((400, 3), (150, 4)):
+        if node_total > threshold:
+            return min(requested, cap)
+    return requested
+# ─────────────────────────────────────────────────────────────────────────────
+# IO
+# ─────────────────────────────────────────────────────────────────────────────
+@st.cache_data(show_spinner=False)
+def _load_json(path_str: str):
+    """Parse the UTF-8 JSON file at *path_str*; wrapped in ``st.cache_data``."""
+    with open(path_str, encoding="utf-8") as handle:
+        payload = json.load(handle)
+    return payload
+def _read_bytes(path_str: str) -> bytes:
+    """Return the raw bytes of the file at *path_str* (used for download buttons)."""
+    return Path(path_str).read_bytes()
+@st.cache_data(show_spinner=False)
+def _outputs_zip(root_str: str) -> bytes:
+    """Zip the entire bundled outputs/ folder for one-click download.
+    Every regular file under *root_str* is added in sorted path order; archive
+    names are relative to the parent of the root, so they keep the root
+    folder's own name as a prefix. Returns the archive as bytes.
+    """
+    import io
+    import zipfile
+    base = Path(root_str)
+    archive_root = base.parent
+    payload = io.BytesIO()
+    with zipfile.ZipFile(payload, "w", zipfile.ZIP_DEFLATED) as archive:
+        for entry in sorted(base.rglob("*")):
+            if not entry.is_file():
+                continue
+            archive.write(entry, arcname=entry.relative_to(archive_root).as_posix())
+    return payload.getvalue()
+def count_nodes(nodes: list) -> tuple[int, int]:
+    """Return ``(leaves, aggregations)`` counted over the filtered node list.
+    Leaves are nodes of type ``"attribute"``; aggregations are nodes of type
+    ``"aggregation"``. Any other type (e.g. the root) is not counted.
+    """
+    leaf_total = 0
+    agg_total = 0
+    for node in _filter_dissolved(nodes):
+        kind = node.get("type")
+        if kind == "attribute":
+            leaf_total += 1
+        elif kind == "aggregation":
+            agg_total += 1
+    return leaf_total, agg_total
+def concept_aligned_pct(nodes: list) -> float | None:
+    """% of aggregation nodes that carry a concept/provenance label (Approach 1).
+    A node counts as aligned when any of ``provenance``, ``concept`` or
+    ``source_evidence`` is truthy. Returns ``None`` when there are no
+    aggregation nodes or when none of them is aligned.
+    """
+    evidence_keys = ("provenance", "concept", "source_evidence")
+    aggregations = [node for node in _filter_dissolved(nodes)
+                    if node.get("type") == "aggregation"]
+    if not aggregations:
+        return None
+    labelled = sum(1 for node in aggregations
+                   if any(node.get(key) for key in evidence_keys))
+    if not labelled:
+        return None
+    return 100.0 * labelled / len(aggregations)
+# ─────────────────────────────────────────────────────────────────────────────
+# SIDEBAR
+# ─────────────────────────────────────────────────────────────────────────────
+# Sidebar widgets pick the approach/dataset pair that the rest of the script
+# renders; both radios default to their first option.
+with st.sidebar:
+    approach = st.radio("**Select Approach**",
+                        ["Baseline", "Approach 1", "Approach 2"], index=0)
+    dataset = st.radio("**Select Dataset**", ["AI-MIND", "HCP"], index=0)
+    st.markdown("---")
+    st.caption("Results are pre-built from the thesis experiments. To run on your "
+               "own data, clone the repository and run the individual apps.")
+    st.markdown("[📦 GitHub Repository]"
+                "(https://github.com/RoophaSharon/tfm_metadata_hierarchy_2026)")
+# ─────────────────────────────────────────────────────────────────────────────
+# MAIN
+# ─────────────────────────────────────────────────────────────────────────────
+# Per-approach display settings (colour scale plus the "node_link" and
+# "compress" flags read further down).
+cfg = CONFIG[approach]
+color = cfg["color"]
+st.title(f"📊 {approach} — {dataset} Dataset")
+st.markdown(f"> {APPROACH_DESC[approach]}")
+paths = PREBUILT[approach][dataset]
+hier_path = paths.get("hierarchy")
+# Without a hierarchy file there is nothing to draw: show the error and halt
+# this script run.
+if hier_path is None or not hier_path.exists():
+    st.error(f"Pre-built result not found: `{hier_path}`")
+    st.stop()
+raw_nodes = _load_json(str(hier_path))
+leaves, aggs = count_nodes(raw_nodes)
+c1, c2, c3 = st.columns(3)
+c1.metric("Leaf Variables", leaves)
+c2.metric("Aggregation Nodes", aggs)
+c3.metric("Total Nodes", leaves + aggs)
+# ── Build summary (collapsed) ────────────────────────────────────────────────
+# The facet file is optional; n_facets stays None when it is absent or cannot
+# be loaded, and the metric then shows an em dash.
+facet_path = paths.get("facets")
+n_facets = None
+if facet_path is not None and facet_path.exists():
+    try:
+        n_facets = len(_load_json(str(facet_path)))
+    except Exception:
+        # Best effort: any failure to load or measure the facet file only
+        # blanks the metric instead of aborting the page.
+        n_facets = None
+with st.expander("ℹ️ Build summary", expanded=False):
+    bs1, bs2, bs3, bs4 = st.columns(4)
+    bs1.metric("Variables", leaves)
+    bs2.metric("Internal nodes", aggs)
+    bs3.metric("Tree depth", _tree_depth(raw_nodes))
+    bs4.metric("Facets", n_facets if n_facets is not None else "—")
+    # The caption is omitted when concept_aligned_pct returns None.
+    pct = concept_aligned_pct(raw_nodes)
+    if pct is not None:
+        st.caption(f"Concept-aligned aggregation nodes: **{pct:.1f}%**")
+    st.caption(
+        f"Source file: `{hier_path.name}` · "
+        f"Approach: **{approach}** · Dataset: **{dataset}**. "
+        "Tree topology and labels are reproduced exactly from the pre-built "
+        "thesis output (the algorithms are not re-run in this viewer)."
+    )
+# ── Downloads ────────────────────────────────────────────────────────────────
+# Three equal columns: hierarchy JSON, facets JSON (a disabled placeholder
+# button when no facet file exists), and a ZIP of everything under ROOT.
+d1, d2, d3 = st.columns(3)
+with d1:
+    st.download_button("⬇️ Hierarchy JSON", data=_read_bytes(str(hier_path)),
+                       file_name=hier_path.name, mime="application/json",
+                       width='stretch')
+with d2:
+    if facet_path is not None and facet_path.exists():
+        st.download_button("⬇️ Facets JSON", data=_read_bytes(str(facet_path)),
+                           file_name=facet_path.name, mime="application/json",
+                           width='stretch')
+    else:
+        st.button("⬇️ Facets JSON", disabled=True, width='stretch',
+                  help="This approach/dataset has no facet tree.")
+with d3:
+    st.download_button("⬇️ All outputs (ZIP)", data=_outputs_zip(str(ROOT)),
+                       file_name="metadata_hierarchy_outputs.zip",
+                       mime="application/zip", width='stretch')
+st.markdown("---")
+# ── Level-of-Detail controls (above chart — matches the apps) ────────────────
+# The node-link view is only offered when the approach's config enables it.
+view_options = ["Sunburst (drill-down)", "Treemap"]
+if cfg["node_link"]:
+    view_options.append("Node-link tree")
+# A fifth column (for the "Compress chains" checkbox) is laid out only when
+# the approach's config has "compress" set; otherwise vc5 is None.
+if cfg["compress"]:
+    vc1, vc2, vc3, vc4, vc5 = st.columns([2.4, 2, 1, 1, 1.2])
+else:
+    vc1, vc2, vc3, vc4 = st.columns([2.4, 2, 1, 1])
+    vc5 = None
+with vc1:
+    viz_mode = st.radio("View mode", view_options, horizontal=True, index=0,
+                        help="Sunburst best for large hierarchies [Taxonomizer]. "
+                             "Node-link best for moderate-depth structure inspection.")
+with vc2:
+    depth = st.slider("Depth (Level of Detail)", 1, 9, DEFAULT_DEPTH, 1,
+                      help="Maximum tree levels shown. Set high to see the whole "
+                           "hierarchy, lower to peel back to the interior.")
+# NOTE: show_leaf_labels and show_hidden are only passed to plot_node_link
+# below; the sunburst and treemap calls do not receive them.
+with vc3:
+    show_leaf_labels = st.checkbox("Leaf labels", value=False)
+with vc4:
+    show_hidden = st.checkbox("Hidden nodes", value=False)
+if vc5 is not None:
+    with vc5:
+        compress_chains = st.checkbox("Compress chains", value=True,
+                                      help="Merge one-child aggregation chains "
+                                           '(e.g. "DMS → DMS Recommended Standard") for '
+                                           "display. Export JSON keeps original structure.")
+else:
+    compress_chains = False
+st.divider()
+# Chain compression is applied to the displayed copy only; raw_nodes is left
+# as loaded.
+display_nodes = compress_one_child_chains(raw_nodes) if compress_chains else raw_nodes
+# Any view mode other than the two named ones falls through to the node-link tree.
+if viz_mode == "Sunburst (drill-down)":
+    st.plotly_chart(plot_sunburst(display_nodes, color, depth), width='stretch')
+elif viz_mode == "Treemap":
+    st.plotly_chart(plot_treemap(display_nodes, color, depth), width='stretch')
+else:
+    st.plotly_chart(plot_node_link(display_nodes, depth, show_hidden, show_leaf_labels),
+                    width='stretch')
+# ── Facets (Approach 1 only) ─────────────────────────────────────────────────
+# Rendered only when the selected approach/dataset ships a facet file. The
+# code below treats the file as a mapping of facet name -> node list.
+if facet_path is not None and facet_path.exists():
+    st.markdown("---")
+    st.subheader("🔀 Parallel facets")
+    facets = _load_json(str(facet_path))
+    names = list(facets.keys())
+    if not names:
+        st.info("No facets available for this dataset.")
+    else:
+        sel = st.selectbox("Select facet", names)
+        fnodes = facets[sel]
+        ft1, ft2 = st.tabs(["Sunburst", "Treemap"])
+        with ft1:
+            st.plotly_chart(plot_sunburst(fnodes, color, depth), width='stretch')
+        with ft2:
+            # Pass the slider depth here too, so both facet tabs (and the main
+            # treemap) honour the same Level-of-Detail setting; previously this
+            # tab always used DEFAULT_DEPTH.
+            st.plotly_chart(plot_treemap(fnodes, color, depth), width='stretch')