""" AI Inference TCO Calculator By Julien Simon | AI Operating Partner, Fortino Capital Pricing as of March 22, 2026 """ import gradio as gr import plotly.graph_objects as go import pandas as pd from models import MODEL_LIBRARY, API_MODELS from gpus import GPU_LIBRARY, GPU_PROVIDERS # ═════════════════════════════════════════════════════════════════════════════ # HELPERS # ═════════════════════════════════════════════════════════════════════════════ def sf(val, default=0): """Safe float conversion.""" try: return float(val) if val is not None else default except (ValueError, TypeError): return default def fmt_c(val, decimals=2): """Format as currency string.""" return f"${val:,.{decimals}f}" def fmt_n(val, decimals=0): """Format number with commas.""" return f"{val:,.{decimals}f}" def fmt_p(val, decimals=1): """Format as percentage string.""" return f"{val * 100:.{decimals}f}%" def _card(label, value, style="default"): """Return an HTML metric card using CSS classes for theme compatibility.""" return ( f'
' f'
{label}
' f'
{value}
' ) def _cards_row(*cards): """Wrap card HTML strings in a flex row.""" return f'
{"".join(cards)}
def get_model_prices(model_name):
    """Return ``(input_price, output_price)`` for *model_name*.

    Prices are looked up in ``MODEL_LIBRARY`` and converted to ``float``.
    ``(0.0, 0.0)`` is returned when the name is empty or unknown, or when the
    entry does not carry both an input and an output price.
    """
    if model_name and model_name in MODEL_LIBRARY:
        entry = MODEL_LIBRARY[model_name]
        in_price, out_price = entry["input"], entry["output"]
        # Both prices must be present; float(None) would raise TypeError.
        if in_price is not None and out_price is not None:
            return float(in_price), float(out_price)
    return 0.0, 0.0


def get_gpu_instances(provider):
    """Return a dropdown update listing the instances of *provider*.

    An empty provider or the ``"(Custom)"`` entry yields a single
    ``"(Custom)"`` choice. Otherwise the choices are the ``GPU_LIBRARY`` keys
    whose ``"provider"`` field equals *provider*, with the first one selected
    (or ``"(Custom)"`` when the provider has no instances).
    """
    if not provider or provider == "(Custom)":
        return gr.update(choices=["(Custom)"], value="(Custom)")
    instances = [
        name for name, spec in GPU_LIBRARY.items()
        if spec["provider"] == provider
    ]
    selected = instances[0] if instances else "(Custom)"
    return gr.update(choices=instances, value=selected)


def get_gpu_price(instance_name):
    """Return the hourly cost of *instance_name* from ``GPU_LIBRARY``.

    For names that are not in the library (e.g. ``"(Custom)"``) a bare
    ``gr.update()`` is returned so the current value is left unchanged.
    """
    if instance_name and instance_name in GPU_LIBRARY:
        return float(GPU_LIBRARY[instance_name]["cost_hr"])
    return gr.update()


# ═════════════════════════════════════════════════════════════════════════════
# CALCULATION FUNCTIONS
# ═════════════════════════════════════════════════════════════════════════════

def calc_usage(input_tpr, output_tpr, req_day, days_year):
    """Derive daily and yearly token volumes from per-request averages.

    Returns a dict with ``input_day``, ``output_day`` and ``total_day``
    (tokens per day) and ``input_year_M`` / ``output_year_M`` (millions of
    tokens per year).
    """
    input_day = input_tpr * req_day
    output_day = output_tpr * req_day
    return {
        "input_day": input_day,
        "output_day": output_day,
        "total_day": input_day + output_day,
        "input_year_M": input_day * days_year / 1e6,
        "output_year_M": output_day * days_year / 1e6,
    }


def calc_api(name, in_price, out_price, in_year_M, out_year_M, req_day, days_year):
    """Compute the annual cost of one API provider.

    *in_price* / *out_price* are multiplied by the yearly volumes
    *in_year_M* / *out_year_M* (millions of tokens). The result also carries
    the monthly cost (annual / 12) and the cost per 1,000 requests, which is
    0 when there are no requests in the year.
    """
    a_in = in_year_M * in_price
    a_out = out_year_M * out_price
    total = a_in + a_out
    total_req = req_day * days_year
    return {
        "name": name,
        "in_price": in_price,
        "out_price": out_price,
        "in_M": in_year_M,
        "out_M": out_year_M,
        "a_in": a_in,
        "a_out": a_out,
        "total": total,
        "monthly": total / 12,
        "per_1k": (total / total_req * 1000) if total_req > 0 else 0,
    }


def calc_smart_routing(providers):
    """Blend 60% of the cheapest and 40% of the 2nd cheapest provider.

    Providers whose ``"total"`` is not positive are ignored. With a single
    remaining provider its total is used as-is. ``"savings"`` is the relative
    saving of the blend versus the mean total of the remaining providers.
    """
    totals = sorted(p["total"] for p in providers if p["total"] > 0)
    if not totals:
        return {"annual": 0, "monthly": 0, "savings": 0}

    if len(totals) == 1:
        blended = totals[0]
    else:
        blended = 0.6 * totals[0] + 0.4 * totals[1]
    avg = sum(totals) / len(totals)
    return {
        "annual": blended,
        "monthly": blended / 12,
        "savings": 1 - blended / avg if avg > 0 else 0,
    }


def calc_self_hosted(gpu_cost_hr, num_gpus, hours_day, days_year,
                     throughput, utilization, sw_cost, net_cost,
                     total_day, total_year_M):
    """Compute annual cost and capacity of a rented-GPU deployment.

    *utilization* is a percentage (it is divided by 100). ``"headroom"`` is
    the spare capacity relative to *total_day* (negative when capacity is
    insufficient, ``inf`` when *total_day* is 0). ``"cost_per_M"`` is 0 when
    *total_year_M* is 0.
    """
    gpu = gpu_cost_hr * num_gpus * hours_day * days_year
    total = gpu + sw_cost + net_cost
    max_tok = throughput * num_gpus * 3600 * hours_day * utilization / 100
    headroom = ((max_tok - total_day) / total_day) if total_day > 0 else float("inf")
    return {
        "gpu": gpu,
        "sw": sw_cost,
        "net": net_cost,
        "total": total,
        "monthly": total / 12,
        "max_tok": max_tok,
        "headroom": headroom,
        "cost_per_M": total / total_year_M if total_year_M > 0 else 0,
    }


def calc_local(hw_cost, num_dev, lifetime, watts, elec_rate,
               hours_day, days_year, throughput, it_support,
               total_day, total_year_M, sw_cost=500):
    """Compute annual cost and capacity of a local / edge deployment.

    Hardware is amortized linearly over *lifetime* years (0 when *lifetime*
    is not positive). Electricity is ``watts x devices x hours/day / 1000 x
    days/year x elec_rate``. *sw_cost* is the annual software cost; it
    defaults to 500, the value that used to be hard-coded here.
    ``"headroom"`` and ``"cost_per_M"`` follow the same conventions as in
    ``calc_self_hosted``.
    """
    hw_a = (hw_cost * num_dev / lifetime) if lifetime > 0 else 0
    elec = watts * num_dev * hours_day / 1000 * days_year * elec_rate
    sw = sw_cost
    total = hw_a + elec + it_support + sw
    max_tok = throughput * 3600 * hours_day * num_dev
    headroom = ((max_tok - total_day) / total_day) if total_day > 0 else float("inf")
    return {
        "hw_a": hw_a,
        "elec": elec,
        "it": it_support,
        "sw": sw,
        "total": total,
        "monthly": total / 12,
        "max_tok": max_tok,
        "headroom": headroom,
        "cost_per_M": total / total_year_M if total_year_M > 0 else 0,
    }


# ═════════════════════════════════════════════════════════════════════════════
# CHART FUNCTIONS
# ═════════════════════════════════════════════════════════════════════════════

# Named colours shared by every chart.
COLORS = {
    "navy": "#1e3a5f",
    "blue": "#2563eb",
    "blue_light": "#93c5fd",
    "blue_muted": "#60a5fa",
    "indigo": "#4f46e5",
    "violet": "#7c3aed",
    "violet_light": "#a78bfa",
    "amber": "#d97706",
    "amber_light": "#fbbf24",
    "emerald": "#059669",
    "emerald_light": "#34d399",
    "slate": "#475569",
    "slate_light": "#94a3b8",
    "teal": "#0d9488",
}

# Layout options spread into every figure's ``update_layout`` call.
CHART_LAYOUT = dict(
    template="none",
    height=420,
    margin=dict(t=56, b=44, l=56, r=28),
    font=dict(family="Inter, system-ui, sans-serif", size=13, color="#1e293b"),
    title_font=dict(size=15, family="Inter, system-ui, sans-serif", color="#0f172a"),
    paper_bgcolor="rgba(0,0,0,0)",
    plot_bgcolor="rgba(0,0,0,0)",
)


def chart_api(providers):
    """Build a grouped bar chart of annual input/output cost per provider.

    *providers* is a sequence of dicts with ``"name"``, ``"a_in"`` and
    ``"a_out"`` keys, as produced by ``calc_api``.
    """
    names = [p["name"] for p in providers]
    fig = go.Figure(data=[
        go.Bar(
            name="Input Cost",
            x=names,
            y=[p["a_in"] for p in providers],
            marker=dict(color=COLORS["blue_light"], line=dict(width=0)),
            text=[fmt_c(p["a_in"], 0) for p in providers],
            textposition="auto",
            textfont=dict(size=12, color="#1e293b"),
            insidetextfont=dict(size=12, color="#1e293b"),
            outsidetextfont=dict(size=12, color="#1e293b"),
        ),
        go.Bar(
            name="Output Cost",
            x=names,
            y=[p["a_out"] for p in providers],
            marker=dict(color=COLORS["blue"], line=dict(width=0)),
            text=[fmt_c(p["a_out"], 0) for p in providers],
            textposition="auto",
            textfont=dict(size=12, color="#ffffff"),
            insidetextfont=dict(size=12, color="#ffffff"),
            outsidetextfont=dict(size=12, color="#1e293b"),
        ),
    ])
    fig.update_layout(
        barmode="group",
        title=dict(text="Annual API Costs by Provider"),
        yaxis=dict(title="Annual Cost ($)", gridcolor="rgba(128,128,128,0.2)", zeroline=False),
        xaxis=dict(tickfont=dict(size=12)),
        legend=dict(orientation="h", y=-0.18, x=0.5, xanchor="center", font=dict(size=12)),
        bargap=0.25,
        bargroupgap=0.08,
        **CHART_LAYOUT,
    )
    return fig


def chart_donut(labels, values, title, colors=None):
    """Build a donut chart of the strictly positive entries of *values*.

    *colors* is an optional palette; it is cycled when there are more labels
    than colours, and a default palette is used when it is ``None`` or empty.
    When no value is positive a single token slice is drawn so the pie always
    has data.
    """
    palette = list(colors) if colors else [
        COLORS["blue"], COLORS["amber"], COLORS["emerald"],
        COLORS["violet"], COLORS["slate"],
    ]
    # Colour is assigned by original position, so a slice keeps its colour
    # whether or not its neighbours are filtered out.
    slices = [
        (label, value, palette[i % len(palette)])
        for i, (label, value) in enumerate(zip(labels, values))
        if value > 0
    ]
    if not slices:
        placeholder = labels[0] if len(labels) else "No data"
        slices = [(placeholder, 0.01, palette[0])]

    fl, fv, fc = zip(*slices)
    fig = go.Figure(data=[go.Pie(
        labels=list(fl),
        values=list(fv),
        hole=0.55,
        marker=dict(colors=list(fc), line=dict(color="rgba(128,128,128,0.3)", width=1.5)),
        textinfo="label+percent",
        textposition="outside",
        textfont=dict(size=12, color="#1e293b"),
        # Plotly hover text uses <br> for line breaks; a raw newline inside
        # the literal is not valid Python.
        hovertemplate="%{label}<br>$%{value:,.0f}<br>%{percent}",
        pull=[0.02] * len(fl),
    )])
    fig.update_layout(
        title=dict(text=title),
        showlegend=False,
        **CHART_LAYOUT,
    )
    return fig


def chart_comparison_bars(categories, values, title):
    """Build a single-series bar chart comparing *values* across *categories*.

    Bar labels show whole dollars for values of at least 100 and two decimals
    below that. The four-colour palette is cycled when there are more bars
    than colours.
    """
    categories = list(categories)
    values = list(values)
    palette = [COLORS["blue"], COLORS["violet"], COLORS["amber"], COLORS["emerald"]]
    bar_colors = [palette[i % len(palette)] for i in range(len(values))]
    fig = go.Figure(data=[go.Bar(
        x=categories,
        y=values,
        marker=dict(color=bar_colors, line=dict(width=0)),
        text=[fmt_c(v, 0) if v >= 100 else fmt_c(v, 2) for v in values],
        textposition="outside",
        textfont=dict(size=12, color="#1e293b"),
    )])
    fig.update_layout(
        title=dict(text=title),
        yaxis=dict(title="Cost ($)", gridcolor="rgba(128,128,128,0.2)", zeroline=False),
        xaxis=dict(tickfont=dict(size=11)),
        bargap=0.35,
        **CHART_LAYOUT,
    )
    return fig


# ═════════════════════════════════════════════════════════════════════════════
# MASTER UPDATE FUNCTION
# ═════════════════════════════════════════════════════════════════════════════

def master_update(
    input_tpr, output_tpr, req_day, days_year,
    model_1, price_in_1, price_out_1,
    model_2, price_in_2, price_out_2,
    model_3, price_in_3, price_out_3,
    model_4_name, price_in_4, price_out_4,
    gpu_cost_hr, num_gpus, gpu_util, gpu_hours, gpu_throughput,
    sh_sw_cost, sh_net_cost,
    hw_cost, num_dev, watts, elec_rate, hw_life,
    local_hours, local_throughput, it_support,
):
    """Recompute every calculator output from the raw UI field values.

    Each numeric argument is first passed through ``sf`` so that empty or
    invalid fields fall back to a default; ``req_day``, ``days_year`` and
    ``hw_life`` are additionally clamped to at least 1.
    """
    # ── Safe conversions ──
    input_tpr = sf(input_tpr, 500)
    output_tpr = sf(output_tpr, 200)
    req_day = max(sf(req_day, 10000), 1)
    days_year = max(sf(days_year, 365), 1)
    price_in_1 = sf(price_in_1)
    price_out_1 = sf(price_out_1)
    price_in_2 = sf(price_in_2)
    price_out_2 = sf(price_out_2)
    price_in_3 = sf(price_in_3)
    price_out_3 = sf(price_out_3)
    price_in_4 = sf(price_in_4)
    price_out_4 = sf(price_out_4)
    gpu_cost_hr = sf(gpu_cost_hr, 2.5)
    num_gpus = sf(num_gpus, 1)
    gpu_util = sf(gpu_util, 70)
    gpu_hours = sf(gpu_hours, 24)
    gpu_throughput = sf(gpu_throughput, 2300)
    sh_sw_cost = sf(sh_sw_cost, 2000)
    sh_net_cost = sf(sh_net_cost, 3000)
    hw_cost = sf(hw_cost, 1999)
    num_dev = sf(num_dev, 1)
    watts = sf(watts, 575)
    elec_rate = sf(elec_rate, 0.12)
sf(elec_rate, 0.12) hw_life = max(sf(hw_life, 3), 1) local_hours = sf(local_hours, 24) local_throughput = sf(local_throughput, 100) it_support = sf(it_support, 5000) # ── Usage ── u = calc_usage(input_tpr, output_tpr, req_day, days_year) total_year_M = u["input_year_M"] + u["output_year_M"] usage_md = ( _cards_row( _card("Input tokens / day", fmt_n(u["input_day"])), _card("Output tokens / day", fmt_n(u["output_day"])), _card("Total tokens / day", fmt_n(u["total_day"]), "highlight"), _card("Input tokens / year", f"{fmt_n(u['input_year_M'])}M"), _card("Output tokens / year", f"{fmt_n(u['output_year_M'])}M"), ) ) # ── API providers ── p1 = calc_api(model_1 or "Provider 1", price_in_1, price_out_1, u["input_year_M"], u["output_year_M"], req_day, days_year) p2 = calc_api(model_2 or "Provider 2", price_in_2, price_out_2, u["input_year_M"], u["output_year_M"], req_day, days_year) p3 = calc_api(model_3 or "Provider 3", price_in_3, price_out_3, u["input_year_M"], u["output_year_M"], req_day, days_year) p4 = calc_api(model_4_name or "Custom Provider", price_in_4, price_out_4, u["input_year_M"], u["output_year_M"], req_day, days_year) provs = [p1, p2, p3, p4] # Deduplicate provider names for DataFrame columns seen = {} for p in provs: n = p["name"] seen[n] = seen.get(n, 0) + 1 if seen[n] > 1: p["col"] = f"{n} ({seen[n]})" else: p["col"] = n # API cost table def api_col(p): return [ fmt_c(p["in_price"]), fmt_c(p["out_price"]), fmt_n(p["in_M"]), fmt_n(p["out_M"]), fmt_c(p["a_in"]), fmt_c(p["a_out"]), fmt_c(p["total"]), fmt_c(p["monthly"]), fmt_c(p["per_1k"], 3), ] api_df = pd.DataFrame({ "Metric": [ "Input price / 1M tokens ($)", "Output price / 1M tokens ($)", "Input tokens/year (M)", "Output tokens/year (M)", "Annual input cost ($)", "Annual output cost ($)", "Total annual cost ($)", "Monthly cost ($)", "Cost per 1K requests ($)", ], p1["col"]: api_col(p1), p2["col"]: api_col(p2), p3["col"]: api_col(p3), p4["col"]: api_col(p4), }) # Smart routing sr = 
calc_smart_routing(provs) smart_md = ( '
' '
Smart Routing Scenario
' '
' 'Route 60% to cheapest provider, 40% to 2nd cheapest (across all 4)
' + _cards_row( _card("Blended Annual Cost", fmt_c(sr["annual"]), "highlight"), _card("Monthly Blended", fmt_c(sr["monthly"])), _card("Savings vs. Average", fmt_p(sr["savings"]), "success"), ) + '
' ) api_fig = chart_api(provs) # ── Self-hosted GPU ── sh = calc_self_hosted( gpu_cost_hr, num_gpus, gpu_hours, days_year, gpu_throughput, gpu_util, sh_sw_cost, sh_net_cost, u["total_day"], total_year_M, ) sh_df = pd.DataFrame({ "Cost Component": [ "GPU compute cost", "Software licenses (est.)", "Data transfer / networking (est.)", ], "Annual Cost ($)": [ fmt_c(sh["gpu"]), fmt_c(sh["sw"]), fmt_c(sh["net"]), ], "Notes": [ "$/hr x GPUs x hrs/day x days/yr", "Inference framework, monitoring, etc.", "Egress, VPN, load balancing", ], }) headroom_str = fmt_p(sh["headroom"]) if sh["headroom"] != float("inf") and sh["headroom"] <= 1000 else "N/A" headroom_style = "warning" if sh["headroom"] < 0 else "default" headroom_warn = "" if sh["headroom"] < 0: headroom_warn = ( '
' 'Capacity insufficient — add more GPUs.
' ) sh_summary = ( _cards_row( _card("Total Annual Cost", fmt_c(sh["total"]), "highlight"), _card("Monthly Cost", fmt_c(sh["monthly"])), _card("Cost per 1M Tokens", fmt_c(sh["cost_per_M"])), ) + '
Capacity Analysis
' + _cards_row( _card("Max Tokens / Day", fmt_n(sh["max_tok"])), _card("Your Daily Need", fmt_n(u["total_day"])), _card("Capacity Headroom", headroom_str, headroom_style), ) + headroom_warn ) sh_fig = chart_donut( ["GPU Compute", "Software", "Networking"], [sh["gpu"], sh["sw"], sh["net"]], "Self-Hosted GPU — Cost Breakdown", ) # ── Local / Edge ── le = calc_local( hw_cost, num_dev, hw_life, watts, elec_rate, local_hours, days_year, local_throughput, it_support, u["total_day"], total_year_M, ) le_df = pd.DataFrame({ "Cost Component": [ "Hardware (amortized)", "Electricity", "IT support / maintenance", "Software / licensing (est.)", ], "Annual Cost ($)": [ fmt_c(le["hw_a"]), fmt_c(le["elec"]), fmt_c(le["it"]), fmt_c(le["sw"]), ], "Notes": [ "Purchase cost x devices / lifetime years", "Watts x devices x hrs/day / 1000 x days/yr x $/kWh", "Annual support cost", "Ollama, llama.cpp, monitoring tools", ], }) le_headroom_str = fmt_p(le["headroom"]) if le["headroom"] != float("inf") and le["headroom"] <= 1000 else "N/A" le_headroom_style = "warning" if le["headroom"] < 0 else "default" le_warn = "" if le["headroom"] < 0: le_warn = ( '
' 'Capacity insufficient — add more devices.
' ) le_summary = ( _cards_row( _card("Total Annual Cost", fmt_c(le["total"]), "highlight"), _card("Monthly Cost", fmt_c(le["monthly"])), _card("Cost per 1M Tokens", fmt_c(le["cost_per_M"])), ) + '
Capacity Analysis
' + _cards_row( _card("Max Tokens / Day", fmt_n(le["max_tok"])), _card("Your Daily Need", fmt_n(u["total_day"])), _card("Capacity Headroom", le_headroom_str, le_headroom_style), ) + le_warn ) le_fig = chart_donut( ["Hardware", "Electricity", "IT Support", "Software"], [le["hw_a"], le["elec"], le["it"], le["sw"]], "Local / Edge — Cost Breakdown", [COLORS["emerald"], COLORS["amber"], COLORS["blue"], COLORS["slate"]], ) # ── Comparison ── valid_api = [p for p in provs if p["total"] > 0] best_api = min(valid_api, key=lambda x: x["total"]) if valid_api else provs[0] best_api_annual = best_api["total"] options = { "API (Best Single)": best_api_annual, "API (Smart Routing)": sr["annual"], "Self-Hosted GPU": sh["total"], "Local / Edge": le["total"], } monthly_opts = { "API (Best Single)": best_api["monthly"], "API (Smart Routing)": sr["monthly"], "Self-Hosted GPU": sh["monthly"], "Local / Edge": le["monthly"], } cpm = { "API (Best Single)": best_api_annual / total_year_M if total_year_M > 0 else 0, "API (Smart Routing)": sr["annual"] / total_year_M if total_year_M > 0 else 0, "Self-Hosted GPU": sh["cost_per_M"], "Local / Edge": le["cost_per_M"], } comp_df = pd.DataFrame({ "Metric": [ "Annual total cost ($)", "Monthly cost ($)", "Cost per 1M tokens ($)", "Data leaves your network?", "ML team required?", "Scales with volume?", "EU AI Act compliant?", "Time to deploy", ], "API (Best Single)": [ fmt_c(best_api_annual), fmt_c(best_api["monthly"]), fmt_c(cpm["API (Best Single)"], 2), "Yes", "No", "Linear cost increase", "Depends on vendor DPA", "Days", ], "API (Smart Routing)": [ fmt_c(sr["annual"]), fmt_c(sr["monthly"]), fmt_c(cpm["API (Smart Routing)"], 2), "Yes", "No", "Linear cost increase", "Depends on vendor DPA", "Days", ], "Self-Hosted GPU": [ fmt_c(sh["total"]), fmt_c(sh["monthly"]), fmt_c(sh["cost_per_M"], 2), "No (your cloud VPC)", "Yes", "Fixed cost (to capacity)", "Full control", "Weeks", ], "Local / Edge": [ fmt_c(le["total"]), fmt_c(le["monthly"]), 
fmt_c(le["cost_per_M"], 2), "No (fully local)", "Minimal", "Fixed cost (to capacity)", "Full control", "Days to weeks", ], }) # Lowest cost option lowest_name = min(options, key=options.get) lowest_val = options[lowest_name] highest_val = max(options.values()) savings_val = highest_val - lowest_val savings_pct = savings_val / highest_val if highest_val > 0 else 0 # Break-even: at what daily request volume does self-hosted beat best API? # API cost scales linearly with volume; self-hosted is ~fixed (GPU rental) api_cost_per_req = best_api["total"] / (req_day * days_year) if req_day > 0 else 0 if api_cost_per_req > 0: be_req_day = sh["total"] / (api_cost_per_req * days_year) if be_req_day <= req_day: breakeven = f"{fmt_n(be_req_day)} req/day" else: breakeven = f"Need {fmt_n(be_req_day)} req/day" else: breakeven = "N/A" comp_summary = ( # Winner banner '
' '
' '
' '
Lowest Cost Option
' f'
{lowest_name}
' '
' '
' '
Best API Provider
' f'
{best_api["name"]}
' '
' # Metric cards + _cards_row( _card("Annual Savings", fmt_c(savings_val), "success"), _card("Savings %", fmt_p(savings_pct), "success"), _card("Break-Even (Self-Hosted)", breakeven, "warning" if "Need" in breakeven else "default"), ) ) cats = list(options.keys()) comp_annual_fig = chart_comparison_bars( cats, list(options.values()), "Annual Cost Comparison" ) comp_per_M_fig = chart_comparison_bars( cats, list(cpm.values()), "Cost per 1M Tokens" ) return ( usage_md, api_df, smart_md, api_fig, sh_df, sh_summary, sh_fig, le_df, le_summary, le_fig, comp_summary, comp_df, comp_annual_fig, comp_per_M_fig, ) # ═════════════════════════════════════════════════════════════════════════════ # UI LAYOUT # ═════════════════════════════════════════════════════════════════════════════ CSS = """ /* ── Base ── */ .gradio-container { max-width: 1280px !important; margin: 0 auto !important; } footer { display: none !important; } /* ── Dropdown list: show all models without scrollbar ── */ ul.options { max-height: none !important; } /* ── Read-only number inputs: look normal, just not editable ── */ input[type="number"]:disabled { opacity: 1 !important; -webkit-text-fill-color: inherit !important; cursor: default !important; } /* ── Header banner ── */ .hero-banner { background: linear-gradient(135deg, #0f172a 0%, #1e293b 50%, #0f172a 100%); border-radius: 14px; padding: 2.25rem 2.75rem; margin-bottom: 0.75rem; border: 1px solid #334155; position: relative; } .hero-banner h1 { color: #f8fafc !important; font-size: 1.75rem !important; font-weight: 700 !important; margin: 0 0 0.35rem 0 !important; letter-spacing: -0.03em; } .hero-banner .byline { color: #94a3b8 !important; font-size: 0.9rem !important; margin: 0 0 0.6rem 0 !important; font-weight: 400; } .hero-banner .tagline { color: #cbd5e1 !important; font-size: 0.925rem !important; margin: 0 !important; line-height: 1.5; } /* ── Section dividers in inputs tab ── */ .section-label h3 { font-size: 0.95rem !important; font-weight: 600 
!important; border-left: 3px solid #3b82f6; padding-left: 0.75rem; margin: 0 !important; } .section-label p { font-size: 0.82rem !important; opacity: 0.65; padding-left: 0.95rem; margin: 0.15rem 0 0 0 !important; } /* ── Tab styling ── */ .tab-nav button { font-weight: 500 !important; font-size: 0.875rem !important; letter-spacing: 0.01em; } .tab-nav button.selected { font-weight: 600 !important; } /* ── Dataframe polish ── */ .dataframe-container table { font-size: 0.875rem; } .dataframe-container th { font-weight: 600 !important; text-transform: uppercase; font-size: 0.78rem !important; letter-spacing: 0.03em; } /* ── Chart containers (theme-aware Plotly) ── */ .plot-container { border-radius: 10px; overflow: hidden; } .plot-container, .plot-container > div, .plot-container .js-plotly-plot, .plot-container .plotly, .plot-container .svg-container, .plot-container .main-svg { background: transparent !important; background-color: transparent !important; } .plot-container .js-plotly-plot .legendtext, .plot-container .js-plotly-plot .gtitle, .plot-container .js-plotly-plot .xtick text, .plot-container .js-plotly-plot .ytick text, .plot-container .js-plotly-plot .g-xtitle text, .plot-container .js-plotly-plot .g-ytitle text { fill: #1e293b !important; } .plot-container .js-plotly-plot .gridlayer line { stroke: rgba(128,128,128,0.2) !important; } .plot-container .js-plotly-plot .zerolinelayer line { stroke: rgba(128,128,128,0.3) !important; } /* ── Metric cards (theme-aware) ── */ .tco-card { flex: 1; border-radius: 10px; padding: 1rem 1.25rem; text-align: center; min-width: 140px; border: 1px solid var(--border-color-primary, rgba(128,128,128,0.2)); background: var(--background-fill-secondary, rgba(128,128,128,0.06)); } .tco-card-label { font-size: 0.72rem; text-transform: uppercase; letter-spacing: 0.06em; opacity: 0.6; margin-bottom: 0.3rem; font-weight: 600; } .tco-card-value { font-size: 1.4rem; font-weight: 700; line-height: 1.2; } /* Card variants */ 
.tco-card-highlight { background: rgba(59,130,246,0.08); border-color: rgba(59,130,246,0.3); } .tco-card-highlight .tco-card-value { color: #3b82f6; } .tco-card-highlight .tco-card-label { color: #3b82f6; opacity: 0.8; } .tco-card-success { background: rgba(5,150,105,0.08); border-color: rgba(5,150,105,0.3); } .tco-card-success .tco-card-value { color: #059669; } .tco-card-success .tco-card-label { color: #059669; opacity: 0.8; } .tco-card-warning { background: rgba(217,119,6,0.08); border-color: rgba(217,119,6,0.3); } .tco-card-warning .tco-card-value { color: #d97706; } .tco-card-warning .tco-card-label { color: #d97706; opacity: 0.8; } """ def build_app(): theme = gr.themes.Default( primary_hue="blue", font=[gr.themes.GoogleFont("Inter"), "system-ui", "sans-serif"], ) # Force light mode by intercepting matchMedia before Gradio reads it. # This prevents Gradio from ever detecting dark mode — no fighting, # no MutationObserver loops, no timing issues. THEME_HEAD = """ """ # Gradio 6.x moved theme/css/head to launch(); detect version for compat gradio_major = int(gr.__version__.split(".")[0]) blocks_kwargs = {"title": "AI Infrastructure TCO Calculator"} if gradio_major < 6: blocks_kwargs.update(theme=theme, css=CSS, head=THEME_HEAD) with gr.Blocks(**blocks_kwargs) as demo: gr.Markdown( '
' '

AI Infrastructure TCO Calculator

' '

By Julien Simon  |  AI Operating Partner, Fortino Capital  |  March 22, 2026 Pricing

' '

Compare API costs, self-hosted GPU, and local/edge deployment for AI inference workloads.
' 'Fill in your parameters below, then explore the analysis tabs.

' '
' ) with gr.Tabs(): # ─────────────────── Tab 1: Your Inputs ─────────────────── with gr.Tab("Your Inputs"): gr.Markdown("### Usage Parameters\nDefine your workload volume and operating schedule", elem_classes="section-label") with gr.Row(): input_tpr = gr.Number( value=500, label="Avg tokens/request (input)", info="Typical prompt length in tokens") output_tpr = gr.Number( value=200, label="Avg tokens/request (output)", info="Typical response length in tokens") req_day = gr.Number( value=10000, label="Requests per day", info="Total API/inference calls per day") days_year = gr.Number( value=365, label="Days of operation / year", info="365 for always-on") usage_md = gr.Markdown() gr.Markdown("---") gr.Markdown("### API Pricing (per 1M tokens)\nSelect models from dropdowns for Providers 1-3. Provider 4 is fully custom.", elem_classes="section-label") with gr.Row(): with gr.Column(): model_1 = gr.Dropdown( choices=API_MODELS, value="Claude Sonnet 4.6", label="Provider 1: Model", ) price_in_1 = gr.Number( value=3, label="Provider 1: Input $ / 1M tokens", interactive=False) price_out_1 = gr.Number( value=15, label="Provider 1: Output $ / 1M tokens", interactive=False) with gr.Column(): model_2 = gr.Dropdown( choices=API_MODELS, value="GPT-5", label="Provider 2: Model") price_in_2 = gr.Number( value=1.25, label="Provider 2: Input $ / 1M tokens", interactive=False) price_out_2 = gr.Number( value=10, label="Provider 2: Output $ / 1M tokens", interactive=False) with gr.Column(): model_3 = gr.Dropdown( choices=API_MODELS, value="Gemini 2.5 Flash", label="Provider 3: Model") price_in_3 = gr.Number( value=0.15, label="Provider 3: Input $ / 1M tokens", interactive=False) price_out_3 = gr.Number( value=0.6, label="Provider 3: Output $ / 1M tokens", interactive=False) with gr.Column(): model_4_name = gr.Textbox( value="Custom Provider", label="Provider 4: Name (custom)", info="e.g., Together.ai, Groq, Fireworks") price_in_4 = gr.Number( value=0.5, label="Provider 4: Input $ / 1M 
tokens") price_out_4 = gr.Number( value=1.5, label="Provider 4: Output $ / 1M tokens") gr.Markdown("---") gr.Markdown("### Self-Hosted GPU Parameters\nCloud GPU rental with your own inference stack", elem_classes="section-label") with gr.Row(): gpu_provider = gr.Dropdown( choices=["(Custom)"] + GPU_PROVIDERS, value="(Custom)", label="GPU provider", info="Select provider, then pick instance" ) gpu_instance = gr.Dropdown( choices=["(Custom)"], value="(Custom)", label="GPU instance", info="Auto-fills hourly cost" ) gpu_cost_hr = gr.Number( value=2.5, label="GPU cost / hour ($)", info="H100: $1.49-$3.90, H200: $2.50-$4.31, B200: $3.75-$5.87") with gr.Row(): num_gpus = gr.Number( value=1, label="Number of instances", info="7B model: 1 GPU. 70B model: 2-4 GPUs") gpu_util = gr.Slider( minimum=0, maximum=100, value=70, step=5, label="GPU utilization (%)", info="60-80% typical") gpu_hours = gr.Number( value=24, label="Hours / day running", info="24 for always-on") with gr.Row(): gpu_throughput = gr.Number( value=2300, label="Throughput (tokens/sec/GPU)", info="vLLM on H100: ~2300 for 8B model") sh_sw_cost = gr.Number( value=2000, label="Software licenses (annual $)", info="Inference framework, monitoring, etc.") sh_net_cost = gr.Number( value=3000, label="Networking (annual $)", info="Egress, VPN, load balancing") gr.Markdown("---") gr.Markdown("### Local / Edge Parameters\nOn-premises or edge deployment with consumer hardware", elem_classes="section-label") with gr.Row(): hw_cost = gr.Number( value=1999, label="Hardware purchase cost ($)", info="One-time CapEx. 
RTX 5090: $1,999") num_dev = gr.Number( value=1, label="Number of devices") watts = gr.Number( value=575, label="Power consumption (W / device)", info="RTX 5090: 575W, M4 Max: ~60W") elec_rate = gr.Number( value=0.12, label="Electricity cost ($ / kWh)", info="Average commercial rate") with gr.Row(): hw_life = gr.Number( value=3, label="Hardware lifetime (years)") local_hours = gr.Number( value=24, label="Hours / day running") local_throughput = gr.Number( value=100, label="Inference throughput (tok/s)", info="llama.cpp quantized: 50-100 on consumer GPU") it_support = gr.Number( value=5000, label="IT support cost (annual $)", info="Maintenance, updates, monitoring") # ─────────────────── Tab 2: API Costs ───────────────────── with gr.Tab("API Costs"): gr.Markdown("### LLM API Cost Analysis\nCosts for 4 API providers based on your usage inputs", elem_classes="section-label") api_table = gr.Dataframe(label="API Cost Comparison", interactive=False) smart_md = gr.Markdown() api_chart = gr.Plot() # ─────────────────── Tab 3: Self-Hosted GPU ─────────────── with gr.Tab("Self-Hosted GPU"): gr.Markdown("### Self-Hosted GPU Cost Analysis\nCloud GPU rental with managed inference infrastructure", elem_classes="section-label") sh_table = gr.Dataframe(label="Cost Breakdown", interactive=False) sh_summary = gr.Markdown() sh_chart = gr.Plot() # ─────────────────── Tab 4: Local / Edge ────────────────── with gr.Tab("Local / Edge"): gr.Markdown("### Local / Edge Deployment Cost Analysis\nOn-premises deployment with consumer or edge hardware", elem_classes="section-label") le_table = gr.Dataframe(label="Cost Breakdown", interactive=False) le_summary = gr.Markdown() le_chart = gr.Plot() # ─────────────────── Tab 5: Comparison ──────────────────── with gr.Tab("Comparison"): gr.Markdown("### Side-by-Side Comparison\nAll costs annualized. 
API (Best Single) = cheapest among your 4 selected providers.", elem_classes="section-label") comp_summary = gr.Markdown() comp_table = gr.Dataframe(label="Comparison", interactive=False) with gr.Row(): comp_annual_chart = gr.Plot() comp_per_M_chart = gr.Plot() # ─────────────────── Tab 6: Model Library ───────────────── with gr.Tab("Model Library"): gr.Markdown("### Model Library — March 22, 2026 Pricing\nSources: [openai.com/api/pricing](https://openai.com/api/pricing), [docs.anthropic.com](https://docs.anthropic.com/en/docs/about-claude/models), [ai.google.dev](https://ai.google.dev/gemini-api/docs/pricing), [openrouter.ai](https://openrouter.ai)", elem_classes="section-label") lib_rows = [] for name, m in MODEL_LIBRARY.items(): inp = f"${m['input']}" if m["input"] is not None else "N/A (self-hosted)" out = f"${m['output']}" if m["output"] is not None else "N/A (self-hosted)" lib_rows.append([name, m["provider"], inp, out, m["notes"]]) lib_df = pd.DataFrame( lib_rows, columns=["Model Name", "Provider", "Input $/M tok", "Output $/M tok", "Notes"], ) gr.Dataframe(value=lib_df, label="Model Library", interactive=False) # ─────────────────── Tab 7: GPU Library ───────────────── with gr.Tab("GPU Library"): gr.Markdown("### GPU Instance Library — March 22, 2026 Pricing\nPer-GPU on-demand hourly rates across major cloud providers\n\n**Regions:** AWS us-east-1, GCP us-central1, Azure East US, CoreWeave US-East, Crusoe us-north1, FluidStack US, Lambda US, RunPod US, Together US, Vast.ai US (marketplace)", elem_classes="section-label") gpu_df = pd.DataFrame([ {"Instance": k, "Provider": v["provider"], "GPU": v["gpu"], "$/hr": v["cost_hr"], "VRAM (GB)": v["vram_gb"], "Notes": v["notes"]} for k, v in GPU_LIBRARY.items() ]) gr.Dataframe(value=gpu_df, label="GPU Instance Pricing (March 22, 2026)", interactive=False) gr.Markdown("*Sources: [aws.amazon.com](https://aws.amazon.com/ec2/pricing/on-demand/), [cloud.google.com](https://cloud.google.com/compute/gpus-pricing), 
[azure.microsoft.com](https://azure.microsoft.com/en-us/pricing/details/virtual-machines/), [coreweave.com](https://www.coreweave.com/pricing), [crusoe.ai](https://www.crusoe.ai/cloud/pricing), [fluidstack.io](https://www.fluidstack.io/pricing), [lambda.ai](https://lambda.ai/pricing), [runpod.io](https://www.runpod.io/gpu-pricing), [together.ai](https://www.together.ai/pricing), [vast.ai](https://vast.ai)*") # ─────────────────── Event Wiring ───────────────────────────── all_inputs = [ input_tpr, output_tpr, req_day, days_year, model_1, price_in_1, price_out_1, model_2, price_in_2, price_out_2, model_3, price_in_3, price_out_3, model_4_name, price_in_4, price_out_4, gpu_cost_hr, num_gpus, gpu_util, gpu_hours, gpu_throughput, sh_sw_cost, sh_net_cost, hw_cost, num_dev, watts, elec_rate, hw_life, local_hours, local_throughput, it_support, ] all_outputs = [ usage_md, api_table, smart_md, api_chart, sh_table, sh_summary, sh_chart, le_table, le_summary, le_chart, comp_summary, comp_table, comp_annual_chart, comp_per_M_chart, ] # Model dropdowns: auto-populate prices, then recalculate for dd, pi, po in [ (model_1, price_in_1, price_out_1), (model_2, price_in_2, price_out_2), (model_3, price_in_3, price_out_3), ]: dd.change( fn=get_model_prices, inputs=[dd], outputs=[pi, po], ).then( fn=master_update, inputs=all_inputs, outputs=all_outputs, ) # GPU provider → update instance list → update price → recalculate gpu_provider.change( fn=get_gpu_instances, inputs=[gpu_provider], outputs=[gpu_instance], ).then( fn=get_gpu_price, inputs=[gpu_instance], outputs=[gpu_cost_hr], ).then( fn=master_update, inputs=all_inputs, outputs=all_outputs, ) # GPU instance dropdown: auto-populate cost, then recalculate gpu_instance.change( fn=get_gpu_price, inputs=[gpu_instance], outputs=[gpu_cost_hr], ).then( fn=master_update, inputs=all_inputs, outputs=all_outputs, ) # All non-dropdown inputs trigger recalculation change_inputs = [ input_tpr, output_tpr, req_day, days_year, model_4_name, 
price_in_4, price_out_4, gpu_cost_hr, num_gpus, gpu_util, gpu_hours, gpu_throughput, sh_sw_cost, sh_net_cost, hw_cost, num_dev, watts, elec_rate, hw_life, local_hours, local_throughput, it_support, ] for inp in change_inputs: inp.change(fn=master_update, inputs=all_inputs, outputs=all_outputs) # Populate all outputs on initial page load demo.load(fn=master_update, inputs=all_inputs, outputs=all_outputs) # Gradio 6+ passes theme/css/head via launch() instead of Blocks() launch_kwargs = {} if gradio_major >= 6: launch_kwargs = {"theme": theme, "css": CSS, "head": THEME_HEAD} return demo, launch_kwargs # ═════════════════════════════════════════════════════════════════════════════ # LAUNCH # ═════════════════════════════════════════════════════════════════════════════ if __name__ == "__main__": demo, launch_kwargs = build_app() demo.launch(**launch_kwargs)