"""
AI Inference TCO Calculator
By Julien Simon | AI Operating Partner, Fortino Capital
Pricing as of March 22, 2026
"""
import gradio as gr
import plotly.graph_objects as go
import pandas as pd
from models import MODEL_LIBRARY, API_MODELS
from gpus import GPU_LIBRARY, GPU_PROVIDERS
# ═════════════════════════════════════════════════════════════════════════════
# HELPERS
# ═════════════════════════════════════════════════════════════════════════════
def sf(val, default=0):
    """Coerce *val* to ``float``, falling back to *default*.

    ``None`` and anything ``float()`` rejects with ``ValueError`` or
    ``TypeError`` yield *default*, returned as-is (it is not converted).
    """
    if val is None:
        return default
    try:
        number = float(val)
    except (ValueError, TypeError):
        return default
    return number
def fmt_c(val, decimals=2):
    """Render *val* as a dollar amount with thousands separators.

    *decimals* is the number of digits kept after the decimal point.
    """
    spec = f",.{decimals}f"
    return "$" + format(val, spec)
def fmt_n(val, decimals=0):
    """Render *val* with thousands separators and *decimals* fractional digits."""
    spec = f",.{decimals}f"
    return format(val, spec)
def fmt_p(val, decimals=1):
    """Render the ratio *val* (1.0 == 100%) as a percentage string."""
    percentage = val * 100
    return format(percentage, f".{decimals}f") + "%"
def _card(label, value, style="default"):
"""Return an HTML metric card using CSS classes for theme compatibility."""
return (
f'
'
)
def _cards_row(*cards):
"""Wrap card HTML strings in a flex row."""
return f'{"".join(cards)}
'
def get_model_prices(model_name):
    """Look up ``(input_price, output_price)`` for *model_name*.

    Both prices are returned as floats. ``(0.0, 0.0)`` is returned when the
    name is empty, is not a key of ``MODEL_LIBRARY``, or the entry's
    ``"input"`` price is ``None``.
    """
    no_price = (0.0, 0.0)
    if not model_name or model_name not in MODEL_LIBRARY:
        return no_price
    entry = MODEL_LIBRARY[model_name]
    if entry["input"] is None:
        return no_price
    return float(entry["input"]), float(entry["output"])
def get_gpu_instances(provider):
    """Build the dropdown update listing the GPU instances of *provider*.

    With no provider, or the "(Custom)" pseudo-provider, the dropdown is reset
    to the single "(Custom)" choice. Otherwise it lists every ``GPU_LIBRARY``
    key whose ``"provider"`` field equals *provider* and selects the first
    one, or "(Custom)" when nothing matches.
    """
    if not provider or provider == "(Custom)":
        return gr.update(choices=["(Custom)"], value="(Custom)")

    matching = []
    for instance_name, spec in GPU_LIBRARY.items():
        if spec["provider"] == provider:
            matching.append(instance_name)

    selected = matching[0] if matching else "(Custom)"
    return gr.update(choices=matching, value=selected)
def get_gpu_price(instance_name):
    """Return the hourly cost of *instance_name* as a float.

    When the name is empty or not a key of ``GPU_LIBRARY`` (e.g. "(Custom)"),
    an empty ``gr.update()`` is returned so the target field keeps its value.
    """
    if not instance_name or instance_name not in GPU_LIBRARY:
        return gr.update()
    hourly_cost = GPU_LIBRARY[instance_name]["cost_hr"]
    return float(hourly_cost)
# ═════════════════════════════════════════════════════════════════════════════
# CALCULATION FUNCTIONS
# ═════════════════════════════════════════════════════════════════════════════
def calc_usage(input_tpr, output_tpr, req_day, days_year):
    """Derive daily and yearly token volumes from per-request averages.

    Returns a dict with raw daily token counts (``input_day``, ``output_day``,
    ``total_day``) and yearly volumes expressed in millions of tokens
    (``input_year_M``, ``output_year_M``).
    """
    tokens_in = input_tpr * req_day
    tokens_out = output_tpr * req_day

    usage = {"input_day": tokens_in, "output_day": tokens_out}
    usage["total_day"] = tokens_in + tokens_out
    usage["input_year_M"] = tokens_in * days_year / 1e6
    usage["output_year_M"] = tokens_out * days_year / 1e6
    return usage
def calc_api(name, in_price, out_price, in_year_M, out_year_M, req_day, days_year):
    """Compute the annual cost profile of one API provider.

    Prices are per million tokens and volumes are in millions of tokens per
    year, so each annual cost is a plain product. The result dict echoes the
    inputs (``name``, ``in_price``, ``out_price``, ``in_M``, ``out_M``) and
    adds ``a_in``, ``a_out``, ``total``, ``monthly`` and ``per_1k`` (cost per
    1,000 requests, 0 when there are no requests in the year).
    """
    annual_input = in_year_M * in_price
    annual_output = out_year_M * out_price
    annual_total = annual_input + annual_output

    yearly_requests = req_day * days_year
    if yearly_requests > 0:
        cost_per_1k = annual_total / yearly_requests * 1000
    else:
        cost_per_1k = 0

    return dict(
        name=name,
        in_price=in_price,
        out_price=out_price,
        in_M=in_year_M,
        out_M=out_year_M,
        a_in=annual_input,
        a_out=annual_output,
        total=annual_total,
        monthly=annual_total / 12,
        per_1k=cost_per_1k,
    )
def calc_smart_routing(providers):
    """Blend the two cheapest priced providers 60/40.

    Providers whose ``"total"`` is not positive are ignored. With one priced
    provider the blend is simply its total; with none, every figure is 0.
    ``savings`` is the blended cost's discount relative to the average total
    of the priced providers.
    """
    totals = sorted(p["total"] for p in providers if p["total"] > 0)
    if not totals:
        return {"annual": 0, "monthly": 0, "savings": 0}

    if len(totals) == 1:
        blended = totals[0]
    else:
        cheapest, runner_up = totals[0], totals[1]
        blended = 0.6 * cheapest + 0.4 * runner_up

    average = sum(totals) / len(totals)
    savings = (1 - blended / average) if average > 0 else 0
    return {"annual": blended, "monthly": blended / 12, "savings": savings}
def calc_self_hosted(gpu_cost_hr, num_gpus, hours_day, days_year, throughput,
                     utilization, sw_cost, net_cost, total_day, total_year_M):
    """Compute annual cost and capacity of a rented-GPU deployment.

    *utilization* is a percentage (0-100) applied to the theoretical
    tokens-per-day ceiling. ``headroom`` is the spare capacity relative to
    *total_day* (negative when demand exceeds capacity, ``inf`` when there is
    no demand). ``cost_per_M`` is 0 when *total_year_M* is not positive.
    """
    compute_cost = gpu_cost_hr * num_gpus * hours_day * days_year
    annual_total = compute_cost + sw_cost + net_cost

    # tokens/sec/GPU -> tokens/day across the fleet, derated by utilization
    daily_capacity = throughput * num_gpus * 3600 * hours_day * utilization / 100

    if total_day > 0:
        spare_ratio = (daily_capacity - total_day) / total_day
    else:
        spare_ratio = float("inf")

    if total_year_M > 0:
        per_million = annual_total / total_year_M
    else:
        per_million = 0

    return {
        "gpu": compute_cost,
        "sw": sw_cost,
        "net": net_cost,
        "total": annual_total,
        "monthly": annual_total / 12,
        "max_tok": daily_capacity,
        "headroom": spare_ratio,
        "cost_per_M": per_million,
    }
def calc_local(hw_cost, num_dev, lifetime, watts, elec_rate, hours_day, days_year,
               throughput, it_support, total_day, total_year_M, sw_cost=500):
    """Compute annual cost and capacity of a local / edge deployment.

    Args:
        hw_cost: Purchase price per device.
        num_dev: Number of devices.
        lifetime: Amortization period in years; hardware cost is counted as 0
            when this is not positive.
        watts: Power draw per device in watts.
        elec_rate: Electricity price per kWh.
        hours_day: Hours of operation per day.
        days_year: Days of operation per year.
        throughput: Tokens per second per device.
        it_support: Annual IT support cost.
        total_day: Tokens required per day.
        total_year_M: Tokens required per year, in millions.
        sw_cost: Annual software / licensing cost. Defaults to 500, the value
            that used to be hard-coded, so existing callers are unaffected.

    Returns:
        Dict with the cost components (``hw_a``, ``elec``, ``it``, ``sw``),
        ``total``, ``monthly``, ``max_tok`` (tokens/day capacity),
        ``headroom`` (spare capacity relative to *total_day*; ``inf`` when
        there is no demand) and ``cost_per_M`` (0 when *total_year_M* is not
        positive).
    """
    hw_a = (hw_cost * num_dev / lifetime) if lifetime > 0 else 0
    # W -> kWh/day -> kWh/year -> cost
    elec = watts * num_dev * hours_day / 1000 * days_year * elec_rate
    total = hw_a + elec + it_support + sw_cost
    max_tok = throughput * 3600 * hours_day * num_dev
    headroom = ((max_tok - total_day) / total_day) if total_day > 0 else float("inf")
    return {
        "hw_a": hw_a, "elec": elec, "it": it_support, "sw": sw_cost,
        "total": total, "monthly": total / 12,
        "max_tok": max_tok, "headroom": headroom,
        "cost_per_M": total / total_year_M if total_year_M > 0 else 0,
    }
# ═════════════════════════════════════════════════════════════════════════════
# CHART FUNCTIONS
# ═════════════════════════════════════════════════════════════════════════════
# Named hex colours shared by every chart.
COLORS = dict(
    navy="#1e3a5f",
    blue="#2563eb",
    blue_light="#93c5fd",
    blue_muted="#60a5fa",
    indigo="#4f46e5",
    violet="#7c3aed",
    violet_light="#a78bfa",
    amber="#d97706",
    amber_light="#fbbf24",
    emerald="#059669",
    emerald_light="#34d399",
    slate="#475569",
    slate_light="#94a3b8",
    teal="#0d9488",
)

# Layout keyword arguments splatted into every figure's ``update_layout`` call.
# Both backgrounds are fully transparent.
CHART_LAYOUT = {
    "template": "none",
    "height": 420,
    "margin": {"t": 56, "b": 44, "l": 56, "r": 28},
    "font": {"family": "Inter, system-ui, sans-serif", "size": 13, "color": "#1e293b"},
    "title_font": {"size": 15, "family": "Inter, system-ui, sans-serif", "color": "#0f172a"},
    "paper_bgcolor": "rgba(0,0,0,0)",
    "plot_bgcolor": "rgba(0,0,0,0)",
}
def chart_api(providers):
    """Build the grouped bar chart of annual input vs. output cost per provider.

    *providers* is a sequence of dicts carrying ``"name"``, ``"a_in"`` and
    ``"a_out"`` (as produced by ``calc_api``). Returns a Plotly figure.
    """
    dark_text = "#1e293b"
    provider_names = [p["name"] for p in providers]

    def cost_bar(series_name, cost_key, fill, inner_text):
        # One trace per cost component; labels outside the bar always use the
        # dark colour, labels inside follow the bar's own contrast colour.
        return go.Bar(
            name=series_name, x=provider_names,
            y=[p[cost_key] for p in providers],
            marker=dict(color=fill, line=dict(width=0)),
            text=[fmt_c(p[cost_key], 0) for p in providers],
            textposition="auto",
            textfont=dict(size=12, color=inner_text),
            insidetextfont=dict(size=12, color=inner_text),
            outsidetextfont=dict(size=12, color=dark_text),
        )

    traces = [
        cost_bar("Input Cost", "a_in", COLORS["blue_light"], dark_text),
        cost_bar("Output Cost", "a_out", COLORS["blue"], "#ffffff"),
    ]
    fig = go.Figure(data=traces)

    y_axis = dict(title="Annual Cost ($)", gridcolor="rgba(128,128,128,0.2)", zeroline=False)
    legend = dict(orientation="h", y=-0.18, x=0.5, xanchor="center", font=dict(size=12))
    fig.update_layout(
        barmode="group",
        title=dict(text="Annual API Costs by Provider"),
        yaxis=y_axis,
        xaxis=dict(tickfont=dict(size=12)),
        legend=legend,
        bargap=0.25, bargroupgap=0.08,
        **CHART_LAYOUT,
    )
    return fig
def chart_donut(labels, values, title, colors=None):
    """Build a donut chart of a cost breakdown.

    Args:
        labels: Slice names.
        values: Slice amounts, parallel to *labels*; non-positive amounts are
            left out of the chart.
        title: Figure title.
        colors: Optional slice colours, parallel to *labels*. Defaults to a
            five-colour palette. A list shorter than *labels* is cycled
            rather than silently truncating the slices.

    Returns:
        A Plotly figure. When no slice has a positive amount, a single
        near-zero placeholder slice is drawn so the chart still renders.
    """
    if not colors:
        colors = [COLORS["blue"], COLORS["amber"], COLORS["emerald"],
                  COLORS["violet"], COLORS["slate"]]

    # Colour is tied to the slice's original position so that dropping an
    # empty slice never shifts the colours of the remaining ones.
    slices = [
        (label, value, colors[i % len(colors)])
        for i, (label, value) in enumerate(zip(labels, values))
        if value > 0
    ]
    if not slices:
        placeholder = labels[0] if len(labels) else "No data"
        slices = [(placeholder, 0.01, colors[0])]

    slice_labels, slice_values, slice_colors = zip(*slices)
    fig = go.Figure(data=[go.Pie(
        labels=list(slice_labels), values=list(slice_values), hole=0.55,
        marker=dict(colors=list(slice_colors),
                    line=dict(color="rgba(128,128,128,0.3)", width=1.5)),
        textinfo="label+percent", textposition="outside",
        textfont=dict(size=12, color="#1e293b"),
        # Plotly hover text is single-line HTML: <br> separates the rows and
        # <extra></extra> hides the secondary trace-name box.
        hovertemplate="<b>%{label}</b><br>$%{value:,.0f}<br>%{percent}<extra></extra>",
        pull=[0.02] * len(slice_labels),
    )])
    fig.update_layout(
        title=dict(text=title), showlegend=False,
        **CHART_LAYOUT,
    )
    return fig
def chart_comparison_bars(categories, values, title):
    """Build a single-series bar chart comparing deployment options.

    Bar labels show whole dollars for amounts of 100 or more and two decimals
    below that. Returns a Plotly figure.
    """
    palette = [COLORS[key] for key in ("blue", "violet", "amber", "emerald")]

    bar_labels = []
    for amount in values:
        digits = 0 if amount >= 100 else 2
        bar_labels.append(fmt_c(amount, digits))

    bars = go.Bar(
        x=categories, y=values,
        marker=dict(color=palette, line=dict(width=0)),
        text=bar_labels,
        textposition="outside", textfont=dict(size=12, color="#1e293b"),
    )
    fig = go.Figure(data=[bars])

    y_axis = dict(title="Cost ($)", gridcolor="rgba(128,128,128,0.2)", zeroline=False)
    fig.update_layout(
        title=dict(text=title),
        yaxis=y_axis,
        xaxis=dict(tickfont=dict(size=11)),
        bargap=0.35,
        **CHART_LAYOUT,
    )
    return fig
# ═════════════════════════════════════════════════════════════════════════════
# MASTER UPDATE FUNCTION
# ═════════════════════════════════════════════════════════════════════════════
def _headroom_display(headroom, remedy):
    """Describe a capacity-headroom ratio for display.

    Returns ``(text, card_style, warning_html)``: the formatted percentage
    ("N/A" when the ratio is infinite or above 1000), the metric-card style,
    and an HTML warning that is non-empty only when headroom is negative,
    i.e. capacity falls short of demand. *remedy* completes the warning
    sentence.
    """
    displayable = headroom != float("inf") and headroom <= 1000
    text = fmt_p(headroom) if displayable else "N/A"
    if headroom < 0:
        warning = (
            '<p style="color:#d97706;font-weight:600;margin:0.5rem 0 0 0;">'
            f'Capacity insufficient — {remedy}</p>'
        )
        return text, "warning", warning
    return text, "default", ""


def master_update(
    input_tpr, output_tpr, req_day, days_year,
    model_1, price_in_1, price_out_1,
    model_2, price_in_2, price_out_2,
    model_3, price_in_3, price_out_3,
    model_4_name, price_in_4, price_out_4,
    gpu_cost_hr, num_gpus, gpu_util, gpu_hours, gpu_throughput,
    sh_sw_cost, sh_net_cost,
    hw_cost, num_dev, watts, elec_rate, hw_life,
    local_hours, local_throughput, it_support,
):
    """Recompute every table, summary and chart from the raw input values.

    Parameters arrive positionally in the order of the UI's input list: usage
    figures, four (name, input price, output price) provider triples, the
    self-hosted GPU settings, then the local / edge settings. Numeric values
    that are ``None`` or not convertible to float fall back to defaults;
    requests/day, days/year and hardware lifetime are clamped to at least 1.

    Returns:
        A 14-tuple in the order of the UI's output list:
        ``(usage_md, api_df, smart_md, api_fig, sh_df, sh_summary, sh_fig,
        le_df, le_summary, le_fig, comp_summary, comp_df, comp_annual_fig,
        comp_per_M_fig)``.
    """
    # Local import: only this function needs it, and it keeps the block
    # self-contained.
    from html import escape

    # ── Safe conversions ──
    input_tpr = sf(input_tpr, 500)
    output_tpr = sf(output_tpr, 200)
    req_day = max(sf(req_day, 10000), 1)
    days_year = max(sf(days_year, 365), 1)
    price_in_1 = sf(price_in_1)
    price_out_1 = sf(price_out_1)
    price_in_2 = sf(price_in_2)
    price_out_2 = sf(price_out_2)
    price_in_3 = sf(price_in_3)
    price_out_3 = sf(price_out_3)
    price_in_4 = sf(price_in_4)
    price_out_4 = sf(price_out_4)
    gpu_cost_hr = sf(gpu_cost_hr, 2.5)
    num_gpus = sf(num_gpus, 1)
    gpu_util = sf(gpu_util, 70)
    gpu_hours = sf(gpu_hours, 24)
    gpu_throughput = sf(gpu_throughput, 2300)
    sh_sw_cost = sf(sh_sw_cost, 2000)
    sh_net_cost = sf(sh_net_cost, 3000)
    hw_cost = sf(hw_cost, 1999)
    num_dev = sf(num_dev, 1)
    watts = sf(watts, 575)
    elec_rate = sf(elec_rate, 0.12)
    hw_life = max(sf(hw_life, 3), 1)
    local_hours = sf(local_hours, 24)
    local_throughput = sf(local_throughput, 100)
    it_support = sf(it_support, 5000)

    # ── Usage ──
    u = calc_usage(input_tpr, output_tpr, req_day, days_year)
    total_year_M = u["input_year_M"] + u["output_year_M"]
    usage_md = _cards_row(
        _card("Input tokens / day", fmt_n(u["input_day"])),
        _card("Output tokens / day", fmt_n(u["output_day"])),
        _card("Total tokens / day", fmt_n(u["total_day"]), "highlight"),
        _card("Input tokens / year", f"{fmt_n(u['input_year_M'])}M"),
        _card("Output tokens / year", f"{fmt_n(u['output_year_M'])}M"),
    )

    # ── API providers ──
    api_inputs = [
        (model_1 or "Provider 1", price_in_1, price_out_1),
        (model_2 or "Provider 2", price_in_2, price_out_2),
        (model_3 or "Provider 3", price_in_3, price_out_3),
        (model_4_name or "Custom Provider", price_in_4, price_out_4),
    ]
    provs = [
        calc_api(name, p_in, p_out,
                 u["input_year_M"], u["output_year_M"], req_day, days_year)
        for name, p_in, p_out in api_inputs
    ]

    # Give every provider a unique DataFrame column name. Duplicates get a
    # " (2)", " (3)", ... suffix; the search continues until the candidate is
    # free, so it cannot collide with the "Metric" column or with a provider
    # that is literally named like a suffixed duplicate.
    used_columns = {"Metric"}
    for p in provs:
        candidate = p["name"]
        occurrence = 1
        while candidate in used_columns:
            occurrence += 1
            candidate = f'{p["name"]} ({occurrence})'
        used_columns.add(candidate)
        p["col"] = candidate

    # API cost table
    def api_col(p):
        return [
            fmt_c(p["in_price"]), fmt_c(p["out_price"]),
            fmt_n(p["in_M"]), fmt_n(p["out_M"]),
            fmt_c(p["a_in"]), fmt_c(p["a_out"]),
            fmt_c(p["total"]), fmt_c(p["monthly"]),
            fmt_c(p["per_1k"], 3),
        ]

    api_columns = {
        "Metric": [
            "Input price / 1M tokens ($)", "Output price / 1M tokens ($)",
            "Input tokens/year (M)", "Output tokens/year (M)",
            "Annual input cost ($)", "Annual output cost ($)",
            "Total annual cost ($)", "Monthly cost ($)",
            "Cost per 1K requests ($)",
        ],
    }
    for p in provs:
        api_columns[p["col"]] = api_col(p)
    api_df = pd.DataFrame(api_columns)

    # Smart routing
    sr = calc_smart_routing(provs)
    smart_md = (
        '<div style="margin-top:1rem;">'
        '<h4 style="margin:0 0 0.25rem 0;">Smart Routing Scenario</h4>'
        '<p style="margin:0 0 0.5rem 0;opacity:0.7;font-size:0.85rem;">'
        'Route 60% to cheapest provider, 40% to 2nd cheapest (across all 4)</p>'
        + _cards_row(
            _card("Blended Annual Cost", fmt_c(sr["annual"]), "highlight"),
            _card("Monthly Blended", fmt_c(sr["monthly"])),
            _card("Savings vs. Average", fmt_p(sr["savings"]), "success"),
        )
        + '</div>'
    )
    api_fig = chart_api(provs)

    capacity_heading = '<h4 style="margin:1rem 0 0.25rem 0;">Capacity Analysis</h4>'

    # ── Self-hosted GPU ──
    sh = calc_self_hosted(
        gpu_cost_hr, num_gpus, gpu_hours, days_year, gpu_throughput,
        gpu_util, sh_sw_cost, sh_net_cost, u["total_day"], total_year_M,
    )
    sh_df = pd.DataFrame({
        "Cost Component": [
            "GPU compute cost",
            "Software licenses (est.)", "Data transfer / networking (est.)",
        ],
        "Annual Cost ($)": [
            fmt_c(sh["gpu"]),
            fmt_c(sh["sw"]), fmt_c(sh["net"]),
        ],
        "Notes": [
            "$/hr x GPUs x hrs/day x days/yr",
            "Inference framework, monitoring, etc.",
            "Egress, VPN, load balancing",
        ],
    })
    headroom_str, headroom_style, headroom_warn = _headroom_display(
        sh["headroom"], "add more GPUs."
    )
    sh_summary = (
        _cards_row(
            _card("Total Annual Cost", fmt_c(sh["total"]), "highlight"),
            _card("Monthly Cost", fmt_c(sh["monthly"])),
            _card("Cost per 1M Tokens", fmt_c(sh["cost_per_M"])),
        )
        + capacity_heading
        + _cards_row(
            _card("Max Tokens / Day", fmt_n(sh["max_tok"])),
            _card("Your Daily Need", fmt_n(u["total_day"])),
            _card("Capacity Headroom", headroom_str, headroom_style),
        )
        + headroom_warn
    )
    sh_fig = chart_donut(
        ["GPU Compute", "Software", "Networking"],
        [sh["gpu"], sh["sw"], sh["net"]],
        "Self-Hosted GPU — Cost Breakdown",
    )

    # ── Local / Edge ──
    le = calc_local(
        hw_cost, num_dev, hw_life, watts, elec_rate, local_hours, days_year,
        local_throughput, it_support, u["total_day"], total_year_M,
    )
    le_df = pd.DataFrame({
        "Cost Component": [
            "Hardware (amortized)", "Electricity",
            "IT support / maintenance", "Software / licensing (est.)",
        ],
        "Annual Cost ($)": [
            fmt_c(le["hw_a"]), fmt_c(le["elec"]),
            fmt_c(le["it"]), fmt_c(le["sw"]),
        ],
        "Notes": [
            "Purchase cost x devices / lifetime years",
            "Watts x devices x hrs/day / 1000 x days/yr x $/kWh",
            "Annual support cost",
            "Ollama, llama.cpp, monitoring tools",
        ],
    })
    le_headroom_str, le_headroom_style, le_warn = _headroom_display(
        le["headroom"], "add more devices."
    )
    le_summary = (
        _cards_row(
            _card("Total Annual Cost", fmt_c(le["total"]), "highlight"),
            _card("Monthly Cost", fmt_c(le["monthly"])),
            _card("Cost per 1M Tokens", fmt_c(le["cost_per_M"])),
        )
        + capacity_heading
        + _cards_row(
            _card("Max Tokens / Day", fmt_n(le["max_tok"])),
            _card("Your Daily Need", fmt_n(u["total_day"])),
            _card("Capacity Headroom", le_headroom_str, le_headroom_style),
        )
        + le_warn
    )
    le_fig = chart_donut(
        ["Hardware", "Electricity", "IT Support", "Software"],
        [le["hw_a"], le["elec"], le["it"], le["sw"]],
        "Local / Edge — Cost Breakdown",
        [COLORS["emerald"], COLORS["amber"], COLORS["blue"], COLORS["slate"]],
    )

    # ── Comparison ──
    # Providers with no price (total of 0) are not candidates for "best";
    # if none is priced, the first provider is shown as a placeholder.
    valid_api = [p for p in provs if p["total"] > 0]
    best_api = min(valid_api, key=lambda x: x["total"]) if valid_api else provs[0]
    best_api_annual = best_api["total"]
    options = {
        "API (Best Single)": best_api_annual,
        "API (Smart Routing)": sr["annual"],
        "Self-Hosted GPU": sh["total"],
        "Local / Edge": le["total"],
    }
    cpm = {
        "API (Best Single)": best_api_annual / total_year_M if total_year_M > 0 else 0,
        "API (Smart Routing)": sr["annual"] / total_year_M if total_year_M > 0 else 0,
        "Self-Hosted GPU": sh["cost_per_M"],
        "Local / Edge": le["cost_per_M"],
    }
    comp_df = pd.DataFrame({
        "Metric": [
            "Annual total cost ($)", "Monthly cost ($)", "Cost per 1M tokens ($)",
            "Data leaves your network?", "ML team required?",
            "Scales with volume?", "EU AI Act compliant?", "Time to deploy",
        ],
        "API (Best Single)": [
            fmt_c(best_api_annual), fmt_c(best_api["monthly"]),
            fmt_c(cpm["API (Best Single)"], 2),
            "Yes", "No", "Linear cost increase",
            "Depends on vendor DPA", "Days",
        ],
        "API (Smart Routing)": [
            fmt_c(sr["annual"]), fmt_c(sr["monthly"]),
            fmt_c(cpm["API (Smart Routing)"], 2),
            "Yes", "No", "Linear cost increase",
            "Depends on vendor DPA", "Days",
        ],
        "Self-Hosted GPU": [
            fmt_c(sh["total"]), fmt_c(sh["monthly"]),
            fmt_c(sh["cost_per_M"], 2),
            "No (your cloud VPC)", "Yes", "Fixed cost (to capacity)",
            "Full control", "Weeks",
        ],
        "Local / Edge": [
            fmt_c(le["total"]), fmt_c(le["monthly"]),
            fmt_c(le["cost_per_M"], 2),
            "No (fully local)", "Minimal", "Fixed cost (to capacity)",
            "Full control", "Days to weeks",
        ],
    })

    # Lowest cost option
    lowest_name = min(options, key=options.get)
    lowest_val = options[lowest_name]
    highest_val = max(options.values())
    savings_val = highest_val - lowest_val
    savings_pct = savings_val / highest_val if highest_val > 0 else 0

    # Break-even: at what daily request volume does self-hosted beat best API?
    # API cost scales linearly with volume; self-hosted is ~fixed (GPU rental)
    api_cost_per_req = best_api["total"] / (req_day * days_year) if req_day > 0 else 0
    if api_cost_per_req > 0:
        be_req_day = sh["total"] / (api_cost_per_req * days_year)
        if be_req_day <= req_day:
            breakeven = f"{fmt_n(be_req_day)} req/day"
        else:
            breakeven = f"Need {fmt_n(be_req_day)} req/day"
    else:
        breakeven = "N/A"

    comp_summary = (
        # Winner banner. The provider name can be free text typed by the
        # user, so it is escaped before being embedded in HTML.
        _cards_row(
            _card("Lowest Cost Option", lowest_name, "success"),
            _card("Best API Provider", escape(str(best_api["name"])), "highlight"),
        )
        # Metric cards
        + _cards_row(
            _card("Annual Savings", fmt_c(savings_val), "success"),
            _card("Savings %", fmt_p(savings_pct), "success"),
            _card("Break-Even (Self-Hosted)", breakeven,
                  "warning" if "Need" in breakeven else "default"),
        )
    )

    cats = list(options.keys())
    comp_annual_fig = chart_comparison_bars(
        cats, list(options.values()), "Annual Cost Comparison"
    )
    comp_per_M_fig = chart_comparison_bars(
        cats, list(cpm.values()), "Cost per 1M Tokens"
    )

    return (
        usage_md,
        api_df, smart_md, api_fig,
        sh_df, sh_summary, sh_fig,
        le_df, le_summary, le_fig,
        comp_summary, comp_df, comp_annual_fig, comp_per_M_fig,
    )
# ═════════════════════════════════════════════════════════════════════════════
# UI LAYOUT
# ═════════════════════════════════════════════════════════════════════════════
# Custom stylesheet for the app. build_app() hands it to Gradio through the
# `css` argument (on gr.Blocks for Gradio < 6, on launch() for Gradio >= 6).
# The .tco-card* rules style the metric cards and the .hero-banner rules
# style the page header.
CSS = """
/* ── Base ── */
.gradio-container { max-width: 1280px !important; margin: 0 auto !important; }
footer { display: none !important; }
/* ── Dropdown list: show all models without scrollbar ── */
ul.options { max-height: none !important; }
/* ── Read-only number inputs: look normal, just not editable ── */
input[type="number"]:disabled {
opacity: 1 !important;
-webkit-text-fill-color: inherit !important;
cursor: default !important;
}
/* ── Header banner ── */
.hero-banner {
background: linear-gradient(135deg, #0f172a 0%, #1e293b 50%, #0f172a 100%);
border-radius: 14px;
padding: 2.25rem 2.75rem;
margin-bottom: 0.75rem;
border: 1px solid #334155;
position: relative;
}
.hero-banner h1 {
color: #f8fafc !important; font-size: 1.75rem !important;
font-weight: 700 !important; margin: 0 0 0.35rem 0 !important;
letter-spacing: -0.03em;
}
.hero-banner .byline {
color: #94a3b8 !important; font-size: 0.9rem !important;
margin: 0 0 0.6rem 0 !important; font-weight: 400;
}
.hero-banner .tagline {
color: #cbd5e1 !important; font-size: 0.925rem !important;
margin: 0 !important; line-height: 1.5;
}
/* ── Section dividers in inputs tab ── */
.section-label h3 {
font-size: 0.95rem !important; font-weight: 600 !important;
border-left: 3px solid #3b82f6;
padding-left: 0.75rem; margin: 0 !important;
}
.section-label p {
font-size: 0.82rem !important; opacity: 0.65;
padding-left: 0.95rem; margin: 0.15rem 0 0 0 !important;
}
/* ── Tab styling ── */
.tab-nav button {
font-weight: 500 !important; font-size: 0.875rem !important;
letter-spacing: 0.01em;
}
.tab-nav button.selected {
font-weight: 600 !important;
}
/* ── Dataframe polish ── */
.dataframe-container table { font-size: 0.875rem; }
.dataframe-container th {
font-weight: 600 !important; text-transform: uppercase;
font-size: 0.78rem !important; letter-spacing: 0.03em;
}
/* ── Chart containers (theme-aware Plotly) ── */
.plot-container { border-radius: 10px; overflow: hidden; }
.plot-container,
.plot-container > div,
.plot-container .js-plotly-plot,
.plot-container .plotly,
.plot-container .svg-container,
.plot-container .main-svg {
background: transparent !important;
background-color: transparent !important;
}
.plot-container .js-plotly-plot .legendtext,
.plot-container .js-plotly-plot .gtitle,
.plot-container .js-plotly-plot .xtick text,
.plot-container .js-plotly-plot .ytick text,
.plot-container .js-plotly-plot .g-xtitle text,
.plot-container .js-plotly-plot .g-ytitle text {
fill: #1e293b !important;
}
.plot-container .js-plotly-plot .gridlayer line {
stroke: rgba(128,128,128,0.2) !important;
}
.plot-container .js-plotly-plot .zerolinelayer line {
stroke: rgba(128,128,128,0.3) !important;
}
/* ── Metric cards (theme-aware) ── */
.tco-card {
flex: 1; border-radius: 10px; padding: 1rem 1.25rem;
text-align: center; min-width: 140px;
border: 1px solid var(--border-color-primary, rgba(128,128,128,0.2));
background: var(--background-fill-secondary, rgba(128,128,128,0.06));
}
.tco-card-label {
font-size: 0.72rem; text-transform: uppercase; letter-spacing: 0.06em;
opacity: 0.6; margin-bottom: 0.3rem; font-weight: 600;
}
.tco-card-value {
font-size: 1.4rem; font-weight: 700; line-height: 1.2;
}
/* Card variants */
.tco-card-highlight {
background: rgba(59,130,246,0.08);
border-color: rgba(59,130,246,0.3);
}
.tco-card-highlight .tco-card-value { color: #3b82f6; }
.tco-card-highlight .tco-card-label { color: #3b82f6; opacity: 0.8; }
.tco-card-success {
background: rgba(5,150,105,0.08);
border-color: rgba(5,150,105,0.3);
}
.tco-card-success .tco-card-value { color: #059669; }
.tco-card-success .tco-card-label { color: #059669; opacity: 0.8; }
.tco-card-warning {
background: rgba(217,119,6,0.08);
border-color: rgba(217,119,6,0.3);
}
.tco-card-warning .tco-card-value { color: #d97706; }
.tco-card-warning .tco-card-label { color: #d97706; opacity: 0.8; }
"""
def build_app():
    """Assemble the Gradio UI and wire its events.

    Returns:
        ``(demo, launch_kwargs)``: the ``gr.Blocks`` app and the keyword
        arguments to pass to ``demo.launch()``. On Gradio < 6 the theme, CSS
        and head snippet are given to ``gr.Blocks`` and ``launch_kwargs`` is
        empty; on Gradio >= 6 they are returned in ``launch_kwargs`` instead.
    """
    theme = gr.themes.Default(
        primary_hue="blue",
        font=[gr.themes.GoogleFont("Inter"), "system-ui", "sans-serif"],
    )

    # Force light mode by intercepting matchMedia before Gradio reads it.
    # This prevents Gradio from ever detecting dark mode — no fighting,
    # no MutationObserver loops, no timing issues. The charts and CSS use
    # fixed dark text colours, so a dark background would make them unreadable.
    THEME_HEAD = """
<script>
(function () {
  var nativeMatchMedia = window.matchMedia.bind(window);
  window.matchMedia = function (query) {
    var asksForDark = typeof query === "string"
      && query.indexOf("prefers-color-scheme") !== -1
      && query.indexOf("dark") !== -1;
    if (!asksForDark) {
      return nativeMatchMedia(query);
    }
    return {
      matches: false,
      media: query,
      onchange: null,
      addListener: function () {},
      removeListener: function () {},
      addEventListener: function () {},
      removeEventListener: function () {},
      dispatchEvent: function () { return false; }
    };
  };
})();
</script>
"""

    # Gradio 6.x moved theme/css/head to launch(); detect version for compat
    gradio_major = int(gr.__version__.split(".")[0])
    blocks_kwargs = {"title": "AI Infrastructure TCO Calculator"}
    if gradio_major < 6:
        blocks_kwargs.update(theme=theme, css=CSS, head=THEME_HEAD)

    with gr.Blocks(**blocks_kwargs) as demo:
        # Page header; styled by the .hero-banner rules of the stylesheet.
        gr.Markdown(
            '<div class="hero-banner">'
            '<h1>AI Infrastructure TCO Calculator</h1>'
            '<p class="byline">By Julien Simon | AI Operating Partner, Fortino Capital | March 22, 2026 Pricing</p>'
            '<p class="tagline">Compare API costs, self-hosted GPU, and local/edge deployment for AI inference workloads.<br>'
            'Fill in your parameters below, then explore the analysis tabs.</p>'
            '</div>'
        )

        with gr.Tabs():
            # ─────────────────── Tab 1: Your Inputs ───────────────────
            with gr.Tab("Your Inputs"):
                gr.Markdown("### Usage Parameters\nDefine your workload volume and operating schedule", elem_classes="section-label")
                with gr.Row():
                    input_tpr = gr.Number(
                        value=500, label="Avg tokens/request (input)",
                        info="Typical prompt length in tokens")
                    output_tpr = gr.Number(
                        value=200, label="Avg tokens/request (output)",
                        info="Typical response length in tokens")
                    req_day = gr.Number(
                        value=10000, label="Requests per day",
                        info="Total API/inference calls per day")
                    days_year = gr.Number(
                        value=365, label="Days of operation / year",
                        info="365 for always-on")
                usage_md = gr.Markdown()
                gr.Markdown("---")

                gr.Markdown("### API Pricing (per 1M tokens)\nSelect models from dropdowns for Providers 1-3. Provider 4 is fully custom.", elem_classes="section-label")
                with gr.Row():
                    with gr.Column():
                        model_1 = gr.Dropdown(
                            choices=API_MODELS, value="Claude Sonnet 4.6",
                            label="Provider 1: Model",
                        )
                        price_in_1 = gr.Number(
                            value=3, label="Provider 1: Input $ / 1M tokens", interactive=False)
                        price_out_1 = gr.Number(
                            value=15, label="Provider 1: Output $ / 1M tokens", interactive=False)
                    with gr.Column():
                        model_2 = gr.Dropdown(
                            choices=API_MODELS, value="GPT-5",
                            label="Provider 2: Model")
                        price_in_2 = gr.Number(
                            value=1.25, label="Provider 2: Input $ / 1M tokens", interactive=False)
                        price_out_2 = gr.Number(
                            value=10, label="Provider 2: Output $ / 1M tokens", interactive=False)
                    with gr.Column():
                        model_3 = gr.Dropdown(
                            choices=API_MODELS, value="Gemini 2.5 Flash",
                            label="Provider 3: Model")
                        price_in_3 = gr.Number(
                            value=0.15, label="Provider 3: Input $ / 1M tokens", interactive=False)
                        price_out_3 = gr.Number(
                            value=0.6, label="Provider 3: Output $ / 1M tokens", interactive=False)
                    with gr.Column():
                        model_4_name = gr.Textbox(
                            value="Custom Provider",
                            label="Provider 4: Name (custom)",
                            info="e.g., Together.ai, Groq, Fireworks")
                        price_in_4 = gr.Number(
                            value=0.5, label="Provider 4: Input $ / 1M tokens")
                        price_out_4 = gr.Number(
                            value=1.5, label="Provider 4: Output $ / 1M tokens")
                gr.Markdown("---")

                gr.Markdown("### Self-Hosted GPU Parameters\nCloud GPU rental with your own inference stack", elem_classes="section-label")
                with gr.Row():
                    gpu_provider = gr.Dropdown(
                        choices=["(Custom)"] + GPU_PROVIDERS,
                        value="(Custom)",
                        label="GPU provider",
                        info="Select provider, then pick instance"
                    )
                    gpu_instance = gr.Dropdown(
                        choices=["(Custom)"],
                        value="(Custom)",
                        label="GPU instance",
                        info="Auto-fills hourly cost"
                    )
                    gpu_cost_hr = gr.Number(
                        value=2.5, label="GPU cost / hour ($)",
                        info="H100: $1.49-$3.90, H200: $2.50-$4.31, B200: $3.75-$5.87")
                with gr.Row():
                    num_gpus = gr.Number(
                        value=1, label="Number of instances",
                        info="7B model: 1 GPU. 70B model: 2-4 GPUs")
                    gpu_util = gr.Slider(
                        minimum=0, maximum=100, value=70, step=5,
                        label="GPU utilization (%)",
                        info="60-80% typical")
                    gpu_hours = gr.Number(
                        value=24, label="Hours / day running",
                        info="24 for always-on")
                with gr.Row():
                    gpu_throughput = gr.Number(
                        value=2300, label="Throughput (tokens/sec/GPU)",
                        info="vLLM on H100: ~2300 for 8B model")
                    sh_sw_cost = gr.Number(
                        value=2000, label="Software licenses (annual $)",
                        info="Inference framework, monitoring, etc.")
                    sh_net_cost = gr.Number(
                        value=3000, label="Networking (annual $)",
                        info="Egress, VPN, load balancing")
                gr.Markdown("---")

                gr.Markdown("### Local / Edge Parameters\nOn-premises or edge deployment with consumer hardware", elem_classes="section-label")
                with gr.Row():
                    hw_cost = gr.Number(
                        value=1999, label="Hardware purchase cost ($)",
                        info="One-time CapEx. RTX 5090: $1,999")
                    num_dev = gr.Number(
                        value=1, label="Number of devices")
                    watts = gr.Number(
                        value=575, label="Power consumption (W / device)",
                        info="RTX 5090: 575W, M4 Max: ~60W")
                    elec_rate = gr.Number(
                        value=0.12, label="Electricity cost ($ / kWh)",
                        info="Average commercial rate")
                with gr.Row():
                    hw_life = gr.Number(
                        value=3, label="Hardware lifetime (years)")
                    local_hours = gr.Number(
                        value=24, label="Hours / day running")
                    local_throughput = gr.Number(
                        value=100, label="Inference throughput (tok/s)",
                        info="llama.cpp quantized: 50-100 on consumer GPU")
                    it_support = gr.Number(
                        value=5000, label="IT support cost (annual $)",
                        info="Maintenance, updates, monitoring")

            # ─────────────────── Tab 2: API Costs ─────────────────────
            with gr.Tab("API Costs"):
                gr.Markdown("### LLM API Cost Analysis\nCosts for 4 API providers based on your usage inputs", elem_classes="section-label")
                api_table = gr.Dataframe(label="API Cost Comparison", interactive=False)
                smart_md = gr.Markdown()
                api_chart = gr.Plot()

            # ─────────────────── Tab 3: Self-Hosted GPU ───────────────
            with gr.Tab("Self-Hosted GPU"):
                gr.Markdown("### Self-Hosted GPU Cost Analysis\nCloud GPU rental with managed inference infrastructure", elem_classes="section-label")
                sh_table = gr.Dataframe(label="Cost Breakdown", interactive=False)
                sh_summary = gr.Markdown()
                sh_chart = gr.Plot()

            # ─────────────────── Tab 4: Local / Edge ──────────────────
            with gr.Tab("Local / Edge"):
                gr.Markdown("### Local / Edge Deployment Cost Analysis\nOn-premises deployment with consumer or edge hardware", elem_classes="section-label")
                le_table = gr.Dataframe(label="Cost Breakdown", interactive=False)
                le_summary = gr.Markdown()
                le_chart = gr.Plot()

            # ─────────────────── Tab 5: Comparison ────────────────────
            with gr.Tab("Comparison"):
                gr.Markdown("### Side-by-Side Comparison\nAll costs annualized. API (Best Single) = cheapest among your 4 selected providers.", elem_classes="section-label")
                comp_summary = gr.Markdown()
                comp_table = gr.Dataframe(label="Comparison", interactive=False)
                with gr.Row():
                    comp_annual_chart = gr.Plot()
                    comp_per_M_chart = gr.Plot()

            # ─────────────────── Tab 6: Model Library ─────────────────
            with gr.Tab("Model Library"):
                gr.Markdown("### Model Library — March 22, 2026 Pricing\nSources: [openai.com/api/pricing](https://openai.com/api/pricing), [docs.anthropic.com](https://docs.anthropic.com/en/docs/about-claude/models), [ai.google.dev](https://ai.google.dev/gemini-api/docs/pricing), [openrouter.ai](https://openrouter.ai)", elem_classes="section-label")
                lib_rows = []
                for name, m in MODEL_LIBRARY.items():
                    # Entries without an API price are shown as self-hosted only.
                    inp = f"${m['input']}" if m["input"] is not None else "N/A (self-hosted)"
                    out = f"${m['output']}" if m["output"] is not None else "N/A (self-hosted)"
                    lib_rows.append([name, m["provider"], inp, out, m["notes"]])
                lib_df = pd.DataFrame(
                    lib_rows,
                    columns=["Model Name", "Provider", "Input $/M tok",
                             "Output $/M tok", "Notes"],
                )
                gr.Dataframe(value=lib_df, label="Model Library", interactive=False)

            # ─────────────────── Tab 7: GPU Library ─────────────────
            with gr.Tab("GPU Library"):
                gr.Markdown("### GPU Instance Library — March 22, 2026 Pricing\nPer-GPU on-demand hourly rates across major cloud providers\n\n**Regions:** AWS us-east-1, GCP us-central1, Azure East US, CoreWeave US-East, Crusoe us-north1, FluidStack US, Lambda US, RunPod US, Together US, Vast.ai US (marketplace)", elem_classes="section-label")
                gpu_df = pd.DataFrame([
                    {"Instance": k, "Provider": v["provider"], "GPU": v["gpu"],
                     "$/hr": v["cost_hr"], "VRAM (GB)": v["vram_gb"], "Notes": v["notes"]}
                    for k, v in GPU_LIBRARY.items()
                ])
                gr.Dataframe(value=gpu_df, label="GPU Instance Pricing (March 22, 2026)", interactive=False)
                gr.Markdown("*Sources: [aws.amazon.com](https://aws.amazon.com/ec2/pricing/on-demand/), [cloud.google.com](https://cloud.google.com/compute/gpus-pricing), [azure.microsoft.com](https://azure.microsoft.com/en-us/pricing/details/virtual-machines/), [coreweave.com](https://www.coreweave.com/pricing), [crusoe.ai](https://www.crusoe.ai/cloud/pricing), [fluidstack.io](https://www.fluidstack.io/pricing), [lambda.ai](https://lambda.ai/pricing), [runpod.io](https://www.runpod.io/gpu-pricing), [together.ai](https://www.together.ai/pricing), [vast.ai](https://vast.ai)*")

        # ─────────────────── Event Wiring ─────────────────────────────
        # Order matters: both lists must match master_update's parameter
        # order and return-tuple order respectively.
        all_inputs = [
            input_tpr, output_tpr, req_day, days_year,
            model_1, price_in_1, price_out_1,
            model_2, price_in_2, price_out_2,
            model_3, price_in_3, price_out_3,
            model_4_name, price_in_4, price_out_4,
            gpu_cost_hr, num_gpus, gpu_util, gpu_hours, gpu_throughput,
            sh_sw_cost, sh_net_cost,
            hw_cost, num_dev, watts, elec_rate, hw_life,
            local_hours, local_throughput, it_support,
        ]
        all_outputs = [
            usage_md,
            api_table, smart_md, api_chart,
            sh_table, sh_summary, sh_chart,
            le_table, le_summary, le_chart,
            comp_summary, comp_table, comp_annual_chart, comp_per_M_chart,
        ]

        # Model dropdowns: auto-populate prices, then recalculate
        for dd, pi, po in [
            (model_1, price_in_1, price_out_1),
            (model_2, price_in_2, price_out_2),
            (model_3, price_in_3, price_out_3),
        ]:
            dd.change(
                fn=get_model_prices, inputs=[dd], outputs=[pi, po],
            ).then(
                fn=master_update, inputs=all_inputs, outputs=all_outputs,
            )

        # GPU provider → update instance list → update price → recalculate
        gpu_provider.change(
            fn=get_gpu_instances, inputs=[gpu_provider], outputs=[gpu_instance],
        ).then(
            fn=get_gpu_price, inputs=[gpu_instance], outputs=[gpu_cost_hr],
        ).then(
            fn=master_update, inputs=all_inputs, outputs=all_outputs,
        )

        # GPU instance dropdown: auto-populate cost, then recalculate
        gpu_instance.change(
            fn=get_gpu_price, inputs=[gpu_instance], outputs=[gpu_cost_hr],
        ).then(
            fn=master_update, inputs=all_inputs, outputs=all_outputs,
        )

        # All non-dropdown inputs trigger recalculation
        change_inputs = [
            input_tpr, output_tpr, req_day, days_year,
            model_4_name, price_in_4, price_out_4,
            gpu_cost_hr, num_gpus, gpu_util, gpu_hours, gpu_throughput,
            sh_sw_cost, sh_net_cost,
            hw_cost, num_dev, watts, elec_rate, hw_life,
            local_hours, local_throughput, it_support,
        ]
        for inp in change_inputs:
            inp.change(fn=master_update, inputs=all_inputs, outputs=all_outputs)

        # Populate all outputs on initial page load
        demo.load(fn=master_update, inputs=all_inputs, outputs=all_outputs)

    # Gradio 6+ passes theme/css/head via launch() instead of Blocks()
    launch_kwargs = {}
    if gradio_major >= 6:
        launch_kwargs = {"theme": theme, "css": CSS, "head": THEME_HEAD}
    return demo, launch_kwargs
# ═════════════════════════════════════════════════════════════════════════════
# LAUNCH
# ═════════════════════════════════════════════════════════════════════════════
# Script entry point: build the app, then start the Gradio server with the
# version-dependent launch arguments returned by build_app().
if __name__ == "__main__":
    demo, launch_kwargs = build_app()
    demo.launch(**launch_kwargs)