"""FraudSentinel Tier-1 inference helper. Loads the trained LightGBM scorers and returns a risk score + a routing decision (whether to escalate the case to the Tier-2 LLM for explanation). Usage: from infer import CardScorer, AMLScorer cs = CardScorer() # loads cc_lgbm_model.txt + preproc out = cs.score(transaction_dict) # -> {"risk": 0.97, "route_to_llm": True} """ import json import numpy as np import pandas as pd import lightgbm as lgb import joblib # ---------------- Card fraud scorer ---------------- class CardScorer: def __init__(self, model="cc_lgbm_model.txt", preproc="cc_lgbm_preproc.joblib", metrics="cc_lgbm_metrics.json"): self.model = lgb.Booster(model_file=model) pp = joblib.load(preproc) self.cat_p95 = pp["cat_p95"]; self.cat_rate = pp["cat_rate"] self.features = pp["features"]; self.cat_cols = pp["cat_cols"] self.threshold = json.load(open(metrics))["routing_threshold"] def _row(self, t): import math R = 6371.0 def hav(a, b, c, d): p1, p2 = math.radians(a), math.radians(c) dphi = math.radians(c-a); dl = math.radians(d-b) x = math.sin(dphi/2)**2 + math.cos(p1)*math.cos(p2)*math.sin(dl/2)**2 return 2*R*math.asin(min(1, math.sqrt(x))) dt = pd.to_datetime(t["trans_date_trans_time"]) amt = float(t["amt"]); cat = t["category"] p95 = self.cat_p95.get(cat, np.median(list(self.cat_p95.values()))) f = { "log_amt": np.log1p(amt), "amt": amt, "hour": dt.hour, "dow": dt.dayofweek, "is_night": int(dt.hour >= 22 or dt.hour <= 4), "age": (dt - pd.to_datetime(t["dob"])).days/365.25, "geo_km": hav(t["lat"], t["long"], t["merch_lat"], t["merch_long"]), "log_city_pop": np.log1p(t.get("city_pop", 0)), "mins_since_last": t.get("mins_since_last", 99999), "tx_24h": t.get("tx_24h", 0), "amt_24h": t.get("amt_24h", 0.0), "tx_1h": t.get("tx_1h", 0), "amt_over_p95": int(amt > p95), "amt_to_p95": amt/(p95+1e-6), "cat_fraud_rate": self.cat_rate.get(cat, 0.0), "category": cat, "gender": t.get("gender"), "state": t.get("state"), } df = pd.DataFrame([f]) for c in self.cat_cols: df[c] = df[c].astype("category") return df[self.features] def score(self, t): r = float(self.model.predict(self._row(t))[0]) return {"risk": r, "route_to_llm": r >= self.threshold, "tier": "card"} # ---------------- AML scorer (recall-oriented pre-filter) ---------------- class AMLScorer: def __init__(self, model="aml_lgbm_model.txt", preproc="aml_lgbm_preproc.joblib", metrics="aml_lgbm_metrics.json"): self.model = lgb.Booster(model_file=model) pp = joblib.load(preproc) self.g = pp["graph"]; self.features = pp["features"]; self.cat_cols = pp["cat_cols"] self.threshold = json.load(open(metrics))["routing_threshold"] def _row(self, t): ts = pd.to_datetime(t["Timestamp"], format="%Y/%m/%d %H:%M", errors="coerce") snd, rcv = t["Account"], t["Account.1"] paid = float(t["Amount Paid"]); recv = float(t["Amount Received"]) gm = self.g f = { "hour": ts.hour if pd.notna(ts) else 0, "dow": ts.dayofweek if pd.notna(ts) else 0, "log_paid": np.log1p(paid), "log_recv": np.log1p(recv), "amt_diff": abs(paid-recv), "ccy_mismatch": int(t["Receiving Currency"] != t["Payment Currency"]), "self_loop": int(snd == rcv), "is_round": int(paid % 100 == 0), "same_bank": int(t["From Bank"] == t["To Bank"]), "snd_out_deg": gm["out_deg"].get(snd, 0), "snd_in_deg": gm["in_deg"].get(snd, 0), "snd_out_cnt": gm["out_cnt"].get(snd, 0), "snd_in_cnt": gm["in_cnt"].get(snd, 0), "snd_out_amt_mean": gm["out_amt_mean"].get(snd, 0.0), "rcv_out_deg": gm["out_deg"].get(rcv, 0), "rcv_in_deg": gm["in_deg"].get(rcv, 0), "rcv_in_cnt": gm["in_cnt"].get(rcv, 0), "gather_scatter": int(gm["in_deg"].get(snd, 0) >= 5 and gm["out_deg"].get(snd, 0) >= 5), "amt_to_snd_mean": paid/(gm["out_amt_mean"].get(snd, 0.0)+1e-6), "Receiving Currency": t["Receiving Currency"], "Payment Currency": t["Payment Currency"], "Payment Format": t["Payment Format"], } df = pd.DataFrame([f]) for c in self.cat_cols: df[c] = df[c].astype("category") return df[self.features] def score(self, t): r = float(self.model.predict(self._row(t))[0]) return {"risk": r, "route_to_llm": r >= self.threshold, "tier": "aml", "note": "recall-oriented pre-filter; use GNN for precision"} if __name__ == "__main__": cs = CardScorer() demo = {"trans_date_trans_time": "2020-06-10 02:14:00", "amt": 980.0, "category": "shopping_net", "lat": 40.0, "long": -75.0, "merch_lat": 48.0, "merch_long": 2.0, "dob": "1985-01-01", "city_pop": 50000, "gender": "M", "state": "PA", "tx_24h": 5, "amt_24h": 3200.0, "tx_1h": 3, "mins_since_last": 4} print("CARD demo:", cs.score(demo))