import io

import pandas as pd
import timm
import torch
import torch.nn as nn
from PIL import Image
from sklearn.preprocessing import PowerTransformer
from torchvision import transforms


class OneHotEncoder:
    """Map integer class indices back to category and ingredient names.

    Both name lists are de-duplicated and sorted, so index ``i`` refers to
    the i-th name in sorted order.
    """

    def __init__(self, all_category_list, all_ingredient_list):
        """Build the index-to-name lookup tables.

        Args:
            all_category_list: Iterable of category names (duplicates allowed).
            all_ingredient_list: Iterable of ingredient names (duplicates
                allowed).
        """
        self.all_food_categories = sorted(set(all_category_list))
        self.all_ingredients = sorted(set(all_ingredient_list))
        self.int_to_category = dict(enumerate(self.all_food_categories))
        self.int_to_ingredient = dict(enumerate(self.all_ingredients))


class CoAtNetModelPytorch(nn.Module):
    """CoAtNet backbone feeding a shared trunk and six task heads.

    The heads are: category logits, ingredient logits, and one scalar
    regression head each for calories, carbohydrates, protein and fat.
    Attribute names and layer order define the ``state_dict`` keys and must
    stay in sync with the checkpoint being loaded.
    """

    def __init__(self, num_categories, num_ingredients):
        """Create the backbone and all heads.

        Args:
            num_categories: Output width of the category head.
            num_ingredients: Output width of the ingredient head.
        """
        super().__init__()
        # num_classes=0 drops timm's classifier so the backbone returns
        # pooled features of width ``num_features``.
        self.coatnet = timm.create_model(
            'timm/coatnet_3_rw_224.sw_in12k', pretrained=False, num_classes=0
        )
        coatnet_output_dim = self.coatnet.num_features

        self.shared_layers = nn.Sequential(
            nn.Linear(coatnet_output_dim, 1024),
            nn.GELU(),
            nn.BatchNorm1d(1024),
            nn.Dropout(0.3),
        )
        self.category_head = nn.Sequential(
            nn.Linear(1024, 256),
            nn.GELU(),
            nn.BatchNorm1d(256),
            nn.Linear(256, num_categories),
        )
        self.ingredients_head = nn.Sequential(
            nn.Linear(1024, 512),
            nn.GELU(),
            nn.BatchNorm1d(512),
            nn.Linear(512, num_ingredients),
        )
        self.calorie_head = self._make_regression_head()
        self.carbs_head = self._make_regression_head()
        self.protein_head = self._make_regression_head()
        self.fat_head = self._make_regression_head()

    @staticmethod
    def _make_regression_head():
        """Return a fresh 1024 -> 64 -> 32 -> 1 regression head."""
        return nn.Sequential(
            nn.Linear(1024, 64),
            nn.GELU(),
            nn.BatchNorm1d(64),
            nn.Linear(64, 32),
            nn.GELU(),
            nn.Linear(32, 1),
        )

    def forward(self, x):
        """Run the backbone and every head on ``x``.

        Args:
            x: Input batch passed straight to the CoAtNet backbone.

        Returns:
            Dict with ``category_output`` and ``ingredients_output`` logits
            plus ``calorie_output``, ``carbs_output``, ``protein_output`` and
            ``fat_output``, each with its trailing size-1 dimension squeezed.
        """
        features = self.coatnet(x)
        shared_output = self.shared_layers(features)
        return {
            "category_output": self.category_head(shared_output),
            "ingredients_output": self.ingredients_head(shared_output),
            "calorie_output": self.calorie_head(shared_output).squeeze(-1),
            "carbs_output": self.carbs_head(shared_output).squeeze(-1),
            "protein_output": self.protein_head(shared_output).squeeze(-1),
            "fat_output": self.fat_head(shared_output).squeeze(-1),
        }


class Predictor:
    """Load a trained checkpoint and turn image bytes into a food prediction."""

    # Metadata columns holding the nutrition targets; the text before "(" is
    # lower-cased and used as the key into ``fitted_transformers``.
    NUTRITION_COLUMNS = ('Kalori(kcal)', 'Karbohidrat(g)', 'Protein(g)', 'Lemak(g)')

    def __init__(
        self,
        model_path,
        metadata_path,
        *,
        device="cpu",
        confidence_threshold=0.75,
        ingredient_threshold=0.60,
        max_ingredients=8,
    ):
        """Read the metadata, rebuild the label spaces and load the weights.

        Args:
            model_path: Path to a ``state_dict`` checkpoint for
                ``CoAtNetModelPytorch``.
            metadata_path: Path to a tab-separated file with at least the
                columns ``Kategori``, ``Komposisi`` and the four nutrition
                columns in ``NUTRITION_COLUMNS``.
            device: Torch device used for the model and input tensors.
            confidence_threshold: Minimum softmax probability for the top
                category; below it the image is reported as unrecognised.
            ingredient_threshold: Sigmoid probability an ingredient must
                exceed to be reported.
            max_ingredients: Upper bound on the number of reported
                ingredients.
        """
        self.device = device
        self.confidence_threshold = confidence_threshold
        self.ingredient_threshold = ingredient_threshold
        self.max_ingredients = max_ingredients

        metadata = pd.read_csv(metadata_path, sep="\t").dropna(subset=["Komposisi"])
        metadata = metadata[metadata["Komposisi"].str.strip() != ""]

        # Drop missing categories first: sorting a mix of NaN and str raises
        # TypeError. Without NaN present the result is unchanged.
        all_categories = sorted(metadata["Kategori"].dropna().unique().tolist())
        unique_ingredients = {
            ingredient.strip()
            for composition in metadata["Komposisi"]
            if isinstance(composition, str)
            for ingredient in composition.split(",")
        }
        self.one_hot_encoder = OneHotEncoder(all_categories, list(unique_ingredients))

        # Head widths must match the checkpoint, so they are derived from the
        # same metadata-driven label sets.
        self.model = CoAtNetModelPytorch(
            num_categories=len(all_categories),
            num_ingredients=len(unique_ingredients),
        )
        self.model.load_state_dict(
            torch.load(model_path, map_location=self.device, weights_only=True)
        )
        self.model.to(self.device)
        self.model.eval()

        # NOTE(review): the transformers are refit here from the metadata;
        # presumably this mirrors the training-time target transform — confirm.
        self.fitted_transformers = {}
        for col in self.NUTRITION_COLUMNS:
            power_transformer = PowerTransformer(method='yeo-johnson', standardize=False)
            power_transformer.fit(metadata[col].dropna().values.reshape(-1, 1))
            self.fitted_transformers[col.split('(')[0].lower()] = power_transformer

        # NOTE(review): mean/std look like the usual ImageNet statistics —
        # confirm they match what the checkpoint was trained with.
        self.data_transform = transforms.Compose([
            transforms.Resize((224, 224)),
            transforms.ToTensor(),
            transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        ])

    def _inverse_transform_value(self, value_tensor, name):
        """Map a regression output back through the fitted transformer.

        Args:
            value_tensor: Tensor holding the model output for one image.
            name: Key into ``fitted_transformers`` (e.g. ``'kalori'``).

        Returns:
            The inverse-transformed value rounded to two decimals and
            floored at 0.
        """
        value_np = value_tensor.cpu().numpy().reshape(-1, 1)
        restored = self.fitted_transformers[name].inverse_transform(value_np)[0][0]
        return max(0, round(float(restored), 2))

    def _select_ingredients(self, ingredient_logits):
        """Return the comma-separated ingredient names for one image.

        Ingredients whose sigmoid probability exceeds
        ``ingredient_threshold`` are kept. If more than ``max_ingredients``
        qualify, the most probable ones are kept (not simply the first ones
        by index), then listed in index order.
        """
        probs = torch.sigmoid(ingredient_logits)[0]  # predict() feeds a batch of one
        selected = (probs > self.ingredient_threshold).nonzero(as_tuple=True)[0]
        if selected.numel() > self.max_ingredients:
            strongest = torch.topk(probs[selected], self.max_ingredients).indices
            selected = selected[strongest].sort().values

        names = [
            self.one_hot_encoder.int_to_ingredient.get(index, "")
            for index in selected.tolist()
        ]
        # filter(None, ...) drops empty names, which the metadata split can
        # produce (e.g. from a trailing comma).
        return ", ".join(filter(None, names)) or (
            f"Tidak ada komposisi yang dominan (>{self.ingredient_threshold:.0%})"
        )

    def predict(self, image_bytes):
        """Classify one encoded image and estimate its nutrition.

        Args:
            image_bytes: Raw bytes of an image file readable by PIL.

        Returns:
            On a confident prediction, a dict with ``food_name``,
            ``nutrition`` (``calories``, ``carbohydrates``, ``protein``,
            ``fat``) and ``ingredients``. When the top category probability
            is below ``confidence_threshold``, a dict with ``food_name`` set
            to ``"Tidak Dikenali"``, an explanatory ``details`` string,
            ``nutrition`` of ``None`` and empty ``ingredients``.
        """
        image = Image.open(io.BytesIO(image_bytes)).convert("RGB")
        image_tensor = self.data_transform(image).unsqueeze(0).to(self.device)

        with torch.no_grad():
            outputs = self.model(image_tensor)

        category_probs = torch.softmax(outputs["category_output"], dim=1)
        max_prob, category_idx = torch.max(category_probs, dim=1)
        if max_prob.item() < self.confidence_threshold:
            return {
                "food_name": "Tidak Dikenali",
                # Report the threshold actually applied instead of a fixed
                # figure that can drift out of sync with it.
                "details": (
                    "Tingkat kepercayaan prediksi di bawah "
                    f"{self.confidence_threshold:.0%}."
                ),
                "nutrition": None,
                "ingredients": "",
            }

        raw_name = self.one_hot_encoder.int_to_category.get(
            category_idx.item(), "Tidak Dikenali"
        )
        food_name = raw_name.replace("_", " ").title()

        return {
            "food_name": food_name,
            "nutrition": {
                "calories": self._inverse_transform_value(outputs["calorie_output"], 'kalori'),
                "carbohydrates": self._inverse_transform_value(outputs["carbs_output"], 'karbohidrat'),
                "protein": self._inverse_transform_value(outputs["protein_output"], 'protein'),
                "fat": self._inverse_transform_value(outputs["fat_output"], 'lemak'),
            },
            "ingredients": self._select_ingredients(outputs["ingredients_output"]),
        }