tanishq74
/

retinasense-vit

+#!/usr/bin/env python3
+"""
+Threshold Optimization for RetinaSense v2
+==========================================
+Optimizes classification thresholds per class to maximize F1 scores.
+Current model has AUC=0.91 but uses fixed argmax decision.
+With class imbalance, per-class thresholds can significantly improve performance.
+"""
+import torch
+import torch.nn as nn
+import torchvision.models as models
+from torch.utils.data import Dataset, DataLoader
+import numpy as np
+import pandas as pd
+from pathlib import Path
+import json
+from sklearn.metrics import f1_score, precision_score, recall_score, confusion_matrix, roc_auc_score
+import matplotlib.pyplot as plt
+import seaborn as sns
+from tqdm import tqdm
+import warnings
+warnings.filterwarnings('ignore')
# Compute device: use CUDA when PyTorch reports it as available, else the CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"🔧 Using device: {device}")

# Filesystem layout (all paths are relative to the working directory).
DATA_DIR, CACHE_DIR = Path('./data'), Path('./preprocessed_cache')
OUTPUT_DIR = Path('./outputs_v2')
MODEL_PATH = Path('./outputs_v2/best_model.pth')
# Created at import time so later writes into OUTPUT_DIR cannot fail on a
# missing directory.
OUTPUT_DIR.mkdir(exist_ok=True)

# Data-loading configuration.
BATCH_SIZE, NUM_WORKERS, IMG_SIZE = 64, 8, 300

# Human-readable class names; the list position is the integer class label.
DISEASE_CLASSES = [
    'Normal',
    'Diabetes/DR',
    'Glaucoma',
    'Cataract',
    'AMD',
]
class CachedDataset(Dataset):
    """Serve preprocessed images stored as ``<image_id>.npy`` files.

    The CSV at ``csv_path`` is read in full and divided by row position:
    the first 15% of the rows (rounded down) form the validation split and
    the remaining rows form the training split. Any ``mode`` other than
    ``'train'`` selects the validation rows.
    """

    def __init__(self, csv_path, cache_dir, mode='train'):
        self.cache_dir = Path(cache_dir)
        self.mode = mode

        frame = pd.read_csv(csv_path)
        n_val = int(0.15 * len(frame))
        # Positional split: rows [0, n_val) are validation, the rest training.
        rows = frame.iloc[n_val:] if mode == 'train' else frame.iloc[:n_val]
        self.df = rows.reset_index(drop=True)

        print(f"📊 {mode.upper()} set: {len(self.df)} samples")

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        """Return ``(image_tensor, disease_label, severity_label, image_id)``."""
        record = self.df.iloc[idx]
        image_id = record['image_id']

        # The cached array is used as stored; only its dtype is changed.
        array = np.load(self.cache_dir / f"{image_id}.npy")
        tensor = torch.from_numpy(array).float()

        disease_label = int(record['disease_label'])
        # The severity column is optional; rows without it get label 0.
        if 'severity_label' in record:
            severity_label = int(record['severity_label'])
        else:
            severity_label = 0

        return tensor, disease_label, severity_label, image_id
class MultiTaskModel(nn.Module):
    """Shared EfficientNet-B3 feature extractor with two linear heads.

    One head produces disease-class logits and the other severity-grade
    logits; both read the same pooled, dropout-regularized feature vector.

    Args:
        num_disease_classes: Output width of the disease head.
        num_severity_classes: Output width of the severity head.
        dropout: Dropout probability applied to the shared features.
    """

    def __init__(self, num_disease_classes=5, num_severity_classes=5, dropout=0.4):
        super().__init__()

        # torchvision EfficientNet-B3 with ImageNet weights; every child
        # module except the last one is kept as the feature extractor.
        effnet = models.efficientnet_b3(weights='IMAGENET1K_V1')
        kept_children = list(effnet.children())[:-1]
        self.backbone = nn.Sequential(*kept_children)

        # Width of the feature vector fed to both heads.
        self.feature_dim = 1536

        self.pool = nn.AdaptiveAvgPool2d(1)
        self.dropout = nn.Dropout(dropout)

        # Disease head: 1536 -> 512 -> 256 -> num_disease_classes.
        # Layer order must stay fixed so checkpoint keys keep matching.
        disease_layers = [
            nn.Linear(1536, 512),
            nn.BatchNorm1d(512),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(512, 256),
            nn.BatchNorm1d(256),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(256, num_disease_classes),
        ]
        self.disease_head = nn.Sequential(*disease_layers)

        # Severity head: a single hidden layer, 1536 -> 256 -> num_severity_classes.
        severity_layers = [
            nn.Linear(1536, 256),
            nn.BatchNorm1d(256),
            nn.ReLU(),
            nn.Dropout(0.3),
            nn.Linear(256, num_severity_classes),
        ]
        self.severity_head = nn.Sequential(*severity_layers)

    def forward(self, x):
        """Return ``(disease_logits, severity_logits)`` for the batch ``x``."""
        pooled = self.pool(self.backbone(x))
        shared = self.dropout(pooled.flatten(1))
        # Disease head is evaluated first, then the severity head.
        return self.disease_head(shared), self.severity_head(shared)
def load_model():
    """Build a ``MultiTaskModel`` and restore its weights from ``MODEL_PATH``.

    Returns:
        The model, moved to ``device`` and switched to eval mode.

    Raises:
        KeyError: If the checkpoint has no ``'model_state_dict'`` entry.
    """
    print(f"📥 Loading model from {MODEL_PATH}")
    model = MultiTaskModel(num_disease_classes=5, num_severity_classes=5, dropout=0.4)

    # SECURITY: weights_only=False lets torch.load unpickle arbitrary Python
    # objects, which can execute code. Only load checkpoints from a trusted
    # source; switch to weights_only=True if the checkpoint format allows it.
    checkpoint = torch.load(MODEL_PATH, map_location=device, weights_only=False)

    if 'model_state_dict' not in checkpoint:
        raise KeyError(
            f"Checkpoint {MODEL_PATH} has no 'model_state_dict' entry; "
            f"found keys: {sorted(map(str, checkpoint))}"
        )
    model.load_state_dict(checkpoint['model_state_dict'])
    model = model.to(device)
    model.eval()

    # Training metadata is optional; a missing or None value is treated as
    # "not recorded" so the comparison and formatting below cannot fail.
    epoch = checkpoint.get('epoch', 'unknown')
    val_acc = checkpoint.get('val_acc') or 0
    val_f1 = checkpoint.get('val_macro_f1', checkpoint.get('val_f1')) or 0

    print(f"✅ Loaded model from epoch {epoch}")
    if val_acc > 0:
        print(f"   Val Acc: {val_acc:.2f}%, Macro F1: {val_f1:.3f}")
    return model
def get_predictions(model, dataloader):
    """Run ``model`` over ``dataloader`` and collect disease probabilities.

    Each batch is expected to unpack into four items: images, disease
    labels, severity labels (unused here) and image ids. Gradients are
    disabled for the whole pass.

    Returns:
        ``(all_probs, all_labels, all_ids)``: the softmax probabilities of
        every batch stacked row-wise, the concatenated disease labels, and
        the image ids as one flat list.
    """
    print("🔮 Getting predictions on validation set...")

    prob_batches, label_batches, sample_ids = [], [], []

    with torch.no_grad():
        for batch in tqdm(dataloader, desc="Predicting"):
            images, disease_labels, _severity, batch_ids = batch
            # Only the first model output (disease logits) is used.
            logits, _ = model(images.to(device, non_blocking=True))
            batch_probs = torch.softmax(logits, dim=1)

            prob_batches.append(batch_probs.cpu().numpy())
            label_batches.append(disease_labels.numpy())
            sample_ids.extend(batch_ids)

    all_probs = np.vstack(prob_batches)
    all_labels = np.concatenate(label_batches)

    print(f"✅ Got predictions for {len(all_labels)} samples")
    print(f"   Probability shape: {all_probs.shape}")
    return all_probs, all_labels, sample_ids
def find_optimal_threshold_ovr(y_true, y_probs, class_idx):
    """
    Find the one-vs-rest probability threshold that maximizes F1 for a class.

    Samples whose label equals ``class_idx`` are positives, all others are
    negatives. Every threshold on the grid 0.10, 0.11, ..., 0.90 is tried
    and the lowest threshold reaching the best F1 is returned.

    Args:
        y_true: Ground truth labels (n_samples,)
        y_probs: Predicted probabilities for this class (n_samples,)
        class_idx: Index of the class
    Returns:
        (best_threshold, best_f1) as Python floats. When no threshold gives
        a positive F1 (for example the class has no samples) the result is
        (0.5, 0.0).
    """
    positives = np.asarray(y_true) == class_idx
    # Compare in float64 so the reported threshold is the one applied.
    scores = np.asarray(y_probs, dtype=float)

    # An exact two-decimal grid. np.arange with a float step accumulates
    # rounding error (e.g. 0.30000000000000004) and its endpoint is not
    # reliable, so the grid is built with linspace and rounded.
    thresholds = np.round(np.linspace(0.1, 0.9, 81), 2)

    # Row t holds the binary predictions obtained with thresholds[t].
    predicted = scores[np.newaxis, :] >= thresholds[:, np.newaxis]
    true_pos = (predicted & positives).sum(axis=1)
    false_pos = (predicted & ~positives).sum(axis=1)
    false_neg = (~predicted & positives).sum(axis=1)

    # F1 = 2TP / (2TP + FP + FN), defined as 0 when the denominator is 0.
    denom = 2 * true_pos + false_pos + false_neg
    f1 = np.divide(
        2.0 * true_pos,
        denom,
        out=np.zeros(len(thresholds), dtype=float),
        where=denom > 0,
    )

    # argmax returns the first (lowest-threshold) maximum.
    best = int(np.argmax(f1))
    if f1[best] <= 0:
        return 0.5, 0.0
    return float(thresholds[best]), float(f1[best])
def optimize_thresholds(y_true, y_probs):
    """
    Optimize thresholds for all classes using a one-vs-rest approach.

    One threshold is searched per entry of ``DISEASE_CLASSES``; column ``i``
    of ``y_probs`` is taken as the predicted probability of class ``i``.

    Args:
        y_true: Ground truth labels (n_samples,)
        y_probs: Predicted probabilities (n_samples, n_classes)
    Returns:
        optimal_thresholds: dict mapping class_idx -> threshold
    """
    print("🎯 Optimizing thresholds per class...")
    optimal_thresholds = {}
    # Iterate over the class list itself rather than a hard-coded count, so
    # the loop cannot drift out of sync with DISEASE_CLASSES.
    for class_idx, class_name in enumerate(DISEASE_CLASSES):
        best_thresh, best_f1 = find_optimal_threshold_ovr(
            y_true, y_probs[:, class_idx], class_idx
        )
        # Stored as a plain float so the dict can be serialized directly.
        optimal_thresholds[class_idx] = float(best_thresh)
        n_samples = int((y_true == class_idx).sum())
        print(f"   {class_name:15s}: threshold={best_thresh:.3f}, F1={best_f1:.3f}, n={n_samples}")
    return optimal_thresholds
+def predict_with_thresholds(y_probs, thresholds):
+    """
+    Make predictions using optimized thresholds
+    Strategy: For each sample, take the class with highest probability
+    if it exceeds its threshold. Otherwise, predict the most likely class.
+    """
+    n_samples = y_probs.shape[0]
+    predictions = np.zeros(n_samples, dtype=int)
+    for i in range(n_samples):
+        probs = y_probs[i]
+        # Get class with max probability
+        max_class = np.argmax(probs)
+        max_prob = probs[max_class]
+        # Check if it exceeds threshold
+        if max_prob >= thresholds[max_class]:
+            predictions[i] = max_class
+        else:
+            # Try other classes in order of probability
+            sorted_classes = np.argsort(probs)[::-1]
+            assigned = False
+            for cls in sorted_classes:
+                if probs[cls] >= thresholds[cls]:
+                    predictions[i] = cls
+                    assigned = True
+                    break
+            # If no class exceeds threshold, fall back to max probability
+            if not assigned:
+                predictions[i] = max_class
+    return predictions
def evaluate(y_true, y_pred, y_probs, title="Evaluation"):
    """Print and return overall and per-class classification metrics.

    Per-class metrics and the confusion matrix are always computed over the
    full label set ``0 .. len(DISEASE_CLASSES) - 1``. A class that is
    missing from both ``y_true`` and ``y_pred`` therefore gets a zero row
    instead of shifting the remaining classes out of position.

    Args:
        y_true: Ground truth labels (n_samples,)
        y_pred: Predicted labels (n_samples,)
        y_probs: Predicted probabilities (n_samples, n_classes), used only
            for the AUC-ROC
        title: Heading printed above the report
    Returns:
        dict with keys 'accuracy' (percent), 'macro_f1', 'weighted_f1',
        'auc' (0.0 when it cannot be computed), 'per_class' (class name ->
        f1 / precision / recall / support) and 'confusion_matrix' (nested
        lists).
    """
    print(f"\n{'='*50}")
    print(f"{title}")
    print(f"{'='*50}")

    label_ids = list(range(len(DISEASE_CLASSES)))

    # Overall metrics
    accuracy = float((y_true == y_pred).mean() * 100)
    macro_f1 = float(f1_score(y_true, y_pred, average='macro', zero_division=0))
    weighted_f1 = float(f1_score(y_true, y_pred, average='weighted', zero_division=0))
    print(f"Accuracy: {accuracy:.2f}%")
    print(f"Macro F1: {macro_f1:.3f}")
    print(f"Weighted F1: {weighted_f1:.3f}")

    # AUC-ROC is best-effort: roc_auc_score raises ValueError when it cannot
    # be computed for the given labels (e.g. a class is absent from y_true).
    try:
        auc = float(roc_auc_score(y_true, y_probs, multi_class='ovr', average='macro'))
        print(f"Macro AUC-ROC: {auc:.3f}")
    except ValueError as err:
        auc = 0.0
        print("AUC-ROC: N/A")
        print(f"   (reason: {err})")

    # Per-class metrics, pinned to the full label set so that index i always
    # refers to DISEASE_CLASSES[i].
    print(f"\n{'Class':<15} {'F1':>6} {'Prec':>6} {'Rec':>6} {'Supp':>6}")
    print("-" * 50)
    f1_scores = f1_score(y_true, y_pred, labels=label_ids, average=None, zero_division=0)
    precisions = precision_score(y_true, y_pred, labels=label_ids, average=None, zero_division=0)
    recalls = recall_score(y_true, y_pred, labels=label_ids, average=None, zero_division=0)

    per_class_results = {}
    for i, class_name in enumerate(DISEASE_CLASSES):
        support = int((y_true == i).sum())
        per_class_results[class_name] = {
            'f1': float(f1_scores[i]),
            'precision': float(precisions[i]),
            'recall': float(recalls[i]),
            'support': support
        }
        print(f"{class_name:<15} {f1_scores[i]:>6.3f} {precisions[i]:>6.3f} {recalls[i]:>6.3f} {support:>6d}")

    return {
        'accuracy': accuracy,
        'macro_f1': macro_f1,
        'weighted_f1': weighted_f1,
        'auc': auc,
        'per_class': per_class_results,
        'confusion_matrix': confusion_matrix(y_true, y_pred, labels=label_ids).tolist()
    }
def plot_comparison(results_baseline, results_optimized, optimal_thresholds, output_path):
    """Plot a 2x2 before/after comparison and save it to ``output_path``.

    Panels: per-class F1 (top-left), overall metrics (top-right), the
    per-class thresholds (bottom-left) and the per-class F1 change
    (bottom-right).

    Args:
        results_baseline: Result dict from ``evaluate`` for argmax predictions
        results_optimized: Result dict from ``evaluate`` for thresholded
            predictions
        optimal_thresholds: dict mapping class_idx -> threshold
        output_path: Destination image path
    """
    classes = DISEASE_CLASSES
    class_pos = np.arange(len(classes))
    width = 0.35

    baseline_f1 = [results_baseline['per_class'][c]['f1'] for c in classes]
    optimized_f1 = [results_optimized['per_class'][c]['f1'] for c in classes]

    fig, axes = plt.subplots(2, 2, figsize=(14, 10))
    try:
        # F1 scores comparison
        ax = axes[0, 0]
        ax.bar(class_pos - width/2, baseline_f1, width, label='Baseline (argmax)', alpha=0.8)
        ax.bar(class_pos + width/2, optimized_f1, width, label='Optimized thresholds', alpha=0.8)
        ax.set_ylabel('F1 Score')
        ax.set_title('Per-Class F1 Score Comparison')
        ax.set_xticks(class_pos)
        ax.set_xticklabels(classes, rotation=45, ha='right')
        ax.legend()
        ax.grid(axis='y', alpha=0.3)

        # Overall metrics comparison (accuracy is rescaled from percent to 0-1)
        ax = axes[0, 1]
        metrics = ['Accuracy', 'Macro F1', 'Weighted F1', 'AUC-ROC']
        baseline_vals = [
            results_baseline['accuracy']/100,
            results_baseline['macro_f1'],
            results_baseline['weighted_f1'],
            results_baseline['auc']
        ]
        optimized_vals = [
            results_optimized['accuracy']/100,
            results_optimized['macro_f1'],
            results_optimized['weighted_f1'],
            results_optimized['auc']
        ]
        metric_pos = np.arange(len(metrics))
        ax.bar(metric_pos - width/2, baseline_vals, width, label='Baseline', alpha=0.8)
        ax.bar(metric_pos + width/2, optimized_vals, width, label='Optimized', alpha=0.8)
        ax.set_ylabel('Score')
        ax.set_title('Overall Metrics Comparison')
        ax.set_xticks(metric_pos)
        ax.set_xticklabels(metrics, rotation=45, ha='right')
        ax.legend()
        ax.set_ylim([0, 1])
        ax.grid(axis='y', alpha=0.3)

        # Optimal thresholds
        ax = axes[1, 0]
        thresholds_list = [optimal_thresholds[i] for i in range(len(classes))]
        bars = ax.bar(class_pos, thresholds_list, alpha=0.8, color='steelblue')
        # Add default threshold line
        ax.axhline(y=0.5, color='red', linestyle='--', label='Default (0.5)', alpha=0.5)
        ax.set_ylabel('Optimal Threshold')
        ax.set_title('Optimized Thresholds per Class')
        # Tick positions are fixed before the labels are assigned so each
        # label is tied to its bar.
        ax.set_xticks(class_pos)
        ax.set_xticklabels(classes, rotation=45, ha='right')
        ax.legend()
        ax.set_ylim([0, 1])
        ax.grid(axis='y', alpha=0.3)
        # Add threshold values on bars
        for bar, thresh in zip(bars, thresholds_list):
            height = bar.get_height()
            ax.text(bar.get_x() + bar.get_width()/2., height,
                    f'{thresh:.2f}',
                    ha='center', va='bottom', fontsize=9)

        # Per-class F1 change (horizontal bars, green = gain, red = loss)
        ax = axes[1, 1]
        improvements = [opt - base for base, opt in zip(baseline_f1, optimized_f1)]
        colors = ['red' if delta < 0 else 'green' for delta in improvements]
        bars = ax.barh(classes, improvements, color=colors, alpha=0.7)
        ax.axvline(x=0, color='black', linestyle='-', linewidth=0.8)
        ax.set_xlabel('F1 Score Change')
        ax.set_title('Per-Class F1 Improvement')
        ax.grid(axis='x', alpha=0.3)
        # Add values just past the end of each bar
        for i, val in enumerate(improvements):
            x_pos = val + (0.01 if val > 0 else -0.01)
            ha = 'left' if val > 0 else 'right'
            ax.text(x_pos, i, f'{val:+.3f}', va='center', ha=ha, fontsize=9)

        fig.tight_layout()
        fig.savefig(output_path, dpi=150, bbox_inches='tight')
    finally:
        # Release the figure even when drawing or saving fails; pyplot keeps
        # every figure alive until it is closed explicitly.
        plt.close(fig)
    print(f"📊 Comparison plot saved to {output_path}")
def main():
    """Run the threshold-optimization pipeline end to end.

    Steps: load the checkpointed model, predict on the validation split,
    score plain argmax predictions, search per-class thresholds, score the
    thresholded predictions, then write a JSON report and a comparison plot
    into ``OUTPUT_DIR``.

    Note that the thresholds are searched and scored on the same validation
    predictions.
    """
    rule = "=" * 50

    def banner(heading):
        # Section header: blank line, rule, heading, rule.
        print("\n" + rule)
        print(heading)
        print(rule)

    print("🎯 Threshold Optimization for RetinaSense v2")
    print(rule)

    model = load_model()

    # Validation split of the processed training CSV, served from the cache.
    val_dataset = CachedDataset(
        csv_path=DATA_DIR / 'train_processed.csv',
        cache_dir=CACHE_DIR,
        mode='val',
    )
    loader_options = dict(
        batch_size=BATCH_SIZE,
        shuffle=False,
        num_workers=NUM_WORKERS,
        pin_memory=True,
        persistent_workers=True,
    )
    val_loader = DataLoader(val_dataset, **loader_options)

    y_probs, y_true, img_ids = get_predictions(model, val_loader)

    # Baseline: the most probable class wins.
    y_pred_baseline = np.argmax(y_probs, axis=1)
    banner("BASELINE EVALUATION (argmax)")
    results_baseline = evaluate(y_true, y_pred_baseline, y_probs, "Baseline")

    banner("THRESHOLD OPTIMIZATION")
    optimal_thresholds = optimize_thresholds(y_true, y_probs)
    y_pred_optimized = predict_with_thresholds(y_probs, optimal_thresholds)

    banner("OPTIMIZED EVALUATION")
    results_optimized = evaluate(y_true, y_pred_optimized, y_probs, "Optimized")

    # Persist thresholds together with both evaluation reports.
    output_json = OUTPUT_DIR / 'threshold_optimization_results.json'
    report = {
        'optimal_thresholds': optimal_thresholds,
        'baseline': results_baseline,
        'optimized': results_optimized,
    }
    with open(output_json, 'w') as f:
        json.dump(report, f, indent=2)
    print(f"\n✅ Results saved to {output_json}")

    plot_path = OUTPUT_DIR / 'threshold_comparison.png'
    plot_comparison(results_baseline, results_optimized, optimal_thresholds, plot_path)

    banner("SUMMARY")
    base_f1, opt_f1 = results_baseline['macro_f1'], results_optimized['macro_f1']
    base_acc, opt_acc = results_baseline['accuracy'], results_optimized['accuracy']
    print(f"Baseline Macro F1:   {base_f1:.3f}")
    print(f"Optimized Macro F1:  {opt_f1:.3f}")
    print(f"Improvement:         {opt_f1 - base_f1:+.3f}")
    print(f"\nBaseline Accuracy:   {base_acc:.2f}%")
    print(f"Optimized Accuracy:  {opt_acc:.2f}%")
    print(f"Improvement:         {opt_acc - base_acc:+.2f}%")

    print("\n✅ Threshold optimization complete!")
    print(f"📁 Results saved to {OUTPUT_DIR}/")


if __name__ == '__main__':
    main()