Spaces:

Sathya77
/

Dense-Iso-ViT-SR

Running

App Files Community

SathyaSantosh77 committed 10 days ago

Commit

a043ce4

1 Parent(s): 66e1339

fix zerogpu device handling

Browse files

Files changed (1) hide show

app.py +12 -30

app.py CHANGED Viewed

@@ -182,30 +182,23 @@ def ssim(pred, target, window_size=11):
 # ── Load model ────────────────────────────────────────────────────────────────
-DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
-model = ImageSRTransformer().to(DEVICE)
 checkpoint = torch.load(
     "sr_best_v4_resumed.pt",
-    map_location=DEVICE,
     weights_only=False
 )
 model.load_state_dict(checkpoint["model_state_dict"])
 model.eval()
-print(f"Model loaded — device: {DEVICE}")
-print(f"Best val PSNR: {checkpoint['val_psnr']:.2f} dB")
 # ── Inference ─────────────────────────────────────────────────────────────────
 @spaces.GPU
 def run_sr(img_pil):
-    """
-    Takes any PIL image.
-    1. Crops centre 256×256 → ground truth
-    2. Bicubic downscale to 64×64 → LR
-    3. Runs Dense-Iso-ViT SR
-    4. Returns (lr_display, sr_output, ground_truth, metrics_str)
-    """
     w, h = img_pil.size
     if w < 256 or h < 256:
         scale = max(256 / w, 256 / h)
@@ -213,42 +206,33 @@ def run_sr(img_pil):
             (int(w * scale), int(h * scale)), Image.BICUBIC)
         w, h = img_pil.size
-    # centre crop 256×256
     left = (w - 256) // 2
     top  = (h - 256) // 2
     gt   = img_pil.crop((left, top, left + 256, top + 256))
-    # LR — bicubic 64×64
     lr   = gt.resize((64, 64), Image.BICUBIC)
-    # tensors
-    lr_t  = TF.to_tensor(lr).unsqueeze(0).to(DEVICE)
-    gt_t  = TF.to_tensor(gt).unsqueeze(0).to(DEVICE)
     with torch.no_grad():
-        with torch.autocast(device_type=DEVICE, dtype=torch.bfloat16):
             sr_t = model(lr_t)
         sr_t = sr_t.float().clamp(0, 1)
-    # LR display — bilinear upscale to 256 for side-by-side
     lr_display_t = F.interpolate(
         lr_t.float(), size=(256, 256),
         mode="bilinear", align_corners=False)
-    # metrics — LR baseline vs SR
     psnr_lr = psnr(lr_display_t, gt_t).item()
     ssim_lr = ssim(lr_display_t, gt_t)
     psnr_sr = psnr(sr_t, gt_t).item()
     ssim_sr = ssim(sr_t, gt_t)
-    # to PIL
     def to_pil(t):
         return TF.to_pil_image(t.squeeze(0).cpu())
-    lr_img = to_pil(lr_display_t)
-    sr_img = to_pil(sr_t)
-    gt_img = gt
     metrics = (
         f"**LR baseline** — PSNR: {psnr_lr:.2f} dB | SSIM: {ssim_lr:.4f}\n\n"
         f"**SR output** — PSNR: {psnr_sr:.2f} dB | SSIM: {ssim_sr:.4f}\n\n"
@@ -256,7 +240,7 @@ def run_sr(img_pil):
         f"ΔSSIM: +{ssim_sr - ssim_lr:.4f}"
     )
-    return lr_img, sr_img, gt_img, metrics
 # ── Example images ────────────────────────────────────────────────────────────
@@ -383,7 +367,6 @@ body, .gradio-container {
 # ── Architecture info (collapsible) ──────────────────────────────────────────
 ARCH_INFO = """
-## Dense-Iso-ViT core claim
 > "Isotropic constant-resolution hierarchical ViT with inter-stage dense feature aggregation — eliminating spatial bottlenecks while preserving coordinate integrity throughout all processing stages."
@@ -405,7 +388,6 @@ DRCT uses spatial downsampling and local block-level residuals. Dense-Iso-ViT ma
 ### Results
 | Benchmark | PSNR | SSIM |
 |-----------|------|------|
-| LSDIR test | 24.11 dB | — |
 | DIV2K validation | 25.20 dB | 0.8298 |
 """

 # ── Load model ────────────────────────────────────────────────────────────────
+model = ImageSRTransformer()
 checkpoint = torch.load(
     "sr_best_v4_resumed.pt",
+    map_location="cpu",
     weights_only=False
 )
 model.load_state_dict(checkpoint["model_state_dict"])
 model.eval()
+print(f"Model loaded — val PSNR: {checkpoint['val_psnr']:.2f} dB")
 # ── Inference ─────────────────────────────────────────────────────────────────
 @spaces.GPU
 def run_sr(img_pil):
+    device = "cuda" if torch.cuda.is_available() else "cpu"
+    model.to(device)
     w, h = img_pil.size
     if w < 256 or h < 256:
         scale = max(256 / w, 256 / h)
             (int(w * scale), int(h * scale)), Image.BICUBIC)
         w, h = img_pil.size
     left = (w - 256) // 2
     top  = (h - 256) // 2
     gt   = img_pil.crop((left, top, left + 256, top + 256))
     lr   = gt.resize((64, 64), Image.BICUBIC)
+    lr_t = TF.to_tensor(lr).unsqueeze(0).to(device)
+    gt_t = TF.to_tensor(gt).unsqueeze(0).to(device)
     with torch.no_grad():
+        with torch.autocast(device_type="cuda",
+                            dtype=torch.bfloat16,
+                            enabled=(device == "cuda")):
             sr_t = model(lr_t)
         sr_t = sr_t.float().clamp(0, 1)
     lr_display_t = F.interpolate(
         lr_t.float(), size=(256, 256),
         mode="bilinear", align_corners=False)
     psnr_lr = psnr(lr_display_t, gt_t).item()
     ssim_lr = ssim(lr_display_t, gt_t)
     psnr_sr = psnr(sr_t, gt_t).item()
     ssim_sr = ssim(sr_t, gt_t)
     def to_pil(t):
         return TF.to_pil_image(t.squeeze(0).cpu())
     metrics = (
         f"**LR baseline** — PSNR: {psnr_lr:.2f} dB | SSIM: {ssim_lr:.4f}\n\n"
         f"**SR output** — PSNR: {psnr_sr:.2f} dB | SSIM: {ssim_sr:.4f}\n\n"
         f"ΔSSIM: +{ssim_sr - ssim_lr:.4f}"
     )
+    return to_pil(lr_display_t), to_pil(sr_t), gt, metrics
 # ── Example images ────────────────────────────────────────────────────────────
 # ── Architecture info (collapsible) ──────────────────────────────────────────
 ARCH_INFO = """
 > "Isotropic constant-resolution hierarchical ViT with inter-stage dense feature aggregation — eliminating spatial bottlenecks while preserving coordinate integrity throughout all processing stages."
 ### Results
 | Benchmark | PSNR | SSIM |
 |-----------|------|------|
 | DIV2K validation | 25.20 dB | 0.8298 |
 """