Initial release of MedPMC-CLIP

Files changed (5) hide show

.gitattributes +1 -0
README.md +70 -0
inference_example.py +40 -0
open_clip_pytorch_model.safetensors +3 -0
requirements.txt +4 -0

.gitattributes ADDED Viewed

	@@ -0,0 +1 @@


1	+ *.safetensors filter=lfs diff=lfs merge=lfs -text

README.md ADDED Viewed

	@@ -0,0 +1,70 @@

+---
+library_name: open_clip
+tags:
+- clip
+- openclip
+- medical
+- biomedical
+- vision-language
+- image-text-retrieval
+- medpmc
+---
+# MedPMC-CLIP
+MedPMC-CLIP is a medical vision-language model based on the OpenCLIP `ViT-L-14` architecture.
+This repository provides the checkpoint in **OpenCLIP format**. Text inputs should be tokenized using the default OpenCLIP tokenizer for `ViT-L-14`.
+```python
+tokenizer = open_clip.get_tokenizer("ViT-L-14")
+```
+## Files
+- `open_clip_pytorch_model.safetensors`: OpenCLIP-format model checkpoint
+- `inference_example.py`: example code for image-text similarity
+- `export_meta.json`: export metadata
+- `requirements.txt`: minimal dependencies
+## Usage
+```python
+import torch
+import open_clip
+from safetensors.torch import load_file
+from PIL import Image
+model_name = "ViT-L-14"
+device = "cuda" if torch.cuda.is_available() else "cpu"
+model, _, preprocess = open_clip.create_model_and_transforms(
+    model_name,
+    pretrained=None,
+)
+state_dict = load_file("open_clip_pytorch_model.safetensors")
+model.load_state_dict(state_dict, strict=True)
+model = model.to(device)
+model.eval()
+tokenizer = open_clip.get_tokenizer(model_name)
+image = preprocess(Image.open("example.jpg").convert("RGB")).unsqueeze(0).to(device)
+text = tokenizer(["fundus photograph", "chest radiograph", "histopathology image"]).to(device)
+with torch.no_grad():
+    image_features = model.encode_image(image)
+    text_features = model.encode_text(text)
+    image_features = image_features / image_features.norm(dim=-1, keepdim=True)
+    text_features = text_features / text_features.norm(dim=-1, keepdim=True)
+    similarity = image_features @ text_features.T
+print(similarity)
+```
+## Citation
+Citation information will be added upon release.

inference_example.py ADDED Viewed

	@@ -0,0 +1,40 @@

+import torch
+import open_clip
+from safetensors.torch import load_file
+from PIL import Image
+model_name = "ViT-L-14"
+checkpoint_path = "open_clip_pytorch_model.safetensors"
+device = "cuda" if torch.cuda.is_available() else "cpu"
+model, _, preprocess = open_clip.create_model_and_transforms(
+    model_name,
+    pretrained=None,
+)
+state_dict = load_file(checkpoint_path)
+model.load_state_dict(state_dict, strict=True)
+model = model.to(device)
+model.eval()
+tokenizer = open_clip.get_tokenizer(model_name)
+image = preprocess(Image.open("example.jpg").convert("RGB")).unsqueeze(0).to(device)
+texts = tokenizer([
+    "chest radiograph",
+    "fundus photograph",
+    "histopathology image",
+]).to(device)
+with torch.no_grad():
+    image_features = model.encode_image(image)
+    text_features = model.encode_text(texts)
+    image_features = image_features / image_features.norm(dim=-1, keepdim=True)
+    text_features = text_features / text_features.norm(dim=-1, keepdim=True)
+    similarity = image_features @ text_features.T
+    probs = similarity.softmax(dim=-1)
+print(probs)

open_clip_pytorch_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:a2878f8ec808a8f7e13e868c280223ff608c495124f3b8465770fd939ebdc302
+size 1710517724

requirements.txt ADDED Viewed

	@@ -0,0 +1,4 @@

+torch
+open_clip_torch
+safetensors
+pillow