Hyunjae Kim committed on
Commit
7d8fbac
·
0 Parent(s):

Initial release of MedPMC-CLIP

Browse files
.gitattributes ADDED
@@ -0,0 +1 @@
 
 
1
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
README.md ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: open_clip
3
+ tags:
4
+ - clip
5
+ - openclip
6
+ - medical
7
+ - biomedical
8
+ - vision-language
9
+ - image-text-retrieval
10
+ - medpmc
11
+ ---
12
+
13
+ # MedPMC-CLIP
14
+
15
+ MedPMC-CLIP is a medical vision-language model based on the OpenCLIP `ViT-L-14` architecture.
16
+
17
+ This repository provides the checkpoint in **OpenCLIP format**. Text inputs should be tokenized using the default OpenCLIP tokenizer for `ViT-L-14`.
18
+
19
+ ```python
20
+ tokenizer = open_clip.get_tokenizer("ViT-L-14")
21
+ ```
22
+
23
+ ## Files
24
+
25
+ - `open_clip_pytorch_model.safetensors`: OpenCLIP-format model checkpoint
26
+ - `inference_example.py`: example code for image-text similarity
27
+ - `export_meta.json`: export metadata
28
+ - `requirements.txt`: minimal dependencies
29
+
30
+ ## Usage
31
+
32
+ ```python
33
+ import torch
34
+ import open_clip
35
+ from safetensors.torch import load_file
36
+ from PIL import Image
37
+
38
+ model_name = "ViT-L-14"
39
+ device = "cuda" if torch.cuda.is_available() else "cpu"
40
+
41
+ model, _, preprocess = open_clip.create_model_and_transforms(
42
+ model_name,
43
+ pretrained=None,
44
+ )
45
+
46
+ state_dict = load_file("open_clip_pytorch_model.safetensors")
47
+ model.load_state_dict(state_dict, strict=True)
48
+ model = model.to(device)
49
+ model.eval()
50
+
51
+ tokenizer = open_clip.get_tokenizer(model_name)
52
+
53
+ image = preprocess(Image.open("example.jpg").convert("RGB")).unsqueeze(0).to(device)
54
+ text = tokenizer(["fundus photograph", "chest radiograph", "histopathology image"]).to(device)
55
+
56
+ with torch.no_grad():
57
+ image_features = model.encode_image(image)
58
+ text_features = model.encode_text(text)
59
+
60
+ image_features = image_features / image_features.norm(dim=-1, keepdim=True)
61
+ text_features = text_features / text_features.norm(dim=-1, keepdim=True)
62
+
63
+ similarity = image_features @ text_features.T
64
+
65
+ print(similarity)
66
+ ```
67
+
68
+ ## Citation
69
+
70
+ Citation information will be added upon release.
inference_example.py ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Zero-shot image-text matching example for the MedPMC-CLIP checkpoint.

Builds an OpenCLIP ``ViT-L-14`` model, loads the weights from
``open_clip_pytorch_model.safetensors`` (path relative to the working
directory), and prints, for ``example.jpg``, a probability for each
candidate caption.
"""

import torch
import open_clip
from safetensors.torch import load_file
from PIL import Image

model_name = "ViT-L-14"
checkpoint_path = "open_clip_pytorch_model.safetensors"

device = "cuda" if torch.cuda.is_available() else "cpu"

# pretrained=None: only the architecture is built here; the weights are
# loaded explicitly from the local checkpoint below.
model, _, preprocess = open_clip.create_model_and_transforms(
    model_name,
    pretrained=None,
)

state_dict = load_file(checkpoint_path)
# strict=True so that a checkpoint/architecture key mismatch fails loudly
# instead of leaving some weights uninitialised.
model.load_state_dict(state_dict, strict=True)
model = model.to(device)
model.eval()

tokenizer = open_clip.get_tokenizer(model_name)

# Use a context manager so the image file handle is closed once the
# preprocessed tensor has been produced.
with Image.open("example.jpg") as raw_image:
    image = preprocess(raw_image.convert("RGB")).unsqueeze(0).to(device)

texts = tokenizer([
    "chest radiograph",
    "fundus photograph",
    "histopathology image",
]).to(device)

with torch.no_grad():
    image_features = model.encode_image(image)
    text_features = model.encode_text(texts)

    # L2-normalise so the dot product below is a cosine similarity.
    image_features = image_features / image_features.norm(dim=-1, keepdim=True)
    text_features = text_features / text_features.norm(dim=-1, keepdim=True)

    similarity = image_features @ text_features.T

    # Cosine similarities lie in [-1, 1]; a softmax over such a narrow range
    # is almost uniform. Scale by the model's logit scale (stored in log
    # space, hence .exp()) before the softmax, as in standard CLIP
    # zero-shot classification.
    probs = (model.logit_scale.exp() * similarity).softmax(dim=-1)

print(probs)
open_clip_pytorch_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a2878f8ec808a8f7e13e868c280223ff608c495124f3b8465770fd939ebdc302
3
+ size 1710517724
requirements.txt ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ torch
2
+ open_clip_torch
3
+ safetensors
4
+ pillow