nzs234 committed on
Commit
081eb5b
·
verified ·
1 Parent(s): 478681c

Upload standalone local aesthetic bundle (SigLIP2 + head)

Browse files
README.md ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: transformers
3
+ tags:
4
+ - image-classification
5
+ - aesthetic-scoring
6
+ - siglip2
7
+ - custom-code
8
+ license: other
9
+ ---
10
+
11
+ # SigLIP2 Aesthetic Scorer (Local Bundle)
12
+
13
+ This repository contains a standalone local bundle for aesthetic scoring:
14
+
15
+ - Backbone: `google/siglip2-so400m-patch16-512` (saved locally)
16
+ - Head: custom MLP regressor (`head.safetensors`)
17
+ - Score range: `1..9` (`score_min` = 1.0 and `score_max` = 9.0 in `metadata.json`)
18
+
19
+ ## Files
20
+
21
+ - `backbone/` : local SigLIP2 backbone + processor files
22
+ - `head.safetensors` : MLP head weights
23
+ - `metadata.json` : model config and score range
24
+
25
+ ## Output
26
+
27
+ The model outputs a continuous score; in common usage it is rounded to an integer and reported as one of the labels `score_1` ... `score_9`.
backbone/config.json ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "architectures": [
3
+ "SiglipModel"
4
+ ],
5
+ "dtype": "float32",
6
+ "initializer_factor": 1.0,
7
+ "model_type": "siglip",
8
+ "text_config": {
9
+ "attention_dropout": 0.0,
10
+ "bos_token_id": 49406,
11
+ "dtype": "float32",
12
+ "eos_token_id": 49407,
13
+ "hidden_act": "gelu_pytorch_tanh",
14
+ "hidden_size": 1152,
15
+ "intermediate_size": 4304,
16
+ "layer_norm_eps": 1e-06,
17
+ "max_position_embeddings": 64,
18
+ "model_type": "siglip_text_model",
19
+ "num_attention_heads": 16,
20
+ "num_hidden_layers": 27,
21
+ "pad_token_id": 1,
22
+ "projection_size": 1152,
23
+ "vocab_size": 256000
24
+ },
25
+ "transformers_version": "5.2.0",
26
+ "vision_config": {
27
+ "attention_dropout": 0.0,
28
+ "dtype": "float32",
29
+ "hidden_act": "gelu_pytorch_tanh",
30
+ "hidden_size": 1152,
31
+ "image_size": 512,
32
+ "intermediate_size": 4304,
33
+ "layer_norm_eps": 1e-06,
34
+ "model_type": "siglip_vision_model",
35
+ "num_attention_heads": 16,
36
+ "num_channels": 3,
37
+ "num_hidden_layers": 27,
38
+ "patch_size": 16
39
+ }
40
+ }
backbone/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a621bd212e1b3329b428595f9693217e19587afe826adf3e5c241a16392e8973
3
+ size 4546331880
backbone/preprocessor_config.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "data_format": "channels_first",
3
+ "do_normalize": true,
4
+ "do_rescale": true,
5
+ "do_resize": true,
6
+ "image_mean": [
7
+ 0.5,
8
+ 0.5,
9
+ 0.5
10
+ ],
11
+ "image_processor_type": "SiglipImageProcessorFast",
12
+ "image_std": [
13
+ 0.5,
14
+ 0.5,
15
+ 0.5
16
+ ],
17
+ "resample": 2,
18
+ "rescale_factor": 0.00392156862745098,
19
+ "size": {
20
+ "height": 512,
21
+ "width": 512
22
+ }
23
+ }
head.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ce5df2a40a4d30116f225cc56bdbf07c82efe1fadbcf41ba26d589513afce139
3
+ size 14374204
metadata.json ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "format": "aesthetic_local_standalone_v1",
3
+ "source_checkpoint": "E:\\AI\\Tagger2_Inference\\models\\local_aesthetic_run1\\best_by_test_rmse.pt",
4
+ "backbone_dir": "backbone",
5
+ "head_file": "head.safetensors",
6
+ "model": {
7
+ "backbone_name": "google/siglip2-so400m-patch16-512",
8
+ "hidden_dim": 2048,
9
+ "dropout": 0.2,
10
+ "full_finetune": false,
11
+ "processor_use_fast": false
12
+ },
13
+ "data": {
14
+ "score_min": 1.0,
15
+ "score_max": 9.0
16
+ }
17
+ }