Sentence Similarity
sentence-transformers
Safetensors
Korean
qwen3_vl
feature-extraction
Generated from Trainer
dataset_size:375895
loss:MatryoshkaLoss
loss:CachedMultipleNegativesRankingLoss
Eval Results (legacy)
Instructions to use whybe-choi/Qwen3-VL-Embedding-2B-ko-vdr-preview-v0.1 with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- sentence-transformers
How to use whybe-choi/Qwen3-VL-Embedding-2B-ko-vdr-preview-v0.1 with sentence-transformers:
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("whybe-choi/Qwen3-VL-Embedding-2B-ko-vdr-preview-v0.1")

sentences = [
    "컴퓨터시스템설계 및 분석가와 시스템소프트웨어개발자의 2021-2031년 고용 증감률 차이는 어떤 요인에 기인하나요?",
    "2023년 일·가정 양립 실태조사에서 사업체 규모별 상시근로자 수와 표본 배분 수의 차이는 어떻게 다른가요?",
    "「소재·부품·장비 2.0전략」으로 확대된 GVC 핵심품목 수와 2022년 국내 첨단화학소재 시장 규모는 각각 얼마인가요?",
    "이차전지 장비 분야에서 고졸 인력의 퇴직률과 채용률은 사업체 규모별로 어떻게 다른가요?"
]
embeddings = model.encode(sentences)

similarities = model.similarity(embeddings, embeddings)
print(similarities.shape)  # [4, 4]
- Notebooks
- Google Colab
- Kaggle
Upload folder using huggingface_hub
Browse files
- .gitattributes +7 -0
- 1_Pooling/config.json +5 -0
- README.md +0 -0
- assets/example_image_0.jpg +3 -0
- assets/example_image_1.jpg +3 -0
- assets/example_image_2.jpg +3 -0
- assets/image_0.jpg +3 -0
- assets/image_1.jpg +3 -0
- assets/image_2.jpg +3 -0
- chat_template.jinja +125 -0
- config.json +66 -0
- config_sentence_transformers.json +15 -0
- model.safetensors +3 -0
- modules.json +20 -0
- processor_config.json +64 -0
- sentence_bert_config.json +29 -0
- tokenizer.json +3 -0
- tokenizer_config.json +17 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,10 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
assets/example_image_0.jpg filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
assets/example_image_1.jpg filter=lfs diff=lfs merge=lfs -text
|
| 38 |
+
assets/example_image_2.jpg filter=lfs diff=lfs merge=lfs -text
|
| 39 |
+
assets/image_0.jpg filter=lfs diff=lfs merge=lfs -text
|
| 40 |
+
assets/image_1.jpg filter=lfs diff=lfs merge=lfs -text
|
| 41 |
+
assets/image_2.jpg filter=lfs diff=lfs merge=lfs -text
|
| 42 |
+
tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
1_Pooling/config.json
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"embedding_dimension": 2048,
|
| 3 |
+
"pooling_mode": "lasttoken",
|
| 4 |
+
"include_prompt": true
|
| 5 |
+
}
|
README.md
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
assets/example_image_0.jpg
ADDED
|
Git LFS Details
|
assets/example_image_1.jpg
ADDED
|
Git LFS Details
|
assets/example_image_2.jpg
ADDED
|
Git LFS Details
|
assets/image_0.jpg
ADDED
|
Git LFS Details
|
assets/image_1.jpg
ADDED
|
Git LFS Details
|
assets/image_2.jpg
ADDED
|
Git LFS Details
|
chat_template.jinja
ADDED
|
@@ -0,0 +1,125 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{%- set default_system_message = 'Represent the user\'s input.' -%}
|
| 2 |
+
{%- if tools %}
|
| 3 |
+
{{- '<|im_start|>system\n' }}
|
| 4 |
+
{%- if messages[0].role == 'system' %}
|
| 5 |
+
{%- if messages[0].content is string %}
|
| 6 |
+
{{- messages[0].content }}
|
| 7 |
+
{%- else %}
|
| 8 |
+
{%- for content in messages[0].content %}
|
| 9 |
+
{%- if 'text' in content %}
|
| 10 |
+
{{- content.text }}
|
| 11 |
+
{%- endif %}
|
| 12 |
+
{%- endfor %}
|
| 13 |
+
{%- endif %}
|
| 14 |
+
{{- '\n\n' }}
|
| 15 |
+
{%- else %}
|
| 16 |
+
{{- default_system_message + '\n\n' }}
|
| 17 |
+
{%- endif %}
|
| 18 |
+
{{- "# Tools\n\nYou may call one or more functions to assist with the user query.\n\nYou are provided with function signatures within <tools></tools> XML tags:\n<tools>" }}
|
| 19 |
+
{%- for tool in tools %}
|
| 20 |
+
{{- "\n" }}
|
| 21 |
+
{{- tool | tojson }}
|
| 22 |
+
{%- endfor %}
|
| 23 |
+
{{- "\n</tools>\n\nFor each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:\n<tool_call>\n{\"name\": <function-name>, \"arguments\": <args-json-object>}\n</tool_call><|im_end|>\n" }}
|
| 24 |
+
{%- else %}
|
| 25 |
+
{%- if messages[0].role == 'system' %}
|
| 26 |
+
{{- '<|im_start|>system\n' }}
|
| 27 |
+
{%- if messages[0].content is string %}
|
| 28 |
+
{{- messages[0].content }}
|
| 29 |
+
{%- else %}
|
| 30 |
+
{%- for content in messages[0].content %}
|
| 31 |
+
{%- if 'text' in content %}
|
| 32 |
+
{{- content.text }}
|
| 33 |
+
{%- endif %}
|
| 34 |
+
{%- endfor %}
|
| 35 |
+
{%- endif %}
|
| 36 |
+
{{- '<|im_end|>\n' }}
|
| 37 |
+
{%- else %}
|
| 38 |
+
{{- '<|im_start|>system\n' + default_system_message + '<|im_end|>\n' }}
|
| 39 |
+
{%- endif %}
|
| 40 |
+
{%- endif %}
|
| 41 |
+
{%- set image_count = namespace(value=0) %}
|
| 42 |
+
{%- set video_count = namespace(value=0) %}
|
| 43 |
+
{%- for message in messages %}
|
| 44 |
+
{%- if message.role == "user" %}
|
| 45 |
+
{{- '<|im_start|>' + message.role + '\n' }}
|
| 46 |
+
{%- if message.content is string %}
|
| 47 |
+
{{- message.content }}
|
| 48 |
+
{%- else %}
|
| 49 |
+
{%- for content in message.content %}
|
| 50 |
+
{%- if content.type == 'image' or 'image' in content or 'image_url' in content %}
|
| 51 |
+
{%- set image_count.value = image_count.value + 1 %}
|
| 52 |
+
{%- if add_vision_id %}Picture {{ image_count.value }}: {% endif -%}
|
| 53 |
+
<|vision_start|><|image_pad|><|vision_end|>
|
| 54 |
+
{%- elif content.type == 'video' or 'video' in content %}
|
| 55 |
+
{%- set video_count.value = video_count.value + 1 %}
|
| 56 |
+
{%- if add_vision_id %}Video {{ video_count.value }}: {% endif -%}
|
| 57 |
+
<|vision_start|><|video_pad|><|vision_end|>
|
| 58 |
+
{%- elif 'text' in content %}
|
| 59 |
+
{{- content.text }}
|
| 60 |
+
{%- endif %}
|
| 61 |
+
{%- endfor %}
|
| 62 |
+
{%- endif %}
|
| 63 |
+
{{- '<|im_end|>\n' }}
|
| 64 |
+
{%- elif message.role == "assistant" %}
|
| 65 |
+
{{- '<|im_start|>' + message.role + '\n' }}
|
| 66 |
+
{%- if message.content is string %}
|
| 67 |
+
{{- message.content }}
|
| 68 |
+
{%- else %}
|
| 69 |
+
{%- for content_item in message.content %}
|
| 70 |
+
{%- if 'text' in content_item %}
|
| 71 |
+
{{- content_item.text }}
|
| 72 |
+
{%- endif %}
|
| 73 |
+
{%- endfor %}
|
| 74 |
+
{%- endif %}
|
| 75 |
+
{%- if message.tool_calls %}
|
| 76 |
+
{%- for tool_call in message.tool_calls %}
|
| 77 |
+
{%- if (loop.first and message.content) or (not loop.first) %}
|
| 78 |
+
{{- '\n' }}
|
| 79 |
+
{%- endif %}
|
| 80 |
+
{%- if tool_call.function %}
|
| 81 |
+
{%- set tool_call = tool_call.function %}
|
| 82 |
+
{%- endif %}
|
| 83 |
+
{{- '<tool_call>\n{"name": "' }}
|
| 84 |
+
{{- tool_call.name }}
|
| 85 |
+
{{- '", "arguments": ' }}
|
| 86 |
+
{%- if tool_call.arguments is string %}
|
| 87 |
+
{{- tool_call.arguments }}
|
| 88 |
+
{%- else %}
|
| 89 |
+
{{- tool_call.arguments | tojson }}
|
| 90 |
+
{%- endif %}
|
| 91 |
+
{{- '}\n</tool_call>' }}
|
| 92 |
+
{%- endfor %}
|
| 93 |
+
{%- endif %}
|
| 94 |
+
{{- '<|im_end|>\n' }}
|
| 95 |
+
{%- elif message.role == "tool" %}
|
| 96 |
+
{%- if loop.first or (messages[loop.index0 - 1].role != "tool") %}
|
| 97 |
+
{{- '<|im_start|>user' }}
|
| 98 |
+
{%- endif %}
|
| 99 |
+
{{- '\n<tool_response>\n' }}
|
| 100 |
+
{%- if message.content is string %}
|
| 101 |
+
{{- message.content }}
|
| 102 |
+
{%- else %}
|
| 103 |
+
{%- for content in message.content %}
|
| 104 |
+
{%- if content.type == 'image' or 'image' in content or 'image_url' in content %}
|
| 105 |
+
{%- set image_count.value = image_count.value + 1 %}
|
| 106 |
+
{%- if add_vision_id %}Picture {{ image_count.value }}: {% endif -%}
|
| 107 |
+
<|vision_start|><|image_pad|><|vision_end|>
|
| 108 |
+
{%- elif content.type == 'video' or 'video' in content %}
|
| 109 |
+
{%- set video_count.value = video_count.value + 1 %}
|
| 110 |
+
{%- if add_vision_id %}Video {{ video_count.value }}: {% endif -%}
|
| 111 |
+
<|vision_start|><|video_pad|><|vision_end|>
|
| 112 |
+
{%- elif 'text' in content %}
|
| 113 |
+
{{- content.text }}
|
| 114 |
+
{%- endif %}
|
| 115 |
+
{%- endfor %}
|
| 116 |
+
{%- endif %}
|
| 117 |
+
{{- '\n</tool_response>' }}
|
| 118 |
+
{%- if loop.last or (messages[loop.index0 + 1].role != "tool") %}
|
| 119 |
+
{{- '<|im_end|>\n' }}
|
| 120 |
+
{%- endif %}
|
| 121 |
+
{%- endif %}
|
| 122 |
+
{%- endfor %}
|
| 123 |
+
{%- if add_generation_prompt %}
|
| 124 |
+
{{- '<|im_start|>assistant\n' }}
|
| 125 |
+
{%- endif %}
|
config.json
ADDED
|
@@ -0,0 +1,66 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"architectures": [
|
| 3 |
+
"Qwen3VLModel"
|
| 4 |
+
],
|
| 5 |
+
"dtype": "bfloat16",
|
| 6 |
+
"image_token_id": 151655,
|
| 7 |
+
"model_type": "qwen3_vl",
|
| 8 |
+
"text_config": {
|
| 9 |
+
"attention_bias": false,
|
| 10 |
+
"attention_dropout": 0.0,
|
| 11 |
+
"bos_token_id": 151643,
|
| 12 |
+
"dtype": "bfloat16",
|
| 13 |
+
"eos_token_id": 151645,
|
| 14 |
+
"head_dim": 128,
|
| 15 |
+
"hidden_act": "silu",
|
| 16 |
+
"hidden_size": 2048,
|
| 17 |
+
"initializer_range": 0.02,
|
| 18 |
+
"intermediate_size": 6144,
|
| 19 |
+
"max_position_embeddings": 262144,
|
| 20 |
+
"model_type": "qwen3_vl_text",
|
| 21 |
+
"num_attention_heads": 16,
|
| 22 |
+
"num_hidden_layers": 28,
|
| 23 |
+
"num_key_value_heads": 8,
|
| 24 |
+
"pad_token_id": null,
|
| 25 |
+
"rms_norm_eps": 1e-06,
|
| 26 |
+
"rope_parameters": {
|
| 27 |
+
"mrope_interleaved": true,
|
| 28 |
+
"mrope_section": [
|
| 29 |
+
24,
|
| 30 |
+
20,
|
| 31 |
+
20
|
| 32 |
+
],
|
| 33 |
+
"rope_theta": 5000000,
|
| 34 |
+
"rope_type": "default"
|
| 35 |
+
},
|
| 36 |
+
"tie_word_embeddings": true,
|
| 37 |
+
"use_cache": true,
|
| 38 |
+
"vocab_size": 151936
|
| 39 |
+
},
|
| 40 |
+
"tie_word_embeddings": true,
|
| 41 |
+
"transformers_version": "5.5.4",
|
| 42 |
+
"video_token_id": 151656,
|
| 43 |
+
"vision_config": {
|
| 44 |
+
"deepstack_visual_indexes": [
|
| 45 |
+
5,
|
| 46 |
+
11,
|
| 47 |
+
17
|
| 48 |
+
],
|
| 49 |
+
"depth": 24,
|
| 50 |
+
"dtype": "bfloat16",
|
| 51 |
+
"hidden_act": "gelu_pytorch_tanh",
|
| 52 |
+
"hidden_size": 1024,
|
| 53 |
+
"in_channels": 3,
|
| 54 |
+
"initializer_range": 0.02,
|
| 55 |
+
"intermediate_size": 4096,
|
| 56 |
+
"model_type": "qwen3_vl",
|
| 57 |
+
"num_heads": 16,
|
| 58 |
+
"num_position_embeddings": 2304,
|
| 59 |
+
"out_hidden_size": 2048,
|
| 60 |
+
"patch_size": 16,
|
| 61 |
+
"spatial_merge_size": 2,
|
| 62 |
+
"temporal_patch_size": 2
|
| 63 |
+
},
|
| 64 |
+
"vision_end_token_id": 151653,
|
| 65 |
+
"vision_start_token_id": 151652
|
| 66 |
+
}
|
config_sentence_transformers.json
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"__version__": {
|
| 3 |
+
"pytorch": "2.11.0+cu130",
|
| 4 |
+
"sentence_transformers": "5.4.1",
|
| 5 |
+
"transformers": "5.5.4"
|
| 6 |
+
},
|
| 7 |
+
"default_prompt_name": "default",
|
| 8 |
+
"model_type": "SentenceTransformer",
|
| 9 |
+
"prompts": {
|
| 10 |
+
"default": "Represent the user's input.",
|
| 11 |
+
"document": "",
|
| 12 |
+
"query": "Find a screenshot that relevant to the user\u2019s question."
|
| 13 |
+
},
|
| 14 |
+
"similarity_fn_name": "cosine"
|
| 15 |
+
}
|
model.safetensors
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8edf654e2b63b71baeaeaf2ccbb7f6e6f8e6723233a0b04c3cd542fef3622cea
|
| 3 |
+
size 4255136560
|
modules.json
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[
|
| 2 |
+
{
|
| 3 |
+
"idx": 0,
|
| 4 |
+
"name": "0",
|
| 5 |
+
"path": "",
|
| 6 |
+
"type": "sentence_transformers.base.modules.transformer.Transformer"
|
| 7 |
+
},
|
| 8 |
+
{
|
| 9 |
+
"idx": 1,
|
| 10 |
+
"name": "1",
|
| 11 |
+
"path": "1_Pooling",
|
| 12 |
+
"type": "sentence_transformers.sentence_transformer.modules.pooling.Pooling"
|
| 13 |
+
},
|
| 14 |
+
{
|
| 15 |
+
"idx": 2,
|
| 16 |
+
"name": "2",
|
| 17 |
+
"path": "2_Normalize",
|
| 18 |
+
"type": "sentence_transformers.sentence_transformer.modules.normalize.Normalize"
|
| 19 |
+
}
|
| 20 |
+
]
|
processor_config.json
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"image_processor": {
|
| 3 |
+
"data_format": "channels_first",
|
| 4 |
+
"default_to_square": true,
|
| 5 |
+
"do_convert_rgb": true,
|
| 6 |
+
"do_normalize": true,
|
| 7 |
+
"do_rescale": true,
|
| 8 |
+
"do_resize": true,
|
| 9 |
+
"image_mean": [
|
| 10 |
+
0.5,
|
| 11 |
+
0.5,
|
| 12 |
+
0.5
|
| 13 |
+
],
|
| 14 |
+
"image_processor_type": "Qwen2VLImageProcessor",
|
| 15 |
+
"image_std": [
|
| 16 |
+
0.5,
|
| 17 |
+
0.5,
|
| 18 |
+
0.5
|
| 19 |
+
],
|
| 20 |
+
"merge_size": 2,
|
| 21 |
+
"patch_size": 16,
|
| 22 |
+
"resample": 3,
|
| 23 |
+
"rescale_factor": 0.00392156862745098,
|
| 24 |
+
"size": {
|
| 25 |
+
"longest_edge": 1048576,
|
| 26 |
+
"shortest_edge": 4096
|
| 27 |
+
},
|
| 28 |
+
"temporal_patch_size": 2
|
| 29 |
+
},
|
| 30 |
+
"processor_class": "Qwen3VLProcessor",
|
| 31 |
+
"video_processor": {
|
| 32 |
+
"data_format": "channels_first",
|
| 33 |
+
"default_to_square": true,
|
| 34 |
+
"do_convert_rgb": true,
|
| 35 |
+
"do_normalize": true,
|
| 36 |
+
"do_rescale": true,
|
| 37 |
+
"do_resize": true,
|
| 38 |
+
"do_sample_frames": true,
|
| 39 |
+
"fps": 2,
|
| 40 |
+
"image_mean": [
|
| 41 |
+
0.5,
|
| 42 |
+
0.5,
|
| 43 |
+
0.5
|
| 44 |
+
],
|
| 45 |
+
"image_std": [
|
| 46 |
+
0.5,
|
| 47 |
+
0.5,
|
| 48 |
+
0.5
|
| 49 |
+
],
|
| 50 |
+
"max_frames": 768,
|
| 51 |
+
"merge_size": 2,
|
| 52 |
+
"min_frames": 4,
|
| 53 |
+
"patch_size": 16,
|
| 54 |
+
"resample": 3,
|
| 55 |
+
"rescale_factor": 0.00392156862745098,
|
| 56 |
+
"return_metadata": false,
|
| 57 |
+
"size": {
|
| 58 |
+
"longest_edge": 25165824,
|
| 59 |
+
"shortest_edge": 4096
|
| 60 |
+
},
|
| 61 |
+
"temporal_patch_size": 2,
|
| 62 |
+
"video_processor_type": "Qwen3VLVideoProcessor"
|
| 63 |
+
}
|
| 64 |
+
}
|
sentence_bert_config.json
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"transformer_task": "feature-extraction",
|
| 3 |
+
"modality_config": {
|
| 4 |
+
"text": {
|
| 5 |
+
"method": "forward",
|
| 6 |
+
"method_output_name": "last_hidden_state"
|
| 7 |
+
},
|
| 8 |
+
"image": {
|
| 9 |
+
"method": "forward",
|
| 10 |
+
"method_output_name": "last_hidden_state"
|
| 11 |
+
},
|
| 12 |
+
"video": {
|
| 13 |
+
"method": "forward",
|
| 14 |
+
"method_output_name": "last_hidden_state"
|
| 15 |
+
},
|
| 16 |
+
"message": {
|
| 17 |
+
"method": "forward",
|
| 18 |
+
"method_output_name": "last_hidden_state",
|
| 19 |
+
"format": "structured"
|
| 20 |
+
}
|
| 21 |
+
},
|
| 22 |
+
"module_output_name": "token_embeddings",
|
| 23 |
+
"processing_kwargs": {
|
| 24 |
+
"chat_template": {
|
| 25 |
+
"add_generation_prompt": true
|
| 26 |
+
}
|
| 27 |
+
},
|
| 28 |
+
"unpad_inputs": false
|
| 29 |
+
}
|
tokenizer.json
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f4deec245f9380efa31abe72adbcae78599405bf2e69e5828180a5a6e116c67d
|
| 3 |
+
size 11423970
|
tokenizer_config.json
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"add_prefix_space": false,
|
| 3 |
+
"backend": "tokenizers",
|
| 4 |
+
"bos_token": null,
|
| 5 |
+
"clean_up_tokenization_spaces": false,
|
| 6 |
+
"eos_token": "<|im_end|>",
|
| 7 |
+
"errors": "replace",
|
| 8 |
+
"is_local": false,
|
| 9 |
+
"max_pixels": 1048576,
|
| 10 |
+
"min_pixels": 4096,
|
| 11 |
+
"model_max_length": 262144,
|
| 12 |
+
"pad_token": "<|endoftext|>",
|
| 13 |
+
"processor_class": "Qwen3VLProcessor",
|
| 14 |
+
"split_special_tokens": false,
|
| 15 |
+
"tokenizer_class": "Qwen2Tokenizer",
|
| 16 |
+
"unk_token": null
|
| 17 |
+
}
|