zhangsq-nju committed on
Commit
4aeec45
·
verified ·
1 Parent(s): accca20

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -33,3 +33,7 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ Qwen3-1.7B-BF16.gguf filter=lfs diff=lfs merge=lfs -text
37
+ Qwen3-1.7B-EdgeRazor-Q4_0.gguf filter=lfs diff=lfs merge=lfs -text
38
+ Qwen3-1.7B-EdgeRazor-TQ1_0.gguf filter=lfs diff=lfs merge=lfs -text
39
+ Qwen3-1.7B-EdgeRazor-TQ2_0.gguf filter=lfs diff=lfs merge=lfs -text
Qwen3-1.7B-BF16.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d4ca7ef8959fa7e23e656b80e39a1f5400fc3f2d427133ee84dc2a57633b5a1d
3
+ size 4069679360
Qwen3-1.7B-EdgeRazor-Q4_0.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a876b60cf705906a10fceac52427b54abdc5511ad7751929bb37d7590cc12439
3
+ size 1054423360
Qwen3-1.7B-EdgeRazor-TQ1_0.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d665083d69b7c51bed4f16fbbeab196cd6fc0242aeca37092710a8f7f14a55d4
3
+ size 478748992
Qwen3-1.7B-EdgeRazor-TQ2_0.gguf ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c7a8c4018eb0505d0a686110a135c311cdd952c0cf8361e7504513b733b1ca1c
3
+ size 544809280
README.md ADDED
@@ -0,0 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ base_model: Qwen/Qwen3-1.7B
3
+ pipeline_tag: text-generation
4
+ tags:
5
+ - qwen3
6
+ - edgerazor
7
+ - quantization
8
+ license: apache-2.0
9
+ license_link: https://huggingface.co/Qwen/Qwen3-1.7B/blob/main/LICENSE
10
+ ---
11
+
12
+ <div align="center">
13
+ <br/>
14
+ <img src="./asset/Logo-HF.svg" alt="EdgeRazor Logo" width="60%">
15
+ <h3>
16
+ EdgeRazor for Lightweight LLMs
17
+ </h3>
18
+
19
+ <p>
20
+ <a href="https://arxiv.org/abs/2604.xxxxx" target="_blank">
21
+ <img src="https://img.shields.io/badge/arXiv-EdgeRazor-b31b1b?style=flat&logo=arxiv" alt="arXiv EdgeRazor">
22
+ </a>
23
+ <a href="https://github.com/zhangsq-nju/EdgeRazor" target="_blank">
24
+ <img src="https://img.shields.io/badge/GitHub-EdgeRazor-blue?style=flat&logo=github" alt="GitHub EdgeRazor">
25
+ </a>
26
+ </p>
27
+
28
+
29
+ </div>
30
+
31
+ ## Contents
32
+
33
+ - [Contents](#contents)
34
+ - [Model Overview](#model-overview)
35
+ - [Model Bit-Widths](#model-bit-widths)
36
+ - [Get Started](#get-started)
37
+ - [Citation](#citation)
38
+
39
+ ## Model Overview
40
+
41
+ - Base Model: [Qwen/Qwen3-1.7B](https://huggingface.co/Qwen/Qwen3-1.7B)
42
+ - Training: [zhangsq-nju/EdgeRazor](https://github.com/zhangsq-nju/EdgeRazor)
43
+ - Inference: [ggml-org/llama.cpp](https://github.com/ggml-org/llama.cpp)
44
+
45
+ ## Model Bit-Widths
46
+
47
+ | Mixed-Precision Recipe | Bit-Width | This Repo | GGUF Type |
48
+ | ---------------------------- | --------- | --------- | ------------- |
49
+ | 100% 4-bit + 0% 1.58-bit | 4 | ✔️ | Q4_0 |
50
+ | 50% 4-bit + 50% 1.58-bit | 2.79 | ✖️ | Not supported |
51
+ | 12.5% 4-bit + 87.5% 1.58-bit | 1.88 | ✖️ | Not supported |
52
+ | 0% 4-bit + 100% 1.58-bit | 1.58 | ✔️ | TQ1_0, TQ2_0 |
53
+
54
+ ## Get Started
55
+
56
+ Use llama.cpp to run efficient inference on edge devices.
57
+
58
+ Check the [cli.sh](./cli.sh) script for basic usage.
59
+
60
+ Model list:
61
+
62
+ - `Qwen3-1.7B-BF16.gguf`: BF16 model from the original Qwen3-1.7B
63
+ - `Qwen3-1.7B-EdgeRazor-Q4_0.gguf`: Q4_0 model from [Qwen3-1.7B-EdgeRazor-4bit](https://huggingface.co/zhangsq-nju/Qwen3-1.7B-EdgeRazor-4bit)
64
+ - `Qwen3-1.7B-EdgeRazor-TQ1_0.gguf`: TQ1_0 model from [Qwen3-1.7B-EdgeRazor-1.58bit](https://huggingface.co/zhangsq-nju/Qwen3-1.7B-EdgeRazor-1.58bit)
65
+ - `Qwen3-1.7B-EdgeRazor-TQ2_0.gguf`: TQ2_0 model from [Qwen3-1.7B-EdgeRazor-1.58bit](https://huggingface.co/zhangsq-nju/Qwen3-1.7B-EdgeRazor-1.58bit)
66
+
67
+ ## Citation
68
+
69
+ If you find our project useful in your research, please consider citing our paper ✏️:
70
+
71
+ ```
72
+ @article{zhangsh-edgerazor,
73
+ title={{EdgeRazor}: A Lightweight Framework for Large Language Models via Mixed-Precision Quantization-Aware Distillation},
74
+ author={Shu-Hao Zhang and Le-Tong Huang and Xiang-Sheng Deng and Xin-Yi Zou and Chen Wu and Nan Li and Shao-Qun Zhang},
75
+ year={2026},
76
+ }
77
+ ```
asset/Logo-HF.svg ADDED
cli.sh ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/bin/bash
#
# Run one of the Qwen3-1.7B GGUF models in this repository with llama-cli.
#
# Usage: cli.sh [model_index] [prompt]
#   model_index  Index into MODELS below (default: 0).
#   prompt       When given, run a single non-interactive generation;
#                when omitted, start an interactive conversation.
#
# For W[N]-A8-KV8, Apple CPU-only Inference: --n-gpu-layers 0

set -euo pipefail

CLI=llama-cli
KV_CACHE_TYPE=q8_0

# Sampling parameters.
# NOTE(review): the original comment labelled these "non-thinking mode", but
# temperature 0.6 / top-p 0.95 look like Qwen3's thinking-mode recommendation
# -- confirm which mode is intended before changing any value.
TEMPERATURE=0.6
MIN_P=0.00
REPEAT_PENALTY=1.0
PRESENCE_PENALTY=1.5
TOP_K=20
TOP_P=0.95

MODELS=(
  ./Qwen3-1.7B-EdgeRazor-TQ2_0.gguf
  ./Qwen3-1.7B-EdgeRazor-TQ1_0.gguf
  ./Qwen3-1.7B-EdgeRazor-Q4_0.gguf
  ./Qwen3-1.7B-BF16.gguf
)
LAST_INDEX=$(( ${#MODELS[@]} - 1 ))

# Show available model list
echo "Available models:"
for i in "${!MODELS[@]}"; do
  printf ' %s) %s\n' "$i" "${MODELS[$i]}"
done

# Select model (defaults to index 0 when no index is given)
MODEL_INDEX="${1:-}"
if [[ -z "$MODEL_INDEX" ]]; then
  echo ""
  echo "Usage: $0 <model_index> [prompt]"
  # Derive the valid range from MODELS so the help text cannot go stale.
  printf ' model_index: 0-%d (default: 0)\n' "$LAST_INDEX"
  echo " prompt: optional prompt for non-interactive mode"
  echo ""
  MODEL_INDEX=0
fi

# Reject anything that is not a small non-negative integer inside the array
# bounds. The 10# prefix forces base 10 so inputs such as "08" are not parsed
# as invalid octal.
if [[ ! "$MODEL_INDEX" =~ ^[0-9]{1,9}$ ]] \
  || (( 10#$MODEL_INDEX > LAST_INDEX )); then
  printf 'Error: invalid model index: %s (expected 0-%d)\n' \
    "$MODEL_INDEX" "$LAST_INDEX" >&2
  exit 1
fi

MODEL="${MODELS[10#$MODEL_INDEX]}"

if [[ ! -f "$MODEL" ]]; then
  echo "Error: Model file not found: $MODEL" >&2
  exit 1
fi

echo "Selected model: $MODEL"
echo ""

# Flags shared by both run modes; kept in an array so each value stays a
# single word and the two invocations cannot drift apart.
# NOTE(review): newer llama.cpp builds may expect an explicit value
# (on/off/auto) after --flash-attn -- confirm against the installed version.
COMMON_ARGS=(
  --model "$MODEL"
  --n-gpu-layers 0
  --cache-type-k "$KV_CACHE_TYPE"
  --cache-type-v "$KV_CACHE_TYPE"
  --temp "$TEMPERATURE"
  --min-p "$MIN_P"
  --repeat-penalty "$REPEAT_PENALTY"
  --presence-penalty "$PRESENCE_PENALTY"
  --top-k "$TOP_K"
  --top-p "$TOP_P"
  --flash-attn
)

# Run CLI
PROMPT="${2:-}"
if [[ -z "$PROMPT" ]]; then
  # Interactive mode
  "$CLI" "${COMMON_ARGS[@]}" \
    --conversation \
    --interactive-first \
    --color
else
  # Non-interactive mode (single inference)
  "$CLI" "${COMMON_ARGS[@]}" \
    --prompt "$PROMPT" \
    --n-predict 512 \
    --color
fi
params ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "stop": [
3
+ "<|im_start|>",
4
+ "<|im_end|>"
5
+ ],
6
+ "temperature": 0.6,
7
+ "min_p" : 0.00,
8
+ "repeat_penalty" : 1.0,
9
+ "presence_penalty" : 1.5,
10
+ "top_k" : 20,
11
+ "top_p" : 0.95,
12
+ "num_predict" : 32768,
13
+ "num_ctx": 40960
14
+ }