thiswillbeyourgithub Claude Opus 4.8 committed on
Commit ·
72a6e63
1
Parent(s): 422d652
smoothquant: fix istupakov int8 size note comparing output to itself
Browse files
The post-export download-size note stat'd model_dir/encoder-model.int8.onnx
as the istupakov baseline, but that is the same path the script writes its
output to when --out-name is the canonical encoder-model.int8.onnx, so it read
its own freshly-written output and printed the tautology
"841.6 MB (istupakov int8 is 841.6 MB)".
The real upstream encoder-model.int8.onnx on HF
(istupakov/parakeet-tdt-0.6b-v3-onnx) is 652,183,999 B (622 MiB), not 841.6 MB.
Hardcode that as ISTUPAKOV_INT8_ENCODER_BYTES (sourced from HF, dated) and only
stat an on-disk file when it is a different path than the output.
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
scripts/quantize-int8-smoothquant.py
CHANGED
|
@@ -198,6 +198,16 @@ DEFAULT_CALIB_DIR = "calibration_audio"
|
|
| 198 |
|
| 199 |
SAMPLE_RATE = 16000
|
| 200 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 201 |
|
| 202 |
def expand_audio(inputs):
|
| 203 |
"""Resolve --audio entries (files and/or folders) to a flat list of audio files.
|
|
@@ -543,8 +553,17 @@ def main():
|
|
| 543 |
logger.info(f"[sq] pruned {pruned} orphaned initializer(s) (folded smooth scales)")
|
| 544 |
|
| 545 |
out_size = os.path.getsize(out_encoder)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 546 |
baseline = model_dir / "encoder-model.int8.onnx"
|
| 547 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 548 |
logger.info(f"[sq] done in {dt:.0f}s -> {out_encoder.name} {human(out_size)}{base_note}")
|
| 549 |
|
| 550 |
# Fidelity smoke test (NOT just shape): run one calibration window through both
|
|
|
|
| 198 |
|
| 199 |
SAMPLE_RATE = 16000
|
| 200 |
|
| 201 |
+
# Upstream istupakov int8 encoder size, for the post-export download-size note.
|
| 202 |
+
# Measured from HF on 2026-06-09:
|
| 203 |
+
# istupakov/parakeet-tdt-0.6b-v3-onnx / encoder-model.int8.onnx = 652,183,999 B.
|
| 204 |
+
# Hardcoded because this script's own output usually overwrites that filename in
|
| 205 |
+
# the model dir (when --out-name is the canonical encoder-model.int8.onnx), so the
|
| 206 |
+
# on-disk copy can't be stat'd as a baseline without reading our own output back.
|
| 207 |
+
# NOTE: istupakov also quantizes the convs (--op-types MatMul,Conv), which is why
|
| 208 |
+
# their encoder is smaller than this script's MatMul-only default.
|
| 209 |
+
ISTUPAKOV_INT8_ENCODER_BYTES = 652_183_999
|
| 210 |
+
|
| 211 |
|
| 212 |
def expand_audio(inputs):
|
| 213 |
"""Resolve --audio entries (files and/or folders) to a flat list of audio files.
|
|
|
|
| 553 |
logger.info(f"[sq] pruned {pruned} orphaned initializer(s) (folded smooth scales)")
|
| 554 |
|
| 555 |
out_size = os.path.getsize(out_encoder)
|
| 556 |
+
# Download-size comparison vs the upstream istupakov int8 encoder. Only stat an
|
| 557 |
+
# on-disk istupakov file when it is a DIFFERENT path than our output: when
|
| 558 |
+
# --out-name is the canonical encoder-model.int8.onnx, out_encoder overwrites
|
| 559 |
+
# that file, so stat'ing it would read our own output back and print the
|
| 560 |
+
# tautology "X (istupakov int8 is X)". Otherwise fall back to the HF size.
|
| 561 |
baseline = model_dir / "encoder-model.int8.onnx"
|
| 562 |
+
if baseline.exists() and baseline.resolve() != out_encoder.resolve():
|
| 563 |
+
base_bytes = os.path.getsize(baseline)
|
| 564 |
+
else:
|
| 565 |
+
base_bytes = ISTUPAKOV_INT8_ENCODER_BYTES
|
| 566 |
+
base_note = f" (istupakov int8 is {human(base_bytes)})"
|
| 567 |
logger.info(f"[sq] done in {dt:.0f}s -> {out_encoder.name} {human(out_size)}{base_note}")
|
| 568 |
|
| 569 |
# Fidelity smoke test (NOT just shape): run one calibration window through both
|