thiswillbeyourgithub Claude Opus 4.8 committed on
Commit ·
422d652
1
Parent(s): 83ec1af
smoothquant: honor --alpha (work around neural-compressor dropping SmoothQuant* extra_options)
Browse files
onnx-neural-compressor 1.0 silently drops the SmoothQuant* knobs set in
StaticQuantConfig.extra_options: smooth_quant_entry calls
Smoother.transform(**config.to_dict()), but StaticQuantConfig.to_dict()
buries SmoothQuantAlpha/AutoAlphaArgs/SmoothQuantFolding inside a nested
extra_options dict and never emits the top-level alpha/auto_alpha_args/folding
kwargs transform() reads, so it always ran at the default alpha=0.5 regardless
of --alpha. Wrap Smoother.transform to inject those values from extra_options,
armed by main() only after a runtime guard confirms the bug is still present
(cfg.to_dict() lacks a top-level 'alpha'), so a fixed future library disarms it.
Co-Authored-By: Claude Opus 4.8 <noreply@anthropic.com>
scripts/quantize-int8-smoothquant.py
CHANGED
|
@@ -160,6 +160,34 @@ def _safe_get_smooth_scales(self, alpha, target_list=[]):
|
|
| 160 |
_sq_core.Smoother._get_smooth_scale = _safe_get_smooth_scale
|
| 161 |
_sq_core.Smoother._get_smooth_scales = _safe_get_smooth_scales
|
| 162 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 163 |
# Audio file extensions recognised when an --audio entry is a folder (or the
|
| 164 |
# default ./calibration_audio folder is scanned). SmoothQuant calibrates on
|
| 165 |
# activations (not labels), so any speech clip works.
|
|
@@ -467,6 +495,22 @@ def main():
|
|
| 467 |
},
|
| 468 |
)
|
| 469 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 470 |
logger.info(f"[sq] SmoothQuant(alpha={alpha}) static int8, per-channel, "
|
| 471 |
f"calib={args.calibrate_method}, ops={op_types}, format={args.quant_format} ...")
|
| 472 |
logger.info(f"[sq] {human(os.path.getsize(in_encoder) + os.path.getsize(str(in_encoder) + '.data'))} fp32 encoder")
|
|
|
|
| 160 |
_sq_core.Smoother._get_smooth_scale = _safe_get_smooth_scale
|
| 161 |
_sq_core.Smoother._get_smooth_scales = _safe_get_smooth_scales
|
| 162 |
|
| 163 |
+
# --- alpha (and auto-alpha / folding) pass-through fix ------------------------
|
| 164 |
+
# onnx-neural-compressor 1.0 SILENTLY DROPS the SmoothQuant* knobs you set in
|
| 165 |
+
# StaticQuantConfig.extra_options. quantize() hands the StaticQuantConfig to
|
| 166 |
+
# smooth_quant_entry(), which smooths via `Smoother.transform(**config.to_dict())`
|
| 167 |
+
# -- but StaticQuantConfig.to_dict() buries SmoothQuantAlpha / AutoAlphaArgs /
|
| 168 |
+
# SmoothQuantFolding inside a nested "extra_options" dict and never emits the
|
| 169 |
+
# top-level `alpha` / `auto_alpha_args` / `folding` kwargs that transform() reads.
|
| 170 |
+
# So transform() ALWAYS falls back to its hard-coded defaults (alpha=0.5, the
|
| 171 |
+
# [0.3,0.7] auto grid) regardless of --alpha. The intended path is the dedicated
|
| 172 |
+
# SmoothQuantConfig (whose to_dict() does surface those names), but quantize()
|
| 173 |
+
# never builds one. We wrap transform() to inject the values from extra_options
|
| 174 |
+
# under the names transform() expects. main() only ARMS this (populates
|
| 175 |
+
# _SMOOTH_OVERRIDE) after a runtime guard confirms the bug is still present, so a
|
| 176 |
+
# future fixed library that forwards alpha itself transparently disables the shim.
|
| 177 |
+
# Like the layout shim above, this reaches into library internals and may need
|
| 178 |
+
# revisiting on a neural-compressor upgrade; contained to this export script.
|
| 179 |
+
_SMOOTH_OVERRIDE = {}
|
| 180 |
+
_orig_transform = _sq_core.Smoother.transform
|
| 181 |
+
|
| 182 |
+
|
| 183 |
+
def _forced_transform(self, *args, **kwargs):
    """Delegate to the saved ``Smoother.transform`` with overrides applied.

    While ``_SMOOTH_OVERRIDE`` is empty the call is forwarded with the
    caller's keyword arguments as given. Once it holds entries, each one
    replaces (or adds) the same-named keyword argument before the call is
    handed to ``_orig_transform``. Positional arguments pass through as-is.
    """
    # In a dict display the later unpacking wins, so override entries take
    # precedence over the caller's kwargs; an empty override changes nothing.
    effective_kwargs = {**kwargs, **_SMOOTH_OVERRIDE}
    return _orig_transform(self, *args, **effective_kwargs)
|
| 187 |
+
|
| 188 |
+
|
| 189 |
+
_sq_core.Smoother.transform = _forced_transform
|
| 190 |
+
|
| 191 |
# Audio file extensions recognised when an --audio entry is a folder (or the
|
| 192 |
# default ./calibration_audio folder is scanned). SmoothQuant calibrates on
|
| 193 |
# activations (not labels), so any speech clip works.
|
|
|
|
| 495 |
},
|
| 496 |
)
|
| 497 |
|
| 498 |
+
# Arm the alpha pass-through shim (see _forced_transform): translate the
|
| 499 |
+
# SmoothQuant* keys we set in extra_options into the top-level transform()
|
| 500 |
+
# kwargs the library forgets to forward. Guard: if a future neural-compressor
|
| 501 |
+
# fixes this, cfg.to_dict() will surface a top-level "alpha" on its own, so we
|
| 502 |
+
# leave the shim disarmed (and say so) rather than double-driving transform().
|
| 503 |
+
eo = cfg.extra_options
|
| 504 |
+
if "alpha" in cfg.to_dict():
|
| 505 |
+
logger.info("[sq] note: onnx-neural-compressor now forwards SmoothQuantAlpha "
|
| 506 |
+
"itself; the alpha pass-through shim is obsolete and stays disarmed.")
|
| 507 |
+
else:
|
| 508 |
+
_SMOOTH_OVERRIDE.update({
|
| 509 |
+
"alpha": eo["SmoothQuantAlpha"],
|
| 510 |
+
"folding": eo["SmoothQuantFolding"],
|
| 511 |
+
"auto_alpha_args": eo["AutoAlphaArgs"],
|
| 512 |
+
})
|
| 513 |
+
|
| 514 |
logger.info(f"[sq] SmoothQuant(alpha={alpha}) static int8, per-channel, "
|
| 515 |
f"calib={args.calibrate_method}, ops={op_types}, format={args.quant_format} ...")
|
| 516 |
logger.info(f"[sq] {human(os.path.getsize(in_encoder) + os.path.getsize(str(in_encoder) + '.data'))} fp32 encoder")
|