qwen3.5-4b-code-forged-GGUF / qwen3.5-4b-code-forged-GGUF.alloy.json
EnricoFermi's picture
Correct qwen3.5-4b-code-forged-GGUF.alloy.json pass@1 to canonical evalplus convention (v1.0.0)
56b1bd6 verified
{
"name": "qwen3.5-4b-code-forged-GGUF",
"version": "1.0.0",
"description": "GGUF derivative of [`qwen3.5-4b-code-forged`](https://huggingface.co/continuum-ai/qwen3.5-4b-code-forged). Same forge journey as the parent (prune + train as published in the parent's alloy); this artifact adds a single 'gguf' transformation stage to produce a smaller / faster / more-portable variant of the same logical model. Inherits the parent's published benchmark results; per-variant evaluation samples will land in a follow-up release if/when per-variant benchmarks are run.",
"author": "continuum-ai",
"tags": [
"derivative",
"delta-forge",
"alloy-backfilled",
"gguf",
"forge-alloy"
],
"license": "apache-2.0",
"source": {
"baseModel": "Qwen/Qwen3.5-4B",
"architecture": "qwen3_5",
"isMoE": false
},
"stages": [
{
"type": "train",
"domain": "code",
"steps": 1000,
"learningRate": "2e-4"
},
{
"type": "quant",
"format": "gguf",
"quantTypes": [
"Q4_K_M"
],
"deviceTargets": []
},
{
"type": "eval",
"benchmarks": [
{
"name": "humaneval"
}
],
"compareToBase": true
},
{
"type": "quant",
"format": "gguf",
"quantTypes": [
"Q4_K_M",
"Q8_0"
],
"deviceTargets": [
"macbook-pro-m-series",
"macbook-air-16gb",
"rtx3060",
"rtx4070",
"rtx4090",
"iphone",
"android"
],
"notes": "GGUF quantization of the parent's safetensors weights via llama.cpp llama-quantize. Targets llama.cpp / Ollama / LM Studio / koboldcpp inference runtimes. Q4_K_M and Q8_0 shipped together so users can pick the size/quality tier their hardware supports."
}
],
"cycles": 3,
"derivedFrom": {
"repo": "continuum-ai/qwen3.5-4b-code-forged",
"alloyHash": null,
"kind": "gguf"
},
"results": {
"completedAt": "2026-03-31T12:13:43-0500",
"baselinePerplexity": 3.0382,
"finalPerplexity": 2.3487,
"improvementPct": 22.7,
"benchmarks": [
{
"name": "perplexity",
"metrics": {
"baseline": 3.0382,
"final": 2.3487,
"improvement": 22.7
}
},
{
"name": "humaneval",
"subset": null,
"metrics": {
"status": "pending"
},
"submittedToLeaderboard": false
}
],
"hardwareVerified": [
{
"device": "NVIDIA GeForce RTX 5090",
"format": "fp16",
"verified": true
}
],
"samples": [],
"integrity": {
"trustLevel": "self-attested",
"code": {
"runner": "sentinel-ai/derive_alloy_from_parent (gguf)",
"version": "1.0",
"binaryHash": "sha256:derivation-tool-only"
},
"modelHash": "sha256:03dd512b17b85b9b4ee6614bc6dd46c08d0bc8e07b92f01b2934540e4f5cbb96",
"fileHashes": [
{
"filename": "qwen3.5-4b-code-forged-Q4_K_M.gguf",
"sha256": "15c8ebc22ac16e3e922041f25d285f8a322e228196de0e9b12592b8bf8b7646e",
"size": 2708797184
},
{
"filename": "qwen3.5-4b-code-forged-Q8_0.gguf",
"sha256": "c56465451bef33353a1f075d670d07bb11c11f60d4463c6bd4fb24f6155acd40",
"size": 4482395904
}
],
"datasets": [],
"attestedAt": "2026-04-08",
"parentAlloyHash": null
}
}
}