ERNIE-Image-Turbo-SDNQ-uint4-static / metrics /stage_debug_metrics.json
WaveCut's picture
Enable quantized matmul and refresh PE-off benchmark
687a1fe verified
{
"model": "WaveCut/ERNIE-Image-Turbo-SDNQ-uint4-static",
"device": "NVIDIA RTX 6000 Ada Generation",
"torch": "2.8.0+cu128",
"cases": [
{
"run": "warmup",
"prompt_id": "00-cyrillic-poster",
"width": 1024,
"height": 1024,
"seconds": 42.33458385616541,
"stage_seconds_sum": 41.638948045670986,
"unattributed_seconds": 0.6956358104944229,
"gpu_start_mib": 10172,
"gpu_end_mib": 15368,
"torch_peak_allocated_mib": 12175,
"torch_peak_reserved_mib": 14848,
"stages": {
"pe.forward": {
"seconds": 24.21831013262272,
"calls": 415
},
"text_encoder.forward": {
"seconds": 0.04355049878358841,
"calls": 1
},
"transformer.forward": {
"seconds": 17.202288791537285,
"calls": 8
},
"vae.decode": {
"seconds": 0.1747986227273941,
"calls": 1
}
},
"use_quantized_matmul": false,
"image": "/root/ernie-qmm-debug/00-cyrillic-poster_noqmm_warmup.png"
},
{
"run": "hot_repeat",
"prompt_id": "00-cyrillic-poster",
"width": 1024,
"height": 1024,
"seconds": 24.803174294531345,
"stage_seconds_sum": 24.33856651186943,
"unattributed_seconds": 0.4646077826619148,
"gpu_start_mib": 10278,
"gpu_end_mib": 15318,
"torch_peak_allocated_mib": 12175,
"torch_peak_reserved_mib": 14798,
"stages": {
"pe.forward": {
"seconds": 16.506547823548317,
"calls": 421
},
"text_encoder.forward": {
"seconds": 0.040486179292201996,
"calls": 1
},
"transformer.forward": {
"seconds": 7.632116124033928,
"calls": 8
},
"vae.decode": {
"seconds": 0.15941638499498367,
"calls": 1
}
},
"use_quantized_matmul": false,
"image": "/root/ernie-qmm-debug/00-cyrillic-poster_noqmm_hot_repeat.png"
},
{
"run": "warmup",
"prompt_id": "02-technical-diagram",
"width": 1200,
"height": 896,
"seconds": 94.24524058401585,
"stage_seconds_sum": 92.17488968372345,
"unattributed_seconds": 2.0703509002923965,
"gpu_start_mib": 10278,
"gpu_end_mib": 15912,
"torch_peak_allocated_mib": 12259,
"torch_peak_reserved_mib": 15392,
"stages": {
"pe.forward": {
"seconds": 80.31031914055347,
"calls": 2048
},
"text_encoder.forward": {
"seconds": 0.11907234787940979,
"calls": 1
},
"transformer.forward": {
"seconds": 11.57931386679411,
"calls": 8
},
"vae.decode": {
"seconds": 0.16618432849645615,
"calls": 1
}
},
"use_quantized_matmul": false,
"image": "/root/ernie-qmm-debug/02-technical-diagram_noqmm_warmup.png"
},
{
"run": "hot_repeat",
"prompt_id": "02-technical-diagram",
"width": 1200,
"height": 896,
"seconds": 94.40739246457815,
"stage_seconds_sum": 92.33396648615599,
"unattributed_seconds": 2.073425978422165,
"gpu_start_mib": 10278,
"gpu_end_mib": 15910,
"torch_peak_allocated_mib": 12259,
"torch_peak_reserved_mib": 15390,
"stages": {
"pe.forward": {
"seconds": 80.37759981304407,
"calls": 2048
},
"text_encoder.forward": {
"seconds": 0.1175723597407341,
"calls": 1
},
"transformer.forward": {
"seconds": 11.67346066236496,
"calls": 8
},
"vae.decode": {
"seconds": 0.16533365100622177,
"calls": 1
}
},
"use_quantized_matmul": false,
"image": "/root/ernie-qmm-debug/02-technical-diagram_noqmm_hot_repeat.png"
},
{
"run": "warmup",
"prompt_id": "00-cyrillic-poster",
"width": 1024,
"height": 1024,
"seconds": 47.86481238901615,
"stage_seconds_sum": 47.40215189009905,
"unattributed_seconds": 0.4626604989171028,
"gpu_start_mib": 10280,
"gpu_end_mib": 15354,
"torch_peak_allocated_mib": 12175,
"torch_peak_reserved_mib": 14832,
"stages": {
"pe.forward": {
"seconds": 28.630388408899307,
"calls": 408
},
"text_encoder.forward": {
"seconds": 3.4749386832118034,
"calls": 1
},
"transformer.forward": {
"seconds": 15.150532938539982,
"calls": 8
},
"vae.decode": {
"seconds": 0.14629185944795609,
"calls": 1
}
},
"use_quantized_matmul": true,
"image": "/root/ernie-qmm-debug/00-cyrillic-poster_qmm_warmup.png"
},
{
"run": "hot_repeat",
"prompt_id": "00-cyrillic-poster",
"width": 1024,
"height": 1024,
"seconds": 29.076967030763626,
"stage_seconds_sum": 28.608746394515038,
"unattributed_seconds": 0.46822063624858856,
"gpu_start_mib": 10282,
"gpu_end_mib": 15334,
"torch_peak_allocated_mib": 12175,
"torch_peak_reserved_mib": 14812,
"stages": {
"pe.forward": {
"seconds": 16.936102136969566,
"calls": 438
},
"text_encoder.forward": {
"seconds": 3.2992821782827377,
"calls": 1
},
"transformer.forward": {
"seconds": 8.225335031747818,
"calls": 8
},
"vae.decode": {
"seconds": 0.14802704751491547,
"calls": 1
}
},
"use_quantized_matmul": true,
"image": "/root/ernie-qmm-debug/00-cyrillic-poster_qmm_hot_repeat.png"
},
{
"run": "warmup",
"prompt_id": "02-technical-diagram",
"width": 1200,
"height": 896,
"seconds": 100.24962972849607,
"stage_seconds_sum": 98.18934270739555,
"unattributed_seconds": 2.060287021100521,
"gpu_start_mib": 10282,
"gpu_end_mib": 15988,
"torch_peak_allocated_mib": 12259,
"torch_peak_reserved_mib": 15466,
"stages": {
"pe.forward": {
"seconds": 83.79404803365469,
"calls": 2048
},
"text_encoder.forward": {
"seconds": 3.445981025695801,
"calls": 1
},
"transformer.forward": {
"seconds": 10.796071700751781,
"calls": 8
},
"vae.decode": {
"seconds": 0.15324194729328156,
"calls": 1
}
},
"use_quantized_matmul": true,
"image": "/root/ernie-qmm-debug/02-technical-diagram_qmm_warmup.png"
},
{
"run": "hot_repeat",
"prompt_id": "02-technical-diagram",
"width": 1200,
"height": 896,
"seconds": 38.25821412354708,
"stage_seconds_sum": 37.577924594283104,
"unattributed_seconds": 0.6802895292639732,
"gpu_start_mib": 10282,
"gpu_end_mib": 15474,
"torch_peak_allocated_mib": 12244,
"torch_peak_reserved_mib": 14952,
"stages": {
"pe.forward": {
"seconds": 25.347995460033417,
"calls": 638
},
"text_encoder.forward": {
"seconds": 3.2810458168387413,
"calls": 1
},
"transformer.forward": {
"seconds": 8.796003498136997,
"calls": 8
},
"vae.decode": {
"seconds": 0.15287981927394867,
"calls": 1
}
},
"use_quantized_matmul": true,
"image": "/root/ernie-qmm-debug/02-technical-diagram_qmm_hot_repeat.png"
}
],
"sdnq": "0.1.9"
}