{ "model": "WaveCut/ERNIE-Image-Turbo-SDNQ-uint4-static", "device": "NVIDIA RTX 6000 Ada Generation", "torch": "2.8.0+cu128", "cases": [ { "run": "warmup", "prompt_id": "00-cyrillic-poster", "width": 1024, "height": 1024, "seconds": 42.33458385616541, "stage_seconds_sum": 41.638948045670986, "unattributed_seconds": 0.6956358104944229, "gpu_start_mib": 10172, "gpu_end_mib": 15368, "torch_peak_allocated_mib": 12175, "torch_peak_reserved_mib": 14848, "stages": { "pe.forward": { "seconds": 24.21831013262272, "calls": 415 }, "text_encoder.forward": { "seconds": 0.04355049878358841, "calls": 1 }, "transformer.forward": { "seconds": 17.202288791537285, "calls": 8 }, "vae.decode": { "seconds": 0.1747986227273941, "calls": 1 } }, "use_quantized_matmul": false, "image": "/root/ernie-qmm-debug/00-cyrillic-poster_noqmm_warmup.png" }, { "run": "hot_repeat", "prompt_id": "00-cyrillic-poster", "width": 1024, "height": 1024, "seconds": 24.803174294531345, "stage_seconds_sum": 24.33856651186943, "unattributed_seconds": 0.4646077826619148, "gpu_start_mib": 10278, "gpu_end_mib": 15318, "torch_peak_allocated_mib": 12175, "torch_peak_reserved_mib": 14798, "stages": { "pe.forward": { "seconds": 16.506547823548317, "calls": 421 }, "text_encoder.forward": { "seconds": 0.040486179292201996, "calls": 1 }, "transformer.forward": { "seconds": 7.632116124033928, "calls": 8 }, "vae.decode": { "seconds": 0.15941638499498367, "calls": 1 } }, "use_quantized_matmul": false, "image": "/root/ernie-qmm-debug/00-cyrillic-poster_noqmm_hot_repeat.png" }, { "run": "warmup", "prompt_id": "02-technical-diagram", "width": 1200, "height": 896, "seconds": 94.24524058401585, "stage_seconds_sum": 92.17488968372345, "unattributed_seconds": 2.0703509002923965, "gpu_start_mib": 10278, "gpu_end_mib": 15912, "torch_peak_allocated_mib": 12259, "torch_peak_reserved_mib": 15392, "stages": { "pe.forward": { "seconds": 80.31031914055347, "calls": 2048 }, "text_encoder.forward": { "seconds": 0.11907234787940979, "calls": 1 }, "transformer.forward": { "seconds": 11.57931386679411, "calls": 8 }, "vae.decode": { "seconds": 0.16618432849645615, "calls": 1 } }, "use_quantized_matmul": false, "image": "/root/ernie-qmm-debug/02-technical-diagram_noqmm_warmup.png" }, { "run": "hot_repeat", "prompt_id": "02-technical-diagram", "width": 1200, "height": 896, "seconds": 94.40739246457815, "stage_seconds_sum": 92.33396648615599, "unattributed_seconds": 2.073425978422165, "gpu_start_mib": 10278, "gpu_end_mib": 15910, "torch_peak_allocated_mib": 12259, "torch_peak_reserved_mib": 15390, "stages": { "pe.forward": { "seconds": 80.37759981304407, "calls": 2048 }, "text_encoder.forward": { "seconds": 0.1175723597407341, "calls": 1 }, "transformer.forward": { "seconds": 11.67346066236496, "calls": 8 }, "vae.decode": { "seconds": 0.16533365100622177, "calls": 1 } }, "use_quantized_matmul": false, "image": "/root/ernie-qmm-debug/02-technical-diagram_noqmm_hot_repeat.png" }, { "run": "warmup", "prompt_id": "00-cyrillic-poster", "width": 1024, "height": 1024, "seconds": 47.86481238901615, "stage_seconds_sum": 47.40215189009905, "unattributed_seconds": 0.4626604989171028, "gpu_start_mib": 10280, "gpu_end_mib": 15354, "torch_peak_allocated_mib": 12175, "torch_peak_reserved_mib": 14832, "stages": { "pe.forward": { "seconds": 28.630388408899307, "calls": 408 }, "text_encoder.forward": { "seconds": 3.4749386832118034, "calls": 1 }, "transformer.forward": { "seconds": 15.150532938539982, "calls": 8 }, "vae.decode": { "seconds": 0.14629185944795609, "calls": 1 } }, "use_quantized_matmul": true, "image": "/root/ernie-qmm-debug/00-cyrillic-poster_qmm_warmup.png" }, { "run": "hot_repeat", "prompt_id": "00-cyrillic-poster", "width": 1024, "height": 1024, "seconds": 29.076967030763626, "stage_seconds_sum": 28.608746394515038, "unattributed_seconds": 0.46822063624858856, "gpu_start_mib": 10282, "gpu_end_mib": 15334, "torch_peak_allocated_mib": 12175, "torch_peak_reserved_mib": 14812, "stages": { "pe.forward": { "seconds": 16.936102136969566, "calls": 438 }, "text_encoder.forward": { "seconds": 3.2992821782827377, "calls": 1 }, "transformer.forward": { "seconds": 8.225335031747818, "calls": 8 }, "vae.decode": { "seconds": 0.14802704751491547, "calls": 1 } }, "use_quantized_matmul": true, "image": "/root/ernie-qmm-debug/00-cyrillic-poster_qmm_hot_repeat.png" }, { "run": "warmup", "prompt_id": "02-technical-diagram", "width": 1200, "height": 896, "seconds": 100.24962972849607, "stage_seconds_sum": 98.18934270739555, "unattributed_seconds": 2.060287021100521, "gpu_start_mib": 10282, "gpu_end_mib": 15988, "torch_peak_allocated_mib": 12259, "torch_peak_reserved_mib": 15466, "stages": { "pe.forward": { "seconds": 83.79404803365469, "calls": 2048 }, "text_encoder.forward": { "seconds": 3.445981025695801, "calls": 1 }, "transformer.forward": { "seconds": 10.796071700751781, "calls": 8 }, "vae.decode": { "seconds": 0.15324194729328156, "calls": 1 } }, "use_quantized_matmul": true, "image": "/root/ernie-qmm-debug/02-technical-diagram_qmm_warmup.png" }, { "run": "hot_repeat", "prompt_id": "02-technical-diagram", "width": 1200, "height": 896, "seconds": 38.25821412354708, "stage_seconds_sum": 37.577924594283104, "unattributed_seconds": 0.6802895292639732, "gpu_start_mib": 10282, "gpu_end_mib": 15474, "torch_peak_allocated_mib": 12244, "torch_peak_reserved_mib": 14952, "stages": { "pe.forward": { "seconds": 25.347995460033417, "calls": 638 }, "text_encoder.forward": { "seconds": 3.2810458168387413, "calls": 1 }, "transformer.forward": { "seconds": 8.796003498136997, "calls": 8 }, "vae.decode": { "seconds": 0.15287981927394867, "calls": 1 } }, "use_quantized_matmul": true, "image": "/root/ernie-qmm-debug/02-technical-diagram_qmm_hot_repeat.png" } ], "sdnq": "0.1.9" }