{"model": "VulnLLM-R-7B", "score": "1315/2189", "overall_pct": 60.07, "by_topic": {"SystemSecurity": "601/1065 (56.4%)", "Vulnerability": "196/334 (58.7%)", "ApplicationSecurity": "442/808 (54.7%)", "WebSecurity": "513/773 (66.4%)", "NetworkSecurity": "134/230 (58.3%)", "PenTest": "337/475 (70.9%)", "SoftwareSecurity": "127/232 (54.7%)", "MemorySafety": "26/48 (54.2%)", "Cryptography": "4/14 (28.6%)"}, "results": [{"id": "282cd7f6-86f9-4174-95c2-df26b2b6395e", "answer": "B", "llm_answer": "ABCD", "score": 0, "topics": ["SystemSecurity", "Vulnerability"], "timings": {"cache_n": 0, "prompt_n": 198, "prompt_ms": 195.236, "prompt_per_token_ms": 0.9860404040404039, "prompt_per_second": 1014.157225101928, "predicted_n": 3, "predicted_ms": 62.969, "predicted_per_token_ms": 20.98966666666667, "predicted_per_second": 47.64249074941638}, "tps": 47.64249074941638}, {"id": "28355cb5-4b9a-44b8-8419-38fd17d335e4", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["ApplicationSecurity"], "timings": {"cache_n": 58, "prompt_n": 117, "prompt_ms": 97.134, "prompt_per_token_ms": 0.8302051282051283, "prompt_per_second": 1204.5215887330903, "predicted_n": 2, "predicted_ms": 28.822, "predicted_per_token_ms": 14.411, "predicted_per_second": 69.39143709666227}, "tps": 69.39143709666227}, {"id": "27ea6d4b-b7f6-4370-8877-ba80887727ff", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 58, "prompt_n": 126, "prompt_ms": 96.474, "prompt_per_token_ms": 0.7656666666666667, "prompt_per_second": 1306.0513713539399, "predicted_n": 2, "predicted_ms": 28.752, "predicted_per_token_ms": 14.376, "predicted_per_second": 69.56037840845855}, "tps": 69.56037840845855}, {"id": "7a1db287-9f9b-4621-ba2c-9667a765bbab", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["NetworkSecurity"], "timings": {"cache_n": 60, "prompt_n": 107, "prompt_ms": 80.081, "prompt_per_token_ms": 0.7484205607476636, "prompt_per_second": 1336.1471510096026, "predicted_n": 2, "predicted_ms": 28.884, "predicted_per_token_ms": 14.442, "predicted_per_second": 69.24248719013987}, "tps": 69.24248719013987}, {"id": "a9c613bb-2b24-4a04-8bf5-fdabf6724322", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["WebSecurity", "PenTest"], "timings": {"cache_n": 58, "prompt_n": 141, "prompt_ms": 99.45, "prompt_per_token_ms": 0.7053191489361702, "prompt_per_second": 1417.7978883861238, "predicted_n": 2, "predicted_ms": 29.729, "predicted_per_token_ms": 14.8645, "predicted_per_second": 67.27437855292811}, "tps": 67.27437855292811}, {"id": "b4a451ca-16d2-4ab7-ba99-6bc146ea5b86", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["SystemSecurity", "SoftwareSecurity"], "timings": {"cache_n": 59, "prompt_n": 146, "prompt_ms": 97.824, "prompt_per_token_ms": 0.6700273972602739, "prompt_per_second": 1492.4762839385019, "predicted_n": 2, "predicted_ms": 28.619, "predicted_per_token_ms": 14.3095, "predicted_per_second": 69.88364373318424}, "tps": 69.88364373318424}, {"id": "f4274a51-8a0e-47a8-b472-115c3fe85924", "answer": "AB", "llm_answer": "BC", "score": 0, "topics": ["WebSecurity"], "timings": {"cache_n": 59, "prompt_n": 200, "prompt_ms": 102.675, "prompt_per_token_ms": 0.513375, "prompt_per_second": 1947.8938397857316, "predicted_n": 3, "predicted_ms": 66.686, "predicted_per_token_ms": 22.22866666666667, "predicted_per_second": 44.98695378340281}, "tps": 44.98695378340281}, {"id": "780100a0-98da-49fc-9acb-476ab4edf3a9", "answer": "A", "llm_answer": "C", "score": 0, "topics": ["SystemSecurity"], "timings": {"cache_n": 58, "prompt_n": 168, "prompt_ms": 102.186, "prompt_per_token_ms": 0.6082500000000001, "prompt_per_second": 1644.0608302507192, "predicted_n": 2, "predicted_ms": 30.385, "predicted_per_token_ms": 15.1925, "predicted_per_second": 65.82195162086556}, "tps": 65.82195162086556}, {"id": "61dfb73d-1f20-4d58-b491-747e21cf66bb", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["PenTest", "SystemSecurity"], "timings": {"cache_n": 60, "prompt_n": 117, "prompt_ms": 96.061, "prompt_per_token_ms": 0.821034188034188, "prompt_per_second": 1217.9760777006277, "predicted_n": 2, "predicted_ms": 30.073, "predicted_per_token_ms": 15.0365, "predicted_per_second": 66.50483822698101}, "tps": 66.50483822698101}, {"id": "37c5e212-2d90-4a25-ab6e-e454daba4db3", "answer": "AD", "llm_answer": "A", "score": 0, "topics": ["WebSecurity", "ApplicationSecurity"], "timings": {"cache_n": 58, "prompt_n": 92, "prompt_ms": 94.198, "prompt_per_token_ms": 1.023891304347826, "prompt_per_second": 976.66617125629, "predicted_n": 2, "predicted_ms": 29.649, "predicted_per_token_ms": 14.8245, "predicted_per_second": 67.45590070491416}, "tps": 67.45590070491416}, {"id": "d02c0cdd-0b29-45c1-9a22-db3f518339ef", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["NetworkSecurity"], "timings": {"cache_n": 58, "prompt_n": 168, "prompt_ms": 99.947, "prompt_per_token_ms": 0.5949226190476191, "prompt_per_second": 1680.8908721622458, "predicted_n": 2, "predicted_ms": 29.551, "predicted_per_token_ms": 14.7755, "predicted_per_second": 67.67960475110826}, "tps": 67.67960475110826}, {"id": "eca9c056-32d5-466d-a94c-c144a8d959a2", "answer": "CD", "llm_answer": "D", "score": 0, "topics": ["WebSecurity"], "timings": {"cache_n": 58, "prompt_n": 99, "prompt_ms": 95.55, "prompt_per_token_ms": 0.9651515151515151, "prompt_per_second": 1036.1067503924646, "predicted_n": 2, "predicted_ms": 31.922, "predicted_per_token_ms": 15.961, "predicted_per_second": 62.652715995238395}, "tps": 62.652715995238395}, {"id": "657f1246-4867-434b-a538-1b160b916803", "answer": "BD", "llm_answer": "AD", "score": 0, "topics": ["PenTest", "SystemSecurity"], "timings": {"cache_n": 58, "prompt_n": 125, "prompt_ms": 87.2, "prompt_per_token_ms": 0.6976, "prompt_per_second": 1433.48623853211, "predicted_n": 3, "predicted_ms": 52.995, "predicted_per_token_ms": 17.665, "predicted_per_second": 56.60911406736484}, "tps": 56.60911406736484}, {"id": "cc9574c6-5d6e-4814-aeee-fd257d7b85e4", "answer": "AC", "llm_answer": "AC", "score": 1, "topics": ["SoftwareSecurity", "SystemSecurity", "Vulnerability"], "timings": {"cache_n": 59, "prompt_n": 120, "prompt_ms": 94.338, "prompt_per_token_ms": 0.7861499999999999, "prompt_per_second": 1272.0218787763151, "predicted_n": 2, "predicted_ms": 35.645, "predicted_per_token_ms": 17.8225, "predicted_per_second": 56.10885117127226}, "tps": 56.10885117127226}, {"id": "1c26e147-59bf-4fda-af21-f51bd064cf1c", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["SystemSecurity"], "timings": {"cache_n": 58, "prompt_n": 101, "prompt_ms": 95.285, "prompt_per_token_ms": 0.9434158415841584, "prompt_per_second": 1059.9779608542792, "predicted_n": 2, "predicted_ms": 29.642, "predicted_per_token_ms": 14.821, "predicted_per_second": 67.47183051076176}, "tps": 67.47183051076176}, {"id": "8b8b51f3-e3ba-4d6a-8f18-6fe28acf8e45", "answer": "B", "llm_answer": "D", "score": 0, "topics": ["SystemSecurity"], "timings": {"cache_n": 58, "prompt_n": 118, "prompt_ms": 96.766, "prompt_per_token_ms": 0.8200508474576271, "prompt_per_second": 1219.4365789636854, "predicted_n": 2, "predicted_ms": 28.913, "predicted_per_token_ms": 14.4565, "predicted_per_second": 69.1730363504306}, "tps": 69.1730363504306}, {"id": "940df4b2-5377-4a5a-84c1-dc2ace45059b", "answer": "ABC", "llm_answer": "C", "score": 0, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 59, "prompt_n": 144, "prompt_ms": 98.889, "prompt_per_token_ms": 0.6867291666666666, "prompt_per_second": 1456.1781391256864, "predicted_n": 2, "predicted_ms": 29.682, "predicted_per_token_ms": 14.841, "predicted_per_second": 67.38090425173506}, "tps": 67.38090425173506}, {"id": "9bd639c1-6e40-4de9-8198-6e42a8781743", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["SystemSecurity", "NetworkSecurity"], "timings": {"cache_n": 59, "prompt_n": 186, "prompt_ms": 100.604, "prompt_per_token_ms": 0.5408817204301075, "prompt_per_second": 1848.833048387738, "predicted_n": 2, "predicted_ms": 28.874, "predicted_per_token_ms": 14.437, "predicted_per_second": 69.26646810279144}, "tps": 69.26646810279144}, {"id": "34a393a9-dee4-47b0-bdd0-ebda4f64cbd3", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["ApplicationSecurity", "SystemSecurity"], "timings": {"cache_n": 62, "prompt_n": 75, "prompt_ms": 93.613, "prompt_per_token_ms": 1.2481733333333334, "prompt_per_second": 801.1707775629452, "predicted_n": 2, "predicted_ms": 29.171, "predicted_per_token_ms": 14.5855, "predicted_per_second": 68.56124232971102}, "tps": 68.56124232971102}, {"id": "33e5c40e-3a0b-4b21-a09a-c09aeb006e5b", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SystemSecurity", "PenTest"], "timings": {"cache_n": 58, "prompt_n": 118, "prompt_ms": 96.486, "prompt_per_token_ms": 0.817677966101695, "prompt_per_second": 1222.9753539373587, "predicted_n": 2, "predicted_ms": 28.99, "predicted_per_token_ms": 14.495, "predicted_per_second": 68.98930665746809}, "tps": 68.98930665746809}, {"id": "5969085a-2425-4d26-892f-a87a8bc3f539", "answer": "AC", "llm_answer": "B", "score": 0, "topics": ["SoftwareSecurity", "ApplicationSecurity", "Vulnerability"], "timings": {"cache_n": 58, "prompt_n": 234, "prompt_ms": 104.707, "prompt_per_token_ms": 0.4474658119658119, "prompt_per_second": 2234.8076059862283, "predicted_n": 2, "predicted_ms": 30.895, "predicted_per_token_ms": 15.4475, "predicted_per_second": 64.73539407671144}, "tps": 64.73539407671144}, {"id": "6df3cdb6-2751-4a64-8b74-e8595c0e9ffd", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 58, "prompt_n": 184, "prompt_ms": 100.257, "prompt_per_token_ms": 0.544875, "prompt_per_second": 1835.2833218628125, "predicted_n": 2, "predicted_ms": 27.044, "predicted_per_token_ms": 13.522, "predicted_per_second": 73.95355716609969}, "tps": 73.95355716609969}, {"id": "51c2e727-40d0-4783-a620-a924367a6069", "answer": "", "llm_answer": "D", "score": 0, "topics": ["NetworkSecurity", "PenTest"], "timings": {"cache_n": 59, "prompt_n": 119, "prompt_ms": 80.7, "prompt_per_token_ms": 0.6781512605042017, "prompt_per_second": 1474.5972738537794, "predicted_n": 2, "predicted_ms": 27.67, "predicted_per_token_ms": 13.835, "predicted_per_second": 72.28044813877845}, "tps": 72.28044813877845}, {"id": "a051df55-8b5f-4ba2-abb3-b33bfe754619", "answer": "AB", "llm_answer": "ACD", "score": 0, "topics": ["SystemSecurity", "SoftwareSecurity"], "timings": {"cache_n": 58, "prompt_n": 150, "prompt_ms": 97.22, "prompt_per_token_ms": 0.6481333333333333, "prompt_per_second": 1542.8924089693478, "predicted_n": 3, "predicted_ms": 58.326, "predicted_per_token_ms": 19.442, "predicted_per_second": 51.43503754757741}, "tps": 51.43503754757741}, {"id": "de04c560-e141-487c-abcb-22b7d14a0403", "answer": "ACD", "llm_answer": "ACD", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 60, "prompt_n": 192, "prompt_ms": 100.075, "prompt_per_token_ms": 0.5212239583333333, "prompt_per_second": 1918.561079190607, "predicted_n": 4, "predicted_ms": 87.829, "predicted_per_token_ms": 21.95725, "predicted_per_second": 45.54304386933701}, "tps": 45.54304386933701}, {"id": "2a288907-b402-4631-96e6-e039ddea1507", "answer": "AD", "llm_answer": "D", "score": 0, "topics": ["ApplicationSecurity"], "timings": {"cache_n": 59, "prompt_n": 133, "prompt_ms": 99.286, "prompt_per_token_ms": 0.7465112781954887, "prompt_per_second": 1339.564490461898, "predicted_n": 2, "predicted_ms": 31.976, "predicted_per_token_ms": 15.988, "predicted_per_second": 62.54691018263698}, "tps": 62.54691018263698}, {"id": "8a7f4b45-112c-4794-8a92-82b457cbd683", "answer": "BCD", "llm_answer": "C", "score": 0, "topics": ["ApplicationSecurity", "NetworkSecurity"], "timings": {"cache_n": 60, "prompt_n": 156, "prompt_ms": 97.462, "prompt_per_token_ms": 0.6247564102564103, "prompt_per_second": 1600.6238328784552, "predicted_n": 2, "predicted_ms": 28.952, "predicted_per_token_ms": 14.476, "predicted_per_second": 69.07985631389886}, "tps": 69.07985631389886}, {"id": "4ca8aaeb-4354-4343-9b06-9406e0e85118", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SystemSecurity", "Vulnerability"], "timings": {"cache_n": 59, "prompt_n": 81, "prompt_ms": 93.497, "prompt_per_token_ms": 1.154283950617284, "prompt_per_second": 866.337957367616, "predicted_n": 2, "predicted_ms": 40.412, "predicted_per_token_ms": 20.206, "predicted_per_second": 49.49025042066713}, "tps": 49.49025042066713}, {"id": "a684a5f8-bc60-4d08-bbdf-a8fdd340fe61", "answer": "A", "llm_answer": "B", "score": 0, "topics": ["SystemSecurity", "SoftwareSecurity"], "timings": {"cache_n": 58, "prompt_n": 121, "prompt_ms": 95.151, "prompt_per_token_ms": 0.7863719008264463, "prompt_per_second": 1271.6629357547477, "predicted_n": 2, "predicted_ms": 31.105, "predicted_per_token_ms": 15.5525, "predicted_per_second": 64.29834431763382}, "tps": 64.29834431763382}, {"id": "60631cf9-95c2-47c0-b0dc-1443524892a5", "answer": "AB", "llm_answer": "B", "score": 0, "topics": [], "timings": {"cache_n": 59, "prompt_n": 118, "prompt_ms": 94.16, "prompt_per_token_ms": 0.7979661016949152, "prompt_per_second": 1253.1860662701786, "predicted_n": 2, "predicted_ms": 30.671, "predicted_per_token_ms": 15.3355, "predicted_per_second": 65.20817710540902}, "tps": 65.20817710540902}, {"id": "f3d075bb-c0ec-4b0a-987a-3f44a3d0b106", "answer": "AC", "llm_answer": "AC", "score": 1, "topics": ["ApplicationSecurity"], "timings": {"cache_n": 59, "prompt_n": 142, "prompt_ms": 97.858, "prompt_per_token_ms": 0.6891408450704226, "prompt_per_second": 1451.0821803020704, "predicted_n": 2, "predicted_ms": 28.194, "predicted_per_token_ms": 14.097, "predicted_per_second": 70.93707881109457}, "tps": 70.93707881109457}, {"id": "88117990-6ba6-4386-865e-b4cacd4b2692", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["SoftwareSecurity", "SystemSecurity"], "timings": {"cache_n": 62, "prompt_n": 180, "prompt_ms": 85.244, "prompt_per_token_ms": 0.47357777777777776, "prompt_per_second": 2111.5855661395526, "predicted_n": 2, "predicted_ms": 28.734, "predicted_per_token_ms": 14.367, "predicted_per_second": 69.60395350455906}, "tps": 69.60395350455906}, {"id": "32112383-dc97-478c-b04c-3df08455507b", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SystemSecurity", "WebSecurity"], "timings": {"cache_n": 59, "prompt_n": 82, "prompt_ms": 93.112, "prompt_per_token_ms": 1.1355121951219511, "prompt_per_second": 880.6598505026205, "predicted_n": 2, "predicted_ms": 29.447, "predicted_per_token_ms": 14.7235, "predicted_per_second": 67.91863347709445}, "tps": 67.91863347709445}, {"id": "a894c7d1-ea25-406f-a6c0-ec7488969eb9", "answer": "BD", "llm_answer": "BD", "score": 1, "topics": ["SoftwareSecurity", "MemorySafety"], "timings": {"cache_n": 58, "prompt_n": 173, "prompt_ms": 99.737, "prompt_per_token_ms": 0.576514450867052, "prompt_per_second": 1734.561897791191, "predicted_n": 4, "predicted_ms": 86.695, "predicted_per_token_ms": 21.67375, "predicted_per_second": 46.13876232770056}, "tps": 46.13876232770056}, {"id": "e41b57c6-eba6-4a7d-ae9d-05238c751af6", "answer": "D", "llm_answer": "D", "score": 1, "topics": ["SystemSecurity", "PenTest"], "timings": {"cache_n": 59, "prompt_n": 124, "prompt_ms": 98.345, "prompt_per_token_ms": 0.7931048387096774, "prompt_per_second": 1260.8673547206265, "predicted_n": 2, "predicted_ms": 33.003, "predicted_per_token_ms": 16.5015, "predicted_per_second": 60.600551465018334}, "tps": 60.600551465018334}, {"id": "a860f5b8-a0cd-4b5c-b166-0943736080ef", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SystemSecurity"], "timings": {"cache_n": 59, "prompt_n": 164, "prompt_ms": 102.013, "prompt_per_token_ms": 0.6220304878048781, "prompt_per_second": 1607.6382421848193, "predicted_n": 2, "predicted_ms": 31.337, "predicted_per_token_ms": 15.6685, "predicted_per_second": 63.82231866483709}, "tps": 63.82231866483709}, {"id": "00d8980f-9175-4392-8464-e070450e42ff", "answer": "B", "llm_answer": "BCD", "score": 0, "topics": ["NetworkSecurity", "SystemSecurity"], "timings": {"cache_n": 59, "prompt_n": 135, "prompt_ms": 97.337, "prompt_per_token_ms": 0.7210148148148149, "prompt_per_second": 1386.9340538541355, "predicted_n": 64, "predicted_ms": 1853.805, "predicted_per_token_ms": 28.965703125, "predicted_per_second": 34.523587971766176}, "tps": 34.523587971766176}, {"id": "f87694cc-8af0-4e45-af96-862ea23f5fff", "answer": "ABC", "llm_answer": "ABCD", "score": 0, "topics": ["WebSecurity"], "timings": {"cache_n": 59, "prompt_n": 163, "prompt_ms": 89.532, "prompt_per_token_ms": 0.5492760736196319, "prompt_per_second": 1820.5781173211815, "predicted_n": 3, "predicted_ms": 56.889, "predicted_per_token_ms": 18.963, "predicted_per_second": 52.73427200337499}, "tps": 52.73427200337499}, {"id": "71254b0b-c546-493d-ab90-b9f8cc9d164f", "answer": "AC", "llm_answer": "ACD", "score": 0, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 59, "prompt_n": 134, "prompt_ms": 97.396, "prompt_per_token_ms": 0.7268358208955223, "prompt_per_second": 1375.8265226498008, "predicted_n": 3, "predicted_ms": 59.243, "predicted_per_token_ms": 19.747666666666667, "predicted_per_second": 50.63889404655402}, "tps": 50.63889404655402}, {"id": "11541420-16d3-44ce-98e9-44a046d318ce", "answer": "AC", "llm_answer": "AC", "score": 1, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 61, "prompt_n": 164, "prompt_ms": 99.397, "prompt_per_token_ms": 0.606079268292683, "prompt_per_second": 1649.9491936376348, "predicted_n": 3, "predicted_ms": 61.259, "predicted_per_token_ms": 20.419666666666668, "predicted_per_second": 48.9723958928484}, "tps": 48.9723958928484}, {"id": "c45ec093-be55-4207-8b3e-03054a8ccc5c", "answer": "AD", "llm_answer": "A", "score": 0, "topics": ["NetworkSecurity", "ApplicationSecurity"], "timings": {"cache_n": 59, "prompt_n": 234, "prompt_ms": 106.815, "prompt_per_token_ms": 0.45647435897435895, "prompt_per_second": 2190.703552871788, "predicted_n": 2, "predicted_ms": 29.693, "predicted_per_token_ms": 14.8465, "predicted_per_second": 67.35594247802513}, "tps": 67.35594247802513}, {"id": "ed76d844-e5ab-4036-b792-0cad86ea465f", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["Vulnerability"], "timings": {"cache_n": 59, "prompt_n": 115, "prompt_ms": 95.451, "prompt_per_token_ms": 0.8300086956521738, "prompt_per_second": 1204.806654723366, "predicted_n": 2, "predicted_ms": 29.292, "predicted_per_token_ms": 14.646, "predicted_per_second": 68.27802813054758}, "tps": 68.27802813054758}, {"id": "69fca540-6531-4597-9d4c-765d8a2a9f07", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 59, "prompt_n": 176, "prompt_ms": 101.808, "prompt_per_token_ms": 0.5784545454545454, "prompt_per_second": 1728.744303001729, "predicted_n": 2, "predicted_ms": 31.989, "predicted_per_token_ms": 15.9945, "predicted_per_second": 62.52149176279346}, "tps": 62.52149176279346}, {"id": "0ac6a24d-1a21-4c95-b60c-817a35ab6a13", "answer": "ABD", "llm_answer": "B", "score": 0, "topics": ["SystemSecurity", "ApplicationSecurity", "PenTest"], "timings": {"cache_n": 59, "prompt_n": 116, "prompt_ms": 94.96, "prompt_per_token_ms": 0.8186206896551723, "prompt_per_second": 1221.5669755686606, "predicted_n": 2, "predicted_ms": 30.301, "predicted_per_token_ms": 15.1505, "predicted_per_second": 66.00442229629385}, "tps": 66.00442229629385}, {"id": "85bd0067-1e1f-4a00-a3dd-7e949c56ebe8", "answer": "AC", "llm_answer": "A", "score": 0, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 58, "prompt_n": 118, "prompt_ms": 95.063, "prompt_per_token_ms": 0.8056186440677966, "prompt_per_second": 1241.2820971355839, "predicted_n": 2, "predicted_ms": 28.984, "predicted_per_token_ms": 14.492, "predicted_per_second": 69.0035881865857}, "tps": 69.0035881865857}, {"id": "ebca777b-0bd0-45db-bd5e-d941d978a531", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 58, "prompt_n": 124, "prompt_ms": 96.339, "prompt_per_token_ms": 0.7769274193548387, "prompt_per_second": 1287.1215188033923, "predicted_n": 2, "predicted_ms": 27.657, "predicted_per_token_ms": 13.8285, "predicted_per_second": 72.31442311168962}, "tps": 72.31442311168962}, {"id": "27da813c-70c2-4126-81d0-03f7c23881fa", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["SystemSecurity"], "timings": {"cache_n": 59, "prompt_n": 124, "prompt_ms": 84.124, "prompt_per_token_ms": 0.6784193548387096, "prompt_per_second": 1474.0145499500738, "predicted_n": 2, "predicted_ms": 28.156, "predicted_per_token_ms": 14.078, "predicted_per_second": 71.03281716152863}, "tps": 71.03281716152863}, {"id": "2ce26b1c-7d8d-4aa1-af16-c5e6ab29e40b", "answer": "BD", "llm_answer": "B", "score": 0, "topics": ["Vulnerability", "SoftwareSecurity", "SystemSecurity"], "timings": {"cache_n": 58, "prompt_n": 145, "prompt_ms": 97.887, "prompt_per_token_ms": 0.6750827586206897, "prompt_per_second": 1481.299866172219, "predicted_n": 2, "predicted_ms": 28.978, "predicted_per_token_ms": 14.489, "predicted_per_second": 69.01787562978811}, "tps": 69.01787562978811}, {"id": "3e3d7df5-d964-4bd0-9f77-b9650847e2fd", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["SystemSecurity", "NetworkSecurity"], "timings": {"cache_n": 61, "prompt_n": 92, "prompt_ms": 92.466, "prompt_per_token_ms": 1.0050652173913044, "prompt_per_second": 994.9603097354703, "predicted_n": 2, "predicted_ms": 31.062, "predicted_per_token_ms": 15.531, "predicted_per_second": 64.38735432361084}, "tps": 64.38735432361084}, {"id": "94b59f6f-57f5-4ea3-af4f-f4e58ad1cdef", "answer": "AC", "llm_answer": "AC", "score": 1, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 58, "prompt_n": 99, "prompt_ms": 95.789, "prompt_per_token_ms": 0.9675656565656566, "prompt_per_second": 1033.5215943375545, "predicted_n": 4, "predicted_ms": 90.767, "predicted_per_token_ms": 22.69175, "predicted_per_second": 44.06887965890687}, "tps": 44.06887965890687}, {"id": "a09c67f4-cea8-4639-9d77-a4f414193533", "answer": "BD", "llm_answer": "BD", "score": 1, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 59, "prompt_n": 131, "prompt_ms": 97.565, "prompt_per_token_ms": 0.7447709923664122, "prompt_per_second": 1342.6946138471787, "predicted_n": 2, "predicted_ms": 29.905, "predicted_per_token_ms": 14.9525, "predicted_per_second": 66.87844841999666}, "tps": 66.87844841999666}, {"id": "bfde3b64-9ffd-4b6e-b39c-71fc772b4e5d", "answer": "AC", "llm_answer": "A", "score": 0, "topics": ["SystemSecurity", "Vulnerability"], "timings": {"cache_n": 59, "prompt_n": 159, "prompt_ms": 98.646, "prompt_per_token_ms": 0.6204150943396226, "prompt_per_second": 1611.8240982908583, "predicted_n": 2, "predicted_ms": 29.379, "predicted_per_token_ms": 14.6895, "predicted_per_second": 68.07583648184077}, "tps": 68.07583648184077}, {"id": "d1265f2e-60df-4260-bdd7-93c7beb12efc", "answer": "ACD", "llm_answer": "ACD", "score": 1, "topics": ["SystemSecurity"], "timings": {"cache_n": 58, "prompt_n": 139, "prompt_ms": 97.631, "prompt_per_token_ms": 0.7023812949640288, "prompt_per_second": 1423.7281191424854, "predicted_n": 3, "predicted_ms": 60.053, "predicted_per_token_ms": 20.017666666666667, "predicted_per_second": 49.955872312790376}, "tps": 49.955872312790376}, {"id": "9e6eabad-2a35-483a-97d5-58469bb4aace", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["WebSecurity", "SystemSecurity"], "timings": {"cache_n": 58, "prompt_n": 177, "prompt_ms": 101.469, "prompt_per_token_ms": 0.573271186440678, "prompt_per_second": 1744.3751293498508, "predicted_n": 2, "predicted_ms": 29.658, "predicted_per_token_ms": 14.829, "predicted_per_second": 67.43543057522422}, "tps": 67.43543057522422}, {"id": "edcddbbd-0985-44b4-a762-820d7d1d37c5", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["ApplicationSecurity", "WebSecurity"], "timings": {"cache_n": 59, "prompt_n": 100, "prompt_ms": 96.002, "prompt_per_token_ms": 0.96002, "prompt_per_second": 1041.6449657298808, "predicted_n": 2, "predicted_ms": 29.068, "predicted_per_token_ms": 14.534, "predicted_per_second": 68.8041832943443}, "tps": 68.8041832943443}, {"id": "639425de-9638-4a42-82f1-fa56fb29128b", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["ApplicationSecurity", "SystemSecurity"], "timings": {"cache_n": 62, "prompt_n": 147, "prompt_ms": 84.843, "prompt_per_token_ms": 0.5771632653061225, "prompt_per_second": 1732.612000990064, "predicted_n": 2, "predicted_ms": 27.065, "predicted_per_token_ms": 13.5325, "predicted_per_second": 73.89617587289857}, "tps": 73.89617587289857}, {"id": "dc2205c9-362d-4636-8d5e-7e363305650e", "answer": "BC", "llm_answer": "C", "score": 0, "topics": ["NetworkSecurity", "PenTest"], "timings": {"cache_n": 59, "prompt_n": 160, "prompt_ms": 98.34, "prompt_per_token_ms": 0.614625, "prompt_per_second": 1627.0083384177342, "predicted_n": 2, "predicted_ms": 35.403, "predicted_per_token_ms": 17.7015, "predicted_per_second": 56.49238765076406}, "tps": 56.49238765076406}, {"id": "38bd6938-5b55-487a-80a2-3d947c146965", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 60, "prompt_n": 117, "prompt_ms": 95.97, "prompt_per_token_ms": 0.8202564102564103, "prompt_per_second": 1219.1309784307596, "predicted_n": 2, "predicted_ms": 28.986, "predicted_per_token_ms": 14.493, "predicted_per_second": 68.99882701994066}, "tps": 68.99882701994066}, {"id": "2290153c-a8db-4850-abfa-5e0839fde675", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 62, "prompt_n": 93, "prompt_ms": 91.63, "prompt_per_token_ms": 0.985268817204301, "prompt_per_second": 1014.9514351195023, "predicted_n": 2, "predicted_ms": 30.552, "predicted_per_token_ms": 15.276, "predicted_per_second": 65.46216286986122}, "tps": 65.46216286986122}, {"id": "21db3c73-8ef3-41ee-b974-cb1545d1dfc8", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SystemSecurity", "PenTest"], "timings": {"cache_n": 58, "prompt_n": 104, "prompt_ms": 95.418, "prompt_per_token_ms": 0.9174807692307693, "prompt_per_second": 1089.9411012597204, "predicted_n": 2, "predicted_ms": 31.044, "predicted_per_token_ms": 15.522, "predicted_per_second": 64.42468754026542}, "tps": 64.42468754026542}, {"id": "29c63c06-5d66-4ea4-995e-0688ec2b6f95", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SystemSecurity", "WebSecurity", "Vulnerability"], "timings": {"cache_n": 60, "prompt_n": 133, "prompt_ms": 97.661, "prompt_per_token_ms": 0.7342932330827068, "prompt_per_second": 1361.853759433141, "predicted_n": 2, "predicted_ms": 31.326, "predicted_per_token_ms": 15.663, "predicted_per_second": 63.84472961757007}, "tps": 63.84472961757007}, {"id": "63452203-107c-43a4-a0ac-9fa90a26e079", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SystemSecurity", "WebSecurity"], "timings": {"cache_n": 61, "prompt_n": 125, "prompt_ms": 97.49, "prompt_per_token_ms": 0.77992, "prompt_per_second": 1282.1827879782543, "predicted_n": 2, "predicted_ms": 29.339, "predicted_per_token_ms": 14.6695, "predicted_per_second": 68.16864923821535}, "tps": 68.16864923821535}, {"id": "ed005bfe-e198-41b0-9cee-de25b381d63f", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SystemSecurity"], "timings": {"cache_n": 59, "prompt_n": 122, "prompt_ms": 96.367, "prompt_per_token_ms": 0.7898934426229508, "prompt_per_second": 1265.993545508317, "predicted_n": 2, "predicted_ms": 29.818, "predicted_per_token_ms": 14.909, "predicted_per_second": 67.0735797169495}, "tps": 67.0735797169495}, {"id": "6d475412-2961-40e2-82d0-b97e4700ccfc", "answer": "ABC", "llm_answer": "AB", "score": 0, "topics": ["Cryptography"], "timings": {"cache_n": 58, "prompt_n": 155, "prompt_ms": 97.594, "prompt_per_token_ms": 0.6296387096774193, "prompt_per_second": 1588.2123901059492, "predicted_n": 2, "predicted_ms": 30.26, "predicted_per_token_ms": 15.13, "predicted_per_second": 66.09385327164573}, "tps": 66.09385327164573}, {"id": "d1c03b08-f8bb-471d-a018-1e4fef50dc0b", "answer": "D", "llm_answer": "D", "score": 1, "topics": ["ApplicationSecurity", "SystemSecurity"], "timings": {"cache_n": 60, "prompt_n": 144, "prompt_ms": 97.717, "prompt_per_token_ms": 0.6785902777777778, "prompt_per_second": 1473.6432759908716, "predicted_n": 2, "predicted_ms": 29.142, "predicted_per_token_ms": 14.571, "predicted_per_second": 68.6294694942008}, "tps": 68.6294694942008}, {"id": "2ae220d9-94f7-4c90-9ce9-b004d35c232b", "answer": "A", "llm_answer": "AD", "score": 0, "topics": ["SystemSecurity", "PenTest"], "timings": {"cache_n": 60, "prompt_n": 153, "prompt_ms": 83.395, "prompt_per_token_ms": 0.5450653594771242, "prompt_per_second": 1834.642364650159, "predicted_n": 3, "predicted_ms": 58.21, "predicted_per_token_ms": 19.403333333333332, "predicted_per_second": 51.53753650575503}, "tps": 51.53753650575503}, {"id": "38a24d56-4498-40f1-8a1a-0857180854fa", "answer": "AC", "llm_answer": "AC", "score": 1, "topics": ["MemorySafety"], "timings": {"cache_n": 59, "prompt_n": 168, "prompt_ms": 100.538, "prompt_per_token_ms": 0.5984404761904761, "prompt_per_second": 1671.009966380871, "predicted_n": 2, "predicted_ms": 29.243, "predicted_per_token_ms": 14.6215, "predicted_per_second": 68.39243579660089}, "tps": 68.39243579660089}, {"id": "ccb0c36d-8aa0-436a-8c29-e355ee84ef75", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SystemSecurity", "PenTest"], "timings": {"cache_n": 59, "prompt_n": 117, "prompt_ms": 96.331, "prompt_per_token_ms": 0.8233418803418804, "prompt_per_second": 1214.5622904361005, "predicted_n": 2, "predicted_ms": 31.682, "predicted_per_token_ms": 15.841, "predicted_per_second": 63.127327820213374}, "tps": 63.127327820213374}, {"id": "41bd101c-80a7-45af-99a5-90a6bc83db4f", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["SystemSecurity"], "timings": {"cache_n": 58, "prompt_n": 172, "prompt_ms": 100.382, "prompt_per_token_ms": 0.5836162790697674, "prompt_per_second": 1713.4546034149548, "predicted_n": 2, "predicted_ms": 28.66, "predicted_per_token_ms": 14.33, "predicted_per_second": 69.78367062107466}, "tps": 69.78367062107466}, {"id": "0e5de29b-9d0e-47dc-80e6-9ef68e329099", "answer": "CD", "llm_answer": "CD", "score": 1, "topics": ["NetworkSecurity"], "timings": {"cache_n": 62, "prompt_n": 103, "prompt_ms": 94.55, "prompt_per_token_ms": 0.9179611650485436, "prompt_per_second": 1089.3707033315707, "predicted_n": 3, "predicted_ms": 57.255, "predicted_per_token_ms": 19.085, "predicted_per_second": 52.39717055279014}, "tps": 52.39717055279014}, {"id": "3a26c0ee-6ddb-4900-a7b4-eea97826bf64", "answer": "ACD", "llm_answer": "ABCD", "score": 0, "topics": ["SystemSecurity", "SoftwareSecurity"], "timings": {"cache_n": 58, "prompt_n": 173, "prompt_ms": 100.248, "prompt_per_token_ms": 0.5794682080924856, "prompt_per_second": 1725.7202138696034, "predicted_n": 3, "predicted_ms": 52.649, "predicted_per_token_ms": 17.549666666666667, "predicted_per_second": 56.9811392429106}, "tps": 56.9811392429106}, {"id": "c60dde0b-fbcf-4b4c-b98b-05688c6af02d", "answer": "BD", "llm_answer": "BD", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 59, "prompt_n": 157, "prompt_ms": 87.113, "prompt_per_token_ms": 0.554859872611465, "prompt_per_second": 1802.256838818546, "predicted_n": 4, "predicted_ms": 89.082, "predicted_per_token_ms": 22.2705, "predicted_per_second": 44.902449428616336}, "tps": 44.902449428616336}, {"id": "4032e4d4-c876-47a5-9b78-ee2049bc871e", "answer": "BC", "llm_answer": "B", "score": 0, "topics": ["ApplicationSecurity", "SystemSecurity"], "timings": {"cache_n": 60, "prompt_n": 153, "prompt_ms": 97.172, "prompt_per_token_ms": 0.6351111111111111, "prompt_per_second": 1574.5276417074879, "predicted_n": 2, "predicted_ms": 28.761, "predicted_per_token_ms": 14.3805, "predicted_per_second": 69.53861131393207}, "tps": 69.53861131393207}, {"id": "fca502ab-a566-4f7a-89b7-d45e951502e0", "answer": "AB", "llm_answer": "AB", "score": 1, "topics": ["SystemSecurity", "WebSecurity"], "timings": {"cache_n": 62, "prompt_n": 99, "prompt_ms": 94.194, "prompt_per_token_ms": 0.9514545454545454, "prompt_per_second": 1051.0223581119817, "predicted_n": 2, "predicted_ms": 29.246, "predicted_per_token_ms": 14.623, "predicted_per_second": 68.38542022840731}, "tps": 68.38542022840731}, {"id": "0e616afd-3284-4d08-af2f-14a638db1336", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["ApplicationSecurity", "SoftwareSecurity"], "timings": {"cache_n": 58, "prompt_n": 86, "prompt_ms": 98.792, "prompt_per_token_ms": 1.1487441860465117, "prompt_per_second": 870.5158312413961, "predicted_n": 2, "predicted_ms": 28.823, "predicted_per_token_ms": 14.4115, "predicted_per_second": 69.38902959442112}, "tps": 69.38902959442112}, {"id": "dac4ab1b-de47-4192-838e-7bcff76c6d19", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SystemSecurity", "ApplicationSecurity", "PenTest"], "timings": {"cache_n": 58, "prompt_n": 103, "prompt_ms": 95.051, "prompt_per_token_ms": 0.9228252427184466, "prompt_per_second": 1083.6287887555102, "predicted_n": 2, "predicted_ms": 28.741, "predicted_per_token_ms": 14.3705, "predicted_per_second": 69.58700114818552}, "tps": 69.58700114818552}, {"id": "6da9d669-5654-4d82-9b42-15d8e18d3701", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 62, "prompt_n": 151, "prompt_ms": 97.913, "prompt_per_token_ms": 0.648430463576159, "prompt_per_second": 1542.1854094961855, "predicted_n": 2, "predicted_ms": 29.311, "predicted_per_token_ms": 14.6555, "predicted_per_second": 68.23376889222476}, "tps": 68.23376889222476}, {"id": "996638ad-eeef-48ee-81bc-ca03616b344f", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["ApplicationSecurity", "WebSecurity"], "timings": {"cache_n": 59, "prompt_n": 112, "prompt_ms": 94.693, "prompt_per_token_ms": 0.8454732142857143, "prompt_per_second": 1182.769581700865, "predicted_n": 2, "predicted_ms": 30.105, "predicted_per_token_ms": 15.0525, "predicted_per_second": 66.43414715163594}, "tps": 66.43414715163594}, {"id": "cc5410ee-4b25-4205-a81b-73f8dc93c05f", "answer": "AC", "llm_answer": "AC", "score": 1, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 59, "prompt_n": 118, "prompt_ms": 96.027, "prompt_per_token_ms": 0.8137881355932204, "prompt_per_second": 1228.8210607433325, "predicted_n": 2, "predicted_ms": 28.474, "predicted_per_token_ms": 14.237, "predicted_per_second": 70.23951675212474}, "tps": 70.23951675212474}, {"id": "e23d21db-5f7e-4bfe-b668-cf03722a22c6", "answer": "AD", "llm_answer": "A", "score": 0, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 58, "prompt_n": 142, "prompt_ms": 99.581, "prompt_per_token_ms": 0.701274647887324, "prompt_per_second": 1425.9748345567928, "predicted_n": 2, "predicted_ms": 29.404, "predicted_per_token_ms": 14.702, "predicted_per_second": 68.0179567405795}, "tps": 68.0179567405795}, {"id": "7923eb03-1ffa-49b0-b452-54a4fd2d78ea", "answer": "BC", "llm_answer": "ABCD", "score": 0, "topics": ["WebSecurity", "Vulnerability"], "timings": {"cache_n": 60, "prompt_n": 114, "prompt_ms": 82.722, "prompt_per_token_ms": 0.7256315789473684, "prompt_per_second": 1378.109813592515, "predicted_n": 8, "predicted_ms": 205.022, "predicted_per_token_ms": 25.62775, "predicted_per_second": 39.02020270995308}, "tps": 39.02020270995308}, {"id": "9fb580d7-9d6d-41c1-a2aa-fa02accd11b0", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["ApplicationSecurity", "WebSecurity", "PenTest"], "timings": {"cache_n": 59, "prompt_n": 129, "prompt_ms": 97.274, "prompt_per_token_ms": 0.754062015503876, "prompt_per_second": 1326.1508727923185, "predicted_n": 2, "predicted_ms": 29.124, "predicted_per_token_ms": 14.562, "predicted_per_second": 68.67188572998215}, "tps": 68.67188572998215}, {"id": "92453f4a-d1a4-4d72-94d0-bede5ea7acd4", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["WebSecurity", "Vulnerability"], "timings": {"cache_n": 60, "prompt_n": 115, "prompt_ms": 97.593, "prompt_per_token_ms": 0.8486347826086957, "prompt_per_second": 1178.363202278852, "predicted_n": 2, "predicted_ms": 31.0, "predicted_per_token_ms": 15.5, "predicted_per_second": 64.51612903225806}, "tps": 64.51612903225806}, {"id": "9c6104db-42c2-4f77-9a3b-e8473c0a998b", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SoftwareSecurity", "Vulnerability"], "timings": {"cache_n": 59, "prompt_n": 114, "prompt_ms": 96.84, "prompt_per_token_ms": 0.8494736842105264, "prompt_per_second": 1177.1995043370507, "predicted_n": 2, "predicted_ms": 32.2, "predicted_per_token_ms": 16.1, "predicted_per_second": 62.11180124223602}, "tps": 62.11180124223602}, {"id": "3dd06430-92b0-4b73-825a-c70c743050c1", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["SystemSecurity", "PenTest"], "timings": {"cache_n": 58, "prompt_n": 115, "prompt_ms": 96.404, "prompt_per_token_ms": 0.838295652173913, "prompt_per_second": 1192.8965603087008, "predicted_n": 2, "predicted_ms": 31.208, "predicted_per_token_ms": 15.604, "predicted_per_second": 64.0861317610869}, "tps": 64.0861317610869}, {"id": "5fdb6e82-4a63-4ad1-9df1-60e8b9bccbb0", "answer": "AC", "llm_answer": "A", "score": 0, "topics": ["SystemSecurity", "ApplicationSecurity", "Vulnerability"], "timings": {"cache_n": 59, "prompt_n": 168, "prompt_ms": 101.611, "prompt_per_token_ms": 0.604827380952381, "prompt_per_second": 1653.3643011091317, "predicted_n": 2, "predicted_ms": 30.159, "predicted_per_token_ms": 15.0795, "predicted_per_second": 66.31519612719255}, "tps": 66.31519612719255}, {"id": "fd2f46bb-f69a-4e5d-bc26-d62c67b00c29", "answer": "AC", "llm_answer": "AC", "score": 1, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 59, "prompt_n": 144, "prompt_ms": 98.016, "prompt_per_token_ms": 0.6806666666666668, "prompt_per_second": 1469.1478942213516, "predicted_n": 2, "predicted_ms": 29.48, "predicted_per_token_ms": 14.74, "predicted_per_second": 67.84260515603799}, "tps": 67.84260515603799}, {"id": "4dffab17-7b53-4bd7-b61d-259c0ac1f8e1", "answer": "AC", "llm_answer": "C", "score": 0, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 60, "prompt_n": 182, "prompt_ms": 100.537, "prompt_per_token_ms": 0.5524010989010989, "prompt_per_second": 1810.2788028288091, "predicted_n": 2, "predicted_ms": 31.807, "predicted_per_token_ms": 15.9035, "predicted_per_second": 62.879240418775744}, "tps": 62.879240418775744}, {"id": "4bb7c6cc-ae21-4c44-9269-0f31f207254b", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SystemSecurity", "NetworkSecurity", "PenTest"], "timings": {"cache_n": 59, "prompt_n": 139, "prompt_ms": 100.055, "prompt_per_token_ms": 0.7198201438848921, "prompt_per_second": 1389.2359202438656, "predicted_n": 2, "predicted_ms": 26.917, "predicted_per_token_ms": 13.4585, "predicted_per_second": 74.30248541813724}, "tps": 74.30248541813724}, {"id": "ec99f5aa-7ff4-4416-8c6b-324b8b03a733", "answer": "AC", "llm_answer": "AC", "score": 1, "topics": ["SystemSecurity", "ApplicationSecurity", "Vulnerability"], "timings": {"cache_n": 60, "prompt_n": 123, "prompt_ms": 88.869, "prompt_per_token_ms": 0.7225121951219512, "prompt_per_second": 1384.0596833541506, "predicted_n": 2, "predicted_ms": 29.361, "predicted_per_token_ms": 14.6805, "predicted_per_second": 68.11757092742073}, "tps": 68.11757092742073}, {"id": "8014a5fd-6810-4bf8-94d3-47b45734c22e", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["ApplicationSecurity"], "timings": {"cache_n": 59, "prompt_n": 132, "prompt_ms": 97.881, "prompt_per_token_ms": 0.7415227272727273, "prompt_per_second": 1348.576332485365, "predicted_n": 2, "predicted_ms": 29.11, "predicted_per_token_ms": 14.555, "predicted_per_second": 68.70491240123668}, "tps": 68.70491240123668}, {"id": "1029347f-7ce8-473b-97b7-b45020caf68c", "answer": "AB", "llm_answer": "A", "score": 0, "topics": ["SystemSecurity", "PenTest"], "timings": {"cache_n": 59, "prompt_n": 167, "prompt_ms": 99.044, "prompt_per_token_ms": 0.5930778443113772, "prompt_per_second": 1686.1193005129035, "predicted_n": 2, "predicted_ms": 29.429, "predicted_per_token_ms": 14.7145, "predicted_per_second": 67.96017533725238}, "tps": 67.96017533725238}, {"id": "f7483ada-2916-443d-901d-f498adcf737d", "answer": "B", "llm_answer": "A", "score": 0, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 62, "prompt_n": 54, "prompt_ms": 91.246, "prompt_per_token_ms": 1.6897407407407405, "prompt_per_second": 591.8067641321263, "predicted_n": 2, "predicted_ms": 30.685, "predicted_per_token_ms": 15.3425, "predicted_per_second": 65.17842594101353}, "tps": 65.17842594101353}, {"id": "c90e1f2d-32bd-4859-8765-bb748454808b", "answer": "A", "llm_answer": "B", "score": 0, "topics": ["SystemSecurity", "WebSecurity"], "timings": {"cache_n": 58, "prompt_n": 154, "prompt_ms": 98.36, "prompt_per_token_ms": 0.6387012987012987, "prompt_per_second": 1565.6771045140301, "predicted_n": 2, "predicted_ms": 28.91, "predicted_per_token_ms": 14.455, "predicted_per_second": 69.18021445866482}, "tps": 69.18021445866482}, {"id": "bb5ac0b2-b3d2-4024-a045-403fc90cd9cc", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["ApplicationSecurity", "WebSecurity"], "timings": {"cache_n": 60, "prompt_n": 109, "prompt_ms": 94.897, "prompt_per_token_ms": 0.8706146788990826, "prompt_per_second": 1148.6137601820922, "predicted_n": 2, "predicted_ms": 28.812, "predicted_per_token_ms": 14.406, "predicted_per_second": 69.41552131056504}, "tps": 69.41552131056504}, {"id": "a26c27f2-33a6-43d6-b3a2-b12443d05ee3", "answer": "AC", "llm_answer": "C", "score": 0, "topics": ["NetworkSecurity", "PenTest", "SystemSecurity"], "timings": {"cache_n": 58, "prompt_n": 178, "prompt_ms": 100.684, "prompt_per_token_ms": 0.5656404494382022, "prompt_per_second": 1767.9075126137222, "predicted_n": 2, "predicted_ms": 29.937, "predicted_per_token_ms": 14.9685, "predicted_per_second": 66.80696128536593}, "tps": 66.80696128536593}, {"id": "131911cc-2e67-43d8-b3ec-0dc333b7038b", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["PenTest"], "timings": {"cache_n": 59, "prompt_n": 87, "prompt_ms": 94.053, "prompt_per_token_ms": 1.0810689655172414, "prompt_per_second": 925.0103664954866, "predicted_n": 2, "predicted_ms": 29.549, "predicted_per_token_ms": 14.7745, "predicted_per_second": 67.68418559003689}, "tps": 67.68418559003689}, {"id": "dd43f00b-919e-4629-8702-339e2097cafd", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["PenTest"], "timings": {"cache_n": 58, "prompt_n": 68, "prompt_ms": 92.491, "prompt_per_token_ms": 1.3601617647058823, "prompt_per_second": 735.2066687569602, "predicted_n": 2, "predicted_ms": 30.215, "predicted_per_token_ms": 15.1075, "predicted_per_second": 66.19228859837828}, "tps": 66.19228859837828}, {"id": "1635d76c-d487-4c43-8c8e-739fef98cd72", "answer": "AD", "llm_answer": "D", "score": 0, "topics": ["WebSecurity"], "timings": {"cache_n": 58, "prompt_n": 115, "prompt_ms": 96.396, "prompt_per_token_ms": 0.8382260869565218, "prompt_per_second": 1192.995559981742, "predicted_n": 2, "predicted_ms": 27.298, "predicted_per_token_ms": 13.649, "predicted_per_second": 73.26544069162577}, "tps": 73.26544069162577}, {"id": "6c12ff70-c078-4105-ae61-d64b18152fea", "answer": "AC", "llm_answer": "A", "score": 0, "topics": ["WebSecurity", "SystemSecurity"], "timings": {"cache_n": 58, "prompt_n": 198, "prompt_ms": 92.735, "prompt_per_token_ms": 0.46835858585858586, "prompt_per_second": 2135.116191297784, "predicted_n": 2, "predicted_ms": 29.901, "predicted_per_token_ms": 14.9505, "predicted_per_second": 66.88739507039898}, "tps": 66.88739507039898}, {"id": "c55a6ee8-5ce8-4f1a-bff3-992c2dd30d89", "answer": "AD", "llm_answer": "ABD", "score": 0, "topics": ["SystemSecurity", "Vulnerability"], "timings": {"cache_n": 59, "prompt_n": 146, "prompt_ms": 97.974, "prompt_per_token_ms": 0.671054794520548, "prompt_per_second": 1490.191275236287, "predicted_n": 4, "predicted_ms": 86.677, "predicted_per_token_ms": 21.66925, "predicted_per_second": 46.14834385131003}, "tps": 46.14834385131003}, {"id": "a762a0e4-cc31-4f47-88e6-2314ad867e1a", "answer": "AC", "llm_answer": "AC", "score": 1, "topics": ["SystemSecurity", "NetworkSecurity"], "timings": {"cache_n": 62, "prompt_n": 128, "prompt_ms": 96.019, "prompt_per_token_ms": 0.7501484375, "prompt_per_second": 1333.0694966621188, "predicted_n": 2, "predicted_ms": 29.561, "predicted_per_token_ms": 14.7805, "predicted_per_second": 67.65670985419979}, "tps": 67.65670985419979}, {"id": "c5b74dba-8274-44b1-b642-d7f3847611f2", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 61, "prompt_n": 129, "prompt_ms": 99.937, "prompt_per_token_ms": 0.7747054263565891, "prompt_per_second": 1290.813212323764, "predicted_n": 2, "predicted_ms": 28.229, "predicted_per_token_ms": 14.1145, "predicted_per_second": 70.84912678451238}, "tps": 70.84912678451238}, {"id": "0ea86fdb-a3e1-48b6-850b-2526c1eda150", "answer": "AC", "llm_answer": "CD", "score": 0, "topics": ["SystemSecurity", "PenTest"], "timings": {"cache_n": 59, "prompt_n": 131, "prompt_ms": 97.359, "prompt_per_token_ms": 0.7431984732824427, "prompt_per_second": 1345.535595065685, "predicted_n": 4, "predicted_ms": 85.963, "predicted_per_token_ms": 21.49075, "predicted_per_second": 46.53164733664484}, "tps": 46.53164733664484}, {"id": "e77d25b3-692f-484f-b0ec-43466e098af2", "answer": "AD", "llm_answer": "A", "score": 0, "topics": ["WebSecurity", "SystemSecurity"], "timings": {"cache_n": 59, "prompt_n": 127, "prompt_ms": 83.55, "prompt_per_token_ms": 0.6578740157480315, "prompt_per_second": 1520.0478755236386, "predicted_n": 2, "predicted_ms": 27.869, "predicted_per_token_ms": 13.9345, "predicted_per_second": 71.76432595356849}, "tps": 71.76432595356849}, {"id": "a5e47333-0764-4720-ad0b-5ca0a6f14afe", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["SystemSecurity"], "timings": {"cache_n": 58, "prompt_n": 120, "prompt_ms": 95.331, "prompt_per_token_ms": 0.794425, "prompt_per_second": 1258.7720678478145, "predicted_n": 2, "predicted_ms": 29.575, "predicted_per_token_ms": 14.7875, "predicted_per_second": 67.6246830092984}, "tps": 67.6246830092984}, {"id": "d04060e2-c776-4cab-8678-5d4a0f36ecb9", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["WebSecurity", "ApplicationSecurity"], "timings": {"cache_n": 59, "prompt_n": 172, "prompt_ms": 99.666, "prompt_per_token_ms": 0.579453488372093, "prompt_per_second": 1725.764051933458, "predicted_n": 2, "predicted_ms": 29.953, "predicted_per_token_ms": 14.9765, "predicted_per_second": 66.77127499749608}, "tps": 66.77127499749608}, {"id": "648a1490-2851-4e8f-bcb0-80c840a3e53f", "answer": "B", "llm_answer": "C", "score": 0, "topics": ["ApplicationSecurity", "SystemSecurity"], "timings": {"cache_n": 60, "prompt_n": 197, "prompt_ms": 102.613, "prompt_per_token_ms": 0.5208781725888325, "prompt_per_second": 1919.8347187978131, "predicted_n": 2, "predicted_ms": 29.286, "predicted_per_token_ms": 14.643, "predicted_per_second": 68.29201666325206}, "tps": 68.29201666325206}, {"id": "6854dc46-a644-4948-ab52-870c6926b5df", "answer": "AC", "llm_answer": "AD", "score": 0, "topics": ["WebSecurity", "ApplicationSecurity"], "timings": {"cache_n": 61, "prompt_n": 136, "prompt_ms": 97.774, "prompt_per_token_ms": 0.7189264705882353, "prompt_per_second": 1390.962832654898, "predicted_n": 3, "predicted_ms": 59.947, "predicted_per_token_ms": 19.982333333333333, "predicted_per_second": 50.044205715048285}, "tps": 50.044205715048285}, {"id": "b4237c97-fd91-4955-a182-572f04ef0b73", "answer": "BC", "llm_answer": "B", "score": 0, "topics": ["MemorySafety"], "timings": {"cache_n": 59, "prompt_n": 224, "prompt_ms": 102.385, "prompt_per_token_ms": 0.4570758928571429, "prompt_per_second": 2187.820481515847, "predicted_n": 2, "predicted_ms": 29.981, "predicted_per_token_ms": 14.9905, "predicted_per_second": 66.70891564657616}, "tps": 66.70891564657616}, {"id": "f543bef0-88ad-458f-8ca4-72b082c068e0", "answer": "BC", "llm_answer": "BC", "score": 1, "topics": ["WebSecurity", "ApplicationSecurity"], "timings": {"cache_n": 59, "prompt_n": 98, "prompt_ms": 95.393, "prompt_per_token_ms": 0.9733979591836734, "prompt_per_second": 1027.3290493013114, "predicted_n": 3, "predicted_ms": 59.382, "predicted_per_token_ms": 19.794, "predicted_per_second": 50.5203597049611}, "tps": 50.5203597049611}, {"id": "3fc32e2f-feaa-4429-ad89-1945f6c80da5", "answer": "AC", "llm_answer": "AC", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 58, "prompt_n": 96, "prompt_ms": 93.507, "prompt_per_token_ms": 0.97403125, "prompt_per_second": 1026.661105585678, "predicted_n": 2, "predicted_ms": 29.094, "predicted_per_token_ms": 14.547, "predicted_per_second": 68.74269608854058}, "tps": 68.74269608854058}, {"id": "62d85d4e-63d5-480e-a7eb-85252919e0a7", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 58, "prompt_n": 166, "prompt_ms": 103.655, "prompt_per_token_ms": 0.6244277108433735, "prompt_per_second": 1601.4664029713956, "predicted_n": 2, "predicted_ms": 29.312, "predicted_per_token_ms": 14.656, "predicted_per_second": 68.23144104803494}, "tps": 68.23144104803494}, {"id": "2a973503-706f-480e-8c2b-0951deffe733", "answer": "BC", "llm_answer": "AC", "score": 0, "topics": ["WebSecurity", "PenTest"], "timings": {"cache_n": 61, "prompt_n": 131, "prompt_ms": 92.108, "prompt_per_token_ms": 0.703114503816794, "prompt_per_second": 1422.2434533373864, "predicted_n": 4, "predicted_ms": 79.852, "predicted_per_token_ms": 19.963, "predicted_per_second": 50.09267144216801}, "tps": 50.09267144216801}, {"id": "3125078d-9971-4bda-8e46-2f55a4c4668a", "answer": "AB", "llm_answer": "C", "score": 0, "topics": ["MemorySafety"], "timings": {"cache_n": 59, "prompt_n": 130, "prompt_ms": 97.787, "prompt_per_token_ms": 0.7522076923076924, "prompt_per_second": 1329.4200660619508, "predicted_n": 2, "predicted_ms": 29.202, "predicted_per_token_ms": 14.601, "predicted_per_second": 68.48845969454146}, "tps": 68.48845969454146}, {"id": "0c9d06eb-cdb8-4066-a35a-0d05bde189a5", "answer": "B", "llm_answer": "A", "score": 0, "topics": ["SystemSecurity", "Vulnerability"], "timings": {"cache_n": 60, "prompt_n": 171, "prompt_ms": 99.022, "prompt_per_token_ms": 0.5790760233918129, "prompt_per_second": 1726.8889741673565, "predicted_n": 2, "predicted_ms": 29.025, "predicted_per_token_ms": 14.5125, "predicted_per_second": 68.90611541774332}, "tps": 68.90611541774332}, {"id": "ffe96b08-6e55-4564-8eda-d1fa4e62ce79", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["NetworkSecurity", "PenTest"], "timings": {"cache_n": 59, "prompt_n": 155, "prompt_ms": 97.296, "prompt_per_token_ms": 0.6277161290322582, "prompt_per_second": 1593.0767965795098, "predicted_n": 2, "predicted_ms": 30.957, "predicted_per_token_ms": 15.4785, "predicted_per_second": 64.60574345059275}, "tps": 64.60574345059275}, {"id": "35282d04-f5b4-4ef4-810f-c1b45dd6b784", "answer": "AC", "llm_answer": "C", "score": 0, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 60, "prompt_n": 112, "prompt_ms": 96.138, "prompt_per_token_ms": 0.858375, "prompt_per_second": 1164.991990680064, "predicted_n": 2, "predicted_ms": 30.691, "predicted_per_token_ms": 15.3455, "predicted_per_second": 65.16568375093676}, "tps": 65.16568375093676}, {"id": "0b7d0f1b-532d-4248-bbce-126f2f6b0049", "answer": "AC", "llm_answer": "AC", "score": 1, "topics": ["PenTest", "SystemSecurity"], "timings": {"cache_n": 58, "prompt_n": 124, "prompt_ms": 95.432, "prompt_per_token_ms": 0.7696129032258064, "prompt_per_second": 1299.3545142090702, "predicted_n": 2, "predicted_ms": 30.192, "predicted_per_token_ms": 15.096, "predicted_per_second": 66.24271330153682}, "tps": 66.24271330153682}, {"id": "26f1233f-6f46-4a8b-98ba-c1116634fd97", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["SystemSecurity"], "timings": {"cache_n": 62, "prompt_n": 95, "prompt_ms": 96.83, "prompt_per_token_ms": 1.0192631578947369, "prompt_per_second": 981.1008984818754, "predicted_n": 2, "predicted_ms": 29.296, "predicted_per_token_ms": 14.648, "predicted_per_second": 68.26870562534134}, "tps": 68.26870562534134}, {"id": "f061c39e-a595-4742-aabf-2267d0741000", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["SystemSecurity", "WebSecurity"], "timings": {"cache_n": 58, "prompt_n": 173, "prompt_ms": 98.818, "prompt_per_token_ms": 0.5712023121387283, "prompt_per_second": 1750.6931935477342, "predicted_n": 2, "predicted_ms": 28.901, "predicted_per_token_ms": 14.4505, "predicted_per_second": 69.2017577246462}, "tps": 69.2017577246462}, {"id": "7c46ed77-1416-43fb-a433-c9506cc6bcc2", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["ApplicationSecurity", "SystemSecurity", "Vulnerability"], "timings": {"cache_n": 62, "prompt_n": 176, "prompt_ms": 101.332, "prompt_per_token_ms": 0.57575, "prompt_per_second": 1736.8649587494574, "predicted_n": 2, "predicted_ms": 30.329, "predicted_per_token_ms": 15.1645, "predicted_per_second": 65.94348643212767}, "tps": 65.94348643212767}, {"id": "045b9ca3-4c09-41af-b8a7-16f88edfc59e", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["WebSecurity", "ApplicationSecurity"], "timings": {"cache_n": 61, "prompt_n": 108, "prompt_ms": 93.645, "prompt_per_token_ms": 0.8670833333333333, "prompt_per_second": 1153.2916866890919, "predicted_n": 2, "predicted_ms": 27.223, "predicted_per_token_ms": 13.6115, "predicted_per_second": 73.46728868971091}, "tps": 73.46728868971091}, {"id": "2a4d9aa2-2a8e-499b-848b-bf2a53faef93", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["ApplicationSecurity", "SystemSecurity", "PenTest"], "timings": {"cache_n": 58, "prompt_n": 119, "prompt_ms": 87.544, "prompt_per_token_ms": 0.7356638655462184, "prompt_per_second": 1359.3164580096866, "predicted_n": 2, "predicted_ms": 27.597, "predicted_per_token_ms": 13.7985, "predicted_per_second": 72.47164546871036}, "tps": 72.47164546871036}, {"id": "9c537a97-d14f-4457-af1b-c5e9de749380", "answer": "AC", "llm_answer": "C", "score": 0, "topics": ["ApplicationSecurity"], "timings": {"cache_n": 60, "prompt_n": 169, "prompt_ms": 101.232, "prompt_per_token_ms": 0.5990059171597633, "prompt_per_second": 1669.4325904852221, "predicted_n": 2, "predicted_ms": 28.194, "predicted_per_token_ms": 14.097, "predicted_per_second": 70.93707881109457}, "tps": 70.93707881109457}, {"id": "ca6ae402-157d-435a-bb7d-0092bdf2cd3d", "answer": "ABC", "llm_answer": "ABCD", "score": 0, "topics": ["SystemSecurity", "WebSecurity", "PenTest"], "timings": {"cache_n": 61, "prompt_n": 142, "prompt_ms": 97.702, "prompt_per_token_ms": 0.6880422535211268, "prompt_per_second": 1453.3991115842052, "predicted_n": 3, "predicted_ms": 59.185, "predicted_per_token_ms": 19.728333333333335, "predicted_per_second": 50.688519050435076}, "tps": 50.688519050435076}, {"id": "6fb2ffd5-2d07-40f3-b32e-982d6ee3f6b3", "answer": "AB", "llm_answer": "AB", "score": 1, "topics": ["ApplicationSecurity"], "timings": {"cache_n": 59, "prompt_n": 144, "prompt_ms": 100.715, "prompt_per_token_ms": 0.6994097222222222, "prompt_per_second": 1429.7770937794767, "predicted_n": 2, "predicted_ms": 31.354, "predicted_per_token_ms": 15.677, "predicted_per_second": 63.78771448618996}, "tps": 63.78771448618996}, {"id": "60af27f7-e341-4f96-9f7c-6f0f36e19579", "answer": "AC", "llm_answer": "A", "score": 0, "topics": ["WebSecurity", "SystemSecurity"], "timings": {"cache_n": 58, "prompt_n": 138, "prompt_ms": 97.258, "prompt_per_token_ms": 0.7047681159420289, "prompt_per_second": 1418.906413868268, "predicted_n": 2, "predicted_ms": 30.515, "predicted_per_token_ms": 15.2575, "predicted_per_second": 65.54153694904146}, "tps": 65.54153694904146}, {"id": "69e8c7fb-cfd3-49b1-9d09-569652ac363b", "answer": "BC", "llm_answer": "C", "score": 0, "topics": ["ApplicationSecurity"], "timings": {"cache_n": 59, "prompt_n": 146, "prompt_ms": 97.795, "prompt_per_token_ms": 0.6698287671232876, "prompt_per_second": 1492.9188608824581, "predicted_n": 2, "predicted_ms": 29.094, "predicted_per_token_ms": 14.547, "predicted_per_second": 68.74269608854058}, "tps": 68.74269608854058}, {"id": "549d03e0-6fa0-4ad4-a691-0c1aacc4268b", "answer": "", "llm_answer": "ACD", "score": 0, "topics": ["ApplicationSecurity"], "timings": {"cache_n": 62, "prompt_n": 85, "prompt_ms": 93.627, "prompt_per_token_ms": 1.1014941176470587, "prompt_per_second": 907.857776068869, "predicted_n": 3, "predicted_ms": 58.028, "predicted_per_token_ms": 19.342666666666666, "predicted_per_second": 51.69917970634866}, "tps": 51.69917970634866}, {"id": "123c52f1-ef4e-4168-b626-c5842bb9bf98", "answer": "BCD", "llm_answer": "BCD", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 58, "prompt_n": 145, "prompt_ms": 96.948, "prompt_per_token_ms": 0.6686068965517241, "prompt_per_second": 1495.6471510500473, "predicted_n": 6, "predicted_ms": 145.969, "predicted_per_token_ms": 24.328166666666664, "predicted_per_second": 41.10461810384397}, "tps": 41.10461810384397}, {"id": "6ca3a918-0944-4256-a23b-96e4181fbc9b", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["ApplicationSecurity"], "timings": {"cache_n": 59, "prompt_n": 176, "prompt_ms": 93.702, "prompt_per_token_ms": 0.5323977272727273, "prompt_per_second": 1878.29502038377, "predicted_n": 2, "predicted_ms": 26.476, "predicted_per_token_ms": 13.238, "predicted_per_second": 75.54011179936546}, "tps": 75.54011179936546}, {"id": "d6074f53-3db1-4c47-a7ee-21f3cde5e6ba", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 59, "prompt_n": 172, "prompt_ms": 95.46, "prompt_per_token_ms": 0.5549999999999999, "prompt_per_second": 1801.801801801802, "predicted_n": 2, "predicted_ms": 31.879, "predicted_per_token_ms": 15.9395, "predicted_per_second": 62.73722513253239}, "tps": 62.73722513253239}, {"id": "3419bb22-648e-4ee5-99af-3e3889b98fa9", "answer": "AD", "llm_answer": "ACD", "score": 0, "topics": ["ApplicationSecurity"], "timings": {"cache_n": 59, "prompt_n": 204, "prompt_ms": 101.699, "prompt_per_token_ms": 0.49852450980392155, "prompt_per_second": 2005.919428902939, "predicted_n": 3, "predicted_ms": 60.523, "predicted_per_token_ms": 20.174333333333333, "predicted_per_second": 49.56793285197362}, "tps": 49.56793285197362}, {"id": "3425a7f3-0c3e-4e8f-a668-7706ae306250", "answer": "AC", "llm_answer": "AC", "score": 1, "topics": ["PenTest"], "timings": {"cache_n": 60, "prompt_n": 105, "prompt_ms": 94.592, "prompt_per_token_ms": 0.9008761904761905, "prompt_per_second": 1110.0304465493912, "predicted_n": 2, "predicted_ms": 30.428, "predicted_per_token_ms": 15.214, "predicted_per_second": 65.72893387669252}, "tps": 65.72893387669252}, {"id": "edae556d-32a9-4c74-a6f4-c00b1d510f9b", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SystemSecurity"], "timings": {"cache_n": 58, "prompt_n": 129, "prompt_ms": 97.011, "prompt_per_token_ms": 0.7520232558139535, "prompt_per_second": 1329.746111265733, "predicted_n": 2, "predicted_ms": 30.026, "predicted_per_token_ms": 15.013, "predicted_per_second": 66.608938919603}, "tps": 66.608938919603}, {"id": "c431eb4c-c92d-4f6c-9805-759179e27363", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["ApplicationSecurity"], "timings": {"cache_n": 59, "prompt_n": 110, "prompt_ms": 97.047, "prompt_per_token_ms": 0.8822454545454546, "prompt_per_second": 1133.471410759735, "predicted_n": 2, "predicted_ms": 31.006, "predicted_per_token_ms": 15.503, "predicted_per_second": 64.50364445591175}, "tps": 64.50364445591175}, {"id": "f6b50586-49eb-4aa0-9ab1-f992349f44e3", "answer": "AC", "llm_answer": "AC", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 58, "prompt_n": 95, "prompt_ms": 84.655, "prompt_per_token_ms": 0.8911052631578947, "prompt_per_second": 1122.2018782115645, "predicted_n": 2, "predicted_ms": 27.159, "predicted_per_token_ms": 13.5795, "predicted_per_second": 73.64041385912589}, "tps": 73.64041385912589}, {"id": "cdbe5f7a-ae68-4a40-a7ec-b73cae2a718c", "answer": "AB", "llm_answer": "B", "score": 0, "topics": ["WebSecurity"], "timings": {"cache_n": 58, "prompt_n": 203, "prompt_ms": 97.628, "prompt_per_token_ms": 0.48092610837438426, "prompt_per_second": 2079.3215061252918, "predicted_n": 2, "predicted_ms": 30.81, "predicted_per_token_ms": 15.405, "predicted_per_second": 64.91398896462188}, "tps": 64.91398896462188}, {"id": "766a753e-f63f-4605-9d81-71db020fb6a3", "answer": "A", "llm_answer": "B", "score": 0, "topics": ["SoftwareSecurity"], "timings": {"cache_n": 62, "prompt_n": 205, "prompt_ms": 101.685, "prompt_per_token_ms": 0.49602439024390244, "prompt_per_second": 2016.0298962482177, "predicted_n": 2, "predicted_ms": 31.697, "predicted_per_token_ms": 15.8485, "predicted_per_second": 63.097454017730385}, "tps": 63.097454017730385}, {"id": "9d357fc9-cf23-4087-9b2e-bc0508c95229", "answer": "AC", "llm_answer": "AC", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 59, "prompt_n": 144, "prompt_ms": 97.232, "prompt_per_token_ms": 0.6752222222222222, "prompt_per_second": 1480.993911469475, "predicted_n": 2, "predicted_ms": 29.888, "predicted_per_token_ms": 14.944, "predicted_per_second": 66.91648822269806}, "tps": 66.91648822269806}, {"id": "81713fe6-5936-4f4a-abe1-9733f5a0f450", "answer": "D", "llm_answer": "D", "score": 1, "topics": ["SystemSecurity", "PenTest"], "timings": {"cache_n": 61, "prompt_n": 133, "prompt_ms": 99.512, "prompt_per_token_ms": 0.7482105263157894, "prompt_per_second": 1336.522228474958, "predicted_n": 2, "predicted_ms": 31.296, "predicted_per_token_ms": 15.648, "predicted_per_second": 63.90593047034765}, "tps": 63.90593047034765}, {"id": "7f3cf121-79ed-4f29-9193-e7d8bf70bc2d", "answer": "AC", "llm_answer": "ABCD", "score": 0, "topics": ["SoftwareSecurity", "ApplicationSecurity", "Vulnerability"], "timings": {"cache_n": 61, "prompt_n": 130, "prompt_ms": 96.91, "prompt_per_token_ms": 0.7454615384615384, "prompt_per_second": 1341.4508306676298, "predicted_n": 64, "predicted_ms": 1860.402, "predicted_per_token_ms": 29.06878125, "predicted_per_second": 34.401167059592495}, "tps": 34.401167059592495}, {"id": "dbc4c595-f5a9-4597-a0b5-a21cad308171", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["ApplicationSecurity", "Vulnerability"], "timings": {"cache_n": 61, "prompt_n": 188, "prompt_ms": 96.709, "prompt_per_token_ms": 0.5144095744680851, "prompt_per_second": 1943.976258672926, "predicted_n": 2, "predicted_ms": 25.792, "predicted_per_token_ms": 12.896, "predicted_per_second": 77.54342431761786}, "tps": 77.54342431761786}, {"id": "57cd7b7e-122a-4fba-959a-9e848de2be3e", "answer": "D", "llm_answer": "D", "score": 1, "topics": [], "timings": {"cache_n": 62, "prompt_n": 84, "prompt_ms": 88.254, "prompt_per_token_ms": 1.0506428571428572, "prompt_per_second": 951.7982187776191, "predicted_n": 2, "predicted_ms": 31.876, "predicted_per_token_ms": 15.938, "predicted_per_second": 62.743129627305805}, "tps": 62.743129627305805}, {"id": "312c5563-0d4e-47fe-9941-9323c2c14b04", "answer": "AB", "llm_answer": "A", "score": 0, "topics": ["Vulnerability"], "timings": {"cache_n": 58, "prompt_n": 176, "prompt_ms": 99.156, "prompt_per_token_ms": 0.5633863636363636, "prompt_per_second": 1774.9808382750412, "predicted_n": 2, "predicted_ms": 30.786, "predicted_per_token_ms": 15.393, "predicted_per_second": 64.96459429610861}, "tps": 64.96459429610861}, {"id": "071900a9-8775-41b8-9934-42ad477d239e", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SystemSecurity", "ApplicationSecurity", "PenTest"], "timings": {"cache_n": 60, "prompt_n": 113, "prompt_ms": 99.405, "prompt_per_token_ms": 0.8796902654867257, "prompt_per_second": 1136.7637442784567, "predicted_n": 2, "predicted_ms": 30.555, "predicted_per_token_ms": 15.2775, "predicted_per_second": 65.45573555882834}, "tps": 65.45573555882834}, {"id": "aea4694b-6564-441f-b5a0-d6341b4fe9f3", "answer": "D", "llm_answer": "D", "score": 1, "topics": ["ApplicationSecurity", "SystemSecurity"], "timings": {"cache_n": 62, "prompt_n": 87, "prompt_ms": 93.525, "prompt_per_token_ms": 1.075, "prompt_per_second": 930.2325581395348, "predicted_n": 2, "predicted_ms": 30.069, "predicted_per_token_ms": 15.0345, "predicted_per_second": 66.513685190728}, "tps": 66.513685190728}, {"id": "cc483822-364a-40cc-ac72-2f524e0ca27a", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SystemSecurity", "PenTest"], "timings": {"cache_n": 58, "prompt_n": 62, "prompt_ms": 91.059, "prompt_per_token_ms": 1.4686935483870966, "prompt_per_second": 680.877233442054, "predicted_n": 2, "predicted_ms": 28.959, "predicted_per_token_ms": 14.4795, "predicted_per_second": 69.06315825822715}, "tps": 69.06315825822715}, {"id": "31e2b5b9-f5cc-411b-91ba-03918199ebd5", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SoftwareSecurity", "ApplicationSecurity", "WebSecurity"], "timings": {"cache_n": 58, "prompt_n": 166, "prompt_ms": 99.681, "prompt_per_token_ms": 0.6004879518072289, "prompt_per_second": 1665.312346384968, "predicted_n": 2, "predicted_ms": 30.208, "predicted_per_token_ms": 15.104, "predicted_per_second": 66.20762711864407}, "tps": 66.20762711864407}, {"id": "26617100-9a26-4966-8db5-337dd19d0d3a", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 60, "prompt_n": 96, "prompt_ms": 93.765, "prompt_per_token_ms": 0.97671875, "prompt_per_second": 1023.8361862102063, "predicted_n": 2, "predicted_ms": 29.159, "predicted_per_token_ms": 14.5795, "predicted_per_second": 68.58945780033609}, "tps": 68.58945780033609}, {"id": "8441b7cb-fa07-4828-8b3c-aa7745bbb1e3", "answer": "AB", "llm_answer": "A", "score": 0, "topics": ["SoftwareSecurity", "Vulnerability"], "timings": {"cache_n": 58, "prompt_n": 197, "prompt_ms": 101.028, "prompt_per_token_ms": 0.5128324873096447, "prompt_per_second": 1949.9544680682582, "predicted_n": 2, "predicted_ms": 30.286, "predicted_per_token_ms": 15.143, "predicted_per_second": 66.03711285742587}, "tps": 66.03711285742587}, {"id": "e3a9b9f9-e6e9-4d41-b269-e91906239b4a", "answer": "AB", "llm_answer": "B", "score": 0, "topics": ["SystemSecurity", "SoftwareSecurity", "Vulnerability"], "timings": {"cache_n": 59, "prompt_n": 153, "prompt_ms": 101.528, "prompt_per_token_ms": 0.6635816993464053, "prompt_per_second": 1506.9734457489558, "predicted_n": 2, "predicted_ms": 29.372, "predicted_per_token_ms": 14.686, "predicted_per_second": 68.0920604657497}, "tps": 68.0920604657497}, {"id": "7aa3af1e-9718-4c7f-be5d-e03e4566776f", "answer": "BD", "llm_answer": "BCD", "score": 0, "topics": ["SystemSecurity", "Vulnerability"], "timings": {"cache_n": 59, "prompt_n": 165, "prompt_ms": 93.763, "prompt_per_token_ms": 0.5682606060606061, "prompt_per_second": 1759.7559805040366, "predicted_n": 4, "predicted_ms": 82.419, "predicted_per_token_ms": 20.60475, "predicted_per_second": 48.53249857435786}, "tps": 48.53249857435786}, {"id": "dbd8d0fe-94b7-49bf-8395-080af11b4e8f", "answer": "AC", "llm_answer": "AC", "score": 1, "topics": ["ApplicationSecurity", "WebSecurity"], "timings": {"cache_n": 59, "prompt_n": 162, "prompt_ms": 99.884, "prompt_per_token_ms": 0.616567901234568, "prompt_per_second": 1621.8813824035883, "predicted_n": 2, "predicted_ms": 30.462, "predicted_per_token_ms": 15.231, "predicted_per_second": 65.65557087518876}, "tps": 65.65557087518876}, {"id": "b2fc2024-2768-46ef-952d-abe2dc742f2e", "answer": "AD", "llm_answer": "AD", "score": 1, "topics": ["PenTest", "SystemSecurity"], "timings": {"cache_n": 62, "prompt_n": 153, "prompt_ms": 97.593, "prompt_per_token_ms": 0.6378627450980392, "prompt_per_second": 1567.735390857951, "predicted_n": 3, "predicted_ms": 57.439, "predicted_per_token_ms": 19.146333333333335, "predicted_per_second": 52.22932154111318}, "tps": 52.22932154111318}, {"id": "5baf815b-151e-4cb0-8eac-fbfd6749f783", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["WebSecurity", "SystemSecurity"], "timings": {"cache_n": 61, "prompt_n": 87, "prompt_ms": 94.526, "prompt_per_token_ms": 1.0865057471264368, "prompt_per_second": 920.3816939254809, "predicted_n": 2, "predicted_ms": 29.216, "predicted_per_token_ms": 14.608, "predicted_per_second": 68.45564074479736}, "tps": 68.45564074479736}, {"id": "00a0d75e-f53b-4756-9a62-efee807e6a51", "answer": "A", "llm_answer": "D", "score": 0, "topics": ["SystemSecurity"], "timings": {"cache_n": 58, "prompt_n": 96, "prompt_ms": 93.201, "prompt_per_token_ms": 0.9708437499999999, "prompt_per_second": 1030.0318666108733, "predicted_n": 2, "predicted_ms": 30.347, "predicted_per_token_ms": 15.1735, "predicted_per_second": 65.9043727551323}, "tps": 65.9043727551323}, {"id": "9bf00f7b-5f82-4131-b76f-88064c9e5459", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["PenTest"], "timings": {"cache_n": 58, "prompt_n": 134, "prompt_ms": 98.884, "prompt_per_token_ms": 0.7379402985074627, "prompt_per_second": 1355.1231746288581, "predicted_n": 2, "predicted_ms": 29.74, "predicted_per_token_ms": 14.87, "predicted_per_second": 67.24949562878278}, "tps": 67.24949562878278}, {"id": "1f17d5a3-fae0-4ebb-b060-d86016ca5fb8", "answer": "BD", "llm_answer": "BD", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 58, "prompt_n": 134, "prompt_ms": 102.634, "prompt_per_token_ms": 0.7659253731343284, "prompt_per_second": 1305.6102266305513, "predicted_n": 3, "predicted_ms": 57.899, "predicted_per_token_ms": 19.299666666666667, "predicted_per_second": 51.814366396656254}, "tps": 51.814366396656254}, {"id": "0ea4045e-30a8-4325-b4c8-593c63c952b5", "answer": "D", "llm_answer": "D", "score": 1, "topics": ["NetworkSecurity", "SystemSecurity", "Vulnerability"], "timings": {"cache_n": 62, "prompt_n": 123, "prompt_ms": 96.321, "prompt_per_token_ms": 0.7830975609756098, "prompt_per_second": 1276.9800977979878, "predicted_n": 2, "predicted_ms": 30.475, "predicted_per_token_ms": 15.2375, "predicted_per_second": 65.62756357670222}, "tps": 65.62756357670222}, {"id": "f7b84c8f-98bc-42a1-a7ea-9bc484fbb125", "answer": "AC", "llm_answer": "AC", "score": 1, "topics": ["SoftwareSecurity", "SystemSecurity"], "timings": {"cache_n": 62, "prompt_n": 121, "prompt_ms": 96.829, "prompt_per_token_ms": 0.8002396694214875, "prompt_per_second": 1249.6256286856212, "predicted_n": 2, "predicted_ms": 30.387, "predicted_per_token_ms": 15.1935, "predicted_per_second": 65.81761937670714}, "tps": 65.81761937670714}, {"id": "a23e09cb-2feb-48d0-acaa-cf7dcc31c47e", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 59, "prompt_n": 189, "prompt_ms": 92.945, "prompt_per_token_ms": 0.49177248677248675, "prompt_per_second": 2033.4606487707786, "predicted_n": 2, "predicted_ms": 26.892, "predicted_per_token_ms": 13.446, "predicted_per_second": 74.37156031533542}, "tps": 74.37156031533542}, {"id": "ba63b521-6546-4a99-a384-495dd3556868", "answer": "ACD", "llm_answer": "A", "score": 0, "topics": ["PenTest", "SystemSecurity"], "timings": {"cache_n": 59, "prompt_n": 84, "prompt_ms": 88.948, "prompt_per_token_ms": 1.0589047619047618, "prompt_per_second": 944.3719926249045, "predicted_n": 2, "predicted_ms": 30.565, "predicted_per_token_ms": 15.2825, "predicted_per_second": 65.43432030099787}, "tps": 65.43432030099787}, {"id": "41a4378d-1b1c-452b-a629-e268afedab45", "answer": "B", "llm_answer": "D", "score": 0, "topics": ["ApplicationSecurity"], "timings": {"cache_n": 59, "prompt_n": 79, "prompt_ms": 92.205, "prompt_per_token_ms": 1.1671518987341771, "prompt_per_second": 856.7865083238437, "predicted_n": 2, "predicted_ms": 29.566, "predicted_per_token_ms": 14.783, "predicted_per_second": 67.64526821348846}, "tps": 67.64526821348846}, {"id": "d1a365ec-d19a-4c3d-b281-4d205106b69e", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 58, "prompt_n": 74, "prompt_ms": 92.116, "prompt_per_token_ms": 1.244810810810811, "prompt_per_second": 803.3349255286812, "predicted_n": 2, "predicted_ms": 30.567, "predicted_per_token_ms": 15.2835, "predicted_per_second": 65.43003893087317}, "tps": 65.43003893087317}, {"id": "8a50471a-9200-4a0c-88f8-2ddec1b3e653", "answer": "ABD", "llm_answer": "ABCD", "score": 0, "topics": ["ApplicationSecurity", "SystemSecurity"], "timings": {"cache_n": 58, "prompt_n": 160, "prompt_ms": 100.012, "prompt_per_token_ms": 0.625075, "prompt_per_second": 1599.8080230372354, "predicted_n": 3, "predicted_ms": 60.198, "predicted_per_token_ms": 20.066, "predicted_per_second": 49.83554270906009}, "tps": 49.83554270906009}, {"id": "991f8ded-c5cd-423b-aa8d-ff53c9bf9f55", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 59, "prompt_n": 137, "prompt_ms": 97.335, "prompt_per_token_ms": 0.7104744525547445, "prompt_per_second": 1407.5101453742232, "predicted_n": 2, "predicted_ms": 29.765, "predicted_per_token_ms": 14.8825, "predicted_per_second": 67.19301192675961}, "tps": 67.19301192675961}, {"id": "825c5936-6312-43a7-93fb-9494011c1394", "answer": "B", "llm_answer": "D", "score": 0, "topics": ["SystemSecurity"], "timings": {"cache_n": 60, "prompt_n": 131, "prompt_ms": 97.421, "prompt_per_token_ms": 0.7436717557251908, "prompt_per_second": 1344.6792785949642, "predicted_n": 2, "predicted_ms": 29.019, "predicted_per_token_ms": 14.5095, "predicted_per_second": 68.92036252110687}, "tps": 68.92036252110687}, {"id": "d715208c-a46f-4ea2-9fcd-ec756c26e99b", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["NetworkSecurity", "PenTest"], "timings": {"cache_n": 59, "prompt_n": 108, "prompt_ms": 94.812, "prompt_per_token_ms": 0.8778888888888888, "prompt_per_second": 1139.0963169219085, "predicted_n": 2, "predicted_ms": 29.414, "predicted_per_token_ms": 14.707, "predicted_per_second": 67.99483239273815}, "tps": 67.99483239273815}, {"id": "a1d254bd-fad1-4da4-a8d2-e38036d3babf", "answer": "AC", "llm_answer": "AC", "score": 1, "topics": ["SystemSecurity", "WebSecurity"], "timings": {"cache_n": 59, "prompt_n": 145, "prompt_ms": 98.201, "prompt_per_token_ms": 0.6772482758620689, "prompt_per_second": 1476.5633751183798, "predicted_n": 2, "predicted_ms": 30.276, "predicted_per_token_ms": 15.138, "predicted_per_second": 66.05892456070815}, "tps": 66.05892456070815}, {"id": "cc1a6a63-b48f-414b-92f9-be598dddaf7e", "answer": "BD", "llm_answer": "BD", "score": 1, "topics": ["PenTest"], "timings": {"cache_n": 60, "prompt_n": 154, "prompt_ms": 97.73, "prompt_per_token_ms": 0.6346103896103896, "prompt_per_second": 1575.7699785122275, "predicted_n": 4, "predicted_ms": 87.108, "predicted_per_token_ms": 21.777, "predicted_per_second": 45.92000734720117}, "tps": 45.92000734720117}, {"id": "c7457c95-b25c-4d55-9c9c-f70fb4ec1e6c", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["PenTest"], "timings": {"cache_n": 62, "prompt_n": 41, "prompt_ms": 79.228, "prompt_per_token_ms": 1.932390243902439, "prompt_per_second": 517.493815317817, "predicted_n": 2, "predicted_ms": 29.442, "predicted_per_token_ms": 14.721, "predicted_per_second": 67.93016778751443}, "tps": 67.93016778751443}, {"id": "25b9db98-bc7f-480c-af15-5174f5e4d084", "answer": "D", "llm_answer": "D", "score": 1, "topics": ["ApplicationSecurity"], "timings": {"cache_n": 58, "prompt_n": 158, "prompt_ms": 99.016, "prompt_per_token_ms": 0.6266835443037975, "prompt_per_second": 1595.7017047749857, "predicted_n": 2, "predicted_ms": 28.885, "predicted_per_token_ms": 14.4425, "predicted_per_second": 69.24009001211701}, "tps": 69.24009001211701}, {"id": "4d22ea1b-47f5-4a68-ab28-789b1c098aaa", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["SystemSecurity", "SoftwareSecurity", "PenTest"], "timings": {"cache_n": 59, "prompt_n": 102, "prompt_ms": 94.093, "prompt_per_token_ms": 0.9224803921568627, "prompt_per_second": 1084.0338813726844, "predicted_n": 2, "predicted_ms": 29.24, "predicted_per_token_ms": 14.62, "predicted_per_second": 68.39945280437757}, "tps": 68.39945280437757}, {"id": "bc0798bf-20e4-4e51-84b9-83afb887212a", "answer": "D", "llm_answer": "D", "score": 1, "topics": ["WebSecurity", "SoftwareSecurity"], "timings": {"cache_n": 59, "prompt_n": 76, "prompt_ms": 90.308, "prompt_per_token_ms": 1.188263157894737, "prompt_per_second": 841.5644239712981, "predicted_n": 2, "predicted_ms": 31.321, "predicted_per_token_ms": 15.6605, "predicted_per_second": 63.85492161808371}, "tps": 63.85492161808371}, {"id": "3049bbe5-6098-4edd-a427-36844924f2c5", "answer": "ABCD", "llm_answer": "B", "score": 0, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 58, "prompt_n": 157, "prompt_ms": 98.594, "prompt_per_token_ms": 0.6279872611464967, "prompt_per_second": 1592.388989187983, "predicted_n": 2, "predicted_ms": 30.986, "predicted_per_token_ms": 15.493, "predicted_per_second": 64.54527851287678}, "tps": 64.54527851287678}, {"id": "8d15521b-5e1b-452a-9a81-336f56ab5bd5", "answer": "AB", "llm_answer": "AB", "score": 1, "topics": ["WebSecurity", "PenTest"], "timings": {"cache_n": 60, "prompt_n": 147, "prompt_ms": 98.692, "prompt_per_token_ms": 0.6713741496598639, "prompt_per_second": 1489.4824301868439, "predicted_n": 4, "predicted_ms": 89.498, "predicted_per_token_ms": 22.3745, "predicted_per_second": 44.69373617287537}, "tps": 44.69373617287537}, {"id": "1d71672f-55f5-4956-b5de-35008bc85e3b", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["SystemSecurity", "PenTest"], "timings": {"cache_n": 58, "prompt_n": 69, "prompt_ms": 91.214, "prompt_per_token_ms": 1.3219420289855073, "prompt_per_second": 756.462823689346, "predicted_n": 2, "predicted_ms": 30.579, "predicted_per_token_ms": 15.2895, "predicted_per_second": 65.4043624709768}, "tps": 65.4043624709768}, {"id": "2c007ab1-0153-4b6a-903c-f7b2f7b44dee", "answer": "D", "llm_answer": "D", "score": 1, "topics": ["SystemSecurity", "PenTest"], "timings": {"cache_n": 58, "prompt_n": 102, "prompt_ms": 93.883, "prompt_per_token_ms": 0.920421568627451, "prompt_per_second": 1086.458677289818, "predicted_n": 2, "predicted_ms": 31.094, "predicted_per_token_ms": 15.547, "predicted_per_second": 64.32109088570142}, "tps": 64.32109088570142}, {"id": "fbd83bbf-df9d-4c96-a2de-9bc2c0831722", "answer": "AC", "llm_answer": "ABCD", "score": 0, "topics": ["ApplicationSecurity", "SystemSecurity"], "timings": {"cache_n": 59, "prompt_n": 168, "prompt_ms": 103.803, "prompt_per_token_ms": 0.617875, "prompt_per_second": 1618.4503338053812, "predicted_n": 3, "predicted_ms": 60.546, "predicted_per_token_ms": 20.182, "predicted_per_second": 49.54910316123278}, "tps": 49.54910316123278}, {"id": "08ef0aaa-515a-40e5-9425-7f95e00706ad", "answer": "ABD", "llm_answer": "BCD", "score": 0, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 59, "prompt_n": 162, "prompt_ms": 87.506, "prompt_per_token_ms": 0.5401604938271605, "prompt_per_second": 1851.3016250314265, "predicted_n": 6, "predicted_ms": 144.573, "predicted_per_token_ms": 24.0955, "predicted_per_second": 41.5015251810504}, "tps": 41.5015251810504}, {"id": "501f67b3-6656-42ea-96a4-bfadffbed533", "answer": "B", "llm_answer": "C", "score": 0, "topics": ["NetworkSecurity", "SystemSecurity", "PenTest"], "timings": {"cache_n": 60, "prompt_n": 160, "prompt_ms": 97.588, "prompt_per_token_ms": 0.6099249999999999, "prompt_per_second": 1639.5458458007133, "predicted_n": 2, "predicted_ms": 28.731, "predicted_per_token_ms": 14.3655, "predicted_per_second": 69.61122132887822}, "tps": 69.61122132887822}, {"id": "017b8ac8-3f4d-42d9-b4b6-3356f2c3ca42", "answer": "B", "llm_answer": "C", "score": 0, "topics": ["WebSecurity", "NetworkSecurity"], "timings": {"cache_n": 59, "prompt_n": 128, "prompt_ms": 94.228, "prompt_per_token_ms": 0.73615625, "prompt_per_second": 1358.407267478881, "predicted_n": 2, "predicted_ms": 29.106, "predicted_per_token_ms": 14.553, "predicted_per_second": 68.71435442864014}, "tps": 68.71435442864014}, {"id": "540afbd2-f362-4157-8770-71e34542958c", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["MemorySafety", "SoftwareSecurity", "Vulnerability"], "timings": {"cache_n": 58, "prompt_n": 155, "prompt_ms": 97.596, "prompt_per_token_ms": 0.6296516129032258, "prompt_per_second": 1588.1798434362063, "predicted_n": 2, "predicted_ms": 28.874, "predicted_per_token_ms": 14.437, "predicted_per_second": 69.26646810279144}, "tps": 69.26646810279144}, {"id": "a8c1c9e5-214b-44d7-950d-c1b8bc404e5f", "answer": "D", "llm_answer": "D", "score": 1, "topics": ["WebSecurity", "Vulnerability"], "timings": {"cache_n": 60, "prompt_n": 97, "prompt_ms": 94.358, "prompt_per_token_ms": 0.9727628865979382, "prompt_per_second": 1027.9997456495473, "predicted_n": 2, "predicted_ms": 29.203, "predicted_per_token_ms": 14.6015, "predicted_per_second": 68.48611444029723}, "tps": 68.48611444029723}, {"id": "d4d8d29c-48f2-45ea-8bba-cd2b54caca84", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 59, "prompt_n": 166, "prompt_ms": 100.725, "prompt_per_token_ms": 0.6067771084337349, "prompt_per_second": 1648.0516257135766, "predicted_n": 2, "predicted_ms": 30.416, "predicted_per_token_ms": 15.208, "predicted_per_second": 65.75486586007365}, "tps": 65.75486586007365}, {"id": "f8fccbdf-dda6-4658-aa46-8d0a93a07505", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SystemSecurity", "PenTest"], "timings": {"cache_n": 62, "prompt_n": 77, "prompt_ms": 92.254, "prompt_per_token_ms": 1.1981038961038961, "prompt_per_second": 834.6521560040758, "predicted_n": 2, "predicted_ms": 30.745, "predicted_per_token_ms": 15.3725, "predicted_per_second": 65.05122784192551}, "tps": 65.05122784192551}, {"id": "d5a7116b-bbd9-49dd-bfa2-d285007da122", "answer": "AC", "llm_answer": "A", "score": 0, "topics": ["SystemSecurity", "NetworkSecurity"], "timings": {"cache_n": 58, "prompt_n": 113, "prompt_ms": 95.654, "prompt_per_token_ms": 0.8464955752212389, "prompt_per_second": 1181.3410834884062, "predicted_n": 2, "predicted_ms": 30.362, "predicted_per_token_ms": 15.181, "predicted_per_second": 65.87181345102431}, "tps": 65.87181345102431}, {"id": "e70a4db5-5f34-42aa-9aaa-236a9f989201", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["NetworkSecurity", "PenTest"], "timings": {"cache_n": 63, "prompt_n": 119, "prompt_ms": 95.312, "prompt_per_token_ms": 0.8009411764705883, "prompt_per_second": 1248.5311398354877, "predicted_n": 2, "predicted_ms": 29.028, "predicted_per_token_ms": 14.514, "predicted_per_second": 68.89899407468651}, "tps": 68.89899407468651}, {"id": "6951d3e5-a532-499a-a195-3e30b2ba0450", "answer": "AD", "llm_answer": "A", "score": 0, "topics": ["WebSecurity", "PenTest"], "timings": {"cache_n": 59, "prompt_n": 141, "prompt_ms": 83.549, "prompt_per_token_ms": 0.5925460992907802, "prompt_per_second": 1687.6324073298304, "predicted_n": 2, "predicted_ms": 28.495, "predicted_per_token_ms": 14.2475, "predicted_per_second": 70.1877522372346}, "tps": 70.1877522372346}, {"id": "c738b7fb-c232-4813-b98c-59ab97fe1c23", "answer": "AC", "llm_answer": "AC", "score": 1, "topics": ["ApplicationSecurity"], "timings": {"cache_n": 59, "prompt_n": 99, "prompt_ms": 93.266, "prompt_per_token_ms": 0.9420808080808082, "prompt_per_second": 1061.480067763172, "predicted_n": 2, "predicted_ms": 32.027, "predicted_per_token_ms": 16.0135, "predicted_per_second": 62.44731008211821}, "tps": 62.44731008211821}, {"id": "c218e17b-e064-49a5-a562-a23c9fd70cb2", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 60, "prompt_n": 122, "prompt_ms": 95.515, "prompt_per_token_ms": 0.7829098360655737, "prompt_per_second": 1277.2862901115009, "predicted_n": 2, "predicted_ms": 33.166, "predicted_per_token_ms": 16.583, "predicted_per_second": 60.30271965265634}, "tps": 60.30271965265634}, {"id": "679e4e78-f97f-4261-be7e-43bc29382619", "answer": "AC", "llm_answer": "BC", "score": 0, "topics": ["SystemSecurity", "PenTest"], "timings": {"cache_n": 59, "prompt_n": 151, "prompt_ms": 98.673, "prompt_per_token_ms": 0.6534635761589405, "prompt_per_second": 1530.3071762285529, "predicted_n": 3, "predicted_ms": 61.772, "predicted_per_token_ms": 20.590666666666667, "predicted_per_second": 48.56569319432752}, "tps": 48.56569319432752}, {"id": "0822b969-958f-4ecd-9e32-327985cb5581", "answer": "ABD", "llm_answer": "B", "score": 0, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 58, "prompt_n": 151, "prompt_ms": 99.578, "prompt_per_token_ms": 0.6594569536423841, "prompt_per_second": 1516.3992046435958, "predicted_n": 2, "predicted_ms": 30.497, "predicted_per_token_ms": 15.2485, "predicted_per_second": 65.58022100534478}, "tps": 65.58022100534478}, {"id": "fb9dd846-166b-4080-a6f1-c83fa550b78e", "answer": "AD", "llm_answer": "A", "score": 0, "topics": ["NetworkSecurity", "PenTest"], "timings": {"cache_n": 63, "prompt_n": 196, "prompt_ms": 102.214, "prompt_per_token_ms": 0.5215, "prompt_per_second": 1917.5455417066155, "predicted_n": 2, "predicted_ms": 31.619, "predicted_per_token_ms": 15.8095, "predicted_per_second": 63.25310730889655}, "tps": 63.25310730889655}, {"id": "50ec97b8-fba3-4a42-bd94-1098eb8ade56", "answer": "BD", "llm_answer": "B", "score": 0, "topics": ["MemorySafety"], "timings": {"cache_n": 59, "prompt_n": 171, "prompt_ms": 99.948, "prompt_per_token_ms": 0.5844912280701754, "prompt_per_second": 1710.889662624565, "predicted_n": 2, "predicted_ms": 28.84, "predicted_per_token_ms": 14.42, "predicted_per_second": 69.34812760055479}, "tps": 69.34812760055479}, {"id": "5340b62a-c7fc-4862-9599-16aef8033d15", "answer": "ABD", "llm_answer": "B", "score": 0, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 59, "prompt_n": 167, "prompt_ms": 99.216, "prompt_per_token_ms": 0.5941077844311377, "prompt_per_second": 1683.196258667957, "predicted_n": 2, "predicted_ms": 32.393, "predicted_per_token_ms": 16.1965, "predicted_per_second": 61.741734325317196}, "tps": 61.741734325317196}, {"id": "2bcc4631-1278-4a5f-ade2-e8a5e0f8ce0e", "answer": "D", "llm_answer": "B", "score": 0, "topics": ["SystemSecurity", "PenTest"], "timings": {"cache_n": 59, "prompt_n": 103, "prompt_ms": 95.174, "prompt_per_token_ms": 0.9240194174757282, "prompt_per_second": 1082.2283396725995, "predicted_n": 2, "predicted_ms": 31.086, "predicted_per_token_ms": 15.543, "predicted_per_second": 64.33764395547836}, "tps": 64.33764395547836}, {"id": "de41ccd8-b9e3-445f-88fc-e2b0e613105a", "answer": "ABC", "llm_answer": "BC", "score": 0, "topics": ["ApplicationSecurity"], "timings": {"cache_n": 59, "prompt_n": 153, "prompt_ms": 91.856, "prompt_per_token_ms": 0.6003660130718954, "prompt_per_second": 1665.6505835220346, "predicted_n": 4, "predicted_ms": 78.943, "predicted_per_token_ms": 19.73575, "predicted_per_second": 50.66947037736088}, "tps": 50.66947037736088}, {"id": "fd1a2601-c6a5-4b3c-9f3a-0ef03d09b470", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["SystemSecurity", "ApplicationSecurity", "Vulnerability"], "timings": {"cache_n": 59, "prompt_n": 149, "prompt_ms": 97.676, "prompt_per_token_ms": 0.6555436241610738, "prompt_per_second": 1525.4514926901184, "predicted_n": 2, "predicted_ms": 29.573, "predicted_per_token_ms": 14.7865, "predicted_per_second": 67.6292564163257}, "tps": 67.6292564163257}, {"id": "4b44b715-57d1-4f0d-bb07-c6a35247b081", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["SystemSecurity"], "timings": {"cache_n": 62, "prompt_n": 104, "prompt_ms": 94.056, "prompt_per_token_ms": 0.9043846153846153, "prompt_per_second": 1105.7242493833462, "predicted_n": 2, "predicted_ms": 30.536, "predicted_per_token_ms": 15.268, "predicted_per_second": 65.49646319098768}, "tps": 65.49646319098768}, {"id": "65640306-d2dd-4bcf-b200-70e0d3e15bf3", "answer": "BD", "llm_answer": "BD", "score": 1, "topics": ["WebSecurity", "Vulnerability"], "timings": {"cache_n": 60, "prompt_n": 120, "prompt_ms": 96.04, "prompt_per_token_ms": 0.8003333333333333, "prompt_per_second": 1249.4793835901708, "predicted_n": 3, "predicted_ms": 58.492, "predicted_per_token_ms": 19.497333333333334, "predicted_per_second": 51.28906517130548}, "tps": 51.28906517130548}, {"id": "5f9bc5fc-89b6-41bb-946d-fee2abb908bf", "answer": "C", "llm_answer": "A", "score": 0, "topics": ["Cryptography"], "timings": {"cache_n": 59, "prompt_n": 93, "prompt_ms": 93.685, "prompt_per_token_ms": 1.0073655913978494, "prompt_per_second": 992.6882638629451, "predicted_n": 2, "predicted_ms": 30.272, "predicted_per_token_ms": 15.136, "predicted_per_second": 66.06765327695561}, "tps": 66.06765327695561}, {"id": "fffea5ee-2add-4699-85c9-a704226fb28f", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["PenTest"], "timings": {"cache_n": 58, "prompt_n": 224, "prompt_ms": 106.208, "prompt_per_token_ms": 0.47414285714285714, "prompt_per_second": 2109.0689966857485, "predicted_n": 2, "predicted_ms": 31.429, "predicted_per_token_ms": 15.7145, "predicted_per_second": 63.63549587960164}, "tps": 63.63549587960164}, {"id": "cfa2b854-0da2-4a4b-b32e-d6bd356c903f", "answer": "AC", "llm_answer": "BC", "score": 0, "topics": ["PenTest"], "timings": {"cache_n": 61, "prompt_n": 115, "prompt_ms": 87.079, "prompt_per_token_ms": 0.7572086956521739, "prompt_per_second": 1320.6398787308078, "predicted_n": 3, "predicted_ms": 53.406, "predicted_per_token_ms": 17.802, "predicted_per_second": 56.17346365576901}, "tps": 56.17346365576901}, {"id": "b0a3c93f-700c-4ffb-a589-2ac0918d07c2", "answer": "D", "llm_answer": "D", "score": 1, "topics": ["ApplicationSecurity"], "timings": {"cache_n": 59, "prompt_n": 109, "prompt_ms": 94.946, "prompt_per_token_ms": 0.8710642201834862, "prompt_per_second": 1148.0209803467233, "predicted_n": 2, "predicted_ms": 28.071, "predicted_per_token_ms": 14.0355, "predicted_per_second": 71.24790709272915}, "tps": 71.24790709272915}, {"id": "a2f827ad-e4ed-42c5-86b3-c1a9737263e9", "answer": "B", "llm_answer": "A", "score": 0, "topics": ["PenTest", "NetworkSecurity"], "timings": {"cache_n": 59, "prompt_n": 137, "prompt_ms": 97.681, "prompt_per_token_ms": 0.713, "prompt_per_second": 1402.5245441795232, "predicted_n": 2, "predicted_ms": 28.067, "predicted_per_token_ms": 14.0335, "predicted_per_second": 71.25806106815834}, "tps": 71.25806106815834}, {"id": "c91daf7e-5ced-4a89-968d-79564310034b", "answer": "A", "llm_answer": "C", "score": 0, "topics": ["SystemSecurity", "WebSecurity"], "timings": {"cache_n": 59, "prompt_n": 91, "prompt_ms": 92.497, "prompt_per_token_ms": 1.0164505494505494, "prompt_per_second": 983.8156913197186, "predicted_n": 2, "predicted_ms": 29.682, "predicted_per_token_ms": 14.841, "predicted_per_second": 67.38090425173506}, "tps": 67.38090425173506}, {"id": "b2fc7ddf-d036-42ab-b999-60c97d42f03a", "answer": "D", "llm_answer": "D", "score": 1, "topics": ["ApplicationSecurity"], "timings": {"cache_n": 59, "prompt_n": 117, "prompt_ms": 96.503, "prompt_per_token_ms": 0.8248119658119658, "prompt_per_second": 1212.397542045325, "predicted_n": 2, "predicted_ms": 29.282, "predicted_per_token_ms": 14.641, "predicted_per_second": 68.30134553650707}, "tps": 68.30134553650707}, {"id": "2308b0d7-03e5-4f1c-a151-41bc3f7de2e2", "answer": "A", "llm_answer": "B", "score": 0, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 62, "prompt_n": 191, "prompt_ms": 99.668, "prompt_per_token_ms": 0.5218219895287959, "prompt_per_second": 1916.3623229120678, "predicted_n": 2, "predicted_ms": 29.613, "predicted_per_token_ms": 14.8065, "predicted_per_second": 67.5379056495458}, "tps": 67.5379056495458}, {"id": "55804eb8-0da6-4b0a-9eab-a66e3cbeb157", "answer": "AC", "llm_answer": "AC", "score": 1, "topics": ["PenTest"], "timings": {"cache_n": 60, "prompt_n": 126, "prompt_ms": 96.164, "prompt_per_token_ms": 0.7632063492063492, "prompt_per_second": 1310.2616363711993, "predicted_n": 3, "predicted_ms": 63.717, "predicted_per_token_ms": 21.239, "predicted_per_second": 47.08319600734498}, "tps": 47.08319600734498}, {"id": "2456b235-1fea-4bd1-a56a-1b546725c287", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SystemSecurity", "NetworkSecurity", "PenTest"], "timings": {"cache_n": 61, "prompt_n": 111, "prompt_ms": 96.18, "prompt_per_token_ms": 0.8664864864864865, "prompt_per_second": 1154.0860885839052, "predicted_n": 2, "predicted_ms": 31.152, "predicted_per_token_ms": 15.576, "predicted_per_second": 64.20133538777607}, "tps": 64.20133538777607}, {"id": "a6ab20d9-6a0a-473c-870b-5faabbf2eb88", "answer": "AC", "llm_answer": "C", "score": 0, "topics": ["SoftwareSecurity", "SystemSecurity", "Vulnerability"], "timings": {"cache_n": 59, "prompt_n": 135, "prompt_ms": 97.514, "prompt_per_token_ms": 0.7223259259259259, "prompt_per_second": 1384.4165965912587, "predicted_n": 2, "predicted_ms": 29.371, "predicted_per_token_ms": 14.6855, "predicted_per_second": 68.09437880902932}, "tps": 68.09437880902932}, {"id": "d9b24291-2563-4b6f-8e0d-3965ea255910", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["PenTest", "ApplicationSecurity"], "timings": {"cache_n": 59, "prompt_n": 131, "prompt_ms": 98.067, "prompt_per_token_ms": 0.7486030534351145, "prompt_per_second": 1335.8214282072463, "predicted_n": 2, "predicted_ms": 26.463, "predicted_per_token_ms": 13.2315, "predicted_per_second": 75.57722102558289}, "tps": 75.57722102558289}, {"id": "9bd70193-62c6-4b2c-ac51-88c7fe35e184", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["ApplicationSecurity", "WebSecurity"], "timings": {"cache_n": 59, "prompt_n": 195, "prompt_ms": 90.77, "prompt_per_token_ms": 0.4654871794871795, "prompt_per_second": 2148.286878924755, "predicted_n": 2, "predicted_ms": 29.459, "predicted_per_token_ms": 14.7295, "predicted_per_second": 67.89096710682644}, "tps": 67.89096710682644}, {"id": "3c7acdb8-a771-4bfd-9726-cd601bacc873", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SoftwareSecurity", "ApplicationSecurity", "Vulnerability"], "timings": {"cache_n": 60, "prompt_n": 106, "prompt_ms": 94.344, "prompt_per_token_ms": 0.8900377358490565, "prompt_per_second": 1123.5478673789537, "predicted_n": 2, "predicted_ms": 28.229, "predicted_per_token_ms": 14.1145, "predicted_per_second": 70.84912678451238}, "tps": 70.84912678451238}, {"id": "59ab406e-5f9f-40f0-aef5-9fcc74d4d4f7", "answer": "AC", "llm_answer": "ACD", "score": 0, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 58, "prompt_n": 196, "prompt_ms": 101.295, "prompt_per_token_ms": 0.5168112244897959, "prompt_per_second": 1934.9424946937163, "predicted_n": 3, "predicted_ms": 58.54, "predicted_per_token_ms": 19.513333333333332, "predicted_per_second": 51.24701059104885}, "tps": 51.24701059104885}, {"id": "d635df5d-31b1-4cec-b285-e7be8b36403b", "answer": "BD", "llm_answer": "BD", "score": 1, "topics": ["WebSecurity", "SystemSecurity"], "timings": {"cache_n": 58, "prompt_n": 119, "prompt_ms": 100.803, "prompt_per_token_ms": 0.8470840336134453, "prompt_per_second": 1180.5204210192157, "predicted_n": 4, "predicted_ms": 93.111, "predicted_per_token_ms": 23.27775, "predicted_per_second": 42.95947847193135}, "tps": 42.95947847193135}, {"id": "21c388dd-5877-46a8-a61b-7eeb0c041e65", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["SystemSecurity"], "timings": {"cache_n": 59, "prompt_n": 129, "prompt_ms": 99.018, "prompt_per_token_ms": 0.7675813953488372, "prompt_per_second": 1302.7934314973036, "predicted_n": 2, "predicted_ms": 29.521, "predicted_per_token_ms": 14.7605, "predicted_per_second": 67.74838250736764}, "tps": 67.74838250736764}, {"id": "e629e09c-5c6f-4d4a-a9ff-5c57f2ddd3e9", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["SystemSecurity", "PenTest", "NetworkSecurity"], "timings": {"cache_n": 61, "prompt_n": 137, "prompt_ms": 98.747, "prompt_per_token_ms": 0.7207810218978102, "prompt_per_second": 1387.3839205241677, "predicted_n": 2, "predicted_ms": 30.863, "predicted_per_token_ms": 15.4315, "predicted_per_second": 64.8025143375563}, "tps": 64.8025143375563}, {"id": "884f2395-9462-4091-ab4d-a979d1f12a01", "answer": "BC", "llm_answer": "BC", "score": 1, "topics": ["SystemSecurity", "MemorySafety"], "timings": {"cache_n": 59, "prompt_n": 144, "prompt_ms": 98.744, "prompt_per_token_ms": 0.6857222222222222, "prompt_per_second": 1458.3164546706635, "predicted_n": 3, "predicted_ms": 60.208, "predicted_per_token_ms": 20.069333333333333, "predicted_per_second": 49.82726547967047}, "tps": 49.82726547967047}, {"id": "6f4bec5f-34fe-4745-852b-7d648102721e", "answer": "AC", "llm_answer": "C", "score": 0, "topics": ["SoftwareSecurity", "ApplicationSecurity", "Vulnerability"], "timings": {"cache_n": 59, "prompt_n": 108, "prompt_ms": 96.537, "prompt_per_token_ms": 0.8938611111111112, "prompt_per_second": 1118.74203673203, "predicted_n": 2, "predicted_ms": 29.882, "predicted_per_token_ms": 14.941, "predicted_per_second": 66.92992436918546}, "tps": 66.92992436918546}, {"id": "7c6de9ab-aa86-412e-91ff-3ce5d6601694", "answer": "BD", "llm_answer": "BD", "score": 1, "topics": ["SystemSecurity"], "timings": {"cache_n": 58, "prompt_n": 142, "prompt_ms": 94.266, "prompt_per_token_ms": 0.6638450704225353, "prompt_per_second": 1506.3755754991193, "predicted_n": 4, "predicted_ms": 79.863, "predicted_per_token_ms": 19.96575, "predicted_per_second": 50.085771884351956}, "tps": 50.085771884351956}, {"id": "fd8738c8-f3e4-4666-b98d-896754054bc1", "answer": "ABC", "llm_answer": "ABCD", "score": 0, "topics": ["ApplicationSecurity", "Vulnerability"], "timings": {"cache_n": 60, "prompt_n": 164, "prompt_ms": 98.792, "prompt_per_token_ms": 0.602390243902439, "prompt_per_second": 1660.0534456231276, "predicted_n": 64, "predicted_ms": 1846.303, "predicted_per_token_ms": 28.848484375, "predicted_per_second": 34.663866115150114}, "tps": 34.663866115150114}, {"id": "4c2c6b36-8a33-47ca-886b-10f8fcd116ff", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["SystemSecurity", "PenTest"], "timings": {"cache_n": 58, "prompt_n": 124, "prompt_ms": 94.602, "prompt_per_token_ms": 0.7629193548387098, "prompt_per_second": 1310.7545295025475, "predicted_n": 2, "predicted_ms": 30.896, "predicted_per_token_ms": 15.448, "predicted_per_second": 64.7332988089073}, "tps": 64.7332988089073}, {"id": "99531e57-3de6-4cf8-87f1-535228c0939d", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["SystemSecurity", "PenTest"], "timings": {"cache_n": 59, "prompt_n": 118, "prompt_ms": 95.692, "prompt_per_token_ms": 0.8109491525423729, "prompt_per_second": 1233.1229360866114, "predicted_n": 2, "predicted_ms": 30.127, "predicted_per_token_ms": 15.0635, "predicted_per_second": 66.38563414877021}, "tps": 66.38563414877021}, {"id": "a70a4813-25a6-4122-b347-50ee1b4cabc7", "answer": "ABD", "llm_answer": "ACD", "score": 0, "topics": ["SystemSecurity"], "timings": {"cache_n": 59, "prompt_n": 171, "prompt_ms": 97.43, "prompt_per_token_ms": 0.569766081871345, "prompt_per_second": 1755.1062301139277, "predicted_n": 3, "predicted_ms": 63.758, "predicted_per_token_ms": 21.252666666666666, "predicted_per_second": 47.05291884939929}, "tps": 47.05291884939929}, {"id": "5b245412-af4b-4202-b87c-5f46603d33f0", "answer": "C", "llm_answer": "ABCD", "score": 0, "topics": ["ApplicationSecurity", "WebSecurity"], "timings": {"cache_n": 59, "prompt_n": 134, "prompt_ms": 97.605, "prompt_per_token_ms": 0.7283955223880597, "prompt_per_second": 1372.8804876799345, "predicted_n": 64, "predicted_ms": 1840.071, "predicted_per_token_ms": 28.751109375, "predicted_per_second": 34.781266592430406}, "tps": 34.781266592430406}, {"id": "c21562e1-47e9-4eb7-9e77-f8b2fa65daa6", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["WebSecurity", "ApplicationSecurity"], "timings": {"cache_n": 62, "prompt_n": 110, "prompt_ms": 96.085, "prompt_per_token_ms": 0.8734999999999999, "prompt_per_second": 1144.8196908986836, "predicted_n": 2, "predicted_ms": 28.708, "predicted_per_token_ms": 14.354, "predicted_per_second": 69.66699177929497}, "tps": 69.66699177929497}, {"id": "968c0cc1-2180-462f-a281-a93687c57d35", "answer": "AD", "llm_answer": "AD", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 59, "prompt_n": 174, "prompt_ms": 100.31, "prompt_per_token_ms": 0.5764942528735633, "prompt_per_second": 1734.622669723856, "predicted_n": 4, "predicted_ms": 88.233, "predicted_per_token_ms": 22.05825, "predicted_per_second": 45.33451203064613}, "tps": 45.33451203064613}, {"id": "483f12bd-952c-440b-adda-513b2823031e", "answer": "AC", "llm_answer": "ABCD", "score": 0, "topics": ["SystemSecurity"], "timings": {"cache_n": 59, "prompt_n": 182, "prompt_ms": 101.241, "prompt_per_token_ms": 0.5562692307692307, "prompt_per_second": 1797.6906589227683, "predicted_n": 64, "predicted_ms": 1857.619, "predicted_per_token_ms": 29.025296875, "predicted_per_second": 34.45270531793656}, "tps": 34.45270531793656}, {"id": "9fc20d03-8a7e-4fe1-9a55-8f3190455863", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["ApplicationSecurity", "SoftwareSecurity", "Vulnerability"], "timings": {"cache_n": 59, "prompt_n": 164, "prompt_ms": 97.349, "prompt_per_token_ms": 0.5935914634146342, "prompt_per_second": 1684.6603457662636, "predicted_n": 2, "predicted_ms": 30.897, "predicted_per_token_ms": 15.4485, "predicted_per_second": 64.73120367673238}, "tps": 64.73120367673238}, {"id": "f02f8076-638f-4e90-add3-cd99d0ec9677", "answer": "AB", "llm_answer": "BC", "score": 0, "topics": ["SystemSecurity", "WebSecurity"], "timings": {"cache_n": 59, "prompt_n": 121, "prompt_ms": 95.612, "prompt_per_token_ms": 0.7901818181818181, "prompt_per_second": 1265.5315232397609, "predicted_n": 4, "predicted_ms": 91.875, "predicted_per_token_ms": 22.96875, "predicted_per_second": 43.53741496598639}, "tps": 43.53741496598639}, {"id": "9674bd39-7c70-4e7e-bb76-b41c26459888", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["WebSecurity", "PenTest"], "timings": {"cache_n": 58, "prompt_n": 173, "prompt_ms": 102.107, "prompt_per_token_ms": 0.5902138728323699, "prompt_per_second": 1694.3010763218977, "predicted_n": 2, "predicted_ms": 32.798, "predicted_per_token_ms": 16.399, "predicted_per_second": 60.97932800780535}, "tps": 60.97932800780535}, {"id": "f598749d-e147-44c3-b6ec-65e59143cb0f", "answer": "AC", "llm_answer": "AC", "score": 1, "topics": ["SystemSecurity", "PenTest"], "timings": {"cache_n": 60, "prompt_n": 127, "prompt_ms": 100.935, "prompt_per_token_ms": 0.7947637795275591, "prompt_per_second": 1258.235498092832, "predicted_n": 3, "predicted_ms": 60.483, "predicted_per_token_ms": 20.160999999999998, "predicted_per_second": 49.60071425028521}, "tps": 49.60071425028521}, {"id": "f73b1250-d13d-4215-a58d-b31bd93a6543", "answer": "BC", "llm_answer": "AC", "score": 0, "topics": ["SoftwareSecurity", "SystemSecurity", "Vulnerability"], "timings": {"cache_n": 60, "prompt_n": 158, "prompt_ms": 97.543, "prompt_per_token_ms": 0.617360759493671, "prompt_per_second": 1619.7984478640187, "predicted_n": 2, "predicted_ms": 31.228, "predicted_per_token_ms": 15.614, "predicted_per_second": 64.0450877417702}, "tps": 64.0450877417702}, {"id": "54bc5eb4-b247-4cab-97f6-73c09bf1808d", "answer": "BD", "llm_answer": "C", "score": 0, "topics": ["WebSecurity", "SystemSecurity"], "timings": {"cache_n": 60, "prompt_n": 145, "prompt_ms": 96.322, "prompt_per_token_ms": 0.6642896551724138, "prompt_per_second": 1505.3674134673283, "predicted_n": 2, "predicted_ms": 29.874, "predicted_per_token_ms": 14.937, "predicted_per_second": 66.9478476266988}, "tps": 66.9478476266988}, {"id": "4d86c3b8-64c9-48e5-b650-c4dfc9acd7b7", "answer": "AB", "llm_answer": "B", "score": 0, "topics": ["SoftwareSecurity", "SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 59, "prompt_n": 170, "prompt_ms": 99.176, "prompt_per_token_ms": 0.5833882352941177, "prompt_per_second": 1714.1243849318384, "predicted_n": 2, "predicted_ms": 29.806, "predicted_per_token_ms": 14.903, "predicted_per_second": 67.10058377507885}, "tps": 67.10058377507885}, {"id": "bde747d4-9c70-48bc-a3bd-3423c97693fe", "answer": "BC", "llm_answer": "C", "score": 0, "topics": ["SoftwareSecurity", "Vulnerability"], "timings": {"cache_n": 63, "prompt_n": 204, "prompt_ms": 101.775, "prompt_per_token_ms": 0.4988970588235294, "prompt_per_second": 2004.4215180545318, "predicted_n": 2, "predicted_ms": 31.166, "predicted_per_token_ms": 15.583, "predicted_per_second": 64.17249566835655}, "tps": 64.17249566835655}, {"id": "1265496f-54b3-471d-baf8-e9d8bd603c06", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["ApplicationSecurity"], "timings": {"cache_n": 62, "prompt_n": 82, "prompt_ms": 80.31, "prompt_per_token_ms": 0.979390243902439, "prompt_per_second": 1021.0434566056531, "predicted_n": 2, "predicted_ms": 28.198, "predicted_per_token_ms": 14.099, "predicted_per_second": 70.92701610043265}, "tps": 70.92701610043265}, {"id": "89d7714d-bae4-4d77-8eb0-41f571e94c9e", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["ApplicationSecurity", "MemorySafety", "PenTest"], "timings": {"cache_n": 58, "prompt_n": 56, "prompt_ms": 86.278, "prompt_per_token_ms": 1.5406785714285716, "prompt_per_second": 649.064651475463, "predicted_n": 2, "predicted_ms": 34.564, "predicted_per_token_ms": 17.282, "predicted_per_second": 57.863673185973845}, "tps": 57.863673185973845}, {"id": "9fa7566c-64ac-4eb5-84d2-49ad7c74b903", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SystemSecurity", "PenTest"], "timings": {"cache_n": 58, "prompt_n": 123, "prompt_ms": 96.298, "prompt_per_token_ms": 0.7829105691056911, "prompt_per_second": 1277.2850941867953, "predicted_n": 2, "predicted_ms": 32.207, "predicted_per_token_ms": 16.1035, "predicted_per_second": 62.09830161145093}, "tps": 62.09830161145093}, {"id": "017b71e5-a989-455c-befa-96ee53dda7ab", "answer": "BC", "llm_answer": "B", "score": 0, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 59, "prompt_n": 114, "prompt_ms": 100.342, "prompt_per_token_ms": 0.8801929824561403, "prompt_per_second": 1136.1144884495027, "predicted_n": 2, "predicted_ms": 29.165, "predicted_per_token_ms": 14.5825, "predicted_per_second": 68.57534716269501}, "tps": 68.57534716269501}, {"id": "948842cd-b8b0-4278-984b-c8f3f9a1cfaf", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 59, "prompt_n": 104, "prompt_ms": 95.878, "prompt_per_token_ms": 0.9219038461538461, "prompt_per_second": 1084.7118212728676, "predicted_n": 2, "predicted_ms": 29.695, "predicted_per_token_ms": 14.8475, "predicted_per_second": 67.35140596059942}, "tps": 67.35140596059942}, {"id": "7e6a32b7-6914-415d-91c3-7990bddc12d1", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 61, "prompt_n": 189, "prompt_ms": 100.008, "prompt_per_token_ms": 0.5291428571428571, "prompt_per_second": 1889.8488120950324, "predicted_n": 2, "predicted_ms": 29.474, "predicted_per_token_ms": 14.737, "predicted_per_second": 67.85641582411617}, "tps": 67.85641582411617}, {"id": "0b13218e-02ae-4bd4-bd5e-bfe64e61a266", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["ApplicationSecurity", "NetworkSecurity"], "timings": {"cache_n": 60, "prompt_n": 117, "prompt_ms": 97.218, "prompt_per_token_ms": 0.830923076923077, "prompt_per_second": 1203.4808368820588, "predicted_n": 2, "predicted_ms": 30.289, "predicted_per_token_ms": 15.1445, "predicted_per_second": 66.03057215490772}, "tps": 66.03057215490772}, {"id": "c21b724f-45a7-48de-b338-ba1882b7e19c", "answer": "AB", "llm_answer": "A", "score": 0, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 59, "prompt_n": 199, "prompt_ms": 102.768, "prompt_per_token_ms": 0.5164221105527638, "prompt_per_second": 1936.4004359333646, "predicted_n": 2, "predicted_ms": 29.706, "predicted_per_token_ms": 14.853, "predicted_per_second": 67.32646603379789}, "tps": 67.32646603379789}, {"id": "b1c57d01-b009-458c-9e0d-84aef19aa760", "answer": "D", "llm_answer": "D", "score": 1, "topics": ["SystemSecurity", "PenTest"], "timings": {"cache_n": 60, "prompt_n": 93, "prompt_ms": 94.068, "prompt_per_token_ms": 1.0114838709677418, "prompt_per_second": 988.6465110345707, "predicted_n": 2, "predicted_ms": 30.738, "predicted_per_token_ms": 15.369, "predicted_per_second": 65.06604203266315}, "tps": 65.06604203266315}, {"id": "58fa94fb-ce05-4ea1-975f-e9e2ecbc7e98", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["PenTest"], "timings": {"cache_n": 59, "prompt_n": 114, "prompt_ms": 96.065, "prompt_per_token_ms": 0.8426754385964912, "prompt_per_second": 1186.6965075729977, "predicted_n": 2, "predicted_ms": 29.81, "predicted_per_token_ms": 14.905, "predicted_per_second": 67.09158000670917}, "tps": 67.09158000670917}, {"id": "76d17c2e-bbae-4508-8e0b-665a38fc965a", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SystemSecurity", "PenTest"], "timings": {"cache_n": 59, "prompt_n": 96, "prompt_ms": 82.047, "prompt_per_token_ms": 0.85465625, "prompt_per_second": 1170.0610625617023, "predicted_n": 2, "predicted_ms": 26.141, "predicted_per_token_ms": 13.0705, "predicted_per_second": 76.5081672468536}, "tps": 76.5081672468536}, {"id": "4f923c67-7efe-4710-9ce6-8e92af98e293", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["WebSecurity", "SystemSecurity"], "timings": {"cache_n": 58, "prompt_n": 124, "prompt_ms": 93.29, "prompt_per_token_ms": 0.7523387096774194, "prompt_per_second": 1329.188551827634, "predicted_n": 2, "predicted_ms": 30.031, "predicted_per_token_ms": 15.0155, "predicted_per_second": 66.59784888948087}, "tps": 66.59784888948087}, {"id": "f300e054-f7ab-4873-8232-1dba3078e664", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["ApplicationSecurity", "Vulnerability"], "timings": {"cache_n": 62, "prompt_n": 110, "prompt_ms": 95.182, "prompt_per_token_ms": 0.8652909090909091, "prompt_per_second": 1155.6806959299026, "predicted_n": 2, "predicted_ms": 28.89, "predicted_per_token_ms": 14.445, "predicted_per_second": 69.22810661128418}, "tps": 69.22810661128418}, {"id": "10f5a4e3-3be0-4416-ab95-b46e3f31907c", "answer": "A", "llm_answer": "AD", "score": 0, "topics": ["WebSecurity", "ApplicationSecurity"], "timings": {"cache_n": 60, "prompt_n": 127, "prompt_ms": 95.165, "prompt_per_token_ms": 0.7493307086614174, "prompt_per_second": 1334.5242473598487, "predicted_n": 3, "predicted_ms": 58.648, "predicted_per_token_ms": 19.549333333333333, "predicted_per_second": 51.15263947619697}, "tps": 51.15263947619697}, {"id": "80236cbe-8fac-426c-9544-1a03ba1ed2c6", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["ApplicationSecurity", "NetworkSecurity", "PenTest"], "timings": {"cache_n": 59, "prompt_n": 112, "prompt_ms": 94.364, "prompt_per_token_ms": 0.8425357142857143, "prompt_per_second": 1186.8933067695307, "predicted_n": 2, "predicted_ms": 30.156, "predicted_per_token_ms": 15.078, "predicted_per_second": 66.32179334129195}, "tps": 66.32179334129195}, {"id": "f33bcf29-a1ee-4cd5-82bf-ac002975b211", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["ApplicationSecurity", "PenTest"], "timings": {"cache_n": 59, "prompt_n": 106, "prompt_ms": 93.753, "prompt_per_token_ms": 0.8844622641509434, "prompt_per_second": 1130.630486491099, "predicted_n": 2, "predicted_ms": 32.0, "predicted_per_token_ms": 16.0, "predicted_per_second": 62.5}, "tps": 62.5}, {"id": "84bdc80d-820e-47a2-aad4-1a06531f6310", "answer": "A", "llm_answer": "D", "score": 0, "topics": ["WebSecurity"], "timings": {"cache_n": 59, "prompt_n": 180, "prompt_ms": 102.852, "prompt_per_token_ms": 0.5714, "prompt_per_second": 1750.0875043752185, "predicted_n": 2, "predicted_ms": 29.049, "predicted_per_token_ms": 14.5245, "predicted_per_second": 68.84918585837723}, "tps": 68.84918585837723}, {"id": "7c74ff1d-9c67-4607-bd0e-8d6b63ca45fb", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["Vulnerability"], "timings": {"cache_n": 60, "prompt_n": 115, "prompt_ms": 95.675, "prompt_per_token_ms": 0.8319565217391304, "prompt_per_second": 1201.9858897308598, "predicted_n": 2, "predicted_ms": 30.314, "predicted_per_token_ms": 15.157, "predicted_per_second": 65.97611664577423}, "tps": 65.97611664577423}, {"id": "39d14c7a-364d-4d41-b07b-5867ef55d56d", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 60, "prompt_n": 138, "prompt_ms": 97.2, "prompt_per_token_ms": 0.7043478260869566, "prompt_per_second": 1419.753086419753, "predicted_n": 2, "predicted_ms": 29.657, "predicted_per_token_ms": 14.8285, "predicted_per_second": 67.43770442054152}, "tps": 67.43770442054152}, {"id": "383fb0c2-b431-43d9-88cc-71d97a9d8dc6", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SystemSecurity", "PenTest"], "timings": {"cache_n": 59, "prompt_n": 125, "prompt_ms": 97.664, "prompt_per_token_ms": 0.781312, "prompt_per_second": 1279.8984272608125, "predicted_n": 2, "predicted_ms": 27.816, "predicted_per_token_ms": 13.908, "predicted_per_second": 71.90106413574921}, "tps": 71.90106413574921}, {"id": "dc97526a-b305-49c6-a4d8-10f5d5bee3e4", "answer": "B", "llm_answer": "A", "score": 0, "topics": ["ApplicationSecurity", "SystemSecurity"], "timings": {"cache_n": 59, "prompt_n": 266, "prompt_ms": 88.647, "prompt_per_token_ms": 0.33325939849624064, "prompt_per_second": 3000.665561158302, "predicted_n": 2, "predicted_ms": 30.239, "predicted_per_token_ms": 15.1195, "predicted_per_second": 66.1397532987202}, "tps": 66.1397532987202}, {"id": "41af35e8-8a8e-4202-81bc-8bc5dd6aef0e", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["PenTest"], "timings": {"cache_n": 60, "prompt_n": 113, "prompt_ms": 96.513, "prompt_per_token_ms": 0.8540973451327434, "prompt_per_second": 1170.8267280055536, "predicted_n": 2, "predicted_ms": 28.893, "predicted_per_token_ms": 14.4465, "predicted_per_second": 69.22091856158931}, "tps": 69.22091856158931}, {"id": "a2440bd7-65ff-45c6-aa68-3eb0018c53db", "answer": "BD", "llm_answer": "B", "score": 0, "topics": ["SystemSecurity", "NetworkSecurity", "PenTest"], "timings": {"cache_n": 59, "prompt_n": 111, "prompt_ms": 94.795, "prompt_per_token_ms": 0.854009009009009, "prompt_per_second": 1170.9478348014136, "predicted_n": 2, "predicted_ms": 29.952, "predicted_per_token_ms": 14.976, "predicted_per_second": 66.77350427350427}, "tps": 66.77350427350427}, {"id": "bac3e3e0-a11e-403c-8dfe-8a6efd9b1d17", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SystemSecurity", "WebSecurity"], "timings": {"cache_n": 60, "prompt_n": 97, "prompt_ms": 95.405, "prompt_per_token_ms": 0.9835567010309278, "prompt_per_second": 1016.7182013521303, "predicted_n": 2, "predicted_ms": 33.188, "predicted_per_token_ms": 16.594, "predicted_per_second": 60.262745570688196}, "tps": 60.262745570688196}, {"id": "9b40b7bd-219d-4559-8194-f6d669e96608", "answer": "D", "llm_answer": "B", "score": 0, "topics": ["NetworkSecurity", "PenTest"], "timings": {"cache_n": 58, "prompt_n": 132, "prompt_ms": 99.994, "prompt_per_token_ms": 0.757530303030303, "prompt_per_second": 1320.079204752285, "predicted_n": 2, "predicted_ms": 31.931, "predicted_per_token_ms": 15.9655, "predicted_per_second": 62.63505684131408}, "tps": 62.63505684131408}, {"id": "30ddec76-0262-48b1-aeb3-dd97cd0ca781", "answer": "BCD", "llm_answer": "BD", "score": 0, "topics": ["WebSecurity"], "timings": {"cache_n": 59, "prompt_n": 181, "prompt_ms": 100.914, "prompt_per_token_ms": 0.5575359116022099, "prompt_per_second": 1793.6064371643183, "predicted_n": 2, "predicted_ms": 29.951, "predicted_per_token_ms": 14.9755, "predicted_per_second": 66.77573369837401}, "tps": 66.77573369837401}, {"id": "0435bcd3-626d-494f-92a8-1371e0e70ab1", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["SystemSecurity", "MemorySafety"], "timings": {"cache_n": 62, "prompt_n": 126, "prompt_ms": 96.358, "prompt_per_token_ms": 0.7647460317460317, "prompt_per_second": 1307.6236534589757, "predicted_n": 2, "predicted_ms": 28.538, "predicted_per_token_ms": 14.269, "predicted_per_second": 70.08199593524424}, "tps": 70.08199593524424}, {"id": "1924d6ed-68b2-4cab-8815-474344b19b0b", "answer": "AD", "llm_answer": "D", "score": 0, "topics": ["WebSecurity"], "timings": {"cache_n": 58, "prompt_n": 128, "prompt_ms": 95.846, "prompt_per_token_ms": 0.748796875, "prompt_per_second": 1335.4756588694363, "predicted_n": 2, "predicted_ms": 29.237, "predicted_per_token_ms": 14.6185, "predicted_per_second": 68.40647125218047}, "tps": 68.40647125218047}, {"id": "5243e0ec-b17b-48ff-87ba-9c97adff8b43", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["NetworkSecurity", "SystemSecurity", "PenTest"], "timings": {"cache_n": 59, "prompt_n": 92, "prompt_ms": 93.905, "prompt_per_token_ms": 1.0207065217391305, "prompt_per_second": 979.7135402800702, "predicted_n": 2, "predicted_ms": 29.966, "predicted_per_token_ms": 14.983, "predicted_per_second": 66.74230794900888}, "tps": 66.74230794900888}, {"id": "7cbb6109-00db-4be9-b95a-57885a5d3367", "answer": "D", "llm_answer": "C", "score": 0, "topics": ["NetworkSecurity", "PenTest"], "timings": {"cache_n": 58, "prompt_n": 162, "prompt_ms": 100.082, "prompt_per_token_ms": 0.6177901234567901, "prompt_per_second": 1618.672688395516, "predicted_n": 2, "predicted_ms": 25.796, "predicted_per_token_ms": 12.898, "predicted_per_second": 77.53140021708792}, "tps": 77.53140021708792}, {"id": "6d1cd0b3-f193-4607-a3e4-83d83fdd027a", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["ApplicationSecurity", "SystemSecurity"], "timings": {"cache_n": 58, "prompt_n": 90, "prompt_ms": 76.555, "prompt_per_token_ms": 0.8506111111111112, "prompt_per_second": 1175.6253673829274, "predicted_n": 2, "predicted_ms": 29.093, "predicted_per_token_ms": 14.5465, "predicted_per_second": 68.74505894888804}, "tps": 68.74505894888804}, {"id": "b718daa1-9b47-46b2-a7be-4ed4671ef61c", "answer": "AC", "llm_answer": "ACD", "score": 0, "topics": ["ApplicationSecurity", "SystemSecurity"], "timings": {"cache_n": 58, "prompt_n": 131, "prompt_ms": 100.106, "prompt_per_token_ms": 0.7641679389312976, "prompt_per_second": 1308.6128703574213, "predicted_n": 3, "predicted_ms": 60.402, "predicted_per_token_ms": 20.134, "predicted_per_second": 49.66722956193503}, "tps": 49.66722956193503}, {"id": "07973067-682b-4ba2-bc13-f41657d0c0ef", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["WebSecurity", "SoftwareSecurity"], "timings": {"cache_n": 60, "prompt_n": 144, "prompt_ms": 97.47, "prompt_per_token_ms": 0.676875, "prompt_per_second": 1477.3776546629733, "predicted_n": 2, "predicted_ms": 29.104, "predicted_per_token_ms": 14.552, "predicted_per_second": 68.71907641561297}, "tps": 68.71907641561297}, {"id": "acffcf83-2f96-4e32-b53a-46e8f17e0bae", "answer": "AC", "llm_answer": "AC", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 62, "prompt_n": 117, "prompt_ms": 95.962, "prompt_per_token_ms": 0.8201880341880342, "prompt_per_second": 1219.2326129092764, "predicted_n": 2, "predicted_ms": 34.484, "predicted_per_token_ms": 17.242, "predicted_per_second": 57.99791207516529}, "tps": 57.99791207516529}, {"id": "349600d8-c9ff-40db-8540-cd8ad9cd2dbf", "answer": "BD", "llm_answer": "BD", "score": 1, "topics": ["ApplicationSecurity", "Vulnerability"], "timings": {"cache_n": 60, "prompt_n": 142, "prompt_ms": 98.008, "prompt_per_token_ms": 0.6901971830985916, "prompt_per_second": 1448.861317443474, "predicted_n": 64, "predicted_ms": 1846.761, "predicted_per_token_ms": 28.855640625, "predicted_per_second": 34.65526941493783}, "tps": 34.65526941493783}, {"id": "a3a37a81-bdbb-417a-b042-6114592228b9", "answer": "D", "llm_answer": "D", "score": 1, "topics": ["SystemSecurity", "SoftwareSecurity"], "timings": {"cache_n": 59, "prompt_n": 185, "prompt_ms": 101.445, "prompt_per_token_ms": 0.5483513513513513, "prompt_per_second": 1823.6482823204692, "predicted_n": 2, "predicted_ms": 27.297, "predicted_per_token_ms": 13.6485, "predicted_per_second": 73.26812470234825}, "tps": 73.26812470234825}, {"id": "8920a370-7704-4bf6-b4a4-558598347ef1", "answer": "AB", "llm_answer": "B", "score": 0, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 58, "prompt_n": 127, "prompt_ms": 82.002, "prompt_per_token_ms": 0.6456850393700787, "prompt_per_second": 1548.7427135923515, "predicted_n": 2, "predicted_ms": 29.932, "predicted_per_token_ms": 14.966, "predicted_per_second": 66.8181210744354}, "tps": 66.8181210744354}, {"id": "48988829-0759-4844-b993-2c115e84ea27", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["SystemSecurity", "SoftwareSecurity"], "timings": {"cache_n": 59, "prompt_n": 145, "prompt_ms": 98.237, "prompt_per_token_ms": 0.6774965517241379, "prompt_per_second": 1476.0222726671216, "predicted_n": 2, "predicted_ms": 30.563, "predicted_per_token_ms": 15.2815, "predicted_per_second": 65.43860223145634}, "tps": 65.43860223145634}, {"id": "46e95f58-15e3-497c-9f88-052d1528977c", "answer": "B", "llm_answer": "C", "score": 0, "topics": ["NetworkSecurity", "SystemSecurity", "PenTest"], "timings": {"cache_n": 59, "prompt_n": 128, "prompt_ms": 96.193, "prompt_per_token_ms": 0.7515078125, "prompt_per_second": 1330.6581559988772, "predicted_n": 2, "predicted_ms": 28.902, "predicted_per_token_ms": 14.451, "predicted_per_second": 69.19936336585702}, "tps": 69.19936336585702}, {"id": "64ee8252-3440-4f26-b25a-3e3e8140f454", "answer": "AC", "llm_answer": "CD", "score": 0, "topics": ["Vulnerability"], "timings": {"cache_n": 59, "prompt_n": 159, "prompt_ms": 98.702, "prompt_per_token_ms": 0.6207672955974842, "prompt_per_second": 1610.9096066948998, "predicted_n": 4, "predicted_ms": 94.585, "predicted_per_token_ms": 23.64625, "predicted_per_second": 42.29000370037533}, "tps": 42.29000370037533}, {"id": "c2534dd1-3d89-49ab-826a-0e4ec1653b68", "answer": "B", "llm_answer": "A", "score": 0, "topics": ["SystemSecurity", "SoftwareSecurity"], "timings": {"cache_n": 62, "prompt_n": 171, "prompt_ms": 99.733, "prompt_per_token_ms": 0.583233918128655, "prompt_per_second": 1714.5779230545556, "predicted_n": 2, "predicted_ms": 30.456, "predicted_per_token_ms": 15.228, "predicted_per_second": 65.66850538481744}, "tps": 65.66850538481744}, {"id": "0e05d118-e272-48cb-9e16-1e375d809c5a", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["WebSecurity", "SystemSecurity"], "timings": {"cache_n": 58, "prompt_n": 128, "prompt_ms": 96.825, "prompt_per_token_ms": 0.7564453125, "prompt_per_second": 1321.972631035373, "predicted_n": 2, "predicted_ms": 30.695, "predicted_per_token_ms": 15.3475, "predicted_per_second": 65.15719172503665}, "tps": 65.15719172503665}, {"id": "6ac83ccb-b108-49f4-a2d1-a1e5b4e7f781", "answer": "ABCD", "llm_answer": "A", "score": 0, "topics": ["ApplicationSecurity", "WebSecurity", "Vulnerability"], "timings": {"cache_n": 60, "prompt_n": 153, "prompt_ms": 97.91, "prompt_per_token_ms": 0.6399346405228757, "prompt_per_second": 1562.6595853334695, "predicted_n": 2, "predicted_ms": 30.93, "predicted_per_token_ms": 15.465, "predicted_per_second": 64.66214031684449}, "tps": 64.66214031684449}, {"id": "ab0b25ad-642b-4324-8827-6114ac7ed7ae", "answer": "A", "llm_answer": "C", "score": 0, "topics": ["ApplicationSecurity", "SystemSecurity"], "timings": {"cache_n": 60, "prompt_n": 130, "prompt_ms": 97.176, "prompt_per_token_ms": 0.7475076923076923, "prompt_per_second": 1337.778875442496, "predicted_n": 2, "predicted_ms": 29.161, "predicted_per_token_ms": 14.5805, "predicted_per_second": 68.58475360927265}, "tps": 68.58475360927265}, {"id": "e0256d82-2920-4f82-8a45-3d6702c6b561", "answer": "AD", "llm_answer": "A", "score": 0, "topics": ["SystemSecurity", "Vulnerability"], "timings": {"cache_n": 59, "prompt_n": 133, "prompt_ms": 97.925, "prompt_per_token_ms": 0.7362781954887218, "prompt_per_second": 1358.1822823589482, "predicted_n": 2, "predicted_ms": 29.409, "predicted_per_token_ms": 14.7045, "predicted_per_second": 68.00639260090449}, "tps": 68.00639260090449}, {"id": "16567bc2-f96e-48e6-919e-3b531182c69c", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["PenTest"], "timings": {"cache_n": 62, "prompt_n": 118, "prompt_ms": 87.376, "prompt_per_token_ms": 0.7404745762711865, "prompt_per_second": 1350.4852591100532, "predicted_n": 2, "predicted_ms": 25.867, "predicted_per_token_ms": 12.9335, "predicted_per_second": 77.31859125526732}, "tps": 77.31859125526732}, {"id": "2a2549e7-cbbd-4583-8f5c-247983cc6b2a", "answer": "AB", "llm_answer": "AB", "score": 1, "topics": ["NetworkSecurity"], "timings": {"cache_n": 59, "prompt_n": 164, "prompt_ms": 95.677, "prompt_per_token_ms": 0.5833963414634147, "prompt_per_second": 1714.1005675345168, "predicted_n": 2, "predicted_ms": 32.168, "predicted_per_token_ms": 16.084, "predicted_per_second": 62.17358865953743}, "tps": 62.17358865953743}, {"id": "2b091e44-d561-4f3b-87a0-2db307c8793c", "answer": "ABC", "llm_answer": "ABCD", "score": 0, "topics": ["WebSecurity", "SystemSecurity"], "timings": {"cache_n": 61, "prompt_n": 133, "prompt_ms": 97.524, "prompt_per_token_ms": 0.7332631578947368, "prompt_per_second": 1363.7668676428366, "predicted_n": 3, "predicted_ms": 58.536, "predicted_per_token_ms": 19.512, "predicted_per_second": 51.25051250512506}, "tps": 51.25051250512506}, {"id": "15f2287a-7cb2-4a8d-a5fb-c4e25c974d64", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SystemSecurity", "Vulnerability"], "timings": {"cache_n": 60, "prompt_n": 121, "prompt_ms": 97.157, "prompt_per_token_ms": 0.8029504132231404, "prompt_per_second": 1245.4069186986014, "predicted_n": 2, "predicted_ms": 30.223, "predicted_per_token_ms": 15.1115, "predicted_per_second": 66.17476756112895}, "tps": 66.17476756112895}, {"id": "e640d5e9-26b9-416e-9d48-7b1421542192", "answer": "BD", "llm_answer": "B", "score": 0, "topics": ["ApplicationSecurity", "SystemSecurity"], "timings": {"cache_n": 60, "prompt_n": 196, "prompt_ms": 102.137, "prompt_per_token_ms": 0.5211071428571429, "prompt_per_second": 1918.991158933589, "predicted_n": 2, "predicted_ms": 30.559, "predicted_per_token_ms": 15.2795, "predicted_per_second": 65.44716777381458}, "tps": 65.44716777381458}, {"id": "cda6385f-b225-4cff-b1be-a5826a6260dc", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SystemSecurity", "SoftwareSecurity"], "timings": {"cache_n": 60, "prompt_n": 104, "prompt_ms": 93.813, "prompt_per_token_ms": 0.902048076923077, "prompt_per_second": 1108.5883619541003, "predicted_n": 2, "predicted_ms": 30.228, "predicted_per_token_ms": 15.114, "predicted_per_second": 66.1638216223369}, "tps": 66.1638216223369}, {"id": "95f18104-74d9-4901-927e-1f9c1303b863", "answer": "AD", "llm_answer": "AD", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 58, "prompt_n": 173, "prompt_ms": 101.029, "prompt_per_token_ms": 0.5839826589595376, "prompt_per_second": 1712.3796137742627, "predicted_n": 2, "predicted_ms": 30.258, "predicted_per_token_ms": 15.129, "predicted_per_second": 66.09822195782934}, "tps": 66.09822195782934}, {"id": "be1b91f7-77c1-42ea-a083-bcdee27e1757", "answer": "AB", "llm_answer": "AB", "score": 1, "topics": ["SystemSecurity", "NetworkSecurity"], "timings": {"cache_n": 61, "prompt_n": 96, "prompt_ms": 93.819, "prompt_per_token_ms": 0.97728125, "prompt_per_second": 1023.2468902887474, "predicted_n": 4, "predicted_ms": 86.724, "predicted_per_token_ms": 21.681, "predicted_per_second": 46.12333379456667}, "tps": 46.12333379456667}, {"id": "3303b914-ce60-4e67-9f96-88061a35f999", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SoftwareSecurity", "SystemSecurity", "Vulnerability"], "timings": {"cache_n": 58, "prompt_n": 187, "prompt_ms": 106.316, "prompt_per_token_ms": 0.5685347593582888, "prompt_per_second": 1758.9074081041422, "predicted_n": 2, "predicted_ms": 32.006, "predicted_per_token_ms": 16.003, "predicted_per_second": 62.488283446853714}, "tps": 62.488283446853714}, {"id": "05659502-7e61-4593-afd5-bbd941052fd6", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["ApplicationSecurity", "WebSecurity"], "timings": {"cache_n": 60, "prompt_n": 126, "prompt_ms": 95.202, "prompt_per_token_ms": 0.7555714285714286, "prompt_per_second": 1323.5016071090943, "predicted_n": 2, "predicted_ms": 29.919, "predicted_per_token_ms": 14.9595, "predicted_per_second": 66.8471539824192}, "tps": 66.8471539824192}, {"id": "3420c1d9-7791-41ff-9a5e-13c7503f91d2", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 58, "prompt_n": 170, "prompt_ms": 96.12, "prompt_per_token_ms": 0.5654117647058824, "prompt_per_second": 1768.6225551394089, "predicted_n": 2, "predicted_ms": 30.899, "predicted_per_token_ms": 15.4495, "predicted_per_second": 64.72701381921745}, "tps": 64.72701381921745}, {"id": "07df9285-875b-4cbc-b15c-3ca6edd9fa69", "answer": "C", "llm_answer": "B", "score": 0, "topics": ["ApplicationSecurity", "WebSecurity"], "timings": {"cache_n": 60, "prompt_n": 188, "prompt_ms": 100.345, "prompt_per_token_ms": 0.53375, "prompt_per_second": 1873.5362997658078, "predicted_n": 2, "predicted_ms": 29.788, "predicted_per_token_ms": 14.894, "predicted_per_second": 67.14113065664026}, "tps": 67.14113065664026}, {"id": "b908bc08-64af-48b8-b596-8236c488193d", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["SystemSecurity", "PenTest"], "timings": {"cache_n": 62, "prompt_n": 141, "prompt_ms": 97.111, "prompt_per_token_ms": 0.6887304964539007, "prompt_per_second": 1451.9467413578275, "predicted_n": 2, "predicted_ms": 30.022, "predicted_per_token_ms": 15.011, "predicted_per_second": 66.61781360335755}, "tps": 66.61781360335755}, {"id": "691d9dcf-28b7-432e-962b-40825317323d", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["SystemSecurity", "ApplicationSecurity", "PenTest"], "timings": {"cache_n": 60, "prompt_n": 111, "prompt_ms": 94.554, "prompt_per_token_ms": 0.8518378378378378, "prompt_per_second": 1173.9323561139665, "predicted_n": 2, "predicted_ms": 29.125, "predicted_per_token_ms": 14.5625, "predicted_per_second": 68.6695278969957}, "tps": 68.6695278969957}, {"id": "0c9450ec-f3a5-4033-aff4-e140b30dfe72", "answer": "AC", "llm_answer": "A", "score": 0, "topics": ["ApplicationSecurity", "Vulnerability"], "timings": {"cache_n": 58, "prompt_n": 143, "prompt_ms": 98.227, "prompt_per_token_ms": 0.6869020979020979, "prompt_per_second": 1455.8115385790059, "predicted_n": 2, "predicted_ms": 29.338, "predicted_per_token_ms": 14.669, "predicted_per_second": 68.17097279978185}, "tps": 68.17097279978185}, {"id": "68d01126-2744-4534-a2ca-cc13cd0e729c", "answer": "AC", "llm_answer": "AC", "score": 1, "topics": ["ApplicationSecurity", "WebSecurity"], "timings": {"cache_n": 60, "prompt_n": 184, "prompt_ms": 99.616, "prompt_per_token_ms": 0.5413913043478261, "prompt_per_second": 1847.09283649213, "predicted_n": 3, "predicted_ms": 59.445, "predicted_per_token_ms": 19.815, "predicted_per_second": 50.46681806712087}, "tps": 50.46681806712087}, {"id": "0c66c1b1-a2b4-4859-ad16-46aa75b3165b", "answer": "AB", "llm_answer": "ABD", "score": 0, "topics": ["ApplicationSecurity", "SystemSecurity"], "timings": {"cache_n": 59, "prompt_n": 121, "prompt_ms": 94.31, "prompt_per_token_ms": 0.7794214876033058, "prompt_per_second": 1283.00286289895, "predicted_n": 3, "predicted_ms": 63.529, "predicted_per_token_ms": 21.176333333333336, "predicted_per_second": 47.22252829416487}, "tps": 47.22252829416487}, {"id": "f9e18abd-23d8-47ad-8aca-3a3539091bac", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["WebSecurity", "ApplicationSecurity"], "timings": {"cache_n": 59, "prompt_n": 116, "prompt_ms": 97.973, "prompt_per_token_ms": 0.8445948275862069, "prompt_per_second": 1183.9996733794005, "predicted_n": 2, "predicted_ms": 29.855, "predicted_per_token_ms": 14.9275, "predicted_per_second": 66.9904538603249}, "tps": 66.9904538603249}, {"id": "068c932c-727a-4b78-9fb7-54ffff99e53c", "answer": "ACD", "llm_answer": "AD", "score": 0, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 58, "prompt_n": 140, "prompt_ms": 97.674, "prompt_per_token_ms": 0.6976714285714286, "prompt_per_second": 1433.339476216803, "predicted_n": 3, "predicted_ms": 53.888, "predicted_per_token_ms": 17.962666666666667, "predicted_per_second": 55.67102137767222}, "tps": 55.67102137767222}, {"id": "7927d3a0-47b8-43f3-a705-c232719f4d34", "answer": "ABCD", "llm_answer": "ABCD", "score": 1, "topics": ["SoftwareSecurity", "SystemSecurity", "Vulnerability"], "timings": {"cache_n": 64, "prompt_n": 94, "prompt_ms": 86.265, "prompt_per_token_ms": 0.9177127659574468, "prompt_per_second": 1089.6655654089143, "predicted_n": 8, "predicted_ms": 208.952, "predicted_per_token_ms": 26.119, "predicted_per_second": 38.28630498870554}, "tps": 38.28630498870554}, {"id": "2fad5729-27c3-4096-bc6b-c9eb61d32667", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["WebSecurity", "SystemSecurity"], "timings": {"cache_n": 58, "prompt_n": 141, "prompt_ms": 97.393, "prompt_per_token_ms": 0.6907304964539007, "prompt_per_second": 1447.742650909203, "predicted_n": 2, "predicted_ms": 30.602, "predicted_per_token_ms": 15.301, "predicted_per_second": 65.35520554212142}, "tps": 65.35520554212142}, {"id": "d2a844a3-f276-4476-baca-9f6e667e5e16", "answer": "AB", "llm_answer": "A", "score": 0, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 59, "prompt_n": 142, "prompt_ms": 100.171, "prompt_per_token_ms": 0.7054295774647887, "prompt_per_second": 1417.575945133821, "predicted_n": 2, "predicted_ms": 29.49, "predicted_per_token_ms": 14.745, "predicted_per_second": 67.81959986436081}, "tps": 67.81959986436081}, {"id": "5a1b5bf7-7efb-4602-ae8a-a57927038e09", "answer": "D", "llm_answer": "D", "score": 1, "topics": ["WebSecurity", "SoftwareSecurity"], "timings": {"cache_n": 59, "prompt_n": 137, "prompt_ms": 97.135, "prompt_per_token_ms": 0.709014598540146, "prompt_per_second": 1410.40819478046, "predicted_n": 2, "predicted_ms": 29.613, "predicted_per_token_ms": 14.8065, "predicted_per_second": 67.5379056495458}, "tps": 67.5379056495458}, {"id": "5a2bed74-2fa5-4b05-8fdb-5f70845095b0", "answer": "AC", "llm_answer": "A", "score": 0, "topics": ["SystemSecurity", "Vulnerability"], "timings": {"cache_n": 62, "prompt_n": 220, "prompt_ms": 102.068, "prompt_per_token_ms": 0.46394545454545455, "prompt_per_second": 2155.425794568327, "predicted_n": 2, "predicted_ms": 31.083, "predicted_per_token_ms": 15.5415, "predicted_per_second": 64.34385355338931}, "tps": 64.34385355338931}, {"id": "aec974c2-1722-4fdc-b260-01d8489ffac2", "answer": "BCD", "llm_answer": "BCD", "score": 1, "topics": ["WebSecurity", "ApplicationSecurity"], "timings": {"cache_n": 60, "prompt_n": 117, "prompt_ms": 95.745, "prompt_per_token_ms": 0.8183333333333334, "prompt_per_second": 1221.9959266802443, "predicted_n": 6, "predicted_ms": 146.116, "predicted_per_token_ms": 24.352666666666668, "predicted_per_second": 41.06326480330696}, "tps": 41.06326480330696}, {"id": "21f50866-ed07-480d-bde5-5b7e3c7df0c5", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 59, "prompt_n": 120, "prompt_ms": 98.391, "prompt_per_token_ms": 0.819925, "prompt_per_second": 1219.623746074336, "predicted_n": 2, "predicted_ms": 28.933, "predicted_per_token_ms": 14.4665, "predicted_per_second": 69.12522033663983}, "tps": 69.12522033663983}, {"id": "0cbdd376-208b-43b4-ac34-331177196ed5", "answer": "AD", "llm_answer": "A", "score": 0, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 59, "prompt_n": 91, "prompt_ms": 82.176, "prompt_per_token_ms": 0.9030329670329671, "prompt_per_second": 1107.3792834890965, "predicted_n": 2, "predicted_ms": 26.478, "predicted_per_token_ms": 13.239, "predicted_per_second": 75.53440592189742}, "tps": 75.53440592189742}, {"id": "25fb83f9-1115-48a6-a573-72e8157989ae", "answer": "AC", "llm_answer": "AC", "score": 1, "topics": ["PenTest", "SystemSecurity"], "timings": {"cache_n": 58, "prompt_n": 119, "prompt_ms": 87.981, "prompt_per_token_ms": 0.7393361344537814, "prompt_per_second": 1352.5647583000875, "predicted_n": 3, "predicted_ms": 73.08, "predicted_per_token_ms": 24.36, "predicted_per_second": 41.050903119868636}, "tps": 41.050903119868636}, {"id": "30c2d121-2dea-456a-b645-8066f7b2c2fd", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["ApplicationSecurity", "PenTest"], "timings": {"cache_n": 59, "prompt_n": 93, "prompt_ms": 102.855, "prompt_per_token_ms": 1.1059677419354839, "prompt_per_second": 904.1855038646637, "predicted_n": 2, "predicted_ms": 32.846, "predicted_per_token_ms": 16.423, "predicted_per_second": 60.890214942458755}, "tps": 60.890214942458755}, {"id": "0f98e3b6-13bd-4c00-8fef-262f4cdfd3bb", "answer": "AD", "llm_answer": "AD", "score": 1, "topics": ["SystemSecurity", "PenTest"], "timings": {"cache_n": 59, "prompt_n": 140, "prompt_ms": 98.106, "prompt_per_token_ms": 0.7007571428571429, "prompt_per_second": 1427.0279085886693, "predicted_n": 3, "predicted_ms": 61.887, "predicted_per_token_ms": 20.629, "predicted_per_second": 48.47544718600029}, "tps": 48.47544718600029}, {"id": "1d218c70-9cb6-4131-a909-99de0317af51", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SystemSecurity"], "timings": {"cache_n": 62, "prompt_n": 138, "prompt_ms": 96.724, "prompt_per_token_ms": 0.7008985507246377, "prompt_per_second": 1426.7400024812869, "predicted_n": 2, "predicted_ms": 28.738, "predicted_per_token_ms": 14.369, "predicted_per_second": 69.59426543252836}, "tps": 69.59426543252836}, {"id": "e6e58518-1657-41e3-af5a-f8b2d38803a9", "answer": "BD", "llm_answer": "BD", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 60, "prompt_n": 126, "prompt_ms": 95.761, "prompt_per_token_ms": 0.7600079365079365, "prompt_per_second": 1315.7757333361183, "predicted_n": 2, "predicted_ms": 29.138, "predicted_per_token_ms": 14.569, "predicted_per_second": 68.63889079552474}, "tps": 68.63889079552474}, {"id": "b2e3ad40-91da-4903-aa8c-4a54d26e7ed0", "answer": "B", "llm_answer": "A", "score": 0, "topics": [], "timings": {"cache_n": 60, "prompt_n": 115, "prompt_ms": 96.015, "prompt_per_token_ms": 0.8349130434782609, "prompt_per_second": 1197.7295214289434, "predicted_n": 2, "predicted_ms": 30.351, "predicted_per_token_ms": 15.1755, "predicted_per_second": 65.89568712727753}, "tps": 65.89568712727753}, {"id": "c366fdb8-d554-4570-83d3-a622a32f41fb", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SoftwareSecurity", "MemorySafety", "Vulnerability"], "timings": {"cache_n": 60, "prompt_n": 176, "prompt_ms": 99.611, "prompt_per_token_ms": 0.5659715909090909, "prompt_per_second": 1766.8731365009887, "predicted_n": 2, "predicted_ms": 29.348, "predicted_per_token_ms": 14.674, "predicted_per_second": 68.14774430966335}, "tps": 68.14774430966335}, {"id": "49b722e4-a12a-4cae-b641-c9bdda9b37e3", "answer": "B", "llm_answer": "C", "score": 0, "topics": ["ApplicationSecurity", "WebSecurity"], "timings": {"cache_n": 59, "prompt_n": 132, "prompt_ms": 98.499, "prompt_per_token_ms": 0.7462045454545454, "prompt_per_second": 1340.1151280723664, "predicted_n": 2, "predicted_ms": 32.01, "predicted_per_token_ms": 16.005, "predicted_per_second": 62.480474851608875}, "tps": 62.480474851608875}, {"id": "6eea0149-4f8d-4893-b761-f8f3146c0e87", "answer": "AC", "llm_answer": "CD", "score": 0, "topics": ["PenTest", "SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 61, "prompt_n": 123, "prompt_ms": 94.276, "prompt_per_token_ms": 0.7664715447154471, "prompt_per_second": 1304.6798761084476, "predicted_n": 4, "predicted_ms": 81.394, "predicted_per_token_ms": 20.3485, "predicted_per_second": 49.14367152369953}, "tps": 49.14367152369953}, {"id": "4919992b-bffe-49d7-aa37-2679a9ee8fec", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 60, "prompt_n": 105, "prompt_ms": 88.728, "prompt_per_token_ms": 0.8450285714285714, "prompt_per_second": 1183.391939410333, "predicted_n": 2, "predicted_ms": 29.706, "predicted_per_token_ms": 14.853, "predicted_per_second": 67.32646603379789}, "tps": 67.32646603379789}, {"id": "b5776ff2-986e-4094-9a50-0e355bc4f823", "answer": "AD", "llm_answer": "A", "score": 0, "topics": ["ApplicationSecurity", "SoftwareSecurity", "Vulnerability"], "timings": {"cache_n": 58, "prompt_n": 134, "prompt_ms": 97.606, "prompt_per_token_ms": 0.7284029850746269, "prompt_per_second": 1372.8664221461797, "predicted_n": 2, "predicted_ms": 28.527, "predicted_per_token_ms": 14.2635, "predicted_per_second": 70.10901952536193}, "tps": 70.10901952536193}, {"id": "6a12e232-e46b-4261-a81d-64efeb89511f", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["WebSecurity", "ApplicationSecurity"], "timings": {"cache_n": 62, "prompt_n": 110, "prompt_ms": 93.132, "prompt_per_token_ms": 0.8466545454545455, "prompt_per_second": 1181.1192715715329, "predicted_n": 2, "predicted_ms": 31.043, "predicted_per_token_ms": 15.5215, "predicted_per_second": 64.42676287729923}, "tps": 64.42676287729923}, {"id": "787ae1e0-8793-4659-a698-807eaa1d6574", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["WebSecurity", "ApplicationSecurity", "Vulnerability"], "timings": {"cache_n": 59, "prompt_n": 239, "prompt_ms": 104.594, "prompt_per_token_ms": 0.4376317991631799, "prompt_per_second": 2285.025909708014, "predicted_n": 2, "predicted_ms": 32.129, "predicted_per_token_ms": 16.0645, "predicted_per_second": 62.24905848299045}, "tps": 62.24905848299045}, {"id": "3ac6946b-cc31-4a19-939d-4bd94ade70ef", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SoftwareSecurity", "WebSecurity", "SystemSecurity", "Vulnerability"], "timings": {"cache_n": 60, "prompt_n": 111, "prompt_ms": 94.702, "prompt_per_token_ms": 0.8531711711711711, "prompt_per_second": 1172.0977381681485, "predicted_n": 2, "predicted_ms": 30.091, "predicted_per_token_ms": 15.0455, "predicted_per_second": 66.46505599680968}, "tps": 66.46505599680968}, {"id": "7a0b7013-e5cd-4950-b53f-0ea5c5b022f0", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SystemSecurity"], "timings": {"cache_n": 59, "prompt_n": 129, "prompt_ms": 96.072, "prompt_per_token_ms": 0.7447441860465116, "prompt_per_second": 1342.7429427929053, "predicted_n": 2, "predicted_ms": 33.513, "predicted_per_token_ms": 16.7565, "predicted_per_second": 59.67833378092084}, "tps": 59.67833378092084}, {"id": "4ce84110-276b-4498-871f-1f300c5f46bb", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["SystemSecurity", "SoftwareSecurity"], "timings": {"cache_n": 59, "prompt_n": 145, "prompt_ms": 98.013, "prompt_per_token_ms": 0.675951724137931, "prompt_per_second": 1479.3955903808678, "predicted_n": 2, "predicted_ms": 29.518, "predicted_per_token_ms": 14.759, "predicted_per_second": 67.75526797208482}, "tps": 67.75526797208482}, {"id": "f952ff48-ab5b-4f3a-8b8b-be02fa3da909", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 59, "prompt_n": 185, "prompt_ms": 100.131, "prompt_per_token_ms": 0.5412486486486486, "prompt_per_second": 1847.5796706314727, "predicted_n": 2, "predicted_ms": 30.222, "predicted_per_token_ms": 15.111, "predicted_per_second": 66.1769571835087}, "tps": 66.1769571835087}, {"id": "c8fb6837-e4c3-45cb-9401-862bf651b1f2", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["WebSecurity", "NetworkSecurity", "PenTest"], "timings": {"cache_n": 61, "prompt_n": 138, "prompt_ms": 99.15, "prompt_per_token_ms": 0.7184782608695652, "prompt_per_second": 1391.8305597579424, "predicted_n": 2, "predicted_ms": 29.41, "predicted_per_token_ms": 14.705, "predicted_per_second": 68.00408024481469}, "tps": 68.00408024481469}, {"id": "804e373a-5664-4461-9c26-4ad5334d8144", "answer": "B", "llm_answer": "D", "score": 0, "topics": ["SystemSecurity", "ApplicationSecurity", "PenTest"], "timings": {"cache_n": 59, "prompt_n": 102, "prompt_ms": 86.631, "prompt_per_token_ms": 0.8493235294117647, "prompt_per_second": 1177.4076254458566, "predicted_n": 2, "predicted_ms": 27.116, "predicted_per_token_ms": 13.558, "predicted_per_second": 73.7571913261543}, "tps": 73.7571913261543}, {"id": "f6e92b2b-313b-443f-916c-e38a7fc1cdcf", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 59, "prompt_n": 143, "prompt_ms": 95.563, "prompt_per_token_ms": 0.6682727272727272, "prompt_per_second": 1496.3950482927492, "predicted_n": 2, "predicted_ms": 32.463, "predicted_per_token_ms": 16.2315, "predicted_per_second": 61.608600560638266}, "tps": 61.608600560638266}, {"id": "866ed697-5321-40e0-9325-c5c2ce391df0", "answer": "AC", "llm_answer": "AC", "score": 1, "topics": ["ApplicationSecurity"], "timings": {"cache_n": 60, "prompt_n": 122, "prompt_ms": 96.632, "prompt_per_token_ms": 0.7920655737704918, "prompt_per_second": 1262.521731931451, "predicted_n": 2, "predicted_ms": 29.953, "predicted_per_token_ms": 14.9765, "predicted_per_second": 66.77127499749608}, "tps": 66.77127499749608}, {"id": "87631917-8bbb-4b9f-8cce-8b0d28f61a48", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["Cryptography", "SystemSecurity"], "timings": {"cache_n": 61, "prompt_n": 74, "prompt_ms": 92.685, "prompt_per_token_ms": 1.2525, "prompt_per_second": 798.4031936127744, "predicted_n": 2, "predicted_ms": 30.208, "predicted_per_token_ms": 15.104, "predicted_per_second": 66.20762711864407}, "tps": 66.20762711864407}, {"id": "38500b90-855d-47d2-889b-25f3e36c648b", "answer": "BD", "llm_answer": "B", "score": 0, "topics": ["WebSecurity"], "timings": {"cache_n": 58, "prompt_n": 166, "prompt_ms": 104.094, "prompt_per_token_ms": 0.6270722891566265, "prompt_per_second": 1594.7124714200627, "predicted_n": 2, "predicted_ms": 30.447, "predicted_per_token_ms": 15.2235, "predicted_per_second": 65.68791670772161}, "tps": 65.68791670772161}, {"id": "b755a7aa-748b-4dcc-a6f6-45006c85bdf3", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["ApplicationSecurity", "WebSecurity"], "timings": {"cache_n": 59, "prompt_n": 105, "prompt_ms": 95.31, "prompt_per_token_ms": 0.9077142857142857, "prompt_per_second": 1101.6682404784387, "predicted_n": 2, "predicted_ms": 31.052, "predicted_per_token_ms": 15.526, "predicted_per_second": 64.4080896560608}, "tps": 64.4080896560608}, {"id": "d078802f-7667-4407-be6f-df377ea707df", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["ApplicationSecurity", "SystemSecurity"], "timings": {"cache_n": 58, "prompt_n": 122, "prompt_ms": 96.59, "prompt_per_token_ms": 0.7917213114754099, "prompt_per_second": 1263.0707112537527, "predicted_n": 2, "predicted_ms": 31.472, "predicted_per_token_ms": 15.736, "predicted_per_second": 63.54855109303507}, "tps": 63.54855109303507}, {"id": "010c49e8-5a1c-424a-9301-a36494501eee", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["SystemSecurity", "PenTest"], "timings": {"cache_n": 59, "prompt_n": 112, "prompt_ms": 94.557, "prompt_per_token_ms": 0.8442589285714286, "prompt_per_second": 1184.4707425150968, "predicted_n": 2, "predicted_ms": 29.87, "predicted_per_token_ms": 14.935, "predicted_per_second": 66.95681285570807}, "tps": 66.95681285570807}, {"id": "43d16e03-37bb-47a6-9caa-ad15d03be18c", "answer": "AD", "llm_answer": "AD", "score": 1, "topics": ["ApplicationSecurity", "WebSecurity"], "timings": {"cache_n": 60, "prompt_n": 94, "prompt_ms": 94.477, "prompt_per_token_ms": 1.0050744680851065, "prompt_per_second": 994.951152132265, "predicted_n": 2, "predicted_ms": 32.97, "predicted_per_token_ms": 16.485, "predicted_per_second": 60.66120715802245}, "tps": 60.66120715802245}, {"id": "93a6e363-7807-4623-ae9e-3068cdc907d5", "answer": "AC", "llm_answer": "AD", "score": 0, "topics": ["WebSecurity"], "timings": {"cache_n": 58, "prompt_n": 153, "prompt_ms": 96.027, "prompt_per_token_ms": 0.6276274509803922, "prompt_per_second": 1593.3018838451685, "predicted_n": 3, "predicted_ms": 56.815, "predicted_per_token_ms": 18.938333333333333, "predicted_per_second": 52.80295696559007}, "tps": 52.80295696559007}, {"id": "38625f1c-e6a9-4b27-a84a-ee2b21e28797", "answer": "A", "llm_answer": "A", "score": 1, "topics": [], "timings": {"cache_n": 59, "prompt_n": 166, "prompt_ms": 84.321, "prompt_per_token_ms": 0.5079578313253011, "prompt_per_second": 1968.6673545142964, "predicted_n": 2, "predicted_ms": 29.966, "predicted_per_token_ms": 14.983, "predicted_per_second": 66.74230794900888}, "tps": 66.74230794900888}, {"id": "6a4c60c3-5252-43c6-8585-fbe021275ddc", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 60, "prompt_n": 155, "prompt_ms": 100.066, "prompt_per_token_ms": 0.6455870967741936, "prompt_per_second": 1548.977674734675, "predicted_n": 2, "predicted_ms": 29.561, "predicted_per_token_ms": 14.7805, "predicted_per_second": 67.65670985419979}, "tps": 67.65670985419979}, {"id": "5f1f92f6-a3a2-4970-803b-8e3e9d3fbc03", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["ApplicationSecurity", "Vulnerability"], "timings": {"cache_n": 59, "prompt_n": 174, "prompt_ms": 99.872, "prompt_per_token_ms": 0.5739770114942528, "prompt_per_second": 1742.2300544697214, "predicted_n": 2, "predicted_ms": 30.786, "predicted_per_token_ms": 15.393, "predicted_per_second": 64.96459429610861}, "tps": 64.96459429610861}, {"id": "a28287ce-0c14-4044-903b-bad584671532", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SoftwareSecurity", "ApplicationSecurity", "MemorySafety", "Vulnerability"], "timings": {"cache_n": 61, "prompt_n": 99, "prompt_ms": 94.532, "prompt_per_token_ms": 0.9548686868686869, "prompt_per_second": 1047.2644183980028, "predicted_n": 2, "predicted_ms": 31.06, "predicted_per_token_ms": 15.53, "predicted_per_second": 64.39150032195751}, "tps": 64.39150032195751}, {"id": "11cd1911-79fe-4815-b68a-b5705cc12b67", "answer": "AB", "llm_answer": "A", "score": 0, "topics": ["SystemSecurity"], "timings": {"cache_n": 58, "prompt_n": 182, "prompt_ms": 100.092, "prompt_per_token_ms": 0.5499560439560439, "prompt_per_second": 1818.3271390320904, "predicted_n": 2, "predicted_ms": 30.059, "predicted_per_token_ms": 15.0295, "predicted_per_second": 66.53581290129412}, "tps": 66.53581290129412}, {"id": "cacb49dc-bd87-4e9c-9f94-ac60f73ce246", "answer": "BC", "llm_answer": "B", "score": 0, "topics": ["WebSecurity"], "timings": {"cache_n": 60, "prompt_n": 139, "prompt_ms": 97.241, "prompt_per_token_ms": 0.6995755395683453, "prompt_per_second": 1429.4381999362408, "predicted_n": 2, "predicted_ms": 29.542, "predicted_per_token_ms": 14.771, "predicted_per_second": 67.70022341073725}, "tps": 67.70022341073725}, {"id": "3aefa1fb-e43d-4248-820a-1d2429c8c6de", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["WebSecurity", "ApplicationSecurity"], "timings": {"cache_n": 58, "prompt_n": 115, "prompt_ms": 95.308, "prompt_per_token_ms": 0.8287652173913044, "prompt_per_second": 1206.614345070718, "predicted_n": 2, "predicted_ms": 28.524, "predicted_per_token_ms": 14.262, "predicted_per_second": 70.11639321273313}, "tps": 70.11639321273313}, {"id": "f9582b54-68c2-45fb-ba21-d6608487f322", "answer": "BC", "llm_answer": "B", "score": 0, "topics": ["WebSecurity"], "timings": {"cache_n": 62, "prompt_n": 221, "prompt_ms": 101.74, "prompt_per_token_ms": 0.46036199095022623, "prompt_per_second": 2172.203656379005, "predicted_n": 2, "predicted_ms": 30.53, "predicted_per_token_ms": 15.265, "predicted_per_second": 65.50933508024893}, "tps": 65.50933508024893}, {"id": "cd58d9f2-f046-4c87-a32f-e1a452b4d452", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["WebSecurity", "PenTest"], "timings": {"cache_n": 62, "prompt_n": 176, "prompt_ms": 99.796, "prompt_per_token_ms": 0.5670227272727273, "prompt_per_second": 1763.5977393883522, "predicted_n": 2, "predicted_ms": 29.831, "predicted_per_token_ms": 14.9155, "predicted_per_second": 67.04434983741746}, "tps": 67.04434983741746}, {"id": "8ade0a46-f799-450f-9050-c32d909afcee", "answer": "AC", "llm_answer": "AC", "score": 1, "topics": ["NetworkSecurity", "ApplicationSecurity", "PenTest"], "timings": {"cache_n": 65, "prompt_n": 123, "prompt_ms": 93.16, "prompt_per_token_ms": 0.7573983739837398, "prompt_per_second": 1320.3091455560327, "predicted_n": 2, "predicted_ms": 28.61, "predicted_per_token_ms": 14.305, "predicted_per_second": 69.90562740300594}, "tps": 69.90562740300594}, {"id": "df8da9aa-2a36-460c-972a-ce062f4df458", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 60, "prompt_n": 143, "prompt_ms": 81.789, "prompt_per_token_ms": 0.571951048951049, "prompt_per_second": 1748.401374267933, "predicted_n": 2, "predicted_ms": 29.978, "predicted_per_token_ms": 14.989, "predicted_per_second": 66.71559143371806}, "tps": 66.71559143371806}, {"id": "6f2511d3-e5e1-447b-9581-66d2311d1a5f", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 59, "prompt_n": 131, "prompt_ms": 98.926, "prompt_per_token_ms": 0.7551603053435114, "prompt_per_second": 1324.2221458463903, "predicted_n": 2, "predicted_ms": 29.46, "predicted_per_token_ms": 14.73, "predicted_per_second": 67.88866259334691}, "tps": 67.88866259334691}, {"id": "80c72162-a388-44f1-9654-d7981f57daf3", "answer": "AB", "llm_answer": "A", "score": 0, "topics": ["SystemSecurity", "ApplicationSecurity", "PenTest"], "timings": {"cache_n": 61, "prompt_n": 115, "prompt_ms": 95.03, "prompt_per_token_ms": 0.8263478260869566, "prompt_per_second": 1210.144165000526, "predicted_n": 2, "predicted_ms": 31.681, "predicted_per_token_ms": 15.8405, "predicted_per_second": 63.12932041286575}, "tps": 63.12932041286575}, {"id": "018d03e1-4aee-4363-8daf-ee992d0e88c2", "answer": "ABD", "llm_answer": "ABD", "score": 1, "topics": [], "timings": {"cache_n": 59, "prompt_n": 175, "prompt_ms": 100.286, "prompt_per_token_ms": 0.5730628571428571, "prompt_per_second": 1745.0092734778532, "predicted_n": 3, "predicted_ms": 58.382, "predicted_per_token_ms": 19.460666666666665, "predicted_per_second": 51.385701072248295}, "tps": 51.385701072248295}, {"id": "d9fba682-724f-40b9-982a-8fdf9c402406", "answer": "B", "llm_answer": "A", "score": 0, "topics": ["SoftwareSecurity", "ApplicationSecurity", "SystemSecurity", "Vulnerability"], "timings": {"cache_n": 60, "prompt_n": 127, "prompt_ms": 95.039, "prompt_per_token_ms": 0.7483385826771654, "prompt_per_second": 1336.2935216069193, "predicted_n": 2, "predicted_ms": 28.634, "predicted_per_token_ms": 14.317, "predicted_per_second": 69.84703499336453}, "tps": 69.84703499336453}, {"id": "5e6f3998-8a0e-44f5-9308-4c8470cbebb5", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["ApplicationSecurity"], "timings": {"cache_n": 63, "prompt_n": 132, "prompt_ms": 97.537, "prompt_per_token_ms": 0.7389166666666667, "prompt_per_second": 1353.3325814818993, "predicted_n": 2, "predicted_ms": 33.872, "predicted_per_token_ms": 16.936, "predicted_per_second": 59.045819555975434}, "tps": 59.045819555975434}, {"id": "1c64ff34-54ef-43c6-b136-bd405b07fa54", "answer": "B", "llm_answer": "C", "score": 0, "topics": ["SystemSecurity"], "timings": {"cache_n": 59, "prompt_n": 120, "prompt_ms": 96.636, "prompt_per_token_ms": 0.8053, "prompt_per_second": 1241.7732522041476, "predicted_n": 2, "predicted_ms": 28.744, "predicted_per_token_ms": 14.372, "predicted_per_second": 69.57973838018368}, "tps": 69.57973838018368}, {"id": "c260eda4-1f94-4f41-b529-f3fcbe4aef99", "answer": "BD", "llm_answer": "D", "score": 0, "topics": ["WebSecurity"], "timings": {"cache_n": 59, "prompt_n": 113, "prompt_ms": 94.649, "prompt_per_token_ms": 0.8376017699115045, "prompt_per_second": 1193.8847742712549, "predicted_n": 2, "predicted_ms": 30.436, "predicted_per_token_ms": 15.218, "predicted_per_second": 65.71165724799579}, "tps": 65.71165724799579}, {"id": "019fb4d8-8de5-4ce9-a766-28cc5f3d983d", "answer": "ABC", "llm_answer": "BC", "score": 0, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 58, "prompt_n": 213, "prompt_ms": 104.038, "prompt_per_token_ms": 0.4884413145539906, "prompt_per_second": 2047.3288606086237, "predicted_n": 3, "predicted_ms": 62.903, "predicted_per_token_ms": 20.967666666666666, "predicted_per_second": 47.69247889607809}, "tps": 47.69247889607809}, {"id": "dd2e4e65-2e59-4eef-86a8-ec08cc315188", "answer": "BD", "llm_answer": "B", "score": 0, "topics": ["ApplicationSecurity", "MemorySafety"], "timings": {"cache_n": 59, "prompt_n": 128, "prompt_ms": 81.408, "prompt_per_token_ms": 0.636, "prompt_per_second": 1572.3270440251572, "predicted_n": 2, "predicted_ms": 28.235, "predicted_per_token_ms": 14.1175, "predicted_per_second": 70.83407118824155}, "tps": 70.83407118824155}, {"id": "2d0844ac-b2d2-4154-8e73-de0c113c7344", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["NetworkSecurity", "PenTest"], "timings": {"cache_n": 65, "prompt_n": 92, "prompt_ms": 90.864, "prompt_per_token_ms": 0.9876521739130435, "prompt_per_second": 1012.5022010917415, "predicted_n": 2, "predicted_ms": 30.891, "predicted_per_token_ms": 15.4455, "predicted_per_second": 64.7437765044835}, "tps": 64.7437765044835}, {"id": "dfe63374-9219-4f5c-a27a-5befb178d163", "answer": "D", "llm_answer": "D", "score": 1, "topics": ["ApplicationSecurity", "PenTest"], "timings": {"cache_n": 58, "prompt_n": 93, "prompt_ms": 92.313, "prompt_per_token_ms": 0.9926129032258065, "prompt_per_second": 1007.4420720808553, "predicted_n": 2, "predicted_ms": 29.779, "predicted_per_token_ms": 14.8895, "predicted_per_second": 67.1614224789281}, "tps": 67.1614224789281}, {"id": "501f4bd8-4152-4fc7-a707-32e002335860", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["SystemSecurity"], "timings": {"cache_n": 58, "prompt_n": 138, "prompt_ms": 98.37, "prompt_per_token_ms": 0.7128260869565217, "prompt_per_second": 1402.8667276608724, "predicted_n": 2, "predicted_ms": 29.815, "predicted_per_token_ms": 14.9075, "predicted_per_second": 67.0803286936106}, "tps": 67.0803286936106}, {"id": "e74a27b0-b719-4660-b828-bb3d18fdef9f", "answer": "A", "llm_answer": "A", "score": 1, "topics": [], "timings": {"cache_n": 62, "prompt_n": 219, "prompt_ms": 106.686, "prompt_per_token_ms": 0.48715068493150687, "prompt_per_second": 2052.7529385298913, "predicted_n": 2, "predicted_ms": 31.186, "predicted_per_token_ms": 15.593, "predicted_per_second": 64.13134098634002}, "tps": 64.13134098634002}, {"id": "c9efb908-a952-492d-b339-59578aae8b3b", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["WebSecurity", "SystemSecurity"], "timings": {"cache_n": 59, "prompt_n": 114, "prompt_ms": 97.309, "prompt_per_token_ms": 0.8535877192982456, "prompt_per_second": 1171.5257581518667, "predicted_n": 2, "predicted_ms": 30.522, "predicted_per_token_ms": 15.261, "predicted_per_second": 65.52650547146321}, "tps": 65.52650547146321}, {"id": "12eb76f3-49f4-407a-b5fe-cb1a8eaca791", "answer": "BD", "llm_answer": "BD", "score": 1, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 59, "prompt_n": 170, "prompt_ms": 98.857, "prompt_per_token_ms": 0.5815117647058824, "prompt_per_second": 1719.6556642422893, "predicted_n": 2, "predicted_ms": 30.47, "predicted_per_token_ms": 15.235, "predicted_per_second": 65.63833278634723}, "tps": 65.63833278634723}, {"id": "cbc6b64d-1922-4cde-8f4d-aea90122fced", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["WebSecurity", "ApplicationSecurity"], "timings": {"cache_n": 60, "prompt_n": 141, "prompt_ms": 97.553, "prompt_per_token_ms": 0.6918652482269503, "prompt_per_second": 1445.3681588469858, "predicted_n": 2, "predicted_ms": 28.82, "predicted_per_token_ms": 14.41, "predicted_per_second": 69.39625260235947}, "tps": 69.39625260235947}, {"id": "565eb940-6d24-4a0a-a5f9-1839980d382d", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["SystemSecurity", "PenTest"], "timings": {"cache_n": 59, "prompt_n": 138, "prompt_ms": 96.918, "prompt_per_token_ms": 0.702304347826087, "prompt_per_second": 1423.884108215192, "predicted_n": 2, "predicted_ms": 29.681, "predicted_per_token_ms": 14.8405, "predicted_per_second": 67.383174421347}, "tps": 67.383174421347}, {"id": "fa4cce19-1984-43d6-a09b-8a374b352e2a", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["ApplicationSecurity", "SoftwareSecurity", "SystemSecurity"], "timings": {"cache_n": 60, "prompt_n": 105, "prompt_ms": 94.096, "prompt_per_token_ms": 0.896152380952381, "prompt_per_second": 1115.8816527801393, "predicted_n": 2, "predicted_ms": 28.453, "predicted_per_token_ms": 14.2265, "predicted_per_second": 70.29135767757354}, "tps": 70.29135767757354}, {"id": "79080944-4cdf-44de-aa0d-4ef36b52c2a0", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["WebSecurity", "ApplicationSecurity"], "timings": {"cache_n": 59, "prompt_n": 110, "prompt_ms": 80.903, "prompt_per_token_ms": 0.7354818181818182, "prompt_per_second": 1359.6529176915565, "predicted_n": 2, "predicted_ms": 27.567, "predicted_per_token_ms": 13.7835, "predicted_per_second": 72.55051329488155}, "tps": 72.55051329488155}, {"id": "cd30d3d2-001f-4389-8b38-6f85e10fe92c", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["ApplicationSecurity", "SystemSecurity"], "timings": {"cache_n": 58, "prompt_n": 96, "prompt_ms": 94.183, "prompt_per_token_ms": 0.9810729166666667, "prompt_per_second": 1019.2922289585167, "predicted_n": 2, "predicted_ms": 28.699, "predicted_per_token_ms": 14.3495, "predicted_per_second": 69.68883933238091}, "tps": 69.68883933238091}, {"id": "25137ba1-8c14-4a36-ad1a-1cc4143fe815", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["PenTest", "SystemSecurity"], "timings": {"cache_n": 58, "prompt_n": 166, "prompt_ms": 96.877, "prompt_per_token_ms": 0.5835963855421686, "prompt_per_second": 1713.513011344282, "predicted_n": 2, "predicted_ms": 32.958, "predicted_per_token_ms": 16.479, "predicted_per_second": 60.683293889192306}, "tps": 60.683293889192306}, {"id": "51f950ad-bf07-4153-8265-4ff4b3c4067e", "answer": "ACD", "llm_answer": "ACD", "score": 1, "topics": ["SystemSecurity", "NetworkSecurity"], "timings": {"cache_n": 59, "prompt_n": 98, "prompt_ms": 95.473, "prompt_per_token_ms": 0.9742142857142857, "prompt_per_second": 1026.4682161448786, "predicted_n": 3, "predicted_ms": 61.02, "predicted_per_token_ms": 20.34, "predicted_per_second": 49.164208456243855}, "tps": 49.164208456243855}, {"id": "58806859-e182-4267-8e7a-8ccd21b9c4da", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SystemSecurity", "NetworkSecurity", "PenTest"], "timings": {"cache_n": 59, "prompt_n": 138, "prompt_ms": 97.727, "prompt_per_token_ms": 0.7081666666666667, "prompt_per_second": 1412.0969639915272, "predicted_n": 2, "predicted_ms": 30.193, "predicted_per_token_ms": 15.0965, "predicted_per_second": 66.24051932567151}, "tps": 66.24051932567151}, {"id": "ad51eab3-c1e0-447d-ab34-06b6c84ee19a", "answer": "AD", "llm_answer": "BCD", "score": 0, "topics": ["ApplicationSecurity", "SoftwareSecurity", "Vulnerability"], "timings": {"cache_n": 61, "prompt_n": 146, "prompt_ms": 97.463, "prompt_per_token_ms": 0.6675547945205479, "prompt_per_second": 1498.0043708894657, "predicted_n": 64, "predicted_ms": 1866.627, "predicted_per_token_ms": 29.166046875, "predicted_per_second": 34.28644287262533}, "tps": 34.28644287262533}, {"id": "6d853ce0-bb98-4805-bbfd-8c27d9e6f52b", "answer": "A", "llm_answer": "C", "score": 0, "topics": ["SystemSecurity", "PenTest"], "timings": {"cache_n": 61, "prompt_n": 144, "prompt_ms": 88.117, "prompt_per_token_ms": 0.6119236111111112, "prompt_per_second": 1634.19090527367, "predicted_n": 2, "predicted_ms": 27.841, "predicted_per_token_ms": 13.9205, "predicted_per_second": 71.83650012571387}, "tps": 71.83650012571387}, {"id": "932a98ba-5cf5-4c01-85e1-b842d5612bcd", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["SystemSecurity", "ApplicationSecurity", "PenTest"], "timings": {"cache_n": 60, "prompt_n": 129, "prompt_ms": 97.664, "prompt_per_token_ms": 0.7570852713178294, "prompt_per_second": 1320.8551769331584, "predicted_n": 2, "predicted_ms": 31.78, "predicted_per_token_ms": 15.89, "predicted_per_second": 62.93266205160478}, "tps": 62.93266205160478}, {"id": "57b89d12-dd5a-49be-95a0-5b25ca90e6f4", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["WebSecurity", "SystemSecurity"], "timings": {"cache_n": 59, "prompt_n": 187, "prompt_ms": 99.943, "prompt_per_token_ms": 0.5344545454545454, "prompt_per_second": 1871.0665079095086, "predicted_n": 2, "predicted_ms": 29.465, "predicted_per_token_ms": 14.7325, "predicted_per_second": 67.87714237230612}, "tps": 67.87714237230612}, {"id": "a6186291-af7f-4aa0-8a43-e13710333562", "answer": "AC", "llm_answer": "AC", "score": 1, "topics": ["SoftwareSecurity", "Vulnerability"], "timings": {"cache_n": 61, "prompt_n": 131, "prompt_ms": 97.232, "prompt_per_token_ms": 0.7422290076335878, "prompt_per_second": 1347.293072239592, "predicted_n": 3, "predicted_ms": 62.906, "predicted_per_token_ms": 20.968666666666667, "predicted_per_second": 47.690204432009665}, "tps": 47.690204432009665}, {"id": "dbbdd987-c6ef-495b-8861-35e37972fcae", "answer": "AC", "llm_answer": "A", "score": 0, "topics": ["WebSecurity", "SoftwareSecurity"], "timings": {"cache_n": 60, "prompt_n": 90, "prompt_ms": 92.659, "prompt_per_token_ms": 1.0295444444444446, "prompt_per_second": 971.3033812149926, "predicted_n": 2, "predicted_ms": 30.074, "predicted_per_token_ms": 15.037, "predicted_per_second": 66.50262685376072}, "tps": 66.50262685376072}, {"id": "6f5decce-06fa-4883-a5fd-34a2dbea58bd", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["ApplicationSecurity", "SystemSecurity"], "timings": {"cache_n": 58, "prompt_n": 117, "prompt_ms": 95.86, "prompt_per_token_ms": 0.8193162393162393, "prompt_per_second": 1220.529939495097, "predicted_n": 2, "predicted_ms": 29.309, "predicted_per_token_ms": 14.6545, "predicted_per_second": 68.23842505714968}, "tps": 68.23842505714968}, {"id": "79098a56-9ca9-4787-9502-f6be4adda810", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["ApplicationSecurity", "SoftwareSecurity", "WebSecurity"], "timings": {"cache_n": 59, "prompt_n": 111, "prompt_ms": 95.19, "prompt_per_token_ms": 0.8575675675675676, "prompt_per_second": 1166.0888748818152, "predicted_n": 2, "predicted_ms": 29.28, "predicted_per_token_ms": 14.64, "predicted_per_second": 68.30601092896174}, "tps": 68.30601092896174}, {"id": "d2bb5c1c-f7e0-47f9-b50b-28e322b723c4", "answer": "AB", "llm_answer": "AB", "score": 1, "topics": ["SystemSecurity", "NetworkSecurity", "PenTest"], "timings": {"cache_n": 65, "prompt_n": 116, "prompt_ms": 95.805, "prompt_per_token_ms": 0.8259051724137931, "prompt_per_second": 1210.7927561192005, "predicted_n": 4, "predicted_ms": 88.768, "predicted_per_token_ms": 22.192, "predicted_per_second": 45.06128334534967}, "tps": 45.06128334534967}, {"id": "84b19ad2-602e-4328-8531-05e9a17d1b7c", "answer": "AB", "llm_answer": "A", "score": 0, "topics": ["SystemSecurity", "WebSecurity"], "timings": {"cache_n": 59, "prompt_n": 205, "prompt_ms": 102.074, "prompt_per_token_ms": 0.49792195121951216, "prompt_per_second": 2008.3468855928052, "predicted_n": 2, "predicted_ms": 31.223, "predicted_per_token_ms": 15.6115, "predicted_per_second": 64.05534381705795}, "tps": 64.05534381705795}, {"id": "db7eb7d2-91c0-4f79-8d52-61bcf7030e12", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["SoftwareSecurity", "SystemSecurity", "Vulnerability"], "timings": {"cache_n": 64, "prompt_n": 120, "prompt_ms": 83.49, "prompt_per_token_ms": 0.69575, "prompt_per_second": 1437.297879985627, "predicted_n": 2, "predicted_ms": 26.797, "predicted_per_token_ms": 13.3985, "predicted_per_second": 74.63522036048812}, "tps": 74.63522036048812}, {"id": "28408480-29d1-456a-8a17-cf2ee8f6daee", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["SystemSecurity", "WebSecurity"], "timings": {"cache_n": 59, "prompt_n": 177, "prompt_ms": 97.921, "prompt_per_token_ms": 0.553225988700565, "prompt_per_second": 1807.5795794569092, "predicted_n": 2, "predicted_ms": 32.513, "predicted_per_token_ms": 16.2565, "predicted_per_second": 61.513855996063114}, "tps": 61.513855996063114}, {"id": "d02d7da3-cfb0-4890-93d4-a76378e24541", "answer": "AD", "llm_answer": "BD", "score": 0, "topics": ["ApplicationSecurity", "NetworkSecurity"], "timings": {"cache_n": 58, "prompt_n": 161, "prompt_ms": 99.079, "prompt_per_token_ms": 0.6153975155279503, "prompt_per_second": 1624.9659362730752, "predicted_n": 3, "predicted_ms": 57.774, "predicted_per_token_ms": 19.258, "predicted_per_second": 51.92647211548447}, "tps": 51.92647211548447}, {"id": "353011e4-196f-41f7-9c39-636b7503f18a", "answer": "ACD", "llm_answer": "A", "score": 0, "topics": ["SystemSecurity"], "timings": {"cache_n": 60, "prompt_n": 122, "prompt_ms": 97.653, "prompt_per_token_ms": 0.8004344262295082, "prompt_per_second": 1249.3215774220964, "predicted_n": 2, "predicted_ms": 29.34, "predicted_per_token_ms": 14.67, "predicted_per_second": 68.1663258350375}, "tps": 68.1663258350375}, {"id": "b28f821f-3f20-4d2b-ab9d-f7186cd718e5", "answer": "ACD", "llm_answer": "A", "score": 0, "topics": ["WebSecurity"], "timings": {"cache_n": 59, "prompt_n": 138, "prompt_ms": 97.816, "prompt_per_token_ms": 0.7088115942028985, "prompt_per_second": 1410.8121370736892, "predicted_n": 2, "predicted_ms": 29.575, "predicted_per_token_ms": 14.7875, "predicted_per_second": 67.6246830092984}, "tps": 67.6246830092984}, {"id": "43aafe27-95b8-48e3-a631-56bfada5d32e", "answer": "AC", "llm_answer": "AC", "score": 1, "topics": ["ApplicationSecurity", "WebSecurity"], "timings": {"cache_n": 61, "prompt_n": 99, "prompt_ms": 94.446, "prompt_per_token_ms": 0.954, "prompt_per_second": 1048.2180293501049, "predicted_n": 2, "predicted_ms": 28.272, "predicted_per_token_ms": 14.136, "predicted_per_second": 70.74136955291455}, "tps": 70.74136955291455}, {"id": "f34abd9c-0ce5-4928-ab4f-1261ac6155cf", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 58, "prompt_n": 131, "prompt_ms": 98.665, "prompt_per_token_ms": 0.7531679389312977, "prompt_per_second": 1327.725130492069, "predicted_n": 2, "predicted_ms": 28.286, "predicted_per_token_ms": 14.143, "predicted_per_second": 70.70635650144948}, "tps": 70.70635650144948}, {"id": "0a04ad67-0946-4a37-8445-50bf6eacf439", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["WebSecurity", "ApplicationSecurity"], "timings": {"cache_n": 60, "prompt_n": 135, "prompt_ms": 97.213, "prompt_per_token_ms": 0.7200962962962962, "prompt_per_second": 1388.7031569851767, "predicted_n": 2, "predicted_ms": 28.649, "predicted_per_token_ms": 14.3245, "predicted_per_second": 69.81046458864184}, "tps": 69.81046458864184}, {"id": "d5e581cf-ac05-41a1-b7c1-70fa75fbfa0b", "answer": "BD", "llm_answer": "B", "score": 0, "topics": ["WebSecurity"], "timings": {"cache_n": 59, "prompt_n": 169, "prompt_ms": 99.159, "prompt_per_token_ms": 0.5867396449704142, "prompt_per_second": 1704.3334442662795, "predicted_n": 2, "predicted_ms": 35.114, "predicted_per_token_ms": 17.557, "predicted_per_second": 56.957338953124115}, "tps": 56.957338953124115}, {"id": "595d4d64-2228-4924-9ed3-c4fe79368c45", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SystemSecurity", "WebSecurity"], "timings": {"cache_n": 60, "prompt_n": 160, "prompt_ms": 98.418, "prompt_per_token_ms": 0.6151125000000001, "prompt_per_second": 1625.7188725639617, "predicted_n": 2, "predicted_ms": 27.595, "predicted_per_token_ms": 13.7975, "predicted_per_second": 72.47689798876608}, "tps": 72.47689798876608}, {"id": "b875832e-41ab-41ab-aa0d-a427771a0d62", "answer": "AC", "llm_answer": "C", "score": 0, "topics": ["SystemSecurity"], "timings": {"cache_n": 60, "prompt_n": 128, "prompt_ms": 80.949, "prompt_per_token_ms": 0.6324140625, "prompt_per_second": 1581.242510716624, "predicted_n": 2, "predicted_ms": 27.439, "predicted_per_token_ms": 13.7195, "predicted_per_second": 72.88895367906994}, "tps": 72.88895367906994}, {"id": "3712d045-4e06-4d92-8417-e1f14de4ed47", "answer": "AC", "llm_answer": "B", "score": 0, "topics": ["NetworkSecurity", "SystemSecurity"], "timings": {"cache_n": 59, "prompt_n": 122, "prompt_ms": 97.731, "prompt_per_token_ms": 0.8010737704918033, "prompt_per_second": 1248.3244825081092, "predicted_n": 2, "predicted_ms": 28.293, "predicted_per_token_ms": 14.1465, "predicted_per_second": 70.68886296963913}, "tps": 70.68886296963913}, {"id": "890ef2ea-0064-4962-84b3-98e68e0b62bf", "answer": "AD", "llm_answer": "A", "score": 0, "topics": ["SystemSecurity"], "timings": {"cache_n": 58, "prompt_n": 134, "prompt_ms": 96.399, "prompt_per_token_ms": 0.7193955223880597, "prompt_per_second": 1390.0559134430855, "predicted_n": 2, "predicted_ms": 28.607, "predicted_per_token_ms": 14.3035, "predicted_per_second": 69.9129583668333}, "tps": 69.9129583668333}, {"id": "77b19bf6-67b7-4be5-9a9f-1ad4edaaf535", "answer": "ACD", "llm_answer": "ABCD", "score": 0, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 59, "prompt_n": 182, "prompt_ms": 100.633, "prompt_per_token_ms": 0.5529285714285714, "prompt_per_second": 1808.5518666838911, "predicted_n": 3, "predicted_ms": 60.73, "predicted_per_token_ms": 20.243333333333332, "predicted_per_second": 49.398979087765525}, "tps": 49.398979087765525}, {"id": "faa0e896-8c81-4b26-9eb9-fe8ab5f98a10", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["SystemSecurity", "WebSecurity"], "timings": {"cache_n": 62, "prompt_n": 93, "prompt_ms": 92.765, "prompt_per_token_ms": 0.9974731182795699, "prompt_per_second": 1002.5332830270037, "predicted_n": 2, "predicted_ms": 31.93, "predicted_per_token_ms": 15.965, "predicted_per_second": 62.63701847792045}, "tps": 62.63701847792045}, {"id": "1fb7ee87-99bd-4fec-895f-ae8c9637a5dd", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["SystemSecurity", "SoftwareSecurity"], "timings": {"cache_n": 58, "prompt_n": 164, "prompt_ms": 99.123, "prompt_per_token_ms": 0.6044085365853659, "prompt_per_second": 1654.510053166268, "predicted_n": 2, "predicted_ms": 30.369, "predicted_per_token_ms": 15.1845, "predicted_per_second": 65.85663011623696}, "tps": 65.85663011623696}, {"id": "9b615dce-eb2c-408f-a4d3-89a6cf77a257", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["WebSecurity", "SystemSecurity"], "timings": {"cache_n": 60, "prompt_n": 112, "prompt_ms": 95.209, "prompt_per_token_ms": 0.8500803571428571, "prompt_per_second": 1176.3593777899148, "predicted_n": 2, "predicted_ms": 28.58, "predicted_per_token_ms": 14.29, "predicted_per_second": 69.97900629811058}, "tps": 69.97900629811058}, {"id": "3a6b83ac-e13e-4d70-86f4-139503315a9a", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["ApplicationSecurity"], "timings": {"cache_n": 59, "prompt_n": 166, "prompt_ms": 99.579, "prompt_per_token_ms": 0.5998734939759036, "prompt_per_second": 1667.0181463963286, "predicted_n": 2, "predicted_ms": 29.071, "predicted_per_token_ms": 14.5355, "predicted_per_second": 68.79708300368064}, "tps": 68.79708300368064}, {"id": "c21613d4-e9b5-4a4e-88ea-80e057500f48", "answer": "BC", "llm_answer": "ABCD", "score": 0, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 60, "prompt_n": 177, "prompt_ms": 100.916, "prompt_per_token_ms": 0.5701468926553672, "prompt_per_second": 1753.9339648816838, "predicted_n": 3, "predicted_ms": 60.976, "predicted_per_token_ms": 20.325333333333333, "predicted_per_second": 49.199685122015225}, "tps": 49.199685122015225}, {"id": "073971b1-e9c8-47d5-9894-79db3d820305", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["SystemSecurity"], "timings": {"cache_n": 59, "prompt_n": 122, "prompt_ms": 81.409, "prompt_per_token_ms": 0.6672868852459017, "prompt_per_second": 1498.6058052549472, "predicted_n": 2, "predicted_ms": 26.293, "predicted_per_token_ms": 13.1465, "predicted_per_second": 76.06587304605789}, "tps": 76.06587304605789}, {"id": "d0452a6e-eec2-417f-b62f-9bedebebe955", "answer": "AC", "llm_answer": "ACD", "score": 0, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 59, "prompt_n": 147, "prompt_ms": 93.572, "prompt_per_token_ms": 0.6365442176870748, "prompt_per_second": 1570.982772624289, "predicted_n": 3, "predicted_ms": 60.273, "predicted_per_token_ms": 20.091, "predicted_per_second": 49.773530436513866}, "tps": 49.773530436513866}, {"id": "cbd7f4b8-31b4-40bc-9f7c-f596279a09d5", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SystemSecurity", "Vulnerability"], "timings": {"cache_n": 64, "prompt_n": 133, "prompt_ms": 97.975, "prompt_per_token_ms": 0.7366541353383458, "prompt_per_second": 1357.489155396785, "predicted_n": 2, "predicted_ms": 29.727, "predicted_per_token_ms": 14.8635, "predicted_per_second": 67.2789046994315}, "tps": 67.2789046994315}, {"id": "0cd3fb7a-a11c-4978-95ea-bd8ef014f78c", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["WebSecurity", "SystemSecurity"], "timings": {"cache_n": 60, "prompt_n": 151, "prompt_ms": 96.919, "prompt_per_token_ms": 0.6418476821192053, "prompt_per_second": 1558.002042943076, "predicted_n": 2, "predicted_ms": 32.354, "predicted_per_token_ms": 16.177, "predicted_per_second": 61.816158743895656}, "tps": 61.816158743895656}, {"id": "bc023b01-d17a-438c-83cb-fabcbd04e188", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["ApplicationSecurity"], "timings": {"cache_n": 59, "prompt_n": 133, "prompt_ms": 99.286, "prompt_per_token_ms": 0.7465112781954887, "prompt_per_second": 1339.564490461898, "predicted_n": 2, "predicted_ms": 31.687, "predicted_per_token_ms": 15.8435, "predicted_per_second": 63.11736674345946}, "tps": 63.11736674345946}, {"id": "9c4fdf32-66ab-49e1-a48b-0efd9a27aa78", "answer": "AB", "llm_answer": "A", "score": 0, "topics": ["SoftwareSecurity", "Vulnerability"], "timings": {"cache_n": 58, "prompt_n": 156, "prompt_ms": 96.601, "prompt_per_token_ms": 0.6192371794871795, "prompt_per_second": 1614.8901150091615, "predicted_n": 2, "predicted_ms": 31.565, "predicted_per_token_ms": 15.7825, "predicted_per_second": 63.36131791541264}, "tps": 63.36131791541264}, {"id": "b8937968-6ff7-4dd7-b280-2795bf6cfd9e", "answer": "D", "llm_answer": "B", "score": 0, "topics": ["SoftwareSecurity", "ApplicationSecurity", "Vulnerability"], "timings": {"cache_n": 58, "prompt_n": 128, "prompt_ms": 97.137, "prompt_per_token_ms": 0.7588828125, "prompt_per_second": 1317.726509980749, "predicted_n": 2, "predicted_ms": 29.145, "predicted_per_token_ms": 14.5725, "predicted_per_second": 68.62240521530279}, "tps": 68.62240521530279}, {"id": "413ba89c-0478-467b-9899-a77060f6dcb1", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["WebSecurity", "SystemSecurity"], "timings": {"cache_n": 60, "prompt_n": 101, "prompt_ms": 94.161, "prompt_per_token_ms": 0.9322871287128713, "prompt_per_second": 1072.6309193827592, "predicted_n": 2, "predicted_ms": 29.866, "predicted_per_token_ms": 14.933, "predicted_per_second": 66.96578048617157}, "tps": 66.96578048617157}, {"id": "12daac5a-3b65-4420-9bce-4154d86750dc", "answer": "AC", "llm_answer": "AC", "score": 1, "topics": ["SystemSecurity", "WebSecurity"], "timings": {"cache_n": 59, "prompt_n": 125, "prompt_ms": 95.507, "prompt_per_token_ms": 0.7640560000000001, "prompt_per_second": 1308.8045902394588, "predicted_n": 2, "predicted_ms": 29.572, "predicted_per_token_ms": 14.786, "predicted_per_second": 67.63154335181929}, "tps": 67.63154335181929}, {"id": "c54381a5-6280-4e69-8ed3-34b2878e7610", "answer": "A", "llm_answer": "B", "score": 0, "topics": ["SoftwareSecurity", "SystemSecurity"], "timings": {"cache_n": 60, "prompt_n": 184, "prompt_ms": 99.692, "prompt_per_token_ms": 0.5418043478260869, "prompt_per_second": 1845.6847089034227, "predicted_n": 2, "predicted_ms": 27.117, "predicted_per_token_ms": 13.5585, "predicted_per_second": 73.75447136482649}, "tps": 73.75447136482649}, {"id": "eb279d8b-8708-4616-bdc8-d800a6e7c231", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SystemSecurity"], "timings": {"cache_n": 61, "prompt_n": 88, "prompt_ms": 82.167, "prompt_per_token_ms": 0.9337159090909091, "prompt_per_second": 1070.9895700220281, "predicted_n": 2, "predicted_ms": 27.878, "predicted_per_token_ms": 13.939, "predicted_per_second": 71.74115790228855}, "tps": 71.74115790228855}, {"id": "e7da4823-07cd-41ee-8a2f-6c2a0727776d", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 58, "prompt_n": 129, "prompt_ms": 100.206, "prompt_per_token_ms": 0.7767906976744187, "prompt_per_second": 1287.3480629902401, "predicted_n": 2, "predicted_ms": 30.166, "predicted_per_token_ms": 15.083, "predicted_per_second": 66.29980773055757}, "tps": 66.29980773055757}, {"id": "ce76d584-0eeb-4720-9460-73b218458e08", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["PenTest", "NetworkSecurity"], "timings": {"cache_n": 60, "prompt_n": 86, "prompt_ms": 92.86, "prompt_per_token_ms": 1.079767441860465, "prompt_per_second": 926.1253499892312, "predicted_n": 2, "predicted_ms": 28.818, "predicted_per_token_ms": 14.409, "predicted_per_second": 69.40106877645916}, "tps": 69.40106877645916}, {"id": "72963d98-6575-44f5-a19a-e637d42d2631", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["WebSecurity", "ApplicationSecurity"], "timings": {"cache_n": 58, "prompt_n": 240, "prompt_ms": 104.421, "prompt_per_token_ms": 0.4350875, "prompt_per_second": 2298.3882552360155, "predicted_n": 2, "predicted_ms": 31.308, "predicted_per_token_ms": 15.654, "predicted_per_second": 63.88143605468251}, "tps": 63.88143605468251}, {"id": "85980c4a-b73b-441a-86aa-4f0e8a8a027c", "answer": "AB", "llm_answer": "BC", "score": 0, "topics": ["WebSecurity", "ApplicationSecurity"], "timings": {"cache_n": 65, "prompt_n": 109, "prompt_ms": 95.038, "prompt_per_token_ms": 0.8719082568807339, "prompt_per_second": 1146.9096571897555, "predicted_n": 4, "predicted_ms": 92.426, "predicted_per_token_ms": 23.1065, "predicted_per_second": 43.27786553567178}, "tps": 43.27786553567178}, {"id": "7eb1130f-328f-4106-b07c-4639f7542704", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["WebSecurity", "SystemSecurity"], "timings": {"cache_n": 60, "prompt_n": 123, "prompt_ms": 95.422, "prompt_per_token_ms": 0.7757886178861788, "prompt_per_second": 1289.0109199136468, "predicted_n": 2, "predicted_ms": 31.228, "predicted_per_token_ms": 15.614, "predicted_per_second": 64.0450877417702}, "tps": 64.0450877417702}, {"id": "86a4aad8-d3b6-4360-9568-af9f843db9f5", "answer": "AB", "llm_answer": "AB", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 62, "prompt_n": 144, "prompt_ms": 97.131, "prompt_per_token_ms": 0.6745208333333333, "prompt_per_second": 1482.5338975198445, "predicted_n": 4, "predicted_ms": 90.982, "predicted_per_token_ms": 22.7455, "predicted_per_second": 43.96474027829681}, "tps": 43.96474027829681}, {"id": "728170e9-7d95-41bb-b035-4223729fdc04", "answer": "ACD", "llm_answer": "AC", "score": 0, "topics": ["WebSecurity", "ApplicationSecurity"], "timings": {"cache_n": 62, "prompt_n": 170, "prompt_ms": 99.364, "prompt_per_token_ms": 0.5844941176470588, "prompt_per_second": 1710.8812044603678, "predicted_n": 2, "predicted_ms": 32.1, "predicted_per_token_ms": 16.05, "predicted_per_second": 62.30529595015576}, "tps": 62.30529595015576}, {"id": "3ba5ff71-18fd-4de1-956b-773381a476fb", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["SoftwareSecurity", "SystemSecurity", "Vulnerability"], "timings": {"cache_n": 62, "prompt_n": 100, "prompt_ms": 99.831, "prompt_per_token_ms": 0.99831, "prompt_per_second": 1001.6928609349801, "predicted_n": 2, "predicted_ms": 28.764, "predicted_per_token_ms": 14.382, "predicted_per_second": 69.53135864274788}, "tps": 69.53135864274788}, {"id": "44f59918-7a93-449f-93dc-c88c00fb3811", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["WebSecurity", "ApplicationSecurity"], "timings": {"cache_n": 58, "prompt_n": 109, "prompt_ms": 84.483, "prompt_per_token_ms": 0.7750733944954129, "prompt_per_second": 1290.2003953458093, "predicted_n": 2, "predicted_ms": 27.698, "predicted_per_token_ms": 13.849, "predicted_per_second": 72.20737959419452}, "tps": 72.20737959419452}, {"id": "dc8d3e5a-bbc8-4a8c-a10e-9ad7a4afc1e0", "answer": "BD", "llm_answer": "D", "score": 0, "topics": ["WebSecurity", "SystemSecurity"], "timings": {"cache_n": 59, "prompt_n": 131, "prompt_ms": 98.28, "prompt_per_token_ms": 0.7502290076335878, "prompt_per_second": 1332.926332926333, "predicted_n": 2, "predicted_ms": 29.36, "predicted_per_token_ms": 14.68, "predicted_per_second": 68.11989100817439}, "tps": 68.11989100817439}, {"id": "e7e1ac8b-5a66-4cbd-a099-699105c4e66b", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SystemSecurity", "PenTest"], "timings": {"cache_n": 60, "prompt_n": 119, "prompt_ms": 95.738, "prompt_per_token_ms": 0.8045210084033614, "prompt_per_second": 1242.975620965552, "predicted_n": 2, "predicted_ms": 29.434, "predicted_per_token_ms": 14.717, "predicted_per_second": 67.94863083508866}, "tps": 67.94863083508866}, {"id": "c371636c-e252-4d1c-9cac-238d2186f16d", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["SystemSecurity", "PenTest"], "timings": {"cache_n": 59, "prompt_n": 123, "prompt_ms": 96.193, "prompt_per_token_ms": 0.7820569105691056, "prompt_per_second": 1278.679321780171, "predicted_n": 2, "predicted_ms": 33.891, "predicted_per_token_ms": 16.9455, "predicted_per_second": 59.012717240565344}, "tps": 59.012717240565344}, {"id": "eed07a1c-1b92-4510-8eea-606d118d400c", "answer": "ABC", "llm_answer": "B", "score": 0, "topics": ["ApplicationSecurity", "SystemSecurity"], "timings": {"cache_n": 59, "prompt_n": 148, "prompt_ms": 96.665, "prompt_per_token_ms": 0.653141891891892, "prompt_per_second": 1531.0608803600062, "predicted_n": 2, "predicted_ms": 29.676, "predicted_per_token_ms": 14.838, "predicted_per_second": 67.39452756436178}, "tps": 67.39452756436178}, {"id": "772eaf2d-50f1-4097-a919-44d32f873de9", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 59, "prompt_n": 209, "prompt_ms": 102.035, "prompt_per_token_ms": 0.48820574162679425, "prompt_per_second": 2048.316754054981, "predicted_n": 2, "predicted_ms": 30.57, "predicted_per_token_ms": 15.285, "predicted_per_second": 65.42361792607132}, "tps": 65.42361792607132}, {"id": "096e7d32-54e4-4460-a4b9-8cd1d67bf990", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["NetworkSecurity", "PenTest"], "timings": {"cache_n": 60, "prompt_n": 140, "prompt_ms": 97.798, "prompt_per_token_ms": 0.6985571428571429, "prompt_per_second": 1431.5221170167079, "predicted_n": 2, "predicted_ms": 30.375, "predicted_per_token_ms": 15.1875, "predicted_per_second": 65.84362139917695}, "tps": 65.84362139917695}, {"id": "91c34387-8541-4e48-ae6d-04f5225a1172", "answer": "AD", "llm_answer": "AD", "score": 1, "topics": ["SoftwareSecurity", "ApplicationSecurity", "Vulnerability"], "timings": {"cache_n": 58, "prompt_n": 144, "prompt_ms": 98.753, "prompt_per_token_ms": 0.6857847222222222, "prompt_per_second": 1458.183548854212, "predicted_n": 2, "predicted_ms": 29.697, "predicted_per_token_ms": 14.8485, "predicted_per_second": 67.34687005421424}, "tps": 67.34687005421424}, {"id": "2198d116-a880-42c4-9ddf-76fd34a52cd7", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["WebSecurity", "ApplicationSecurity"], "timings": {"cache_n": 60, "prompt_n": 106, "prompt_ms": 95.432, "prompt_per_token_ms": 0.9003018867924528, "prompt_per_second": 1110.7385363400117, "predicted_n": 2, "predicted_ms": 29.69, "predicted_per_token_ms": 14.845, "predicted_per_second": 67.36274840013472}, "tps": 67.36274840013472}, {"id": "0e7e958e-18ed-4c54-bfdf-77fbfd6c7bcf", "answer": "D", "llm_answer": "A", "score": 0, "topics": ["WebSecurity", "ApplicationSecurity"], "timings": {"cache_n": 61, "prompt_n": 76, "prompt_ms": 91.642, "prompt_per_token_ms": 1.205815789473684, "prompt_per_second": 829.3140699679186, "predicted_n": 2, "predicted_ms": 27.765, "predicted_per_token_ms": 13.8825, "predicted_per_second": 72.03313524221142}, "tps": 72.03313524221142}, {"id": "27e50ce9-6abd-4c24-9fd8-baca8d02ab8d", "answer": "AC", "llm_answer": "AC", "score": 1, "topics": ["ApplicationSecurity", "WebSecurity"], "timings": {"cache_n": 58, "prompt_n": 125, "prompt_ms": 81.45, "prompt_per_token_ms": 0.6516000000000001, "prompt_per_second": 1534.6838551258438, "predicted_n": 2, "predicted_ms": 30.788, "predicted_per_token_ms": 15.394, "predicted_per_second": 64.96037417175523}, "tps": 64.96037417175523}, {"id": "263123cb-2687-4ee4-b5ed-daff666c732b", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SystemSecurity", "ApplicationSecurity", "PenTest"], "timings": {"cache_n": 60, "prompt_n": 119, "prompt_ms": 95.671, "prompt_per_token_ms": 0.8039579831932774, "prompt_per_second": 1243.8460975635248, "predicted_n": 2, "predicted_ms": 30.064, "predicted_per_token_ms": 15.032, "predicted_per_second": 66.52474720596062}, "tps": 66.52474720596062}, {"id": "a66504f7-824a-4bc2-974c-999e37e1ca9b", "answer": "AD", "llm_answer": "A", "score": 0, "topics": ["ApplicationSecurity"], "timings": {"cache_n": 58, "prompt_n": 219, "prompt_ms": 100.983, "prompt_per_token_ms": 0.4611095890410959, "prompt_per_second": 2168.681857342325, "predicted_n": 2, "predicted_ms": 30.147, "predicted_per_token_ms": 15.0735, "predicted_per_second": 66.34159286164461}, "tps": 66.34159286164461}, {"id": "daa238c2-a95b-475c-92a8-cda4f41c9448", "answer": "BC", "llm_answer": "B", "score": 0, "topics": ["NetworkSecurity", "SystemSecurity", "PenTest"], "timings": {"cache_n": 60, "prompt_n": 100, "prompt_ms": 95.48, "prompt_per_token_ms": 0.9548000000000001, "prompt_per_second": 1047.3397570171762, "predicted_n": 2, "predicted_ms": 36.565, "predicted_per_token_ms": 18.2825, "predicted_per_second": 54.69711472719814}, "tps": 54.69711472719814}, {"id": "8192660d-3029-4fcd-928c-12fd10c4221f", "answer": "AD", "llm_answer": "B", "score": 0, "topics": ["WebSecurity", "ApplicationSecurity"], "timings": {"cache_n": 59, "prompt_n": 176, "prompt_ms": 96.948, "prompt_per_token_ms": 0.5508409090909091, "prompt_per_second": 1815.4061971366093, "predicted_n": 2, "predicted_ms": 29.731, "predicted_per_token_ms": 14.8655, "predicted_per_second": 67.26985301537115}, "tps": 67.26985301537115}, {"id": "e6bc5c7d-9edb-4ede-8361-f9383144d2f4", "answer": "B", "llm_answer": "ABD", "score": 0, "topics": ["SystemSecurity"], "timings": {"cache_n": 60, "prompt_n": 133, "prompt_ms": 97.342, "prompt_per_token_ms": 0.7318947368421053, "prompt_per_second": 1366.3166978282757, "predicted_n": 4, "predicted_ms": 88.766, "predicted_per_token_ms": 22.1915, "predicted_per_second": 45.062298627853}, "tps": 45.062298627853}, {"id": "603f6eee-90eb-4c53-99ac-c093426b9b0a", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["PenTest"], "timings": {"cache_n": 60, "prompt_n": 116, "prompt_ms": 95.911, "prompt_per_token_ms": 0.8268189655172414, "prompt_per_second": 1209.4545985340576, "predicted_n": 2, "predicted_ms": 29.821, "predicted_per_token_ms": 14.9105, "predicted_per_second": 67.06683209818584}, "tps": 67.06683209818584}, {"id": "77bc1ffd-3f46-4d0c-bc24-69ff98fc9ed5", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["ApplicationSecurity", "WebSecurity"], "timings": {"cache_n": 59, "prompt_n": 165, "prompt_ms": 97.803, "prompt_per_token_ms": 0.5927454545454546, "prompt_per_second": 1687.064813962762, "predicted_n": 2, "predicted_ms": 31.389, "predicted_per_token_ms": 15.6945, "predicted_per_second": 63.716588613845616}, "tps": 63.716588613845616}, {"id": "b560b723-d1d9-44a8-8398-34d5575b9e58", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SystemSecurity", "NetworkSecurity", "Vulnerability"], "timings": {"cache_n": 59, "prompt_n": 105, "prompt_ms": 96.635, "prompt_per_token_ms": 0.9203333333333333, "prompt_per_second": 1086.5628395508875, "predicted_n": 2, "predicted_ms": 29.537, "predicted_per_token_ms": 14.7685, "predicted_per_second": 67.71168365101398}, "tps": 67.71168365101398}, {"id": "3b844253-d779-4184-8f1f-fd7319ce8efb", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["ApplicationSecurity", "SystemSecurity"], "timings": {"cache_n": 59, "prompt_n": 142, "prompt_ms": 90.93, "prompt_per_token_ms": 0.6403521126760564, "prompt_per_second": 1561.6408226107994, "predicted_n": 2, "predicted_ms": 28.001, "predicted_per_token_ms": 14.0005, "predicted_per_second": 71.42602049926788}, "tps": 71.42602049926788}, {"id": "99504617-5f9d-45d3-9469-e497692389fb", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["WebSecurity", "Vulnerability"], "timings": {"cache_n": 61, "prompt_n": 173, "prompt_ms": 94.614, "prompt_per_token_ms": 0.5469017341040463, "prompt_per_second": 1828.4820428266428, "predicted_n": 2, "predicted_ms": 32.357, "predicted_per_token_ms": 16.1785, "predicted_per_second": 61.81042741910561}, "tps": 61.81042741910561}, {"id": "c895dde1-080e-488e-860a-80e814f62af1", "answer": "ACD", "llm_answer": "A", "score": 0, "topics": ["SystemSecurity"], "timings": {"cache_n": 59, "prompt_n": 143, "prompt_ms": 96.956, "prompt_per_token_ms": 0.678013986013986, "prompt_per_second": 1474.8958290358512, "predicted_n": 2, "predicted_ms": 31.743, "predicted_per_token_ms": 15.8715, "predicted_per_second": 63.00601707463063}, "tps": 63.00601707463063}, {"id": "b921ae0d-a1ec-455c-89d8-74c6dff19354", "answer": "BC", "llm_answer": "C", "score": 0, "topics": ["WebSecurity", "SystemSecurity"], "timings": {"cache_n": 59, "prompt_n": 171, "prompt_ms": 99.861, "prompt_per_token_ms": 0.5839824561403509, "prompt_per_second": 1712.3802084898007, "predicted_n": 2, "predicted_ms": 30.397, "predicted_per_token_ms": 15.1985, "predicted_per_second": 65.79596670724085}, "tps": 65.79596670724085}, {"id": "2df1a1a3-fbc6-43b1-bd42-06623e3229a8", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["NetworkSecurity", "PenTest"], "timings": {"cache_n": 59, "prompt_n": 118, "prompt_ms": 96.69, "prompt_per_token_ms": 0.8194067796610169, "prompt_per_second": 1220.3950770503673, "predicted_n": 2, "predicted_ms": 28.468, "predicted_per_token_ms": 14.234, "predicted_per_second": 70.2543206407194}, "tps": 70.2543206407194}, {"id": "0f2fe8c2-ddc8-4fb0-b12f-fbcc1e5790d1", "answer": "BD", "llm_answer": "C", "score": 0, "topics": ["SystemSecurity", "NetworkSecurity"], "timings": {"cache_n": 59, "prompt_n": 105, "prompt_ms": 94.94, "prompt_per_token_ms": 0.9041904761904762, "prompt_per_second": 1105.961659995787, "predicted_n": 2, "predicted_ms": 29.189, "predicted_per_token_ms": 14.5945, "predicted_per_second": 68.51896262290589}, "tps": 68.51896262290589}, {"id": "8fb9728f-0f2d-49f1-a286-c853744a0e7d", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["ApplicationSecurity", "SystemSecurity"], "timings": {"cache_n": 58, "prompt_n": 184, "prompt_ms": 100.979, "prompt_per_token_ms": 0.5487989130434783, "prompt_per_second": 1822.1610433852584, "predicted_n": 2, "predicted_ms": 29.446, "predicted_per_token_ms": 14.723, "predicted_per_second": 67.92094002580995}, "tps": 67.92094002580995}, {"id": "f2a9d380-597c-475d-9d4a-13519f83ec0c", "answer": "C", "llm_answer": "B", "score": 0, "topics": ["WebSecurity"], "timings": {"cache_n": 62, "prompt_n": 132, "prompt_ms": 97.657, "prompt_per_token_ms": 0.7398257575757575, "prompt_per_second": 1351.6696191773249, "predicted_n": 2, "predicted_ms": 29.897, "predicted_per_token_ms": 14.9485, "predicted_per_second": 66.89634411479413}, "tps": 66.89634411479413}, {"id": "e9a42fcf-7177-4502-a594-e69f1f6a17a8", "answer": "AB", "llm_answer": "AB", "score": 1, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 60, "prompt_n": 87, "prompt_ms": 93.545, "prompt_per_token_ms": 1.0752298850574713, "prompt_per_second": 930.033673633011, "predicted_n": 3, "predicted_ms": 66.348, "predicted_per_token_ms": 22.116, "predicted_per_second": 45.21613311629589}, "tps": 45.21613311629589}, {"id": "eca946df-b622-433f-bc31-4e50fc338553", "answer": "D", "llm_answer": "C", "score": 0, "topics": ["WebSecurity", "SoftwareSecurity"], "timings": {"cache_n": 58, "prompt_n": 77, "prompt_ms": 93.797, "prompt_per_token_ms": 1.2181428571428572, "prompt_per_second": 820.9217778820218, "predicted_n": 2, "predicted_ms": 28.654, "predicted_per_token_ms": 14.327, "predicted_per_second": 69.79828296223913}, "tps": 69.79828296223913}, {"id": "01ef1160-6e04-4773-9b2b-2451d600f35f", "answer": "D", "llm_answer": "D", "score": 1, "topics": ["PenTest"], "timings": {"cache_n": 58, "prompt_n": 191, "prompt_ms": 87.344, "prompt_per_token_ms": 0.4572984293193717, "prompt_per_second": 2186.755816083532, "predicted_n": 2, "predicted_ms": 29.794, "predicted_per_token_ms": 14.897, "predicted_per_second": 67.12760958582265}, "tps": 67.12760958582265}, {"id": "d99a1150-bf14-4f21-a92c-3b4c751f9801", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SystemSecurity", "WebSecurity"], "timings": {"cache_n": 59, "prompt_n": 91, "prompt_ms": 94.318, "prompt_per_token_ms": 1.0364615384615385, "prompt_per_second": 964.8211370046016, "predicted_n": 2, "predicted_ms": 29.234, "predicted_per_token_ms": 14.617, "predicted_per_second": 68.41349114045289}, "tps": 68.41349114045289}, {"id": "6c491b9f-50e0-4781-92b2-45949e4ccf42", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["SoftwareSecurity", "ApplicationSecurity", "Vulnerability"], "timings": {"cache_n": 60, "prompt_n": 149, "prompt_ms": 97.392, "prompt_per_token_ms": 0.6536375838926174, "prompt_per_second": 1529.899786430097, "predicted_n": 2, "predicted_ms": 29.248, "predicted_per_token_ms": 14.624, "predicted_per_second": 68.38074398249452}, "tps": 68.38074398249452}, {"id": "e008b67d-9946-4754-b55a-d5959aa94b06", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 59, "prompt_n": 112, "prompt_ms": 93.257, "prompt_per_token_ms": 0.8326517857142858, "prompt_per_second": 1200.982231896801, "predicted_n": 2, "predicted_ms": 29.307, "predicted_per_token_ms": 14.6535, "predicted_per_second": 68.2430818575767}, "tps": 68.2430818575767}, {"id": "56d0688e-ffa9-4649-9d98-aa35f0ae8a2c", "answer": "A", "llm_answer": "AC", "score": 0, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 59, "prompt_n": 97, "prompt_ms": 94.405, "prompt_per_token_ms": 0.9732474226804124, "prompt_per_second": 1027.4879508500608, "predicted_n": 2, "predicted_ms": 29.928, "predicted_per_token_ms": 14.964, "predicted_per_second": 66.82705159048382}, "tps": 66.82705159048382}, {"id": "6504ace0-5ac4-4011-9093-39c8eb47644e", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 62, "prompt_n": 109, "prompt_ms": 93.891, "prompt_per_token_ms": 0.8613853211009175, "prompt_per_second": 1160.920642021067, "predicted_n": 2, "predicted_ms": 30.728, "predicted_per_token_ms": 15.364, "predicted_per_second": 65.08721687060661}, "tps": 65.08721687060661}, {"id": "badc033f-f1cc-4080-899c-18762b95fc2b", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 59, "prompt_n": 118, "prompt_ms": 99.724, "prompt_per_token_ms": 0.8451186440677967, "prompt_per_second": 1183.2658136456619, "predicted_n": 2, "predicted_ms": 29.501, "predicted_per_token_ms": 14.7505, "predicted_per_second": 67.79431205721839}, "tps": 67.79431205721839}, {"id": "d07949ae-8eb8-4a9a-a85b-2473410d5e6f", "answer": "ABD", "llm_answer": "C", "score": 0, "topics": ["SystemSecurity", "NetworkSecurity", "Vulnerability"], "timings": {"cache_n": 59, "prompt_n": 131, "prompt_ms": 97.087, "prompt_per_token_ms": 0.7411221374045802, "prompt_per_second": 1349.305262290523, "predicted_n": 2, "predicted_ms": 30.448, "predicted_per_token_ms": 15.224, "predicted_per_second": 65.68575932737782}, "tps": 65.68575932737782}, {"id": "ad5e7454-4f48-4809-962e-43c4caa8a9c0", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 59, "prompt_n": 172, "prompt_ms": 99.719, "prompt_per_token_ms": 0.5797616279069767, "prompt_per_second": 1724.846819562972, "predicted_n": 2, "predicted_ms": 31.21, "predicted_per_token_ms": 15.605, "predicted_per_second": 64.08202499198974}, "tps": 64.08202499198974}, {"id": "2802d9ab-980e-4f45-aa2a-3698c9872240", "answer": "AC", "llm_answer": "AC", "score": 1, "topics": ["Vulnerability"], "timings": {"cache_n": 62, "prompt_n": 134, "prompt_ms": 97.047, "prompt_per_token_ms": 0.7242313432835821, "prompt_per_second": 1380.7742640164045, "predicted_n": 2, "predicted_ms": 26.433, "predicted_per_token_ms": 13.2165, "predicted_per_second": 75.66299701131162}, "tps": 75.66299701131162}, {"id": "823a1f0c-d738-4f84-9c02-5fd3be5eee28", "answer": "C", "llm_answer": "B", "score": 0, "topics": ["SystemSecurity", "WebSecurity"], "timings": {"cache_n": 64, "prompt_n": 173, "prompt_ms": 86.03, "prompt_per_token_ms": 0.49728323699421967, "prompt_per_second": 2010.9264210159247, "predicted_n": 2, "predicted_ms": 30.275, "predicted_per_token_ms": 15.1375, "predicted_per_second": 66.06110652353428}, "tps": 66.06110652353428}, {"id": "1e6f87ef-644a-4c53-8e06-b594d2a70170", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SystemSecurity"], "timings": {"cache_n": 60, "prompt_n": 141, "prompt_ms": 97.725, "prompt_per_token_ms": 0.6930851063829787, "prompt_per_second": 1442.8242517267845, "predicted_n": 2, "predicted_ms": 31.768, "predicted_per_token_ms": 15.884, "predicted_per_second": 62.95643414756988}, "tps": 62.95643414756988}, {"id": "007c98ee-f24d-46d7-9bf4-e1fbb418eeb4", "answer": "D", "llm_answer": "D", "score": 1, "topics": ["ApplicationSecurity", "Vulnerability"], "timings": {"cache_n": 62, "prompt_n": 99, "prompt_ms": 94.882, "prompt_per_token_ms": 0.9584040404040405, "prompt_per_second": 1043.4012773761092, "predicted_n": 2, "predicted_ms": 30.155, "predicted_per_token_ms": 15.0775, "predicted_per_second": 66.3239927043608}, "tps": 66.3239927043608}, {"id": "b9f740aa-3e0f-4025-afea-d691a9533013", "answer": "D", "llm_answer": "D", "score": 1, "topics": ["WebSecurity", "Vulnerability"], "timings": {"cache_n": 60, "prompt_n": 175, "prompt_ms": 99.058, "prompt_per_token_ms": 0.5660457142857144, "prompt_per_second": 1766.6417654303539, "predicted_n": 2, "predicted_ms": 32.245, "predicted_per_token_ms": 16.1225, "predicted_per_second": 62.02512017367034}, "tps": 62.02512017367034}, {"id": "ae018421-b2a4-4cae-9dda-da6dc1afbd33", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["WebSecurity", "PenTest"], "timings": {"cache_n": 60, "prompt_n": 160, "prompt_ms": 98.763, "prompt_per_token_ms": 0.61726875, "prompt_per_second": 1620.039893482377, "predicted_n": 2, "predicted_ms": 30.666, "predicted_per_token_ms": 15.333, "predicted_per_second": 65.21880910454576}, "tps": 65.21880910454576}, {"id": "6b40c780-7a76-4e13-a984-14b82d76dd4f", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["WebSecurity", "ApplicationSecurity"], "timings": {"cache_n": 60, "prompt_n": 149, "prompt_ms": 98.531, "prompt_per_token_ms": 0.6612818791946309, "prompt_per_second": 1512.2144299763524, "predicted_n": 2, "predicted_ms": 29.798, "predicted_per_token_ms": 14.899, "predicted_per_second": 67.11859856366199}, "tps": 67.11859856366199}, {"id": "57ae34cc-2a53-499e-b1b9-ce3ffa9f44a0", "answer": "ABD", "llm_answer": "AD", "score": 0, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 59, "prompt_n": 159, "prompt_ms": 99.625, "prompt_per_token_ms": 0.6265723270440252, "prompt_per_second": 1595.9849435382687, "predicted_n": 2, "predicted_ms": 29.545, "predicted_per_token_ms": 14.7725, "predicted_per_second": 67.69334912844812}, "tps": 67.69334912844812}, {"id": "509cf232-a8d0-4320-8f00-659c7d8c6266", "answer": "AC", "llm_answer": "AC", "score": 1, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 61, "prompt_n": 133, "prompt_ms": 97.705, "prompt_per_token_ms": 0.734624060150376, "prompt_per_second": 1361.2404687579958, "predicted_n": 2, "predicted_ms": 29.683, "predicted_per_token_ms": 14.8415, "predicted_per_second": 67.37863423508405}, "tps": 67.37863423508405}, {"id": "ee7b164a-ff51-4aff-be54-197bbc6fd0c1", "answer": "B", "llm_answer": "C", "score": 0, "topics": ["WebSecurity"], "timings": {"cache_n": 59, "prompt_n": 145, "prompt_ms": 98.871, "prompt_per_token_ms": 0.6818689655172413, "prompt_per_second": 1466.5574334233497, "predicted_n": 2, "predicted_ms": 30.504, "predicted_per_token_ms": 15.252, "predicted_per_second": 65.56517178075006}, "tps": 65.56517178075006}, {"id": "d2c889dd-cd0a-4cbb-bd37-43de18dd7e6f", "answer": "AC", "llm_answer": "A", "score": 0, "topics": ["ApplicationSecurity", "WebSecurity", "Vulnerability"], "timings": {"cache_n": 59, "prompt_n": 104, "prompt_ms": 93.334, "prompt_per_token_ms": 0.8974423076923077, "prompt_per_second": 1114.2777551588915, "predicted_n": 2, "predicted_ms": 26.325, "predicted_per_token_ms": 13.1625, "predicted_per_second": 75.97340930674264}, "tps": 75.97340930674264}, {"id": "b6b7fdc5-1175-4eda-ba73-98720c6c8bd8", "answer": "BD", "llm_answer": "BD", "score": 1, "topics": ["SystemSecurity"], "timings": {"cache_n": 62, "prompt_n": 116, "prompt_ms": 84.966, "prompt_per_token_ms": 0.7324655172413792, "prompt_per_second": 1365.2519831461998, "predicted_n": 3, "predicted_ms": 59.257, "predicted_per_token_ms": 19.752333333333333, "predicted_per_second": 50.62693015171204}, "tps": 50.62693015171204}, {"id": "8bc4bb88-bcac-40b4-a39a-764fb9e8d55d", "answer": "AD", "llm_answer": "A", "score": 0, "topics": ["PenTest"], "timings": {"cache_n": 60, "prompt_n": 97, "prompt_ms": 93.848, "prompt_per_token_ms": 0.9675051546391753, "prompt_per_second": 1033.586224533288, "predicted_n": 2, "predicted_ms": 28.755, "predicted_per_token_ms": 14.3775, "predicted_per_second": 69.55312119631368}, "tps": 69.55312119631368}, {"id": "c1e4b5c4-6956-46dc-a8ee-3d66385fd251", "answer": "AC", "llm_answer": "ABCD", "score": 0, "topics": ["WebSecurity"], "timings": {"cache_n": 58, "prompt_n": 98, "prompt_ms": 93.626, "prompt_per_token_ms": 0.9553673469387756, "prompt_per_second": 1046.7177920663062, "predicted_n": 64, "predicted_ms": 1842.655, "predicted_per_token_ms": 28.791484375, "predicted_per_second": 34.73249197489492}, "tps": 34.73249197489492}, {"id": "95240c65-da3a-46b3-b1a1-0dbe23447e10", "answer": "ACD", "llm_answer": "AC", "score": 0, "topics": ["SoftwareSecurity", "SystemSecurity", "ApplicationSecurity", "Vulnerability"], "timings": {"cache_n": 59, "prompt_n": 163, "prompt_ms": 98.29, "prompt_per_token_ms": 0.6030061349693252, "prompt_per_second": 1658.3579204395157, "predicted_n": 3, "predicted_ms": 59.819, "predicted_per_token_ms": 19.939666666666668, "predicted_per_second": 50.15128972400073}, "tps": 50.15128972400073}, {"id": "97a05d2f-9391-46c2-8acc-dd1d67b6b56c", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["PenTest"], "timings": {"cache_n": 62, "prompt_n": 106, "prompt_ms": 94.56, "prompt_per_token_ms": 0.8920754716981132, "prompt_per_second": 1120.9813874788492, "predicted_n": 2, "predicted_ms": 31.11, "predicted_per_token_ms": 15.555, "predicted_per_second": 64.28801028608164}, "tps": 64.28801028608164}, {"id": "fe3b3331-b239-482c-b535-d913fed29111", "answer": "BC", "llm_answer": "B", "score": 0, "topics": ["WebSecurity", "ApplicationSecurity", "Vulnerability"], "timings": {"cache_n": 62, "prompt_n": 160, "prompt_ms": 97.455, "prompt_per_token_ms": 0.60909375, "prompt_per_second": 1641.7833872043507, "predicted_n": 2, "predicted_ms": 25.865, "predicted_per_token_ms": 12.9325, "predicted_per_second": 77.32456988208004}, "tps": 77.32456988208004}, {"id": "c5a6b274-747c-4fb1-b9dd-f2b2bdf21266", "answer": "B", "llm_answer": "A", "score": 0, "topics": ["ApplicationSecurity"], "timings": {"cache_n": 62, "prompt_n": 145, "prompt_ms": 86.842, "prompt_per_token_ms": 0.5989103448275862, "prompt_per_second": 1669.6989935745376, "predicted_n": 2, "predicted_ms": 27.227, "predicted_per_token_ms": 13.6135, "predicted_per_second": 73.45649539060491}, "tps": 73.45649539060491}, {"id": "14f5a054-29ae-407d-8bc4-7f4ed7b394fa", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["ApplicationSecurity"], "timings": {"cache_n": 62, "prompt_n": 190, "prompt_ms": 100.893, "prompt_per_token_ms": 0.5310157894736842, "prompt_per_second": 1883.1831742539125, "predicted_n": 2, "predicted_ms": 28.82, "predicted_per_token_ms": 14.41, "predicted_per_second": 69.39625260235947}, "tps": 69.39625260235947}, {"id": "aee6cc10-9021-46ed-b9b0-3966f1617492", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["SystemSecurity", "NetworkSecurity", "PenTest"], "timings": {"cache_n": 61, "prompt_n": 97, "prompt_ms": 93.862, "prompt_per_token_ms": 0.9676494845360825, "prompt_per_second": 1033.43205983252, "predicted_n": 2, "predicted_ms": 29.869, "predicted_per_token_ms": 14.9345, "predicted_per_second": 66.95905453814993}, "tps": 66.95905453814993}, {"id": "e7605014-9769-48af-a7ed-0bdb55b409dc", "answer": "ABD", "llm_answer": "ABD", "score": 1, "topics": ["SystemSecurity", "WebSecurity"], "timings": {"cache_n": 59, "prompt_n": 111, "prompt_ms": 97.568, "prompt_per_token_ms": 0.878990990990991, "prompt_per_second": 1137.6680878976713, "predicted_n": 3, "predicted_ms": 69.105, "predicted_per_token_ms": 23.035, "predicted_per_second": 43.41219882787063}, "tps": 43.41219882787063}, {"id": "e52cc525-749d-4fb7-ad09-d6c7abf67176", "answer": "AD", "llm_answer": "A", "score": 0, "topics": ["ApplicationSecurity", "WebSecurity", "Vulnerability"], "timings": {"cache_n": 59, "prompt_n": 124, "prompt_ms": 99.447, "prompt_per_token_ms": 0.801991935483871, "prompt_per_second": 1246.8953311814332, "predicted_n": 2, "predicted_ms": 29.66, "predicted_per_token_ms": 14.83, "predicted_per_second": 67.43088334457181}, "tps": 67.43088334457181}, {"id": "77c4583d-dd11-4bc5-be97-166004866a73", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["SystemSecurity", "PenTest"], "timings": {"cache_n": 63, "prompt_n": 110, "prompt_ms": 96.23, "prompt_per_token_ms": 0.8748181818181818, "prompt_per_second": 1143.0946690221344, "predicted_n": 2, "predicted_ms": 29.697, "predicted_per_token_ms": 14.8485, "predicted_per_second": 67.34687005421424}, "tps": 67.34687005421424}, {"id": "2dca13cb-6db6-4889-b2b3-2b5bd3c977bb", "answer": "D", "llm_answer": "C", "score": 0, "topics": ["WebSecurity"], "timings": {"cache_n": 59, "prompt_n": 106, "prompt_ms": 95.335, "prompt_per_token_ms": 0.8993867924528302, "prompt_per_second": 1111.868673624587, "predicted_n": 2, "predicted_ms": 28.387, "predicted_per_token_ms": 14.1935, "predicted_per_second": 70.45478564131469}, "tps": 70.45478564131469}, {"id": "e058c3bf-0566-4e2f-bf96-1048ad19ef40", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SystemSecurity", "NetworkSecurity"], "timings": {"cache_n": 58, "prompt_n": 122, "prompt_ms": 95.476, "prompt_per_token_ms": 0.7825901639344263, "prompt_per_second": 1277.808035527253, "predicted_n": 2, "predicted_ms": 31.06, "predicted_per_token_ms": 15.53, "predicted_per_second": 64.39150032195751}, "tps": 64.39150032195751}, {"id": "4a4c52ad-ca37-451a-b45f-a11cfbb8b3f7", "answer": "AC", "llm_answer": "C", "score": 0, "topics": ["WebSecurity", "SystemSecurity"], "timings": {"cache_n": 59, "prompt_n": 262, "prompt_ms": 107.697, "prompt_per_token_ms": 0.411057251908397, "prompt_per_second": 2432.751144414422, "predicted_n": 2, "predicted_ms": 31.728, "predicted_per_token_ms": 15.864, "predicted_per_second": 63.035804336863336}, "tps": 63.035804336863336}, {"id": "aa2f5df4-e5c3-4c1d-8b47-679c72706145", "answer": "ACD", "llm_answer": "ACD", "score": 1, "topics": ["WebSecurity", "SystemSecurity"], "timings": {"cache_n": 62, "prompt_n": 178, "prompt_ms": 91.325, "prompt_per_token_ms": 0.513061797752809, "prompt_per_second": 1949.0829455242265, "predicted_n": 3, "predicted_ms": 57.362, "predicted_per_token_ms": 19.12066666666667, "predicted_per_second": 52.29943167950908}, "tps": 52.29943167950908}, {"id": "e0f49000-1b7c-4ae0-8e19-513c09417db5", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SystemSecurity", "ApplicationSecurity", "Vulnerability"], "timings": {"cache_n": 59, "prompt_n": 158, "prompt_ms": 99.514, "prompt_per_token_ms": 0.6298354430379747, "prompt_per_second": 1587.7163012239487, "predicted_n": 2, "predicted_ms": 31.554, "predicted_per_token_ms": 15.777, "predicted_per_second": 63.38340622425049}, "tps": 63.38340622425049}, {"id": "99170c96-194d-4d0a-84ac-40e6e1b15378", "answer": "BC", "llm_answer": "AC", "score": 0, "topics": ["ApplicationSecurity"], "timings": {"cache_n": 62, "prompt_n": 145, "prompt_ms": 95.775, "prompt_per_token_ms": 0.6605172413793103, "prompt_per_second": 1513.9650221874183, "predicted_n": 2, "predicted_ms": 30.563, "predicted_per_token_ms": 15.2815, "predicted_per_second": 65.43860223145634}, "tps": 65.43860223145634}, {"id": "88fd7f84-427f-4b2d-82da-f410265622df", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["WebSecurity", "ApplicationSecurity"], "timings": {"cache_n": 62, "prompt_n": 144, "prompt_ms": 97.905, "prompt_per_token_ms": 0.6798958333333334, "prompt_per_second": 1470.8135437413819, "predicted_n": 2, "predicted_ms": 31.977, "predicted_per_token_ms": 15.9885, "predicted_per_second": 62.544954185821055}, "tps": 62.544954185821055}, {"id": "94c663ae-3885-44c7-aef4-9b0528d390bf", "answer": "A", "llm_answer": "C", "score": 0, "topics": ["SystemSecurity", "WebSecurity"], "timings": {"cache_n": 62, "prompt_n": 177, "prompt_ms": 100.961, "prompt_per_token_ms": 0.5704011299435028, "prompt_per_second": 1753.1522072879627, "predicted_n": 2, "predicted_ms": 28.801, "predicted_per_token_ms": 14.4005, "predicted_per_second": 69.44203326273394}, "tps": 69.44203326273394}, {"id": "b533dca8-7422-4b22-86d8-e0157d8bc736", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["SystemSecurity", "WebSecurity"], "timings": {"cache_n": 61, "prompt_n": 163, "prompt_ms": 99.442, "prompt_per_token_ms": 0.6100736196319018, "prompt_per_second": 1639.1464371191248, "predicted_n": 2, "predicted_ms": 30.134, "predicted_per_token_ms": 15.067, "predicted_per_second": 66.37021304838389}, "tps": 66.37021304838389}, {"id": "47eb3e63-8afa-424e-8969-2d5dab27a379", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["SystemSecurity", "PenTest"], "timings": {"cache_n": 59, "prompt_n": 136, "prompt_ms": 98.99, "prompt_per_token_ms": 0.7278676470588235, "prompt_per_second": 1373.8761491059704, "predicted_n": 2, "predicted_ms": 29.304, "predicted_per_token_ms": 14.652, "predicted_per_second": 68.25006825006825}, "tps": 68.25006825006825}, {"id": "4e9270eb-fef6-44a9-ae23-c46ca2a55234", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["WebSecurity", "SystemSecurity"], "timings": {"cache_n": 59, "prompt_n": 161, "prompt_ms": 99.311, "prompt_per_token_ms": 0.6168385093167702, "prompt_per_second": 1621.1698603377267, "predicted_n": 2, "predicted_ms": 28.895, "predicted_per_token_ms": 14.4475, "predicted_per_second": 69.21612735767434}, "tps": 69.21612735767434}, {"id": "30d8a272-5248-4cf6-8e52-6c77b490103a", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SystemSecurity", "ApplicationSecurity", "Vulnerability"], "timings": {"cache_n": 60, "prompt_n": 160, "prompt_ms": 99.575, "prompt_per_token_ms": 0.62234375, "prompt_per_second": 1606.8290233492344, "predicted_n": 2, "predicted_ms": 31.796, "predicted_per_token_ms": 15.898, "predicted_per_second": 62.90099383570261}, "tps": 62.90099383570261}, {"id": "8c52e7ec-2064-4168-ab04-9f3104a1c9d8", "answer": "B", "llm_answer": "D", "score": 0, "topics": ["SystemSecurity", "PenTest"], "timings": {"cache_n": 59, "prompt_n": 102, "prompt_ms": 98.311, "prompt_per_token_ms": 0.9638333333333334, "prompt_per_second": 1037.5237765865468, "predicted_n": 2, "predicted_ms": 28.134, "predicted_per_token_ms": 14.067, "predicted_per_second": 71.0883628350039}, "tps": 71.0883628350039}, {"id": "027ab7ed-e446-4149-a82c-55f46a774e8e", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["WebSecurity", "ApplicationSecurity"], "timings": {"cache_n": 58, "prompt_n": 133, "prompt_ms": 81.512, "prompt_per_token_ms": 0.6128721804511278, "prompt_per_second": 1631.6615958386496, "predicted_n": 2, "predicted_ms": 27.371, "predicted_per_token_ms": 13.6855, "predicted_per_second": 73.07003763106938}, "tps": 73.07003763106938}, {"id": "0aed254f-8d18-4ea1-b374-b6d7751029fd", "answer": "BC", "llm_answer": "B", "score": 0, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 60, "prompt_n": 172, "prompt_ms": 100.245, "prompt_per_token_ms": 0.5828197674418605, "prompt_per_second": 1715.796299067285, "predicted_n": 2, "predicted_ms": 28.911, "predicted_per_token_ms": 14.4555, "predicted_per_second": 69.17782159039811}, "tps": 69.17782159039811}, {"id": "33fc2bfc-0014-4e3e-aae1-780e708172e4", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["ApplicationSecurity", "WebSecurity"], "timings": {"cache_n": 62, "prompt_n": 139, "prompt_ms": 97.045, "prompt_per_token_ms": 0.6981654676258993, "prompt_per_second": 1432.325209954145, "predicted_n": 2, "predicted_ms": 29.072, "predicted_per_token_ms": 14.536, "predicted_per_second": 68.79471656576776}, "tps": 68.79471656576776}, {"id": "aa39c79d-8913-4e43-b0e1-2715b4b3a188", "answer": "CD", "llm_answer": "CD", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 61, "prompt_n": 143, "prompt_ms": 97.936, "prompt_per_token_ms": 0.684867132867133, "prompt_per_second": 1460.137232478353, "predicted_n": 3, "predicted_ms": 58.855, "predicted_per_token_ms": 19.618333333333332, "predicted_per_second": 50.97272958966953}, "tps": 50.97272958966953}, {"id": "80c18574-6ade-4e50-a37a-060d76ee1db4", "answer": "A", "llm_answer": "D", "score": 0, "topics": ["ApplicationSecurity"], "timings": {"cache_n": 60, "prompt_n": 162, "prompt_ms": 99.669, "prompt_per_token_ms": 0.6152407407407408, "prompt_per_second": 1625.3800078259037, "predicted_n": 2, "predicted_ms": 29.841, "predicted_per_token_ms": 14.9205, "predicted_per_second": 67.02188264468349}, "tps": 67.02188264468349}, {"id": "e97e4103-c321-4145-95e3-0c7ad7a59969", "answer": "AC", "llm_answer": "A", "score": 0, "topics": ["WebSecurity"], "timings": {"cache_n": 59, "prompt_n": 137, "prompt_ms": 97.682, "prompt_per_token_ms": 0.713007299270073, "prompt_per_second": 1402.5101861141254, "predicted_n": 2, "predicted_ms": 32.115, "predicted_per_token_ms": 16.0575, "predicted_per_second": 62.27619492449011}, "tps": 62.27619492449011}, {"id": "07bc2dae-5c16-404e-8b2d-2652be0c67de", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["ApplicationSecurity", "WebSecurity"], "timings": {"cache_n": 62, "prompt_n": 154, "prompt_ms": 96.376, "prompt_per_token_ms": 0.6258181818181818, "prompt_per_second": 1597.908192911098, "predicted_n": 2, "predicted_ms": 28.504, "predicted_per_token_ms": 14.252, "predicted_per_second": 70.16559079427448}, "tps": 70.16559079427448}, {"id": "bffcc697-5264-4d0a-9d76-4e3a85085fe9", "answer": "ABC", "llm_answer": "BC", "score": 0, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 62, "prompt_n": 161, "prompt_ms": 99.418, "prompt_per_token_ms": 0.6175031055900622, "prompt_per_second": 1619.4250538131926, "predicted_n": 2, "predicted_ms": 28.734, "predicted_per_token_ms": 14.367, "predicted_per_second": 69.60395350455906}, "tps": 69.60395350455906}, {"id": "7360ae18-72ad-4319-af55-85d06d5626d3", "answer": "C", "llm_answer": "C", "score": 1, "topics": [], "timings": {"cache_n": 60, "prompt_n": 131, "prompt_ms": 97.889, "prompt_per_token_ms": 0.7472442748091602, "prompt_per_second": 1338.2504673660983, "predicted_n": 2, "predicted_ms": 30.409, "predicted_per_token_ms": 15.2045, "predicted_per_second": 65.77000230195009}, "tps": 65.77000230195009}, {"id": "d2368210-3833-48a7-8116-a4f982dfdfd5", "answer": "B", "llm_answer": "C", "score": 0, "topics": ["ApplicationSecurity"], "timings": {"cache_n": 59, "prompt_n": 92, "prompt_ms": 84.393, "prompt_per_token_ms": 0.9173152173913044, "prompt_per_second": 1090.1378076380743, "predicted_n": 2, "predicted_ms": 26.139, "predicted_per_token_ms": 13.0695, "predicted_per_second": 76.51402119438387}, "tps": 76.51402119438387}, {"id": "5cb80c47-fef9-4a9e-8c86-b764870d1ac5", "answer": "C", "llm_answer": "B", "score": 0, "topics": ["ApplicationSecurity", "WebSecurity"], "timings": {"cache_n": 60, "prompt_n": 159, "prompt_ms": 88.75, "prompt_per_token_ms": 0.5581761006289309, "prompt_per_second": 1791.5492957746478, "predicted_n": 2, "predicted_ms": 29.833, "predicted_per_token_ms": 14.9165, "predicted_per_second": 67.03985519391279}, "tps": 67.03985519391279}, {"id": "b464b1c2-315c-4f18-82b8-6d6674fa4da9", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["WebSecurity", "ApplicationSecurity"], "timings": {"cache_n": 61, "prompt_n": 121, "prompt_ms": 95.077, "prompt_per_token_ms": 0.7857603305785124, "prompt_per_second": 1272.6526920285664, "predicted_n": 2, "predicted_ms": 31.993, "predicted_per_token_ms": 15.9965, "predicted_per_second": 62.51367486637702}, "tps": 62.51367486637702}, {"id": "ab94a879-2c54-4e3e-9678-a9cb3b60ea4d", "answer": "ABC", "llm_answer": "AD", "score": 0, "topics": ["ApplicationSecurity"], "timings": {"cache_n": 59, "prompt_n": 171, "prompt_ms": 97.988, "prompt_per_token_ms": 0.5730292397660819, "prompt_per_second": 1745.1116463240398, "predicted_n": 2, "predicted_ms": 31.288, "predicted_per_token_ms": 15.644, "predicted_per_second": 63.92227051904884}, "tps": 63.92227051904884}, {"id": "f251a4eb-6a81-445d-842f-81bcae54186b", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SoftwareSecurity", "ApplicationSecurity"], "timings": {"cache_n": 60, "prompt_n": 118, "prompt_ms": 96.446, "prompt_per_token_ms": 0.8173389830508474, "prompt_per_second": 1223.4825705576177, "predicted_n": 2, "predicted_ms": 29.323, "predicted_per_token_ms": 14.6615, "predicted_per_second": 68.20584524093715}, "tps": 68.20584524093715}, {"id": "51222a6e-c629-4ab5-996c-de4d6951ac2f", "answer": "A", "llm_answer": "C", "score": 0, "topics": ["ApplicationSecurity", "WebSecurity"], "timings": {"cache_n": 59, "prompt_n": 94, "prompt_ms": 93.358, "prompt_per_token_ms": 0.9931702127659575, "prompt_per_second": 1006.8767540007284, "predicted_n": 2, "predicted_ms": 30.503, "predicted_per_token_ms": 15.2515, "predicted_per_second": 65.56732124709045}, "tps": 65.56732124709045}, {"id": "bc5b3422-3a7a-49ac-a77d-1bee63081b50", "answer": "B", "llm_answer": "AB", "score": 0, "topics": ["SystemSecurity"], "timings": {"cache_n": 58, "prompt_n": 166, "prompt_ms": 99.383, "prompt_per_token_ms": 0.5986927710843373, "prompt_per_second": 1670.3057867039633, "predicted_n": 2, "predicted_ms": 29.908, "predicted_per_token_ms": 14.954, "predicted_per_second": 66.87174000267487}, "tps": 66.87174000267487}, {"id": "903e72a1-df6a-40c4-96c3-cbb0402e83cf", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SystemSecurity", "WebSecurity"], "timings": {"cache_n": 59, "prompt_n": 94, "prompt_ms": 93.037, "prompt_per_token_ms": 0.9897553191489362, "prompt_per_second": 1010.3507206810193, "predicted_n": 2, "predicted_ms": 30.404, "predicted_per_token_ms": 15.202, "predicted_per_second": 65.78081831337983}, "tps": 65.78081831337983}, {"id": "7b093d7d-9012-4fd5-9200-6e9286a50498", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["SystemSecurity", "ApplicationSecurity", "PenTest"], "timings": {"cache_n": 59, "prompt_n": 127, "prompt_ms": 96.377, "prompt_per_token_ms": 0.7588740157480315, "prompt_per_second": 1317.7417848656837, "predicted_n": 2, "predicted_ms": 30.339, "predicted_per_token_ms": 15.1695, "predicted_per_second": 65.92175088170342}, "tps": 65.92175088170342}, {"id": "4eb084c1-517b-482a-b0c9-2bb3402c68d5", "answer": "A", "llm_answer": "C", "score": 0, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 60, "prompt_n": 153, "prompt_ms": 97.939, "prompt_per_token_ms": 0.640124183006536, "prompt_per_second": 1562.1968776483322, "predicted_n": 2, "predicted_ms": 30.177, "predicted_per_token_ms": 15.0885, "predicted_per_second": 66.27564038837525}, "tps": 66.27564038837525}, {"id": "f1f400bd-0ec0-49b3-9f01-1df7a4dbe538", "answer": "AC", "llm_answer": "AC", "score": 1, "topics": ["SystemSecurity"], "timings": {"cache_n": 59, "prompt_n": 131, "prompt_ms": 93.98, "prompt_per_token_ms": 0.7174045801526718, "prompt_per_second": 1393.9135986380081, "predicted_n": 2, "predicted_ms": 26.199, "predicted_per_token_ms": 13.0995, "predicted_per_second": 76.33879155692965}, "tps": 76.33879155692965}, {"id": "d610d2b2-9aac-49ad-acab-fc24e0a52182", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["WebSecurity", "ApplicationSecurity"], "timings": {"cache_n": 59, "prompt_n": 176, "prompt_ms": 91.408, "prompt_per_token_ms": 0.5193636363636364, "prompt_per_second": 1925.433222475057, "predicted_n": 2, "predicted_ms": 31.964, "predicted_per_token_ms": 15.982, "predicted_per_second": 62.57039169065199}, "tps": 62.57039169065199}, {"id": "8d06dea5-88d7-42be-8999-e33cf4adb41e", "answer": "AB", "llm_answer": "AB", "score": 1, "topics": ["ApplicationSecurity"], "timings": {"cache_n": 60, "prompt_n": 127, "prompt_ms": 97.145, "prompt_per_token_ms": 0.7649212598425197, "prompt_per_second": 1307.3241031447837, "predicted_n": 2, "predicted_ms": 30.466, "predicted_per_token_ms": 15.233, "predicted_per_second": 65.64695069914002}, "tps": 65.64695069914002}, {"id": "84333ec3-72d1-4f04-8682-e51f93aa1ca2", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["ApplicationSecurity", "Vulnerability"], "timings": {"cache_n": 59, "prompt_n": 105, "prompt_ms": 94.566, "prompt_per_token_ms": 0.9006285714285714, "prompt_per_second": 1110.3356386016114, "predicted_n": 2, "predicted_ms": 31.032, "predicted_per_token_ms": 15.516, "predicted_per_second": 64.44960041247744}, "tps": 64.44960041247744}, {"id": "636b9144-b96d-4770-8700-7024355ba1ba", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 58, "prompt_n": 124, "prompt_ms": 96.766, "prompt_per_token_ms": 0.7803709677419355, "prompt_per_second": 1281.4418287415, "predicted_n": 2, "predicted_ms": 30.379, "predicted_per_token_ms": 15.1895, "predicted_per_second": 65.83495177589782}, "tps": 65.83495177589782}, {"id": "36b07f4b-b1b3-496c-8026-af02be0c1977", "answer": "ACD", "llm_answer": "AD", "score": 0, "topics": ["SoftwareSecurity", "ApplicationSecurity", "Vulnerability"], "timings": {"cache_n": 59, "prompt_n": 148, "prompt_ms": 98.248, "prompt_per_token_ms": 0.6638378378378379, "prompt_per_second": 1506.3919876231575, "predicted_n": 3, "predicted_ms": 59.62, "predicted_per_token_ms": 19.87333333333333, "predicted_per_second": 50.318685005031874}, "tps": 50.318685005031874}, {"id": "54fa35cd-2b5d-4667-816a-80d6378f4773", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 62, "prompt_n": 84, "prompt_ms": 93.803, "prompt_per_token_ms": 1.1167023809523808, "prompt_per_second": 895.493747534727, "predicted_n": 2, "predicted_ms": 30.832, "predicted_per_token_ms": 15.416, "predicted_per_second": 64.86766995329528}, "tps": 64.86766995329528}, {"id": "0102d6b4-0d82-446a-976d-329cecc90480", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["WebSecurity", "ApplicationSecurity"], "timings": {"cache_n": 58, "prompt_n": 104, "prompt_ms": 93.897, "prompt_per_token_ms": 0.9028557692307693, "prompt_per_second": 1107.5966218303033, "predicted_n": 2, "predicted_ms": 30.677, "predicted_per_token_ms": 15.3385, "predicted_per_second": 65.19542328128566}, "tps": 65.19542328128566}, {"id": "e2943d3f-386e-4940-8897-db4d2019ebdb", "answer": "ACD", "llm_answer": "C", "score": 0, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 58, "prompt_n": 162, "prompt_ms": 99.828, "prompt_per_token_ms": 0.6162222222222222, "prompt_per_second": 1622.7912008654885, "predicted_n": 2, "predicted_ms": 31.272, "predicted_per_token_ms": 15.636, "predicted_per_second": 63.95497569710924}, "tps": 63.95497569710924}, {"id": "9b3067aa-e203-4e94-a49b-b14b6adac87f", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 59, "prompt_n": 123, "prompt_ms": 101.955, "prompt_per_token_ms": 0.8289024390243902, "prompt_per_second": 1206.414594674121, "predicted_n": 2, "predicted_ms": 30.689, "predicted_per_token_ms": 15.3445, "predicted_per_second": 65.16993059402392}, "tps": 65.16993059402392}, {"id": "3a6667d6-9dfc-4b9f-8f8f-1ec43c4a4e49", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SystemSecurity", "WebSecurity"], "timings": {"cache_n": 60, "prompt_n": 145, "prompt_ms": 87.282, "prompt_per_token_ms": 0.6019448275862068, "prompt_per_second": 1661.281822139731, "predicted_n": 2, "predicted_ms": 27.647, "predicted_per_token_ms": 13.8235, "predicted_per_second": 72.34057944804138}, "tps": 72.34057944804138}, {"id": "da33cff8-d623-4e13-adde-64bb3ce96a18", "answer": "AB", "llm_answer": "ABCD", "score": 0, "topics": ["SystemSecurity", "NetworkSecurity"], "timings": {"cache_n": 59, "prompt_n": 134, "prompt_ms": 94.632, "prompt_per_token_ms": 0.7062089552238806, "prompt_per_second": 1416.0114971679768, "predicted_n": 3, "predicted_ms": 60.164, "predicted_per_token_ms": 20.054666666666666, "predicted_per_second": 49.86370587062031}, "tps": 49.86370587062031}, {"id": "e186b702-d7fa-469a-9be3-bcb0c54a8666", "answer": "AC", "llm_answer": "AC", "score": 1, "topics": ["WebSecurity", "SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 62, "prompt_n": 131, "prompt_ms": 96.23, "prompt_per_token_ms": 0.7345801526717558, "prompt_per_second": 1361.3218331081782, "predicted_n": 2, "predicted_ms": 30.429, "predicted_per_token_ms": 15.2145, "predicted_per_second": 65.72677380130797}, "tps": 65.72677380130797}, {"id": "f7cf15b9-1245-4008-b776-7091f8226d2e", "answer": "BD", "llm_answer": "BD", "score": 1, "topics": ["Vulnerability"], "timings": {"cache_n": 59, "prompt_n": 152, "prompt_ms": 97.501, "prompt_per_token_ms": 0.6414539473684211, "prompt_per_second": 1558.958369657747, "predicted_n": 4, "predicted_ms": 88.469, "predicted_per_token_ms": 22.11725, "predicted_per_second": 45.2135776373645}, "tps": 45.2135776373645}, {"id": "2d44a561-2de0-4a71-acd7-7633a65bd225", "answer": "D", "llm_answer": "D", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 62, "prompt_n": 92, "prompt_ms": 93.028, "prompt_per_token_ms": 1.0111739130434783, "prompt_per_second": 988.9495635722578, "predicted_n": 2, "predicted_ms": 28.435, "predicted_per_token_ms": 14.2175, "predicted_per_second": 70.3358537014243}, "tps": 70.3358537014243}, {"id": "02310a86-6ad6-4a43-8bb8-7bac023163ed", "answer": "AC", "llm_answer": "AC", "score": 1, "topics": ["Vulnerability"], "timings": {"cache_n": 58, "prompt_n": 122, "prompt_ms": 96.283, "prompt_per_token_ms": 0.7892049180327869, "prompt_per_second": 1267.0980339208375, "predicted_n": 5, "predicted_ms": 123.469, "predicted_per_token_ms": 24.6938, "predicted_per_second": 40.49599494609983}, "tps": 40.49599494609983}, {"id": "a1bd344e-0486-4e89-b55b-4926b0c6d033", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["WebSecurity", "PenTest"], "timings": {"cache_n": 59, "prompt_n": 120, "prompt_ms": 96.684, "prompt_per_token_ms": 0.8057, "prompt_per_second": 1241.156758098548, "predicted_n": 2, "predicted_ms": 29.308, "predicted_per_token_ms": 14.654, "predicted_per_second": 68.24075337791729}, "tps": 68.24075337791729}, {"id": "40a99051-cada-4294-929b-acd7d9c39ba9", "answer": "BD", "llm_answer": "BCD", "score": 0, "topics": ["NetworkSecurity"], "timings": {"cache_n": 59, "prompt_n": 135, "prompt_ms": 101.354, "prompt_per_token_ms": 0.7507703703703703, "prompt_per_second": 1331.9651913096673, "predicted_n": 6, "predicted_ms": 146.463, "predicted_per_token_ms": 24.4105, "predicted_per_second": 40.965977755474086}, "tps": 40.965977755474086}, {"id": "5f1f1b39-9cda-469b-abb7-6b0480dabda2", "answer": "AD", "llm_answer": "AD", "score": 1, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 58, "prompt_n": 134, "prompt_ms": 85.858, "prompt_per_token_ms": 0.6407313432835822, "prompt_per_second": 1560.7165319481003, "predicted_n": 2, "predicted_ms": 27.752, "predicted_per_token_ms": 13.876, "predicted_per_second": 72.06687806284232}, "tps": 72.06687806284232}, {"id": "2108e61d-cf75-4387-b668-00fa8ec3e12a", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["WebSecurity", "SystemSecurity"], "timings": {"cache_n": 59, "prompt_n": 141, "prompt_ms": 98.378, "prompt_per_token_ms": 0.6977163120567376, "prompt_per_second": 1433.247270731261, "predicted_n": 2, "predicted_ms": 29.915, "predicted_per_token_ms": 14.9575, "predicted_per_second": 66.85609226140733}, "tps": 66.85609226140733}, {"id": "1be914ae-51aa-4a60-aef3-813ff2b9bd1a", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["SystemSecurity", "PenTest"], "timings": {"cache_n": 62, "prompt_n": 76, "prompt_ms": 92.23, "prompt_per_token_ms": 1.2135526315789473, "prompt_per_second": 824.0268892984928, "predicted_n": 2, "predicted_ms": 29.232, "predicted_per_token_ms": 14.616, "predicted_per_second": 68.41817186644774}, "tps": 68.41817186644774}, {"id": "da234937-a67a-47d4-bf51-670371b56b88", "answer": "AD", "llm_answer": "A", "score": 0, "topics": [], "timings": {"cache_n": 60, "prompt_n": 113, "prompt_ms": 95.414, "prompt_per_token_ms": 0.8443716814159292, "prompt_per_second": 1184.312574674576, "predicted_n": 2, "predicted_ms": 30.783, "predicted_per_token_ms": 15.3915, "predicted_per_second": 64.9709255108339}, "tps": 64.9709255108339}, {"id": "00a7865f-3dc5-421d-933d-f5b2c66e372e", "answer": "AC", "llm_answer": "AB", "score": 0, "topics": ["ApplicationSecurity"], "timings": {"cache_n": 59, "prompt_n": 100, "prompt_ms": 93.219, "prompt_per_token_ms": 0.93219, "prompt_per_second": 1072.7426812130575, "predicted_n": 2, "predicted_ms": 28.493, "predicted_per_token_ms": 14.2465, "predicted_per_second": 70.19267890359036}, "tps": 70.19267890359036}, {"id": "30b155d5-ebfa-439d-a965-20db77f60108", "answer": "BD", "llm_answer": "BD", "score": 1, "topics": ["SystemSecurity", "WebSecurity"], "timings": {"cache_n": 60, "prompt_n": 120, "prompt_ms": 95.193, "prompt_per_token_ms": 0.793275, "prompt_per_second": 1260.5968926286598, "predicted_n": 3, "predicted_ms": 59.152, "predicted_per_token_ms": 19.717333333333332, "predicted_per_second": 50.71679740329997}, "tps": 50.71679740329997}, {"id": "1f62b210-dbf9-4ac2-b3c1-9190ea147e9a", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 60, "prompt_n": 185, "prompt_ms": 100.767, "prompt_per_token_ms": 0.5446864864864864, "prompt_per_second": 1835.9185050661429, "predicted_n": 2, "predicted_ms": 29.095, "predicted_per_token_ms": 14.5475, "predicted_per_second": 68.74033339061695}, "tps": 68.74033339061695}, {"id": "b446d01f-3df5-4625-b8ea-99d7ac7130b1", "answer": "A", "llm_answer": "C", "score": 0, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 62, "prompt_n": 147, "prompt_ms": 97.181, "prompt_per_token_ms": 0.6610952380952381, "prompt_per_second": 1512.6413599366133, "predicted_n": 2, "predicted_ms": 30.306, "predicted_per_token_ms": 15.153, "predicted_per_second": 65.99353263380189}, "tps": 65.99353263380189}, {"id": "82912638-c863-436f-802e-744578d1c532", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["WebSecurity", "SystemSecurity"], "timings": {"cache_n": 61, "prompt_n": 103, "prompt_ms": 93.775, "prompt_per_token_ms": 0.9104368932038835, "prompt_per_second": 1098.3737669954678, "predicted_n": 2, "predicted_ms": 30.871, "predicted_per_token_ms": 15.4355, "predicted_per_second": 64.78572122704156}, "tps": 64.78572122704156}, {"id": "9a4ccefd-5b0a-40f9-827b-fbfc4c802fcd", "answer": "AB", "llm_answer": "B", "score": 0, "topics": ["ApplicationSecurity", "SoftwareSecurity"], "timings": {"cache_n": 58, "prompt_n": 141, "prompt_ms": 96.879, "prompt_per_token_ms": 0.6870851063829787, "prompt_per_second": 1455.423776050537, "predicted_n": 2, "predicted_ms": 28.616, "predicted_per_token_ms": 14.308, "predicted_per_second": 69.89097008666481}, "tps": 69.89097008666481}, {"id": "e65e4176-b461-43d2-9002-620827d8a74e", "answer": "AC", "llm_answer": "AD", "score": 0, "topics": ["SystemSecurity", "NetworkSecurity"], "timings": {"cache_n": 59, "prompt_n": 127, "prompt_ms": 84.735, "prompt_per_token_ms": 0.6672047244094488, "prompt_per_second": 1498.7903463739895, "predicted_n": 2, "predicted_ms": 29.266, "predicted_per_token_ms": 14.633, "predicted_per_second": 68.33868653044489}, "tps": 68.33868653044489}, {"id": "89913009-4917-4f1a-88be-7769772e8a48", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 58, "prompt_n": 185, "prompt_ms": 102.608, "prompt_per_token_ms": 0.5546378378378378, "prompt_per_second": 1802.9783252767813, "predicted_n": 2, "predicted_ms": 29.501, "predicted_per_token_ms": 14.7505, "predicted_per_second": 67.79431205721839}, "tps": 67.79431205721839}, {"id": "3b1f25bd-25ef-49bf-9df5-28ab8315c94b", "answer": "ACD", "llm_answer": "ABCD", "score": 0, "topics": ["WebSecurity"], "timings": {"cache_n": 59, "prompt_n": 196, "prompt_ms": 102.074, "prompt_per_token_ms": 0.5207857142857143, "prompt_per_second": 1920.175558908243, "predicted_n": 3, "predicted_ms": 58.02, "predicted_per_token_ms": 19.34, "predicted_per_second": 51.70630816959669}, "tps": 51.70630816959669}, {"id": "a4c6e381-9d21-4c8c-8fa6-8c5d09d0a845", "answer": "D", "llm_answer": "D", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 61, "prompt_n": 134, "prompt_ms": 98.598, "prompt_per_token_ms": 0.7358059701492538, "prompt_per_second": 1359.053936185318, "predicted_n": 2, "predicted_ms": 29.655, "predicted_per_token_ms": 14.8275, "predicted_per_second": 67.44225257123588}, "tps": 67.44225257123588}, {"id": "393e4eb3-b6d6-406b-92ba-f11785412b58", "answer": "AB", "llm_answer": "B", "score": 0, "topics": ["WebSecurity"], "timings": {"cache_n": 61, "prompt_n": 185, "prompt_ms": 99.192, "prompt_per_token_ms": 0.536172972972973, "prompt_per_second": 1865.0697636906204, "predicted_n": 2, "predicted_ms": 31.628, "predicted_per_token_ms": 15.814, "predicted_per_second": 63.2351081320349}, "tps": 63.2351081320349}, {"id": "2f85c70e-881f-449e-b122-d55b00f88527", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["WebSecurity", "ApplicationSecurity"], "timings": {"cache_n": 61, "prompt_n": 111, "prompt_ms": 95.01, "prompt_per_token_ms": 0.855945945945946, "prompt_per_second": 1168.2980738869594, "predicted_n": 2, "predicted_ms": 29.108, "predicted_per_token_ms": 14.554, "predicted_per_second": 68.7096330905593}, "tps": 68.7096330905593}, {"id": "300943f3-c4f2-4bec-aaae-7c0947c60aef", "answer": "BD", "llm_answer": "B", "score": 0, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 62, "prompt_n": 130, "prompt_ms": 97.721, "prompt_per_token_ms": 0.7517, "prompt_per_second": 1330.3179459890914, "predicted_n": 2, "predicted_ms": 29.459, "predicted_per_token_ms": 14.7295, "predicted_per_second": 67.89096710682644}, "tps": 67.89096710682644}, {"id": "c03adffe-3d27-4939-8a7e-b164c705f9ea", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SystemSecurity", "Vulnerability", "PenTest"], "timings": {"cache_n": 59, "prompt_n": 107, "prompt_ms": 94.339, "prompt_per_token_ms": 0.8816728971962616, "prompt_per_second": 1134.2074857694063, "predicted_n": 2, "predicted_ms": 28.75, "predicted_per_token_ms": 14.375, "predicted_per_second": 69.56521739130434}, "tps": 69.56521739130434}, {"id": "d7285ef7-e2b1-4c68-a4be-c0acd529a095", "answer": "D", "llm_answer": "D", "score": 1, "topics": ["ApplicationSecurity", "SystemSecurity", "Vulnerability"], "timings": {"cache_n": 61, "prompt_n": 134, "prompt_ms": 105.787, "prompt_per_token_ms": 0.7894552238805971, "prompt_per_second": 1266.696285933054, "predicted_n": 2, "predicted_ms": 30.977, "predicted_per_token_ms": 15.4885, "predicted_per_second": 64.56403137811925}, "tps": 64.56403137811925}, {"id": "703ef991-cfd8-4f14-83f7-8ca783b37302", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["WebSecurity", "PenTest"], "timings": {"cache_n": 59, "prompt_n": 161, "prompt_ms": 92.784, "prompt_per_token_ms": 0.5762981366459627, "prompt_per_second": 1735.2129677530606, "predicted_n": 2, "predicted_ms": 26.653, "predicted_per_token_ms": 13.3265, "predicted_per_second": 75.03845720931977}, "tps": 75.03845720931977}, {"id": "88616a82-7ba2-4c14-909a-0ed6aacfa83d", "answer": "AC", "llm_answer": "A", "score": 0, "topics": ["WebSecurity"], "timings": {"cache_n": 59, "prompt_n": 128, "prompt_ms": 89.915, "prompt_per_token_ms": 0.7024609375, "prompt_per_second": 1423.5667018851136, "predicted_n": 2, "predicted_ms": 27.453, "predicted_per_token_ms": 13.7265, "predicted_per_second": 72.85178304739009}, "tps": 72.85178304739009}, {"id": "9f02a6d7-a1bf-4911-ae66-4bfed8cb8c99", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["WebSecurity", "ApplicationSecurity"], "timings": {"cache_n": 59, "prompt_n": 113, "prompt_ms": 82.211, "prompt_per_token_ms": 0.7275309734513274, "prompt_per_second": 1374.5119266278236, "predicted_n": 2, "predicted_ms": 27.743, "predicted_per_token_ms": 13.8715, "predicted_per_second": 72.09025700176622}, "tps": 72.09025700176622}, {"id": "dd0b369a-47ae-4160-8234-e4c4da7edfbb", "answer": "ABC", "llm_answer": "ABCD", "score": 0, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 58, "prompt_n": 173, "prompt_ms": 88.088, "prompt_per_token_ms": 0.509179190751445, "prompt_per_second": 1963.945145763328, "predicted_n": 3, "predicted_ms": 54.709, "predicted_per_token_ms": 18.236333333333334, "predicted_per_second": 54.83558463872488}, "tps": 54.83558463872488}, {"id": "1e9594f2-d16b-49c0-9da6-6ab04c81160f", "answer": "AC", "llm_answer": "B", "score": 0, "topics": ["SystemSecurity"], "timings": {"cache_n": 62, "prompt_n": 141, "prompt_ms": 89.763, "prompt_per_token_ms": 0.6366170212765958, "prompt_per_second": 1570.8031148691555, "predicted_n": 2, "predicted_ms": 25.718, "predicted_per_token_ms": 12.859, "predicted_per_second": 77.76654483241309}, "tps": 77.76654483241309}, {"id": "66246098-b5c8-4aff-9353-7834157006ef", "answer": "AD", "llm_answer": "A", "score": 0, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 60, "prompt_n": 107, "prompt_ms": 80.986, "prompt_per_token_ms": 0.7568785046728972, "prompt_per_second": 1321.2160126441606, "predicted_n": 2, "predicted_ms": 27.38, "predicted_per_token_ms": 13.69, "predicted_per_second": 73.04601899196494}, "tps": 73.04601899196494}, {"id": "1b56cd95-e2f7-40b9-b323-69aa26e91a60", "answer": "A", "llm_answer": "D", "score": 0, "topics": ["Vulnerability"], "timings": {"cache_n": 58, "prompt_n": 158, "prompt_ms": 86.173, "prompt_per_token_ms": 0.5453987341772152, "prompt_per_second": 1833.5209404337786, "predicted_n": 2, "predicted_ms": 27.86, "predicted_per_token_ms": 13.93, "predicted_per_second": 71.78750897343862}, "tps": 71.78750897343862}, {"id": "a93c45b9-e1fe-46a0-84cd-04778c3b1856", "answer": "AD", "llm_answer": "A", "score": 0, "topics": ["WebSecurity"], "timings": {"cache_n": 62, "prompt_n": 127, "prompt_ms": 89.654, "prompt_per_token_ms": 0.7059370078740157, "prompt_per_second": 1416.5569857451983, "predicted_n": 2, "predicted_ms": 28.146, "predicted_per_token_ms": 14.073, "predicted_per_second": 71.0580544304697}, "tps": 71.0580544304697}, {"id": "65c26287-7bff-47b0-813a-ebc7cbe6af06", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["SystemSecurity"], "timings": {"cache_n": 59, "prompt_n": 202, "prompt_ms": 96.839, "prompt_per_token_ms": 0.4794009900990099, "prompt_per_second": 2085.936451223164, "predicted_n": 2, "predicted_ms": 27.777, "predicted_per_token_ms": 13.8885, "predicted_per_second": 72.00201605644958}, "tps": 72.00201605644958}, {"id": "4928ec1c-9388-4e48-8f53-808e55960136", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SystemSecurity"], "timings": {"cache_n": 66, "prompt_n": 100, "prompt_ms": 81.78, "prompt_per_token_ms": 0.8178, "prompt_per_second": 1222.792858889704, "predicted_n": 2, "predicted_ms": 28.406, "predicted_per_token_ms": 14.203, "predicted_per_second": 70.40766035344646}, "tps": 70.40766035344646}, {"id": "2df4501f-b222-4cee-95db-0abfa5fc6567", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["PenTest", "SystemSecurity"], "timings": {"cache_n": 59, "prompt_n": 102, "prompt_ms": 78.872, "prompt_per_token_ms": 0.7732549019607843, "prompt_per_second": 1293.234607972411, "predicted_n": 2, "predicted_ms": 27.193, "predicted_per_token_ms": 13.5965, "predicted_per_second": 73.54833964623248}, "tps": 73.54833964623248}, {"id": "45cab574-5360-4363-b934-d111ead278e3", "answer": "B", "llm_answer": "D", "score": 0, "topics": ["ApplicationSecurity"], "timings": {"cache_n": 58, "prompt_n": 174, "prompt_ms": 84.897, "prompt_per_token_ms": 0.4879137931034483, "prompt_per_second": 2049.542386656772, "predicted_n": 2, "predicted_ms": 27.394, "predicted_per_token_ms": 13.697, "predicted_per_second": 73.00868803387604}, "tps": 73.00868803387604}, {"id": "0352a144-7358-4191-9829-f41d78ed77ec", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 58, "prompt_n": 211, "prompt_ms": 99.576, "prompt_per_token_ms": 0.4719241706161137, "prompt_per_second": 2118.984494255644, "predicted_n": 2, "predicted_ms": 30.264, "predicted_per_token_ms": 15.132, "predicted_per_second": 66.08511763150939}, "tps": 66.08511763150939}, {"id": "be3cea8e-9286-40ec-903e-48feafa540e7", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 61, "prompt_n": 129, "prompt_ms": 96.026, "prompt_per_token_ms": 0.7443875968992247, "prompt_per_second": 1343.3861662466416, "predicted_n": 2, "predicted_ms": 32.696, "predicted_per_token_ms": 16.348, "predicted_per_second": 61.169562025935896}, "tps": 61.169562025935896}, {"id": "4f15ffdd-06c8-461b-b86e-03148d7a469d", "answer": "D", "llm_answer": "D", "score": 1, "topics": ["PenTest"], "timings": {"cache_n": 59, "prompt_n": 146, "prompt_ms": 100.712, "prompt_per_token_ms": 0.6898082191780822, "prompt_per_second": 1449.6782905711334, "predicted_n": 2, "predicted_ms": 31.19, "predicted_per_token_ms": 15.595, "predicted_per_second": 64.12311638345624}, "tps": 64.12311638345624}, {"id": "425f1a6d-b739-4553-b332-1dbe7d007d54", "answer": "A", "llm_answer": "AD", "score": 0, "topics": ["ApplicationSecurity", "WebSecurity"], "timings": {"cache_n": 60, "prompt_n": 214, "prompt_ms": 102.769, "prompt_per_token_ms": 0.4802289719626168, "prompt_per_second": 2082.3400052545026, "predicted_n": 2, "predicted_ms": 30.805, "predicted_per_token_ms": 15.4025, "predicted_per_second": 64.92452523940919}, "tps": 64.92452523940919}, {"id": "32d310c1-925f-4a14-81d4-1187813f3e84", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SystemSecurity"], "timings": {"cache_n": 62, "prompt_n": 129, "prompt_ms": 96.671, "prompt_per_token_ms": 0.7493875968992249, "prompt_per_second": 1334.4229396613255, "predicted_n": 2, "predicted_ms": 29.62, "predicted_per_token_ms": 14.81, "predicted_per_second": 67.5219446320054}, "tps": 67.5219446320054}, {"id": "47d37e9e-ef27-43b5-8433-f809d82b8028", "answer": "AC", "llm_answer": "AC", "score": 1, "topics": ["WebSecurity", "PenTest"], "timings": {"cache_n": 61, "prompt_n": 103, "prompt_ms": 95.048, "prompt_per_token_ms": 0.9227961165048544, "prompt_per_second": 1083.662991330696, "predicted_n": 2, "predicted_ms": 30.538, "predicted_per_token_ms": 15.269, "predicted_per_second": 65.49217368524461}, "tps": 65.49217368524461}, {"id": "22c185b0-6c0b-4fdc-aef8-b481eec19577", "answer": "AD", "llm_answer": "A", "score": 0, "topics": ["ApplicationSecurity", "WebSecurity", "Vulnerability"], "timings": {"cache_n": 59, "prompt_n": 111, "prompt_ms": 94.407, "prompt_per_token_ms": 0.8505135135135135, "prompt_per_second": 1175.760272013728, "predicted_n": 2, "predicted_ms": 29.312, "predicted_per_token_ms": 14.656, "predicted_per_second": 68.23144104803494}, "tps": 68.23144104803494}, {"id": "d2c78265-42c8-4b5d-b615-33e8c899b6f3", "answer": "A", "llm_answer": "D", "score": 0, "topics": ["WebSecurity", "PenTest"], "timings": {"cache_n": 61, "prompt_n": 128, "prompt_ms": 96.397, "prompt_per_token_ms": 0.7531015625, "prompt_per_second": 1327.84215276409, "predicted_n": 2, "predicted_ms": 29.25, "predicted_per_token_ms": 14.625, "predicted_per_second": 68.37606837606837}, "tps": 68.37606837606837}, {"id": "a7a8cb93-0a85-4748-a7ba-f32ca1d93c01", "answer": "D", "llm_answer": "A", "score": 0, "topics": ["SystemSecurity", "Vulnerability", "PenTest"], "timings": {"cache_n": 60, "prompt_n": 124, "prompt_ms": 95.65, "prompt_per_token_ms": 0.7713709677419355, "prompt_per_second": 1296.393099843178, "predicted_n": 3, "predicted_ms": 51.435, "predicted_per_token_ms": 17.145, "predicted_per_second": 58.326042578011084}, "tps": 58.326042578011084}, {"id": "2dd7c99c-9be2-47af-9c58-24f7f1f4d611", "answer": "AD", "llm_answer": "AD", "score": 1, "topics": ["NetworkSecurity"], "timings": {"cache_n": 59, "prompt_n": 124, "prompt_ms": 89.377, "prompt_per_token_ms": 0.7207822580645161, "prompt_per_second": 1387.381541112367, "predicted_n": 2, "predicted_ms": 32.208, "predicted_per_token_ms": 16.104, "predicted_per_second": 62.09637357178341}, "tps": 62.09637357178341}, {"id": "a6cb40b7-765b-4aab-999a-a615a8ed3843", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SoftwareSecurity", "ApplicationSecurity", "Vulnerability"], "timings": {"cache_n": 59, "prompt_n": 147, "prompt_ms": 98.396, "prompt_per_token_ms": 0.6693605442176871, "prompt_per_second": 1493.9631692345217, "predicted_n": 2, "predicted_ms": 29.135, "predicted_per_token_ms": 14.5675, "predicted_per_second": 68.64595846919512}, "tps": 68.64595846919512}, {"id": "32275276-6308-4d4a-8d07-40a4fc746c1f", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["ApplicationSecurity"], "timings": {"cache_n": 60, "prompt_n": 124, "prompt_ms": 95.725, "prompt_per_token_ms": 0.7719758064516129, "prompt_per_second": 1295.3773831287542, "predicted_n": 2, "predicted_ms": 29.012, "predicted_per_token_ms": 14.506, "predicted_per_second": 68.93699158968703}, "tps": 68.93699158968703}, {"id": "eaac27a8-91a6-4769-af04-1e2bfa899a03", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["SystemSecurity", "WebSecurity"], "timings": {"cache_n": 59, "prompt_n": 123, "prompt_ms": 97.562, "prompt_per_token_ms": 0.7931869918699187, "prompt_per_second": 1260.7367622639963, "predicted_n": 2, "predicted_ms": 29.053, "predicted_per_token_ms": 14.5265, "predicted_per_second": 68.83970674284927}, "tps": 68.83970674284927}, {"id": "49f60ea3-a121-4fc3-b0cc-34e04363dbc0", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["ApplicationSecurity", "Vulnerability"], "timings": {"cache_n": 59, "prompt_n": 139, "prompt_ms": 97.63, "prompt_per_token_ms": 0.7023741007194244, "prompt_per_second": 1423.742702038308, "predicted_n": 2, "predicted_ms": 29.525, "predicted_per_token_ms": 14.7625, "predicted_per_second": 67.73920406435225}, "tps": 67.73920406435225}, {"id": "f5b9f896-8442-4c98-8065-1f4954094939", "answer": "ABD", "llm_answer": "BD", "score": 0, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 60, "prompt_n": 95, "prompt_ms": 92.846, "prompt_per_token_ms": 0.9773263157894737, "prompt_per_second": 1023.1997070417681, "predicted_n": 2, "predicted_ms": 30.211, "predicted_per_token_ms": 15.1055, "predicted_per_second": 66.20105259673629}, "tps": 66.20105259673629}, {"id": "801810d9-6c58-4146-9714-9cf8e6b62394", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["ApplicationSecurity", "SystemSecurity"], "timings": {"cache_n": 58, "prompt_n": 135, "prompt_ms": 96.192, "prompt_per_token_ms": 0.7125333333333332, "prompt_per_second": 1403.4431137724553, "predicted_n": 2, "predicted_ms": 29.855, "predicted_per_token_ms": 14.9275, "predicted_per_second": 66.9904538603249}, "tps": 66.9904538603249}, {"id": "05a61adf-de55-4abb-a96b-10c81b8f77b0", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SystemSecurity"], "timings": {"cache_n": 59, "prompt_n": 141, "prompt_ms": 97.988, "prompt_per_token_ms": 0.694950354609929, "prompt_per_second": 1438.9517083724538, "predicted_n": 2, "predicted_ms": 29.956, "predicted_per_token_ms": 14.978, "predicted_per_second": 66.76458806249165}, "tps": 66.76458806249165}, {"id": "fe2514cc-b0e1-4465-8759-b1ee0a7d354f", "answer": "BD", "llm_answer": "B", "score": 0, "topics": ["ApplicationSecurity", "Vulnerability"], "timings": {"cache_n": 59, "prompt_n": 113, "prompt_ms": 97.041, "prompt_per_token_ms": 0.8587699115044247, "prompt_per_second": 1164.456260755763, "predicted_n": 2, "predicted_ms": 29.454, "predicted_per_token_ms": 14.727, "predicted_per_second": 67.90249202145719}, "tps": 67.90249202145719}, {"id": "ca65d3f2-0aa9-45d7-83fc-851e6b4632c8", "answer": "BC", "llm_answer": "BC", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 60, "prompt_n": 149, "prompt_ms": 96.203, "prompt_per_token_ms": 0.6456577181208054, "prompt_per_second": 1548.8082492229971, "predicted_n": 3, "predicted_ms": 53.674, "predicted_per_token_ms": 17.891333333333332, "predicted_per_second": 55.892983567462835}, "tps": 55.892983567462835}, {"id": "92cb9a09-366a-446a-acf8-4141393fd93d", "answer": "A", "llm_answer": "D", "score": 0, "topics": ["SoftwareSecurity", "ApplicationSecurity", "Vulnerability"], "timings": {"cache_n": 61, "prompt_n": 134, "prompt_ms": 88.581, "prompt_per_token_ms": 0.6610522388059702, "prompt_per_second": 1512.7397523170882, "predicted_n": 2, "predicted_ms": 30.244, "predicted_per_token_ms": 15.122, "predicted_per_second": 66.12881893929375}, "tps": 66.12881893929375}, {"id": "b047f025-46db-4ea2-adb5-f40cbd3024de", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["ApplicationSecurity", "PenTest"], "timings": {"cache_n": 60, "prompt_n": 104, "prompt_ms": 93.182, "prompt_per_token_ms": 0.8959807692307693, "prompt_per_second": 1116.0953832285206, "predicted_n": 2, "predicted_ms": 29.864, "predicted_per_token_ms": 14.932, "predicted_per_second": 66.9702652022502}, "tps": 66.9702652022502}, {"id": "8808e231-01c7-4088-aa52-d32492713b50", "answer": "A", "llm_answer": "ABCD", "score": 0, "topics": ["PenTest"], "timings": {"cache_n": 59, "prompt_n": 126, "prompt_ms": 95.003, "prompt_per_token_ms": 0.7539920634920635, "prompt_per_second": 1326.2739071397743, "predicted_n": 8, "predicted_ms": 204.016, "predicted_per_token_ms": 25.502, "predicted_per_second": 39.21261077562544}, "tps": 39.21261077562544}, {"id": "ae5f741c-e874-4d00-a49b-78379a7d0284", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 59, "prompt_n": 209, "prompt_ms": 102.694, "prompt_per_token_ms": 0.49135885167464116, "prompt_per_second": 2035.1724540868988, "predicted_n": 2, "predicted_ms": 29.035, "predicted_per_token_ms": 14.5175, "predicted_per_second": 68.88238333046323}, "tps": 68.88238333046323}, {"id": "e990d030-7a80-4c69-ac82-c0bc7226f9c3", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["WebSecurity", "SystemSecurity"], "timings": {"cache_n": 60, "prompt_n": 127, "prompt_ms": 99.001, "prompt_per_token_ms": 0.7795354330708661, "prompt_per_second": 1282.8153250977264, "predicted_n": 2, "predicted_ms": 29.97, "predicted_per_token_ms": 14.985, "predicted_per_second": 66.73340006673341}, "tps": 66.73340006673341}, {"id": "8b37dc0b-82ae-44cb-8955-2d02b8332c96", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["SystemSecurity", "PenTest"], "timings": {"cache_n": 59, "prompt_n": 127, "prompt_ms": 97.557, "prompt_per_token_ms": 0.7681653543307086, "prompt_per_second": 1301.8030484742253, "predicted_n": 2, "predicted_ms": 30.732, "predicted_per_token_ms": 15.366, "predicted_per_second": 65.07874528179097}, "tps": 65.07874528179097}, {"id": "ea06a7ac-06e2-425d-b1b0-9be0a1b0f30a", "answer": "AD", "llm_answer": "D", "score": 0, "topics": ["ApplicationSecurity", "SystemSecurity"], "timings": {"cache_n": 59, "prompt_n": 122, "prompt_ms": 96.199, "prompt_per_token_ms": 0.7885163934426229, "prompt_per_second": 1268.2044511897213, "predicted_n": 2, "predicted_ms": 30.746, "predicted_per_token_ms": 15.373, "predicted_per_second": 65.04911207962012}, "tps": 65.04911207962012}, {"id": "7f62d9d0-e532-4583-924d-b891555f8348", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["Vulnerability", "PenTest"], "timings": {"cache_n": 59, "prompt_n": 109, "prompt_ms": 94.045, "prompt_per_token_ms": 0.8627981651376146, "prompt_per_second": 1159.0196182678503, "predicted_n": 2, "predicted_ms": 30.504, "predicted_per_token_ms": 15.252, "predicted_per_second": 65.56517178075006}, "tps": 65.56517178075006}, {"id": "540b7f93-1eed-40de-b086-0765a8a805d2", "answer": "BD", "llm_answer": "BC", "score": 0, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 59, "prompt_n": 129, "prompt_ms": 87.903, "prompt_per_token_ms": 0.6814186046511629, "prompt_per_second": 1467.5267055731883, "predicted_n": 4, "predicted_ms": 82.277, "predicted_per_token_ms": 20.56925, "predicted_per_second": 48.61625970805936}, "tps": 48.61625970805936}, {"id": "4314579b-6757-4c8a-a194-49db3cef26e6", "answer": "ACD", "llm_answer": "ACD", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 60, "prompt_n": 200, "prompt_ms": 101.129, "prompt_per_token_ms": 0.505645, "prompt_per_second": 1977.6720821920517, "predicted_n": 3, "predicted_ms": 60.623, "predicted_per_token_ms": 20.207666666666665, "predicted_per_second": 49.48616861587186}, "tps": 49.48616861587186}, {"id": "9db3d589-5b01-4673-b335-8462b2f20dfa", "answer": "BD", "llm_answer": "BD", "score": 1, "topics": ["Vulnerability"], "timings": {"cache_n": 60, "prompt_n": 159, "prompt_ms": 96.91, "prompt_per_token_ms": 0.6094968553459119, "prompt_per_second": 1640.6975544319473, "predicted_n": 4, "predicted_ms": 97.215, "predicted_per_token_ms": 24.30375, "predicted_per_second": 41.14591369644602}, "tps": 41.14591369644602}, {"id": "b5520015-9812-4a56-b113-d7d45f8c708b", "answer": "AD", "llm_answer": "A", "score": 0, "topics": ["PenTest", "NetworkSecurity"], "timings": {"cache_n": 60, "prompt_n": 131, "prompt_ms": 98.081, "prompt_per_token_ms": 0.7487099236641221, "prompt_per_second": 1335.6307541725716, "predicted_n": 2, "predicted_ms": 30.496, "predicted_per_token_ms": 15.248, "predicted_per_second": 65.58237145855195}, "tps": 65.58237145855195}, {"id": "4afa9eff-3b8c-4cb0-a1a7-60dc215b0b7b", "answer": "BC", "llm_answer": "ABCD", "score": 0, "topics": ["ApplicationSecurity", "SystemSecurity"], "timings": {"cache_n": 62, "prompt_n": 127, "prompt_ms": 95.889, "prompt_per_token_ms": 0.7550314960629921, "prompt_per_second": 1324.4480597357363, "predicted_n": 3, "predicted_ms": 58.561, "predicted_per_token_ms": 19.520333333333333, "predicted_per_second": 51.22863339082325}, "tps": 51.22863339082325}, {"id": "a7def17b-3864-4a6f-b341-44289f82ee31", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["NetworkSecurity", "PenTest"], "timings": {"cache_n": 62, "prompt_n": 110, "prompt_ms": 94.869, "prompt_per_token_ms": 0.8624454545454545, "prompt_per_second": 1159.4936175146781, "predicted_n": 2, "predicted_ms": 29.783, "predicted_per_token_ms": 14.8915, "predicted_per_second": 67.15240237719505}, "tps": 67.15240237719505}, {"id": "c8aa36df-a37c-4276-bf19-16be5fced19a", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["ApplicationSecurity"], "timings": {"cache_n": 60, "prompt_n": 152, "prompt_ms": 97.285, "prompt_per_token_ms": 0.6400328947368421, "prompt_per_second": 1562.419694711415, "predicted_n": 2, "predicted_ms": 28.945, "predicted_per_token_ms": 14.4725, "predicted_per_second": 69.09656244601831}, "tps": 69.09656244601831}, {"id": "d56d9e8f-e8ea-49a5-a114-1552d731a149", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SoftwareSecurity", "SystemSecurity", "Vulnerability"], "timings": {"cache_n": 60, "prompt_n": 156, "prompt_ms": 98.702, "prompt_per_token_ms": 0.6327051282051281, "prompt_per_second": 1580.515085813864, "predicted_n": 2, "predicted_ms": 29.494, "predicted_per_token_ms": 14.747, "predicted_per_second": 67.81040211568455}, "tps": 67.81040211568455}, {"id": "7daec725-2522-44c8-8ce7-37942b725dc1", "answer": "AC", "llm_answer": "AC", "score": 1, "topics": ["SystemSecurity", "PenTest"], "timings": {"cache_n": 64, "prompt_n": 119, "prompt_ms": 93.877, "prompt_per_token_ms": 0.7888823529411765, "prompt_per_second": 1267.616136007755, "predicted_n": 4, "predicted_ms": 79.956, "predicted_per_token_ms": 19.989, "predicted_per_second": 50.02751513332333}, "tps": 50.02751513332333}, {"id": "559b103c-7490-40d1-842c-8d91b5e3128c", "answer": "BC", "llm_answer": "B", "score": 0, "topics": ["ApplicationSecurity", "SoftwareSecurity"], "timings": {"cache_n": 62, "prompt_n": 123, "prompt_ms": 96.645, "prompt_per_token_ms": 0.7857317073170731, "prompt_per_second": 1272.69905323607, "predicted_n": 2, "predicted_ms": 28.811, "predicted_per_token_ms": 14.4055, "predicted_per_second": 69.41793065148728}, "tps": 69.41793065148728}, {"id": "996af594-d37d-4cc3-b812-33087a007a10", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["SystemSecurity"], "timings": {"cache_n": 59, "prompt_n": 117, "prompt_ms": 95.349, "prompt_per_token_ms": 0.8149487179487179, "prompt_per_second": 1227.0710757323097, "predicted_n": 2, "predicted_ms": 28.666, "predicted_per_token_ms": 14.333, "predicted_per_second": 69.76906439684643}, "tps": 69.76906439684643}, {"id": "22b29a24-e05d-4e9a-ba76-ddeb6b2bccce", "answer": "A", "llm_answer": "A", "score": 1, "topics": [], "timings": {"cache_n": 60, "prompt_n": 138, "prompt_ms": 97.287, "prompt_per_token_ms": 0.7049782608695653, "prompt_per_second": 1418.4834561657774, "predicted_n": 2, "predicted_ms": 29.379, "predicted_per_token_ms": 14.6895, "predicted_per_second": 68.07583648184077}, "tps": 68.07583648184077}, {"id": "00005b73-e086-4049-b91e-eb577924c6b1", "answer": "AC", "llm_answer": "AC", "score": 1, "topics": ["SystemSecurity", "WebSecurity"], "timings": {"cache_n": 59, "prompt_n": 176, "prompt_ms": 99.131, "prompt_per_token_ms": 0.5632443181818182, "prompt_per_second": 1775.4284734341427, "predicted_n": 2, "predicted_ms": 30.4, "predicted_per_token_ms": 15.2, "predicted_per_second": 65.78947368421053}, "tps": 65.78947368421053}, {"id": "31bc3069-da97-456d-96b9-272b7514c526", "answer": "C", "llm_answer": "A", "score": 0, "topics": ["WebSecurity"], "timings": {"cache_n": 61, "prompt_n": 177, "prompt_ms": 97.811, "prompt_per_token_ms": 0.5526045197740114, "prompt_per_second": 1809.612415781456, "predicted_n": 2, "predicted_ms": 30.059, "predicted_per_token_ms": 15.0295, "predicted_per_second": 66.53581290129412}, "tps": 66.53581290129412}, {"id": "c488d8e8-2505-43fe-90ee-e4a22611c153", "answer": "BD", "llm_answer": "B", "score": 0, "topics": ["PenTest", "SystemSecurity"], "timings": {"cache_n": 59, "prompt_n": 105, "prompt_ms": 94.082, "prompt_per_token_ms": 0.8960190476190476, "prompt_per_second": 1116.047703067537, "predicted_n": 2, "predicted_ms": 32.383, "predicted_per_token_ms": 16.1915, "predicted_per_second": 61.760800419973435}, "tps": 61.760800419973435}, {"id": "9272ea58-95a8-4530-84e9-b1e7751cbfb7", "answer": "D", "llm_answer": "C", "score": 0, "topics": ["PenTest", "SystemSecurity"], "timings": {"cache_n": 58, "prompt_n": 126, "prompt_ms": 96.035, "prompt_per_token_ms": 0.7621825396825397, "prompt_per_second": 1312.02165877024, "predicted_n": 2, "predicted_ms": 30.307, "predicted_per_token_ms": 15.1535, "predicted_per_second": 65.99135513247765}, "tps": 65.99135513247765}, {"id": "710ab3e2-be2e-49c0-b367-7af93d3f1089", "answer": "AC", "llm_answer": "C", "score": 0, "topics": ["SystemSecurity", "SoftwareSecurity"], "timings": {"cache_n": 59, "prompt_n": 101, "prompt_ms": 95.1, "prompt_per_token_ms": 0.9415841584158415, "prompt_per_second": 1062.0399579390116, "predicted_n": 2, "predicted_ms": 32.184, "predicted_per_token_ms": 16.092, "predicted_per_second": 62.142679592344024}, "tps": 62.142679592344024}, {"id": "3437a585-16fb-428f-8cff-40f2973f8c7d", "answer": "D", "llm_answer": "ABCD", "score": 0, "topics": [], "timings": {"cache_n": 58, "prompt_n": 133, "prompt_ms": 97.278, "prompt_per_token_ms": 0.7314135338345865, "prompt_per_second": 1367.2156088735376, "predicted_n": 64, "predicted_ms": 1826.873, "predicted_per_token_ms": 28.544890625, "predicted_per_second": 35.03253920770628}, "tps": 35.03253920770628}, {"id": "11a742b8-4af8-4556-9b50-f233fdc1c606", "answer": "BD", "llm_answer": "BD", "score": 1, "topics": ["SystemSecurity"], "timings": {"cache_n": 61, "prompt_n": 148, "prompt_ms": 101.988, "prompt_per_token_ms": 0.6891081081081081, "prompt_per_second": 1451.151115817547, "predicted_n": 2, "predicted_ms": 29.602, "predicted_per_token_ms": 14.801, "predicted_per_second": 67.56300249983109}, "tps": 67.56300249983109}, {"id": "46f0684a-e1df-49e4-95c4-88e0eafd3305", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SystemSecurity", "PenTest"], "timings": {"cache_n": 61, "prompt_n": 59, "prompt_ms": 93.595, "prompt_per_token_ms": 1.5863559322033898, "prompt_per_second": 630.3755542496929, "predicted_n": 2, "predicted_ms": 30.92, "predicted_per_token_ms": 15.46, "predicted_per_second": 64.68305304010349}, "tps": 64.68305304010349}, {"id": "25dd51ea-7878-46e7-bf1f-bc65e7ed3d24", "answer": "AD", "llm_answer": "AC", "score": 0, "topics": ["SystemSecurity", "PenTest"], "timings": {"cache_n": 58, "prompt_n": 125, "prompt_ms": 96.573, "prompt_per_token_ms": 0.7725839999999999, "prompt_per_second": 1294.3576361923106, "predicted_n": 4, "predicted_ms": 89.439, "predicted_per_token_ms": 22.35975, "predicted_per_second": 44.72321917731639}, "tps": 44.72321917731639}, {"id": "566f478a-4f0b-4277-b757-db950dfca468", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["WebSecurity", "ApplicationSecurity"], "timings": {"cache_n": 60, "prompt_n": 122, "prompt_ms": 95.541, "prompt_per_token_ms": 0.7831229508196721, "prompt_per_second": 1276.9386964758587, "predicted_n": 2, "predicted_ms": 28.573, "predicted_per_token_ms": 14.2865, "predicted_per_second": 69.99615021173835}, "tps": 69.99615021173835}, {"id": "765e4132-e4e9-4be9-810e-7a7fbb120c20", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["MemorySafety"], "timings": {"cache_n": 58, "prompt_n": 207, "prompt_ms": 100.968, "prompt_per_token_ms": 0.487768115942029, "prompt_per_second": 2050.154504397433, "predicted_n": 2, "predicted_ms": 29.635, "predicted_per_token_ms": 14.8175, "predicted_per_second": 67.48776784207863}, "tps": 67.48776784207863}, {"id": "02e3eb76-8e48-4059-bb81-f88ab382ec36", "answer": "D", "llm_answer": "D", "score": 1, "topics": ["WebSecurity", "ApplicationSecurity"], "timings": {"cache_n": 62, "prompt_n": 56, "prompt_ms": 89.833, "prompt_per_token_ms": 1.6041607142857142, "prompt_per_second": 623.3789364710074, "predicted_n": 2, "predicted_ms": 28.747, "predicted_per_token_ms": 14.3735, "predicted_per_second": 69.57247712804815}, "tps": 69.57247712804815}, {"id": "6a83b92a-6fbc-49be-8a66-ad438bd79733", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SoftwareSecurity", "Vulnerability"], "timings": {"cache_n": 58, "prompt_n": 141, "prompt_ms": 90.703, "prompt_per_token_ms": 0.6432836879432624, "prompt_per_second": 1554.524106148639, "predicted_n": 2, "predicted_ms": 28.352, "predicted_per_token_ms": 14.176, "predicted_per_second": 70.54176072234763}, "tps": 70.54176072234763}, {"id": "c444b089-7949-4bf3-8568-a9acdf7c54ed", "answer": "AB", "llm_answer": "B", "score": 0, "topics": [], "timings": {"cache_n": 58, "prompt_n": 99, "prompt_ms": 94.967, "prompt_per_token_ms": 0.9592626262626263, "prompt_per_second": 1042.4673834068678, "predicted_n": 2, "predicted_ms": 32.724, "predicted_per_token_ms": 16.362, "predicted_per_second": 61.11722283339446}, "tps": 61.11722283339446}, {"id": "86c1215b-9b9a-4a45-b213-a4d40f64ff60", "answer": "C", "llm_answer": "D", "score": 0, "topics": ["SystemSecurity", "PenTest"], "timings": {"cache_n": 58, "prompt_n": 162, "prompt_ms": 97.389, "prompt_per_token_ms": 0.6011666666666666, "prompt_per_second": 1663.4322151372332, "predicted_n": 2, "predicted_ms": 31.93, "predicted_per_token_ms": 15.965, "predicted_per_second": 62.63701847792045}, "tps": 62.63701847792045}, {"id": "106a9b13-cabe-41f5-92d4-6e25085bc5b6", "answer": "BD", "llm_answer": "BD", "score": 1, "topics": ["ApplicationSecurity"], "timings": {"cache_n": 65, "prompt_n": 105, "prompt_ms": 94.244, "prompt_per_token_ms": 0.8975619047619048, "prompt_per_second": 1114.1292814396672, "predicted_n": 4, "predicted_ms": 90.521, "predicted_per_token_ms": 22.63025, "predicted_per_second": 44.188641309751326}, "tps": 44.188641309751326}, {"id": "ab227bb9-ddc1-49ba-9d0f-5c4d69aff23f", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["ApplicationSecurity", "WebSecurity"], "timings": {"cache_n": 60, "prompt_n": 193, "prompt_ms": 101.25, "prompt_per_token_ms": 0.5246113989637305, "prompt_per_second": 1906.1728395061727, "predicted_n": 2, "predicted_ms": 30.858, "predicted_per_token_ms": 15.429, "predicted_per_second": 64.81301445330222}, "tps": 64.81301445330222}, {"id": "d0146cf3-f384-4c73-8f7d-c4ee6b7e90a2", "answer": "AD", "llm_answer": "A", "score": 0, "topics": ["WebSecurity", "SystemSecurity"], "timings": {"cache_n": 62, "prompt_n": 141, "prompt_ms": 98.006, "prompt_per_token_ms": 0.6950780141843972, "prompt_per_second": 1438.6874273003693, "predicted_n": 2, "predicted_ms": 29.384, "predicted_per_token_ms": 14.692, "predicted_per_second": 68.06425265450585}, "tps": 68.06425265450585}, {"id": "d398f15b-e343-4377-818e-4b91d7c05343", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 61, "prompt_n": 136, "prompt_ms": 97.145, "prompt_per_token_ms": 0.7143014705882352, "prompt_per_second": 1399.9691183282723, "predicted_n": 2, "predicted_ms": 30.011, "predicted_per_token_ms": 15.0055, "predicted_per_second": 66.64223118189997}, "tps": 66.64223118189997}, {"id": "fcead14d-ea7b-44bd-9e6f-9f2f7d094693", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["ApplicationSecurity", "WebSecurity"], "timings": {"cache_n": 59, "prompt_n": 143, "prompt_ms": 98.307, "prompt_per_token_ms": 0.6874615384615385, "prompt_per_second": 1454.6268322703368, "predicted_n": 2, "predicted_ms": 29.68, "predicted_per_token_ms": 14.84, "predicted_per_second": 67.38544474393531}, "tps": 67.38544474393531}, {"id": "a1f7b7d0-5bda-4d5e-abc3-af6f63d68c3b", "answer": "AD", "llm_answer": "AB", "score": 0, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 59, "prompt_n": 139, "prompt_ms": 101.116, "prompt_per_token_ms": 0.727453237410072, "prompt_per_second": 1374.658807706001, "predicted_n": 3, "predicted_ms": 58.107, "predicted_per_token_ms": 19.369, "predicted_per_second": 51.6288915276989}, "tps": 51.6288915276989}, {"id": "e1363b03-2416-48d0-8524-203070f6c472", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SystemSecurity", "ApplicationSecurity", "PenTest"], "timings": {"cache_n": 59, "prompt_n": 92, "prompt_ms": 90.073, "prompt_per_token_ms": 0.9790543478260869, "prompt_per_second": 1021.3937583959678, "predicted_n": 2, "predicted_ms": 28.056, "predicted_per_token_ms": 14.028, "predicted_per_second": 71.285999429712}, "tps": 71.285999429712}, {"id": "96b550cd-10c8-4ca6-b37c-e8ab02c7a0ff", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["SystemSecurity"], "timings": {"cache_n": 58, "prompt_n": 148, "prompt_ms": 92.53, "prompt_per_token_ms": 0.6252027027027027, "prompt_per_second": 1599.4812493245433, "predicted_n": 2, "predicted_ms": 29.402, "predicted_per_token_ms": 14.701, "predicted_per_second": 68.02258349772124}, "tps": 68.02258349772124}, {"id": "e0b79a48-7cb3-41a6-bbb3-7822c827b4b0", "answer": "AD", "llm_answer": "A", "score": 0, "topics": ["WebSecurity", "NetworkSecurity"], "timings": {"cache_n": 61, "prompt_n": 193, "prompt_ms": 100.998, "prompt_per_token_ms": 0.5233056994818653, "prompt_per_second": 1910.9289292857281, "predicted_n": 2, "predicted_ms": 31.092, "predicted_per_token_ms": 15.546, "predicted_per_second": 64.32522835456066}, "tps": 64.32522835456066}, {"id": "76cdbbaa-5d8a-4986-a26b-def994325d36", "answer": "ABD", "llm_answer": "ABD", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 59, "prompt_n": 119, "prompt_ms": 95.561, "prompt_per_token_ms": 0.8030336134453782, "prompt_per_second": 1245.2778853297893, "predicted_n": 3, "predicted_ms": 60.748, "predicted_per_token_ms": 20.249333333333333, "predicted_per_second": 49.38434187133733}, "tps": 49.38434187133733}, {"id": "2506b47e-1fb5-4142-9f65-572083bca190", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 58, "prompt_n": 130, "prompt_ms": 97.041, "prompt_per_token_ms": 0.7464692307692308, "prompt_per_second": 1339.6399460022053, "predicted_n": 2, "predicted_ms": 30.403, "predicted_per_token_ms": 15.2015, "predicted_per_second": 65.78298194257145}, "tps": 65.78298194257145}, {"id": "3d2656ab-8e41-4c90-a09d-2be91a1ce407", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["ApplicationSecurity", "WebSecurity"], "timings": {"cache_n": 59, "prompt_n": 144, "prompt_ms": 97.305, "prompt_per_token_ms": 0.6757291666666667, "prompt_per_second": 1479.8828426082935, "predicted_n": 2, "predicted_ms": 28.9, "predicted_per_token_ms": 14.45, "predicted_per_second": 69.20415224913495}, "tps": 69.20415224913495}, {"id": "dbfac871-5d4a-42f2-9fff-6607326d6bce", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["ApplicationSecurity"], "timings": {"cache_n": 59, "prompt_n": 151, "prompt_ms": 100.584, "prompt_per_token_ms": 0.6661192052980133, "prompt_per_second": 1501.2328004453989, "predicted_n": 2, "predicted_ms": 29.076, "predicted_per_token_ms": 14.538, "predicted_per_second": 68.78525244187647}, "tps": 68.78525244187647}, {"id": "140adc98-2db8-43a3-91a6-4b692af9dffe", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["NetworkSecurity"], "timings": {"cache_n": 60, "prompt_n": 121, "prompt_ms": 94.922, "prompt_per_token_ms": 0.7844793388429752, "prompt_per_second": 1274.7308316301805, "predicted_n": 2, "predicted_ms": 29.131, "predicted_per_token_ms": 14.5655, "predicted_per_second": 68.65538429851361}, "tps": 68.65538429851361}, {"id": "4fb30b08-07a5-45a7-accf-fa105a63e902", "answer": "BC", "llm_answer": "B", "score": 0, "topics": ["ApplicationSecurity", "WebSecurity"], "timings": {"cache_n": 59, "prompt_n": 135, "prompt_ms": 98.106, "prompt_per_token_ms": 0.7267111111111111, "prompt_per_second": 1376.0626261390742, "predicted_n": 2, "predicted_ms": 30.744, "predicted_per_token_ms": 15.372, "predicted_per_second": 65.05334374186833}, "tps": 65.05334374186833}, {"id": "2bc0059d-f203-4f70-b3f3-a72c1cd08120", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 60, "prompt_n": 130, "prompt_ms": 97.248, "prompt_per_token_ms": 0.7480615384615384, "prompt_per_second": 1336.788417242514, "predicted_n": 2, "predicted_ms": 28.571, "predicted_per_token_ms": 14.2855, "predicted_per_second": 70.00105001575024}, "tps": 70.00105001575024}, {"id": "6c6f3ff4-94cd-41fe-9b70-41b69b6eb6ef", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 58, "prompt_n": 201, "prompt_ms": 93.117, "prompt_per_token_ms": 0.4632686567164179, "prompt_per_second": 2158.5746963497536, "predicted_n": 2, "predicted_ms": 27.157, "predicted_per_token_ms": 13.5785, "predicted_per_second": 73.64583716905402}, "tps": 73.64583716905402}, {"id": "20982aea-8f7c-4e05-b7e3-cfedd7c6c588", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["WebSecurity", "SystemSecurity"], "timings": {"cache_n": 61, "prompt_n": 164, "prompt_ms": 98.933, "prompt_per_token_ms": 0.6032500000000001, "prompt_per_second": 1657.6875259013673, "predicted_n": 2, "predicted_ms": 29.338, "predicted_per_token_ms": 14.669, "predicted_per_second": 68.17097279978185}, "tps": 68.17097279978185}, {"id": "e28f1088-c927-4303-a03c-f17dca841538", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SystemSecurity", "WebSecurity"], "timings": {"cache_n": 60, "prompt_n": 95, "prompt_ms": 92.968, "prompt_per_token_ms": 0.9786105263157895, "prompt_per_second": 1021.8569830479305, "predicted_n": 2, "predicted_ms": 29.629, "predicted_per_token_ms": 14.8145, "predicted_per_second": 67.50143440548112}, "tps": 67.50143440548112}, {"id": "210f8c0a-85db-483a-b60d-3e2d3f4513cc", "answer": "AC", "llm_answer": "AC", "score": 1, "topics": ["SoftwareSecurity", "ApplicationSecurity", "Vulnerability"], "timings": {"cache_n": 58, "prompt_n": 140, "prompt_ms": 100.504, "prompt_per_token_ms": 0.7178857142857143, "prompt_per_second": 1392.979383905118, "predicted_n": 2, "predicted_ms": 31.254, "predicted_per_token_ms": 15.627, "predicted_per_second": 63.991809048441795}, "tps": 63.991809048441795}, {"id": "b65e83f6-bda1-4e26-b387-8721a484b997", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["PenTest"], "timings": {"cache_n": 62, "prompt_n": 151, "prompt_ms": 98.929, "prompt_per_token_ms": 0.655158940397351, "prompt_per_second": 1526.347178279372, "predicted_n": 2, "predicted_ms": 30.596, "predicted_per_token_ms": 15.298, "predicted_per_second": 65.36802196365538}, "tps": 65.36802196365538}, {"id": "ea3c846b-0c5f-47c9-8b27-5e0e609adc67", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 59, "prompt_n": 137, "prompt_ms": 97.542, "prompt_per_token_ms": 0.711985401459854, "prompt_per_second": 1404.523179758463, "predicted_n": 2, "predicted_ms": 30.783, "predicted_per_token_ms": 15.3915, "predicted_per_second": 64.9709255108339}, "tps": 64.9709255108339}, {"id": "bab5f2bf-ab96-4e8e-a045-bbaab34abb5d", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 59, "prompt_n": 155, "prompt_ms": 99.227, "prompt_per_token_ms": 0.6401741935483871, "prompt_per_second": 1562.0748385016175, "predicted_n": 2, "predicted_ms": 29.75, "predicted_per_token_ms": 14.875, "predicted_per_second": 67.22689075630252}, "tps": 67.22689075630252}, {"id": "9edf17f6-1347-4c8e-8ede-5152ae97a410", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["SoftwareSecurity", "SystemSecurity", "Vulnerability"], "timings": {"cache_n": 59, "prompt_n": 165, "prompt_ms": 98.497, "prompt_per_token_ms": 0.5969515151515151, "prompt_per_second": 1675.1779242007372, "predicted_n": 2, "predicted_ms": 30.379, "predicted_per_token_ms": 15.1895, "predicted_per_second": 65.83495177589782}, "tps": 65.83495177589782}, {"id": "307c77bd-0281-491d-a1d4-b3883c74cc49", "answer": "ACD", "llm_answer": "ABCD", "score": 0, "topics": ["ApplicationSecurity", "SoftwareSecurity", "Vulnerability"], "timings": {"cache_n": 60, "prompt_n": 122, "prompt_ms": 96.718, "prompt_per_token_ms": 0.7927704918032787, "prompt_per_second": 1261.399119088484, "predicted_n": 5, "predicted_ms": 118.409, "predicted_per_token_ms": 23.681800000000003, "predicted_per_second": 42.226519943585366}, "tps": 42.226519943585366}, {"id": "75c5e96b-164d-40ac-ab53-d0d927ab4005", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SystemSecurity", "WebSecurity"], "timings": {"cache_n": 59, "prompt_n": 147, "prompt_ms": 91.442, "prompt_per_token_ms": 0.6220544217687074, "prompt_per_second": 1607.5763872181274, "predicted_n": 2, "predicted_ms": 25.454, "predicted_per_token_ms": 12.727, "predicted_per_second": 78.57311228097744}, "tps": 78.57311228097744}, {"id": "7b6bfc96-3aff-4ed1-9a88-3a33040d2b84", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SystemSecurity", "WebSecurity", "PenTest"], "timings": {"cache_n": 62, "prompt_n": 81, "prompt_ms": 90.666, "prompt_per_token_ms": 1.1193333333333333, "prompt_per_second": 893.3889219773674, "predicted_n": 2, "predicted_ms": 28.772, "predicted_per_token_ms": 14.386, "predicted_per_second": 69.51202558042542}, "tps": 69.51202558042542}, {"id": "f7d80e0c-0480-4e76-ba70-31a920b89584", "answer": "AC", "llm_answer": "AC", "score": 1, "topics": ["SystemSecurity", "Vulnerability"], "timings": {"cache_n": 58, "prompt_n": 96, "prompt_ms": 92.545, "prompt_per_token_ms": 0.9640104166666666, "prompt_per_second": 1037.3331892592792, "predicted_n": 3, "predicted_ms": 59.497, "predicted_per_token_ms": 19.832333333333334, "predicted_per_second": 50.4227103887591}, "tps": 50.4227103887591}, {"id": "f04dafc5-00fc-4ddf-944c-baf6ffd102ec", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["ApplicationSecurity", "SystemSecurity"], "timings": {"cache_n": 59, "prompt_n": 152, "prompt_ms": 97.545, "prompt_per_token_ms": 0.6417434210526316, "prompt_per_second": 1558.2551642831513, "predicted_n": 2, "predicted_ms": 32.383, "predicted_per_token_ms": 16.1915, "predicted_per_second": 61.760800419973435}, "tps": 61.760800419973435}, {"id": "a212e74c-7001-4b0d-bf67-d17f4f2666f6", "answer": "AC", "llm_answer": "C", "score": 0, "topics": ["SoftwareSecurity", "ApplicationSecurity", "Vulnerability"], "timings": {"cache_n": 60, "prompt_n": 182, "prompt_ms": 99.973, "prompt_per_token_ms": 0.5493021978021978, "prompt_per_second": 1820.4915327138328, "predicted_n": 2, "predicted_ms": 30.059, "predicted_per_token_ms": 15.0295, "predicted_per_second": 66.53581290129412}, "tps": 66.53581290129412}, {"id": "21c8cac6-2e25-4729-b15b-b4e8221ce3db", "answer": "D", "llm_answer": "C", "score": 0, "topics": ["WebSecurity", "ApplicationSecurity"], "timings": {"cache_n": 62, "prompt_n": 106, "prompt_ms": 95.754, "prompt_per_token_ms": 0.9033396226415095, "prompt_per_second": 1107.0033627838002, "predicted_n": 2, "predicted_ms": 30.21, "predicted_per_token_ms": 15.105, "predicted_per_second": 66.20324395895399}, "tps": 66.20324395895399}, {"id": "fa76ffc9-73c4-415e-a1bc-cb9dbef0599c", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["SystemSecurity"], "timings": {"cache_n": 59, "prompt_n": 139, "prompt_ms": 97.609, "prompt_per_token_ms": 0.7022230215827338, "prompt_per_second": 1424.049011873905, "predicted_n": 2, "predicted_ms": 29.447, "predicted_per_token_ms": 14.7235, "predicted_per_second": 67.91863347709445}, "tps": 67.91863347709445}, {"id": "0ff6e15e-2ff6-46e2-8733-38795462864d", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["PenTest"], "timings": {"cache_n": 59, "prompt_n": 99, "prompt_ms": 94.722, "prompt_per_token_ms": 0.9567878787878787, "prompt_per_second": 1045.16374231963, "predicted_n": 2, "predicted_ms": 30.011, "predicted_per_token_ms": 15.0055, "predicted_per_second": 66.64223118189997}, "tps": 66.64223118189997}, {"id": "f22e6494-0c5b-492e-9ec7-3c0529185f8b", "answer": "ABD", "llm_answer": "BD", "score": 0, "topics": ["ApplicationSecurity", "SystemSecurity"], "timings": {"cache_n": 59, "prompt_n": 135, "prompt_ms": 99.657, "prompt_per_token_ms": 0.7382, "prompt_per_second": 1354.64643727987, "predicted_n": 3, "predicted_ms": 61.06, "predicted_per_token_ms": 20.353333333333335, "predicted_per_second": 49.132001310186695}, "tps": 49.132001310186695}, {"id": "e5d69a43-4a50-48fa-acd0-7b44b41640b8", "answer": "D", "llm_answer": "D", "score": 1, "topics": ["WebSecurity", "ApplicationSecurity"], "timings": {"cache_n": 59, "prompt_n": 206, "prompt_ms": 101.718, "prompt_per_token_ms": 0.49377669902912624, "prompt_per_second": 2025.206944690222, "predicted_n": 2, "predicted_ms": 28.541, "predicted_per_token_ms": 14.2705, "predicted_per_second": 70.07462948039662}, "tps": 70.07462948039662}, {"id": "5d0334d9-8c66-4218-9fc3-a46106d44a29", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SystemSecurity", "PenTest"], "timings": {"cache_n": 62, "prompt_n": 144, "prompt_ms": 84.993, "prompt_per_token_ms": 0.5902291666666666, "prompt_per_second": 1694.2571741202216, "predicted_n": 2, "predicted_ms": 30.342, "predicted_per_token_ms": 15.171, "predicted_per_second": 65.9152330103487}, "tps": 65.9152330103487}, {"id": "dcf1eacf-2aa0-4b0b-962a-77304639f9ec", "answer": "AD", "llm_answer": "BC", "score": 0, "topics": ["SystemSecurity", "ApplicationSecurity", "PenTest"], "timings": {"cache_n": 64, "prompt_n": 135, "prompt_ms": 98.285, "prompt_per_token_ms": 0.728037037037037, "prompt_per_second": 1373.5564938698683, "predicted_n": 4, "predicted_ms": 88.166, "predicted_per_token_ms": 22.0415, "predicted_per_second": 45.36896309234852}, "tps": 45.36896309234852}, {"id": "72cc36d2-2d0f-4b66-a7a1-2c055293ecd7", "answer": "D", "llm_answer": "D", "score": 1, "topics": ["WebSecurity", "PenTest"], "timings": {"cache_n": 62, "prompt_n": 78, "prompt_ms": 91.949, "prompt_per_token_ms": 1.1788333333333334, "prompt_per_second": 848.2963381874736, "predicted_n": 2, "predicted_ms": 30.662, "predicted_per_token_ms": 15.331, "predicted_per_second": 65.22731720044355}, "tps": 65.22731720044355}, {"id": "5449601b-bc8d-420b-88ee-70903af8d16d", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["PenTest", "SystemSecurity"], "timings": {"cache_n": 58, "prompt_n": 89, "prompt_ms": 92.483, "prompt_per_token_ms": 1.0391348314606743, "prompt_per_second": 962.3390244693619, "predicted_n": 2, "predicted_ms": 28.231, "predicted_per_token_ms": 14.1155, "predicted_per_second": 70.84410754135524}, "tps": 70.84410754135524}, {"id": "21ff0c64-7c30-4114-b53f-5e19fe53fb21", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["PenTest"], "timings": {"cache_n": 58, "prompt_n": 113, "prompt_ms": 96.208, "prompt_per_token_ms": 0.8513982300884956, "prompt_per_second": 1174.5384999168468, "predicted_n": 2, "predicted_ms": 28.286, "predicted_per_token_ms": 14.143, "predicted_per_second": 70.70635650144948}, "tps": 70.70635650144948}, {"id": "f23f4744-23c4-4459-9c6c-2ab981082f46", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["ApplicationSecurity", "WebSecurity"], "timings": {"cache_n": 58, "prompt_n": 137, "prompt_ms": 100.671, "prompt_per_token_ms": 0.7348248175182482, "prompt_per_second": 1360.8685718826673, "predicted_n": 2, "predicted_ms": 28.632, "predicted_per_token_ms": 14.316, "predicted_per_second": 69.85191394244202}, "tps": 69.85191394244202}, {"id": "309ff891-aae1-417b-8d52-9dae74337140", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SystemSecurity", "PenTest"], "timings": {"cache_n": 58, "prompt_n": 149, "prompt_ms": 97.56, "prompt_per_token_ms": 0.6547651006711409, "prompt_per_second": 1527.2652726527265, "predicted_n": 2, "predicted_ms": 28.387, "predicted_per_token_ms": 14.1935, "predicted_per_second": 70.45478564131469}, "tps": 70.45478564131469}, {"id": "7930812a-cc28-447f-855c-4143bfd43c7b", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["WebSecurity", "PenTest"], "timings": {"cache_n": 59, "prompt_n": 120, "prompt_ms": 95.646, "prompt_per_token_ms": 0.79705, "prompt_per_second": 1254.626434978985, "predicted_n": 2, "predicted_ms": 28.637, "predicted_per_token_ms": 14.3185, "predicted_per_second": 69.8397178475399}, "tps": 69.8397178475399}, {"id": "a5815a72-47d8-4598-aa83-30a7ac47aae6", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["PenTest", "ApplicationSecurity", "SystemSecurity"], "timings": {"cache_n": 59, "prompt_n": 96, "prompt_ms": 93.488, "prompt_per_token_ms": 0.9738333333333333, "prompt_per_second": 1026.8697586856067, "predicted_n": 2, "predicted_ms": 28.292, "predicted_per_token_ms": 14.146, "predicted_per_second": 70.69136151562279}, "tps": 70.69136151562279}, {"id": "da4ece90-b3a0-4fe5-891d-76240553a154", "answer": "D", "llm_answer": "D", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 59, "prompt_n": 63, "prompt_ms": 85.632, "prompt_per_token_ms": 1.3592380952380954, "prompt_per_second": 735.7062780269058, "predicted_n": 2, "predicted_ms": 24.869, "predicted_per_token_ms": 12.4345, "predicted_per_second": 80.4214081788572}, "tps": 80.4214081788572}, {"id": "9b2566ee-9066-4024-bdbc-4fba3c6b482a", "answer": "ABD", "llm_answer": "ABCD", "score": 0, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 58, "prompt_n": 161, "prompt_ms": 92.004, "prompt_per_token_ms": 0.5714534161490683, "prompt_per_second": 1749.923916351463, "predicted_n": 3, "predicted_ms": 61.759, "predicted_per_token_ms": 20.586333333333332, "predicted_per_second": 48.57591606081705}, "tps": 48.57591606081705}, {"id": "577dda45-a10e-48f5-8e7d-276fc1e7644c", "answer": "ABCD", "llm_answer": "ABCD", "score": 1, "topics": ["SoftwareSecurity", "ApplicationSecurity", "MemorySafety", "Vulnerability"], "timings": {"cache_n": 60, "prompt_n": 183, "prompt_ms": 100.391, "prompt_per_token_ms": 0.5485846994535519, "prompt_per_second": 1822.8725682581107, "predicted_n": 3, "predicted_ms": 57.705, "predicted_per_token_ms": 19.235, "predicted_per_second": 51.98856251624643}, "tps": 51.98856251624643}, {"id": "a4242b47-3b29-407d-9459-8f1d87f0470e", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["SystemSecurity", "NetworkSecurity"], "timings": {"cache_n": 60, "prompt_n": 104, "prompt_ms": 96.122, "prompt_per_token_ms": 0.92425, "prompt_per_second": 1081.9583446037327, "predicted_n": 2, "predicted_ms": 29.047, "predicted_per_token_ms": 14.5235, "predicted_per_second": 68.85392639515268}, "tps": 68.85392639515268}, {"id": "741a8d9e-049e-4acd-9dfb-d14ae5b0faa1", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 59, "prompt_n": 149, "prompt_ms": 97.793, "prompt_per_token_ms": 0.6563288590604027, "prompt_per_second": 1523.6264354299385, "predicted_n": 2, "predicted_ms": 29.298, "predicted_per_token_ms": 14.649, "predicted_per_second": 68.2640453273261}, "tps": 68.2640453273261}, {"id": "d41e7487-9fb3-4bd8-976a-d9dee258857f", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["SystemSecurity", "PenTest"], "timings": {"cache_n": 60, "prompt_n": 118, "prompt_ms": 95.408, "prompt_per_token_ms": 0.808542372881356, "prompt_per_second": 1236.7935602884454, "predicted_n": 2, "predicted_ms": 29.072, "predicted_per_token_ms": 14.536, "predicted_per_second": 68.79471656576776}, "tps": 68.79471656576776}, {"id": "dfdcea54-2f5e-4db3-8ec8-55a3fca0df72", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["SystemSecurity"], "timings": {"cache_n": 62, "prompt_n": 116, "prompt_ms": 96.76, "prompt_per_token_ms": 0.8341379310344827, "prompt_per_second": 1198.8424968995453, "predicted_n": 2, "predicted_ms": 28.895, "predicted_per_token_ms": 14.4475, "predicted_per_second": 69.21612735767434}, "tps": 69.21612735767434}, {"id": "2a7356da-6c14-4680-8bb6-f4123960cf95", "answer": "BD", "llm_answer": "ABCD", "score": 0, "topics": ["SoftwareSecurity", "ApplicationSecurity", "MemorySafety", "Vulnerability"], "timings": {"cache_n": 58, "prompt_n": 137, "prompt_ms": 97.48, "prompt_per_token_ms": 0.7115328467153285, "prompt_per_second": 1405.416495691424, "predicted_n": 64, "predicted_ms": 1843.567, "predicted_per_token_ms": 28.805734375, "predicted_per_second": 34.71531004840074}, "tps": 34.71531004840074}, {"id": "e264375f-fbfd-48f7-be67-391599cc8246", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["ApplicationSecurity"], "timings": {"cache_n": 62, "prompt_n": 112, "prompt_ms": 94.611, "prompt_per_token_ms": 0.8447410714285715, "prompt_per_second": 1183.7946961769774, "predicted_n": 2, "predicted_ms": 28.814, "predicted_per_token_ms": 14.407, "predicted_per_second": 69.41070313042272}, "tps": 69.41070313042272}, {"id": "659f2ee2-ad2f-4d42-8499-fbac3580ef82", "answer": "AC", "llm_answer": "A", "score": 0, "topics": ["SystemSecurity", "MemorySafety"], "timings": {"cache_n": 59, "prompt_n": 92, "prompt_ms": 93.42, "prompt_per_token_ms": 1.0154347826086956, "prompt_per_second": 984.7998287304646, "predicted_n": 2, "predicted_ms": 30.133, "predicted_per_token_ms": 15.0665, "predicted_per_second": 66.37241562406663}, "tps": 66.37241562406663}, {"id": "381772f9-b4ad-4a36-ad5d-c66e7b73a4d0", "answer": "AC", "llm_answer": "A", "score": 0, "topics": ["SoftwareSecurity", "SystemSecurity", "Vulnerability"], "timings": {"cache_n": 59, "prompt_n": 125, "prompt_ms": 95.449, "prompt_per_token_ms": 0.7635919999999999, "prompt_per_second": 1309.599891041289, "predicted_n": 2, "predicted_ms": 29.108, "predicted_per_token_ms": 14.554, "predicted_per_second": 68.7096330905593}, "tps": 68.7096330905593}, {"id": "3e963424-6553-4ff4-961f-f8e1ea9c60d1", "answer": "AB", "llm_answer": "AB", "score": 1, "topics": ["Cryptography"], "timings": {"cache_n": 58, "prompt_n": 132, "prompt_ms": 99.027, "prompt_per_token_ms": 0.7502045454545455, "prompt_per_second": 1332.9697961162108, "predicted_n": 2, "predicted_ms": 29.106, "predicted_per_token_ms": 14.553, "predicted_per_second": 68.71435442864014}, "tps": 68.71435442864014}, {"id": "0e1d80ed-3738-4de0-bbac-87a504a7f0ba", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["ApplicationSecurity", "SystemSecurity"], "timings": {"cache_n": 59, "prompt_n": 121, "prompt_ms": 97.99, "prompt_per_token_ms": 0.8098347107438016, "prompt_per_second": 1234.8198795795488, "predicted_n": 2, "predicted_ms": 30.96, "predicted_per_token_ms": 15.48, "predicted_per_second": 64.59948320413436}, "tps": 64.59948320413436}, {"id": "c6e8f5ac-fa39-4ee7-aeee-eba3822c2abf", "answer": "ABD", "llm_answer": "B", "score": 0, "topics": ["ApplicationSecurity", "SystemSecurity"], "timings": {"cache_n": 62, "prompt_n": 127, "prompt_ms": 93.637, "prompt_per_token_ms": 0.7372992125984252, "prompt_per_second": 1356.3014620288989, "predicted_n": 2, "predicted_ms": 32.287, "predicted_per_token_ms": 16.1435, "predicted_per_second": 61.94443584105058}, "tps": 61.94443584105058}, {"id": "9f93ab09-4860-4074-a836-65ecc103181a", "answer": "BD", "llm_answer": "BD", "score": 1, "topics": ["SystemSecurity", "ApplicationSecurity", "PenTest"], "timings": {"cache_n": 62, "prompt_n": 91, "prompt_ms": 93.333, "prompt_per_token_ms": 1.0256373626373627, "prompt_per_second": 975.0034821552935, "predicted_n": 4, "predicted_ms": 88.931, "predicted_per_token_ms": 22.23275, "predicted_per_second": 44.97869134497532}, "tps": 44.97869134497532}, {"id": "4a20a25f-6683-44a7-862e-ad137d8901b3", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["WebSecurity", "PenTest"], "timings": {"cache_n": 58, "prompt_n": 146, "prompt_ms": 97.716, "prompt_per_token_ms": 0.6692876712328767, "prompt_per_second": 1494.1258340496952, "predicted_n": 2, "predicted_ms": 28.755, "predicted_per_token_ms": 14.3775, "predicted_per_second": 69.55312119631368}, "tps": 69.55312119631368}, {"id": "786c4af9-bb48-4e5c-a143-15846a0f3d94", "answer": "AB", "llm_answer": "B", "score": 0, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 59, "prompt_n": 119, "prompt_ms": 81.193, "prompt_per_token_ms": 0.6822941176470588, "prompt_per_second": 1465.6435899646522, "predicted_n": 2, "predicted_ms": 27.158, "predicted_per_token_ms": 13.579, "predicted_per_second": 73.64312541424258}, "tps": 73.64312541424258}, {"id": "95e5de9a-2198-4e77-8456-00802479e016", "answer": "AD", "llm_answer": "B", "score": 0, "topics": ["ApplicationSecurity", "WebSecurity"], "timings": {"cache_n": 58, "prompt_n": 152, "prompt_ms": 97.031, "prompt_per_token_ms": 0.6383618421052631, "prompt_per_second": 1566.509672166627, "predicted_n": 2, "predicted_ms": 29.599, "predicted_per_token_ms": 14.7995, "predicted_per_second": 67.56985033278151}, "tps": 67.56985033278151}, {"id": "9f26a7a1-4951-4c03-a97b-8ce0422885ae", "answer": "BD", "llm_answer": "BCD", "score": 0, "topics": ["WebSecurity"], "timings": {"cache_n": 62, "prompt_n": 120, "prompt_ms": 95.681, "prompt_per_token_ms": 0.7973416666666666, "prompt_per_second": 1254.1674940688329, "predicted_n": 6, "predicted_ms": 150.818, "predicted_per_token_ms": 25.136333333333337, "predicted_per_second": 39.78304976859526}, "tps": 39.78304976859526}, {"id": "f0272950-f10e-4993-ad51-055f53e725e8", "answer": "BD", "llm_answer": "BD", "score": 1, "topics": ["SystemSecurity", "NetworkSecurity"], "timings": {"cache_n": 65, "prompt_n": 135, "prompt_ms": 97.061, "prompt_per_token_ms": 0.7189703703703704, "prompt_per_second": 1390.8779015258444, "predicted_n": 4, "predicted_ms": 88.728, "predicted_per_token_ms": 22.182, "predicted_per_second": 45.0815976918222}, "tps": 45.0815976918222}, {"id": "06e9bd67-33a0-40eb-aec8-9da3a59da19f", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 60, "prompt_n": 99, "prompt_ms": 94.274, "prompt_per_token_ms": 0.9522626262626263, "prompt_per_second": 1050.1304707554575, "predicted_n": 2, "predicted_ms": 30.994, "predicted_per_token_ms": 15.497, "predicted_per_second": 64.52861844227915}, "tps": 64.52861844227915}, {"id": "b1ed7662-1171-4b13-9dd3-f888833c104b", "answer": "ABD", "llm_answer": "ABD", "score": 1, "topics": ["ApplicationSecurity", "SystemSecurity"], "timings": {"cache_n": 58, "prompt_n": 148, "prompt_ms": 98.794, "prompt_per_token_ms": 0.667527027027027, "prompt_per_second": 1498.0666842115918, "predicted_n": 3, "predicted_ms": 58.483, "predicted_per_token_ms": 19.494333333333334, "predicted_per_second": 51.296958090385246}, "tps": 51.296958090385246}, {"id": "890542ed-74bc-4e18-b413-f06fe6dd362d", "answer": "AC", "llm_answer": "ABCD", "score": 0, "topics": ["SystemSecurity", "Vulnerability"], "timings": {"cache_n": 60, "prompt_n": 139, "prompt_ms": 97.1, "prompt_per_token_ms": 0.6985611510791366, "prompt_per_second": 1431.513903192585, "predicted_n": 3, "predicted_ms": 58.65, "predicted_per_token_ms": 19.55, "predicted_per_second": 51.150895140664964}, "tps": 51.150895140664964}, {"id": "a22f0614-c5a6-4833-a7f0-91af34e24276", "answer": "AB", "llm_answer": "BC", "score": 0, "topics": ["WebSecurity"], "timings": {"cache_n": 59, "prompt_n": 90, "prompt_ms": 93.379, "prompt_per_token_ms": 1.0375444444444446, "prompt_per_second": 963.8141337988199, "predicted_n": 3, "predicted_ms": 57.119, "predicted_per_token_ms": 19.039666666666665, "predicted_per_second": 52.521927904900295}, "tps": 52.521927904900295}, {"id": "373000bc-d746-4c4b-a423-76d179f010e2", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SystemSecurity", "WebSecurity"], "timings": {"cache_n": 59, "prompt_n": 106, "prompt_ms": 81.421, "prompt_per_token_ms": 0.768122641509434, "prompt_per_second": 1301.8754375406836, "predicted_n": 2, "predicted_ms": 28.015, "predicted_per_token_ms": 14.0075, "predicted_per_second": 71.39032661074424}, "tps": 71.39032661074424}, {"id": "054d6d88-615a-4d34-ba6d-bf29a7d2527e", "answer": "BC", "llm_answer": "BC", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 59, "prompt_n": 134, "prompt_ms": 97.616, "prompt_per_token_ms": 0.7284776119402985, "prompt_per_second": 1372.7257826585806, "predicted_n": 3, "predicted_ms": 60.878, "predicted_per_token_ms": 20.292666666666666, "predicted_per_second": 49.27888564013273}, "tps": 49.27888564013273}, {"id": "fe68ea2f-a08b-43ff-8144-285035b69699", "answer": "AD", "llm_answer": "D", "score": 0, "topics": ["ApplicationSecurity", "SystemSecurity"], "timings": {"cache_n": 60, "prompt_n": 116, "prompt_ms": 95.412, "prompt_per_token_ms": 0.8225172413793104, "prompt_per_second": 1215.7799857460277, "predicted_n": 2, "predicted_ms": 30.182, "predicted_per_token_ms": 15.091, "predicted_per_second": 66.2646610562587}, "tps": 66.2646610562587}, {"id": "98201358-d385-45af-b314-9678312f473c", "answer": "B", "llm_answer": "A", "score": 0, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 59, "prompt_n": 129, "prompt_ms": 97.879, "prompt_per_token_ms": 0.7587519379844961, "prompt_per_second": 1317.9538001001235, "predicted_n": 2, "predicted_ms": 31.247, "predicted_per_token_ms": 15.6235, "predicted_per_second": 64.00614458988063}, "tps": 64.00614458988063}, {"id": "df9a3882-a663-47f5-aec9-a666c85ab51a", "answer": "AC", "llm_answer": "C", "score": 0, "topics": ["WebSecurity"], "timings": {"cache_n": 62, "prompt_n": 154, "prompt_ms": 98.345, "prompt_per_token_ms": 0.6386038961038961, "prompt_per_second": 1565.9159082820684, "predicted_n": 2, "predicted_ms": 30.05, "predicted_per_token_ms": 15.025, "predicted_per_second": 66.55574043261231}, "tps": 66.55574043261231}, {"id": "29bc59ee-21d7-4dc9-aa76-f5e98ca1033b", "answer": "BD", "llm_answer": "BD", "score": 1, "topics": ["WebSecurity", "Vulnerability"], "timings": {"cache_n": 64, "prompt_n": 164, "prompt_ms": 99.515, "prompt_per_token_ms": 0.6067987804878049, "prompt_per_second": 1647.9927649098124, "predicted_n": 4, "predicted_ms": 86.723, "predicted_per_token_ms": 21.68075, "predicted_per_second": 46.123865641179385}, "tps": 46.123865641179385}, {"id": "1c76c085-7d5e-45c6-8dd3-53b27a86c033", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 60, "prompt_n": 105, "prompt_ms": 95.157, "prompt_per_token_ms": 0.9062571428571429, "prompt_per_second": 1103.4395788013494, "predicted_n": 2, "predicted_ms": 29.001, "predicted_per_token_ms": 14.5005, "predicted_per_second": 68.96313920209647}, "tps": 68.96313920209647}, {"id": "720da598-d60c-431d-aff2-5baad3f4bb4f", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["SystemSecurity", "WebSecurity"], "timings": {"cache_n": 60, "prompt_n": 117, "prompt_ms": 97.557, "prompt_per_token_ms": 0.8338205128205128, "prompt_per_second": 1199.2988714290107, "predicted_n": 2, "predicted_ms": 33.338, "predicted_per_token_ms": 16.669, "predicted_per_second": 59.991601175835385}, "tps": 59.991601175835385}, {"id": "5005e8be-0d28-44d1-9ea0-90cf99350460", "answer": "BD", "llm_answer": "ABCD", "score": 0, "topics": ["WebSecurity", "ApplicationSecurity"], "timings": {"cache_n": 58, "prompt_n": 152, "prompt_ms": 97.075, "prompt_per_token_ms": 0.6386513157894737, "prompt_per_second": 1565.7996394540303, "predicted_n": 56, "predicted_ms": 1589.855, "predicted_per_token_ms": 28.390267857142856, "predicted_per_second": 35.22333797736272}, "tps": 35.22333797736272}, {"id": "21d5dd57-d1c0-4cea-a08b-192e6d4c4063", "answer": "AC", "llm_answer": "C", "score": 0, "topics": ["WebSecurity"], "timings": {"cache_n": 59, "prompt_n": 183, "prompt_ms": 99.662, "prompt_per_token_ms": 0.5446010928961749, "prompt_per_second": 1836.2063775561398, "predicted_n": 2, "predicted_ms": 29.777, "predicted_per_token_ms": 14.8885, "predicted_per_second": 67.16593343855996}, "tps": 67.16593343855996}, {"id": "9a80376e-b326-4f9d-ba42-f48f9b4097c1", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["SystemSecurity", "WebSecurity"], "timings": {"cache_n": 63, "prompt_n": 121, "prompt_ms": 95.993, "prompt_per_token_ms": 0.7933305785123966, "prompt_per_second": 1260.508578750534, "predicted_n": 2, "predicted_ms": 32.153, "predicted_per_token_ms": 16.0765, "predicted_per_second": 62.202593848163474}, "tps": 62.202593848163474}, {"id": "f81c42b9-9d55-43d0-8682-6abc2509a98d", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["SystemSecurity", "PenTest"], "timings": {"cache_n": 59, "prompt_n": 128, "prompt_ms": 97.429, "prompt_per_token_ms": 0.7611640625, "prompt_per_second": 1313.7772121237003, "predicted_n": 2, "predicted_ms": 29.423, "predicted_per_token_ms": 14.7115, "predicted_per_second": 67.97403391904292}, "tps": 67.97403391904292}, {"id": "97109a9e-c909-48ba-926a-d0a5d4d2311e", "answer": "BD", "llm_answer": "D", "score": 0, "topics": ["ApplicationSecurity", "WebSecurity"], "timings": {"cache_n": 63, "prompt_n": 125, "prompt_ms": 96.73, "prompt_per_token_ms": 0.7738400000000001, "prompt_per_second": 1292.2567972707534, "predicted_n": 2, "predicted_ms": 29.36, "predicted_per_token_ms": 14.68, "predicted_per_second": 68.11989100817439}, "tps": 68.11989100817439}, {"id": "73cd727b-6889-4a5e-a00f-c89617305a74", "answer": "CD", "llm_answer": "BCD", "score": 0, "topics": ["SoftwareSecurity", "ApplicationSecurity", "Vulnerability"], "timings": {"cache_n": 59, "prompt_n": 145, "prompt_ms": 98.791, "prompt_per_token_ms": 0.6813172413793103, "prompt_per_second": 1467.7450375034164, "predicted_n": 7, "predicted_ms": 174.035, "predicted_per_token_ms": 24.862142857142857, "predicted_per_second": 40.22179446663027}, "tps": 40.22179446663027}, {"id": "4e38d090-4e50-49e4-b447-a00e45a213b3", "answer": "", "llm_answer": "B", "score": 0, "topics": ["SystemSecurity", "WebSecurity"], "timings": {"cache_n": 62, "prompt_n": 202, "prompt_ms": 103.53, "prompt_per_token_ms": 0.5125247524752475, "prompt_per_second": 1951.1252776972858, "predicted_n": 2, "predicted_ms": 29.657, "predicted_per_token_ms": 14.8285, "predicted_per_second": 67.43770442054152}, "tps": 67.43770442054152}, {"id": "30cfae98-b024-49b0-8676-595d03799f78", "answer": "B", "llm_answer": "BC", "score": 0, "topics": ["WebSecurity"], "timings": {"cache_n": 60, "prompt_n": 106, "prompt_ms": 94.778, "prompt_per_token_ms": 0.8941320754716982, "prompt_per_second": 1118.4030049167527, "predicted_n": 5, "predicted_ms": 108.121, "predicted_per_token_ms": 21.6242, "predicted_per_second": 46.2444853451226}, "tps": 46.2444853451226}, {"id": "250924e2-7f95-4c34-afa5-53aa9676a569", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SoftwareSecurity", "SystemSecurity", "Vulnerability"], "timings": {"cache_n": 58, "prompt_n": 143, "prompt_ms": 93.73, "prompt_per_token_ms": 0.6554545454545455, "prompt_per_second": 1525.6588072122051, "predicted_n": 2, "predicted_ms": 29.341, "predicted_per_token_ms": 14.6705, "predicted_per_second": 68.1640025902321}, "tps": 68.1640025902321}, {"id": "f7fc8a67-ce56-4449-8734-7b7ecf0ef662", "answer": "ABC", "llm_answer": "AC", "score": 0, "topics": ["ApplicationSecurity"], "timings": {"cache_n": 61, "prompt_n": 149, "prompt_ms": 97.405, "prompt_per_token_ms": 0.6537248322147651, "prompt_per_second": 1529.6956008418458, "predicted_n": 2, "predicted_ms": 28.285, "predicted_per_token_ms": 14.1425, "predicted_per_second": 70.7088562842496}, "tps": 70.7088562842496}, {"id": "b9d8faf0-a1ab-4c6a-b493-1ef59cf35eb4", "answer": "AC", "llm_answer": "AC", "score": 1, "topics": ["SystemSecurity", "Vulnerability"], "timings": {"cache_n": 60, "prompt_n": 128, "prompt_ms": 96.874, "prompt_per_token_ms": 0.756828125, "prompt_per_second": 1321.303961847348, "predicted_n": 2, "predicted_ms": 28.495, "predicted_per_token_ms": 14.2475, "predicted_per_second": 70.1877522372346}, "tps": 70.1877522372346}, {"id": "64147f43-fab2-4d78-bcc9-a47d5266fdac", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["ApplicationSecurity", "WebSecurity"], "timings": {"cache_n": 60, "prompt_n": 137, "prompt_ms": 98.141, "prompt_per_token_ms": 0.7163576642335767, "prompt_per_second": 1395.9507239583863, "predicted_n": 2, "predicted_ms": 28.687, "predicted_per_token_ms": 14.3435, "predicted_per_second": 69.71799072750723}, "tps": 69.71799072750723}, {"id": "ea569ad5-39e2-4506-8baa-58c16e265e72", "answer": "B", "llm_answer": "B", "score": 1, "topics": [], "timings": {"cache_n": 59, "prompt_n": 102, "prompt_ms": 94.064, "prompt_per_token_ms": 0.9221960784313725, "prompt_per_second": 1084.3680898111925, "predicted_n": 2, "predicted_ms": 28.535, "predicted_per_token_ms": 14.2675, "predicted_per_second": 70.08936393902225}, "tps": 70.08936393902225}, {"id": "ce4bbd31-e142-4f80-a29b-9b992cf656ce", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["ApplicationSecurity"], "timings": {"cache_n": 59, "prompt_n": 134, "prompt_ms": 97.899, "prompt_per_token_ms": 0.730589552238806, "prompt_per_second": 1368.7575971153944, "predicted_n": 2, "predicted_ms": 28.298, "predicted_per_token_ms": 14.149, "predicted_per_second": 70.67637288854337}, "tps": 70.67637288854337}, {"id": "30f114d9-f96d-4297-973d-9458cf3521c1", "answer": "AD", "llm_answer": "ABCD", "score": 0, "topics": ["ApplicationSecurity"], "timings": {"cache_n": 60, "prompt_n": 139, "prompt_ms": 98.914, "prompt_per_token_ms": 0.7116115107913669, "prompt_per_second": 1405.2611359362677, "predicted_n": 5, "predicted_ms": 116.873, "predicted_per_token_ms": 23.3746, "predicted_per_second": 42.78148075261181}, "tps": 42.78148075261181}, {"id": "7163c99a-419c-4183-aee7-ba9014b6171a", "answer": "ACD", "llm_answer": "ABCD", "score": 0, "topics": ["SystemSecurity"], "timings": {"cache_n": 61, "prompt_n": 118, "prompt_ms": 98.607, "prompt_per_token_ms": 0.8356525423728813, "prompt_per_second": 1196.6696076343464, "predicted_n": 5, "predicted_ms": 116.219, "predicted_per_token_ms": 23.2438, "predicted_per_second": 43.02222528158047}, "tps": 43.02222528158047}, {"id": "bf6293ee-4ac1-42eb-a12d-1e7759a65270", "answer": "", "llm_answer": "A", "score": 0, "topics": ["SystemSecurity"], "timings": {"cache_n": 64, "prompt_n": 72, "prompt_ms": 78.561, "prompt_per_token_ms": 1.0911250000000001, "prompt_per_second": 916.4852789552067, "predicted_n": 2, "predicted_ms": 25.848, "predicted_per_token_ms": 12.924, "predicted_per_second": 77.37542556484061}, "tps": 77.37542556484061}, {"id": "4ce932e9-3a7b-436c-95c2-e7117e63e876", "answer": "AC", "llm_answer": "ACD", "score": 0, "topics": ["MemorySafety"], "timings": {"cache_n": 58, "prompt_n": 137, "prompt_ms": 94.257, "prompt_per_token_ms": 0.688007299270073, "prompt_per_second": 1453.472951611021, "predicted_n": 4, "predicted_ms": 88.483, "predicted_per_token_ms": 22.12075, "predicted_per_second": 45.206423832826644}, "tps": 45.206423832826644}, {"id": "a1c55e1a-4c9e-4434-9841-7354aeda3586", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["WebSecurity", "ApplicationSecurity", "SystemSecurity"], "timings": {"cache_n": 66, "prompt_n": 162, "prompt_ms": 98.62, "prompt_per_token_ms": 0.6087654320987654, "prompt_per_second": 1642.668829851957, "predicted_n": 2, "predicted_ms": 28.564, "predicted_per_token_ms": 14.282, "predicted_per_second": 70.01820473323065}, "tps": 70.01820473323065}, {"id": "fa48e00e-72cf-4d13-8f14-2baec109355e", "answer": "AC", "llm_answer": "C", "score": 0, "topics": ["ApplicationSecurity", "SystemSecurity"], "timings": {"cache_n": 60, "prompt_n": 157, "prompt_ms": 97.507, "prompt_per_token_ms": 0.621063694267516, "prompt_per_second": 1610.1408104033555, "predicted_n": 2, "predicted_ms": 30.698, "predicted_per_token_ms": 15.349, "predicted_per_second": 65.1508241579256}, "tps": 65.1508241579256}, {"id": "736d01a1-635c-4e13-b20f-83b3a8c3336f", "answer": "AC", "llm_answer": "A", "score": 0, "topics": ["SoftwareSecurity", "WebSecurity", "Vulnerability"], "timings": {"cache_n": 63, "prompt_n": 212, "prompt_ms": 101.338, "prompt_per_token_ms": 0.47800943396226414, "prompt_per_second": 2092.008920641813, "predicted_n": 2, "predicted_ms": 30.85, "predicted_per_token_ms": 15.425, "predicted_per_second": 64.82982171799027}, "tps": 64.82982171799027}, {"id": "f5b58659-3b6c-4011-9372-c2d504367aa0", "answer": "A", "llm_answer": "B", "score": 0, "topics": ["WebSecurity"], "timings": {"cache_n": 60, "prompt_n": 127, "prompt_ms": 96.249, "prompt_per_token_ms": 0.7578661417322834, "prompt_per_second": 1319.494228511465, "predicted_n": 2, "predicted_ms": 33.161, "predicted_per_token_ms": 16.5805, "predicted_per_second": 60.31181206839359}, "tps": 60.31181206839359}, {"id": "d97a4d69-da40-41b6-aaff-ebeabd6248f9", "answer": "AC", "llm_answer": "BC", "score": 0, "topics": ["NetworkSecurity", "SystemSecurity"], "timings": {"cache_n": 60, "prompt_n": 117, "prompt_ms": 96.951, "prompt_per_token_ms": 0.8286410256410256, "prompt_per_second": 1206.795185196646, "predicted_n": 3, "predicted_ms": 59.766, "predicted_per_token_ms": 19.922, "predicted_per_second": 50.195763477562494}, "tps": 50.195763477562494}, {"id": "2e4273a6-0178-4c96-a29c-de5954857e3f", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["PenTest", "NetworkSecurity"], "timings": {"cache_n": 59, "prompt_n": 100, "prompt_ms": 94.72, "prompt_per_token_ms": 0.9472, "prompt_per_second": 1055.7432432432433, "predicted_n": 2, "predicted_ms": 29.562, "predicted_per_token_ms": 14.781, "predicted_per_second": 67.65442121642648}, "tps": 67.65442121642648}, {"id": "cca7be5c-42b8-4e47-b7ee-90ce342a4bcd", "answer": "BD", "llm_answer": "BD", "score": 1, "topics": ["SystemSecurity", "Vulnerability"], "timings": {"cache_n": 59, "prompt_n": 157, "prompt_ms": 99.928, "prompt_per_token_ms": 0.636484076433121, "prompt_per_second": 1571.1312144744218, "predicted_n": 2, "predicted_ms": 30.604, "predicted_per_token_ms": 15.302, "predicted_per_second": 65.35093451836362}, "tps": 65.35093451836362}, {"id": "a4f9f371-24df-44e5-9477-f518c29ff2e8", "answer": "AC", "llm_answer": "AC", "score": 1, "topics": ["SystemSecurity", "SoftwareSecurity"], "timings": {"cache_n": 59, "prompt_n": 113, "prompt_ms": 91.15, "prompt_per_token_ms": 0.8066371681415929, "prompt_per_second": 1239.7147558968732, "predicted_n": 2, "predicted_ms": 26.539, "predicted_per_token_ms": 13.2695, "predicted_per_second": 75.3607897810769}, "tps": 75.3607897810769}, {"id": "fc98dcbd-fd14-4c6a-a007-90d4722e6d07", "answer": "AB", "llm_answer": "AC", "score": 0, "topics": ["SoftwareSecurity", "SystemSecurity"], "timings": {"cache_n": 61, "prompt_n": 137, "prompt_ms": 90.234, "prompt_per_token_ms": 0.6586423357664233, "prompt_per_second": 1518.2747079814706, "predicted_n": 2, "predicted_ms": 29.938, "predicted_per_token_ms": 14.969, "predicted_per_second": 66.80472977486806}, "tps": 66.80472977486806}, {"id": "419eaba4-9bb4-4fdc-b9d0-b807c4a7cb44", "answer": "BC", "llm_answer": "AB", "score": 0, "topics": ["WebSecurity", "NetworkSecurity"], "timings": {"cache_n": 60, "prompt_n": 180, "prompt_ms": 100.07, "prompt_per_token_ms": 0.5559444444444444, "prompt_per_second": 1798.7408813830318, "predicted_n": 3, "predicted_ms": 58.939, "predicted_per_token_ms": 19.646333333333335, "predicted_per_second": 50.900083136802465}, "tps": 50.900083136802465}, {"id": "c6e4411d-56c5-4f0c-a0c0-3153cca22e4e", "answer": "ACD", "llm_answer": "ABCD", "score": 0, "topics": [], "timings": {"cache_n": 59, "prompt_n": 187, "prompt_ms": 99.113, "prompt_per_token_ms": 0.5300160427807487, "prompt_per_second": 1886.7353424878672, "predicted_n": 3, "predicted_ms": 67.382, "predicted_per_token_ms": 22.46066666666667, "predicted_per_second": 44.522275978748034}, "tps": 44.522275978748034}, {"id": "f2960f08-2d06-4861-bb99-8c19c01a1c49", "answer": "ABD", "llm_answer": "ABD", "score": 1, "topics": ["ApplicationSecurity", "SystemSecurity"], "timings": {"cache_n": 61, "prompt_n": 152, "prompt_ms": 100.283, "prompt_per_token_ms": 0.6597565789473684, "prompt_per_second": 1515.7105391741372, "predicted_n": 4, "predicted_ms": 90.695, "predicted_per_token_ms": 22.67375, "predicted_per_second": 44.103864601135676}, "tps": 44.103864601135676}, {"id": "4086a8ad-f5b4-4d25-a243-3e52d801f878", "answer": "AC", "llm_answer": "AC", "score": 1, "topics": ["ApplicationSecurity"], "timings": {"cache_n": 59, "prompt_n": 115, "prompt_ms": 96.081, "prompt_per_token_ms": 0.8354869565217392, "prompt_per_second": 1196.9067765739323, "predicted_n": 2, "predicted_ms": 29.383, "predicted_per_token_ms": 14.6915, "predicted_per_second": 68.06656910458429}, "tps": 68.06656910458429}, {"id": "30743e53-b9e9-421f-8f99-0dc49fe2e905", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["PenTest"], "timings": {"cache_n": 59, "prompt_n": 138, "prompt_ms": 97.767, "prompt_per_token_ms": 0.7084565217391304, "prompt_per_second": 1411.5192242781307, "predicted_n": 2, "predicted_ms": 29.673, "predicted_per_token_ms": 14.8365, "predicted_per_second": 67.4013412866916}, "tps": 67.4013412866916}, {"id": "da9aed39-5ffb-4f59-8c8c-b6c4cb2ddf4b", "answer": "ABC", "llm_answer": "A", "score": 0, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 60, "prompt_n": 176, "prompt_ms": 100.217, "prompt_per_token_ms": 0.5694147727272727, "prompt_per_second": 1756.1890697187105, "predicted_n": 2, "predicted_ms": 30.095, "predicted_per_token_ms": 15.0475, "predicted_per_second": 66.45622196378136}, "tps": 66.45622196378136}, {"id": "45303985-3079-47a1-bea7-c2b7c02cafa9", "answer": "AC", "llm_answer": "ABCD", "score": 0, "topics": ["ApplicationSecurity"], "timings": {"cache_n": 62, "prompt_n": 160, "prompt_ms": 96.883, "prompt_per_token_ms": 0.60551875, "prompt_per_second": 1651.4765232290495, "predicted_n": 64, "predicted_ms": 1848.89, "predicted_per_token_ms": 28.88890625, "predicted_per_second": 34.61536381288232}, "tps": 34.61536381288232}, {"id": "6e982709-2d89-4711-b50d-4174a18555fd", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["PenTest"], "timings": {"cache_n": 59, "prompt_n": 120, "prompt_ms": 98.128, "prompt_per_token_ms": 0.8177333333333333, "prompt_per_second": 1222.8925485080713, "predicted_n": 2, "predicted_ms": 29.199, "predicted_per_token_ms": 14.5995, "predicted_per_second": 68.4954964211103}, "tps": 68.4954964211103}, {"id": "17fea1ac-e986-493f-8f2d-ecb922c75adf", "answer": "ABD", "llm_answer": "C", "score": 0, "topics": ["SystemSecurity", "WebSecurity"], "timings": {"cache_n": 59, "prompt_n": 163, "prompt_ms": 98.757, "prompt_per_token_ms": 0.6058711656441718, "prompt_per_second": 1650.5159127960549, "predicted_n": 2, "predicted_ms": 29.59, "predicted_per_token_ms": 14.795, "predicted_per_second": 67.59040216289287}, "tps": 67.59040216289287}, {"id": "a34bfa90-5116-48c7-a78c-5b703720c376", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 61, "prompt_n": 137, "prompt_ms": 97.438, "prompt_per_token_ms": 0.7112262773722627, "prompt_per_second": 1406.022291097929, "predicted_n": 2, "predicted_ms": 35.043, "predicted_per_token_ms": 17.5215, "predicted_per_second": 57.0727392061182}, "tps": 57.0727392061182}, {"id": "3d4e2788-8952-47c0-9c96-c7185c5c3416", "answer": "B", "llm_answer": "A", "score": 0, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 59, "prompt_n": 143, "prompt_ms": 96.951, "prompt_per_token_ms": 0.677979020979021, "prompt_per_second": 1474.9718930181227, "predicted_n": 2, "predicted_ms": 30.118, "predicted_per_token_ms": 15.059, "predicted_per_second": 66.40547181087722}, "tps": 66.40547181087722}, {"id": "fd8a9dac-a3d5-491e-90d4-73274548e409", "answer": "AC", "llm_answer": "CD", "score": 0, "topics": ["NetworkSecurity", "PenTest"], "timings": {"cache_n": 63, "prompt_n": 109, "prompt_ms": 95.022, "prompt_per_token_ms": 0.8717614678899083, "prompt_per_second": 1147.102776199196, "predicted_n": 4, "predicted_ms": 89.492, "predicted_per_token_ms": 22.373, "predicted_per_second": 44.6967326688419}, "tps": 44.6967326688419}, {"id": "4a0e751e-8afa-46d5-9017-b224a55ea16c", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SystemSecurity", "NetworkSecurity", "PenTest"], "timings": {"cache_n": 62, "prompt_n": 106, "prompt_ms": 94.152, "prompt_per_token_ms": 0.8882264150943396, "prompt_per_second": 1125.83906873991, "predicted_n": 2, "predicted_ms": 28.562, "predicted_per_token_ms": 14.281, "predicted_per_second": 70.02310762551642}, "tps": 70.02310762551642}, {"id": "da4d38d7-72e5-4820-bae6-5fcd75929b05", "answer": "BD", "llm_answer": "BD", "score": 1, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 58, "prompt_n": 121, "prompt_ms": 82.735, "prompt_per_token_ms": 0.6837603305785124, "prompt_per_second": 1462.500755423944, "predicted_n": 2, "predicted_ms": 27.277, "predicted_per_token_ms": 13.6385, "predicted_per_second": 73.32184624408842}, "tps": 73.32184624408842}, {"id": "0a9cf3d0-7c14-42ec-ba3b-2604670594eb", "answer": "D", "llm_answer": "D", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 61, "prompt_n": 112, "prompt_ms": 92.368, "prompt_per_token_ms": 0.8247142857142856, "prompt_per_second": 1212.5411397886714, "predicted_n": 2, "predicted_ms": 31.403, "predicted_per_token_ms": 15.7015, "predicted_per_second": 63.68818265770786}, "tps": 63.68818265770786}, {"id": "31c82208-d599-4ff1-98cd-08a1e61d7711", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 58, "prompt_n": 184, "prompt_ms": 98.572, "prompt_per_token_ms": 0.5357173913043478, "prompt_per_second": 1866.6558454733595, "predicted_n": 2, "predicted_ms": 29.373, "predicted_per_token_ms": 14.6865, "predicted_per_second": 68.08974228032547}, "tps": 68.08974228032547}, {"id": "b2d86a9f-a977-46fc-b7b2-a170aab49b9c", "answer": "AC", "llm_answer": "C", "score": 0, "topics": ["SystemSecurity", "NetworkSecurity", "PenTest"], "timings": {"cache_n": 61, "prompt_n": 102, "prompt_ms": 95.338, "prompt_per_token_ms": 0.9346862745098039, "prompt_per_second": 1069.8776982944892, "predicted_n": 2, "predicted_ms": 30.201, "predicted_per_token_ms": 15.1005, "predicted_per_second": 66.22297274924671}, "tps": 66.22297274924671}, {"id": "473fc1eb-e53e-4f51-89a4-2488104fffe0", "answer": "B", "llm_answer": "A", "score": 0, "topics": ["SoftwareSecurity", "ApplicationSecurity", "Vulnerability"], "timings": {"cache_n": 58, "prompt_n": 104, "prompt_ms": 93.923, "prompt_per_token_ms": 0.9031057692307692, "prompt_per_second": 1107.2900141605357, "predicted_n": 2, "predicted_ms": 30.033, "predicted_per_token_ms": 15.0165, "predicted_per_second": 66.59341391136417}, "tps": 66.59341391136417}, {"id": "74534f48-9c84-475b-b9f4-1ac4217775a8", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["PenTest"], "timings": {"cache_n": 58, "prompt_n": 135, "prompt_ms": 96.461, "prompt_per_token_ms": 0.7145259259259259, "prompt_per_second": 1399.5293434652347, "predicted_n": 2, "predicted_ms": 29.064, "predicted_per_token_ms": 14.532, "predicted_per_second": 68.81365262868152}, "tps": 68.81365262868152}, {"id": "d9c5a048-da75-4cb4-9ab8-0b9e02d59a13", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["PenTest", "SystemSecurity"], "timings": {"cache_n": 63, "prompt_n": 77, "prompt_ms": 92.863, "prompt_per_token_ms": 1.206012987012987, "prompt_per_second": 829.1784672043763, "predicted_n": 2, "predicted_ms": 29.61, "predicted_per_token_ms": 14.805, "predicted_per_second": 67.54474839581222}, "tps": 67.54474839581222}, {"id": "0c887746-723c-4626-be56-34c3b9fb74fc", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SystemSecurity", "PenTest"], "timings": {"cache_n": 62, "prompt_n": 107, "prompt_ms": 95.014, "prompt_per_token_ms": 0.8879813084112149, "prompt_per_second": 1126.149830551287, "predicted_n": 2, "predicted_ms": 29.123, "predicted_per_token_ms": 14.5615, "predicted_per_second": 68.67424372489097}, "tps": 68.67424372489097}, {"id": "ec39b6b0-00f0-41e9-9b71-974b85c8679f", "answer": "ABD", "llm_answer": "AB", "score": 0, "topics": ["SystemSecurity"], "timings": {"cache_n": 58, "prompt_n": 133, "prompt_ms": 95.655, "prompt_per_token_ms": 0.7192105263157895, "prompt_per_second": 1390.4134650567144, "predicted_n": 3, "predicted_ms": 61.466, "predicted_per_token_ms": 20.488666666666667, "predicted_per_second": 48.8074707968633}, "tps": 48.8074707968633}, {"id": "503377b3-5649-42c3-84bb-349d3154aaeb", "answer": "AC", "llm_answer": "ACD", "score": 0, "topics": ["ApplicationSecurity", "SystemSecurity"], "timings": {"cache_n": 59, "prompt_n": 167, "prompt_ms": 99.132, "prompt_per_token_ms": 0.5936047904191617, "prompt_per_second": 1684.6225235040147, "predicted_n": 3, "predicted_ms": 56.229, "predicted_per_token_ms": 18.743, "predicted_per_second": 53.35325188070213}, "tps": 53.35325188070213}, {"id": "93cf69fb-76c1-4ece-8efc-d057969df93f", "answer": "AC", "llm_answer": "C", "score": 0, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 62, "prompt_n": 150, "prompt_ms": 83.016, "prompt_per_token_ms": 0.55344, "prompt_per_second": 1806.880601329864, "predicted_n": 2, "predicted_ms": 28.949, "predicted_per_token_ms": 14.4745, "predicted_per_second": 69.0870150955128}, "tps": 69.0870150955128}, {"id": "c140cbb2-2813-4720-83e3-5b55d72b2fef", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["SystemSecurity", "Vulnerability", "PenTest"], "timings": {"cache_n": 60, "prompt_n": 120, "prompt_ms": 96.844, "prompt_per_token_ms": 0.8070333333333333, "prompt_per_second": 1239.1061914006032, "predicted_n": 2, "predicted_ms": 29.392, "predicted_per_token_ms": 14.696, "predicted_per_second": 68.04572672836146}, "tps": 68.04572672836146}, {"id": "6030da4c-c2e4-486c-80f1-492d730ed386", "answer": "ABD", "llm_answer": "A", "score": 0, "topics": ["PenTest"], "timings": {"cache_n": 61, "prompt_n": 158, "prompt_ms": 97.363, "prompt_per_token_ms": 0.6162215189873418, "prompt_per_second": 1622.7930528023992, "predicted_n": 2, "predicted_ms": 29.424, "predicted_per_token_ms": 14.712, "predicted_per_second": 67.97172376291464}, "tps": 67.97172376291464}, {"id": "369b13e8-abfa-49ad-b566-2b16263e435b", "answer": "ACD", "llm_answer": "ACD", "score": 1, "topics": ["MemorySafety", "WebSecurity"], "timings": {"cache_n": 59, "prompt_n": 202, "prompt_ms": 102.306, "prompt_per_token_ms": 0.5064653465346535, "prompt_per_second": 1974.4687506109126, "predicted_n": 3, "predicted_ms": 65.554, "predicted_per_token_ms": 21.851333333333333, "predicted_per_second": 45.76379778503218}, "tps": 45.76379778503218}, {"id": "fb0d169e-7be5-4a4e-b370-383872346b4f", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SystemSecurity", "WebSecurity"], "timings": {"cache_n": 60, "prompt_n": 192, "prompt_ms": 99.848, "prompt_per_token_ms": 0.5200416666666666, "prompt_per_second": 1922.922842720936, "predicted_n": 2, "predicted_ms": 29.325, "predicted_per_token_ms": 14.6625, "predicted_per_second": 68.20119352088662}, "tps": 68.20119352088662}, {"id": "ab980528-333e-49e7-8a36-5adf785bc5d3", "answer": "BD", "llm_answer": "ABD", "score": 0, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 60, "prompt_n": 94, "prompt_ms": 92.943, "prompt_per_token_ms": 0.9887553191489361, "prompt_per_second": 1011.3725616775873, "predicted_n": 4, "predicted_ms": 86.812, "predicted_per_token_ms": 21.703, "predicted_per_second": 46.07657927475464}, "tps": 46.07657927475464}, {"id": "7e736966-289e-4133-9262-e81969f2bcca", "answer": "ACD", "llm_answer": "ACD", "score": 1, "topics": ["ApplicationSecurity", "NetworkSecurity"], "timings": {"cache_n": 58, "prompt_n": 144, "prompt_ms": 97.351, "prompt_per_token_ms": 0.6760486111111111, "prompt_per_second": 1479.183572844655, "predicted_n": 3, "predicted_ms": 58.565, "predicted_per_token_ms": 19.521666666666665, "predicted_per_second": 51.22513446597797}, "tps": 51.22513446597797}, {"id": "715663af-a01e-40c5-be22-91e98f4f237e", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["SystemSecurity", "WebSecurity"], "timings": {"cache_n": 63, "prompt_n": 88, "prompt_ms": 97.212, "prompt_per_token_ms": 1.1046818181818183, "prompt_per_second": 905.2380364564045, "predicted_n": 2, "predicted_ms": 29.389, "predicted_per_token_ms": 14.6945, "predicted_per_second": 68.052672768723}, "tps": 68.052672768723}, {"id": "65474e82-848d-4ab8-a77e-132883717644", "answer": "D", "llm_answer": "D", "score": 1, "topics": ["ApplicationSecurity", "Vulnerability"], "timings": {"cache_n": 58, "prompt_n": 110, "prompt_ms": 94.51, "prompt_per_token_ms": 0.8591818181818183, "prompt_per_second": 1163.8980002116177, "predicted_n": 2, "predicted_ms": 27.327, "predicted_per_token_ms": 13.6635, "predicted_per_second": 73.18768983057049}, "tps": 73.18768983057049}, {"id": "ca83945d-955a-49bd-a017-7162004ff442", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["WebSecurity", "SystemSecurity"], "timings": {"cache_n": 58, "prompt_n": 93, "prompt_ms": 77.939, "prompt_per_token_ms": 0.8380537634408601, "prompt_per_second": 1193.240867858197, "predicted_n": 2, "predicted_ms": 28.848, "predicted_per_token_ms": 14.424, "predicted_per_second": 69.32889628397116}, "tps": 69.32889628397116}, {"id": "a64e9aff-9368-4947-8e1a-26d48d1d3c71", "answer": "ABD", "llm_answer": "ABCD", "score": 0, "topics": ["ApplicationSecurity", "SystemSecurity"], "timings": {"cache_n": 58, "prompt_n": 137, "prompt_ms": 97.014, "prompt_per_token_ms": 0.7081313868613138, "prompt_per_second": 1412.1673160574762, "predicted_n": 3, "predicted_ms": 59.099, "predicted_per_token_ms": 19.699666666666666, "predicted_per_second": 50.76228024162846}, "tps": 50.76228024162846}, {"id": "855c165f-b3b5-4eeb-8689-141511af8064", "answer": "B", "llm_answer": "A", "score": 0, "topics": ["PenTest"], "timings": {"cache_n": 60, "prompt_n": 62, "prompt_ms": 90.594, "prompt_per_token_ms": 1.4611935483870966, "prompt_per_second": 684.372033468, "predicted_n": 2, "predicted_ms": 29.903, "predicted_per_token_ms": 14.9515, "predicted_per_second": 66.88292144600877}, "tps": 66.88292144600877}, {"id": "7ba50534-39cb-45c9-ae5f-eb2194e6be17", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["SystemSecurity", "NetworkSecurity", "PenTest"], "timings": {"cache_n": 58, "prompt_n": 95, "prompt_ms": 94.624, "prompt_per_token_ms": 0.9960421052631578, "prompt_per_second": 1003.9736219141023, "predicted_n": 2, "predicted_ms": 28.606, "predicted_per_token_ms": 14.303, "predicted_per_second": 69.9154023631406}, "tps": 69.9154023631406}, {"id": "25eb025e-29d6-4ed8-9d1a-c1faf01483e6", "answer": "AD", "llm_answer": "A", "score": 0, "topics": ["SoftwareSecurity", "ApplicationSecurity", "MemorySafety", "Vulnerability"], "timings": {"cache_n": 59, "prompt_n": 137, "prompt_ms": 97.127, "prompt_per_token_ms": 0.708956204379562, "prompt_per_second": 1410.524365006641, "predicted_n": 2, "predicted_ms": 30.775, "predicted_per_token_ms": 15.3875, "predicted_per_second": 64.98781478472786}, "tps": 64.98781478472786}, {"id": "b8cb36ab-fc8c-4904-9671-8d86c89a442c", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 60, "prompt_n": 113, "prompt_ms": 96.007, "prompt_per_token_ms": 0.8496194690265487, "prompt_per_second": 1176.9975105981855, "predicted_n": 2, "predicted_ms": 31.564, "predicted_per_token_ms": 15.782, "predicted_per_second": 63.36332530731213}, "tps": 63.36332530731213}, {"id": "355c08d4-4ab2-4ca4-b559-4369439a1dc5", "answer": "BC", "llm_answer": "C", "score": 0, "topics": ["ApplicationSecurity", "SystemSecurity"], "timings": {"cache_n": 62, "prompt_n": 232, "prompt_ms": 103.749, "prompt_per_token_ms": 0.44719396551724133, "prompt_per_second": 2236.1661317217513, "predicted_n": 2, "predicted_ms": 30.996, "predicted_per_token_ms": 15.498, "predicted_per_second": 64.52445476835722}, "tps": 64.52445476835722}, {"id": "9a76be1c-a4c0-478c-9a64-3f5ad8bd92e9", "answer": "D", "llm_answer": "A", "score": 0, "topics": ["SystemSecurity", "Vulnerability"], "timings": {"cache_n": 60, "prompt_n": 104, "prompt_ms": 94.421, "prompt_per_token_ms": 0.9078942307692308, "prompt_per_second": 1101.4498893254677, "predicted_n": 2, "predicted_ms": 28.652, "predicted_per_token_ms": 14.326, "predicted_per_second": 69.80315510261063}, "tps": 69.80315510261063}, {"id": "288a65c5-4c09-4e45-b8a0-82a82fa3c336", "answer": "AC", "llm_answer": "AC", "score": 1, "topics": ["PenTest"], "timings": {"cache_n": 59, "prompt_n": 116, "prompt_ms": 96.86, "prompt_per_token_ms": 0.835, "prompt_per_second": 1197.6047904191616, "predicted_n": 3, "predicted_ms": 59.148, "predicted_per_token_ms": 19.716, "predicted_per_second": 50.720227226617965}, "tps": 50.720227226617965}, {"id": "690ce3ec-f2d0-442d-b47a-be8d1147f7be", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["SystemSecurity", "WebSecurity"], "timings": {"cache_n": 58, "prompt_n": 151, "prompt_ms": 92.412, "prompt_per_token_ms": 0.612, "prompt_per_second": 1633.986928104575, "predicted_n": 2, "predicted_ms": 25.702, "predicted_per_token_ms": 12.851, "predicted_per_second": 77.81495603454984}, "tps": 77.81495603454984}, {"id": "b2ccc4b1-ef9d-4f70-a49b-871bbcc36637", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["WebSecurity", "ApplicationSecurity"], "timings": {"cache_n": 59, "prompt_n": 166, "prompt_ms": 93.062, "prompt_per_token_ms": 0.5606144578313252, "prompt_per_second": 1783.7570651823517, "predicted_n": 2, "predicted_ms": 32.698, "predicted_per_token_ms": 16.349, "predicted_per_second": 61.16582053948254}, "tps": 61.16582053948254}, {"id": "2cad7156-2758-4eb6-9a96-7a790ed637dd", "answer": "BD", "llm_answer": "AD", "score": 0, "topics": ["NetworkSecurity", "PenTest"], "timings": {"cache_n": 59, "prompt_n": 127, "prompt_ms": 96.049, "prompt_per_token_ms": 0.7562913385826772, "prompt_per_second": 1322.241772428656, "predicted_n": 3, "predicted_ms": 58.164, "predicted_per_token_ms": 19.388, "predicted_per_second": 51.57829585310502}, "tps": 51.57829585310502}, {"id": "d979d27f-c384-4aa6-a739-e0767df9f788", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["ApplicationSecurity", "PenTest"], "timings": {"cache_n": 60, "prompt_n": 109, "prompt_ms": 94.66, "prompt_per_token_ms": 0.8684403669724771, "prompt_per_second": 1151.4895415170083, "predicted_n": 2, "predicted_ms": 32.066, "predicted_per_token_ms": 16.033, "predicted_per_second": 62.37135907191417}, "tps": 62.37135907191417}, {"id": "cebad99a-395c-4436-8623-4e71342b17c0", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SystemSecurity", "PenTest"], "timings": {"cache_n": 60, "prompt_n": 97, "prompt_ms": 95.039, "prompt_per_token_ms": 0.9797835051546392, "prompt_per_second": 1020.6336346131587, "predicted_n": 2, "predicted_ms": 29.603, "predicted_per_token_ms": 14.8015, "predicted_per_second": 67.5607201972773}, "tps": 67.5607201972773}, {"id": "90f2b0f4-d6ab-4284-8876-d95630371162", "answer": "B", "llm_answer": "A", "score": 0, "topics": ["PenTest"], "timings": {"cache_n": 58, "prompt_n": 81, "prompt_ms": 93.014, "prompt_per_token_ms": 1.148320987654321, "prompt_per_second": 870.8366482465005, "predicted_n": 2, "predicted_ms": 30.122, "predicted_per_token_ms": 15.061, "predicted_per_second": 66.39665360865813}, "tps": 66.39665360865813}, {"id": "e2306e2c-94d9-4c43-9e69-a427a42df9a3", "answer": "AC", "llm_answer": "AC", "score": 1, "topics": ["NetworkSecurity", "SystemSecurity", "PenTest"], "timings": {"cache_n": 59, "prompt_n": 147, "prompt_ms": 98.573, "prompt_per_token_ms": 0.67056462585034, "prompt_per_second": 1491.2805737879542, "predicted_n": 2, "predicted_ms": 29.612, "predicted_per_token_ms": 14.806, "predicted_per_second": 67.5401864109145}, "tps": 67.5401864109145}, {"id": "327c650f-cf7d-4379-bed7-1a93ac1124d2", "answer": "AC", "llm_answer": "ABCD", "score": 0, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 60, "prompt_n": 138, "prompt_ms": 97.604, "prompt_per_token_ms": 0.7072753623188406, "prompt_per_second": 1413.8764804721118, "predicted_n": 64, "predicted_ms": 1840.567, "predicted_per_token_ms": 28.758859375, "predicted_per_second": 34.77189366102945}, "tps": 34.77189366102945}, {"id": "db4b5fc8-c886-46ff-9105-e058e14ffbcf", "answer": "BC", "llm_answer": "ABCD", "score": 0, "topics": ["ApplicationSecurity", "NetworkSecurity"], "timings": {"cache_n": 59, "prompt_n": 139, "prompt_ms": 98.094, "prompt_per_token_ms": 0.7057122302158273, "prompt_per_second": 1417.0081758313456, "predicted_n": 3, "predicted_ms": 58.951, "predicted_per_token_ms": 19.650333333333332, "predicted_per_second": 50.88972197248563}, "tps": 50.88972197248563}, {"id": "eb8aab2e-e40f-4791-9f57-5dce269a6143", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["ApplicationSecurity", "SystemSecurity"], "timings": {"cache_n": 60, "prompt_n": 204, "prompt_ms": 102.926, "prompt_per_token_ms": 0.5045392156862745, "prompt_per_second": 1982.0064900996833, "predicted_n": 2, "predicted_ms": 31.062, "predicted_per_token_ms": 15.531, "predicted_per_second": 64.38735432361084}, "tps": 64.38735432361084}, {"id": "040272c6-a6c6-448e-8170-f246b71ff22c", "answer": "AC", "llm_answer": "A", "score": 0, "topics": ["SystemSecurity", "ApplicationSecurity", "PenTest"], "timings": {"cache_n": 60, "prompt_n": 143, "prompt_ms": 98.366, "prompt_per_token_ms": 0.6878741258741259, "prompt_per_second": 1453.7543460138666, "predicted_n": 2, "predicted_ms": 29.459, "predicted_per_token_ms": 14.7295, "predicted_per_second": 67.89096710682644}, "tps": 67.89096710682644}, {"id": "61678a40-e3ae-40bb-a9f2-18be137fef9b", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SystemSecurity", "WebSecurity"], "timings": {"cache_n": 62, "prompt_n": 100, "prompt_ms": 93.834, "prompt_per_token_ms": 0.9383400000000001, "prompt_per_second": 1065.7117889038088, "predicted_n": 2, "predicted_ms": 32.25, "predicted_per_token_ms": 16.125, "predicted_per_second": 62.01550387596899}, "tps": 62.01550387596899}, {"id": "dc814850-f840-41e7-88e7-d8ce8af41786", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["SystemSecurity", "Vulnerability"], "timings": {"cache_n": 60, "prompt_n": 177, "prompt_ms": 100.601, "prompt_per_token_ms": 0.5683672316384181, "prompt_per_second": 1759.4258506376677, "predicted_n": 2, "predicted_ms": 33.164, "predicted_per_token_ms": 16.582, "predicted_per_second": 60.30635628995296}, "tps": 60.30635628995296}, {"id": "33814255-45ce-4b8c-9c5c-71bffbafe10a", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 63, "prompt_n": 159, "prompt_ms": 98.787, "prompt_per_token_ms": 0.6213018867924529, "prompt_per_second": 1609.5235203012542, "predicted_n": 2, "predicted_ms": 30.122, "predicted_per_token_ms": 15.061, "predicted_per_second": 66.39665360865813}, "tps": 66.39665360865813}, {"id": "6f929d36-f833-4157-a8be-f6fa1b0f6793", "answer": "AD", "llm_answer": "D", "score": 0, "topics": ["SystemSecurity", "NetworkSecurity", "PenTest"], "timings": {"cache_n": 61, "prompt_n": 115, "prompt_ms": 95.994, "prompt_per_token_ms": 0.8347304347826087, "prompt_per_second": 1197.991541137988, "predicted_n": 2, "predicted_ms": 30.084, "predicted_per_token_ms": 15.042, "predicted_per_second": 66.48052120728627}, "tps": 66.48052120728627}, {"id": "8de6c752-43fb-4df2-a26e-1cb205767e6f", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["SoftwareSecurity", "SystemSecurity", "Vulnerability"], "timings": {"cache_n": 60, "prompt_n": 154, "prompt_ms": 97.48, "prompt_per_token_ms": 0.632987012987013, "prompt_per_second": 1579.8112433319657, "predicted_n": 2, "predicted_ms": 29.651, "predicted_per_token_ms": 14.8255, "predicted_per_second": 67.45135071329804}, "tps": 67.45135071329804}, {"id": "db2685fc-2eea-4acc-a6f8-1e97424453d5", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["SystemSecurity", "NetworkSecurity", "PenTest"], "timings": {"cache_n": 62, "prompt_n": 105, "prompt_ms": 79.356, "prompt_per_token_ms": 0.7557714285714285, "prompt_per_second": 1323.1513685165585, "predicted_n": 2, "predicted_ms": 26.253, "predicted_per_token_ms": 13.1265, "predicted_per_second": 76.1817697025102}, "tps": 76.1817697025102}, {"id": "28531070-f48e-49f9-a797-241ad1d76f6c", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["SystemSecurity"], "timings": {"cache_n": 60, "prompt_n": 83, "prompt_ms": 93.96, "prompt_per_token_ms": 1.1320481927710844, "prompt_per_second": 883.354618986803, "predicted_n": 2, "predicted_ms": 28.881, "predicted_per_token_ms": 14.4405, "predicted_per_second": 69.2496797202313}, "tps": 69.2496797202313}, {"id": "31398b3a-babe-4b5f-a8d4-19f06ad2659f", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["SystemSecurity", "PenTest"], "timings": {"cache_n": 58, "prompt_n": 65, "prompt_ms": 90.937, "prompt_per_token_ms": 1.3990307692307693, "prompt_per_second": 714.7805623673532, "predicted_n": 2, "predicted_ms": 29.256, "predicted_per_token_ms": 14.628, "predicted_per_second": 68.36204539239814}, "tps": 68.36204539239814}, {"id": "859819dd-dc71-4033-aa00-116071ffba22", "answer": "ACD", "llm_answer": "AC", "score": 0, "topics": ["SystemSecurity", "WebSecurity"], "timings": {"cache_n": 58, "prompt_n": 130, "prompt_ms": 97.072, "prompt_per_token_ms": 0.7467076923076923, "prompt_per_second": 1339.2121312015825, "predicted_n": 2, "predicted_ms": 31.371, "predicted_per_token_ms": 15.6855, "predicted_per_second": 63.7531478116732}, "tps": 63.7531478116732}, {"id": "cb6f73dc-3713-4610-a706-2453349658bb", "answer": "ACD", "llm_answer": "C", "score": 0, "topics": [], "timings": {"cache_n": 58, "prompt_n": 140, "prompt_ms": 96.834, "prompt_per_token_ms": 0.6916714285714286, "prompt_per_second": 1445.7731788421422, "predicted_n": 2, "predicted_ms": 29.94, "predicted_per_token_ms": 14.97, "predicted_per_second": 66.8002672010688}, "tps": 66.8002672010688}, {"id": "1af16c4e-b82d-40aa-8e9f-3679e9f7bd79", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["WebSecurity", "ApplicationSecurity"], "timings": {"cache_n": 59, "prompt_n": 161, "prompt_ms": 98.595, "prompt_per_token_ms": 0.6123913043478261, "prompt_per_second": 1632.942847000355, "predicted_n": 2, "predicted_ms": 28.281, "predicted_per_token_ms": 14.1405, "predicted_per_second": 70.71885718326791}, "tps": 70.71885718326791}, {"id": "be166c38-6ace-4b4d-9b77-3171550c05dd", "answer": "AB", "llm_answer": "AB", "score": 1, "topics": ["SystemSecurity", "WebSecurity", "PenTest"], "timings": {"cache_n": 60, "prompt_n": 121, "prompt_ms": 98.171, "prompt_per_token_ms": 0.8113305785123968, "prompt_per_second": 1232.543215409846, "predicted_n": 3, "predicted_ms": 57.363, "predicted_per_token_ms": 19.121, "predicted_per_second": 52.29851995188537}, "tps": 52.29851995188537}, {"id": "1d4533fa-c386-42e0-a338-3d7d155cddda", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SystemSecurity", "Vulnerability"], "timings": {"cache_n": 59, "prompt_n": 103, "prompt_ms": 93.333, "prompt_per_token_ms": 0.9061456310679612, "prompt_per_second": 1103.5753699120355, "predicted_n": 2, "predicted_ms": 28.579, "predicted_per_token_ms": 14.2895, "predicted_per_second": 69.98145491444767}, "tps": 69.98145491444767}, {"id": "5a76a206-0903-4b74-b8e8-b0b1756fe068", "answer": "BC", "llm_answer": "B", "score": 0, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 58, "prompt_n": 146, "prompt_ms": 97.422, "prompt_per_token_ms": 0.6672739726027397, "prompt_per_second": 1498.6348052801216, "predicted_n": 2, "predicted_ms": 28.808, "predicted_per_token_ms": 14.404, "predicted_per_second": 69.42515967786726}, "tps": 69.42515967786726}, {"id": "8bbcd18c-9cf7-4dde-a211-9a9345ce4716", "answer": "AC", "llm_answer": "AC", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 59, "prompt_n": 156, "prompt_ms": 98.171, "prompt_per_token_ms": 0.6293012820512821, "prompt_per_second": 1589.063980197818, "predicted_n": 3, "predicted_ms": 52.688, "predicted_per_token_ms": 17.56266666666667, "predicted_per_second": 56.93896143334345}, "tps": 56.93896143334345}, {"id": "8dfefdda-0cc7-439f-a2ac-650f9e28752b", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SystemSecurity", "PenTest"], "timings": {"cache_n": 60, "prompt_n": 114, "prompt_ms": 84.502, "prompt_per_token_ms": 0.7412456140350877, "prompt_per_second": 1349.0804951362097, "predicted_n": 2, "predicted_ms": 29.275, "predicted_per_token_ms": 14.6375, "predicted_per_second": 68.31767719897523}, "tps": 68.31767719897523}, {"id": "b22b26c1-ca62-40a5-ac12-c8a0f0957e2a", "answer": "BD", "llm_answer": "B", "score": 0, "topics": ["WebSecurity", "SystemSecurity"], "timings": {"cache_n": 61, "prompt_n": 65, "prompt_ms": 92.382, "prompt_per_token_ms": 1.4212615384615386, "prompt_per_second": 703.6002684505639, "predicted_n": 2, "predicted_ms": 30.244, "predicted_per_token_ms": 15.122, "predicted_per_second": 66.12881893929375}, "tps": 66.12881893929375}, {"id": "9d034266-a90d-48b8-bade-e4fcd31301d7", "answer": "D", "llm_answer": "D", "score": 1, "topics": ["WebSecurity", "SystemSecurity"], "timings": {"cache_n": 58, "prompt_n": 67, "prompt_ms": 92.612, "prompt_per_token_ms": 1.3822686567164177, "prompt_per_second": 723.4483652226494, "predicted_n": 2, "predicted_ms": 30.349, "predicted_per_token_ms": 15.1745, "predicted_per_second": 65.90002965501334}, "tps": 65.90002965501334}, {"id": "fe5dfa44-fef1-4115-b52b-a9bccfd970c6", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["SystemSecurity", "NetworkSecurity"], "timings": {"cache_n": 59, "prompt_n": 135, "prompt_ms": 96.841, "prompt_per_token_ms": 0.7173407407407407, "prompt_per_second": 1394.0376493427373, "predicted_n": 2, "predicted_ms": 30.782, "predicted_per_token_ms": 15.391, "predicted_per_second": 64.97303618998116}, "tps": 64.97303618998116}, {"id": "b1972f40-9e77-40b2-a8a8-9e5938dd3f62", "answer": "AC", "llm_answer": "ACD", "score": 0, "topics": ["WebSecurity"], "timings": {"cache_n": 62, "prompt_n": 144, "prompt_ms": 96.851, "prompt_per_token_ms": 0.6725763888888889, "prompt_per_second": 1486.8199605579705, "predicted_n": 3, "predicted_ms": 60.837, "predicted_per_token_ms": 20.279, "predicted_per_second": 49.31209625721189}, "tps": 49.31209625721189}, {"id": "58add164-3543-46d4-837f-bd10e0997966", "answer": "B", "llm_answer": "C", "score": 0, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 60, "prompt_n": 148, "prompt_ms": 97.325, "prompt_per_token_ms": 0.6576013513513513, "prompt_per_second": 1520.6781402517338, "predicted_n": 2, "predicted_ms": 30.467, "predicted_per_token_ms": 15.2335, "predicted_per_second": 65.6447960087964}, "tps": 65.6447960087964}, {"id": "8a079166-f15e-4436-bc6a-3fd195a1b47d", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["PenTest", "SystemSecurity"], "timings": {"cache_n": 61, "prompt_n": 97, "prompt_ms": 94.857, "prompt_per_token_ms": 0.9779072164948454, "prompt_per_second": 1022.5919014938275, "predicted_n": 2, "predicted_ms": 28.793, "predicted_per_token_ms": 14.3965, "predicted_per_second": 69.46132740596673}, "tps": 69.46132740596673}, {"id": "2f379ec5-7623-4709-86a0-aaece90a2086", "answer": "AD", "llm_answer": "AD", "score": 1, "topics": ["SystemSecurity", "NetworkSecurity", "PenTest"], "timings": {"cache_n": 66, "prompt_n": 85, "prompt_ms": 92.562, "prompt_per_token_ms": 1.0889647058823528, "prompt_per_second": 918.3034074458202, "predicted_n": 3, "predicted_ms": 60.202, "predicted_per_token_ms": 20.067333333333334, "predicted_per_second": 49.832231487326}, "tps": 49.832231487326}, {"id": "a49b1c8f-f5e6-4c74-a362-6941b834bdae", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["PenTest", "SystemSecurity"], "timings": {"cache_n": 59, "prompt_n": 153, "prompt_ms": 98.802, "prompt_per_token_ms": 0.645764705882353, "prompt_per_second": 1548.5516487520495, "predicted_n": 2, "predicted_ms": 30.863, "predicted_per_token_ms": 15.4315, "predicted_per_second": 64.8025143375563}, "tps": 64.8025143375563}, {"id": "29566525-453c-4a9b-967e-93ac472a57d0", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["WebSecurity", "SystemSecurity"], "timings": {"cache_n": 59, "prompt_n": 108, "prompt_ms": 87.555, "prompt_per_token_ms": 0.8106944444444445, "prompt_per_second": 1233.5103649134828, "predicted_n": 2, "predicted_ms": 26.195, "predicted_per_token_ms": 13.0975, "predicted_per_second": 76.35044855888528}, "tps": 76.35044855888528}, {"id": "516bb064-882f-4154-b3c8-5767fd80a40f", "answer": "BD", "llm_answer": "B", "score": 0, "topics": ["SystemSecurity", "Vulnerability"], "timings": {"cache_n": 59, "prompt_n": 122, "prompt_ms": 88.53, "prompt_per_token_ms": 0.7256557377049181, "prompt_per_second": 1378.0639331300124, "predicted_n": 2, "predicted_ms": 30.3, "predicted_per_token_ms": 15.15, "predicted_per_second": 66.006600660066}, "tps": 66.006600660066}, {"id": "8b6fc654-701e-445d-bd0f-38de8b841a84", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["ApplicationSecurity", "NetworkSecurity", "PenTest"], "timings": {"cache_n": 60, "prompt_n": 154, "prompt_ms": 98.075, "prompt_per_token_ms": 0.6368506493506494, "prompt_per_second": 1570.2268671934744, "predicted_n": 2, "predicted_ms": 30.296, "predicted_per_token_ms": 15.148, "predicted_per_second": 66.01531555320834}, "tps": 66.01531555320834}, {"id": "fa9abe0a-4d91-49fc-9abd-ec7c41e0b996", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SystemSecurity"], "timings": {"cache_n": 60, "prompt_n": 142, "prompt_ms": 96.99, "prompt_per_token_ms": 0.6830281690140845, "prompt_per_second": 1464.0684606660482, "predicted_n": 2, "predicted_ms": 30.046, "predicted_per_token_ms": 15.023, "predicted_per_second": 66.56460094521734}, "tps": 66.56460094521734}, {"id": "75492370-ca7d-44a9-9ffd-18de1c35f96d", "answer": "D", "llm_answer": "C", "score": 0, "topics": ["SystemSecurity", "WebSecurity", "PenTest"], "timings": {"cache_n": 60, "prompt_n": 73, "prompt_ms": 90.636, "prompt_per_token_ms": 1.2415890410958903, "prompt_per_second": 805.4194801182754, "predicted_n": 2, "predicted_ms": 29.586, "predicted_per_token_ms": 14.793, "predicted_per_second": 67.59954032312581}, "tps": 67.59954032312581}, {"id": "dbbbd198-c5a7-46aa-bb74-f934d481ce51", "answer": "ACD", "llm_answer": "D", "score": 0, "topics": ["SystemSecurity", "Vulnerability"], "timings": {"cache_n": 59, "prompt_n": 101, "prompt_ms": 93.251, "prompt_per_token_ms": 0.9232772277227723, "prompt_per_second": 1083.098304575822, "predicted_n": 2, "predicted_ms": 29.048, "predicted_per_token_ms": 14.524, "predicted_per_second": 68.85155604516663}, "tps": 68.85155604516663}, {"id": "d36b40ca-3436-4f1e-a830-f7535ab660b3", "answer": "BD", "llm_answer": "BD", "score": 1, "topics": ["ApplicationSecurity", "WebSecurity"], "timings": {"cache_n": 59, "prompt_n": 189, "prompt_ms": 102.097, "prompt_per_token_ms": 0.5401957671957671, "prompt_per_second": 1851.1807398846197, "predicted_n": 2, "predicted_ms": 29.703, "predicted_per_token_ms": 14.8515, "predicted_per_second": 67.33326600006734}, "tps": 67.33326600006734}, {"id": "12b81400-3f25-448b-919b-d4cfa759baac", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["ApplicationSecurity", "SystemSecurity"], "timings": {"cache_n": 61, "prompt_n": 80, "prompt_ms": 91.937, "prompt_per_token_ms": 1.1492125, "prompt_per_second": 870.1610885715218, "predicted_n": 2, "predicted_ms": 29.375, "predicted_per_token_ms": 14.6875, "predicted_per_second": 68.08510638297872}, "tps": 68.08510638297872}, {"id": "253432b5-d7cc-4994-bfd1-6853b5d1272c", "answer": "BC", "llm_answer": "BC", "score": 1, "topics": ["Cryptography"], "timings": {"cache_n": 58, "prompt_n": 154, "prompt_ms": 97.379, "prompt_per_token_ms": 0.6323311688311689, "prompt_per_second": 1581.4497992380286, "predicted_n": 3, "predicted_ms": 58.805, "predicted_per_token_ms": 19.601666666666667, "predicted_per_second": 51.01607006206955}, "tps": 51.01607006206955}, {"id": "8f346f36-3182-4269-9b85-83c9b574649d", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["WebSecurity", "Vulnerability"], "timings": {"cache_n": 62, "prompt_n": 126, "prompt_ms": 96.134, "prompt_per_token_ms": 0.762968253968254, "prompt_per_second": 1310.6705223958227, "predicted_n": 2, "predicted_ms": 31.356, "predicted_per_token_ms": 15.678, "predicted_per_second": 63.78364587319811}, "tps": 63.78364587319811}, {"id": "6675c9d9-401f-4aa7-b641-dc0d3a1eb93c", "answer": "ABD", "llm_answer": "AB", "score": 0, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 58, "prompt_n": 147, "prompt_ms": 85.646, "prompt_per_token_ms": 0.582625850340136, "prompt_per_second": 1716.3673726735633, "predicted_n": 2, "predicted_ms": 25.898, "predicted_per_token_ms": 12.949, "predicted_per_second": 77.22604062089736}, "tps": 77.22604062089736}, {"id": "d5d19138-8e19-4469-9857-f06aa1afce44", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["ApplicationSecurity", "SystemSecurity"], "timings": {"cache_n": 59, "prompt_n": 171, "prompt_ms": 96.77, "prompt_per_token_ms": 0.565906432748538, "prompt_per_second": 1767.076573318177, "predicted_n": 2, "predicted_ms": 29.738, "predicted_per_token_ms": 14.869, "predicted_per_second": 67.25401842760105}, "tps": 67.25401842760105}, {"id": "5d35c8c8-0f4a-45f7-befe-ff27b9719a0e", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["ApplicationSecurity", "WebSecurity"], "timings": {"cache_n": 59, "prompt_n": 153, "prompt_ms": 97.901, "prompt_per_token_ms": 0.6398758169934641, "prompt_per_second": 1562.803240007763, "predicted_n": 2, "predicted_ms": 29.916, "predicted_per_token_ms": 14.958, "predicted_per_second": 66.85385746757588}, "tps": 66.85385746757588}, {"id": "51c5c6b4-dabe-40b0-add9-5e89375e3f61", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["WebSecurity", "SystemSecurity"], "timings": {"cache_n": 63, "prompt_n": 110, "prompt_ms": 95.118, "prompt_per_token_ms": 0.8647090909090909, "prompt_per_second": 1156.4582939086188, "predicted_n": 2, "predicted_ms": 32.31, "predicted_per_token_ms": 16.155, "predicted_per_second": 61.90034045187248}, "tps": 61.90034045187248}, {"id": "3a6cf71b-2c6b-4729-89b6-ff2a7128b81a", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["Vulnerability", "SoftwareSecurity", "SystemSecurity"], "timings": {"cache_n": 59, "prompt_n": 134, "prompt_ms": 96.451, "prompt_per_token_ms": 0.7197835820895522, "prompt_per_second": 1389.306487231859, "predicted_n": 2, "predicted_ms": 29.276, "predicted_per_token_ms": 14.638, "predicted_per_second": 68.31534362617845}, "tps": 68.31534362617845}, {"id": "c3d8fb38-79f9-4385-972d-6297c8a5d92a", "answer": "BD", "llm_answer": "B", "score": 0, "topics": ["SystemSecurity", "WebSecurity"], "timings": {"cache_n": 62, "prompt_n": 76, "prompt_ms": 91.644, "prompt_per_token_ms": 1.205842105263158, "prompt_per_second": 829.2959713674653, "predicted_n": 2, "predicted_ms": 29.692, "predicted_per_token_ms": 14.846, "predicted_per_second": 67.35821096591674}, "tps": 67.35821096591674}, {"id": "fef4103e-ad79-41e3-af74-a717a0a7a625", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 62, "prompt_n": 112, "prompt_ms": 95.244, "prompt_per_token_ms": 0.8503928571428572, "prompt_per_second": 1175.9270925202636, "predicted_n": 2, "predicted_ms": 29.696, "predicted_per_token_ms": 14.848, "predicted_per_second": 67.34913793103448}, "tps": 67.34913793103448}, {"id": "888053ac-44db-4b44-bb34-b7fed5d3b56c", "answer": "ACD", "llm_answer": "ACD", "score": 1, "topics": ["ApplicationSecurity", "SystemSecurity"], "timings": {"cache_n": 59, "prompt_n": 160, "prompt_ms": 97.145, "prompt_per_token_ms": 0.60715625, "prompt_per_second": 1647.0224921509086, "predicted_n": 3, "predicted_ms": 59.212, "predicted_per_token_ms": 19.737333333333336, "predicted_per_second": 50.66540566101465}, "tps": 50.66540566101465}, {"id": "e1f1e0df-3c8f-4bf8-b3a7-0ddb613ac72a", "answer": "B", "llm_answer": "A", "score": 0, "topics": ["WebSecurity"], "timings": {"cache_n": 61, "prompt_n": 115, "prompt_ms": 96.642, "prompt_per_token_ms": 0.8403652173913043, "prompt_per_second": 1189.9588170774612, "predicted_n": 2, "predicted_ms": 28.824, "predicted_per_token_ms": 14.412, "predicted_per_second": 69.38662225922842}, "tps": 69.38662225922842}, {"id": "3ff1643f-630b-41b9-aa4d-7645f6cda437", "answer": "AD", "llm_answer": "C", "score": 0, "topics": ["SystemSecurity", "SoftwareSecurity"], "timings": {"cache_n": 60, "prompt_n": 95, "prompt_ms": 91.996, "prompt_per_token_ms": 0.968378947368421, "prompt_per_second": 1032.653593634506, "predicted_n": 2, "predicted_ms": 28.99, "predicted_per_token_ms": 14.495, "predicted_per_second": 68.98930665746809}, "tps": 68.98930665746809}, {"id": "42476626-ba96-4e41-8474-18fb68fdb52b", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SystemSecurity"], "timings": {"cache_n": 58, "prompt_n": 119, "prompt_ms": 83.227, "prompt_per_token_ms": 0.6993865546218487, "prompt_per_second": 1429.8244560058633, "predicted_n": 2, "predicted_ms": 27.687, "predicted_per_token_ms": 13.8435, "predicted_per_second": 72.23606746848701}, "tps": 72.23606746848701}, {"id": "f79c6c74-976c-41d2-b560-6be89fe6bbc0", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["NetworkSecurity", "PenTest"], "timings": {"cache_n": 62, "prompt_n": 206, "prompt_ms": 103.027, "prompt_per_token_ms": 0.500131067961165, "prompt_per_second": 1999.4758655498074, "predicted_n": 2, "predicted_ms": 32.435, "predicted_per_token_ms": 16.2175, "predicted_per_second": 61.66178510867889}, "tps": 61.66178510867889}, {"id": "dcabb8e6-1568-4a4a-ba81-45e68dd35d33", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["ApplicationSecurity", "SystemSecurity"], "timings": {"cache_n": 63, "prompt_n": 182, "prompt_ms": 100.005, "prompt_per_token_ms": 0.549478021978022, "prompt_per_second": 1819.9090045497726, "predicted_n": 2, "predicted_ms": 30.802, "predicted_per_token_ms": 15.401, "predicted_per_second": 64.9308486461918}, "tps": 64.9308486461918}, {"id": "c01cc562-c94a-4e84-8387-94f5218ddd1a", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["SoftwareSecurity", "ApplicationSecurity", "Vulnerability"], "timings": {"cache_n": 62, "prompt_n": 84, "prompt_ms": 92.518, "prompt_per_token_ms": 1.101404761904762, "prompt_per_second": 907.9314295596532, "predicted_n": 2, "predicted_ms": 31.677, "predicted_per_token_ms": 15.8385, "predicted_per_second": 63.13729204154434}, "tps": 63.13729204154434}, {"id": "d14e02fa-e3c8-438c-8bc8-d053d5aec13d", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["PenTest"], "timings": {"cache_n": 58, "prompt_n": 129, "prompt_ms": 96.756, "prompt_per_token_ms": 0.750046511627907, "prompt_per_second": 1333.250651122411, "predicted_n": 2, "predicted_ms": 29.662, "predicted_per_token_ms": 14.831, "predicted_per_second": 67.42633672712562}, "tps": 67.42633672712562}, {"id": "fa543433-0721-40a8-83a7-7061fa3ac395", "answer": "AC", "llm_answer": "AC", "score": 1, "topics": ["SystemSecurity", "ApplicationSecurity", "PenTest"], "timings": {"cache_n": 61, "prompt_n": 138, "prompt_ms": 97.68, "prompt_per_token_ms": 0.7078260869565218, "prompt_per_second": 1412.7764127764128, "predicted_n": 3, "predicted_ms": 59.12, "predicted_per_token_ms": 19.706666666666667, "predicted_per_second": 50.74424898511502}, "tps": 50.74424898511502}, {"id": "eb341719-d15e-411b-b2f5-b0a7e33df9e8", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SystemSecurity", "WebSecurity"], "timings": {"cache_n": 59, "prompt_n": 128, "prompt_ms": 96.066, "prompt_per_token_ms": 0.750515625, "prompt_per_second": 1332.4172964420295, "predicted_n": 2, "predicted_ms": 30.929, "predicted_per_token_ms": 15.4645, "predicted_per_second": 64.66423098063306}, "tps": 64.66423098063306}, {"id": "64080609-9908-472f-8d17-6aa0f3a53d32", "answer": "BC", "llm_answer": "ABC", "score": 0, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 59, "prompt_n": 151, "prompt_ms": 98.22, "prompt_per_token_ms": 0.6504635761589403, "prompt_per_second": 1537.3650987578906, "predicted_n": 2, "predicted_ms": 29.559, "predicted_per_token_ms": 14.7795, "predicted_per_second": 67.66128759430292}, "tps": 67.66128759430292}, {"id": "120a71f2-085c-42c4-b686-25b6323a1fdf", "answer": "BC", "llm_answer": "C", "score": 0, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 62, "prompt_n": 168, "prompt_ms": 101.351, "prompt_per_token_ms": 0.6032797619047618, "prompt_per_second": 1657.6057463665875, "predicted_n": 2, "predicted_ms": 29.454, "predicted_per_token_ms": 14.727, "predicted_per_second": 67.90249202145719}, "tps": 67.90249202145719}, {"id": "5f3e21ec-7259-43bb-af52-67272127991c", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["WebSecurity", "PenTest"], "timings": {"cache_n": 63, "prompt_n": 138, "prompt_ms": 96.058, "prompt_per_token_ms": 0.696072463768116, "prompt_per_second": 1436.6320348123008, "predicted_n": 2, "predicted_ms": 26.423, "predicted_per_token_ms": 13.2115, "predicted_per_second": 75.69163229005034}, "tps": 75.69163229005034}, {"id": "b2033070-1ef1-444e-bace-f266167ba0d4", "answer": "BD", "llm_answer": "BD", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 63, "prompt_n": 158, "prompt_ms": 85.012, "prompt_per_token_ms": 0.5380506329113924, "prompt_per_second": 1858.5611443090386, "predicted_n": 4, "predicted_ms": 86.959, "predicted_per_token_ms": 21.73975, "predicted_per_second": 45.99868903736243}, "tps": 45.99868903736243}, {"id": "d0a2e674-3839-4199-85ac-b915ec3268f8", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["ApplicationSecurity"], "timings": {"cache_n": 59, "prompt_n": 123, "prompt_ms": 95.436, "prompt_per_token_ms": 0.7759024390243903, "prompt_per_second": 1288.8218282409152, "predicted_n": 2, "predicted_ms": 29.618, "predicted_per_token_ms": 14.809, "predicted_per_second": 67.52650415288001}, "tps": 67.52650415288001}, {"id": "6300218d-bd1a-4019-9fbe-c169685dcabc", "answer": "D", "llm_answer": "A", "score": 0, "topics": ["SystemSecurity", "ApplicationSecurity", "WebSecurity"], "timings": {"cache_n": 58, "prompt_n": 149, "prompt_ms": 97.801, "prompt_per_token_ms": 0.6563825503355705, "prompt_per_second": 1523.5018046850234, "predicted_n": 2, "predicted_ms": 31.761, "predicted_per_token_ms": 15.8805, "predicted_per_second": 62.97030949907119}, "tps": 62.97030949907119}, {"id": "04b35039-4f12-4c67-80ee-ad9f071d49a2", "answer": "AC", "llm_answer": "AC", "score": 1, "topics": ["WebSecurity", "ApplicationSecurity"], "timings": {"cache_n": 61, "prompt_n": 135, "prompt_ms": 97.541, "prompt_per_token_ms": 0.7225259259259259, "prompt_per_second": 1384.033380834726, "predicted_n": 2, "predicted_ms": 30.974, "predicted_per_token_ms": 15.487, "predicted_per_second": 64.57028475495576}, "tps": 64.57028475495576}, {"id": "88ab3210-5124-418c-bfd3-8ab1750a8b00", "answer": "AB", "llm_answer": "C", "score": 0, "topics": ["ApplicationSecurity"], "timings": {"cache_n": 60, "prompt_n": 153, "prompt_ms": 97.712, "prompt_per_token_ms": 0.638640522875817, "prompt_per_second": 1565.8261011953496, "predicted_n": 2, "predicted_ms": 29.619, "predicted_per_token_ms": 14.8095, "predicted_per_second": 67.52422431547318}, "tps": 67.52422431547318}, {"id": "faa04771-a27a-42a6-9926-3dde6b68ccc5", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 62, "prompt_n": 101, "prompt_ms": 96.171, "prompt_per_token_ms": 0.9521881188118813, "prompt_per_second": 1050.2126420646555, "predicted_n": 2, "predicted_ms": 30.869, "predicted_per_token_ms": 15.4345, "predicted_per_second": 64.78991868865205}, "tps": 64.78991868865205}, {"id": "9aa54e61-d1cc-4f22-a29d-3c9fbc47bb3d", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["SystemSecurity", "WebSecurity", "PenTest"], "timings": {"cache_n": 59, "prompt_n": 145, "prompt_ms": 97.44, "prompt_per_token_ms": 0.6719999999999999, "prompt_per_second": 1488.095238095238, "predicted_n": 2, "predicted_ms": 29.371, "predicted_per_token_ms": 14.6855, "predicted_per_second": 68.09437880902932}, "tps": 68.09437880902932}, {"id": "b2b5d010-099c-47b4-9989-465e924b88eb", "answer": "AC", "llm_answer": "AC", "score": 1, "topics": ["SystemSecurity"], "timings": {"cache_n": 64, "prompt_n": 120, "prompt_ms": 95.196, "prompt_per_token_ms": 0.7933, "prompt_per_second": 1260.5571662674902, "predicted_n": 2, "predicted_ms": 29.671, "predicted_per_token_ms": 14.8355, "predicted_per_second": 67.4058845337198}, "tps": 67.4058845337198}, {"id": "11f5e2c7-9f7e-4f4f-954e-e31394edf430", "answer": "AC", "llm_answer": "C", "score": 0, "topics": ["ApplicationSecurity", "SystemSecurity", "Vulnerability"], "timings": {"cache_n": 61, "prompt_n": 123, "prompt_ms": 95.943, "prompt_per_token_ms": 0.7800243902439025, "prompt_per_second": 1282.0111941465245, "predicted_n": 2, "predicted_ms": 30.389, "predicted_per_token_ms": 15.1945, "predicted_per_second": 65.81328770278719}, "tps": 65.81328770278719}, {"id": "ea6c2263-f528-4ed6-bef0-9a1469d5cb3a", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 64, "prompt_n": 110, "prompt_ms": 85.296, "prompt_per_token_ms": 0.7754181818181819, "prompt_per_second": 1289.6267116863626, "predicted_n": 2, "predicted_ms": 27.553, "predicted_per_token_ms": 13.7765, "predicted_per_second": 72.5873770551301}, "tps": 72.5873770551301}, {"id": "c222982d-e375-446a-ae2c-d87525032db8", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["SystemSecurity", "Vulnerability"], "timings": {"cache_n": 62, "prompt_n": 126, "prompt_ms": 90.092, "prompt_per_token_ms": 0.715015873015873, "prompt_per_second": 1398.5703503085733, "predicted_n": 2, "predicted_ms": 29.843, "predicted_per_token_ms": 14.9215, "predicted_per_second": 67.01739101296786}, "tps": 67.01739101296786}, {"id": "8eab60bc-b91c-4ab5-8437-9f0eb712e033", "answer": "AC", "llm_answer": "C", "score": 0, "topics": ["SystemSecurity"], "timings": {"cache_n": 59, "prompt_n": 175, "prompt_ms": 99.28, "prompt_per_token_ms": 0.5673142857142858, "prompt_per_second": 1762.6913779210313, "predicted_n": 2, "predicted_ms": 29.132, "predicted_per_token_ms": 14.566, "predicted_per_second": 68.65302759851708}, "tps": 68.65302759851708}, {"id": "8145debe-9097-46f9-ac5b-bdbe3cd1b5dc", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["WebSecurity", "SystemSecurity"], "timings": {"cache_n": 64, "prompt_n": 118, "prompt_ms": 95.157, "prompt_per_token_ms": 0.8064152542372881, "prompt_per_second": 1240.055907605326, "predicted_n": 2, "predicted_ms": 33.282, "predicted_per_token_ms": 16.641, "predicted_per_second": 60.09254251547384}, "tps": 60.09254251547384}, {"id": "03f29389-2ec1-4ec5-baa1-9ef989cf0519", "answer": "AD", "llm_answer": "AD", "score": 1, "topics": ["SystemSecurity", "PenTest"], "timings": {"cache_n": 61, "prompt_n": 155, "prompt_ms": 102.705, "prompt_per_token_ms": 0.6626129032258065, "prompt_per_second": 1509.1767684143908, "predicted_n": 3, "predicted_ms": 62.128, "predicted_per_token_ms": 20.709333333333333, "predicted_per_second": 48.28740664434716}, "tps": 48.28740664434716}, {"id": "4d489625-524f-4792-aaab-99678def4c34", "answer": "AC", "llm_answer": "AC", "score": 1, "topics": ["SystemSecurity"], "timings": {"cache_n": 60, "prompt_n": 135, "prompt_ms": 98.319, "prompt_per_token_ms": 0.7282888888888889, "prompt_per_second": 1373.0815000152563, "predicted_n": 2, "predicted_ms": 28.799, "predicted_per_token_ms": 14.3995, "predicted_per_second": 69.44685579360394}, "tps": 69.44685579360394}, {"id": "852ab672-9833-423d-8d3e-eeca53370b62", "answer": "AC", "llm_answer": "BCD", "score": 0, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 59, "prompt_n": 148, "prompt_ms": 98.407, "prompt_per_token_ms": 0.6649121621621621, "prompt_per_second": 1503.9580517646102, "predicted_n": 6, "predicted_ms": 149.14, "predicted_per_token_ms": 24.856666666666666, "predicted_per_second": 40.23065575968889}, "tps": 40.23065575968889}, {"id": "d323abf1-f2bd-4231-bedc-6d8ba5643d00", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["NetworkSecurity", "SystemSecurity"], "timings": {"cache_n": 62, "prompt_n": 89, "prompt_ms": 93.49, "prompt_per_token_ms": 1.0504494382022471, "prompt_per_second": 951.9734730987271, "predicted_n": 2, "predicted_ms": 30.314, "predicted_per_token_ms": 15.157, "predicted_per_second": 65.97611664577423}, "tps": 65.97611664577423}, {"id": "1bbeec3b-f19b-4b7b-9852-aea6ca536ade", "answer": "AD", "llm_answer": "D", "score": 0, "topics": [], "timings": {"cache_n": 59, "prompt_n": 161, "prompt_ms": 99.231, "prompt_per_token_ms": 0.6163416149068323, "prompt_per_second": 1622.476846953069, "predicted_n": 2, "predicted_ms": 30.285, "predicted_per_token_ms": 15.1425, "predicted_per_second": 66.03929337956083}, "tps": 66.03929337956083}, {"id": "58702a61-fae9-4209-8ff0-603f0945ec67", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["ApplicationSecurity", "WebSecurity"], "timings": {"cache_n": 60, "prompt_n": 134, "prompt_ms": 88.793, "prompt_per_token_ms": 0.662634328358209, "prompt_per_second": 1509.1279717995787, "predicted_n": 3, "predicted_ms": 58.523, "predicted_per_token_ms": 19.50766666666667, "predicted_per_second": 51.26189703193616}, "tps": 51.26189703193616}, {"id": "bde79bf0-870a-48a9-a4e5-6457d986d4b7", "answer": "BC", "llm_answer": "BC", "score": 1, "topics": [], "timings": {"cache_n": 60, "prompt_n": 151, "prompt_ms": 97.124, "prompt_per_token_ms": 0.643205298013245, "prompt_per_second": 1554.713562044397, "predicted_n": 3, "predicted_ms": 63.819, "predicted_per_token_ms": 21.273, "predicted_per_second": 47.007944342593895}, "tps": 47.007944342593895}, {"id": "960d2171-2588-40f7-80c3-e028a4d41087", "answer": "BCD", "llm_answer": "C", "score": 0, "topics": ["WebSecurity"], "timings": {"cache_n": 60, "prompt_n": 127, "prompt_ms": 99.957, "prompt_per_token_ms": 0.7870629921259842, "prompt_per_second": 1270.5463349240174, "predicted_n": 2, "predicted_ms": 29.622, "predicted_per_token_ms": 14.811, "predicted_per_second": 67.51738572682466}, "tps": 67.51738572682466}, {"id": "834ef7e9-7b4a-4287-9ece-d7966508e3ab", "answer": "CD", "llm_answer": "ACD", "score": 0, "topics": ["SoftwareSecurity", "ApplicationSecurity"], "timings": {"cache_n": 62, "prompt_n": 153, "prompt_ms": 98.784, "prompt_per_token_ms": 0.6456470588235295, "prompt_per_second": 1548.8338192419824, "predicted_n": 3, "predicted_ms": 58.292, "predicted_per_token_ms": 19.430666666666667, "predicted_per_second": 51.46503808412818}, "tps": 51.46503808412818}, {"id": "8e1e1847-7502-4855-8914-4c2025002648", "answer": "ABC", "llm_answer": "ABCD", "score": 0, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 60, "prompt_n": 164, "prompt_ms": 99.814, "prompt_per_token_ms": 0.6086219512195121, "prompt_per_second": 1643.0560843168294, "predicted_n": 3, "predicted_ms": 58.244, "predicted_per_token_ms": 19.414666666666665, "predicted_per_second": 51.50745141130417}, "tps": 51.50745141130417}, {"id": "9a49546c-15fd-4bc8-8ca4-b064790db03f", "answer": "BD", "llm_answer": "BD", "score": 1, "topics": ["ApplicationSecurity"], "timings": {"cache_n": 61, "prompt_n": 139, "prompt_ms": 98.038, "prompt_per_token_ms": 0.7053093525179855, "prompt_per_second": 1417.8175809380036, "predicted_n": 3, "predicted_ms": 59.225, "predicted_per_token_ms": 19.741666666666667, "predicted_per_second": 50.65428450823133}, "tps": 50.65428450823133}, {"id": "5e354441-18db-4a70-a240-4100cf14e442", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["SoftwareSecurity"], "timings": {"cache_n": 59, "prompt_n": 157, "prompt_ms": 99.355, "prompt_per_token_ms": 0.6328343949044586, "prompt_per_second": 1580.1922399476625, "predicted_n": 2, "predicted_ms": 29.567, "predicted_per_token_ms": 14.7835, "predicted_per_second": 67.64298034971421}, "tps": 67.64298034971421}, {"id": "efb33433-305d-4fb9-954a-cee99390c34e", "answer": "B", "llm_answer": "C", "score": 0, "topics": ["ApplicationSecurity", "SystemSecurity"], "timings": {"cache_n": 59, "prompt_n": 167, "prompt_ms": 102.879, "prompt_per_token_ms": 0.6160419161676647, "prompt_per_second": 1623.2661670506127, "predicted_n": 2, "predicted_ms": 31.104, "predicted_per_token_ms": 15.552, "predicted_per_second": 64.30041152263375}, "tps": 64.30041152263375}, {"id": "88e60fd9-6509-416e-b7a7-7f016f201247", "answer": "AC", "llm_answer": "C", "score": 0, "topics": ["WebSecurity"], "timings": {"cache_n": 59, "prompt_n": 113, "prompt_ms": 96.601, "prompt_per_token_ms": 0.8548761061946902, "prompt_per_second": 1169.7601474104824, "predicted_n": 2, "predicted_ms": 26.155, "predicted_per_token_ms": 13.0775, "predicted_per_second": 76.46721468170522}, "tps": 76.46721468170522}, {"id": "c39cb5d8-4324-4f91-9b39-2e9e14ffe55f", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SystemSecurity", "WebSecurity", "PenTest"], "timings": {"cache_n": 63, "prompt_n": 117, "prompt_ms": 87.527, "prompt_per_token_ms": 0.7480940170940171, "prompt_per_second": 1336.7303803397808, "predicted_n": 2, "predicted_ms": 31.547, "predicted_per_token_ms": 15.7735, "predicted_per_second": 63.3974704409294}, "tps": 63.3974704409294}, {"id": "20b50c06-ba13-4a63-ba80-f9c030d4e4d4", "answer": "AD", "llm_answer": "A", "score": 0, "topics": ["SoftwareSecurity", "ApplicationSecurity", "Vulnerability"], "timings": {"cache_n": 59, "prompt_n": 100, "prompt_ms": 92.522, "prompt_per_token_ms": 0.92522, "prompt_per_second": 1080.8240202330255, "predicted_n": 2, "predicted_ms": 29.221, "predicted_per_token_ms": 14.6105, "predicted_per_second": 68.44392731254919}, "tps": 68.44392731254919}, {"id": "cd5df9f5-0032-4e68-a37d-6673cca674a8", "answer": "BD", "llm_answer": "B", "score": 0, "topics": ["SystemSecurity"], "timings": {"cache_n": 59, "prompt_n": 164, "prompt_ms": 99.155, "prompt_per_token_ms": 0.6046036585365854, "prompt_per_second": 1653.9760980283395, "predicted_n": 2, "predicted_ms": 31.081, "predicted_per_token_ms": 15.5405, "predicted_per_second": 64.34799395128857}, "tps": 64.34799395128857}, {"id": "16e4acc0-62b2-46d2-9ec1-925eaff61d8c", "answer": "BD", "llm_answer": "B", "score": 0, "topics": ["SystemSecurity", "ApplicationSecurity", "NetworkSecurity"], "timings": {"cache_n": 59, "prompt_n": 114, "prompt_ms": 96.682, "prompt_per_token_ms": 0.8480877192982457, "prompt_per_second": 1179.1233114747317, "predicted_n": 2, "predicted_ms": 30.405, "predicted_per_token_ms": 15.2025, "predicted_per_second": 65.7786548265088}, "tps": 65.7786548265088}, {"id": "6e446d8b-19b8-4030-a6bb-2d00aeeeaf3f", "answer": "A", "llm_answer": "C", "score": 0, "topics": ["PenTest"], "timings": {"cache_n": 62, "prompt_n": 57, "prompt_ms": 89.89, "prompt_per_token_ms": 1.5770175438596492, "prompt_per_second": 634.1083546556903, "predicted_n": 2, "predicted_ms": 30.506, "predicted_per_token_ms": 15.253, "predicted_per_second": 65.56087327083196}, "tps": 65.56087327083196}, {"id": "1d3d229d-6100-4b47-9aea-a3e808908c7b", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["ApplicationSecurity", "SystemSecurity"], "timings": {"cache_n": 58, "prompt_n": 122, "prompt_ms": 96.764, "prompt_per_token_ms": 0.7931475409836065, "prompt_per_second": 1260.799470877599, "predicted_n": 2, "predicted_ms": 34.107, "predicted_per_token_ms": 17.0535, "predicted_per_second": 58.63898906382854}, "tps": 58.63898906382854}, {"id": "59ececd8-32e5-45f8-b130-7eb835554413", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 59, "prompt_n": 164, "prompt_ms": 100.031, "prompt_per_token_ms": 0.6099451219512195, "prompt_per_second": 1639.4917575551576, "predicted_n": 2, "predicted_ms": 30.352, "predicted_per_token_ms": 15.176, "predicted_per_second": 65.89351607801792}, "tps": 65.89351607801792}, {"id": "88cb084a-a296-41a0-a956-01543b8dd03e", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["SoftwareSecurity", "ApplicationSecurity", "Vulnerability"], "timings": {"cache_n": 63, "prompt_n": 127, "prompt_ms": 96.197, "prompt_per_token_ms": 0.7574566929133858, "prompt_per_second": 1320.207490878094, "predicted_n": 2, "predicted_ms": 28.661, "predicted_per_token_ms": 14.3305, "predicted_per_second": 69.78123582568647}, "tps": 69.78123582568647}, {"id": "f329cb7e-141c-4f4d-ac9c-3383c244c1cd", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 59, "prompt_n": 116, "prompt_ms": 96.63, "prompt_per_token_ms": 0.8330172413793103, "prompt_per_second": 1200.4553451309116, "predicted_n": 2, "predicted_ms": 28.886, "predicted_per_token_ms": 14.443, "predicted_per_second": 69.23769300006924}, "tps": 69.23769300006924}, {"id": "0734d518-04ba-4615-a50b-cb7478088a26", "answer": "A", "llm_answer": "B", "score": 0, "topics": ["ApplicationSecurity", "WebSecurity"], "timings": {"cache_n": 62, "prompt_n": 104, "prompt_ms": 93.347, "prompt_per_token_ms": 0.8975673076923076, "prompt_per_second": 1114.122574908674, "predicted_n": 2, "predicted_ms": 26.794, "predicted_per_token_ms": 13.397, "predicted_per_second": 74.64357692020602}, "tps": 74.64357692020602}, {"id": "a39601c2-5bce-40e4-8650-f00f6ebf4a33", "answer": "B", "llm_answer": "C", "score": 0, "topics": ["WebSecurity", "Vulnerability"], "timings": {"cache_n": 58, "prompt_n": 103, "prompt_ms": 86.153, "prompt_per_token_ms": 0.8364368932038836, "prompt_per_second": 1195.5474562696597, "predicted_n": 2, "predicted_ms": 28.111, "predicted_per_token_ms": 14.0555, "predicted_per_second": 71.14652627085482}, "tps": 71.14652627085482}, {"id": "c21d2d77-7c3c-41a0-906a-d4d077039154", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["ApplicationSecurity", "WebSecurity"], "timings": {"cache_n": 60, "prompt_n": 119, "prompt_ms": 96.474, "prompt_per_token_ms": 0.8107058823529412, "prompt_per_second": 1233.4929618342767, "predicted_n": 2, "predicted_ms": 28.676, "predicted_per_token_ms": 14.338, "predicted_per_second": 69.74473427256243}, "tps": 69.74473427256243}, {"id": "1542ecec-bc85-4ecc-859c-1cd92eb04d0c", "answer": "AC", "llm_answer": "ACD", "score": 0, "topics": ["ApplicationSecurity", "SystemSecurity"], "timings": {"cache_n": 59, "prompt_n": 141, "prompt_ms": 98.923, "prompt_per_token_ms": 0.701581560283688, "prompt_per_second": 1425.3510305995571, "predicted_n": 3, "predicted_ms": 59.016, "predicted_per_token_ms": 19.672, "predicted_per_second": 50.8336722244815}, "tps": 50.8336722244815}, {"id": "912204d4-c78e-4c5d-b6ee-c5b9e9ba4c3f", "answer": "AD", "llm_answer": "C", "score": 0, "topics": ["SystemSecurity", "WebSecurity"], "timings": {"cache_n": 60, "prompt_n": 135, "prompt_ms": 99.134, "prompt_per_token_ms": 0.7343259259259259, "prompt_per_second": 1361.7931284927472, "predicted_n": 2, "predicted_ms": 29.92, "predicted_per_token_ms": 14.96, "predicted_per_second": 66.84491978609626}, "tps": 66.84491978609626}, {"id": "f308bc1a-dbd3-4d9d-9af3-d2aba109e067", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["SystemSecurity", "WebSecurity", "PenTest"], "timings": {"cache_n": 59, "prompt_n": 115, "prompt_ms": 95.304, "prompt_per_token_ms": 0.8287304347826087, "prompt_per_second": 1206.6649878284227, "predicted_n": 2, "predicted_ms": 27.891, "predicted_per_token_ms": 13.9455, "predicted_per_second": 71.70771933598652}, "tps": 71.70771933598652}, {"id": "c4df3074-6870-440b-b4fe-196d5ea302bb", "answer": "BC", "llm_answer": "B", "score": 0, "topics": ["NetworkSecurity", "SystemSecurity"], "timings": {"cache_n": 58, "prompt_n": 114, "prompt_ms": 95.763, "prompt_per_token_ms": 0.8400263157894737, "prompt_per_second": 1190.4388960245606, "predicted_n": 2, "predicted_ms": 29.004, "predicted_per_token_ms": 14.502, "predicted_per_second": 68.95600606812853}, "tps": 68.95600606812853}, {"id": "99ad21b1-5ff5-4c21-bdb5-b45b047f3873", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["PenTest", "SystemSecurity"], "timings": {"cache_n": 60, "prompt_n": 95, "prompt_ms": 92.44, "prompt_per_token_ms": 0.9730526315789474, "prompt_per_second": 1027.6936391172653, "predicted_n": 2, "predicted_ms": 29.754, "predicted_per_token_ms": 14.877, "predicted_per_second": 67.2178530617732}, "tps": 67.2178530617732}, {"id": "76c90f79-6f59-4df9-836c-ea2c942d5bda", "answer": "A", "llm_answer": "C", "score": 0, "topics": ["SystemSecurity", "PenTest"], "timings": {"cache_n": 58, "prompt_n": 90, "prompt_ms": 92.215, "prompt_per_token_ms": 1.0246111111111111, "prompt_per_second": 975.9800466301577, "predicted_n": 2, "predicted_ms": 29.674, "predicted_per_token_ms": 14.837, "predicted_per_second": 67.39906989283548}, "tps": 67.39906989283548}, {"id": "6b18e8e2-e7af-428c-a337-23a038898b7a", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 58, "prompt_n": 94, "prompt_ms": 93.178, "prompt_per_token_ms": 0.9912553191489362, "prompt_per_second": 1008.8218248942885, "predicted_n": 2, "predicted_ms": 28.865, "predicted_per_token_ms": 14.4325, "predicted_per_second": 69.28806513078122}, "tps": 69.28806513078122}, {"id": "917a3891-25ad-4f06-91d5-a5d6c0ef6836", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["NetworkSecurity"], "timings": {"cache_n": 58, "prompt_n": 89, "prompt_ms": 91.841, "prompt_per_token_ms": 1.0319213483146066, "prompt_per_second": 969.0661033743099, "predicted_n": 2, "predicted_ms": 26.281, "predicted_per_token_ms": 13.1405, "predicted_per_second": 76.10060499980975}, "tps": 76.10060499980975}, {"id": "97118608-b5b2-4270-a900-3622783e4f58", "answer": "ABC", "llm_answer": "ABCD", "score": 0, "topics": ["WebSecurity"], "timings": {"cache_n": 58, "prompt_n": 151, "prompt_ms": 89.451, "prompt_per_token_ms": 0.5923907284768212, "prompt_per_second": 1688.0750354942932, "predicted_n": 64, "predicted_ms": 1854.341, "predicted_per_token_ms": 28.974078125, "predicted_per_second": 34.51360887776305}, "tps": 34.51360887776305}, {"id": "ceccd25d-392a-4bd3-a6f0-7bd78dc19f9c", "answer": "AD", "llm_answer": "AD", "score": 1, "topics": ["SystemSecurity", "NetworkSecurity", "PenTest"], "timings": {"cache_n": 60, "prompt_n": 108, "prompt_ms": 93.843, "prompt_per_token_ms": 0.8689166666666667, "prompt_per_second": 1150.8583485182698, "predicted_n": 3, "predicted_ms": 60.649, "predicted_per_token_ms": 20.216333333333335, "predicted_per_second": 49.46495408003429}, "tps": 49.46495408003429}, {"id": "3fd74316-dfd9-403f-9da3-428b49b4e4dc", "answer": "AC", "llm_answer": "ABCD", "score": 0, "topics": ["NetworkSecurity"], "timings": {"cache_n": 58, "prompt_n": 140, "prompt_ms": 97.24, "prompt_per_token_ms": 0.6945714285714285, "prompt_per_second": 1439.736733854381, "predicted_n": 64, "predicted_ms": 1840.75, "predicted_per_token_ms": 28.76171875, "predicted_per_second": 34.76843677848703}, "tps": 34.76843677848703}, {"id": "3b49f4f2-856b-4228-9741-5185e158502a", "answer": "AD", "llm_answer": "ABCD", "score": 0, "topics": ["WebSecurity"], "timings": {"cache_n": 59, "prompt_n": 119, "prompt_ms": 95.389, "prompt_per_token_ms": 0.8015882352941176, "prompt_per_second": 1247.5232993322081, "predicted_n": 3, "predicted_ms": 57.706, "predicted_per_token_ms": 19.235333333333333, "predicted_per_second": 51.98766159498146}, "tps": 51.98766159498146}, {"id": "daa69515-79d7-44bb-aceb-91a3d7a403de", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["ApplicationSecurity"], "timings": {"cache_n": 59, "prompt_n": 141, "prompt_ms": 103.163, "prompt_per_token_ms": 0.7316524822695035, "prompt_per_second": 1366.7690935703693, "predicted_n": 2, "predicted_ms": 29.841, "predicted_per_token_ms": 14.9205, "predicted_per_second": 67.02188264468349}, "tps": 67.02188264468349}, {"id": "0dadf73b-6454-4174-8018-db4add486894", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["WebSecurity", "SoftwareSecurity"], "timings": {"cache_n": 59, "prompt_n": 100, "prompt_ms": 95.253, "prompt_per_token_ms": 0.95253, "prompt_per_second": 1049.8357007128384, "predicted_n": 2, "predicted_ms": 31.297, "predicted_per_token_ms": 15.6485, "predicted_per_second": 63.903888551618365}, "tps": 63.903888551618365}, {"id": "aeb79555-d153-4573-96ff-667f50608578", "answer": "B", "llm_answer": "C", "score": 0, "topics": ["ApplicationSecurity", "SystemSecurity"], "timings": {"cache_n": 60, "prompt_n": 197, "prompt_ms": 101.353, "prompt_per_token_ms": 0.5144822335025381, "prompt_per_second": 1943.7017157854232, "predicted_n": 2, "predicted_ms": 29.91, "predicted_per_token_ms": 14.955, "predicted_per_second": 66.86726847208291}, "tps": 66.86726847208291}, {"id": "86ce528a-ba6d-4f03-8ede-e790e260f811", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["NetworkSecurity", "WebSecurity", "PenTest"], "timings": {"cache_n": 63, "prompt_n": 134, "prompt_ms": 97.203, "prompt_per_token_ms": 0.7253955223880597, "prompt_per_second": 1378.5582749503615, "predicted_n": 2, "predicted_ms": 31.476, "predicted_per_token_ms": 15.738, "predicted_per_second": 63.54047528275512}, "tps": 63.54047528275512}, {"id": "cca6002a-f0e7-43a3-bda8-f1a18d0ea40f", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["SystemSecurity", "NetworkSecurity", "PenTest"], "timings": {"cache_n": 60, "prompt_n": 121, "prompt_ms": 96.798, "prompt_per_token_ms": 0.7999834710743802, "prompt_per_second": 1250.0258269798962, "predicted_n": 2, "predicted_ms": 31.173, "predicted_per_token_ms": 15.5865, "predicted_per_second": 64.158085522728}, "tps": 64.158085522728}, {"id": "3c45dc3d-fda7-498e-a5f7-a5c3e478c392", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["ApplicationSecurity"], "timings": {"cache_n": 59, "prompt_n": 99, "prompt_ms": 94.1, "prompt_per_token_ms": 0.9505050505050504, "prompt_per_second": 1052.0722635494155, "predicted_n": 2, "predicted_ms": 29.822, "predicted_per_token_ms": 14.911, "predicted_per_second": 67.06458319361545}, "tps": 67.06458319361545}, {"id": "d95bc58f-fd64-4dd5-aa8f-1369424b9c42", "answer": "AC", "llm_answer": "A", "score": 0, "topics": ["ApplicationSecurity", "Vulnerability"], "timings": {"cache_n": 59, "prompt_n": 176, "prompt_ms": 104.925, "prompt_per_token_ms": 0.5961647727272728, "prompt_per_second": 1677.3886109125565, "predicted_n": 2, "predicted_ms": 31.602, "predicted_per_token_ms": 15.801, "predicted_per_second": 63.28713372571356}, "tps": 63.28713372571356}, {"id": "21ffc946-de7a-4bde-9cde-e451dcc07df2", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["WebSecurity", "PenTest"], "timings": {"cache_n": 64, "prompt_n": 84, "prompt_ms": 94.416, "prompt_per_token_ms": 1.1239999999999999, "prompt_per_second": 889.6797153024911, "predicted_n": 2, "predicted_ms": 27.567, "predicted_per_token_ms": 13.7835, "predicted_per_second": 72.55051329488155}, "tps": 72.55051329488155}, {"id": "6e4d7c7d-8e9e-4880-901f-5c2a287587c2", "answer": "AD", "llm_answer": "ABD", "score": 0, "topics": ["ApplicationSecurity", "NetworkSecurity"], "timings": {"cache_n": 58, "prompt_n": 127, "prompt_ms": 85.08, "prompt_per_token_ms": 0.6699212598425197, "prompt_per_second": 1492.7127409496943, "predicted_n": 64, "predicted_ms": 1838.84, "predicted_per_token_ms": 28.731875, "predicted_per_second": 34.80455069500337}, "tps": 34.80455069500337}, {"id": "b7232303-15bb-4c96-99a4-a480e28da950", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["SystemSecurity", "ApplicationSecurity", "PenTest"], "timings": {"cache_n": 61, "prompt_n": 104, "prompt_ms": 94.799, "prompt_per_token_ms": 0.9115288461538462, "prompt_per_second": 1097.0579858437325, "predicted_n": 2, "predicted_ms": 28.465, "predicted_per_token_ms": 14.2325, "predicted_per_second": 70.26172492534691}, "tps": 70.26172492534691}, {"id": "11b49f59-bfdc-4a2f-9bc0-cb7443befc50", "answer": "AB", "llm_answer": "AB", "score": 1, "topics": ["WebSecurity", "ApplicationSecurity"], "timings": {"cache_n": 58, "prompt_n": 104, "prompt_ms": 96.819, "prompt_per_token_ms": 0.9309519230769231, "prompt_per_second": 1074.1693262686042, "predicted_n": 2, "predicted_ms": 27.681, "predicted_per_token_ms": 13.8405, "predicted_per_second": 72.2517250099346}, "tps": 72.2517250099346}, {"id": "d7e421dd-d2a9-4eef-bafa-a1f34281eb98", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["ApplicationSecurity", "SystemSecurity"], "timings": {"cache_n": 58, "prompt_n": 141, "prompt_ms": 97.592, "prompt_per_token_ms": 0.6921418439716311, "prompt_per_second": 1444.7905566030004, "predicted_n": 2, "predicted_ms": 28.184, "predicted_per_token_ms": 14.092, "predicted_per_second": 70.9622480840193}, "tps": 70.9622480840193}, {"id": "71db5057-185d-4801-960a-c23ca4537e1e", "answer": "BC", "llm_answer": "B", "score": 0, "topics": ["MemorySafety", "NetworkSecurity", "PenTest"], "timings": {"cache_n": 61, "prompt_n": 124, "prompt_ms": 97.569, "prompt_per_token_ms": 0.7868467741935484, "prompt_per_second": 1270.8954688476872, "predicted_n": 2, "predicted_ms": 30.381, "predicted_per_token_ms": 15.1905, "predicted_per_second": 65.83061782034824}, "tps": 65.83061782034824}, {"id": "11ca3da8-2123-4329-9f4c-bbc511d41f07", "answer": "B", "llm_answer": "C", "score": 0, "topics": [], "timings": {"cache_n": 60, "prompt_n": 102, "prompt_ms": 93.738, "prompt_per_token_ms": 0.919, "prompt_per_second": 1088.139281828074, "predicted_n": 2, "predicted_ms": 29.305, "predicted_per_token_ms": 14.6525, "predicted_per_second": 68.2477392936359}, "tps": 68.2477392936359}, {"id": "f5baf9a6-4f83-4552-ac2f-959c6f725e3b", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["SoftwareSecurity", "Vulnerability"], "timings": {"cache_n": 58, "prompt_n": 90, "prompt_ms": 86.047, "prompt_per_token_ms": 0.9560777777777777, "prompt_per_second": 1045.9400095296758, "predicted_n": 2, "predicted_ms": 26.785, "predicted_per_token_ms": 13.3925, "predicted_per_second": 74.66865783087549}, "tps": 74.66865783087549}, {"id": "0b222fe7-3fd1-4f16-9169-d6635818e6cb", "answer": "AC", "llm_answer": "AC", "score": 1, "topics": ["SystemSecurity", "NetworkSecurity", "WebSecurity"], "timings": {"cache_n": 60, "prompt_n": 103, "prompt_ms": 91.44, "prompt_per_token_ms": 0.8877669902912622, "prompt_per_second": 1126.4216972878392, "predicted_n": 3, "predicted_ms": 59.99, "predicted_per_token_ms": 19.996666666666666, "predicted_per_second": 50.00833472245374}, "tps": 50.00833472245374}, {"id": "66c817eb-7fb2-4169-9904-54c0b9990403", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["ApplicationSecurity", "Vulnerability"], "timings": {"cache_n": 59, "prompt_n": 168, "prompt_ms": 99.568, "prompt_per_token_ms": 0.5926666666666667, "prompt_per_second": 1687.2890888638922, "predicted_n": 2, "predicted_ms": 29.833, "predicted_per_token_ms": 14.9165, "predicted_per_second": 67.03985519391279}, "tps": 67.03985519391279}, {"id": "f700d18a-3d8d-4f3e-bd20-afa6eebfb5fe", "answer": "BC", "llm_answer": "BC", "score": 1, "topics": ["PenTest", "SystemSecurity"], "timings": {"cache_n": 60, "prompt_n": 144, "prompt_ms": 98.176, "prompt_per_token_ms": 0.6817777777777778, "prompt_per_second": 1466.753585397653, "predicted_n": 3, "predicted_ms": 60.002, "predicted_per_token_ms": 20.000666666666667, "predicted_per_second": 49.99833338888703}, "tps": 49.99833338888703}, {"id": "4f260853-18a9-4add-b1df-66c2859d9b9e", "answer": "C", "llm_answer": "AC", "score": 0, "topics": ["NetworkSecurity"], "timings": {"cache_n": 62, "prompt_n": 112, "prompt_ms": 95.342, "prompt_per_token_ms": 0.8512678571428571, "prompt_per_second": 1174.7183822449706, "predicted_n": 2, "predicted_ms": 30.815, "predicted_per_token_ms": 15.4075, "predicted_per_second": 64.9034561090378}, "tps": 64.9034561090378}, {"id": "9c126e8f-4ee4-4252-a875-7e23ee899498", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["PenTest", "SystemSecurity"], "timings": {"cache_n": 59, "prompt_n": 146, "prompt_ms": 98.06, "prompt_per_token_ms": 0.6716438356164384, "prompt_per_second": 1488.8843565164186, "predicted_n": 2, "predicted_ms": 29.668, "predicted_per_token_ms": 14.834, "predicted_per_second": 67.41270055278414}, "tps": 67.41270055278414}, {"id": "2a26b7ae-f0d1-43ea-986a-3bf39e60f8fe", "answer": "AC", "llm_answer": "BC", "score": 0, "topics": ["SystemSecurity", "PenTest"], "timings": {"cache_n": 61, "prompt_n": 138, "prompt_ms": 98.241, "prompt_per_token_ms": 0.7118913043478261, "prompt_per_second": 1404.7088282896143, "predicted_n": 3, "predicted_ms": 59.784, "predicted_per_token_ms": 19.928, "predicted_per_second": 50.18065034122842}, "tps": 50.18065034122842}, {"id": "a2c51ca4-5b22-471e-99e9-ea6351fa14e3", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["ApplicationSecurity", "NetworkSecurity"], "timings": {"cache_n": 62, "prompt_n": 199, "prompt_ms": 102.006, "prompt_per_token_ms": 0.5125929648241206, "prompt_per_second": 1950.865635354783, "predicted_n": 2, "predicted_ms": 31.208, "predicted_per_token_ms": 15.604, "predicted_per_second": 64.0861317610869}, "tps": 64.0861317610869}, {"id": "dfdc11fe-81eb-4656-b376-a896e703e41b", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["SystemSecurity", "NetworkSecurity", "PenTest"], "timings": {"cache_n": 60, "prompt_n": 134, "prompt_ms": 98.567, "prompt_per_token_ms": 0.7355746268656715, "prompt_per_second": 1359.4813680034902, "predicted_n": 2, "predicted_ms": 29.492, "predicted_per_token_ms": 14.746, "predicted_per_second": 67.81500067815}, "tps": 67.81500067815}, {"id": "0e8577ca-0792-4d35-9be0-46042162b2fc", "answer": "AB", "llm_answer": "A", "score": 0, "topics": ["WebSecurity", "PenTest"], "timings": {"cache_n": 62, "prompt_n": 89, "prompt_ms": 86.859, "prompt_per_token_ms": 0.975943820224719, "prompt_per_second": 1024.649144015013, "predicted_n": 2, "predicted_ms": 26.88, "predicted_per_token_ms": 13.44, "predicted_per_second": 74.40476190476191}, "tps": 74.40476190476191}, {"id": "c547da8e-a6b0-44cd-b5cb-07beda081073", "answer": "B", "llm_answer": "C", "score": 0, "topics": ["SystemSecurity", "PenTest"], "timings": {"cache_n": 58, "prompt_n": 94, "prompt_ms": 84.112, "prompt_per_token_ms": 0.8948085106382978, "prompt_per_second": 1117.5575423245198, "predicted_n": 2, "predicted_ms": 31.354, "predicted_per_token_ms": 15.677, "predicted_per_second": 63.78771448618996}, "tps": 63.78771448618996}, {"id": "8f4de3bc-729b-4a6f-929c-f5b9fd23bb9a", "answer": "BC", "llm_answer": "B", "score": 0, "topics": ["SoftwareSecurity", "SystemSecurity", "ApplicationSecurity", "Vulnerability"], "timings": {"cache_n": 59, "prompt_n": 141, "prompt_ms": 96.706, "prompt_per_token_ms": 0.6858581560283689, "prompt_per_second": 1458.0274233243026, "predicted_n": 2, "predicted_ms": 31.173, "predicted_per_token_ms": 15.5865, "predicted_per_second": 64.158085522728}, "tps": 64.158085522728}, {"id": "43bbe213-bae2-400e-ac1a-a312c848e03c", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["ApplicationSecurity"], "timings": {"cache_n": 59, "prompt_n": 256, "prompt_ms": 104.746, "prompt_per_token_ms": 0.4091640625, "prompt_per_second": 2444.007408397457, "predicted_n": 2, "predicted_ms": 30.431, "predicted_per_token_ms": 15.2155, "predicted_per_second": 65.72245407643521}, "tps": 65.72245407643521}, {"id": "a4c7a570-219f-472e-a3c0-dbfe36975388", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["SystemSecurity"], "timings": {"cache_n": 60, "prompt_n": 171, "prompt_ms": 100.274, "prompt_per_token_ms": 0.5863976608187135, "prompt_per_second": 1705.32740291601, "predicted_n": 2, "predicted_ms": 29.472, "predicted_per_token_ms": 14.736, "predicted_per_second": 67.86102062975027}, "tps": 67.86102062975027}, {"id": "5c3f5e40-ad06-4687-a343-af02b91e8ddf", "answer": "CD", "llm_answer": "CD", "score": 1, "topics": ["SystemSecurity", "Vulnerability"], "timings": {"cache_n": 60, "prompt_n": 159, "prompt_ms": 98.701, "prompt_per_token_ms": 0.6207610062893081, "prompt_per_second": 1610.92592780215, "predicted_n": 4, "predicted_ms": 87.824, "predicted_per_token_ms": 21.956, "predicted_per_second": 45.54563672800146}, "tps": 45.54563672800146}, {"id": "f89c3dfe-49c6-4109-bb00-a9740459e10b", "answer": "BD", "llm_answer": "B", "score": 0, "topics": ["SystemSecurity"], "timings": {"cache_n": 61, "prompt_n": 180, "prompt_ms": 100.999, "prompt_per_token_ms": 0.5611055555555555, "prompt_per_second": 1782.1958633253796, "predicted_n": 2, "predicted_ms": 29.838, "predicted_per_token_ms": 14.919, "predicted_per_second": 67.02862122126147}, "tps": 67.02862122126147}, {"id": "5a1f53b4-4828-4f77-aee3-6310ceb717b2", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 60, "prompt_n": 145, "prompt_ms": 96.776, "prompt_per_token_ms": 0.6674206896551724, "prompt_per_second": 1498.3053649665208, "predicted_n": 2, "predicted_ms": 29.523, "predicted_per_token_ms": 14.7615, "predicted_per_second": 67.74379297496867}, "tps": 67.74379297496867}, {"id": "9438b59d-8596-4948-a8ff-0a0bb85ec3f5", "answer": "AC", "llm_answer": "AB", "score": 0, "topics": ["NetworkSecurity", "PenTest"], "timings": {"cache_n": 60, "prompt_n": 103, "prompt_ms": 94.965, "prompt_per_token_ms": 0.9219902912621359, "prompt_per_second": 1084.610119517717, "predicted_n": 3, "predicted_ms": 59.576, "predicted_per_token_ms": 19.858666666666668, "predicted_per_second": 50.35584799248019}, "tps": 50.35584799248019}, {"id": "a530034d-1b3f-455d-be95-6766e9a44a77", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SystemSecurity"], "timings": {"cache_n": 59, "prompt_n": 94, "prompt_ms": 91.759, "prompt_per_token_ms": 0.9761595744680851, "prompt_per_second": 1024.4226724354014, "predicted_n": 2, "predicted_ms": 25.816, "predicted_per_token_ms": 12.908, "predicted_per_second": 77.47133560582584}, "tps": 77.47133560582584}, {"id": "eb10327b-3e59-47ff-8f22-6bee7812e464", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 59, "prompt_n": 109, "prompt_ms": 80.187, "prompt_per_token_ms": 0.7356605504587156, "prompt_per_second": 1359.3225834611596, "predicted_n": 2, "predicted_ms": 28.03, "predicted_per_token_ms": 14.015, "predicted_per_second": 71.35212272565109}, "tps": 71.35212272565109}, {"id": "fbe07b19-1115-46f5-ba8a-5c1e942e301d", "answer": "ABC", "llm_answer": "A", "score": 0, "topics": ["SoftwareSecurity", "Vulnerability"], "timings": {"cache_n": 58, "prompt_n": 157, "prompt_ms": 97.518, "prompt_per_token_ms": 0.6211337579617835, "prompt_per_second": 1609.9591870218833, "predicted_n": 2, "predicted_ms": 29.095, "predicted_per_token_ms": 14.5475, "predicted_per_second": 68.74033339061695}, "tps": 68.74033339061695}, {"id": "244df2cc-e22f-4395-a2c2-bbae1b9c9539", "answer": "ABC", "llm_answer": "AC", "score": 0, "topics": ["SoftwareSecurity", "WebSecurity"], "timings": {"cache_n": 60, "prompt_n": 175, "prompt_ms": 100.524, "prompt_per_token_ms": 0.5744228571428571, "prompt_per_second": 1740.87780032629, "predicted_n": 2, "predicted_ms": 29.224, "predicted_per_token_ms": 14.612, "predicted_per_second": 68.4369011771147}, "tps": 68.4369011771147}, {"id": "bb73d512-538a-47f4-8be7-d2b431e7f1d3", "answer": "AD", "llm_answer": "A", "score": 0, "topics": ["WebSecurity"], "timings": {"cache_n": 62, "prompt_n": 123, "prompt_ms": 100.204, "prompt_per_token_ms": 0.8146666666666667, "prompt_per_second": 1227.4959083469723, "predicted_n": 2, "predicted_ms": 29.761, "predicted_per_token_ms": 14.8805, "predicted_per_second": 67.20204294210544}, "tps": 67.20204294210544}, {"id": "6fb2594e-757c-4ac3-8547-4d123afcaf6c", "answer": "ABCD", "llm_answer": "ABCD", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 59, "prompt_n": 142, "prompt_ms": 98.345, "prompt_per_token_ms": 0.6925704225352113, "prompt_per_second": 1443.8964868574917, "predicted_n": 3, "predicted_ms": 59.027, "predicted_per_token_ms": 19.675666666666668, "predicted_per_second": 50.824199095329256}, "tps": 50.824199095329256}, {"id": "ee80948b-94d0-4f67-9c93-31f75fd97173", "answer": "AC", "llm_answer": "AC", "score": 1, "topics": ["ApplicationSecurity", "SystemSecurity"], "timings": {"cache_n": 60, "prompt_n": 110, "prompt_ms": 95.397, "prompt_per_token_ms": 0.8672454545454545, "prompt_per_second": 1153.0760925395978, "predicted_n": 3, "predicted_ms": 60.728, "predicted_per_token_ms": 20.24266666666667, "predicted_per_second": 49.40060598076669}, "tps": 49.40060598076669}, {"id": "bfda1ac7-8ea2-419a-a993-8a559eb5cae3", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["ApplicationSecurity", "SystemSecurity"], "timings": {"cache_n": 68, "prompt_n": 98, "prompt_ms": 96.119, "prompt_per_token_ms": 0.9808061224489796, "prompt_per_second": 1019.5694919838951, "predicted_n": 2, "predicted_ms": 30.06, "predicted_per_token_ms": 15.03, "predicted_per_second": 66.53359946773121}, "tps": 66.53359946773121}, {"id": "1157b6bf-e63d-4484-9fc8-3a5e6ec63f21", "answer": "AC", "llm_answer": "ABC", "score": 0, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 58, "prompt_n": 189, "prompt_ms": 98.341, "prompt_per_token_ms": 0.5203227513227513, "prompt_per_second": 1921.8840564972902, "predicted_n": 3, "predicted_ms": 60.947, "predicted_per_token_ms": 20.31566666666667, "predicted_per_second": 49.22309547639752}, "tps": 49.22309547639752}, {"id": "2615a6e1-a0c7-463d-a741-f98418a139d0", "answer": "AB", "llm_answer": "A", "score": 0, "topics": [], "timings": {"cache_n": 62, "prompt_n": 116, "prompt_ms": 96.424, "prompt_per_token_ms": 0.8312413793103449, "prompt_per_second": 1203.019995021986, "predicted_n": 2, "predicted_ms": 30.809, "predicted_per_token_ms": 15.4045, "predicted_per_second": 64.91609594598981}, "tps": 64.91609594598981}, {"id": "8477d241-1cee-43b3-9faa-d42068a0eaad", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["SystemSecurity", "WebSecurity", "ApplicationSecurity"], "timings": {"cache_n": 59, "prompt_n": 107, "prompt_ms": 93.406, "prompt_per_token_ms": 0.8729532710280374, "prompt_per_second": 1145.5366892919085, "predicted_n": 2, "predicted_ms": 30.919, "predicted_per_token_ms": 15.4595, "predicted_per_second": 64.68514505643779}, "tps": 64.68514505643779}, {"id": "79087488-9744-4b73-ad85-87a37481c1da", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["ApplicationSecurity", "SystemSecurity", "PenTest"], "timings": {"cache_n": 60, "prompt_n": 112, "prompt_ms": 95.98, "prompt_per_token_ms": 0.8569642857142857, "prompt_per_second": 1166.9097728693478, "predicted_n": 2, "predicted_ms": 29.679, "predicted_per_token_ms": 14.8395, "predicted_per_second": 67.38771521951548}, "tps": 67.38771521951548}, {"id": "ef740d57-6703-4b92-b0ce-f386209e4a1c", "answer": "D", "llm_answer": "A", "score": 0, "topics": ["SystemSecurity", "SoftwareSecurity", "Vulnerability"], "timings": {"cache_n": 62, "prompt_n": 104, "prompt_ms": 95.77, "prompt_per_token_ms": 0.9208653846153846, "prompt_per_second": 1085.9350527305003, "predicted_n": 2, "predicted_ms": 28.952, "predicted_per_token_ms": 14.476, "predicted_per_second": 69.07985631389886}, "tps": 69.07985631389886}, {"id": "4524d52f-d964-495d-a83c-10748028baff", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["SystemSecurity", "PenTest"], "timings": {"cache_n": 58, "prompt_n": 153, "prompt_ms": 95.593, "prompt_per_token_ms": 0.6247908496732026, "prompt_per_second": 1600.535604071428, "predicted_n": 2, "predicted_ms": 29.615, "predicted_per_token_ms": 14.8075, "predicted_per_second": 67.53334458889077}, "tps": 67.53334458889077}, {"id": "cef987e0-803a-47a9-9c34-2d4c87dc5553", "answer": "AD", "llm_answer": "A", "score": 0, "topics": ["SystemSecurity", "Cryptography"], "timings": {"cache_n": 59, "prompt_n": 90, "prompt_ms": 95.105, "prompt_per_token_ms": 1.0567222222222223, "prompt_per_second": 946.3224856737289, "predicted_n": 2, "predicted_ms": 32.258, "predicted_per_token_ms": 16.129, "predicted_per_second": 62.00012400024799}, "tps": 62.00012400024799}, {"id": "fcb02501-c22d-47e8-803d-dde0b3bd6c88", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["ApplicationSecurity", "SystemSecurity"], "timings": {"cache_n": 58, "prompt_n": 156, "prompt_ms": 99.377, "prompt_per_token_ms": 0.6370320512820512, "prompt_per_second": 1569.7797277035936, "predicted_n": 2, "predicted_ms": 29.765, "predicted_per_token_ms": 14.8825, "predicted_per_second": 67.19301192675961}, "tps": 67.19301192675961}, {"id": "7f236906-85ab-46cd-88c8-9ab8908121cf", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["WebSecurity", "NetworkSecurity"], "timings": {"cache_n": 60, "prompt_n": 104, "prompt_ms": 83.204, "prompt_per_token_ms": 0.8000384615384615, "prompt_per_second": 1249.9399067352533, "predicted_n": 2, "predicted_ms": 25.836, "predicted_per_token_ms": 12.918, "predicted_per_second": 77.41136398823348}, "tps": 77.41136398823348}, {"id": "16374e76-2440-46bd-be97-556210b15d83", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 59, "prompt_n": 142, "prompt_ms": 95.282, "prompt_per_token_ms": 0.6709999999999999, "prompt_per_second": 1490.312965722802, "predicted_n": 2, "predicted_ms": 28.487, "predicted_per_token_ms": 14.2435, "predicted_per_second": 70.20746305332257}, "tps": 70.20746305332257}, {"id": "db12a597-7279-4c76-bad3-978864ed4444", "answer": "D", "llm_answer": "C", "score": 0, "topics": ["WebSecurity", "SystemSecurity"], "timings": {"cache_n": 60, "prompt_n": 104, "prompt_ms": 94.542, "prompt_per_token_ms": 0.9090576923076923, "prompt_per_second": 1100.040193776311, "predicted_n": 2, "predicted_ms": 28.648, "predicted_per_token_ms": 14.324, "predicted_per_second": 69.8129014241832}, "tps": 69.8129014241832}, {"id": "fc586af6-6ebf-406a-a975-7483e10f0bf9", "answer": "BD", "llm_answer": "D", "score": 0, "topics": ["WebSecurity", "ApplicationSecurity"], "timings": {"cache_n": 58, "prompt_n": 130, "prompt_ms": 96.779, "prompt_per_token_ms": 0.7444538461538461, "prompt_per_second": 1343.266617757985, "predicted_n": 2, "predicted_ms": 29.28, "predicted_per_token_ms": 14.64, "predicted_per_second": 68.30601092896174}, "tps": 68.30601092896174}, {"id": "28307634-cb9d-4021-8918-0fa8eaa52f80", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["NetworkSecurity", "PenTest"], "timings": {"cache_n": 60, "prompt_n": 82, "prompt_ms": 91.775, "prompt_per_token_ms": 1.119207317073171, "prompt_per_second": 893.4895123944428, "predicted_n": 2, "predicted_ms": 28.542, "predicted_per_token_ms": 14.271, "predicted_per_second": 70.07217433956976}, "tps": 70.07217433956976}, {"id": "064ed10c-1f35-4570-a304-eead6f164d2a", "answer": "AC", "llm_answer": "ACD", "score": 0, "topics": ["SystemSecurity"], "timings": {"cache_n": 58, "prompt_n": 145, "prompt_ms": 97.028, "prompt_per_token_ms": 0.6691586206896553, "prompt_per_second": 1494.4139835923652, "predicted_n": 3, "predicted_ms": 59.765, "predicted_per_token_ms": 19.921666666666667, "predicted_per_second": 50.19660336317243}, "tps": 50.19660336317243}, {"id": "7d53d175-59d3-4002-b8ff-bdfcd5681167", "answer": "B", "llm_answer": "B", "score": 1, "topics": [], "timings": {"cache_n": 59, "prompt_n": 80, "prompt_ms": 94.152, "prompt_per_token_ms": 1.1769, "prompt_per_second": 849.689863199932, "predicted_n": 2, "predicted_ms": 29.19, "predicted_per_token_ms": 14.595, "predicted_per_second": 68.5166152792052}, "tps": 68.5166152792052}, {"id": "d0d4dcad-a18a-4485-a459-43f1b80ccfcf", "answer": "AC", "llm_answer": "CD", "score": 0, "topics": ["ApplicationSecurity", "PenTest"], "timings": {"cache_n": 58, "prompt_n": 164, "prompt_ms": 98.673, "prompt_per_token_ms": 0.6016646341463415, "prompt_per_second": 1662.0554761687595, "predicted_n": 4, "predicted_ms": 88.06, "predicted_per_token_ms": 22.015, "predicted_per_second": 45.42357483533954}, "tps": 45.42357483533954}, {"id": "7f261b67-2ddd-49c1-8b36-828d0b61c6d7", "answer": "BD", "llm_answer": "B", "score": 0, "topics": ["ApplicationSecurity", "SystemSecurity", "Vulnerability"], "timings": {"cache_n": 60, "prompt_n": 148, "prompt_ms": 97.598, "prompt_per_token_ms": 0.659445945945946, "prompt_per_second": 1516.424516895838, "predicted_n": 2, "predicted_ms": 29.192, "predicted_per_token_ms": 14.596, "predicted_per_second": 68.51192107426692}, "tps": 68.51192107426692}, {"id": "c533748d-973b-4b11-856d-0aa44e424d8b", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["ApplicationSecurity", "SystemSecurity"], "timings": {"cache_n": 60, "prompt_n": 93, "prompt_ms": 91.267, "prompt_per_token_ms": 0.9813655913978494, "prompt_per_second": 1018.9882432861823, "predicted_n": 2, "predicted_ms": 25.285, "predicted_per_token_ms": 12.6425, "predicted_per_second": 79.09827961241842}, "tps": 79.09827961241842}, {"id": "abec6922-8e29-4f9c-a9aa-cd6064468db9", "answer": "BD", "llm_answer": "B", "score": 0, "topics": ["ApplicationSecurity", "WebSecurity"], "timings": {"cache_n": 58, "prompt_n": 150, "prompt_ms": 83.884, "prompt_per_token_ms": 0.5592266666666666, "prompt_per_second": 1788.1836822278385, "predicted_n": 2, "predicted_ms": 28.552, "predicted_per_token_ms": 14.276, "predicted_per_second": 70.04763239002521}, "tps": 70.04763239002521}, {"id": "7b9eb73a-ffb4-4414-807a-a8192c0584cf", "answer": "ACD", "llm_answer": "ACD", "score": 1, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 60, "prompt_n": 169, "prompt_ms": 99.663, "prompt_per_token_ms": 0.5897218934911242, "prompt_per_second": 1695.7145580606646, "predicted_n": 3, "predicted_ms": 59.461, "predicted_per_token_ms": 19.820333333333334, "predicted_per_second": 50.45323825700879}, "tps": 50.45323825700879}, {"id": "9b25110d-6e6e-4ad1-b44b-7092437c2651", "answer": "AD", "llm_answer": "A", "score": 0, "topics": ["SystemSecurity", "SoftwareSecurity"], "timings": {"cache_n": 60, "prompt_n": 122, "prompt_ms": 97.661, "prompt_per_token_ms": 0.8005, "prompt_per_second": 1249.2192379762648, "predicted_n": 2, "predicted_ms": 31.65, "predicted_per_token_ms": 15.825, "predicted_per_second": 63.191153238546605}, "tps": 63.191153238546605}, {"id": "03b0b265-c699-40e9-ba39-207fff598330", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["ApplicationSecurity", "WebSecurity"], "timings": {"cache_n": 62, "prompt_n": 105, "prompt_ms": 98.06, "prompt_per_token_ms": 0.9339047619047619, "prompt_per_second": 1070.7729961248215, "predicted_n": 2, "predicted_ms": 31.531, "predicted_per_token_ms": 15.7655, "predicted_per_second": 63.4296406710856}, "tps": 63.4296406710856}, {"id": "d97c9d6a-e22a-4a77-9347-a657e4ede0a0", "answer": "AC", "llm_answer": "C", "score": 0, "topics": ["ApplicationSecurity", "Vulnerability"], "timings": {"cache_n": 60, "prompt_n": 169, "prompt_ms": 104.779, "prompt_per_token_ms": 0.6199940828402367, "prompt_per_second": 1612.9186191889596, "predicted_n": 2, "predicted_ms": 34.253, "predicted_per_token_ms": 17.1265, "predicted_per_second": 58.389046214930076}, "tps": 58.389046214930076}, {"id": "231e2f9d-3dde-4a43-985e-e33d08986c39", "answer": "ABCD", "llm_answer": "ACD", "score": 0, "topics": ["ApplicationSecurity"], "timings": {"cache_n": 62, "prompt_n": 156, "prompt_ms": 98.883, "prompt_per_token_ms": 0.6338653846153846, "prompt_per_second": 1577.622038166318, "predicted_n": 3, "predicted_ms": 60.491, "predicted_per_token_ms": 20.163666666666668, "predicted_per_second": 49.59415450232265}, "tps": 49.59415450232265}, {"id": "48c00a10-7ae4-4ca8-a962-cbd4a6705a4b", "answer": "BC", "llm_answer": "ABCD", "score": 0, "topics": ["WebSecurity"], "timings": {"cache_n": 63, "prompt_n": 108, "prompt_ms": 94.829, "prompt_per_token_ms": 0.8780462962962963, "prompt_per_second": 1138.8921110630715, "predicted_n": 3, "predicted_ms": 60.337, "predicted_per_token_ms": 20.112333333333336, "predicted_per_second": 49.72073520393788}, "tps": 49.72073520393788}, {"id": "a9ba3663-99af-47c8-9de8-f27362dc8315", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["MemorySafety"], "timings": {"cache_n": 60, "prompt_n": 148, "prompt_ms": 98.835, "prompt_per_token_ms": 0.667804054054054, "prompt_per_second": 1497.4452370111803, "predicted_n": 2, "predicted_ms": 29.238, "predicted_per_token_ms": 14.619, "predicted_per_second": 68.40413160954922}, "tps": 68.40413160954922}, {"id": "ef179e6b-59f1-41ac-94a5-e0c4b5d2d943", "answer": "D", "llm_answer": "D", "score": 1, "topics": ["SystemSecurity", "ApplicationSecurity", "PenTest"], "timings": {"cache_n": 60, "prompt_n": 56, "prompt_ms": 91.152, "prompt_per_token_ms": 1.6277142857142857, "prompt_per_second": 614.3584342636476, "predicted_n": 2, "predicted_ms": 27.447, "predicted_per_token_ms": 13.7235, "predicted_per_second": 72.86770867490073}, "tps": 72.86770867490073}, {"id": "2a04dfa6-3120-4cf2-9eb8-5d12996c54cf", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["SystemSecurity", "ApplicationSecurity", "PenTest"], "timings": {"cache_n": 58, "prompt_n": 110, "prompt_ms": 79.868, "prompt_per_token_ms": 0.7260727272727272, "prompt_per_second": 1377.2724996243803, "predicted_n": 2, "predicted_ms": 29.603, "predicted_per_token_ms": 14.8015, "predicted_per_second": 67.5607201972773}, "tps": 67.5607201972773}, {"id": "7c12a040-4846-4a23-bf74-606e536d5ff8", "answer": "BC", "llm_answer": "ABCD", "score": 0, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 62, "prompt_n": 144, "prompt_ms": 101.446, "prompt_per_token_ms": 0.7044861111111111, "prompt_per_second": 1419.4744001734914, "predicted_n": 8, "predicted_ms": 207.562, "predicted_per_token_ms": 25.94525, "predicted_per_second": 38.54270049431013}, "tps": 38.54270049431013}, {"id": "23530f33-06bd-45fc-bad8-17eb4b5fd833", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["WebSecurity", "SystemSecurity"], "timings": {"cache_n": 62, "prompt_n": 109, "prompt_ms": 96.928, "prompt_per_token_ms": 0.8892477064220183, "prompt_per_second": 1124.5460548035655, "predicted_n": 2, "predicted_ms": 28.968, "predicted_per_token_ms": 14.484, "predicted_per_second": 69.04170118751726}, "tps": 69.04170118751726}, {"id": "7b2748bb-ba3b-4ae8-97ae-b05891bf42b8", "answer": "ACD", "llm_answer": "ACD", "score": 1, "topics": ["ApplicationSecurity"], "timings": {"cache_n": 59, "prompt_n": 177, "prompt_ms": 101.019, "prompt_per_token_ms": 0.570728813559322, "prompt_per_second": 1752.145635969471, "predicted_n": 3, "predicted_ms": 57.551, "predicted_per_token_ms": 19.183666666666667, "predicted_per_second": 52.12767805946031}, "tps": 52.12767805946031}, {"id": "2e3be9fd-5288-4c02-9f46-e8ef419ab0a3", "answer": "D", "llm_answer": "D", "score": 1, "topics": ["PenTest", "SystemSecurity"], "timings": {"cache_n": 60, "prompt_n": 114, "prompt_ms": 97.902, "prompt_per_token_ms": 0.8587894736842105, "prompt_per_second": 1164.4297358583074, "predicted_n": 2, "predicted_ms": 29.01, "predicted_per_token_ms": 14.505, "predicted_per_second": 68.94174422612892}, "tps": 68.94174422612892}, {"id": "9d9e61a9-c660-4b2f-b060-52c69cd32589", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["Vulnerability"], "timings": {"cache_n": 59, "prompt_n": 136, "prompt_ms": 98.66, "prompt_per_token_ms": 0.7254411764705883, "prompt_per_second": 1378.4715183458343, "predicted_n": 2, "predicted_ms": 30.616, "predicted_per_token_ms": 15.308, "predicted_per_second": 65.32532009406846}, "tps": 65.32532009406846}, {"id": "e2ceeaf7-2137-43c6-85a9-db5a0f87e1f4", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["WebSecurity", "ApplicationSecurity"], "timings": {"cache_n": 60, "prompt_n": 179, "prompt_ms": 100.01, "prompt_per_token_ms": 0.5587150837988827, "prompt_per_second": 1789.8210178982101, "predicted_n": 2, "predicted_ms": 33.631, "predicted_per_token_ms": 16.8155, "predicted_per_second": 59.468942344860395}, "tps": 59.468942344860395}, {"id": "3d77a173-7d7e-47b9-9694-fc604c435b5e", "answer": "AD", "llm_answer": "A", "score": 0, "topics": ["SystemSecurity", "SoftwareSecurity"], "timings": {"cache_n": 59, "prompt_n": 125, "prompt_ms": 96.154, "prompt_per_token_ms": 0.7692319999999999, "prompt_per_second": 1299.997920003328, "predicted_n": 2, "predicted_ms": 30.387, "predicted_per_token_ms": 15.1935, "predicted_per_second": 65.81761937670714}, "tps": 65.81761937670714}, {"id": "e248bc9c-8c65-46ab-ae4c-96ecacc896c9", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 60, "prompt_n": 173, "prompt_ms": 89.244, "prompt_per_token_ms": 0.5158612716763006, "prompt_per_second": 1938.5056698489534, "predicted_n": 2, "predicted_ms": 26.052, "predicted_per_token_ms": 13.026, "predicted_per_second": 76.76953784738215}, "tps": 76.76953784738215}, {"id": "1a24c416-bf47-4ed8-9645-34f8037f9036", "answer": "BCD", "llm_answer": "C", "score": 0, "topics": ["WebSecurity"], "timings": {"cache_n": 60, "prompt_n": 158, "prompt_ms": 96.613, "prompt_per_token_ms": 0.6114746835443038, "prompt_per_second": 1635.3906824133396, "predicted_n": 2, "predicted_ms": 28.767, "predicted_per_token_ms": 14.3835, "predicted_per_second": 69.52410748427017}, "tps": 69.52410748427017}, {"id": "b5804d4c-e140-434d-9ad9-0346c6ba5db5", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["SoftwareSecurity", "ApplicationSecurity", "Vulnerability"], "timings": {"cache_n": 60, "prompt_n": 138, "prompt_ms": 97.287, "prompt_per_token_ms": 0.7049782608695653, "prompt_per_second": 1418.4834561657774, "predicted_n": 2, "predicted_ms": 29.311, "predicted_per_token_ms": 14.6555, "predicted_per_second": 68.23376889222476}, "tps": 68.23376889222476}, {"id": "3d29d707-fb0f-48f4-a2bf-bc72d6c4d03a", "answer": "AC", "llm_answer": "C", "score": 0, "topics": ["SystemSecurity"], "timings": {"cache_n": 59, "prompt_n": 141, "prompt_ms": 99.125, "prompt_per_token_ms": 0.7030141843971631, "prompt_per_second": 1422.4464060529635, "predicted_n": 2, "predicted_ms": 29.424, "predicted_per_token_ms": 14.712, "predicted_per_second": 67.97172376291464}, "tps": 67.97172376291464}, {"id": "132ca45a-faab-434b-b923-355abad9b91b", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["SystemSecurity", "ApplicationSecurity", "Vulnerability"], "timings": {"cache_n": 60, "prompt_n": 153, "prompt_ms": 97.983, "prompt_per_token_ms": 0.6404117647058823, "prompt_per_second": 1561.4953614402498, "predicted_n": 2, "predicted_ms": 30.081, "predicted_per_token_ms": 15.0405, "predicted_per_second": 66.48715135800006}, "tps": 66.48715135800006}, {"id": "8ba3dc58-2372-4903-9d76-60521e17020a", "answer": "BC", "llm_answer": "B", "score": 0, "topics": ["ApplicationSecurity", "SystemSecurity"], "timings": {"cache_n": 59, "prompt_n": 118, "prompt_ms": 95.196, "prompt_per_token_ms": 0.8067457627118644, "prompt_per_second": 1239.547880163032, "predicted_n": 2, "predicted_ms": 30.407, "predicted_per_token_ms": 15.2035, "predicted_per_second": 65.77432827967245}, "tps": 65.77432827967245}, {"id": "532e3286-3c86-40e9-a283-0c56191f4766", "answer": "AD", "llm_answer": "B", "score": 0, "topics": ["SoftwareSecurity", "SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 62, "prompt_n": 196, "prompt_ms": 104.247, "prompt_per_token_ms": 0.5318724489795918, "prompt_per_second": 1880.1500282981765, "predicted_n": 2, "predicted_ms": 30.031, "predicted_per_token_ms": 15.0155, "predicted_per_second": 66.59784888948087}, "tps": 66.59784888948087}, {"id": "85f325d1-57e8-4dbd-9461-90a7ea3c31a2", "answer": "AC", "llm_answer": "B", "score": 0, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 59, "prompt_n": 137, "prompt_ms": 97.141, "prompt_per_token_ms": 0.709058394160584, "prompt_per_second": 1410.3210796676995, "predicted_n": 2, "predicted_ms": 30.119, "predicted_per_token_ms": 15.0595, "predicted_per_second": 66.4032670407384}, "tps": 66.4032670407384}, {"id": "a67778dd-be4d-45b1-8238-f3f7de80adee", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["PenTest"], "timings": {"cache_n": 59, "prompt_n": 152, "prompt_ms": 98.853, "prompt_per_token_ms": 0.6503486842105263, "prompt_per_second": 1537.6366928671866, "predicted_n": 2, "predicted_ms": 31.003, "predicted_per_token_ms": 15.5015, "predicted_per_second": 64.50988614005097}, "tps": 64.50988614005097}, {"id": "bede4496-6806-4879-a54a-5baa7c5fea90", "answer": "AD", "llm_answer": "ACD", "score": 0, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 60, "prompt_n": 132, "prompt_ms": 99.971, "prompt_per_token_ms": 0.7573560606060606, "prompt_per_second": 1320.3829110442027, "predicted_n": 3, "predicted_ms": 52.862, "predicted_per_token_ms": 17.62066666666667, "predicted_per_second": 56.75154175021754}, "tps": 56.75154175021754}, {"id": "fe7e9514-3ab9-4d4c-ad22-eb3cf5702ca2", "answer": "AC", "llm_answer": "CD", "score": 0, "topics": ["NetworkSecurity", "PenTest"], "timings": {"cache_n": 59, "prompt_n": 87, "prompt_ms": 80.115, "prompt_per_token_ms": 0.9208620689655171, "prompt_per_second": 1085.9389627410596, "predicted_n": 4, "predicted_ms": 86.383, "predicted_per_token_ms": 21.59575, "predicted_per_second": 46.305407313939085}, "tps": 46.305407313939085}, {"id": "7f9cdfd5-5ce1-441d-a519-ce2e61d1f489", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["ApplicationSecurity", "SoftwareSecurity"], "timings": {"cache_n": 58, "prompt_n": 171, "prompt_ms": 100.232, "prompt_per_token_ms": 0.5861520467836258, "prompt_per_second": 1706.0419826003674, "predicted_n": 2, "predicted_ms": 29.242, "predicted_per_token_ms": 14.621, "predicted_per_second": 68.39477463921756}, "tps": 68.39477463921756}, {"id": "bb9e5272-bce9-41e1-87e1-8b6d46e59a40", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["WebSecurity", "ApplicationSecurity"], "timings": {"cache_n": 60, "prompt_n": 94, "prompt_ms": 92.643, "prompt_per_token_ms": 0.985563829787234, "prompt_per_second": 1014.6476258324968, "predicted_n": 2, "predicted_ms": 33.139, "predicted_per_token_ms": 16.5695, "predicted_per_second": 60.35185129303841}, "tps": 60.35185129303841}, {"id": "577fd091-520e-4dce-9c37-86a8181bf5bd", "answer": "AD", "llm_answer": "AD", "score": 1, "topics": ["SystemSecurity", "PenTest"], "timings": {"cache_n": 60, "prompt_n": 126, "prompt_ms": 96.044, "prompt_per_token_ms": 0.7622539682539682, "prompt_per_second": 1311.8987130898338, "predicted_n": 3, "predicted_ms": 61.728, "predicted_per_token_ms": 20.576, "predicted_per_second": 48.600311041990665}, "tps": 48.600311041990665}, {"id": "87c2d98d-07be-4428-ba78-1153d05aa2a3", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 58, "prompt_n": 137, "prompt_ms": 97.195, "prompt_per_token_ms": 0.7094525547445255, "prompt_per_second": 1409.5375276505995, "predicted_n": 2, "predicted_ms": 31.064, "predicted_per_token_ms": 15.532, "predicted_per_second": 64.38320885912954}, "tps": 64.38320885912954}, {"id": "9644fa09-38aa-4461-8a34-7ae66ea8e2af", "answer": "ACD", "llm_answer": "A", "score": 0, "topics": ["SoftwareSecurity", "SystemSecurity", "Vulnerability"], "timings": {"cache_n": 58, "prompt_n": 152, "prompt_ms": 98.086, "prompt_per_token_ms": 0.6453026315789474, "prompt_per_second": 1549.6605020084417, "predicted_n": 2, "predicted_ms": 30.378, "predicted_per_token_ms": 15.189, "predicted_per_second": 65.83711896767397}, "tps": 65.83711896767397}, {"id": "6f190e6c-f230-4bcb-ac14-7484bd0f6104", "answer": "AB", "llm_answer": "A", "score": 0, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 61, "prompt_n": 121, "prompt_ms": 96.069, "prompt_per_token_ms": 0.7939586776859504, "prompt_per_second": 1259.5113928530536, "predicted_n": 2, "predicted_ms": 29.323, "predicted_per_token_ms": 14.6615, "predicted_per_second": 68.20584524093715}, "tps": 68.20584524093715}, {"id": "6f01d9bb-ecc3-493a-b4b2-9dc8a24ed000", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["PenTest"], "timings": {"cache_n": 59, "prompt_n": 152, "prompt_ms": 97.221, "prompt_per_token_ms": 0.6396118421052632, "prompt_per_second": 1563.4482262062722, "predicted_n": 2, "predicted_ms": 29.071, "predicted_per_token_ms": 14.5355, "predicted_per_second": 68.79708300368064}, "tps": 68.79708300368064}, {"id": "a45f3ef2-25bd-4f4b-ad33-8118db20d255", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["WebSecurity", "SystemSecurity"], "timings": {"cache_n": 61, "prompt_n": 103, "prompt_ms": 95.068, "prompt_per_token_ms": 0.9229902912621359, "prompt_per_second": 1083.435014936677, "predicted_n": 2, "predicted_ms": 29.193, "predicted_per_token_ms": 14.5965, "predicted_per_second": 68.50957421299626}, "tps": 68.50957421299626}, {"id": "c0a0a7d3-f132-410f-9f49-3bfcbfee7fb6", "answer": "AC", "llm_answer": "AB", "score": 0, "topics": ["SystemSecurity", "NetworkSecurity", "PenTest"], "timings": {"cache_n": 60, "prompt_n": 107, "prompt_ms": 85.795, "prompt_per_token_ms": 0.8018224299065421, "prompt_per_second": 1247.1589253452998, "predicted_n": 2, "predicted_ms": 27.345, "predicted_per_token_ms": 13.6725, "predicted_per_second": 73.13951362223442}, "tps": 73.13951362223442}, {"id": "0f7736e8-3a11-4334-b383-e5e811813e47", "answer": "A", "llm_answer": "A", "score": 1, "topics": [], "timings": {"cache_n": 59, "prompt_n": 133, "prompt_ms": 95.53, "prompt_per_token_ms": 0.7182706766917293, "prompt_per_second": 1392.232806448236, "predicted_n": 2, "predicted_ms": 30.037, "predicted_per_token_ms": 15.0185, "predicted_per_second": 66.58454572693678}, "tps": 66.58454572693678}, {"id": "12f60976-e250-42b3-be14-5dae19e92124", "answer": "A", "llm_answer": "D", "score": 0, "topics": ["WebSecurity", "SystemSecurity"], "timings": {"cache_n": 59, "prompt_n": 130, "prompt_ms": 97.281, "prompt_per_token_ms": 0.7483153846153847, "prompt_per_second": 1336.3349472147695, "predicted_n": 2, "predicted_ms": 30.223, "predicted_per_token_ms": 15.1115, "predicted_per_second": 66.17476756112895}, "tps": 66.17476756112895}, {"id": "70955fa1-fdcb-478d-b925-c001b38960ca", "answer": "CD", "llm_answer": "AC", "score": 0, "topics": ["WebSecurity"], "timings": {"cache_n": 61, "prompt_n": 59, "prompt_ms": 90.803, "prompt_per_token_ms": 1.5390338983050846, "prompt_per_second": 649.7582678986378, "predicted_n": 3, "predicted_ms": 71.122, "predicted_per_token_ms": 23.707333333333334, "predicted_per_second": 42.18104102809257}, "tps": 42.18104102809257}, {"id": "1e3d47ab-25b6-49ac-a1e5-7ba1bf276dda", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["PenTest"], "timings": {"cache_n": 59, "prompt_n": 100, "prompt_ms": 94.199, "prompt_per_token_ms": 0.94199, "prompt_per_second": 1061.5823947175659, "predicted_n": 2, "predicted_ms": 29.927, "predicted_per_token_ms": 14.9635, "predicted_per_second": 66.82928459250844}, "tps": 66.82928459250844}, {"id": "306c2d4f-3068-4aa3-8e3d-d13769d98cab", "answer": "ACD", "llm_answer": "ABCD", "score": 0, "topics": ["ApplicationSecurity", "SoftwareSecurity", "Vulnerability"], "timings": {"cache_n": 60, "prompt_n": 137, "prompt_ms": 96.967, "prompt_per_token_ms": 0.7077883211678831, "prompt_per_second": 1412.8517949405466, "predicted_n": 64, "predicted_ms": 1843.64, "predicted_per_token_ms": 28.806875, "predicted_per_second": 34.71393547547243}, "tps": 34.71393547547243}, {"id": "0f951158-0401-4895-8e81-5dcbfa8435e1", "answer": "BD", "llm_answer": "D", "score": 0, "topics": ["WebSecurity"], "timings": {"cache_n": 60, "prompt_n": 173, "prompt_ms": 91.631, "prompt_per_token_ms": 0.5296589595375722, "prompt_per_second": 1888.0073337625913, "predicted_n": 2, "predicted_ms": 29.826, "predicted_per_token_ms": 14.913, "predicted_per_second": 67.0555890833501}, "tps": 67.0555890833501}, {"id": "58bd6fbf-e3a8-4bbd-97a2-f9bc00bd76b9", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["SystemSecurity", "PenTest"], "timings": {"cache_n": 60, "prompt_n": 145, "prompt_ms": 96.222, "prompt_per_token_ms": 0.6636, "prompt_per_second": 1506.9318866787223, "predicted_n": 2, "predicted_ms": 30.588, "predicted_per_token_ms": 15.294, "predicted_per_second": 65.3851183470642}, "tps": 65.3851183470642}, {"id": "203821e1-3120-43cf-b9a2-2a75d4ff4cee", "answer": "AD", "llm_answer": "D", "score": 0, "topics": ["SoftwareSecurity", "ApplicationSecurity", "Vulnerability"], "timings": {"cache_n": 60, "prompt_n": 96, "prompt_ms": 92.811, "prompt_per_token_ms": 0.96678125, "prompt_per_second": 1034.360151275172, "predicted_n": 2, "predicted_ms": 29.914, "predicted_per_token_ms": 14.957, "predicted_per_second": 66.85832720465334}, "tps": 66.85832720465334}, {"id": "4a89e214-80ca-4f3d-b5f6-b5b3ca530088", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["WebSecurity", "ApplicationSecurity"], "timings": {"cache_n": 59, "prompt_n": 158, "prompt_ms": 98.108, "prompt_per_token_ms": 0.6209367088607595, "prompt_per_second": 1610.470094181922, "predicted_n": 2, "predicted_ms": 38.603, "predicted_per_token_ms": 19.3015, "predicted_per_second": 51.809444861798305}, "tps": 51.809444861798305}, {"id": "35bc2d3c-4572-4321-9fa1-7148d8f13f96", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["ApplicationSecurity", "SystemSecurity"], "timings": {"cache_n": 63, "prompt_n": 124, "prompt_ms": 95.399, "prompt_per_token_ms": 0.7693467741935484, "prompt_per_second": 1299.8039811738067, "predicted_n": 2, "predicted_ms": 30.154, "predicted_per_token_ms": 15.077, "predicted_per_second": 66.32619221330503}, "tps": 66.32619221330503}, {"id": "f53a3bb2-804b-46c8-a893-77bc35e294b2", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SystemSecurity", "MemorySafety"], "timings": {"cache_n": 59, "prompt_n": 169, "prompt_ms": 98.117, "prompt_per_token_ms": 0.5805739644970415, "prompt_per_second": 1722.4334213235218, "predicted_n": 2, "predicted_ms": 29.797, "predicted_per_token_ms": 14.8985, "predicted_per_second": 67.12085109239185}, "tps": 67.12085109239185}, {"id": "ebeac3f7-b24b-4fe4-92db-31949e8b157a", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["SystemSecurity", "PenTest"], "timings": {"cache_n": 60, "prompt_n": 92, "prompt_ms": 93.599, "prompt_per_token_ms": 1.0173804347826088, "prompt_per_second": 982.9164841504717, "predicted_n": 3, "predicted_ms": 58.524, "predicted_per_token_ms": 19.508, "predicted_per_second": 51.2610211195407}, "tps": 51.2610211195407}, {"id": "1811efc9-0a74-4d0f-92cc-66f1688ca895", "answer": "AB", "llm_answer": "BCD", "score": 0, "topics": ["WebSecurity", "NetworkSecurity"], "timings": {"cache_n": 60, "prompt_n": 145, "prompt_ms": 98.471, "prompt_per_token_ms": 0.6791103448275863, "prompt_per_second": 1472.5147505356906, "predicted_n": 4, "predicted_ms": 86.976, "predicted_per_token_ms": 21.744, "predicted_per_second": 45.989698307579104}, "tps": 45.989698307579104}, {"id": "211cf668-247f-4d34-a0cb-ba9bb69c316f", "answer": "D", "llm_answer": "D", "score": 1, "topics": ["SoftwareSecurity", "ApplicationSecurity", "Vulnerability"], "timings": {"cache_n": 61, "prompt_n": 183, "prompt_ms": 100.336, "prompt_per_token_ms": 0.5482841530054645, "prompt_per_second": 1823.8717907829694, "predicted_n": 2, "predicted_ms": 28.655, "predicted_per_token_ms": 14.3275, "predicted_per_second": 69.79584714709475}, "tps": 69.79584714709475}, {"id": "f06933d8-2c90-4f85-8308-b036d7457842", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["WebSecurity", "ApplicationSecurity"], "timings": {"cache_n": 61, "prompt_n": 141, "prompt_ms": 90.11, "prompt_per_token_ms": 0.6390780141843971, "prompt_per_second": 1564.7541893241596, "predicted_n": 2, "predicted_ms": 26.147, "predicted_per_token_ms": 13.0735, "predicted_per_second": 76.49061077752707}, "tps": 76.49061077752707}, {"id": "599aa502-8939-4fe3-bb27-342e9f9cdaac", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 61, "prompt_n": 112, "prompt_ms": 89.831, "prompt_per_token_ms": 0.8020625, "prompt_per_second": 1246.7856307956051, "predicted_n": 2, "predicted_ms": 28.831, "predicted_per_token_ms": 14.4155, "predicted_per_second": 69.36977558877597}, "tps": 69.36977558877597}, {"id": "5dcfbba1-3bf3-49f6-988c-1cb8d338590e", "answer": "ACD", "llm_answer": "BCD", "score": 0, "topics": ["SoftwareSecurity", "Vulnerability"], "timings": {"cache_n": 58, "prompt_n": 130, "prompt_ms": 96.845, "prompt_per_token_ms": 0.7449615384615385, "prompt_per_second": 1342.3511797201713, "predicted_n": 4, "predicted_ms": 87.421, "predicted_per_token_ms": 21.85525, "predicted_per_second": 45.755596481394626}, "tps": 45.755596481394626}, {"id": "2363b9b4-2241-4580-bf81-8f49c9c3a0bd", "answer": "ABD", "llm_answer": "ABCD", "score": 0, "topics": ["SystemSecurity", "NetworkSecurity"], "timings": {"cache_n": 62, "prompt_n": 115, "prompt_ms": 95.375, "prompt_per_token_ms": 0.8293478260869566, "prompt_per_second": 1205.7667103538663, "predicted_n": 3, "predicted_ms": 60.813, "predicted_per_token_ms": 20.271, "predicted_per_second": 49.33155739726703}, "tps": 49.33155739726703}, {"id": "a45fe784-7f89-406f-a04a-b2a85aa09862", "answer": "AC", "llm_answer": "AC", "score": 1, "topics": ["SoftwareSecurity", "ApplicationSecurity", "Vulnerability"], "timings": {"cache_n": 59, "prompt_n": 127, "prompt_ms": 96.286, "prompt_per_token_ms": 0.7581574803149607, "prompt_per_second": 1318.9871840142907, "predicted_n": 2, "predicted_ms": 31.631, "predicted_per_token_ms": 15.8155, "predicted_per_second": 63.22911068255825}, "tps": 63.22911068255825}, {"id": "e70148d7-0ca9-4784-9783-5acd001c4bf1", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["ApplicationSecurity", "SystemSecurity", "WebSecurity"], "timings": {"cache_n": 59, "prompt_n": 115, "prompt_ms": 95.115, "prompt_per_token_ms": 0.8270869565217391, "prompt_per_second": 1209.062713557273, "predicted_n": 2, "predicted_ms": 31.945, "predicted_per_token_ms": 15.9725, "predicted_per_second": 62.607606824229144}, "tps": 62.607606824229144}, {"id": "763e9c7b-13bf-4930-a115-5bd8966ce153", "answer": "C", "llm_answer": "C", "score": 1, "topics": [], "timings": {"cache_n": 60, "prompt_n": 99, "prompt_ms": 93.581, "prompt_per_token_ms": 0.9452626262626262, "prompt_per_second": 1057.9070537822847, "predicted_n": 2, "predicted_ms": 30.767, "predicted_per_token_ms": 15.3835, "predicted_per_second": 65.00471284168103}, "tps": 65.00471284168103}, {"id": "74fb63e8-1dc1-4838-95b4-692a33c424df", "answer": "BC", "llm_answer": "ABCD", "score": 0, "topics": ["SystemSecurity"], "timings": {"cache_n": 62, "prompt_n": 149, "prompt_ms": 98.572, "prompt_per_token_ms": 0.6615570469798658, "prompt_per_second": 1511.5854400844053, "predicted_n": 8, "predicted_ms": 207.874, "predicted_per_token_ms": 25.98425, "predicted_per_second": 38.48485140036753}, "tps": 38.48485140036753}, {"id": "bfe1b311-32c7-430b-83f7-464afa5f2021", "answer": "BC", "llm_answer": "B", "score": 0, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 60, "prompt_n": 126, "prompt_ms": 96.652, "prompt_per_token_ms": 0.7670793650793651, "prompt_per_second": 1303.6460704382735, "predicted_n": 2, "predicted_ms": 29.851, "predicted_per_token_ms": 14.9255, "predicted_per_second": 66.99943050484072}, "tps": 66.99943050484072}, {"id": "1fd58f90-ec60-4803-9695-590c02880ac3", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SystemSecurity", "ApplicationSecurity", "PenTest"], "timings": {"cache_n": 61, "prompt_n": 124, "prompt_ms": 96.572, "prompt_per_token_ms": 0.7788064516129033, "prompt_per_second": 1284.016070910823, "predicted_n": 2, "predicted_ms": 31.585, "predicted_per_token_ms": 15.7925, "predicted_per_second": 63.32119677061896}, "tps": 63.32119677061896}, {"id": "d55202f4-3039-4d30-8a3d-bcf79c020934", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["ApplicationSecurity"], "timings": {"cache_n": 59, "prompt_n": 102, "prompt_ms": 94.433, "prompt_per_token_ms": 0.9258137254901961, "prompt_per_second": 1080.130886448593, "predicted_n": 2, "predicted_ms": 29.71, "predicted_per_token_ms": 14.855, "predicted_per_second": 67.31740154830024}, "tps": 67.31740154830024}, {"id": "5e3d9b9d-5ea0-4c1b-97fd-90db11b8ec0d", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["WebSecurity", "ApplicationSecurity"], "timings": {"cache_n": 59, "prompt_n": 94, "prompt_ms": 91.204, "prompt_per_token_ms": 0.9702553191489361, "prompt_per_second": 1030.6565501513091, "predicted_n": 3, "predicted_ms": 58.552, "predicted_per_token_ms": 19.517333333333333, "predicted_per_second": 51.236507719633835}, "tps": 51.236507719633835}, {"id": "bbca18bc-2736-4d0b-ae54-edacb4e75659", "answer": "B", "llm_answer": "C", "score": 0, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 59, "prompt_n": 111, "prompt_ms": 95.414, "prompt_per_token_ms": 0.8595855855855856, "prompt_per_second": 1163.3512901670615, "predicted_n": 2, "predicted_ms": 29.614, "predicted_per_token_ms": 14.807, "predicted_per_second": 67.53562504220976}, "tps": 67.53562504220976}, {"id": "2b045678-b69d-47c8-bd1c-15640b1d4e1e", "answer": "ABC", "llm_answer": "ABD", "score": 0, "topics": ["ApplicationSecurity", "SystemSecurity"], "timings": {"cache_n": 60, "prompt_n": 113, "prompt_ms": 97.097, "prompt_per_token_ms": 0.8592654867256637, "prompt_per_second": 1163.7846689393082, "predicted_n": 3, "predicted_ms": 59.027, "predicted_per_token_ms": 19.675666666666668, "predicted_per_second": 50.824199095329256}, "tps": 50.824199095329256}, {"id": "44c67350-0e1c-4cdd-88e6-1a9d354c2bfc", "answer": "AC", "llm_answer": "AC", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 62, "prompt_n": 90, "prompt_ms": 88.396, "prompt_per_token_ms": 0.9821777777777778, "prompt_per_second": 1018.1456174487533, "predicted_n": 3, "predicted_ms": 52.001, "predicted_per_token_ms": 17.333666666666666, "predicted_per_second": 57.691198246187575}, "tps": 57.691198246187575}, {"id": "2030676f-a2e1-403b-87c3-09d21ab702c6", "answer": "D", "llm_answer": "D", "score": 1, "topics": ["NetworkSecurity", "PenTest"], "timings": {"cache_n": 60, "prompt_n": 97, "prompt_ms": 93.877, "prompt_per_token_ms": 0.9678041237113402, "prompt_per_second": 1033.2669343928758, "predicted_n": 2, "predicted_ms": 29.673, "predicted_per_token_ms": 14.8365, "predicted_per_second": 67.4013412866916}, "tps": 67.4013412866916}, {"id": "29eee2b8-f83d-4bc3-80d2-8733dd936767", "answer": "C", "llm_answer": "A", "score": 0, "topics": ["SystemSecurity", "PenTest"], "timings": {"cache_n": 59, "prompt_n": 167, "prompt_ms": 99.436, "prompt_per_token_ms": 0.5954251497005989, "prompt_per_second": 1679.4722233396355, "predicted_n": 2, "predicted_ms": 28.751, "predicted_per_token_ms": 14.3755, "predicted_per_second": 69.56279781572815}, "tps": 69.56279781572815}, {"id": "fd91fc96-b367-476c-878c-e2f2cde09638", "answer": "BD", "llm_answer": "B", "score": 0, "topics": ["Vulnerability", "SystemSecurity"], "timings": {"cache_n": 60, "prompt_n": 107, "prompt_ms": 97.231, "prompt_per_token_ms": 0.9087009345794392, "prompt_per_second": 1100.472071664387, "predicted_n": 2, "predicted_ms": 37.597, "predicted_per_token_ms": 18.7985, "predicted_per_second": 53.195733702157085}, "tps": 53.195733702157085}, {"id": "e6c6c967-2b83-4cff-9cf7-e0f908c42611", "answer": "D", "llm_answer": "D", "score": 1, "topics": ["PenTest", "Vulnerability"], "timings": {"cache_n": 62, "prompt_n": 78, "prompt_ms": 91.298, "prompt_per_token_ms": 1.1704871794871796, "prompt_per_second": 854.3451116125216, "predicted_n": 2, "predicted_ms": 28.835, "predicted_per_token_ms": 14.4175, "predicted_per_second": 69.3601525923357}, "tps": 69.3601525923357}, {"id": "276998c2-b7ff-4880-8fe1-797048cbd5e8", "answer": "A", "llm_answer": "B", "score": 0, "topics": ["ApplicationSecurity"], "timings": {"cache_n": 58, "prompt_n": 139, "prompt_ms": 98.395, "prompt_per_token_ms": 0.7078776978417266, "prompt_per_second": 1412.6734082016364, "predicted_n": 2, "predicted_ms": 30.155, "predicted_per_token_ms": 15.0775, "predicted_per_second": 66.3239927043608}, "tps": 66.3239927043608}, {"id": "3a650e15-a037-427e-88ab-f19cc22f75b0", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["ApplicationSecurity", "SoftwareSecurity", "Vulnerability"], "timings": {"cache_n": 60, "prompt_n": 147, "prompt_ms": 96.714, "prompt_per_token_ms": 0.6579183673469388, "prompt_per_second": 1519.9454060425585, "predicted_n": 2, "predicted_ms": 29.038, "predicted_per_token_ms": 14.519, "predicted_per_second": 68.87526689165921}, "tps": 68.87526689165921}, {"id": "cb2a93fc-a60f-4fed-a336-c9f1d2be78d0", "answer": "AC", "llm_answer": "B", "score": 0, "topics": ["WebSecurity"], "timings": {"cache_n": 60, "prompt_n": 157, "prompt_ms": 97.902, "prompt_per_token_ms": 0.6235796178343949, "prompt_per_second": 1603.644460787318, "predicted_n": 2, "predicted_ms": 31.046, "predicted_per_token_ms": 15.523, "predicted_per_second": 64.4205372672808}, "tps": 64.4205372672808}, {"id": "9423bf56-43b2-4f7b-bdba-fcd12a98e9be", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["SystemSecurity", "ApplicationSecurity", "PenTest"], "timings": {"cache_n": 60, "prompt_n": 123, "prompt_ms": 96.242, "prompt_per_token_ms": 0.7824552845528455, "prompt_per_second": 1278.0283036512124, "predicted_n": 2, "predicted_ms": 29.578, "predicted_per_token_ms": 14.789, "predicted_per_second": 67.6178240584218}, "tps": 67.6178240584218}, {"id": "909d4786-c0a7-4122-9e06-dec2961d71f7", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SystemSecurity"], "timings": {"cache_n": 62, "prompt_n": 80, "prompt_ms": 92.089, "prompt_per_token_ms": 1.1511125, "prompt_per_second": 868.7248205540293, "predicted_n": 2, "predicted_ms": 30.9, "predicted_per_token_ms": 15.45, "predicted_per_second": 64.72491909385114}, "tps": 64.72491909385114}, {"id": "e134cac6-51e6-4c29-9695-dfc8de4932d9", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["PenTest", "Vulnerability"], "timings": {"cache_n": 58, "prompt_n": 94, "prompt_ms": 81.518, "prompt_per_token_ms": 0.8672127659574468, "prompt_per_second": 1153.1195564169875, "predicted_n": 2, "predicted_ms": 26.908, "predicted_per_token_ms": 13.454, "predicted_per_second": 74.32733759476736}, "tps": 74.32733759476736}, {"id": "1217d243-11ff-4b83-83ea-67af12ca28d0", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["SystemSecurity"], "timings": {"cache_n": 60, "prompt_n": 95, "prompt_ms": 95.058, "prompt_per_token_ms": 1.0006105263157896, "prompt_per_second": 999.3898461991625, "predicted_n": 2, "predicted_ms": 30.131, "predicted_per_token_ms": 15.0655, "predicted_per_second": 66.37682121403206}, "tps": 66.37682121403206}, {"id": "dd70edc2-d64f-40b6-81b5-c22c4295d538", "answer": "AC", "llm_answer": "AC", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 62, "prompt_n": 191, "prompt_ms": 99.415, "prompt_per_token_ms": 0.5204973821989529, "prompt_per_second": 1921.2392496102195, "predicted_n": 3, "predicted_ms": 59.911, "predicted_per_token_ms": 19.970333333333333, "predicted_per_second": 50.07427684398524}, "tps": 50.07427684398524}, {"id": "50d3f622-6b55-4913-9e49-29a8feb68190", "answer": "ACD", "llm_answer": "CD", "score": 0, "topics": ["SystemSecurity", "ApplicationSecurity", "MemorySafety"], "timings": {"cache_n": 59, "prompt_n": 158, "prompt_ms": 98.095, "prompt_per_token_ms": 0.6208544303797469, "prompt_per_second": 1610.6835210765073, "predicted_n": 4, "predicted_ms": 88.011, "predicted_per_token_ms": 22.00275, "predicted_per_second": 45.44886434650214}, "tps": 45.44886434650214}, {"id": "decfb159-2637-4c6b-9a51-cd01eab99e25", "answer": "D", "llm_answer": "ABCD", "score": 0, "topics": ["ApplicationSecurity", "SystemSecurity", "Vulnerability"], "timings": {"cache_n": 59, "prompt_n": 96, "prompt_ms": 93.272, "prompt_per_token_ms": 0.9715833333333334, "prompt_per_second": 1029.2477914057808, "predicted_n": 8, "predicted_ms": 201.956, "predicted_per_token_ms": 25.2445, "predicted_per_second": 39.6125888807463}, "tps": 39.6125888807463}, {"id": "e73249cc-9096-4a6b-a03f-924091b97072", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["NetworkSecurity", "PenTest"], "timings": {"cache_n": 59, "prompt_n": 136, "prompt_ms": 97.969, "prompt_per_token_ms": 0.720360294117647, "prompt_per_second": 1388.194224703733, "predicted_n": 2, "predicted_ms": 29.905, "predicted_per_token_ms": 14.9525, "predicted_per_second": 66.87844841999666}, "tps": 66.87844841999666}, {"id": "5b67044f-d852-46c2-8710-88b9486420b5", "answer": "B", "llm_answer": "C", "score": 0, "topics": ["PenTest", "SystemSecurity"], "timings": {"cache_n": 61, "prompt_n": 105, "prompt_ms": 95.71, "prompt_per_token_ms": 0.9115238095238095, "prompt_per_second": 1097.0640476439244, "predicted_n": 2, "predicted_ms": 33.987, "predicted_per_token_ms": 16.9935, "predicted_per_second": 58.84602936416865}, "tps": 58.84602936416865}, {"id": "419bab6e-263c-4557-890d-2879860ba0c5", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["WebSecurity", "SystemSecurity"], "timings": {"cache_n": 58, "prompt_n": 132, "prompt_ms": 97.04, "prompt_per_token_ms": 0.7351515151515152, "prompt_per_second": 1360.2638087386642, "predicted_n": 2, "predicted_ms": 30.78, "predicted_per_token_ms": 15.39, "predicted_per_second": 64.9772579597141}, "tps": 64.9772579597141}, {"id": "e47756e2-9ea3-4297-bad9-34f45e919cb2", "answer": "ABC", "llm_answer": "B", "score": 0, "topics": ["SystemSecurity", "WebSecurity"], "timings": {"cache_n": 59, "prompt_n": 142, "prompt_ms": 82.072, "prompt_per_token_ms": 0.5779718309859155, "prompt_per_second": 1730.1881274978068, "predicted_n": 2, "predicted_ms": 26.185, "predicted_per_token_ms": 13.0925, "predicted_per_second": 76.37960664502579}, "tps": 76.37960664502579}, {"id": "54f6f9a2-d124-4547-8524-75df1ce89aab", "answer": "AB", "llm_answer": "ABC", "score": 0, "topics": ["WebSecurity"], "timings": {"cache_n": 60, "prompt_n": 159, "prompt_ms": 96.093, "prompt_per_token_ms": 0.6043584905660377, "prompt_per_second": 1654.6470606599855, "predicted_n": 3, "predicted_ms": 59.181, "predicted_per_token_ms": 19.727, "predicted_per_second": 50.69194504993156}, "tps": 50.69194504993156}, {"id": "6a4c6835-9496-4a07-b751-297e05d9df52", "answer": "BD", "llm_answer": "B", "score": 0, "topics": ["SystemSecurity", "NetworkSecurity", "PenTest"], "timings": {"cache_n": 62, "prompt_n": 130, "prompt_ms": 99.069, "prompt_per_token_ms": 0.7620692307692308, "prompt_per_second": 1312.21673782919, "predicted_n": 2, "predicted_ms": 29.871, "predicted_per_token_ms": 14.9355, "predicted_per_second": 66.9545713233571}, "tps": 66.9545713233571}, {"id": "9d40e942-e755-4b39-97e8-caff7711a231", "answer": "AB", "llm_answer": "AD", "score": 0, "topics": [], "timings": {"cache_n": 63, "prompt_n": 138, "prompt_ms": 97.5, "prompt_per_token_ms": 0.7065217391304348, "prompt_per_second": 1415.3846153846155, "predicted_n": 2, "predicted_ms": 29.509, "predicted_per_token_ms": 14.7545, "predicted_per_second": 67.77593276627469}, "tps": 67.77593276627469}, {"id": "0c9633c3-6927-4dd2-9561-50d633c734c3", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["ApplicationSecurity", "SystemSecurity"], "timings": {"cache_n": 61, "prompt_n": 161, "prompt_ms": 98.975, "prompt_per_token_ms": 0.614751552795031, "prompt_per_second": 1626.6734023743372, "predicted_n": 2, "predicted_ms": 31.842, "predicted_per_token_ms": 15.921, "predicted_per_second": 62.81012499214874}, "tps": 62.81012499214874}, {"id": "78711345-fe5f-4dcb-af82-9fdd8073df5a", "answer": "CD", "llm_answer": "B", "score": 0, "topics": ["SystemSecurity"], "timings": {"cache_n": 62, "prompt_n": 133, "prompt_ms": 96.049, "prompt_per_token_ms": 0.7221729323308271, "prompt_per_second": 1384.709887661506, "predicted_n": 2, "predicted_ms": 32.445, "predicted_per_token_ms": 16.2225, "predicted_per_second": 61.64278008938203}, "tps": 61.64278008938203}, {"id": "0984ccd7-13f5-4b03-8694-2b15f473ac1a", "answer": "AC", "llm_answer": "ABC", "score": 0, "topics": ["WebSecurity"], "timings": {"cache_n": 62, "prompt_n": 124, "prompt_ms": 95.179, "prompt_per_token_ms": 0.7675725806451613, "prompt_per_second": 1302.808392607613, "predicted_n": 3, "predicted_ms": 61.832, "predicted_per_token_ms": 20.610666666666667, "predicted_per_second": 48.5185664380903}, "tps": 48.5185664380903}, {"id": "288ba220-06cb-4104-bfcc-4b576444582b", "answer": "ABD", "llm_answer": "ABD", "score": 1, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 59, "prompt_n": 164, "prompt_ms": 99.297, "prompt_per_token_ms": 0.6054695121951219, "prompt_per_second": 1651.6108240933763, "predicted_n": 3, "predicted_ms": 58.355, "predicted_per_token_ms": 19.451666666666664, "predicted_per_second": 51.409476480164514}, "tps": 51.409476480164514}, {"id": "0ecb213f-fc03-495a-b90a-45b1a695c196", "answer": "BCD", "llm_answer": "C", "score": 0, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 62, "prompt_n": 123, "prompt_ms": 96.387, "prompt_per_token_ms": 0.7836341463414634, "prompt_per_second": 1276.105698901304, "predicted_n": 2, "predicted_ms": 28.995, "predicted_per_token_ms": 14.4975, "predicted_per_second": 68.97740989825832}, "tps": 68.97740989825832}, {"id": "63ccbe8d-3d42-4bf2-a51b-760de15b15c3", "answer": "ABC", "llm_answer": "ABCD", "score": 0, "topics": [], "timings": {"cache_n": 59, "prompt_n": 170, "prompt_ms": 89.813, "prompt_per_token_ms": 0.5283117647058824, "prompt_per_second": 1892.8217518622025, "predicted_n": 8, "predicted_ms": 198.419, "predicted_per_token_ms": 24.802375, "predicted_per_second": 40.318719477469394}, "tps": 40.318719477469394}, {"id": "145c214a-903b-4017-b2af-347c40ea42c9", "answer": "B", "llm_answer": "A", "score": 0, "topics": ["SystemSecurity", "NetworkSecurity", "PenTest"], "timings": {"cache_n": 59, "prompt_n": 183, "prompt_ms": 100.494, "prompt_per_token_ms": 0.5491475409836065, "prompt_per_second": 1821.0042390590484, "predicted_n": 2, "predicted_ms": 29.751, "predicted_per_token_ms": 14.8755, "predicted_per_second": 67.2246311048368}, "tps": 67.2246311048368}, {"id": "b2513815-2c48-4029-a252-e73acd551e70", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["SystemSecurity", "ApplicationSecurity", "Vulnerability"], "timings": {"cache_n": 60, "prompt_n": 137, "prompt_ms": 101.039, "prompt_per_token_ms": 0.7375109489051095, "prompt_per_second": 1355.9120735557556, "predicted_n": 2, "predicted_ms": 29.244, "predicted_per_token_ms": 14.622, "predicted_per_second": 68.3900971139379}, "tps": 68.3900971139379}, {"id": "85c3b986-ac94-4692-9d5d-26bc24e0673b", "answer": "A", "llm_answer": "C", "score": 0, "topics": ["WebSecurity", "PenTest"], "timings": {"cache_n": 60, "prompt_n": 126, "prompt_ms": 97.746, "prompt_per_token_ms": 0.7757619047619048, "prompt_per_second": 1289.0553066110124, "predicted_n": 2, "predicted_ms": 28.54, "predicted_per_token_ms": 14.27, "predicted_per_second": 70.0770847932726}, "tps": 70.0770847932726}, {"id": "abf26766-70c7-4157-bb5d-3d4522fbb90e", "answer": "ACD", "llm_answer": "ACD", "score": 1, "topics": ["SystemSecurity", "Vulnerability"], "timings": {"cache_n": 60, "prompt_n": 145, "prompt_ms": 97.37, "prompt_per_token_ms": 0.6715172413793103, "prompt_per_second": 1489.1650405669097, "predicted_n": 3, "predicted_ms": 59.637, "predicted_per_token_ms": 19.879, "predicted_per_second": 50.30434126465114}, "tps": 50.30434126465114}, {"id": "0c91d01f-31b1-4949-a413-333eb09529f0", "answer": "ABD", "llm_answer": "BD", "score": 0, "topics": ["SoftwareSecurity", "Vulnerability"], "timings": {"cache_n": 59, "prompt_n": 113, "prompt_ms": 95.764, "prompt_per_token_ms": 0.8474690265486725, "prompt_per_second": 1179.9841276471325, "predicted_n": 3, "predicted_ms": 60.364, "predicted_per_token_ms": 20.121333333333332, "predicted_per_second": 49.698495792194024}, "tps": 49.698495792194024}, {"id": "7593c324-b176-48cb-b9dd-c522e2876fbd", "answer": "B", "llm_answer": "BCD", "score": 0, "topics": ["ApplicationSecurity", "SystemSecurity"], "timings": {"cache_n": 59, "prompt_n": 194, "prompt_ms": 101.476, "prompt_per_token_ms": 0.5230721649484537, "prompt_per_second": 1911.782096259214, "predicted_n": 6, "predicted_ms": 150.124, "predicted_per_token_ms": 25.020666666666667, "predicted_per_second": 39.96696064586608}, "tps": 39.96696064586608}, {"id": "814747b5-79a7-44a0-a914-1721f4884d10", "answer": "C", "llm_answer": "B", "score": 0, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 59, "prompt_n": 145, "prompt_ms": 90.103, "prompt_per_token_ms": 0.6214, "prompt_per_second": 1609.26939169617, "predicted_n": 2, "predicted_ms": 25.284, "predicted_per_token_ms": 12.642, "predicted_per_second": 79.1014080050625}, "tps": 79.1014080050625}, {"id": "3921f1ea-e732-45b8-a286-7af8565233cd", "answer": "CD", "llm_answer": "CD", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 59, "prompt_n": 130, "prompt_ms": 93.92, "prompt_per_token_ms": 0.7224615384615385, "prompt_per_second": 1384.1567291311753, "predicted_n": 4, "predicted_ms": 86.77, "predicted_per_token_ms": 21.6925, "predicted_per_second": 46.09888210210902}, "tps": 46.09888210210902}, {"id": "40f0d5ad-7907-4420-8772-9dd0ef71ac2b", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["ApplicationSecurity"], "timings": {"cache_n": 63, "prompt_n": 136, "prompt_ms": 97.648, "prompt_per_token_ms": 0.718, "prompt_per_second": 1392.757660167131, "predicted_n": 2, "predicted_ms": 29.246, "predicted_per_token_ms": 14.623, "predicted_per_second": 68.38542022840731}, "tps": 68.38542022840731}, {"id": "33a354a9-250a-442d-b99b-28ca580c8ebf", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["SystemSecurity", "WebSecurity"], "timings": {"cache_n": 60, "prompt_n": 291, "prompt_ms": 109.487, "prompt_per_token_ms": 0.3762439862542955, "prompt_per_second": 2657.8497903860734, "predicted_n": 2, "predicted_ms": 29.648, "predicted_per_token_ms": 14.824, "predicted_per_second": 67.45817593092283}, "tps": 67.45817593092283}, {"id": "158246ad-be51-46de-b7de-c97c7405622b", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 61, "prompt_n": 88, "prompt_ms": 93.284, "prompt_per_token_ms": 1.0600454545454545, "prompt_per_second": 943.3557737661334, "predicted_n": 3, "predicted_ms": 58.355, "predicted_per_token_ms": 19.451666666666664, "predicted_per_second": 51.409476480164514}, "tps": 51.409476480164514}, {"id": "7de3b0e7-37c0-4753-93a7-7aac4207141f", "answer": "AC", "llm_answer": "AB", "score": 0, "topics": ["ApplicationSecurity", "SystemSecurity"], "timings": {"cache_n": 58, "prompt_n": 109, "prompt_ms": 94.993, "prompt_per_token_ms": 0.8714954128440366, "prompt_per_second": 1147.4529702188584, "predicted_n": 2, "predicted_ms": 30.063, "predicted_per_token_ms": 15.0315, "predicted_per_second": 66.52696005056049}, "tps": 66.52696005056049}, {"id": "04c99f39-d4d3-4f90-a9b3-51e265ba08f1", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["ApplicationSecurity", "WebSecurity"], "timings": {"cache_n": 59, "prompt_n": 98, "prompt_ms": 94.12, "prompt_per_token_ms": 0.9604081632653062, "prompt_per_second": 1041.2239694007649, "predicted_n": 2, "predicted_ms": 30.251, "predicted_per_token_ms": 15.1255, "predicted_per_second": 66.11351690853195}, "tps": 66.11351690853195}, {"id": "8791d918-fc6d-4a0a-a8ce-f02ac5526546", "answer": "AC", "llm_answer": "C", "score": 0, "topics": ["SystemSecurity"], "timings": {"cache_n": 58, "prompt_n": 77, "prompt_ms": 92.563, "prompt_per_token_ms": 1.2021168831168831, "prompt_per_second": 831.8658643302399, "predicted_n": 2, "predicted_ms": 29.381, "predicted_per_token_ms": 14.6905, "predicted_per_second": 68.07120247779177}, "tps": 68.07120247779177}, {"id": "d99de8c0-7ef3-49ac-868f-43d9a6a2e891", "answer": "BC", "llm_answer": "BC", "score": 1, "topics": ["WebSecurity", "NetworkSecurity"], "timings": {"cache_n": 58, "prompt_n": 122, "prompt_ms": 99.681, "prompt_per_token_ms": 0.8170573770491804, "prompt_per_second": 1223.904254572085, "predicted_n": 3, "predicted_ms": 58.239, "predicted_per_token_ms": 19.413, "predicted_per_second": 51.51187348683872}, "tps": 51.51187348683872}, {"id": "cdd08d0c-36a4-4ab6-9d67-6d1b4a9aea09", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 60, "prompt_n": 125, "prompt_ms": 94.131, "prompt_per_token_ms": 0.753048, "prompt_per_second": 1327.9365989950177, "predicted_n": 2, "predicted_ms": 27.262, "predicted_per_token_ms": 13.631, "predicted_per_second": 73.36218912772357}, "tps": 73.36218912772357}, {"id": "e6d54245-da3d-42cb-8d87-1b16183ba787", "answer": "BD", "llm_answer": "C", "score": 0, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 60, "prompt_n": 128, "prompt_ms": 88.722, "prompt_per_token_ms": 0.693140625, "prompt_per_second": 1442.7086855571335, "predicted_n": 2, "predicted_ms": 30.228, "predicted_per_token_ms": 15.114, "predicted_per_second": 66.1638216223369}, "tps": 66.1638216223369}, {"id": "d87e4d77-e481-4fdf-8477-5ea5069a948e", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["ApplicationSecurity"], "timings": {"cache_n": 64, "prompt_n": 103, "prompt_ms": 94.329, "prompt_per_token_ms": 0.9158155339805825, "prompt_per_second": 1091.922950524229, "predicted_n": 2, "predicted_ms": 31.646, "predicted_per_token_ms": 15.823, "predicted_per_second": 63.19914049168931}, "tps": 63.19914049168931}, {"id": "9dd998bf-80e0-433c-9456-ec52ae9786d7", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["NetworkSecurity", "PenTest"], "timings": {"cache_n": 59, "prompt_n": 108, "prompt_ms": 95.123, "prompt_per_token_ms": 0.8807685185185186, "prompt_per_second": 1135.3720971794412, "predicted_n": 2, "predicted_ms": 29.742, "predicted_per_token_ms": 14.871, "predicted_per_second": 67.24497343823549}, "tps": 67.24497343823549}, {"id": "a4f036a2-2f9c-47fe-8fab-54bda8733e96", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SystemSecurity", "PenTest"], "timings": {"cache_n": 59, "prompt_n": 188, "prompt_ms": 100.545, "prompt_per_token_ms": 0.534813829787234, "prompt_per_second": 1869.809538017803, "predicted_n": 2, "predicted_ms": 29.0, "predicted_per_token_ms": 14.5, "predicted_per_second": 68.96551724137932}, "tps": 68.96551724137932}, {"id": "e1682f93-ec53-4d78-a6f9-7bc0da5890af", "answer": "AB", "llm_answer": "B", "score": 0, "topics": ["SystemSecurity"], "timings": {"cache_n": 59, "prompt_n": 138, "prompt_ms": 97.908, "prompt_per_token_ms": 0.7094782608695652, "prompt_per_second": 1409.4864566736119, "predicted_n": 2, "predicted_ms": 29.39, "predicted_per_token_ms": 14.695, "predicted_per_second": 68.05035726437563}, "tps": 68.05035726437563}, {"id": "e75b07c1-dc98-4855-a223-5f2d1e8bbafa", "answer": "AD", "llm_answer": "ACD", "score": 0, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 60, "prompt_n": 155, "prompt_ms": 100.762, "prompt_per_token_ms": 0.6500774193548388, "prompt_per_second": 1538.2783192076379, "predicted_n": 3, "predicted_ms": 59.214, "predicted_per_token_ms": 19.738, "predicted_per_second": 50.663694396595396}, "tps": 50.663694396595396}, {"id": "75f95abb-45f2-4c1d-8687-b502379d86e1", "answer": "ABCD", "llm_answer": "A", "score": 0, "topics": ["WebSecurity", "SystemSecurity"], "timings": {"cache_n": 59, "prompt_n": 181, "prompt_ms": 93.266, "prompt_per_token_ms": 0.5152817679558012, "prompt_per_second": 1940.6857804559002, "predicted_n": 2, "predicted_ms": 29.331, "predicted_per_token_ms": 14.6655, "predicted_per_second": 68.18724216699056}, "tps": 68.18724216699056}, {"id": "d45b6b4a-02d7-4f6a-9cf6-eeec8adb7c14", "answer": "D", "llm_answer": "D", "score": 1, "topics": ["MemorySafety", "SoftwareSecurity", "SystemSecurity", "Vulnerability"], "timings": {"cache_n": 59, "prompt_n": 119, "prompt_ms": 94.818, "prompt_per_token_ms": 0.7967899159663865, "prompt_per_second": 1255.0359636355965, "predicted_n": 2, "predicted_ms": 29.527, "predicted_per_token_ms": 14.7635, "predicted_per_second": 67.73461577539202}, "tps": 67.73461577539202}, {"id": "ac3a1794-1513-4c85-a36f-80f7cf7c35dc", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["WebSecurity", "Vulnerability"], "timings": {"cache_n": 58, "prompt_n": 175, "prompt_ms": 100.277, "prompt_per_token_ms": 0.5730114285714286, "prompt_per_second": 1745.1658904833612, "predicted_n": 2, "predicted_ms": 33.601, "predicted_per_token_ms": 16.8005, "predicted_per_second": 59.52203803458231}, "tps": 59.52203803458231}, {"id": "d0757080-e8ce-490d-abac-3352b07b8367", "answer": "AC", "llm_answer": "ABCD", "score": 0, "topics": [], "timings": {"cache_n": 59, "prompt_n": 160, "prompt_ms": 101.06, "prompt_per_token_ms": 0.631625, "prompt_per_second": 1583.2178903621611, "predicted_n": 64, "predicted_ms": 1865.225, "predicted_per_token_ms": 29.144140625, "predicted_per_second": 34.31221434411398}, "tps": 34.31221434411398}, {"id": "7ed87700-1bd1-44f9-adcb-732a92032fcc", "answer": "ACD", "llm_answer": "C", "score": 0, "topics": ["Cryptography", "NetworkSecurity"], "timings": {"cache_n": 59, "prompt_n": 104, "prompt_ms": 99.075, "prompt_per_token_ms": 0.9526442307692308, "prompt_per_second": 1049.709815796114, "predicted_n": 2, "predicted_ms": 29.342, "predicted_per_token_ms": 14.671, "predicted_per_second": 68.16167950378298}, "tps": 68.16167950378298}, {"id": "009b6972-fbc2-4be4-b317-ddff4afa6c6e", "answer": "AC", "llm_answer": "C", "score": 0, "topics": ["SystemSecurity"], "timings": {"cache_n": 60, "prompt_n": 121, "prompt_ms": 96.881, "prompt_per_token_ms": 0.8006694214876033, "prompt_per_second": 1248.9549034382387, "predicted_n": 2, "predicted_ms": 26.395, "predicted_per_token_ms": 13.1975, "predicted_per_second": 75.77192650123129}, "tps": 75.77192650123129}, {"id": "a6ee50a8-4795-47db-b1ae-a1df3fd00e96", "answer": "ACD", "llm_answer": "ACD", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 58, "prompt_n": 135, "prompt_ms": 86.24, "prompt_per_token_ms": 0.6388148148148147, "prompt_per_second": 1565.3988868274585, "predicted_n": 3, "predicted_ms": 58.095, "predicted_per_token_ms": 19.365, "predicted_per_second": 51.63955589981927}, "tps": 51.63955589981927}, {"id": "3a1315b5-5981-47c1-b5e6-9e7ac0ce53ee", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SystemSecurity", "WebSecurity", "PenTest"], "timings": {"cache_n": 60, "prompt_n": 110, "prompt_ms": 94.995, "prompt_per_token_ms": 0.8635909090909092, "prompt_per_second": 1157.9556818779936, "predicted_n": 2, "predicted_ms": 30.373, "predicted_per_token_ms": 15.1865, "predicted_per_second": 65.84795706713199}, "tps": 65.84795706713199}, {"id": "bda504e4-0154-46d5-8970-1c609eb7523b", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["ApplicationSecurity", "WebSecurity"], "timings": {"cache_n": 58, "prompt_n": 114, "prompt_ms": 95.4, "prompt_per_token_ms": 0.8368421052631579, "prompt_per_second": 1194.9685534591194, "predicted_n": 2, "predicted_ms": 29.987, "predicted_per_token_ms": 14.9935, "predicted_per_second": 66.69556807950111}, "tps": 66.69556807950111}, {"id": "d00ef981-3832-420a-b070-3e69bc96318c", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["ApplicationSecurity"], "timings": {"cache_n": 60, "prompt_n": 58, "prompt_ms": 91.0, "prompt_per_token_ms": 1.5689655172413792, "prompt_per_second": 637.3626373626373, "predicted_n": 2, "predicted_ms": 29.306, "predicted_per_token_ms": 14.653, "predicted_per_second": 68.24541049614413}, "tps": 68.24541049614413}, {"id": "d82caef4-4625-4a48-933a-63225111816a", "answer": "BC", "llm_answer": "BC", "score": 1, "topics": ["ApplicationSecurity", "SystemSecurity"], "timings": {"cache_n": 58, "prompt_n": 86, "prompt_ms": 92.857, "prompt_per_token_ms": 1.079732558139535, "prompt_per_second": 926.1552710081094, "predicted_n": 3, "predicted_ms": 58.777, "predicted_per_token_ms": 19.592333333333332, "predicted_per_second": 51.04037293499158}, "tps": 51.04037293499158}, {"id": "c5b08b66-3344-4c37-9c95-3ee46c7eb4e6", "answer": "BD", "llm_answer": "B", "score": 0, "topics": ["WebSecurity", "Vulnerability"], "timings": {"cache_n": 58, "prompt_n": 86, "prompt_ms": 97.806, "prompt_per_token_ms": 1.1372790697674418, "prompt_per_second": 879.2916589984254, "predicted_n": 2, "predicted_ms": 29.743, "predicted_per_token_ms": 14.8715, "predicted_per_second": 67.24271257102512}, "tps": 67.24271257102512}, {"id": "99d6c43e-098e-4a31-81b3-dccaa953a708", "answer": "D", "llm_answer": "D", "score": 1, "topics": ["PenTest", "SystemSecurity"], "timings": {"cache_n": 58, "prompt_n": 142, "prompt_ms": 97.15, "prompt_per_token_ms": 0.6841549295774648, "prompt_per_second": 1461.6572310859497, "predicted_n": 2, "predicted_ms": 29.808, "predicted_per_token_ms": 14.904, "predicted_per_second": 67.09608158883522}, "tps": 67.09608158883522}, {"id": "f535982c-2b36-4f2f-90d4-883fe762ee25", "answer": "BC", "llm_answer": "B", "score": 0, "topics": [], "timings": {"cache_n": 59, "prompt_n": 168, "prompt_ms": 100.058, "prompt_per_token_ms": 0.5955833333333334, "prompt_per_second": 1679.0261648244018, "predicted_n": 2, "predicted_ms": 31.583, "predicted_per_token_ms": 15.7915, "predicted_per_second": 63.32520659848653}, "tps": 63.32520659848653}, {"id": "b75651bf-7ad4-4fea-af92-035e10d17718", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 60, "prompt_n": 153, "prompt_ms": 98.475, "prompt_per_token_ms": 0.6436274509803921, "prompt_per_second": 1553.6938309215539, "predicted_n": 2, "predicted_ms": 29.982, "predicted_per_token_ms": 14.991, "predicted_per_second": 66.70669068107532}, "tps": 66.70669068107532}, {"id": "efcaa0e5-4c13-40e2-887d-84252fe0b310", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SystemSecurity", "Vulnerability"], "timings": {"cache_n": 59, "prompt_n": 114, "prompt_ms": 90.435, "prompt_per_token_ms": 0.7932894736842105, "prompt_per_second": 1260.5738928512192, "predicted_n": 2, "predicted_ms": 28.326, "predicted_per_token_ms": 14.163, "predicted_per_second": 70.60650992021465}, "tps": 70.60650992021465}, {"id": "a62af244-4469-4445-924a-cee05b32116e", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["ApplicationSecurity", "Vulnerability"], "timings": {"cache_n": 61, "prompt_n": 151, "prompt_ms": 98.234, "prompt_per_token_ms": 0.6505562913907285, "prompt_per_second": 1537.145998330517, "predicted_n": 2, "predicted_ms": 29.242, "predicted_per_token_ms": 14.621, "predicted_per_second": 68.39477463921756}, "tps": 68.39477463921756}, {"id": "cd3f6680-fb19-47bf-a8ca-0b280969d0a2", "answer": "BD", "llm_answer": "BD", "score": 1, "topics": ["SystemSecurity", "PenTest"], "timings": {"cache_n": 60, "prompt_n": 98, "prompt_ms": 92.536, "prompt_per_token_ms": 0.9442448979591836, "prompt_per_second": 1059.0472897034667, "predicted_n": 4, "predicted_ms": 90.113, "predicted_per_token_ms": 22.52825, "predicted_per_second": 44.38871195055098}, "tps": 44.38871195055098}, {"id": "89efcd47-2fae-4553-b00a-73eab39b2252", "answer": "ABC", "llm_answer": "ABC", "score": 1, "topics": [], "timings": {"cache_n": 60, "prompt_n": 140, "prompt_ms": 99.76, "prompt_per_token_ms": 0.7125714285714286, "prompt_per_second": 1403.3680834001602, "predicted_n": 4, "predicted_ms": 87.942, "predicted_per_token_ms": 21.9855, "predicted_per_second": 45.484523890746175}, "tps": 45.484523890746175}, {"id": "f9c70ae5-becb-4539-bcc6-c41bbc18d6e5", "answer": "A", "llm_answer": "D", "score": 0, "topics": ["WebSecurity"], "timings": {"cache_n": 60, "prompt_n": 134, "prompt_ms": 96.745, "prompt_per_token_ms": 0.7219776119402985, "prompt_per_second": 1385.0845004909816, "predicted_n": 2, "predicted_ms": 28.922, "predicted_per_token_ms": 14.461, "predicted_per_second": 69.15151096051449}, "tps": 69.15151096051449}, {"id": "cdba3d33-bd5b-4bfa-88f9-b7093ab620b0", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["ApplicationSecurity"], "timings": {"cache_n": 59, "prompt_n": 179, "prompt_ms": 99.129, "prompt_per_token_ms": 0.5537932960893855, "prompt_per_second": 1805.7278899212135, "predicted_n": 2, "predicted_ms": 29.303, "predicted_per_token_ms": 14.6515, "predicted_per_second": 68.25239736545745}, "tps": 68.25239736545745}, {"id": "da003750-3a7e-4ade-b94f-231c447a75b9", "answer": "CD", "llm_answer": "ACD", "score": 0, "topics": ["WebSecurity", "SystemSecurity"], "timings": {"cache_n": 62, "prompt_n": 133, "prompt_ms": 95.79, "prompt_per_token_ms": 0.7202255639097744, "prompt_per_second": 1388.4539095939033, "predicted_n": 3, "predicted_ms": 60.769, "predicted_per_token_ms": 20.256333333333334, "predicted_per_second": 49.367276078263586}, "tps": 49.367276078263586}, {"id": "a7c891ca-f1af-4ab7-87fd-987a9f514380", "answer": "AC", "llm_answer": "AC", "score": 1, "topics": ["ApplicationSecurity", "WebSecurity"], "timings": {"cache_n": 60, "prompt_n": 109, "prompt_ms": 96.019, "prompt_per_token_ms": 0.880908256880734, "prompt_per_second": 1135.1919932513356, "predicted_n": 2, "predicted_ms": 31.356, "predicted_per_token_ms": 15.678, "predicted_per_second": 63.78364587319811}, "tps": 63.78364587319811}, {"id": "2bf49701-af29-447c-8887-eb94043a6701", "answer": "BD", "llm_answer": "A", "score": 0, "topics": ["ApplicationSecurity", "SystemSecurity"], "timings": {"cache_n": 62, "prompt_n": 151, "prompt_ms": 97.559, "prompt_per_token_ms": 0.6460860927152318, "prompt_per_second": 1547.7813425721872, "predicted_n": 2, "predicted_ms": 30.874, "predicted_per_token_ms": 15.437, "predicted_per_second": 64.77942605428517}, "tps": 64.77942605428517}, {"id": "05b2485d-4b47-4c1b-a37b-8c106b2db6f7", "answer": "BD", "llm_answer": "ABCD", "score": 0, "topics": ["SystemSecurity"], "timings": {"cache_n": 60, "prompt_n": 165, "prompt_ms": 86.488, "prompt_per_token_ms": 0.524169696969697, "prompt_per_second": 1907.7791138655073, "predicted_n": 5, "predicted_ms": 116.992, "predicted_per_token_ms": 23.398400000000002, "predicted_per_second": 42.73796498905908}, "tps": 42.73796498905908}, {"id": "aa1911db-dd35-4319-9ec7-1d3f709417e0", "answer": "D", "llm_answer": "D", "score": 1, "topics": ["SystemSecurity", "PenTest"], "timings": {"cache_n": 61, "prompt_n": 115, "prompt_ms": 96.364, "prompt_per_token_ms": 0.8379478260869565, "prompt_per_second": 1193.3917230501017, "predicted_n": 2, "predicted_ms": 28.847, "predicted_per_token_ms": 14.4235, "predicted_per_second": 69.33129961521128}, "tps": 69.33129961521128}, {"id": "3dea6038-0a29-4165-b4df-1b4f656654c6", "answer": "BC", "llm_answer": "BC", "score": 1, "topics": ["ApplicationSecurity"], "timings": {"cache_n": 58, "prompt_n": 150, "prompt_ms": 98.358, "prompt_per_token_ms": 0.6557200000000001, "prompt_per_second": 1525.0411761117548, "predicted_n": 3, "predicted_ms": 58.678, "predicted_per_token_ms": 19.55933333333333, "predicted_per_second": 51.126486928661514}, "tps": 51.126486928661514}, {"id": "a8f5b7c5-4d12-47f7-aee9-260b13b0c3a0", "answer": "BC", "llm_answer": "C", "score": 0, "topics": ["Vulnerability", "SoftwareSecurity"], "timings": {"cache_n": 60, "prompt_n": 148, "prompt_ms": 98.186, "prompt_per_token_ms": 0.663418918918919, "prompt_per_second": 1507.3432057523473, "predicted_n": 2, "predicted_ms": 28.932, "predicted_per_token_ms": 14.466, "predicted_per_second": 69.12760956726116}, "tps": 69.12760956726116}, {"id": "d76a3bf8-6514-4a18-9053-357d77355f0b", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 59, "prompt_n": 125, "prompt_ms": 96.287, "prompt_per_token_ms": 0.7702960000000001, "prompt_per_second": 1298.202249524858, "predicted_n": 2, "predicted_ms": 30.047, "predicted_per_token_ms": 15.0235, "predicted_per_second": 66.56238559589976}, "tps": 66.56238559589976}, {"id": "a4e65bb0-cb2a-4f89-995b-0df7c44f2e4e", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 60, "prompt_n": 136, "prompt_ms": 98.179, "prompt_per_token_ms": 0.7219044117647059, "prompt_per_second": 1385.2249462716059, "predicted_n": 2, "predicted_ms": 30.116, "predicted_per_token_ms": 15.058, "predicted_per_second": 66.40988179041041}, "tps": 66.40988179041041}, {"id": "c5128821-955f-47f5-901a-92a5c3b21735", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 59, "prompt_n": 160, "prompt_ms": 97.595, "prompt_per_token_ms": 0.60996875, "prompt_per_second": 1639.4282493980224, "predicted_n": 2, "predicted_ms": 32.194, "predicted_per_token_ms": 16.097, "predicted_per_second": 62.123377026775174}, "tps": 62.123377026775174}, {"id": "81d7e018-20b5-4db9-8f24-38b19da5689b", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 60, "prompt_n": 124, "prompt_ms": 98.394, "prompt_per_token_ms": 0.7935000000000001, "prompt_per_second": 1260.2394454946439, "predicted_n": 2, "predicted_ms": 29.572, "predicted_per_token_ms": 14.786, "predicted_per_second": 67.63154335181929}, "tps": 67.63154335181929}, {"id": "5b5e13fc-6e2a-4509-afdf-0ac32489d473", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["ApplicationSecurity", "WebSecurity"], "timings": {"cache_n": 60, "prompt_n": 88, "prompt_ms": 94.169, "prompt_per_token_ms": 1.0701022727272727, "prompt_per_second": 934.4901188289141, "predicted_n": 2, "predicted_ms": 29.125, "predicted_per_token_ms": 14.5625, "predicted_per_second": 68.6695278969957}, "tps": 68.6695278969957}, {"id": "456d5310-1731-433f-aca8-05325066da7b", "answer": "AB", "llm_answer": "ABCD", "score": 0, "topics": ["ApplicationSecurity", "SystemSecurity"], "timings": {"cache_n": 58, "prompt_n": 182, "prompt_ms": 87.941, "prompt_per_token_ms": 0.4831923076923077, "prompt_per_second": 2069.5693703733186, "predicted_n": 3, "predicted_ms": 57.426, "predicted_per_token_ms": 19.142, "predicted_per_second": 52.241145125901156}, "tps": 52.241145125901156}, {"id": "2bc00990-dff8-407d-8076-0aad0c9496cd", "answer": "AD", "llm_answer": "A", "score": 0, "topics": ["ApplicationSecurity"], "timings": {"cache_n": 60, "prompt_n": 138, "prompt_ms": 84.095, "prompt_per_token_ms": 0.6093840579710145, "prompt_per_second": 1641.0012485879065, "predicted_n": 2, "predicted_ms": 25.7, "predicted_per_token_ms": 12.85, "predicted_per_second": 77.82101167315176}, "tps": 77.82101167315176}, {"id": "c58c6b24-c7ee-431f-aab7-c4795d351f48", "answer": "C", "llm_answer": "A", "score": 0, "topics": ["WebSecurity", "SystemSecurity"], "timings": {"cache_n": 61, "prompt_n": 121, "prompt_ms": 84.577, "prompt_per_token_ms": 0.6989834710743802, "prompt_per_second": 1430.6489944074629, "predicted_n": 2, "predicted_ms": 28.245, "predicted_per_token_ms": 14.1225, "predicted_per_second": 70.80899274207825}, "tps": 70.80899274207825}, {"id": "856d7f64-a060-4c45-8a54-cf293420d3a4", "answer": "D", "llm_answer": "D", "score": 1, "topics": ["SystemSecurity", "PenTest"], "timings": {"cache_n": 60, "prompt_n": 98, "prompt_ms": 79.737, "prompt_per_token_ms": 0.8136428571428571, "prompt_per_second": 1229.040470546923, "predicted_n": 2, "predicted_ms": 27.343, "predicted_per_token_ms": 13.6715, "predicted_per_second": 73.1448634019676}, "tps": 73.1448634019676}, {"id": "8e55d32e-8700-48b7-8733-de5f8c0a2127", "answer": "AC", "llm_answer": "B", "score": 0, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 59, "prompt_n": 132, "prompt_ms": 88.23, "prompt_per_token_ms": 0.668409090909091, "prompt_per_second": 1496.089765385923, "predicted_n": 2, "predicted_ms": 26.69, "predicted_per_token_ms": 13.345, "predicted_per_second": 74.93443237167479}, "tps": 74.93443237167479}, {"id": "9fd171b7-4679-427d-9b7f-0f94a50e436a", "answer": "BC", "llm_answer": "A", "score": 0, "topics": ["PenTest"], "timings": {"cache_n": 60, "prompt_n": 135, "prompt_ms": 81.246, "prompt_per_token_ms": 0.6018222222222221, "prompt_per_second": 1661.6202643822467, "predicted_n": 2, "predicted_ms": 27.285, "predicted_per_token_ms": 13.6425, "predicted_per_second": 73.30034817665384}, "tps": 73.30034817665384}, {"id": "59674197-319f-4da6-b4b3-3ec74faadf51", "answer": "B", "llm_answer": "BD", "score": 0, "topics": ["ApplicationSecurity", "SoftwareSecurity"], "timings": {"cache_n": 59, "prompt_n": 122, "prompt_ms": 89.579, "prompt_per_token_ms": 0.7342540983606557, "prompt_per_second": 1361.9263443440987, "predicted_n": 2, "predicted_ms": 27.398, "predicted_per_token_ms": 13.699, "predicted_per_second": 72.99802905321556}, "tps": 72.99802905321556}, {"id": "41b922f3-d410-46af-ae65-980401ea54ad", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["PenTest", "SystemSecurity"], "timings": {"cache_n": 63, "prompt_n": 114, "prompt_ms": 78.343, "prompt_per_token_ms": 0.6872192982456141, "prompt_per_second": 1455.1395785200975, "predicted_n": 2, "predicted_ms": 27.152, "predicted_per_token_ms": 13.576, "predicted_per_second": 73.65939893930465}, "tps": 73.65939893930465}, {"id": "7e113b10-c718-48ae-b7d3-7989d35b6ecb", "answer": "CD", "llm_answer": "C", "score": 0, "topics": ["WebSecurity"], "timings": {"cache_n": 58, "prompt_n": 148, "prompt_ms": 82.772, "prompt_per_token_ms": 0.5592702702702703, "prompt_per_second": 1788.044266176968, "predicted_n": 2, "predicted_ms": 26.914, "predicted_per_token_ms": 13.457, "predicted_per_second": 74.31076763022962}, "tps": 74.31076763022962}, {"id": "53f98bbc-3e4d-497b-8013-9852d5c7184c", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SystemSecurity", "SoftwareSecurity", "Vulnerability"], "timings": {"cache_n": 59, "prompt_n": 146, "prompt_ms": 85.125, "prompt_per_token_ms": 0.5830479452054794, "prompt_per_second": 1715.1248164464025, "predicted_n": 2, "predicted_ms": 28.398, "predicted_per_token_ms": 14.199, "predicted_per_second": 70.42749489400661}, "tps": 70.42749489400661}, {"id": "445f2a45-ce45-4c8e-9a97-0abc80aa58dd", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["WebSecurity", "MemorySafety"], "timings": {"cache_n": 59, "prompt_n": 69, "prompt_ms": 81.518, "prompt_per_token_ms": 1.1814202898550725, "prompt_per_second": 846.4388233273633, "predicted_n": 2, "predicted_ms": 26.674, "predicted_per_token_ms": 13.337, "predicted_per_second": 74.97938067031566}, "tps": 74.97938067031566}, {"id": "7e6ea1c6-27d1-4127-9c69-4082d588a027", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 58, "prompt_n": 195, "prompt_ms": 84.205, "prompt_per_token_ms": 0.4318205128205128, "prompt_per_second": 2315.776972863844, "predicted_n": 2, "predicted_ms": 29.042, "predicted_per_token_ms": 14.521, "predicted_per_second": 68.86578059362303}, "tps": 68.86578059362303}, {"id": "5a255f3d-e2bc-4342-ac17-3bc92d5482ca", "answer": "AC", "llm_answer": "AC", "score": 1, "topics": ["ApplicationSecurity", "WebSecurity"], "timings": {"cache_n": 59, "prompt_n": 84, "prompt_ms": 92.454, "prompt_per_token_ms": 1.100642857142857, "prompt_per_second": 908.5599325069766, "predicted_n": 2, "predicted_ms": 28.619, "predicted_per_token_ms": 14.3095, "predicted_per_second": 69.88364373318424}, "tps": 69.88364373318424}, {"id": "11cac7aa-d151-4970-8eb2-b414b4fc99ce", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 58, "prompt_n": 128, "prompt_ms": 95.447, "prompt_per_token_ms": 0.7456796875, "prompt_per_second": 1341.0583884249897, "predicted_n": 2, "predicted_ms": 28.78, "predicted_per_token_ms": 14.39, "predicted_per_second": 69.49270326615705}, "tps": 69.49270326615705}, {"id": "8dc29902-15d7-4cff-8857-9cebf7686f2a", "answer": "BCD", "llm_answer": "B", "score": 0, "topics": ["Cryptography"], "timings": {"cache_n": 60, "prompt_n": 137, "prompt_ms": 101.356, "prompt_per_token_ms": 0.7398248175182481, "prompt_per_second": 1351.6713366746912, "predicted_n": 2, "predicted_ms": 30.111, "predicted_per_token_ms": 15.0555, "predicted_per_second": 66.42090930224835}, "tps": 66.42090930224835}, {"id": "fe0e29c1-29d9-4fd6-8106-3d26175161b9", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["WebSecurity", "SystemSecurity"], "timings": {"cache_n": 59, "prompt_n": 92, "prompt_ms": 93.987, "prompt_per_token_ms": 1.0215978260869565, "prompt_per_second": 978.8587783416856, "predicted_n": 2, "predicted_ms": 30.824, "predicted_per_token_ms": 15.412, "predicted_per_second": 64.88450558006748}, "tps": 64.88450558006748}, {"id": "e11a070a-8585-4c6a-90b5-e1f70d654eb1", "answer": "AC", "llm_answer": "C", "score": 0, "topics": ["Vulnerability"], "timings": {"cache_n": 58, "prompt_n": 125, "prompt_ms": 95.654, "prompt_per_token_ms": 0.765232, "prompt_per_second": 1306.793233947352, "predicted_n": 2, "predicted_ms": 29.794, "predicted_per_token_ms": 14.897, "predicted_per_second": 67.12760958582265}, "tps": 67.12760958582265}, {"id": "588ec3ad-4efc-4c05-9cd0-0e51c024cda8", "answer": "D", "llm_answer": "D", "score": 1, "topics": ["NetworkSecurity", "PenTest"], "timings": {"cache_n": 62, "prompt_n": 92, "prompt_ms": 93.425, "prompt_per_token_ms": 1.0154891304347826, "prompt_per_second": 984.7471233609848, "predicted_n": 2, "predicted_ms": 28.904, "predicted_per_token_ms": 14.452, "predicted_per_second": 69.1945751453086}, "tps": 69.1945751453086}, {"id": "a6d7c4f6-7c95-487b-9052-2c89b0165f4b", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["NetworkSecurity", "PenTest"], "timings": {"cache_n": 59, "prompt_n": 131, "prompt_ms": 97.558, "prompt_per_token_ms": 0.7447175572519085, "prompt_per_second": 1342.7909551241312, "predicted_n": 2, "predicted_ms": 29.629, "predicted_per_token_ms": 14.8145, "predicted_per_second": 67.50143440548112}, "tps": 67.50143440548112}, {"id": "68ab5a5b-d924-4529-866d-455ef3d85b9f", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["ApplicationSecurity", "SystemSecurity"], "timings": {"cache_n": 59, "prompt_n": 143, "prompt_ms": 98.123, "prompt_per_token_ms": 0.6861748251748252, "prompt_per_second": 1457.3545448060088, "predicted_n": 2, "predicted_ms": 30.232, "predicted_per_token_ms": 15.116, "predicted_per_second": 66.15506747816883}, "tps": 66.15506747816883}, {"id": "2a37827c-f773-4a42-86bc-715d4fa73b44", "answer": "AD", "llm_answer": "D", "score": 0, "topics": ["ApplicationSecurity", "NetworkSecurity", "PenTest"], "timings": {"cache_n": 62, "prompt_n": 88, "prompt_ms": 92.426, "prompt_per_token_ms": 1.0502954545454546, "prompt_per_second": 952.1130417847792, "predicted_n": 2, "predicted_ms": 29.367, "predicted_per_token_ms": 14.6835, "predicted_per_second": 68.10365376102428}, "tps": 68.10365376102428}, {"id": "396e56d2-f127-4d98-8e59-774ca9a22837", "answer": "A", "llm_answer": "D", "score": 0, "topics": ["SoftwareSecurity", "ApplicationSecurity"], "timings": {"cache_n": 58, "prompt_n": 161, "prompt_ms": 84.29, "prompt_per_token_ms": 0.5235403726708074, "prompt_per_second": 1910.0723692015658, "predicted_n": 2, "predicted_ms": 27.441, "predicted_per_token_ms": 13.7205, "predicted_per_second": 72.88364126671769}, "tps": 72.88364126671769}, {"id": "ecbf92bc-b860-4df5-8b84-3d294c058060", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["SoftwareSecurity", "ApplicationSecurity", "Vulnerability"], "timings": {"cache_n": 60, "prompt_n": 132, "prompt_ms": 96.622, "prompt_per_token_ms": 0.7319848484848485, "prompt_per_second": 1366.148496201693, "predicted_n": 2, "predicted_ms": 29.775, "predicted_per_token_ms": 14.8875, "predicted_per_second": 67.17044500419816}, "tps": 67.17044500419816}, {"id": "4de8282e-cdcd-4bd4-bc1b-e6c1ac4a47ec", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["ApplicationSecurity"], "timings": {"cache_n": 60, "prompt_n": 120, "prompt_ms": 95.685, "prompt_per_token_ms": 0.7973750000000001, "prompt_per_second": 1254.115065057219, "predicted_n": 2, "predicted_ms": 30.147, "predicted_per_token_ms": 15.0735, "predicted_per_second": 66.34159286164461}, "tps": 66.34159286164461}, {"id": "d0ba335d-82c1-4374-a2da-394b0175dda6", "answer": "B", "llm_answer": "C", "score": 0, "topics": ["Vulnerability"], "timings": {"cache_n": 61, "prompt_n": 164, "prompt_ms": 99.409, "prompt_per_token_ms": 0.6061524390243903, "prompt_per_second": 1649.7500226337654, "predicted_n": 2, "predicted_ms": 29.673, "predicted_per_token_ms": 14.8365, "predicted_per_second": 67.4013412866916}, "tps": 67.4013412866916}, {"id": "f20b3e98-8e80-404e-b46a-88d1050f9640", "answer": "AB", "llm_answer": "BD", "score": 0, "topics": ["ApplicationSecurity"], "timings": {"cache_n": 61, "prompt_n": 134, "prompt_ms": 97.593, "prompt_per_token_ms": 0.7283059701492538, "prompt_per_second": 1373.0492965684016, "predicted_n": 3, "predicted_ms": 57.359, "predicted_per_token_ms": 19.119666666666667, "predicted_per_second": 52.302167053121565}, "tps": 52.302167053121565}, {"id": "61a2ec5e-b3a4-411a-9b26-e08b578efb31", "answer": "AD", "llm_answer": "AD", "score": 1, "topics": ["PenTest"], "timings": {"cache_n": 60, "prompt_n": 125, "prompt_ms": 94.546, "prompt_per_token_ms": 0.756368, "prompt_per_second": 1322.107757070632, "predicted_n": 3, "predicted_ms": 58.909, "predicted_per_token_ms": 19.636333333333333, "predicted_per_second": 50.92600451543907}, "tps": 50.92600451543907}, {"id": "a74cc59b-8e2c-4eb1-9f29-45cfbb3d3bfb", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["WebSecurity", "Vulnerability"], "timings": {"cache_n": 60, "prompt_n": 127, "prompt_ms": 95.943, "prompt_per_token_ms": 0.7554566929133858, "prompt_per_second": 1323.7026150943789, "predicted_n": 2, "predicted_ms": 30.136, "predicted_per_token_ms": 15.068, "predicted_per_second": 66.36580833554552}, "tps": 66.36580833554552}, {"id": "6a462b7e-771a-46ff-aa33-83a3743429c5", "answer": "B", "llm_answer": "C", "score": 0, "topics": ["ApplicationSecurity", "SystemSecurity"], "timings": {"cache_n": 59, "prompt_n": 194, "prompt_ms": 104.238, "prompt_per_token_ms": 0.5373092783505155, "prompt_per_second": 1861.1255012567394, "predicted_n": 2, "predicted_ms": 32.632, "predicted_per_token_ms": 16.316, "predicted_per_second": 61.28953174797745}, "tps": 61.28953174797745}, {"id": "53353b89-1c00-4dcf-9a43-0e55308238d7", "answer": "D", "llm_answer": "B", "score": 0, "topics": ["SystemSecurity"], "timings": {"cache_n": 60, "prompt_n": 138, "prompt_ms": 102.886, "prompt_per_token_ms": 0.7455507246376811, "prompt_per_second": 1341.2903602044983, "predicted_n": 2, "predicted_ms": 31.299, "predicted_per_token_ms": 15.6495, "predicted_per_second": 63.89980510559443}, "tps": 63.89980510559443}, {"id": "b8b3c31e-0618-4697-bc56-7e68eda381ce", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["SystemSecurity", "WebSecurity"], "timings": {"cache_n": 60, "prompt_n": 154, "prompt_ms": 91.434, "prompt_per_token_ms": 0.5937272727272727, "prompt_per_second": 1684.2749961721024, "predicted_n": 2, "predicted_ms": 28.412, "predicted_per_token_ms": 14.206, "predicted_per_second": 70.39279177812192}, "tps": 70.39279177812192}, {"id": "6ae2a0a6-d13c-42a8-b0b3-a167974f3fb2", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 61, "prompt_n": 183, "prompt_ms": 93.884, "prompt_per_token_ms": 0.5130273224043715, "prompt_per_second": 1949.213923565251, "predicted_n": 2, "predicted_ms": 29.195, "predicted_per_token_ms": 14.5975, "predicted_per_second": 68.50488097276931}, "tps": 68.50488097276931}, {"id": "97dda1c6-bf6d-4f9f-abae-d11b8e37adf6", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["WebSecurity", "SystemSecurity"], "timings": {"cache_n": 59, "prompt_n": 140, "prompt_ms": 97.146, "prompt_per_token_ms": 0.6939, "prompt_per_second": 1441.1298457991065, "predicted_n": 2, "predicted_ms": 31.038, "predicted_per_token_ms": 15.519, "predicted_per_second": 64.43714156840002}, "tps": 64.43714156840002}, {"id": "ef301c49-97ec-4816-b30f-464a3e87359a", "answer": "AC", "llm_answer": "AC", "score": 1, "topics": ["SoftwareSecurity", "SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 60, "prompt_n": 151, "prompt_ms": 97.23, "prompt_per_token_ms": 0.6439072847682119, "prompt_per_second": 1553.0186156536047, "predicted_n": 2, "predicted_ms": 29.404, "predicted_per_token_ms": 14.702, "predicted_per_second": 68.0179567405795}, "tps": 68.0179567405795}, {"id": "34bae2ef-4df7-4878-9606-8a3312d97177", "answer": "ABD", "llm_answer": "ABD", "score": 1, "topics": ["SystemSecurity", "WebSecurity"], "timings": {"cache_n": 61, "prompt_n": 79, "prompt_ms": 91.758, "prompt_per_token_ms": 1.161493670886076, "prompt_per_second": 860.9603522308682, "predicted_n": 4, "predicted_ms": 89.685, "predicted_per_token_ms": 22.42125, "predicted_per_second": 44.60054635669287}, "tps": 44.60054635669287}, {"id": "5e207899-3d03-432a-9822-2904db3324d0", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["ApplicationSecurity"], "timings": {"cache_n": 58, "prompt_n": 232, "prompt_ms": 104.682, "prompt_per_token_ms": 0.4512155172413793, "prompt_per_second": 2216.23583806194, "predicted_n": 2, "predicted_ms": 35.654, "predicted_per_token_ms": 17.827, "predicted_per_second": 56.0946878330622}, "tps": 56.0946878330622}, {"id": "9eb647d4-247e-435b-8f39-41fe30d78d92", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["WebSecurity", "SystemSecurity"], "timings": {"cache_n": 60, "prompt_n": 211, "prompt_ms": 99.168, "prompt_per_token_ms": 0.46999052132701424, "prompt_per_second": 2127.702484672475, "predicted_n": 2, "predicted_ms": 33.069, "predicted_per_token_ms": 16.5345, "predicted_per_second": 60.47960325380265}, "tps": 60.47960325380265}, {"id": "bf9a616f-836e-4a1c-a827-cd7f81d3f274", "answer": "BD", "llm_answer": "BD", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 62, "prompt_n": 132, "prompt_ms": 100.605, "prompt_per_token_ms": 0.762159090909091, "prompt_per_second": 1312.062024750261, "predicted_n": 3, "predicted_ms": 58.849, "predicted_per_token_ms": 19.616333333333333, "predicted_per_second": 50.97792655780047}, "tps": 50.97792655780047}, {"id": "85190efa-bf04-4df4-b20a-0e1ee9929c4d", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 59, "prompt_n": 141, "prompt_ms": 98.058, "prompt_per_token_ms": 0.6954468085106383, "prompt_per_second": 1437.9244936670134, "predicted_n": 2, "predicted_ms": 29.137, "predicted_per_token_ms": 14.5685, "predicted_per_second": 68.64124652503689}, "tps": 68.64124652503689}, {"id": "d98de15a-d1d8-4637-8a6d-061fe5c52e5b", "answer": "D", "llm_answer": "D", "score": 1, "topics": ["WebSecurity", "ApplicationSecurity"], "timings": {"cache_n": 60, "prompt_n": 79, "prompt_ms": 90.364, "prompt_per_token_ms": 1.1438481012658228, "prompt_per_second": 874.2419547607453, "predicted_n": 2, "predicted_ms": 25.876, "predicted_per_token_ms": 12.938, "predicted_per_second": 77.29169887154119}, "tps": 77.29169887154119}, {"id": "c0d9c18d-b4cd-43fb-aba0-f974a6e6e0a3", "answer": "ABC", "llm_answer": "ABCD", "score": 0, "topics": ["WebSecurity", "MemorySafety"], "timings": {"cache_n": 58, "prompt_n": 117, "prompt_ms": 92.129, "prompt_per_token_ms": 0.7874273504273505, "prompt_per_second": 1269.9584278565922, "predicted_n": 3, "predicted_ms": 58.381, "predicted_per_token_ms": 19.460333333333335, "predicted_per_second": 51.38658125074939}, "tps": 51.38658125074939}, {"id": "0b3c42f1-9658-4e9d-b060-9dd64929deb3", "answer": "BD", "llm_answer": "BD", "score": 1, "topics": ["PenTest", "SystemSecurity", "WebSecurity"], "timings": {"cache_n": 62, "prompt_n": 88, "prompt_ms": 93.19, "prompt_per_token_ms": 1.0589772727272726, "prompt_per_second": 944.3073291125658, "predicted_n": 3, "predicted_ms": 59.374, "predicted_per_token_ms": 19.791333333333334, "predicted_per_second": 50.527166773335125}, "tps": 50.527166773335125}, {"id": "bbe80472-9565-4a25-9717-49aaf950e469", "answer": "D", "llm_answer": "D", "score": 1, "topics": ["WebSecurity", "SystemSecurity", "Vulnerability"], "timings": {"cache_n": 59, "prompt_n": 140, "prompt_ms": 99.449, "prompt_per_token_ms": 0.71035, "prompt_per_second": 1407.756739635391, "predicted_n": 2, "predicted_ms": 31.688, "predicted_per_token_ms": 15.844, "predicted_per_second": 63.11537490532694}, "tps": 63.11537490532694}, {"id": "ae760551-460b-48e8-99f9-a230fd91c97d", "answer": "AD", "llm_answer": "D", "score": 0, "topics": ["WebSecurity", "ApplicationSecurity"], "timings": {"cache_n": 60, "prompt_n": 129, "prompt_ms": 99.768, "prompt_per_token_ms": 0.7733953488372093, "prompt_per_second": 1292.9997594419053, "predicted_n": 2, "predicted_ms": 29.684, "predicted_per_token_ms": 14.842, "predicted_per_second": 67.37636437137851}, "tps": 67.37636437137851}, {"id": "69244e51-b228-44a0-a38d-80574749a55f", "answer": "D", "llm_answer": "C", "score": 0, "topics": ["WebSecurity"], "timings": {"cache_n": 59, "prompt_n": 115, "prompt_ms": 96.487, "prompt_per_token_ms": 0.8390173913043478, "prompt_per_second": 1191.8704074123975, "predicted_n": 3, "predicted_ms": 58.469, "predicted_per_token_ms": 19.48966666666667, "predicted_per_second": 51.30924079426705}, "tps": 51.30924079426705}, {"id": "60e863e5-047e-4699-80d0-82894ed6671d", "answer": "AC", "llm_answer": "BD", "score": 0, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 58, "prompt_n": 100, "prompt_ms": 95.866, "prompt_per_token_ms": 0.95866, "prompt_per_second": 1043.1226920910437, "predicted_n": 2, "predicted_ms": 29.064, "predicted_per_token_ms": 14.532, "predicted_per_second": 68.81365262868152}, "tps": 68.81365262868152}, {"id": "045340b0-9126-4d69-a0b4-0e902a008fca", "answer": "BC", "llm_answer": "AC", "score": 0, "topics": ["ApplicationSecurity", "SystemSecurity"], "timings": {"cache_n": 59, "prompt_n": 220, "prompt_ms": 102.442, "prompt_per_token_ms": 0.46564545454545453, "prompt_per_second": 2147.556666211124, "predicted_n": 2, "predicted_ms": 29.646, "predicted_per_token_ms": 14.823, "predicted_per_second": 67.462726843419}, "tps": 67.462726843419}, {"id": "d65560dc-7de2-4c8c-8b65-f61192abf7f3", "answer": "ABD", "llm_answer": "A", "score": 0, "topics": ["WebSecurity"], "timings": {"cache_n": 60, "prompt_n": 132, "prompt_ms": 97.72, "prompt_per_token_ms": 0.7403030303030302, "prompt_per_second": 1350.798198935735, "predicted_n": 2, "predicted_ms": 30.658, "predicted_per_token_ms": 15.329, "predicted_per_second": 65.23582751647204}, "tps": 65.23582751647204}, {"id": "4feed59c-787b-4ed0-9a2c-12df574b42cf", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["ApplicationSecurity", "NetworkSecurity"], "timings": {"cache_n": 62, "prompt_n": 117, "prompt_ms": 97.125, "prompt_per_token_ms": 0.8301282051282052, "prompt_per_second": 1204.6332046332047, "predicted_n": 2, "predicted_ms": 26.942, "predicted_per_token_ms": 13.471, "predicted_per_second": 74.23353871279043}, "tps": 74.23353871279043}, {"id": "e564c8a7-9003-4376-8423-c4130ac21841", "answer": "AD", "llm_answer": "ABCD", "score": 0, "topics": ["SystemSecurity", "NetworkSecurity"], "timings": {"cache_n": 62, "prompt_n": 182, "prompt_ms": 93.731, "prompt_per_token_ms": 0.5150054945054945, "prompt_per_second": 1941.7268566429464, "predicted_n": 3, "predicted_ms": 59.441, "predicted_per_token_ms": 19.813666666666666, "predicted_per_second": 50.470214161942096}, "tps": 50.470214161942096}, {"id": "2d36466f-9d0a-4fed-8e8e-b2d14db3b473", "answer": "AB", "llm_answer": "AB", "score": 1, "topics": ["WebSecurity", "PenTest"], "timings": {"cache_n": 61, "prompt_n": 161, "prompt_ms": 99.626, "prompt_per_token_ms": 0.6187950310559006, "prompt_per_second": 1616.0440045771186, "predicted_n": 4, "predicted_ms": 87.908, "predicted_per_token_ms": 21.977, "predicted_per_second": 45.50211584838695}, "tps": 45.50211584838695}, {"id": "de287452-77c7-4e1e-89c8-738043577801", "answer": "BC", "llm_answer": "B", "score": 0, "topics": ["SoftwareSecurity", "ApplicationSecurity", "Vulnerability"], "timings": {"cache_n": 60, "prompt_n": 142, "prompt_ms": 104.269, "prompt_per_token_ms": 0.7342887323943662, "prompt_per_second": 1361.862106666411, "predicted_n": 2, "predicted_ms": 32.904, "predicted_per_token_ms": 16.452, "predicted_per_second": 60.78288353999513}, "tps": 60.78288353999513}, {"id": "3f435b2b-e7b8-48f8-a456-e4b4b5edd28b", "answer": "D", "llm_answer": "B", "score": 0, "topics": ["WebSecurity", "PenTest"], "timings": {"cache_n": 62, "prompt_n": 61, "prompt_ms": 95.456, "prompt_per_token_ms": 1.5648524590163935, "prompt_per_second": 639.0378813275227, "predicted_n": 2, "predicted_ms": 29.581, "predicted_per_token_ms": 14.7905, "predicted_per_second": 67.6109664987661}, "tps": 67.6109664987661}, {"id": "593d442c-418e-4bbc-b3ca-77c38b6198f9", "answer": "AD", "llm_answer": "A", "score": 0, "topics": ["PenTest"], "timings": {"cache_n": 58, "prompt_n": 179, "prompt_ms": 102.272, "prompt_per_token_ms": 0.5713519553072626, "prompt_per_second": 1750.2346683354194, "predicted_n": 2, "predicted_ms": 30.344, "predicted_per_token_ms": 15.172, "predicted_per_second": 65.9108884787767}, "tps": 65.9108884787767}, {"id": "11bf0538-fc05-4a95-be4e-53b72fcb78c6", "answer": "BD", "llm_answer": "BD", "score": 1, "topics": ["SoftwareSecurity", "Vulnerability"], "timings": {"cache_n": 61, "prompt_n": 207, "prompt_ms": 101.999, "prompt_per_token_ms": 0.49274879227053137, "prompt_per_second": 2029.4316610947167, "predicted_n": 2, "predicted_ms": 29.5, "predicted_per_token_ms": 14.75, "predicted_per_second": 67.79661016949153}, "tps": 67.79661016949153}, {"id": "9748edda-3056-40f2-a5e4-bc9df7ea0909", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SystemSecurity", "PenTest"], "timings": {"cache_n": 63, "prompt_n": 141, "prompt_ms": 96.745, "prompt_per_token_ms": 0.6861347517730497, "prompt_per_second": 1457.439660964391, "predicted_n": 2, "predicted_ms": 29.633, "predicted_per_token_ms": 14.8165, "predicted_per_second": 67.49232274828738}, "tps": 67.49232274828738}, {"id": "b3d0e8db-b36f-43f4-bc0f-cdb1b97b5588", "answer": "BC", "llm_answer": "BC", "score": 1, "topics": ["ApplicationSecurity", "WebSecurity"], "timings": {"cache_n": 59, "prompt_n": 144, "prompt_ms": 102.505, "prompt_per_token_ms": 0.7118402777777777, "prompt_per_second": 1404.809521486757, "predicted_n": 4, "predicted_ms": 91.761, "predicted_per_token_ms": 22.94025, "predicted_per_second": 43.59150401586731}, "tps": 43.59150401586731}, {"id": "30d3fe13-548c-4432-baa5-574580a57afe", "answer": "AB", "llm_answer": "AC", "score": 0, "topics": ["ApplicationSecurity", "SystemSecurity"], "timings": {"cache_n": 59, "prompt_n": 182, "prompt_ms": 88.286, "prompt_per_token_ms": 0.4850879120879121, "prompt_per_second": 2061.4820016763697, "predicted_n": 2, "predicted_ms": 29.097, "predicted_per_token_ms": 14.5485, "predicted_per_second": 68.73560848197408}, "tps": 68.73560848197408}, {"id": "3ac37ab1-c53a-459c-ae80-a685fbd4fd84", "answer": "ACD", "llm_answer": "ABD", "score": 0, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 60, "prompt_n": 94, "prompt_ms": 91.712, "prompt_per_token_ms": 0.9756595744680852, "prompt_per_second": 1024.9476622470343, "predicted_n": 3, "predicted_ms": 57.527, "predicted_per_token_ms": 19.175666666666668, "predicted_per_second": 52.149425487162546}, "tps": 52.149425487162546}, {"id": "7eea03ea-9ef9-45c1-991a-a2a0c1ecf698", "answer": "AC", "llm_answer": "AC", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 58, "prompt_n": 149, "prompt_ms": 98.047, "prompt_per_token_ms": 0.6580335570469799, "prompt_per_second": 1519.6793374606057, "predicted_n": 2, "predicted_ms": 28.868, "predicted_per_token_ms": 14.434, "predicted_per_second": 69.28086462519053}, "tps": 69.28086462519053}, {"id": "d14b8a9b-0086-47f2-8b6d-104be4ea9f8b", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SystemSecurity", "WebSecurity"], "timings": {"cache_n": 59, "prompt_n": 121, "prompt_ms": 96.136, "prompt_per_token_ms": 0.7945123966942148, "prompt_per_second": 1258.6336023966048, "predicted_n": 2, "predicted_ms": 28.93, "predicted_per_token_ms": 14.465, "predicted_per_second": 69.13238852402351}, "tps": 69.13238852402351}, {"id": "e7a1e2e3-fc3e-4b9c-aefb-50dba93f4af5", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 59, "prompt_n": 200, "prompt_ms": 102.314, "prompt_per_token_ms": 0.51157, "prompt_per_second": 1954.7666985945227, "predicted_n": 2, "predicted_ms": 31.296, "predicted_per_token_ms": 15.648, "predicted_per_second": 63.90593047034765}, "tps": 63.90593047034765}, {"id": "a1a3d520-8217-43af-adf6-a67c97ff04c2", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["PenTest"], "timings": {"cache_n": 63, "prompt_n": 136, "prompt_ms": 97.407, "prompt_per_token_ms": 0.7162279411764706, "prompt_per_second": 1396.2035582658332, "predicted_n": 2, "predicted_ms": 29.999, "predicted_per_token_ms": 14.9995, "predicted_per_second": 66.66888896296544}, "tps": 66.66888896296544}, {"id": "9781f102-ee11-40ee-9b3f-3428228a5f39", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["SystemSecurity", "PenTest"], "timings": {"cache_n": 60, "prompt_n": 102, "prompt_ms": 94.752, "prompt_per_token_ms": 0.9289411764705882, "prompt_per_second": 1076.4944275582573, "predicted_n": 2, "predicted_ms": 31.343, "predicted_per_token_ms": 15.6715, "predicted_per_second": 63.81010113901031}, "tps": 63.81010113901031}, {"id": "96a723be-c611-4964-831d-16fad95d8d13", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["WebSecurity", "SoftwareSecurity"], "timings": {"cache_n": 59, "prompt_n": 123, "prompt_ms": 101.055, "prompt_per_token_ms": 0.8215853658536586, "prompt_per_second": 1217.1589728365739, "predicted_n": 2, "predicted_ms": 31.638, "predicted_per_token_ms": 15.819, "predicted_per_second": 63.21512105695682}, "tps": 63.21512105695682}, {"id": "96c3a6ad-dd1f-4d27-80ac-f20068c875c7", "answer": "BC", "llm_answer": "C", "score": 0, "topics": ["WebSecurity", "SystemSecurity"], "timings": {"cache_n": 61, "prompt_n": 160, "prompt_ms": 105.211, "prompt_per_token_ms": 0.65756875, "prompt_per_second": 1520.7535333757878, "predicted_n": 2, "predicted_ms": 31.129, "predicted_per_token_ms": 15.5645, "predicted_per_second": 64.24877124224999}, "tps": 64.24877124224999}, {"id": "d72fcae9-edf0-4833-8ca0-9dc9d7408a96", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 60, "prompt_n": 140, "prompt_ms": 103.294, "prompt_per_token_ms": 0.7378142857142856, "prompt_per_second": 1355.354618854919, "predicted_n": 2, "predicted_ms": 27.963, "predicted_per_token_ms": 13.9815, "predicted_per_second": 71.52308407538533}, "tps": 71.52308407538533}, {"id": "067c49dc-94bf-4c8c-9b4d-5e42cac621b0", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["ApplicationSecurity"], "timings": {"cache_n": 59, "prompt_n": 116, "prompt_ms": 93.08, "prompt_per_token_ms": 0.8024137931034483, "prompt_per_second": 1246.2397937258274, "predicted_n": 2, "predicted_ms": 30.885, "predicted_per_token_ms": 15.4425, "predicted_per_second": 64.75635421725757}, "tps": 64.75635421725757}, {"id": "e8e9aaac-c612-461f-a253-053144455c95", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 58, "prompt_n": 102, "prompt_ms": 95.19, "prompt_per_token_ms": 0.933235294117647, "prompt_per_second": 1071.5411282697762, "predicted_n": 2, "predicted_ms": 30.816, "predicted_per_token_ms": 15.408, "predicted_per_second": 64.90134994807892}, "tps": 64.90134994807892}, {"id": "1ed36536-5865-4414-8a42-69d6a22ba7de", "answer": "D", "llm_answer": "D", "score": 1, "topics": ["SystemSecurity", "NetworkSecurity", "PenTest"], "timings": {"cache_n": 62, "prompt_n": 96, "prompt_ms": 92.986, "prompt_per_token_ms": 0.9686041666666667, "prompt_per_second": 1032.4134815993805, "predicted_n": 2, "predicted_ms": 29.715, "predicted_per_token_ms": 14.8575, "predicted_per_second": 67.30607437321218}, "tps": 67.30607437321218}, {"id": "4d42f1c3-b756-49b0-9ce3-d84fe5c613c2", "answer": "AD", "llm_answer": "BD", "score": 0, "topics": ["ApplicationSecurity", "SystemSecurity"], "timings": {"cache_n": 58, "prompt_n": 85, "prompt_ms": 95.765, "prompt_per_token_ms": 1.1266470588235293, "prompt_per_second": 887.5894115804313, "predicted_n": 2, "predicted_ms": 28.619, "predicted_per_token_ms": 14.3095, "predicted_per_second": 69.88364373318424}, "tps": 69.88364373318424}, {"id": "e82f3efd-9d1b-4ba6-8b38-fb479c19ef3e", "answer": "C", "llm_answer": "B", "score": 0, "topics": ["SoftwareSecurity", "ApplicationSecurity", "Vulnerability"], "timings": {"cache_n": 59, "prompt_n": 114, "prompt_ms": 96.961, "prompt_per_token_ms": 0.8505350877192982, "prompt_per_second": 1175.7304483245841, "predicted_n": 2, "predicted_ms": 28.949, "predicted_per_token_ms": 14.4745, "predicted_per_second": 69.0870150955128}, "tps": 69.0870150955128}, {"id": "060a9000-19b0-4b20-b4a5-b8b092339b09", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["PenTest", "SystemSecurity"], "timings": {"cache_n": 58, "prompt_n": 129, "prompt_ms": 98.052, "prompt_per_token_ms": 0.760093023255814, "prompt_per_second": 1315.6284420511565, "predicted_n": 2, "predicted_ms": 30.004, "predicted_per_token_ms": 15.002, "predicted_per_second": 66.65777896280495}, "tps": 66.65777896280495}, {"id": "4a27ae02-21a0-4fc2-83e6-8fc85ad9f567", "answer": "AD", "llm_answer": "D", "score": 0, "topics": ["SoftwareSecurity", "ApplicationSecurity", "Vulnerability"], "timings": {"cache_n": 59, "prompt_n": 157, "prompt_ms": 96.803, "prompt_per_token_ms": 0.6165796178343949, "prompt_per_second": 1621.8505624825677, "predicted_n": 2, "predicted_ms": 30.487, "predicted_per_token_ms": 15.2435, "predicted_per_second": 65.60173188572179}, "tps": 65.60173188572179}, {"id": "480ab3ac-ad12-47bc-bbdf-cb6a125204cd", "answer": "B", "llm_answer": "A", "score": 0, "topics": ["SystemSecurity", "WebSecurity"], "timings": {"cache_n": 60, "prompt_n": 225, "prompt_ms": 104.126, "prompt_per_token_ms": 0.46278222222222226, "prompt_per_second": 2160.8435933388396, "predicted_n": 2, "predicted_ms": 29.847, "predicted_per_token_ms": 14.9235, "predicted_per_second": 67.0084095553992}, "tps": 67.0084095553992}, {"id": "5241e793-69ec-40aa-9dfe-b36886fbe247", "answer": "CD", "llm_answer": "AD", "score": 0, "topics": ["WebSecurity"], "timings": {"cache_n": 63, "prompt_n": 101, "prompt_ms": 94.522, "prompt_per_token_ms": 0.9358613861386139, "prompt_per_second": 1068.534309472927, "predicted_n": 2, "predicted_ms": 29.851, "predicted_per_token_ms": 14.9255, "predicted_per_second": 66.99943050484072}, "tps": 66.99943050484072}, {"id": "0706a389-ac25-4cf2-9445-f970851e36d2", "answer": "AC", "llm_answer": "A", "score": 0, "topics": ["SystemSecurity", "Vulnerability"], "timings": {"cache_n": 58, "prompt_n": 205, "prompt_ms": 101.878, "prompt_per_token_ms": 0.49696585365853657, "prompt_per_second": 2012.2106833663795, "predicted_n": 2, "predicted_ms": 28.429, "predicted_per_token_ms": 14.2145, "predicted_per_second": 70.35069823067994}, "tps": 70.35069823067994}, {"id": "286852f8-7a49-4525-988b-8f3ff35dec37", "answer": "AD", "llm_answer": "AC", "score": 0, "topics": ["SystemSecurity", "PenTest"], "timings": {"cache_n": 60, "prompt_n": 121, "prompt_ms": 87.479, "prompt_per_token_ms": 0.7229669421487603, "prompt_per_second": 1383.1891082431212, "predicted_n": 3, "predicted_ms": 62.514, "predicted_per_token_ms": 20.838, "predicted_per_second": 47.98925040790863}, "tps": 47.98925040790863}, {"id": "996c2ee3-5c38-47c8-aeb1-2d88c5a35884", "answer": "AD", "llm_answer": "A", "score": 0, "topics": ["SoftwareSecurity", "Vulnerability"], "timings": {"cache_n": 60, "prompt_n": 113, "prompt_ms": 95.309, "prompt_per_token_ms": 0.8434424778761062, "prompt_per_second": 1185.6173079142577, "predicted_n": 2, "predicted_ms": 29.689, "predicted_per_token_ms": 14.8445, "predicted_per_second": 67.36501734649197}, "tps": 67.36501734649197}, {"id": "3e053bda-2526-404d-8ca5-a0865fb4d085", "answer": "B", "llm_answer": "AB", "score": 0, "topics": ["WebSecurity", "ApplicationSecurity"], "timings": {"cache_n": 59, "prompt_n": 106, "prompt_ms": 95.05, "prompt_per_token_ms": 0.8966981132075471, "prompt_per_second": 1115.202524986849, "predicted_n": 2, "predicted_ms": 30.174, "predicted_per_token_ms": 15.087, "predicted_per_second": 66.28222973420826}, "tps": 66.28222973420826}, {"id": "4e8194d1-cfd0-406e-8f86-c5dff33cfe55", "answer": "AC", "llm_answer": "ABCD", "score": 0, "topics": ["PenTest", "ApplicationSecurity"], "timings": {"cache_n": 59, "prompt_n": 117, "prompt_ms": 95.984, "prompt_per_token_ms": 0.8203760683760684, "prompt_per_second": 1218.95315885981, "predicted_n": 39, "predicted_ms": 1112.651, "predicted_per_token_ms": 28.52951282051282, "predicted_per_second": 35.051422233926}, "tps": 35.051422233926}, {"id": "2da0f6cc-903a-4a12-bbef-ff1c8fb4cc0c", "answer": "ABD", "llm_answer": "ABCD", "score": 0, "topics": ["NetworkSecurity", "ApplicationSecurity"], "timings": {"cache_n": 61, "prompt_n": 116, "prompt_ms": 96.414, "prompt_per_token_ms": 0.8311551724137931, "prompt_per_second": 1203.1447715062127, "predicted_n": 3, "predicted_ms": 58.439, "predicted_per_token_ms": 19.479666666666667, "predicted_per_second": 51.33558069097692}, "tps": 51.33558069097692}, {"id": "c0234be8-e9ae-410c-a59a-6085436e74c9", "answer": "BD", "llm_answer": "ABCD", "score": 0, "topics": ["SystemSecurity", "NetworkSecurity"], "timings": {"cache_n": 59, "prompt_n": 158, "prompt_ms": 97.916, "prompt_per_token_ms": 0.6197215189873417, "prompt_per_second": 1613.6280076800524, "predicted_n": 8, "predicted_ms": 206.505, "predicted_per_token_ms": 25.813125, "predicted_per_second": 38.73998208275829}, "tps": 38.73998208275829}, {"id": "0a548965-03b2-4f2b-9a05-ee7b106268a9", "answer": "D", "llm_answer": "D", "score": 1, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 60, "prompt_n": 156, "prompt_ms": 87.611, "prompt_per_token_ms": 0.5616089743589744, "prompt_per_second": 1780.5983266941366, "predicted_n": 2, "predicted_ms": 26.974, "predicted_per_token_ms": 13.487, "predicted_per_second": 74.14547341884779}, "tps": 74.14547341884779}, {"id": "7a05e606-5ee6-41f4-aace-8464fdfc9f3c", "answer": "AD", "llm_answer": "AB", "score": 0, "topics": ["WebSecurity", "PenTest"], "timings": {"cache_n": 62, "prompt_n": 121, "prompt_ms": 92.549, "prompt_per_token_ms": 0.7648677685950414, "prompt_per_second": 1307.415531232104, "predicted_n": 3, "predicted_ms": 60.836, "predicted_per_token_ms": 20.278666666666666, "predicted_per_second": 49.31290683148136}, "tps": 49.31290683148136}, {"id": "a4ee6e19-9d98-48e4-a8dc-89dd4266da8d", "answer": "D", "llm_answer": "D", "score": 1, "topics": ["SystemSecurity"], "timings": {"cache_n": 59, "prompt_n": 130, "prompt_ms": 97.28, "prompt_per_token_ms": 0.7483076923076923, "prompt_per_second": 1336.3486842105262, "predicted_n": 2, "predicted_ms": 31.216, "predicted_per_token_ms": 15.608, "predicted_per_second": 64.06970784213223}, "tps": 64.06970784213223}, {"id": "3dad8437-fb58-4b65-9057-21d905c1898e", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["ApplicationSecurity", "SystemSecurity", "WebSecurity"], "timings": {"cache_n": 59, "prompt_n": 146, "prompt_ms": 103.186, "prompt_per_token_ms": 0.7067534246575343, "prompt_per_second": 1414.9206287674683, "predicted_n": 2, "predicted_ms": 30.932, "predicted_per_token_ms": 15.466, "predicted_per_second": 64.65795939480151}, "tps": 64.65795939480151}, {"id": "dfc7546b-9114-4a92-9b83-128b238161b8", "answer": "AD", "llm_answer": "A", "score": 0, "topics": ["SystemSecurity", "WebSecurity"], "timings": {"cache_n": 60, "prompt_n": 85, "prompt_ms": 96.144, "prompt_per_token_ms": 1.1311058823529412, "prompt_per_second": 884.090530870361, "predicted_n": 2, "predicted_ms": 29.702, "predicted_per_token_ms": 14.851, "predicted_per_second": 67.33553296074338}, "tps": 67.33553296074338}, {"id": "36400ca6-65eb-4dc6-acc4-3c44864261de", "answer": "ABD", "llm_answer": "ABD", "score": 1, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 62, "prompt_n": 140, "prompt_ms": 98.456, "prompt_per_token_ms": 0.7032571428571429, "prompt_per_second": 1421.9549849679042, "predicted_n": 3, "predicted_ms": 59.34, "predicted_per_token_ms": 19.78, "predicted_per_second": 50.556117290192105}, "tps": 50.556117290192105}, {"id": "021e7297-a382-4359-888e-e3d23dd010aa", "answer": "", "llm_answer": "ABCD", "score": 0, "topics": ["NetworkSecurity"], "timings": {"cache_n": 60, "prompt_n": 106, "prompt_ms": 94.874, "prompt_per_token_ms": 0.8950377358490565, "prompt_per_second": 1117.2713282880452, "predicted_n": 8, "predicted_ms": 206.47, "predicted_per_token_ms": 25.80875, "predicted_per_second": 38.746549135467625}, "tps": 38.746549135467625}, {"id": "41aadf67-8972-4c80-9f45-7dca303143c2", "answer": "B", "llm_answer": "A", "score": 0, "topics": ["SoftwareSecurity"], "timings": {"cache_n": 62, "prompt_n": 107, "prompt_ms": 94.179, "prompt_per_token_ms": 0.880177570093458, "prompt_per_second": 1136.1343823994732, "predicted_n": 2, "predicted_ms": 29.825, "predicted_per_token_ms": 14.9125, "predicted_per_second": 67.05783738474435}, "tps": 67.05783738474435}, {"id": "f0cf2363-137c-46cf-9736-6bca83f5dccd", "answer": "ABC", "llm_answer": "C", "score": 0, "topics": ["SystemSecurity", "ApplicationSecurity", "NetworkSecurity"], "timings": {"cache_n": 58, "prompt_n": 188, "prompt_ms": 97.284, "prompt_per_token_ms": 0.517468085106383, "prompt_per_second": 1932.4863286871425, "predicted_n": 2, "predicted_ms": 26.234, "predicted_per_token_ms": 13.117, "predicted_per_second": 76.23694442326752}, "tps": 76.23694442326752}, {"id": "41a66ec2-7e5f-4ca6-be52-c814b8579ac2", "answer": "BD", "llm_answer": "BD", "score": 1, "topics": ["NetworkSecurity"], "timings": {"cache_n": 62, "prompt_n": 126, "prompt_ms": 92.463, "prompt_per_token_ms": 0.7338333333333333, "prompt_per_second": 1362.7072450601863, "predicted_n": 2, "predicted_ms": 28.581, "predicted_per_token_ms": 14.2905, "predicted_per_second": 69.9765578531192}, "tps": 69.9765578531192}, {"id": "67d28089-913e-4c69-beb1-59e52c0728d5", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["ApplicationSecurity"], "timings": {"cache_n": 60, "prompt_n": 104, "prompt_ms": 95.729, "prompt_per_token_ms": 0.9204711538461539, "prompt_per_second": 1086.4001504246362, "predicted_n": 2, "predicted_ms": 29.234, "predicted_per_token_ms": 14.617, "predicted_per_second": 68.41349114045289}, "tps": 68.41349114045289}, {"id": "8793c56e-832c-4fa0-822a-5712af3012e4", "answer": "D", "llm_answer": "C", "score": 0, "topics": ["SystemSecurity"], "timings": {"cache_n": 62, "prompt_n": 95, "prompt_ms": 93.717, "prompt_per_token_ms": 0.9864947368421052, "prompt_per_second": 1013.6901522669312, "predicted_n": 2, "predicted_ms": 30.029, "predicted_per_token_ms": 15.0145, "predicted_per_second": 66.60228445835692}, "tps": 66.60228445835692}, {"id": "4915c448-2e4c-4349-bda7-ccb95cb112c8", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["ApplicationSecurity"], "timings": {"cache_n": 58, "prompt_n": 164, "prompt_ms": 99.757, "prompt_per_token_ms": 0.6082743902439025, "prompt_per_second": 1643.99490762553, "predicted_n": 2, "predicted_ms": 33.961, "predicted_per_token_ms": 16.9805, "predicted_per_second": 58.89108094579076}, "tps": 58.89108094579076}, {"id": "8e79f6a2-5848-461e-a778-3453fb2a3385", "answer": "AD", "llm_answer": "D", "score": 0, "topics": ["ApplicationSecurity", "SoftwareSecurity"], "timings": {"cache_n": 62, "prompt_n": 143, "prompt_ms": 98.05, "prompt_per_token_ms": 0.6856643356643356, "prompt_per_second": 1458.4395716471188, "predicted_n": 2, "predicted_ms": 29.174, "predicted_per_token_ms": 14.587, "predicted_per_second": 68.55419208884624}, "tps": 68.55419208884624}, {"id": "517936e0-f232-4d26-8549-20c27d718b1c", "answer": "AA", "llm_answer": "C", "score": 0, "topics": ["SystemSecurity", "PenTest"], "timings": {"cache_n": 60, "prompt_n": 120, "prompt_ms": 96.742, "prompt_per_token_ms": 0.8061833333333334, "prompt_per_second": 1240.4126439395504, "predicted_n": 2, "predicted_ms": 29.37, "predicted_per_token_ms": 14.685, "predicted_per_second": 68.09669731018046}, "tps": 68.09669731018046}, {"id": "a686c676-676f-4a8a-9907-3e3480ff45b8", "answer": "ABD", "llm_answer": "ABCD", "score": 0, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 59, "prompt_n": 179, "prompt_ms": 99.413, "prompt_per_token_ms": 0.5553798882681564, "prompt_per_second": 1800.5693420377615, "predicted_n": 3, "predicted_ms": 58.572, "predicted_per_token_ms": 19.524, "predicted_per_second": 51.21901249743904}, "tps": 51.21901249743904}, {"id": "79a7f66d-c870-4723-b7fe-6ebd10f2d10a", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 62, "prompt_n": 119, "prompt_ms": 97.865, "prompt_per_token_ms": 0.8223949579831933, "prompt_per_second": 1215.960762274562, "predicted_n": 2, "predicted_ms": 29.987, "predicted_per_token_ms": 14.9935, "predicted_per_second": 66.69556807950111}, "tps": 66.69556807950111}, {"id": "7a588568-aaaf-44c8-8f8a-8f2e9e731811", "answer": "AC", "llm_answer": "AC", "score": 1, "topics": ["SystemSecurity", "WebSecurity"], "timings": {"cache_n": 59, "prompt_n": 157, "prompt_ms": 97.026, "prompt_per_token_ms": 0.618, "prompt_per_second": 1618.1229773462783, "predicted_n": 2, "predicted_ms": 29.907, "predicted_per_token_ms": 14.9535, "predicted_per_second": 66.87397599224262}, "tps": 66.87397599224262}, {"id": "df83f007-e2ef-43b6-b6be-c39c5dd79185", "answer": "AD", "llm_answer": "A", "score": 0, "topics": ["WebSecurity", "PenTest"], "timings": {"cache_n": 61, "prompt_n": 125, "prompt_ms": 88.821, "prompt_per_token_ms": 0.710568, "prompt_per_second": 1407.3248443498724, "predicted_n": 2, "predicted_ms": 28.134, "predicted_per_token_ms": 14.067, "predicted_per_second": 71.0883628350039}, "tps": 71.0883628350039}, {"id": "ee589d77-a1f7-41a8-a665-3764f228bbe6", "answer": "AB", "llm_answer": "AB", "score": 1, "topics": ["SystemSecurity", "NetworkSecurity", "PenTest"], "timings": {"cache_n": 60, "prompt_n": 137, "prompt_ms": 97.26, "prompt_per_token_ms": 0.7099270072992702, "prompt_per_second": 1408.5955171704709, "predicted_n": 3, "predicted_ms": 56.215, "predicted_per_token_ms": 18.738333333333333, "predicted_per_second": 53.36653917993418}, "tps": 53.36653917993418}, {"id": "978ad75c-ebaa-4d34-aee5-b9d4ca44ac84", "answer": "BC", "llm_answer": "B", "score": 0, "topics": ["NetworkSecurity"], "timings": {"cache_n": 63, "prompt_n": 114, "prompt_ms": 97.163, "prompt_per_token_ms": 0.8523070175438596, "prompt_per_second": 1173.2861274353406, "predicted_n": 2, "predicted_ms": 29.19, "predicted_per_token_ms": 14.595, "predicted_per_second": 68.5166152792052}, "tps": 68.5166152792052}, {"id": "4ed72ff0-e601-46bb-a7f7-4cb0fdacba25", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["PenTest", "SystemSecurity"], "timings": {"cache_n": 59, "prompt_n": 78, "prompt_ms": 92.856, "prompt_per_token_ms": 1.1904615384615385, "prompt_per_second": 840.0103385887827, "predicted_n": 2, "predicted_ms": 28.895, "predicted_per_token_ms": 14.4475, "predicted_per_second": 69.21612735767434}, "tps": 69.21612735767434}, {"id": "92c2eb08-4bbf-47e4-badd-68c259e303ed", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["NetworkSecurity", "PenTest"], "timings": {"cache_n": 58, "prompt_n": 141, "prompt_ms": 97.263, "prompt_per_token_ms": 0.689808510638298, "prompt_per_second": 1449.6776780481784, "predicted_n": 2, "predicted_ms": 29.76, "predicted_per_token_ms": 14.88, "predicted_per_second": 67.20430107526882}, "tps": 67.20430107526882}, {"id": "49363144-ebc0-4ef4-9d69-0ab4b14f891d", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["WebSecurity", "ApplicationSecurity"], "timings": {"cache_n": 60, "prompt_n": 122, "prompt_ms": 96.985, "prompt_per_token_ms": 0.7949590163934426, "prompt_per_second": 1257.9264834768264, "predicted_n": 2, "predicted_ms": 31.88, "predicted_per_token_ms": 15.94, "predicted_per_second": 62.73525721455458}, "tps": 62.73525721455458}, {"id": "d3a74146-a3d8-4e4b-97ff-161b0d0cdf86", "answer": "ABCD", "llm_answer": "BCD", "score": 0, "topics": ["SystemSecurity", "Vulnerability"], "timings": {"cache_n": 61, "prompt_n": 106, "prompt_ms": 95.475, "prompt_per_token_ms": 0.9007075471698113, "prompt_per_second": 1110.2382822728464, "predicted_n": 6, "predicted_ms": 145.539, "predicted_per_token_ms": 24.2565, "predicted_per_second": 41.22606311710263}, "tps": 41.22606311710263}, {"id": "9a80386d-14b5-4583-b189-0d26445fed16", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["ApplicationSecurity", "SoftwareSecurity"], "timings": {"cache_n": 58, "prompt_n": 205, "prompt_ms": 103.541, "prompt_per_token_ms": 0.5050780487804878, "prompt_per_second": 1979.8920234496481, "predicted_n": 2, "predicted_ms": 31.77, "predicted_per_token_ms": 15.885, "predicted_per_second": 62.95247088448222}, "tps": 62.95247088448222}, {"id": "81a18f0e-6369-493c-8eff-c1a2e29ec4a5", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 59, "prompt_n": 146, "prompt_ms": 96.763, "prompt_per_token_ms": 0.6627602739726027, "prompt_per_second": 1508.8411892975619, "predicted_n": 2, "predicted_ms": 27.732, "predicted_per_token_ms": 13.866, "predicted_per_second": 72.11885186787826}, "tps": 72.11885186787826}, {"id": "b5c396a3-3e03-4fc5-ac39-2773817effc0", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SystemSecurity", "WebSecurity", "NetworkSecurity", "PenTest"], "timings": {"cache_n": 61, "prompt_n": 120, "prompt_ms": 88.419, "prompt_per_token_ms": 0.736825, "prompt_per_second": 1357.1743629762834, "predicted_n": 2, "predicted_ms": 29.833, "predicted_per_token_ms": 14.9165, "predicted_per_second": 67.03985519391279}, "tps": 67.03985519391279}, {"id": "b5fa9f90-00a3-4e92-bc14-b65ae7d1c90f", "answer": "AD", "llm_answer": "A", "score": 0, "topics": ["SystemSecurity", "WebSecurity"], "timings": {"cache_n": 65, "prompt_n": 82, "prompt_ms": 94.935, "prompt_per_token_ms": 1.1577439024390244, "prompt_per_second": 863.748880813188, "predicted_n": 2, "predicted_ms": 29.987, "predicted_per_token_ms": 14.9935, "predicted_per_second": 66.69556807950111}, "tps": 66.69556807950111}, {"id": "46430420-5811-4986-b9b3-10d174de9fae", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["NetworkSecurity", "PenTest"], "timings": {"cache_n": 59, "prompt_n": 104, "prompt_ms": 94.222, "prompt_per_token_ms": 0.9059807692307692, "prompt_per_second": 1103.7761881513873, "predicted_n": 2, "predicted_ms": 30.095, "predicted_per_token_ms": 15.0475, "predicted_per_second": 66.45622196378136}, "tps": 66.45622196378136}, {"id": "f8d517f5-8208-4ed2-b410-af9e9066c4be", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["ApplicationSecurity", "SystemSecurity"], "timings": {"cache_n": 60, "prompt_n": 91, "prompt_ms": 97.642, "prompt_per_token_ms": 1.072989010989011, "prompt_per_second": 931.9759939370352, "predicted_n": 2, "predicted_ms": 30.29, "predicted_per_token_ms": 15.145, "predicted_per_second": 66.02839220864972}, "tps": 66.02839220864972}, {"id": "b0f69a6f-5546-49a1-9861-639312eb67ff", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 58, "prompt_n": 110, "prompt_ms": 92.069, "prompt_per_token_ms": 0.8369909090909091, "prompt_per_second": 1194.7561068329187, "predicted_n": 2, "predicted_ms": 29.8, "predicted_per_token_ms": 14.9, "predicted_per_second": 67.11409395973155}, "tps": 67.11409395973155}, {"id": "4acf5933-4190-47ac-94f0-7a1d060e841e", "answer": "ABD", "llm_answer": "A", "score": 0, "topics": ["ApplicationSecurity"], "timings": {"cache_n": 58, "prompt_n": 151, "prompt_ms": 97.127, "prompt_per_token_ms": 0.6432251655629139, "prompt_per_second": 1554.6655409927212, "predicted_n": 2, "predicted_ms": 31.664, "predicted_per_token_ms": 15.832, "predicted_per_second": 63.16321374431531}, "tps": 63.16321374431531}, {"id": "43cc7654-8a82-448c-a9bc-c4d433e94fdf", "answer": "ACD", "llm_answer": "ABCD", "score": 0, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 60, "prompt_n": 115, "prompt_ms": 95.237, "prompt_per_token_ms": 0.8281478260869565, "prompt_per_second": 1207.5138864096937, "predicted_n": 64, "predicted_ms": 1830.567, "predicted_per_token_ms": 28.602609375, "predicted_per_second": 34.96184515508037}, "tps": 34.96184515508037}, {"id": "6781cb3d-b669-416c-9d18-0a3268a7b6ee", "answer": "AD", "llm_answer": "B", "score": 0, "topics": ["WebSecurity", "Vulnerability"], "timings": {"cache_n": 62, "prompt_n": 113, "prompt_ms": 93.258, "prompt_per_token_ms": 0.82529203539823, "prompt_per_second": 1211.6922944948424, "predicted_n": 2, "predicted_ms": 29.337, "predicted_per_token_ms": 14.6685, "predicted_per_second": 68.17329651975321}, "tps": 68.17329651975321}, {"id": "92440942-e4b4-492a-9af2-951c236c179c", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["WebSecurity", "Vulnerability"], "timings": {"cache_n": 59, "prompt_n": 177, "prompt_ms": 99.556, "prompt_per_token_ms": 0.5624632768361582, "prompt_per_second": 1777.8938486881755, "predicted_n": 2, "predicted_ms": 28.328, "predicted_per_token_ms": 14.164, "predicted_per_second": 70.60152499293984}, "tps": 70.60152499293984}, {"id": "96be588b-fe3e-401b-91f6-cd59f039c72d", "answer": "B", "llm_answer": "C", "score": 0, "topics": ["WebSecurity", "ApplicationSecurity"], "timings": {"cache_n": 62, "prompt_n": 153, "prompt_ms": 98.015, "prompt_per_token_ms": 0.6406209150326797, "prompt_per_second": 1560.9855634341682, "predicted_n": 2, "predicted_ms": 28.912, "predicted_per_token_ms": 14.456, "predicted_per_second": 69.17542888765911}, "tps": 69.17542888765911}, {"id": "e6d248a2-00f2-4cae-8a2e-e5c5666deab3", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["PenTest"], "timings": {"cache_n": 60, "prompt_n": 87, "prompt_ms": 92.326, "prompt_per_token_ms": 1.0612183908045976, "prompt_per_second": 942.3131079002665, "predicted_n": 3, "predicted_ms": 58.389, "predicted_per_token_ms": 19.463, "predicted_per_second": 51.379540666906436}, "tps": 51.379540666906436}, {"id": "b32942d3-c3c5-42a2-a546-61acb7fb9e54", "answer": "BD", "llm_answer": "B", "score": 0, "topics": ["WebSecurity", "SystemSecurity"], "timings": {"cache_n": 58, "prompt_n": 93, "prompt_ms": 93.174, "prompt_per_token_ms": 1.0018709677419355, "prompt_per_second": 998.1325262412261, "predicted_n": 2, "predicted_ms": 31.14, "predicted_per_token_ms": 15.57, "predicted_per_second": 64.22607578676943}, "tps": 64.22607578676943}, {"id": "004f3f07-40b6-48c5-9752-9cf7e18cea45", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 60, "prompt_n": 160, "prompt_ms": 98.573, "prompt_per_token_ms": 0.6160812499999999, "prompt_per_second": 1623.1625292930112, "predicted_n": 2, "predicted_ms": 29.354, "predicted_per_token_ms": 14.677, "predicted_per_second": 68.13381481229135}, "tps": 68.13381481229135}, {"id": "6f5efdae-73e6-434c-ad61-f012ce14efb0", "answer": "ABD", "llm_answer": "ABD", "score": 1, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 60, "prompt_n": 142, "prompt_ms": 97.454, "prompt_per_token_ms": 0.6862957746478873, "prompt_per_second": 1457.0977076364234, "predicted_n": 3, "predicted_ms": 57.718, "predicted_per_token_ms": 19.239333333333335, "predicted_per_second": 51.976852974808544}, "tps": 51.976852974808544}, {"id": "2816e2f2-dd5a-4a16-9f03-20d62d561aef", "answer": "C", "llm_answer": "A", "score": 0, "topics": ["SystemSecurity", "NetworkSecurity"], "timings": {"cache_n": 59, "prompt_n": 91, "prompt_ms": 93.713, "prompt_per_token_ms": 1.0298131868131868, "prompt_per_second": 971.0499076969044, "predicted_n": 2, "predicted_ms": 29.691, "predicted_per_token_ms": 14.8455, "predicted_per_second": 67.3604796066148}, "tps": 67.3604796066148}, {"id": "ce9f6d09-c86c-4df6-9a2b-c5f2520aaba1", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["NetworkSecurity", "PenTest"], "timings": {"cache_n": 59, "prompt_n": 101, "prompt_ms": 94.136, "prompt_per_token_ms": 0.932039603960396, "prompt_per_second": 1072.9157814226228, "predicted_n": 2, "predicted_ms": 28.692, "predicted_per_token_ms": 14.346, "predicted_per_second": 69.70584134950509}, "tps": 69.70584134950509}, {"id": "cc4cb2ef-ab6e-44ca-9405-2f17c07831b4", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SystemSecurity", "PenTest"], "timings": {"cache_n": 59, "prompt_n": 90, "prompt_ms": 78.588, "prompt_per_token_ms": 0.8732, "prompt_per_second": 1145.2130096197893, "predicted_n": 2, "predicted_ms": 28.922, "predicted_per_token_ms": 14.461, "predicted_per_second": 69.15151096051449}, "tps": 69.15151096051449}, {"id": "39aac00c-93d6-481f-81bf-1a5f458f4855", "answer": "AC", "llm_answer": "AC", "score": 1, "topics": ["SystemSecurity", "PenTest"], "timings": {"cache_n": 59, "prompt_n": 158, "prompt_ms": 96.359, "prompt_per_token_ms": 0.6098670886075949, "prompt_per_second": 1639.7015328095974, "predicted_n": 3, "predicted_ms": 57.271, "predicted_per_token_ms": 19.090333333333334, "predicted_per_second": 52.38253217160518}, "tps": 52.38253217160518}, {"id": "0e4fd5ee-f4bb-4af9-b1c1-3531be1858fa", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 61, "prompt_n": 140, "prompt_ms": 97.261, "prompt_per_token_ms": 0.6947214285714285, "prompt_per_second": 1439.4258747082592, "predicted_n": 2, "predicted_ms": 29.189, "predicted_per_token_ms": 14.5945, "predicted_per_second": 68.51896262290589}, "tps": 68.51896262290589}, {"id": "0de24da5-91c0-40e6-983e-256a57479451", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["SystemSecurity", "PenTest"], "timings": {"cache_n": 62, "prompt_n": 139, "prompt_ms": 100.857, "prompt_per_token_ms": 0.725589928057554, "prompt_per_second": 1378.18892094748, "predicted_n": 2, "predicted_ms": 29.773, "predicted_per_token_ms": 14.8865, "predicted_per_second": 67.1749571759648}, "tps": 67.1749571759648}, {"id": "7bc55574-72e3-4d13-8ccb-7e047d9389ba", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 62, "prompt_n": 153, "prompt_ms": 96.578, "prompt_per_token_ms": 0.6312287581699346, "prompt_per_second": 1584.2117252376318, "predicted_n": 2, "predicted_ms": 28.951, "predicted_per_token_ms": 14.4755, "predicted_per_second": 69.08224240958862}, "tps": 69.08224240958862}, {"id": "bacbd3ed-930a-47e5-863d-f23330a9bab0", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SystemSecurity", "PenTest"], "timings": {"cache_n": 60, "prompt_n": 143, "prompt_ms": 96.853, "prompt_per_token_ms": 0.6772937062937062, "prompt_per_second": 1476.4643325451973, "predicted_n": 2, "predicted_ms": 31.443, "predicted_per_token_ms": 15.7215, "predicted_per_second": 63.60716216645994}, "tps": 63.60716216645994}, {"id": "255d8a70-06ca-495a-a2c2-8cddec41e9ff", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["PenTest"], "timings": {"cache_n": 62, "prompt_n": 141, "prompt_ms": 97.99, "prompt_per_token_ms": 0.6949645390070922, "prompt_per_second": 1438.9223390141851, "predicted_n": 2, "predicted_ms": 29.698, "predicted_per_token_ms": 14.849, "predicted_per_second": 67.34460233012324}, "tps": 67.34460233012324}, {"id": "7a487fd8-f627-4ed5-b7c0-0ab83e23fde2", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["ApplicationSecurity"], "timings": {"cache_n": 60, "prompt_n": 109, "prompt_ms": 94.614, "prompt_per_token_ms": 0.8680183486238533, "prompt_per_second": 1152.0493795844166, "predicted_n": 2, "predicted_ms": 30.222, "predicted_per_token_ms": 15.111, "predicted_per_second": 66.1769571835087}, "tps": 66.1769571835087}, {"id": "395d4ce4-6906-4098-b16d-cdfe10cbc721", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["WebSecurity", "SystemSecurity"], "timings": {"cache_n": 59, "prompt_n": 122, "prompt_ms": 96.117, "prompt_per_token_ms": 0.787844262295082, "prompt_per_second": 1269.2863905448569, "predicted_n": 2, "predicted_ms": 29.041, "predicted_per_token_ms": 14.5205, "predicted_per_second": 68.86815192314315}, "tps": 68.86815192314315}, {"id": "2487ad79-4fee-4136-83f0-c86d66ab910d", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SystemSecurity", "PenTest"], "timings": {"cache_n": 62, "prompt_n": 92, "prompt_ms": 92.013, "prompt_per_token_ms": 1.0001413043478262, "prompt_per_second": 999.8587156162716, "predicted_n": 2, "predicted_ms": 26.46, "predicted_per_token_ms": 13.23, "predicted_per_second": 75.58578987150416}, "tps": 75.58578987150416}, {"id": "568df606-4b7e-43d1-92e8-63c5ec617ab4", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["SystemSecurity", "PenTest"], "timings": {"cache_n": 58, "prompt_n": 180, "prompt_ms": 91.612, "prompt_per_token_ms": 0.5089555555555555, "prompt_per_second": 1964.808103741868, "predicted_n": 2, "predicted_ms": 32.509, "predicted_per_token_ms": 16.2545, "predicted_per_second": 61.52142483619921}, "tps": 61.52142483619921}, {"id": "076473f3-2a3f-4a35-bcac-1ca14970e924", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["PenTest", "SystemSecurity"], "timings": {"cache_n": 62, "prompt_n": 121, "prompt_ms": 95.941, "prompt_per_token_ms": 0.792900826446281, "prompt_per_second": 1261.1917741111724, "predicted_n": 2, "predicted_ms": 29.317, "predicted_per_token_ms": 14.6585, "predicted_per_second": 68.21980420916192}, "tps": 68.21980420916192}, {"id": "edc6a7d3-f9ba-42fc-850b-1bf3c66a814b", "answer": "BC", "llm_answer": "C", "score": 0, "topics": ["WebSecurity"], "timings": {"cache_n": 58, "prompt_n": 168, "prompt_ms": 99.041, "prompt_per_token_ms": 0.5895297619047619, "prompt_per_second": 1696.2672024717037, "predicted_n": 2, "predicted_ms": 29.184, "predicted_per_token_ms": 14.592, "predicted_per_second": 68.53070175438596}, "tps": 68.53070175438596}, {"id": "1441eb93-8f33-463c-acd3-3f361a4f3eaa", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SystemSecurity"], "timings": {"cache_n": 60, "prompt_n": 118, "prompt_ms": 98.339, "prompt_per_token_ms": 0.8333813559322034, "prompt_per_second": 1199.9308514424592, "predicted_n": 2, "predicted_ms": 30.265, "predicted_per_token_ms": 15.1325, "predicted_per_second": 66.08293408227325}, "tps": 66.08293408227325}, {"id": "ecab667c-0062-4e2f-a56e-dd7908ca04b0", "answer": "BC", "llm_answer": "C", "score": 0, "topics": ["WebSecurity"], "timings": {"cache_n": 60, "prompt_n": 130, "prompt_ms": 97.129, "prompt_per_token_ms": 0.7471461538461539, "prompt_per_second": 1338.4262166809087, "predicted_n": 2, "predicted_ms": 28.953, "predicted_per_token_ms": 14.4765, "predicted_per_second": 69.07747038303458}, "tps": 69.07747038303458}, {"id": "af9fcea4-a1ef-4aa4-89c2-4740bfe84591", "answer": "AC", "llm_answer": "ABCD", "score": 0, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 59, "prompt_n": 101, "prompt_ms": 95.15, "prompt_per_token_ms": 0.9420792079207921, "prompt_per_second": 1061.4818707304257, "predicted_n": 8, "predicted_ms": 205.924, "predicted_per_token_ms": 25.7405, "predicted_per_second": 38.84928420193858}, "tps": 38.84928420193858}, {"id": "f9522b3d-ab7f-4536-ad03-be9e6ee3606e", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["WebSecurity", "Vulnerability"], "timings": {"cache_n": 62, "prompt_n": 121, "prompt_ms": 99.105, "prompt_per_token_ms": 0.8190495867768596, "prompt_per_second": 1220.9272993289944, "predicted_n": 2, "predicted_ms": 29.424, "predicted_per_token_ms": 14.712, "predicted_per_second": 67.97172376291464}, "tps": 67.97172376291464}, {"id": "0667a568-c043-47c5-be46-259d9e0c21e9", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["SystemSecurity", "WebSecurity", "Vulnerability"], "timings": {"cache_n": 67, "prompt_n": 131, "prompt_ms": 100.535, "prompt_per_token_ms": 0.7674427480916031, "prompt_per_second": 1303.028795941712, "predicted_n": 2, "predicted_ms": 30.509, "predicted_per_token_ms": 15.2545, "predicted_per_second": 65.55442656265365}, "tps": 65.55442656265365}, {"id": "bf07b95b-90a0-4d3d-9527-5893e83a606b", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["ApplicationSecurity"], "timings": {"cache_n": 60, "prompt_n": 200, "prompt_ms": 93.302, "prompt_per_token_ms": 0.46651000000000004, "prompt_per_second": 2143.5767722020964, "predicted_n": 2, "predicted_ms": 26.655, "predicted_per_token_ms": 13.3275, "predicted_per_second": 75.03282686175201}, "tps": 75.03282686175201}, {"id": "6b7a4764-f6bf-4242-967e-78542946cbf1", "answer": "BD", "llm_answer": "ABCD", "score": 0, "topics": ["SystemSecurity"], "timings": {"cache_n": 60, "prompt_n": 134, "prompt_ms": 94.558, "prompt_per_token_ms": 0.7056567164179105, "prompt_per_second": 1417.1196514308676, "predicted_n": 3, "predicted_ms": 59.113, "predicted_per_token_ms": 19.704333333333334, "predicted_per_second": 50.75025798047807}, "tps": 50.75025798047807}, {"id": "ced9b1ef-23c2-4671-a91b-1ac1de414510", "answer": "BC", "llm_answer": "B", "score": 0, "topics": ["SoftwareSecurity", "SystemSecurity"], "timings": {"cache_n": 59, "prompt_n": 191, "prompt_ms": 100.291, "prompt_per_token_ms": 0.5250837696335079, "prompt_per_second": 1904.4580271410196, "predicted_n": 2, "predicted_ms": 29.16, "predicted_per_token_ms": 14.58, "predicted_per_second": 68.58710562414267}, "tps": 68.58710562414267}, {"id": "97a5bba2-fe0c-47f0-b2f5-1189b032c11b", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["WebSecurity", "SystemSecurity"], "timings": {"cache_n": 62, "prompt_n": 106, "prompt_ms": 94.745, "prompt_per_token_ms": 0.8938207547169812, "prompt_per_second": 1118.7925484194416, "predicted_n": 2, "predicted_ms": 30.131, "predicted_per_token_ms": 15.0655, "predicted_per_second": 66.37682121403206}, "tps": 66.37682121403206}, {"id": "e578ec30-6641-47ae-b2f1-5956bc6c1023", "answer": "CD", "llm_answer": "C", "score": 0, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 62, "prompt_n": 162, "prompt_ms": 98.686, "prompt_per_token_ms": 0.6091728395061728, "prompt_per_second": 1641.5702328597772, "predicted_n": 2, "predicted_ms": 29.408, "predicted_per_token_ms": 14.704, "predicted_per_second": 68.00870511425462}, "tps": 68.00870511425462}, {"id": "55210ad2-6937-4622-bbad-3a1bddffa001", "answer": "AD", "llm_answer": "ACD", "score": 0, "topics": ["ApplicationSecurity", "SystemSecurity"], "timings": {"cache_n": 62, "prompt_n": 142, "prompt_ms": 97.1, "prompt_per_token_ms": 0.6838028169014084, "prompt_per_second": 1462.4098867147272, "predicted_n": 3, "predicted_ms": 60.86, "predicted_per_token_ms": 20.286666666666665, "predicted_per_second": 49.29346040092014}, "tps": 49.29346040092014}, {"id": "0dcadcb0-1f20-4644-abf9-b2f28d302495", "answer": "BC", "llm_answer": "BC", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 58, "prompt_n": 140, "prompt_ms": 95.957, "prompt_per_token_ms": 0.6854071428571428, "prompt_per_second": 1458.9868378544559, "predicted_n": 3, "predicted_ms": 60.218, "predicted_per_token_ms": 20.072666666666667, "predicted_per_second": 49.818990999368964}, "tps": 49.818990999368964}, {"id": "35c9b34d-ad77-4037-94a1-22fa5ac0d7fc", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["PenTest"], "timings": {"cache_n": 65, "prompt_n": 149, "prompt_ms": 97.069, "prompt_per_token_ms": 0.6514697986577181, "prompt_per_second": 1534.9905737156041, "predicted_n": 2, "predicted_ms": 29.482, "predicted_per_token_ms": 14.741, "predicted_per_second": 67.83800284919612}, "tps": 67.83800284919612}, {"id": "df923d9b-ab4c-42fd-a324-06ab1fbf85be", "answer": "AB", "llm_answer": "B", "score": 0, "topics": ["ApplicationSecurity", "SystemSecurity"], "timings": {"cache_n": 61, "prompt_n": 205, "prompt_ms": 102.015, "prompt_per_token_ms": 0.4976341463414634, "prompt_per_second": 2009.5084056266235, "predicted_n": 2, "predicted_ms": 29.518, "predicted_per_token_ms": 14.759, "predicted_per_second": 67.75526797208482}, "tps": 67.75526797208482}, {"id": "c4b4f4b8-4261-42ad-8782-6f06305e5457", "answer": "D", "llm_answer": "A", "score": 0, "topics": ["NetworkSecurity", "PenTest"], "timings": {"cache_n": 61, "prompt_n": 81, "prompt_ms": 87.927, "prompt_per_token_ms": 1.0855185185185185, "prompt_per_second": 921.2187382715206, "predicted_n": 2, "predicted_ms": 26.217, "predicted_per_token_ms": 13.1085, "predicted_per_second": 76.28637906701759}, "tps": 76.28637906701759}, {"id": "e2cb8e0e-db8e-4478-8550-2d3a9c49ceb8", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 58, "prompt_n": 89, "prompt_ms": 86.471, "prompt_per_token_ms": 0.9715842696629214, "prompt_per_second": 1029.2467995050363, "predicted_n": 2, "predicted_ms": 29.315, "predicted_per_token_ms": 14.6575, "predicted_per_second": 68.22445846836091}, "tps": 68.22445846836091}, {"id": "e064003d-c2a9-45ee-a3a6-eb1f8c39e022", "answer": "B", "llm_answer": "B", "score": 1, "topics": [], "timings": {"cache_n": 58, "prompt_n": 137, "prompt_ms": 97.148, "prompt_per_token_ms": 0.7091094890510948, "prompt_per_second": 1410.2194589698192, "predicted_n": 2, "predicted_ms": 28.717, "predicted_per_token_ms": 14.3585, "predicted_per_second": 69.64515792039559}, "tps": 69.64515792039559}, {"id": "f3738f8e-772e-4673-a519-556cb2c0d4ce", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SoftwareSecurity", "SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 59, "prompt_n": 161, "prompt_ms": 99.902, "prompt_per_token_ms": 0.6205093167701864, "prompt_per_second": 1611.5793477608056, "predicted_n": 2, "predicted_ms": 31.646, "predicted_per_token_ms": 15.823, "predicted_per_second": 63.19914049168931}, "tps": 63.19914049168931}, {"id": "f6a101d9-e014-4d3f-9c75-19d085bd528d", "answer": "BD", "llm_answer": "B", "score": 0, "topics": ["SystemSecurity", "NetworkSecurity", "PenTest"], "timings": {"cache_n": 66, "prompt_n": 132, "prompt_ms": 98.185, "prompt_per_token_ms": 0.7438257575757576, "prompt_per_second": 1344.4008758975401, "predicted_n": 2, "predicted_ms": 30.566, "predicted_per_token_ms": 15.283, "predicted_per_second": 65.43217954590068}, "tps": 65.43217954590068}, {"id": "b32fd1b9-5649-462a-abfe-5f9f94821516", "answer": "A", "llm_answer": "C", "score": 0, "topics": ["PenTest", "SystemSecurity"], "timings": {"cache_n": 61, "prompt_n": 89, "prompt_ms": 94.465, "prompt_per_token_ms": 1.0614044943820224, "prompt_per_second": 942.1478854602233, "predicted_n": 2, "predicted_ms": 30.338, "predicted_per_token_ms": 15.169, "predicted_per_second": 65.92392379194409}, "tps": 65.92392379194409}, {"id": "18438aa4-2893-4921-ac32-af09a722a1ea", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["WebSecurity", "SystemSecurity"], "timings": {"cache_n": 59, "prompt_n": 159, "prompt_ms": 98.454, "prompt_per_token_ms": 0.6192075471698113, "prompt_per_second": 1614.9673959412517, "predicted_n": 2, "predicted_ms": 29.255, "predicted_per_token_ms": 14.6275, "predicted_per_second": 68.36438215689626}, "tps": 68.36438215689626}, {"id": "1522eda8-af42-4dc5-8b48-833f4288c985", "answer": "AD", "llm_answer": "AD", "score": 1, "topics": ["SystemSecurity", "PenTest"], "timings": {"cache_n": 66, "prompt_n": 128, "prompt_ms": 97.229, "prompt_per_token_ms": 0.7596015625, "prompt_per_second": 1316.4796511328925, "predicted_n": 3, "predicted_ms": 59.328, "predicted_per_token_ms": 19.776, "predicted_per_second": 50.566343042071196}, "tps": 50.566343042071196}, {"id": "4b828139-e59d-4742-9207-1da126e1d1cf", "answer": "BD", "llm_answer": "BD", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 59, "prompt_n": 130, "prompt_ms": 98.076, "prompt_per_token_ms": 0.7544307692307691, "prompt_per_second": 1325.5026713976915, "predicted_n": 2, "predicted_ms": 29.63, "predicted_per_token_ms": 14.815, "predicted_per_second": 67.49915626054674}, "tps": 67.49915626054674}, {"id": "5d668143-e53f-44b0-b81a-853269639ab1", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["ApplicationSecurity", "SoftwareSecurity"], "timings": {"cache_n": 59, "prompt_n": 60, "prompt_ms": 90.665, "prompt_per_token_ms": 1.5110833333333333, "prompt_per_second": 661.7768708983621, "predicted_n": 2, "predicted_ms": 30.612, "predicted_per_token_ms": 15.306, "predicted_per_second": 65.33385600418137}, "tps": 65.33385600418137}, {"id": "321e0e0a-716f-4ffb-81cf-0c641a45a38f", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["PenTest", "SoftwareSecurity", "SystemSecurity"], "timings": {"cache_n": 58, "prompt_n": 111, "prompt_ms": 84.62, "prompt_per_token_ms": 0.7623423423423424, "prompt_per_second": 1311.7466320018907, "predicted_n": 2, "predicted_ms": 26.072, "predicted_per_token_ms": 13.036, "predicted_per_second": 76.71064743786438}, "tps": 76.71064743786438}, {"id": "2ea04001-2940-4673-a1e4-c17749e02d02", "answer": "AC", "llm_answer": "AC", "score": 1, "topics": ["SystemSecurity", "WebSecurity", "PenTest"], "timings": {"cache_n": 63, "prompt_n": 109, "prompt_ms": 93.612, "prompt_per_token_ms": 0.8588256880733944, "prompt_per_second": 1164.3806349613299, "predicted_n": 2, "predicted_ms": 29.226, "predicted_per_token_ms": 14.613, "predicted_per_second": 68.43221788818175}, "tps": 68.43221788818175}, {"id": "f91c1c98-4ecc-4f57-959b-37ab8a62808c", "answer": "AC", "llm_answer": "ABD", "score": 0, "topics": ["WebSecurity"], "timings": {"cache_n": 60, "prompt_n": 128, "prompt_ms": 95.91, "prompt_per_token_ms": 0.749296875, "prompt_per_second": 1334.5845063079971, "predicted_n": 4, "predicted_ms": 87.567, "predicted_per_token_ms": 21.89175, "predicted_per_second": 45.6793084152706}, "tps": 45.6793084152706}, {"id": "073cf188-0a75-4b0f-8509-1385e264888f", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["WebSecurity", "PenTest"], "timings": {"cache_n": 62, "prompt_n": 257, "prompt_ms": 106.695, "prompt_per_token_ms": 0.41515564202334626, "prompt_per_second": 2408.7351797178876, "predicted_n": 2, "predicted_ms": 31.934, "predicted_per_token_ms": 15.967, "predicted_per_second": 62.629172668629046}, "tps": 62.629172668629046}, {"id": "270a310a-cde5-411d-9c87-8b29f8302035", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["PenTest", "SystemSecurity"], "timings": {"cache_n": 61, "prompt_n": 108, "prompt_ms": 94.59, "prompt_per_token_ms": 0.8758333333333334, "prompt_per_second": 1141.7697431018078, "predicted_n": 2, "predicted_ms": 29.03, "predicted_per_token_ms": 14.515, "predicted_per_second": 68.89424733034791}, "tps": 68.89424733034791}, {"id": "3ab86d67-f9eb-40a6-a9e3-fb74f942aed3", "answer": "B", "llm_answer": "D", "score": 0, "topics": ["SystemSecurity", "PenTest"], "timings": {"cache_n": 60, "prompt_n": 110, "prompt_ms": 95.536, "prompt_per_token_ms": 0.8685090909090909, "prompt_per_second": 1151.3984257243344, "predicted_n": 2, "predicted_ms": 29.716, "predicted_per_token_ms": 14.858, "predicted_per_second": 67.3038093956118}, "tps": 67.3038093956118}, {"id": "ddcc58d5-7ff6-4c78-aa49-439b4f711c15", "answer": "AC", "llm_answer": "C", "score": 0, "topics": ["ApplicationSecurity"], "timings": {"cache_n": 59, "prompt_n": 138, "prompt_ms": 97.549, "prompt_per_token_ms": 0.7068768115942029, "prompt_per_second": 1414.6736511906834, "predicted_n": 2, "predicted_ms": 28.782, "predicted_per_token_ms": 14.391, "predicted_per_second": 69.48787436592315}, "tps": 69.48787436592315}, {"id": "7bb40181-1ee1-4114-ba28-cffc874fcb95", "answer": "ACD", "llm_answer": "ABCD", "score": 0, "topics": ["SystemSecurity"], "timings": {"cache_n": 61, "prompt_n": 178, "prompt_ms": 99.602, "prompt_per_token_ms": 0.559561797752809, "prompt_per_second": 1787.112708580149, "predicted_n": 5, "predicted_ms": 124.334, "predicted_per_token_ms": 24.8668, "predicted_per_second": 40.214261585728764}, "tps": 40.214261585728764}, {"id": "5f154f3e-89d6-4c08-99d7-9b6a6b8bd7df", "answer": "AC", "llm_answer": "BCD", "score": 0, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 61, "prompt_n": 189, "prompt_ms": 99.82, "prompt_per_token_ms": 0.5281481481481481, "prompt_per_second": 1893.4081346423563, "predicted_n": 4, "predicted_ms": 82.176, "predicted_per_token_ms": 20.544, "predicted_per_second": 48.67601246105919}, "tps": 48.67601246105919}, {"id": "25ac4805-c94f-4998-99c1-1fc035aecf2d", "answer": "AD", "llm_answer": "D", "score": 0, "topics": ["NetworkSecurity"], "timings": {"cache_n": 64, "prompt_n": 125, "prompt_ms": 86.151, "prompt_per_token_ms": 0.6892079999999999, "prompt_per_second": 1450.9407900082415, "predicted_n": 2, "predicted_ms": 29.551, "predicted_per_token_ms": 14.7755, "predicted_per_second": 67.67960475110826}, "tps": 67.67960475110826}, {"id": "f7732d1e-1a00-415a-a455-9a1fd21c318f", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["WebSecurity", "Vulnerability"], "timings": {"cache_n": 61, "prompt_n": 191, "prompt_ms": 99.925, "prompt_per_token_ms": 0.5231675392670156, "prompt_per_second": 1911.4335751813862, "predicted_n": 2, "predicted_ms": 29.285, "predicted_per_token_ms": 14.6425, "predicted_per_second": 68.29434864264982}, "tps": 68.29434864264982}, {"id": "4659b684-0441-41ae-ba69-69050a625a98", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 62, "prompt_n": 82, "prompt_ms": 92.82, "prompt_per_token_ms": 1.131951219512195, "prompt_per_second": 883.4302951950011, "predicted_n": 2, "predicted_ms": 29.177, "predicted_per_token_ms": 14.5885, "predicted_per_second": 68.54714329780306}, "tps": 68.54714329780306}, {"id": "8f25008b-0434-40ca-b89c-6e98303f2556", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["SystemSecurity"], "timings": {"cache_n": 58, "prompt_n": 127, "prompt_ms": 95.902, "prompt_per_token_ms": 0.7551338582677165, "prompt_per_second": 1324.2685241183708, "predicted_n": 2, "predicted_ms": 28.301, "predicted_per_token_ms": 14.1505, "predicted_per_second": 70.66888095827002}, "tps": 70.66888095827002}, {"id": "c4fb3fcc-6765-45af-b815-0ff01e8b1b3a", "answer": "B", "llm_answer": "A", "score": 0, "topics": ["SystemSecurity", "SoftwareSecurity"], "timings": {"cache_n": 61, "prompt_n": 129, "prompt_ms": 97.014, "prompt_per_token_ms": 0.7520465116279069, "prompt_per_second": 1329.704991032222, "predicted_n": 2, "predicted_ms": 28.813, "predicted_per_token_ms": 14.4065, "predicted_per_second": 69.41311213688266}, "tps": 69.41311213688266}, {"id": "b5341e73-f339-4906-b836-62e65c986e7b", "answer": "AD", "llm_answer": "C", "score": 0, "topics": ["SoftwareSecurity", "SystemSecurity", "ApplicationSecurity", "Vulnerability"], "timings": {"cache_n": 61, "prompt_n": 150, "prompt_ms": 100.098, "prompt_per_token_ms": 0.66732, "prompt_per_second": 1498.531439189594, "predicted_n": 2, "predicted_ms": 29.119, "predicted_per_token_ms": 14.5595, "predicted_per_second": 68.68367732408393}, "tps": 68.68367732408393}, {"id": "04dbcba9-f202-43d2-8556-e37e256a0d2e", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["SystemSecurity", "Vulnerability"], "timings": {"cache_n": 59, "prompt_n": 130, "prompt_ms": 97.38, "prompt_per_token_ms": 0.7490769230769231, "prompt_per_second": 1334.9763811871023, "predicted_n": 2, "predicted_ms": 29.304, "predicted_per_token_ms": 14.652, "predicted_per_second": 68.25006825006825}, "tps": 68.25006825006825}, {"id": "9b292455-3bc7-4779-859f-cb66f4ada24b", "answer": "AD", "llm_answer": "AD", "score": 1, "topics": ["WebSecurity", "ApplicationSecurity", "NetworkSecurity"], "timings": {"cache_n": 59, "prompt_n": 126, "prompt_ms": 95.857, "prompt_per_token_ms": 0.7607698412698413, "prompt_per_second": 1314.4579947213035, "predicted_n": 2, "predicted_ms": 29.449, "predicted_per_token_ms": 14.7245, "predicted_per_second": 67.9140208496044}, "tps": 67.9140208496044}, {"id": "7c11e198-9626-4f9c-ae86-7a45780cb2aa", "answer": "AC", "llm_answer": "A", "score": 0, "topics": ["ApplicationSecurity", "Vulnerability"], "timings": {"cache_n": 59, "prompt_n": 154, "prompt_ms": 97.944, "prompt_per_token_ms": 0.636, "prompt_per_second": 1572.3270440251572, "predicted_n": 2, "predicted_ms": 29.768, "predicted_per_token_ms": 14.884, "predicted_per_second": 67.18624025799517}, "tps": 67.18624025799517}, {"id": "e036b2d8-6082-41b8-8798-796b370a9c35", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["NetworkSecurity", "PenTest"], "timings": {"cache_n": 59, "prompt_n": 118, "prompt_ms": 93.53, "prompt_per_token_ms": 0.7926271186440678, "prompt_per_second": 1261.627285362985, "predicted_n": 2, "predicted_ms": 26.107, "predicted_per_token_ms": 13.0535, "predicted_per_second": 76.60780633546558}, "tps": 76.60780633546558}, {"id": "2876ad88-0e1d-4826-9f51-e9448151bb03", "answer": "D", "llm_answer": "A", "score": 0, "topics": ["SystemSecurity", "WebSecurity", "PenTest"], "timings": {"cache_n": 59, "prompt_n": 122, "prompt_ms": 87.595, "prompt_per_token_ms": 0.7179918032786885, "prompt_per_second": 1392.773560134711, "predicted_n": 2, "predicted_ms": 30.506, "predicted_per_token_ms": 15.253, "predicted_per_second": 65.56087327083196}, "tps": 65.56087327083196}, {"id": "b97a932b-2e87-4d97-bdf3-76eca44a5223", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["WebSecurity", "ApplicationSecurity"], "timings": {"cache_n": 60, "prompt_n": 114, "prompt_ms": 94.725, "prompt_per_token_ms": 0.8309210526315789, "prompt_per_second": 1203.4837688044338, "predicted_n": 2, "predicted_ms": 29.893, "predicted_per_token_ms": 14.9465, "predicted_per_second": 66.90529555414311}, "tps": 66.90529555414311}, {"id": "299c70fa-fcc9-41d6-8331-9b76fa0fb9b8", "answer": "CD", "llm_answer": "AC", "score": 0, "topics": ["PenTest", "ApplicationSecurity"], "timings": {"cache_n": 62, "prompt_n": 96, "prompt_ms": 92.321, "prompt_per_token_ms": 0.9616770833333333, "prompt_per_second": 1039.85008827894, "predicted_n": 2, "predicted_ms": 29.158, "predicted_per_token_ms": 14.579, "predicted_per_second": 68.59181013786953}, "tps": 68.59181013786953}, {"id": "2ef39110-a42e-4c2e-b29d-dc07f9cd42e1", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SystemSecurity", "ApplicationSecurity", "Vulnerability"], "timings": {"cache_n": 59, "prompt_n": 141, "prompt_ms": 98.641, "prompt_per_token_ms": 0.699581560283688, "prompt_per_second": 1429.4258979531837, "predicted_n": 2, "predicted_ms": 29.926, "predicted_per_token_ms": 14.963, "predicted_per_second": 66.83151774376796}, "tps": 66.83151774376796}, {"id": "4a96d754-f396-4580-91d9-8285f5d14489", "answer": "AD", "llm_answer": "ABCD", "score": 0, "topics": ["SystemSecurity", "PenTest"], "timings": {"cache_n": 60, "prompt_n": 171, "prompt_ms": 99.628, "prompt_per_token_ms": 0.5826198830409357, "prompt_per_second": 1716.3849520215201, "predicted_n": 64, "predicted_ms": 1833.316, "predicted_per_token_ms": 28.6455625, "predicted_per_second": 34.90942096179818}, "tps": 34.90942096179818}, {"id": "d263a7ca-ebbb-4876-a1ed-d7ae38d8a75d", "answer": "CD", "llm_answer": "ACD", "score": 0, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 60, "prompt_n": 231, "prompt_ms": 106.16, "prompt_per_token_ms": 0.45956709956709957, "prompt_per_second": 2175.9608138658627, "predicted_n": 3, "predicted_ms": 61.291, "predicted_per_token_ms": 20.430333333333333, "predicted_per_second": 48.9468274298021}, "tps": 48.9468274298021}, {"id": "77bb9129-cc5f-4e9c-8167-5e6cca43ff04", "answer": "A", "llm_answer": "D", "score": 0, "topics": ["WebSecurity", "SystemSecurity"], "timings": {"cache_n": 59, "prompt_n": 116, "prompt_ms": 90.822, "prompt_per_token_ms": 0.7829482758620689, "prompt_per_second": 1277.2235801898219, "predicted_n": 2, "predicted_ms": 29.481, "predicted_per_token_ms": 14.7405, "predicted_per_second": 67.84030392456158}, "tps": 67.84030392456158}, {"id": "13546507-f32f-4965-9ae7-2e55e10c0195", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["SoftwareSecurity", "ApplicationSecurity"], "timings": {"cache_n": 59, "prompt_n": 120, "prompt_ms": 97.116, "prompt_per_token_ms": 0.8093, "prompt_per_second": 1235.6357345854442, "predicted_n": 2, "predicted_ms": 29.944, "predicted_per_token_ms": 14.972, "predicted_per_second": 66.7913438418381}, "tps": 66.7913438418381}, {"id": "6cd7dbac-86c9-4082-9897-1cd7f747bae8", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["ApplicationSecurity", "WebSecurity"], "timings": {"cache_n": 61, "prompt_n": 115, "prompt_ms": 95.404, "prompt_per_token_ms": 0.8296, "prompt_per_second": 1205.400192864031, "predicted_n": 2, "predicted_ms": 28.832, "predicted_per_token_ms": 14.416, "predicted_per_second": 69.36736958934517}, "tps": 69.36736958934517}, {"id": "8c147d57-09da-466e-bf03-9cc98089dc4b", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["WebSecurity", "ApplicationSecurity"], "timings": {"cache_n": 59, "prompt_n": 125, "prompt_ms": 95.794, "prompt_per_token_ms": 0.7663519999999999, "prompt_per_second": 1304.8833956197675, "predicted_n": 2, "predicted_ms": 30.037, "predicted_per_token_ms": 15.0185, "predicted_per_second": 66.58454572693678}, "tps": 66.58454572693678}, {"id": "71ed401d-afa9-47c1-a71e-d9a742f1d832", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SystemSecurity", "PenTest"], "timings": {"cache_n": 60, "prompt_n": 124, "prompt_ms": 95.798, "prompt_per_token_ms": 0.7725645161290323, "prompt_per_second": 1294.390279546546, "predicted_n": 2, "predicted_ms": 29.82, "predicted_per_token_ms": 14.91, "predicted_per_second": 67.0690811535882}, "tps": 67.0690811535882}, {"id": "d150584b-1899-4a9d-b73c-e81544bae767", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["PenTest"], "timings": {"cache_n": 60, "prompt_n": 110, "prompt_ms": 96.479, "prompt_per_token_ms": 0.8770818181818182, "prompt_per_second": 1140.1444874014035, "predicted_n": 2, "predicted_ms": 33.25, "predicted_per_token_ms": 16.625, "predicted_per_second": 60.150375939849624}, "tps": 60.150375939849624}, {"id": "a3090369-00c5-41a3-a120-d2e8efbc99ed", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["WebSecurity", "ApplicationSecurity"], "timings": {"cache_n": 64, "prompt_n": 157, "prompt_ms": 97.92, "prompt_per_token_ms": 0.6236942675159236, "prompt_per_second": 1603.3496732026142, "predicted_n": 2, "predicted_ms": 29.581, "predicted_per_token_ms": 14.7905, "predicted_per_second": 67.6109664987661}, "tps": 67.6109664987661}, {"id": "23d92a83-8f6e-4200-9e7a-f26059825e04", "answer": "AC", "llm_answer": "B", "score": 0, "topics": ["SystemSecurity"], "timings": {"cache_n": 63, "prompt_n": 128, "prompt_ms": 95.38, "prompt_per_token_ms": 0.74515625, "prompt_per_second": 1342.000419375131, "predicted_n": 2, "predicted_ms": 28.516, "predicted_per_token_ms": 14.258, "predicted_per_second": 70.13606396409034}, "tps": 70.13606396409034}, {"id": "56a9b6d1-dda9-4c22-a9c2-5e58f2bc56a0", "answer": "B", "llm_answer": "B", "score": 1, "topics": [], "timings": {"cache_n": 62, "prompt_n": 125, "prompt_ms": 96.743, "prompt_per_token_ms": 0.773944, "prompt_per_second": 1292.0831481347489, "predicted_n": 2, "predicted_ms": 28.808, "predicted_per_token_ms": 14.404, "predicted_per_second": 69.42515967786726}, "tps": 69.42515967786726}, {"id": "e0e3e2a0-fde5-4183-bccd-3eec28d0fede", "answer": "AC", "llm_answer": "A", "score": 0, "topics": ["WebSecurity", "Vulnerability"], "timings": {"cache_n": 59, "prompt_n": 135, "prompt_ms": 96.039, "prompt_per_token_ms": 0.7114, "prompt_per_second": 1405.678942929435, "predicted_n": 2, "predicted_ms": 26.359, "predicted_per_token_ms": 13.1795, "predicted_per_second": 75.87541257255586}, "tps": 75.87541257255586}, {"id": "eba2a157-edf2-4bbc-b4c5-dee7b11aa440", "answer": "CD", "llm_answer": "ACD", "score": 0, "topics": ["ApplicationSecurity"], "timings": {"cache_n": 60, "prompt_n": 164, "prompt_ms": 87.3, "prompt_per_token_ms": 0.5323170731707317, "prompt_per_second": 1878.5796105383733, "predicted_n": 3, "predicted_ms": 58.645, "predicted_per_token_ms": 19.548333333333336, "predicted_per_second": 51.15525620257481}, "tps": 51.15525620257481}, {"id": "c721b246-3924-448a-8ee8-6757af082657", "answer": "ABCD", "llm_answer": "ABCD", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 60, "prompt_n": 168, "prompt_ms": 98.409, "prompt_per_token_ms": 0.5857678571428572, "prompt_per_second": 1707.160930402707, "predicted_n": 3, "predicted_ms": 58.377, "predicted_per_token_ms": 19.459, "predicted_per_second": 51.39010226630351}, "tps": 51.39010226630351}, {"id": "a4af6379-7f52-4fc4-ae00-2c0eb7bd0d9d", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["WebSecurity", "PenTest"], "timings": {"cache_n": 59, "prompt_n": 133, "prompt_ms": 99.772, "prompt_per_token_ms": 0.7501654135338346, "prompt_per_second": 1333.0393296716513, "predicted_n": 2, "predicted_ms": 32.412, "predicted_per_token_ms": 16.206, "predicted_per_second": 61.70554115759595}, "tps": 61.70554115759595}, {"id": "7a1f4cf2-b8fb-45fe-9799-4bc3a7991079", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["SystemSecurity", "WebSecurity"], "timings": {"cache_n": 60, "prompt_n": 94, "prompt_ms": 97.393, "prompt_per_token_ms": 1.036095744680851, "prompt_per_second": 965.1617672728019, "predicted_n": 2, "predicted_ms": 30.232, "predicted_per_token_ms": 15.116, "predicted_per_second": 66.15506747816883}, "tps": 66.15506747816883}, {"id": "713b84c3-e45d-493b-995c-94760908691b", "answer": "BC", "llm_answer": "B", "score": 0, "topics": ["ApplicationSecurity", "SystemSecurity"], "timings": {"cache_n": 58, "prompt_n": 103, "prompt_ms": 96.192, "prompt_per_token_ms": 0.9339029126213592, "prompt_per_second": 1070.7751164337992, "predicted_n": 2, "predicted_ms": 29.806, "predicted_per_token_ms": 14.903, "predicted_per_second": 67.10058377507885}, "tps": 67.10058377507885}, {"id": "3501acb4-7f8c-4133-966e-271e1f00611a", "answer": "BC", "llm_answer": "BC", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 62, "prompt_n": 108, "prompt_ms": 93.819, "prompt_per_token_ms": 0.8686944444444444, "prompt_per_second": 1151.1527515748407, "predicted_n": 3, "predicted_ms": 59.811, "predicted_per_token_ms": 19.937, "predicted_per_second": 50.1579976927321}, "tps": 50.1579976927321}, {"id": "75dac808-c889-42bd-b24f-f2cf65989944", "answer": "AC", "llm_answer": "AB", "score": 0, "topics": ["WebSecurity", "SystemSecurity", "PenTest"], "timings": {"cache_n": 61, "prompt_n": 117, "prompt_ms": 94.713, "prompt_per_token_ms": 0.8095128205128205, "prompt_per_second": 1235.310886573121, "predicted_n": 4, "predicted_ms": 92.514, "predicted_per_token_ms": 23.1285, "predicted_per_second": 43.23669931037465}, "tps": 43.23669931037465}, {"id": "ef70a02d-edd6-4da4-b8da-1644087fcaf4", "answer": "BD", "llm_answer": "BD", "score": 1, "topics": ["WebSecurity", "SystemSecurity"], "timings": {"cache_n": 59, "prompt_n": 115, "prompt_ms": 96.012, "prompt_per_token_ms": 0.8348869565217392, "prompt_per_second": 1197.7669457984418, "predicted_n": 3, "predicted_ms": 59.146, "predicted_per_token_ms": 19.715333333333334, "predicted_per_second": 50.72194231224428}, "tps": 50.72194231224428}, {"id": "59e85d9a-477e-4eb6-b184-e6f73408e807", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["NetworkSecurity", "PenTest"], "timings": {"cache_n": 60, "prompt_n": 138, "prompt_ms": 87.16, "prompt_per_token_ms": 0.6315942028985507, "prompt_per_second": 1583.295089490592, "predicted_n": 2, "predicted_ms": 26.653, "predicted_per_token_ms": 13.3265, "predicted_per_second": 75.03845720931977}, "tps": 75.03845720931977}, {"id": "42183b42-d996-42d4-b8b7-f753bc7f635b", "answer": "BD", "llm_answer": "B", "score": 0, "topics": ["ApplicationSecurity", "SystemSecurity"], "timings": {"cache_n": 59, "prompt_n": 120, "prompt_ms": 94.006, "prompt_per_token_ms": 0.7833833333333333, "prompt_per_second": 1276.514265046912, "predicted_n": 2, "predicted_ms": 31.12, "predicted_per_token_ms": 15.56, "predicted_per_second": 64.26735218508998}, "tps": 64.26735218508998}, {"id": "225db329-bca6-4617-9304-a47703fba814", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 60, "prompt_n": 134, "prompt_ms": 96.721, "prompt_per_token_ms": 0.7217985074626866, "prompt_per_second": 1385.428190361969, "predicted_n": 2, "predicted_ms": 29.67, "predicted_per_token_ms": 14.835, "predicted_per_second": 67.40815638692281}, "tps": 67.40815638692281}, {"id": "4897ec33-27af-48e4-b12d-f9d8116c2f14", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["SoftwareSecurity", "SystemSecurity"], "timings": {"cache_n": 59, "prompt_n": 235, "prompt_ms": 104.802, "prompt_per_token_ms": 0.44596595744680856, "prompt_per_second": 2242.3236197782485, "predicted_n": 2, "predicted_ms": 31.021, "predicted_per_token_ms": 15.5105, "predicted_per_second": 64.47245414396698}, "tps": 64.47245414396698}, {"id": "f9381558-28d9-469f-92de-0378827e2d58", "answer": "BC", "llm_answer": "D", "score": 0, "topics": ["SoftwareSecurity", "SystemSecurity", "Vulnerability"], "timings": {"cache_n": 62, "prompt_n": 115, "prompt_ms": 95.023, "prompt_per_token_ms": 0.8262869565217391, "prompt_per_second": 1210.233311935005, "predicted_n": 2, "predicted_ms": 29.125, "predicted_per_token_ms": 14.5625, "predicted_per_second": 68.6695278969957}, "tps": 68.6695278969957}, {"id": "14bcba9a-38d5-4050-8d32-a076998c6a67", "answer": "ABC", "llm_answer": "A", "score": 0, "topics": ["WebSecurity", "PenTest"], "timings": {"cache_n": 60, "prompt_n": 122, "prompt_ms": 96.312, "prompt_per_token_ms": 0.7894426229508197, "prompt_per_second": 1266.7165046930809, "predicted_n": 2, "predicted_ms": 30.532, "predicted_per_token_ms": 15.266, "predicted_per_second": 65.5050438883794}, "tps": 65.5050438883794}, {"id": "9f2642f4-5696-4da5-925a-c36d84855bb6", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 58, "prompt_n": 150, "prompt_ms": 98.574, "prompt_per_token_ms": 0.65716, "prompt_per_second": 1521.6994339278106, "predicted_n": 2, "predicted_ms": 28.997, "predicted_per_token_ms": 14.4985, "predicted_per_second": 68.97265234334586}, "tps": 68.97265234334586}, {"id": "032870af-52ef-4fb3-88c5-e25551dcdaae", "answer": "BD", "llm_answer": "BD", "score": 1, "topics": ["SoftwareSecurity", "ApplicationSecurity", "Vulnerability"], "timings": {"cache_n": 60, "prompt_n": 148, "prompt_ms": 96.804, "prompt_per_token_ms": 0.6540810810810811, "prompt_per_second": 1528.8624437006736, "predicted_n": 4, "predicted_ms": 91.678, "predicted_per_token_ms": 22.9195, "predicted_per_second": 43.63096926198216}, "tps": 43.63096926198216}, {"id": "f7ce4962-608a-4cdd-adb1-602a92534135", "answer": "ACD", "llm_answer": "ACD", "score": 1, "topics": ["ApplicationSecurity", "SystemSecurity", "NetworkSecurity"], "timings": {"cache_n": 59, "prompt_n": 174, "prompt_ms": 103.46, "prompt_per_token_ms": 0.5945977011494252, "prompt_per_second": 1681.8093949352408, "predicted_n": 3, "predicted_ms": 59.635, "predicted_per_token_ms": 19.878333333333334, "predicted_per_second": 50.306028339062635}, "tps": 50.306028339062635}, {"id": "7dd5e078-47cb-4897-8480-fae7f463c3d1", "answer": "ABD", "llm_answer": "BD", "score": 0, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 59, "prompt_n": 118, "prompt_ms": 93.474, "prompt_per_token_ms": 0.7921525423728814, "prompt_per_second": 1262.3831225795407, "predicted_n": 3, "predicted_ms": 53.271, "predicted_per_token_ms": 17.757, "predicted_per_second": 56.315819113589}, "tps": 56.315819113589}, {"id": "b9cf6635-81c2-42fa-81c6-1f1fa65abf66", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 62, "prompt_n": 114, "prompt_ms": 93.752, "prompt_per_token_ms": 0.8223859649122807, "prompt_per_second": 1215.97405922007, "predicted_n": 2, "predicted_ms": 28.748, "predicted_per_token_ms": 14.374, "predicted_per_second": 69.57005704744678}, "tps": 69.57005704744678}, {"id": "94c13d6d-3918-4e23-afe9-c60e6a04acaf", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["ApplicationSecurity", "SystemSecurity"], "timings": {"cache_n": 59, "prompt_n": 184, "prompt_ms": 101.214, "prompt_per_token_ms": 0.5500760869565218, "prompt_per_second": 1817.9303258442508, "predicted_n": 2, "predicted_ms": 30.018, "predicted_per_token_ms": 15.009, "predicted_per_second": 66.6266906522753}, "tps": 66.6266906522753}, {"id": "6a7c1834-a597-4410-aa9b-798f0992963f", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["SystemSecurity"], "timings": {"cache_n": 60, "prompt_n": 207, "prompt_ms": 103.442, "prompt_per_token_ms": 0.499719806763285, "prompt_per_second": 2001.1214013650163, "predicted_n": 2, "predicted_ms": 30.531, "predicted_per_token_ms": 15.2655, "predicted_per_second": 65.5071894140382}, "tps": 65.5071894140382}, {"id": "e9c21b19-de0a-4de6-a19b-5a346346cc6e", "answer": "C", "llm_answer": "D", "score": 0, "topics": ["ApplicationSecurity"], "timings": {"cache_n": 59, "prompt_n": 93, "prompt_ms": 92.79, "prompt_per_token_ms": 0.997741935483871, "prompt_per_second": 1002.2631749110895, "predicted_n": 2, "predicted_ms": 30.841, "predicted_per_token_ms": 15.4205, "predicted_per_second": 64.84874031321941}, "tps": 64.84874031321941}, {"id": "265c7bfa-e4fc-4da2-9c01-5e7f9d4b58da", "answer": "ACD", "llm_answer": "ABCD", "score": 0, "topics": ["ApplicationSecurity", "SystemSecurity"], "timings": {"cache_n": 58, "prompt_n": 78, "prompt_ms": 92.288, "prompt_per_token_ms": 1.1831794871794872, "prompt_per_second": 845.1803051317615, "predicted_n": 64, "predicted_ms": 1855.791, "predicted_per_token_ms": 28.996734375, "predicted_per_second": 34.486642084157104}, "tps": 34.486642084157104}, {"id": "84e5ea5c-27b3-47fa-a811-c4007d7f6066", "answer": "B", "llm_answer": "D", "score": 0, "topics": ["WebSecurity", "Vulnerability"], "timings": {"cache_n": 60, "prompt_n": 118, "prompt_ms": 90.619, "prompt_per_token_ms": 0.767957627118644, "prompt_per_second": 1302.1551771703505, "predicted_n": 2, "predicted_ms": 26.376, "predicted_per_token_ms": 13.188, "predicted_per_second": 75.82650894752805}, "tps": 75.82650894752805}, {"id": "be499028-1453-403b-b685-4ae3ed291cbb", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["WebSecurity", "ApplicationSecurity"], "timings": {"cache_n": 59, "prompt_n": 153, "prompt_ms": 95.441, "prompt_per_token_ms": 0.623797385620915, "prompt_per_second": 1603.0846281996205, "predicted_n": 2, "predicted_ms": 28.355, "predicted_per_token_ms": 14.1775, "predicted_per_second": 70.53429730206312}, "tps": 70.53429730206312}, {"id": "ba58e416-aca3-4f3b-a5a0-a2bc97ea0631", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["WebSecurity", "PenTest"], "timings": {"cache_n": 63, "prompt_n": 148, "prompt_ms": 97.371, "prompt_per_token_ms": 0.6579121621621621, "prompt_per_second": 1519.959741606844, "predicted_n": 2, "predicted_ms": 29.0, "predicted_per_token_ms": 14.5, "predicted_per_second": 68.96551724137932}, "tps": 68.96551724137932}, {"id": "295e5999-8a43-4aab-b6d9-1656e51ecaa7", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 61, "prompt_n": 98, "prompt_ms": 94.206, "prompt_per_token_ms": 0.9612857142857143, "prompt_per_second": 1040.2734433050973, "predicted_n": 2, "predicted_ms": 28.731, "predicted_per_token_ms": 14.3655, "predicted_per_second": 69.61122132887822}, "tps": 69.61122132887822}, {"id": "3a5ee894-ad28-47b4-a2cc-4d4c4962010b", "answer": "ABD", "llm_answer": "AB", "score": 0, "topics": ["ApplicationSecurity", "Vulnerability"], "timings": {"cache_n": 66, "prompt_n": 125, "prompt_ms": 97.294, "prompt_per_token_ms": 0.7783519999999999, "prompt_per_second": 1284.7657615063622, "predicted_n": 3, "predicted_ms": 57.375, "predicted_per_token_ms": 19.125, "predicted_per_second": 52.287581699346404}, "tps": 52.287581699346404}, {"id": "bdc6d9c9-02bf-4560-bbff-18e41f9ea2fe", "answer": "AB", "llm_answer": "A", "score": 0, "topics": ["WebSecurity", "PenTest"], "timings": {"cache_n": 62, "prompt_n": 83, "prompt_ms": 92.711, "prompt_per_token_ms": 1.117, "prompt_per_second": 895.2551477170994, "predicted_n": 2, "predicted_ms": 30.502, "predicted_per_token_ms": 15.251, "predicted_per_second": 65.56947085437021}, "tps": 65.56947085437021}, {"id": "5fc115a4-3aa3-4269-84de-16980373f5a1", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["WebSecurity", "ApplicationSecurity"], "timings": {"cache_n": 58, "prompt_n": 140, "prompt_ms": 96.781, "prompt_per_token_ms": 0.6912928571428572, "prompt_per_second": 1446.564924933613, "predicted_n": 2, "predicted_ms": 29.762, "predicted_per_token_ms": 14.881, "predicted_per_second": 67.19978496068812}, "tps": 67.19978496068812}, {"id": "2e5dcce8-94fc-4524-a0eb-a69787b77cfc", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["NetworkSecurity", "PenTest"], "timings": {"cache_n": 60, "prompt_n": 129, "prompt_ms": 97.146, "prompt_per_token_ms": 0.7530697674418605, "prompt_per_second": 1327.898215057748, "predicted_n": 2, "predicted_ms": 29.806, "predicted_per_token_ms": 14.903, "predicted_per_second": 67.10058377507885}, "tps": 67.10058377507885}, {"id": "e36944c4-14d3-4df5-97b1-24709850b445", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["PenTest"], "timings": {"cache_n": 64, "prompt_n": 49, "prompt_ms": 90.272, "prompt_per_token_ms": 1.8422857142857143, "prompt_per_second": 542.803970223325, "predicted_n": 2, "predicted_ms": 31.964, "predicted_per_token_ms": 15.982, "predicted_per_second": 62.57039169065199}, "tps": 62.57039169065199}, {"id": "a45a8d69-6b2a-4760-a15f-7c0825b9c97e", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SystemSecurity"], "timings": {"cache_n": 58, "prompt_n": 85, "prompt_ms": 93.581, "prompt_per_token_ms": 1.1009529411764707, "prompt_per_second": 908.3040360756991, "predicted_n": 2, "predicted_ms": 30.597, "predicted_per_token_ms": 15.2985, "predicted_per_second": 65.3658855443344}, "tps": 65.3658855443344}, {"id": "d30fac69-cd23-4cf6-8d9b-d2b0bfb69183", "answer": "BD", "llm_answer": "BD", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 58, "prompt_n": 128, "prompt_ms": 90.182, "prompt_per_token_ms": 0.704546875, "prompt_per_second": 1419.3519771129493, "predicted_n": 3, "predicted_ms": 55.327, "predicted_per_token_ms": 18.442333333333334, "predicted_per_second": 54.22307372530591}, "tps": 54.22307372530591}, {"id": "90be6a0d-1efc-4fd9-82cf-754adfbd61b3", "answer": "BC", "llm_answer": "BC", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 61, "prompt_n": 181, "prompt_ms": 98.366, "prompt_per_token_ms": 0.5434585635359116, "prompt_per_second": 1840.0666897098592, "predicted_n": 3, "predicted_ms": 59.133, "predicted_per_token_ms": 19.711000000000002, "predicted_per_second": 50.7330931966922}, "tps": 50.7330931966922}, {"id": "179996e9-39cc-4221-9ee7-9c3ecb78a77c", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 61, "prompt_n": 140, "prompt_ms": 98.529, "prompt_per_token_ms": 0.7037785714285714, "prompt_per_second": 1420.9014604837157, "predicted_n": 2, "predicted_ms": 28.815, "predicted_per_token_ms": 14.4075, "predicted_per_second": 69.40829429116779}, "tps": 69.40829429116779}, {"id": "bb669984-d478-4eb5-9440-eed026eb7745", "answer": "AC", "llm_answer": "AC", "score": 1, "topics": ["NetworkSecurity"], "timings": {"cache_n": 62, "prompt_n": 99, "prompt_ms": 95.376, "prompt_per_token_ms": 0.9633939393939395, "prompt_per_second": 1037.9969803724207, "predicted_n": 2, "predicted_ms": 30.024, "predicted_per_token_ms": 15.012, "predicted_per_second": 66.61337596589395}, "tps": 66.61337596589395}, {"id": "ea7afbdc-ea4e-44d8-a0d2-357617db0529", "answer": "B", "llm_answer": "A", "score": 0, "topics": ["ApplicationSecurity", "SystemSecurity"], "timings": {"cache_n": 58, "prompt_n": 88, "prompt_ms": 92.327, "prompt_per_token_ms": 1.0491704545454545, "prompt_per_second": 953.1339694780509, "predicted_n": 2, "predicted_ms": 30.0, "predicted_per_token_ms": 15.0, "predicted_per_second": 66.66666666666667}, "tps": 66.66666666666667}, {"id": "b8519767-7e16-4211-be00-7044d8a1f7d2", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["SoftwareSecurity", "SystemSecurity"], "timings": {"cache_n": 62, "prompt_n": 178, "prompt_ms": 99.948, "prompt_per_token_ms": 0.5615056179775281, "prompt_per_second": 1780.9260815624127, "predicted_n": 2, "predicted_ms": 31.924, "predicted_per_token_ms": 15.962, "predicted_per_second": 62.64879087833605}, "tps": 62.64879087833605}, {"id": "e055330e-c32a-48f1-abe8-33846e29bb0f", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["WebSecurity", "ApplicationSecurity"], "timings": {"cache_n": 60, "prompt_n": 158, "prompt_ms": 85.912, "prompt_per_token_ms": 0.543746835443038, "prompt_per_second": 1839.0911630505632, "predicted_n": 2, "predicted_ms": 28.738, "predicted_per_token_ms": 14.369, "predicted_per_second": 69.59426543252836}, "tps": 69.59426543252836}, {"id": "ca4b35e5-75c7-49a0-814d-131dc3a4ed5b", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["PenTest", "NetworkSecurity"], "timings": {"cache_n": 60, "prompt_n": 135, "prompt_ms": 94.051, "prompt_per_token_ms": 0.6966740740740741, "prompt_per_second": 1435.3914365610146, "predicted_n": 2, "predicted_ms": 29.23, "predicted_per_token_ms": 14.615, "predicted_per_second": 68.42285323297982}, "tps": 68.42285323297982}, {"id": "5ec6df4a-afb5-4728-93f6-8f2db43732b2", "answer": "D", "llm_answer": "D", "score": 1, "topics": ["SystemSecurity", "PenTest"], "timings": {"cache_n": 63, "prompt_n": 148, "prompt_ms": 97.165, "prompt_per_token_ms": 0.6565202702702703, "prompt_per_second": 1523.182215818453, "predicted_n": 2, "predicted_ms": 29.336, "predicted_per_token_ms": 14.668, "predicted_per_second": 68.17562039814563}, "tps": 68.17562039814563}, {"id": "6b79cc3e-cdc3-47c0-9cb7-58a740011f24", "answer": "ABC", "llm_answer": "ABC", "score": 1, "topics": [], "timings": {"cache_n": 59, "prompt_n": 136, "prompt_ms": 98.818, "prompt_per_token_ms": 0.7266029411764706, "prompt_per_second": 1376.2674816329009, "predicted_n": 4, "predicted_ms": 87.212, "predicted_per_token_ms": 21.803, "predicted_per_second": 45.86524790166491}, "tps": 45.86524790166491}, {"id": "95e6888c-63d2-4d0e-9dd0-df58d80735a7", "answer": "B", "llm_answer": "D", "score": 0, "topics": ["ApplicationSecurity", "PenTest"], "timings": {"cache_n": 59, "prompt_n": 115, "prompt_ms": 95.444, "prompt_per_token_ms": 0.8299478260869565, "prompt_per_second": 1204.8950169733037, "predicted_n": 2, "predicted_ms": 29.627, "predicted_per_token_ms": 14.8135, "predicted_per_second": 67.50599115671515}, "tps": 67.50599115671515}, {"id": "54e77c73-e28b-478a-a015-a1a340a1e3f3", "answer": "AB", "llm_answer": "B", "score": 0, "topics": ["SoftwareSecurity", "SystemSecurity"], "timings": {"cache_n": 63, "prompt_n": 155, "prompt_ms": 96.945, "prompt_per_token_ms": 0.6254516129032258, "prompt_per_second": 1598.8447057609985, "predicted_n": 2, "predicted_ms": 30.132, "predicted_per_token_ms": 15.066, "predicted_per_second": 66.37461834594451}, "tps": 66.37461834594451}, {"id": "a5082d39-fd57-4477-b419-8df279a97bd5", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["WebSecurity", "ApplicationSecurity"], "timings": {"cache_n": 60, "prompt_n": 187, "prompt_ms": 99.922, "prompt_per_token_ms": 0.5343422459893048, "prompt_per_second": 1871.4597385961051, "predicted_n": 2, "predicted_ms": 29.132, "predicted_per_token_ms": 14.566, "predicted_per_second": 68.65302759851708}, "tps": 68.65302759851708}, {"id": "dd8dc0b4-a129-4be6-9b51-17458f4a29d0", "answer": "ABD", "llm_answer": "BCD", "score": 0, "topics": ["NetworkSecurity", "Vulnerability"], "timings": {"cache_n": 62, "prompt_n": 131, "prompt_ms": 97.033, "prompt_per_token_ms": 0.7407099236641221, "prompt_per_second": 1350.0561664588336, "predicted_n": 4, "predicted_ms": 89.886, "predicted_per_token_ms": 22.4715, "predicted_per_second": 44.500812139821555}, "tps": 44.500812139821555}, {"id": "3720deea-6d3b-4810-b52d-c85d3e28589d", "answer": "A", "llm_answer": "B", "score": 0, "topics": ["WebSecurity"], "timings": {"cache_n": 59, "prompt_n": 175, "prompt_ms": 99.223, "prompt_per_token_ms": 0.5669885714285714, "prompt_per_second": 1763.7039799240097, "predicted_n": 2, "predicted_ms": 29.317, "predicted_per_token_ms": 14.6585, "predicted_per_second": 68.21980420916192}, "tps": 68.21980420916192}, {"id": "b021d5d1-4b87-4dc1-ae6f-df4e0e5969c2", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["ApplicationSecurity", "WebSecurity"], "timings": {"cache_n": 61, "prompt_n": 92, "prompt_ms": 82.662, "prompt_per_token_ms": 0.8985000000000001, "prompt_per_second": 1112.9660545353365, "predicted_n": 2, "predicted_ms": 27.016, "predicted_per_token_ms": 13.508, "predicted_per_second": 74.03020432336393}, "tps": 74.03020432336393}, {"id": "8da52677-a449-49d1-b40e-388059030673", "answer": "AC", "llm_answer": "A", "score": 0, "topics": ["WebSecurity"], "timings": {"cache_n": 59, "prompt_n": 144, "prompt_ms": 96.41, "prompt_per_token_ms": 0.6695138888888889, "prompt_per_second": 1493.6209936728555, "predicted_n": 2, "predicted_ms": 29.326, "predicted_per_token_ms": 14.663, "predicted_per_second": 68.19886789879288}, "tps": 68.19886789879288}, {"id": "836617f0-7044-48fb-8fe2-4d8a33f8f716", "answer": "BC", "llm_answer": "B", "score": 0, "topics": ["ApplicationSecurity", "WebSecurity", "SystemSecurity"], "timings": {"cache_n": 61, "prompt_n": 125, "prompt_ms": 95.61, "prompt_per_token_ms": 0.76488, "prompt_per_second": 1307.3946239933061, "predicted_n": 2, "predicted_ms": 29.788, "predicted_per_token_ms": 14.894, "predicted_per_second": 67.14113065664026}, "tps": 67.14113065664026}, {"id": "837f9b7a-3e00-43e3-b91d-e75ab49e23b7", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["WebSecurity", "Vulnerability"], "timings": {"cache_n": 60, "prompt_n": 111, "prompt_ms": 94.434, "prompt_per_token_ms": 0.8507567567567568, "prompt_per_second": 1175.4241057246331, "predicted_n": 2, "predicted_ms": 29.638, "predicted_per_token_ms": 14.819, "predicted_per_second": 67.4809366354005}, "tps": 67.4809366354005}, {"id": "46605378-381d-478f-9f77-cd66355ecded", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["NetworkSecurity"], "timings": {"cache_n": 59, "prompt_n": 125, "prompt_ms": 95.65, "prompt_per_token_ms": 0.7652, "prompt_per_second": 1306.8478829064295, "predicted_n": 2, "predicted_ms": 29.206, "predicted_per_token_ms": 14.603, "predicted_per_second": 68.47907964116962}, "tps": 68.47907964116962}, {"id": "89a8424e-adce-4fb6-a11a-0bcd75792308", "answer": "D", "llm_answer": "D", "score": 1, "topics": ["ApplicationSecurity", "SystemSecurity", "Vulnerability"], "timings": {"cache_n": 62, "prompt_n": 114, "prompt_ms": 96.62, "prompt_per_token_ms": 0.8475438596491228, "prompt_per_second": 1179.8799420409853, "predicted_n": 2, "predicted_ms": 30.027, "predicted_per_token_ms": 15.0135, "predicted_per_second": 66.60672061811036}, "tps": 66.60672061811036}, {"id": "e917cc8a-31d0-49f7-b698-0d4152004878", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 62, "prompt_n": 99, "prompt_ms": 94.668, "prompt_per_token_ms": 0.9562424242424243, "prompt_per_second": 1045.759918874382, "predicted_n": 2, "predicted_ms": 30.246, "predicted_per_token_ms": 15.123, "predicted_per_second": 66.12444620776301}, "tps": 66.12444620776301}, {"id": "61a52083-6d04-4992-aac8-c1506f57d903", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 58, "prompt_n": 74, "prompt_ms": 91.3, "prompt_per_token_ms": 1.2337837837837837, "prompt_per_second": 810.514786418401, "predicted_n": 2, "predicted_ms": 30.084, "predicted_per_token_ms": 15.042, "predicted_per_second": 66.48052120728627}, "tps": 66.48052120728627}, {"id": "d41e602e-108a-4738-a981-8b87eef9846c", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["SystemSecurity", "ApplicationSecurity", "PenTest"], "timings": {"cache_n": 59, "prompt_n": 116, "prompt_ms": 96.35, "prompt_per_token_ms": 0.830603448275862, "prompt_per_second": 1203.9439543331605, "predicted_n": 2, "predicted_ms": 31.359, "predicted_per_token_ms": 15.6795, "predicted_per_second": 63.77754392678337}, "tps": 63.77754392678337}, {"id": "1984dc00-72ae-49f9-b460-41d2e2cf47e1", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SoftwareSecurity", "Vulnerability"], "timings": {"cache_n": 58, "prompt_n": 182, "prompt_ms": 105.838, "prompt_per_token_ms": 0.5815274725274725, "prompt_per_second": 1719.609214081899, "predicted_n": 2, "predicted_ms": 30.021, "predicted_per_token_ms": 15.0105, "predicted_per_second": 66.62003264381599}, "tps": 66.62003264381599}, {"id": "5f68a75e-3093-4dbf-8f40-16de6ddb497c", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["ApplicationSecurity", "SoftwareSecurity", "Vulnerability"], "timings": {"cache_n": 63, "prompt_n": 124, "prompt_ms": 92.084, "prompt_per_token_ms": 0.7426129032258065, "prompt_per_second": 1346.5965857260762, "predicted_n": 2, "predicted_ms": 25.768, "predicted_per_token_ms": 12.884, "predicted_per_second": 77.6156473144986}, "tps": 77.6156473144986}, {"id": "c6c46ade-35eb-40f5-b780-f76aeb1c8e66", "answer": "ACD", "llm_answer": "ACD", "score": 1, "topics": ["SystemSecurity"], "timings": {"cache_n": 60, "prompt_n": 148, "prompt_ms": 94.299, "prompt_per_token_ms": 0.6371554054054055, "prompt_per_second": 1569.475816286493, "predicted_n": 3, "predicted_ms": 60.199, "predicted_per_token_ms": 20.066333333333333, "predicted_per_second": 49.83471486237313}, "tps": 49.83471486237313}, {"id": "64b5de39-e030-4fa2-817c-58c49b31d287", "answer": "AC", "llm_answer": "B", "score": 0, "topics": ["WebSecurity", "ApplicationSecurity"], "timings": {"cache_n": 59, "prompt_n": 217, "prompt_ms": 102.423, "prompt_per_token_ms": 0.47199539170506916, "prompt_per_second": 2118.6647530339865, "predicted_n": 2, "predicted_ms": 30.077, "predicted_per_token_ms": 15.0385, "predicted_per_second": 66.49599361638461}, "tps": 66.49599361638461}, {"id": "738308b7-aa49-434d-ae43-bdc8ce92aa64", "answer": "B", "llm_answer": "C", "score": 0, "topics": ["WebSecurity", "SystemSecurity"], "timings": {"cache_n": 60, "prompt_n": 133, "prompt_ms": 97.971, "prompt_per_token_ms": 0.736624060150376, "prompt_per_second": 1357.544579518429, "predicted_n": 2, "predicted_ms": 29.629, "predicted_per_token_ms": 14.8145, "predicted_per_second": 67.50143440548112}, "tps": 67.50143440548112}, {"id": "95df2f1a-0320-4620-acc4-ca079f2c62f5", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["WebSecurity", "Vulnerability"], "timings": {"cache_n": 59, "prompt_n": 121, "prompt_ms": 95.079, "prompt_per_token_ms": 0.7857768595041322, "prompt_per_second": 1272.6259216020362, "predicted_n": 2, "predicted_ms": 29.622, "predicted_per_token_ms": 14.811, "predicted_per_second": 67.51738572682466}, "tps": 67.51738572682466}, {"id": "16926b4e-36fd-4a10-b02a-014cf8906d18", "answer": "AB", "llm_answer": "C", "score": 0, "topics": ["MemorySafety", "Vulnerability"], "timings": {"cache_n": 62, "prompt_n": 135, "prompt_ms": 97.003, "prompt_per_token_ms": 0.7185407407407407, "prompt_per_second": 1391.7095347566571, "predicted_n": 2, "predicted_ms": 32.489, "predicted_per_token_ms": 16.2445, "predicted_per_second": 61.559296992828344}, "tps": 61.559296992828344}, {"id": "bc646d4b-3f83-4ab3-9a40-3f7a5e15074e", "answer": "D", "llm_answer": "C", "score": 0, "topics": ["SystemSecurity", "WebSecurity"], "timings": {"cache_n": 60, "prompt_n": 213, "prompt_ms": 100.581, "prompt_per_token_ms": 0.4722112676056338, "prompt_per_second": 2117.696185164196, "predicted_n": 2, "predicted_ms": 32.926, "predicted_per_token_ms": 16.463, "predicted_per_second": 60.74227054607301}, "tps": 60.74227054607301}, {"id": "b01335c1-6e44-4642-bd11-7209d57684be", "answer": "ACD", "llm_answer": "ACD", "score": 1, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 63, "prompt_n": 181, "prompt_ms": 102.098, "prompt_per_token_ms": 0.5640773480662984, "prompt_per_second": 1772.8065192266256, "predicted_n": 3, "predicted_ms": 59.135, "predicted_per_token_ms": 19.711666666666666, "predicted_per_second": 50.73137735689524}, "tps": 50.73137735689524}, {"id": "20e7685f-fd17-4b6e-a127-19b7508dabe2", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 59, "prompt_n": 145, "prompt_ms": 98.151, "prompt_per_token_ms": 0.676903448275862, "prompt_per_second": 1477.315564793023, "predicted_n": 2, "predicted_ms": 29.373, "predicted_per_token_ms": 14.6865, "predicted_per_second": 68.08974228032547}, "tps": 68.08974228032547}, {"id": "0b73c283-7dca-4108-bb31-d140f44011cf", "answer": "B", "llm_answer": "C", "score": 0, "topics": ["WebSecurity", "PenTest"], "timings": {"cache_n": 60, "prompt_n": 168, "prompt_ms": 96.427, "prompt_per_token_ms": 0.5739702380952382, "prompt_per_second": 1742.2506144544575, "predicted_n": 2, "predicted_ms": 26.363, "predicted_per_token_ms": 13.1815, "predicted_per_second": 75.86390016310739}, "tps": 75.86390016310739}, {"id": "2452864d-a7a1-4b82-89f9-953326e22a5c", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["WebSecurity", "SystemSecurity"], "timings": {"cache_n": 61, "prompt_n": 139, "prompt_ms": 92.425, "prompt_per_token_ms": 0.6649280575539568, "prompt_per_second": 1503.9220989991886, "predicted_n": 2, "predicted_ms": 32.28, "predicted_per_token_ms": 16.14, "predicted_per_second": 61.95786864931846}, "tps": 61.95786864931846}, {"id": "ca2b46c8-0944-4af2-a2df-34a153e2e78d", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["WebSecurity", "ApplicationSecurity"], "timings": {"cache_n": 63, "prompt_n": 153, "prompt_ms": 97.649, "prompt_per_token_ms": 0.6382287581699346, "prompt_per_second": 1566.8363219285402, "predicted_n": 2, "predicted_ms": 28.222, "predicted_per_token_ms": 14.111, "predicted_per_second": 70.86669973779321}, "tps": 70.86669973779321}, {"id": "a50af7df-b139-400b-a274-f2d97bac9a10", "answer": "AD", "llm_answer": "D", "score": 0, "topics": ["MemorySafety", "SystemSecurity", "ApplicationSecurity", "Vulnerability"], "timings": {"cache_n": 63, "prompt_n": 173, "prompt_ms": 99.518, "prompt_per_token_ms": 0.5752485549132949, "prompt_per_second": 1738.378986715971, "predicted_n": 2, "predicted_ms": 33.227, "predicted_per_token_ms": 16.6135, "predicted_per_second": 60.19201251993861}, "tps": 60.19201251993861}, {"id": "08f679b8-b7e2-4461-8b25-a6f45205d7a7", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["ApplicationSecurity", "SystemSecurity"], "timings": {"cache_n": 61, "prompt_n": 97, "prompt_ms": 95.759, "prompt_per_token_ms": 0.9872061855670103, "prompt_per_second": 1012.9596173727796, "predicted_n": 2, "predicted_ms": 29.275, "predicted_per_token_ms": 14.6375, "predicted_per_second": 68.31767719897523}, "tps": 68.31767719897523}, {"id": "c1a7e1a8-65b1-4d7d-847b-dcb778570dd1", "answer": "BC", "llm_answer": "AC", "score": 0, "topics": ["WebSecurity"], "timings": {"cache_n": 58, "prompt_n": 139, "prompt_ms": 97.241, "prompt_per_token_ms": 0.6995755395683453, "prompt_per_second": 1429.4381999362408, "predicted_n": 4, "predicted_ms": 89.719, "predicted_per_token_ms": 22.42975, "predicted_per_second": 44.58364449001884}, "tps": 44.58364449001884}, {"id": "6fe3098b-bb55-40f8-ba86-864e0f46633f", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SoftwareSecurity", "Vulnerability"], "timings": {"cache_n": 60, "prompt_n": 115, "prompt_ms": 95.556, "prompt_per_token_ms": 0.8309217391304348, "prompt_per_second": 1203.4827744987233, "predicted_n": 2, "predicted_ms": 31.561, "predicted_per_token_ms": 15.7805, "predicted_per_second": 63.36934824625329}, "tps": 63.36934824625329}, {"id": "aaa8c0a9-e5d5-4547-bdf9-6127cdcf36d7", "answer": "AC", "llm_answer": "AC", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 62, "prompt_n": 144, "prompt_ms": 94.551, "prompt_per_token_ms": 0.6566041666666667, "prompt_per_second": 1522.9875939968906, "predicted_n": 2, "predicted_ms": 31.082, "predicted_per_token_ms": 15.541, "predicted_per_second": 64.3459236857345}, "tps": 64.3459236857345}, {"id": "0cf6a7c1-cc20-4554-b022-4e6923169542", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["WebSecurity", "SystemSecurity"], "timings": {"cache_n": 61, "prompt_n": 113, "prompt_ms": 95.932, "prompt_per_token_ms": 0.8489557522123894, "prompt_per_second": 1177.9176916982863, "predicted_n": 2, "predicted_ms": 30.231, "predicted_per_token_ms": 15.1155, "predicted_per_second": 66.15725579702953}, "tps": 66.15725579702953}, {"id": "19c9e763-ce52-4980-8bf4-a548ae8c9192", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["WebSecurity", "Vulnerability"], "timings": {"cache_n": 59, "prompt_n": 139, "prompt_ms": 97.395, "prompt_per_token_ms": 0.70068345323741, "prompt_per_second": 1427.1779865496176, "predicted_n": 2, "predicted_ms": 29.095, "predicted_per_token_ms": 14.5475, "predicted_per_second": 68.74033339061695}, "tps": 68.74033339061695}, {"id": "ae28ad19-0e87-4a14-8fc5-836d5404288b", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["MemorySafety"], "timings": {"cache_n": 60, "prompt_n": 80, "prompt_ms": 80.137, "prompt_per_token_ms": 1.0017125, "prompt_per_second": 998.2904276426619, "predicted_n": 2, "predicted_ms": 27.214, "predicted_per_token_ms": 13.607, "predicted_per_second": 73.49158521349305}, "tps": 73.49158521349305}, {"id": "67c070a2-3270-4599-87e0-a88ee24079f9", "answer": "AC", "llm_answer": "AC", "score": 1, "topics": ["ApplicationSecurity", "SystemSecurity"], "timings": {"cache_n": 58, "prompt_n": 127, "prompt_ms": 96.191, "prompt_per_token_ms": 0.7574094488188977, "prompt_per_second": 1320.2898400058216, "predicted_n": 2, "predicted_ms": 29.348, "predicted_per_token_ms": 14.674, "predicted_per_second": 68.14774430966335}, "tps": 68.14774430966335}, {"id": "4023cd5b-05c9-4007-8129-db2d641d9a6b", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["NetworkSecurity"], "timings": {"cache_n": 62, "prompt_n": 135, "prompt_ms": 97.399, "prompt_per_token_ms": 0.721474074074074, "prompt_per_second": 1386.0511914906724, "predicted_n": 2, "predicted_ms": 30.412, "predicted_per_token_ms": 15.206, "predicted_per_second": 65.76351440220965}, "tps": 65.76351440220965}, {"id": "1b509813-d583-4e78-8f86-7ccb33e44b75", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["SystemSecurity", "PenTest"], "timings": {"cache_n": 64, "prompt_n": 145, "prompt_ms": 96.98, "prompt_per_token_ms": 0.6688275862068965, "prompt_per_second": 1495.153639925758, "predicted_n": 2, "predicted_ms": 30.244, "predicted_per_token_ms": 15.122, "predicted_per_second": 66.12881893929375}, "tps": 66.12881893929375}, {"id": "36041960-6745-4b20-a042-5868cd109b9c", "answer": "BC", "llm_answer": "B", "score": 0, "topics": ["WebSecurity"], "timings": {"cache_n": 59, "prompt_n": 111, "prompt_ms": 94.979, "prompt_per_token_ms": 0.8556666666666667, "prompt_per_second": 1168.6793922867162, "predicted_n": 2, "predicted_ms": 30.027, "predicted_per_token_ms": 15.0135, "predicted_per_second": 66.60672061811036}, "tps": 66.60672061811036}, {"id": "f9ad54de-9c0c-42b4-bfc7-963c9807e85b", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SoftwareSecurity", "SystemSecurity", "Vulnerability"], "timings": {"cache_n": 59, "prompt_n": 127, "prompt_ms": 96.148, "prompt_per_token_ms": 0.7570708661417322, "prompt_per_second": 1320.880309522819, "predicted_n": 2, "predicted_ms": 31.56, "predicted_per_token_ms": 15.78, "predicted_per_second": 63.37135614702155}, "tps": 63.37135614702155}, {"id": "588b953a-a218-42ce-93ce-2c5b02ccf278", "answer": "AC", "llm_answer": "C", "score": 0, "topics": ["ApplicationSecurity", "SystemSecurity"], "timings": {"cache_n": 61, "prompt_n": 128, "prompt_ms": 95.869, "prompt_per_token_ms": 0.7489765625, "prompt_per_second": 1335.155263953937, "predicted_n": 2, "predicted_ms": 29.091, "predicted_per_token_ms": 14.5455, "predicted_per_second": 68.74978515692138}, "tps": 68.74978515692138}, {"id": "08006dda-e127-4fd9-b98b-4913545e5a8d", "answer": "A", "llm_answer": "C", "score": 0, "topics": ["SystemSecurity", "WebSecurity"], "timings": {"cache_n": 59, "prompt_n": 186, "prompt_ms": 99.829, "prompt_per_token_ms": 0.5367150537634409, "prompt_per_second": 1863.1860481423234, "predicted_n": 2, "predicted_ms": 29.568, "predicted_per_token_ms": 14.784, "predicted_per_second": 67.64069264069263}, "tps": 67.64069264069263}, {"id": "e5eb8b71-f00d-48bf-964d-e7d0c452b561", "answer": "AD", "llm_answer": "B", "score": 0, "topics": ["PenTest", "NetworkSecurity"], "timings": {"cache_n": 62, "prompt_n": 102, "prompt_ms": 97.491, "prompt_per_token_ms": 0.9557941176470588, "prompt_per_second": 1046.2504231159799, "predicted_n": 2, "predicted_ms": 30.623, "predicted_per_token_ms": 15.3115, "predicted_per_second": 65.31038761715051}, "tps": 65.31038761715051}, {"id": "e7d6110e-225d-4cb9-9e63-08d5255cefb9", "answer": "AC", "llm_answer": "ABCD", "score": 0, "topics": ["SystemSecurity"], "timings": {"cache_n": 58, "prompt_n": 148, "prompt_ms": 97.89, "prompt_per_token_ms": 0.6614189189189189, "prompt_per_second": 1511.901113494739, "predicted_n": 8, "predicted_ms": 191.509, "predicted_per_token_ms": 23.938625, "predicted_per_second": 41.77349367392655}, "tps": 41.77349367392655}, {"id": "be776a3b-c799-4ad5-a246-d7cd33b8a2e0", "answer": "D", "llm_answer": "C", "score": 0, "topics": ["WebSecurity", "ApplicationSecurity", "Vulnerability"], "timings": {"cache_n": 59, "prompt_n": 141, "prompt_ms": 97.388, "prompt_per_token_ms": 0.690695035460993, "prompt_per_second": 1447.8169795046617, "predicted_n": 2, "predicted_ms": 29.587, "predicted_per_token_ms": 14.7935, "predicted_per_second": 67.59725555142461}, "tps": 67.59725555142461}, {"id": "73ea8b90-bd82-4e7f-8695-848ea4f1b2f6", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["SystemSecurity", "PenTest"], "timings": {"cache_n": 63, "prompt_n": 101, "prompt_ms": 94.189, "prompt_per_token_ms": 0.9325643564356435, "prompt_per_second": 1072.3120534244977, "predicted_n": 2, "predicted_ms": 28.859, "predicted_per_token_ms": 14.4295, "predicted_per_second": 69.30247063307806}, "tps": 69.30247063307806}, {"id": "400d0595-809e-4174-9f5e-bfd275471f09", "answer": "D", "llm_answer": "D", "score": 1, "topics": ["ApplicationSecurity", "SystemSecurity"], "timings": {"cache_n": 59, "prompt_n": 82, "prompt_ms": 93.364, "prompt_per_token_ms": 1.1385853658536587, "prompt_per_second": 878.2828499207403, "predicted_n": 2, "predicted_ms": 29.148, "predicted_per_token_ms": 14.574, "predicted_per_second": 68.61534239055852}, "tps": 68.61534239055852}, {"id": "ada35d96-4a1a-4660-b83d-6a6d0a15035f", "answer": "AB", "llm_answer": "A", "score": 0, "topics": ["WebSecurity", "Vulnerability"], "timings": {"cache_n": 58, "prompt_n": 132, "prompt_ms": 96.926, "prompt_per_token_ms": 0.7342878787878788, "prompt_per_second": 1361.8636898252275, "predicted_n": 2, "predicted_ms": 29.106, "predicted_per_token_ms": 14.553, "predicted_per_second": 68.71435442864014}, "tps": 68.71435442864014}, {"id": "1c1e1359-13bf-4116-8401-67412212dcc7", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["ApplicationSecurity", "SystemSecurity"], "timings": {"cache_n": 59, "prompt_n": 177, "prompt_ms": 101.228, "prompt_per_token_ms": 0.571909604519774, "prompt_per_second": 1748.5280752361007, "predicted_n": 2, "predicted_ms": 32.49, "predicted_per_token_ms": 16.245, "predicted_per_second": 61.55740227762388}, "tps": 61.55740227762388}, {"id": "23221291-bc53-4f55-8e46-3b7502583194", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["PenTest"], "timings": {"cache_n": 61, "prompt_n": 158, "prompt_ms": 96.705, "prompt_per_token_ms": 0.6120569620253165, "prompt_per_second": 1633.834858590559, "predicted_n": 2, "predicted_ms": 29.626, "predicted_per_token_ms": 14.813, "predicted_per_second": 67.50826976304597}, "tps": 67.50826976304597}, {"id": "5d8e7d80-18e4-4e06-9296-eb4b481a828c", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["ApplicationSecurity", "WebSecurity"], "timings": {"cache_n": 62, "prompt_n": 123, "prompt_ms": 95.667, "prompt_per_token_ms": 0.7777804878048781, "prompt_per_second": 1285.709805889178, "predicted_n": 2, "predicted_ms": 28.528, "predicted_per_token_ms": 14.264, "predicted_per_second": 70.10656197420079}, "tps": 70.10656197420079}, {"id": "fa5fe341-069e-494b-a090-7e91ea5e280d", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SoftwareSecurity", "SystemSecurity", "Vulnerability"], "timings": {"cache_n": 59, "prompt_n": 122, "prompt_ms": 96.538, "prompt_per_token_ms": 0.7912950819672131, "prompt_per_second": 1263.7510617580642, "predicted_n": 2, "predicted_ms": 28.824, "predicted_per_token_ms": 14.412, "predicted_per_second": 69.38662225922842}, "tps": 69.38662225922842}, {"id": "e25bbdd9-20c7-4a24-87e7-7438fdd4ba26", "answer": "AD", "llm_answer": "D", "score": 0, "topics": ["SoftwareSecurity", "ApplicationSecurity", "Vulnerability"], "timings": {"cache_n": 61, "prompt_n": 170, "prompt_ms": 98.902, "prompt_per_token_ms": 0.5817764705882353, "prompt_per_second": 1718.8732280439222, "predicted_n": 2, "predicted_ms": 29.262, "predicted_per_token_ms": 14.631, "predicted_per_second": 68.3480281593876}, "tps": 68.3480281593876}, {"id": "867bd867-80a8-4850-ba17-5a2e8266ac14", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 60, "prompt_n": 193, "prompt_ms": 91.18, "prompt_per_token_ms": 0.4724352331606218, "prompt_per_second": 2116.6922570739193, "predicted_n": 2, "predicted_ms": 30.502, "predicted_per_token_ms": 15.251, "predicted_per_second": 65.56947085437021}, "tps": 65.56947085437021}, {"id": "73e09b23-7146-401b-8152-e6d121353c3a", "answer": "AC", "llm_answer": "ABCD", "score": 0, "topics": ["ApplicationSecurity", "WebSecurity"], "timings": {"cache_n": 61, "prompt_n": 109, "prompt_ms": 94.717, "prompt_per_token_ms": 0.8689633027522936, "prompt_per_second": 1150.7965835066566, "predicted_n": 64, "predicted_ms": 1853.674, "predicted_per_token_ms": 28.96365625, "predicted_per_second": 34.52602776971571}, "tps": 34.52602776971571}, {"id": "245e5aa5-ca50-4bf2-ac97-832de07e6a42", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["SystemSecurity", "PenTest"], "timings": {"cache_n": 60, "prompt_n": 109, "prompt_ms": 96.623, "prompt_per_token_ms": 0.8864495412844037, "prompt_per_second": 1128.0957949970505, "predicted_n": 2, "predicted_ms": 28.203, "predicted_per_token_ms": 14.1015, "predicted_per_second": 70.91444172605752}, "tps": 70.91444172605752}, {"id": "a2458509-f3b0-46bb-96ef-7d16d51496c3", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SystemSecurity"], "timings": {"cache_n": 59, "prompt_n": 149, "prompt_ms": 98.973, "prompt_per_token_ms": 0.664248322147651, "prompt_per_second": 1505.4610853465088, "predicted_n": 2, "predicted_ms": 28.495, "predicted_per_token_ms": 14.2475, "predicted_per_second": 70.1877522372346}, "tps": 70.1877522372346}, {"id": "36c5c864-7dca-4d27-9797-9bc65bf50d6f", "answer": "AB", "llm_answer": "C", "score": 0, "topics": ["ApplicationSecurity", "SystemSecurity"], "timings": {"cache_n": 60, "prompt_n": 126, "prompt_ms": 98.97, "prompt_per_token_ms": 0.7854761904761904, "prompt_per_second": 1273.1130645650198, "predicted_n": 2, "predicted_ms": 29.022, "predicted_per_token_ms": 14.511, "predicted_per_second": 68.91323823306458}, "tps": 68.91323823306458}, {"id": "0d59d765-fa2d-4d6a-a405-a1aa5c76ada4", "answer": "ABD", "llm_answer": "AB", "score": 0, "topics": ["WebSecurity"], "timings": {"cache_n": 59, "prompt_n": 166, "prompt_ms": 98.541, "prompt_per_token_ms": 0.5936204819277108, "prompt_per_second": 1684.577992916654, "predicted_n": 3, "predicted_ms": 54.433, "predicted_per_token_ms": 18.144333333333332, "predicted_per_second": 55.11362592544963}, "tps": 55.11362592544963}, {"id": "b38726cd-82cd-4477-ada1-bd191fda19c1", "answer": "C", "llm_answer": "ACD", "score": 0, "topics": ["WebSecurity", "Vulnerability"], "timings": {"cache_n": 62, "prompt_n": 208, "prompt_ms": 87.453, "prompt_per_token_ms": 0.4204471153846154, "prompt_per_second": 2378.420408676661, "predicted_n": 6, "predicted_ms": 157.195, "predicted_per_token_ms": 26.199166666666667, "predicted_per_second": 38.1691529628805}, "tps": 38.1691529628805}, {"id": "6f705495-73e4-45ea-ad41-1aa86f6592ab", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["NetworkSecurity", "PenTest"], "timings": {"cache_n": 63, "prompt_n": 90, "prompt_ms": 93.064, "prompt_per_token_ms": 1.0340444444444443, "prompt_per_second": 967.0764205278089, "predicted_n": 2, "predicted_ms": 29.952, "predicted_per_token_ms": 14.976, "predicted_per_second": 66.77350427350427}, "tps": 66.77350427350427}, {"id": "10fb6e78-dd36-42b5-b54b-e1277ea0cccb", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 58, "prompt_n": 144, "prompt_ms": 97.595, "prompt_per_token_ms": 0.6777430555555556, "prompt_per_second": 1475.4854244582202, "predicted_n": 2, "predicted_ms": 30.597, "predicted_per_token_ms": 15.2985, "predicted_per_second": 65.3658855443344}, "tps": 65.3658855443344}, {"id": "3d34c08c-a1ff-47fd-b0c5-5e56a0d7161b", "answer": "BD", "llm_answer": "D", "score": 0, "topics": ["SoftwareSecurity", "SystemSecurity", "Vulnerability"], "timings": {"cache_n": 61, "prompt_n": 99, "prompt_ms": 91.787, "prompt_per_token_ms": 0.9271414141414142, "prompt_per_second": 1078.5841132186474, "predicted_n": 2, "predicted_ms": 30.977, "predicted_per_token_ms": 15.4885, "predicted_per_second": 64.56403137811925}, "tps": 64.56403137811925}, {"id": "43d90b11-4a76-41ec-8b00-4342a15ebe04", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SoftwareSecurity", "ApplicationSecurity", "MemorySafety", "Vulnerability"], "timings": {"cache_n": 58, "prompt_n": 113, "prompt_ms": 96.96, "prompt_per_token_ms": 0.8580530973451327, "prompt_per_second": 1165.4290429042906, "predicted_n": 2, "predicted_ms": 36.064, "predicted_per_token_ms": 18.032, "predicted_per_second": 55.45696539485359}, "tps": 55.45696539485359}, {"id": "3629347a-d25a-41f3-9816-90b36677b918", "answer": "BD", "llm_answer": "BD", "score": 1, "topics": ["PenTest", "SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 58, "prompt_n": 130, "prompt_ms": 97.068, "prompt_per_token_ms": 0.746676923076923, "prompt_per_second": 1339.2673177566242, "predicted_n": 4, "predicted_ms": 85.763, "predicted_per_token_ms": 21.44075, "predicted_per_second": 46.640159509345516}, "tps": 46.640159509345516}, {"id": "d437b927-daa1-45d4-bc9f-9ec6d522ab8b", "answer": "D", "llm_answer": "B", "score": 0, "topics": ["PenTest"], "timings": {"cache_n": 64, "prompt_n": 98, "prompt_ms": 94.392, "prompt_per_token_ms": 0.9631836734693877, "prompt_per_second": 1038.223578269345, "predicted_n": 2, "predicted_ms": 28.9, "predicted_per_token_ms": 14.45, "predicted_per_second": 69.20415224913495}, "tps": 69.20415224913495}, {"id": "ba70e36b-6460-4bef-947b-cf55b6f62024", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 62, "prompt_n": 98, "prompt_ms": 94.814, "prompt_per_token_ms": 0.9674897959183673, "prompt_per_second": 1033.6026325226233, "predicted_n": 2, "predicted_ms": 29.445, "predicted_per_token_ms": 14.7225, "predicted_per_second": 67.92324673119376}, "tps": 67.92324673119376}, {"id": "1e4fb44e-fd63-47f3-8a7e-d33d9f944e0c", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["ApplicationSecurity", "WebSecurity"], "timings": {"cache_n": 58, "prompt_n": 143, "prompt_ms": 92.432, "prompt_per_token_ms": 0.6463776223776224, "prompt_per_second": 1547.0832612082395, "predicted_n": 2, "predicted_ms": 27.498, "predicted_per_token_ms": 13.749, "predicted_per_second": 72.73256236817222}, "tps": 72.73256236817222}, {"id": "287a7599-6e41-4a30-b53e-be09d4dd2177", "answer": "AB", "llm_answer": "A", "score": 0, "topics": ["SystemSecurity", "WebSecurity"], "timings": {"cache_n": 62, "prompt_n": 81, "prompt_ms": 89.662, "prompt_per_token_ms": 1.1069382716049383, "prompt_per_second": 903.3927416296758, "predicted_n": 2, "predicted_ms": 30.259, "predicted_per_token_ms": 15.1295, "predicted_per_second": 66.09603754254933}, "tps": 66.09603754254933}, {"id": "dda9364e-c19c-4674-991c-14b089338275", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SoftwareSecurity", "ApplicationSecurity", "Vulnerability"], "timings": {"cache_n": 58, "prompt_n": 138, "prompt_ms": 96.128, "prompt_per_token_ms": 0.6965797101449276, "prompt_per_second": 1435.5858854860187, "predicted_n": 2, "predicted_ms": 29.21, "predicted_per_token_ms": 14.605, "predicted_per_second": 68.46970215679562}, "tps": 68.46970215679562}, {"id": "2bd1645d-ecc0-49b1-8a3e-ddc2f0c68ab5", "answer": "BC", "llm_answer": "AC", "score": 0, "topics": ["SystemSecurity"], "timings": {"cache_n": 60, "prompt_n": 136, "prompt_ms": 98.211, "prompt_per_token_ms": 0.7221397058823529, "prompt_per_second": 1384.7735996986082, "predicted_n": 2, "predicted_ms": 32.459, "predicted_per_token_ms": 16.2295, "predicted_per_second": 61.61619273545087}, "tps": 61.61619273545087}, {"id": "fa4ab592-ad7c-494e-9a41-5f1bf5f6fa93", "answer": "C", "llm_answer": "A", "score": 0, "topics": ["SystemSecurity", "PenTest"], "timings": {"cache_n": 63, "prompt_n": 94, "prompt_ms": 96.92, "prompt_per_token_ms": 1.031063829787234, "prompt_per_second": 969.8720594304581, "predicted_n": 2, "predicted_ms": 30.852, "predicted_per_token_ms": 15.426, "predicted_per_second": 64.82561908466226}, "tps": 64.82561908466226}, {"id": "0cfb0d06-083d-4df1-8502-878579c7aa64", "answer": "B", "llm_answer": "A", "score": 0, "topics": ["ApplicationSecurity"], "timings": {"cache_n": 58, "prompt_n": 65, "prompt_ms": 91.742, "prompt_per_token_ms": 1.4114153846153847, "prompt_per_second": 708.5086438054544, "predicted_n": 2, "predicted_ms": 31.518, "predicted_per_token_ms": 15.759, "predicted_per_second": 63.45580303318739}, "tps": 63.45580303318739}, {"id": "af868820-1a0f-4ee3-81a6-8443c261aca1", "answer": "BD", "llm_answer": "B", "score": 0, "topics": ["SoftwareSecurity", "SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 58, "prompt_n": 158, "prompt_ms": 99.192, "prompt_per_token_ms": 0.6277974683544303, "prompt_per_second": 1592.870392773611, "predicted_n": 2, "predicted_ms": 29.924, "predicted_per_token_ms": 14.962, "predicted_per_second": 66.8359844940516}, "tps": 66.8359844940516}, {"id": "4893133e-2282-4250-be9c-4a6ca1f2a8e7", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["WebSecurity", "Vulnerability"], "timings": {"cache_n": 61, "prompt_n": 192, "prompt_ms": 100.273, "prompt_per_token_ms": 0.5222552083333333, "prompt_per_second": 1914.7726706092367, "predicted_n": 2, "predicted_ms": 29.857, "predicted_per_token_ms": 14.9285, "predicted_per_second": 66.98596644003082}, "tps": 66.98596644003082}, {"id": "3af82b04-1f15-4f5c-a9fa-ece25b85e9c6", "answer": "BD", "llm_answer": "AB", "score": 0, "topics": ["NetworkSecurity", "PenTest"], "timings": {"cache_n": 59, "prompt_n": 142, "prompt_ms": 98.442, "prompt_per_token_ms": 0.6932535211267605, "prompt_per_second": 1442.4737408829565, "predicted_n": 3, "predicted_ms": 60.376, "predicted_per_token_ms": 20.125333333333334, "predicted_per_second": 49.68861799390487}, "tps": 49.68861799390487}, {"id": "19417ef5-3eab-4aa1-aeda-84ebd15b2719", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["ApplicationSecurity", "SoftwareSecurity", "Vulnerability"], "timings": {"cache_n": 62, "prompt_n": 134, "prompt_ms": 96.359, "prompt_per_token_ms": 0.7190970149253731, "prompt_per_second": 1390.6329455473801, "predicted_n": 2, "predicted_ms": 29.866, "predicted_per_token_ms": 14.933, "predicted_per_second": 66.96578048617157}, "tps": 66.96578048617157}, {"id": "10c18dfa-5306-448e-8d51-3ad3becc59a1", "answer": "AB", "llm_answer": "AB", "score": 1, "topics": ["ApplicationSecurity", "SystemSecurity"], "timings": {"cache_n": 59, "prompt_n": 137, "prompt_ms": 84.723, "prompt_per_token_ms": 0.6184160583941606, "prompt_per_second": 1617.0343354224945, "predicted_n": 2, "predicted_ms": 29.329, "predicted_per_token_ms": 14.6645, "predicted_per_second": 68.1918919840431}, "tps": 68.1918919840431}, {"id": "8ef323bb-d6db-4464-a612-4875aff3ad16", "answer": "AC", "llm_answer": "C", "score": 0, "topics": ["SoftwareSecurity", "SystemSecurity"], "timings": {"cache_n": 59, "prompt_n": 129, "prompt_ms": 96.375, "prompt_per_token_ms": 0.747093023255814, "prompt_per_second": 1338.5214007782101, "predicted_n": 2, "predicted_ms": 27.251, "predicted_per_token_ms": 13.6255, "predicted_per_second": 73.39180213570144}, "tps": 73.39180213570144}, {"id": "d2589dec-e16e-4534-bfb4-90041a4e15dd", "answer": "D", "llm_answer": "C", "score": 0, "topics": ["PenTest", "NetworkSecurity", "WebSecurity"], "timings": {"cache_n": 62, "prompt_n": 122, "prompt_ms": 99.903, "prompt_per_token_ms": 0.8188770491803279, "prompt_per_second": 1221.184549012542, "predicted_n": 2, "predicted_ms": 28.865, "predicted_per_token_ms": 14.4325, "predicted_per_second": 69.28806513078122}, "tps": 69.28806513078122}, {"id": "91d8a1a1-560d-47af-9241-9993e52545ac", "answer": "BD", "llm_answer": "BD", "score": 1, "topics": ["ApplicationSecurity", "WebSecurity"], "timings": {"cache_n": 59, "prompt_n": 139, "prompt_ms": 99.08, "prompt_per_token_ms": 0.7128057553956835, "prompt_per_second": 1402.9067420266451, "predicted_n": 3, "predicted_ms": 58.149, "predicted_per_token_ms": 19.383, "predicted_per_second": 51.591600887375535}, "tps": 51.591600887375535}, {"id": "fc480623-c9e0-40ad-931c-c334fb62007c", "answer": "A", "llm_answer": "C", "score": 0, "topics": ["WebSecurity", "Vulnerability"], "timings": {"cache_n": 63, "prompt_n": 188, "prompt_ms": 100.14, "prompt_per_token_ms": 0.5326595744680851, "prompt_per_second": 1877.371679648492, "predicted_n": 2, "predicted_ms": 29.156, "predicted_per_token_ms": 14.578, "predicted_per_second": 68.59651529702292}, "tps": 68.59651529702292}, {"id": "ac4ac102-50d1-42a4-adc0-5d602a3b6fce", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["SystemSecurity", "WebSecurity"], "timings": {"cache_n": 62, "prompt_n": 99, "prompt_ms": 95.463, "prompt_per_token_ms": 0.9642727272727272, "prompt_per_second": 1037.0510040539266, "predicted_n": 2, "predicted_ms": 30.068, "predicted_per_token_ms": 15.034, "predicted_per_second": 66.51589729945456}, "tps": 66.51589729945456}, {"id": "94ad635a-ff05-4af0-b521-798e7f3ff9f0", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 58, "prompt_n": 162, "prompt_ms": 99.082, "prompt_per_token_ms": 0.6116172839506172, "prompt_per_second": 1635.0093861649948, "predicted_n": 2, "predicted_ms": 29.503, "predicted_per_token_ms": 14.7515, "predicted_per_second": 67.78971630003728}, "tps": 67.78971630003728}, {"id": "ee2b0f00-0514-4c31-b320-ea25027825bb", "answer": "BD", "llm_answer": "A", "score": 0, "topics": ["SoftwareSecurity", "SystemSecurity", "Vulnerability"], "timings": {"cache_n": 62, "prompt_n": 110, "prompt_ms": 95.289, "prompt_per_token_ms": 0.8662636363636363, "prompt_per_second": 1154.3829822959628, "predicted_n": 2, "predicted_ms": 32.175, "predicted_per_token_ms": 16.0875, "predicted_per_second": 62.16006216006217}, "tps": 62.16006216006217}, {"id": "313bf095-5e6c-4458-85a1-4fbb5416d9ee", "answer": "AB", "llm_answer": "B", "score": 0, "topics": ["SystemSecurity", "ApplicationSecurity", "NetworkSecurity", "PenTest"], "timings": {"cache_n": 61, "prompt_n": 103, "prompt_ms": 100.824, "prompt_per_token_ms": 0.978873786407767, "prompt_per_second": 1021.582162977069, "predicted_n": 2, "predicted_ms": 30.41, "predicted_per_token_ms": 15.205, "predicted_per_second": 65.76783952647156}, "tps": 65.76783952647156}, {"id": "b47d62b5-faca-40ea-9694-de9a565dae0b", "answer": "C", "llm_answer": "B", "score": 0, "topics": ["SystemSecurity", "WebSecurity"], "timings": {"cache_n": 61, "prompt_n": 125, "prompt_ms": 97.437, "prompt_per_token_ms": 0.779496, "prompt_per_second": 1282.8802200396153, "predicted_n": 2, "predicted_ms": 27.861, "predicted_per_token_ms": 13.9305, "predicted_per_second": 71.78493234270127}, "tps": 71.78493234270127}, {"id": "2d206ff3-110f-4004-bb2c-1fe3cf5a331b", "answer": "AD", "llm_answer": "B", "score": 0, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 59, "prompt_n": 148, "prompt_ms": 86.828, "prompt_per_token_ms": 0.5866756756756757, "prompt_per_second": 1704.5192794950938, "predicted_n": 2, "predicted_ms": 31.459, "predicted_per_token_ms": 15.7295, "predicted_per_second": 63.57481165962046}, "tps": 63.57481165962046}, {"id": "b7184eb8-efee-4edf-a2d7-116a58944e93", "answer": "BD", "llm_answer": "BD", "score": 1, "topics": ["PenTest", "SystemSecurity"], "timings": {"cache_n": 64, "prompt_n": 111, "prompt_ms": 94.495, "prompt_per_token_ms": 0.8513063063063063, "prompt_per_second": 1174.6653262077357, "predicted_n": 3, "predicted_ms": 59.907, "predicted_per_token_ms": 19.968999999999998, "predicted_per_second": 50.077620311482804}, "tps": 50.077620311482804}, {"id": "de0831ea-456d-4125-90eb-5dafba7e9d4b", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SystemSecurity"], "timings": {"cache_n": 59, "prompt_n": 140, "prompt_ms": 97.74, "prompt_per_token_ms": 0.6981428571428571, "prompt_per_second": 1432.3715981174544, "predicted_n": 2, "predicted_ms": 30.354, "predicted_per_token_ms": 15.177, "predicted_per_second": 65.88917440864466}, "tps": 65.88917440864466}, {"id": "96522387-c49f-469a-84a2-de327d97f5fb", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SoftwareSecurity", "Vulnerability"], "timings": {"cache_n": 61, "prompt_n": 117, "prompt_ms": 96.033, "prompt_per_token_ms": 0.8207948717948718, "prompt_per_second": 1218.3311986504639, "predicted_n": 2, "predicted_ms": 30.282, "predicted_per_token_ms": 15.141, "predicted_per_second": 66.04583581005218}, "tps": 66.04583581005218}, {"id": "2d30e5f3-589d-40a2-9ce2-3011abeeb0b2", "answer": "B", "llm_answer": "A", "score": 0, "topics": ["ApplicationSecurity", "SystemSecurity"], "timings": {"cache_n": 61, "prompt_n": 177, "prompt_ms": 101.38, "prompt_per_token_ms": 0.5727683615819209, "prompt_per_second": 1745.9064904320378, "predicted_n": 2, "predicted_ms": 29.907, "predicted_per_token_ms": 14.9535, "predicted_per_second": 66.87397599224262}, "tps": 66.87397599224262}, {"id": "59530205-2a88-409a-b32e-9583777735fb", "answer": "C", "llm_answer": "D", "score": 0, "topics": ["SystemSecurity", "NetworkSecurity", "PenTest"], "timings": {"cache_n": 60, "prompt_n": 101, "prompt_ms": 96.562, "prompt_per_token_ms": 0.956059405940594, "prompt_per_second": 1045.9601085313063, "predicted_n": 2, "predicted_ms": 30.052, "predicted_per_token_ms": 15.026, "predicted_per_second": 66.5513110608279}, "tps": 66.5513110608279}, {"id": "40bb7f25-38cb-40f6-bc1f-d3f161088d84", "answer": "AC", "llm_answer": "AC", "score": 1, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 59, "prompt_n": 118, "prompt_ms": 95.972, "prompt_per_token_ms": 0.813322033898305, "prompt_per_second": 1229.5252782061434, "predicted_n": 2, "predicted_ms": 28.846, "predicted_per_token_ms": 14.423, "predicted_per_second": 69.33370311308327}, "tps": 69.33370311308327}, {"id": "7c7607cc-6038-450b-9eeb-101589d71f50", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SoftwareSecurity", "ApplicationSecurity", "Vulnerability"], "timings": {"cache_n": 59, "prompt_n": 142, "prompt_ms": 97.563, "prompt_per_token_ms": 0.6870633802816901, "prompt_per_second": 1455.4697990016707, "predicted_n": 2, "predicted_ms": 29.681, "predicted_per_token_ms": 14.8405, "predicted_per_second": 67.383174421347}, "tps": 67.383174421347}, {"id": "7c413169-9154-4970-8d35-5ab53e754c82", "answer": "B", "llm_answer": "A", "score": 0, "topics": ["PenTest"], "timings": {"cache_n": 61, "prompt_n": 115, "prompt_ms": 95.173, "prompt_per_token_ms": 0.8275913043478261, "prompt_per_second": 1208.325890746325, "predicted_n": 2, "predicted_ms": 30.521, "predicted_per_token_ms": 15.2605, "predicted_per_second": 65.52865240326332}, "tps": 65.52865240326332}, {"id": "b8de5c0e-0ffe-4e0e-aa2f-aeb96ba377a0", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["SystemSecurity", "PenTest"], "timings": {"cache_n": 59, "prompt_n": 155, "prompt_ms": 88.106, "prompt_per_token_ms": 0.5684258064516129, "prompt_per_second": 1759.2445463419065, "predicted_n": 2, "predicted_ms": 26.498, "predicted_per_token_ms": 13.249, "predicted_per_second": 75.47739452034115}, "tps": 75.47739452034115}, {"id": "6cceeba1-c6c0-42f5-9b5c-089e8ed0af8f", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SoftwareSecurity", "SystemSecurity", "Vulnerability"], "timings": {"cache_n": 62, "prompt_n": 108, "prompt_ms": 89.307, "prompt_per_token_ms": 0.8269166666666666, "prompt_per_second": 1209.3117000906984, "predicted_n": 2, "predicted_ms": 29.601, "predicted_per_token_ms": 14.8005, "predicted_per_second": 67.56528495658931}, "tps": 67.56528495658931}, {"id": "240d278e-4930-4658-9a41-5dfd298e33dd", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SystemSecurity", "SoftwareSecurity"], "timings": {"cache_n": 59, "prompt_n": 128, "prompt_ms": 95.618, "prompt_per_token_ms": 0.747015625, "prompt_per_second": 1338.660084921249, "predicted_n": 2, "predicted_ms": 30.831, "predicted_per_token_ms": 15.4155, "predicted_per_second": 64.86977392883786}, "tps": 64.86977392883786}, {"id": "9b968000-b54a-4fa5-8a39-1e1de223b43d", "answer": "D", "llm_answer": "D", "score": 1, "topics": ["SystemSecurity", "ApplicationSecurity", "Vulnerability"], "timings": {"cache_n": 59, "prompt_n": 131, "prompt_ms": 97.686, "prompt_per_token_ms": 0.7456946564885497, "prompt_per_second": 1341.0314681735354, "predicted_n": 2, "predicted_ms": 31.837, "predicted_per_token_ms": 15.9185, "predicted_per_second": 62.81998932060181}, "tps": 62.81998932060181}, {"id": "e46b299e-cc72-4515-b543-9036ad8c7e27", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 59, "prompt_n": 118, "prompt_ms": 95.464, "prompt_per_token_ms": 0.8090169491525424, "prompt_per_second": 1236.0680465934802, "predicted_n": 2, "predicted_ms": 29.944, "predicted_per_token_ms": 14.972, "predicted_per_second": 66.7913438418381}, "tps": 66.7913438418381}, {"id": "759c61b5-b5e3-44eb-8990-54a8ef08a949", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["ApplicationSecurity"], "timings": {"cache_n": 62, "prompt_n": 84, "prompt_ms": 94.439, "prompt_per_token_ms": 1.1242738095238094, "prompt_per_second": 889.4630396340496, "predicted_n": 2, "predicted_ms": 28.962, "predicted_per_token_ms": 14.481, "predicted_per_second": 69.05600441958428}, "tps": 69.05600441958428}, {"id": "02e498af-67a1-4cc1-9c97-6b0a2b68aecc", "answer": "AB", "llm_answer": "AC", "score": 0, "topics": ["PenTest", "Vulnerability", "NetworkSecurity"], "timings": {"cache_n": 58, "prompt_n": 128, "prompt_ms": 97.093, "prompt_per_token_ms": 0.7585390625, "prompt_per_second": 1318.3236690595613, "predicted_n": 2, "predicted_ms": 29.766, "predicted_per_token_ms": 14.883, "predicted_per_second": 67.19075455217363}, "tps": 67.19075455217363}, {"id": "66628aa0-affe-4ee0-9c62-dc392f47a490", "answer": "AC", "llm_answer": "C", "score": 0, "topics": ["ApplicationSecurity"], "timings": {"cache_n": 59, "prompt_n": 99, "prompt_ms": 96.49, "prompt_per_token_ms": 0.9746464646464646, "prompt_per_second": 1026.0130583480154, "predicted_n": 2, "predicted_ms": 29.231, "predicted_per_token_ms": 14.6155, "predicted_per_second": 68.4205124696384}, "tps": 68.4205124696384}, {"id": "dd609dd9-1b2b-4b57-9eaa-bdba734e5279", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["SystemSecurity", "PenTest"], "timings": {"cache_n": 59, "prompt_n": 113, "prompt_ms": 97.27, "prompt_per_token_ms": 0.8607964601769911, "prompt_per_second": 1161.7148144340497, "predicted_n": 2, "predicted_ms": 29.269, "predicted_per_token_ms": 14.6345, "predicted_per_second": 68.33168198435204}, "tps": 68.33168198435204}, {"id": "e55f1c19-3f92-47f1-9a54-3aee4f4ad84e", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["SystemSecurity"], "timings": {"cache_n": 58, "prompt_n": 73, "prompt_ms": 91.457, "prompt_per_token_ms": 1.252835616438356, "prompt_per_second": 798.1893130104859, "predicted_n": 2, "predicted_ms": 31.501, "predicted_per_token_ms": 15.7505, "predicted_per_second": 63.49004793498619}, "tps": 63.49004793498619}, {"id": "9d69268f-e084-4609-a559-7b4dfd31f6d1", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SystemSecurity", "SoftwareSecurity"], "timings": {"cache_n": 58, "prompt_n": 126, "prompt_ms": 91.288, "prompt_per_token_ms": 0.7245079365079364, "prompt_per_second": 1380.247129962317, "predicted_n": 2, "predicted_ms": 27.511, "predicted_per_token_ms": 13.7555, "predicted_per_second": 72.69819344989277}, "tps": 72.69819344989277}, {"id": "95727cd2-7327-4db4-9d8a-ec4f5b745271", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["SoftwareSecurity", "SystemSecurity", "WebSecurity"], "timings": {"cache_n": 59, "prompt_n": 93, "prompt_ms": 87.89, "prompt_per_token_ms": 0.9450537634408602, "prompt_per_second": 1058.140857890545, "predicted_n": 2, "predicted_ms": 29.078, "predicted_per_token_ms": 14.539, "predicted_per_second": 68.78052135635188}, "tps": 68.78052135635188}, {"id": "5f0e6a39-332b-440f-b5d5-afe932413e01", "answer": "B", "llm_answer": "A", "score": 0, "topics": ["ApplicationSecurity"], "timings": {"cache_n": 59, "prompt_n": 172, "prompt_ms": 100.363, "prompt_per_token_ms": 0.5835058139534883, "prompt_per_second": 1713.7789822942718, "predicted_n": 2, "predicted_ms": 29.091, "predicted_per_token_ms": 14.5455, "predicted_per_second": 68.74978515692138}, "tps": 68.74978515692138}, {"id": "0b79c7d5-d38d-420a-a6e1-b23d012fa303", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 59, "prompt_n": 101, "prompt_ms": 92.742, "prompt_per_token_ms": 0.9182376237623763, "prompt_per_second": 1089.0427206659335, "predicted_n": 2, "predicted_ms": 30.881, "predicted_per_token_ms": 15.4405, "predicted_per_second": 64.76474207441468}, "tps": 64.76474207441468}, {"id": "aeb8ff51-2c9f-47cd-8793-815b330a8e2c", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SoftwareSecurity", "ApplicationSecurity", "Vulnerability"], "timings": {"cache_n": 58, "prompt_n": 140, "prompt_ms": 97.454, "prompt_per_token_ms": 0.6960999999999999, "prompt_per_second": 1436.5752047119668, "predicted_n": 2, "predicted_ms": 31.02, "predicted_per_token_ms": 15.51, "predicted_per_second": 64.47453255963894}, "tps": 64.47453255963894}, {"id": "5db4c81a-28dd-47d1-8647-ff4bbddd93be", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["ApplicationSecurity", "SoftwareSecurity"], "timings": {"cache_n": 61, "prompt_n": 124, "prompt_ms": 95.86, "prompt_per_token_ms": 0.7730645161290323, "prompt_per_second": 1293.5530982683078, "predicted_n": 2, "predicted_ms": 30.496, "predicted_per_token_ms": 15.248, "predicted_per_second": 65.58237145855195}, "tps": 65.58237145855195}, {"id": "7217810a-fab5-4f69-951c-a90d29726ddd", "answer": "BD", "llm_answer": "C", "score": 0, "topics": ["SystemSecurity"], "timings": {"cache_n": 59, "prompt_n": 119, "prompt_ms": 97.307, "prompt_per_token_ms": 0.8177058823529412, "prompt_per_second": 1222.933601899144, "predicted_n": 2, "predicted_ms": 29.636, "predicted_per_token_ms": 14.818, "predicted_per_second": 67.48549061951681}, "tps": 67.48549061951681}, {"id": "8ca93f78-2870-4693-95f1-207476a06a87", "answer": "AB", "llm_answer": "B", "score": 0, "topics": ["SystemSecurity", "ApplicationSecurity", "NetworkSecurity"], "timings": {"cache_n": 59, "prompt_n": 147, "prompt_ms": 97.053, "prompt_per_token_ms": 0.6602244897959183, "prompt_per_second": 1514.636332725418, "predicted_n": 2, "predicted_ms": 30.135, "predicted_per_token_ms": 15.0675, "predicted_per_second": 66.3680106188817}, "tps": 66.3680106188817}, {"id": "394d03ab-f333-4bc7-b9ab-af1ae9f0adba", "answer": "BD", "llm_answer": "BD", "score": 1, "topics": ["SystemSecurity", "NetworkSecurity"], "timings": {"cache_n": 60, "prompt_n": 145, "prompt_ms": 97.765, "prompt_per_token_ms": 0.6742413793103448, "prompt_per_second": 1483.148365979645, "predicted_n": 3, "predicted_ms": 61.843, "predicted_per_token_ms": 20.614333333333335, "predicted_per_second": 48.50993645198325}, "tps": 48.50993645198325}, {"id": "523ac824-876e-42f5-8bf3-adf73f77b077", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 62, "prompt_n": 91, "prompt_ms": 93.97, "prompt_per_token_ms": 1.0326373626373626, "prompt_per_second": 968.3941683516015, "predicted_n": 2, "predicted_ms": 30.514, "predicted_per_token_ms": 15.257, "predicted_per_second": 65.54368486596317}, "tps": 65.54368486596317}, {"id": "f9e471db-c1d7-4c74-ad90-020bab9d1eeb", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SystemSecurity", "WebSecurity"], "timings": {"cache_n": 58, "prompt_n": 104, "prompt_ms": 79.216, "prompt_per_token_ms": 0.7616923076923077, "prompt_per_second": 1312.8660876590588, "predicted_n": 2, "predicted_ms": 26.157, "predicted_per_token_ms": 13.0785, "predicted_per_second": 76.46136789387162}, "tps": 76.46136789387162}, {"id": "538de01c-5805-4c48-b4ca-1e66398d6ded", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 59, "prompt_n": 180, "prompt_ms": 96.196, "prompt_per_token_ms": 0.5344222222222222, "prompt_per_second": 1871.1796748305542, "predicted_n": 2, "predicted_ms": 28.703, "predicted_per_token_ms": 14.3515, "predicted_per_second": 69.67912761732224}, "tps": 69.67912761732224}, {"id": "1a171294-11e7-4ee7-885a-d388e0be5d85", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SystemSecurity", "Vulnerability"], "timings": {"cache_n": 60, "prompt_n": 113, "prompt_ms": 96.103, "prompt_per_token_ms": 0.8504690265486725, "prompt_per_second": 1175.8217745543843, "predicted_n": 2, "predicted_ms": 28.849, "predicted_per_token_ms": 14.4245, "predicted_per_second": 69.32649311934556}, "tps": 69.32649311934556}, {"id": "fcd255c8-b5eb-446a-9aa0-8b2108e4d781", "answer": "AD", "llm_answer": "BD", "score": 0, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 62, "prompt_n": 134, "prompt_ms": 98.473, "prompt_per_token_ms": 0.7348731343283582, "prompt_per_second": 1360.7790968082622, "predicted_n": 2, "predicted_ms": 30.539, "predicted_per_token_ms": 15.2695, "predicted_per_second": 65.49002914306297}, "tps": 65.49002914306297}, {"id": "5b6fb352-13b1-4ed8-84c7-2062e9bdd49a", "answer": "ABC", "llm_answer": "AC", "score": 0, "topics": ["WebSecurity"], "timings": {"cache_n": 59, "prompt_n": 160, "prompt_ms": 97.212, "prompt_per_token_ms": 0.607575, "prompt_per_second": 1645.8873390116446, "predicted_n": 2, "predicted_ms": 30.637, "predicted_per_token_ms": 15.3185, "predicted_per_second": 65.28054313411887}, "tps": 65.28054313411887}, {"id": "3b8b0bf2-bc51-4591-be28-932b53c48de3", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SystemSecurity", "PenTest"], "timings": {"cache_n": 59, "prompt_n": 92, "prompt_ms": 93.186, "prompt_per_token_ms": 1.0128913043478263, "prompt_per_second": 987.2727662953662, "predicted_n": 2, "predicted_ms": 29.608, "predicted_per_token_ms": 14.804, "predicted_per_second": 67.54931099702783}, "tps": 67.54931099702783}, {"id": "be6acb34-0d67-4e23-a098-df563c31eb3f", "answer": "AD", "llm_answer": "D", "score": 0, "topics": ["NetworkSecurity"], "timings": {"cache_n": 58, "prompt_n": 147, "prompt_ms": 101.05, "prompt_per_token_ms": 0.6874149659863945, "prompt_per_second": 1454.7253834735282, "predicted_n": 2, "predicted_ms": 28.785, "predicted_per_token_ms": 14.3925, "predicted_per_second": 69.48063227375368}, "tps": 69.48063227375368}, {"id": "3ab6baf5-2400-4d9f-b786-1294165dddbd", "answer": "AB", "llm_answer": "ABC", "score": 0, "topics": ["WebSecurity", "SystemSecurity", "ApplicationSecurity", "Vulnerability"], "timings": {"cache_n": 60, "prompt_n": 141, "prompt_ms": 96.726, "prompt_per_token_ms": 0.6859999999999999, "prompt_per_second": 1457.725947521866, "predicted_n": 4, "predicted_ms": 89.171, "predicted_per_token_ms": 22.29275, "predicted_per_second": 44.85763308699016}, "tps": 44.85763308699016}, {"id": "6bfdf59e-af99-451b-886f-63c7fec3ddaf", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SystemSecurity", "PenTest"], "timings": {"cache_n": 62, "prompt_n": 117, "prompt_ms": 96.056, "prompt_per_token_ms": 0.8209914529914529, "prompt_per_second": 1218.0394769717664, "predicted_n": 2, "predicted_ms": 30.678, "predicted_per_token_ms": 15.339, "predicted_per_second": 65.19329812895234}, "tps": 65.19329812895234}, {"id": "bf567492-f42a-493a-ae80-3c2e604264c0", "answer": "AD", "llm_answer": "ABD", "score": 0, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 59, "prompt_n": 134, "prompt_ms": 93.94, "prompt_per_token_ms": 0.701044776119403, "prompt_per_second": 1426.4424100489675, "predicted_n": 64, "predicted_ms": 1837.963, "predicted_per_token_ms": 28.718171875, "predicted_per_second": 34.82115798849052}, "tps": 34.82115798849052}, {"id": "9ce50f17-3e30-41fa-a4f5-6c9c3b621631", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["SystemSecurity", "ApplicationSecurity", "Vulnerability"], "timings": {"cache_n": 61, "prompt_n": 145, "prompt_ms": 97.069, "prompt_per_token_ms": 0.6694413793103449, "prompt_per_second": 1493.7827730789436, "predicted_n": 2, "predicted_ms": 29.371, "predicted_per_token_ms": 14.6855, "predicted_per_second": 68.09437880902932}, "tps": 68.09437880902932}, {"id": "b6cd5c86-b1b8-436c-8341-c131f5a357c6", "answer": "AC", "llm_answer": "C", "score": 0, "topics": ["WebSecurity"], "timings": {"cache_n": 58, "prompt_n": 161, "prompt_ms": 97.441, "prompt_per_token_ms": 0.605223602484472, "prompt_per_second": 1652.2818936587266, "predicted_n": 2, "predicted_ms": 31.429, "predicted_per_token_ms": 15.7145, "predicted_per_second": 63.63549587960164}, "tps": 63.63549587960164}, {"id": "631d4b16-eaf7-474c-b5c1-5ee190349e73", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 62, "prompt_n": 138, "prompt_ms": 97.962, "prompt_per_token_ms": 0.7098695652173913, "prompt_per_second": 1408.7094996018864, "predicted_n": 2, "predicted_ms": 28.539, "predicted_per_token_ms": 14.2695, "predicted_per_second": 70.07954027821577}, "tps": 70.07954027821577}, {"id": "bd8638f6-4726-43c2-b08b-23732df21849", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["ApplicationSecurity"], "timings": {"cache_n": 59, "prompt_n": 95, "prompt_ms": 93.422, "prompt_per_token_ms": 0.9833894736842105, "prompt_per_second": 1016.891096315643, "predicted_n": 2, "predicted_ms": 28.67, "predicted_per_token_ms": 14.335, "predicted_per_second": 69.75933031042902}, "tps": 69.75933031042902}, {"id": "4a22ba20-d474-46d2-b0b8-41a86135ccc4", "answer": "AC", "llm_answer": "AC", "score": 1, "topics": ["NetworkSecurity", "SystemSecurity", "PenTest"], "timings": {"cache_n": 62, "prompt_n": 100, "prompt_ms": 98.258, "prompt_per_token_ms": 0.98258, "prompt_per_second": 1017.7288363288486, "predicted_n": 4, "predicted_ms": 93.969, "predicted_per_token_ms": 23.49225, "predicted_per_second": 42.56722961827837}, "tps": 42.56722961827837}, {"id": "0a5af0f1-850e-4cbe-bba5-2f0cdee87c2d", "answer": "AC", "llm_answer": "AC", "score": 1, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 62, "prompt_n": 144, "prompt_ms": 98.583, "prompt_per_token_ms": 0.6846041666666667, "prompt_per_second": 1460.6980919631173, "predicted_n": 2, "predicted_ms": 26.705, "predicted_per_token_ms": 13.3525, "predicted_per_second": 74.89234225800412}, "tps": 74.89234225800412}, {"id": "d6f87d3d-c497-44a1-8932-ab7352e25416", "answer": "D", "llm_answer": "D", "score": 1, "topics": ["SystemSecurity", "WebSecurity", "Vulnerability"], "timings": {"cache_n": 60, "prompt_n": 118, "prompt_ms": 86.242, "prompt_per_token_ms": 0.730864406779661, "prompt_per_second": 1368.2428515108647, "predicted_n": 2, "predicted_ms": 29.724, "predicted_per_token_ms": 14.862, "predicted_per_second": 67.28569506122999}, "tps": 67.28569506122999}, {"id": "e137e495-9dbb-4c1a-b273-10729dd81163", "answer": "A", "llm_answer": "B", "score": 0, "topics": ["SoftwareSecurity", "ApplicationSecurity"], "timings": {"cache_n": 59, "prompt_n": 74, "prompt_ms": 91.766, "prompt_per_token_ms": 1.2400810810810812, "prompt_per_second": 806.3988841183009, "predicted_n": 2, "predicted_ms": 28.197, "predicted_per_token_ms": 14.0985, "predicted_per_second": 70.92953151044438}, "tps": 70.92953151044438}, {"id": "890af326-da31-4451-9dd2-675cb829155f", "answer": "BD", "llm_answer": "BD", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 59, "prompt_n": 96, "prompt_ms": 92.967, "prompt_per_token_ms": 0.96840625, "prompt_per_second": 1032.6244796540707, "predicted_n": 3, "predicted_ms": 59.645, "predicted_per_token_ms": 19.881666666666668, "predicted_per_second": 50.297594098415615}, "tps": 50.297594098415615}, {"id": "2b52de5c-3403-4963-81e5-8c1bb8b8e93c", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["PenTest"], "timings": {"cache_n": 59, "prompt_n": 143, "prompt_ms": 97.432, "prompt_per_token_ms": 0.6813426573426573, "prompt_per_second": 1467.6902865588306, "predicted_n": 2, "predicted_ms": 29.809, "predicted_per_token_ms": 14.9045, "predicted_per_second": 67.09383072226508}, "tps": 67.09383072226508}, {"id": "e9129e16-70a3-4d7e-92bb-421c6f03849d", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["WebSecurity", "ApplicationSecurity"], "timings": {"cache_n": 60, "prompt_n": 166, "prompt_ms": 97.962, "prompt_per_token_ms": 0.590132530120482, "prompt_per_second": 1694.5346154631386, "predicted_n": 2, "predicted_ms": 28.698, "predicted_per_token_ms": 14.349, "predicted_per_second": 69.69126768415917}, "tps": 69.69126768415917}, {"id": "9fa2b7ab-123d-4eea-97fa-4436957e37df", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["SoftwareSecurity", "ApplicationSecurity", "Vulnerability"], "timings": {"cache_n": 61, "prompt_n": 109, "prompt_ms": 95.775, "prompt_per_token_ms": 0.8786697247706422, "prompt_per_second": 1138.0840511615766, "predicted_n": 2, "predicted_ms": 28.545, "predicted_per_token_ms": 14.2725, "predicted_per_second": 70.06480994920301}, "tps": 70.06480994920301}, {"id": "21ddfd4a-6da1-4361-b983-4e30e68cfae0", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SoftwareSecurity", "ApplicationSecurity", "Vulnerability"], "timings": {"cache_n": 61, "prompt_n": 160, "prompt_ms": 96.977, "prompt_per_token_ms": 0.6061062500000001, "prompt_per_second": 1649.87574373305, "predicted_n": 2, "predicted_ms": 29.825, "predicted_per_token_ms": 14.9125, "predicted_per_second": 67.05783738474435}, "tps": 67.05783738474435}, {"id": "52486023-96ce-4aea-b629-6063113d5a8c", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["ApplicationSecurity", "WebSecurity"], "timings": {"cache_n": 60, "prompt_n": 136, "prompt_ms": 97.406, "prompt_per_token_ms": 0.7162205882352941, "prompt_per_second": 1396.217892121635, "predicted_n": 2, "predicted_ms": 30.884, "predicted_per_token_ms": 15.442, "predicted_per_second": 64.7584509778526}, "tps": 64.7584509778526}, {"id": "b3392a3b-77e9-4e98-af01-4bfb2f8e4a0e", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["ApplicationSecurity", "WebSecurity"], "timings": {"cache_n": 60, "prompt_n": 162, "prompt_ms": 98.801, "prompt_per_token_ms": 0.6098827160493827, "prompt_per_second": 1639.6595176162184, "predicted_n": 2, "predicted_ms": 31.218, "predicted_per_token_ms": 15.609, "predicted_per_second": 64.06560317765391}, "tps": 64.06560317765391}, {"id": "190200c4-ed5e-4d30-aa81-86a50ec7ee57", "answer": "D", "llm_answer": "D", "score": 1, "topics": ["ApplicationSecurity", "SystemSecurity", "PenTest"], "timings": {"cache_n": 60, "prompt_n": 90, "prompt_ms": 88.565, "prompt_per_token_ms": 0.9840555555555556, "prompt_per_second": 1016.2027889120985, "predicted_n": 2, "predicted_ms": 26.955, "predicted_per_token_ms": 13.4775, "predicted_per_second": 74.19773696902244}, "tps": 74.19773696902244}, {"id": "3f883a90-cef8-4227-aed5-3c34a99b1bf9", "answer": "AC", "llm_answer": "C", "score": 0, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 59, "prompt_n": 139, "prompt_ms": 93.918, "prompt_per_token_ms": 0.6756690647482014, "prompt_per_second": 1480.0144807172212, "predicted_n": 2, "predicted_ms": 30.376, "predicted_per_token_ms": 15.188, "predicted_per_second": 65.84145377929944}, "tps": 65.84145377929944}, {"id": "154ed34d-1b5f-45b5-8ba7-f8929543cf45", "answer": "AC", "llm_answer": "C", "score": 0, "topics": ["WebSecurity"], "timings": {"cache_n": 61, "prompt_n": 199, "prompt_ms": 101.714, "prompt_per_token_ms": 0.5111256281407035, "prompt_per_second": 1956.4661698487919, "predicted_n": 2, "predicted_ms": 30.088, "predicted_per_token_ms": 15.044, "predicted_per_second": 66.47168306301515}, "tps": 66.47168306301515}, {"id": "cdb20286-3829-4dcb-b2c1-f331c079e952", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 60, "prompt_n": 118, "prompt_ms": 96.993, "prompt_per_token_ms": 0.8219745762711864, "prompt_per_second": 1216.582639984329, "predicted_n": 2, "predicted_ms": 32.39, "predicted_per_token_ms": 16.195, "predicted_per_second": 61.74745291756715}, "tps": 61.74745291756715}, {"id": "8984de7d-d0bd-47be-9b9b-f17c5b679ae1", "answer": "AC", "llm_answer": "AC", "score": 1, "topics": ["SystemSecurity", "NetworkSecurity"], "timings": {"cache_n": 59, "prompt_n": 176, "prompt_ms": 98.675, "prompt_per_token_ms": 0.5606534090909091, "prompt_per_second": 1783.6331390929822, "predicted_n": 2, "predicted_ms": 30.774, "predicted_per_token_ms": 15.387, "predicted_per_second": 64.98992656138299}, "tps": 64.98992656138299}, {"id": "ec75205f-2a95-4953-8905-767c0d453c48", "answer": "ABC", "llm_answer": "B", "score": 0, "topics": ["WebSecurity", "NetworkSecurity"], "timings": {"cache_n": 59, "prompt_n": 172, "prompt_ms": 100.069, "prompt_per_token_ms": 0.581796511627907, "prompt_per_second": 1718.814018327354, "predicted_n": 2, "predicted_ms": 29.454, "predicted_per_token_ms": 14.727, "predicted_per_second": 67.90249202145719}, "tps": 67.90249202145719}, {"id": "13b4724d-a48f-435c-9c02-2591619e475a", "answer": "AC", "llm_answer": "C", "score": 0, "topics": ["ApplicationSecurity"], "timings": {"cache_n": 59, "prompt_n": 140, "prompt_ms": 98.748, "prompt_per_token_ms": 0.7053428571428572, "prompt_per_second": 1417.7502329161098, "predicted_n": 2, "predicted_ms": 29.537, "predicted_per_token_ms": 14.7685, "predicted_per_second": 67.71168365101398}, "tps": 67.71168365101398}, {"id": "65bbf157-ef54-4e7a-b96f-b36b130cdd13", "answer": "ABC", "llm_answer": "AC", "score": 0, "topics": ["SoftwareSecurity", "SystemSecurity"], "timings": {"cache_n": 58, "prompt_n": 179, "prompt_ms": 99.899, "prompt_per_token_ms": 0.5580949720670391, "prompt_per_second": 1791.8097278251032, "predicted_n": 2, "predicted_ms": 30.493, "predicted_per_token_ms": 15.2465, "predicted_per_second": 65.58882366444759}, "tps": 65.58882366444759}, {"id": "cfc1ce0d-ac39-4f64-bf73-528d84b4e0dc", "answer": "AC", "llm_answer": "AC", "score": 1, "topics": ["SystemSecurity"], "timings": {"cache_n": 60, "prompt_n": 162, "prompt_ms": 100.051, "prompt_per_token_ms": 0.6175987654320988, "prompt_per_second": 1619.174221147215, "predicted_n": 2, "predicted_ms": 29.328, "predicted_per_token_ms": 14.664, "predicted_per_second": 68.19421713038734}, "tps": 68.19421713038734}, {"id": "ee7713cb-f0c3-4e77-a625-8f28a96ebc17", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 60, "prompt_n": 119, "prompt_ms": 95.041, "prompt_per_token_ms": 0.7986638655462185, "prompt_per_second": 1252.0912027440788, "predicted_n": 2, "predicted_ms": 28.927, "predicted_per_token_ms": 14.4635, "predicted_per_second": 69.13955819822311}, "tps": 69.13955819822311}, {"id": "fbb0f56a-e48d-4c49-8f36-122dcb147d83", "answer": "AC", "llm_answer": "B", "score": 0, "topics": ["ApplicationSecurity", "SystemSecurity"], "timings": {"cache_n": 60, "prompt_n": 148, "prompt_ms": 87.198, "prompt_per_token_ms": 0.5891756756756756, "prompt_per_second": 1697.2866350145646, "predicted_n": 2, "predicted_ms": 26.709, "predicted_per_token_ms": 13.3545, "predicted_per_second": 74.88112621213823}, "tps": 74.88112621213823}, {"id": "106fc130-66d9-4d15-b973-c70475389e91", "answer": "AC", "llm_answer": "AD", "score": 0, "topics": ["ApplicationSecurity", "WebSecurity"], "timings": {"cache_n": 62, "prompt_n": 143, "prompt_ms": 98.537, "prompt_per_token_ms": 0.6890699300699301, "prompt_per_second": 1451.2315170951015, "predicted_n": 4, "predicted_ms": 89.449, "predicted_per_token_ms": 22.36225, "predicted_per_second": 44.718219320506655}, "tps": 44.718219320506655}, {"id": "9b05a8e4-5f7d-41e2-a115-9b0586f1f77f", "answer": "AD", "llm_answer": "AD", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 60, "prompt_n": 172, "prompt_ms": 100.315, "prompt_per_token_ms": 0.5832267441860465, "prompt_per_second": 1714.5990131087076, "predicted_n": 2, "predicted_ms": 29.615, "predicted_per_token_ms": 14.8075, "predicted_per_second": 67.53334458889077}, "tps": 67.53334458889077}, {"id": "93ca297e-bd1a-4fb0-81e1-e7c1422e7342", "answer": "AB", "llm_answer": "B", "score": 0, "topics": ["ApplicationSecurity", "PenTest"], "timings": {"cache_n": 59, "prompt_n": 98, "prompt_ms": 94.206, "prompt_per_token_ms": 0.9612857142857143, "prompt_per_second": 1040.2734433050973, "predicted_n": 2, "predicted_ms": 31.001, "predicted_per_token_ms": 15.5005, "predicted_per_second": 64.51404793393762}, "tps": 64.51404793393762}, {"id": "af09ea11-5bab-416a-beb9-f908323d1c05", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["SystemSecurity", "PenTest"], "timings": {"cache_n": 59, "prompt_n": 66, "prompt_ms": 89.413, "prompt_per_token_ms": 1.3547424242424242, "prompt_per_second": 738.1476966436649, "predicted_n": 2, "predicted_ms": 29.308, "predicted_per_token_ms": 14.654, "predicted_per_second": 68.24075337791729}, "tps": 68.24075337791729}, {"id": "5b009694-5b69-439b-bf74-6d572f1133e1", "answer": "BD", "llm_answer": "A", "score": 0, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 58, "prompt_n": 166, "prompt_ms": 99.701, "prompt_per_token_ms": 0.6006084337349398, "prompt_per_second": 1664.9782850723664, "predicted_n": 2, "predicted_ms": 29.827, "predicted_per_token_ms": 14.9135, "predicted_per_second": 67.05334093271196}, "tps": 67.05334093271196}, {"id": "c8f43d72-77f4-4a41-a903-c9b16a0d7e73", "answer": "BC", "llm_answer": "C", "score": 0, "topics": ["WebSecurity"], "timings": {"cache_n": 60, "prompt_n": 124, "prompt_ms": 95.357, "prompt_per_token_ms": 0.769008064516129, "prompt_per_second": 1300.376479964764, "predicted_n": 2, "predicted_ms": 29.049, "predicted_per_token_ms": 14.5245, "predicted_per_second": 68.84918585837723}, "tps": 68.84918585837723}, {"id": "247f6b8a-6969-476c-ad95-8dd98fce1c11", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 59, "prompt_n": 107, "prompt_ms": 93.679, "prompt_per_token_ms": 0.8755046728971962, "prompt_per_second": 1142.1983582232945, "predicted_n": 2, "predicted_ms": 31.73, "predicted_per_token_ms": 15.865, "predicted_per_second": 63.03183107469272}, "tps": 63.03183107469272}, {"id": "a0812bfc-d44e-424e-9d5b-f0adfbfd1530", "answer": "AD", "llm_answer": "ABCD", "score": 0, "topics": ["PenTest"], "timings": {"cache_n": 59, "prompt_n": 127, "prompt_ms": 97.28, "prompt_per_token_ms": 0.765984251968504, "prompt_per_second": 1305.5098684210525, "predicted_n": 5, "predicted_ms": 119.671, "predicted_per_token_ms": 23.9342, "predicted_per_second": 41.78121683615913}, "tps": 41.78121683615913}, {"id": "7f20814e-9e18-4e4a-b65e-9143bec22579", "answer": "AC", "llm_answer": "A", "score": 0, "topics": ["WebSecurity", "ApplicationSecurity", "Vulnerability"], "timings": {"cache_n": 60, "prompt_n": 111, "prompt_ms": 81.799, "prompt_per_token_ms": 0.7369279279279279, "prompt_per_second": 1356.9848042152103, "predicted_n": 2, "predicted_ms": 25.915, "predicted_per_token_ms": 12.9575, "predicted_per_second": 77.17538105344396}, "tps": 77.17538105344396}, {"id": "3446d9ad-6108-4210-8aa0-80f62fd688be", "answer": "BC", "llm_answer": "B", "score": 0, "topics": ["SystemSecurity"], "timings": {"cache_n": 59, "prompt_n": 167, "prompt_ms": 97.503, "prompt_per_token_ms": 0.5838502994011976, "prompt_per_second": 1712.767812272443, "predicted_n": 2, "predicted_ms": 28.814, "predicted_per_token_ms": 14.407, "predicted_per_second": 69.41070313042272}, "tps": 69.41070313042272}, {"id": "6c64a083-d4ea-4a38-a522-1d906608d20d", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["SoftwareSecurity", "SystemSecurity"], "timings": {"cache_n": 63, "prompt_n": 141, "prompt_ms": 98.072, "prompt_per_token_ms": 0.6955460992907802, "prompt_per_second": 1437.7192266905945, "predicted_n": 2, "predicted_ms": 29.129, "predicted_per_token_ms": 14.5645, "predicted_per_second": 68.6600981839404}, "tps": 68.6600981839404}, {"id": "ff2dd138-2a32-46ed-949b-e22f1dda9228", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["ApplicationSecurity", "WebSecurity"], "timings": {"cache_n": 62, "prompt_n": 159, "prompt_ms": 98.189, "prompt_per_token_ms": 0.6175408805031446, "prompt_per_second": 1619.3259937467537, "predicted_n": 2, "predicted_ms": 30.609, "predicted_per_token_ms": 15.3045, "predicted_per_second": 65.34025940082982}, "tps": 65.34025940082982}, {"id": "2f29bfc1-ec35-4c65-a115-127765d2bec0", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["NetworkSecurity", "PenTest"], "timings": {"cache_n": 62, "prompt_n": 126, "prompt_ms": 95.71, "prompt_per_token_ms": 0.7596031746031745, "prompt_per_second": 1316.4768571727093, "predicted_n": 2, "predicted_ms": 30.216, "predicted_per_token_ms": 15.108, "predicted_per_second": 66.19009796134497}, "tps": 66.19009796134497}, {"id": "e402736a-9c82-4851-a6de-82db69f022bc", "answer": "AC", "llm_answer": "AC", "score": 1, "topics": ["PenTest"], "timings": {"cache_n": 62, "prompt_n": 190, "prompt_ms": 99.034, "prompt_per_token_ms": 0.5212315789473685, "prompt_per_second": 1918.5330290607264, "predicted_n": 2, "predicted_ms": 33.025, "predicted_per_token_ms": 16.5125, "predicted_per_second": 60.56018168054504}, "tps": 60.56018168054504}, {"id": "a670b890-0d82-4f62-b372-79d3b353f852", "answer": "ABC", "llm_answer": "C", "score": 0, "topics": ["ApplicationSecurity", "WebSecurity"], "timings": {"cache_n": 64, "prompt_n": 113, "prompt_ms": 94.757, "prompt_per_token_ms": 0.8385575221238939, "prompt_per_second": 1192.5240351636292, "predicted_n": 2, "predicted_ms": 29.17, "predicted_per_token_ms": 14.585, "predicted_per_second": 68.56359273225917}, "tps": 68.56359273225917}, {"id": "3167c331-1570-4f0b-a08c-c52e10ea3249", "answer": "ACD", "llm_answer": "ABCD", "score": 0, "topics": ["WebSecurity"], "timings": {"cache_n": 59, "prompt_n": 186, "prompt_ms": 99.787, "prompt_per_token_ms": 0.536489247311828, "prompt_per_second": 1863.9702566466572, "predicted_n": 3, "predicted_ms": 59.313, "predicted_per_token_ms": 19.771, "predicted_per_second": 50.579131050528545}, "tps": 50.579131050528545}, {"id": "b4d2d15e-329d-4d68-ba68-21281c5fd700", "answer": "AB", "llm_answer": "ABCD", "score": 0, "topics": ["WebSecurity"], "timings": {"cache_n": 62, "prompt_n": 87, "prompt_ms": 94.492, "prompt_per_token_ms": 1.0861149425287357, "prompt_per_second": 920.7128645811284, "predicted_n": 3, "predicted_ms": 58.917, "predicted_per_token_ms": 19.639, "predicted_per_second": 50.91908956667855}, "tps": 50.91908956667855}, {"id": "21e82879-d8e2-4ac4-833f-31562659810d", "answer": "D", "llm_answer": "A", "score": 0, "topics": ["SoftwareSecurity", "ApplicationSecurity", "WebSecurity"], "timings": {"cache_n": 59, "prompt_n": 58, "prompt_ms": 87.629, "prompt_per_token_ms": 1.510844827586207, "prompt_per_second": 661.8813406520673, "predicted_n": 2, "predicted_ms": 26.391, "predicted_per_token_ms": 13.1955, "predicted_per_second": 75.78341101132962}, "tps": 75.78341101132962}, {"id": "09deab35-daab-481e-bd63-c1feaf72b48c", "answer": "AC", "llm_answer": "ABCD", "score": 0, "topics": ["ApplicationSecurity", "WebSecurity"], "timings": {"cache_n": 58, "prompt_n": 177, "prompt_ms": 90.578, "prompt_per_token_ms": 0.5117401129943503, "prompt_per_second": 1954.1168937269535, "predicted_n": 8, "predicted_ms": 207.369, "predicted_per_token_ms": 25.921125, "predicted_per_second": 38.5785724963712}, "tps": 38.5785724963712}, {"id": "5f1cfbcb-5203-4c91-be0b-e2d8939c6635", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 60, "prompt_n": 129, "prompt_ms": 99.571, "prompt_per_token_ms": 0.7718682170542636, "prompt_per_second": 1295.5579435779496, "predicted_n": 2, "predicted_ms": 28.155, "predicted_per_token_ms": 14.0775, "predicted_per_second": 71.03534008169063}, "tps": 71.03534008169063}, {"id": "7a4152ed-c799-4c2d-8a21-c605af89feb3", "answer": "ABD", "llm_answer": "B", "score": 0, "topics": ["SystemSecurity"], "timings": {"cache_n": 62, "prompt_n": 124, "prompt_ms": 96.12, "prompt_per_token_ms": 0.7751612903225806, "prompt_per_second": 1290.054099042863, "predicted_n": 2, "predicted_ms": 28.544, "predicted_per_token_ms": 14.272, "predicted_per_second": 70.06726457399103}, "tps": 70.06726457399103}, {"id": "80340d52-8ad9-4873-b309-b4a0aa8ec9e4", "answer": "AC", "llm_answer": "AC", "score": 1, "topics": ["ApplicationSecurity", "SystemSecurity"], "timings": {"cache_n": 62, "prompt_n": 83, "prompt_ms": 92.866, "prompt_per_token_ms": 1.118867469879518, "prompt_per_second": 893.7609028061939, "predicted_n": 3, "predicted_ms": 59.151, "predicted_per_token_ms": 19.717000000000002, "predicted_per_second": 50.717654815641325}, "tps": 50.717654815641325}, {"id": "8a3cab59-c1fe-40d4-94a3-16c4738505e6", "answer": "D", "llm_answer": "D", "score": 1, "topics": ["ApplicationSecurity", "WebSecurity", "Vulnerability"], "timings": {"cache_n": 60, "prompt_n": 134, "prompt_ms": 99.076, "prompt_per_token_ms": 0.7393731343283582, "prompt_per_second": 1352.497072954096, "predicted_n": 2, "predicted_ms": 30.421, "predicted_per_token_ms": 15.2105, "predicted_per_second": 65.74405838072384}, "tps": 65.74405838072384}, {"id": "76fd056a-b772-4944-b6ef-27d5e510b31a", "answer": "B", "llm_answer": "A", "score": 0, "topics": ["SystemSecurity", "PenTest"], "timings": {"cache_n": 60, "prompt_n": 132, "prompt_ms": 96.785, "prompt_per_token_ms": 0.7332196969696969, "prompt_per_second": 1363.8477036730899, "predicted_n": 2, "predicted_ms": 29.332, "predicted_per_token_ms": 14.666, "predicted_per_second": 68.18491749624982}, "tps": 68.18491749624982}, {"id": "d3ea639a-6a58-4407-a003-9848df75492c", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["SystemSecurity", "Vulnerability"], "timings": {"cache_n": 59, "prompt_n": 167, "prompt_ms": 101.599, "prompt_per_token_ms": 0.6083772455089821, "prompt_per_second": 1643.7169657181664, "predicted_n": 2, "predicted_ms": 29.728, "predicted_per_token_ms": 14.864, "predicted_per_second": 67.27664155005382}, "tps": 67.27664155005382}, {"id": "34c4b821-93ea-48bf-b7cc-772a10d9d3ef", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["ApplicationSecurity", "SystemSecurity"], "timings": {"cache_n": 61, "prompt_n": 155, "prompt_ms": 96.636, "prompt_per_token_ms": 0.623458064516129, "prompt_per_second": 1603.9571174303574, "predicted_n": 2, "predicted_ms": 27.45, "predicted_per_token_ms": 13.725, "predicted_per_second": 72.85974499089254}, "tps": 72.85974499089254}, {"id": "73452376-3611-4d7e-ba56-8a50896a7b54", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 62, "prompt_n": 105, "prompt_ms": 79.027, "prompt_per_token_ms": 0.7526380952380952, "prompt_per_second": 1328.6598251230591, "predicted_n": 2, "predicted_ms": 29.954, "predicted_per_token_ms": 14.977, "predicted_per_second": 66.76904587033451}, "tps": 66.76904587033451}, {"id": "15633ad3-e10f-42e6-9735-a17500c6b51a", "answer": "AC", "llm_answer": "CD", "score": 0, "topics": ["SystemSecurity", "PenTest"], "timings": {"cache_n": 58, "prompt_n": 133, "prompt_ms": 96.427, "prompt_per_token_ms": 0.725015037593985, "prompt_per_second": 1379.2817364431123, "predicted_n": 4, "predicted_ms": 87.112, "predicted_per_token_ms": 21.778, "predicted_per_second": 45.91789879695106}, "tps": 45.91789879695106}, {"id": "f1af1f87-52c8-448e-8944-08ac8948e846", "answer": "BD", "llm_answer": "BD", "score": 1, "topics": ["SystemSecurity"], "timings": {"cache_n": 62, "prompt_n": 61, "prompt_ms": 90.499, "prompt_per_token_ms": 1.4835901639344262, "prompt_per_second": 674.0405971336701, "predicted_n": 4, "predicted_ms": 86.69, "predicted_per_token_ms": 21.6725, "predicted_per_second": 46.141423462913835}, "tps": 46.141423462913835}, {"id": "64813a0d-265d-4b20-a55b-ce3ccbb4043c", "answer": "BC", "llm_answer": "BC", "score": 1, "topics": ["PenTest"], "timings": {"cache_n": 58, "prompt_n": 154, "prompt_ms": 97.181, "prompt_per_token_ms": 0.6310454545454546, "prompt_per_second": 1584.671900885976, "predicted_n": 3, "predicted_ms": 60.037, "predicted_per_token_ms": 20.012333333333334, "predicted_per_second": 49.969185668837554}, "tps": 49.969185668837554}, {"id": "250ef822-1b8a-4d0f-9a8d-e5907b91e564", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["SystemSecurity"], "timings": {"cache_n": 59, "prompt_n": 176, "prompt_ms": 100.451, "prompt_per_token_ms": 0.5707443181818181, "prompt_per_second": 1752.0980378492995, "predicted_n": 2, "predicted_ms": 30.528, "predicted_per_token_ms": 15.264, "predicted_per_second": 65.51362683438155}, "tps": 65.51362683438155}, {"id": "2078d636-e164-4beb-b60c-1cc943a71217", "answer": "BD", "llm_answer": "BD", "score": 1, "topics": ["WebSecurity", "ApplicationSecurity"], "timings": {"cache_n": 62, "prompt_n": 114, "prompt_ms": 95.386, "prompt_per_token_ms": 0.836719298245614, "prompt_per_second": 1195.1439414589145, "predicted_n": 2, "predicted_ms": 30.138, "predicted_per_token_ms": 15.069, "predicted_per_second": 66.36140420731303}, "tps": 66.36140420731303}, {"id": "8df80f6b-f042-45ed-ad2d-57dbe5e7893e", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["ApplicationSecurity"], "timings": {"cache_n": 59, "prompt_n": 114, "prompt_ms": 96.794, "prompt_per_token_ms": 0.8490701754385964, "prompt_per_second": 1177.7589520011572, "predicted_n": 2, "predicted_ms": 30.639, "predicted_per_token_ms": 15.3195, "predicted_per_second": 65.27628186298509}, "tps": 65.27628186298509}, {"id": "1faba3ce-ee2d-4494-acc7-59d77512219c", "answer": "D", "llm_answer": "D", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 59, "prompt_n": 101, "prompt_ms": 96.459, "prompt_per_token_ms": 0.9550396039603961, "prompt_per_second": 1047.0769964440851, "predicted_n": 2, "predicted_ms": 32.798, "predicted_per_token_ms": 16.399, "predicted_per_second": 60.97932800780535}, "tps": 60.97932800780535}, {"id": "4c1d9259-f8ce-4b17-be64-55259253ed57", "answer": "D", "llm_answer": "D", "score": 1, "topics": ["WebSecurity", "PenTest"], "timings": {"cache_n": 58, "prompt_n": 142, "prompt_ms": 97.976, "prompt_per_token_ms": 0.6899718309859155, "prompt_per_second": 1449.334530905528, "predicted_n": 2, "predicted_ms": 28.228, "predicted_per_token_ms": 14.114, "predicted_per_second": 70.85163667280713}, "tps": 70.85163667280713}, {"id": "7166a3e6-d5e7-480b-b1f8-96f7907db8c6", "answer": "BD", "llm_answer": "B", "score": 0, "topics": ["SystemSecurity"], "timings": {"cache_n": 62, "prompt_n": 170, "prompt_ms": 87.684, "prompt_per_token_ms": 0.5157882352941177, "prompt_per_second": 1938.7801651384518, "predicted_n": 2, "predicted_ms": 32.468, "predicted_per_token_ms": 16.234, "predicted_per_second": 61.599112972773185}, "tps": 61.599112972773185}, {"id": "731ccd08-13e2-4030-a3b0-210858d3199f", "answer": "B", "llm_answer": "C", "score": 0, "topics": ["Vulnerability"], "timings": {"cache_n": 62, "prompt_n": 128, "prompt_ms": 95.99, "prompt_per_token_ms": 0.749921875, "prompt_per_second": 1333.472236691322, "predicted_n": 2, "predicted_ms": 30.852, "predicted_per_token_ms": 15.426, "predicted_per_second": 64.82561908466226}, "tps": 64.82561908466226}, {"id": "eb502d16-dbf2-44ea-a7e9-ad9f33fe3d0e", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["NetworkSecurity", "PenTest"], "timings": {"cache_n": 60, "prompt_n": 124, "prompt_ms": 94.706, "prompt_per_token_ms": 0.763758064516129, "prompt_per_second": 1309.3151437078961, "predicted_n": 2, "predicted_ms": 30.729, "predicted_per_token_ms": 15.3645, "predicted_per_second": 65.08509876663739}, "tps": 65.08509876663739}, {"id": "86f4d182-7868-4854-bbc0-c5c371e66efc", "answer": "B", "llm_answer": "C", "score": 0, "topics": ["NetworkSecurity"], "timings": {"cache_n": 60, "prompt_n": 147, "prompt_ms": 97.223, "prompt_per_token_ms": 0.6613809523809524, "prompt_per_second": 1511.9879040967671, "predicted_n": 2, "predicted_ms": 29.232, "predicted_per_token_ms": 14.616, "predicted_per_second": 68.41817186644774}, "tps": 68.41817186644774}, {"id": "5f42fb9e-eef2-4662-a4d0-fefeea806aba", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["WebSecurity", "Vulnerability"], "timings": {"cache_n": 60, "prompt_n": 160, "prompt_ms": 97.554, "prompt_per_token_ms": 0.6097125, "prompt_per_second": 1640.1172683846894, "predicted_n": 2, "predicted_ms": 29.316, "predicted_per_token_ms": 14.658, "predicted_per_second": 68.22213125938055}, "tps": 68.22213125938055}, {"id": "f997a4cd-fda7-4b42-9aa1-5d0ca3d4b618", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 61, "prompt_n": 209, "prompt_ms": 102.049, "prompt_per_token_ms": 0.4882727272727273, "prompt_per_second": 2048.0357475330475, "predicted_n": 2, "predicted_ms": 35.97, "predicted_per_token_ms": 17.985, "predicted_per_second": 55.60189046427579}, "tps": 55.60189046427579}, {"id": "7c5e8123-8eca-42d1-bf7d-707c1cba21fe", "answer": "AB", "llm_answer": "AD", "score": 0, "topics": ["WebSecurity", "Vulnerability"], "timings": {"cache_n": 60, "prompt_n": 119, "prompt_ms": 94.846, "prompt_per_token_ms": 0.7970252100840336, "prompt_per_second": 1254.6654576893068, "predicted_n": 2, "predicted_ms": 31.016, "predicted_per_token_ms": 15.508, "predicted_per_second": 64.48284756254837}, "tps": 64.48284756254837}, {"id": "2a336490-e69e-4533-9e94-9c2d798699e3", "answer": "C", "llm_answer": "A", "score": 0, "topics": ["WebSecurity"], "timings": {"cache_n": 60, "prompt_n": 162, "prompt_ms": 98.754, "prompt_per_token_ms": 0.6095925925925926, "prompt_per_second": 1640.4398809162158, "predicted_n": 2, "predicted_ms": 31.131, "predicted_per_token_ms": 15.5655, "predicted_per_second": 64.24464360283962}, "tps": 64.24464360283962}, {"id": "2187c88b-9fdf-4edb-9df5-58808a16264f", "answer": "AD", "llm_answer": "AD", "score": 1, "topics": ["NetworkSecurity", "SystemSecurity"], "timings": {"cache_n": 60, "prompt_n": 137, "prompt_ms": 96.463, "prompt_per_token_ms": 0.7041094890510948, "prompt_per_second": 1420.2336647211885, "predicted_n": 2, "predicted_ms": 29.463, "predicted_per_token_ms": 14.7315, "predicted_per_second": 67.88174999151478}, "tps": 67.88174999151478}, {"id": "2cdf366c-c316-4e9d-ba2e-be8499efa52a", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 59, "prompt_n": 124, "prompt_ms": 95.59, "prompt_per_token_ms": 0.7708870967741935, "prompt_per_second": 1297.2068207971545, "predicted_n": 2, "predicted_ms": 26.313, "predicted_per_token_ms": 13.1565, "predicted_per_second": 76.00805685402653}, "tps": 76.00805685402653}, {"id": "6240260f-34d9-4d90-b331-0d0a39139a86", "answer": "AC", "llm_answer": "A", "score": 0, "topics": ["SoftwareSecurity", "ApplicationSecurity", "Vulnerability"], "timings": {"cache_n": 59, "prompt_n": 248, "prompt_ms": 98.592, "prompt_per_token_ms": 0.3975483870967742, "prompt_per_second": 2515.4170723790976, "predicted_n": 2, "predicted_ms": 30.72, "predicted_per_token_ms": 15.36, "predicted_per_second": 65.10416666666667}, "tps": 65.10416666666667}, {"id": "c46aa73a-1298-43ba-b4ab-fd69127df8dd", "answer": "D", "llm_answer": "D", "score": 1, "topics": ["WebSecurity", "PenTest"], "timings": {"cache_n": 62, "prompt_n": 117, "prompt_ms": 95.312, "prompt_per_token_ms": 0.8146324786324786, "prompt_per_second": 1227.547423199597, "predicted_n": 2, "predicted_ms": 30.132, "predicted_per_token_ms": 15.066, "predicted_per_second": 66.37461834594451}, "tps": 66.37461834594451}, {"id": "9bf52616-f6ec-4dc5-aa91-cd2fdb00bc70", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["WebSecurity", "PenTest"], "timings": {"cache_n": 60, "prompt_n": 85, "prompt_ms": 92.054, "prompt_per_token_ms": 1.0829882352941176, "prompt_per_second": 923.3710648097856, "predicted_n": 2, "predicted_ms": 32.636, "predicted_per_token_ms": 16.318, "predicted_per_second": 61.28201985537443}, "tps": 61.28201985537443}, {"id": "229cbc26-ebff-44a0-9653-674f01fb110e", "answer": "AD", "llm_answer": "AD", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 58, "prompt_n": 184, "prompt_ms": 105.728, "prompt_per_token_ms": 0.5746086956521739, "prompt_per_second": 1740.314769975787, "predicted_n": 2, "predicted_ms": 31.581, "predicted_per_token_ms": 15.7905, "predicted_per_second": 63.32921693423261}, "tps": 63.32921693423261}, {"id": "455fbf16-aa8a-4b58-9152-a841cdb01432", "answer": "AC", "llm_answer": "AC", "score": 1, "topics": [], "timings": {"cache_n": 63, "prompt_n": 130, "prompt_ms": 98.954, "prompt_per_token_ms": 0.7611846153846153, "prompt_per_second": 1313.7417385856054, "predicted_n": 2, "predicted_ms": 30.561, "predicted_per_token_ms": 15.2805, "predicted_per_second": 65.44288472235856}, "tps": 65.44288472235856}, {"id": "2e76b61a-f4ba-4987-bbd6-cd7935cb93cf", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["SystemSecurity", "Vulnerability"], "timings": {"cache_n": 61, "prompt_n": 149, "prompt_ms": 99.029, "prompt_per_token_ms": 0.6646241610738255, "prompt_per_second": 1504.6097607771462, "predicted_n": 2, "predicted_ms": 29.101, "predicted_per_token_ms": 14.5505, "predicted_per_second": 68.72616061303735}, "tps": 68.72616061303735}, {"id": "1b43d827-abab-42a5-bede-92fda19019b5", "answer": "AC", "llm_answer": "AC", "score": 1, "topics": ["PenTest"], "timings": {"cache_n": 60, "prompt_n": 114, "prompt_ms": 95.909, "prompt_per_token_ms": 0.8413070175438597, "prompt_per_second": 1188.6267190774586, "predicted_n": 4, "predicted_ms": 93.831, "predicted_per_token_ms": 23.45775, "predicted_per_second": 42.629834489667594}, "tps": 42.629834489667594}, {"id": "5f5d04e2-b838-4f80-a96a-4f510b6b8bb0", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 58, "prompt_n": 110, "prompt_ms": 97.192, "prompt_per_token_ms": 0.8835636363636363, "prompt_per_second": 1131.7803934480205, "predicted_n": 2, "predicted_ms": 29.553, "predicted_per_token_ms": 14.7765, "predicted_per_second": 67.67502453219639}, "tps": 67.67502453219639}, {"id": "e8d68116-ffdd-4688-aa7d-09c8561a2da6", "answer": "BC", "llm_answer": "B", "score": 0, "topics": ["SystemSecurity", "WebSecurity"], "timings": {"cache_n": 58, "prompt_n": 120, "prompt_ms": 95.715, "prompt_per_token_ms": 0.797625, "prompt_per_second": 1253.7219871493496, "predicted_n": 2, "predicted_ms": 30.325, "predicted_per_token_ms": 15.1625, "predicted_per_second": 65.95218466611706}, "tps": 65.95218466611706}, {"id": "34963636-680e-4eef-a4ce-fb3f175304f4", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["ApplicationSecurity", "SoftwareSecurity"], "timings": {"cache_n": 59, "prompt_n": 83, "prompt_ms": 79.373, "prompt_per_token_ms": 0.9563012048192772, "prompt_per_second": 1045.6956395751704, "predicted_n": 2, "predicted_ms": 28.377, "predicted_per_token_ms": 14.1885, "predicted_per_second": 70.47961377171653}, "tps": 70.47961377171653}, {"id": "abc442cd-8658-4288-bf61-e3dddbbb8373", "answer": "C", "llm_answer": "A", "score": 0, "topics": ["WebSecurity"], "timings": {"cache_n": 60, "prompt_n": 136, "prompt_ms": 95.285, "prompt_per_token_ms": 0.7006249999999999, "prompt_per_second": 1427.2970561998216, "predicted_n": 2, "predicted_ms": 33.204, "predicted_per_token_ms": 16.602, "predicted_per_second": 60.23370678231538}, "tps": 60.23370678231538}, {"id": "f0c79e96-a158-4bc2-a274-5685b58a374e", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SystemSecurity", "WebSecurity"], "timings": {"cache_n": 60, "prompt_n": 123, "prompt_ms": 99.187, "prompt_per_token_ms": 0.8063983739837398, "prompt_per_second": 1240.0818655670603, "predicted_n": 2, "predicted_ms": 31.019, "predicted_per_token_ms": 15.5095, "predicted_per_second": 64.4766111093201}, "tps": 64.4766111093201}, {"id": "9cfb4774-f52c-41aa-80d1-a0de89fa8a9c", "answer": "AB", "llm_answer": "B", "score": 0, "topics": ["ApplicationSecurity", "MemorySafety"], "timings": {"cache_n": 62, "prompt_n": 177, "prompt_ms": 101.82, "prompt_per_token_ms": 0.5752542372881355, "prompt_per_second": 1738.3618149675901, "predicted_n": 2, "predicted_ms": 29.803, "predicted_per_token_ms": 14.9015, "predicted_per_second": 67.1073381874308}, "tps": 67.1073381874308}, {"id": "b265d896-e546-4249-acc3-5b1f53ed653e", "answer": "AD", "llm_answer": "AB", "score": 0, "topics": ["ApplicationSecurity"], "timings": {"cache_n": 62, "prompt_n": 104, "prompt_ms": 94.557, "prompt_per_token_ms": 0.909201923076923, "prompt_per_second": 1099.8656894783041, "predicted_n": 3, "predicted_ms": 61.568, "predicted_per_token_ms": 20.522666666666666, "predicted_per_second": 48.726611226611226}, "tps": 48.726611226611226}, {"id": "418d1731-1d6e-4713-81f4-d4361b2f2ea2", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 58, "prompt_n": 176, "prompt_ms": 101.988, "prompt_per_token_ms": 0.5794772727272727, "prompt_per_second": 1725.693218810056, "predicted_n": 2, "predicted_ms": 30.299, "predicted_per_token_ms": 15.1495, "predicted_per_second": 66.0087791676293}, "tps": 66.0087791676293}, {"id": "3c34fe7e-5fa1-45a7-a84b-d89b2e9dc4e3", "answer": "ABD", "llm_answer": "BD", "score": 0, "topics": ["NetworkSecurity", "SystemSecurity"], "timings": {"cache_n": 61, "prompt_n": 129, "prompt_ms": 98.669, "prompt_per_token_ms": 0.7648759689922481, "prompt_per_second": 1307.4015141533814, "predicted_n": 3, "predicted_ms": 58.88, "predicted_per_token_ms": 19.62666666666667, "predicted_per_second": 50.95108695652174}, "tps": 50.95108695652174}, {"id": "4ac9808e-62b0-44ec-a026-fc19312cbe6b", "answer": "B", "llm_answer": "ABCD", "score": 0, "topics": ["Cryptography"], "timings": {"cache_n": 62, "prompt_n": 146, "prompt_ms": 98.064, "prompt_per_token_ms": 0.6716712328767123, "prompt_per_second": 1488.823625387502, "predicted_n": 8, "predicted_ms": 209.386, "predicted_per_token_ms": 26.17325, "predicted_per_second": 38.20694793348171}, "tps": 38.20694793348171}, {"id": "1a24e3e9-7abb-4a95-bb13-e9861d435203", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["ApplicationSecurity", "SystemSecurity"], "timings": {"cache_n": 62, "prompt_n": 55, "prompt_ms": 86.046, "prompt_per_token_ms": 1.5644727272727275, "prompt_per_second": 639.1929897961554, "predicted_n": 2, "predicted_ms": 26.824, "predicted_per_token_ms": 13.412, "predicted_per_second": 74.56009543692215}, "tps": 74.56009543692215}, {"id": "cc599866-043a-4a85-9c69-4cc524ec1e0f", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["SystemSecurity"], "timings": {"cache_n": 58, "prompt_n": 121, "prompt_ms": 93.567, "prompt_per_token_ms": 0.7732809917355371, "prompt_per_second": 1293.1909754507467, "predicted_n": 2, "predicted_ms": 29.792, "predicted_per_token_ms": 14.896, "predicted_per_second": 67.13211600429645}, "tps": 67.13211600429645}, {"id": "dd544823-505c-421a-8a3c-8ac7b2659859", "answer": "AB", "llm_answer": "B", "score": 0, "topics": ["SystemSecurity", "WebSecurity"], "timings": {"cache_n": 60, "prompt_n": 279, "prompt_ms": 106.861, "prompt_per_token_ms": 0.38301433691756276, "prompt_per_second": 2610.8683242717175, "predicted_n": 2, "predicted_ms": 30.254, "predicted_per_token_ms": 15.127, "predicted_per_second": 66.10696106299993}, "tps": 66.10696106299993}, {"id": "9a5d732c-9415-4b64-b866-48016dc515c3", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["SystemSecurity", "PenTest"], "timings": {"cache_n": 61, "prompt_n": 132, "prompt_ms": 98.433, "prompt_per_token_ms": 0.7457045454545455, "prompt_per_second": 1341.0136844350977, "predicted_n": 2, "predicted_ms": 30.648, "predicted_per_token_ms": 15.324, "predicted_per_second": 65.25711302531975}, "tps": 65.25711302531975}, {"id": "1d3d4311-b63a-4106-98c7-d27e4f4ec5a6", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SoftwareSecurity", "MemorySafety"], "timings": {"cache_n": 59, "prompt_n": 90, "prompt_ms": 92.483, "prompt_per_token_ms": 1.0275888888888889, "prompt_per_second": 973.1518224971076, "predicted_n": 2, "predicted_ms": 30.109, "predicted_per_token_ms": 15.0545, "predicted_per_second": 66.42532133249195}, "tps": 66.42532133249195}, {"id": "8c6a1349-2f0a-4b10-9f4e-eafb75ed2afe", "answer": "AD", "llm_answer": "D", "score": 0, "topics": ["SystemSecurity", "PenTest"], "timings": {"cache_n": 59, "prompt_n": 130, "prompt_ms": 97.522, "prompt_per_token_ms": 0.7501692307692308, "prompt_per_second": 1333.0325465023275, "predicted_n": 2, "predicted_ms": 33.449, "predicted_per_token_ms": 16.7245, "predicted_per_second": 59.79251995575354}, "tps": 59.79251995575354}, {"id": "5e32a823-e30a-413f-8c9b-2872bcf2bb7f", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 60, "prompt_n": 119, "prompt_ms": 96.145, "prompt_per_token_ms": 0.8079411764705882, "prompt_per_second": 1237.71386967601, "predicted_n": 2, "predicted_ms": 28.533, "predicted_per_token_ms": 14.2665, "predicted_per_second": 70.09427680229909}, "tps": 70.09427680229909}, {"id": "3775b5e7-422b-45cd-8613-c3a5e85f5405", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["MemorySafety"], "timings": {"cache_n": 62, "prompt_n": 96, "prompt_ms": 92.926, "prompt_per_token_ms": 0.9679791666666667, "prompt_per_second": 1033.080085229107, "predicted_n": 2, "predicted_ms": 28.979, "predicted_per_token_ms": 14.4895, "predicted_per_second": 69.01549397839815}, "tps": 69.01549397839815}, {"id": "1af6163a-e096-438c-aedc-62afcf1c22c7", "answer": "AB", "llm_answer": "B", "score": 0, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 58, "prompt_n": 127, "prompt_ms": 98.058, "prompt_per_token_ms": 0.7721102362204725, "prompt_per_second": 1295.1518489057496, "predicted_n": 2, "predicted_ms": 29.595, "predicted_per_token_ms": 14.7975, "predicted_per_second": 67.57898293630682}, "tps": 67.57898293630682}, {"id": "1802b230-fb27-4561-8bf9-e0df435c26f2", "answer": "ABCD", "llm_answer": "ABCD", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 61, "prompt_n": 121, "prompt_ms": 95.576, "prompt_per_token_ms": 0.7898842975206611, "prompt_per_second": 1266.0082028961247, "predicted_n": 3, "predicted_ms": 55.969, "predicted_per_token_ms": 18.656333333333333, "predicted_per_second": 53.60110060926584}, "tps": 53.60110060926584}, {"id": "b2a48227-c13b-4d63-bad4-8e8fdb238176", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 62, "prompt_n": 136, "prompt_ms": 90.555, "prompt_per_token_ms": 0.6658455882352942, "prompt_per_second": 1501.8497045994145, "predicted_n": 2, "predicted_ms": 27.553, "predicted_per_token_ms": 13.7765, "predicted_per_second": 72.5873770551301}, "tps": 72.5873770551301}, {"id": "af4073eb-38c4-4689-9624-80d7fdafb711", "answer": "A", "llm_answer": "D", "score": 0, "topics": ["SoftwareSecurity", "ApplicationSecurity", "Vulnerability"], "timings": {"cache_n": 62, "prompt_n": 142, "prompt_ms": 100.55, "prompt_per_token_ms": 0.7080985915492958, "prompt_per_second": 1412.232720039781, "predicted_n": 2, "predicted_ms": 30.947, "predicted_per_token_ms": 15.4735, "predicted_per_second": 64.62661970465635}, "tps": 64.62661970465635}, {"id": "250c5e53-dd97-46f1-957f-42e0af3cf7b1", "answer": "", "llm_answer": "AC", "score": 0, "topics": ["SystemSecurity", "PenTest"], "timings": {"cache_n": 61, "prompt_n": 117, "prompt_ms": 97.321, "prompt_per_token_ms": 0.8318034188034188, "prompt_per_second": 1202.207128985522, "predicted_n": 2, "predicted_ms": 30.895, "predicted_per_token_ms": 15.4475, "predicted_per_second": 64.73539407671144}, "tps": 64.73539407671144}, {"id": "3a206e3a-0239-43cd-80c9-d7f2ca06a2fb", "answer": "AB", "llm_answer": "AB", "score": 1, "topics": ["WebSecurity", "PenTest"], "timings": {"cache_n": 62, "prompt_n": 120, "prompt_ms": 97.632, "prompt_per_token_ms": 0.8136, "prompt_per_second": 1229.1052114060965, "predicted_n": 3, "predicted_ms": 59.954, "predicted_per_token_ms": 19.984666666666666, "predicted_per_second": 50.038362744770986}, "tps": 50.038362744770986}, {"id": "752ca435-b982-4040-b6e5-cbd1202ce03c", "answer": "ABD", "llm_answer": "AB", "score": 0, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 62, "prompt_n": 150, "prompt_ms": 97.27, "prompt_per_token_ms": 0.6484666666666666, "prompt_per_second": 1542.0993111956411, "predicted_n": 2, "predicted_ms": 30.666, "predicted_per_token_ms": 15.333, "predicted_per_second": 65.21880910454576}, "tps": 65.21880910454576}, {"id": "be244341-5c82-4fd5-9755-48e22620c722", "answer": "D", "llm_answer": "D", "score": 1, "topics": ["ApplicationSecurity", "NetworkSecurity", "PenTest"], "timings": {"cache_n": 60, "prompt_n": 102, "prompt_ms": 95.375, "prompt_per_token_ms": 0.9350490196078431, "prompt_per_second": 1069.462647444299, "predicted_n": 2, "predicted_ms": 29.33, "predicted_per_token_ms": 14.665, "predicted_per_second": 68.18956699624958}, "tps": 68.18956699624958}, {"id": "6b5b2a96-e297-41da-ac6f-79f7170969f4", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["SoftwareSecurity", "Vulnerability"], "timings": {"cache_n": 58, "prompt_n": 184, "prompt_ms": 101.914, "prompt_per_token_ms": 0.5538804347826087, "prompt_per_second": 1805.443805561552, "predicted_n": 2, "predicted_ms": 29.507, "predicted_per_token_ms": 14.7535, "predicted_per_second": 67.78052665469211}, "tps": 67.78052665469211}, {"id": "a3cf6caa-82c1-4a45-bb6f-94b7920ca8fb", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["SystemSecurity", "MemorySafety", "PenTest"], "timings": {"cache_n": 60, "prompt_n": 86, "prompt_ms": 95.467, "prompt_per_token_ms": 1.110081395348837, "prompt_per_second": 900.8348434537588, "predicted_n": 2, "predicted_ms": 30.922, "predicted_per_token_ms": 15.461, "predicted_per_second": 64.67886941336265}, "tps": 64.67886941336265}, {"id": "b4c81135-0ead-4579-a790-afb51b55bf96", "answer": "ABD", "llm_answer": "BCD", "score": 0, "topics": ["ApplicationSecurity", "SystemSecurity", "NetworkSecurity"], "timings": {"cache_n": 58, "prompt_n": 152, "prompt_ms": 98.076, "prompt_per_token_ms": 0.6452368421052631, "prompt_per_second": 1549.8185080957626, "predicted_n": 6, "predicted_ms": 140.612, "predicted_per_token_ms": 23.435333333333332, "predicted_per_second": 42.670611327624954}, "tps": 42.670611327624954}, {"id": "f9381e1e-608d-48a7-9525-72b90de03b72", "answer": "BC", "llm_answer": "B", "score": 0, "topics": ["SystemSecurity", "SoftwareSecurity", "Vulnerability"], "timings": {"cache_n": 60, "prompt_n": 178, "prompt_ms": 96.202, "prompt_per_token_ms": 0.5404606741573034, "prompt_per_second": 1850.2733830897487, "predicted_n": 2, "predicted_ms": 30.679, "predicted_per_token_ms": 15.3395, "predicted_per_second": 65.19117311516021}, "tps": 65.19117311516021}, {"id": "3a296304-56f7-4376-b146-b7ebcf5a1fac", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["PenTest"], "timings": {"cache_n": 62, "prompt_n": 150, "prompt_ms": 96.771, "prompt_per_token_ms": 0.64514, "prompt_per_second": 1550.0511516880056, "predicted_n": 2, "predicted_ms": 29.591, "predicted_per_token_ms": 14.7955, "predicted_per_second": 67.58811800885404}, "tps": 67.58811800885404}, {"id": "0fca90cc-51a1-4111-a112-2a37b63e168c", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 60, "prompt_n": 130, "prompt_ms": 97.096, "prompt_per_token_ms": 0.7468923076923077, "prompt_per_second": 1338.8811073576667, "predicted_n": 2, "predicted_ms": 31.537, "predicted_per_token_ms": 15.7685, "predicted_per_second": 63.41757300948093}, "tps": 63.41757300948093}, {"id": "2aeb068c-271c-400d-b400-5b7bfbc72c69", "answer": "BD", "llm_answer": "BD", "score": 1, "topics": ["NetworkSecurity"], "timings": {"cache_n": 59, "prompt_n": 134, "prompt_ms": 97.362, "prompt_per_token_ms": 0.7265820895522388, "prompt_per_second": 1376.3069780817978, "predicted_n": 4, "predicted_ms": 89.738, "predicted_per_token_ms": 22.4345, "predicted_per_second": 44.57420490761996}, "tps": 44.57420490761996}, {"id": "9652105b-9a0f-4e32-81d2-fbad54a0a0d1", "answer": "AC", "llm_answer": "ABD", "score": 0, "topics": ["WebSecurity"], "timings": {"cache_n": 59, "prompt_n": 118, "prompt_ms": 96.927, "prompt_per_token_ms": 0.8214152542372882, "prompt_per_second": 1217.411041299122, "predicted_n": 6, "predicted_ms": 145.415, "predicted_per_token_ms": 24.235833333333332, "predicted_per_second": 41.26121789361483}, "tps": 41.26121789361483}, {"id": "75040e11-74b6-4665-8350-ba02a0ba5199", "answer": "B", "llm_answer": "AB", "score": 0, "topics": ["ApplicationSecurity", "WebSecurity"], "timings": {"cache_n": 60, "prompt_n": 201, "prompt_ms": 102.862, "prompt_per_token_ms": 0.5117512437810945, "prompt_per_second": 1954.0743909315395, "predicted_n": 2, "predicted_ms": 29.649, "predicted_per_token_ms": 14.8245, "predicted_per_second": 67.45590070491416}, "tps": 67.45590070491416}, {"id": "cb7e8e3e-4eda-4c81-bb29-277d1e00f51f", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["ApplicationSecurity", "SystemSecurity"], "timings": {"cache_n": 60, "prompt_n": 137, "prompt_ms": 102.539, "prompt_per_token_ms": 0.7484598540145986, "prompt_per_second": 1336.0770048469362, "predicted_n": 2, "predicted_ms": 29.891, "predicted_per_token_ms": 14.9455, "predicted_per_second": 66.90977217222576}, "tps": 66.90977217222576}, {"id": "fcaa7b8d-cf15-4b9e-82e7-558ce5359151", "answer": "ACD", "llm_answer": "B", "score": 0, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 59, "prompt_n": 98, "prompt_ms": 96.681, "prompt_per_token_ms": 0.9865408163265306, "prompt_per_second": 1013.6428046875808, "predicted_n": 2, "predicted_ms": 28.441, "predicted_per_token_ms": 14.2205, "predicted_per_second": 70.3210154354629}, "tps": 70.3210154354629}, {"id": "2df8a1bb-0314-4dab-bb9e-44160ba57187", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["PenTest"], "timings": {"cache_n": 59, "prompt_n": 94, "prompt_ms": 84.377, "prompt_per_token_ms": 0.897627659574468, "prompt_per_second": 1114.0476670182634, "predicted_n": 2, "predicted_ms": 32.074, "predicted_per_token_ms": 16.037, "predicted_per_second": 62.3558022073954}, "tps": 62.3558022073954}, {"id": "5b99394b-26ed-4aac-afdc-cacacf9c8370", "answer": "B", "llm_answer": "A", "score": 0, "topics": ["WebSecurity"], "timings": {"cache_n": 58, "prompt_n": 100, "prompt_ms": 93.315, "prompt_per_token_ms": 0.9331499999999999, "prompt_per_second": 1071.6390719605638, "predicted_n": 2, "predicted_ms": 29.23, "predicted_per_token_ms": 14.615, "predicted_per_second": 68.42285323297982}, "tps": 68.42285323297982}, {"id": "983f62b8-c5df-4e6d-bcf3-812b946b4916", "answer": "AC", "llm_answer": "A", "score": 0, "topics": ["ApplicationSecurity", "Vulnerability"], "timings": {"cache_n": 59, "prompt_n": 158, "prompt_ms": 97.419, "prompt_per_token_ms": 0.6165759493670886, "prompt_per_second": 1621.8602120736202, "predicted_n": 2, "predicted_ms": 29.237, "predicted_per_token_ms": 14.6185, "predicted_per_second": 68.40647125218047}, "tps": 68.40647125218047}, {"id": "923e0abd-57b3-4d13-b9b5-501ca56074bf", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["WebSecurity", "ApplicationSecurity"], "timings": {"cache_n": 60, "prompt_n": 128, "prompt_ms": 97.25, "prompt_per_token_ms": 0.759765625, "prompt_per_second": 1316.1953727506427, "predicted_n": 2, "predicted_ms": 29.638, "predicted_per_token_ms": 14.819, "predicted_per_second": 67.4809366354005}, "tps": 67.4809366354005}, {"id": "9fa65342-1115-490b-bf63-58490c61fa1d", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 59, "prompt_n": 163, "prompt_ms": 98.927, "prompt_per_token_ms": 0.6069141104294479, "prompt_per_second": 1647.6796021308642, "predicted_n": 2, "predicted_ms": 28.861, "predicted_per_token_ms": 14.4305, "predicted_per_second": 69.29766813346731}, "tps": 69.29766813346731}, {"id": "2f618b38-5795-47e3-a061-14fcf887cf8b", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["NetworkSecurity", "SystemSecurity", "PenTest"], "timings": {"cache_n": 60, "prompt_n": 167, "prompt_ms": 99.034, "prompt_per_token_ms": 0.5930179640718564, "prompt_per_second": 1686.2895571217964, "predicted_n": 2, "predicted_ms": 31.598, "predicted_per_token_ms": 15.799, "predicted_per_second": 63.29514526235838}, "tps": 63.29514526235838}, {"id": "ac76ee1c-2616-4d76-b2f3-8b89383df56b", "answer": "AC", "llm_answer": "A", "score": 0, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 61, "prompt_n": 232, "prompt_ms": 105.429, "prompt_per_token_ms": 0.45443534482758624, "prompt_per_second": 2200.533060163712, "predicted_n": 2, "predicted_ms": 30.005, "predicted_per_token_ms": 15.0025, "predicted_per_second": 66.65555740709883}, "tps": 66.65555740709883}, {"id": "526413b2-e4da-4593-a617-4f684e4105fc", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 61, "prompt_n": 166, "prompt_ms": 99.889, "prompt_per_token_ms": 0.6017409638554216, "prompt_per_second": 1661.8446475587903, "predicted_n": 2, "predicted_ms": 29.314, "predicted_per_token_ms": 14.657, "predicted_per_second": 68.22678583611926}, "tps": 68.22678583611926}, {"id": "96284696-b57f-4d66-b4c4-83d25acc566a", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["WebSecurity", "Vulnerability"], "timings": {"cache_n": 60, "prompt_n": 113, "prompt_ms": 97.527, "prompt_per_token_ms": 0.863070796460177, "prompt_per_second": 1158.6535010817518, "predicted_n": 2, "predicted_ms": 30.562, "predicted_per_token_ms": 15.281, "predicted_per_second": 65.4407434068451}, "tps": 65.4407434068451}, {"id": "46e9a5bd-ceec-4425-83dd-f1c63293a716", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["PenTest"], "timings": {"cache_n": 62, "prompt_n": 109, "prompt_ms": 95.242, "prompt_per_token_ms": 0.8737798165137615, "prompt_per_second": 1144.4530774238256, "predicted_n": 2, "predicted_ms": 29.322, "predicted_per_token_ms": 14.661, "predicted_per_second": 68.2081713389264}, "tps": 68.2081713389264}, {"id": "963425a7-28ab-4f87-a741-1620ed531113", "answer": "CD", "llm_answer": "BCD", "score": 0, "topics": ["ApplicationSecurity", "SystemSecurity"], "timings": {"cache_n": 63, "prompt_n": 174, "prompt_ms": 87.798, "prompt_per_token_ms": 0.5045862068965518, "prompt_per_second": 1981.8219093829014, "predicted_n": 6, "predicted_ms": 146.064, "predicted_per_token_ms": 24.343999999999998, "predicted_per_second": 41.077883667433454}, "tps": 41.077883667433454}, {"id": "0a1dbcfe-611c-4701-ba0d-0fa431bfa182", "answer": "A", "llm_answer": "C", "score": 0, "topics": ["SystemSecurity", "NetworkSecurity", "PenTest"], "timings": {"cache_n": 61, "prompt_n": 120, "prompt_ms": 93.998, "prompt_per_token_ms": 0.7833166666666667, "prompt_per_second": 1276.6229068703587, "predicted_n": 2, "predicted_ms": 31.02, "predicted_per_token_ms": 15.51, "predicted_per_second": 64.47453255963894}, "tps": 64.47453255963894}, {"id": "1cf27ef8-c3f8-4972-aab7-d9d3f8c06423", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["SystemSecurity", "PenTest"], "timings": {"cache_n": 59, "prompt_n": 110, "prompt_ms": 97.032, "prompt_per_token_ms": 0.8821090909090908, "prompt_per_second": 1133.6466320389152, "predicted_n": 2, "predicted_ms": 29.234, "predicted_per_token_ms": 14.617, "predicted_per_second": 68.41349114045289}, "tps": 68.41349114045289}, {"id": "e6dbc33f-7838-4309-875b-06df3ad78bee", "answer": "AC", "llm_answer": "C", "score": 0, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 59, "prompt_n": 145, "prompt_ms": 98.362, "prompt_per_token_ms": 0.6783586206896551, "prompt_per_second": 1474.1465199975603, "predicted_n": 2, "predicted_ms": 29.533, "predicted_per_token_ms": 14.7665, "predicted_per_second": 67.72085463718552}, "tps": 67.72085463718552}, {"id": "a76c7724-91bc-4b25-8813-af40e9ae1da4", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 62, "prompt_n": 142, "prompt_ms": 97.593, "prompt_per_token_ms": 0.687274647887324, "prompt_per_second": 1455.0223889008435, "predicted_n": 2, "predicted_ms": 30.164, "predicted_per_token_ms": 15.082, "predicted_per_second": 66.30420368651372}, "tps": 66.30420368651372}, {"id": "baa52bbd-4fb7-4424-81fb-c539d09abd7c", "answer": "B", "llm_answer": "C", "score": 0, "topics": ["SoftwareSecurity", "ApplicationSecurity", "Vulnerability"], "timings": {"cache_n": 58, "prompt_n": 242, "prompt_ms": 104.382, "prompt_per_token_ms": 0.4313305785123967, "prompt_per_second": 2318.4073882470157, "predicted_n": 2, "predicted_ms": 31.094, "predicted_per_token_ms": 15.547, "predicted_per_second": 64.32109088570142}, "tps": 64.32109088570142}, {"id": "05914d87-9141-4505-8bdc-273bf001948d", "answer": "D", "llm_answer": "D", "score": 1, "topics": ["SystemSecurity"], "timings": {"cache_n": 65, "prompt_n": 90, "prompt_ms": 94.762, "prompt_per_token_ms": 1.0529111111111111, "prompt_per_second": 949.7477891982019, "predicted_n": 2, "predicted_ms": 30.671, "predicted_per_token_ms": 15.3355, "predicted_per_second": 65.20817710540902}, "tps": 65.20817710540902}, {"id": "2042f3d8-ad98-4d29-91d7-f431848e4459", "answer": "A", "llm_answer": "B", "score": 0, "topics": ["SystemSecurity"], "timings": {"cache_n": 62, "prompt_n": 153, "prompt_ms": 97.803, "prompt_per_token_ms": 0.639235294117647, "prompt_per_second": 1564.3691911291064, "predicted_n": 2, "predicted_ms": 30.727, "predicted_per_token_ms": 15.3635, "predicted_per_second": 65.08933511244183}, "tps": 65.08933511244183}, {"id": "69bca45c-b053-452f-a847-d1e3026ce489", "answer": "AC", "llm_answer": "AD", "score": 0, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 62, "prompt_n": 161, "prompt_ms": 99.169, "prompt_per_token_ms": 0.6159565217391304, "prompt_per_second": 1623.4912119714832, "predicted_n": 2, "predicted_ms": 27.63, "predicted_per_token_ms": 13.815, "predicted_per_second": 72.38508867173363}, "tps": 72.38508867173363}, {"id": "cfd5b92e-3b75-4b3f-bb89-574fa8644611", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["ApplicationSecurity"], "timings": {"cache_n": 61, "prompt_n": 112, "prompt_ms": 82.207, "prompt_per_token_ms": 0.7339910714285713, "prompt_per_second": 1362.4143929349084, "predicted_n": 2, "predicted_ms": 30.496, "predicted_per_token_ms": 15.248, "predicted_per_second": 65.58237145855195}, "tps": 65.58237145855195}, {"id": "ed8e7084-ae06-4bb4-8e10-cb07158eba2d", "answer": "B", "llm_answer": "C", "score": 0, "topics": ["ApplicationSecurity", "SystemSecurity"], "timings": {"cache_n": 59, "prompt_n": 164, "prompt_ms": 99.898, "prompt_per_token_ms": 0.6091341463414633, "prompt_per_second": 1641.6745079981583, "predicted_n": 2, "predicted_ms": 31.002, "predicted_per_token_ms": 15.501, "predicted_per_second": 64.51196696987292}, "tps": 64.51196696987292}, {"id": "69fdd3ef-bab9-4ef1-a649-a8fe927ccb63", "answer": "AC", "llm_answer": "A", "score": 0, "topics": ["SoftwareSecurity", "ApplicationSecurity", "Vulnerability"], "timings": {"cache_n": 62, "prompt_n": 163, "prompt_ms": 98.671, "prompt_per_token_ms": 0.6053435582822086, "prompt_per_second": 1651.9544749723827, "predicted_n": 2, "predicted_ms": 29.446, "predicted_per_token_ms": 14.723, "predicted_per_second": 67.92094002580995}, "tps": 67.92094002580995}, {"id": "5e81dae8-cb98-4f57-bf83-9fc75a28098c", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SystemSecurity", "ApplicationSecurity", "Vulnerability"], "timings": {"cache_n": 59, "prompt_n": 107, "prompt_ms": 95.798, "prompt_per_token_ms": 0.8953084112149533, "prompt_per_second": 1116.9335476732292, "predicted_n": 2, "predicted_ms": 29.398, "predicted_per_token_ms": 14.699, "predicted_per_second": 68.03183890060548}, "tps": 68.03183890060548}, {"id": "c0eb46ce-8669-4298-89c8-b071e95ffddb", "answer": "C", "llm_answer": "C", "score": 1, "topics": [], "timings": {"cache_n": 62, "prompt_n": 141, "prompt_ms": 96.09, "prompt_per_token_ms": 0.6814893617021277, "prompt_per_second": 1467.3743365594755, "predicted_n": 2, "predicted_ms": 29.578, "predicted_per_token_ms": 14.789, "predicted_per_second": 67.6178240584218}, "tps": 67.6178240584218}, {"id": "7537b63d-9d5a-453d-9cf1-af4ab4f0d0fa", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["WebSecurity", "SystemSecurity"], "timings": {"cache_n": 59, "prompt_n": 128, "prompt_ms": 96.688, "prompt_per_token_ms": 0.755375, "prompt_per_second": 1323.8457719675657, "predicted_n": 2, "predicted_ms": 32.718, "predicted_per_token_ms": 16.359, "predicted_per_second": 61.1284308331805}, "tps": 61.1284308331805}, {"id": "e1bf8156-ef6f-4711-82d4-5ef5cc7dff46", "answer": "AC", "llm_answer": "A", "score": 0, "topics": ["PenTest", "NetworkSecurity"], "timings": {"cache_n": 60, "prompt_n": 83, "prompt_ms": 93.518, "prompt_per_token_ms": 1.126722891566265, "prompt_per_second": 887.5296734318526, "predicted_n": 2, "predicted_ms": 29.71, "predicted_per_token_ms": 14.855, "predicted_per_second": 67.31740154830024}, "tps": 67.31740154830024}, {"id": "c994a7d8-5652-45e3-bfcc-6a6a678aa80e", "answer": "ABD", "llm_answer": "B", "score": 0, "topics": ["ApplicationSecurity"], "timings": {"cache_n": 58, "prompt_n": 150, "prompt_ms": 95.043, "prompt_per_token_ms": 0.6336200000000001, "prompt_per_second": 1578.2330103216439, "predicted_n": 2, "predicted_ms": 30.193, "predicted_per_token_ms": 15.0965, "predicted_per_second": 66.24051932567151}, "tps": 66.24051932567151}, {"id": "dfaf6380-c49f-4e7b-8339-15ecfaf51c01", "answer": "AC", "llm_answer": "AC", "score": 1, "topics": ["NetworkSecurity", "Vulnerability", "PenTest"], "timings": {"cache_n": 59, "prompt_n": 122, "prompt_ms": 100.746, "prompt_per_token_ms": 0.8257868852459016, "prompt_per_second": 1210.9661922061423, "predicted_n": 2, "predicted_ms": 30.456, "predicted_per_token_ms": 15.228, "predicted_per_second": 65.66850538481744}, "tps": 65.66850538481744}, {"id": "102235a4-4d6a-484a-ae78-18a4fbe351d8", "answer": "BD", "llm_answer": "BD", "score": 1, "topics": ["SystemSecurity", "ApplicationSecurity", "Vulnerability"], "timings": {"cache_n": 60, "prompt_n": 78, "prompt_ms": 93.946, "prompt_per_token_ms": 1.2044358974358975, "prompt_per_second": 830.2641943243991, "predicted_n": 3, "predicted_ms": 52.691, "predicted_per_token_ms": 17.563666666666666, "predicted_per_second": 56.93571957260254}, "tps": 56.93571957260254}, {"id": "c9066904-640a-4d5e-946d-8031ee572d84", "answer": "AB", "llm_answer": "AB", "score": 1, "topics": ["NetworkSecurity"], "timings": {"cache_n": 58, "prompt_n": 160, "prompt_ms": 86.863, "prompt_per_token_ms": 0.54289375, "prompt_per_second": 1841.9810506199417, "predicted_n": 2, "predicted_ms": 31.021, "predicted_per_token_ms": 15.5105, "predicted_per_second": 64.47245414396698}, "tps": 64.47245414396698}, {"id": "427aa16a-6d34-4e17-974a-ea97ef4a149b", "answer": "ABC", "llm_answer": "ABCD", "score": 0, "topics": ["WebSecurity", "MemorySafety"], "timings": {"cache_n": 61, "prompt_n": 177, "prompt_ms": 99.627, "prompt_per_token_ms": 0.562864406779661, "prompt_per_second": 1776.6268180312568, "predicted_n": 3, "predicted_ms": 61.564, "predicted_per_token_ms": 20.521333333333335, "predicted_per_second": 48.72977714248587}, "tps": 48.72977714248587}, {"id": "1012128e-ee1f-4a1c-b551-9fd9ec194d05", "answer": "AD", "llm_answer": "AC", "score": 0, "topics": ["ApplicationSecurity", "SystemSecurity"], "timings": {"cache_n": 61, "prompt_n": 156, "prompt_ms": 97.279, "prompt_per_token_ms": 0.6235833333333333, "prompt_per_second": 1603.6349057864495, "predicted_n": 3, "predicted_ms": 57.756, "predicted_per_token_ms": 19.252, "predicted_per_second": 51.94265530853937}, "tps": 51.94265530853937}, {"id": "49dd189c-3990-429d-9896-5344d150a4da", "answer": "BD", "llm_answer": "B", "score": 0, "topics": ["SoftwareSecurity", "SystemSecurity", "Vulnerability"], "timings": {"cache_n": 63, "prompt_n": 148, "prompt_ms": 95.792, "prompt_per_token_ms": 0.6472432432432432, "prompt_per_second": 1545.01419742776, "predicted_n": 2, "predicted_ms": 29.192, "predicted_per_token_ms": 14.596, "predicted_per_second": 68.51192107426692}, "tps": 68.51192107426692}, {"id": "a7100690-05be-4c64-bfa7-47701033cbdd", "answer": "BD", "llm_answer": "BD", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 61, "prompt_n": 147, "prompt_ms": 95.94, "prompt_per_token_ms": 0.6526530612244897, "prompt_per_second": 1532.2076297686053, "predicted_n": 4, "predicted_ms": 91.084, "predicted_per_token_ms": 22.771, "predicted_per_second": 43.91550656536823}, "tps": 43.91550656536823}, {"id": "1894a9e1-0e1b-48ae-bd86-9aaa358d144b", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["ApplicationSecurity", "SystemSecurity"], "timings": {"cache_n": 59, "prompt_n": 174, "prompt_ms": 98.629, "prompt_per_token_ms": 0.5668333333333334, "prompt_per_second": 1764.1870038224051, "predicted_n": 2, "predicted_ms": 33.375, "predicted_per_token_ms": 16.6875, "predicted_per_second": 59.9250936329588}, "tps": 59.9250936329588}, {"id": "a7da98cd-41c9-4808-b345-e7231f58a678", "answer": "BD", "llm_answer": "ABD", "score": 0, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 59, "prompt_n": 117, "prompt_ms": 95.758, "prompt_per_token_ms": 0.8184444444444444, "prompt_per_second": 1221.8300298669562, "predicted_n": 4, "predicted_ms": 88.689, "predicted_per_token_ms": 22.17225, "predicted_per_second": 45.101421822322955}, "tps": 45.101421822322955}, {"id": "f02000dc-9e64-4054-92f7-2ab46f47ad02", "answer": "A", "llm_answer": "D", "score": 0, "topics": ["WebSecurity", "PenTest", "ApplicationSecurity"], "timings": {"cache_n": 60, "prompt_n": 135, "prompt_ms": 98.013, "prompt_per_token_ms": 0.7260222222222222, "prompt_per_second": 1377.3683082856355, "predicted_n": 2, "predicted_ms": 29.667, "predicted_per_token_ms": 14.8335, "predicted_per_second": 67.41497286547342}, "tps": 67.41497286547342}, {"id": "6e1bbc2c-aff6-4734-9453-1c02e0c968de", "answer": "A", "llm_answer": "C", "score": 0, "topics": ["Vulnerability"], "timings": {"cache_n": 62, "prompt_n": 94, "prompt_ms": 82.521, "prompt_per_token_ms": 0.8778829787234043, "prompt_per_second": 1139.103985652137, "predicted_n": 2, "predicted_ms": 26.875, "predicted_per_token_ms": 13.4375, "predicted_per_second": 74.4186046511628}, "tps": 74.4186046511628}, {"id": "2478b34b-d109-4022-802f-637166763080", "answer": "AC", "llm_answer": "AC", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 62, "prompt_n": 125, "prompt_ms": 92.958, "prompt_per_token_ms": 0.743664, "prompt_per_second": 1344.6933023515996, "predicted_n": 2, "predicted_ms": 29.219, "predicted_per_token_ms": 14.6095, "predicted_per_second": 68.44861220438756}, "tps": 68.44861220438756}, {"id": "3ebadf2f-21dc-4c3c-819f-f5792beade74", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["WebSecurity", "ApplicationSecurity"], "timings": {"cache_n": 61, "prompt_n": 194, "prompt_ms": 100.828, "prompt_per_token_ms": 0.5197319587628866, "prompt_per_second": 1924.0687110723213, "predicted_n": 2, "predicted_ms": 28.98, "predicted_per_token_ms": 14.49, "predicted_per_second": 69.01311249137336}, "tps": 69.01311249137336}, {"id": "82179ac5-e4cf-4e2e-acb8-266243f3472f", "answer": "A", "llm_answer": "D", "score": 0, "topics": ["SystemSecurity"], "timings": {"cache_n": 61, "prompt_n": 108, "prompt_ms": 99.663, "prompt_per_token_ms": 0.9228055555555555, "prompt_per_second": 1083.6519069263418, "predicted_n": 2, "predicted_ms": 32.369, "predicted_per_token_ms": 16.1845, "predicted_per_second": 61.787512743674505}, "tps": 61.787512743674505}, {"id": "634bc607-ad2b-435f-8a8a-04b0c9ddbfaa", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SystemSecurity", "WebSecurity"], "timings": {"cache_n": 65, "prompt_n": 126, "prompt_ms": 98.318, "prompt_per_token_ms": 0.7803015873015873, "prompt_per_second": 1281.5557680180639, "predicted_n": 2, "predicted_ms": 29.727, "predicted_per_token_ms": 14.8635, "predicted_per_second": 67.2789046994315}, "tps": 67.2789046994315}, {"id": "b0637e01-a122-4109-b1a2-0f438b2d387a", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["WebSecurity", "PenTest"], "timings": {"cache_n": 60, "prompt_n": 187, "prompt_ms": 100.758, "prompt_per_token_ms": 0.5388128342245989, "prompt_per_second": 1855.9320351733857, "predicted_n": 2, "predicted_ms": 32.382, "predicted_per_token_ms": 16.191, "predicted_per_second": 61.762707677104565}, "tps": 61.762707677104565}, {"id": "27e97a82-da69-4636-a0cc-319ba5bbd065", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["WebSecurity", "ApplicationSecurity"], "timings": {"cache_n": 59, "prompt_n": 97, "prompt_ms": 94.398, "prompt_per_token_ms": 0.9731752577319587, "prompt_per_second": 1027.564143308121, "predicted_n": 2, "predicted_ms": 31.59, "predicted_per_token_ms": 15.795, "predicted_per_second": 63.31117442228553}, "tps": 63.31117442228553}, {"id": "ec0cee0a-a7c5-45e4-9243-fb685d39650f", "answer": "BD", "llm_answer": "CD", "score": 0, "topics": ["SystemSecurity", "ApplicationSecurity", "PenTest"], "timings": {"cache_n": 60, "prompt_n": 74, "prompt_ms": 92.024, "prompt_per_token_ms": 1.2435675675675675, "prompt_per_second": 804.1380509432321, "predicted_n": 3, "predicted_ms": 59.735, "predicted_per_token_ms": 19.911666666666665, "predicted_per_second": 50.22181300744957}, "tps": 50.22181300744957}, {"id": "cf91cf25-e09a-4199-ae2a-94ee881d808c", "answer": "AC", "llm_answer": "AC", "score": 1, "topics": ["WebSecurity", "SystemSecurity"], "timings": {"cache_n": 58, "prompt_n": 143, "prompt_ms": 98.139, "prompt_per_token_ms": 0.6862867132867133, "prompt_per_second": 1457.1169463719827, "predicted_n": 2, "predicted_ms": 28.12, "predicted_per_token_ms": 14.06, "predicted_per_second": 71.12375533428165}, "tps": 71.12375533428165}, {"id": "dec16e1c-37da-4969-a95e-ddfbc8adfd5f", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["ApplicationSecurity", "WebSecurity"], "timings": {"cache_n": 59, "prompt_n": 180, "prompt_ms": 100.047, "prompt_per_token_ms": 0.5558166666666666, "prompt_per_second": 1799.1543974332064, "predicted_n": 2, "predicted_ms": 26.921, "predicted_per_token_ms": 13.4605, "predicted_per_second": 74.29144534006909}, "tps": 74.29144534006909}, {"id": "9edb507b-f605-4fec-9fae-3c14d179e6e3", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 60, "prompt_n": 142, "prompt_ms": 83.97, "prompt_per_token_ms": 0.591338028169014, "prompt_per_second": 1691.0801476717877, "predicted_n": 2, "predicted_ms": 31.485, "predicted_per_token_ms": 15.7425, "predicted_per_second": 63.52231221216452}, "tps": 63.52231221216452}, {"id": "18ddcf5b-5395-4e1f-9fc1-53462f2cf059", "answer": "AB", "llm_answer": "B", "score": 0, "topics": ["Cryptography"], "timings": {"cache_n": 61, "prompt_n": 142, "prompt_ms": 97.796, "prompt_per_token_ms": 0.6887042253521127, "prompt_per_second": 1452.0021268763549, "predicted_n": 2, "predicted_ms": 30.421, "predicted_per_token_ms": 15.2105, "predicted_per_second": 65.74405838072384}, "tps": 65.74405838072384}, {"id": "2db7859f-0d8f-4af9-b876-58b4a0c6b66c", "answer": "AC", "llm_answer": "ABCD", "score": 0, "topics": ["WebSecurity"], "timings": {"cache_n": 62, "prompt_n": 106, "prompt_ms": 94.144, "prompt_per_token_ms": 0.8881509433962265, "prompt_per_second": 1125.9347382732835, "predicted_n": 64, "predicted_ms": 1847.908, "predicted_per_token_ms": 28.8735625, "predicted_per_second": 34.63375882349121}, "tps": 34.63375882349121}, {"id": "d6500e3f-f7e6-4c35-a7c5-0344f012a982", "answer": "AB", "llm_answer": "BC", "score": 0, "topics": ["ApplicationSecurity", "SystemSecurity"], "timings": {"cache_n": 61, "prompt_n": 140, "prompt_ms": 97.091, "prompt_per_token_ms": 0.6935071428571428, "prompt_per_second": 1441.9462154061655, "predicted_n": 4, "predicted_ms": 90.869, "predicted_per_token_ms": 22.71725, "predicted_per_second": 44.01941256093937}, "tps": 44.01941256093937}, {"id": "9e26512c-1a5c-4974-9a7c-8fa2aa5d2e27", "answer": "AD", "llm_answer": "AD", "score": 1, "topics": ["SoftwareSecurity", "ApplicationSecurity", "Vulnerability"], "timings": {"cache_n": 59, "prompt_n": 122, "prompt_ms": 96.911, "prompt_per_token_ms": 0.7943524590163935, "prompt_per_second": 1258.8870200493236, "predicted_n": 2, "predicted_ms": 31.242, "predicted_per_token_ms": 15.621, "predicted_per_second": 64.01638819537801}, "tps": 64.01638819537801}, {"id": "d2d7845a-52c4-4281-b62f-3701d422fe37", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["SystemSecurity", "Vulnerability"], "timings": {"cache_n": 62, "prompt_n": 86, "prompt_ms": 92.767, "prompt_per_token_ms": 1.078686046511628, "prompt_per_second": 927.0538014595708, "predicted_n": 2, "predicted_ms": 26.481, "predicted_per_token_ms": 13.2405, "predicted_per_second": 75.52584872172501}, "tps": 75.52584872172501}, {"id": "f19c34e2-39b7-4745-839c-f4a075c2fff9", "answer": "A", "llm_answer": "A", "score": 1, "topics": [], "timings": {"cache_n": 58, "prompt_n": 167, "prompt_ms": 87.476, "prompt_per_token_ms": 0.523808383233533, "prompt_per_second": 1909.0950660752665, "predicted_n": 2, "predicted_ms": 26.974, "predicted_per_token_ms": 13.487, "predicted_per_second": 74.14547341884779}, "tps": 74.14547341884779}, {"id": "c21415ad-2e42-4c12-af70-34b59caec2f1", "answer": "AB", "llm_answer": "CD", "score": 0, "topics": ["SoftwareSecurity", "ApplicationSecurity", "Vulnerability"], "timings": {"cache_n": 60, "prompt_n": 144, "prompt_ms": 99.256, "prompt_per_token_ms": 0.6892777777777778, "prompt_per_second": 1450.793906665592, "predicted_n": 4, "predicted_ms": 90.542, "predicted_per_token_ms": 22.6355, "predicted_per_second": 44.178392348302445}, "tps": 44.178392348302445}, {"id": "e3229078-fe30-4387-bcd7-a69950a75b89", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["ApplicationSecurity", "SystemSecurity"], "timings": {"cache_n": 59, "prompt_n": 127, "prompt_ms": 97.892, "prompt_per_token_ms": 0.7708031496062991, "prompt_per_second": 1297.3480979038122, "predicted_n": 2, "predicted_ms": 29.133, "predicted_per_token_ms": 14.5665, "predicted_per_second": 68.65067106030962}, "tps": 68.65067106030962}, {"id": "71cc1590-800e-4781-af56-4688fdbfac4b", "answer": "B", "llm_answer": "AB", "score": 0, "topics": ["SystemSecurity"], "timings": {"cache_n": 64, "prompt_n": 115, "prompt_ms": 97.479, "prompt_per_token_ms": 0.8476434782608696, "prompt_per_second": 1179.7412776085105, "predicted_n": 2, "predicted_ms": 29.195, "predicted_per_token_ms": 14.5975, "predicted_per_second": 68.50488097276931}, "tps": 68.50488097276931}, {"id": "f4c068d6-2a7f-4cb0-9f1b-2fc78f9b06a3", "answer": "B", "llm_answer": "C", "score": 0, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 60, "prompt_n": 135, "prompt_ms": 97.879, "prompt_per_token_ms": 0.7250296296296297, "prompt_per_second": 1379.2539768489664, "predicted_n": 2, "predicted_ms": 28.706, "predicted_per_token_ms": 14.353, "predicted_per_second": 69.67184560719014}, "tps": 69.67184560719014}, {"id": "49961fc8-670e-4a0d-8ad8-ced35f5959a4", "answer": "ABD", "llm_answer": "BCD", "score": 0, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 60, "prompt_n": 182, "prompt_ms": 101.153, "prompt_per_token_ms": 0.5557857142857143, "prompt_per_second": 1799.2545945251252, "predicted_n": 4, "predicted_ms": 86.921, "predicted_per_token_ms": 21.73025, "predicted_per_second": 46.01879867926048}, "tps": 46.01879867926048}, {"id": "0f47b809-fce4-441c-8779-7325656ce921", "answer": "AD", "llm_answer": "AD", "score": 1, "topics": ["WebSecurity", "ApplicationSecurity"], "timings": {"cache_n": 61, "prompt_n": 107, "prompt_ms": 94.773, "prompt_per_token_ms": 0.8857289719626168, "prompt_per_second": 1129.0135376109229, "predicted_n": 3, "predicted_ms": 61.432, "predicted_per_token_ms": 20.477333333333334, "predicted_per_second": 48.834483656726135}, "tps": 48.834483656726135}, {"id": "5115f625-ebf5-41ac-ac17-a2eba96a2fba", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["WebSecurity", "Vulnerability"], "timings": {"cache_n": 61, "prompt_n": 164, "prompt_ms": 100.461, "prompt_per_token_ms": 0.6125670731707317, "prompt_per_second": 1632.474293506933, "predicted_n": 2, "predicted_ms": 29.749, "predicted_per_token_ms": 14.8745, "predicted_per_second": 67.22915055968268}, "tps": 67.22915055968268}, {"id": "9f3f4603-1a46-40f7-b6fd-4e287ccb7e94", "answer": "AC", "llm_answer": "AC", "score": 1, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 59, "prompt_n": 161, "prompt_ms": 97.681, "prompt_per_token_ms": 0.6067142857142857, "prompt_per_second": 1648.222274546739, "predicted_n": 2, "predicted_ms": 26.944, "predicted_per_token_ms": 13.472, "predicted_per_second": 74.22802850356295}, "tps": 74.22802850356295}, {"id": "0a495d10-f1a7-4b89-bc46-0141fbbcc001", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["WebSecurity", "PenTest"], "timings": {"cache_n": 62, "prompt_n": 68, "prompt_ms": 87.212, "prompt_per_token_ms": 1.282529411764706, "prompt_per_second": 779.7092143283035, "predicted_n": 2, "predicted_ms": 29.137, "predicted_per_token_ms": 14.5685, "predicted_per_second": 68.64124652503689}, "tps": 68.64124652503689}, {"id": "e490e3b0-9276-41df-b24e-a952759bd490", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 58, "prompt_n": 149, "prompt_ms": 84.375, "prompt_per_token_ms": 0.5662751677852349, "prompt_per_second": 1765.9259259259259, "predicted_n": 2, "predicted_ms": 27.68, "predicted_per_token_ms": 13.84, "predicted_per_second": 72.25433526011561}, "tps": 72.25433526011561}, {"id": "92432759-dd16-4472-aee5-04d2a5b8f055", "answer": "BD", "llm_answer": "C", "score": 0, "topics": ["WebSecurity", "SystemSecurity"], "timings": {"cache_n": 63, "prompt_n": 133, "prompt_ms": 80.553, "prompt_per_token_ms": 0.6056616541353383, "prompt_per_second": 1651.0868620659692, "predicted_n": 2, "predicted_ms": 30.67, "predicted_per_token_ms": 15.335, "predicted_per_second": 65.21030322791}, "tps": 65.21030322791}, {"id": "08c56d00-6a94-4f03-8cca-c1bbe2e2f7c2", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 61, "prompt_n": 95, "prompt_ms": 80.232, "prompt_per_token_ms": 0.8445473684210526, "prompt_per_second": 1184.0662079968092, "predicted_n": 2, "predicted_ms": 27.354, "predicted_per_token_ms": 13.677, "predicted_per_second": 73.11544929443592}, "tps": 73.11544929443592}, {"id": "c1d44646-e98f-4a61-98a7-e22f41f62979", "answer": "AD", "llm_answer": "A", "score": 0, "topics": ["WebSecurity"], "timings": {"cache_n": 58, "prompt_n": 127, "prompt_ms": 81.607, "prompt_per_token_ms": 0.6425748031496062, "prompt_per_second": 1556.239048120872, "predicted_n": 2, "predicted_ms": 26.343, "predicted_per_token_ms": 13.1715, "predicted_per_second": 75.92149717192423}, "tps": 75.92149717192423}, {"id": "b52b9838-7905-4f29-9328-8756fbc1fea0", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 60, "prompt_n": 161, "prompt_ms": 83.62, "prompt_per_token_ms": 0.5193788819875776, "prompt_per_second": 1925.3767041377662, "predicted_n": 2, "predicted_ms": 26.697, "predicted_per_token_ms": 13.3485, "predicted_per_second": 74.91478443270779}, "tps": 74.91478443270779}, {"id": "2a0735c8-ed26-45d1-bff4-32a60f23f3b3", "answer": "AC", "llm_answer": "C", "score": 0, "topics": ["SoftwareSecurity", "ApplicationSecurity", "Vulnerability"], "timings": {"cache_n": 60, "prompt_n": 129, "prompt_ms": 85.101, "prompt_per_token_ms": 0.6596976744186046, "prompt_per_second": 1515.8458772517363, "predicted_n": 2, "predicted_ms": 27.546, "predicted_per_token_ms": 13.773, "predicted_per_second": 72.60582298700356}, "tps": 72.60582298700356}, {"id": "c958caab-3590-452c-b3cd-f66eb95d4070", "answer": "AA", "llm_answer": "C", "score": 0, "topics": ["SystemSecurity", "PenTest"], "timings": {"cache_n": 60, "prompt_n": 128, "prompt_ms": 81.034, "prompt_per_token_ms": 0.633078125, "prompt_per_second": 1579.5838783720412, "predicted_n": 2, "predicted_ms": 26.359, "predicted_per_token_ms": 13.1795, "predicted_per_second": 75.87541257255586}, "tps": 75.87541257255586}, {"id": "c8f3172c-d0b9-4ba4-a4d4-a9ed26e8a8cb", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["ApplicationSecurity", "SystemSecurity"], "timings": {"cache_n": 60, "prompt_n": 193, "prompt_ms": 93.683, "prompt_per_token_ms": 0.48540414507772023, "prompt_per_second": 2060.13897932389, "predicted_n": 2, "predicted_ms": 29.999, "predicted_per_token_ms": 14.9995, "predicted_per_second": 66.66888896296544}, "tps": 66.66888896296544}, {"id": "b32cbb6f-2f5a-4ed4-8a77-bde2d9cbacc8", "answer": "A", "llm_answer": "B", "score": 0, "topics": ["WebSecurity"], "timings": {"cache_n": 60, "prompt_n": 78, "prompt_ms": 77.695, "prompt_per_token_ms": 0.9960897435897434, "prompt_per_second": 1003.9256065383875, "predicted_n": 2, "predicted_ms": 26.29, "predicted_per_token_ms": 13.145, "predicted_per_second": 76.07455306200076}, "tps": 76.07455306200076}, {"id": "7563aca4-3bb7-4482-8d90-4c40aa419a05", "answer": "BCD", "llm_answer": "B", "score": 0, "topics": ["WebSecurity", "Vulnerability"], "timings": {"cache_n": 58, "prompt_n": 130, "prompt_ms": 85.836, "prompt_per_token_ms": 0.6602769230769231, "prompt_per_second": 1514.5160538701712, "predicted_n": 2, "predicted_ms": 26.656, "predicted_per_token_ms": 13.328, "predicted_per_second": 75.03001200480192}, "tps": 75.03001200480192}, {"id": "b3384cb2-d3a9-49f8-bf6f-97b4b37d117d", "answer": "BC", "llm_answer": "B", "score": 0, "topics": ["ApplicationSecurity", "SystemSecurity"], "timings": {"cache_n": 59, "prompt_n": 135, "prompt_ms": 86.798, "prompt_per_token_ms": 0.6429481481481482, "prompt_per_second": 1555.335376391161, "predicted_n": 2, "predicted_ms": 29.238, "predicted_per_token_ms": 14.619, "predicted_per_second": 68.40413160954922}, "tps": 68.40413160954922}, {"id": "00ff497a-2819-4a2d-bfb7-70d2672f5676", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SystemSecurity", "ApplicationSecurity", "Vulnerability"], "timings": {"cache_n": 59, "prompt_n": 127, "prompt_ms": 95.503, "prompt_per_token_ms": 0.751992125984252, "prompt_per_second": 1329.8011580788036, "predicted_n": 2, "predicted_ms": 29.494, "predicted_per_token_ms": 14.747, "predicted_per_second": 67.81040211568455}, "tps": 67.81040211568455}, {"id": "1febcf33-fff7-4ca0-80a7-5fd9e4520cf2", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["SystemSecurity", "NetworkSecurity", "PenTest"], "timings": {"cache_n": 59, "prompt_n": 100, "prompt_ms": 95.012, "prompt_per_token_ms": 0.95012, "prompt_per_second": 1052.4986317517787, "predicted_n": 2, "predicted_ms": 30.222, "predicted_per_token_ms": 15.111, "predicted_per_second": 66.1769571835087}, "tps": 66.1769571835087}, {"id": "fa1c8885-eca2-4ca6-89d3-2069d469ce64", "answer": "AB", "llm_answer": "AB", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 60, "prompt_n": 126, "prompt_ms": 95.714, "prompt_per_token_ms": 0.7596349206349207, "prompt_per_second": 1316.4218400651944, "predicted_n": 3, "predicted_ms": 56.851, "predicted_per_token_ms": 18.950333333333333, "predicted_per_second": 52.76952032506024}, "tps": 52.76952032506024}, {"id": "8262f609-a420-4ef7-a1c5-4c7a2af50dfe", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["WebSecurity", "ApplicationSecurity"], "timings": {"cache_n": 61, "prompt_n": 156, "prompt_ms": 104.036, "prompt_per_token_ms": 0.6668974358974359, "prompt_per_second": 1499.4809489023032, "predicted_n": 2, "predicted_ms": 29.892, "predicted_per_token_ms": 14.946, "predicted_per_second": 66.90753378830456}, "tps": 66.90753378830456}, {"id": "7cd15737-e796-4b88-a135-0df077dcc768", "answer": "ABC", "llm_answer": "A", "score": 0, "topics": ["PenTest"], "timings": {"cache_n": 65, "prompt_n": 70, "prompt_ms": 97.714, "prompt_per_token_ms": 1.3959142857142857, "prompt_per_second": 716.376363673578, "predicted_n": 2, "predicted_ms": 29.888, "predicted_per_token_ms": 14.944, "predicted_per_second": 66.91648822269806}, "tps": 66.91648822269806}, {"id": "721806ff-f950-4ef4-887e-f951d775218b", "answer": "AC", "llm_answer": "AC", "score": 1, "topics": ["SystemSecurity", "WebSecurity", "Vulnerability"], "timings": {"cache_n": 58, "prompt_n": 106, "prompt_ms": 96.29, "prompt_per_token_ms": 0.9083962264150944, "prompt_per_second": 1100.8412088482708, "predicted_n": 2, "predicted_ms": 29.923, "predicted_per_token_ms": 14.9615, "predicted_per_second": 66.83821809310564}, "tps": 66.83821809310564}, {"id": "93dc670a-9816-4c4a-a8b9-1d2e77b88550", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["Vulnerability"], "timings": {"cache_n": 58, "prompt_n": 92, "prompt_ms": 93.689, "prompt_per_token_ms": 1.0183586956521737, "prompt_per_second": 981.9722699569854, "predicted_n": 4, "predicted_ms": 91.994, "predicted_per_token_ms": 22.9985, "predicted_per_second": 43.48109659325608}, "tps": 43.48109659325608}, {"id": "b21a1325-1267-453a-9f48-fc13415d9e8a", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SystemSecurity", "Vulnerability"], "timings": {"cache_n": 62, "prompt_n": 113, "prompt_ms": 96.929, "prompt_per_token_ms": 0.857778761061947, "prompt_per_second": 1165.8017724313672, "predicted_n": 2, "predicted_ms": 30.82, "predicted_per_token_ms": 15.41, "predicted_per_second": 64.89292667099286}, "tps": 64.89292667099286}, {"id": "c4c1fc5c-66b7-4b4e-a6bf-fa8f8fed02db", "answer": "BD", "llm_answer": "B", "score": 0, "topics": ["SoftwareSecurity", "ApplicationSecurity", "SystemSecurity", "Vulnerability"], "timings": {"cache_n": 59, "prompt_n": 127, "prompt_ms": 89.462, "prompt_per_token_ms": 0.7044251968503937, "prompt_per_second": 1419.5971473921888, "predicted_n": 2, "predicted_ms": 28.159, "predicted_per_token_ms": 14.0795, "predicted_per_second": 71.02524947618879}, "tps": 71.02524947618879}, {"id": "11a36b47-d158-46dd-81c0-f72c92ca9f95", "answer": "AB", "llm_answer": "B", "score": 0, "topics": ["Vulnerability"], "timings": {"cache_n": 66, "prompt_n": 106, "prompt_ms": 95.304, "prompt_per_token_ms": 0.8990943396226415, "prompt_per_second": 1112.230336607068, "predicted_n": 2, "predicted_ms": 29.442, "predicted_per_token_ms": 14.721, "predicted_per_second": 67.93016778751443}, "tps": 67.93016778751443}, {"id": "2e57a94e-1d80-4f6e-aecf-ea20a5fa816a", "answer": "BD", "llm_answer": "B", "score": 0, "topics": ["SoftwareSecurity", "SystemSecurity", "MemorySafety"], "timings": {"cache_n": 60, "prompt_n": 164, "prompt_ms": 100.637, "prompt_per_token_ms": 0.613640243902439, "prompt_per_second": 1629.6193249003845, "predicted_n": 2, "predicted_ms": 31.522, "predicted_per_token_ms": 15.761, "predicted_per_second": 63.44775077723495}, "tps": 63.44775077723495}, {"id": "dd6611c1-e41b-4cfd-8862-6403d2bb7326", "answer": "ABC", "llm_answer": "B", "score": 0, "topics": ["WebSecurity"], "timings": {"cache_n": 62, "prompt_n": 134, "prompt_ms": 104.04, "prompt_per_token_ms": 0.7764179104477612, "prompt_per_second": 1287.9661668589004, "predicted_n": 2, "predicted_ms": 31.787, "predicted_per_token_ms": 15.8935, "predicted_per_second": 62.91880328436153}, "tps": 62.91880328436153}, {"id": "2d1281d4-a392-44b2-b188-447132460c8f", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["ApplicationSecurity", "SystemSecurity", "PenTest"], "timings": {"cache_n": 58, "prompt_n": 127, "prompt_ms": 98.971, "prompt_per_token_ms": 0.7792992125984253, "prompt_per_second": 1283.204170918754, "predicted_n": 2, "predicted_ms": 31.976, "predicted_per_token_ms": 15.988, "predicted_per_second": 62.54691018263698}, "tps": 62.54691018263698}, {"id": "6420e23a-8d76-4119-b45f-6240074a8d6a", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SystemSecurity"], "timings": {"cache_n": 61, "prompt_n": 85, "prompt_ms": 94.373, "prompt_per_token_ms": 1.1102705882352941, "prompt_per_second": 900.6813389422821, "predicted_n": 2, "predicted_ms": 30.223, "predicted_per_token_ms": 15.1115, "predicted_per_second": 66.17476756112895}, "tps": 66.17476756112895}, {"id": "e3b069a1-e754-450b-9432-5b91fd6b2a96", "answer": "AC", "llm_answer": "C", "score": 0, "topics": ["ApplicationSecurity", "WebSecurity", "Vulnerability"], "timings": {"cache_n": 61, "prompt_n": 123, "prompt_ms": 96.674, "prompt_per_token_ms": 0.7859674796747969, "prompt_per_second": 1272.317272482777, "predicted_n": 2, "predicted_ms": 29.474, "predicted_per_token_ms": 14.737, "predicted_per_second": 67.85641582411617}, "tps": 67.85641582411617}, {"id": "c79ab447-186f-453e-bcc4-37486a93fe92", "answer": "B", "llm_answer": "D", "score": 0, "topics": ["SystemSecurity"], "timings": {"cache_n": 62, "prompt_n": 123, "prompt_ms": 100.109, "prompt_per_token_ms": 0.8138943089430893, "prompt_per_second": 1228.6607597718487, "predicted_n": 2, "predicted_ms": 30.753, "predicted_per_token_ms": 15.3765, "predicted_per_second": 65.034305596202}, "tps": 65.034305596202}, {"id": "abb9972a-1da0-4825-88d2-c6953620a1d4", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["WebSecurity", "SystemSecurity"], "timings": {"cache_n": 60, "prompt_n": 134, "prompt_ms": 97.431, "prompt_per_token_ms": 0.7270970149253732, "prompt_per_second": 1375.3322864386078, "predicted_n": 2, "predicted_ms": 30.979, "predicted_per_token_ms": 15.4895, "predicted_per_second": 64.55986313309016}, "tps": 64.55986313309016}, {"id": "9ef0f350-338a-4b20-989c-fdfec762153c", "answer": "AC", "llm_answer": "C", "score": 0, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 62, "prompt_n": 176, "prompt_ms": 100.624, "prompt_per_token_ms": 0.5717272727272728, "prompt_per_second": 1749.0857051995547, "predicted_n": 2, "predicted_ms": 27.209, "predicted_per_token_ms": 13.6045, "predicted_per_second": 73.50509022749826}, "tps": 73.50509022749826}, {"id": "28fcb01a-ac32-42e1-bd73-719c89bb27ea", "answer": "AC", "llm_answer": "ABCD", "score": 0, "topics": ["WebSecurity"], "timings": {"cache_n": 59, "prompt_n": 171, "prompt_ms": 98.607, "prompt_per_token_ms": 0.5766491228070175, "prompt_per_second": 1734.1568042836716, "predicted_n": 3, "predicted_ms": 63.734, "predicted_per_token_ms": 21.244666666666667, "predicted_per_second": 47.070637336429535}, "tps": 47.070637336429535}, {"id": "286d665f-8ab4-43cf-b3af-112ebe53f674", "answer": "C", "llm_answer": "B", "score": 0, "topics": ["SystemSecurity", "SoftwareSecurity"], "timings": {"cache_n": 62, "prompt_n": 125, "prompt_ms": 100.277, "prompt_per_token_ms": 0.802216, "prompt_per_second": 1246.5470646309723, "predicted_n": 2, "predicted_ms": 31.151, "predicted_per_token_ms": 15.5755, "predicted_per_second": 64.20339635966742}, "tps": 64.20339635966742}, {"id": "e212ce84-de17-44aa-9d4f-39d65d923bdb", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["SystemSecurity"], "timings": {"cache_n": 60, "prompt_n": 127, "prompt_ms": 97.754, "prompt_per_token_ms": 0.769716535433071, "prompt_per_second": 1299.1795732143953, "predicted_n": 2, "predicted_ms": 30.824, "predicted_per_token_ms": 15.412, "predicted_per_second": 64.88450558006748}, "tps": 64.88450558006748}, {"id": "5d05de44-b97d-4a05-aae9-a91cc24fb264", "answer": "ACD", "llm_answer": "ACD", "score": 1, "topics": ["SoftwareSecurity", "MemorySafety"], "timings": {"cache_n": 60, "prompt_n": 106, "prompt_ms": 97.568, "prompt_per_token_ms": 0.9204528301886792, "prompt_per_second": 1086.4217776320104, "predicted_n": 3, "predicted_ms": 58.853, "predicted_per_token_ms": 19.61766666666667, "predicted_per_second": 50.97446179464089}, "tps": 50.97446179464089}, {"id": "27d6fe77-dcd8-4f73-ac81-0e659d5e5abd", "answer": "ACD", "llm_answer": "ACD", "score": 1, "topics": ["ApplicationSecurity", "SystemSecurity"], "timings": {"cache_n": 59, "prompt_n": 119, "prompt_ms": 97.44, "prompt_per_token_ms": 0.8188235294117647, "prompt_per_second": 1221.2643678160919, "predicted_n": 3, "predicted_ms": 59.596, "predicted_per_token_ms": 19.865333333333332, "predicted_per_second": 50.338948922746496}, "tps": 50.338948922746496}, {"id": "684b20be-e78c-48df-ba88-1e091e94115d", "answer": "B", "llm_answer": "B", "score": 1, "topics": [], "timings": {"cache_n": 60, "prompt_n": 129, "prompt_ms": 99.019, "prompt_per_token_ms": 0.7675891472868217, "prompt_per_second": 1302.780274492774, "predicted_n": 2, "predicted_ms": 30.213, "predicted_per_token_ms": 15.1065, "predicted_per_second": 66.19667030748353}, "tps": 66.19667030748353}, {"id": "204e27e5-e18a-4b3f-9027-581320d7f878", "answer": "ABD", "llm_answer": "BD", "score": 0, "topics": ["ApplicationSecurity", "SystemSecurity"], "timings": {"cache_n": 61, "prompt_n": 146, "prompt_ms": 98.74, "prompt_per_token_ms": 0.6763013698630137, "prompt_per_second": 1478.63074741746, "predicted_n": 4, "predicted_ms": 97.686, "predicted_per_token_ms": 24.4215, "predicted_per_second": 40.947525745756806}, "tps": 40.947525745756806}, {"id": "c7ca6385-c701-4938-9fbd-22a68f84fbc0", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 60, "prompt_n": 110, "prompt_ms": 96.561, "prompt_per_token_ms": 0.8778272727272728, "prompt_per_second": 1139.176271993869, "predicted_n": 2, "predicted_ms": 30.381, "predicted_per_token_ms": 15.1905, "predicted_per_second": 65.83061782034824}, "tps": 65.83061782034824}, {"id": "9a497cd2-3e15-4e32-8ca1-196e330208ab", "answer": "AD", "llm_answer": "AD", "score": 1, "topics": ["SoftwareSecurity", "Vulnerability"], "timings": {"cache_n": 58, "prompt_n": 196, "prompt_ms": 100.302, "prompt_per_token_ms": 0.5117448979591837, "prompt_per_second": 1954.0986221610735, "predicted_n": 2, "predicted_ms": 26.899, "predicted_per_token_ms": 13.4495, "predicted_per_second": 74.35220640172497}, "tps": 74.35220640172497}, {"id": "195a636c-37b6-43ea-a574-f0d9fc090771", "answer": "BD", "llm_answer": "B", "score": 0, "topics": ["ApplicationSecurity", "WebSecurity"], "timings": {"cache_n": 62, "prompt_n": 130, "prompt_ms": 98.674, "prompt_per_token_ms": 0.7590307692307693, "prompt_per_second": 1317.4696475261974, "predicted_n": 2, "predicted_ms": 30.827, "predicted_per_token_ms": 15.4135, "predicted_per_second": 64.87819119602945}, "tps": 64.87819119602945}, {"id": "e39f13f5-a2c8-4a10-aa31-b81fc0cd0f38", "answer": "A", "llm_answer": "A", "score": 1, "topics": [], "timings": {"cache_n": 66, "prompt_n": 137, "prompt_ms": 97.922, "prompt_per_token_ms": 0.7147591240875912, "prompt_per_second": 1399.0727313576112, "predicted_n": 2, "predicted_ms": 30.389, "predicted_per_token_ms": 15.1945, "predicted_per_second": 65.81328770278719}, "tps": 65.81328770278719}, {"id": "d82d1112-6d3b-4c30-8234-1bfb7d775e8c", "answer": "BC", "llm_answer": "BC", "score": 1, "topics": ["NetworkSecurity"], "timings": {"cache_n": 60, "prompt_n": 166, "prompt_ms": 100.213, "prompt_per_token_ms": 0.6036927710843373, "prompt_per_second": 1656.471715246525, "predicted_n": 3, "predicted_ms": 60.139, "predicted_per_token_ms": 20.046333333333333, "predicted_per_second": 49.8844343936547}, "tps": 49.8844343936547}, {"id": "738b843d-fce9-45da-b397-00cc106700e9", "answer": "AC", "llm_answer": "BC", "score": 0, "topics": ["SystemSecurity", "PenTest"], "timings": {"cache_n": 61, "prompt_n": 114, "prompt_ms": 97.645, "prompt_per_token_ms": 0.8565350877192982, "prompt_per_second": 1167.4944953658662, "predicted_n": 3, "predicted_ms": 60.363, "predicted_per_token_ms": 20.121, "predicted_per_second": 49.69931911932807}, "tps": 49.69931911932807}, {"id": "bd3a7afc-6848-4038-8a22-d048614ddd7b", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["SystemSecurity", "PenTest"], "timings": {"cache_n": 62, "prompt_n": 75, "prompt_ms": 96.203, "prompt_per_token_ms": 1.2827066666666667, "prompt_per_second": 779.6014677296965, "predicted_n": 2, "predicted_ms": 29.995, "predicted_per_token_ms": 14.9975, "predicted_per_second": 66.67777962993831}, "tps": 66.67777962993831}, {"id": "683b321c-12e3-454e-aa56-bb0938fa2298", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 58, "prompt_n": 151, "prompt_ms": 98.462, "prompt_per_token_ms": 0.6520662251655629, "prompt_per_second": 1533.5865613129938, "predicted_n": 2, "predicted_ms": 29.848, "predicted_per_token_ms": 14.924, "predicted_per_second": 67.00616456714017}, "tps": 67.00616456714017}, {"id": "c1086936-423d-4085-ab11-0b5660c83ba3", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["WebSecurity", "ApplicationSecurity"], "timings": {"cache_n": 64, "prompt_n": 111, "prompt_ms": 95.233, "prompt_per_token_ms": 0.857954954954955, "prompt_per_second": 1165.5623575861307, "predicted_n": 2, "predicted_ms": 30.136, "predicted_per_token_ms": 15.068, "predicted_per_second": 66.36580833554552}, "tps": 66.36580833554552}, {"id": "0fa4a54e-ba7b-48a8-85bf-ec121e71cf95", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["PenTest", "NetworkSecurity"], "timings": {"cache_n": 62, "prompt_n": 101, "prompt_ms": 95.819, "prompt_per_token_ms": 0.9487029702970298, "prompt_per_second": 1054.0706957910227, "predicted_n": 2, "predicted_ms": 29.444, "predicted_per_token_ms": 14.722, "predicted_per_second": 67.92555359326178}, "tps": 67.92555359326178}, {"id": "ae39321a-a6f4-41c3-aa0e-941ac98bcbb2", "answer": "AB", "llm_answer": "AC", "score": 0, "topics": ["NetworkSecurity"], "timings": {"cache_n": 66, "prompt_n": 127, "prompt_ms": 94.437, "prompt_per_token_ms": 0.7435984251968504, "prompt_per_second": 1344.8118851721254, "predicted_n": 2, "predicted_ms": 26.425, "predicted_per_token_ms": 13.2125, "predicted_per_second": 75.68590350047303}, "tps": 75.68590350047303}, {"id": "c1b2f20e-f4dd-493b-9146-3a612364d2d9", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["ApplicationSecurity", "SoftwareSecurity"], "timings": {"cache_n": 62, "prompt_n": 143, "prompt_ms": 91.512, "prompt_per_token_ms": 0.6399440559440559, "prompt_per_second": 1562.6365941078766, "predicted_n": 2, "predicted_ms": 30.13, "predicted_per_token_ms": 15.065, "predicted_per_second": 66.37902422834385}, "tps": 66.37902422834385}, {"id": "543cbc91-4cd7-4c8c-b9b3-000022a600fe", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["PenTest", "ApplicationSecurity"], "timings": {"cache_n": 64, "prompt_n": 120, "prompt_ms": 97.408, "prompt_per_token_ms": 0.8117333333333333, "prompt_per_second": 1231.9316688567674, "predicted_n": 2, "predicted_ms": 29.393, "predicted_per_token_ms": 14.6965, "predicted_per_second": 68.04341169666247}, "tps": 68.04341169666247}, {"id": "d3db9a2f-5264-4a36-99e3-48e7a38a4ea6", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["WebSecurity", "Vulnerability"], "timings": {"cache_n": 61, "prompt_n": 186, "prompt_ms": 103.102, "prompt_per_token_ms": 0.5543118279569893, "prompt_per_second": 1804.0387189385267, "predicted_n": 2, "predicted_ms": 33.366, "predicted_per_token_ms": 16.683, "predicted_per_second": 59.94125756758377}, "tps": 59.94125756758377}, {"id": "52c14d5e-702e-4f04-90dc-911a30207434", "answer": "AD", "llm_answer": "ABD", "score": 0, "topics": ["SystemSecurity", "SoftwareSecurity"], "timings": {"cache_n": 62, "prompt_n": 117, "prompt_ms": 97.572, "prompt_per_token_ms": 0.8339487179487179, "prompt_per_second": 1199.114500061493, "predicted_n": 4, "predicted_ms": 87.532, "predicted_per_token_ms": 21.883, "predicted_per_second": 45.697573458849334}, "tps": 45.697573458849334}, {"id": "ff76812f-68c9-4249-a8a1-dbbaf5987dab", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["SoftwareSecurity", "ApplicationSecurity", "Vulnerability"], "timings": {"cache_n": 62, "prompt_n": 92, "prompt_ms": 92.939, "prompt_per_token_ms": 1.0102065217391303, "prompt_per_second": 989.8965988444033, "predicted_n": 2, "predicted_ms": 30.811, "predicted_per_token_ms": 15.4055, "predicted_per_second": 64.91188212002207}, "tps": 64.91188212002207}, {"id": "fbd239eb-9272-4bf7-9bd0-46ad12df3274", "answer": "AC", "llm_answer": "ACD", "score": 0, "topics": ["ApplicationSecurity", "WebSecurity"], "timings": {"cache_n": 58, "prompt_n": 116, "prompt_ms": 95.634, "prompt_per_token_ms": 0.8244310344827587, "prompt_per_second": 1212.9577346968651, "predicted_n": 6, "predicted_ms": 147.45, "predicted_per_token_ms": 24.575, "predicted_per_second": 40.691759918616484}, "tps": 40.691759918616484}, {"id": "afb8eb7a-8fb0-44b8-86c4-46d0cbcff383", "answer": "ABD", "llm_answer": "A", "score": 0, "topics": ["WebSecurity"], "timings": {"cache_n": 59, "prompt_n": 173, "prompt_ms": 100.8, "prompt_per_token_ms": 0.5826589595375722, "prompt_per_second": 1716.2698412698412, "predicted_n": 2, "predicted_ms": 29.519, "predicted_per_token_ms": 14.7595, "predicted_per_second": 67.75297266167553}, "tps": 67.75297266167553}, {"id": "563a2ce9-6701-48d5-94c8-488cad492c17", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["WebSecurity", "Vulnerability"], "timings": {"cache_n": 60, "prompt_n": 147, "prompt_ms": 98.018, "prompt_per_token_ms": 0.6667891156462585, "prompt_per_second": 1499.7245403905406, "predicted_n": 2, "predicted_ms": 29.758, "predicted_per_token_ms": 14.879, "predicted_per_second": 67.20881779689495}, "tps": 67.20881779689495}, {"id": "611f58e0-799e-4655-8b57-503c6396cd94", "answer": "BD", "llm_answer": "B", "score": 0, "topics": ["ApplicationSecurity", "WebSecurity"], "timings": {"cache_n": 59, "prompt_n": 96, "prompt_ms": 87.751, "prompt_per_token_ms": 0.9140729166666667, "prompt_per_second": 1094.0046267279006, "predicted_n": 2, "predicted_ms": 29.724, "predicted_per_token_ms": 14.862, "predicted_per_second": 67.28569506122999}, "tps": 67.28569506122999}, {"id": "c9c46d7f-0afb-47e2-9935-5f2becefa778", "answer": "AC", "llm_answer": "ABCD", "score": 0, "topics": ["SystemSecurity"], "timings": {"cache_n": 59, "prompt_n": 126, "prompt_ms": 94.695, "prompt_per_token_ms": 0.751547619047619, "prompt_per_second": 1330.5876762236655, "predicted_n": 3, "predicted_ms": 58.475, "predicted_per_token_ms": 19.491666666666667, "predicted_per_second": 51.3039760581445}, "tps": 51.3039760581445}, {"id": "afaf3a52-ddc3-4338-90af-49d525c6e306", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["SystemSecurity", "MemorySafety"], "timings": {"cache_n": 60, "prompt_n": 133, "prompt_ms": 97.409, "prompt_per_token_ms": 0.7323984962406016, "prompt_per_second": 1365.3769158907287, "predicted_n": 2, "predicted_ms": 28.86, "predicted_per_token_ms": 14.43, "predicted_per_second": 69.3000693000693}, "tps": 69.3000693000693}, {"id": "bcb4b4fc-9cc8-45a8-ad3d-ac467bde16b8", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["NetworkSecurity", "PenTest"], "timings": {"cache_n": 60, "prompt_n": 137, "prompt_ms": 101.733, "prompt_per_token_ms": 0.7425766423357665, "prompt_per_second": 1346.66234161973, "predicted_n": 2, "predicted_ms": 29.334, "predicted_per_token_ms": 14.667, "predicted_per_second": 68.18026863025841}, "tps": 68.18026863025841}, {"id": "21a6be3c-a1cc-483c-9a93-5f9a5b692c4f", "answer": "AD", "llm_answer": "A", "score": 0, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 62, "prompt_n": 121, "prompt_ms": 95.98, "prompt_per_token_ms": 0.7932231404958678, "prompt_per_second": 1260.679308189206, "predicted_n": 2, "predicted_ms": 29.322, "predicted_per_token_ms": 14.661, "predicted_per_second": 68.2081713389264}, "tps": 68.2081713389264}, {"id": "97b6b195-1575-42ed-b774-487e3d4cf290", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["WebSecurity", "ApplicationSecurity"], "timings": {"cache_n": 60, "prompt_n": 107, "prompt_ms": 96.369, "prompt_per_token_ms": 0.9006448598130841, "prompt_per_second": 1110.315557907626, "predicted_n": 2, "predicted_ms": 29.441, "predicted_per_token_ms": 14.7205, "predicted_per_second": 67.932475119731}, "tps": 67.932475119731}, {"id": "e1b8cd29-7735-4f26-bb34-6bc6750af694", "answer": "AC", "llm_answer": "A", "score": 0, "topics": ["ApplicationSecurity", "WebSecurity"], "timings": {"cache_n": 58, "prompt_n": 134, "prompt_ms": 99.037, "prompt_per_token_ms": 0.7390820895522389, "prompt_per_second": 1353.0296757777396, "predicted_n": 2, "predicted_ms": 29.555, "predicted_per_token_ms": 14.7775, "predicted_per_second": 67.67044493317543}, "tps": 67.67044493317543}, {"id": "23d24d2a-dd27-4411-9ca6-76cbf0599812", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["NetworkSecurity", "SystemSecurity", "PenTest"], "timings": {"cache_n": 62, "prompt_n": 127, "prompt_ms": 97.122, "prompt_per_token_ms": 0.764740157480315, "prompt_per_second": 1307.6336978233562, "predicted_n": 2, "predicted_ms": 28.858, "predicted_per_token_ms": 14.429, "predicted_per_second": 69.30487213251091}, "tps": 69.30487213251091}, {"id": "db74ab6e-34bf-41cd-8bb6-7ceb8ac8f602", "answer": "AD", "llm_answer": "D", "score": 0, "topics": ["SoftwareSecurity", "ApplicationSecurity", "Vulnerability"], "timings": {"cache_n": 64, "prompt_n": 113, "prompt_ms": 100.331, "prompt_per_token_ms": 0.8878849557522124, "prompt_per_second": 1126.2720395490924, "predicted_n": 2, "predicted_ms": 31.648, "predicted_per_token_ms": 15.824, "predicted_per_second": 63.19514661274014}, "tps": 63.19514661274014}, {"id": "a50aa8a5-a0ab-4416-b633-070de04f6db9", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["PenTest"], "timings": {"cache_n": 59, "prompt_n": 134, "prompt_ms": 103.402, "prompt_per_token_ms": 0.7716567164179104, "prompt_per_second": 1295.9130384325254, "predicted_n": 2, "predicted_ms": 27.867, "predicted_per_token_ms": 13.9335, "predicted_per_second": 71.76947644166935}, "tps": 71.76947644166935}, {"id": "082db64c-70c7-4b48-a95c-d535ae6a8529", "answer": "BD", "llm_answer": "B", "score": 0, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 60, "prompt_n": 102, "prompt_ms": 89.968, "prompt_per_token_ms": 0.8820392156862745, "prompt_per_second": 1133.736439622977, "predicted_n": 2, "predicted_ms": 30.804, "predicted_per_token_ms": 15.402, "predicted_per_second": 64.92663290481757}, "tps": 64.92663290481757}, {"id": "a3fb2c6d-9b5a-4316-92b2-e714b4059285", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["ApplicationSecurity", "SystemSecurity", "WebSecurity"], "timings": {"cache_n": 59, "prompt_n": 129, "prompt_ms": 97.498, "prompt_per_token_ms": 0.7557984496124032, "prompt_per_second": 1323.104063673101, "predicted_n": 2, "predicted_ms": 29.545, "predicted_per_token_ms": 14.7725, "predicted_per_second": 67.69334912844812}, "tps": 67.69334912844812}, {"id": "d15655d9-f291-419a-89c1-c73ddb5bc029", "answer": "D", "llm_answer": "D", "score": 1, "topics": ["SystemSecurity", "WebSecurity"], "timings": {"cache_n": 59, "prompt_n": 93, "prompt_ms": 93.516, "prompt_per_token_ms": 1.0055483870967743, "prompt_per_second": 994.4822276401899, "predicted_n": 2, "predicted_ms": 31.468, "predicted_per_token_ms": 15.734, "predicted_per_second": 63.556628956400154}, "tps": 63.556628956400154}, {"id": "6f3a4603-8a15-4743-9522-908c43f95ba2", "answer": "BC", "llm_answer": "CD", "score": 0, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 60, "prompt_n": 104, "prompt_ms": 94.414, "prompt_per_token_ms": 0.9078269230769231, "prompt_per_second": 1101.53155252399, "predicted_n": 4, "predicted_ms": 89.174, "predicted_per_token_ms": 22.2935, "predicted_per_second": 44.856123982326686}, "tps": 44.856123982326686}, {"id": "fba4a913-7516-4479-b842-bd69b1bdb77c", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["ApplicationSecurity", "WebSecurity"], "timings": {"cache_n": 59, "prompt_n": 136, "prompt_ms": 98.41, "prompt_per_token_ms": 0.7236029411764706, "prompt_per_second": 1381.9733766893607, "predicted_n": 2, "predicted_ms": 30.426, "predicted_per_token_ms": 15.213, "predicted_per_second": 65.73325445342799}, "tps": 65.73325445342799}, {"id": "20d28819-1732-4ec1-9427-e4671b7019c1", "answer": "B", "llm_answer": "A", "score": 0, "topics": ["SoftwareSecurity"], "timings": {"cache_n": 63, "prompt_n": 143, "prompt_ms": 98.704, "prompt_per_token_ms": 0.6902377622377622, "prompt_per_second": 1448.7761387583078, "predicted_n": 2, "predicted_ms": 30.667, "predicted_per_token_ms": 15.3335, "predicted_per_second": 65.21668242736492}, "tps": 65.21668242736492}, {"id": "31db3377-01b1-4e79-bcb5-6249354fdbe4", "answer": "AC", "llm_answer": "B", "score": 0, "topics": ["WebSecurity", "ApplicationSecurity"], "timings": {"cache_n": 59, "prompt_n": 120, "prompt_ms": 95.525, "prompt_per_token_ms": 0.7960416666666668, "prompt_per_second": 1256.2156503533106, "predicted_n": 2, "predicted_ms": 28.864, "predicted_per_token_ms": 14.432, "predicted_per_second": 69.29046563192904}, "tps": 69.29046563192904}, {"id": "51dcdedf-353a-431a-b65a-71383c7fc1f8", "answer": "C", "llm_answer": "B", "score": 0, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 63, "prompt_n": 137, "prompt_ms": 98.898, "prompt_per_token_ms": 0.7218832116788321, "prompt_per_second": 1385.2656272118747, "predicted_n": 2, "predicted_ms": 29.872, "predicted_per_token_ms": 14.936, "predicted_per_second": 66.95232994108196}, "tps": 66.95232994108196}, {"id": "c2dd663e-998e-44eb-b024-81ecdecf0dc4", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["NetworkSecurity", "PenTest"], "timings": {"cache_n": 62, "prompt_n": 95, "prompt_ms": 92.766, "prompt_per_token_ms": 0.9764842105263158, "prompt_per_second": 1024.0820990449085, "predicted_n": 2, "predicted_ms": 31.332, "predicted_per_token_ms": 15.666, "predicted_per_second": 63.83250351078769}, "tps": 63.83250351078769}, {"id": "ffd930de-d3ac-4671-92a2-738ce82d0764", "answer": "AC", "llm_answer": "A", "score": 0, "topics": ["WebSecurity", "ApplicationSecurity"], "timings": {"cache_n": 59, "prompt_n": 134, "prompt_ms": 89.045, "prompt_per_token_ms": 0.6645149253731344, "prompt_per_second": 1504.857094727385, "predicted_n": 2, "predicted_ms": 28.01, "predicted_per_token_ms": 14.005, "predicted_per_second": 71.40307033202427}, "tps": 71.40307033202427}, {"id": "0311118a-5bec-488a-a338-73959cee72b7", "answer": "BD", "llm_answer": "D", "score": 0, "topics": ["NetworkSecurity"], "timings": {"cache_n": 59, "prompt_n": 130, "prompt_ms": 95.269, "prompt_per_token_ms": 0.7328384615384615, "prompt_per_second": 1364.5572011882145, "predicted_n": 2, "predicted_ms": 29.313, "predicted_per_token_ms": 14.6565, "predicted_per_second": 68.22911336267185}, "tps": 68.22911336267185}, {"id": "96830aca-590c-42f7-af86-3bcd587e0964", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["ApplicationSecurity", "SystemSecurity"], "timings": {"cache_n": 63, "prompt_n": 121, "prompt_ms": 95.705, "prompt_per_token_ms": 0.7909504132231405, "prompt_per_second": 1264.3017606185674, "predicted_n": 2, "predicted_ms": 29.683, "predicted_per_token_ms": 14.8415, "predicted_per_second": 67.37863423508405}, "tps": 67.37863423508405}, {"id": "b5b19c69-b0d3-4588-ae10-2edf674fb81e", "answer": "A", "llm_answer": "C", "score": 0, "topics": ["WebSecurity", "ApplicationSecurity"], "timings": {"cache_n": 60, "prompt_n": 312, "prompt_ms": 110.814, "prompt_per_token_ms": 0.3551730769230769, "prompt_per_second": 2815.528723807461, "predicted_n": 2, "predicted_ms": 31.244, "predicted_per_token_ms": 15.622, "predicted_per_second": 64.01229035974907}, "tps": 64.01229035974907}, {"id": "969d09a2-a265-4bf9-887a-b8c35a64fe6f", "answer": "AC", "llm_answer": "A", "score": 0, "topics": ["SystemSecurity", "PenTest"], "timings": {"cache_n": 61, "prompt_n": 156, "prompt_ms": 97.917, "prompt_per_token_ms": 0.627673076923077, "prompt_per_second": 1593.1860657495636, "predicted_n": 2, "predicted_ms": 31.228, "predicted_per_token_ms": 15.614, "predicted_per_second": 64.0450877417702}, "tps": 64.0450877417702}, {"id": "a2f1533e-bb07-477c-840f-40c4888ca2c5", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["SoftwareSecurity", "SystemSecurity", "Vulnerability"], "timings": {"cache_n": 60, "prompt_n": 130, "prompt_ms": 96.927, "prompt_per_token_ms": 0.7455923076923078, "prompt_per_second": 1341.2155539736088, "predicted_n": 2, "predicted_ms": 30.615, "predicted_per_token_ms": 15.3075, "predicted_per_second": 65.32745386248571}, "tps": 65.32745386248571}, {"id": "c2ada736-6e69-47d2-9106-816b1404a246", "answer": "D", "llm_answer": "B", "score": 0, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 65, "prompt_n": 75, "prompt_ms": 91.842, "prompt_per_token_ms": 1.22456, "prompt_per_second": 816.6198471287646, "predicted_n": 2, "predicted_ms": 30.256, "predicted_per_token_ms": 15.128, "predicted_per_second": 66.10259122157589}, "tps": 66.10259122157589}, {"id": "5abe1c4c-8791-42ca-9c4a-713978cd9cd0", "answer": "AC", "llm_answer": "D", "score": 0, "topics": ["ApplicationSecurity"], "timings": {"cache_n": 58, "prompt_n": 152, "prompt_ms": 97.369, "prompt_per_token_ms": 0.6405855263157895, "prompt_per_second": 1561.0717990325463, "predicted_n": 2, "predicted_ms": 29.204, "predicted_per_token_ms": 14.602, "predicted_per_second": 68.48376934666484}, "tps": 68.48376934666484}, {"id": "db8b0c18-feb6-46e5-8c40-165192476d6b", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 62, "prompt_n": 166, "prompt_ms": 98.945, "prompt_per_token_ms": 0.5960542168674698, "prompt_per_second": 1677.6997321744404, "predicted_n": 2, "predicted_ms": 28.703, "predicted_per_token_ms": 14.3515, "predicted_per_second": 69.67912761732224}, "tps": 69.67912761732224}, {"id": "eb78b766-ca19-4926-acae-3fc7763141d5", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 61, "prompt_n": 200, "prompt_ms": 102.893, "prompt_per_token_ms": 0.514465, "prompt_per_second": 1943.7668257315852, "predicted_n": 2, "predicted_ms": 28.859, "predicted_per_token_ms": 14.4295, "predicted_per_second": 69.30247063307806}, "tps": 69.30247063307806}, {"id": "f208bd49-0da8-476d-ab3b-0c9c4732623c", "answer": "AC", "llm_answer": "C", "score": 0, "topics": ["SystemSecurity", "WebSecurity"], "timings": {"cache_n": 62, "prompt_n": 139, "prompt_ms": 91.887, "prompt_per_token_ms": 0.6610575539568345, "prompt_per_second": 1512.727589321667, "predicted_n": 2, "predicted_ms": 33.098, "predicted_per_token_ms": 16.549, "predicted_per_second": 60.4266118798719}, "tps": 60.4266118798719}, {"id": "8907298f-92a2-4d46-9774-73f291ba7e16", "answer": "B", "llm_answer": "A", "score": 0, "topics": ["Vulnerability"], "timings": {"cache_n": 59, "prompt_n": 136, "prompt_ms": 105.423, "prompt_per_token_ms": 0.7751691176470589, "prompt_per_second": 1290.0410726312095, "predicted_n": 2, "predicted_ms": 32.341, "predicted_per_token_ms": 16.1705, "predicted_per_second": 61.84100677159024}, "tps": 61.84100677159024}, {"id": "f1f35ca0-bf77-4f46-bce4-8030aac17a49", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["ApplicationSecurity", "SystemSecurity"], "timings": {"cache_n": 62, "prompt_n": 143, "prompt_ms": 99.855, "prompt_per_token_ms": 0.6982867132867133, "prompt_per_second": 1432.0765109408642, "predicted_n": 2, "predicted_ms": 30.902, "predicted_per_token_ms": 15.451, "predicted_per_second": 64.72073004983496}, "tps": 64.72073004983496}, {"id": "4afa2c75-50c0-4ce1-a326-feacf4a7511c", "answer": "A", "llm_answer": "C", "score": 0, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 61, "prompt_n": 145, "prompt_ms": 98.481, "prompt_per_token_ms": 0.6791793103448276, "prompt_per_second": 1472.3652278104405, "predicted_n": 2, "predicted_ms": 29.241, "predicted_per_token_ms": 14.6205, "predicted_per_second": 68.39711364180431}, "tps": 68.39711364180431}, {"id": "1483041e-fb04-4c52-be14-e1324e81d11e", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["ApplicationSecurity", "WebSecurity"], "timings": {"cache_n": 59, "prompt_n": 112, "prompt_ms": 94.355, "prompt_per_token_ms": 0.8424553571428571, "prompt_per_second": 1187.0065179375763, "predicted_n": 2, "predicted_ms": 29.649, "predicted_per_token_ms": 14.8245, "predicted_per_second": 67.45590070491416}, "tps": 67.45590070491416}, {"id": "83c99b71-e65e-4d7d-8637-c1e9350114f2", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 59, "prompt_n": 103, "prompt_ms": 95.433, "prompt_per_token_ms": 0.9265339805825243, "prompt_per_second": 1079.2912304967883, "predicted_n": 2, "predicted_ms": 28.608, "predicted_per_token_ms": 14.304, "predicted_per_second": 69.91051454138703}, "tps": 69.91051454138703}, {"id": "f1ecae49-8067-4f8f-acbc-299bae3a36f4", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["WebSecurity", "PenTest"], "timings": {"cache_n": 58, "prompt_n": 158, "prompt_ms": 97.179, "prompt_per_token_ms": 0.6150569620253165, "prompt_per_second": 1625.8656705666863, "predicted_n": 2, "predicted_ms": 28.854, "predicted_per_token_ms": 14.427, "predicted_per_second": 69.31447979482914}, "tps": 69.31447979482914}, {"id": "55a9dce6-fd22-4fd0-be79-3cb24d826c4f", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["Vulnerability"], "timings": {"cache_n": 62, "prompt_n": 130, "prompt_ms": 99.209, "prompt_per_token_ms": 0.7631461538461539, "prompt_per_second": 1310.364987047546, "predicted_n": 2, "predicted_ms": 33.702, "predicted_per_token_ms": 16.851, "predicted_per_second": 59.34365913002196}, "tps": 59.34365913002196}, {"id": "00ffd4f7-03a2-4340-87ad-bf883666d751", "answer": "A", "llm_answer": "B", "score": 0, "topics": ["SystemSecurity", "PenTest"], "timings": {"cache_n": 60, "prompt_n": 98, "prompt_ms": 96.497, "prompt_per_token_ms": 0.9846632653061225, "prompt_per_second": 1015.5756137496503, "predicted_n": 2, "predicted_ms": 31.431, "predicted_per_token_ms": 15.7155, "predicted_per_second": 63.63144666093984}, "tps": 63.63144666093984}, {"id": "54f4ae87-3581-4549-bbd4-ebf8ed5513f6", "answer": "C", "llm_answer": "A", "score": 0, "topics": ["SystemSecurity", "WebSecurity", "PenTest"], "timings": {"cache_n": 60, "prompt_n": 74, "prompt_ms": 91.353, "prompt_per_token_ms": 1.2345, "prompt_per_second": 810.0445524503848, "predicted_n": 2, "predicted_ms": 25.897, "predicted_per_token_ms": 12.9485, "predicted_per_second": 77.22902266671815}, "tps": 77.22902266671815}, {"id": "ccbba18d-5a26-47b5-a125-adf40cee786e", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["ApplicationSecurity", "WebSecurity"], "timings": {"cache_n": 59, "prompt_n": 137, "prompt_ms": 84.057, "prompt_per_token_ms": 0.6135547445255475, "prompt_per_second": 1629.8464137430553, "predicted_n": 2, "predicted_ms": 30.273, "predicted_per_token_ms": 15.1365, "predicted_per_second": 66.0654708816437}, "tps": 66.0654708816437}, {"id": "ae5220fb-c8c2-4488-b754-2e3a97cde4e2", "answer": "AD", "llm_answer": "D", "score": 0, "topics": ["NetworkSecurity", "SystemSecurity", "PenTest"], "timings": {"cache_n": 60, "prompt_n": 137, "prompt_ms": 102.396, "prompt_per_token_ms": 0.7474160583941606, "prompt_per_second": 1337.942888394078, "predicted_n": 2, "predicted_ms": 32.408, "predicted_per_token_ms": 16.204, "predicted_per_second": 61.713157245124656}, "tps": 61.713157245124656}, {"id": "5c5c4522-203b-4669-99f7-fa1a19da7331", "answer": "AC", "llm_answer": "AC", "score": 1, "topics": ["SystemSecurity", "WebSecurity"], "timings": {"cache_n": 59, "prompt_n": 87, "prompt_ms": 92.412, "prompt_per_token_ms": 1.0622068965517242, "prompt_per_second": 941.4361771198545, "predicted_n": 2, "predicted_ms": 30.932, "predicted_per_token_ms": 15.466, "predicted_per_second": 64.65795939480151}, "tps": 64.65795939480151}, {"id": "682a7ade-4ea4-4a12-a433-848b5cac3577", "answer": "ACD", "llm_answer": "C", "score": 0, "topics": ["SystemSecurity"], "timings": {"cache_n": 59, "prompt_n": 118, "prompt_ms": 95.269, "prompt_per_token_ms": 0.807364406779661, "prompt_per_second": 1238.598074924687, "predicted_n": 2, "predicted_ms": 29.874, "predicted_per_token_ms": 14.937, "predicted_per_second": 66.9478476266988}, "tps": 66.9478476266988}, {"id": "78a4f6ab-7aeb-45c5-bf1f-5796f958146e", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["SystemSecurity", "PenTest"], "timings": {"cache_n": 59, "prompt_n": 122, "prompt_ms": 95.741, "prompt_per_token_ms": 0.7847622950819673, "prompt_per_second": 1274.2712108709957, "predicted_n": 2, "predicted_ms": 28.623, "predicted_per_token_ms": 14.3115, "predicted_per_second": 69.87387765084023}, "tps": 69.87387765084023}, {"id": "29d4966e-05ff-4f81-9f0d-ed8b141d1e8b", "answer": "ABD", "llm_answer": "ABCD", "score": 0, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 67, "prompt_n": 138, "prompt_ms": 98.982, "prompt_per_token_ms": 0.7172608695652174, "prompt_per_second": 1394.1928835545857, "predicted_n": 3, "predicted_ms": 59.708, "predicted_per_token_ms": 19.902666666666665, "predicted_per_second": 50.244523346955184}, "tps": 50.244523346955184}, {"id": "0c6db526-0448-4e73-975b-c788a0b156cc", "answer": "AC", "llm_answer": "CD", "score": 0, "topics": ["SystemSecurity", "ApplicationSecurity", "PenTest"], "timings": {"cache_n": 61, "prompt_n": 151, "prompt_ms": 99.785, "prompt_per_token_ms": 0.6608278145695364, "prompt_per_second": 1513.2534950142806, "predicted_n": 4, "predicted_ms": 90.208, "predicted_per_token_ms": 22.552, "predicted_per_second": 44.34196523589926}, "tps": 44.34196523589926}, {"id": "4d43f415-22c1-48f4-9d0c-7e4c41f3e014", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SoftwareSecurity", "SystemSecurity", "Vulnerability"], "timings": {"cache_n": 62, "prompt_n": 136, "prompt_ms": 98.523, "prompt_per_token_ms": 0.7244338235294118, "prompt_per_second": 1380.3883357185632, "predicted_n": 2, "predicted_ms": 29.938, "predicted_per_token_ms": 14.969, "predicted_per_second": 66.80472977486806}, "tps": 66.80472977486806}, {"id": "42c276a8-419d-4897-b88f-bb29727d2ee9", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["SoftwareSecurity", "ApplicationSecurity", "Vulnerability"], "timings": {"cache_n": 61, "prompt_n": 126, "prompt_ms": 95.599, "prompt_per_token_ms": 0.7587222222222223, "prompt_per_second": 1318.0054184667204, "predicted_n": 2, "predicted_ms": 29.568, "predicted_per_token_ms": 14.784, "predicted_per_second": 67.64069264069263}, "tps": 67.64069264069263}, {"id": "fb779ae7-3a14-492c-87cf-dc21a4ecaa3c", "answer": "ACD", "llm_answer": "ACD", "score": 1, "topics": ["SystemSecurity"], "timings": {"cache_n": 59, "prompt_n": 134, "prompt_ms": 85.32, "prompt_per_token_ms": 0.6367164179104478, "prompt_per_second": 1570.5578996718239, "predicted_n": 3, "predicted_ms": 58.647, "predicted_per_token_ms": 19.549, "predicted_per_second": 51.153511688577424}, "tps": 51.153511688577424}, {"id": "4d7c33ac-f305-4e24-92d9-e1b53ccf9aad", "answer": "BC", "llm_answer": "BCD", "score": 0, "topics": ["ApplicationSecurity"], "timings": {"cache_n": 59, "prompt_n": 89, "prompt_ms": 92.583, "prompt_per_token_ms": 1.0402584269662922, "prompt_per_second": 961.2995906375901, "predicted_n": 64, "predicted_ms": 1833.425, "predicted_per_token_ms": 28.647265625, "predicted_per_second": 34.90734554181382}, "tps": 34.90734554181382}, {"id": "3f44d801-d39f-4c63-b7b4-402eb5ecf492", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["SystemSecurity", "WebSecurity"], "timings": {"cache_n": 60, "prompt_n": 385, "prompt_ms": 115.684, "prompt_per_token_ms": 0.30047792207792207, "prompt_per_second": 3328.031534179316, "predicted_n": 2, "predicted_ms": 29.462, "predicted_per_token_ms": 14.731, "predicted_per_second": 67.88405403570701}, "tps": 67.88405403570701}, {"id": "444bda17-234f-4078-9282-c5fff4dd379e", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["ApplicationSecurity"], "timings": {"cache_n": 63, "prompt_n": 72, "prompt_ms": 92.27, "prompt_per_token_ms": 1.2815277777777778, "prompt_per_second": 780.3186301072938, "predicted_n": 2, "predicted_ms": 29.832, "predicted_per_token_ms": 14.916, "predicted_per_second": 67.04210244033253}, "tps": 67.04210244033253}, {"id": "e7605ba6-0bc6-4cd4-9bdf-4c7d2cac9e8f", "answer": "B", "llm_answer": "C", "score": 0, "topics": ["SystemSecurity", "PenTest"], "timings": {"cache_n": 58, "prompt_n": 107, "prompt_ms": 96.478, "prompt_per_token_ms": 0.9016635514018692, "prompt_per_second": 1109.0611331080663, "predicted_n": 2, "predicted_ms": 30.717, "predicted_per_token_ms": 15.3585, "predicted_per_second": 65.11052511638506}, "tps": 65.11052511638506}, {"id": "cd35dcfb-1b6f-4b73-adc7-39c795f32938", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SystemSecurity", "SoftwareSecurity", "Vulnerability"], "timings": {"cache_n": 60, "prompt_n": 111, "prompt_ms": 94.358, "prompt_per_token_ms": 0.8500720720720721, "prompt_per_second": 1176.3708429597914, "predicted_n": 2, "predicted_ms": 29.291, "predicted_per_token_ms": 14.6455, "predicted_per_second": 68.28035915468915}, "tps": 68.28035915468915}, {"id": "112f996e-a4b0-4ceb-8728-b114c8da84c9", "answer": "BD", "llm_answer": "B", "score": 0, "topics": ["ApplicationSecurity", "SystemSecurity"], "timings": {"cache_n": 62, "prompt_n": 118, "prompt_ms": 81.986, "prompt_per_token_ms": 0.6947966101694916, "prompt_per_second": 1439.270119288659, "predicted_n": 2, "predicted_ms": 28.593, "predicted_per_token_ms": 14.2965, "predicted_per_second": 69.94718987164691}, "tps": 69.94718987164691}, {"id": "ca4752f9-18e1-493b-a100-ebfbf618af63", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SoftwareSecurity", "Vulnerability"], "timings": {"cache_n": 63, "prompt_n": 122, "prompt_ms": 95.883, "prompt_per_token_ms": 0.7859262295081967, "prompt_per_second": 1272.3840513959722, "predicted_n": 2, "predicted_ms": 30.391, "predicted_per_token_ms": 15.1955, "predicted_per_second": 65.80895659899312}, "tps": 65.80895659899312}, {"id": "1b3e9fe0-b12f-48bd-91e0-21fbc117ec56", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["WebSecurity", "ApplicationSecurity"], "timings": {"cache_n": 58, "prompt_n": 104, "prompt_ms": 93.411, "prompt_per_token_ms": 0.8981826923076923, "prompt_per_second": 1113.3592403464258, "predicted_n": 3, "predicted_ms": 65.543, "predicted_per_token_ms": 21.84766666666667, "predicted_per_second": 45.7714782661764}, "tps": 45.7714782661764}, {"id": "3dc59cd7-0cf0-44dc-9249-dc050be3651a", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["ApplicationSecurity"], "timings": {"cache_n": 59, "prompt_n": 130, "prompt_ms": 98.397, "prompt_per_token_ms": 0.7569, "prompt_per_second": 1321.178491214163, "predicted_n": 2, "predicted_ms": 32.03, "predicted_per_token_ms": 16.015, "predicted_per_second": 62.44146113019045}, "tps": 62.44146113019045}, {"id": "ac6733a7-2727-440d-af81-c18c6653475f", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["SystemSecurity", "Vulnerability"], "timings": {"cache_n": 60, "prompt_n": 125, "prompt_ms": 95.73, "prompt_per_token_ms": 0.7658400000000001, "prompt_per_second": 1305.7557714405098, "predicted_n": 2, "predicted_ms": 30.302, "predicted_per_token_ms": 15.151, "predicted_per_second": 66.0022440762986}, "tps": 66.0022440762986}, {"id": "d01ebc53-33ce-49fd-b8a8-511af7d4f04d", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SystemSecurity", "WebSecurity"], "timings": {"cache_n": 59, "prompt_n": 96, "prompt_ms": 95.004, "prompt_per_token_ms": 0.9896250000000001, "prompt_per_second": 1010.4837691044586, "predicted_n": 2, "predicted_ms": 33.324, "predicted_per_token_ms": 16.662, "predicted_per_second": 60.01680470531749}, "tps": 60.01680470531749}, {"id": "775d5fc8-9c45-466c-b2b5-ca31fac4ae51", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["ApplicationSecurity"], "timings": {"cache_n": 59, "prompt_n": 111, "prompt_ms": 93.915, "prompt_per_token_ms": 0.8460810810810812, "prompt_per_second": 1181.9198211148378, "predicted_n": 3, "predicted_ms": 58.56, "predicted_per_token_ms": 19.52, "predicted_per_second": 51.229508196721305}, "tps": 51.229508196721305}, {"id": "79a82caa-85a2-49b1-9b20-f605eefd2b71", "answer": "BD", "llm_answer": "BD", "score": 1, "topics": ["WebSecurity", "PenTest"], "timings": {"cache_n": 58, "prompt_n": 98, "prompt_ms": 93.965, "prompt_per_token_ms": 0.958826530612245, "prompt_per_second": 1042.9415207790134, "predicted_n": 4, "predicted_ms": 86.568, "predicted_per_token_ms": 21.642, "predicted_per_second": 46.2064504204787}, "tps": 46.2064504204787}, {"id": "451e0b47-acbf-4ae8-8272-9be2f70ed2e1", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["PenTest"], "timings": {"cache_n": 59, "prompt_n": 128, "prompt_ms": 95.835, "prompt_per_token_ms": 0.7487109375, "prompt_per_second": 1335.6289455835552, "predicted_n": 2, "predicted_ms": 29.011, "predicted_per_token_ms": 14.5055, "predicted_per_second": 68.93936782599704}, "tps": 68.93936782599704}, {"id": "5eb7ad00-5556-4921-8568-59ebf9ed00a1", "answer": "BC", "llm_answer": "BC", "score": 1, "topics": ["Vulnerability", "PenTest"], "timings": {"cache_n": 63, "prompt_n": 141, "prompt_ms": 84.045, "prompt_per_token_ms": 0.596063829787234, "prompt_per_second": 1677.6726753524897, "predicted_n": 4, "predicted_ms": 82.939, "predicted_per_token_ms": 20.73475, "predicted_per_second": 48.228215917722665}, "tps": 48.228215917722665}, {"id": "299c9e63-3e6d-4a8c-8e03-582c9eeefdff", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 60, "prompt_n": 100, "prompt_ms": 94.339, "prompt_per_token_ms": 0.94339, "prompt_per_second": 1060.006996046174, "predicted_n": 2, "predicted_ms": 28.318, "predicted_per_token_ms": 14.159, "predicted_per_second": 70.62645667066883}, "tps": 70.62645667066883}, {"id": "529f203c-303b-4c55-8b88-054ba8eb4939", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["PenTest"], "timings": {"cache_n": 59, "prompt_n": 114, "prompt_ms": 95.169, "prompt_per_token_ms": 0.8348157894736842, "prompt_per_second": 1197.869053998676, "predicted_n": 2, "predicted_ms": 28.966, "predicted_per_token_ms": 14.483, "predicted_per_second": 69.04646827314782}, "tps": 69.04646827314782}, {"id": "7caca903-23ab-4716-b69f-d31994ae5fc0", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["WebSecurity", "SystemSecurity"], "timings": {"cache_n": 62, "prompt_n": 96, "prompt_ms": 96.59, "prompt_per_token_ms": 1.0061458333333333, "prompt_per_second": 993.8917072160677, "predicted_n": 2, "predicted_ms": 29.755, "predicted_per_token_ms": 14.8775, "predicted_per_second": 67.21559401781214}, "tps": 67.21559401781214}, {"id": "4cb8e0cb-3d99-4a9b-816f-ab14e647aee8", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["WebSecurity", "SystemSecurity"], "timings": {"cache_n": 58, "prompt_n": 129, "prompt_ms": 97.239, "prompt_per_token_ms": 0.7537906976744186, "prompt_per_second": 1326.628204732669, "predicted_n": 2, "predicted_ms": 29.513, "predicted_per_token_ms": 14.7565, "predicted_per_second": 67.76674685731712}, "tps": 67.76674685731712}, {"id": "81316b8b-a524-4c22-a043-56d6fa80fff8", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 60, "prompt_n": 221, "prompt_ms": 100.634, "prompt_per_token_ms": 0.4553574660633484, "prompt_per_second": 2196.0768726275414, "predicted_n": 2, "predicted_ms": 29.96, "predicted_per_token_ms": 14.98, "predicted_per_second": 66.75567423230974}, "tps": 66.75567423230974}, {"id": "b5a1f2bb-95d3-4f26-8d4f-6da49899bd7f", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SystemSecurity", "WebSecurity"], "timings": {"cache_n": 60, "prompt_n": 132, "prompt_ms": 96.388, "prompt_per_token_ms": 0.7302121212121212, "prompt_per_second": 1369.4650786404948, "predicted_n": 2, "predicted_ms": 31.548, "predicted_per_token_ms": 15.774, "predicted_per_second": 63.39546088500064}, "tps": 63.39546088500064}, {"id": "810567f1-5d6f-4449-b613-efef35ec2a7f", "answer": "AB", "llm_answer": "AB", "score": 1, "topics": ["PenTest"], "timings": {"cache_n": 60, "prompt_n": 97, "prompt_ms": 92.87, "prompt_per_token_ms": 0.9574226804123712, "prompt_per_second": 1044.4707655863033, "predicted_n": 3, "predicted_ms": 62.977, "predicted_per_token_ms": 20.99233333333333, "predicted_per_second": 47.6364386998428}, "tps": 47.6364386998428}, {"id": "c93e6e77-3673-44e3-8cb5-2c3001a6614c", "answer": "C", "llm_answer": "B", "score": 0, "topics": ["NetworkSecurity"], "timings": {"cache_n": 62, "prompt_n": 104, "prompt_ms": 93.613, "prompt_per_token_ms": 0.900125, "prompt_per_second": 1110.9568115539507, "predicted_n": 2, "predicted_ms": 30.481, "predicted_per_token_ms": 15.2405, "predicted_per_second": 65.61464518880614}, "tps": 65.61464518880614}, {"id": "622f8775-31c6-4182-817c-2e17e2a8ce1b", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["SystemSecurity", "PenTest"], "timings": {"cache_n": 59, "prompt_n": 86, "prompt_ms": 90.482, "prompt_per_token_ms": 1.0521162790697673, "prompt_per_second": 950.4652859132204, "predicted_n": 2, "predicted_ms": 26.038, "predicted_per_token_ms": 13.019, "predicted_per_second": 76.81081496274675}, "tps": 76.81081496274675}, {"id": "a75576fb-3f96-43ba-a223-c4e9d36baa38", "answer": "CD", "llm_answer": "C", "score": 0, "topics": ["Vulnerability", "SoftwareSecurity"], "timings": {"cache_n": 59, "prompt_n": 130, "prompt_ms": 90.88, "prompt_per_token_ms": 0.699076923076923, "prompt_per_second": 1430.4577464788733, "predicted_n": 3, "predicted_ms": 61.185, "predicted_per_token_ms": 20.395, "predicted_per_second": 49.03162539838196}, "tps": 49.03162539838196}, {"id": "5115a62f-f463-4795-baa7-71056d3dd58c", "answer": "AC", "llm_answer": "B", "score": 0, "topics": ["ApplicationSecurity", "SystemSecurity"], "timings": {"cache_n": 59, "prompt_n": 149, "prompt_ms": 97.362, "prompt_per_token_ms": 0.6534362416107382, "prompt_per_second": 1530.371192046178, "predicted_n": 2, "predicted_ms": 30.097, "predicted_per_token_ms": 15.0485, "predicted_per_second": 66.45180582782336}, "tps": 66.45180582782336}, {"id": "73776168-b535-4b61-8f5d-635ddfb9cfe3", "answer": "AB", "llm_answer": "BD", "score": 0, "topics": ["SystemSecurity"], "timings": {"cache_n": 59, "prompt_n": 165, "prompt_ms": 98.526, "prompt_per_token_ms": 0.5971272727272727, "prompt_per_second": 1674.68485475915, "predicted_n": 2, "predicted_ms": 31.507, "predicted_per_token_ms": 15.7535, "predicted_per_second": 63.477957279334746}, "tps": 63.477957279334746}, {"id": "44d869c1-5667-494b-ac96-58b2104ae5be", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 59, "prompt_n": 161, "prompt_ms": 98.842, "prompt_per_token_ms": 0.6139254658385093, "prompt_per_second": 1628.8622245604097, "predicted_n": 2, "predicted_ms": 30.131, "predicted_per_token_ms": 15.0655, "predicted_per_second": 66.37682121403206}, "tps": 66.37682121403206}, {"id": "1d0968b5-fd80-4cdf-9ef1-5dd9c55c5794", "answer": "C", "llm_answer": "C", "score": 1, "topics": [], "timings": {"cache_n": 60, "prompt_n": 167, "prompt_ms": 99.21, "prompt_per_token_ms": 0.5940718562874251, "prompt_per_second": 1683.2980546315898, "predicted_n": 2, "predicted_ms": 30.158, "predicted_per_token_ms": 15.079, "predicted_per_second": 66.31739505272233}, "tps": 66.31739505272233}, {"id": "b23351f0-03df-4ebe-b24b-9455acbdaaac", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["SoftwareSecurity", "ApplicationSecurity"], "timings": {"cache_n": 59, "prompt_n": 95, "prompt_ms": 93.139, "prompt_per_token_ms": 0.9804105263157894, "prompt_per_second": 1019.9808887791366, "predicted_n": 2, "predicted_ms": 28.832, "predicted_per_token_ms": 14.416, "predicted_per_second": 69.36736958934517}, "tps": 69.36736958934517}, {"id": "9a90fef8-40dc-4d57-ada9-983f5ac6a589", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["ApplicationSecurity", "WebSecurity", "Vulnerability"], "timings": {"cache_n": 59, "prompt_n": 173, "prompt_ms": 99.22, "prompt_per_token_ms": 0.5735260115606936, "prompt_per_second": 1743.6000806289055, "predicted_n": 2, "predicted_ms": 29.472, "predicted_per_token_ms": 14.736, "predicted_per_second": 67.86102062975027}, "tps": 67.86102062975027}, {"id": "db81af78-ce94-484a-ab2d-c9b8d39ee39a", "answer": "AB", "llm_answer": "A", "score": 0, "topics": ["WebSecurity"], "timings": {"cache_n": 60, "prompt_n": 96, "prompt_ms": 93.771, "prompt_per_token_ms": 0.97678125, "prompt_per_second": 1023.7706753687174, "predicted_n": 2, "predicted_ms": 32.085, "predicted_per_token_ms": 16.0425, "predicted_per_second": 62.33442418575658}, "tps": 62.33442418575658}, {"id": "b14ac0ca-403c-4033-aa73-e03052f2f045", "answer": "ABD", "llm_answer": "B", "score": 0, "topics": ["WebSecurity"], "timings": {"cache_n": 62, "prompt_n": 109, "prompt_ms": 94.606, "prompt_per_token_ms": 0.8679449541284403, "prompt_per_second": 1152.1467983003192, "predicted_n": 2, "predicted_ms": 29.672, "predicted_per_token_ms": 14.836, "predicted_per_second": 67.40361283364788}, "tps": 67.40361283364788}, {"id": "c2a3091c-a68b-4fbc-a603-62a62d2fd961", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["SoftwareSecurity", "Vulnerability"], "timings": {"cache_n": 66, "prompt_n": 141, "prompt_ms": 91.084, "prompt_per_token_ms": 0.6459858156028369, "prompt_per_second": 1548.0216064292301, "predicted_n": 2, "predicted_ms": 25.698, "predicted_per_token_ms": 12.849, "predicted_per_second": 77.82706825433885}, "tps": 77.82706825433885}, {"id": "9adb28c8-9811-47db-b3d9-3917ca9bc899", "answer": "AC", "llm_answer": "AC", "score": 1, "topics": ["SystemSecurity", "ApplicationSecurity", "PenTest"], "timings": {"cache_n": 61, "prompt_n": 108, "prompt_ms": 91.152, "prompt_per_token_ms": 0.844, "prompt_per_second": 1184.834123222749, "predicted_n": 4, "predicted_ms": 87.304, "predicted_per_token_ms": 21.826, "predicted_per_second": 45.816915605241455}, "tps": 45.816915605241455}, {"id": "b99d911a-1326-44b7-8063-0dc5a56d10ca", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["WebSecurity", "Vulnerability"], "timings": {"cache_n": 62, "prompt_n": 117, "prompt_ms": 95.541, "prompt_per_token_ms": 0.8165897435897436, "prompt_per_second": 1224.6051433416021, "predicted_n": 2, "predicted_ms": 29.241, "predicted_per_token_ms": 14.6205, "predicted_per_second": 68.39711364180431}, "tps": 68.39711364180431}, {"id": "30591e53-996c-4fd4-b01a-0e4c65197731", "answer": "D", "llm_answer": "D", "score": 1, "topics": ["SystemSecurity", "WebSecurity", "Vulnerability"], "timings": {"cache_n": 60, "prompt_n": 100, "prompt_ms": 95.122, "prompt_per_token_ms": 0.95122, "prompt_per_second": 1051.281512163327, "predicted_n": 2, "predicted_ms": 28.765, "predicted_per_token_ms": 14.3825, "predicted_per_second": 69.52894142186685}, "tps": 69.52894142186685}, {"id": "6b81c500-fc37-4e22-a0e9-cf3c3910e620", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["ApplicationSecurity", "SystemSecurity", "PenTest"], "timings": {"cache_n": 59, "prompt_n": 176, "prompt_ms": 99.321, "prompt_per_token_ms": 0.5643238636363637, "prompt_per_second": 1772.0320979450469, "predicted_n": 2, "predicted_ms": 29.14, "predicted_per_token_ms": 14.57, "predicted_per_second": 68.63417982155113}, "tps": 68.63417982155113}, {"id": "30381b2a-999e-4ef6-8e40-cb4fe5cc1dd2", "answer": "BD", "llm_answer": "B", "score": 0, "topics": ["SystemSecurity", "WebSecurity", "ApplicationSecurity"], "timings": {"cache_n": 63, "prompt_n": 133, "prompt_ms": 97.186, "prompt_per_token_ms": 0.7307218045112782, "prompt_per_second": 1368.5098676764142, "predicted_n": 2, "predicted_ms": 36.952, "predicted_per_token_ms": 18.476, "predicted_per_second": 54.12426932236415}, "tps": 54.12426932236415}, {"id": "574b0ba4-03ea-418e-b80d-9a0bfb124888", "answer": "D", "llm_answer": "BD", "score": 0, "topics": ["WebSecurity"], "timings": {"cache_n": 60, "prompt_n": 138, "prompt_ms": 97.817, "prompt_per_token_ms": 0.7088188405797101, "prompt_per_second": 1410.7977140987762, "predicted_n": 3, "predicted_ms": 59.466, "predicted_per_token_ms": 19.822, "predicted_per_second": 50.4489960649783}, "tps": 50.4489960649783}, {"id": "08946499-0a0d-444e-985f-a84484f767c5", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SystemSecurity", "NetworkSecurity", "PenTest"], "timings": {"cache_n": 64, "prompt_n": 73, "prompt_ms": 92.216, "prompt_per_token_ms": 1.2632328767123286, "prompt_per_second": 791.6196755443742, "predicted_n": 2, "predicted_ms": 28.903, "predicted_per_token_ms": 14.4515, "predicted_per_second": 69.19696917275024}, "tps": 69.19696917275024}, {"id": "a65068b5-093a-4654-9e28-43ce4d04e764", "answer": "D", "llm_answer": "D", "score": 1, "topics": ["PenTest", "NetworkSecurity"], "timings": {"cache_n": 59, "prompt_n": 110, "prompt_ms": 95.297, "prompt_per_token_ms": 0.8663363636363636, "prompt_per_second": 1154.286074063192, "predicted_n": 2, "predicted_ms": 30.117, "predicted_per_token_ms": 15.0585, "predicted_per_second": 66.40767672742969}, "tps": 66.40767672742969}, {"id": "5c725539-498d-4f82-8319-916d2267a3e4", "answer": "ABD", "llm_answer": "ACD", "score": 0, "topics": ["SystemSecurity", "ApplicationSecurity", "NetworkSecurity"], "timings": {"cache_n": 59, "prompt_n": 137, "prompt_ms": 98.182, "prompt_per_token_ms": 0.7166569343065694, "prompt_per_second": 1395.3677863559512, "predicted_n": 64, "predicted_ms": 1833.468, "predicted_per_token_ms": 28.6479375, "predicted_per_second": 34.90652686602657}, "tps": 34.90652686602657}, {"id": "2558351c-d550-4900-a37a-e6723e2c6092", "answer": "AC", "llm_answer": "AC", "score": 1, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 61, "prompt_n": 94, "prompt_ms": 92.956, "prompt_per_token_ms": 0.9888936170212766, "prompt_per_second": 1011.2311200998321, "predicted_n": 2, "predicted_ms": 29.776, "predicted_per_token_ms": 14.888, "predicted_per_second": 67.16818914562063}, "tps": 67.16818914562063}, {"id": "0b81ec5a-6811-4105-9c80-2fdc713cb3ca", "answer": "ABD", "llm_answer": "BD", "score": 0, "topics": ["NetworkSecurity", "ApplicationSecurity"], "timings": {"cache_n": 58, "prompt_n": 176, "prompt_ms": 100.133, "prompt_per_token_ms": 0.5689375, "prompt_per_second": 1757.6623091288586, "predicted_n": 4, "predicted_ms": 92.15, "predicted_per_token_ms": 23.0375, "predicted_per_second": 43.40748779164406}, "tps": 43.40748779164406}, {"id": "0bb77b63-5dbd-46aa-8405-cc32a5b5595c", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["ApplicationSecurity", "WebSecurity", "Vulnerability"], "timings": {"cache_n": 61, "prompt_n": 160, "prompt_ms": 96.998, "prompt_per_token_ms": 0.6062375, "prompt_per_second": 1649.51854677416, "predicted_n": 2, "predicted_ms": 29.819, "predicted_per_token_ms": 14.9095, "predicted_per_second": 67.07133035983769}, "tps": 67.07133035983769}, {"id": "4cf3060a-df5a-468c-a369-32685d31e9d0", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["NetworkSecurity", "PenTest"], "timings": {"cache_n": 62, "prompt_n": 100, "prompt_ms": 100.741, "prompt_per_token_ms": 1.00741, "prompt_per_second": 992.6445042237024, "predicted_n": 2, "predicted_ms": 32.55, "predicted_per_token_ms": 16.275, "predicted_per_second": 61.44393241167435}, "tps": 61.44393241167435}, {"id": "5579b4d4-4bde-4b5b-9101-7eb75eb0fa98", "answer": "BC", "llm_answer": "BC", "score": 1, "topics": ["SystemSecurity", "WebSecurity", "PenTest"], "timings": {"cache_n": 59, "prompt_n": 133, "prompt_ms": 101.163, "prompt_per_token_ms": 0.7606240601503759, "prompt_per_second": 1314.709923588664, "predicted_n": 3, "predicted_ms": 61.479, "predicted_per_token_ms": 20.493, "predicted_per_second": 48.79715024642561}, "tps": 48.79715024642561}, {"id": "ba19f695-ba94-4673-9ed4-13ece458ec0e", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SystemSecurity", "WebSecurity"], "timings": {"cache_n": 60, "prompt_n": 136, "prompt_ms": 94.934, "prompt_per_token_ms": 0.6980441176470588, "prompt_per_second": 1432.5742094507764, "predicted_n": 2, "predicted_ms": 27.839, "predicted_per_token_ms": 13.9195, "predicted_per_second": 71.84166097920185}, "tps": 71.84166097920185}, {"id": "29ae21ee-6dd6-4875-b1ef-0a646f852e48", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["WebSecurity", "NetworkSecurity"], "timings": {"cache_n": 60, "prompt_n": 255, "prompt_ms": 100.552, "prompt_per_token_ms": 0.394321568627451, "prompt_per_second": 2536.0012729731875, "predicted_n": 2, "predicted_ms": 30.285, "predicted_per_token_ms": 15.1425, "predicted_per_second": 66.03929337956083}, "tps": 66.03929337956083}, {"id": "615166a7-4a9b-4ecb-9418-004d5ccc6478", "answer": "AC", "llm_answer": "ABCD", "score": 0, "topics": ["NetworkSecurity", "SystemSecurity"], "timings": {"cache_n": 60, "prompt_n": 141, "prompt_ms": 97.36, "prompt_per_token_ms": 0.6904964539007092, "prompt_per_second": 1448.2333607230896, "predicted_n": 8, "predicted_ms": 203.804, "predicted_per_token_ms": 25.4755, "predicted_per_second": 39.25340032580322}, "tps": 39.25340032580322}, {"id": "1da30ab2-09c0-4d05-af84-b21df6cfb7a6", "answer": "D", "llm_answer": "A", "score": 0, "topics": ["SystemSecurity"], "timings": {"cache_n": 60, "prompt_n": 163, "prompt_ms": 99.571, "prompt_per_token_ms": 0.6108650306748467, "prompt_per_second": 1637.0228279318276, "predicted_n": 2, "predicted_ms": 28.688, "predicted_per_token_ms": 14.344, "predicted_per_second": 69.71556051310652}, "tps": 69.71556051310652}, {"id": "a331d117-13c8-4e6f-a8f3-ef6251a605ed", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["WebSecurity", "SystemSecurity"], "timings": {"cache_n": 63, "prompt_n": 129, "prompt_ms": 100.787, "prompt_per_token_ms": 0.7812945736434109, "prompt_per_second": 1279.9269747090398, "predicted_n": 2, "predicted_ms": 29.888, "predicted_per_token_ms": 14.944, "predicted_per_second": 66.91648822269806}, "tps": 66.91648822269806}, {"id": "f68b1d40-3635-441e-94ec-97d0181341c5", "answer": "BD", "llm_answer": "ABCD", "score": 0, "topics": ["SystemSecurity"], "timings": {"cache_n": 62, "prompt_n": 149, "prompt_ms": 96.847, "prompt_per_token_ms": 0.649979865771812, "prompt_per_second": 1538.509194915692, "predicted_n": 3, "predicted_ms": 60.669, "predicted_per_token_ms": 20.223, "predicted_per_second": 49.4486475794887}, "tps": 49.4486475794887}, {"id": "c7fd5811-fc17-4d96-821e-30a6bd36718e", "answer": "A", "llm_answer": "ABCD", "score": 0, "topics": ["Vulnerability"], "timings": {"cache_n": 60, "prompt_n": 131, "prompt_ms": 96.586, "prompt_per_token_ms": 0.7372977099236641, "prompt_per_second": 1356.304226285383, "predicted_n": 64, "predicted_ms": 1827.337, "predicted_per_token_ms": 28.552140625, "predicted_per_second": 35.02364369571677}, "tps": 35.02364369571677}, {"id": "dccc6da5-bc6b-4754-ad61-19cc3ab33577", "answer": "AC", "llm_answer": "A", "score": 0, "topics": ["SoftwareSecurity", "ApplicationSecurity", "Vulnerability"], "timings": {"cache_n": 60, "prompt_n": 150, "prompt_ms": 97.155, "prompt_per_token_ms": 0.6477, "prompt_per_second": 1543.9246564767639, "predicted_n": 2, "predicted_ms": 28.842, "predicted_per_token_ms": 14.421, "predicted_per_second": 69.3433187712364}, "tps": 69.3433187712364}, {"id": "333b212e-2827-4e43-ac61-b39f235b2bdc", "answer": "A", "llm_answer": "AB", "score": 0, "topics": ["PenTest", "ApplicationSecurity"], "timings": {"cache_n": 59, "prompt_n": 98, "prompt_ms": 94.802, "prompt_per_token_ms": 0.9673673469387756, "prompt_per_second": 1033.7334655387015, "predicted_n": 2, "predicted_ms": 31.547, "predicted_per_token_ms": 15.7735, "predicted_per_second": 63.3974704409294}, "tps": 63.3974704409294}, {"id": "9302fef6-ae80-47cc-8905-14b0419df159", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["WebSecurity", "ApplicationSecurity"], "timings": {"cache_n": 58, "prompt_n": 189, "prompt_ms": 98.396, "prompt_per_token_ms": 0.5206137566137566, "prompt_per_second": 1920.8097890158137, "predicted_n": 2, "predicted_ms": 29.166, "predicted_per_token_ms": 14.583, "predicted_per_second": 68.57299595419323}, "tps": 68.57299595419323}, {"id": "92ea1501-42f9-4df6-8377-a80b4880eb52", "answer": "AD", "llm_answer": "B", "score": 0, "topics": ["MemorySafety", "SystemSecurity", "ApplicationSecurity", "Vulnerability"], "timings": {"cache_n": 59, "prompt_n": 181, "prompt_ms": 100.327, "prompt_per_token_ms": 0.554292817679558, "prompt_per_second": 1804.1005910672102, "predicted_n": 2, "predicted_ms": 31.603, "predicted_per_token_ms": 15.8015, "predicted_per_second": 63.285131158434325}, "tps": 63.285131158434325}, {"id": "c69edb30-6eb3-4c19-893d-5c946e115163", "answer": "AC", "llm_answer": "A", "score": 0, "topics": ["WebSecurity"], "timings": {"cache_n": 62, "prompt_n": 196, "prompt_ms": 101.638, "prompt_per_token_ms": 0.5185612244897959, "prompt_per_second": 1928.4126015860209, "predicted_n": 2, "predicted_ms": 30.515, "predicted_per_token_ms": 15.2575, "predicted_per_second": 65.54153694904146}, "tps": 65.54153694904146}, {"id": "9492d4a7-0b0c-4dc0-bfd3-c3c355e1f0db", "answer": "BCD", "llm_answer": "B", "score": 0, "topics": ["ApplicationSecurity", "MemorySafety"], "timings": {"cache_n": 60, "prompt_n": 150, "prompt_ms": 97.19, "prompt_per_token_ms": 0.6479333333333334, "prompt_per_second": 1543.3686593270913, "predicted_n": 2, "predicted_ms": 30.905, "predicted_per_token_ms": 15.4525, "predicted_per_second": 64.71444750040446}, "tps": 64.71444750040446}, {"id": "ea72fe54-c3c6-4492-9dc3-63d0b317342c", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 61, "prompt_n": 125, "prompt_ms": 96.406, "prompt_per_token_ms": 0.771248, "prompt_per_second": 1296.5997966931518, "predicted_n": 2, "predicted_ms": 29.246, "predicted_per_token_ms": 14.623, "predicted_per_second": 68.38542022840731}, "tps": 68.38542022840731}, {"id": "c7b2e845-8d3f-44cd-81d7-8c537f43a87e", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["WebSecurity", "ApplicationSecurity"], "timings": {"cache_n": 63, "prompt_n": 88, "prompt_ms": 93.433, "prompt_per_token_ms": 1.0617386363636365, "prompt_per_second": 941.8513801333575, "predicted_n": 2, "predicted_ms": 26.38, "predicted_per_token_ms": 13.19, "predicted_per_second": 75.8150113722517}, "tps": 75.8150113722517}, {"id": "91d52b1c-a42f-4d3a-921b-6b09b8815b52", "answer": "D", "llm_answer": "A", "score": 0, "topics": ["WebSecurity", "SystemSecurity", "PenTest"], "timings": {"cache_n": 59, "prompt_n": 165, "prompt_ms": 90.478, "prompt_per_token_ms": 0.5483515151515151, "prompt_per_second": 1823.6477375715644, "predicted_n": 2, "predicted_ms": 27.263, "predicted_per_token_ms": 13.6315, "predicted_per_second": 73.35949822103217}, "tps": 73.35949822103217}, {"id": "90564c74-65ec-4762-af8a-2cad52205b30", "answer": "AC", "llm_answer": "ABCD", "score": 0, "topics": ["WebSecurity", "NetworkSecurity"], "timings": {"cache_n": 60, "prompt_n": 150, "prompt_ms": 98.252, "prompt_per_token_ms": 0.6550133333333333, "prompt_per_second": 1526.6864796645361, "predicted_n": 64, "predicted_ms": 1939.42, "predicted_per_token_ms": 30.3034375, "predicted_per_second": 32.99955656845861}, "tps": 32.99955656845861}, {"id": "d9b3ef6c-1112-4286-8c75-30b0a0e09c46", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["ApplicationSecurity", "Vulnerability"], "timings": {"cache_n": 60, "prompt_n": 158, "prompt_ms": 100.863, "prompt_per_token_ms": 0.638373417721519, "prompt_per_second": 1566.4812666686496, "predicted_n": 2, "predicted_ms": 32.735, "predicted_per_token_ms": 16.3675, "predicted_per_second": 61.09668550481136}, "tps": 61.09668550481136}, {"id": "1833052d-7d27-407a-a2df-d13e11531b31", "answer": "BC", "llm_answer": "CD", "score": 0, "topics": ["SystemSecurity", "PenTest"], "timings": {"cache_n": 61, "prompt_n": 96, "prompt_ms": 95.909, "prompt_per_token_ms": 0.9990520833333334, "prompt_per_second": 1000.9488160652284, "predicted_n": 3, "predicted_ms": 63.338, "predicted_per_token_ms": 21.112666666666666, "predicted_per_second": 47.36493100508384}, "tps": 47.36493100508384}, {"id": "1786b19b-0206-4fa9-9727-a141b172cf66", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["PenTest"], "timings": {"cache_n": 60, "prompt_n": 130, "prompt_ms": 100.191, "prompt_per_token_ms": 0.7707, "prompt_per_second": 1297.5217334890358, "predicted_n": 2, "predicted_ms": 30.369, "predicted_per_token_ms": 15.1845, "predicted_per_second": 65.85663011623696}, "tps": 65.85663011623696}, {"id": "ae5dbe43-7d2b-43f8-b7ca-f331733d79f8", "answer": "AC", "llm_answer": "AC", "score": 1, "topics": ["WebSecurity", "ApplicationSecurity"], "timings": {"cache_n": 60, "prompt_n": 145, "prompt_ms": 93.229, "prompt_per_token_ms": 0.6429586206896551, "prompt_per_second": 1555.3100430123675, "predicted_n": 2, "predicted_ms": 28.753, "predicted_per_token_ms": 14.3765, "predicted_per_second": 69.55795916947797}, "tps": 69.55795916947797}, {"id": "43f79df3-a7b6-4321-bb40-9b38f98c8bb1", "answer": "BC", "llm_answer": "BC", "score": 1, "topics": ["SystemSecurity", "ApplicationSecurity", "PenTest"], "timings": {"cache_n": 61, "prompt_n": 134, "prompt_ms": 105.159, "prompt_per_token_ms": 0.7847686567164179, "prompt_per_second": 1274.2608811418897, "predicted_n": 3, "predicted_ms": 63.856, "predicted_per_token_ms": 21.285333333333334, "predicted_per_second": 46.98070658982711}, "tps": 46.98070658982711}, {"id": "97cc97ec-d867-418e-8e9d-f102e52ca82f", "answer": "C", "llm_answer": "B", "score": 0, "topics": ["SystemSecurity", "Vulnerability"], "timings": {"cache_n": 59, "prompt_n": 81, "prompt_ms": 103.131, "prompt_per_token_ms": 1.2732222222222223, "prompt_per_second": 785.408848939698, "predicted_n": 2, "predicted_ms": 30.814, "predicted_per_token_ms": 15.407, "predicted_per_second": 64.90556240669825}, "tps": 64.90556240669825}, {"id": "c2dc2a1d-45c9-4a49-a513-a41b8d257c07", "answer": "A", "llm_answer": "D", "score": 0, "topics": ["PenTest"], "timings": {"cache_n": 58, "prompt_n": 140, "prompt_ms": 97.795, "prompt_per_token_ms": 0.6985357142857143, "prompt_per_second": 1431.566030983179, "predicted_n": 2, "predicted_ms": 30.563, "predicted_per_token_ms": 15.2815, "predicted_per_second": 65.43860223145634}, "tps": 65.43860223145634}, {"id": "a96ef1b8-a510-4c3f-aa1e-1b03043b9367", "answer": "C", "llm_answer": "ACD", "score": 0, "topics": ["Vulnerability"], "timings": {"cache_n": 61, "prompt_n": 129, "prompt_ms": 96.438, "prompt_per_token_ms": 0.7475813953488373, "prompt_per_second": 1337.646985628072, "predicted_n": 64, "predicted_ms": 1921.955, "predicted_per_token_ms": 30.030546875, "predicted_per_second": 33.29942688564509}, "tps": 33.29942688564509}, {"id": "a83fcb0c-2588-4317-8692-9a5ddbf5b456", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["WebSecurity", "ApplicationSecurity"], "timings": {"cache_n": 60, "prompt_n": 161, "prompt_ms": 102.171, "prompt_per_token_ms": 0.6346024844720497, "prompt_per_second": 1575.7896076186, "predicted_n": 2, "predicted_ms": 27.707, "predicted_per_token_ms": 13.8535, "predicted_per_second": 72.18392463998268}, "tps": 72.18392463998268}, {"id": "ed0f7030-e600-497e-9997-b5d7035d5b09", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 60, "prompt_n": 94, "prompt_ms": 89.485, "prompt_per_token_ms": 0.951968085106383, "prompt_per_second": 1050.4553835838408, "predicted_n": 2, "predicted_ms": 31.045, "predicted_per_token_ms": 15.5225, "predicted_per_second": 64.42261233693026}, "tps": 64.42261233693026}, {"id": "5e51e197-b740-4438-89e8-b59a5ef2f1de", "answer": "AB", "llm_answer": "BC", "score": 0, "topics": ["WebSecurity", "SystemSecurity"], "timings": {"cache_n": 59, "prompt_n": 120, "prompt_ms": 99.384, "prompt_per_token_ms": 0.8282, "prompt_per_second": 1207.437816952427, "predicted_n": 3, "predicted_ms": 61.836, "predicted_per_token_ms": 20.612, "predicted_per_second": 48.51542790607414}, "tps": 48.51542790607414}, {"id": "22c281bb-e34e-4d52-b84d-0aac010d48a8", "answer": "A", "llm_answer": "B", "score": 0, "topics": ["SoftwareSecurity", "SystemSecurity", "Vulnerability"], "timings": {"cache_n": 59, "prompt_n": 147, "prompt_ms": 105.113, "prompt_per_token_ms": 0.7150544217687075, "prompt_per_second": 1398.4949530505266, "predicted_n": 2, "predicted_ms": 32.369, "predicted_per_token_ms": 16.1845, "predicted_per_second": 61.787512743674505}, "tps": 61.787512743674505}, {"id": "afa21780-9fa1-48e8-87df-1c94b8dee028", "answer": "AD", "llm_answer": "A", "score": 0, "topics": ["SystemSecurity", "PenTest"], "timings": {"cache_n": 59, "prompt_n": 146, "prompt_ms": 101.515, "prompt_per_token_ms": 0.6953082191780822, "prompt_per_second": 1438.2111018076148, "predicted_n": 2, "predicted_ms": 30.799, "predicted_per_token_ms": 15.3995, "predicted_per_second": 64.9371732848469}, "tps": 64.9371732848469}, {"id": "3bc438da-9915-41ac-bca2-57db74a96db3", "answer": "BCD", "llm_answer": "ACD", "score": 0, "topics": ["SystemSecurity", "SoftwareSecurity"], "timings": {"cache_n": 61, "prompt_n": 167, "prompt_ms": 103.967, "prompt_per_token_ms": 0.6225568862275449, "prompt_per_second": 1606.2789154250868, "predicted_n": 3, "predicted_ms": 63.187, "predicted_per_token_ms": 21.06233333333333, "predicted_per_second": 47.478120499469824}, "tps": 47.478120499469824}, {"id": "0f1c2b65-1831-4b26-8f1d-95ecc6ef4eb6", "answer": "A", "llm_answer": "D", "score": 0, "topics": ["Vulnerability"], "timings": {"cache_n": 60, "prompt_n": 134, "prompt_ms": 100.18, "prompt_per_token_ms": 0.7476119402985075, "prompt_per_second": 1337.5923337991615, "predicted_n": 2, "predicted_ms": 32.286, "predicted_per_token_ms": 16.143, "predicted_per_second": 61.9463544570402}, "tps": 61.9463544570402}, {"id": "589aabad-eb7d-4728-a9b0-a4b22bb339ea", "answer": "B", "llm_answer": "A", "score": 0, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 60, "prompt_n": 165, "prompt_ms": 103.178, "prompt_per_token_ms": 0.6253212121212121, "prompt_per_second": 1599.1781193665315, "predicted_n": 2, "predicted_ms": 39.211, "predicted_per_token_ms": 19.6055, "predicted_per_second": 51.00609522837979}, "tps": 51.00609522837979}, {"id": "cf57834a-4128-45df-8f46-a24ee0e74cff", "answer": "AB", "llm_answer": "A", "score": 0, "topics": [], "timings": {"cache_n": 60, "prompt_n": 161, "prompt_ms": 108.063, "prompt_per_token_ms": 0.6711987577639752, "prompt_per_second": 1489.8716489455223, "predicted_n": 2, "predicted_ms": 30.48, "predicted_per_token_ms": 15.24, "predicted_per_second": 65.61679790026247}, "tps": 65.61679790026247}, {"id": "4275b9af-b012-4410-b9b3-f42621f2de2f", "answer": "BD", "llm_answer": "BD", "score": 1, "topics": ["WebSecurity", "ApplicationSecurity"], "timings": {"cache_n": 59, "prompt_n": 145, "prompt_ms": 100.527, "prompt_per_token_ms": 0.6932896551724138, "prompt_per_second": 1442.3985595909558, "predicted_n": 2, "predicted_ms": 27.022, "predicted_per_token_ms": 13.511, "predicted_per_second": 74.01376656058027}, "tps": 74.01376656058027}, {"id": "bf270675-a5df-48ca-ada4-6faf02245711", "answer": "ABD", "llm_answer": "BC", "score": 0, "topics": ["WebSecurity", "Vulnerability"], "timings": {"cache_n": 61, "prompt_n": 127, "prompt_ms": 92.755, "prompt_per_token_ms": 0.7303543307086614, "prompt_per_second": 1369.1984259608646, "predicted_n": 3, "predicted_ms": 61.874, "predicted_per_token_ms": 20.624666666666666, "predicted_per_second": 48.48563209102369}, "tps": 48.48563209102369}, {"id": "eda33693-5b7e-43bc-a401-24be17917cb2", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["NetworkSecurity"], "timings": {"cache_n": 60, "prompt_n": 90, "prompt_ms": 95.187, "prompt_per_token_ms": 1.0576333333333332, "prompt_per_second": 945.5072646474833, "predicted_n": 2, "predicted_ms": 30.942, "predicted_per_token_ms": 15.471, "predicted_per_second": 64.6370628918622}, "tps": 64.6370628918622}, {"id": "7c646d5e-d9bf-4e12-ad42-4f82ba1d8f7e", "answer": "AC", "llm_answer": "AC", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 58, "prompt_n": 140, "prompt_ms": 101.25, "prompt_per_token_ms": 0.7232142857142857, "prompt_per_second": 1382.716049382716, "predicted_n": 2, "predicted_ms": 33.328, "predicted_per_token_ms": 16.664, "predicted_per_second": 60.009601536245796}, "tps": 60.009601536245796}, {"id": "17336d81-4618-494e-8de8-4355537b7d03", "answer": "ACD", "llm_answer": "ACD", "score": 1, "topics": ["ApplicationSecurity", "SystemSecurity"], "timings": {"cache_n": 59, "prompt_n": 143, "prompt_ms": 104.213, "prompt_per_token_ms": 0.7287622377622377, "prompt_per_second": 1372.1896500436608, "predicted_n": 3, "predicted_ms": 65.399, "predicted_per_token_ms": 21.799666666666667, "predicted_per_second": 45.872261043746846}, "tps": 45.872261043746846}, {"id": "2cb27832-f74a-4625-9f2c-31bb9a7b5504", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["NetworkSecurity", "PenTest"], "timings": {"cache_n": 60, "prompt_n": 114, "prompt_ms": 103.852, "prompt_per_token_ms": 0.9109824561403509, "prompt_per_second": 1097.715980433694, "predicted_n": 2, "predicted_ms": 31.346, "predicted_per_token_ms": 15.673, "predicted_per_second": 63.80399413003254}, "tps": 63.80399413003254}, {"id": "55a583bb-01b2-4d45-ac51-596052e8b15b", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["WebSecurity", "ApplicationSecurity"], "timings": {"cache_n": 62, "prompt_n": 105, "prompt_ms": 97.417, "prompt_per_token_ms": 0.9277809523809524, "prompt_per_second": 1077.8406232998348, "predicted_n": 2, "predicted_ms": 31.686, "predicted_per_token_ms": 15.843, "predicted_per_second": 63.119358707315534}, "tps": 63.119358707315534}, {"id": "d6a8defe-b5dc-44f8-952b-9cf77a77d04e", "answer": "BD", "llm_answer": "BD", "score": 1, "topics": ["ApplicationSecurity", "PenTest"], "timings": {"cache_n": 58, "prompt_n": 116, "prompt_ms": 97.896, "prompt_per_token_ms": 0.8439310344827586, "prompt_per_second": 1184.930947127564, "predicted_n": 2, "predicted_ms": 33.923, "predicted_per_token_ms": 16.9615, "predicted_per_second": 58.95704978922854}, "tps": 58.95704978922854}, {"id": "2968f4d2-3a56-4a8b-b7ee-ad8deeeae1ba", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["WebSecurity", "Vulnerability"], "timings": {"cache_n": 59, "prompt_n": 119, "prompt_ms": 96.782, "prompt_per_token_ms": 0.8132941176470588, "prompt_per_second": 1229.5674815564878, "predicted_n": 2, "predicted_ms": 30.605, "predicted_per_token_ms": 15.3025, "predicted_per_second": 65.34879921581441}, "tps": 65.34879921581441}, {"id": "192acf52-327f-49b5-8911-1023319d540f", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["ApplicationSecurity", "SystemSecurity"], "timings": {"cache_n": 60, "prompt_n": 137, "prompt_ms": 99.024, "prompt_per_token_ms": 0.7228029197080292, "prompt_per_second": 1383.5029891743416, "predicted_n": 2, "predicted_ms": 29.484, "predicted_per_token_ms": 14.742, "predicted_per_second": 67.83340116673449}, "tps": 67.83340116673449}, {"id": "70001d43-417d-4319-bb76-f10dc491424f", "answer": "ACD", "llm_answer": "ACD", "score": 1, "topics": ["ApplicationSecurity", "SystemSecurity"], "timings": {"cache_n": 61, "prompt_n": 149, "prompt_ms": 89.95, "prompt_per_token_ms": 0.6036912751677852, "prompt_per_second": 1656.4758198999443, "predicted_n": 3, "predicted_ms": 60.913, "predicted_per_token_ms": 20.304333333333332, "predicted_per_second": 49.250570485774794}, "tps": 49.250570485774794}, {"id": "f5f8ce6f-e899-4307-aae0-9718eb718fe4", "answer": "B", "llm_answer": "A", "score": 0, "topics": ["SystemSecurity"], "timings": {"cache_n": 60, "prompt_n": 211, "prompt_ms": 106.43, "prompt_per_token_ms": 0.5044075829383886, "prompt_per_second": 1982.5237245137648, "predicted_n": 2, "predicted_ms": 32.996, "predicted_per_token_ms": 16.498, "predicted_per_second": 60.61340768578009}, "tps": 60.61340768578009}, {"id": "3cd590f5-79fe-4359-86e4-ecc1daf24fd0", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["ApplicationSecurity", "WebSecurity"], "timings": {"cache_n": 60, "prompt_n": 133, "prompt_ms": 106.42, "prompt_per_token_ms": 0.8001503759398496, "prompt_per_second": 1249.7650817515505, "predicted_n": 2, "predicted_ms": 30.921, "predicted_per_token_ms": 15.4605, "predicted_per_second": 64.68096115908283}, "tps": 64.68096115908283}, {"id": "a349458f-8fec-4e91-823f-b1b353d71b2e", "answer": "AC", "llm_answer": "ACD", "score": 0, "topics": ["ApplicationSecurity", "SystemSecurity"], "timings": {"cache_n": 59, "prompt_n": 139, "prompt_ms": 106.62, "prompt_per_token_ms": 0.7670503597122302, "prompt_per_second": 1303.6953667229413, "predicted_n": 3, "predicted_ms": 66.426, "predicted_per_token_ms": 22.142, "predicted_per_second": 45.16303856923494}, "tps": 45.16303856923494}, {"id": "0adf29c3-fe2e-4ebe-a6fc-733d121b83cb", "answer": "AB", "llm_answer": "C", "score": 0, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 59, "prompt_n": 112, "prompt_ms": 105.121, "prompt_per_token_ms": 0.9385803571428571, "prompt_per_second": 1065.438875200959, "predicted_n": 2, "predicted_ms": 30.454, "predicted_per_token_ms": 15.227, "predicted_per_second": 65.67281802062126}, "tps": 65.67281802062126}, {"id": "ba657a5d-ff86-4c07-b811-72cdb8251a0a", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 62, "prompt_n": 148, "prompt_ms": 104.786, "prompt_per_token_ms": 0.7080135135135135, "prompt_per_second": 1412.4024201706334, "predicted_n": 2, "predicted_ms": 31.961, "predicted_per_token_ms": 15.9805, "predicted_per_second": 62.576264822752734}, "tps": 62.576264822752734}, {"id": "53e2e30d-c519-4e06-a39e-d5ca07909e8b", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["WebSecurity", "Vulnerability"], "timings": {"cache_n": 66, "prompt_n": 96, "prompt_ms": 99.038, "prompt_per_token_ms": 1.0316458333333334, "prompt_per_second": 969.3249055917931, "predicted_n": 2, "predicted_ms": 31.735, "predicted_per_token_ms": 15.8675, "predicted_per_second": 63.02190011028833}, "tps": 63.02190011028833}, {"id": "7ba0ce3b-d307-4d23-8880-355679c44b07", "answer": "BCD", "llm_answer": "C", "score": 0, "topics": ["WebSecurity"], "timings": {"cache_n": 58, "prompt_n": 108, "prompt_ms": 98.628, "prompt_per_token_ms": 0.9132222222222223, "prompt_per_second": 1095.0237255140528, "predicted_n": 3, "predicted_ms": 58.399, "predicted_per_token_ms": 19.466333333333335, "predicted_per_second": 51.370742649702905}, "tps": 51.370742649702905}, {"id": "4c68cbfe-ccdb-4750-993d-8009d3fbf312", "answer": "BD", "llm_answer": "BD", "score": 1, "topics": ["NetworkSecurity", "SystemSecurity", "PenTest"], "timings": {"cache_n": 59, "prompt_n": 123, "prompt_ms": 93.826, "prompt_per_token_ms": 0.7628130081300812, "prompt_per_second": 1310.9372668556691, "predicted_n": 2, "predicted_ms": 27.258, "predicted_per_token_ms": 13.629, "predicted_per_second": 73.37295472888694}, "tps": 73.37295472888694}, {"id": "977186bb-ac66-419f-bf6d-620202754434", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["NetworkSecurity", "PenTest"], "timings": {"cache_n": 60, "prompt_n": 159, "prompt_ms": 94.197, "prompt_per_token_ms": 0.592433962264151, "prompt_per_second": 1687.9518456001783, "predicted_n": 2, "predicted_ms": 31.737, "predicted_per_token_ms": 15.8685, "predicted_per_second": 63.0179286006869}, "tps": 63.0179286006869}, {"id": "57a481a9-c135-40c5-a3fe-6b4fe04d5262", "answer": "CD", "llm_answer": "D", "score": 0, "topics": ["Cryptography", "WebSecurity"], "timings": {"cache_n": 62, "prompt_n": 74, "prompt_ms": 96.485, "prompt_per_token_ms": 1.3038513513513514, "prompt_per_second": 766.958594600197, "predicted_n": 2, "predicted_ms": 30.226, "predicted_per_token_ms": 15.113, "predicted_per_second": 66.16819956328989}, "tps": 66.16819956328989}, {"id": "5e9cb083-a849-40e8-bf66-0174b4c23700", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SystemSecurity", "Vulnerability"], "timings": {"cache_n": 58, "prompt_n": 222, "prompt_ms": 110.365, "prompt_per_token_ms": 0.49713963963963964, "prompt_per_second": 2011.5072713269606, "predicted_n": 2, "predicted_ms": 30.396, "predicted_per_token_ms": 15.198, "predicted_per_second": 65.79813133307015}, "tps": 65.79813133307015}, {"id": "24f8a0d0-f6f6-49ba-ad49-59bf8b03f850", "answer": "BC", "llm_answer": "C", "score": 0, "topics": ["SystemSecurity"], "timings": {"cache_n": 59, "prompt_n": 109, "prompt_ms": 98.827, "prompt_per_token_ms": 0.9066697247706422, "prompt_per_second": 1102.9374563631395, "predicted_n": 2, "predicted_ms": 31.452, "predicted_per_token_ms": 15.726, "predicted_per_second": 63.58896095637797}, "tps": 63.58896095637797}, {"id": "da6ec626-8e86-4f67-ae1a-0b1bb60052b7", "answer": "AC", "llm_answer": "AC", "score": 1, "topics": ["PenTest", "SystemSecurity"], "timings": {"cache_n": 59, "prompt_n": 160, "prompt_ms": 99.93, "prompt_per_token_ms": 0.6245625, "prompt_per_second": 1601.1207845491842, "predicted_n": 2, "predicted_ms": 30.398, "predicted_per_token_ms": 15.199, "predicted_per_second": 65.79380222383051}, "tps": 65.79380222383051}, {"id": "edc85f6a-29f2-422a-adb8-270b3cba7e7c", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["NetworkSecurity", "SystemSecurity", "PenTest"], "timings": {"cache_n": 62, "prompt_n": 104, "prompt_ms": 99.302, "prompt_per_token_ms": 0.9548269230769232, "prompt_per_second": 1047.3102253731042, "predicted_n": 3, "predicted_ms": 63.329, "predicted_per_token_ms": 21.109666666666666, "predicted_per_second": 47.371662271629106}, "tps": 47.371662271629106}, {"id": "4ba6c69e-6b1b-4f37-932f-96b6ac965fe5", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["SystemSecurity", "WebSecurity"], "timings": {"cache_n": 58, "prompt_n": 137, "prompt_ms": 102.178, "prompt_per_token_ms": 0.7458248175182481, "prompt_per_second": 1340.7974319325099, "predicted_n": 2, "predicted_ms": 33.672, "predicted_per_token_ms": 16.836, "predicted_per_second": 59.39653124257544}, "tps": 59.39653124257544}, {"id": "be82160c-a200-422c-91b0-e73281ecb7cd", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 60, "prompt_n": 158, "prompt_ms": 104.098, "prompt_per_token_ms": 0.6588481012658228, "prompt_per_second": 1517.8005341120866, "predicted_n": 2, "predicted_ms": 33.382, "predicted_per_token_ms": 16.691, "predicted_per_second": 59.91252770954407}, "tps": 59.91252770954407}, {"id": "ca1cff29-97b9-4dd4-a4eb-7236c83c4c2b", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["WebSecurity", "SoftwareSecurity"], "timings": {"cache_n": 59, "prompt_n": 144, "prompt_ms": 104.29, "prompt_per_token_ms": 0.7242361111111112, "prompt_per_second": 1380.7651740339438, "predicted_n": 2, "predicted_ms": 27.181, "predicted_per_token_ms": 13.5905, "predicted_per_second": 73.58081012471948}, "tps": 73.58081012471948}, {"id": "9ae30129-74e9-4a1f-b059-ed54385c6ea7", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["WebSecurity", "ApplicationSecurity"], "timings": {"cache_n": 62, "prompt_n": 119, "prompt_ms": 94.355, "prompt_per_token_ms": 0.7928991596638656, "prompt_per_second": 1261.1944253086747, "predicted_n": 2, "predicted_ms": 31.587, "predicted_per_token_ms": 15.7935, "predicted_per_second": 63.317187450533446}, "tps": 63.317187450533446}, {"id": "9a3242a9-fee3-4e87-842c-4533388282c3", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["ApplicationSecurity", "Vulnerability"], "timings": {"cache_n": 59, "prompt_n": 152, "prompt_ms": 99.246, "prompt_per_token_ms": 0.6529342105263157, "prompt_per_second": 1531.54787094694, "predicted_n": 2, "predicted_ms": 31.801, "predicted_per_token_ms": 15.9005, "predicted_per_second": 62.89110405333166}, "tps": 62.89110405333166}, {"id": "b7092610-90ee-4f6b-ac4a-bbd4b63a041e", "answer": "ABD", "llm_answer": "ABD", "score": 1, "topics": ["SystemSecurity"], "timings": {"cache_n": 61, "prompt_n": 126, "prompt_ms": 105.166, "prompt_per_token_ms": 0.8346507936507936, "prompt_per_second": 1198.1058517011202, "predicted_n": 4, "predicted_ms": 96.148, "predicted_per_token_ms": 24.037, "predicted_per_second": 41.60252943378958}, "tps": 41.60252943378958}, {"id": "3155668b-347a-40c6-b5b1-7fea8e0865f5", "answer": "AC", "llm_answer": "A", "score": 0, "topics": ["PenTest"], "timings": {"cache_n": 60, "prompt_n": 163, "prompt_ms": 105.396, "prompt_per_token_ms": 0.646601226993865, "prompt_per_second": 1546.5482561008007, "predicted_n": 2, "predicted_ms": 34.863, "predicted_per_token_ms": 17.4315, "predicted_per_second": 57.36740957462066}, "tps": 57.36740957462066}, {"id": "eaffa54e-eaa7-413a-b963-9eae83de3350", "answer": "B", "llm_answer": "BD", "score": 0, "topics": ["ApplicationSecurity", "WebSecurity"], "timings": {"cache_n": 60, "prompt_n": 96, "prompt_ms": 101.782, "prompt_per_token_ms": 1.0602291666666666, "prompt_per_second": 943.1923129826494, "predicted_n": 3, "predicted_ms": 65.406, "predicted_per_token_ms": 21.802000000000003, "predicted_per_second": 45.86735161911751}, "tps": 45.86735161911751}, {"id": "806c5a1a-4536-476a-84bd-9a66a80a4d32", "answer": "ABC", "llm_answer": "ABCD", "score": 0, "topics": ["ApplicationSecurity", "SystemSecurity"], "timings": {"cache_n": 58, "prompt_n": 125, "prompt_ms": 105.644, "prompt_per_token_ms": 0.845152, "prompt_per_second": 1183.2191132482676, "predicted_n": 64, "predicted_ms": 1920.705, "predicted_per_token_ms": 30.011015625, "predicted_per_second": 33.32109824257239}, "tps": 33.32109824257239}, {"id": "5d2edc03-7815-44e7-928b-98081c6345d6", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["WebSecurity", "Vulnerability"], "timings": {"cache_n": 61, "prompt_n": 79, "prompt_ms": 94.806, "prompt_per_token_ms": 1.2000759493670885, "prompt_per_second": 833.2805940552286, "predicted_n": 2, "predicted_ms": 29.283, "predicted_per_token_ms": 14.6415, "predicted_per_second": 68.299013079261}, "tps": 68.299013079261}, {"id": "cee3fd3d-8891-4128-8e01-52b5af432583", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["WebSecurity", "Vulnerability"], "timings": {"cache_n": 58, "prompt_n": 124, "prompt_ms": 102.872, "prompt_per_token_ms": 0.8296129032258065, "prompt_per_second": 1205.3814449024028, "predicted_n": 2, "predicted_ms": 32.194, "predicted_per_token_ms": 16.097, "predicted_per_second": 62.123377026775174}, "tps": 62.123377026775174}, {"id": "b35ebd47-4fc7-44b4-9021-4878e8b98360", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SoftwareSecurity", "Vulnerability"], "timings": {"cache_n": 62, "prompt_n": 138, "prompt_ms": 106.629, "prompt_per_token_ms": 0.7726739130434783, "prompt_per_second": 1294.2070168528262, "predicted_n": 2, "predicted_ms": 32.038, "predicted_per_token_ms": 16.019, "predicted_per_second": 62.42586928022973}, "tps": 62.42586928022973}, {"id": "5e666236-8a3f-4897-b02e-03c7c658941e", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["SoftwareSecurity", "SystemSecurity", "Vulnerability"], "timings": {"cache_n": 62, "prompt_n": 136, "prompt_ms": 101.545, "prompt_per_token_ms": 0.7466544117647059, "prompt_per_second": 1339.3076960953272, "predicted_n": 2, "predicted_ms": 31.659, "predicted_per_token_ms": 15.8295, "predicted_per_second": 63.173189298461736}, "tps": 63.173189298461736}, {"id": "854b136a-5774-471f-b03d-53d29c80e631", "answer": "ABD", "llm_answer": "B", "score": 0, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 60, "prompt_n": 151, "prompt_ms": 101.408, "prompt_per_token_ms": 0.6715761589403973, "prompt_per_second": 1489.0343957084253, "predicted_n": 2, "predicted_ms": 33.707, "predicted_per_token_ms": 16.8535, "predicted_per_second": 59.334856261310705}, "tps": 59.334856261310705}, {"id": "9e1b71c6-ec25-43f4-bcd2-69547335e865", "answer": "D", "llm_answer": "C", "score": 0, "topics": ["SoftwareSecurity", "WebSecurity"], "timings": {"cache_n": 59, "prompt_n": 74, "prompt_ms": 97.763, "prompt_per_token_ms": 1.3211216216216217, "prompt_per_second": 756.9325818561214, "predicted_n": 2, "predicted_ms": 32.508, "predicted_per_token_ms": 16.254, "predicted_per_second": 61.52331733727082}, "tps": 61.52331733727082}, {"id": "4b8aa558-b4d3-43b8-b737-fd59998bc4e1", "answer": "ABD", "llm_answer": "BCD", "score": 0, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 59, "prompt_n": 120, "prompt_ms": 99.598, "prompt_per_token_ms": 0.8299833333333333, "prompt_per_second": 1204.8434707524248, "predicted_n": 4, "predicted_ms": 90.179, "predicted_per_token_ms": 22.54475, "predicted_per_second": 44.35622484170372}, "tps": 44.35622484170372}, {"id": "1f20d072-5bf5-4364-951e-f04239c18fed", "answer": "AC", "llm_answer": "AC", "score": 1, "topics": ["NetworkSecurity"], "timings": {"cache_n": 62, "prompt_n": 119, "prompt_ms": 98.19, "prompt_per_token_ms": 0.8251260504201681, "prompt_per_second": 1211.93604236684, "predicted_n": 2, "predicted_ms": 27.362, "predicted_per_token_ms": 13.681, "predicted_per_second": 73.09407207075506}, "tps": 73.09407207075506}, {"id": "268ea10f-2705-4776-bc9c-7876ec0cfd48", "answer": "AB", "llm_answer": "AB", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 62, "prompt_n": 115, "prompt_ms": 96.654, "prompt_per_token_ms": 0.8404695652173912, "prompt_per_second": 1189.8110786930702, "predicted_n": 2, "predicted_ms": 27.052, "predicted_per_token_ms": 13.526, "predicted_per_second": 73.93168712110011}, "tps": 73.93168712110011}, {"id": "6c460791-e27e-4c70-b39a-6ed5928a3e15", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 59, "prompt_n": 126, "prompt_ms": 100.934, "prompt_per_token_ms": 0.801063492063492, "prompt_per_second": 1248.3404997324985, "predicted_n": 2, "predicted_ms": 29.589, "predicted_per_token_ms": 14.7945, "predicted_per_second": 67.5926864713238}, "tps": 67.5926864713238}, {"id": "66c93881-9f22-48dd-98f4-220b1086b158", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SoftwareSecurity", "ApplicationSecurity", "Vulnerability"], "timings": {"cache_n": 60, "prompt_n": 180, "prompt_ms": 102.841, "prompt_per_token_ms": 0.5713388888888888, "prompt_per_second": 1750.2746958897717, "predicted_n": 2, "predicted_ms": 30.955, "predicted_per_token_ms": 15.4775, "predicted_per_second": 64.60991762235504}, "tps": 64.60991762235504}, {"id": "9ca5bb8a-9b23-4f13-b85e-7f92d60f5350", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SystemSecurity", "WebSecurity"], "timings": {"cache_n": 61, "prompt_n": 91, "prompt_ms": 94.54, "prompt_per_token_ms": 1.038901098901099, "prompt_per_second": 962.5555320499259, "predicted_n": 2, "predicted_ms": 31.144, "predicted_per_token_ms": 15.572, "predicted_per_second": 64.21782686873877}, "tps": 64.21782686873877}, {"id": "d447ab35-4cc7-4b9f-8751-a6c15a871414", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SystemSecurity", "PenTest"], "timings": {"cache_n": 59, "prompt_n": 122, "prompt_ms": 97.521, "prompt_per_token_ms": 0.7993524590163934, "prompt_per_second": 1251.012602413839, "predicted_n": 2, "predicted_ms": 31.761, "predicted_per_token_ms": 15.8805, "predicted_per_second": 62.97030949907119}, "tps": 62.97030949907119}, {"id": "fe6dd28b-9b53-4e30-85b3-e48b1d38abf6", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["ApplicationSecurity", "Vulnerability"], "timings": {"cache_n": 59, "prompt_n": 179, "prompt_ms": 102.986, "prompt_per_token_ms": 0.5753407821229051, "prompt_per_second": 1738.1003243159264, "predicted_n": 2, "predicted_ms": 31.635, "predicted_per_token_ms": 15.8175, "predicted_per_second": 63.221115852694794}, "tps": 63.221115852694794}, {"id": "bc4d9c17-9a00-40f2-a2ea-2730a5f04b0f", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["SystemSecurity", "PenTest"], "timings": {"cache_n": 62, "prompt_n": 172, "prompt_ms": 102.229, "prompt_per_token_ms": 0.5943546511627907, "prompt_per_second": 1682.4971387766682, "predicted_n": 2, "predicted_ms": 31.856, "predicted_per_token_ms": 15.928, "predicted_per_second": 62.78252134605725}, "tps": 62.78252134605725}, {"id": "1e6fca53-ff35-499c-8713-2a3d108c6f91", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["WebSecurity", "ApplicationSecurity"], "timings": {"cache_n": 61, "prompt_n": 156, "prompt_ms": 103.749, "prompt_per_token_ms": 0.6650576923076923, "prompt_per_second": 1503.628950640488, "predicted_n": 2, "predicted_ms": 30.231, "predicted_per_token_ms": 15.1155, "predicted_per_second": 66.15725579702953}, "tps": 66.15725579702953}, {"id": "43747b4d-cafe-49da-90f4-04d658120cf3", "answer": "AB", "llm_answer": "BC", "score": 0, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 59, "prompt_n": 123, "prompt_ms": 98.918, "prompt_per_token_ms": 0.8042113821138211, "prompt_per_second": 1243.4541741644593, "predicted_n": 3, "predicted_ms": 59.677, "predicted_per_token_ms": 19.892333333333333, "predicted_per_second": 50.27062352330043}, "tps": 50.27062352330043}, {"id": "6496b772-b3c9-4f60-95a3-f8f3fc6464d3", "answer": "A", "llm_answer": "C", "score": 0, "topics": ["WebSecurity", "Vulnerability"], "timings": {"cache_n": 61, "prompt_n": 152, "prompt_ms": 91.441, "prompt_per_token_ms": 0.6015855263157895, "prompt_per_second": 1662.2740346234184, "predicted_n": 2, "predicted_ms": 28.222, "predicted_per_token_ms": 14.111, "predicted_per_second": 70.86669973779321}, "tps": 70.86669973779321}, {"id": "af068a73-4330-49be-b6f7-ffdf03cdc260", "answer": "AC", "llm_answer": "CD", "score": 0, "topics": ["WebSecurity", "NetworkSecurity"], "timings": {"cache_n": 60, "prompt_n": 110, "prompt_ms": 98.211, "prompt_per_token_ms": 0.8928272727272727, "prompt_per_second": 1120.0374703444625, "predicted_n": 4, "predicted_ms": 90.148, "predicted_per_token_ms": 22.537, "predicted_per_second": 44.371478013932645}, "tps": 44.371478013932645}, {"id": "366a404d-5fd0-4364-9e3d-b9ec8a563af4", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["ApplicationSecurity", "SystemSecurity"], "timings": {"cache_n": 58, "prompt_n": 192, "prompt_ms": 102.446, "prompt_per_token_ms": 0.5335729166666666, "prompt_per_second": 1874.1580930441405, "predicted_n": 2, "predicted_ms": 29.614, "predicted_per_token_ms": 14.807, "predicted_per_second": 67.53562504220976}, "tps": 67.53562504220976}, {"id": "cca3a87b-646e-4a36-8fb1-a866ddb7d7cd", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["SystemSecurity", "NetworkSecurity", "PenTest"], "timings": {"cache_n": 62, "prompt_n": 115, "prompt_ms": 99.12, "prompt_per_token_ms": 0.8619130434782609, "prompt_per_second": 1160.2098466505245, "predicted_n": 2, "predicted_ms": 30.38, "predicted_per_token_ms": 15.19, "predicted_per_second": 65.83278472679395}, "tps": 65.83278472679395}, {"id": "9a748c8b-199e-485e-a244-e4c30a03a72b", "answer": "AD", "llm_answer": "AD", "score": 1, "topics": ["ApplicationSecurity", "WebSecurity"], "timings": {"cache_n": 58, "prompt_n": 119, "prompt_ms": 98.31, "prompt_per_token_ms": 0.8261344537815126, "prompt_per_second": 1210.4567185433832, "predicted_n": 3, "predicted_ms": 63.589, "predicted_per_token_ms": 21.19633333333333, "predicted_per_second": 47.177971032725786}, "tps": 47.177971032725786}, {"id": "454b2de5-944b-42fc-adf7-4219365faedc", "answer": "CD", "llm_answer": "CD", "score": 1, "topics": ["WebSecurity", "ApplicationSecurity"], "timings": {"cache_n": 60, "prompt_n": 136, "prompt_ms": 100.988, "prompt_per_token_ms": 0.7425588235294117, "prompt_per_second": 1346.6946567909058, "predicted_n": 4, "predicted_ms": 90.601, "predicted_per_token_ms": 22.65025, "predicted_per_second": 44.149623072593016}, "tps": 44.149623072593016}, {"id": "25e7b4c9-dd6d-489e-b2ec-89c6c4c5171c", "answer": "BD", "llm_answer": "BC", "score": 0, "topics": ["SystemSecurity"], "timings": {"cache_n": 59, "prompt_n": 144, "prompt_ms": 100.654, "prompt_per_token_ms": 0.6989861111111111, "prompt_per_second": 1430.6435909154134, "predicted_n": 3, "predicted_ms": 58.776, "predicted_per_token_ms": 19.592000000000002, "predicted_per_second": 51.04124132298897}, "tps": 51.04124132298897}, {"id": "fb409d74-45c5-4518-bc5e-c762a4e028cc", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["SoftwareSecurity", "Vulnerability"], "timings": {"cache_n": 60, "prompt_n": 194, "prompt_ms": 103.661, "prompt_per_token_ms": 0.5343350515463917, "prompt_per_second": 1871.4849364756271, "predicted_n": 3, "predicted_ms": 58.695, "predicted_per_token_ms": 19.565, "predicted_per_second": 51.111679018655764}, "tps": 51.111679018655764}, {"id": "3af4cf94-dd22-43d3-8e06-38707a7301fb", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["NetworkSecurity", "SystemSecurity", "PenTest"], "timings": {"cache_n": 63, "prompt_n": 138, "prompt_ms": 87.648, "prompt_per_token_ms": 0.6351304347826087, "prompt_per_second": 1574.4797371303396, "predicted_n": 2, "predicted_ms": 28.699, "predicted_per_token_ms": 14.3495, "predicted_per_second": 69.68883933238091}, "tps": 69.68883933238091}, {"id": "af1daf0f-470b-4435-9203-b16a9a82e14f", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["WebSecurity", "SystemSecurity"], "timings": {"cache_n": 59, "prompt_n": 156, "prompt_ms": 98.225, "prompt_per_token_ms": 0.6296474358974359, "prompt_per_second": 1588.1903792313567, "predicted_n": 2, "predicted_ms": 29.088, "predicted_per_token_ms": 14.544, "predicted_per_second": 68.75687568756875}, "tps": 68.75687568756875}, {"id": "34ea72d4-4a71-4096-9fd8-6807cfabf3b4", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["NetworkSecurity", "SystemSecurity", "PenTest"], "timings": {"cache_n": 61, "prompt_n": 135, "prompt_ms": 98.312, "prompt_per_token_ms": 0.728237037037037, "prompt_per_second": 1373.179266010253, "predicted_n": 2, "predicted_ms": 32.972, "predicted_per_token_ms": 16.486, "predicted_per_second": 60.657527599175054}, "tps": 60.657527599175054}, {"id": "105822e5-7c4b-4da9-bb0c-fd9d44bc202e", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SystemSecurity", "PenTest"], "timings": {"cache_n": 60, "prompt_n": 156, "prompt_ms": 99.04, "prompt_per_token_ms": 0.6348717948717949, "prompt_per_second": 1575.121163166397, "predicted_n": 2, "predicted_ms": 29.128, "predicted_per_token_ms": 14.564, "predicted_per_second": 68.66245536940401}, "tps": 68.66245536940401}, {"id": "9d0a6e8b-314c-4d5e-8ddc-5fa0f728df06", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 61, "prompt_n": 132, "prompt_ms": 98.263, "prompt_per_token_ms": 0.7444166666666667, "prompt_per_second": 1343.333706481585, "predicted_n": 2, "predicted_ms": 33.932, "predicted_per_token_ms": 16.966, "predicted_per_second": 58.94141223623718}, "tps": 58.94141223623718}, {"id": "d7764627-cf8e-406b-8c06-5fc6955e0c04", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SystemSecurity", "PenTest"], "timings": {"cache_n": 60, "prompt_n": 134, "prompt_ms": 100.913, "prompt_per_token_ms": 0.7530820895522388, "prompt_per_second": 1327.8764876675948, "predicted_n": 2, "predicted_ms": 30.195, "predicted_per_token_ms": 15.0975, "predicted_per_second": 66.2361318099023}, "tps": 66.2361318099023}, {"id": "1e347a00-b414-40db-b0b3-2163302a2228", "answer": "ACD", "llm_answer": "AD", "score": 0, "topics": ["SystemSecurity", "Vulnerability"], "timings": {"cache_n": 61, "prompt_n": 131, "prompt_ms": 99.565, "prompt_per_token_ms": 0.7600381679389313, "prompt_per_second": 1315.7233967759755, "predicted_n": 2, "predicted_ms": 30.184, "predicted_per_token_ms": 15.092, "predicted_per_second": 66.260270341903}, "tps": 66.260270341903}, {"id": "42377318-0c16-4ac5-9ba1-dcc430033311", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["SystemSecurity", "ApplicationSecurity", "PenTest"], "timings": {"cache_n": 60, "prompt_n": 136, "prompt_ms": 97.792, "prompt_per_token_ms": 0.7190588235294118, "prompt_per_second": 1390.7068062827225, "predicted_n": 2, "predicted_ms": 30.659, "predicted_per_token_ms": 15.3295, "predicted_per_second": 65.23369972928015}, "tps": 65.23369972928015}, {"id": "dd7c8d58-e64c-4bcd-b427-b4e26879ab4c", "answer": "AC", "llm_answer": "AC", "score": 1, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 61, "prompt_n": 204, "prompt_ms": 102.726, "prompt_per_token_ms": 0.5035588235294117, "prompt_per_second": 1985.8653116056305, "predicted_n": 2, "predicted_ms": 28.938, "predicted_per_token_ms": 14.469, "predicted_per_second": 69.11327666044647}, "tps": 69.11327666044647}, {"id": "986dd692-46a9-42e2-80b1-1eb9b33d8b9b", "answer": "BC", "llm_answer": "B", "score": 0, "topics": ["Cryptography", "WebSecurity"], "timings": {"cache_n": 62, "prompt_n": 187, "prompt_ms": 97.037, "prompt_per_token_ms": 0.5189144385026738, "prompt_per_second": 1927.0999721755618, "predicted_n": 2, "predicted_ms": 26.945, "predicted_per_token_ms": 13.4725, "predicted_per_second": 74.2252737056968}, "tps": 74.2252737056968}, {"id": "c7e8b762-983f-4313-ab0f-1f6916b9401e", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["SoftwareSecurity", "SystemSecurity"], "timings": {"cache_n": 62, "prompt_n": 183, "prompt_ms": 100.519, "prompt_per_token_ms": 0.5492841530054645, "prompt_per_second": 1820.5513385529102, "predicted_n": 2, "predicted_ms": 31.851, "predicted_per_token_ms": 15.9255, "predicted_per_second": 62.79237700543154}, "tps": 62.79237700543154}, {"id": "faa37ace-d327-4b50-b10b-8d1350d8a9dd", "answer": "AD", "llm_answer": "B", "score": 0, "topics": ["SystemSecurity", "NetworkSecurity"], "timings": {"cache_n": 59, "prompt_n": 129, "prompt_ms": 102.98, "prompt_per_token_ms": 0.7982945736434108, "prompt_per_second": 1252.6704214410565, "predicted_n": 2, "predicted_ms": 30.456, "predicted_per_token_ms": 15.228, "predicted_per_second": 65.66850538481744}, "tps": 65.66850538481744}, {"id": "02f7c988-1486-4d26-b19b-28ca2696b03d", "answer": "BD", "llm_answer": "B", "score": 0, "topics": ["SystemSecurity"], "timings": {"cache_n": 58, "prompt_n": 121, "prompt_ms": 100.966, "prompt_per_token_ms": 0.8344297520661157, "prompt_per_second": 1198.4232315829092, "predicted_n": 2, "predicted_ms": 29.878, "predicted_per_token_ms": 14.939, "predicted_per_second": 66.93888479817926}, "tps": 66.93888479817926}, {"id": "7aa82a4c-78e6-4344-a8bb-8f5fc9aaba4d", "answer": "AC", "llm_answer": "AC", "score": 1, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 58, "prompt_n": 159, "prompt_ms": 97.823, "prompt_per_token_ms": 0.6152389937106918, "prompt_per_second": 1625.3846232481114, "predicted_n": 2, "predicted_ms": 28.977, "predicted_per_token_ms": 14.4885, "predicted_per_second": 69.02025744556028}, "tps": 69.02025744556028}, {"id": "15e0a70e-dfd1-4cfb-b12a-fc4ec48d8f39", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 60, "prompt_n": 100, "prompt_ms": 95.512, "prompt_per_token_ms": 0.95512, "prompt_per_second": 1046.9888600385293, "predicted_n": 2, "predicted_ms": 30.355, "predicted_per_token_ms": 15.1775, "predicted_per_second": 65.88700378850271}, "tps": 65.88700378850271}, {"id": "cb526c66-f831-4d12-8a5f-b5239a565439", "answer": "AC", "llm_answer": "AC", "score": 1, "topics": ["SystemSecurity", "Vulnerability"], "timings": {"cache_n": 58, "prompt_n": 124, "prompt_ms": 96.703, "prompt_per_token_ms": 0.7798629032258064, "prompt_per_second": 1282.2766615306662, "predicted_n": 2, "predicted_ms": 29.303, "predicted_per_token_ms": 14.6515, "predicted_per_second": 68.25239736545745}, "tps": 68.25239736545745}, {"id": "23948b6e-ab54-46dc-8584-952329224767", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["SoftwareSecurity", "ApplicationSecurity", "WebSecurity", "PenTest"], "timings": {"cache_n": 59, "prompt_n": 124, "prompt_ms": 98.754, "prompt_per_token_ms": 0.7964032258064516, "prompt_per_second": 1255.6453409482147, "predicted_n": 2, "predicted_ms": 29.51, "predicted_per_token_ms": 14.755, "predicted_per_second": 67.77363605557437}, "tps": 67.77363605557437}, {"id": "51e9a4bd-7308-458f-b975-3e2e8d1e3bd0", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["WebSecurity", "Vulnerability"], "timings": {"cache_n": 59, "prompt_n": 238, "prompt_ms": 109.708, "prompt_per_token_ms": 0.4609579831932773, "prompt_per_second": 2169.395121595508, "predicted_n": 2, "predicted_ms": 29.578, "predicted_per_token_ms": 14.789, "predicted_per_second": 67.6178240584218}, "tps": 67.6178240584218}, {"id": "e762581e-074d-4a5c-baf8-6995f9e8875f", "answer": "ABC", "llm_answer": "AC", "score": 0, "topics": ["ApplicationSecurity", "SystemSecurity"], "timings": {"cache_n": 62, "prompt_n": 191, "prompt_ms": 103.954, "prompt_per_token_ms": 0.544261780104712, "prompt_per_second": 1837.3511360794198, "predicted_n": 2, "predicted_ms": 29.355, "predicted_per_token_ms": 14.6775, "predicted_per_second": 68.13149378300119}, "tps": 68.13149378300119}, {"id": "d4d7f004-d738-42b9-82e3-b3fee62a9151", "answer": "ABC", "llm_answer": "BC", "score": 0, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 59, "prompt_n": 168, "prompt_ms": 86.686, "prompt_per_token_ms": 0.5159880952380953, "prompt_per_second": 1938.0292088687906, "predicted_n": 3, "predicted_ms": 57.947, "predicted_per_token_ms": 19.31566666666667, "predicted_per_second": 51.771446321638734}, "tps": 51.771446321638734}, {"id": "a8ada9ff-9197-41b7-8234-2752cb1478db", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["MemorySafety"], "timings": {"cache_n": 59, "prompt_n": 205, "prompt_ms": 102.033, "prompt_per_token_ms": 0.4977219512195122, "prompt_per_second": 2009.1539011888308, "predicted_n": 2, "predicted_ms": 30.241, "predicted_per_token_ms": 15.1205, "predicted_per_second": 66.13537912106081}, "tps": 66.13537912106081}, {"id": "a374b7bf-874a-4574-a9ba-2751de339d25", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["Vulnerability"], "timings": {"cache_n": 60, "prompt_n": 154, "prompt_ms": 97.921, "prompt_per_token_ms": 0.6358506493506494, "prompt_per_second": 1572.6963572675934, "predicted_n": 2, "predicted_ms": 30.824, "predicted_per_token_ms": 15.412, "predicted_per_second": 64.88450558006748}, "tps": 64.88450558006748}, {"id": "0bba4052-289a-4f87-823b-eaada6c5f012", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 63, "prompt_n": 160, "prompt_ms": 98.346, "prompt_per_token_ms": 0.6146625, "prompt_per_second": 1626.9090761190082, "predicted_n": 2, "predicted_ms": 30.53, "predicted_per_token_ms": 15.265, "predicted_per_second": 65.50933508024893}, "tps": 65.50933508024893}, {"id": "919d0a27-6a3c-4063-b2ad-66f92d35eea2", "answer": "B", "llm_answer": "C", "score": 0, "topics": ["NetworkSecurity", "SystemSecurity", "PenTest"], "timings": {"cache_n": 61, "prompt_n": 163, "prompt_ms": 98.887, "prompt_per_token_ms": 0.6066687116564418, "prompt_per_second": 1648.3460920040047, "predicted_n": 2, "predicted_ms": 29.923, "predicted_per_token_ms": 14.9615, "predicted_per_second": 66.83821809310564}, "tps": 66.83821809310564}, {"id": "e326358b-6079-42a8-8437-e32150e8c806", "answer": "BD", "llm_answer": "BD", "score": 1, "topics": ["WebSecurity", "Vulnerability"], "timings": {"cache_n": 60, "prompt_n": 118, "prompt_ms": 98.117, "prompt_per_token_ms": 0.8315, "prompt_per_second": 1202.6458208057725, "predicted_n": 4, "predicted_ms": 89.218, "predicted_per_token_ms": 22.3045, "predicted_per_second": 44.834002107198096}, "tps": 44.834002107198096}, {"id": "f9869177-179a-4150-8fd6-830812576aca", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["WebSecurity", "PenTest"], "timings": {"cache_n": 62, "prompt_n": 145, "prompt_ms": 98.842, "prompt_per_token_ms": 0.6816689655172413, "prompt_per_second": 1466.9877177717974, "predicted_n": 2, "predicted_ms": 29.466, "predicted_per_token_ms": 14.733, "predicted_per_second": 67.87483879725785}, "tps": 67.87483879725785}, {"id": "c9be498f-86c1-4b0b-aacf-b346b3561c2e", "answer": "D", "llm_answer": "D", "score": 1, "topics": ["WebSecurity", "SystemSecurity", "PenTest"], "timings": {"cache_n": 60, "prompt_n": 149, "prompt_ms": 101.227, "prompt_per_token_ms": 0.6793758389261745, "prompt_per_second": 1471.939304730951, "predicted_n": 2, "predicted_ms": 30.137, "predicted_per_token_ms": 15.0685, "predicted_per_second": 66.36360619836081}, "tps": 66.36360619836081}, {"id": "22f5c4c3-c734-405b-9277-ea18167a69bd", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["SystemSecurity", "PenTest"], "timings": {"cache_n": 64, "prompt_n": 152, "prompt_ms": 99.765, "prompt_per_token_ms": 0.6563486842105263, "prompt_per_second": 1523.5804139728361, "predicted_n": 2, "predicted_ms": 30.353, "predicted_per_token_ms": 15.1765, "predicted_per_second": 65.89134517181168}, "tps": 65.89134517181168}, {"id": "b8b79f91-e47a-410c-a39f-36017e3f7e49", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 62, "prompt_n": 151, "prompt_ms": 85.347, "prompt_per_token_ms": 0.5652119205298013, "prompt_per_second": 1769.2478938919942, "predicted_n": 2, "predicted_ms": 26.845, "predicted_per_token_ms": 13.4225, "predicted_per_second": 74.50176941702365}, "tps": 74.50176941702365}, {"id": "d545a0e0-9301-4b6f-8cd1-8d8e3726561d", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["SoftwareSecurity", "ApplicationSecurity", "Vulnerability"], "timings": {"cache_n": 61, "prompt_n": 158, "prompt_ms": 96.532, "prompt_per_token_ms": 0.6109620253164557, "prompt_per_second": 1636.762938714623, "predicted_n": 2, "predicted_ms": 29.319, "predicted_per_token_ms": 14.6595, "predicted_per_second": 68.21515058494492}, "tps": 68.21515058494492}, {"id": "4d46e207-22b3-4db9-9484-987b3e899b62", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["ApplicationSecurity", "Vulnerability"], "timings": {"cache_n": 59, "prompt_n": 120, "prompt_ms": 96.981, "prompt_per_token_ms": 0.808175, "prompt_per_second": 1237.3557707179757, "predicted_n": 2, "predicted_ms": 29.185, "predicted_per_token_ms": 14.5925, "predicted_per_second": 68.5283536063046}, "tps": 68.5283536063046}, {"id": "648bfd55-f095-4da7-82e0-d53a5a92b60a", "answer": "AB", "llm_answer": "B", "score": 0, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 59, "prompt_n": 129, "prompt_ms": 102.205, "prompt_per_token_ms": 0.7922868217054263, "prompt_per_second": 1262.1691698057825, "predicted_n": 2, "predicted_ms": 33.181, "predicted_per_token_ms": 16.5905, "predicted_per_second": 60.275458846930476}, "tps": 60.275458846930476}, {"id": "04ae0721-b964-4921-bffc-dce0a7c99de1", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["SoftwareSecurity", "ApplicationSecurity"], "timings": {"cache_n": 62, "prompt_n": 74, "prompt_ms": 91.16, "prompt_per_token_ms": 1.2318918918918917, "prompt_per_second": 811.7595436594999, "predicted_n": 2, "predicted_ms": 30.743, "predicted_per_token_ms": 15.3715, "predicted_per_second": 65.055459779462}, "tps": 65.055459779462}, {"id": "513cb7db-09db-4ca2-8013-de2e303f4d2a", "answer": "AC", "llm_answer": "AC", "score": 1, "topics": ["SoftwareSecurity", "ApplicationSecurity", "Vulnerability"], "timings": {"cache_n": 58, "prompt_n": 174, "prompt_ms": 100.588, "prompt_per_token_ms": 0.5780919540229885, "prompt_per_second": 1729.8286077862172, "predicted_n": 2, "predicted_ms": 31.845, "predicted_per_token_ms": 15.9225, "predicted_per_second": 62.80420788192809}, "tps": 62.80420788192809}, {"id": "f725c3e7-99fe-4eb2-b589-97d7c43de1b4", "answer": "A", "llm_answer": "C", "score": 0, "topics": ["SoftwareSecurity", "SystemSecurity", "Vulnerability"], "timings": {"cache_n": 62, "prompt_n": 134, "prompt_ms": 98.378, "prompt_per_token_ms": 0.7341641791044776, "prompt_per_second": 1362.0931509077234, "predicted_n": 2, "predicted_ms": 29.052, "predicted_per_token_ms": 14.526, "predicted_per_second": 68.84207627702051}, "tps": 68.84207627702051}, {"id": "9a0f92ea-11dd-42e8-9a0f-73655a4b44c6", "answer": "ABD", "llm_answer": "D", "score": 0, "topics": ["SystemSecurity", "WebSecurity"], "timings": {"cache_n": 60, "prompt_n": 139, "prompt_ms": 98.674, "prompt_per_token_ms": 0.709884892086331, "prompt_per_second": 1408.6790846626263, "predicted_n": 2, "predicted_ms": 28.543, "predicted_per_token_ms": 14.2715, "predicted_per_second": 70.06971937077392}, "tps": 70.06971937077392}, {"id": "948cd20f-b671-4b29-9fc8-016120ca4b43", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["WebSecurity", "SystemSecurity"], "timings": {"cache_n": 63, "prompt_n": 158, "prompt_ms": 99.123, "prompt_per_token_ms": 0.6273607594936709, "prompt_per_second": 1593.979197562624, "predicted_n": 2, "predicted_ms": 29.57, "predicted_per_token_ms": 14.785, "predicted_per_second": 67.63611768684477}, "tps": 67.63611768684477}, {"id": "646c5447-9f6f-470e-9eb6-c8486f9e5a8f", "answer": "AC", "llm_answer": "C", "score": 0, "topics": ["WebSecurity"], "timings": {"cache_n": 62, "prompt_n": 137, "prompt_ms": 98.295, "prompt_per_token_ms": 0.7174817518248175, "prompt_per_second": 1393.7636705834477, "predicted_n": 2, "predicted_ms": 30.95, "predicted_per_token_ms": 15.475, "predicted_per_second": 64.62035541195476}, "tps": 64.62035541195476}, {"id": "d9d13637-21dc-4000-875f-6f72afde27d0", "answer": "BD", "llm_answer": "BD", "score": 1, "topics": ["SystemSecurity", "WebSecurity"], "timings": {"cache_n": 60, "prompt_n": 126, "prompt_ms": 86.46, "prompt_per_token_ms": 0.6861904761904761, "prompt_per_second": 1457.3213046495491, "predicted_n": 4, "predicted_ms": 87.981, "predicted_per_token_ms": 21.99525, "predicted_per_second": 45.464361623532355}, "tps": 45.464361623532355}, {"id": "719a5258-b425-42e2-9d02-cd098614aee7", "answer": "BD", "llm_answer": "B", "score": 0, "topics": ["SystemSecurity", "WebSecurity"], "timings": {"cache_n": 59, "prompt_n": 164, "prompt_ms": 101.061, "prompt_per_token_ms": 0.6162256097560976, "prompt_per_second": 1622.7822800091035, "predicted_n": 2, "predicted_ms": 31.291, "predicted_per_token_ms": 15.6455, "predicted_per_second": 63.91614202166757}, "tps": 63.91614202166757}, {"id": "afd33e5f-4696-4fbf-b4d7-ae6aba9b7fa4", "answer": "AD", "llm_answer": "ACD", "score": 0, "topics": ["SystemSecurity"], "timings": {"cache_n": 59, "prompt_n": 151, "prompt_ms": 101.228, "prompt_per_token_ms": 0.6703841059602649, "prompt_per_second": 1491.6821432805152, "predicted_n": 3, "predicted_ms": 60.8, "predicted_per_token_ms": 20.266666666666666, "predicted_per_second": 49.342105263157904}, "tps": 49.342105263157904}, {"id": "56b21f93-be65-48d1-b336-54c1ec370422", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["WebSecurity", "SystemSecurity"], "timings": {"cache_n": 60, "prompt_n": 163, "prompt_ms": 101.124, "prompt_per_token_ms": 0.6203926380368098, "prompt_per_second": 1611.8824413591235, "predicted_n": 2, "predicted_ms": 30.368, "predicted_per_token_ms": 15.184, "predicted_per_second": 65.85879873551107}, "tps": 65.85879873551107}, {"id": "fec2f025-f550-46f0-9f36-9b3407e90261", "answer": "AC", "llm_answer": "AC", "score": 1, "topics": ["NetworkSecurity", "SystemSecurity"], "timings": {"cache_n": 59, "prompt_n": 129, "prompt_ms": 99.554, "prompt_per_token_ms": 0.7717364341085271, "prompt_per_second": 1295.77917512104, "predicted_n": 2, "predicted_ms": 30.33, "predicted_per_token_ms": 15.165, "predicted_per_second": 65.94131223211342}, "tps": 65.94131223211342}, {"id": "59a857ab-e667-4667-bac2-792ba6779342", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["ApplicationSecurity", "PenTest"], "timings": {"cache_n": 60, "prompt_n": 105, "prompt_ms": 95.456, "prompt_per_token_ms": 0.909104761904762, "prompt_per_second": 1099.9832383506537, "predicted_n": 2, "predicted_ms": 29.249, "predicted_per_token_ms": 14.6245, "predicted_per_second": 68.37840609935382}, "tps": 68.37840609935382}, {"id": "5cfaacc9-87f6-4a22-a85d-3e62a2b58401", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["ApplicationSecurity", "WebSecurity"], "timings": {"cache_n": 58, "prompt_n": 124, "prompt_ms": 97.745, "prompt_per_token_ms": 0.7882661290322581, "prompt_per_second": 1268.60708987672, "predicted_n": 2, "predicted_ms": 29.92, "predicted_per_token_ms": 14.96, "predicted_per_second": 66.84491978609626}, "tps": 66.84491978609626}, {"id": "03f527f5-b6a0-42f2-b679-031a1aded62b", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SystemSecurity", "PenTest"], "timings": {"cache_n": 62, "prompt_n": 87, "prompt_ms": 98.231, "prompt_per_token_ms": 1.1290919540229885, "prompt_per_second": 885.6674573199907, "predicted_n": 2, "predicted_ms": 30.652, "predicted_per_token_ms": 15.326, "predicted_per_second": 65.24859715516116}, "tps": 65.24859715516116}, {"id": "64833db9-4441-4a43-be12-3bc3d6a64826", "answer": "AD", "llm_answer": "A", "score": 0, "topics": ["ApplicationSecurity"], "timings": {"cache_n": 58, "prompt_n": 158, "prompt_ms": 101.0, "prompt_per_token_ms": 0.6392405063291139, "prompt_per_second": 1564.3564356435643, "predicted_n": 2, "predicted_ms": 29.995, "predicted_per_token_ms": 14.9975, "predicted_per_second": 66.67777962993831}, "tps": 66.67777962993831}, {"id": "db155e64-0c03-4825-9f3b-f82222dedeec", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["ApplicationSecurity"], "timings": {"cache_n": 59, "prompt_n": 78, "prompt_ms": 84.09, "prompt_per_token_ms": 1.0780769230769232, "prompt_per_second": 927.5775954334641, "predicted_n": 2, "predicted_ms": 26.438, "predicted_per_token_ms": 13.219, "predicted_per_second": 75.64868749527196}, "tps": 75.64868749527196}, {"id": "63369784-7b0f-4cfe-90a1-af5dfaa8c70e", "answer": "AC", "llm_answer": "AC", "score": 1, "topics": ["ApplicationSecurity", "SoftwareSecurity"], "timings": {"cache_n": 58, "prompt_n": 107, "prompt_ms": 92.227, "prompt_per_token_ms": 0.8619345794392523, "prompt_per_second": 1160.1808581001224, "predicted_n": 2, "predicted_ms": 31.15, "predicted_per_token_ms": 15.575, "predicted_per_second": 64.20545746388443}, "tps": 64.20545746388443}, {"id": "01ae4df4-cd01-4e2a-b797-396f59f49072", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["SystemSecurity"], "timings": {"cache_n": 58, "prompt_n": 129, "prompt_ms": 98.652, "prompt_per_token_ms": 0.7647441860465116, "prompt_per_second": 1307.626809390585, "predicted_n": 2, "predicted_ms": 30.434, "predicted_per_token_ms": 15.217, "predicted_per_second": 65.71597555365709}, "tps": 65.71597555365709}, {"id": "e6692719-ad61-4584-a59e-4e13dd9292d9", "answer": "AD", "llm_answer": "AD", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 59, "prompt_n": 134, "prompt_ms": 99.189, "prompt_per_token_ms": 0.7402164179104477, "prompt_per_second": 1350.9562552299146, "predicted_n": 4, "predicted_ms": 93.849, "predicted_per_token_ms": 23.46225, "predicted_per_second": 42.6216581956121}, "tps": 42.6216581956121}, {"id": "8233324b-61d2-4ae9-8493-1ec519ddff9b", "answer": "ABD", "llm_answer": "AB", "score": 0, "topics": ["WebSecurity"], "timings": {"cache_n": 63, "prompt_n": 112, "prompt_ms": 95.748, "prompt_per_token_ms": 0.8548928571428572, "prompt_per_second": 1169.7372268872457, "predicted_n": 2, "predicted_ms": 29.012, "predicted_per_token_ms": 14.506, "predicted_per_second": 68.93699158968703}, "tps": 68.93699158968703}, {"id": "15883e58-565c-4231-b73c-f2c35e7d9900", "answer": "AD", "llm_answer": "AD", "score": 1, "topics": ["ApplicationSecurity", "SystemSecurity"], "timings": {"cache_n": 59, "prompt_n": 160, "prompt_ms": 100.307, "prompt_per_token_ms": 0.62691875, "prompt_per_second": 1595.1030336865822, "predicted_n": 2, "predicted_ms": 29.629, "predicted_per_token_ms": 14.8145, "predicted_per_second": 67.50143440548112}, "tps": 67.50143440548112}, {"id": "fb812f20-6436-454e-a6c0-2cdc8955b965", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["ApplicationSecurity", "SystemSecurity"], "timings": {"cache_n": 62, "prompt_n": 181, "prompt_ms": 100.958, "prompt_per_token_ms": 0.5577790055248618, "prompt_per_second": 1792.8247390003762, "predicted_n": 2, "predicted_ms": 30.749, "predicted_per_token_ms": 15.3745, "predicted_per_second": 65.0427656183941}, "tps": 65.0427656183941}, {"id": "d92e3b1b-35ed-4c74-9881-654c5a750504", "answer": "B", "llm_answer": "A", "score": 0, "topics": ["PenTest"], "timings": {"cache_n": 61, "prompt_n": 94, "prompt_ms": 94.813, "prompt_per_token_ms": 1.008648936170213, "prompt_per_second": 991.4252264984759, "predicted_n": 2, "predicted_ms": 30.232, "predicted_per_token_ms": 15.116, "predicted_per_second": 66.15506747816883}, "tps": 66.15506747816883}, {"id": "9aa89e62-6022-4989-8a48-9454f54a89dc", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["WebSecurity", "Vulnerability"], "timings": {"cache_n": 59, "prompt_n": 153, "prompt_ms": 98.453, "prompt_per_token_ms": 0.643483660130719, "prompt_per_second": 1554.0410144942255, "predicted_n": 2, "predicted_ms": 28.975, "predicted_per_token_ms": 14.4875, "predicted_per_second": 69.02502157031924}, "tps": 69.02502157031924}, {"id": "fa500e4e-7516-4aa7-b388-ab3793387e24", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SystemSecurity", "WebSecurity"], "timings": {"cache_n": 62, "prompt_n": 109, "prompt_ms": 93.611, "prompt_per_token_ms": 0.858816513761468, "prompt_per_second": 1164.3930734635887, "predicted_n": 2, "predicted_ms": 27.065, "predicted_per_token_ms": 13.5325, "predicted_per_second": 73.89617587289857}, "tps": 73.89617587289857}, {"id": "2d5feefa-d17a-44a5-91b6-6efe0d69d793", "answer": "C", "llm_answer": "D", "score": 0, "topics": ["WebSecurity"], "timings": {"cache_n": 59, "prompt_n": 160, "prompt_ms": 91.639, "prompt_per_token_ms": 0.57274375, "prompt_per_second": 1745.9815144207162, "predicted_n": 2, "predicted_ms": 29.86, "predicted_per_token_ms": 14.93, "predicted_per_second": 66.97923643670462}, "tps": 66.97923643670462}, {"id": "d9e4ad6c-7bc5-402c-ae34-e1dce0057efb", "answer": "AC", "llm_answer": "AC", "score": 1, "topics": ["NetworkSecurity", "SystemSecurity"], "timings": {"cache_n": 62, "prompt_n": 90, "prompt_ms": 94.655, "prompt_per_token_ms": 1.0517222222222222, "prompt_per_second": 950.8214040462733, "predicted_n": 2, "predicted_ms": 31.25, "predicted_per_token_ms": 15.625, "predicted_per_second": 64.0}, "tps": 64.0}, {"id": "fd136413-5d55-4186-909f-06947f546b86", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SystemSecurity", "PenTest"], "timings": {"cache_n": 58, "prompt_n": 138, "prompt_ms": 102.317, "prompt_per_token_ms": 0.741427536231884, "prompt_per_second": 1348.7494746718532, "predicted_n": 2, "predicted_ms": 34.787, "predicted_per_token_ms": 17.3935, "predicted_per_second": 57.492741541380404}, "tps": 57.492741541380404}, {"id": "4e2a1f5d-80ad-4135-b263-08407419b642", "answer": "AC", "llm_answer": "A", "score": 0, "topics": ["NetworkSecurity", "SystemSecurity", "PenTest"], "timings": {"cache_n": 61, "prompt_n": 178, "prompt_ms": 102.149, "prompt_per_token_ms": 0.573870786516854, "prompt_per_second": 1742.5525457909523, "predicted_n": 2, "predicted_ms": 31.636, "predicted_per_token_ms": 15.818, "predicted_per_second": 63.21911746112024}, "tps": 63.21911746112024}, {"id": "1f16710f-c35b-424b-bbcc-7cf9b2d5b62b", "answer": "A", "llm_answer": "B", "score": 0, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 59, "prompt_n": 139, "prompt_ms": 100.174, "prompt_per_token_ms": 0.7206762589928059, "prompt_per_second": 1387.5856010541656, "predicted_n": 2, "predicted_ms": 30.981, "predicted_per_token_ms": 15.4905, "predicted_per_second": 64.55569542622898}, "tps": 64.55569542622898}, {"id": "d0da9864-a45f-4811-bf20-ddf54841aafd", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 60, "prompt_n": 109, "prompt_ms": 96.104, "prompt_per_token_ms": 0.8816880733944954, "prompt_per_second": 1134.18796304004, "predicted_n": 2, "predicted_ms": 30.791, "predicted_per_token_ms": 15.3955, "predicted_per_second": 64.95404501315319}, "tps": 64.95404501315319}, {"id": "9cbbdc44-9115-48a4-a8b8-6abd14759a04", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["ApplicationSecurity", "SoftwareSecurity", "Vulnerability"], "timings": {"cache_n": 58, "prompt_n": 118, "prompt_ms": 97.217, "prompt_per_token_ms": 0.8238728813559322, "prompt_per_second": 1213.7794830122305, "predicted_n": 2, "predicted_ms": 32.471, "predicted_per_token_ms": 16.2355, "predicted_per_second": 61.59342182254936}, "tps": 61.59342182254936}, {"id": "59752af5-aeb5-4767-a9ac-cbe3ac288fb3", "answer": "C", "llm_answer": "B", "score": 0, "topics": ["NetworkSecurity", "PenTest"], "timings": {"cache_n": 59, "prompt_n": 99, "prompt_ms": 97.155, "prompt_per_token_ms": 0.9813636363636363, "prompt_per_second": 1018.9902732746641, "predicted_n": 2, "predicted_ms": 29.713, "predicted_per_token_ms": 14.8565, "predicted_per_second": 67.310604785784}, "tps": 67.310604785784}, {"id": "9075cd44-dbc4-4ea8-bb64-f397f2098365", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["SystemSecurity", "NetworkSecurity", "PenTest"], "timings": {"cache_n": 58, "prompt_n": 120, "prompt_ms": 97.639, "prompt_per_token_ms": 0.8136583333333333, "prompt_per_second": 1229.01709357941, "predicted_n": 2, "predicted_ms": 29.388, "predicted_per_token_ms": 14.694, "predicted_per_second": 68.05498843065196}, "tps": 68.05498843065196}, {"id": "53d81fbe-2e91-4f80-a5c5-a43747d47705", "answer": "AC", "llm_answer": "BCD", "score": 0, "topics": ["WebSecurity"], "timings": {"cache_n": 59, "prompt_n": 127, "prompt_ms": 88.8, "prompt_per_token_ms": 0.6992125984251968, "prompt_per_second": 1430.18018018018, "predicted_n": 6, "predicted_ms": 151.544, "predicted_per_token_ms": 25.257333333333335, "predicted_per_second": 39.59246159531225}, "tps": 39.59246159531225}, {"id": "31030e6c-998e-4e75-98db-56b1b8956a5d", "answer": "B", "llm_answer": "D", "score": 0, "topics": [], "timings": {"cache_n": 59, "prompt_n": 110, "prompt_ms": 96.013, "prompt_per_token_ms": 0.8728454545454546, "prompt_per_second": 1145.6781894118503, "predicted_n": 2, "predicted_ms": 29.288, "predicted_per_token_ms": 14.644, "predicted_per_second": 68.28735318219066}, "tps": 68.28735318219066}, {"id": "ab6b8430-b4e1-49f9-96ba-6a79ca43a8f5", "answer": "AC", "llm_answer": "ACD", "score": 0, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 58, "prompt_n": 164, "prompt_ms": 101.162, "prompt_per_token_ms": 0.6168414634146342, "prompt_per_second": 1621.1620964393744, "predicted_n": 3, "predicted_ms": 58.683, "predicted_per_token_ms": 19.561, "predicted_per_second": 51.12213077041051}, "tps": 51.12213077041051}, {"id": "b2f3ea12-0823-4886-8560-4c80e67e2b2e", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["SystemSecurity"], "timings": {"cache_n": 61, "prompt_n": 75, "prompt_ms": 92.282, "prompt_per_token_ms": 1.2304266666666666, "prompt_per_second": 812.7262087947813, "predicted_n": 2, "predicted_ms": 30.351, "predicted_per_token_ms": 15.1755, "predicted_per_second": 65.89568712727753}, "tps": 65.89568712727753}, {"id": "03a34a6a-1c9e-491d-a1c8-e143f3a17fbc", "answer": "BC", "llm_answer": "BC", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 58, "prompt_n": 208, "prompt_ms": 102.388, "prompt_per_token_ms": 0.49225, "prompt_per_second": 2031.488065007618, "predicted_n": 3, "predicted_ms": 60.742, "predicted_per_token_ms": 20.247333333333334, "predicted_per_second": 49.38921997958579}, "tps": 49.38921997958579}, {"id": "0d088d56-63af-4009-bacf-277bf585f24e", "answer": "AC", "llm_answer": "AC", "score": 1, "topics": ["SystemSecurity", "PenTest"], "timings": {"cache_n": 61, "prompt_n": 114, "prompt_ms": 96.101, "prompt_per_token_ms": 0.8429912280701755, "prompt_per_second": 1186.251964079458, "predicted_n": 2, "predicted_ms": 29.956, "predicted_per_token_ms": 14.978, "predicted_per_second": 66.76458806249165}, "tps": 66.76458806249165}, {"id": "0c168594-8b5e-4a03-ae15-3e583b2a8526", "answer": "ABC", "llm_answer": "B", "score": 0, "topics": ["SystemSecurity", "NetworkSecurity"], "timings": {"cache_n": 62, "prompt_n": 120, "prompt_ms": 96.067, "prompt_per_token_ms": 0.8005583333333333, "prompt_per_second": 1249.1282126016217, "predicted_n": 2, "predicted_ms": 31.085, "predicted_per_token_ms": 15.5425, "predicted_per_second": 64.33971368827409}, "tps": 64.33971368827409}, {"id": "b7e8c76c-fe6d-48d5-9942-f771ad4346ed", "answer": "C", "llm_answer": "B", "score": 0, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 59, "prompt_n": 111, "prompt_ms": 96.161, "prompt_per_token_ms": 0.8663153153153154, "prompt_per_second": 1154.3141190295441, "predicted_n": 2, "predicted_ms": 30.125, "predicted_per_token_ms": 15.0625, "predicted_per_second": 66.39004149377594}, "tps": 66.39004149377594}, {"id": "6dc1c35c-8483-4b50-a23e-e72d439b6f92", "answer": "AD", "llm_answer": "D", "score": 0, "topics": ["SystemSecurity"], "timings": {"cache_n": 62, "prompt_n": 147, "prompt_ms": 97.277, "prompt_per_token_ms": 0.6617482993197279, "prompt_per_second": 1511.1485757167677, "predicted_n": 2, "predicted_ms": 28.724, "predicted_per_token_ms": 14.362, "predicted_per_second": 69.62818548948614}, "tps": 69.62818548948614}, {"id": "7231db3c-fb5e-4d85-ac89-88ac362f689b", "answer": "BC", "llm_answer": "BC", "score": 1, "topics": ["SoftwareSecurity", "MemorySafety"], "timings": {"cache_n": 59, "prompt_n": 161, "prompt_ms": 91.792, "prompt_per_token_ms": 0.570136645962733, "prompt_per_second": 1753.965487188426, "predicted_n": 3, "predicted_ms": 56.936, "predicted_per_token_ms": 18.978666666666665, "predicted_per_second": 52.690740480539546}, "tps": 52.690740480539546}, {"id": "c8dd31e9-1a87-4956-a0bd-102c1612bfba", "answer": "BD", "llm_answer": "ABCD", "score": 0, "topics": ["ApplicationSecurity", "SystemSecurity"], "timings": {"cache_n": 62, "prompt_n": 162, "prompt_ms": 99.721, "prompt_per_token_ms": 0.6155617283950617, "prompt_per_second": 1624.532445523009, "predicted_n": 3, "predicted_ms": 59.056, "predicted_per_token_ms": 19.685333333333332, "predicted_per_second": 50.79924139799513}, "tps": 50.79924139799513}, {"id": "f11e9c2e-3c6c-41e6-acf2-10375eafdadb", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["NetworkSecurity", "SystemSecurity", "PenTest"], "timings": {"cache_n": 60, "prompt_n": 115, "prompt_ms": 98.82, "prompt_per_token_ms": 0.8593043478260869, "prompt_per_second": 1163.732038048978, "predicted_n": 2, "predicted_ms": 30.888, "predicted_per_token_ms": 15.444, "predicted_per_second": 64.75006475006475}, "tps": 64.75006475006475}, {"id": "b3f06b0b-19da-4c68-9da5-379a06b0f102", "answer": "AD", "llm_answer": "A", "score": 0, "topics": ["WebSecurity", "SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 59, "prompt_n": 182, "prompt_ms": 100.674, "prompt_per_token_ms": 0.5531538461538462, "prompt_per_second": 1807.8153247114449, "predicted_n": 2, "predicted_ms": 30.764, "predicted_per_token_ms": 15.382, "predicted_per_second": 65.0110518788194}, "tps": 65.0110518788194}, {"id": "ace699e0-59c2-48e2-b221-4d9978cf02e4", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["SystemSecurity", "PenTest"], "timings": {"cache_n": 60, "prompt_n": 127, "prompt_ms": 97.655, "prompt_per_token_ms": 0.7689370078740158, "prompt_per_second": 1300.4966463570734, "predicted_n": 2, "predicted_ms": 32.259, "predicted_per_token_ms": 16.1295, "predicted_per_second": 61.998202052140485}, "tps": 61.998202052140485}, {"id": "af9530ea-8f9c-4761-b1e9-f56a3c7abf26", "answer": "A", "llm_answer": "C", "score": 0, "topics": ["MemorySafety"], "timings": {"cache_n": 59, "prompt_n": 137, "prompt_ms": 97.711, "prompt_per_token_ms": 0.7132189781021898, "prompt_per_second": 1402.0939300590517, "predicted_n": 2, "predicted_ms": 31.305, "predicted_per_token_ms": 15.6525, "predicted_per_second": 63.887557898099345}, "tps": 63.887557898099345}, {"id": "0742ca88-dfd7-4b40-b042-8eec1b094f1a", "answer": "C", "llm_answer": "C", "score": 1, "topics": [], "timings": {"cache_n": 59, "prompt_n": 91, "prompt_ms": 93.589, "prompt_per_token_ms": 1.0284505494505494, "prompt_per_second": 972.3364925365161, "predicted_n": 2, "predicted_ms": 30.402, "predicted_per_token_ms": 15.201, "predicted_per_second": 65.78514571409775}, "tps": 65.78514571409775}, {"id": "0e6c063c-0fa5-4f58-9cad-851f0e22ae4e", "answer": "BD", "llm_answer": "BD", "score": 1, "topics": ["ApplicationSecurity"], "timings": {"cache_n": 59, "prompt_n": 132, "prompt_ms": 98.556, "prompt_per_token_ms": 0.7466363636363637, "prompt_per_second": 1339.3400706197492, "predicted_n": 4, "predicted_ms": 90.617, "predicted_per_token_ms": 22.65425, "predicted_per_second": 44.1418276923756}, "tps": 44.1418276923756}, {"id": "79c8f3ee-a590-4dec-94d3-944a5ff07c4f", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["SystemSecurity", "PenTest"], "timings": {"cache_n": 62, "prompt_n": 118, "prompt_ms": 95.972, "prompt_per_token_ms": 0.813322033898305, "prompt_per_second": 1229.5252782061434, "predicted_n": 2, "predicted_ms": 26.33, "predicted_per_token_ms": 13.165, "predicted_per_second": 75.9589821496392}, "tps": 75.9589821496392}, {"id": "6faddb1c-b879-4809-93fc-ca6dae025a27", "answer": "BC", "llm_answer": "C", "score": 0, "topics": ["WebSecurity", "SystemSecurity"], "timings": {"cache_n": 60, "prompt_n": 155, "prompt_ms": 89.788, "prompt_per_token_ms": 0.5792774193548387, "prompt_per_second": 1726.288590903016, "predicted_n": 2, "predicted_ms": 27.644, "predicted_per_token_ms": 13.822, "predicted_per_second": 72.34843003906816}, "tps": 72.34843003906816}, {"id": "7100618b-0f78-4e60-b9c6-e69c4a850da6", "answer": "AC", "llm_answer": "CD", "score": 0, "topics": ["NetworkSecurity", "PenTest"], "timings": {"cache_n": 61, "prompt_n": 144, "prompt_ms": 96.787, "prompt_per_token_ms": 0.6721319444444445, "prompt_per_second": 1487.8031140545734, "predicted_n": 4, "predicted_ms": 90.407, "predicted_per_token_ms": 22.60175, "predicted_per_second": 44.244361609167434}, "tps": 44.244361609167434}, {"id": "f0a97f6c-30ef-4778-8eb3-759c5fbc4187", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["SystemSecurity"], "timings": {"cache_n": 58, "prompt_n": 62, "prompt_ms": 95.525, "prompt_per_token_ms": 1.540725806451613, "prompt_per_second": 649.0447526825437, "predicted_n": 2, "predicted_ms": 29.433, "predicted_per_token_ms": 14.7165, "predicted_per_second": 67.9509394217375}, "tps": 67.9509394217375}, {"id": "3726acf9-e1da-4733-a34a-2867c64892a0", "answer": "ACD", "llm_answer": "C", "score": 0, "topics": ["WebSecurity", "Cryptography"], "timings": {"cache_n": 58, "prompt_n": 116, "prompt_ms": 96.224, "prompt_per_token_ms": 0.8295172413793104, "prompt_per_second": 1205.520452278018, "predicted_n": 2, "predicted_ms": 30.406, "predicted_per_token_ms": 15.203, "predicted_per_second": 65.77649148194436}, "tps": 65.77649148194436}, {"id": "3aae7ce1-d610-4a25-9cce-890e7ab5808a", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["ApplicationSecurity"], "timings": {"cache_n": 62, "prompt_n": 94, "prompt_ms": 94.101, "prompt_per_token_ms": 1.0010744680851065, "prompt_per_second": 998.9266851574373, "predicted_n": 2, "predicted_ms": 31.34, "predicted_per_token_ms": 15.67, "predicted_per_second": 63.81620931716656}, "tps": 63.81620931716656}, {"id": "d05b6e38-1fad-4e9e-a4ff-43d355c91e36", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["SystemSecurity", "ApplicationSecurity", "Vulnerability"], "timings": {"cache_n": 58, "prompt_n": 144, "prompt_ms": 99.616, "prompt_per_token_ms": 0.6917777777777778, "prompt_per_second": 1445.55091551558, "predicted_n": 2, "predicted_ms": 30.308, "predicted_per_token_ms": 15.154, "predicted_per_second": 65.98917777484493}, "tps": 65.98917777484493}, {"id": "642f0f29-3c12-4d84-834f-2064e53ed0f4", "answer": "B", "llm_answer": "A", "score": 0, "topics": ["ApplicationSecurity"], "timings": {"cache_n": 59, "prompt_n": 155, "prompt_ms": 96.466, "prompt_per_token_ms": 0.6223612903225806, "prompt_per_second": 1606.7837372753095, "predicted_n": 2, "predicted_ms": 29.172, "predicted_per_token_ms": 14.586, "predicted_per_second": 68.55889208830385}, "tps": 68.55889208830385}, {"id": "f2fad912-3d20-4dc4-8dbc-f4c7d2010996", "answer": "AC", "llm_answer": "AC", "score": 1, "topics": ["SystemSecurity", "ApplicationSecurity", "PenTest"], "timings": {"cache_n": 62, "prompt_n": 113, "prompt_ms": 96.108, "prompt_per_token_ms": 0.8505132743362832, "prompt_per_second": 1175.760602655346, "predicted_n": 2, "predicted_ms": 29.028, "predicted_per_token_ms": 14.514, "predicted_per_second": 68.89899407468651}, "tps": 68.89899407468651}, {"id": "8b6449a0-7609-46b1-b9e7-0b108e4b1c03", "answer": "D", "llm_answer": "D", "score": 1, "topics": ["NetworkSecurity", "PenTest"], "timings": {"cache_n": 60, "prompt_n": 81, "prompt_ms": 93.543, "prompt_per_token_ms": 1.154851851851852, "prompt_per_second": 865.9119335492768, "predicted_n": 2, "predicted_ms": 28.999, "predicted_per_token_ms": 14.4995, "predicted_per_second": 68.9678954446705}, "tps": 68.9678954446705}, {"id": "343606d4-f518-4cbd-9648-a3bd0e3da2a1", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["ApplicationSecurity", "Vulnerability"], "timings": {"cache_n": 58, "prompt_n": 96, "prompt_ms": 87.794, "prompt_per_token_ms": 0.9145208333333333, "prompt_per_second": 1093.468801968244, "predicted_n": 2, "predicted_ms": 30.569, "predicted_per_token_ms": 15.2845, "predicted_per_second": 65.42575812097223}, "tps": 65.42575812097223}, {"id": "8250f916-c99c-4ab2-8ad6-418303077550", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["WebSecurity", "Vulnerability"], "timings": {"cache_n": 59, "prompt_n": 130, "prompt_ms": 94.705, "prompt_per_token_ms": 0.7285, "prompt_per_second": 1372.6835964310228, "predicted_n": 2, "predicted_ms": 31.41, "predicted_per_token_ms": 15.705, "predicted_per_second": 63.67398917542184}, "tps": 63.67398917542184}, {"id": "a30d9d7b-1e7c-45d6-82e6-b32e48c5b79c", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["SystemSecurity", "WebSecurity", "Vulnerability"], "timings": {"cache_n": 61, "prompt_n": 130, "prompt_ms": 99.743, "prompt_per_token_ms": 0.7672538461538461, "prompt_per_second": 1303.3496084938292, "predicted_n": 2, "predicted_ms": 31.6, "predicted_per_token_ms": 15.8, "predicted_per_second": 63.291139240506325}, "tps": 63.291139240506325}, {"id": "aa473dc5-3dff-4dc9-aad0-e8b4771d10ad", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SystemSecurity", "PenTest"], "timings": {"cache_n": 61, "prompt_n": 126, "prompt_ms": 97.149, "prompt_per_token_ms": 0.7710238095238096, "prompt_per_second": 1296.9768088194423, "predicted_n": 2, "predicted_ms": 29.899, "predicted_per_token_ms": 14.9495, "predicted_per_second": 66.8918692932874}, "tps": 66.8918692932874}, {"id": "f190d94b-f408-4ee7-99cf-68235cbe0969", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["SystemSecurity", "ApplicationSecurity", "Vulnerability"], "timings": {"cache_n": 60, "prompt_n": 132, "prompt_ms": 96.832, "prompt_per_token_ms": 0.7335757575757575, "prompt_per_second": 1363.1857237276934, "predicted_n": 2, "predicted_ms": 29.21, "predicted_per_token_ms": 14.605, "predicted_per_second": 68.46970215679562}, "tps": 68.46970215679562}, {"id": "5d248a73-a789-48de-9796-e27ae7532297", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["ApplicationSecurity"], "timings": {"cache_n": 60, "prompt_n": 136, "prompt_ms": 96.76, "prompt_per_token_ms": 0.7114705882352942, "prompt_per_second": 1405.5394791236047, "predicted_n": 2, "predicted_ms": 29.719, "predicted_per_token_ms": 14.8595, "predicted_per_second": 67.29701537736801}, "tps": 67.29701537736801}, {"id": "ce9c94db-4e17-4938-87af-2aa8d81f1f3b", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 59, "prompt_n": 56, "prompt_ms": 90.415, "prompt_per_token_ms": 1.6145535714285715, "prompt_per_second": 619.3662555991815, "predicted_n": 2, "predicted_ms": 31.452, "predicted_per_token_ms": 15.726, "predicted_per_second": 63.58896095637797}, "tps": 63.58896095637797}, {"id": "e73e0da7-c2eb-4576-894c-49a9f0066d75", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["ApplicationSecurity", "SystemSecurity"], "timings": {"cache_n": 58, "prompt_n": 132, "prompt_ms": 100.739, "prompt_per_token_ms": 0.7631742424242425, "prompt_per_second": 1310.3167591498823, "predicted_n": 2, "predicted_ms": 32.902, "predicted_per_token_ms": 16.451, "predicted_per_second": 60.78657832350617}, "tps": 60.78657832350617}, {"id": "8f574b93-a7c9-4579-b790-42164beeffea", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SoftwareSecurity", "ApplicationSecurity"], "timings": {"cache_n": 60, "prompt_n": 157, "prompt_ms": 105.33, "prompt_per_token_ms": 0.6708917197452229, "prompt_per_second": 1490.5534985284346, "predicted_n": 2, "predicted_ms": 32.165, "predicted_per_token_ms": 16.0825, "predicted_per_second": 62.1793875330328}, "tps": 62.1793875330328}, {"id": "e5836ecb-2e6c-4569-a2ab-ce7b11afcc7a", "answer": "BD", "llm_answer": "B", "score": 0, "topics": ["ApplicationSecurity"], "timings": {"cache_n": 59, "prompt_n": 131, "prompt_ms": 101.104, "prompt_per_token_ms": 0.7717862595419848, "prompt_per_second": 1295.6955214432662, "predicted_n": 2, "predicted_ms": 30.802, "predicted_per_token_ms": 15.401, "predicted_per_second": 64.9308486461918}, "tps": 64.9308486461918}, {"id": "e33caa01-13fa-40aa-a4b5-3dfa70bdf138", "answer": "AB", "llm_answer": "AB", "score": 1, "topics": ["NetworkSecurity"], "timings": {"cache_n": 60, "prompt_n": 125, "prompt_ms": 89.578, "prompt_per_token_ms": 0.716624, "prompt_per_second": 1395.4319140860478, "predicted_n": 3, "predicted_ms": 55.925, "predicted_per_token_ms": 18.641666666666666, "predicted_per_second": 53.643272239606624}, "tps": 53.643272239606624}, {"id": "881ba8de-aada-4511-80a5-7cd595abd301", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["Vulnerability"], "timings": {"cache_n": 59, "prompt_n": 130, "prompt_ms": 99.333, "prompt_per_token_ms": 0.7641, "prompt_per_second": 1308.7292239235703, "predicted_n": 2, "predicted_ms": 28.905, "predicted_per_token_ms": 14.4525, "predicted_per_second": 69.19218128351496}, "tps": 69.19218128351496}, {"id": "0874c321-3eab-4bc4-8d82-fc7ca5024e80", "answer": "AC", "llm_answer": "AC", "score": 1, "topics": ["SystemSecurity", "NetworkSecurity", "PenTest"], "timings": {"cache_n": 59, "prompt_n": 111, "prompt_ms": 94.723, "prompt_per_token_ms": 0.8533603603603603, "prompt_per_second": 1171.8378852021158, "predicted_n": 2, "predicted_ms": 29.692, "predicted_per_token_ms": 14.846, "predicted_per_second": 67.35821096591674}, "tps": 67.35821096591674}, {"id": "c585b075-dd14-41c1-b6df-f877e79d17c8", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SystemSecurity", "NetworkSecurity", "PenTest"], "timings": {"cache_n": 60, "prompt_n": 130, "prompt_ms": 99.731, "prompt_per_token_ms": 0.7671615384615385, "prompt_per_second": 1303.5064323028948, "predicted_n": 2, "predicted_ms": 30.742, "predicted_per_token_ms": 15.371, "predicted_per_second": 65.05757595471992}, "tps": 65.05757595471992}, {"id": "5a466d93-f94e-4d49-b053-4e694c42e847", "answer": "AC", "llm_answer": "AC", "score": 1, "topics": ["PenTest"], "timings": {"cache_n": 59, "prompt_n": 125, "prompt_ms": 96.865, "prompt_per_token_ms": 0.7749199999999999, "prompt_per_second": 1290.4557889846694, "predicted_n": 2, "predicted_ms": 32.9, "predicted_per_token_ms": 16.45, "predicted_per_second": 60.79027355623101}, "tps": 60.79027355623101}, {"id": "04e0af8d-a47d-41d2-ae1a-49934d1b8d98", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["ApplicationSecurity", "WebSecurity"], "timings": {"cache_n": 59, "prompt_n": 165, "prompt_ms": 103.151, "prompt_per_token_ms": 0.6251575757575757, "prompt_per_second": 1599.59670773914, "predicted_n": 2, "predicted_ms": 28.96, "predicted_per_token_ms": 14.48, "predicted_per_second": 69.06077348066299}, "tps": 69.06077348066299}, {"id": "938b518c-f052-46f0-82a5-6590be052915", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SystemSecurity"], "timings": {"cache_n": 61, "prompt_n": 119, "prompt_ms": 95.705, "prompt_per_token_ms": 0.8042436974789916, "prompt_per_second": 1243.4042108562771, "predicted_n": 2, "predicted_ms": 30.597, "predicted_per_token_ms": 15.2985, "predicted_per_second": 65.3658855443344}, "tps": 65.3658855443344}, {"id": "70e163d9-e602-42b6-864e-11127b2932ef", "answer": "BD", "llm_answer": "B", "score": 0, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 59, "prompt_n": 138, "prompt_ms": 98.012, "prompt_per_token_ms": 0.710231884057971, "prompt_per_second": 1407.9908582622536, "predicted_n": 2, "predicted_ms": 31.167, "predicted_per_token_ms": 15.5835, "predicted_per_second": 64.1704366798216}, "tps": 64.1704366798216}, {"id": "6e03455e-5715-486e-bf20-87c35cedb4ec", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 58, "prompt_n": 164, "prompt_ms": 100.038, "prompt_per_token_ms": 0.6099878048780487, "prompt_per_second": 1639.377036726044, "predicted_n": 2, "predicted_ms": 29.528, "predicted_per_token_ms": 14.764, "predicted_per_second": 67.7323218639935}, "tps": 67.7323218639935}, {"id": "b1ae1660-99a3-4294-ba40-ddb45e597faf", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["WebSecurity", "ApplicationSecurity"], "timings": {"cache_n": 60, "prompt_n": 175, "prompt_ms": 96.804, "prompt_per_token_ms": 0.5531657142857143, "prompt_per_second": 1807.77653815958, "predicted_n": 2, "predicted_ms": 27.788, "predicted_per_token_ms": 13.894, "predicted_per_second": 71.97351374694112}, "tps": 71.97351374694112}, {"id": "82371a29-d04a-4a11-88ee-221ae1bb23ea", "answer": "AC", "llm_answer": "BC", "score": 0, "topics": ["WebSecurity"], "timings": {"cache_n": 65, "prompt_n": 173, "prompt_ms": 95.217, "prompt_per_token_ms": 0.5503872832369943, "prompt_per_second": 1816.9024438913218, "predicted_n": 3, "predicted_ms": 60.658, "predicted_per_token_ms": 20.219333333333335, "predicted_per_second": 49.45761482409576}, "tps": 49.45761482409576}, {"id": "bd6146d7-74bf-46eb-8fc0-500fae7f4ed6", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["WebSecurity", "ApplicationSecurity"], "timings": {"cache_n": 60, "prompt_n": 224, "prompt_ms": 105.397, "prompt_per_token_ms": 0.4705223214285715, "prompt_per_second": 2125.2976839948005, "predicted_n": 2, "predicted_ms": 32.77, "predicted_per_token_ms": 16.385, "predicted_per_second": 61.03143118706133}, "tps": 61.03143118706133}, {"id": "0c9b5e48-6400-41cc-8f54-744382a28035", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 62, "prompt_n": 107, "prompt_ms": 101.469, "prompt_per_token_ms": 0.9483084112149532, "prompt_per_second": 1054.509258985503, "predicted_n": 2, "predicted_ms": 32.302, "predicted_per_token_ms": 16.151, "predicted_per_second": 61.91567085629373}, "tps": 61.91567085629373}, {"id": "f8fa5d9f-029e-4971-99ff-a54ed13cabf2", "answer": "D", "llm_answer": "A", "score": 0, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 58, "prompt_n": 133, "prompt_ms": 106.328, "prompt_per_token_ms": 0.7994586466165414, "prompt_per_second": 1250.8464374388684, "predicted_n": 2, "predicted_ms": 33.396, "predicted_per_token_ms": 16.698, "predicted_per_second": 59.88741166606779}, "tps": 59.88741166606779}, {"id": "a5b3974e-5fd4-4ade-ba3c-65071349faac", "answer": "ABC", "llm_answer": "ABCD", "score": 0, "topics": ["SystemSecurity"], "timings": {"cache_n": 59, "prompt_n": 168, "prompt_ms": 102.572, "prompt_per_token_ms": 0.6105476190476191, "prompt_per_second": 1637.873883710954, "predicted_n": 64, "predicted_ms": 1917.588, "predicted_per_token_ms": 29.9623125, "predicted_per_second": 33.37526100497083}, "tps": 33.37526100497083}, {"id": "10cc4543-69b8-42d1-96b4-8eb764a32326", "answer": "ABC", "llm_answer": "ABCD", "score": 0, "topics": ["ApplicationSecurity", "SystemSecurity"], "timings": {"cache_n": 60, "prompt_n": 143, "prompt_ms": 94.814, "prompt_per_token_ms": 0.663034965034965, "prompt_per_second": 1508.216086231991, "predicted_n": 8, "predicted_ms": 213.336, "predicted_per_token_ms": 26.667, "predicted_per_second": 37.4995312558593}, "tps": 37.4995312558593}, {"id": "2d3f7fa9-f150-4d24-a87a-9fb5488611f1", "answer": "BD", "llm_answer": "BD", "score": 1, "topics": ["PenTest"], "timings": {"cache_n": 62, "prompt_n": 114, "prompt_ms": 96.838, "prompt_per_token_ms": 0.8494561403508771, "prompt_per_second": 1177.2238170965943, "predicted_n": 3, "predicted_ms": 58.443, "predicted_per_token_ms": 19.480999999999998, "predicted_per_second": 51.332067142343824}, "tps": 51.332067142343824}, {"id": "e28c5b64-a631-4171-8e83-cc375f5cb13e", "answer": "BC", "llm_answer": "BC", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 62, "prompt_n": 120, "prompt_ms": 96.21, "prompt_per_token_ms": 0.80175, "prompt_per_second": 1247.2715933894606, "predicted_n": 3, "predicted_ms": 61.486, "predicted_per_token_ms": 20.49533333333333, "predicted_per_second": 48.791594834596495}, "tps": 48.791594834596495}, {"id": "41cc6013-bdeb-4c46-8310-b6f415c21a0e", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 60, "prompt_n": 150, "prompt_ms": 100.584, "prompt_per_token_ms": 0.67056, "prompt_per_second": 1491.2908613696015, "predicted_n": 2, "predicted_ms": 29.279, "predicted_per_token_ms": 14.6395, "predicted_per_second": 68.30834386420301}, "tps": 68.30834386420301}, {"id": "0b2c771a-8cd4-47c5-a46f-e2481f943013", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["NetworkSecurity"], "timings": {"cache_n": 60, "prompt_n": 116, "prompt_ms": 95.822, "prompt_per_token_ms": 0.826051724137931, "prompt_per_second": 1210.577946609338, "predicted_n": 2, "predicted_ms": 30.044, "predicted_per_token_ms": 15.022, "predicted_per_second": 66.56903208627347}, "tps": 66.56903208627347}, {"id": "267d70d2-7e49-409c-b927-a602361f30f5", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SystemSecurity", "Vulnerability"], "timings": {"cache_n": 60, "prompt_n": 173, "prompt_ms": 100.374, "prompt_per_token_ms": 0.5801965317919074, "prompt_per_second": 1723.5539083826488, "predicted_n": 2, "predicted_ms": 32.421, "predicted_per_token_ms": 16.2105, "predicted_per_second": 61.68841183183739}, "tps": 61.68841183183739}, {"id": "d8d21f8d-799d-4000-afbf-f72cb120ddcc", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 62, "prompt_n": 134, "prompt_ms": 97.623, "prompt_per_token_ms": 0.7285298507462686, "prompt_per_second": 1372.6273521608637, "predicted_n": 2, "predicted_ms": 30.164, "predicted_per_token_ms": 15.082, "predicted_per_second": 66.30420368651372}, "tps": 66.30420368651372}, {"id": "bb62352b-e26c-4503-a5a0-c451d8ab9022", "answer": "BC", "llm_answer": "B", "score": 0, "topics": ["WebSecurity", "Vulnerability"], "timings": {"cache_n": 61, "prompt_n": 111, "prompt_ms": 93.598, "prompt_per_token_ms": 0.8432252252252253, "prompt_per_second": 1185.922776127695, "predicted_n": 2, "predicted_ms": 25.907, "predicted_per_token_ms": 12.9535, "predicted_per_second": 77.19921256803181}, "tps": 77.19921256803181}, {"id": "a2a28828-0121-4b1a-97a1-6a124cb9c4bf", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 59, "prompt_n": 118, "prompt_ms": 88.112, "prompt_per_token_ms": 0.7467118644067796, "prompt_per_second": 1339.2046486290176, "predicted_n": 2, "predicted_ms": 27.415, "predicted_per_token_ms": 13.7075, "predicted_per_second": 72.95276308590188}, "tps": 72.95276308590188}, {"id": "87951795-f2ba-4721-a5a9-35a015574e8f", "answer": "AC", "llm_answer": "AC", "score": 1, "topics": ["SystemSecurity", "ApplicationSecurity", "NetworkSecurity"], "timings": {"cache_n": 62, "prompt_n": 124, "prompt_ms": 96.454, "prompt_per_token_ms": 0.7778548387096774, "prompt_per_second": 1285.586911895826, "predicted_n": 2, "predicted_ms": 30.133, "predicted_per_token_ms": 15.0665, "predicted_per_second": 66.37241562406663}, "tps": 66.37241562406663}, {"id": "a52c0420-4b2f-4d1e-a3bb-45b5e2e90b6d", "answer": "ACD", "llm_answer": "ACD", "score": 1, "topics": ["NetworkSecurity", "SystemSecurity"], "timings": {"cache_n": 61, "prompt_n": 105, "prompt_ms": 95.225, "prompt_per_token_ms": 0.9069047619047619, "prompt_per_second": 1102.6516145970072, "predicted_n": 4, "predicted_ms": 100.911, "predicted_per_token_ms": 25.22775, "predicted_per_second": 39.63888971469909}, "tps": 39.63888971469909}, {"id": "a9c1b2d6-c9b1-41be-acd7-931ed25ea67a", "answer": "A", "llm_answer": "ABCD", "score": 0, "topics": ["SystemSecurity", "SoftwareSecurity"], "timings": {"cache_n": 60, "prompt_n": 129, "prompt_ms": 98.852, "prompt_per_token_ms": 0.7662945736434109, "prompt_per_second": 1304.9811839922309, "predicted_n": 8, "predicted_ms": 208.847, "predicted_per_token_ms": 26.105875, "predicted_per_second": 38.305553826485415}, "tps": 38.305553826485415}, {"id": "1b790c88-db7e-4fed-abf0-ead4f23a6cd9", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["SystemSecurity", "NetworkSecurity", "PenTest"], "timings": {"cache_n": 60, "prompt_n": 114, "prompt_ms": 96.046, "prompt_per_token_ms": 0.8425087719298247, "prompt_per_second": 1186.9312621035751, "predicted_n": 2, "predicted_ms": 29.855, "predicted_per_token_ms": 14.9275, "predicted_per_second": 66.9904538603249}, "tps": 66.9904538603249}, {"id": "ae6ef008-0353-4dfa-acbc-d8d551dbbe55", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 62, "prompt_n": 143, "prompt_ms": 97.797, "prompt_per_token_ms": 0.6838951048951049, "prompt_per_second": 1462.2125423070238, "predicted_n": 2, "predicted_ms": 29.311, "predicted_per_token_ms": 14.6555, "predicted_per_second": 68.23376889222476}, "tps": 68.23376889222476}, {"id": "b92e0e55-b51c-4932-8619-acdd41d1cf22", "answer": "B", "llm_answer": "A", "score": 0, "topics": ["SystemSecurity", "NetworkSecurity", "PenTest"], "timings": {"cache_n": 62, "prompt_n": 78, "prompt_ms": 91.564, "prompt_per_token_ms": 1.173897435897436, "prompt_per_second": 851.8631776680793, "predicted_n": 3, "predicted_ms": 60.245, "predicted_per_token_ms": 20.081666666666667, "predicted_per_second": 49.79666362353722}, "tps": 49.79666362353722}, {"id": "19dd6e6c-b87f-4d4d-9b8e-4ad2353c399f", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 58, "prompt_n": 155, "prompt_ms": 97.69, "prompt_per_token_ms": 0.630258064516129, "prompt_per_second": 1586.651653188658, "predicted_n": 2, "predicted_ms": 26.734, "predicted_per_token_ms": 13.367, "predicted_per_second": 74.81110196753198}, "tps": 74.81110196753198}, {"id": "acf33f8a-5c7c-41ff-bc12-7bf2216e14f0", "answer": "AC", "llm_answer": "A", "score": 0, "topics": ["SystemSecurity", "PenTest"], "timings": {"cache_n": 63, "prompt_n": 53, "prompt_ms": 85.389, "prompt_per_token_ms": 1.6111132075471697, "prompt_per_second": 620.6888475096324, "predicted_n": 2, "predicted_ms": 27.479, "predicted_per_token_ms": 13.7395, "predicted_per_second": 72.78285235998399}, "tps": 72.78285235998399}, {"id": "8850298a-a2ef-487f-9c21-480d86cf5e8f", "answer": "ACD", "llm_answer": "CD", "score": 0, "topics": ["SystemSecurity", "NetworkSecurity"], "timings": {"cache_n": 59, "prompt_n": 96, "prompt_ms": 96.638, "prompt_per_token_ms": 1.0066458333333335, "prompt_per_second": 993.3980421780252, "predicted_n": 3, "predicted_ms": 60.401, "predicted_per_token_ms": 20.133666666666667, "predicted_per_second": 49.66805185344614}, "tps": 49.66805185344614}, {"id": "de1c290f-0dc4-4cf5-967a-443788c16ae8", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SoftwareSecurity", "SystemSecurity", "Vulnerability"], "timings": {"cache_n": 58, "prompt_n": 276, "prompt_ms": 107.437, "prompt_per_token_ms": 0.3892644927536232, "prompt_per_second": 2568.9473831175483, "predicted_n": 2, "predicted_ms": 33.476, "predicted_per_token_ms": 16.738, "predicted_per_second": 59.744294419882905}, "tps": 59.744294419882905}, {"id": "f725cb3b-8b30-45aa-bca1-681980dd839d", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["PenTest"], "timings": {"cache_n": 59, "prompt_n": 114, "prompt_ms": 96.053, "prompt_per_token_ms": 0.8425701754385965, "prompt_per_second": 1186.8447627872113, "predicted_n": 2, "predicted_ms": 29.113, "predicted_per_token_ms": 14.5565, "predicted_per_second": 68.697832583382}, "tps": 68.697832583382}, {"id": "a024dc86-960f-4d35-9262-5a1bdc73d6af", "answer": "AC", "llm_answer": "A", "score": 0, "topics": ["SystemSecurity", "PenTest"], "timings": {"cache_n": 63, "prompt_n": 138, "prompt_ms": 97.549, "prompt_per_token_ms": 0.7068768115942029, "prompt_per_second": 1414.6736511906834, "predicted_n": 2, "predicted_ms": 30.206, "predicted_per_token_ms": 15.103, "predicted_per_second": 66.21201085876979}, "tps": 66.21201085876979}, {"id": "1bf1f5da-4fb3-417d-8578-c1bb8d12674b", "answer": "AD", "llm_answer": "A", "score": 0, "topics": ["WebSecurity", "PenTest"], "timings": {"cache_n": 62, "prompt_n": 137, "prompt_ms": 97.367, "prompt_per_token_ms": 0.7107080291970803, "prompt_per_second": 1407.0475623157743, "predicted_n": 2, "predicted_ms": 28.97, "predicted_per_token_ms": 14.485, "predicted_per_second": 69.03693476009666}, "tps": 69.03693476009666}, {"id": "bce31b60-9b41-4e75-924b-5608b7733dfc", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 60, "prompt_n": 166, "prompt_ms": 99.172, "prompt_per_token_ms": 0.5974216867469879, "prompt_per_second": 1673.8595571330618, "predicted_n": 2, "predicted_ms": 31.091, "predicted_per_token_ms": 15.5455, "predicted_per_second": 64.32729728860441}, "tps": 64.32729728860441}, {"id": "047e3412-269c-4cc4-9a6b-c6361ecacaf6", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["WebSecurity", "SystemSecurity"], "timings": {"cache_n": 62, "prompt_n": 93, "prompt_ms": 97.063, "prompt_per_token_ms": 1.0436881720430107, "prompt_per_second": 958.1405891019235, "predicted_n": 2, "predicted_ms": 30.435, "predicted_per_token_ms": 15.2175, "predicted_per_second": 65.71381632988336}, "tps": 65.71381632988336}, {"id": "57ebaa99-340c-433e-9ce6-248075ff9e56", "answer": "ABD", "llm_answer": "ABD", "score": 1, "topics": ["SystemSecurity", "NetworkSecurity"], "timings": {"cache_n": 58, "prompt_n": 131, "prompt_ms": 98.386, "prompt_per_token_ms": 0.7510381679389313, "prompt_per_second": 1331.4902526782266, "predicted_n": 3, "predicted_ms": 61.342, "predicted_per_token_ms": 20.447333333333333, "predicted_per_second": 48.906132829056766}, "tps": 48.906132829056766}, {"id": "aacdda17-fafb-41bf-a96e-f30e39909f0f", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 60, "prompt_n": 127, "prompt_ms": 89.084, "prompt_per_token_ms": 0.7014488188976378, "prompt_per_second": 1425.620762426474, "predicted_n": 2, "predicted_ms": 28.341, "predicted_per_token_ms": 14.1705, "predicted_per_second": 70.5691401150277}, "tps": 70.5691401150277}, {"id": "73ede46d-dded-4fee-97de-7bc05523e1c5", "answer": "AD", "llm_answer": "A", "score": 0, "topics": ["SystemSecurity", "NetworkSecurity", "PenTest"], "timings": {"cache_n": 59, "prompt_n": 110, "prompt_ms": 102.389, "prompt_per_token_ms": 0.9308090909090909, "prompt_per_second": 1074.3341569895206, "predicted_n": 2, "predicted_ms": 31.495, "predicted_per_token_ms": 15.7475, "predicted_per_second": 63.502143197332906}, "tps": 63.502143197332906}, {"id": "d99bce7b-accd-401e-9cbd-66a39fff35ff", "answer": "AD", "llm_answer": "D", "score": 0, "topics": ["ApplicationSecurity", "MemorySafety"], "timings": {"cache_n": 59, "prompt_n": 134, "prompt_ms": 102.053, "prompt_per_token_ms": 0.761589552238806, "prompt_per_second": 1313.043222639217, "predicted_n": 2, "predicted_ms": 31.204, "predicted_per_token_ms": 15.602, "predicted_per_second": 64.09434687860531}, "tps": 64.09434687860531}, {"id": "359ea04f-0f1f-4b23-8ac5-87c099e58ae8", "answer": "ABC", "llm_answer": "AC", "score": 0, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 60, "prompt_n": 102, "prompt_ms": 98.544, "prompt_per_token_ms": 0.9661176470588235, "prompt_per_second": 1035.070628348758, "predicted_n": 2, "predicted_ms": 30.259, "predicted_per_token_ms": 15.1295, "predicted_per_second": 66.09603754254933}, "tps": 66.09603754254933}, {"id": "41c6eee1-101a-4325-9b14-2fcb68d0232d", "answer": "AD", "llm_answer": "D", "score": 0, "topics": ["WebSecurity", "PenTest"], "timings": {"cache_n": 61, "prompt_n": 173, "prompt_ms": 99.8, "prompt_per_token_ms": 0.576878612716763, "prompt_per_second": 1733.4669338677354, "predicted_n": 2, "predicted_ms": 29.039, "predicted_per_token_ms": 14.5195, "predicted_per_second": 68.87289507214436}, "tps": 68.87289507214436}, {"id": "4922b462-fdad-4ea8-bbc2-ef9064a6ec5d", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["WebSecurity", "Vulnerability"], "timings": {"cache_n": 59, "prompt_n": 94, "prompt_ms": 95.256, "prompt_per_token_ms": 1.0133617021276595, "prompt_per_second": 986.8144788779709, "predicted_n": 2, "predicted_ms": 30.493, "predicted_per_token_ms": 15.2465, "predicted_per_second": 65.58882366444759}, "tps": 65.58882366444759}, {"id": "e80b794a-28f1-45c2-a20a-0d9f1d5814fc", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["SystemSecurity", "ApplicationSecurity", "PenTest"], "timings": {"cache_n": 59, "prompt_n": 146, "prompt_ms": 98.845, "prompt_per_token_ms": 0.6770205479452055, "prompt_per_second": 1477.0600435024535, "predicted_n": 2, "predicted_ms": 30.736, "predicted_per_token_ms": 15.368, "predicted_per_second": 65.07027589796981}, "tps": 65.07027589796981}, {"id": "f0ca971b-66cb-4c00-b938-93359cafe30c", "answer": "BC", "llm_answer": "C", "score": 0, "topics": ["WebSecurity"], "timings": {"cache_n": 63, "prompt_n": 110, "prompt_ms": 96.085, "prompt_per_token_ms": 0.8734999999999999, "prompt_per_second": 1144.8196908986836, "predicted_n": 2, "predicted_ms": 32.687, "predicted_per_token_ms": 16.3435, "predicted_per_second": 61.186404380946556}, "tps": 61.186404380946556}, {"id": "502af749-bc98-421e-88f7-81503a2410f8", "answer": "C", "llm_answer": "C", "score": 1, "topics": [], "timings": {"cache_n": 58, "prompt_n": 115, "prompt_ms": 96.812, "prompt_per_token_ms": 0.8418434782608696, "prompt_per_second": 1187.8692724042473, "predicted_n": 2, "predicted_ms": 30.948, "predicted_per_token_ms": 15.474, "predicted_per_second": 64.62453147214683}, "tps": 64.62453147214683}, {"id": "0bd0fab8-a55e-41dc-a784-dba581b02600", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["SystemSecurity"], "timings": {"cache_n": 59, "prompt_n": 166, "prompt_ms": 98.626, "prompt_per_token_ms": 0.594132530120482, "prompt_per_second": 1683.1261533469876, "predicted_n": 2, "predicted_ms": 28.502, "predicted_per_token_ms": 14.251, "predicted_per_second": 70.17051434987019}, "tps": 70.17051434987019}, {"id": "9ed22e92-f490-45cc-b08c-35f293b7c37a", "answer": "BD", "llm_answer": "D", "score": 0, "topics": ["MemorySafety"], "timings": {"cache_n": 60, "prompt_n": 149, "prompt_ms": 91.433, "prompt_per_token_ms": 0.6136442953020135, "prompt_per_second": 1629.6085658350921, "predicted_n": 2, "predicted_ms": 28.24, "predicted_per_token_ms": 14.12, "predicted_per_second": 70.8215297450425}, "tps": 70.8215297450425}, {"id": "dc9f50b3-70f9-43fa-84fc-e89cf42443cd", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["ApplicationSecurity"], "timings": {"cache_n": 62, "prompt_n": 202, "prompt_ms": 104.477, "prompt_per_token_ms": 0.5172128712871288, "prompt_per_second": 1933.439895862247, "predicted_n": 2, "predicted_ms": 29.508, "predicted_per_token_ms": 14.754, "predicted_per_second": 67.778229632642}, "tps": 67.778229632642}, {"id": "7148ea80-6c57-4c1b-bcdd-2e0f36f31da0", "answer": "B", "llm_answer": "ABCD", "score": 0, "topics": ["SoftwareSecurity", "SystemSecurity", "ApplicationSecurity", "Vulnerability"], "timings": {"cache_n": 62, "prompt_n": 135, "prompt_ms": 99.436, "prompt_per_token_ms": 0.736562962962963, "prompt_per_second": 1357.6571865320407, "predicted_n": 64, "predicted_ms": 1883.626, "predicted_per_token_ms": 29.43165625, "predicted_per_second": 33.97702091604172}, "tps": 33.97702091604172}, {"id": "543be4ac-11ce-4b02-9e3a-b2b50fd16bbd", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SoftwareSecurity", "ApplicationSecurity", "Vulnerability"], "timings": {"cache_n": 60, "prompt_n": 141, "prompt_ms": 100.335, "prompt_per_token_ms": 0.7115957446808511, "prompt_per_second": 1405.29227089251, "predicted_n": 2, "predicted_ms": 40.203, "predicted_per_token_ms": 20.1015, "predicted_per_second": 49.747531278760285}, "tps": 49.747531278760285}, {"id": "a06ce85e-4c9b-4d78-91b8-a49d4ed2adac", "answer": "AD", "llm_answer": "A", "score": 0, "topics": ["ApplicationSecurity", "SoftwareSecurity"], "timings": {"cache_n": 63, "prompt_n": 119, "prompt_ms": 106.726, "prompt_per_token_ms": 0.8968571428571428, "prompt_per_second": 1115.0047785919082, "predicted_n": 2, "predicted_ms": 32.627, "predicted_per_token_ms": 16.3135, "predicted_per_second": 61.29892420388022}, "tps": 61.29892420388022}, {"id": "964f5529-78f6-426e-8d96-7ff40e6788d6", "answer": "AD", "llm_answer": "AD", "score": 1, "topics": ["ApplicationSecurity", "WebSecurity"], "timings": {"cache_n": 62, "prompt_n": 101, "prompt_ms": 95.835, "prompt_per_token_ms": 0.9488613861386138, "prompt_per_second": 1053.894714874524, "predicted_n": 2, "predicted_ms": 26.248, "predicted_per_token_ms": 13.124, "predicted_per_second": 76.19628162145688}, "tps": 76.19628162145688}, {"id": "273890ac-8b96-488f-80cd-fab6158a182f", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["SoftwareSecurity", "ApplicationSecurity", "Vulnerability"], "timings": {"cache_n": 58, "prompt_n": 140, "prompt_ms": 91.774, "prompt_per_token_ms": 0.6555285714285715, "prompt_per_second": 1525.4865212369516, "predicted_n": 2, "predicted_ms": 30.451, "predicted_per_token_ms": 15.2255, "predicted_per_second": 65.67928803651769}, "tps": 65.67928803651769}, {"id": "d3c2665f-4c2b-481b-a530-6c5c6055af04", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["NetworkSecurity", "PenTest"], "timings": {"cache_n": 60, "prompt_n": 151, "prompt_ms": 100.546, "prompt_per_token_ms": 0.6658675496688742, "prompt_per_second": 1501.8001710659796, "predicted_n": 2, "predicted_ms": 31.877, "predicted_per_token_ms": 15.9385, "predicted_per_second": 62.74116133889638}, "tps": 62.74116133889638}, {"id": "a17e7a95-1460-4472-ae77-d4bb6fc6ea80", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["SystemSecurity", "Vulnerability"], "timings": {"cache_n": 60, "prompt_n": 185, "prompt_ms": 105.825, "prompt_per_token_ms": 0.572027027027027, "prompt_per_second": 1748.169147176943, "predicted_n": 2, "predicted_ms": 31.504, "predicted_per_token_ms": 15.752, "predicted_per_second": 63.48400203148806}, "tps": 63.48400203148806}, {"id": "e5332d35-3662-43a5-9f0a-f415c47c56d2", "answer": "AD", "llm_answer": "ABCD", "score": 0, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 68, "prompt_n": 114, "prompt_ms": 100.065, "prompt_per_token_ms": 0.8777631578947368, "prompt_per_second": 1139.2594813371309, "predicted_n": 3, "predicted_ms": 58.672, "predicted_per_token_ms": 19.557333333333332, "predicted_per_second": 51.13171529860922}, "tps": 51.13171529860922}, {"id": "6341c473-8771-410a-b7ec-1f5236875cc1", "answer": "AC", "llm_answer": "AC", "score": 1, "topics": ["SystemSecurity"], "timings": {"cache_n": 59, "prompt_n": 94, "prompt_ms": 95.733, "prompt_per_token_ms": 1.018436170212766, "prompt_per_second": 981.8975692812302, "predicted_n": 2, "predicted_ms": 32.08, "predicted_per_token_ms": 16.04, "predicted_per_second": 62.34413965087282}, "tps": 62.34413965087282}, {"id": "2796d33d-a4c5-4830-8665-f5ea868b3c23", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 58, "prompt_n": 132, "prompt_ms": 97.582, "prompt_per_token_ms": 0.7392575757575757, "prompt_per_second": 1352.7084913201204, "predicted_n": 2, "predicted_ms": 32.449, "predicted_per_token_ms": 16.2245, "predicted_per_second": 61.63518136152116}, "tps": 61.63518136152116}, {"id": "bcc8b032-f9e6-4d27-86f0-dc9299a709c0", "answer": "ACD", "llm_answer": "D", "score": 0, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 59, "prompt_n": 100, "prompt_ms": 101.321, "prompt_per_token_ms": 1.01321, "prompt_per_second": 986.9622289554978, "predicted_n": 2, "predicted_ms": 31.609, "predicted_per_token_ms": 15.8045, "predicted_per_second": 63.273118415641115}, "tps": 63.273118415641115}, {"id": "03f0350c-beac-43e6-a5b7-e222697280de", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["WebSecurity", "Vulnerability"], "timings": {"cache_n": 58, "prompt_n": 129, "prompt_ms": 103.179, "prompt_per_token_ms": 0.7998372093023256, "prompt_per_second": 1250.2544122350478, "predicted_n": 2, "predicted_ms": 29.739, "predicted_per_token_ms": 14.8695, "predicted_per_second": 67.25175695215037}, "tps": 67.25175695215037}, {"id": "e504575a-59c1-4db5-a32b-e3deb91a2664", "answer": "AC", "llm_answer": "ACD", "score": 0, "topics": ["SystemSecurity", "WebSecurity"], "timings": {"cache_n": 60, "prompt_n": 167, "prompt_ms": 102.063, "prompt_per_token_ms": 0.6111556886227545, "prompt_per_second": 1636.2442804934205, "predicted_n": 3, "predicted_ms": 59.701, "predicted_per_token_ms": 19.900333333333332, "predicted_per_second": 50.25041456592017}, "tps": 50.25041456592017}, {"id": "b191c8ae-1541-4dab-9556-7fa0f0eb3f6b", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["SoftwareSecurity", "ApplicationSecurity"], "timings": {"cache_n": 62, "prompt_n": 153, "prompt_ms": 87.395, "prompt_per_token_ms": 0.5712091503267973, "prompt_per_second": 1750.672235253733, "predicted_n": 2, "predicted_ms": 27.932, "predicted_per_token_ms": 13.966, "predicted_per_second": 71.6024631247315}, "tps": 71.6024631247315}, {"id": "434a8346-d1d9-456a-ab03-f0b1ff82cea1", "answer": "AB", "llm_answer": "B", "score": 0, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 62, "prompt_n": 174, "prompt_ms": 99.708, "prompt_per_token_ms": 0.5730344827586207, "prompt_per_second": 1745.0956793838009, "predicted_n": 2, "predicted_ms": 30.091, "predicted_per_token_ms": 15.0455, "predicted_per_second": 66.46505599680968}, "tps": 66.46505599680968}, {"id": "7d6265b9-8848-4c2b-979b-fb55c7f578b0", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["ApplicationSecurity", "SystemSecurity"], "timings": {"cache_n": 62, "prompt_n": 174, "prompt_ms": 101.706, "prompt_per_token_ms": 0.5845172413793104, "prompt_per_second": 1710.8135213261753, "predicted_n": 2, "predicted_ms": 31.702, "predicted_per_token_ms": 15.851, "predicted_per_second": 63.08750236578133}, "tps": 63.08750236578133}, {"id": "de7fdad6-f0a2-4c02-8360-4f629d4148c3", "answer": "AB", "llm_answer": "B", "score": 0, "topics": ["WebSecurity"], "timings": {"cache_n": 62, "prompt_n": 154, "prompt_ms": 97.234, "prompt_per_token_ms": 0.6313896103896104, "prompt_per_second": 1583.8081329576075, "predicted_n": 2, "predicted_ms": 29.691, "predicted_per_token_ms": 14.8455, "predicted_per_second": 67.3604796066148}, "tps": 67.3604796066148}, {"id": "b6b860ea-e356-4122-8958-d4fdb65d8c43", "answer": "CD", "llm_answer": "CD", "score": 1, "topics": ["WebSecurity", "Vulnerability"], "timings": {"cache_n": 61, "prompt_n": 120, "prompt_ms": 97.713, "prompt_per_token_ms": 0.814275, "prompt_per_second": 1228.0863344693132, "predicted_n": 4, "predicted_ms": 88.617, "predicted_per_token_ms": 22.15425, "predicted_per_second": 45.13806605955968}, "tps": 45.13806605955968}, {"id": "c0de31ff-12cb-4a91-b588-e3c7d9ba5cec", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["WebSecurity", "PenTest"], "timings": {"cache_n": 62, "prompt_n": 185, "prompt_ms": 100.875, "prompt_per_token_ms": 0.5452702702702703, "prompt_per_second": 1833.9529120198265, "predicted_n": 2, "predicted_ms": 29.235, "predicted_per_token_ms": 14.6175, "predicted_per_second": 68.41115101761588}, "tps": 68.41115101761588}, {"id": "2279e498-05b1-40d0-9012-c359d5443677", "answer": "AD", "llm_answer": "A", "score": 0, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 62, "prompt_n": 132, "prompt_ms": 97.884, "prompt_per_token_ms": 0.7415454545454545, "prompt_per_second": 1348.5350006129704, "predicted_n": 2, "predicted_ms": 32.555, "predicted_per_token_ms": 16.2775, "predicted_per_second": 61.43449546920596}, "tps": 61.43449546920596}, {"id": "01adae0a-ff82-470f-bab3-827ff5ad510d", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["WebSecurity", "PenTest"], "timings": {"cache_n": 62, "prompt_n": 196, "prompt_ms": 107.544, "prompt_per_token_ms": 0.5486938775510204, "prompt_per_second": 1822.5098564308564, "predicted_n": 2, "predicted_ms": 30.275, "predicted_per_token_ms": 15.1375, "predicted_per_second": 66.06110652353428}, "tps": 66.06110652353428}, {"id": "e3940fc9-00f6-477b-919d-0e9cccaa389b", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["WebSecurity", "NetworkSecurity"], "timings": {"cache_n": 62, "prompt_n": 91, "prompt_ms": 96.421, "prompt_per_token_ms": 1.0595714285714286, "prompt_per_second": 943.777807738978, "predicted_n": 2, "predicted_ms": 29.797, "predicted_per_token_ms": 14.8985, "predicted_per_second": 67.12085109239185}, "tps": 67.12085109239185}, {"id": "69147b80-49f6-4772-9c1d-ab7f76a7f531", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["NetworkSecurity", "PenTest"], "timings": {"cache_n": 59, "prompt_n": 148, "prompt_ms": 90.711, "prompt_per_token_ms": 0.6129121621621622, "prompt_per_second": 1631.5551586907873, "predicted_n": 2, "predicted_ms": 29.199, "predicted_per_token_ms": 14.5995, "predicted_per_second": 68.4954964211103}, "tps": 68.4954964211103}, {"id": "c8d44f57-1224-4c1f-b0d0-bda5cd35bcc1", "answer": "BC", "llm_answer": "B", "score": 0, "topics": ["NetworkSecurity"], "timings": {"cache_n": 63, "prompt_n": 142, "prompt_ms": 100.377, "prompt_per_token_ms": 0.7068802816901408, "prompt_per_second": 1414.6667065164331, "predicted_n": 2, "predicted_ms": 32.252, "predicted_per_token_ms": 16.126, "predicted_per_second": 62.01165819174004}, "tps": 62.01165819174004}, {"id": "87d4c8c9-9d19-4b4e-8ac8-2399ff67942b", "answer": "BD", "llm_answer": "D", "score": 0, "topics": ["SystemSecurity", "NetworkSecurity", "PenTest"], "timings": {"cache_n": 60, "prompt_n": 166, "prompt_ms": 100.94, "prompt_per_token_ms": 0.6080722891566265, "prompt_per_second": 1644.5413116702991, "predicted_n": 2, "predicted_ms": 28.811, "predicted_per_token_ms": 14.4055, "predicted_per_second": 69.41793065148728}, "tps": 69.41793065148728}, {"id": "ca517bb5-0a29-4f6f-a946-b3d0247edc90", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["Cryptography"], "timings": {"cache_n": 62, "prompt_n": 67, "prompt_ms": 92.946, "prompt_per_token_ms": 1.3872537313432836, "prompt_per_second": 720.8486648161298, "predicted_n": 2, "predicted_ms": 30.442, "predicted_per_token_ms": 15.221, "predicted_per_second": 65.69870573549701}, "tps": 65.69870573549701}, {"id": "3a3f0ea4-c1ae-4944-94c5-0bdcb2aea4a0", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SystemSecurity"], "timings": {"cache_n": 58, "prompt_n": 145, "prompt_ms": 97.605, "prompt_per_token_ms": 0.6731379310344828, "prompt_per_second": 1485.5796321909738, "predicted_n": 2, "predicted_ms": 29.68, "predicted_per_token_ms": 14.84, "predicted_per_second": 67.38544474393531}, "tps": 67.38544474393531}, {"id": "611136b8-e015-4b5a-bb30-af22fa67f28f", "answer": "AD", "llm_answer": "AD", "score": 1, "topics": ["SystemSecurity", "WebSecurity", "PenTest"], "timings": {"cache_n": 62, "prompt_n": 188, "prompt_ms": 100.874, "prompt_per_token_ms": 0.536563829787234, "prompt_per_second": 1863.7111644229437, "predicted_n": 2, "predicted_ms": 28.735, "predicted_per_token_ms": 14.3675, "predicted_per_second": 69.60153123368714}, "tps": 69.60153123368714}, {"id": "38594237-f78d-404f-9960-ae0e13f1f1d5", "answer": "ABCD", "llm_answer": "C", "score": 0, "topics": ["WebSecurity"], "timings": {"cache_n": 63, "prompt_n": 142, "prompt_ms": 97.033, "prompt_per_token_ms": 0.683330985915493, "prompt_per_second": 1463.4196613523234, "predicted_n": 2, "predicted_ms": 28.575, "predicted_per_token_ms": 14.2875, "predicted_per_second": 69.9912510936133}, "tps": 69.9912510936133}, {"id": "3b78cf27-1a4f-49f4-9f11-31d7813115f7", "answer": "BCD", "llm_answer": "C", "score": 0, "topics": ["WebSecurity"], "timings": {"cache_n": 65, "prompt_n": 196, "prompt_ms": 104.984, "prompt_per_token_ms": 0.5356326530612244, "prompt_per_second": 1866.9511544616323, "predicted_n": 2, "predicted_ms": 35.017, "predicted_per_token_ms": 17.5085, "predicted_per_second": 57.11511551532112}, "tps": 57.11511551532112}, {"id": "fe3b6cd4-a709-4521-8324-2129a00552a1", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["SoftwareSecurity", "Vulnerability"], "timings": {"cache_n": 59, "prompt_n": 157, "prompt_ms": 100.169, "prompt_per_token_ms": 0.6380191082802548, "prompt_per_second": 1567.3511765116953, "predicted_n": 2, "predicted_ms": 33.366, "predicted_per_token_ms": 16.683, "predicted_per_second": 59.94125756758377}, "tps": 59.94125756758377}, {"id": "93cc9970-b41a-49d1-bad6-a20b40a88f1f", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 61, "prompt_n": 103, "prompt_ms": 96.492, "prompt_per_token_ms": 0.9368155339805826, "prompt_per_second": 1067.4460058864984, "predicted_n": 2, "predicted_ms": 28.731, "predicted_per_token_ms": 14.3655, "predicted_per_second": 69.61122132887822}, "tps": 69.61122132887822}, {"id": "72574b19-8d65-4d92-b272-0a112bb10a10", "answer": "AC", "llm_answer": "ABCD", "score": 0, "topics": ["PenTest"], "timings": {"cache_n": 59, "prompt_n": 91, "prompt_ms": 85.305, "prompt_per_token_ms": 0.9374175824175824, "prompt_per_second": 1066.7604478049352, "predicted_n": 64, "predicted_ms": 1857.684, "predicted_per_token_ms": 29.0263125, "predicted_per_second": 34.45149982451267}, "tps": 34.45149982451267}, {"id": "6d12c982-f6ce-4b41-becc-77ffb811a15b", "answer": "ABD", "llm_answer": "B", "score": 0, "topics": ["ApplicationSecurity", "SystemSecurity"], "timings": {"cache_n": 59, "prompt_n": 155, "prompt_ms": 98.877, "prompt_per_token_ms": 0.637916129032258, "prompt_per_second": 1567.6041951110976, "predicted_n": 2, "predicted_ms": 28.857, "predicted_per_token_ms": 14.4285, "predicted_per_second": 69.30727379838514}, "tps": 69.30727379838514}, {"id": "6da49679-1a5e-4ea6-a85c-4156211eb190", "answer": "AB", "llm_answer": "B", "score": 0, "topics": ["WebSecurity", "Vulnerability"], "timings": {"cache_n": 61, "prompt_n": 145, "prompt_ms": 98.208, "prompt_per_token_ms": 0.6772965517241379, "prompt_per_second": 1476.4581296839362, "predicted_n": 2, "predicted_ms": 29.41, "predicted_per_token_ms": 14.705, "predicted_per_second": 68.00408024481469}, "tps": 68.00408024481469}, {"id": "1c256513-6323-40f4-a31a-6acd946d4e30", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SystemSecurity", "NetworkSecurity", "PenTest"], "timings": {"cache_n": 61, "prompt_n": 120, "prompt_ms": 98.56, "prompt_per_token_ms": 0.8213333333333334, "prompt_per_second": 1217.5324675324673, "predicted_n": 2, "predicted_ms": 32.22, "predicted_per_token_ms": 16.11, "predicted_per_second": 62.07324643078833}, "tps": 62.07324643078833}, {"id": "87f1c1c6-5f2c-4f6e-bc02-c9607c83d3b0", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 59, "prompt_n": 86, "prompt_ms": 96.058, "prompt_per_token_ms": 1.116953488372093, "prompt_per_second": 895.2924274917237, "predicted_n": 2, "predicted_ms": 31.161, "predicted_per_token_ms": 15.5805, "predicted_per_second": 64.18279259330573}, "tps": 64.18279259330573}, {"id": "95c1767e-cbdb-4510-b4b9-f62586fe81c1", "answer": "BD", "llm_answer": "BD", "score": 1, "topics": ["SystemSecurity", "Vulnerability"], "timings": {"cache_n": 58, "prompt_n": 141, "prompt_ms": 102.672, "prompt_per_token_ms": 0.7281702127659574, "prompt_per_second": 1373.3052828424497, "predicted_n": 2, "predicted_ms": 28.556, "predicted_per_token_ms": 14.278, "predicted_per_second": 70.03782042302844}, "tps": 70.03782042302844}, {"id": "229d0b37-5030-4530-9c36-63f8f09dfd36", "answer": "AC", "llm_answer": "C", "score": 0, "topics": ["ApplicationSecurity", "SystemSecurity"], "timings": {"cache_n": 63, "prompt_n": 181, "prompt_ms": 94.421, "prompt_per_token_ms": 0.5216629834254144, "prompt_per_second": 1916.9464419991314, "predicted_n": 2, "predicted_ms": 29.492, "predicted_per_token_ms": 14.746, "predicted_per_second": 67.81500067815}, "tps": 67.81500067815}, {"id": "fcd10cc9-4de3-4fd1-837f-2eccda1e430b", "answer": "ACD", "llm_answer": "ACD", "score": 1, "topics": ["SystemSecurity"], "timings": {"cache_n": 61, "prompt_n": 163, "prompt_ms": 99.305, "prompt_per_token_ms": 0.6092331288343559, "prompt_per_second": 1641.4077840994912, "predicted_n": 3, "predicted_ms": 60.397, "predicted_per_token_ms": 20.13233333333333, "predicted_per_second": 49.67134129178602}, "tps": 49.67134129178602}, {"id": "f449e2ce-5fcc-4308-b063-10622dba5635", "answer": "AB", "llm_answer": "ACD", "score": 0, "topics": ["SystemSecurity", "SoftwareSecurity"], "timings": {"cache_n": 60, "prompt_n": 158, "prompt_ms": 98.221, "prompt_per_token_ms": 0.6216518987341773, "prompt_per_second": 1608.6173017990043, "predicted_n": 3, "predicted_ms": 61.528, "predicted_per_token_ms": 20.509333333333334, "predicted_per_second": 48.758288909114555}, "tps": 48.758288909114555}, {"id": "cd6619f2-4d6a-4dda-a399-bc9d097ac259", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["WebSecurity", "PenTest"], "timings": {"cache_n": 62, "prompt_n": 99, "prompt_ms": 95.166, "prompt_per_token_ms": 0.9612727272727273, "prompt_per_second": 1040.2874976357102, "predicted_n": 2, "predicted_ms": 30.58, "predicted_per_token_ms": 15.29, "predicted_per_second": 65.40222367560497}, "tps": 65.40222367560497}, {"id": "d9fd3d78-4e4f-44c0-8a3e-bf8151517b2a", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["ApplicationSecurity", "NetworkSecurity"], "timings": {"cache_n": 59, "prompt_n": 109, "prompt_ms": 94.678, "prompt_per_token_ms": 0.8686055045871559, "prompt_per_second": 1151.2706225311056, "predicted_n": 2, "predicted_ms": 30.078, "predicted_per_token_ms": 15.039, "predicted_per_second": 66.49378283130527}, "tps": 66.49378283130527}, {"id": "ae887cc3-b90e-48df-bb34-be97fb561ab5", "answer": "AC", "llm_answer": "ABCD", "score": 0, "topics": ["SystemSecurity"], "timings": {"cache_n": 59, "prompt_n": 72, "prompt_ms": 91.567, "prompt_per_token_ms": 1.2717638888888887, "prompt_per_second": 786.309478305503, "predicted_n": 64, "predicted_ms": 1862.375, "predicted_per_token_ms": 29.099609375, "predicted_per_second": 34.36472246459494}, "tps": 34.36472246459494}, {"id": "2ea7927f-120c-4e82-ac85-34e163ad2084", "answer": "AB", "llm_answer": "BC", "score": 0, "topics": ["WebSecurity"], "timings": {"cache_n": 60, "prompt_n": 132, "prompt_ms": 91.765, "prompt_per_token_ms": 0.6951893939393939, "prompt_per_second": 1438.4569280226665, "predicted_n": 3, "predicted_ms": 57.752, "predicted_per_token_ms": 19.250666666666667, "predicted_per_second": 51.946252943621}, "tps": 51.946252943621}, {"id": "8e00eb0f-13c2-4cd5-9803-106be431e6ba", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["SystemSecurity", "ApplicationSecurity", "PenTest"], "timings": {"cache_n": 60, "prompt_n": 80, "prompt_ms": 91.124, "prompt_per_token_ms": 1.13905, "prompt_per_second": 877.9245862780388, "predicted_n": 2, "predicted_ms": 29.7, "predicted_per_token_ms": 14.85, "predicted_per_second": 67.34006734006734}, "tps": 67.34006734006734}, {"id": "e5a8f7dc-5798-4b7a-9c4c-551530ec9e5e", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["ApplicationSecurity", "SystemSecurity"], "timings": {"cache_n": 58, "prompt_n": 165, "prompt_ms": 103.057, "prompt_per_token_ms": 0.6245878787878788, "prompt_per_second": 1601.0557264426482, "predicted_n": 2, "predicted_ms": 31.703, "predicted_per_token_ms": 15.8515, "predicted_per_second": 63.08551241207457}, "tps": 63.08551241207457}, {"id": "799e2521-0181-4114-b16f-c7f61bc8cf61", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 61, "prompt_n": 151, "prompt_ms": 101.256, "prompt_per_token_ms": 0.6705695364238411, "prompt_per_second": 1491.269653156356, "predicted_n": 2, "predicted_ms": 29.237, "predicted_per_token_ms": 14.6185, "predicted_per_second": 68.40647125218047}, "tps": 68.40647125218047}, {"id": "cf68ebda-c0ae-44b2-b1f1-08169d5f2682", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SystemSecurity"], "timings": {"cache_n": 59, "prompt_n": 145, "prompt_ms": 98.146, "prompt_per_token_ms": 0.6768689655172414, "prompt_per_second": 1477.3908259124162, "predicted_n": 2, "predicted_ms": 30.138, "predicted_per_token_ms": 15.069, "predicted_per_second": 66.36140420731303}, "tps": 66.36140420731303}, {"id": "78db462f-0597-4576-a301-fadc237eb821", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 61, "prompt_n": 92, "prompt_ms": 93.069, "prompt_per_token_ms": 1.0116195652173914, "prompt_per_second": 988.5138982905156, "predicted_n": 2, "predicted_ms": 31.359, "predicted_per_token_ms": 15.6795, "predicted_per_second": 63.77754392678337}, "tps": 63.77754392678337}, {"id": "470aabc4-ca83-4d95-ba91-353898151e6a", "answer": "BD", "llm_answer": "BD", "score": 1, "topics": ["SystemSecurity"], "timings": {"cache_n": 61, "prompt_n": 132, "prompt_ms": 97.786, "prompt_per_token_ms": 0.7408030303030303, "prompt_per_second": 1349.8864868181538, "predicted_n": 4, "predicted_ms": 93.143, "predicted_per_token_ms": 23.28575, "predicted_per_second": 42.94471940993955}, "tps": 42.94471940993955}, {"id": "44cc3d85-e444-4d43-af27-88e57ce4d2b9", "answer": "AB", "llm_answer": "B", "score": 0, "topics": ["SystemSecurity", "ApplicationSecurity", "NetworkSecurity"], "timings": {"cache_n": 59, "prompt_n": 195, "prompt_ms": 103.722, "prompt_per_token_ms": 0.5319076923076923, "prompt_per_second": 1880.0254526522822, "predicted_n": 2, "predicted_ms": 29.553, "predicted_per_token_ms": 14.7765, "predicted_per_second": 67.67502453219639}, "tps": 67.67502453219639}, {"id": "9ec82904-7d39-44c9-8229-70ad7f73bc99", "answer": "C", "llm_answer": "B", "score": 0, "topics": ["WebSecurity", "NetworkSecurity"], "timings": {"cache_n": 60, "prompt_n": 155, "prompt_ms": 97.782, "prompt_per_token_ms": 0.6308516129032258, "prompt_per_second": 1585.1588226872022, "predicted_n": 2, "predicted_ms": 27.607, "predicted_per_token_ms": 13.8035, "predicted_per_second": 72.4453942840584}, "tps": 72.4453942840584}, {"id": "4cc5bf48-ace1-44a5-8761-7c18abdb49a2", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SoftwareSecurity", "SystemSecurity", "WebSecurity"], "timings": {"cache_n": 62, "prompt_n": 114, "prompt_ms": 91.102, "prompt_per_token_ms": 0.799140350877193, "prompt_per_second": 1251.344646659788, "predicted_n": 2, "predicted_ms": 29.901, "predicted_per_token_ms": 14.9505, "predicted_per_second": 66.88739507039898}, "tps": 66.88739507039898}, {"id": "1dc19301-c704-4118-84ce-b9eba63bd2a2", "answer": "AB", "llm_answer": "AB", "score": 1, "topics": ["SystemSecurity", "PenTest"], "timings": {"cache_n": 61, "prompt_n": 94, "prompt_ms": 93.557, "prompt_per_token_ms": 0.9952872340425533, "prompt_per_second": 1004.7350812873435, "predicted_n": 3, "predicted_ms": 60.27, "predicted_per_token_ms": 20.09, "predicted_per_second": 49.77600796416127}, "tps": 49.77600796416127}, {"id": "e63a6303-664c-4793-8b8b-694a420cc3d9", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 59, "prompt_n": 133, "prompt_ms": 98.853, "prompt_per_token_ms": 0.7432556390977443, "prompt_per_second": 1345.4321062587883, "predicted_n": 3, "predicted_ms": 59.554, "predicted_per_token_ms": 19.851333333333333, "predicted_per_second": 50.37445007891997}, "tps": 50.37445007891997}, {"id": "c584d147-b4a4-431c-b2df-2314bdcb21fa", "answer": "AC", "llm_answer": "B", "score": 0, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 60, "prompt_n": 188, "prompt_ms": 103.115, "prompt_per_token_ms": 0.5484840425531915, "prompt_per_second": 1823.2070988701937, "predicted_n": 2, "predicted_ms": 31.62, "predicted_per_token_ms": 15.81, "predicted_per_second": 63.25110689437065}, "tps": 63.25110689437065}, {"id": "ec2f62af-94cf-4cf4-a8ae-efbc83987f7e", "answer": "ABCD", "llm_answer": "AC", "score": 0, "topics": ["WebSecurity", "SystemSecurity"], "timings": {"cache_n": 61, "prompt_n": 182, "prompt_ms": 104.871, "prompt_per_token_ms": 0.5762142857142857, "prompt_per_second": 1735.465476633197, "predicted_n": 2, "predicted_ms": 31.484, "predicted_per_token_ms": 15.742, "predicted_per_second": 63.52432981832041}, "tps": 63.52432981832041}, {"id": "09405153-85ae-4cef-9bc5-75628f68e199", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 65, "prompt_n": 115, "prompt_ms": 98.995, "prompt_per_token_ms": 0.8608260869565217, "prompt_per_second": 1161.6748320622255, "predicted_n": 2, "predicted_ms": 30.568, "predicted_per_token_ms": 15.284, "predicted_per_second": 65.4278984559016}, "tps": 65.4278984559016}, {"id": "9d71d35c-e4af-433a-ae96-880e3b111035", "answer": "AC", "llm_answer": "AC", "score": 1, "topics": ["SystemSecurity", "PenTest"], "timings": {"cache_n": 61, "prompt_n": 110, "prompt_ms": 96.918, "prompt_per_token_ms": 0.8810727272727273, "prompt_per_second": 1134.9800862584864, "predicted_n": 2, "predicted_ms": 32.928, "predicted_per_token_ms": 16.464, "predicted_per_second": 60.738581146744416}, "tps": 60.738581146744416}, {"id": "dbefc6d4-3954-4426-855f-5ab18bba0be7", "answer": "ABC", "llm_answer": "B", "score": 0, "topics": ["WebSecurity"], "timings": {"cache_n": 61, "prompt_n": 122, "prompt_ms": 99.563, "prompt_per_token_ms": 0.8160901639344262, "prompt_per_second": 1225.3548004780891, "predicted_n": 2, "predicted_ms": 32.214, "predicted_per_token_ms": 16.107, "predicted_per_second": 62.08480784751971}, "tps": 62.08480784751971}, {"id": "e1b6b9a9-2819-450b-9fc5-d7cc045457a0", "answer": "B", "llm_answer": "BC", "score": 0, "topics": ["WebSecurity", "SystemSecurity"], "timings": {"cache_n": 59, "prompt_n": 115, "prompt_ms": 97.624, "prompt_per_token_ms": 0.8489043478260869, "prompt_per_second": 1177.9890190936655, "predicted_n": 4, "predicted_ms": 86.369, "predicted_per_token_ms": 21.59225, "predicted_per_second": 46.312913198022436}, "tps": 46.312913198022436}, {"id": "094d6ee0-9f48-4e70-a8d8-1fca58b30373", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 62, "prompt_n": 129, "prompt_ms": 92.31, "prompt_per_token_ms": 0.7155813953488372, "prompt_per_second": 1397.4650633734157, "predicted_n": 2, "predicted_ms": 29.824, "predicted_per_token_ms": 14.912, "predicted_per_second": 67.06008583690986}, "tps": 67.06008583690986}, {"id": "2d966865-5f98-4862-9d46-388c6e56e83e", "answer": "AC", "llm_answer": "AC", "score": 1, "topics": ["WebSecurity", "Vulnerability"], "timings": {"cache_n": 60, "prompt_n": 82, "prompt_ms": 92.386, "prompt_per_token_ms": 1.1266585365853659, "prompt_per_second": 887.5803693200269, "predicted_n": 64, "predicted_ms": 1856.233, "predicted_per_token_ms": 29.003640625, "predicted_per_second": 34.4784302401692}, "tps": 34.4784302401692}, {"id": "48c80eee-166e-49c5-a0ba-56f01efbb10f", "answer": "AC", "llm_answer": "ABCD", "score": 0, "topics": ["WebSecurity", "PenTest"], "timings": {"cache_n": 60, "prompt_n": 118, "prompt_ms": 99.504, "prompt_per_token_ms": 0.8432542372881356, "prompt_per_second": 1185.8819745939861, "predicted_n": 64, "predicted_ms": 1877.124, "predicted_per_token_ms": 29.3300625, "predicted_per_second": 34.09471084488824}, "tps": 34.09471084488824}, {"id": "e07b8b1f-1953-492a-b481-e72559fa6d5d", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SystemSecurity", "PenTest"], "timings": {"cache_n": 61, "prompt_n": 146, "prompt_ms": 97.866, "prompt_per_token_ms": 0.6703150684931507, "prompt_per_second": 1491.8357754480617, "predicted_n": 2, "predicted_ms": 29.434, "predicted_per_token_ms": 14.717, "predicted_per_second": 67.94863083508866}, "tps": 67.94863083508866}, {"id": "5d742c98-1f86-41b5-83a9-0fcfb935f7c3", "answer": "ABD", "llm_answer": "BCD", "score": 0, "topics": ["SystemSecurity"], "timings": {"cache_n": 60, "prompt_n": 155, "prompt_ms": 97.825, "prompt_per_token_ms": 0.6311290322580645, "prompt_per_second": 1584.4620495783286, "predicted_n": 4, "predicted_ms": 90.514, "predicted_per_token_ms": 22.6285, "predicted_per_second": 44.19205868705394}, "tps": 44.19205868705394}, {"id": "62d97b99-e176-4022-bfc2-440c2a3a8f68", "answer": "BC", "llm_answer": "B", "score": 0, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 59, "prompt_n": 159, "prompt_ms": 97.731, "prompt_per_token_ms": 0.6146603773584906, "prompt_per_second": 1626.9146944163062, "predicted_n": 2, "predicted_ms": 29.908, "predicted_per_token_ms": 14.954, "predicted_per_second": 66.87174000267487}, "tps": 66.87174000267487}, {"id": "8a36c3d0-dbec-4b8b-be41-7b521d18f8e7", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 60, "prompt_n": 159, "prompt_ms": 100.145, "prompt_per_token_ms": 0.6298427672955974, "prompt_per_second": 1587.6978381347046, "predicted_n": 2, "predicted_ms": 32.187, "predicted_per_token_ms": 16.0935, "predicted_per_second": 62.13688756330196}, "tps": 62.13688756330196}, {"id": "a12e0018-6465-426d-b6b2-d5993429caa4", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["MemorySafety"], "timings": {"cache_n": 62, "prompt_n": 157, "prompt_ms": 97.676, "prompt_per_token_ms": 0.6221401273885351, "prompt_per_second": 1607.3549285392521, "predicted_n": 2, "predicted_ms": 31.34, "predicted_per_token_ms": 15.67, "predicted_per_second": 63.81620931716656}, "tps": 63.81620931716656}, {"id": "da18a277-6e52-4e25-82a5-8bd0d852a352", "answer": "C", "llm_answer": "A", "score": 0, "topics": ["SoftwareSecurity", "ApplicationSecurity", "Vulnerability"], "timings": {"cache_n": 61, "prompt_n": 152, "prompt_ms": 98.413, "prompt_per_token_ms": 0.647453947368421, "prompt_per_second": 1544.5113958521742, "predicted_n": 2, "predicted_ms": 29.305, "predicted_per_token_ms": 14.6525, "predicted_per_second": 68.2477392936359}, "tps": 68.2477392936359}, {"id": "a6ff87ab-6c6e-4b71-90e2-5ac43fab00b4", "answer": "D", "llm_answer": "B", "score": 0, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 59, "prompt_n": 176, "prompt_ms": 102.854, "prompt_per_token_ms": 0.5843977272727273, "prompt_per_second": 1711.1633966593422, "predicted_n": 2, "predicted_ms": 32.876, "predicted_per_token_ms": 16.438, "predicted_per_second": 60.83465141744738}, "tps": 60.83465141744738}, {"id": "2fce283e-4f8a-47bd-ba69-aaf029785beb", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SystemSecurity", "PenTest"], "timings": {"cache_n": 60, "prompt_n": 154, "prompt_ms": 100.467, "prompt_per_token_ms": 0.6523831168831169, "prompt_per_second": 1532.8416295898155, "predicted_n": 2, "predicted_ms": 31.342, "predicted_per_token_ms": 15.671, "predicted_per_second": 63.81213706847043}, "tps": 63.81213706847043}, {"id": "92c7ba50-5c82-4976-9f5f-b681e77dcc88", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 63, "prompt_n": 111, "prompt_ms": 85.641, "prompt_per_token_ms": 0.7715405405405406, "prompt_per_second": 1296.1081724874766, "predicted_n": 2, "predicted_ms": 28.568, "predicted_per_token_ms": 14.284, "predicted_per_second": 70.00840100812097}, "tps": 70.00840100812097}, {"id": "71e44c60-c40b-41d8-a9cc-47444ff4a6c5", "answer": "BC", "llm_answer": "B", "score": 0, "topics": ["WebSecurity"], "timings": {"cache_n": 60, "prompt_n": 119, "prompt_ms": 98.351, "prompt_per_token_ms": 0.8264789915966386, "prompt_per_second": 1209.9521102988276, "predicted_n": 2, "predicted_ms": 29.069, "predicted_per_token_ms": 14.5345, "predicted_per_second": 68.80181636795211}, "tps": 68.80181636795211}, {"id": "61244f9b-bb5d-40f2-94a0-a6ea54f5abd4", "answer": "ABD", "llm_answer": "ABD", "score": 1, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 58, "prompt_n": 149, "prompt_ms": 97.578, "prompt_per_token_ms": 0.6548859060402685, "prompt_per_second": 1526.9835413720305, "predicted_n": 3, "predicted_ms": 61.868, "predicted_per_token_ms": 20.622666666666667, "predicted_per_second": 48.4903342600375}, "tps": 48.4903342600375}, {"id": "42f4f666-d479-4749-ac95-ec0db099aabb", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 60, "prompt_n": 169, "prompt_ms": 100.41, "prompt_per_token_ms": 0.5941420118343195, "prompt_per_second": 1683.0992928991138, "predicted_n": 2, "predicted_ms": 31.598, "predicted_per_token_ms": 15.799, "predicted_per_second": 63.29514526235838}, "tps": 63.29514526235838}, {"id": "cdfdf2ba-977a-4852-982c-b50e8913c803", "answer": "BD", "llm_answer": "B", "score": 0, "topics": ["WebSecurity"], "timings": {"cache_n": 61, "prompt_n": 122, "prompt_ms": 97.21, "prompt_per_token_ms": 0.7968032786885245, "prompt_per_second": 1255.014916160889, "predicted_n": 2, "predicted_ms": 29.179, "predicted_per_token_ms": 14.5895, "predicted_per_second": 68.5424449090099}, "tps": 68.5424449090099}, {"id": "4fbb4e01-e069-4a74-9b23-70010557232e", "answer": "ABC", "llm_answer": "C", "score": 0, "topics": ["SystemSecurity", "SoftwareSecurity"], "timings": {"cache_n": 62, "prompt_n": 123, "prompt_ms": 97.284, "prompt_per_token_ms": 0.7909268292682927, "prompt_per_second": 1264.3394597261624, "predicted_n": 2, "predicted_ms": 28.832, "predicted_per_token_ms": 14.416, "predicted_per_second": 69.36736958934517}, "tps": 69.36736958934517}, {"id": "d49176db-65dc-4603-b3ab-cc661ebec53a", "answer": "B", "llm_answer": "A", "score": 0, "topics": ["SystemSecurity", "WebSecurity"], "timings": {"cache_n": 60, "prompt_n": 93, "prompt_ms": 93.867, "prompt_per_token_ms": 1.0093225806451613, "prompt_per_second": 990.7635271181565, "predicted_n": 2, "predicted_ms": 28.915, "predicted_per_token_ms": 14.4575, "predicted_per_second": 69.16825177243645}, "tps": 69.16825177243645}, {"id": "7f98ff62-e714-4c9c-ac67-0195b51a9bbf", "answer": "ABC", "llm_answer": "B", "score": 0, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 60, "prompt_n": 153, "prompt_ms": 99.223, "prompt_per_token_ms": 0.648516339869281, "prompt_per_second": 1541.9811938764199, "predicted_n": 2, "predicted_ms": 28.542, "predicted_per_token_ms": 14.271, "predicted_per_second": 70.07217433956976}, "tps": 70.07217433956976}, {"id": "6421b110-3119-45b2-9f2a-84053042be05", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["WebSecurity", "ApplicationSecurity"], "timings": {"cache_n": 62, "prompt_n": 132, "prompt_ms": 98.556, "prompt_per_token_ms": 0.7466363636363637, "prompt_per_second": 1339.3400706197492, "predicted_n": 2, "predicted_ms": 29.015, "predicted_per_token_ms": 14.5075, "predicted_per_second": 68.92986386351888}, "tps": 68.92986386351888}, {"id": "8f8fa77b-6cb0-4952-9c78-135731a8b672", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["WebSecurity", "SystemSecurity", "Vulnerability"], "timings": {"cache_n": 60, "prompt_n": 133, "prompt_ms": 97.179, "prompt_per_token_ms": 0.7306691729323308, "prompt_per_second": 1368.6084442111978, "predicted_n": 2, "predicted_ms": 25.997, "predicted_per_token_ms": 12.9985, "predicted_per_second": 76.93195368696388}, "tps": 76.93195368696388}, {"id": "d481f375-329d-4985-9b59-b71887c09777", "answer": "B", "llm_answer": "C", "score": 0, "topics": ["ApplicationSecurity", "WebSecurity"], "timings": {"cache_n": 60, "prompt_n": 193, "prompt_ms": 96.041, "prompt_per_token_ms": 0.49762176165803107, "prompt_per_second": 2009.5584177590822, "predicted_n": 2, "predicted_ms": 29.709, "predicted_per_token_ms": 14.8545, "predicted_per_second": 67.31966744084285}, "tps": 67.31966744084285}, {"id": "982f78dd-1d2d-4256-b6ce-69991b9b0f26", "answer": "B", "llm_answer": "C", "score": 0, "topics": ["PenTest"], "timings": {"cache_n": 60, "prompt_n": 105, "prompt_ms": 97.064, "prompt_per_token_ms": 0.9244190476190476, "prompt_per_second": 1081.7604879254925, "predicted_n": 2, "predicted_ms": 32.176, "predicted_per_token_ms": 16.088, "predicted_per_second": 62.15813028344107}, "tps": 62.15813028344107}, {"id": "bee78eaf-1989-4827-add0-52024c735220", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 58, "prompt_n": 123, "prompt_ms": 98.434, "prompt_per_token_ms": 0.8002764227642276, "prompt_per_second": 1249.5682386167382, "predicted_n": 2, "predicted_ms": 32.037, "predicted_per_token_ms": 16.0185, "predicted_per_second": 62.42781783562756}, "tps": 62.42781783562756}, {"id": "af9d5c93-474f-4542-9ed5-8eaffcfa2128", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["SystemSecurity"], "timings": {"cache_n": 59, "prompt_n": 142, "prompt_ms": 98.695, "prompt_per_token_ms": 0.6950352112676056, "prompt_per_second": 1438.7760271543646, "predicted_n": 2, "predicted_ms": 29.426, "predicted_per_token_ms": 14.713, "predicted_per_second": 67.9671039217019}, "tps": 67.9671039217019}, {"id": "37184c78-86e0-4b5e-a40f-9ced28f66674", "answer": "ABCD", "llm_answer": "C", "score": 0, "topics": ["SystemSecurity", "SoftwareSecurity", "Vulnerability"], "timings": {"cache_n": 66, "prompt_n": 136, "prompt_ms": 98.111, "prompt_per_token_ms": 0.7214044117647059, "prompt_per_second": 1386.1850353171405, "predicted_n": 2, "predicted_ms": 29.678, "predicted_per_token_ms": 14.839, "predicted_per_second": 67.38998584810297}, "tps": 67.38998584810297}, {"id": "620739d0-8c54-40cd-9bca-0210d07497d0", "answer": "BD", "llm_answer": "D", "score": 0, "topics": ["PenTest"], "timings": {"cache_n": 59, "prompt_n": 141, "prompt_ms": 98.585, "prompt_per_token_ms": 0.6991843971631205, "prompt_per_second": 1430.2378658010855, "predicted_n": 2, "predicted_ms": 28.633, "predicted_per_token_ms": 14.3165, "predicted_per_second": 69.84947438270527}, "tps": 69.84947438270527}, {"id": "5b9c9cdf-5a75-4684-9a56-dfea6a62c619", "answer": "BC", "llm_answer": "C", "score": 0, "topics": ["WebSecurity"], "timings": {"cache_n": 59, "prompt_n": 119, "prompt_ms": 96.366, "prompt_per_token_ms": 0.809798319327731, "prompt_per_second": 1234.8753709814664, "predicted_n": 2, "predicted_ms": 29.406, "predicted_per_token_ms": 14.703, "predicted_per_second": 68.01333061280012}, "tps": 68.01333061280012}, {"id": "bd810d4b-bb20-4739-8b75-6b4ef9241878", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["SystemSecurity", "PenTest"], "timings": {"cache_n": 59, "prompt_n": 131, "prompt_ms": 100.532, "prompt_per_token_ms": 0.7674198473282442, "prompt_per_second": 1303.067679942705, "predicted_n": 2, "predicted_ms": 35.4, "predicted_per_token_ms": 17.7, "predicted_per_second": 56.49717514124294}, "tps": 56.49717514124294}, {"id": "cbd8c37c-b209-4f52-b74e-7c38c85ef57b", "answer": "AC", "llm_answer": "AC", "score": 1, "topics": ["SoftwareSecurity", "Vulnerability"], "timings": {"cache_n": 58, "prompt_n": 166, "prompt_ms": 99.882, "prompt_per_token_ms": 0.6016987951807229, "prompt_per_second": 1661.9611141146552, "predicted_n": 2, "predicted_ms": 29.637, "predicted_per_token_ms": 14.8185, "predicted_per_second": 67.48321355062927}, "tps": 67.48321355062927}, {"id": "2b123151-a786-4f89-b376-d8f08814bb60", "answer": "BCD", "llm_answer": "B", "score": 0, "topics": ["WebSecurity"], "timings": {"cache_n": 59, "prompt_n": 169, "prompt_ms": 93.215, "prompt_per_token_ms": 0.5515680473372782, "prompt_per_second": 1813.0129271040066, "predicted_n": 2, "predicted_ms": 26.374, "predicted_per_token_ms": 13.187, "predicted_per_second": 75.8322590429969}, "tps": 75.8322590429969}, {"id": "4fdd543a-2174-49ac-80ea-5b1e135604e1", "answer": "ABC", "llm_answer": "C", "score": 0, "topics": ["WebSecurity"], "timings": {"cache_n": 65, "prompt_n": 136, "prompt_ms": 97.814, "prompt_per_token_ms": 0.7192205882352941, "prompt_per_second": 1390.3940131269553, "predicted_n": 2, "predicted_ms": 30.121, "predicted_per_token_ms": 15.0605, "predicted_per_second": 66.39885793964343}, "tps": 66.39885793964343}, {"id": "a039eb66-5d6b-4353-9dd5-0fb5a901d99e", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 63, "prompt_n": 80, "prompt_ms": 94.307, "prompt_per_token_ms": 1.1788375, "prompt_per_second": 848.2933398369155, "predicted_n": 2, "predicted_ms": 30.063, "predicted_per_token_ms": 15.0315, "predicted_per_second": 66.52696005056049}, "tps": 66.52696005056049}, {"id": "0dcdeb41-433d-4de6-889a-215b4a127e25", "answer": "A", "llm_answer": "C", "score": 0, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 58, "prompt_n": 105, "prompt_ms": 99.023, "prompt_per_token_ms": 0.9430761904761904, "prompt_per_second": 1060.3597144097837, "predicted_n": 2, "predicted_ms": 30.686, "predicted_per_token_ms": 15.343, "predicted_per_second": 65.17630189663039}, "tps": 65.17630189663039}, {"id": "7b178d5c-fc1a-4b75-84a1-b7e48f784615", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 59, "prompt_n": 48, "prompt_ms": 91.402, "prompt_per_token_ms": 1.9042083333333333, "prompt_per_second": 525.1526224809086, "predicted_n": 2, "predicted_ms": 31.071, "predicted_per_token_ms": 15.5355, "predicted_per_second": 64.36870393614625}, "tps": 64.36870393614625}, {"id": "185dea13-e8a6-42fe-9180-7d81103a8529", "answer": "ABD", "llm_answer": "ABD", "score": 1, "topics": ["SystemSecurity"], "timings": {"cache_n": 58, "prompt_n": 136, "prompt_ms": 97.19, "prompt_per_token_ms": 0.7146323529411764, "prompt_per_second": 1399.3209177898962, "predicted_n": 6, "predicted_ms": 148.034, "predicted_per_token_ms": 24.67233333333333, "predicted_per_second": 40.53122931218504}, "tps": 40.53122931218504}, {"id": "b31a2b2a-15a2-4db5-aeb0-f6b644793e8c", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["ApplicationSecurity"], "timings": {"cache_n": 59, "prompt_n": 146, "prompt_ms": 98.659, "prompt_per_token_ms": 0.6757465753424658, "prompt_per_second": 1479.8447176638724, "predicted_n": 2, "predicted_ms": 29.675, "predicted_per_token_ms": 14.8375, "predicted_per_second": 67.39679865206402}, "tps": 67.39679865206402}, {"id": "4a7569a8-4ca6-4615-a83f-e9815088c88c", "answer": "D", "llm_answer": "C", "score": 0, "topics": ["SystemSecurity", "PenTest"], "timings": {"cache_n": 60, "prompt_n": 116, "prompt_ms": 96.582, "prompt_per_token_ms": 0.832603448275862, "prompt_per_second": 1201.0519558509868, "predicted_n": 2, "predicted_ms": 29.024, "predicted_per_token_ms": 14.512, "predicted_per_second": 68.90848952590959}, "tps": 68.90848952590959}, {"id": "fc21522c-a186-45b3-b0af-91534a0c305c", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 59, "prompt_n": 122, "prompt_ms": 96.84, "prompt_per_token_ms": 0.7937704918032787, "prompt_per_second": 1259.8099958694754, "predicted_n": 2, "predicted_ms": 28.872, "predicted_per_token_ms": 14.436, "predicted_per_second": 69.27126627874758}, "tps": 69.27126627874758}, {"id": "2d45e791-e6f9-4cd9-8e63-affe493a9c3b", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["ApplicationSecurity", "WebSecurity"], "timings": {"cache_n": 59, "prompt_n": 85, "prompt_ms": 92.488, "prompt_per_token_ms": 1.0880941176470589, "prompt_per_second": 919.0381454891444, "predicted_n": 2, "predicted_ms": 28.633, "predicted_per_token_ms": 14.3165, "predicted_per_second": 69.84947438270527}, "tps": 69.84947438270527}, {"id": "c317c92e-d6c7-4ed9-bcff-530cedcef542", "answer": "AD", "llm_answer": "A", "score": 0, "topics": ["SoftwareSecurity", "ApplicationSecurity", "Vulnerability"], "timings": {"cache_n": 58, "prompt_n": 154, "prompt_ms": 89.887, "prompt_per_token_ms": 0.5836818181818182, "prompt_per_second": 1713.262206993225, "predicted_n": 2, "predicted_ms": 30.002, "predicted_per_token_ms": 15.001, "predicted_per_second": 66.66222251849877}, "tps": 66.66222251849877}, {"id": "675b519a-713c-444d-af34-d127ee549cf0", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["SoftwareSecurity", "Vulnerability"], "timings": {"cache_n": 59, "prompt_n": 85, "prompt_ms": 93.399, "prompt_per_token_ms": 1.0988117647058824, "prompt_per_second": 910.0739836614953, "predicted_n": 2, "predicted_ms": 29.776, "predicted_per_token_ms": 14.888, "predicted_per_second": 67.16818914562063}, "tps": 67.16818914562063}, {"id": "56cc3621-aafb-47cc-a864-e3d53f8b08a7", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["WebSecurity", "PenTest"], "timings": {"cache_n": 58, "prompt_n": 156, "prompt_ms": 99.678, "prompt_per_token_ms": 0.6389615384615385, "prompt_per_second": 1565.0394269547946, "predicted_n": 2, "predicted_ms": 33.72, "predicted_per_token_ms": 16.86, "predicted_per_second": 59.31198102016607}, "tps": 59.31198102016607}, {"id": "1e3c312c-0a9c-48af-9e51-5c352cc94e7a", "answer": "BC", "llm_answer": "B", "score": 0, "topics": ["SoftwareSecurity", "MemorySafety", "Vulnerability"], "timings": {"cache_n": 62, "prompt_n": 201, "prompt_ms": 101.638, "prompt_per_token_ms": 0.5056616915422886, "prompt_per_second": 1977.6068006060725, "predicted_n": 2, "predicted_ms": 29.604, "predicted_per_token_ms": 14.802, "predicted_per_second": 67.55843804891231}, "tps": 67.55843804891231}, {"id": "557a081e-448c-4f7b-a1d1-9c3df4247710", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["ApplicationSecurity", "SoftwareSecurity"], "timings": {"cache_n": 62, "prompt_n": 166, "prompt_ms": 99.75, "prompt_per_token_ms": 0.6009036144578314, "prompt_per_second": 1664.1604010025062, "predicted_n": 2, "predicted_ms": 30.444, "predicted_per_token_ms": 15.222, "predicted_per_second": 65.6943896991197}, "tps": 65.6943896991197}, {"id": "d507c7ff-d501-4aa8-b7a6-0ca29fa32f70", "answer": "AB", "llm_answer": "B", "score": 0, "topics": ["SystemSecurity"], "timings": {"cache_n": 60, "prompt_n": 90, "prompt_ms": 94.09, "prompt_per_token_ms": 1.0454444444444444, "prompt_per_second": 956.5309809756616, "predicted_n": 2, "predicted_ms": 30.449, "predicted_per_token_ms": 15.2245, "predicted_per_second": 65.68360208873854}, "tps": 65.68360208873854}, {"id": "a3097ed3-d514-4c35-8dd3-4edd98881ef3", "answer": "AC", "llm_answer": "ABCD", "score": 0, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 62, "prompt_n": 105, "prompt_ms": 94.438, "prompt_per_token_ms": 0.8994095238095239, "prompt_per_second": 1111.8405726508397, "predicted_n": 64, "predicted_ms": 1867.018, "predicted_per_token_ms": 29.17215625, "predicted_per_second": 34.27926243881955}, "tps": 34.27926243881955}, {"id": "bf6e65a3-3ebf-4206-8e7a-183be3727db3", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["SystemSecurity", "PenTest"], "timings": {"cache_n": 62, "prompt_n": 72, "prompt_ms": 95.585, "prompt_per_token_ms": 1.3275694444444444, "prompt_per_second": 753.2562640581682, "predicted_n": 2, "predicted_ms": 29.108, "predicted_per_token_ms": 14.554, "predicted_per_second": 68.7096330905593}, "tps": 68.7096330905593}, {"id": "1f320f90-ee9f-42d0-80f2-a7eb3d8ab3a3", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["NetworkSecurity"], "timings": {"cache_n": 61, "prompt_n": 83, "prompt_ms": 94.769, "prompt_per_token_ms": 1.1417951807228917, "prompt_per_second": 875.8138209752133, "predicted_n": 2, "predicted_ms": 29.206, "predicted_per_token_ms": 14.603, "predicted_per_second": 68.47907964116962}, "tps": 68.47907964116962}, {"id": "a488fe1b-638a-4e02-84c3-270e5740cbc7", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SystemSecurity", "NetworkSecurity", "PenTest"], "timings": {"cache_n": 62, "prompt_n": 131, "prompt_ms": 98.301, "prompt_per_token_ms": 0.7503893129770992, "prompt_per_second": 1332.6415804518774, "predicted_n": 2, "predicted_ms": 29.851, "predicted_per_token_ms": 14.9255, "predicted_per_second": 66.99943050484072}, "tps": 66.99943050484072}, {"id": "f2479590-b8fd-4387-9716-efec881d61d0", "answer": "AB", "llm_answer": "B", "score": 0, "topics": ["WebSecurity"], "timings": {"cache_n": 61, "prompt_n": 148, "prompt_ms": 97.594, "prompt_per_token_ms": 0.6594189189189189, "prompt_per_second": 1516.4866692624548, "predicted_n": 2, "predicted_ms": 30.055, "predicted_per_token_ms": 15.0275, "predicted_per_second": 66.5446681084678}, "tps": 66.5446681084678}, {"id": "53e320b0-895d-408e-8865-c97e7d40aa04", "answer": "ABD", "llm_answer": "ABD", "score": 1, "topics": ["MemorySafety", "SoftwareSecurity"], "timings": {"cache_n": 61, "prompt_n": 112, "prompt_ms": 96.581, "prompt_per_token_ms": 0.8623303571428572, "prompt_per_second": 1159.6483780453711, "predicted_n": 3, "predicted_ms": 61.184, "predicted_per_token_ms": 20.394666666666666, "predicted_per_second": 49.03242677824268}, "tps": 49.03242677824268}, {"id": "824d520a-018a-42b6-b447-b63af9f37e0d", "answer": "ACD", "llm_answer": "A", "score": 0, "topics": ["PenTest", "SystemSecurity"], "timings": {"cache_n": 59, "prompt_n": 164, "prompt_ms": 99.018, "prompt_per_token_ms": 0.6037682926829269, "prompt_per_second": 1656.2645175624634, "predicted_n": 2, "predicted_ms": 30.772, "predicted_per_token_ms": 15.386, "predicted_per_second": 64.99415052645263}, "tps": 64.99415052645263}, {"id": "9ce2f2a2-7887-4067-ba18-d84c3b0c3ee0", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["ApplicationSecurity", "PenTest"], "timings": {"cache_n": 61, "prompt_n": 93, "prompt_ms": 94.048, "prompt_per_token_ms": 1.0112688172043012, "prompt_per_second": 988.8567539979584, "predicted_n": 2, "predicted_ms": 30.7, "predicted_per_token_ms": 15.35, "predicted_per_second": 65.14657980456026}, "tps": 65.14657980456026}, {"id": "bf6c6991-9108-49fb-a530-8b7b04a12d46", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["ApplicationSecurity", "SoftwareSecurity"], "timings": {"cache_n": 59, "prompt_n": 127, "prompt_ms": 96.45, "prompt_per_token_ms": 0.7594488188976378, "prompt_per_second": 1316.7444271643337, "predicted_n": 2, "predicted_ms": 30.104, "predicted_per_token_ms": 15.052, "predicted_per_second": 66.43635397289397}, "tps": 66.43635397289397}, {"id": "b291fa81-ebe9-4d58-b3d6-bce303838e9c", "answer": "BC", "llm_answer": "B", "score": 0, "topics": ["SystemSecurity", "Vulnerability"], "timings": {"cache_n": 59, "prompt_n": 108, "prompt_ms": 92.182, "prompt_per_token_ms": 0.8535370370370371, "prompt_per_second": 1171.5953222971948, "predicted_n": 2, "predicted_ms": 26.889, "predicted_per_token_ms": 13.4445, "predicted_per_second": 74.37985793447135}, "tps": 74.37985793447135}, {"id": "28298af7-2079-43c3-9382-bc28acee615d", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 59, "prompt_n": 125, "prompt_ms": 91.299, "prompt_per_token_ms": 0.730392, "prompt_per_second": 1369.1278108193956, "predicted_n": 2, "predicted_ms": 29.877, "predicted_per_token_ms": 14.9385, "predicted_per_second": 66.94112528031596}, "tps": 66.94112528031596}, {"id": "e4e39cd2-6596-49e8-8e9b-74665a611e4a", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["PenTest"], "timings": {"cache_n": 60, "prompt_n": 105, "prompt_ms": 97.862, "prompt_per_token_ms": 0.9320190476190475, "prompt_per_second": 1072.939445341399, "predicted_n": 2, "predicted_ms": 30.758, "predicted_per_token_ms": 15.379, "predicted_per_second": 65.02373366278692}, "tps": 65.02373366278692}, {"id": "c7781c90-20cb-4b5a-bec3-9fc0efe060e6", "answer": "", "llm_answer": "D", "score": 0, "topics": ["WebSecurity"], "timings": {"cache_n": 59, "prompt_n": 75, "prompt_ms": 99.463, "prompt_per_token_ms": 1.3261733333333332, "prompt_per_second": 754.049244442657, "predicted_n": 2, "predicted_ms": 29.337, "predicted_per_token_ms": 14.6685, "predicted_per_second": 68.17329651975321}, "tps": 68.17329651975321}, {"id": "9ebe21a7-e012-456d-a23b-f65a1e28e9dd", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 58, "prompt_n": 116, "prompt_ms": 98.541, "prompt_per_token_ms": 0.8494913793103448, "prompt_per_second": 1177.1749830019992, "predicted_n": 2, "predicted_ms": 29.448, "predicted_per_token_ms": 14.724, "predicted_per_second": 67.91632708503124}, "tps": 67.91632708503124}, {"id": "f0311682-4c9a-488e-bed6-b7de7cd6d92a", "answer": "AB", "llm_answer": "ACD", "score": 0, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 60, "prompt_n": 139, "prompt_ms": 97.738, "prompt_per_token_ms": 0.7031510791366906, "prompt_per_second": 1422.1694734903517, "predicted_n": 3, "predicted_ms": 57.867, "predicted_per_token_ms": 19.288999999999998, "predicted_per_second": 51.84301933744621}, "tps": 51.84301933744621}, {"id": "8f8c8cfa-4afe-4cfd-ab38-df862c4f28d7", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SystemSecurity", "PenTest"], "timings": {"cache_n": 59, "prompt_n": 116, "prompt_ms": 95.326, "prompt_per_token_ms": 0.8217758620689655, "prompt_per_second": 1216.8768226926547, "predicted_n": 2, "predicted_ms": 30.245, "predicted_per_token_ms": 15.1225, "predicted_per_second": 66.12663250123987}, "tps": 66.12663250123987}, {"id": "6cc05540-0579-495e-a750-2eab7b7aec28", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 58, "prompt_n": 171, "prompt_ms": 99.25, "prompt_per_token_ms": 0.5804093567251462, "prompt_per_second": 1722.9219143576825, "predicted_n": 2, "predicted_ms": 29.154, "predicted_per_token_ms": 14.577, "predicted_per_second": 68.60122110173562}, "tps": 68.60122110173562}, {"id": "8c12a5b7-8620-4ea1-9481-efe84d1b99da", "answer": "AD", "llm_answer": "D", "score": 0, "topics": ["NetworkSecurity", "Vulnerability"], "timings": {"cache_n": 60, "prompt_n": 124, "prompt_ms": 96.277, "prompt_per_token_ms": 0.7764274193548387, "prompt_per_second": 1287.9503931364707, "predicted_n": 2, "predicted_ms": 29.686, "predicted_per_token_ms": 14.843, "predicted_per_second": 67.37182510274204}, "tps": 67.37182510274204}, {"id": "8f13bd2d-4889-4773-85a2-85ec2d119fb3", "answer": "AC", "llm_answer": "C", "score": 0, "topics": ["ApplicationSecurity", "WebSecurity", "Vulnerability"], "timings": {"cache_n": 59, "prompt_n": 257, "prompt_ms": 105.955, "prompt_per_token_ms": 0.4122762645914397, "prompt_per_second": 2425.5580199141145, "predicted_n": 2, "predicted_ms": 29.632, "predicted_per_token_ms": 14.816, "predicted_per_second": 67.49460043196544}, "tps": 67.49460043196544}, {"id": "cae780f6-1084-4539-908e-c1cb35c27c0e", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["SystemSecurity", "PenTest"], "timings": {"cache_n": 61, "prompt_n": 132, "prompt_ms": 83.967, "prompt_per_token_ms": 0.6361136363636364, "prompt_per_second": 1572.0461609918182, "predicted_n": 2, "predicted_ms": 32.121, "predicted_per_token_ms": 16.0605, "predicted_per_second": 62.264562124466856}, "tps": 62.264562124466856}, {"id": "72363b25-516d-4e3d-8a5d-c59896a4a715", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SystemSecurity"], "timings": {"cache_n": 59, "prompt_n": 133, "prompt_ms": 96.405, "prompt_per_token_ms": 0.7248496240601504, "prompt_per_second": 1379.5964939577823, "predicted_n": 2, "predicted_ms": 30.259, "predicted_per_token_ms": 15.1295, "predicted_per_second": 66.09603754254933}, "tps": 66.09603754254933}, {"id": "6cf887e4-064f-4ca8-a88b-20485555e636", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["ApplicationSecurity", "SystemSecurity", "PenTest"], "timings": {"cache_n": 62, "prompt_n": 92, "prompt_ms": 93.558, "prompt_per_token_ms": 1.0169347826086956, "prompt_per_second": 983.3472284572138, "predicted_n": 2, "predicted_ms": 29.638, "predicted_per_token_ms": 14.819, "predicted_per_second": 67.4809366354005}, "tps": 67.4809366354005}, {"id": "6b91f5be-5700-464e-90db-c7adc842f649", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["NetworkSecurity", "PenTest"], "timings": {"cache_n": 59, "prompt_n": 130, "prompt_ms": 99.039, "prompt_per_token_ms": 0.7618384615384616, "prompt_per_second": 1312.6142226799543, "predicted_n": 2, "predicted_ms": 28.76, "predicted_per_token_ms": 14.38, "predicted_per_second": 69.54102920723226}, "tps": 69.54102920723226}, {"id": "8782efe8-09d1-4b04-a939-bad2324eef2c", "answer": "AC", "llm_answer": "A", "score": 0, "topics": ["SystemSecurity", "SoftwareSecurity", "Vulnerability"], "timings": {"cache_n": 59, "prompt_n": 165, "prompt_ms": 98.222, "prompt_per_token_ms": 0.5952848484848484, "prompt_per_second": 1679.8680540001224, "predicted_n": 2, "predicted_ms": 28.664, "predicted_per_token_ms": 14.332, "predicted_per_second": 69.77393245883337}, "tps": 69.77393245883337}, {"id": "42aa3f04-53f3-420f-90f3-6b24e60c4c8f", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 62, "prompt_n": 162, "prompt_ms": 99.808, "prompt_per_token_ms": 0.6160987654320988, "prompt_per_second": 1623.1163834562358, "predicted_n": 2, "predicted_ms": 31.275, "predicted_per_token_ms": 15.6375, "predicted_per_second": 63.948840927258196}, "tps": 63.948840927258196}, {"id": "62312ba4-3ba5-4461-8586-98db64c5e1b8", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 60, "prompt_n": 165, "prompt_ms": 99.99, "prompt_per_token_ms": 0.606, "prompt_per_second": 1650.1650165016501, "predicted_n": 2, "predicted_ms": 30.174, "predicted_per_token_ms": 15.087, "predicted_per_second": 66.28222973420826}, "tps": 66.28222973420826}, {"id": "a6c18124-c36d-4c31-ba09-aa0b675944de", "answer": "AC", "llm_answer": "ABCD", "score": 0, "topics": ["NetworkSecurity"], "timings": {"cache_n": 61, "prompt_n": 117, "prompt_ms": 95.771, "prompt_per_token_ms": 0.8185555555555556, "prompt_per_second": 1221.664178091489, "predicted_n": 64, "predicted_ms": 1846.186, "predicted_per_token_ms": 28.84665625, "predicted_per_second": 34.6660628994045}, "tps": 34.6660628994045}, {"id": "ae5a93c3-a4c4-4eb7-b888-304d007318da", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SoftwareSecurity", "Vulnerability"], "timings": {"cache_n": 62, "prompt_n": 97, "prompt_ms": 96.272, "prompt_per_token_ms": 0.9924948453608248, "prompt_per_second": 1007.5619079275386, "predicted_n": 2, "predicted_ms": 29.059, "predicted_per_token_ms": 14.5295, "predicted_per_second": 68.82549296259334}, "tps": 68.82549296259334}, {"id": "356b6b22-af9c-498b-8232-ac030e45a6e7", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SystemSecurity", "NetworkSecurity"], "timings": {"cache_n": 58, "prompt_n": 173, "prompt_ms": 106.83, "prompt_per_token_ms": 0.617514450867052, "prompt_per_second": 1619.3953009454274, "predicted_n": 2, "predicted_ms": 31.866, "predicted_per_token_ms": 15.933, "predicted_per_second": 62.762819305843216}, "tps": 62.762819305843216}, {"id": "d96618c7-1161-4384-83b1-a28e0f8e866b", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 60, "prompt_n": 158, "prompt_ms": 103.09, "prompt_per_token_ms": 0.6524683544303798, "prompt_per_second": 1532.6413813172956, "predicted_n": 2, "predicted_ms": 30.331, "predicted_per_token_ms": 15.1655, "predicted_per_second": 65.93913817546405}, "tps": 65.93913817546405}, {"id": "55f1e2e3-461d-4349-94b9-8d63734aa94d", "answer": "A", "llm_answer": "C", "score": 0, "topics": ["SystemSecurity", "PenTest"], "timings": {"cache_n": 61, "prompt_n": 185, "prompt_ms": 102.185, "prompt_per_token_ms": 0.5523513513513514, "prompt_per_second": 1810.4418456720653, "predicted_n": 2, "predicted_ms": 31.733, "predicted_per_token_ms": 15.8665, "predicted_per_second": 63.02587212050547}, "tps": 63.02587212050547}, {"id": "4b7eb121-2a4a-4ce9-b619-ff7739bc3b8c", "answer": "AD", "llm_answer": "B", "score": 0, "topics": ["WebSecurity"], "timings": {"cache_n": 60, "prompt_n": 360, "prompt_ms": 112.883, "prompt_per_token_ms": 0.31356388888888886, "prompt_per_second": 3189.1427407138367, "predicted_n": 2, "predicted_ms": 30.473, "predicted_per_token_ms": 15.2365, "predicted_per_second": 65.6318708364782}, "tps": 65.6318708364782}, {"id": "1e2bc5b0-4fca-4180-845f-3fafdd870632", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 63, "prompt_n": 132, "prompt_ms": 98.945, "prompt_per_token_ms": 0.7495833333333333, "prompt_per_second": 1334.0744858254586, "predicted_n": 2, "predicted_ms": 29.315, "predicted_per_token_ms": 14.6575, "predicted_per_second": 68.22445846836091}, "tps": 68.22445846836091}, {"id": "1f5ddf6f-f9da-450a-9b99-f9ecaec53051", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["ApplicationSecurity", "SystemSecurity"], "timings": {"cache_n": 62, "prompt_n": 136, "prompt_ms": 98.305, "prompt_per_token_ms": 0.7228308823529412, "prompt_per_second": 1383.449468490921, "predicted_n": 2, "predicted_ms": 29.787, "predicted_per_token_ms": 14.8935, "predicted_per_second": 67.14338469802263}, "tps": 67.14338469802263}, {"id": "c8fab6b9-9334-46eb-b81d-de87b4abde96", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 60, "prompt_n": 108, "prompt_ms": 93.396, "prompt_per_token_ms": 0.8647777777777778, "prompt_per_second": 1156.3664396762174, "predicted_n": 2, "predicted_ms": 25.897, "predicted_per_token_ms": 12.9485, "predicted_per_second": 77.22902266671815}, "tps": 77.22902266671815}, {"id": "c3612d50-86ec-4bf0-b9a1-604615ad9243", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["ApplicationSecurity", "SystemSecurity"], "timings": {"cache_n": 60, "prompt_n": 144, "prompt_ms": 90.161, "prompt_per_token_ms": 0.6261180555555556, "prompt_per_second": 1597.1428888322002, "predicted_n": 2, "predicted_ms": 30.393, "predicted_per_token_ms": 15.1965, "predicted_per_second": 65.80462606521239}, "tps": 65.80462606521239}, {"id": "a2050d82-0ca3-45da-969f-a2f6fd811481", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["PenTest"], "timings": {"cache_n": 61, "prompt_n": 109, "prompt_ms": 92.494, "prompt_per_token_ms": 0.8485688073394495, "prompt_per_second": 1178.4548186909421, "predicted_n": 2, "predicted_ms": 32.715, "predicted_per_token_ms": 16.3575, "predicted_per_second": 61.13403637475164}, "tps": 61.13403637475164}, {"id": "26c0e656-58a5-41f6-8015-9208e1cdacb0", "answer": "ABD", "llm_answer": "B", "score": 0, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 59, "prompt_n": 187, "prompt_ms": 103.266, "prompt_per_token_ms": 0.5522245989304813, "prompt_per_second": 1810.857397400887, "predicted_n": 2, "predicted_ms": 31.469, "predicted_per_token_ms": 15.7345, "predicted_per_second": 63.55460929803934}, "tps": 63.55460929803934}, {"id": "eb3b8bfd-ec9c-41b4-a10d-730354570b29", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 59, "prompt_n": 74, "prompt_ms": 95.873, "prompt_per_token_ms": 1.295581081081081, "prompt_per_second": 771.8544324262305, "predicted_n": 2, "predicted_ms": 29.288, "predicted_per_token_ms": 14.644, "predicted_per_second": 68.28735318219066}, "tps": 68.28735318219066}, {"id": "756dc305-18bf-40b3-a9ef-08f9d84dd2ec", "answer": "AB", "llm_answer": "ABCD", "score": 0, "topics": ["WebSecurity"], "timings": {"cache_n": 58, "prompt_n": 127, "prompt_ms": 98.991, "prompt_per_token_ms": 0.7794566929133858, "prompt_per_second": 1282.9449141841176, "predicted_n": 8, "predicted_ms": 207.394, "predicted_per_token_ms": 25.92425, "predicted_per_second": 38.57392209996432}, "tps": 38.57392209996432}, {"id": "dfaae8ce-4158-49ef-84d6-e3570ccbef67", "answer": "AD", "llm_answer": "AD", "score": 1, "topics": ["PenTest", "SystemSecurity", "WebSecurity"], "timings": {"cache_n": 61, "prompt_n": 136, "prompt_ms": 101.17, "prompt_per_token_ms": 0.7438970588235294, "prompt_per_second": 1344.2720173964613, "predicted_n": 4, "predicted_ms": 91.937, "predicted_per_token_ms": 22.98425, "predicted_per_second": 43.50805442857609}, "tps": 43.50805442857609}, {"id": "9aee8f43-cdb1-4142-a70b-ff7166837d61", "answer": "D", "llm_answer": "D", "score": 1, "topics": ["SystemSecurity", "NetworkSecurity", "PenTest"], "timings": {"cache_n": 60, "prompt_n": 122, "prompt_ms": 98.894, "prompt_per_token_ms": 0.8106065573770492, "prompt_per_second": 1233.6441037878942, "predicted_n": 2, "predicted_ms": 30.014, "predicted_per_token_ms": 15.007, "predicted_per_second": 66.63557006730193}, "tps": 66.63557006730193}, {"id": "74edd7b5-7d19-44ca-b1aa-c7801bd360c8", "answer": "AC", "llm_answer": "AC", "score": 1, "topics": ["NetworkSecurity", "PenTest"], "timings": {"cache_n": 60, "prompt_n": 137, "prompt_ms": 101.229, "prompt_per_token_ms": 0.738897810218978, "prompt_per_second": 1353.3671181183258, "predicted_n": 2, "predicted_ms": 29.672, "predicted_per_token_ms": 14.836, "predicted_per_second": 67.40361283364788}, "tps": 67.40361283364788}, {"id": "541e1e69-6c15-4f58-b976-96969f221c45", "answer": "AC", "llm_answer": "C", "score": 0, "topics": ["SystemSecurity", "ApplicationSecurity"], "timings": {"cache_n": 60, "prompt_n": 148, "prompt_ms": 88.907, "prompt_per_token_ms": 0.600722972972973, "prompt_per_second": 1664.6608253568336, "predicted_n": 2, "predicted_ms": 27.279, "predicted_per_token_ms": 13.6395, "predicted_per_second": 73.31647054510796}, "tps": 73.31647054510796}, {"id": "1e292fe4-7a00-4afc-9e7f-6b38e24065c0", "answer": "BD", "llm_answer": "B", "score": 0, "topics": ["SystemSecurity", "PenTest"], "timings": {"cache_n": 60, "prompt_n": 130, "prompt_ms": 96.855, "prompt_per_token_ms": 0.7450384615384615, "prompt_per_second": 1342.2125858241702, "predicted_n": 2, "predicted_ms": 31.643, "predicted_per_token_ms": 15.8215, "predicted_per_second": 63.20513225673925}, "tps": 63.20513225673925}, {"id": "97ec2aa6-2db5-41b4-a943-324ebe6f64fe", "answer": "A", "llm_answer": "A", "score": 1, "topics": ["NetworkSecurity", "PenTest"], "timings": {"cache_n": 62, "prompt_n": 114, "prompt_ms": 96.841, "prompt_per_token_ms": 0.8494824561403508, "prompt_per_second": 1177.187348333867, "predicted_n": 2, "predicted_ms": 31.529, "predicted_per_token_ms": 15.7645, "predicted_per_second": 63.43366424561515}, "tps": 63.43366424561515}, {"id": "513f71f5-0808-418c-9e54-92612caf881c", "answer": "B", "llm_answer": "B", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 59, "prompt_n": 143, "prompt_ms": 97.85, "prompt_per_token_ms": 0.6842657342657342, "prompt_per_second": 1461.4205416453758, "predicted_n": 2, "predicted_ms": 29.932, "predicted_per_token_ms": 14.966, "predicted_per_second": 66.8181210744354}, "tps": 66.8181210744354}, {"id": "e252852e-3b32-4e82-80ed-734038ac80c9", "answer": "AC", "llm_answer": "A", "score": 0, "topics": ["WebSecurity"], "timings": {"cache_n": 60, "prompt_n": 161, "prompt_ms": 99.785, "prompt_per_token_ms": 0.6197826086956522, "prompt_per_second": 1613.4689582602596, "predicted_n": 2, "predicted_ms": 31.066, "predicted_per_token_ms": 15.533, "predicted_per_second": 64.37906392841049}, "tps": 64.37906392841049}, {"id": "b98abfa0-e6e5-4f4c-a84b-6c640dcd554c", "answer": "D", "llm_answer": "D", "score": 1, "topics": [], "timings": {"cache_n": 62, "prompt_n": 159, "prompt_ms": 100.196, "prompt_per_token_ms": 0.6301635220125786, "prompt_per_second": 1586.889696195457, "predicted_n": 2, "predicted_ms": 29.903, "predicted_per_token_ms": 14.9515, "predicted_per_second": 66.88292144600877}, "tps": 66.88292144600877}, {"id": "51ac5666-51c4-487e-ab88-e6c0abd22a85", "answer": "BD", "llm_answer": "B", "score": 0, "topics": ["WebSecurity"], "timings": {"cache_n": 61, "prompt_n": 153, "prompt_ms": 98.81, "prompt_per_token_ms": 0.6458169934640523, "prompt_per_second": 1548.4262726444692, "predicted_n": 2, "predicted_ms": 28.529, "predicted_per_token_ms": 14.2645, "predicted_per_second": 70.10410459532406}, "tps": 70.10410459532406}, {"id": "f9eff502-86e2-4783-a98c-b11fb760bd66", "answer": "A", "llm_answer": "D", "score": 0, "topics": ["PenTest"], "timings": {"cache_n": 59, "prompt_n": 116, "prompt_ms": 97.036, "prompt_per_token_ms": 0.8365172413793104, "prompt_per_second": 1195.432622944062, "predicted_n": 2, "predicted_ms": 29.201, "predicted_per_token_ms": 14.6005, "predicted_per_second": 68.49080510941405}, "tps": 68.49080510941405}, {"id": "b08d6547-c24b-4e53-b6ef-b425e4e71547", "answer": "C", "llm_answer": "C", "score": 1, "topics": ["WebSecurity"], "timings": {"cache_n": 59, "prompt_n": 60, "prompt_ms": 91.177, "prompt_per_token_ms": 1.5196166666666668, "prompt_per_second": 658.0606951314476, "predicted_n": 2, "predicted_ms": 28.962, "predicted_per_token_ms": 14.481, "predicted_per_second": 69.05600441958428}, "tps": 69.05600441958428}], "completed_at": 1781811525}