spec_version: 1 name: security_audit_env type: space runtime: fastapi app: server.app:app port: 8000 description: > AI Security Audit Benchmark — trains and evaluates AI agents on real-world VAPT (Vulnerability Assessment & Penetration Testing) engagements with three-tier output difficulty and compliance framework mapping. version: "1.0.0" tasks: - id: easy name: Startup Web App Audit difficulty: easy max_steps: 30 description: "2 hosts, 3 vulnerabilities. Labeled tool output with CWE/CVSS." - id: medium name: E-commerce Platform Audit difficulty: medium max_steps: 50 description: "4 hosts (2 hidden), 6 vulnerabilities. Evidence-based output. Attack chaining required." - id: hard name: Enterprise SOC2 Pre-Audit difficulty: hard max_steps: 60 description: "6 hosts (3 hidden), 10 vulnerabilities. Raw HTTP output. Honeypot trap. Progressive discovery." tools: - network_scan - service_fingerprint - web_crawl - vulnerability_scan - test_injection - test_xss - test_auth - test_config - test_crypto - check_secrets