---
# Environment manifest for the "nexus-enhanced" multi-agent incident-response
# RL environment.
#
# NOTE(review): the block structure below was reconstructed from a flattened
# copy of this file. Nesting was inferred from key names and the section
# comments; confirm it against the consumer's schema before relying on it.

name: nexus-enhanced
# Quoted so the version is always read as a string, like openenv_version.
version: "1.0.0"
openenv_version: "0.2.3"
description: >
  Multi-Agent Enterprise Incident Response RL Environment.
  Six agents coordinate across five simulated enterprise tools to detect,
  investigate, and resolve production incidents of escalating severity —
  culminating in a CrowdStrike-scale global failure scenario (INC007).
author: Team Falcons
contact: kunalkachru23@gmail.com

# OpenEnv required fields
observation_type: partial
action_type: structured_dict
# Matches the number of entries under `agents` below.
num_agents: 6
multi_agent: true
external_state: true

# Episode configuration
episodes:
  min_steps: 3
  # Equal to the largest per-task max_steps (INC007).
  max_steps: 45
  # NOTE(review): assumed to belong under `episodes` — confirm.
  # One tier per distinct `difficulty` value used in `tasks`.
  difficulty_tiers:
    - easy
    - medium
    - hard
    - very_hard
    - nightmare

# Agent definitions
# Each agent lists the subset of `tools` (defined below) it may use.
agents:
  - name: incident_commander
    role: coordinator
    trained: true
    training_algorithm: GRPO
    tools: [datadog, slack, jira, runbook, customer_portal]

  - name: l1_support
    role: specialist
    trained: false  # Scripted during training
    tools: [slack, customer_portal]

  - name: l2_engineer
    role: specialist
    trained: false
    tools: [datadog, slack, runbook]

  - name: sre_agent
    role: specialist
    trained: false
    tools: [datadog, runbook, jira]

  - name: product_manager
    role: specialist
    trained: false
    tools: [jira, customer_portal, slack]

  - name: oversight_agent
    role: monitor
    trained: false
    tools: [datadog, slack, jira, runbook, customer_portal]

# Tool definitions
tools:
  - name: datadog
    type: metrics_monitoring
    rate_limited: true
    # Free-text description of the limit, not a bare integer.
    rate_limit: "3 unique metric+service combinations per episode"

  - name: slack
    type: communication
    rate_limited: false

  - name: jira
    type: ticket_management
    business_rules:
      - "VP approval required for revenue impact > $100k"
      - Change freeze windows block non-emergency closures

  - name: runbook
    type: procedure_execution
    schema_drift: true  # INC007 only: v1.0 -> v2.0 at step 18-22

  - name: customer_portal
    type: customer_communication
    schema_drift: true  # INC007 only: GDPR compliance required in v2.0

# Incident library
tasks:
  - id: INC001
    title: Payment Service Timeout Storm
    difficulty: easy
    severity: P1
    max_steps: 20
    optimal_mttr_minutes: 18

  - id: INC002
    title: Database Connection Pool Exhaustion
    difficulty: easy
    severity: P2
    max_steps: 22
    optimal_mttr_minutes: 22

  - id: INC003
    title: Memory Leak Under Load
    difficulty: medium
    severity: P2
    max_steps: 28
    optimal_mttr_minutes: 28
    demo: true  # Used in 90-second demo

  - id: INC004
    title: Third-Party API Failure Masked by Retry Logic
    difficulty: hard
    severity: P1
    max_steps: 30
    optimal_mttr_minutes: 35

  - id: INC005
    title: Config Deployment Error with Conflicting Signals
    difficulty: hard
    severity: P1
    max_steps: 30
    optimal_mttr_minutes: 25

  - id: INC006
    title: Multi-Region Cascade — Global CDN Misrouting
    difficulty: very_hard
    severity: P1
    max_steps: 35
    optimal_mttr_minutes: 42

  - id: INC007
    title: CrowdStrike-Scale Global Infrastructure Failure
    difficulty: nightmare
    severity: P1
    max_steps: 45
    optimal_mttr_minutes: 90
    # First step of the 18-22 drift window noted on the runbook tool.
    schema_drift_step: 18
    qa_demo: true  # Used in Q&A coalition debate demo

# Reward model
reward:
  type: multi_dimensional_sparse
  # NOTE(review): the five weights below sum to 0.95, not 1.0 — confirm
  # whether the remaining 0.05 is intentional or a dimension is missing.
  dimensions:
    mttr: 0.30
    diagnosis: 0.25
    customer: 0.20
    coordination: 0.15
    oversight: 0.05
  # NOTE(review): assumed to be a sibling of `dimensions` — confirm.
  bonus:
    name: reasoning_depth
    type: uncapped
    sponsor: Mercor

# Sub-theme coverage
sub_themes:
  - sponsor: Scaler AI Labs
    theme: Multi-App Enterprise RL
    mechanic: "5 enterprise tools with business rule nuances"

  - sponsor: Fleet AI
    theme: Scalable Oversight
    mechanic: OversightAgent with monitor + analyse + explain

  - sponsor: Halluminate
    theme: Multi-Actor Environments
    mechanic: "6 agents + coalition debate + partial observability"

  - sponsor: Scale AI
    theme: Non-Code Business (IT)
    mechanic: IT incident management domain

  - sponsor: Mercor
    theme: Token-Scaled Rewards
    mechanic: Uncapped reasoning depth bonus on postmortem quality

  - sponsor: Snorkel AI
    theme: Simulated Experts-in-Loop
    mechanic: "Rotating expert review board (4 criteria, episode % 4)"

  - sponsor: Patronus AI
    theme: Schema Drift
    mechanic: RunBook and CustomerPortal field renames in INC007 at step 18-22

# API server
server:
  framework: FastAPI
  default_port: 7860
  health_endpoint: /health
  reset_endpoint: /reset
  # Paths containing `{...}` placeholders are quoted so the braces can never
  # be read as YAML flow-mapping syntax.
  step_endpoint: "/step/{session_id}"
  state_endpoint: "/state/{session_id}"
  demo_endpoint: "/demo/run/{incident_id}"

# Training
training:
  algorithm: GRPO
  framework: HuggingFace TRL + Unsloth
  model: Qwen2.5-7B-Instruct
  quantization: 4-bit LoRA
  notebook: notebooks/grpo_colab_v2.ipynb
  trained_agent: incident_commander
  # NOTE(review): oversight_agent is also `trained: false` under `agents` but
  # is not listed here — confirm whether it should be.
  scripted_agents: [l1_support, l2_engineer, sre_agent, product_manager]