---
# Environment manifest: identity and ownership metadata.
name: nexus-enhanced
# Quoted so the version is always loaded as a string, never as a number.
version: "1.0.0"
openenv_version: "0.2.3"
# Folded scalar (`>`): the indented lines below are joined with spaces into a
# single paragraph when the file is loaded.
description: >
  Multi-Agent Enterprise Incident Response RL Environment.
  Six agents coordinate across five simulated enterprise tools to detect, investigate,
  and resolve production incidents of escalating severity — culminating in a
  CrowdStrike-scale global failure scenario (INC007).

author: Team Falcons
contact: kunalkachru23@gmail.com

# Environment characteristics.
observation_type: partial
action_type: structured_dict
# Equals the number of entries listed under `agents`.
num_agents: 6
multi_agent: true
external_state: true

# Episode length bounds and the difficulty ladder.
# `max_steps` here equals the largest per-task `max_steps` under `tasks` (INC007).
episodes:
  min_steps: 3
  max_steps: 45
  # NOTE(review): nesting of `difficulty_tiers` under `episodes` was
  # reconstructed from a flattened copy of this file — confirm against the
  # consumer's schema. Every `difficulty` value used under `tasks` appears here.
  difficulty_tiers:
    - easy
    - medium
    - hard
    - very_hard
    - nightmare

# Agent roster: one entry per agent, with its role, whether it is trained,
# and the tools (by `tools[].name`) it is allowed to use.
agents:
  # The only agent marked `trained: true`; it also carries the training algorithm.
  - name: incident_commander
    role: coordinator
    trained: true
    training_algorithm: GRPO
    tools: [datadog, slack, jira, runbook, customer_portal]

  - name: l1_support
    role: specialist
    trained: false
    tools: [slack, customer_portal]

  - name: l2_engineer
    role: specialist
    trained: false
    tools: [datadog, slack, runbook]

  - name: sre_agent
    role: specialist
    trained: false
    tools: [datadog, runbook, jira]

  - name: product_manager
    role: specialist
    trained: false
    tools: [jira, customer_portal, slack]

  # Monitor role; has access to the same five tools as the coordinator.
  - name: oversight_agent
    role: monitor
    trained: false
    tools: [datadog, slack, jira, runbook, customer_portal]

# Simulated enterprise tools. The `name` values are the identifiers referenced
# from each agent's `tools` list.
tools:
  - name: datadog
    type: metrics_monitoring
    rate_limited: true
    # Free-text description of the limit, not a machine-readable number.
    rate_limit: 3 unique metric+service combinations per episode

  - name: slack
    type: communication
    rate_limited: false

  - name: jira
    type: ticket_management
    business_rules:
      - VP approval required for revenue impact > $100k
      - Change freeze windows block non-emergency closures

  # `schema_drift: true` is set on exactly these two tools (runbook and
  # customer_portal).
  - name: runbook
    type: procedure_execution
    schema_drift: true

  - name: customer_portal
    type: customer_communication
    schema_drift: true

# Incident scenarios. Each task declares its difficulty tier, severity,
# a per-task step cap, and an optimal mean-time-to-resolve in minutes.
tasks:
  - id: INC001
    title: Payment Service Timeout Storm
    difficulty: easy
    severity: P1
    max_steps: 20
    optimal_mttr_minutes: 18

  - id: INC002
    title: Database Connection Pool Exhaustion
    difficulty: easy
    severity: P2
    max_steps: 22
    optimal_mttr_minutes: 22

  - id: INC003
    title: Memory Leak Under Load
    difficulty: medium
    severity: P2
    max_steps: 28
    optimal_mttr_minutes: 28
    demo: true

  - id: INC004
    title: Third-Party API Failure Masked by Retry Logic
    difficulty: hard
    severity: P1
    max_steps: 30
    optimal_mttr_minutes: 35

  - id: INC005
    title: Config Deployment Error with Conflicting Signals
    difficulty: hard
    severity: P1
    max_steps: 30
    optimal_mttr_minutes: 25

  - id: INC006
    title: Multi-Region Cascade — Global CDN Misrouting
    difficulty: very_hard
    severity: P1
    max_steps: 35
    optimal_mttr_minutes: 42

  # The only task that sets `schema_drift_step`.
  - id: INC007
    title: CrowdStrike-Scale Global Infrastructure Failure
    difficulty: nightmare
    severity: P1
    max_steps: 45
    optimal_mttr_minutes: 90
    schema_drift_step: 18
    qa_demo: true

# Reward specification: weighted dimensions plus a separate bonus term.
reward:
  type: multi_dimensional_sparse
  # NOTE(review): these five weights sum to 0.95, not 1.0. Confirm whether the
  # remaining 0.05 is intentionally left to the bonus below or a weight is off.
  dimensions:
    mttr: 0.30
    diagnosis: 0.25
    customer: 0.20
    coordination: 0.15
    oversight: 0.05
  bonus:
    name: reasoning_depth
    type: uncapped
    sponsor: Mercor

# Sponsor sub-themes: each entry pairs a sponsor and theme with the
# environment mechanic that addresses it (free-text descriptions).
sub_themes:
  - sponsor: Scaler AI Labs
    theme: Multi-App Enterprise RL
    mechanic: 5 enterprise tools with business rule nuances
  - sponsor: Fleet AI
    theme: Scalable Oversight
    mechanic: OversightAgent with monitor + analyse + explain
  - sponsor: Halluminate
    theme: Multi-Actor Environments
    mechanic: 6 agents + coalition debate + partial observability
  - sponsor: Scale AI
    theme: Non-Code Business (IT)
    mechanic: IT incident management domain
  - sponsor: Mercor
    theme: Token-Scaled Rewards
    mechanic: Uncapped reasoning depth bonus on postmortem quality
  - sponsor: Snorkel AI
    theme: Simulated Experts-in-Loop
    mechanic: Rotating expert review board (4 criteria, episode % 4)
  - sponsor: Patronus AI
    theme: Schema Drift
    mechanic: RunBook and CustomerPortal field renames in INC007 at step 18-22

# HTTP server settings and endpoint paths.
server:
  framework: FastAPI
  default_port: 7860
  health_endpoint: /health
  reset_endpoint: /reset
  # Path templates containing `{...}` placeholders are single-quoted so the
  # braces can never be mistaken for YAML flow-mapping syntax.
  step_endpoint: '/step/{session_id}'
  state_endpoint: '/state/{session_id}'
  demo_endpoint: '/demo/run/{incident_id}'

# Training setup for the single trained agent.
training:
  algorithm: GRPO
  framework: HuggingFace TRL + Unsloth
  model: Qwen2.5-7B-Instruct
  quantization: 4-bit LoRA
  notebook: notebooks/grpo_colab_v2.ipynb
  # Same agent that is marked `trained: true` under `agents`.
  trained_agent: incident_commander
  # NOTE(review): `oversight_agent` is `trained: false` under `agents` but is
  # not listed here — confirm whether that omission is intentional.
  scripted_agents: [l1_support, l2_engineer, sre_agent, product_manager]