Spaces:

kunalkachru23
/

nexus-enhanced-stage

Running

App Files Files Community

nexus-enhanced-stage / openenv.yaml

kunalkachru23

Upload folder using huggingface_hub

d3e2173 verified about 2 months ago

raw

history blame contribute delete

4.95 kB

	name: nexus-enhanced
	version: 1.0.0
	openenv_version: "0.2.3"
	description: >
	Multi-Agent Enterprise Incident Response RL Environment.
	Six agents coordinate across five simulated enterprise tools to detect, investigate,
	and resolve production incidents of escalating severity — culminating in a
	CrowdStrike-scale global failure scenario (INC007).

	author: Team Falcons
	contact: kunalkachru23@gmail.com

	# OpenEnv required fields
	observation_type: partial
	action_type: structured_dict
	num_agents: 6
	multi_agent: true
	external_state: true

	# Episode configuration
	episodes:
	min_steps: 3
	max_steps: 45
	difficulty_tiers:
	- easy
	- medium
	- hard
	- very_hard
	- nightmare

	# Agent definitions
	agents:
	- name: incident_commander
	role: coordinator
	trained: true
	training_algorithm: GRPO
	tools: [datadog, slack, jira, runbook, customer_portal]

	- name: l1_support
	role: specialist
	trained: false # Scripted during training
	tools: [slack, customer_portal]

	- name: l2_engineer
	role: specialist
	trained: false
	tools: [datadog, slack, runbook]

	- name: sre_agent
	role: specialist
	trained: false
	tools: [datadog, runbook, jira]

	- name: product_manager
	role: specialist
	trained: false
	tools: [jira, customer_portal, slack]

	- name: oversight_agent
	role: monitor
	trained: false
	tools: [datadog, slack, jira, runbook, customer_portal]

	# Tool definitions
	tools:
	- name: datadog
	type: metrics_monitoring
	rate_limited: true
	rate_limit: 3 unique metric+service combinations per episode

	- name: slack
	type: communication
	rate_limited: false

	- name: jira
	type: ticket_management
	business_rules:
	- VP approval required for revenue impact > $100k
	- Change freeze windows block non-emergency closures

	- name: runbook
	type: procedure_execution
	schema_drift: true # INC007 only: v1.0 -> v2.0 at step 18-22

	- name: customer_portal
	type: customer_communication
	schema_drift: true # INC007 only: GDPR compliance required in v2.0

	# Incident library
	tasks:
	- id: INC001
	title: Payment Service Timeout Storm
	difficulty: easy
	severity: P1
	max_steps: 20
	optimal_mttr_minutes: 18

	- id: INC002
	title: Database Connection Pool Exhaustion
	difficulty: easy
	severity: P2
	max_steps: 22
	optimal_mttr_minutes: 22

	- id: INC003
	title: Memory Leak Under Load
	difficulty: medium
	severity: P2
	max_steps: 28
	optimal_mttr_minutes: 28
	demo: true # Used in 90-second demo

	- id: INC004
	title: Third-Party API Failure Masked by Retry Logic
	difficulty: hard
	severity: P1
	max_steps: 30
	optimal_mttr_minutes: 35

	- id: INC005
	title: Config Deployment Error with Conflicting Signals
	difficulty: hard
	severity: P1
	max_steps: 30
	optimal_mttr_minutes: 25

	- id: INC006
	title: Multi-Region Cascade — Global CDN Misrouting
	difficulty: very_hard
	severity: P1
	max_steps: 35
	optimal_mttr_minutes: 42

	- id: INC007
	title: CrowdStrike-Scale Global Infrastructure Failure
	difficulty: nightmare
	severity: P1
	max_steps: 45
	optimal_mttr_minutes: 90
	schema_drift_step: 18
	qa_demo: true # Used in Q&A coalition debate demo

	# Reward model
	reward:
	type: multi_dimensional_sparse
	dimensions:
	mttr: 0.30
	diagnosis: 0.25
	customer: 0.20
	coordination: 0.15
	oversight: 0.05
	bonus:
	name: reasoning_depth
	type: uncapped
	sponsor: Mercor

	# Sub-theme coverage
	sub_themes:
	- sponsor: Scaler AI Labs
	theme: Multi-App Enterprise RL
	mechanic: 5 enterprise tools with business rule nuances
	- sponsor: Fleet AI
	theme: Scalable Oversight
	mechanic: OversightAgent with monitor + analyse + explain
	- sponsor: Halluminate
	theme: Multi-Actor Environments
	mechanic: 6 agents + coalition debate + partial observability
	- sponsor: Scale AI
	theme: Non-Code Business (IT)
	mechanic: IT incident management domain
	- sponsor: Mercor
	theme: Token-Scaled Rewards
	mechanic: Uncapped reasoning depth bonus on postmortem quality
	- sponsor: Snorkel AI
	theme: Simulated Experts-in-Loop
	mechanic: Rotating expert review board (4 criteria, episode % 4)
	- sponsor: Patronus AI
	theme: Schema Drift
	mechanic: RunBook and CustomerPortal field renames in INC007 at step 18-22

	# API server
	server:
	framework: FastAPI
	default_port: 7860
	health_endpoint: /health
	reset_endpoint: /reset
	step_endpoint: /step/{session_id}
	state_endpoint: /state/{session_id}
	demo_endpoint: /demo/run/{incident_id}

	# Training
	training:
	algorithm: GRPO
	framework: HuggingFace TRL + Unsloth
	model: Qwen2.5-7B-Instruct
	quantization: 4-bit LoRA
	notebook: notebooks/grpo_colab_v2.ipynb
	trained_agent: incident_commander
	scripted_agents: [l1_support, l2_engineer, sre_agent, product_manager]