marama_route/cli.py · AbteeXAILab/marama-route at main

Publish expanded LumynaX product platform package

711af7c verified 10 days ago

127 kB

	from __future__ import annotations

	import argparse
	import importlib.metadata as importlib_metadata
	import importlib.util
	import json
	import os
	import platform as py_platform
	import shutil
	import subprocess
	import sys
	import tempfile
	import time
	from collections.abc import Sequence
	from pathlib import Path
	from typing import Any

	from .compat import build_compatibility_matrix, model_runtime_compatibility
	from .download import (
	default_cache_root,
	list_pulled_models,
	load_chat_session,
	local_model_files,
	model_cache_dir,
	pull_model,
	resolve_model,
	run_pulled_model,
	)
	from .gateway import build_models_response, route_chat_payload
	from .ops import (
	cache_report,
	default_state_root,
	delete_alias,
	diff_model_registry,
	estimate_model_download,
	export_audit_receipts,
	export_session_markdown,
	hardware_recommendations,
	inspect_hardware,
	list_audit_receipts,
	load_aliases,
	load_session,
	prune_cache,
	remove_cached_model,
	resolve_alias,
	save_session,
	set_alias,
	set_favorite,
	show_audit_receipt,
	verify_cache,
	write_audit_receipt,
	)
	from .platform import (
	build_agent_bridge_config,
	build_opencode_provider_config,
	build_registry_analytics,
	catalog_models,
	compare_models,
	normalize_agent_target,
	recommend_model,
	render_hpe_apptainer_definition,
	render_hpe_gateway_config,
	render_hpe_readme,
	render_hpe_slurm_script,
	route_scenario_matrix,
	)
	from .registry import RoutingRequest, load_model_registry
	from .router import SovereignModelRouter
	from .ui import default_registry_path

	# Inputs treated as "leave the chat" commands (presumably matched against
	# user input in the interactive loop, which is outside this view).
	CHAT_COMMANDS = {"/bye", "/exit", "/q", "/quit"}
	# Environment variable names consulted for a Hugging Face access token.
	_HF_TOKEN_ENV_NAMES = ("HF_TOKEN", "HUGGING_FACE_HUB_TOKEN", "HUGGINGFACE_HUB_TOKEN")
	# Commands recognised by the interactive model picker.
	MODEL_PICKER_COMMANDS = {
	    "/all",
	    "/back",
	    "/categories",
	    "/catalog",
	    "/families",
	    "/family",
	    "/help",
	    "/h",
	    "/hardware",
	    "/local",
	    "/menu",
	    "/models",
	    "/next",
	    "/prev",
	    "/pull",
	    "/runnable",
	    "/search",
	    "/switch",
	    "/use",
	    "/vllm",
	    "/nim",
	    "/nem",
	    "/nemo",
	    "/recommended",
	    "/recommend",
	    "?",
	}
	# Number of entries shown per model-picker page.
	MODEL_PICKER_PAGE_SIZE = 12
	_DEPLOYMENT_USABLE_STATUSES = {"supported", "candidate", "experimental"}
	# "Pathway" statuses are the usable ones plus models that need conversion first.
	_DEPLOYMENT_PATHWAY_STATUSES = _DEPLOYMENT_USABLE_STATUSES | {"convert_required"}
	# Display labels for deployment targets, keyed by internal target id.
	_DEPLOYMENT_TARGET_LABELS = {
	    "vllm": "vLLM",
	    "nvidia_nim": "NVIDIA NIM",
	    "nvidia_nemo": "NVIDIA NeMo/NEM",
	}
	# Target spellings listed in the "Unsupported agent target" error message.
	_AGENT_TARGET_CHOICES = ("generic", "claude-code", "codex", "continue", "opencode", "litellm", "tabby", "hpe", "hpe-slurm")
	# Targets used when the caller asks for "all" or names no target.
	_AGENT_SETUP_DEFAULT_TARGETS = ("claude-code", "codex", "continue", "opencode", "litellm", "tabby")
	# Normalized target names that are considered valid.
	_AGENT_CANONICAL_TARGETS = {"generic", "claude-code", "codex", "continue", "opencode", "litellm", "tabby", "hpe-slurm"}


	class _ExitConversation(Exception):
	    """Marker exception with no extra state.

	    Presumably raised to unwind out of an interactive conversation loop; the
	    raise and catch sites are not in this part of the file.
	    """


	def _load_json_mapping(path: Path) -> dict[str, Any]:
	    """Read *path* as JSON and return it, insisting on a top-level object.

	    The file is decoded as UTF-8 and a leading byte-order mark is tolerated.

	    Raises:
	        ValueError: if the document's top-level value is not a JSON object.
	    """
	    with path.open(encoding="utf-8-sig") as handle:
	        document = json.load(handle)
	    if isinstance(document, dict):
	        return document
	    raise ValueError(f"Expected mapping in {path}")


	def _registry_path(args: argparse.Namespace) -> Path:
	    """Return the registry path given on the command line, else the default one."""
	    explicit = args.registry
	    if explicit:
	        return explicit
	    return default_registry_path()


	def _route(args: argparse.Namespace) -> int:
	    """Route the JSON request in ``args.request`` and print the decision as JSON.

	    Returns 0 when the router selected a model and 2 when it did not.
	    """
	    registry = load_model_registry(_registry_path(args))
	    request_payload = _load_json_mapping(args.request)
	    router = SovereignModelRouter(registry)
	    outcome = router.route(RoutingRequest.from_payload(request_payload))
	    print(json.dumps(outcome.to_dict(), indent=2, sort_keys=True))
	    if outcome.selected_model is None:
	        return 2
	    return 0


	def _models(args: argparse.Namespace) -> int:
	    """List registry models that match the CLI filters, as a table or as JSON.

	    A positive ``args.limit`` truncates the matches. The JSON form adds
	    ``count`` (matches shown) and ``total_count`` (whole registry).
	    Always returns 0.
	    """
	    registry = load_model_registry(_registry_path(args))
	    matches = _filter_models_command(registry, args)
	    if args.limit > 0:
	        matches = matches[: args.limit]
	    if args.format == "table":
	        _print_models_table(matches)
	    else:
	        body = build_models_response(tuple(matches))
	        body["count"] = len(matches)
	        body["total_count"] = len(registry)
	        print(json.dumps(body, indent=2, sort_keys=True))
	    return 0


	def _chat_dry_run(args: argparse.Namespace) -> int:
	    """Route a chat payload through the gateway and print the result.

	    Returns 0 when the route decision names a selected model, otherwise 2.
	    """
	    registry = load_model_registry(_registry_path(args))
	    outcome = route_chat_payload(_load_json_mapping(args.request), registry)
	    print(json.dumps(outcome, indent=2, sort_keys=True))
	    if outcome["route_decision"]["selected_model"] is None:
	        return 2
	    return 0


	def _catalog(args: argparse.Namespace) -> int:
	    """Print the model catalog filtered by the CLI options, as JSON. Returns 0."""
	    registry = load_model_registry(_registry_path(args))
	    # CLI option names are mapped onto the filter keys catalog_models receives.
	    filters = {
	        "search": args.search,
	        "task_type": args.task,
	        "runtime": args.runtime,
	        "family": args.family,
	        "modality": args.modality,
	        "jurisdiction": args.jurisdiction,
	        "min_context_tokens": args.min_context_tokens,
	        "requires_json": args.requires_json,
	        "requires_tools": args.requires_tools,
	        "requires_local": args.requires_local,
	        "limit": args.limit,
	    }
	    listing = catalog_models(registry, filters)
	    print(json.dumps(listing, indent=2, sort_keys=True))
	    return 0


	def _filter_models_command(models: tuple[Any, ...], args: argparse.Namespace) -> list[Any]:
	    """Return the models that satisfy every non-empty CLI filter, in input order.

	    All comparisons are case-insensitive. ``search`` is a substring match over
	    id, repo, family, runtime and tags; ``family`` matches a substring of the
	    family or an exact tag; ``runtime`` is a substring match; ``tag`` must equal
	    one of the model's tags.
	    """
	    needle, family, runtime, tag = (
	        str(getattr(args, option) or "").strip().lower()
	        for option in ("search", "family", "runtime", "tag")
	    )

	    def _accepts(model: Any) -> bool:
	        tag_set = {str(item).lower() for item in model.tags}
	        haystack = " ".join(
	            (model.model_id, model.repo_id, model.family, model.runtime, " ".join(sorted(tag_set))),
	        ).lower()
	        if needle and needle not in haystack:
	            return False
	        if family and not (family in model.family.lower() or family in tag_set):
	            return False
	        if runtime and runtime not in model.runtime.lower():
	            return False
	        return not (tag and tag not in tag_set)

	    return [model for model in models if _accepts(model)]


	def _print_models_table(models: list[Any]) -> None:
	    """Print a fixed-width table of models, or a notice when there are none.

	    Columns are model id (42), runtime (22) and family (14), each truncated to
	    its width, followed by at most the first six tags.
	    """

	    def _row(model_id: str, runtime: str, family: str, tags: str) -> str:
	        return f"{model_id[:42]:42} {runtime[:22]:22} {family[:14]:14} {tags}"

	    if not models:
	        print("No matching LumynaX models.")
	        return
	    print(_row("model", "runtime", "family", "tags"))
	    print("-" * 110)
	    for entry in models:
	        shown_tags = ", ".join(tuple(entry.tags)[:6])
	        print(_row(entry.model_id, entry.runtime, entry.family, shown_tags))


	def _compat(args: argparse.Namespace) -> int:
	    """Print runtime compatibility for one model, or the registry-wide matrix.

	    With ``args.model`` set, a JSON report for that model is printed. Otherwise
	    the matrix is built from the target/status/limit options and printed as
	    JSON or as a table depending on ``args.format``. Always returns 0.
	    """
	    registry = load_model_registry(_registry_path(args))
	    if not args.model:
	        matrix = build_compatibility_matrix(
	            registry,
	            target=args.target,
	            status=args.status,
	            limit=args.limit,
	        )
	        if args.format == "json":
	            print(json.dumps(matrix, indent=2, sort_keys=True))
	        else:
	            _print_compatibility_table(matrix)
	        return 0
	    chosen = _resolve_cli_model(registry, args.model)
	    summary = {
	        "model_id": chosen.model_id,
	        "repo_id": chosen.repo_id,
	        "runtime": chosen.runtime,
	        "family": chosen.family,
	        "modalities": list(chosen.modalities),
	        "primary_artifact": chosen.primary_artifact,
	        "tags": list(chosen.tags),
	    }
	    report = {
	        "ok": True,
	        "model": summary,
	        "runtime_compatibility": model_runtime_compatibility(chosen),
	    }
	    print(json.dumps(report, indent=2, sort_keys=True))
	    return 0


	def _categories(args: argparse.Namespace) -> int:
	    """Print the category summary of the registry as JSON or as text. Returns 0."""
	    registry = load_model_registry(_registry_path(args))
	    summary = _category_summary(registry, limit=args.limit)
	    if args.format != "json":
	        _print_category_summary(summary)
	    else:
	        print(json.dumps(summary, indent=2, sort_keys=True))
	    return 0


	def _compare(args: argparse.Namespace) -> int:
	    """Compare the models named in ``args.model`` and print the result as JSON.

	    Each ``args.model`` entry may hold several comma-separated ids; blanks are
	    dropped. An optional request file supplies routing context. Returns 0 when
	    the comparison reports ``ok``, otherwise 2.
	    """
	    registry = load_model_registry(_registry_path(args))
	    wanted_ids: list[str] = []
	    for raw in args.model:
	        for piece in raw.split(","):
	            if piece.strip():
	                wanted_ids.append(piece.strip())
	    context = None
	    if args.request:
	        context = _load_json_mapping(args.request)
	    comparison = compare_models(registry, wanted_ids, context)
	    print(json.dumps(comparison, indent=2, sort_keys=True))
	    if comparison["ok"]:
	        return 0
	    return 2


	def _matrix(args: argparse.Namespace) -> int:
	    """Print the route scenario matrix as JSON.

	    Returns 0 when the matrix is ``ok`` or ``args.allow_blocked_exit_zero`` is
	    set, otherwise 2.
	    """
	    registry = load_model_registry(_registry_path(args))
	    scenarios = route_scenario_matrix(registry)
	    print(json.dumps(scenarios, indent=2, sort_keys=True))
	    if scenarios["ok"] or args.allow_blocked_exit_zero:
	        return 0
	    return 2


	def _analytics(args: argparse.Namespace) -> int:
	    """Print registry analytics as JSON. Always returns 0."""
	    registry = load_model_registry(_registry_path(args))
	    report = build_registry_analytics(registry)
	    print(json.dumps(report, indent=2, sort_keys=True))
	    return 0


	def _opencode_config(args: argparse.Namespace) -> int:
	    """Print an OpenCode provider config for ``args.base_url`` as JSON. Returns 0."""
	    registry = load_model_registry(_registry_path(args))
	    provider = build_opencode_provider_config(registry, base_url=args.base_url)
	    print(json.dumps(provider, indent=2, sort_keys=True))
	    return 0


	def _recommend(args: argparse.Namespace) -> int:
	    """Recommend a model for the given prompt and constraints; print JSON.

	    The prompt is the positional words joined by spaces, falling back to
	    ``args.prompt_text``. Modalities come from a comma-separated option and
	    default to ``("text",)`` when empty. Returns 0 on ``ok``, otherwise 2.
	    """
	    registry = load_model_registry(_registry_path(args))
	    if args.prompt:
	        prompt = " ".join(args.prompt).strip()
	    else:
	        prompt = args.prompt_text
	    requested = tuple(piece.strip().lower() for piece in args.modality.split(",") if piece.strip())
	    verdict = recommend_model(
	        registry,
	        prompt=prompt,
	        task_type=args.task,
	        jurisdiction=args.jurisdiction,
	        data_sensitivity=args.sensitivity,
	        min_context_tokens=args.min_context_tokens,
	        requires_local=args.requires_local,
	        requires_json=args.requires_json,
	        requires_tools=args.requires_tools,
	        modalities=requested or ("text",),
	    )
	    print(json.dumps(verdict, indent=2, sort_keys=True))
	    if verdict["ok"]:
	        return 0
	    return 2


	def _doctor(args: argparse.Namespace) -> int:
	    """Run environment diagnostics and print them as JSON or via ``_print_doctor``.

	    A check blocks when its ``ok`` is falsy and its ``severity`` is not
	    ``"warning"``. Checks that carry no ``severity`` key (python, package,
	    registry, cache, cache_writable, huggingface_hub) are therefore blocking.

	    Returns 0 when nothing blocks or ``args.allow_warnings`` is set, otherwise 2.
	    """
	    models = load_model_registry(_registry_path(args))
	    cache_root = args.cache_dir or default_cache_root()
	    pulled = list_pulled_models(cache_root)
	    # Hardware inspection only runs when requested; otherwise the report holds None.
	    hardware = inspect_hardware(cache_root) if args.hardware else None
	    # Insertion order here fixes the order of names in `blocking_checks`.
	    checks = {
	        "python": {
	            "ok": sys.version_info >= (3, 11),
	            "version": py_platform.python_version(),
	            "executable": sys.executable,
	        },
	        "package": {
	            "ok": True,
	            "version": _installed_version(),
	        },
	        "registry": {
	            "ok": bool(models),
	            "count": len(models),
	            "path": str(_registry_path(args)),
	            "chat_capable": sum(1 for model in models if _is_chat_runnable(model)),
	        },
	        "cache": {
	            "ok": True,
	            "path": str(cache_root),
	            "pulled_count": pulled["count"],
	        },
	        # NOTE: this creates the cache directory if it does not exist yet.
	        "cache_writable": _writable_dir(cache_root),
	        "huggingface_hub": {
	            "ok": importlib.util.find_spec("huggingface_hub") is not None,
	            "hf_cli": shutil.which("hf") or shutil.which("huggingface-cli") or "",
	            # Only whether a token variable is set is recorded, never its value.
	            "token_env_present": any(os.getenv(name) for name in _HF_TOKEN_ENV_NAMES),
	            "needed_for": "pull, update-registry, and private HF access",
	        },
	        "llama_cpp": {
	            "ok": importlib.util.find_spec("llama_cpp") is not None,
	            "needed_for": "local GGUF chat and run",
	            "severity": "warning",
	        },
	        "transformers": {
	            "ok": importlib.util.find_spec("transformers") is not None,
	            "needed_for": "offline Transformers text-generation snapshots",
	            "severity": "warning",
	        },
	        "tokenizer_extras": {
	            # Any one of the three tokenizer packages is enough.
	            "ok": any(importlib.util.find_spec(name) is not None for name in ("sentencepiece", "tiktoken", "tokenizers")),
	            "needed_for": "slow-tokenizer conversion and some Transformers snapshots",
	            "severity": "warning",
	        },
	        "git": {
	            "ok": shutil.which("git") is not None,
	            "executable": shutil.which("git") or "",
	            "severity": "warning",
	        },
	        "node": {
	            "ok": shutil.which("node") is not None,
	            "executable": shutil.which("node") or "",
	            "needed_for": "npm wrapper usage",
	            "severity": "warning",
	        },
	        "npm": {
	            "ok": shutil.which("npm") is not None,
	            "executable": shutil.which("npm") or "",
	            "needed_for": "npm wrapper usage",
	            "severity": "warning",
	        },
	    }
	    if args.online:
	        # Presumably performs a network call to Hugging Face; the helper is
	        # defined outside this view. Its result does not affect `ok`.
	        checks["huggingface_hub"]["whoami"] = _hf_whoami_status()
	    blocking = [name for name, item in checks.items() if not item.get("ok") and item.get("severity") != "warning"]
	    result = {
	        "ok": not blocking,
	        "product": "LumynaX MaramaRoute",
	        "checks": checks,
	        "blocking_checks": blocking,
	        "hardware": hardware,
	        "next_commands": {
	            "setup": "MaramaRoute setup --all-targets --hpe",
	            "install_runtime": "python -m pip install llama-cpp-python",
	            "install_transformers_runtime": "python -m pip install transformers torch sentencepiece tiktoken tokenizers",
	            "choose_model": "MaramaRoute chat",
	            "pull_small_model": "MaramaRoute pull qwen25-05b",
	            "run_gateway": "MaramaRoute serve --live-local --port 8787",
	            "agent_doctor": "MaramaRoute agent doctor --target claude-code",
	            "hpe_init": "MaramaRoute hpe init qwen25-05b --backend auto",
	        },
	    }
	    if args.json:
	        print(json.dumps(result, indent=2, sort_keys=True))
	    else:
	        _print_doctor(result)
	    return 0 if result["ok"] or args.allow_warnings else 2


	def _agent_config(args: argparse.Namespace) -> int:
	    """Build an agent bridge config and write it to ``args.output`` or stdout.

	    When writing to a file, missing parent directories are created and a
	    trailing newline is appended. Always returns 0.
	    """
	    registry = load_model_registry(_registry_path(args))
	    bridge = build_agent_bridge_config(
	        registry,
	        target=args.target,
	        base_url=args.base_url,
	        host=args.host,
	        port=args.port,
	        cache_dir=args.cache_dir,
	        model_id=args.model or "",
	    )
	    rendered = json.dumps(bridge, indent=2, sort_keys=True)
	    if not args.output:
	        print(rendered)
	        return 0
	    args.output.parent.mkdir(parents=True, exist_ok=True)
	    args.output.write_text(rendered + "\n", encoding="utf-8")
	    return 0


	def _hpe_job(args: argparse.Namespace) -> int:
	    """Render a Slurm job script for a model and write it to a file or stdout.

	    The model is ``args.model`` when given, otherwise the recommended model.
	    Output to a file creates missing parent directories. Always returns 0.
	    """
	    registry = load_model_registry(_registry_path(args))
	    if args.model:
	        chosen = _resolve_cli_model(registry, args.model)
	    else:
	        chosen = _recommended_model(registry)
	    render_options = {
	        "model_id": chosen.model_id,
	        "repo_id": chosen.repo_id,
	        "model_runtime": chosen.runtime,
	        "mode": args.mode,
	        "prompt": args.prompt_text,
	        "port": args.port,
	        "backend": args.backend,
	        "backend_port": args.backend_port,
	        "backend_base_url": args.backend_base_url,
	        "backend_model": args.backend_model,
	        "backend_command": args.backend_command,
	        "api_key_env": args.api_key_env,
	        "vllm_args": args.vllm_args,
	        "cache_dir": args.cache_dir,
	        "job_name": args.job_name,
	        "partition": args.partition,
	        "time_limit": args.time,
	        "cpus": args.cpus,
	        "memory": args.memory,
	        "gpus": args.gpus,
	    }
	    job_script = render_hpe_slurm_script(**render_options)
	    if not args.output:
	        # The script is printed as-is, without an extra trailing newline.
	        print(job_script, end="")
	        return 0
	    args.output.parent.mkdir(parents=True, exist_ok=True)
	    args.output.write_text(job_script, encoding="utf-8")
	    return 0


	def _init(args: argparse.Namespace) -> int:
	    """Initialise local state: write the config file and register a default model.

	    Writes ``marama-route.json`` into the state directory, points the
	    ``default`` alias at the chosen model and marks it as a favorite. Depending
	    on flags it also writes an agent config, writes a Slurm script, and pulls
	    the model. Prints a JSON summary and always returns 0.
	    """
	    models = load_model_registry(_registry_path(args))
	    model = _resolve_cli_model(models, args.model) if args.model else _recommended_model(models)
	    state_root = args.state_dir or default_state_root()
	    state_root.mkdir(parents=True, exist_ok=True)
	    # NOTE(review): the self-test outcome is only reported; the result below
	    # hard-codes "ok": True and the exit code is always 0.
	    self_test = _init_self_test(models, model, args) if args.self_test else {"enabled": False}
	    config = {
	        "product": "LumynaX MaramaRoute",
	        "version": _installed_version(),
	        "state_dir": str(state_root),
	        # NOTE(review): if args.cache_dir can be None this stores the text
	        # "None" — confirm the parser supplies a default.
	        "cache_dir": str(args.cache_dir),
	        "default_model": model.model_id,
	        "gateway": {
	            "host": args.host,
	            "port": args.port,
	            "base_url": f"http://{args.host}:{args.port}/v1",
	            "live_local": args.live_local,
	        },
	        "next_commands": {
	            "doctor": "MaramaRoute doctor --hardware",
	            "pull": f"MaramaRoute pull {model.model_id}",
	            "chat": f"MaramaRoute chat {model.model_id}",
	            "serve": f"MaramaRoute serve --host {args.host} --port {args.port} --live-local",
	        },
	        "self_test": self_test,
	    }
	    config_path = state_root / "marama-route.json"
	    config_path.write_text(json.dumps(config, indent=2, sort_keys=True), encoding="utf-8")
	    set_alias("default", model.model_id, state_root)
	    set_favorite(model.model_id, state_root)
	    artifacts = {"config": str(config_path), "state_dir": str(state_root)}
	    if args.agent:
	        agent_result = build_agent_bridge_config(
	            models,
	            target=args.agent,
	            base_url=f"http://{args.host}:{args.port}/v1",
	            host=args.host,
	            port=args.port,
	            cache_dir=args.cache_dir,
	            model_id=model.model_id,
	        )
	        # The file name uses the target exactly as typed on the command line.
	        agent_path = state_root / f"{args.agent}.agent.json"
	        agent_path.write_text(json.dumps(agent_result, indent=2, sort_keys=True), encoding="utf-8")
	        artifacts["agent_config"] = str(agent_path)
	    if args.hpe:
	        hpe_path = state_root / "marama-route.slurm"
	        # Only the model id is passed; every other script option uses the
	        # renderer's own defaults.
	        hpe_path.write_text(render_hpe_slurm_script(model_id=model.model_id), encoding="utf-8")
	        artifacts["hpe_job"] = str(hpe_path)
	    if args.pull:
	        # The pull result is discarded; only the model id is recorded.
	        pull_model(models, model.model_id, cache_root=args.cache_dir)
	        artifacts["pulled_model"] = model.model_id
	    result = {"ok": True, "model_id": model.model_id, "artifacts": artifacts, "config": config, "self_test": self_test}
	    print(json.dumps(result, indent=2, sort_keys=True))
	    return 0


	def _init_self_test(models: tuple[Any, ...], model: Any, args: argparse.Namespace) -> dict[str, Any]:
	    """Run the quick setup self-test and return its report.

	    The report has ``enabled`` (always True), ``ok`` and ``checks``. ``ok`` is
	    False when any check failed and that check is not marked with severity
	    ``"warning"``.
	    """

	    def _importable(module_name: str) -> bool:
	        return importlib.util.find_spec(module_name) is not None

	    checks = {
	        "registry_loaded": {
	            "ok": bool(models),
	            "count": len(models),
	            "path": str(_registry_path(args)),
	        },
	        "selected_model": {
	            "ok": True,
	            "model_id": model.model_id,
	            "runtime": model.runtime,
	        },
	        "cache_writable": _writable_dir(args.cache_dir),
	        "runtime_available": {
	            "ok": _importable("llama_cpp"),
	            "package": "llama-cpp-python",
	            "needed_for": "local GGUF chat and run",
	            "severity": "warning",
	        },
	        "huggingface_hub": {
	            "ok": _importable("huggingface_hub"),
	            "needed_for": "pull and update-registry",
	        },
	    }
	    has_blocker = any(
	        not entry.get("ok") and entry.get("severity") != "warning"
	        for entry in checks.values()
	    )
	    return {"enabled": True, "ok": not has_blocker, "checks": checks}


	def _setup(args: argparse.Namespace) -> int:
	    """Provision a full local setup: config, aliases, agent workspaces, HPE bundle.

	    Writes ``marama-route.json`` into the state directory, sets the ``default``
	    alias and favorite, writes one workspace directory per agent target under
	    ``agents/<target>``, optionally writes the HPE bundle under ``hpe/`` and
	    optionally pulls the model. Prints a JSON summary.

	    Returns 0 when the self-test passed or ``args.allow_warnings`` is set,
	    otherwise 2.
	    """
	    models = load_model_registry(_registry_path(args))
	    model = _resolve_cli_model(models, args.model) if args.model else _recommended_model(models)
	    state_root = args.state_dir or default_state_root()
	    state_root.mkdir(parents=True, exist_ok=True)
	    targets = _expand_agent_targets(args.target, all_targets=args.all_targets, include_hpe=args.hpe)
	    hardware = inspect_hardware(args.cache_dir)
	    # The self-test always runs here and drives the result's "ok" value.
	    self_test = _init_self_test(models, model, args)
	    base_url = f"http://{args.host}:{args.port}/v1"
	    config = {
	        "product": "LumynaX MaramaRoute",
	        "mode": "production_setup",
	        "version": _installed_version(),
	        "state_dir": str(state_root),
	        "cache_dir": str(args.cache_dir),
	        "default_model": model.model_id,
	        "default_repo": model.repo_id,
	        "default_runtime": model.runtime,
	        "hardware": hardware,
	        "agent_targets": targets,
	        "gateway": {
	            "host": args.host,
	            "port": args.port,
	            "base_url": base_url,
	            "live_local": True,
	        },
	        "next_commands": {
	            "doctor": "MaramaRoute doctor --hardware",
	            "pull": f"MaramaRoute pull {model.model_id}",
	            "chat": f"MaramaRoute chat {model.model_id}",
	            "serve": f"MaramaRoute serve --host {args.host} --port {args.port} --live-local",
	            "agent_doctor": "MaramaRoute agent doctor --target claude-code",
	            "hpe": f"MaramaRoute hpe init {model.model_id} --backend {args.backend}",
	            "compat_vllm": "MaramaRoute compat --target vllm --status usable",
	            "compat_nim": "MaramaRoute compat --target nim --status usable",
	            "compat_nemo": "MaramaRoute compat --target nemo --status pathway",
	        },
	        "self_test": self_test,
	    }
	    config_path = state_root / "marama-route.json"
	    config_path.write_text(json.dumps(config, indent=2, sort_keys=True), encoding="utf-8")
	    set_alias("default", model.model_id, state_root)
	    set_favorite(model.model_id, state_root)

	    artifacts: dict[str, Any] = {
	        "config": str(config_path),
	        "state_dir": str(state_root),
	        # Presumably the file set_alias maintains — TODO confirm against ops.
	        "aliases": str(state_root / "aliases.json"),
	    }
	    agent_artifacts: dict[str, Any] = {}
	    for target in targets:
	        # "hpe-slurm" gets the dedicated HPE bundle below, not an agent workspace.
	        if target == "hpe-slurm":
	            continue
	        agent_config = build_agent_bridge_config(
	            models,
	            target=target,
	            base_url=base_url,
	            host=args.host,
	            port=args.port,
	            cache_dir=args.cache_dir,
	            model_id=model.model_id,
	        )
	        target_dir = state_root / "agents" / target
	        agent_artifacts[target] = _write_agent_workspace_files(target_dir, agent_config)
	    if agent_artifacts:
	        artifacts["agents"] = agent_artifacts

	    if "hpe-slurm" in targets:
	        hpe_result = _write_hpe_init_bundle(
	            state_root / "hpe",
	            model,
	            port=args.port,
	            backend=args.backend,
	            backend_port=args.backend_port,
	            backend_base_url=args.backend_base_url,
	            backend_model=args.backend_model,
	            backend_command=args.backend_command,
	            api_key_env=args.api_key_env,
	            vllm_args=args.vllm_args,
	            gpus=args.gpus,
	            memory=args.memory,
	            time_limit=args.time,
	            partition=args.partition,
	        )
	        # Only the list of written paths is kept from the bundle result.
	        artifacts["hpe"] = hpe_result["written"]

	    if args.pull:
	        pulled = pull_model(models, model.model_id, cache_root=args.cache_dir)
	        artifacts["pulled_model"] = pulled.to_dict()

	    result = {
	        "ok": self_test.get("ok", True),
	        "model_id": model.model_id,
	        "repo_id": model.repo_id,
	        "runtime": model.runtime,
	        "targets": targets,
	        "artifacts": artifacts,
	        "config": config,
	    }
	    print(json.dumps(result, indent=2, sort_keys=True))
	    return 0 if result["ok"] or args.allow_warnings else 2


	def _expand_agent_targets(raw_targets: Sequence[str], *, all_targets: bool, include_hpe: bool) -> list[str]:
	    """Turn raw ``--target`` values into an ordered, de-duplicated target list.

	    Entries may be comma-separated. The default target set is appended when
	    ``all_targets`` is set or nothing was requested, and ``"hpe-slurm"`` when
	    ``include_hpe`` is set. The literal ``all`` (any case) expands to the
	    default set. Each name is normalized before validation.

	    Raises:
	        ValueError: if a normalized name is not a canonical agent target.
	    """
	    requested = [piece.strip() for entry in raw_targets for piece in entry.split(",") if piece.strip()]
	    if all_targets or not requested:
	        requested.extend(_AGENT_SETUP_DEFAULT_TARGETS)
	    if include_hpe:
	        requested.append("hpe-slurm")
	    chosen: list[str] = []
	    for entry in requested:
	        candidates = list(_AGENT_SETUP_DEFAULT_TARGETS) if entry.lower() == "all" else [entry]
	        for candidate in candidates:
	            canonical = normalize_agent_target(candidate)
	            if canonical not in _AGENT_CANONICAL_TARGETS:
	                choices = ", ".join(_AGENT_TARGET_CHOICES)
	                raise ValueError(f"Unsupported agent target: {candidate}. Choices: {choices}")
	            if canonical in chosen:
	                continue
	            chosen.append(canonical)
	    return chosen


	def _write_agent_workspace_files(target_dir: Path, config: dict[str, Any]) -> list[str]:
	    """Write an agent config and its workspace files into *target_dir*.

	    The whole config goes to ``marama-route.agent.json``. Each entry of
	    ``config["workspace_files"]`` (used only when it is a dict) is written under
	    the basename of its key, so keys cannot place files outside *target_dir*.
	    List content is joined with newlines; all content is right-stripped and
	    terminated with one newline.

	    Returns the written paths as strings, config file first.
	    """
	    target_dir.mkdir(parents=True, exist_ok=True)
	    manifest = target_dir / "marama-route.agent.json"
	    manifest.write_text(json.dumps(config, indent=2, sort_keys=True), encoding="utf-8")
	    outputs = [str(manifest)]
	    extras = config.get("workspace_files", {})
	    if not isinstance(extras, dict):
	        return outputs
	    for name, content in extras.items():
	        destination = target_dir / Path(str(name)).name
	        body = "\n".join(str(line) for line in content) if isinstance(content, list) else str(content)
	        destination.write_text(body.rstrip() + "\n", encoding="utf-8")
	        outputs.append(str(destination))
	    return outputs


	def _write_hpe_init_bundle(
	    output_dir: Path,
	    model: Any,
	    *,
	    port: int,
	    backend: str,
	    backend_port: int = 8000,
	    backend_base_url: str = "",
	    backend_model: str = "",
	    backend_command: str = "",
	    api_key_env: str = "",
	    vllm_args: str = "",
	    gpus: int = 0,
	    memory: str = "32G",
	    time_limit: str = "02:00:00",
	    partition: str = "",
	) -> dict[str, Any]:
	    """Write the HPE/Slurm starter bundle for *model* into *output_dir*.

	    Five files are written, in this order: ``marama-route.slurm``,
	    ``gateway.hpe.json``, ``marama-route.env``, ``marama-route.def`` and
	    ``README.hpe.md``. The directory is created if needed.

	    Args:
	        output_dir: Directory that receives the bundle.
	        model: Registry entry; ``model_id``, ``repo_id`` and ``runtime`` are read.
	        port: Gateway port, used in the script, env file and README.
	        backend: Backend name forwarded to every renderer.
	        backend_port: Port used to build the default backend URL.
	        backend_base_url: Explicit backend URL; empty means
	            ``http://127.0.0.1:<backend_port>/v1``.
	        backend_model: Backend model name; empty means ``model.model_id``.
	        backend_command, api_key_env, vllm_args, gpus, memory, time_limit,
	        partition: Forwarded unchanged to the Slurm script renderer
	            (``api_key_env`` also to the gateway config).

	    Returns:
	        A dict with ``ok``, the ``written`` paths, ``model_id``, ``repo_id``,
	        ``backend`` and the rendered ``gateway_config``.
	    """
	    output_dir.mkdir(parents=True, exist_ok=True)
	    backend_base = backend_base_url or f"http://127.0.0.1:{backend_port}/v1"
	    gateway_config = render_hpe_gateway_config(
	        model_id=model.model_id,
	        backend=backend,
	        model_runtime=model.runtime,
	        backend_base_url=backend_base,
	        backend_model=backend_model or model.model_id,
	        api_key_env=api_key_env,
	        # Passed as a literal placeholder, not a resolved path.
	        cache_dir="$MARAMA_ROUTE_CACHE",
	    )
	    script = output_dir / "marama-route.slurm"
	    script.write_text(
	        render_hpe_slurm_script(
	            model_id=model.model_id,
	            repo_id=model.repo_id,
	            model_runtime=model.runtime,
	            port=port,
	            backend=backend,
	            backend_port=backend_port,
	            # The script renderer receives the raw (possibly empty) URL and
	            # model name, unlike the gateway config, which gets the resolved
	            # fallbacks above.
	            backend_base_url=backend_base_url,
	            backend_model=backend_model,
	            backend_command=backend_command,
	            api_key_env=api_key_env,
	            vllm_args=vllm_args,
	            gpus=gpus,
	            memory=memory,
	            time_limit=time_limit,
	            partition=partition,
	        ),
	        encoding="utf-8",
	    )
	    gateway = output_dir / "gateway.hpe.json"
	    gateway.write_text(json.dumps(gateway_config, indent=2, sort_keys=True), encoding="utf-8")
	    env = output_dir / "marama-route.env"
	    env.write_text(
	        "\n".join(
	            (
	                "MARAMA_ROUTE_CACHE=${SCRATCH:-$HOME}/marama-route/models",
	                f"MARAMA_ROUTE_PORT={port}",
	                f"MARAMA_BACKEND_PORT={backend_port}",
	                f"MARAMA_BACKEND_BASE_URL={backend_base}",
	                f"MARAMA_BACKEND_MODEL={backend_model or model.model_id}",
	                f"MARAMA_BACKEND={backend}",
	                # The empty last item makes the joined text end with a newline.
	                "",
	            ),
	        ),
	        encoding="utf-8",
	    )
	    definition = output_dir / "marama-route.def"
	    definition.write_text(render_hpe_apptainer_definition(backend=backend), encoding="utf-8")
	    readme = output_dir / "README.hpe.md"
	    readme.write_text(render_hpe_readme(model_id=model.model_id, port=port, backend=backend), encoding="utf-8")
	    return {
	        "ok": True,
	        "written": [str(script), str(gateway), str(env), str(definition), str(readme)],
	        "model_id": model.model_id,
	        "repo_id": model.repo_id,
	        "backend": backend,
	        "gateway_config": gateway_config,
	    }


	def _writable_dir(path: Path) -> dict[str, Any]:
	    """Check that *path* exists (creating it if needed) and can be written to.

	    A small probe file is written and removed again. The probe is removed even
	    when the write itself fails, so a half-written probe is not left behind.

	    Args:
	        path: Directory to test. A ``str`` or other path-like is accepted too.

	    Returns:
	        ``{"ok": True, "path": ...}`` on success, or
	        ``{"ok": False, "path": ..., "error": ...}`` when any filesystem step
	        raised ``OSError``.
	    """
	    # Coerce so callers holding a plain string path do not hit AttributeError.
	    directory = Path(path)
	    probe = directory / ".marama-route-write-test"
	    try:
	        directory.mkdir(parents=True, exist_ok=True)
	        try:
	            probe.write_text("ok", encoding="utf-8")
	        finally:
	            # Runs on failure as well, e.g. a disk-full error after creation.
	            probe.unlink(missing_ok=True)
	    except OSError as exc:
	        return {"ok": False, "path": str(directory), "error": str(exc)}
	    return {"ok": True, "path": str(directory)}


	def _model_ops(args: argparse.Namespace) -> int:
	    """Dispatch the ``model`` sub-actions (ls/list/disk, verify, rm, prune, estimate, ps).

	    Prints the action's result as JSON. Returns 0 when the result's ``ok`` is
	    truthy, otherwise 2.

	    Raises:
	        ValueError: for an unknown action, or when ``rm``/``estimate`` is given
	            no model.
	    """
	    registry = load_model_registry(_registry_path(args))
	    verb = args.model_action
	    cache_root = args.cache_dir
	    if verb in {"ls", "list", "disk"}:
	        outcome = cache_report(registry, cache_root)
	    elif verb == "verify":
	        outcome = verify_cache(registry, cache_root, deep=args.deep, write_hashes=args.write_hashes)
	    elif verb == "rm":
	        if not args.model:
	            raise ValueError("rm requires <model>")
	        outcome = remove_cached_model(registry, args.model, cache_root=cache_root, dry_run=args.dry_run)
	    elif verb == "prune":
	        outcome = prune_cache(registry, cache_root=cache_root, dry_run=args.dry_run)
	    elif verb == "estimate":
	        if not args.model:
	            raise ValueError("estimate requires <model>")
	        target = _resolve_cli_model(registry, args.model)
	        outcome = estimate_model_download(target, cache_root, remote=args.remote_sizes, all_files=args.all_files)
	    elif verb == "ps":
	        # No process registry is kept; the cache report is returned instead.
	        outcome = {
	            "ok": True,
	            "running_process_registry": "not_persistent",
	            "note": "Use `MaramaRoute serve --live-local` to start the local gateway.",
	            "cache": cache_report(registry, cache_root),
	        }
	    else:
	        raise ValueError(f"Unsupported model action: {verb}")
	    print(json.dumps(outcome, indent=2, sort_keys=True))
	    if outcome.get("ok", False):
	        return 0
	    return 2


	def _hardware(args: argparse.Namespace) -> int:
	    """Print the hardware inspection as JSON, with recommendations on request.

	    Always returns 0.
	    """
	    registry = load_model_registry(_registry_path(args))
	    probe = inspect_hardware(args.cache_dir)
	    report = {"ok": True, "hardware": probe}
	    if args.recommend:
	        suggestions = hardware_recommendations(registry, probe, limit=args.limit)
	        report["recommendations"] = suggestions
	    print(json.dumps(report, indent=2, sort_keys=True))
	    return 0


	def _alias(args: argparse.Namespace) -> int:
	    """Handle the alias sub-actions: list, set, rm and favorite.

	    Model references are resolved against the registry before being stored.
	    Prints the action's result as JSON and returns 0.

	    Raises:
	        ValueError: for an unknown action or a missing required argument.
	    """
	    registry = load_model_registry(_registry_path(args))
	    verb = args.alias_action
	    if verb == "list":
	        outcome = {"ok": True, **load_aliases(args.state_dir)}
	    elif verb == "set":
	        if not (args.name and args.model):
	            raise ValueError("alias set requires <name> <model>")
	        target = _resolve_cli_model(registry, args.model)
	        outcome = set_alias(args.name, target.model_id, args.state_dir)
	    elif verb == "rm":
	        if not args.name:
	            raise ValueError("alias rm requires <name>")
	        outcome = delete_alias(args.name, args.state_dir)
	    elif verb == "favorite":
	        # The model may arrive in either positional slot.
	        reference = args.model or args.name
	        if not reference:
	            raise ValueError("favorite requires <model>")
	        target = _resolve_cli_model(registry, reference)
	        outcome = set_favorite(target.model_id, args.state_dir)
	    else:
	        raise ValueError(f"Unsupported alias action: {verb}")
	    print(json.dumps(outcome, indent=2, sort_keys=True))
	    return 0


	def _bench(args: argparse.Namespace) -> int:
	    """Time one run of a pulled model and print throughput figures as JSON.

	    Returns 0 on success. When the run raises ``FileNotFoundError``,
	    ``RuntimeError`` or ``ValueError`` the message goes to stderr and 2 is
	    returned.
	    """
	    registry = load_model_registry(_registry_path(args))
	    target = _resolve_cli_model(registry, args.model)
	    prompt = args.prompt_text or "Say kia ora in one short sentence."
	    started = time.perf_counter()
	    try:
	        outcome = run_pulled_model(
	            registry,
	            target.model_id,
	            prompt=prompt,
	            cache_root=args.cache_dir,
	            pull=args.pull,
	            max_tokens=args.max_tokens,
	            dry_run=args.dry_run,
	        )
	    except (FileNotFoundError, RuntimeError, ValueError) as exc:
	        print(str(exc), file=sys.stderr)
	        return 2
	    # Floor the duration so the rate below can never divide by zero.
	    duration = max(time.perf_counter() - started, 0.0001)
	    reply = str(outcome.get("response") or "")
	    size = len(reply)
	    report = {
	        "ok": True,
	        "model_id": target.model_id,
	        "dry_run": args.dry_run,
	        "elapsed_seconds": round(duration, 4),
	        "characters": size,
	        "characters_per_second": round(size / duration, 2),
	        "result": outcome,
	    }
	    print(json.dumps(report, indent=2, sort_keys=True))
	    return 0


	def _eval(args: argparse.Namespace) -> int:
	    """Print the routing scenario matrix together with registry analytics.

	    Returns 0 when the matrix is ``ok`` or ``args.allow_blocked_exit_zero`` is
	    set, otherwise 2.
	    """
	    registry = load_model_registry(_registry_path(args))
	    matrix = route_scenario_matrix(registry)
	    stats = build_registry_analytics(registry)
	    report = {
	        "ok": matrix["ok"],
	        "suite": args.suite,
	        "analytics": stats,
	        "matrix": matrix,
	        "note": "This eval checks deterministic routing coverage. Use `bench` for local runtime timing.",
	    }
	    print(json.dumps(report, indent=2, sort_keys=True))
	    if report["ok"] or args.allow_blocked_exit_zero:
	        return 0
	    return 2


	def _audit(args: argparse.Namespace) -> int:
	    """Handle the audit sub-actions: list, show, export and record.

	    ``record`` routes the request file, builds a receipt from the decision and
	    stores it. Prints the action's result as JSON and returns 0.

	    Raises:
	        ValueError: for an unknown action, ``show`` without a receipt id, or
	            ``record`` without ``--request``.
	    """
	    store = args.state_dir
	    verb = args.audit_action
	    if verb == "list":
	        outcome = list_audit_receipts(store)
	    elif verb == "show":
	        if not args.receipt:
	            raise ValueError("audit show requires <receipt-id>")
	        outcome = show_audit_receipt(args.receipt, store)
	    elif verb == "export":
	        exported = export_audit_receipts(args.output, store)
	        outcome = {"ok": True, "path": str(exported)}
	    elif verb == "record":
	        if args.request is None:
	            raise ValueError("audit record requires --request")
	        registry = load_model_registry(_registry_path(args))
	        request_payload = _load_json_mapping(args.request)
	        router = SovereignModelRouter(registry)
	        decision = router.route(RoutingRequest.from_payload(request_payload))
	        routed = {"route_decision": decision.to_dict()}
	        from .platform import route_receipt

	        receipt = route_receipt(request_payload, routed)
	        stored = write_audit_receipt(receipt, store)
	        outcome = {"ok": True, "path": str(stored), "receipt": receipt}
	    else:
	        raise ValueError(f"Unsupported audit action: {verb}")
	    print(json.dumps(outcome, indent=2, sort_keys=True))
	    return 0


	def _agent_init(args: argparse.Namespace) -> int:
	    """Write agent workspace files for one target into ``args.output_dir``.

	    The model comes from the positional argument, then the ``model_option``
	    flag, then the recommended model. Prints a JSON summary. Returns 0 when the
	    generated config reports ``ok`` (a missing key counts as ok), otherwise 2.
	    """
	    registry = load_model_registry(_registry_path(args))
	    reference = args.model or args.model_option
	    if reference:
	        chosen = _resolve_cli_model(registry, reference)
	    else:
	        chosen = _recommended_model(registry)
	    destination = args.output_dir.resolve()
	    bridge = build_agent_bridge_config(
	        registry,
	        target=args.target,
	        base_url=args.base_url,
	        host=args.host,
	        port=args.port,
	        cache_dir=args.cache_dir,
	        model_id=chosen.model_id,
	    )
	    written = _write_agent_workspace_files(destination, bridge)
	    ok = bool(bridge.get("ok", True))
	    summary = {
	        "ok": ok,
	        "target": bridge.get("target", args.target),
	        "model_id": chosen.model_id,
	        "written": written,
	    }
	    print(json.dumps(summary, indent=2, sort_keys=True))
	    if ok:
	        return 0
	    return 2


	def _agent_doctor(args: argparse.Namespace) -> int:
	    """Diagnose an agent bridge setup and print the report as JSON.

	    The gateway is only contacted when ``args.health_check`` is set; otherwise
	    its check is reported as ok and unchecked. Returns 0 when every check is ok
	    or ``args.allow_warnings`` is set, otherwise 2.
	    """
	    registry = load_model_registry(_registry_path(args))
	    bridge = build_agent_bridge_config(
	        registry,
	        target=args.target,
	        base_url=args.base_url,
	        host=args.host,
	        port=args.port,
	        cache_dir=args.cache_dir,
	        model_id=args.model or "",
	    )
	    if args.health_check:
	        gateway_status = _gateway_health(args.base_url)
	    else:
	        gateway_status = {"ok": True, "checked": False, "base_url": args.base_url}
	    resolved_target = bridge.get("target")
	    checks = {
	        "registry": {"ok": bool(registry), "count": len(registry), "path": str(_registry_path(args))},
	        "target": {"ok": resolved_target in _AGENT_CANONICAL_TARGETS, "value": resolved_target},
	        "cache": {"ok": True, "path": str(args.cache_dir or default_cache_root())},
	        "gateway": gateway_status,
	    }
	    report = {
	        "ok": all(bool(entry.get("ok")) for entry in checks.values()),
	        "product": "LumynaX MaramaRoute",
	        "mode": "agent_doctor",
	        "config": bridge,
	        "checks": checks,
	        "next_commands": bridge.get("commands", {}),
	    }
	    print(json.dumps(report, indent=2, sort_keys=True))
	    if report["ok"] or args.allow_warnings:
	        return 0
	    return 2


	def _gateway_health(base_url: str) -> dict[str, Any]:
	    """Probe the gateway's ``/health`` endpoint and report the outcome.

	    The health URL is derived from *base_url* by dropping trailing slashes and
	    a trailing ``/v1`` segment, so ``http://host:8787/v1`` and
	    ``http://host:8787/v1/`` both probe ``http://host:8787/health``.

	    Args:
	        base_url: Base URL of the gateway API.

	    Returns:
	        ``{"ok", "checked", "url", "status"}`` when a response arrived (``ok``
	        is True for 2xx), or ``{"ok": False, "checked", "url", "error"}`` when
	        the probe failed. The probe never raises for connection, timeout,
	        protocol or malformed-URL problems.
	    """
	    from http.client import HTTPException
	    from urllib.request import urlopen

	    # Strip trailing slashes first; otherwise ".../v1/" would keep its "/v1".
	    root = base_url.rstrip("/").removesuffix("/v1").rstrip("/")
	    url = f"{root}/health"
	    try:
	        with urlopen(url, timeout=2) as response:
	            return {"ok": 200 <= response.status < 300, "checked": True, "url": url, "status": response.status}
	    except (OSError, ValueError, HTTPException) as exc:
	        # OSError covers URLError/HTTPError, timeouts and connection resets;
	        # ValueError is raised for a URL without a scheme; HTTPException for a
	        # malformed or truncated HTTP response.
	        return {"ok": False, "checked": True, "url": url, "error": str(exc)}


	def _hpe(args: argparse.Namespace) -> int:
	    """Run one ``hpe`` action and print its JSON result.

	    The model is resolved from ``args.model`` when given, otherwise taken from
	    ``_recommended_model``. The output directory is created and the gateway
	    configuration rendered before the action is dispatched. Supported actions
	    are ``init``, ``submit``, ``plan``, ``tunnel`` and ``status``.

	    Returns:
	        0 when the result's ``ok`` flag is truthy, otherwise 2.

	    Raises:
	        ValueError: if ``args.hpe_action`` is not one of the supported actions.
	    """
	    registry = load_model_registry(_registry_path(args))
	    if args.model:
	        model = _resolve_cli_model(registry, args.model)
	    else:
	        model = _recommended_model(registry)

	    output_dir = args.output_dir.resolve()
	    output_dir.mkdir(parents=True, exist_ok=True)

	    gateway_config = render_hpe_gateway_config(
	        model_id=model.model_id,
	        backend=args.backend,
	        model_runtime=model.runtime,
	        backend_base_url=args.backend_base_url or f"http://127.0.0.1:{args.backend_port}/v1",
	        backend_model=args.backend_model or model.model_id,
	        api_key_env=args.api_key_env,
	        cache_dir="$MARAMA_ROUTE_CACHE",
	    )

	    def job_options() -> dict[str, Any]:
	        # Shared by ``init`` and ``submit``; read lazily so the other actions
	        # never touch these attributes on ``args``.
	        return {
	            "port": args.port,
	            "backend": args.backend,
	            "backend_port": args.backend_port,
	            "backend_base_url": args.backend_base_url,
	            "backend_model": args.backend_model,
	            "backend_command": args.backend_command,
	            "api_key_env": args.api_key_env,
	            "vllm_args": args.vllm_args,
	            "gpus": args.gpus,
	            "memory": args.memory,
	            "time_limit": args.time,
	            "partition": args.partition,
	        }

	    action = args.hpe_action
	    if action == "init":
	        result = _write_hpe_init_bundle(output_dir, model, **job_options())
	    elif action == "submit":
	        slurm_path = output_dir / "marama-route.slurm"
	        slurm_text = render_hpe_slurm_script(
	            model_id=model.model_id,
	            repo_id=model.repo_id,
	            model_runtime=model.runtime,
	            **job_options(),
	        )
	        slurm_path.write_text(slurm_text, encoding="utf-8")
	        gateway_json = json.dumps(gateway_config, indent=2, sort_keys=True)
	        (output_dir / "gateway.hpe.json").write_text(gateway_json, encoding="utf-8")
	        sbatch = ["sbatch", str(slurm_path)]
	        if args.execute and shutil.which("sbatch"):
	            completed = subprocess_run(sbatch)
	            result = {"ok": completed["returncode"] == 0, "command": sbatch, "result": completed}
	        else:
	            # Not executing (or sbatch is unavailable): report the command to run manually.
	            result = {"ok": True, "command": " ".join(sbatch), "execute": False, "script": str(slurm_path)}
	    elif action == "plan":
	        planned_commands = {
	            "init": f"MaramaRoute hpe init {model.model_id} --backend {args.backend}",
	            "submit": f"MaramaRoute hpe submit {model.model_id} --backend {args.backend}",
	            "serve_api": f"MaramaRoute serve --host 0.0.0.0 --port {args.port} --config gateway.hpe.json",
	            "tunnel": f"ssh -N -L {args.port}:127.0.0.1:{args.port} <user>@<login-node>",
	        }
	        result = {
	            "ok": True,
	            "model_id": model.model_id,
	            "repo_id": model.repo_id,
	            "runtime": model.runtime,
	            "backend": args.backend,
	            "gateway_config": gateway_config,
	            "commands": planned_commands,
	        }
	    elif action == "tunnel":
	        result = {
	            "ok": True,
	            "command": f"ssh -N -L {args.port}:127.0.0.1:{args.port} <user>@<login-node>",
	            "base_url": f"http://127.0.0.1:{args.port}/v1",
	        }
	    elif action == "status":
	        user = os.getenv("USER") or os.getenv("USERNAME") or ""
	        squeue = ["squeue", "-u", user]
	        if shutil.which("squeue"):
	            result = {"ok": True, "command": squeue, "result": subprocess_run(squeue)}
	        else:
	            result = {"ok": True, "scheduler": "slurm", "status": "squeue_not_available_on_this_machine"}
	    else:
	        raise ValueError(f"Unsupported HPE action: {args.hpe_action}")

	    print(json.dumps(result, indent=2, sort_keys=True))
	    return 0 if result.get("ok", False) else 2


	def _update_registry(args: argparse.Namespace) -> int:
	    """Download the published registry file and install it at the output path.

	    With ``args.dry_run`` nothing is written; the registry is only downloaded
	    when ``args.diff`` is also set, in which case the diff against the current
	    registry is included in the printed JSON.

	    Without ``args.dry_run`` the file is downloaded and copied to
	    ``args.output`` (default: ``lumynax_model_registry.json`` under the state
	    root). When ``args.diff`` is set the diff is computed *before* the copy,
	    so it still describes the change when the output path is the registry
	    that is currently loaded.

	    Returns:
	        0 on success, 2 when the download, diff or copy fails (the error text
	        is printed to stderr).
	    """
	    output = args.output or (default_state_root() / "lumynax_model_registry.json")
	    if args.dry_run:
	        result = {"ok": True, "dry_run": True, "output": str(output), "repo_id": args.repo_id}
	        if args.diff:
	            try:
	                payload, source = _download_registry_payload(args.repo_id, args.filename)
	                result["source"] = source
	                result["diff"] = diff_model_registry(load_model_registry(_registry_path(args)), payload)
	            except Exception as exc:  # CLI boundary: report and exit non-zero.
	                print(str(exc), file=sys.stderr)
	                return 2
	        print(json.dumps(result, indent=2, sort_keys=True))
	        return 0

	    try:
	        payload, downloaded = _download_registry_payload(args.repo_id, args.filename)
	        result = {"ok": True, "output": str(output), "source": downloaded}
	        if args.diff:
	            # Diff first: copying may overwrite the registry being compared against.
	            result["diff"] = diff_model_registry(load_model_registry(_registry_path(args)), payload)
	        output.parent.mkdir(parents=True, exist_ok=True)
	        shutil.copy2(downloaded, output)
	    except Exception as exc:  # CLI boundary: same reporting as the dry-run branch.
	        print(str(exc), file=sys.stderr)
	        return 2
	    print(json.dumps(result, indent=2, sort_keys=True))
	    return 0


	def _download_registry_payload(repo_id: str, filename: str) -> tuple[dict[str, Any] | list[Any], str]:
	    """Fetch *filename* from the Hugging Face repo *repo_id* and parse it as JSON.

	    Returns:
	        ``(payload, downloaded)`` where ``payload`` is the parsed JSON document
	        and ``downloaded`` is the local path returned by ``hf_hub_download``.

	    Raises:
	        RuntimeError: if ``huggingface_hub`` is not installed.
	    """
	    try:
	        from huggingface_hub import hf_hub_download  # type: ignore[import-not-found]
	    except ImportError as exc:
	        raise RuntimeError("huggingface-hub is required for update-registry.") from exc
	    downloaded = hf_hub_download(repo_id=repo_id, filename=filename)
	    # "utf-8-sig" transparently drops a byte-order mark if the file has one.
	    payload = json.loads(Path(downloaded).read_text(encoding="utf-8-sig"))
	    return payload, downloaded


	def _pull(args: argparse.Namespace) -> int:
	    """Pull one or more registry models into the local cache and print JSON.

	    Targets come from ``_pull_targets``. With ``args.estimate`` only download
	    estimates are printed. Pulling more than one model requires ``args.yes``,
	    ``args.dry_run`` or an interactive confirmation. With ``args.verify`` (and
	    no dry run) the cache verification report is attached to the output.

	    Returns:
	        0 on success; 2 when nothing matched, the batch was not confirmed, or
	        a pull failed (the error text is printed to stderr).
	    """
	    models = load_model_registry(_registry_path(args))
	    targets = _pull_targets(models, args)
	    if not targets:
	        print("No matching LumynaX models found for the pull request.", file=sys.stderr)
	        return 2
	    if args.estimate:
	        estimates = [
	            estimate_model_download(model, args.cache_dir, remote=args.remote_sizes, all_files=args.all_files)
	            for model in targets
	        ]
	        output = estimates[0] if len(estimates) == 1 else {"ok": True, "count": len(estimates), "models": estimates}
	        print(json.dumps(output, indent=2, sort_keys=True))
	        return 0
	    if len(targets) > 1 and not args.dry_run and not args.yes:
	        if not sys.stdin.isatty():
	            print("Batch pull needs --yes in non-interactive mode.", file=sys.stderr)
	            return 2
	        # Show the effective cache location rather than "None" when no directory was given.
	        destination = args.cache_dir or default_cache_root()
	        answer = input(f"Pull {len(targets)} models into {destination}? [y/N] ").strip().lower()
	        if answer not in {"y", "yes"}:
	            print("Pull cancelled.")
	            return 2

	    pulled = []
	    for model in targets:
	        try:
	            result = pull_model(
	                models,
	                model.model_id,
	                cache_root=args.cache_dir,
	                all_files=args.all_files,
	                force=args.force,
	                dry_run=args.dry_run,
	            )
	        except (FileNotFoundError, RuntimeError, ValueError) as exc:
	            # Same reporting as the run/chat commands: message on stderr, exit code 2.
	            print(str(exc), file=sys.stderr)
	            return 2
	        pulled.append(result.to_dict())
	    output = pulled[0] if len(pulled) == 1 else {"ok": True, "count": len(pulled), "models": pulled}
	    if args.verify and not args.dry_run:
	        output = {
	            "ok": True,
	            "pull": output,
	            "verify": verify_cache(models, args.cache_dir, deep=args.deep_verify, write_hashes=args.write_hashes),
	        }
	    print(json.dumps(output, indent=2, sort_keys=True))
	    return 0


	def _local(args: argparse.Namespace) -> int:
	    """Print the models found under ``args.cache_dir`` as JSON and return 0."""
	    pulled = list_pulled_models(args.cache_dir)
	    report = json.dumps(pulled, indent=2, sort_keys=True)
	    print(report)
	    return 0


	def _run(args: argparse.Namespace) -> int:
	    """Run a single prompt against a model and print the JSON result.

	    Falls back to the interactive conversation when no model or no prompt is
	    given. With ``args.stream`` (and no dry run) the reply is streamed via
	    ``_stream_once`` instead.

	    Returns:
	        0 on success, 2 when the model cannot be resolved or the run fails
	        (the error text is printed to stderr).
	    """
	    models = load_model_registry(_registry_path(args))
	    if args.model is None:
	        return _conversation(args, models)
	    prompt = " ".join(args.prompt).strip() if args.prompt else args.prompt_text
	    if not prompt:
	        return _conversation(args, models)
	    try:
	        model = _resolve_cli_model(models, args.model)
	    except ValueError as exc:
	        # Unknown model reference: report it like any other run failure.
	        print(str(exc), file=sys.stderr)
	        return 2
	    if args.stream and not args.dry_run:
	        return _stream_once(args, models, model, prompt)
	    try:
	        result = run_pulled_model(
	            models,
	            model.model_id,
	            prompt=prompt,  # always non-empty here; empty prompts went to the conversation above
	            cache_root=args.cache_dir,
	            pull=args.pull,
	            max_tokens=args.max_tokens,
	            temperature=args.temperature,
	            context_tokens=args.context_tokens,
	            threads=args.threads,
	            dry_run=args.dry_run,
	        )
	    except (FileNotFoundError, RuntimeError, ValueError) as exc:
	        print(str(exc), file=sys.stderr)
	        return 2
	    print(json.dumps(result, indent=2, sort_keys=True))
	    return 0


	def _chat(args: argparse.Namespace) -> int:
	    """Answer a one-shot prompt, or start the interactive conversation.

	    Without a prompt the interactive conversation is started. With a prompt
	    the model is ``args.model`` when given, otherwise the recommended model;
	    the reply text is printed (or the full JSON result on a dry run). With
	    ``args.stream`` (and no dry run) the reply is streamed via ``_stream_once``.

	    Returns:
	        0 on success, 2 when no model can be resolved or the run fails (the
	        error text is printed to stderr).
	    """
	    models = load_model_registry(_registry_path(args))
	    prompt = " ".join(args.prompt).strip() if args.prompt else args.prompt_text
	    if not prompt:
	        return _conversation(args, models)
	    try:
	        model = _resolve_cli_model(models, args.model) if args.model else _recommended_model(models)
	    except ValueError as exc:
	        # Unknown model reference, or no runnable model in the registry.
	        print(str(exc), file=sys.stderr)
	        return 2
	    if args.stream and not args.dry_run:
	        return _stream_once(args, models, model, prompt)
	    try:
	        result = run_pulled_model(
	            models,
	            model.model_id,
	            prompt=prompt,
	            cache_root=args.cache_dir,
	            pull=args.pull,
	            max_tokens=args.max_tokens,
	            temperature=args.temperature,
	            context_tokens=args.context_tokens,
	            threads=args.threads,
	            dry_run=args.dry_run,
	        )
	    except (FileNotFoundError, RuntimeError, ValueError) as exc:
	        print(str(exc), file=sys.stderr)
	        return 2
	    if args.dry_run:
	        print(json.dumps(result, indent=2, sort_keys=True))
	    else:
	        print(result["response"])
	    return 0


	def _conversation(args: argparse.Namespace, models: tuple[Any, ...]) -> int:
	    """Run the interactive chat loop, or describe it when ``args.dry_run`` is set.

	    On a dry run a JSON description (model, cache directory, available slash
	    commands) is printed and nothing is loaded. Otherwise an interactive
	    terminal is required: a model is chosen (``args.model`` or the picker),
	    a chat session is loaded lazily, and each input line is either handled as
	    a slash command or sent to the session.

	    Returns:
	        0 when the user leaves the conversation (exit command, Ctrl-C/EOF at
	        the prompt, or exiting a model picker); 2 when stdin is not a
	        terminal or the runtime raises ``RuntimeError``.
	    """
	    if args.dry_run:
	        model = _resolve_cli_model(models, args.model) if args.model else _recommended_model(models)
	        payload = {
	            "ok": True,
	            "mode": "conversation_dry_run",
	            "model_id": model.model_id,
	            "repo_id": model.repo_id,
	            "cache_dir": str(model_cache_dir(model, args.cache_dir)),
	            "commands": [
	                "/models",
	                "/all",
	                "/hardware",
	                "/recommended",
	                "/categories",
	                "/vllm",
	                "/nim",
	                "/nemo",
	                "/search <text>",
	                "/switch <text>",
	                "/pull [text]",
	                "/local",
	                "/settings",
	                "/clear",
	                "/history",
	                "/save <name>",
	                "/load <name>",
	                "/export <name> <file.md>",
	                "/info",
	                "/exit",
	            ],
	        }
	        print(json.dumps(payload, indent=2, sort_keys=True))
	        return 0
	    if not sys.stdin.isatty():
	        print(
	            "MaramaRoute chat needs an interactive terminal or a prompt.\n"
	            "Try: MaramaRoute chat lumynax-coder-qwen25-05b-instruct-gguf \"Say kia ora\"",
	            file=sys.stderr,
	        )
	        return 2

	    _print_startup_guide(models, args.cache_dir, animated=True)
	    try:
	        model = _resolve_cli_model(models, args.model) if args.model else _prompt_for_model(models, args.cache_dir)
	    except _ExitConversation:
	        return 0
	    session = None

	    def open_session(model_id: str) -> Any:
	        # One place for the session options shared by the lazy load and /load.
	        return load_chat_session(
	            models,
	            model_id,
	            cache_root=args.cache_dir,
	            pull=False,
	            max_tokens=args.max_tokens,
	            temperature=args.temperature,
	            context_tokens=args.context_tokens,
	            threads=args.threads,
	        )

	    while True:
	        try:
	            if session is None:
	                # Lazy (re)load: any command that changes the model resets ``session``.
	                try:
	                    model = _ensure_model_ready(
	                        models,
	                        model,
	                        cache_dir=args.cache_dir,
	                        auto_pull=args.pull,
	                    )
	                    session = open_session(model.model_id)
	                    print(_loaded_model_message(model))
	                except FileNotFoundError as exc:
	                    print(str(exc))
	                    try:
	                        model = _prompt_for_model(models, args.cache_dir)
	                    except _ExitConversation:
	                        # Leaving the recovery picker ends the conversation like every other picker.
	                        return 0
	                    continue
	                except RuntimeError as exc:
	                    print(str(exc), file=sys.stderr)
	                    return 2
	            prompt = input("\n>>> ").strip()
	        except (KeyboardInterrupt, EOFError):
	            print()
	            return 0

	        if not prompt:
	            continue
	        command = prompt.lower()
	        if command in CHAT_COMMANDS:
	            return 0
	        if command in {"/help", "/h", "?"}:
	            _print_chat_help()
	            continue

	        picker_options = _conversation_picker_options(command)
	        if picker_options is not None:
	            try:
	                model = _prompt_for_model(models, args.cache_dir, **picker_options)
	            except _ExitConversation:
	                return 0
	            session = None
	            continue

	        if command == "/catalog":
	            choices = _conversation_choices(models, include_all=True)
	            _print_model_picker_header(
	                models,
	                choices,
	                query="",
	                include_all=True,
	                hardware_only=False,
	                compatibility_target="",
	                offset=0,
	                shown=min(len(choices), MODEL_PICKER_PAGE_SIZE),
	                hardware=None,
	            )
	            continue
	        if command in {"/families", "/categories"}:
	            _print_model_categories(models)
	            continue
	        if command == "/local":
	            _print_local_models(args.cache_dir)
	            continue
	        if command == "/clear":
	            if session is not None:
	                session.history.clear()
	            print("Conversation history cleared.")
	            continue
	        if command == "/history":
	            for index, (user, assistant) in enumerate(session.history if session is not None else [], 1):
	                print(f"{index}. user: {user}")
	                print(f" assistant: {assistant}")
	            if session is not None and not session.history:
	                print("No conversation history yet.")
	            continue
	        if command.startswith("/save "):
	            name = prompt.split(maxsplit=1)[1].strip()
	            try:
	                path = save_session(name, model.model_id, session.history if session is not None else [])
	            except (OSError, ValueError) as exc:
	                # A failed save should not end the chat; report it like /export does.
	                print(str(exc))
	                continue
	            print(f"Saved session to {path}")
	            continue
	        if command.startswith("/load "):
	            name = prompt.split(maxsplit=1)[1].strip()
	            try:
	                saved = load_session(name)
	                loaded_model = _resolve_cli_model(models, str(saved.get("model_id") or model.model_id))
	                loaded_session = open_session(loaded_model.model_id)
	                loaded_session.history = [
	                    (str(item.get("user") or ""), str(item.get("assistant") or ""))
	                    for item in saved.get("history", [])
	                    if isinstance(item, dict)
	                ]
	            except (OSError, RuntimeError, ValueError) as exc:  # FileNotFoundError is an OSError
	                print(str(exc))
	                continue
	            # Commit only after everything succeeded so model and session never disagree.
	            model, session = loaded_model, loaded_session
	            print(f"Loaded session {name}.")
	            continue
	        if command.startswith("/export "):
	            parts = prompt.split(maxsplit=2)
	            if len(parts) < 3:
	                print("Use /export <session-name> <file.md>")
	                continue
	            try:
	                path = export_session_markdown(parts[1], Path(parts[2]))
	            except (OSError, ValueError, FileNotFoundError) as exc:
	                print(str(exc))
	                continue
	            print(f"Exported session to {path}")
	            continue
	        if command == "/settings":
	            _print_chat_settings(model, args)
	            continue
	        if command.startswith("/switch ") or command.startswith("/use "):
	            query = prompt.split(maxsplit=1)[1].strip()
	            try:
	                model = _resolve_cli_model(models, query)
	            except ValueError as exc:
	                print(str(exc))
	                continue
	            session = None
	            print(f"Switched to {model.model_id}.")
	            continue
	        if command.startswith("/search ") or command.startswith("/family "):
	            query = prompt.split(maxsplit=1)[1].strip()
	            try:
	                model = _prompt_for_model(models, args.cache_dir, initial_query=query, include_all=True)
	            except _ExitConversation:
	                return 0
	            session = None
	            continue
	        if command == "/pull":
	            try:
	                pulled = pull_model(models, model.model_id, cache_root=args.cache_dir)
	            except (RuntimeError, ValueError) as exc:
	                # Same handling as "/pull <text>": report and keep chatting.
	                print(str(exc))
	                continue
	            print(f"Pulled {model.model_id} to {pulled.cache_dir}")
	            session = None
	            continue
	        if command.startswith("/pull "):
	            query = prompt.split(maxsplit=1)[1].strip()
	            try:
	                pulled_model = _resolve_cli_model(models, query)
	                pulled = pull_model(models, pulled_model.model_id, cache_root=args.cache_dir)
	            except (RuntimeError, ValueError) as exc:
	                print(str(exc))
	                continue
	            print(f"Pulled {pulled_model.model_id} to {pulled.cache_dir}")
	            model = pulled_model
	            session = None
	            continue
	        if command == "/info":
	            print(json.dumps(model.to_dict(), indent=2, sort_keys=True))
	            continue

	        # Anything else is a chat message for the loaded session.
	        try:
	            if args.stream:
	                for chunk in session.send_stream(prompt):
	                    print(chunk, end="", flush=True)
	                print()
	                continue
	            response = session.send(prompt)
	        except RuntimeError as exc:
	            print(str(exc), file=sys.stderr)
	            return 2
	        print(response)


	def _conversation_picker_options(command: str) -> dict[str, Any] | None:
	    """Return the ``_prompt_for_model`` keyword arguments for a picker command.

	    *command* is the lower-cased chat input. ``None`` means the command does
	    not reopen the model picker.
	    """
	    if command in {"/models", "/switch"}:
	        return {}
	    if command in {"/hardware", "/recommended-hardware"}:
	        return {"hardware_only": True}
	    if command in {"/recommended", "/recommend", "/best"}:
	        return {"show_menu": False}
	    if command == "/all":
	        return {"include_all": True}
	    if command in {"/vllm", "/nim", "/nemo", "/nem"}:
	        return {"compatibility_target": _deployment_target_from_command(command)}
	    return None


	def _stream_once(args: argparse.Namespace, models: tuple[Any, ...], model: Any, prompt: str) -> int:
	    """Stream the reply to a single *prompt* to stdout.

	    A chat session is loaded for *model* with the generation options taken
	    from *args*; each streamed chunk is printed as it arrives, followed by a
	    newline.

	    Returns:
	        0 on success, 2 when loading or streaming raises ``FileNotFoundError``,
	        ``RuntimeError`` or ``ValueError`` (the error text goes to stderr).
	    """
	    session_options = {
	        "cache_root": args.cache_dir,
	        "pull": args.pull,
	        "max_tokens": args.max_tokens,
	        "temperature": args.temperature,
	        "context_tokens": args.context_tokens,
	        "threads": args.threads,
	    }
	    try:
	        chat = load_chat_session(models, model.model_id, **session_options)
	        for piece in chat.send_stream(prompt):
	            print(piece, end="", flush=True)
	        print()
	    except (FileNotFoundError, RuntimeError, ValueError) as error:
	        print(str(error), file=sys.stderr)
	        return 2
	    else:
	        return 0


	def _recommended_model(models: tuple[Any, ...]) -> Any:
	    """Return the top-ranked entry of ``_conversation_choices``.

	    Raises:
	        ValueError: if the ranking is empty.
	    """
	    top = _conversation_choices(models, limit=1)
	    if top:
	        return top[0]
	    raise ValueError("No runnable LumynaX models found in the registry.")


	def _resolve_cli_model(models: tuple[Any, ...], model_ref: str) -> Any:
	    """Resolve a user-supplied model reference to a registry model.

	    The reference is first passed through ``resolve_alias`` and then through
	    ``resolve_model``. If that raises ``ValueError`` the reference is used as
	    search text and the best-ranked match is returned.

	    Raises:
	        ValueError: re-raised from ``resolve_model`` when the search finds nothing.
	    """
	    reference = resolve_alias(model_ref)
	    try:
	        return resolve_model(models, reference)
	    except ValueError:
	        matches = _conversation_choices(models, search=reference, limit=1)
	        if not matches:
	            raise
	        return matches[0]


	def _pull_targets(models: tuple[Any, ...], args: argparse.Namespace) -> list[Any]:
	    """Work out which models a ``pull`` invocation refers to.

	    * ``args.model`` set: that single model (resolved via ``_resolve_cli_model``).
	    * No model and no filter: the interactive picker when stdin is a
	      terminal, otherwise an empty list.
	    * Filters set: every ranked choice matching ``search``, ``family``
	      (family name or tags), ``runtime`` (exact) and ``chat_only``, truncated
	      to ``limit`` when it is positive.

	    Unset (``None``) filter values are treated like empty ones, and family
	    matching against tags is case-insensitive like the rest of the filters.
	    """
	    if args.model:
	        return [_resolve_cli_model(models, args.model)]

	    has_filters = bool(args.search or args.family or args.runtime or args.limit)
	    if not has_filters:
	        if sys.stdin.isatty():
	            return [_prompt_for_model(models, args.cache_dir)]
	        return []

	    choices = _conversation_choices(
	        models,
	        search=args.search or "",
	        include_all=not args.chat_only,
	        limit=None,
	    )
	    # The truthiness test above allows these to be None, so normalise before use.
	    family = (args.family or "").strip().lower()
	    runtime = (args.runtime or "").strip().lower()
	    limit = args.limit or 0
	    filtered = []
	    for model in choices:
	        if args.chat_only and not _is_chat_runnable(model):
	            continue
	        if family and family not in model.family.lower() and family not in " ".join(model.tags).lower():
	            continue
	        if runtime and runtime != model.runtime.lower():
	            continue
	        filtered.append(model)
	    return filtered[:limit] if limit > 0 else filtered


	def _conversation_choices(
	models: tuple[Any, ...],
	*,
	search: str = "",
	include_all: bool = False,
	compatibility_target: str = "",
	limit: int \| None = None,
	) -> list[Any]:
	filtered: list[Any] = []
	query = search.strip().lower()
	for model in models:
	haystack = " ".join((model.model_id, model.repo_id, model.family, " ".join(model.tags))).lower()
	if compatibility_target:
	if not _matches_deployment_target(model, compatibility_target):
	continue
	elif not include_all and not _is_chat_runnable(model):
	continue
	if query and query not in haystack:
	continue
	filtered.append(model)
	ranked = sorted(
	filtered,
	key=lambda item: (
	"coder" in item.model_id.lower(),
	item.sovereignty_tier,
	-item.quality_rank,
	item.context_tokens,
	item.model_id,
	),
	reverse=True,
	)
	return ranked if limit is None else ranked[:limit]


	def _hardware_choices(
	    models: tuple[Any, ...],
	    cache_dir: Path | None,
	    *,
	    search: str = "",
	    include_all: bool = False,
	) -> tuple[list[Any], dict[str, Any]]:
	    """List models in the order recommended for this machine's hardware.

	    The hardware is inspected once; the recommendation rows are mapped back
	    to registry models by ``model_id`` (rows for unknown ids are dropped),
	    then filtered by chat-runnability (unless *include_all*) and by the
	    case-insensitive *search* substring.

	    Returns:
	        ``(models, hardware)`` — the filtered models in recommendation order
	        and the hardware report they were ranked against.
	    """
	    hardware = inspect_hardware(cache_dir)
	    recommendations = hardware_recommendations(models, hardware, limit=len(models))
	    by_id = {model.model_id: model for model in models}
	    ranked = [by_id[row["model_id"]] for row in recommendations["models"] if row["model_id"] in by_id]
	    query = search.strip().lower()
	    filtered = []
	    for model in ranked:
	        if not include_all and not _is_chat_runnable(model):
	            continue
	        if query:
	            haystack = " ".join((model.model_id, model.repo_id, model.family, " ".join(model.tags))).lower()
	            if query not in haystack:
	                continue
	        filtered.append(model)
	    return filtered, hardware


	def _prompt_for_model(
	    models: tuple[Any, ...],
	    cache_dir: Path | None,
	    *,
	    initial_query: str = "",
	    include_all: bool = False,
	    hardware_only: bool = False,
	    compatibility_target: str = "",
	    show_menu: bool = True,
	) -> Any:
	    """Interactively ask the user to choose a model and return it.

	    The picker is a loop over two screens: a numbered menu (options 1-9) and
	    a paged result list. The view is described by ``query``, ``include_all``,
	    ``hardware_only``, ``compatibility_target`` and ``offset``; every command
	    updates that state and the loop re-renders.

	    Args:
	        models: registry models to choose from.
	        cache_dir: cache root used to report which models are already local.
	        initial_query: search text to start with.
	        include_all: start with the full registry instead of chat models.
	        hardware_only: start with the hardware-ranked list.
	        compatibility_target: start with models for this deployment target.
	        show_menu: open on the menu; only honoured when none of the other
	            starting options is set.

	    Returns:
	        The selected model.

	    Raises:
	        _ExitConversation: when the user enters one of ``CHAT_COMMANDS``.
	    """
	    query = initial_query
	    # The menu is only the starting screen when the caller asked for no specific view.
	    menu_open = show_menu and not initial_query and not include_all and not hardware_only and not compatibility_target
	    offset = 0
	    last_hardware: dict[str, Any] | None = None
	    while True:
	        if menu_open:
	            _print_model_picker_menu(models, cache_dir)
	            raw = input(f"{_prompt_marker()} Choose option 1-9, search text, /help, or /exit: ").strip()
	            if not raw:
	                # Plain Enter selects option 1 (hardware-suitable models).
	                raw = "1"
	            lowered = raw.lower()
	            if lowered in CHAT_COMMANDS:
	                raise _ExitConversation
	            if lowered in {"/help", "/h", "?"}:
	                _print_model_picker_help()
	                continue
	            if raw == "1" or lowered in {"/hardware", "/recommended-hardware"}:
	                hardware_only = True
	                include_all = False
	                compatibility_target = ""
	                query = ""
	                offset = 0
	                menu_open = False
	                continue
	            if raw == "2" or lowered in {"/recommended", "/recommend", "/best"}:
	                hardware_only = False
	                include_all = False
	                compatibility_target = ""
	                query = ""
	                offset = 0
	                menu_open = False
	                continue
	            if raw == "3":
	                query = input(f"{_prompt_marker()} Search text: ").strip()
	                include_all = True
	                hardware_only = False
	                compatibility_target = ""
	                offset = 0
	                menu_open = False
	                continue
	            if raw == "4" or lowered in {"/models", "/switch", "/runnable"}:
	                hardware_only = False
	                include_all = False
	                compatibility_target = ""
	                query = ""
	                offset = 0
	                menu_open = False
	                continue
	            if raw == "5" or lowered in {"/all", "/catalog"}:
	                hardware_only = False
	                include_all = True
	                compatibility_target = ""
	                query = ""
	                offset = 0
	                menu_open = False
	                continue
	            if raw == "6" or lowered == "/local":
	                # Informational only: the menu stays open and is shown again.
	                _print_local_models(cache_dir)
	                continue
	            if raw == "7" or lowered == "/vllm":
	                hardware_only = False
	                include_all = True
	                compatibility_target = "vllm"
	                query = ""
	                offset = 0
	                menu_open = False
	                continue
	            if raw == "8" or lowered == "/nim":
	                hardware_only = False
	                include_all = True
	                compatibility_target = "nvidia_nim"
	                query = ""
	                offset = 0
	                menu_open = False
	                continue
	            if raw == "9" or lowered in {"/nemo", "/nem"}:
	                hardware_only = False
	                include_all = True
	                compatibility_target = "nvidia_nemo"
	                query = ""
	                offset = 0
	                menu_open = False
	                continue
	            handled = _handle_model_picker_command(raw, models)
	            if handled is not None:
	                query = handled["query"]
	                include_all = handled["include_all"]
	                hardware_only = handled.get("hardware_only", False)
	                compatibility_target = handled.get("compatibility_target", "")
	                menu_open = handled.get("menu", False)
	                offset = 0
	            else:
	                # Not a command: treat the input as search text over the full registry.
	                query = raw
	                include_all = True
	                hardware_only = False
	                compatibility_target = ""
	                offset = 0
	                menu_open = False
	            continue

	        # Result screen: compute the choices for the current view.
	        if compatibility_target:
	            choices = _conversation_choices(
	                models,
	                search=query,
	                include_all=True,
	                compatibility_target=compatibility_target,
	            )
	            last_hardware = None
	        elif hardware_only:
	            choices, last_hardware = _hardware_choices(models, cache_dir, search=query, include_all=include_all)
	        else:
	            choices = _conversation_choices(models, search=query, include_all=include_all)
	            last_hardware = None
	        if not choices:
	            raw = input(f"{_prompt_marker()} No match. Search again, /hardware, /all, /models, /menu, or /exit: ").strip()
	            handled = _handle_model_picker_command(raw, models)
	            if handled is not None:
	                query = handled["query"]
	                include_all = handled["include_all"]
	                hardware_only = handled.get("hardware_only", False)
	                compatibility_target = handled.get("compatibility_target", "")
	                menu_open = handled.get("menu", False)
	                offset = 0
	            else:
	                # Only the search text changes; the rest of the view is kept.
	                query = raw
	            continue
	        print()
	        # Clamp the offset in case the result list shrank since the last page.
	        offset = min(offset, max(0, len(choices) - 1))
	        page = choices[offset : offset + MODEL_PICKER_PAGE_SIZE]
	        _print_model_picker_header(
	            models,
	            choices,
	            query=query,
	            include_all=include_all,
	            hardware_only=hardware_only,
	            compatibility_target=compatibility_target,
	            offset=offset,
	            shown=len(page),
	            hardware=last_hardware,
	        )
	        # Rows are numbered by their position in the full result list, not within the page.
	        for index, model in enumerate(page, offset + 1):
	            cached = "local" if local_model_files(model_cache_dir(model, cache_dir)) else "not pulled"
	            chat_status = _offline_capability_label(model)
	            print(
	                f"{index:>2}. {model.model_id} "
	                f"[{model.family}, {model.runtime}, tier {model.sovereignty_tier}, {chat_status}, {cached}]",
	            )
	        raw = input(
	            f"{_prompt_marker()} Choose {offset + 1}-{offset + len(page)}, search text, /next, /prev, /menu, or /help: ",
	        ).strip()
	        if not raw:
	            # Plain Enter picks the first model on the current page.
	            return page[0]
	        command = raw.lower()
	        if command == "/next":
	            if offset + MODEL_PICKER_PAGE_SIZE >= len(choices):
	                print("Already at the last page.")
	            else:
	                offset += MODEL_PICKER_PAGE_SIZE
	            continue
	        if command in {"/prev", "/back"}:
	            offset = max(0, offset - MODEL_PICKER_PAGE_SIZE)
	            continue
	        if command in {"/help", "/h", "?"}:
	            _print_model_picker_help()
	            continue
	        handled = _handle_model_picker_command(raw, models)
	        if handled is not None:
	            query = handled["query"]
	            include_all = handled["include_all"]
	            hardware_only = handled.get("hardware_only", False)
	            compatibility_target = handled.get("compatibility_target", "")
	            menu_open = handled.get("menu", False)
	            offset = 0
	            continue
	        if raw.isdigit():
	            selected = int(raw)
	            # Any number within the full result list is accepted, not just the visible page.
	            if 1 <= selected <= len(choices):
	                return choices[selected - 1]
	            print(f"Choose a number from 1 to {len(choices)}.")
	            continue
	        try:
	            # Try the text as a model reference among the current choices first.
	            return resolve_model(tuple(choices), raw)
	        except ValueError:
	            # Otherwise use it as a new search; compatibility_target is left as it was.
	            query = raw
	            include_all = True
	            hardware_only = False
	            offset = 0


	def _handle_model_picker_command(raw: str, models: tuple[Any, ...]) -> dict[str, Any] \| None:
	command = raw.strip().lower()
	if not command:
	return None
	if command in CHAT_COMMANDS:
	raise _ExitConversation
	if command in {"/models", "/switch", "/runnable"}:
	return {"query": "", "include_all": False, "hardware_only": False, "compatibility_target": ""}
	if command in {"/hardware", "/recommended-hardware"}:
	return {"query": "", "include_all": False, "hardware_only": True, "compatibility_target": ""}
	if command in {"/recommended", "/recommend", "/best"}:
	return {"query": "", "include_all": False, "hardware_only": False, "compatibility_target": ""}
	if command == "/menu":
	return {"query": "", "include_all": False, "hardware_only": False, "compatibility_target": "", "menu": True}
	if command == "/all":
	return {"query": "", "include_all": True, "hardware_only": False, "compatibility_target": ""}
	if command == "/catalog":
	return {"query": "", "include_all": True, "hardware_only": False, "compatibility_target": ""}
	if command in {"/vllm", "/nim", "/nemo", "/nem"}:
	return {
	"query": "",
	"include_all": True,
	"hardware_only": False,
	"compatibility_target": _deployment_target_from_command(command),
	}
	if command.startswith("/search "):
	query = command.removeprefix("/search ").strip()
	return {"query": query, "include_all": True, "hardware_only": False, "compatibility_target": ""}
	if command.startswith("/use "):
	query = command.removeprefix("/use ").strip()
	return {"query": query, "include_all": False, "hardware_only": False, "compatibility_target": ""}
	if command.startswith("/family "):
	family = command.removeprefix("/family ").strip()
	return {"query": family, "include_all": True, "hardware_only": False, "compatibility_target": ""}
	if command in {"/families", "/categories"}:
	_print_model_categories(models)
	return {"query": "", "include_all": True, "hardware_only": False, "compatibility_target": ""}
	if command == "/local":
	print("Local models are shown after a model is selected. Use `MaramaRoute local` for full JSON.")
	return {"query": "", "include_all": False, "hardware_only": False, "compatibility_target": "", "menu": True}
	if command in {"/help", "/h", "?"}:
	_print_model_picker_help()
	return {"query": "", "include_all": False, "hardware_only": False, "compatibility_target": "", "menu": True}
	if command == "/pull":
	print("Choose a model first; then /pull downloads the selected model.")
	return {"query": "", "include_all": False, "hardware_only": False, "compatibility_target": ""}
	if command.startswith("/"):
	known = ", ".join(sorted(CHAT_COMMANDS \| MODEL_PICKER_COMMANDS))
	print(f"Unknown command {raw!r}. Known commands: {known}.")
	return {"query": "", "include_all": False, "hardware_only": False, "compatibility_target": "", "menu": True}
	return None


	def _is_chat_runnable(model: Any) -> bool:
	runtime = model.runtime.lower()
	return "llama" in runtime or "gguf" in runtime


	def _offline_capability_label(model: Any) -> str:
	runtime = model.runtime.lower()
	artifact = model.primary_artifact.lower()
	if "llama" in runtime or "gguf" in runtime or "gguf" in artifact:
	return "chat"
	if _is_transformers_smoke_test(model):
	return "smoke-test"
	if _is_transformers_text_generation(model):
	return "chat/transformers"
	return "offline-task"


	def _deployment_target_from_command(command: str) -> str:
	normalized = command.strip().lower().lstrip("/").replace("-", "_")
	if normalized == "vllm":
	return "vllm"
	if normalized == "nim":
	return "nvidia_nim"
	if normalized in {"nem", "nemo"}:
	return "nvidia_nemo"
	raise ValueError(f"Unknown deployment target command: {command}")


	def _deployment_statuses_for_target(target: str) -> set[str]:
	    """Return a fresh set of the statuses that count as a match for *target*.

	    ``nvidia_nemo`` uses the pathway statuses; every other target uses the
	    usable statuses.
	    """
	    accepted = _DEPLOYMENT_PATHWAY_STATUSES if target == "nvidia_nemo" else _DEPLOYMENT_USABLE_STATUSES
	    return set(accepted)


	def _matches_deployment_target(model: Any, target: str) -> bool:
	    """Return True when the model's compatibility status for *target* is accepted.

	    A missing or non-dict compatibility entry is treated as an empty status.
	    """
	    entry = model_runtime_compatibility(model).get(target, {})
	    if isinstance(entry, dict):
	        status = str(entry.get("status") or "").lower()
	    else:
	        status = ""
	    return status in _deployment_statuses_for_target(target)


	def _deployment_summary(models: tuple[Any, ...]) -> dict[str, Any]:
	    """Summarise, per deployment target, how the registry models are classified.

	    For every target in ``_DEPLOYMENT_TARGET_LABELS`` the result holds the
	    label, the number of models with a usable status, the number with a
	    pathway status, the per-status counts, and the CLI and picker commands
	    for browsing that target.
	    """
	    summary: dict[str, Any] = {}
	    for target, label in _DEPLOYMENT_TARGET_LABELS.items():
	        status_counts: dict[str, int] = {}
	        usable_total = 0
	        pathway_total = 0
	        for candidate in models:
	            entry = model_runtime_compatibility(candidate).get(target, {})
	            if isinstance(entry, dict):
	                status = str(entry.get("status") or "unknown").lower()
	            else:
	                status = "unknown"
	            status_counts[status] = status_counts.get(status, 0) + 1
	            # bool adds as 0/1, so these stay plain integer counters.
	            usable_total += status in _DEPLOYMENT_USABLE_STATUSES
	            pathway_total += status in _DEPLOYMENT_PATHWAY_STATUSES
	        cli_target = _deployment_cli_target(target)
	        browse_command = f"MaramaRoute compat --target {cli_target} --status {_deployment_cli_status(target)}"
	        summary[target] = {
	            "label": label,
	            "usable": usable_total,
	            "pathway": pathway_total,
	            "statuses": dict(_top_counts(status_counts, limit=max(1, len(status_counts)))),
	            "browse_command": browse_command,
	            "picker_command": f"/{cli_target}",
	        }
	    return summary


	def _deployment_cli_target(target: str) -> str:
	if target == "nvidia_nim":
	return "nim"
	if target == "nvidia_nemo":
	return "nemo"
	return target


	def _deployment_cli_status(target: str) -> str:
	if target == "nvidia_nemo":
	return "pathway"
	return "usable"


	def _format_deployment_counts(deployment: dict[str, Any]) -> list[str]:
	    """Render one summary line per deployment target (vLLM, NIM, NeMo).

	    Missing targets fall back to an empty entry, and a ``statuses`` value
	    that is not a dict is treated as empty. The NeMo line reports direct and
	    pathway counts; the other lines report the usable count.
	    """

	    def describe(target: str) -> str:
	        entry = deployment.get(target, {})
	        label = str(entry.get("label") or target)
	        raw_statuses = entry.get("statuses")
	        status_text = _format_counts(raw_statuses if isinstance(raw_statuses, dict) else {})
	        if target == "nvidia_nemo":
	            return f"{label}: direct {entry.get('usable', 0)}, pathway {entry.get('pathway', 0)} ({status_text})"
	        return f"{label}: usable {entry.get('usable', 0)} ({status_text})"

	    return [describe(target) for target in ("vllm", "nvidia_nim", "nvidia_nemo")]


	def _is_transformers_text_generation(model: Any) -> bool:
	runtime = model.runtime.lower()
	if "transformers" not in runtime or "multimodal" in runtime:
	return False
	if _is_transformers_smoke_test(model):
	return False
	modalities = {str(item).lower() for item in model.modalities}
	if modalities - {"text"}:
	return False
	task_tags = {
	"asr",
	"classifier",
	"detection",
	"doc-ai",
	"document-vqa",
	"embedding",
	"embedding-companion",
	"guardrail",
	"layout",
	"moderation",
	"ocr",
	"reranker",
	"retrieval",
	"safety",
	"speech",
	"tables",
	"tts",
	}
	return not (set(model.tags) & task_tags)


	def _is_transformers_smoke_test(model: Any) -> bool:
	runtime = model.runtime.lower()
	if "transformers" not in runtime:
	return False
	modalities = {str(item).lower() for item in model.modalities}
	if modalities - {"text"}:
	return False
	metadata = getattr(model, "metadata", {}) or {}
	weight = metadata.get("total_weight_size")
	try:
	total_weight_size = int(weight or 0)
	except (TypeError, ValueError):
	total_weight_size = 0
	return model.model_id == "lumynax-tiny" or (0 < total_weight_size < 50_000_000)


	def _loaded_model_message(model: Any) -> str:
	    """Return the line printed after a model has been loaded for chat.

	    Smoke-test seeds and offline task models get an explanatory hint; every
	    other model gets a plain "Loaded ..." line.
	    """
	    label = _offline_capability_label(model)
	    if label == "smoke-test":
	        message = (
	            f"Loaded {model.model_id} (smoke-test seed). "
	            "It is usable for install/runtime checks, not useful free-form chat. "
	            "Use lumynax-tiny-qwen25-05b-gguf for tiny local chat."
	        )
	    elif label == "offline-task":
	        message = (
	            f"Loaded {model.model_id} (offline task model). "
	            "Type /info for metadata, /categories for model groups, or /switch for chat models."
	        )
	    else:
	        message = f"Loaded {model.model_id}."
	    return message


	def _print_model_picker_header(
	    models: tuple[Any, ...],
	    choices: list[Any],
	    *,
	    query: str,
	    include_all: bool,
	    hardware_only: bool,
	    compatibility_target: str,
	    offset: int,
	    shown: int,
	    hardware: dict[str, Any] | None,
	) -> None:
	    """Print the heading shown above a page of picker results.

	    Args:
	        models: the whole registry (for the totals line).
	        choices: the full result list being paged through.
	        query: current search text, echoed when non-empty.
	        include_all: whether the full registry is being browsed.
	        hardware_only: whether the hardware-ranked view is active.
	        compatibility_target: deployment target being browsed, if any.
	        offset: zero-based index of the first row on this page.
	        shown: number of rows on this page.
	        hardware: hardware report for the hardware view, if available.
	    """
	    total = len(models)
	    runnable = sum(1 for model in models if _is_chat_runnable(model))
	    if compatibility_target:
	        scope = f"{_DEPLOYMENT_TARGET_LABELS.get(compatibility_target, compatibility_target)} deployment-path models"
	    elif hardware_only:
	        scope = "hardware-suitable local GGUF chat models"
	    else:
	        scope = "all AbteeXAILab HF registry entries" if include_all else "recommended local GGUF chat models"
	    suffix = f" matching {query!r}" if query else ""
	    # An empty page would otherwise read "showing 1-0"; report it as "0-0" instead.
	    first = offset + 1 if shown else offset
	    print("Type /help to see commands before choosing a model.")
	    print(f"LumynaX model picker: showing {first}-{offset + shown} of {len(choices)} {scope}{suffix}.")
	    print(f"Registry total: {total} models. Direct local chat models: {runnable}.")
	    if hardware_only and hardware is not None:
	        print(f"Hardware check: {hardware.get('memory', 'unknown memory')} RAM, {hardware.get('disk_free', 'unknown disk')} free.")
	    if len(choices) > offset + shown:
	        print("Use /next for more results.")
	    if offset > 0:
	        print("Use /prev for previous results.")
	    if compatibility_target:
	        print("Use /vllm, /nim, /nemo, /all, or /models to change category.")
	    elif not include_all and not hardware_only:
	        print("Use /hardware for machine-suitable models, /all, /vllm, /nim, /nemo, or /help.")
	    else:
	        print("Use /models to return to recommended chat models, /family <name>, /vllm, /nim, /nemo, or /help.")


	def _print_model_picker_menu(models: tuple[Any, ...], cache_dir: Path \| None) -> None:
	pulled = list_pulled_models(cache_dir)
	runnable = sum(1 for model in models if _is_chat_runnable(model))
	vllm_count, nim_count, nem_count = _deployment_tag_counts(models)
	print()
	_print_box(
	"MaramaRoute model picker",
	[
	f"Registry {len(models)} \| Local chat {runnable} \| Pulled {pulled['count']}",
	f"Deployment paths: vLLM {vllm_count} \| NIM {nim_count} \| NeMo/NEM {nem_count}",
	"",
	"1 Hardware-suitable models for this machine",
	"2 Recommended local chat models",
	"3 Search all AbteeXAILab Hugging Face models",
	"4 Browse local GGUF chat models",
	"5 Browse full registry",
	"6 Show pulled local models",
	"7 Browse vLLM deployment-path models",
	"8 Browse NVIDIA NIM deployment-path models",
	"9 Browse NVIDIA NeMo/NEM deployment-path models",
	"",
	"Type /help to see all commands. Press Enter for hardware-suitable models.",
	],
	)


	def _print_startup_guide(models: tuple[Any, ...], cache_dir: Path \| None, *, animated: bool = False) -> None:
	pulled = list_pulled_models(cache_dir)
	runnable = sum(1 for model in models if _is_chat_runnable(model))
	vllm_count, nim_count, nem_count = _deployment_tag_counts(models)
	if animated:
	_animate_startup()
	print()
	_print_box(
	"MaramaRoute",
	[
	"AbteeX AI Labs LumynaX model console",
	"model: choose with /models, /hardware, /vllm, /nim, or /nem",
	f"registry: {len(models)} models \| {runnable} local chat-capable \| {pulled['count']} pulled",
	"runtime: GGUF + llama.cpp \| Transformers \| task-model shells",
	f"Deployment paths: vLLM {vllm_count} \| NVIDIA NIM {nim_count} \| NVIDIA NeMo/NEM {nem_count}",
	f"directory: {_shorten_middle(str(Path.cwd()), 86)}",
	"",
	"Start here: press Enter for hardware-suitable models, or type /help any time.",
	"Offline flow: /models -> choose -> /pull -> chat. Use /switch <text> to change.",
	"Production commands: setup \| doctor \| verify --deep \| serve --live-local \| agent-init \| hpe init",
	"MaramaRoute serve --live-local --port 8787",
	],
	)


	def _animate_startup() -> None:
	if not sys.stdout.isatty():
	return
	frames = ("[ ]", "[= ]", "[== ]", "[===]")
	message = "Preparing local LumynaX console"
	for frame in frames:
	print(f"\r{frame} {message}", end="", flush=True)
	time.sleep(0.04)
	print("\r[OK ] Local LumynaX console ready ")


	def _deployment_tag_counts(models: tuple[Any, ...]) -> tuple[int, int, int]:
	vllm_count = 0
	nim_count = 0
	nem_count = 0
	for model in models:
	tags = {str(tag).lower() for tag in model.tags}
	if "vllm-compatible" in tags:
	vllm_count += 1
	if "nim-compatible" in tags:
	nim_count += 1
	if {"nem-compatible", "nem-pathway"} & tags:
	nem_count += 1
	return vllm_count, nim_count, nem_count


	def _print_box(title: str, lines: list[str]) -> None:
	width = min(max(shutil.get_terminal_size((100, 20)).columns, 78), 150)
	inner = width - 2
	title_text = f" {title} "
	if len(title_text) > inner:
	title_text = title_text[:inner]
	if _console_supports("╭─╮│╰╯"):
	top_left, top_right, bottom_left, bottom_right, horizontal, vertical = "╭", "╮", "╰", "╯", "─", "│"
	else:
	top_left, top_right, bottom_left, bottom_right, horizontal, vertical = "+", "+", "+", "+", "-", "\|"
	print(top_left + title_text + horizontal * max(0, inner - len(title_text)) + top_right)
	for line in lines:
	print(vertical + " " + _fit_line(line, inner - 2) + " " + vertical)
	print(bottom_left + horizontal * inner + bottom_right)


	def _fit_line(text: str, width: int) -> str:
	if len(text) > width:
	text = _shorten_middle(text, width)
	return text + " " * max(0, width - len(text))


	def _shorten_middle(text: str, width: int) -> str:
	if len(text) <= width:
	return text
	if width <= 3:
	return text[:width]
	marker = "…" if _console_supports("…") else "..."
	left = max(1, (width - len(marker)) // 2)
	right = max(1, width - left - len(marker))
	return f"{text[:left]}{marker}{text[-right:]}"


	def _prompt_marker() -> str:
	return "❯" if _console_supports("❯") else ">"


	def _console_supports(text: str) -> bool:
	encoding = getattr(sys.stdout, "encoding", None) or "utf-8"
	try:
	text.encode(encoding)
	except UnicodeEncodeError:
	return False
	return True


	def _print_model_picker_help() -> None:
	print(
	"\n".join(
	(
	"Model picker commands:",
	" /hardware show models suitable for this machine",
	" /recommended show recommended local chat models",
	" /models browse local GGUF chat-capable models",
	" /all browse every bundled AbteeXAILab registry entry",
	" /vllm browse vLLM supported/candidate/experimental models",
	" /nim browse NVIDIA NIM supported/candidate/experimental models",
	" /nemo browse NVIDIA NeMo candidates and conversion paths",
	" /nem alias for /nemo",
	" /search <text> search model id, repo, family, or tags",
	" /family <name> filter by family or tag",
	" /categories show category/family/runtime/tag/deployment counts",
	" /families alias for /categories",
	" /next next page of results",
	" /prev previous page of results",
	" /menu return to the picker menu",
	" /local show pulled local models",
	" /exit quit",
	),
	),
	)


	def _print_model_categories(models: tuple[Any, ...]) -> None:
	_print_category_summary(_category_summary(models, limit=18))
	print("Type a family/tag/search term, or use /vllm, /nim, /nemo, /family qwen, /all, /runnable.")


	def _category_summary(models: tuple[Any, ...], *, limit: int) -> dict[str, Any]:
	families: dict[str, int] = {}
	runtimes: dict[str, int] = {}
	tags: dict[str, int] = {}
	modalities: dict[str, int] = {}
	capabilities: dict[str, int] = {}
	for model in models:
	families[model.family] = families.get(model.family, 0) + 1
	runtimes[model.runtime] = runtimes.get(model.runtime, 0) + 1
	capability = _offline_capability_label(model)
	capabilities[capability] = capabilities.get(capability, 0) + 1
	for modality in model.modalities:
	modalities[modality] = modalities.get(modality, 0) + 1
	for tag in model.tags:
	tags[tag] = tags.get(tag, 0) + 1

	safe_limit = max(1, int(limit))
	return {
	"ok": True,
	"model_count": len(models),
	"families": dict(_top_counts(families, limit=safe_limit)),
	"runtimes": dict(_top_counts(runtimes, limit=safe_limit)),
	"tags": dict(_top_counts(tags, limit=safe_limit)),
	"modalities": dict(_top_counts(modalities, limit=safe_limit)),
	"capabilities": dict(_top_counts(capabilities, limit=safe_limit)),
	"deployment_compatibility": _deployment_summary(models),
	"commands": {
	"browse_recommended": "MaramaRoute chat",
	"browse_all": "MaramaRoute catalog",
	"category_cli": "MaramaRoute categories",
	"filter_family": "MaramaRoute catalog --family qwen",
	"pull_family": "MaramaRoute pull --family qwen --limit 3 --dry-run",
	"compatibility": "MaramaRoute compat",
	"browse_vllm": "MaramaRoute compat --target vllm --status usable",
	"browse_nim": "MaramaRoute compat --target nim --status usable",
	"browse_nem_nemo": "MaramaRoute compat --target nemo --status pathway",
	},
	}


	def _print_category_summary(summary: dict[str, Any]) -> None:
	print()
	print("Categories from the bundled AbteeXAILab Hugging Face registry:")
	print(f"Models: {summary['model_count']}")
	print(f"Capabilities: {_format_counts(summary['capabilities'])}")
	print(f"Families: {_format_counts(summary['families'])}")
	print(f"Runtimes: {_format_counts(summary['runtimes'])}")
	print(f"Modalities: {_format_counts(summary['modalities'])}")
	print(f"Tags: {_format_counts(summary['tags'])}")
	print("Deployment compatibility:")
	for line in _format_deployment_counts(summary.get("deployment_compatibility", {})):
	print(f" {line}")
	print(
	"Next: MaramaRoute chat \| MaramaRoute compat --target vllm --status usable \| "
	"MaramaRoute compat --target nemo --status pathway",
	)


	def _format_counts(counts: dict[str, int]) -> str:
	if not counts:
	return "none"
	return ", ".join(f"{name} ({count})" for name, count in counts.items())


	def _top_counts(counts: dict[str, int], *, limit: int) -> list[tuple[str, int]]:
	return sorted(counts.items(), key=lambda item: (-item[1], item[0]))[:limit]


	def _print_compatibility_table(matrix: dict[str, Any]) -> None:
	print("MaramaRoute runtime compatibility")
	print(f"Models: {matrix['model_count']}. Returned: {matrix['returned']}. Target: {matrix['target']}.")
	print("Summary:")
	for runtime, counts in matrix["summary"].items():
	count_text = ", ".join(f"{status}={count}" for status, count in counts.items())
	print(f" {runtime}: {count_text}")
	print()
	print(f"{'model':42} {'runtime':22} {'vllm':16} {'nim':16} {'nemo':16}")
	print("-" * 112)
	for row in matrix["models"]:
	compatibility = row.get("compatibility", {})
	print(
	f"{str(row.get('model_id', ''))[:42]:42} "
	f"{str(row.get('runtime', ''))[:22]:22} "
	f"{_compat_status(compatibility.get('vllm'))[:16]:16} "
	f"{_compat_status(compatibility.get('nvidia_nim'))[:16]:16} "
	f"{_compat_status(compatibility.get('nvidia_nemo'))[:16]:16}",
	)


	def _compat_status(entry: Any) -> str:
	if isinstance(entry, dict):
	return str(entry.get("status") or "")
	return ""


	def _print_chat_help() -> None:
	print(
	"\n".join(
	(
	"Commands:",
	" /hardware choose from models suitable for this machine",
	" /recommended choose from recommended local chat models",
	" /models choose from local GGUF chat models",
	" /all choose from every bundled AbteeXAILab registry entry",
	" /search <text> search model id, repo, family, or tags",
	" /family <name> filter by family or category",
	" /categories show category/family/runtime/tag counts",
	" /switch <text> switch directly to a matching model",
	" /pull [text] pull the selected model or another matching model",
	" /local show pulled models",
	" /info show selected model metadata",
	" /settings show current runtime settings",
	" /clear clear chat history",
	" /history show current chat history",
	" /save <name> save current chat history",
	" /load <name> load a saved chat history",
	" /export <n> <file> export a saved chat as markdown",
	" /exit quit",
	),
	),
	)


	def _print_local_models(cache_dir: Path \| None) -> None:
	local = list_pulled_models(cache_dir)
	print(f"Local cache: {local['cache_root']}")
	if not local["models"]:
	print("No models pulled yet.")
	return
	for index, item in enumerate(local["models"], 1):
	files = item.get("files") or []
	print(f"{index:>2}. {item.get('model_id')} [{item.get('runtime')}, files: {len(files)}]")


	def _print_chat_settings(model: Any, args: argparse.Namespace) -> None:
	print(
	json.dumps(
	{
	"model_id": model.model_id,
	"runtime": model.runtime,
	"cache_dir": str(args.cache_dir),
	"max_tokens": args.max_tokens,
	"temperature": args.temperature,
	"context_tokens": args.context_tokens or min(model.context_tokens, 32768),
	"threads": args.threads,
	},
	indent=2,
	sort_keys=True,
	),
	)


	def _installed_version() -> str:
	for package_name in ("lumynax-marama-route", "marama-route", "tinyluminax"):
	try:
	return importlib_metadata.version(package_name)
	except importlib_metadata.PackageNotFoundError:
	continue
	return "source"


	def _hf_whoami_status() -> dict[str, Any]:
	try:
	from huggingface_hub import HfApi # type: ignore[import-not-found]

	payload = HfApi().whoami()
	return {"ok": True, "name": payload.get("name") or payload.get("fullname") or ""}
	except Exception as exc:
	return {"ok": False, "error": str(exc)}


	def subprocess_run(command: list[str]) -> dict[str, Any]:
	completed = subprocess.run(command, text=True, capture_output=True, check=False)
	return {
	"returncode": completed.returncode,
	"stdout": completed.stdout,
	"stderr": completed.stderr,
	}


	def _print_doctor(result: dict[str, Any]) -> None:
	print("MaramaRoute doctor")
	for name, check in result["checks"].items():
	mark = "ok" if check.get("ok") else "needs attention"
	detail = ""
	if name == "registry":
	detail = f" {check['count']} models, {check['chat_capable']} chat-capable"
	elif name == "cache":
	detail = f" {check['pulled_count']} pulled"
	elif name == "package" or name == "python":
	detail = f" {check['version']}"
	print(f"- {name}: {mark}{detail}")
	if not result["checks"]["llama_cpp"]["ok"]:
	print("Install local GGUF runtime: python -m pip install llama-cpp-python")
	if not result["checks"]["tokenizer_extras"]["ok"]:
	print("Install tokenizer extras: python -m pip install sentencepiece tiktoken tokenizers")
	print("Next: MaramaRoute setup --all-targets --hpe \| MaramaRoute chat \| MaramaRoute serve --live-local --port 8787")


	def _ensure_model_ready(
	models: tuple[Any, ...],
	model: Any,
	*,
	cache_dir: Path \| None,
	auto_pull: bool,
	) -> Any:
	if local_model_files(model_cache_dir(model, cache_dir)):
	return model
	should_pull = auto_pull
	if not should_pull:
	answer = input(f"{model.model_id} is not pulled. Pull it now? [Y/n] ").strip().lower()
	should_pull = answer in {"", "y", "yes"}
	if not should_pull:
	raise FileNotFoundError(f"{model.model_id} is not available locally.")
	pull_model(models, model.model_id, cache_root=cache_dir)
	return model


	def _ui(args: argparse.Namespace) -> int:
	from .ui import run_ui

	return run_ui(
	registry_path=args.registry,
	host=args.host,
	port=args.port,
	open_browser=args.open,
	smoke=args.smoke,
	)


	def _serve(args: argparse.Namespace) -> int:
	from .server import serve_gateway

	config_path = args.config
	if args.live_local:
	config_path = _write_live_local_gateway_config(args)
	return serve_gateway(
	registry_path=args.registry,
	config_path=config_path,
	host=args.host,
	port=args.port,
	open_browser=args.open,
	smoke=args.smoke,
	)


	def _write_live_local_gateway_config(args: argparse.Namespace) -> Path:
	payload: dict[str, Any] = {}
	if args.config and args.config.exists():
	payload = _load_json_mapping(args.config)
	payload.update(
	{
	"mode": "local_live",
	"prompt_retention": payload.get("prompt_retention", "not_stored_by_default"),
	"cache_dir": str(args.cache_dir),
	"pull_missing": args.pull_missing,
	"threads": args.threads,
	"context_tokens": args.context_tokens,
	},
	)
	path = Path(tempfile.gettempdir()) / "marama-route-live-local.gateway.json"
	path.write_text(json.dumps(payload, indent=2, sort_keys=True), encoding="utf-8")
	return path


	def build_parser() -> argparse.ArgumentParser:
	parser = argparse.ArgumentParser(
	prog="MaramaRoute",
	description="List, pull, run, and route AbteeX AI Labs LumynaX models from Hugging Face.",
	)
	subparsers = parser.add_subparsers(dest="command")
	chat_live = subparsers.add_parser(
	"chat",
	help="Start a conversational LumynaX model session.",
	)
	_add_chat_arguments(chat_live)
	chat_live.set_defaults(handler=_chat)

	route = subparsers.add_parser("route", help="Select a LumynaX model for a request.")
	route.add_argument("--registry", type=Path, default=None, help="MaramaRoute model registry JSON.")
	route.add_argument("--request", type=Path, required=True, help="Routing request JSON.")
	route.set_defaults(handler=_route)

	models = subparsers.add_parser(
	"models",
	help="Emit the full AbteeX/LumynaX Hugging Face model list.",
	)
	models.add_argument("--registry", type=Path, default=None, help="MaramaRoute model registry JSON.")
	models.add_argument("--search", default="", help="Filter by model id, repo id, family, runtime, or tag.")
	models.add_argument("--family", default="", help="Filter by family name or family tag.")
	models.add_argument("--runtime", default="", help="Filter by runtime, for example llama_cpp or transformers.")
	models.add_argument("--tag", default="", help="Filter by an exact registry tag, for example vllm-compatible.")
	models.add_argument("--limit", type=int, default=0, help="Maximum models to emit; 0 means all matches.")
	models.add_argument("--format", choices=["json", "table"], default="json")
	models.set_defaults(handler=_models)

	chat = subparsers.add_parser(
	"dry-run",
	help="Route a chat-shaped request without invoking a model backend.",
	)
	chat.add_argument("--registry", type=Path, default=None, help="MaramaRoute model registry JSON.")
	chat.add_argument("--request", type=Path, required=True, help="Chat request JSON.")
	chat.set_defaults(handler=_chat_dry_run)

	catalog = subparsers.add_parser(
	"catalog",
	help="Search and filter the MaramaRoute model catalog.",
	)
	catalog.add_argument("--registry", type=Path, default=None, help="MaramaRoute model registry JSON.")
	catalog.add_argument("--search", default="")
	catalog.add_argument("--task", default="")
	catalog.add_argument("--runtime", default="")
	catalog.add_argument("--family", default="")
	catalog.add_argument("--modality", default="")
	catalog.add_argument("--jurisdiction", default="NZ")
	catalog.add_argument("--min-context-tokens", type=int, default=0)
	catalog.add_argument("--requires-json", action=argparse.BooleanOptionalAction, default=False)
	catalog.add_argument("--requires-tools", action=argparse.BooleanOptionalAction, default=False)
	catalog.add_argument("--requires-local", action=argparse.BooleanOptionalAction, default=False)
	catalog.add_argument("--limit", type=int, default=0, help="Maximum rows; 0 means all models.")
	catalog.set_defaults(handler=_catalog)

	compat = subparsers.add_parser(
	"compat",
	help="Show vLLM, NVIDIA NIM, and NVIDIA NeMo compatibility for LumynaX models.",
	)
	compat.add_argument("model", nargs="?", help="Optional model id, repo id, or unique search fragment.")
	compat.add_argument("--registry", type=Path, default=None, help="MaramaRoute model registry JSON.")
	compat.add_argument("--target", default="", help="Runtime target: vllm, nim, nem, nemo, llama-cpp, or all.")
	compat.add_argument(
	"--status",
	default="",
	help="Filter by status such as candidate, experimental, unsupported, usable, or pathway.",
	)
	compat.add_argument("--limit", type=int, default=0, help="Maximum rows; 0 means all models.")
	compat.add_argument("--format", choices=["json", "table"], default="table")
	compat.set_defaults(handler=_compat)

	categories = subparsers.add_parser(
	"categories",
	aliases=["families"],
	help="Show model families, runtimes, tags, modalities, local capability, and deployment categories.",
	)
	categories.add_argument("--registry", type=Path, default=None, help="MaramaRoute model registry JSON.")
	categories.add_argument("--limit", type=int, default=18)
	categories.add_argument("--format", choices=["json", "table"], default="table")
	categories.set_defaults(handler=_categories)

	compare = subparsers.add_parser(
	"compare",
	help="Compare routed fit for selected model ids.",
	)
	compare.add_argument("--registry", type=Path, default=None, help="MaramaRoute model registry JSON.")
	compare.add_argument("--model", action="append", required=True, help="Model id, repeatable or comma-separated.")
	compare.add_argument("--request", type=Path, default=None, help="Optional routing request JSON.")
	compare.set_defaults(handler=_compare)

	matrix = subparsers.add_parser(
	"matrix",
	help="Run the built-in sovereign routing scenario matrix.",
	)
	matrix.add_argument("--registry", type=Path, default=None, help="MaramaRoute model registry JSON.")
	matrix.add_argument("--allow-blocked-exit-zero", action=argparse.BooleanOptionalAction, default=False)
	matrix.set_defaults(handler=_matrix)

	analytics = subparsers.add_parser("analytics", help="Summarise registry coverage.")
	analytics.add_argument("--registry", type=Path, default=None, help="MaramaRoute model registry JSON.")
	analytics.set_defaults(handler=_analytics)

	recommend = subparsers.add_parser(
	"recommend",
	help="Ask the router for the best LumynaX model from plain CLI options.",
	)
	recommend.add_argument("prompt", nargs="*", help="Prompt or task description.")
	recommend.add_argument("--prompt-text", default="", help="Prompt text, useful when avoiding shell quoting.")
	recommend.add_argument("--registry", type=Path, default=None, help="MaramaRoute model registry JSON.")
	recommend.add_argument("--task", default="", help="Task type: code, reasoning, multimodal, general.")
	recommend.add_argument("--sensitivity", default="internal", help="Data sensitivity: internal, personal, restricted.")
	recommend.add_argument("--jurisdiction", default="NZ")
	recommend.add_argument("--modality", default="text", help="Comma-separated modalities.")
	recommend.add_argument("--min-context-tokens", type=int, default=4096)
	recommend.add_argument("--requires-json", action=argparse.BooleanOptionalAction, default=False)
	recommend.add_argument("--requires-tools", action=argparse.BooleanOptionalAction, default=False)
	recommend.add_argument("--requires-local", action=argparse.BooleanOptionalAction, default=True)
	recommend.set_defaults(handler=_recommend)

	doctor = subparsers.add_parser("doctor", help="Check MaramaRoute install, registry, cache, and runtimes.")
	doctor.add_argument("--registry", type=Path, default=None, help="MaramaRoute model registry JSON.")
	doctor.add_argument("--cache-dir", type=Path, default=default_cache_root(), help="Local model cache directory.")
	doctor.add_argument("--online", action=argparse.BooleanOptionalAction, default=False)
	doctor.add_argument("--hardware", action=argparse.BooleanOptionalAction, default=False)
	doctor.add_argument("--json", action=argparse.BooleanOptionalAction, default=False)
	doctor.add_argument("--allow-warnings", action=argparse.BooleanOptionalAction, default=True)
	doctor.set_defaults(handler=_doctor)

	init = subparsers.add_parser("init", help="Create a local MaramaRoute config and starter aliases.")
	init.add_argument("model", nargs="?", help="Optional starter model id, repo id, alias, or search fragment.")
	init.add_argument("--registry", type=Path, default=None, help="MaramaRoute model registry JSON.")
	init.add_argument("--cache-dir", type=Path, default=default_cache_root(), help="Local model cache directory.")
	init.add_argument("--state-dir", type=Path, default=None, help="MaramaRoute state directory.")
	init.add_argument("--host", default="127.0.0.1")
	init.add_argument("--port", type=int, default=8787)
	init.add_argument("--live-local", action=argparse.BooleanOptionalAction, default=True)
	init.add_argument("--agent", default="", choices=("", *_AGENT_TARGET_CHOICES))
	init.add_argument("--hpe", action=argparse.BooleanOptionalAction, default=False)
	init.add_argument("--pull", action=argparse.BooleanOptionalAction, default=False)
	init.add_argument("--self-test", action=argparse.BooleanOptionalAction, default=True)
	init.set_defaults(handler=_init)

	setup = subparsers.add_parser("setup", help="Create a production-ready MaramaRoute local, agent, and HPE setup.")
	setup.add_argument("model", nargs="?", help="Optional default model id, repo id, alias, or search fragment.")
	setup.add_argument("--registry", type=Path, default=None, help="MaramaRoute model registry JSON.")
	setup.add_argument("--cache-dir", type=Path, default=default_cache_root(), help="Local model cache directory.")
	setup.add_argument("--state-dir", type=Path, default=None, help="MaramaRoute state directory.")
	setup.add_argument("--host", default="127.0.0.1")
	setup.add_argument("--port", type=int, default=8787)
	setup.add_argument("--target", action="append", default=[], help="Agent target. Repeat or comma-separate values; use all for every coding target.")
	setup.add_argument("--all-targets", action=argparse.BooleanOptionalAction, default=False)
	setup.add_argument("--hpe", action=argparse.BooleanOptionalAction, default=False)
	setup.add_argument("--backend", choices=["auto", "local-live", "vllm", "nim", "nemo", "external"], default="auto")
	setup.add_argument("--backend-port", type=int, default=8000)
	setup.add_argument("--backend-base-url", default="")
	setup.add_argument("--backend-model", default="")
	setup.add_argument("--backend-command", default="")
	setup.add_argument("--api-key-env", default="")
	setup.add_argument("--vllm-args", default="")
	setup.add_argument("--gpus", type=int, default=0)
	setup.add_argument("--memory", default="32G")
	setup.add_argument("--time", default="02:00:00")
	setup.add_argument("--partition", default="")
	setup.add_argument("--pull", action=argparse.BooleanOptionalAction, default=False)
	setup.add_argument("--allow-warnings", action=argparse.BooleanOptionalAction, default=True)
	setup.set_defaults(handler=_setup)

	hardware = subparsers.add_parser("hardware", help="Inspect local hardware and recommend runnable LumynaX models.")
	hardware.add_argument("--registry", type=Path, default=None, help="MaramaRoute model registry JSON.")
	hardware.add_argument("--cache-dir", type=Path, default=default_cache_root(), help="Local model cache directory.")
	hardware.add_argument("--recommend", action=argparse.BooleanOptionalAction, default=True)
	hardware.add_argument("--limit", type=int, default=8)
	hardware.set_defaults(handler=_hardware)

	model_ops = subparsers.add_parser("model", help="Manage local MaramaRoute model cache.")
	model_ops.add_argument("model_action", choices=["ls", "list", "disk", "verify", "rm", "prune", "estimate", "ps"])
	model_ops.add_argument("model", nargs="?", help="Model id, alias, repo id, or unique search fragment.")
	model_ops.add_argument("--registry", type=Path, default=None, help="MaramaRoute model registry JSON.")
	model_ops.add_argument("--cache-dir", type=Path, default=default_cache_root(), help="Local model cache directory.")
	model_ops.add_argument("--dry-run", action=argparse.BooleanOptionalAction, default=False)
	model_ops.add_argument("--deep", action=argparse.BooleanOptionalAction, default=False, help="Hash local files during verify.")
	model_ops.add_argument("--write-hashes", action=argparse.BooleanOptionalAction, default=False, help="Write a SHA256 manifest during deep verify.")
	model_ops.add_argument("--remote-sizes", action=argparse.BooleanOptionalAction, default=False, help="Fetch exact Hugging Face file sizes for estimate.")
	model_ops.add_argument("--all-files", action=argparse.BooleanOptionalAction, default=False, help="Plan or inspect all repository files where supported.")
	model_ops.set_defaults(handler=_model_ops)

	for command_name, action_name, help_text in (
	("ls", "ls", "List pulled models and cache size."),
	("ps", "ps", "Show local MaramaRoute runtime status."),
	("disk", "disk", "Show model-cache disk use."),
	("verify", "verify", "Verify pulled model files."),
	("prune", "prune", "Remove orphaned cache directories."),
	):
	command = subparsers.add_parser(command_name, help=help_text)
	command.add_argument("--registry", type=Path, default=None, help="MaramaRoute model registry JSON.")
	command.add_argument("--cache-dir", type=Path, default=default_cache_root(), help="Local model cache directory.")
	command.add_argument("--dry-run", action=argparse.BooleanOptionalAction, default=False)
	command.add_argument("--deep", action=argparse.BooleanOptionalAction, default=False, help="Hash local files during verify.")
	command.add_argument("--write-hashes", action=argparse.BooleanOptionalAction, default=False, help="Write a SHA256 manifest during deep verify.")
	command.add_argument("--remote-sizes", action=argparse.BooleanOptionalAction, default=False, help="Fetch exact Hugging Face file sizes for estimate.")
	command.add_argument("--all-files", action=argparse.BooleanOptionalAction, default=False, help="Plan or inspect all repository files where supported.")
	command.set_defaults(handler=_model_ops, model_action=action_name, model=None)

	rm = subparsers.add_parser("rm", help="Remove one pulled model from the local cache.")
	rm.add_argument("model", help="Model id, alias, repo id, or unique search fragment.")
	rm.add_argument("--registry", type=Path, default=None, help="MaramaRoute model registry JSON.")
	rm.add_argument("--cache-dir", type=Path, default=default_cache_root(), help="Local model cache directory.")
	rm.add_argument("--dry-run", action=argparse.BooleanOptionalAction, default=False)
	rm.set_defaults(handler=_model_ops, model_action="rm")

	alias = subparsers.add_parser("alias", help="Manage local model aliases and favorites.")
	alias.add_argument("alias_action", choices=["list", "set", "rm", "favorite"])
	alias.add_argument("name", nargs="?", help="Alias name.")
	alias.add_argument("model", nargs="?", help="Model id, repo id, or unique search fragment.")
	alias.add_argument("--registry", type=Path, default=None, help="MaramaRoute model registry JSON.")
	alias.add_argument("--state-dir", type=Path, default=None, help="MaramaRoute state directory.")
	alias.set_defaults(handler=_alias)

	favorite = subparsers.add_parser("favorite", help="Mark a LumynaX model as a favorite.")
	favorite.add_argument("model", help="Model id, repo id, or unique search fragment.")
	favorite.add_argument("--registry", type=Path, default=None, help="MaramaRoute model registry JSON.")
	favorite.add_argument("--state-dir", type=Path, default=None, help="MaramaRoute state directory.")
	favorite.set_defaults(handler=_alias, alias_action="favorite", name=None)

	bench = subparsers.add_parser("bench", help="Benchmark a pulled local GGUF model or dry-run the benchmark plan.")
	bench.add_argument("model", help="Model id, alias, repo id, or unique search fragment.")
	bench.add_argument("--registry", type=Path, default=None, help="MaramaRoute model registry JSON.")
	bench.add_argument("--cache-dir", type=Path, default=default_cache_root(), help="Local model cache directory.")
	bench.add_argument("--prompt-text", default="")
	bench.add_argument("--max-tokens", type=int, default=64)
	bench.add_argument("--pull", action=argparse.BooleanOptionalAction, default=False)
	bench.add_argument("--dry-run", action=argparse.BooleanOptionalAction, default=True)
	bench.set_defaults(handler=_bench)

	eval_cmd = subparsers.add_parser("eval", help="Run deterministic MaramaRoute routing evals.")
	eval_cmd.add_argument("--registry", type=Path, default=None, help="MaramaRoute model registry JSON.")
	eval_cmd.add_argument("--suite", default="routing")
	eval_cmd.add_argument("--allow-blocked-exit-zero", action=argparse.BooleanOptionalAction, default=False)
	eval_cmd.set_defaults(handler=_eval)

	audit = subparsers.add_parser("audit", help="List, show, export, or record routing audit receipts.")
	audit.add_argument("audit_action", choices=["list", "show", "export", "record"])
	audit.add_argument("receipt", nargs="?", help="Receipt id for `show`.")
	audit.add_argument("--registry", type=Path, default=None, help="MaramaRoute model registry JSON.")
	audit.add_argument("--request", type=Path, default=None, help="Routing request JSON for `record`.")
	audit.add_argument("--output", type=Path, default=Path("marama-route-audit.json"))
	audit.add_argument("--state-dir", type=Path, default=None, help="MaramaRoute state directory.")
	audit.set_defaults(handler=_audit)

	opencode = subparsers.add_parser(
	"opencode-config",
	help="Emit an OpenCode-compatible MaramaRoute provider config.",
	)
	opencode.add_argument("--registry", type=Path, default=None, help="MaramaRoute model registry JSON.")
	opencode.add_argument("--base-url", default="http://127.0.0.1:8787/v1")
	opencode.set_defaults(handler=_opencode_config)

	agent = subparsers.add_parser(
	"agent-config",
	aliases=["agent"],
	help="Emit command-bridge config for coding agents, local gateways, or HPE/Slurm jobs.",
	)
	agent.add_argument("--registry", type=Path, default=None, help="MaramaRoute model registry JSON.")
	agent.add_argument("--target", default="generic", choices=_AGENT_TARGET_CHOICES)
	agent.add_argument("--model", default="", help="Default model id, repo id, or unique search fragment.")
	agent.add_argument("--base-url", default="http://127.0.0.1:8787/v1")
	agent.add_argument("--host", default="127.0.0.1")
	agent.add_argument("--port", type=int, default=8787)
	agent.add_argument("--cache-dir", type=Path, default=None)
	agent.add_argument("--output", type=Path, default=None)
	agent.set_defaults(handler=_agent_config)

	agent_init = subparsers.add_parser("agent-init", help="Write local agent workspace files for MaramaRoute.")
	agent_init.add_argument("model", nargs="?", help="Optional default model id, repo id, alias, or search fragment.")
	agent_init.add_argument("--model", dest="model_option", default="", help="Default model id, repo id, alias, or search fragment.")
	agent_init.add_argument("--registry", type=Path, default=None, help="MaramaRoute model registry JSON.")
	agent_init.add_argument("--target", default="claude-code", choices=_AGENT_TARGET_CHOICES)
	agent_init.add_argument("--output-dir", type=Path, default=Path("."))
	agent_init.add_argument("--base-url", default="http://127.0.0.1:8787/v1")
	agent_init.add_argument("--host", default="127.0.0.1")
	agent_init.add_argument("--port", type=int, default=8787)
	agent_init.add_argument("--cache-dir", type=Path, default=default_cache_root())
	agent_init.set_defaults(handler=_agent_init)

	agent_doctor = subparsers.add_parser("agent-doctor", help="Check agent bridge config and local gateway readiness.")
	agent_doctor.add_argument("--registry", type=Path, default=None, help="MaramaRoute model registry JSON.")
	agent_doctor.add_argument("--target", default="claude-code", choices=_AGENT_TARGET_CHOICES)
	agent_doctor.add_argument("--model", default="", help="Default model id, repo id, alias, or search fragment.")
	agent_doctor.add_argument("--base-url", default="http://127.0.0.1:8787/v1")
	agent_doctor.add_argument("--host", default="127.0.0.1")
	agent_doctor.add_argument("--port", type=int, default=8787)
	agent_doctor.add_argument("--cache-dir", type=Path, default=default_cache_root())
	agent_doctor.add_argument("--health-check", action=argparse.BooleanOptionalAction, default=False)
	agent_doctor.add_argument("--allow-warnings", action=argparse.BooleanOptionalAction, default=True)
	agent_doctor.set_defaults(handler=_agent_doctor)

	hpe = subparsers.add_parser("hpe-job", help="Emit an HPE/HPC Slurm job script for MaramaRoute.")
	hpe.add_argument("model", nargs="?", help="Model id, repo id, or unique search fragment.")
	hpe.add_argument("--registry", type=Path, default=None, help="MaramaRoute model registry JSON.")
	hpe.add_argument("--mode", choices=["serve", "pull", "run"], default="serve")
	hpe.add_argument("--prompt-text", default="Say kia ora in one sentence.")
	hpe.add_argument("--port", type=int, default=8787)
	hpe.add_argument("--backend", choices=["auto", "local-live", "vllm", "nim", "nemo", "external"], default="auto")
	hpe.add_argument("--backend-port", type=int, default=8000)
	hpe.add_argument("--backend-base-url", default="")
	hpe.add_argument("--backend-model", default="")
	hpe.add_argument("--backend-command", default="", help="Optional command to start a NIM/NeMo/external backend before MaramaRoute.")
	hpe.add_argument("--api-key-env", default="")
	hpe.add_argument("--vllm-args", default="", help="Extra vLLM serve args, for example '--tensor-parallel-size 2'.")
	hpe.add_argument("--cache-dir", default="$SCRATCH/marama-route/models")
	hpe.add_argument("--job-name", default="marama-route")
	hpe.add_argument("--partition", default="")
	hpe.add_argument("--time", default="02:00:00")
	hpe.add_argument("--cpus", type=int, default=8)
	hpe.add_argument("--memory", default="32G")
	hpe.add_argument("--gpus", type=int, default=0)
	hpe.add_argument("--output", type=Path, default=None)
	hpe.set_defaults(handler=_hpe_job)

	hpe_ops = subparsers.add_parser("hpe", help="Prepare or inspect HPE/HPC MaramaRoute workflows.")
	hpe_ops.add_argument("hpe_action", choices=["plan", "init", "submit", "tunnel", "status"])
	hpe_ops.add_argument("model", nargs="?", help="Optional model id, repo id, alias, or search fragment.")
	hpe_ops.add_argument("--registry", type=Path, default=None, help="MaramaRoute model registry JSON.")
	hpe_ops.add_argument("--output-dir", type=Path, default=Path("marama-route-hpe"))
	hpe_ops.add_argument("--port", type=int, default=8787)
	hpe_ops.add_argument("--backend", choices=["auto", "local-live", "vllm", "nim", "nemo", "external"], default="auto")
	hpe_ops.add_argument("--backend-port", type=int, default=8000)
	hpe_ops.add_argument("--backend-base-url", default="")
	hpe_ops.add_argument("--backend-model", default="")
	hpe_ops.add_argument("--backend-command", default="", help="Optional command to start a NIM/NeMo/external backend before MaramaRoute.")
	hpe_ops.add_argument("--api-key-env", default="")
	hpe_ops.add_argument("--vllm-args", default="", help="Extra vLLM serve args, for example '--tensor-parallel-size 2'.")
	hpe_ops.add_argument("--partition", default="")
	hpe_ops.add_argument("--time", default="02:00:00")
	hpe_ops.add_argument("--memory", default="32G")
	hpe_ops.add_argument("--gpus", type=int, default=0)
	hpe_ops.add_argument("--execute", action=argparse.BooleanOptionalAction, default=False)
	hpe_ops.set_defaults(handler=_hpe)

	update_registry = subparsers.add_parser("update-registry", help="Download a fresh registry JSON from Hugging Face.")
	update_registry.add_argument("--registry", type=Path, default=None, help="Local registry JSON to diff against.")
	update_registry.add_argument("--repo-id", default="AbteeXAILab/marama-route")
	update_registry.add_argument("--filename", default="configs/lumynax_model_registry.json")
	update_registry.add_argument("--output", type=Path, default=None)
	update_registry.add_argument("--dry-run", action=argparse.BooleanOptionalAction, default=True)
	update_registry.add_argument("--diff", action=argparse.BooleanOptionalAction, default=False)
	update_registry.set_defaults(handler=_update_registry)

	pull = subparsers.add_parser(
	"pull",
	help="Download a LumynaX model artifact from Hugging Face into the local MaramaRoute cache.",
	)
	pull.add_argument("model", nargs="?", help="Model id, repo id, or unique search fragment.")
	pull.add_argument("--registry", type=Path, default=None, help="MaramaRoute model registry JSON.")
	pull.add_argument("--cache-dir", type=Path, default=default_cache_root(), help="Local model cache directory.")
	pull.add_argument("--search", default="", help="Batch-pull models matching text.")
	pull.add_argument("--family", default="", help="Batch-pull models matching a family or tag.")
	pull.add_argument("--runtime", default="", help="Batch-pull models for one runtime.")
	pull.add_argument("--limit", type=int, default=0, help="Maximum models for batch pull; 0 means all matches.")
	pull.add_argument(
	"--chat-only",
	action=argparse.BooleanOptionalAction,
	default=False,
	help="Restrict batch pulls to direct local GGUF chat models.",
	)
	pull.add_argument("--yes", action=argparse.BooleanOptionalAction, default=False, help="Confirm batch pulls.")
	pull.add_argument("--all-files", action=argparse.BooleanOptionalAction, default=False)
	pull.add_argument("--force", action=argparse.BooleanOptionalAction, default=False)
	pull.add_argument("--estimate", action=argparse.BooleanOptionalAction, default=False)
	pull.add_argument("--remote-sizes", action=argparse.BooleanOptionalAction, default=False)
	pull.add_argument("--verify", action=argparse.BooleanOptionalAction, default=False)
	pull.add_argument("--deep-verify", action=argparse.BooleanOptionalAction, default=False)
	pull.add_argument("--write-hashes", action=argparse.BooleanOptionalAction, default=False)
	pull.add_argument("--dry-run", action=argparse.BooleanOptionalAction, default=False)
	pull.set_defaults(handler=_pull)

	local = subparsers.add_parser("local", help="List models already pulled into the local MaramaRoute cache.")
	local.add_argument("--cache-dir", type=Path, default=default_cache_root(), help="Local model cache directory.")
	local.set_defaults(handler=_local)

	run = subparsers.add_parser("run", help="Run or chat with a pulled LumynaX model.")
	_add_chat_arguments(run)
	run.set_defaults(handler=_run)

	ui = subparsers.add_parser(
	"ui",
	help="Launch the local MaramaRoute browser platform.",
	)
	ui.add_argument("--registry", type=Path, default=None, help="MaramaRoute model registry JSON.")
	ui.add_argument("--host", type=str, default="127.0.0.1")
	ui.add_argument("--port", type=int, default=8787)
	ui.add_argument("--open", action=argparse.BooleanOptionalAction, default=False)
	ui.add_argument("--smoke", action=argparse.BooleanOptionalAction, default=False)
	ui.set_defaults(handler=_ui)

	serve = subparsers.add_parser(
	"serve",
	help="Run the local MaramaRoute gateway and browser console.",
	)
	serve.add_argument("--registry", type=Path, default=None, help="MaramaRoute model registry JSON.")
	serve.add_argument("--config", type=Path, default=None, help="Gateway backend config JSON.")
	serve.add_argument("--host", type=str, default="127.0.0.1")
	serve.add_argument("--port", type=int, default=8787)
	serve.add_argument("--open", action=argparse.BooleanOptionalAction, default=False)
	serve.add_argument("--smoke", action=argparse.BooleanOptionalAction, default=False)
	serve.add_argument("--live-local", action=argparse.BooleanOptionalAction, default=False)
	serve.add_argument("--pull-missing", action=argparse.BooleanOptionalAction, default=False)
	serve.add_argument("--cache-dir", type=Path, default=default_cache_root(), help="Local model cache directory.")
	serve.add_argument("--context-tokens", type=int, default=None)
	serve.add_argument("--threads", type=int, default=None)
	serve.set_defaults(handler=_serve)
	return parser


	def _add_chat_arguments(parser: argparse.ArgumentParser) -> None:
	    """Attach the model, prompt, and run options used by chat-style commands.

	    Mutates ``parser`` in place: two positionals (``model`` and ``prompt``)
	    are registered first, followed by the optional flags. Nothing is returned.

	    Args:
	        parser: The argparse parser (or subparser) to extend.
	    """
	    add = parser.add_argument
	    # BooleanOptionalAction registers both the ``--flag`` and ``--no-flag`` spellings.
	    on_off = argparse.BooleanOptionalAction

	    # Positionals; both may be omitted (``?`` and ``*``).
	    add("model", nargs="?", help="Model id, repo id, or unique search fragment.")
	    add("prompt", nargs="*", help="Prompt text. Omit it to enter chat mode.")
	    add("--prompt-text", default="", help="Prompt text, useful when avoiding shell quoting.")

	    # Registry and cache locations, plus the opt-in download switch.
	    add("--registry", type=Path, default=None, help="MaramaRoute model registry JSON.")
	    add("--cache-dir", type=Path, default=default_cache_root(), help="Local model cache directory.")
	    add("--pull", action=on_off, default=False, help="Download before running.")

	    # Numeric options; the two defaulting to None carry no value unless the flag is given.
	    add("--max-tokens", type=int, default=192)
	    add("--temperature", type=float, default=0.2)
	    add("--context-tokens", type=int, default=None)
	    add("--threads", type=int, default=None)

	    # Remaining on/off switches, all disabled by default.
	    add("--stream", action=on_off, default=False)
	    add("--dry-run", action=on_off, default=False)


	def main(argv: Sequence[str] | None = None) -> int:
	    """Parse command-line arguments and dispatch to the selected subcommand.

	    Args:
	        argv: Argument list without the program name. When ``None``, the
	            arguments are taken from ``sys.argv[1:]``.

	    Returns:
	        The handler's result converted with ``int()``; ``2`` when a
	        ``FileNotFoundError`` or ``ValueError`` is raised (its message is
	        printed to stderr); ``0`` after printing help when no subcommand
	        was selected. In the interactive fallback the value returned by
	        ``_conversation`` is passed through unchanged.
	    """
	    parser = build_parser()
	    raw_argv = list(argv) if argv is not None else sys.argv[1:]

	    # Two-word `agent <verb>` spellings are rewritten to the hyphenated
	    # subcommands that the parser actually registers.
	    agent_verbs = {
	        "init": "agent-init",
	        "doctor": "agent-doctor",
	        "status": "agent-doctor",
	        "check": "agent-doctor",
	        "config": "agent-config",
	        "export": "agent-config",
	    }
	    if len(raw_argv) >= 2 and raw_argv[0] == "agent" and raw_argv[1] in agent_verbs:
	        raw_argv = [agent_verbs[raw_argv[1]], *raw_argv[2:]]

	    args = parser.parse_args(raw_argv)
	    handler = getattr(args, "handler", None)

	    # One error boundary covers both the subcommand handlers and the
	    # interactive fallback, so either path reports these errors on stderr
	    # with exit code 2 instead of a traceback.
	    try:
	        if handler is not None:
	            return int(handler(args))

	        # No subcommand: start a conversation only for a real command-line
	        # invocation (argv not supplied by a caller) with no arguments and
	        # an interactive stdin.
	        if argv is None and not raw_argv and sys.stdin.isatty():
	            models = load_model_registry(default_registry_path())
	            # Same default values as the matching options in `_add_chat_arguments`.
	            defaults = argparse.Namespace(
	                model=None,
	                registry=None,
	                cache_dir=default_cache_root(),
	                pull=False,
	                max_tokens=192,
	                temperature=0.2,
	                context_tokens=None,
	                threads=None,
	                stream=False,
	                dry_run=False,
	            )
	            return _conversation(defaults, models)
	    except (FileNotFoundError, ValueError) as exc:
	        print(str(exc), file=sys.stderr)
	        return 2

	    parser.print_help()
	    return 0


	# Script entry point: run the CLI and use main()'s return value as the exit status.
	if __name__ == "__main__":
	    sys.exit(main())