agentic-space-factory-etheroi

Paused

App Files Files Community

agentic-space-factory-etheroi / app.py

fffiloni

Upload 5 files

a584da0 verified 26 days ago

Raw

History Blame Contribute Delete

22.4 kB

	from __future__ import annotations

	import json
	from typing import Any

	import gradio as gr

	from src.bucket import check_user_bucket, create_user_bucket, read_run_bundle
	from src.config import settings, user_bucket_source
	from src.jobs import (
	fetch_recent_logs_safe,
	inspect_job_safe,
	launch_universal_model_card_job,
	launch_validate_existing_space_job,
	)
	from src.runs import make_run_id, validate_run_id
	from src.security import redact


	# Markdown text rendered at the top of the UI by build_demo().
	# This is an f-string: the only interpolated value is `settings.bucket_name`
	# in the final "Run Bucket" paragraph, so the text is fixed at import time.
	APP_DESCRIPTION = f"""
	# Agentic Space Factory

	Turn a Hugging Face model card into a private, testable Gradio Space using an agentic HF Job.

	## Recommended workflow

	```text
	1. Build from model card
	→ creates a private Space
	→ attempts ZeroGPU first
	→ falls back to a fixed GPU if automatic hardware assignment is available
	→ otherwise marks the run as manual_hardware_required

	2. If hardware had to be changed manually
	→ set the GPU in the generated Space Settings
	→ run Validate existing Space
	→ smoke-test generation
	→ measure latency
	→ store the output artifact in the Bucket
	```

	Each launch returns quick links to open the HF Job, generated Space, Space settings, and run artifacts in new tabs.

	## Honest guarantees

	- Spaces are private by default.
	- Nothing is published automatically.
	- Runs, reports, generated files, traces, validation results, and artifacts are written to your private Bucket.
	- Success is based on the deployed Space, not only generated code.
	- ZeroGPU and fixed-GPU upgrades are best-effort through OAuth; manual hardware selection is an expected fallback.

	## Limits

	This app attempts model-card builds; it does not guarantee that every model will run. Multi-GPU models, Docker-only apps, custom CUDA/FlashAttention stacks, gated models, very large models, or models with unclear documentation may produce `technical_blocker`, `health_only`, or `manual_hardware_required` instead of a full inference success.

	Run Bucket: by default each signed-in user writes to their own private bucket: `<username>/{settings.bucket_name}`. Use Check run bucket or Create private run bucket before launching Jobs.
	"""


	def _profile_username(profile: Any) -> str \| None:
	if profile is None:
	return None
	if isinstance(profile, dict):
	return profile.get("preferred_username") or profile.get("username") or profile.get("name")
	return getattr(profile, "preferred_username", None) or getattr(profile, "username", None) or getattr(profile, "name", None)


	def _token_value(oauth_token: Any) -> str \| None:
	if oauth_token is None:
	return None
	if isinstance(oauth_token, str):
	return oauth_token
	return getattr(oauth_token, "token", None) or getattr(oauth_token, "access_token", None)


	def get_login_status(profile: gr.OAuthProfile | None) -> str:
	    """Return the Markdown line describing the current sign-in state.

	    ``profile`` is not wired through ``inputs``; it is expected to be supplied
	    by Gradio from the ``gr.OAuthProfile`` annotation, so the annotation must
	    be kept as is.

	    Returns:
	        A prompt to sign in when no username is available, otherwise a message
	        naming the signed-in user.
	    """
	    username = _profile_username(profile)
	    if not username:
	        return "Not signed in. Use the Hugging Face login button before launching a Job."
	    return f"Signed in as {username}. Generated Spaces are created under `{username}/...` and remain private."




	def _safe_url(url: str \| None) -> str:
	return (url or "").strip()


	def _run_artifacts_url(run_id: str \| None, bucket_source: str \| None) -> str:
	if not run_id or not bucket_source:
	return ""
	return f"https://huggingface.co/buckets/{bucket_source}/tree/main/runs/{run_id}"


	def _button_link(label: str, url: str | None):
	    """Return a ``gr.update`` that turns a button into a link to ``url``.

	    The button is made visible only when a non-empty URL is available; with no
	    URL the link is cleared (``None``) and the button is hidden.
	    """
	    url = _safe_url(url)
	    return gr.update(value=label, link=url or None, visible=bool(url))


	def _job_button(job_url: str | None):
	    """Return the update for the "Open HF Job" quick-link button."""
	    return _button_link("Open HF Job ↗", job_url)


	def _space_button(target_space_url: str | None):
	    """Return the update for the "Open target Space" quick-link button."""
	    return _button_link("Open target Space ↗", target_space_url)


	def _settings_button(target_space_url: str | None):
	    """Return the update for the "Open Space settings" quick-link button.

	    The settings URL is the Space URL with ``/settings`` appended. Trailing
	    slashes on the Space URL are removed first so the result never contains
	    ``//settings``. With no Space URL the button is hidden.
	    """
	    base_url = _safe_url(target_space_url).rstrip("/")
	    return _button_link("Open Space settings ↗", f"{base_url}/settings" if base_url else "")


	def _artifacts_button(run_id: str | None, bucket_source: str | None):
	    """Return the update for the "Open run artifacts" quick-link button.

	    The button is hidden when the run ID or the bucket source is missing,
	    because no artifacts URL can be built in that case.
	    """
	    return _button_link("Open run artifacts ↗", _run_artifacts_url(run_id, bucket_source))




	def _format_bucket_status(status: dict[str, Any]) -> str:
	source = status.get("bucket_source") or "unknown"
	uri = status.get("bucket_uri") or ""
	if status.get("ok"):
	return (
	f"✅ Run bucket ready: `{source}`\n\n"
	f"Bucket URI: `{uri}`\n\n"
	"New Jobs will mount this private bucket and write runs under `runs/<run_id>/`."
	)
	if status.get("exists") is False:
	return (
	f"⚠️ Run bucket not found: `{source}`\n\n"
	"Click Create private run bucket before launching a Job, or create it manually in Hugging Face Storage Buckets."
	)
	return (
	f"❌ Could not check run bucket: `{source}`\n\n"
	f"```text\n{redact(str(status.get('error') or 'Unknown error'))}\n```"
	)


	def check_run_bucket_ui(
	    bucket_name: str,
	    profile: gr.OAuthProfile | None,
	    oauth_token: gr.OAuthToken | None,
	) -> str:
	    """Check the signed-in user's run bucket and return a Markdown status.

	    Args:
	        bucket_name: Bucket name taken from the "Run Bucket name" textbox.
	        profile: OAuth profile of the current user (not wired via ``inputs``).
	        oauth_token: OAuth token of the current user (not wired via ``inputs``).

	    Raises:
	        gr.Error: If no username or token can be extracted (user not signed in).
	    """
	    username = _profile_username(profile)
	    token = _token_value(oauth_token)
	    if not username or not token:
	        raise gr.Error("Please sign in with Hugging Face first.")
	    status = check_user_bucket(username=username, bucket_name=bucket_name, token=token)
	    return _format_bucket_status(status)


	def create_run_bucket_ui(
	    bucket_name: str,
	    profile: gr.OAuthProfile | None,
	    oauth_token: gr.OAuthToken | None,
	) -> str:
	    """Create the signed-in user's run bucket and return a Markdown status.

	    Args:
	        bucket_name: Bucket name taken from the "Run Bucket name" textbox.
	        profile: OAuth profile of the current user (not wired via ``inputs``).
	        oauth_token: OAuth token of the current user (not wired via ``inputs``).

	    Raises:
	        gr.Error: If no username or token can be extracted (user not signed in).
	    """
	    username = _profile_username(profile)
	    token = _token_value(oauth_token)
	    if not username or not token:
	        raise gr.Error("Please sign in with Hugging Face first.")
	    status = create_user_bucket(username=username, bucket_name=bucket_name, token=token)
	    return _format_bucket_status(status)


	def propose_universal_run_id() -> str:
	    """Return a new run ID for a model-card build (``make_run_id("universal")``)."""
	    fresh_run_id = make_run_id("universal")
	    return fresh_run_id


	def propose_validate_run_id() -> str:
	    """Return a new run ID for a validation run (``make_run_id("validate")``)."""
	    fresh_run_id = make_run_id("validate")
	    return fresh_run_id


	def launch_universal_model_card_job_ui(
	    requested_run_id: str,
	    model_id: str,
	    target_space_name: str,
	    pi_model: str,
	    preferred_hardware: str,
	    allow_fixed_gpu_fallback: bool,
	    fallback_hardware: str,
	    implementation_mode: str,
	    bucket_name: str,
	    profile: gr.OAuthProfile | None,
	    oauth_token: gr.OAuthToken | None,
	) -> tuple[str, str, str, str, str, Any, Any, Any, Any, str]:
	    """Launch the "build from model card" job and return values for the UI.

	    The first nine parameters come from the Build tab widgets; ``profile`` and
	    ``oauth_token`` are not wired via ``inputs``.

	    Returns:
	        A 10-tuple matching the ``outputs`` list of the Build button: run ID,
	        job ID, job URL, target Space, target Space URL, four quick-link button
	        updates (job, Space, settings, artifacts) and the launch result as
	        pretty-printed JSON.

	    Raises:
	        gr.Error: If the user is not signed in.
	    """
	    username = _profile_username(profile)
	    token = _token_value(oauth_token)
	    if not username or not token:
	        raise gr.Error("Please sign in with Hugging Face first. OAuth profile/token is missing.")

	    # A blank or whitespace-only textbox means "generate an ID for me".
	    run_id = validate_run_id((requested_run_id or "").strip() or propose_universal_run_id())
	    result = launch_universal_model_card_job(
	        token=token,
	        username=username,
	        target_slug=target_space_name,
	        model_id=model_id,
	        pi_model=pi_model,
	        preferred_space_hardware=preferred_hardware,
	        fallback_space_hardware=fallback_hardware,
	        allow_fixed_gpu_fallback=allow_fixed_gpu_fallback,
	        implementation_mode=implementation_mode,
	        run_id=run_id,
	        bucket_name=bucket_name,
	    )
	    job_url = result.get("job_url") or ""
	    target_space_url = result.get("target_space_url") or ""
	    # Fall back to the bucket derived from the user when the launcher did not report one.
	    bucket_source = result.get("bucket_source") or user_bucket_source(username=username, bucket_name=bucket_name)
	    return (
	        run_id,
	        # .get keeps the raw launch result visible even if no job ID came back.
	        result.get("job_id") or "",
	        job_url,
	        result.get("target_space") or "",
	        target_space_url,
	        _job_button(job_url),
	        _space_button(target_space_url),
	        _settings_button(target_space_url),
	        _artifacts_button(run_id, bucket_source),
	        json.dumps(result, indent=2),
	    )


	def launch_validate_existing_space_job_ui(
	    requested_run_id: str,
	    target_space_id: str,
	    api_name: str,
	    test_args_json: str,
	    test_kwargs_json: str,
	    expected_output_type: str,
	    live_timeout_seconds: float,
	    bucket_name: str,
	    profile: gr.OAuthProfile | None,
	    oauth_token: gr.OAuthToken | None,
	) -> tuple[str, str, str, str, Any, Any, Any, Any, str]:
	    """Launch the "validate existing Space" job and return values for the UI.

	    The first eight parameters come from the Validate tab widgets; ``profile``
	    and ``oauth_token`` are not wired via ``inputs``.

	    Empty test args/kwargs default to ``"[]"`` / ``"{}"``, and those normalized
	    strings are what gets forwarded to the job. A falsy timeout defaults to
	    1800 seconds.

	    Returns:
	        A 9-tuple matching the ``outputs`` list of the Validate button: run ID,
	        job ID, job URL, target Space URL, four quick-link button updates (job,
	        Space, settings, artifacts) and the launch result as pretty-printed JSON.

	    Raises:
	        gr.Error: If the user is not signed in, if the test args/kwargs are not
	            valid JSON, or if they are not a JSON list / JSON object.
	    """
	    username = _profile_username(profile)
	    token = _token_value(oauth_token)
	    if not username or not token:
	        raise gr.Error("Please sign in with Hugging Face first. OAuth profile/token is missing.")

	    # A blank or whitespace-only textbox means "generate an ID for me".
	    run_id = validate_run_id((requested_run_id or "").strip() or propose_validate_run_id())

	    # Normalize once so the job receives exactly the text that was validated.
	    args_json = (test_args_json or "").strip() or "[]"
	    kwargs_json = (test_kwargs_json or "").strip() or "{}"
	    try:
	        parsed_args = json.loads(args_json)
	        parsed_kwargs = json.loads(kwargs_json)
	    except (TypeError, ValueError) as exc:  # json.JSONDecodeError is a ValueError
	        raise gr.Error(f"Invalid JSON test args/kwargs: {exc}") from exc
	    if not isinstance(parsed_args, list):
	        raise gr.Error("Invalid JSON test args/kwargs: test args must be a JSON list.")
	    if not isinstance(parsed_kwargs, dict):
	        raise gr.Error("Invalid JSON test args/kwargs: test kwargs must be a JSON object.")

	    result = launch_validate_existing_space_job(
	        token=token,
	        username=username,
	        target_space_id=target_space_id,
	        api_name=api_name,
	        test_args_json=args_json,
	        test_kwargs_json=kwargs_json,
	        expected_output_type=expected_output_type,
	        live_timeout_seconds=int(live_timeout_seconds or 1800),
	        run_id=run_id,
	        bucket_name=bucket_name,
	    )
	    job_url = result.get("job_url") or ""

	    target_space_url = result.get("target_space_url") or ""
	    if not target_space_url:
	        # The input accepts either "owner/name" or a full URL; only prefix the
	        # Hub base when we were not already given a URL.
	        space_ref = (result.get("target_space") or target_space_id or "").strip()
	        if space_ref.startswith(("http://", "https://")):
	            target_space_url = space_ref
	        elif space_ref:
	            target_space_url = f"https://huggingface.co/spaces/{space_ref}"

	    # Fall back to the bucket derived from the user when the launcher did not report one.
	    bucket_source = result.get("bucket_source") or user_bucket_source(username=username, bucket_name=bucket_name)
	    return (
	        run_id,
	        # .get keeps the raw launch result visible even if no job ID came back.
	        result.get("job_id") or "",
	        job_url,
	        target_space_url,
	        _job_button(job_url),
	        _space_button(target_space_url),
	        _settings_button(target_space_url),
	        _artifacts_button(run_id, bucket_source),
	        json.dumps(result, indent=2),
	    )


	def refresh_run_ui(
	    run_id: str,
	    job_id: str,
	    bucket_name: str,
	    profile: gr.OAuthProfile | None,
	    oauth_token: gr.OAuthToken | None,
	) -> tuple[str, str, str, str]:
	    """Fetch the current state of a run and format it for the status tabs.

	    Reads the run bundle from the user's bucket and, when a job ID is given,
	    also the job info and recent (redacted) logs.

	    Returns:
	        ``(state_text, events_text, report_text, job_text)``: state as
	        pretty-printed JSON, events as one JSON document per line, the report
	        text, and the job info as JSON followed by recent logs when available.
	        Placeholder messages are used when events or report are not there yet.

	    Raises:
	        gr.Error: If the user is not signed in.
	    """
	    username = _profile_username(profile)
	    token = _token_value(oauth_token)
	    if not username or not token:
	        raise gr.Error("Please sign in with Hugging Face first.")
	    run_id = validate_run_id(run_id)
	    # The Job ID box is user-editable; ignore stray whitespace from pasting.
	    job_id = (job_id or "").strip()
	    bucket_source = user_bucket_source(username=username, bucket_name=bucket_name)

	    bundle = read_run_bundle(run_id, bucket_source=bucket_source, token=token)
	    job_info = inspect_job_safe(job_id, token=token) if job_id else {}
	    logs = redact(fetch_recent_logs_safe(job_id, token=token)) if job_id else ""

	    state_text = json.dumps(bundle.get("state") or {"status": "not_available_yet"}, indent=2, ensure_ascii=False)
	    events = bundle.get("events") or []
	    events_text = "\n".join(json.dumps(event, ensure_ascii=False) for event in events) or "No events found yet. The Job may still be scheduling."
	    report_text = bundle.get("report") or "No report found yet. Refresh after the Job has started writing to the Bucket."
	    job_text = json.dumps(job_info, indent=2, ensure_ascii=False)
	    if logs:
	        job_text += "\n\nRecent job logs:\n" + logs
	    return state_text, events_text, report_text, job_text


	def _quick_link_buttons() -> tuple[gr.Button, gr.Button, gr.Button, gr.Button]:
	    """Create one row of four hidden link buttons: job, Space, settings, artifacts."""
	    with gr.Row():
	        job_button = gr.Button("Open HF Job ↗", link=None, link_target="_blank", visible=False)
	        space_button = gr.Button("Open target Space ↗", link=None, link_target="_blank", visible=False)
	        settings_button = gr.Button("Open Space settings ↗", link=None, link_target="_blank", visible=False)
	        artifacts_button = gr.Button("Open run artifacts ↗", link=None, link_target="_blank", visible=False)
	    return job_button, space_button, settings_button, artifacts_button


	def _run_status_tabs(prefix: str) -> tuple[gr.Code, gr.Code, gr.Markdown, gr.Code]:
	    """Create the state/events/report/job tabs, each titled with ``prefix``."""
	    with gr.Tab(f"{prefix} state"):
	        state = gr.Code(label="state.json", language="json")
	    with gr.Tab(f"{prefix} events"):
	        events = gr.Code(label="events.jsonl", language="json")
	    with gr.Tab(f"{prefix} report"):
	        report = gr.Markdown()
	    with gr.Tab(f"{prefix} job"):
	        job_info = gr.Code(label="Job info/logs", language="json")
	    return state, events, report, job_info


	def _render_storage_section() -> gr.Textbox:
	    """Render the "Run storage" section and return the bucket-name textbox."""
	    gr.Markdown("## Run storage")
	    gr.Markdown(
	        "Runs are stored in a private Storage Bucket under the signed-in user's namespace. "
	        "Create it once here, then use the same bucket name for Build and Validate."
	    )
	    global_bucket_name = gr.Textbox(
	        label="Run Bucket name",
	        value=settings.bucket_name,
	        # Show the configured default rather than a hard-coded name, so the
	        # hint cannot drift from the textbox's initial value.
	        info=f"The app uses <your-username>/<bucket-name>. Default: {settings.bucket_name}.",
	    )
	    with gr.Row():
	        check_bucket_btn = gr.Button("Check run bucket")
	        create_bucket_btn = gr.Button("Create private run bucket", variant="primary")
	    bucket_status = gr.Markdown("Sign in, then check or create your private run bucket before launching Jobs.")
	    check_bucket_btn.click(fn=check_run_bucket_ui, inputs=[global_bucket_name], outputs=bucket_status)
	    create_bucket_btn.click(fn=create_run_bucket_ui, inputs=[global_bucket_name], outputs=bucket_status)
	    return global_bucket_name


	def _render_build_tab(global_bucket_name: gr.Textbox) -> None:
	    """Render the "Build from model card" tab and wire its launch/refresh events."""
	    with gr.Tab("Build from model card"):
	        gr.Markdown(
	            """
	Paste a Hugging Face model ID or model-card URL. The worker creates a private Space, asks Pi + Qwen Coder to build the best Gradio app it can, attempts ZeroGPU first, then a fixed-GPU fallback if enabled. If automatic hardware assignment fails, set the hardware manually in the generated Space settings and run the validation tab.
	"""
	        )
	        with gr.Row():
	            # Passing the function (not its result) as `value` lets Gradio call it for the initial value.
	            build_run_id = gr.Textbox(label="Run ID", value=propose_universal_run_id, interactive=True)
	            gr.Button("Generate new run id").click(fn=propose_universal_run_id, inputs=None, outputs=build_run_id)
	        model_id = gr.Textbox(
	            label="Model card URL or model ID",
	            value="Tongyi-MAI/Z-Image-Turbo",
	            info="Examples: owner/model, https://huggingface.co/owner/model",
	        )
	        target_space_name = gr.Textbox(
	            label="Target Space name",
	            placeholder="e.g. space-factory-z-image-v1",
	            info="Use a fresh name. The Space is created under your username and remains private.",
	        )
	        pi_model = gr.Textbox(
	            label="Pi model",
	            value="Qwen/Qwen3-Coder-Next",
	            info="Model used by Pi through Hugging Face Inference Providers.",
	        )
	        implementation_mode = gr.Dropdown(
	            label="Implementation mode",
	            choices=["full-inference-gated", "full-inference-attempt", "safe-scaffold"],
	            value="full-inference-gated",
	            info="Gated mode forbids placeholder success; impossible models must produce technical blockers.",
	        )
	        with gr.Row():
	            preferred_hw = gr.Dropdown(
	                label="Preferred Space hardware",
	                choices=["zero-a10g", "cpu-basic", "t4-small", "t4-medium", "a10g-large", "l40sx1", "a100-large", "h200"],
	                value="zero-a10g",
	                info="ZeroGPU is attempted first by the worker. If your quota is exceeded, use manual hardware selection after generation.",
	            )
	            allow_fallback = gr.Checkbox(label="Allow fixed GPU fallback", value=True)
	            fallback_hw = gr.Dropdown(
	                label="Fallback Space hardware",
	                choices=["l40sx1", "a10g-large", "a100-large", "h200", "t4-medium"],
	                value="l40sx1",
	            )

	        build_btn = gr.Button("Build private Space", variant="primary")
	        # Job ID stays editable so an existing job can be pasted in and refreshed.
	        build_job_id = gr.Textbox(label="Job ID", interactive=True)
	        build_job_url = gr.Textbox(label="Job URL", interactive=False)
	        generated_space = gr.Textbox(label="Generated Space", interactive=False)
	        generated_space_url = gr.Textbox(label="Generated Space URL", interactive=False)
	        gr.Markdown("Quick links")
	        (
	            build_job_button,
	            build_space_button,
	            build_settings_button,
	            build_artifacts_button,
	        ) = _quick_link_buttons()
	        build_result = gr.Code(label="Launch result", language="json")

	        # The outputs order must match the tuple returned by the handler.
	        build_btn.click(
	            fn=launch_universal_model_card_job_ui,
	            inputs=[build_run_id, model_id, target_space_name, pi_model, preferred_hw, allow_fallback, fallback_hw, implementation_mode, global_bucket_name],
	            outputs=[
	                build_run_id,
	                build_job_id,
	                build_job_url,
	                generated_space,
	                generated_space_url,
	                build_job_button,
	                build_space_button,
	                build_settings_button,
	                build_artifacts_button,
	                build_result,
	            ],
	        )

	        build_refresh = gr.Button("Refresh build run status")
	        build_state, build_events, build_report, build_job_info = _run_status_tabs("Build")

	        build_refresh.click(fn=refresh_run_ui, inputs=[build_run_id, build_job_id, global_bucket_name], outputs=[build_state, build_events, build_report, build_job_info])


	def _render_validate_tab(global_bucket_name: gr.Textbox) -> None:
	    """Render the "Validate existing Space" tab and wire its launch/refresh events."""
	    with gr.Tab("Validate existing Space"):
	        gr.Markdown(
	            """
	Use this after the builder generated a Space, especially if you had to set the GPU manually. This job does not rerun Pi. It waits for the existing Space, calls a live generation endpoint, checks the output type, stores returned artifacts in the Bucket, measures latency, and recommends a conservative ZeroGPU duration.
	"""
	        )
	        with gr.Row():
	            # Passing the function (not its result) as `value` lets Gradio call it for the initial value.
	            validate_run_id = gr.Textbox(label="Run ID", value=propose_validate_run_id, interactive=True)
	            gr.Button("Generate new validation run id").click(fn=propose_validate_run_id, inputs=None, outputs=validate_run_id)
	        target_space = gr.Textbox(
	            label="Existing target Space",
	            placeholder="fffiloni/space-factory-... or https://huggingface.co/spaces/...",
	        )
	        with gr.Row():
	            api_name = gr.Textbox(label="Generation API name", value="/generate")
	            expected_type = gr.Dropdown(label="Expected output type", choices=["image", "video", "audio", "text", "any"], value="image")
	        test_args = gr.Code(label="Test args JSON list", language="json", value='["a cinematic robot cat astronaut, detailed, studio lighting"]')
	        test_kwargs = gr.Code(label="Test kwargs JSON object", language="json", value="{}")
	        timeout_s = gr.Number(label="Live wait timeout seconds", value=1800, precision=0)

	        validate_btn = gr.Button("Validate Space + smoke-test generation", variant="primary")
	        # Job ID stays editable so an existing job can be pasted in and refreshed.
	        validate_job_id = gr.Textbox(label="Job ID", interactive=True)
	        validate_job_url = gr.Textbox(label="Job URL", interactive=False)
	        validate_space_url = gr.Textbox(label="Target Space URL", interactive=False)
	        gr.Markdown("Quick links")
	        (
	            validate_job_button,
	            validate_space_button,
	            validate_settings_button,
	            validate_artifacts_button,
	        ) = _quick_link_buttons()
	        validate_result = gr.Code(label="Launch result", language="json")

	        # The outputs order must match the tuple returned by the handler.
	        validate_btn.click(
	            fn=launch_validate_existing_space_job_ui,
	            inputs=[validate_run_id, target_space, api_name, test_args, test_kwargs, expected_type, timeout_s, global_bucket_name],
	            outputs=[
	                validate_run_id,
	                validate_job_id,
	                validate_job_url,
	                validate_space_url,
	                validate_job_button,
	                validate_space_button,
	                validate_settings_button,
	                validate_artifacts_button,
	                validate_result,
	            ],
	        )

	        validate_refresh = gr.Button("Refresh validation run status")
	        validate_state, validate_events, validate_report, validate_job_info = _run_status_tabs("Validation")

	        validate_refresh.click(fn=refresh_run_ui, inputs=[validate_run_id, validate_job_id, global_bucket_name], outputs=[validate_state, validate_events, validate_report, validate_job_info])


	def _render_about_tab() -> None:
	    """Render the static "About & limits" tab."""
	    with gr.Tab("About & limits"):
	        gr.Markdown(
	            """
	## Result statuses

	- `full_inference_success`: a live generation smoke test returned the expected output type.
	- `manual_hardware_required`: the Space was generated but automatic ZeroGPU/fixed-GPU assignment failed; set hardware manually, then validate.
	- `full_inference_candidate_health_passed`: the Space boots and contains inference signals, but generation was not smoke-tested yet.
	- `health_only`: the Space boots, but no real inference path was validated.
	- `technical_blocker`: the agent found concrete blockers such as multi-GPU requirements, missing licenses, custom CUDA, or unclear usage.
	- `failed`: the build, runtime, or validation job failed.

	## Hardware policy

	The builder tries to create an app optimized for ZeroGPU when GPU is needed. It attempts ZeroGPU first, then a fixed-GPU fallback if enabled. Hardware assignment through OAuth may fail because of quota, billing, or permission limits; manual hardware selection is a supported path.

	## What this app cannot guarantee

	It cannot guarantee that every model card becomes a working Space. It cannot bypass model licenses, ZeroGPU quota, billing requirements, custom CUDA build failures, multi-GPU needs, or missing model documentation.
	"""
	        )


	def build_demo() -> gr.Blocks:
	    """Assemble the whole Gradio UI and return the ``gr.Blocks`` app.

	    Layout, top to bottom: description, login button and login status, the
	    run-storage section, then the Build, Validate and About tabs. The bucket
	    name textbox from the storage section is shared by both job tabs.
	    """
	    with gr.Blocks(title="Agentic Space Factory") as demo:
	        gr.Markdown(APP_DESCRIPTION)
	        gr.LoginButton()

	        login_status = gr.Markdown()
	        # Populate the login status on page load.
	        demo.load(fn=get_login_status, inputs=None, outputs=login_status)

	        global_bucket_name = _render_storage_section()
	        _render_build_tab(global_bucket_name)
	        _render_validate_tab(global_bucket_name)
	        _render_about_tab()

	    return demo


	# Build and launch the Gradio app only when this file is run as a script.
	if __name__ == "__main__":
	    build_demo().launch()