from __future__ import annotations import json from typing import Any import gradio as gr from src.bucket import check_user_bucket, create_user_bucket, read_run_bundle from src.config import settings, user_bucket_source from src.jobs import ( fetch_recent_logs_safe, inspect_job_safe, launch_universal_model_card_job, launch_validate_existing_space_job, ) from src.runs import make_run_id, validate_run_id from src.security import redact APP_DESCRIPTION = f""" # Agentic Space Factory Turn a Hugging Face model card into a **private, testable Gradio Space** using an agentic HF Job. ## Recommended workflow ```text 1. Build from model card → creates a private Space → attempts ZeroGPU first → falls back to a fixed GPU if automatic hardware assignment is available → otherwise marks the run as manual_hardware_required 2. If hardware had to be changed manually → set the GPU in the generated Space Settings → run Validate existing Space → smoke-test generation → measure latency → store the output artifact in the Bucket ``` Each launch returns quick links to open the HF Job, generated Space, Space settings, and run artifacts in new tabs. ## Honest guarantees - Spaces are private by default. - Nothing is published automatically. - Runs, reports, generated files, traces, validation results, and artifacts are written to your private Bucket. - Success is based on the deployed Space, not only generated code. - ZeroGPU and fixed-GPU upgrades are best-effort through OAuth; manual hardware selection is an expected fallback. ## Limits This app attempts model-card builds; it does not guarantee that every model will run. Multi-GPU models, Docker-only apps, custom CUDA/FlashAttention stacks, gated models, very large models, or models with unclear documentation may produce `technical_blocker`, `health_only`, or `manual_hardware_required` instead of a full inference success. Run Bucket: by default each signed-in user writes to their own private bucket: `/{settings.bucket_name}`. Use **Check run bucket** or **Create private run bucket** before launching Jobs. """ def _profile_username(profile: Any) -> str | None: if profile is None: return None if isinstance(profile, dict): return profile.get("preferred_username") or profile.get("username") or profile.get("name") return getattr(profile, "preferred_username", None) or getattr(profile, "username", None) or getattr(profile, "name", None) def _token_value(oauth_token: Any) -> str | None: if oauth_token is None: return None if isinstance(oauth_token, str): return oauth_token return getattr(oauth_token, "token", None) or getattr(oauth_token, "access_token", None) def get_login_status(profile: gr.OAuthProfile | None) -> str: username = _profile_username(profile) if not username: return "Not signed in. Use the Hugging Face login button before launching a Job." return f"Signed in as **{username}**. Generated Spaces are created under `{username}/...` and remain private." def _safe_url(url: str | None) -> str: return (url or "").strip() def _run_artifacts_url(run_id: str | None, bucket_source: str | None) -> str: if not run_id or not bucket_source: return "" return f"https://huggingface.co/buckets/{bucket_source}/tree/main/runs/{run_id}" def _button_link(label: str, url: str | None): url = _safe_url(url) return gr.update(value=label, link=url or None, visible=bool(url)) def _job_button(job_url: str | None): return _button_link("Open HF Job ↗", job_url) def _space_button(target_space_url: str | None): return _button_link("Open target Space ↗", target_space_url) def _settings_button(target_space_url: str | None): target_space_url = _safe_url(target_space_url) return _button_link("Open Space settings ↗", f"{target_space_url}/settings" if target_space_url else "") def _artifacts_button(run_id: str | None, bucket_source: str | None): return _button_link("Open run artifacts ↗", _run_artifacts_url(run_id, bucket_source)) def _format_bucket_status(status: dict[str, Any]) -> str: source = status.get("bucket_source") or "unknown" uri = status.get("bucket_uri") or "" if status.get("ok"): return ( f"✅ Run bucket ready: `{source}`\n\n" f"Bucket URI: `{uri}`\n\n" "New Jobs will mount this private bucket and write runs under `runs//`." ) if status.get("exists") is False: return ( f"⚠️ Run bucket not found: `{source}`\n\n" "Click **Create private run bucket** before launching a Job, or create it manually in Hugging Face Storage Buckets." ) return ( f"❌ Could not check run bucket: `{source}`\n\n" f"```text\n{redact(str(status.get('error') or 'Unknown error'))}\n```" ) def check_run_bucket_ui( bucket_name: str, profile: gr.OAuthProfile | None, oauth_token: gr.OAuthToken | None, ) -> str: username = _profile_username(profile) token = _token_value(oauth_token) if not username or not token: raise gr.Error("Please sign in with Hugging Face first.") return _format_bucket_status(check_user_bucket(username=username, bucket_name=bucket_name, token=token)) def create_run_bucket_ui( bucket_name: str, profile: gr.OAuthProfile | None, oauth_token: gr.OAuthToken | None, ) -> str: username = _profile_username(profile) token = _token_value(oauth_token) if not username or not token: raise gr.Error("Please sign in with Hugging Face first.") return _format_bucket_status(create_user_bucket(username=username, bucket_name=bucket_name, token=token)) def propose_universal_run_id() -> str: return make_run_id("universal") def propose_validate_run_id() -> str: return make_run_id("validate") def launch_universal_model_card_job_ui( requested_run_id: str, model_id: str, target_space_name: str, pi_model: str, preferred_hardware: str, allow_fixed_gpu_fallback: bool, fallback_hardware: str, implementation_mode: str, bucket_name: str, profile: gr.OAuthProfile | None, oauth_token: gr.OAuthToken | None, ) -> tuple[str, str, str, str, str, Any, Any, Any, Any, str]: username = _profile_username(profile) token = _token_value(oauth_token) if not username or not token: raise gr.Error("Please sign in with Hugging Face first. OAuth profile/token is missing.") run_id = validate_run_id(requested_run_id or propose_universal_run_id()) result = launch_universal_model_card_job( token=token, username=username, target_slug=target_space_name, model_id=model_id, pi_model=pi_model, preferred_space_hardware=preferred_hardware, fallback_space_hardware=fallback_hardware, allow_fixed_gpu_fallback=allow_fixed_gpu_fallback, implementation_mode=implementation_mode, run_id=run_id, bucket_name=bucket_name, ) job_url = result.get("job_url") or "" target_space_url = result.get("target_space_url") or "" bucket_source = result.get("bucket_source") or user_bucket_source(username=username, bucket_name=bucket_name) return ( run_id, result["job_id"], job_url, result.get("target_space") or "", target_space_url, _job_button(job_url), _space_button(target_space_url), _settings_button(target_space_url), _artifacts_button(run_id, bucket_source), json.dumps(result, indent=2), ) def launch_validate_existing_space_job_ui( requested_run_id: str, target_space_id: str, api_name: str, test_args_json: str, test_kwargs_json: str, expected_output_type: str, live_timeout_seconds: float, bucket_name: str, profile: gr.OAuthProfile | None, oauth_token: gr.OAuthToken | None, ) -> tuple[str, str, str, str, Any, Any, Any, Any, str]: username = _profile_username(profile) token = _token_value(oauth_token) if not username or not token: raise gr.Error("Please sign in with Hugging Face first. OAuth profile/token is missing.") run_id = validate_run_id(requested_run_id or propose_validate_run_id()) try: json.loads(test_args_json or "[]") json.loads(test_kwargs_json or "{}") except Exception as exc: raise gr.Error(f"Invalid JSON test args/kwargs: {exc}") from exc result = launch_validate_existing_space_job( token=token, username=username, target_space_id=target_space_id, api_name=api_name, test_args_json=test_args_json, test_kwargs_json=test_kwargs_json, expected_output_type=expected_output_type, live_timeout_seconds=int(live_timeout_seconds or 1800), run_id=run_id, bucket_name=bucket_name, ) job_url = result.get("job_url") or "" target_space_url = result.get("target_space_url") or f"https://huggingface.co/spaces/{result.get('target_space', target_space_id)}" bucket_source = result.get("bucket_source") or user_bucket_source(username=username, bucket_name=bucket_name) return ( run_id, result["job_id"], job_url, target_space_url, _job_button(job_url), _space_button(target_space_url), _settings_button(target_space_url), _artifacts_button(run_id, bucket_source), json.dumps(result, indent=2), ) def refresh_run_ui( run_id: str, job_id: str, bucket_name: str, profile: gr.OAuthProfile | None, oauth_token: gr.OAuthToken | None, ) -> tuple[str, str, str, str]: username = _profile_username(profile) token = _token_value(oauth_token) if not username or not token: raise gr.Error("Please sign in with Hugging Face first.") run_id = validate_run_id(run_id) bucket_source = user_bucket_source(username=username, bucket_name=bucket_name) bundle = read_run_bundle(run_id, bucket_source=bucket_source, token=token) job_info = inspect_job_safe(job_id, token=token) if job_id else {} logs = redact(fetch_recent_logs_safe(job_id, token=token)) if job_id else "" state_text = json.dumps(bundle.get("state") or {"status": "not_available_yet"}, indent=2, ensure_ascii=False) events = bundle.get("events") or [] events_text = "\n".join(json.dumps(event, ensure_ascii=False) for event in events) or "No events found yet. The Job may still be scheduling." report_text = bundle.get("report") or "No report found yet. Refresh after the Job has started writing to the Bucket." job_text = json.dumps(job_info, indent=2, ensure_ascii=False) if logs: job_text += "\n\nRecent job logs:\n" + logs return state_text, events_text, report_text, job_text def build_demo() -> gr.Blocks: with gr.Blocks(title="Agentic Space Factory") as demo: gr.Markdown(APP_DESCRIPTION) gr.LoginButton() login_status = gr.Markdown() demo.load(fn=get_login_status, inputs=None, outputs=login_status) gr.Markdown("## Run storage") gr.Markdown( "Runs are stored in a private Storage Bucket under the signed-in user's namespace. " "Create it once here, then use the same bucket name for Build and Validate." ) global_bucket_name = gr.Textbox( label="Run Bucket name", value=settings.bucket_name, info="The app uses /. Default: space-factory-runs.", ) with gr.Row(): check_bucket_btn = gr.Button("Check run bucket") create_bucket_btn = gr.Button("Create private run bucket", variant="primary") bucket_status = gr.Markdown("Sign in, then check or create your private run bucket before launching Jobs.") check_bucket_btn.click(fn=check_run_bucket_ui, inputs=[global_bucket_name], outputs=bucket_status) create_bucket_btn.click(fn=create_run_bucket_ui, inputs=[global_bucket_name], outputs=bucket_status) with gr.Tab("Build from model card"): gr.Markdown( """ Paste a Hugging Face model ID or model-card URL. The worker creates a **private** Space, asks Pi + Qwen Coder to build the best Gradio app it can, attempts ZeroGPU first, then a fixed-GPU fallback if enabled. If automatic hardware assignment fails, set the hardware manually in the generated Space settings and run the validation tab. """ ) with gr.Row(): build_run_id = gr.Textbox(label="Run ID", value=propose_universal_run_id, interactive=True) gr.Button("Generate new run id").click(fn=propose_universal_run_id, inputs=None, outputs=build_run_id) model_id = gr.Textbox( label="Model card URL or model ID", value="Tongyi-MAI/Z-Image-Turbo", info="Examples: owner/model, https://huggingface.co/owner/model", ) target_space_name = gr.Textbox( label="Target Space name", placeholder="e.g. space-factory-z-image-v1", info="Use a fresh name. The Space is created under your username and remains private.", ) pi_model = gr.Textbox( label="Pi model", value="Qwen/Qwen3-Coder-Next", info="Model used by Pi through Hugging Face Inference Providers.", ) implementation_mode = gr.Dropdown( label="Implementation mode", choices=["full-inference-gated", "full-inference-attempt", "safe-scaffold"], value="full-inference-gated", info="Gated mode forbids placeholder success; impossible models must produce technical blockers.", ) with gr.Row(): preferred_hw = gr.Dropdown( label="Preferred Space hardware", choices=["zero-a10g", "cpu-basic", "t4-small", "t4-medium", "a10g-large", "l40sx1", "a100-large", "h200"], value="zero-a10g", info="ZeroGPU is attempted first by the worker. If your quota is exceeded, use manual hardware selection after generation.", ) allow_fallback = gr.Checkbox(label="Allow fixed GPU fallback", value=True) fallback_hw = gr.Dropdown( label="Fallback Space hardware", choices=["l40sx1", "a10g-large", "a100-large", "h200", "t4-medium"], value="l40sx1", ) build_btn = gr.Button("Build private Space", variant="primary") build_job_id = gr.Textbox(label="Job ID", interactive=True) build_job_url = gr.Textbox(label="Job URL", interactive=False) generated_space = gr.Textbox(label="Generated Space", interactive=False) generated_space_url = gr.Textbox(label="Generated Space URL", interactive=False) gr.Markdown("Quick links") with gr.Row(): build_job_button = gr.Button("Open HF Job ↗", link=None, link_target="_blank", visible=False) build_space_button = gr.Button("Open target Space ↗", link=None, link_target="_blank", visible=False) build_settings_button = gr.Button("Open Space settings ↗", link=None, link_target="_blank", visible=False) build_artifacts_button = gr.Button("Open run artifacts ↗", link=None, link_target="_blank", visible=False) build_result = gr.Code(label="Launch result", language="json") build_btn.click( fn=launch_universal_model_card_job_ui, inputs=[build_run_id, model_id, target_space_name, pi_model, preferred_hw, allow_fallback, fallback_hw, implementation_mode, global_bucket_name], outputs=[ build_run_id, build_job_id, build_job_url, generated_space, generated_space_url, build_job_button, build_space_button, build_settings_button, build_artifacts_button, build_result, ], ) build_refresh = gr.Button("Refresh build run status") with gr.Tab("Build state"): build_state = gr.Code(label="state.json", language="json") with gr.Tab("Build events"): build_events = gr.Code(label="events.jsonl", language="json") with gr.Tab("Build report"): build_report = gr.Markdown() with gr.Tab("Build job"): build_job_info = gr.Code(label="Job info/logs", language="json") build_refresh.click(fn=refresh_run_ui, inputs=[build_run_id, build_job_id, global_bucket_name], outputs=[build_state, build_events, build_report, build_job_info]) with gr.Tab("Validate existing Space"): gr.Markdown( """ Use this after the builder generated a Space, especially if you had to set the GPU manually. This job does not rerun Pi. It waits for the existing Space, calls a live generation endpoint, checks the output type, stores returned artifacts in the Bucket, measures latency, and recommends a conservative ZeroGPU duration. """ ) with gr.Row(): validate_run_id = gr.Textbox(label="Run ID", value=propose_validate_run_id, interactive=True) gr.Button("Generate new validation run id").click(fn=propose_validate_run_id, inputs=None, outputs=validate_run_id) target_space = gr.Textbox( label="Existing target Space", placeholder="fffiloni/space-factory-... or https://huggingface.co/spaces/...", ) with gr.Row(): api_name = gr.Textbox(label="Generation API name", value="/generate") expected_type = gr.Dropdown(label="Expected output type", choices=["image", "video", "audio", "text", "any"], value="image") test_args = gr.Code(label="Test args JSON list", language="json", value='["a cinematic robot cat astronaut, detailed, studio lighting"]') test_kwargs = gr.Code(label="Test kwargs JSON object", language="json", value="{}") timeout_s = gr.Number(label="Live wait timeout seconds", value=1800, precision=0) validate_btn = gr.Button("Validate Space + smoke-test generation", variant="primary") validate_job_id = gr.Textbox(label="Job ID", interactive=True) validate_job_url = gr.Textbox(label="Job URL", interactive=False) validate_space_url = gr.Textbox(label="Target Space URL", interactive=False) gr.Markdown("Quick links") with gr.Row(): validate_job_button = gr.Button("Open HF Job ↗", link=None, link_target="_blank", visible=False) validate_space_button = gr.Button("Open target Space ↗", link=None, link_target="_blank", visible=False) validate_settings_button = gr.Button("Open Space settings ↗", link=None, link_target="_blank", visible=False) validate_artifacts_button = gr.Button("Open run artifacts ↗", link=None, link_target="_blank", visible=False) validate_result = gr.Code(label="Launch result", language="json") validate_btn.click( fn=launch_validate_existing_space_job_ui, inputs=[validate_run_id, target_space, api_name, test_args, test_kwargs, expected_type, timeout_s, global_bucket_name], outputs=[ validate_run_id, validate_job_id, validate_job_url, validate_space_url, validate_job_button, validate_space_button, validate_settings_button, validate_artifacts_button, validate_result, ], ) validate_refresh = gr.Button("Refresh validation run status") with gr.Tab("Validation state"): validate_state = gr.Code(label="state.json", language="json") with gr.Tab("Validation events"): validate_events = gr.Code(label="events.jsonl", language="json") with gr.Tab("Validation report"): validate_report = gr.Markdown() with gr.Tab("Validation job"): validate_job_info = gr.Code(label="Job info/logs", language="json") validate_refresh.click(fn=refresh_run_ui, inputs=[validate_run_id, validate_job_id, global_bucket_name], outputs=[validate_state, validate_events, validate_report, validate_job_info]) with gr.Tab("About & limits"): gr.Markdown( """ ## Result statuses - `full_inference_success`: a live generation smoke test returned the expected output type. - `manual_hardware_required`: the Space was generated but automatic ZeroGPU/fixed-GPU assignment failed; set hardware manually, then validate. - `full_inference_candidate_health_passed`: the Space boots and contains inference signals, but generation was not smoke-tested yet. - `health_only`: the Space boots, but no real inference path was validated. - `technical_blocker`: the agent found concrete blockers such as multi-GPU requirements, missing licenses, custom CUDA, or unclear usage. - `failed`: the build, runtime, or validation job failed. ## Hardware policy The builder tries to create an app optimized for ZeroGPU when GPU is needed. It attempts ZeroGPU first, then a fixed-GPU fallback if enabled. Hardware assignment through OAuth may fail because of quota, billing, or permission limits; manual hardware selection is a supported path. ## What this app cannot guarantee It cannot guarantee that every model card becomes a working Space. It cannot bypass model licenses, ZeroGPU quota, billing requirements, custom CUDA build failures, multi-GPU needs, or missing model documentation. """ ) return demo if __name__ == "__main__": build_demo().launch()