agentic-space-factory-etheroi

Paused

File size: 22,351 Bytes

from __future__ import annotations

import json
from typing import Any

import gradio as gr

from src.bucket import check_user_bucket, create_user_bucket, read_run_bundle
from src.config import settings, user_bucket_source
from src.jobs import (
    fetch_recent_logs_safe,
    inspect_job_safe,
    launch_universal_model_card_job,
    launch_validate_existing_space_job,
)
from src.runs import make_run_id, validate_run_id
from src.security import redact


# Markdown introduction rendered at the top of the UI (passed to gr.Markdown in
# build_demo). It is an f-string: `settings.bucket_name` is interpolated once,
# when this module is imported, so the text shows the configured bucket name.
APP_DESCRIPTION = f"""
# Agentic Space Factory

Turn a Hugging Face model card into a **private, testable Gradio Space** using an agentic HF Job.

## Recommended workflow

```text
1. Build from model card
   → creates a private Space
   → attempts ZeroGPU first
   → falls back to a fixed GPU if automatic hardware assignment is available
   → otherwise marks the run as manual_hardware_required

2. If hardware had to be changed manually
   → set the GPU in the generated Space Settings
   → run Validate existing Space
   → smoke-test generation
   → measure latency
   → store the output artifact in the Bucket
```

Each launch returns quick links to open the HF Job, generated Space, Space settings, and run artifacts in new tabs.

## Honest guarantees

- Spaces are private by default.
- Nothing is published automatically.
- Runs, reports, generated files, traces, validation results, and artifacts are written to your private Bucket.
- Success is based on the deployed Space, not only generated code.
- ZeroGPU and fixed-GPU upgrades are best-effort through OAuth; manual hardware selection is an expected fallback.

## Limits

This app attempts model-card builds; it does not guarantee that every model will run. Multi-GPU models, Docker-only apps, custom CUDA/FlashAttention stacks, gated models, very large models, or models with unclear documentation may produce `technical_blocker`, `health_only`, or `manual_hardware_required` instead of a full inference success.

Run Bucket: by default each signed-in user writes to their own private bucket: `<username>/{settings.bucket_name}`. Use **Check run bucket** or **Create private run bucket** before launching Jobs.
"""


def _profile_username(profile: Any) -> str | None:
    if profile is None:
        return None
    if isinstance(profile, dict):
        return profile.get("preferred_username") or profile.get("username") or profile.get("name")
    return getattr(profile, "preferred_username", None) or getattr(profile, "username", None) or getattr(profile, "name", None)


def _token_value(oauth_token: Any) -> str | None:
    if oauth_token is None:
        return None
    if isinstance(oauth_token, str):
        return oauth_token
    return getattr(oauth_token, "token", None) or getattr(oauth_token, "access_token", None)


def get_login_status(profile: gr.OAuthProfile | None) -> str:
    """Return the Markdown line describing the current sign-in state.

    Wired to ``demo.load`` with no explicit inputs, so ``profile`` is expected
    to be supplied by Gradio's OAuth integration (``None`` when signed out).
    """
    username = _profile_username(profile)
    if username:
        return f"Signed in as **{username}**. Generated Spaces are created under `{username}/...` and remain private."
    return "Not signed in. Use the Hugging Face login button before launching a Job."




def _safe_url(url: str | None) -> str:
    return (url or "").strip()


def _run_artifacts_url(run_id: str | None, bucket_source: str | None) -> str:
    if not run_id or not bucket_source:
        return ""
    return f"https://huggingface.co/buckets/{bucket_source}/tree/main/runs/{run_id}"


def _button_link(label: str, url: str | None):
    """Return a ``gr.update`` that turns a button into a link, or hides it.

    With a non-empty (whitespace-stripped) URL the button is shown and linked;
    otherwise the link is cleared and the button is hidden.
    """
    cleaned = _safe_url(url)
    if cleaned:
        return gr.update(value=label, link=cleaned, visible=True)
    return gr.update(value=label, link=None, visible=False)


def _job_button(job_url: str | None):
    """Return the button update for the "Open HF Job" quick link."""
    label = "Open HF Job ↗"
    return _button_link(label, job_url)


def _space_button(target_space_url: str | None):
    """Return the button update for the "Open target Space" quick link."""
    label = "Open target Space ↗"
    return _button_link(label, target_space_url)


def _settings_button(target_space_url: str | None):
    """Return the button update for the "Open Space settings" quick link.

    The settings URL is the cleaned Space URL with ``/settings`` appended; an
    empty Space URL produces an empty link, which hides the button.
    """
    base_url = _safe_url(target_space_url)
    settings_url = ""
    if base_url:
        settings_url = f"{base_url}/settings"
    return _button_link("Open Space settings ↗", settings_url)


def _artifacts_button(run_id: str | None, bucket_source: str | None):
    """Return the button update for the "Open run artifacts" quick link."""
    artifacts_url = _run_artifacts_url(run_id, bucket_source)
    return _button_link("Open run artifacts ↗", artifacts_url)




def _format_bucket_status(status: dict[str, Any]) -> str:
    source = status.get("bucket_source") or "unknown"
    uri = status.get("bucket_uri") or ""
    if status.get("ok"):
        return (
            f"✅ Run bucket ready: `{source}`\n\n"
            f"Bucket URI: `{uri}`\n\n"
            "New Jobs will mount this private bucket and write runs under `runs/<run_id>/`."
        )
    if status.get("exists") is False:
        return (
            f"⚠️ Run bucket not found: `{source}`\n\n"
            "Click **Create private run bucket** before launching a Job, or create it manually in Hugging Face Storage Buckets."
        )
    return (
        f"❌ Could not check run bucket: `{source}`\n\n"
        f"```text\n{redact(str(status.get('error') or 'Unknown error'))}\n```"
    )


def check_run_bucket_ui(
    bucket_name: str,
    profile: gr.OAuthProfile | None,
    oauth_token: gr.OAuthToken | None,
) -> str:
    """Check the signed-in user's run bucket and return a Markdown status.

    Raises:
        gr.Error: if no username or token can be derived from the OAuth inputs.
    """
    username, token = _profile_username(profile), _token_value(oauth_token)
    if not (username and token):
        raise gr.Error("Please sign in with Hugging Face first.")

    status = check_user_bucket(username=username, bucket_name=bucket_name, token=token)
    return _format_bucket_status(status)


def create_run_bucket_ui(
    bucket_name: str,
    profile: gr.OAuthProfile | None,
    oauth_token: gr.OAuthToken | None,
) -> str:
    """Create the signed-in user's run bucket and return a Markdown status.

    Raises:
        gr.Error: if no username or token can be derived from the OAuth inputs.
    """
    username, token = _profile_username(profile), _token_value(oauth_token)
    if not (username and token):
        raise gr.Error("Please sign in with Hugging Face first.")

    status = create_user_bucket(username=username, bucket_name=bucket_name, token=token)
    return _format_bucket_status(status)


def propose_universal_run_id() -> str:
    """Return a new run id for a model-card build, via ``make_run_id("universal")``."""
    run_id = make_run_id("universal")
    return run_id


def propose_validate_run_id() -> str:
    """Return a new run id for a validation run, via ``make_run_id("validate")``."""
    run_id = make_run_id("validate")
    return run_id


def launch_universal_model_card_job_ui(
    requested_run_id: str,
    model_id: str,
    target_space_name: str,
    pi_model: str,
    preferred_hardware: str,
    allow_fixed_gpu_fallback: bool,
    fallback_hardware: str,
    implementation_mode: str,
    bucket_name: str,
    profile: gr.OAuthProfile | None,
    oauth_token: gr.OAuthToken | None,
) -> tuple[str, str, str, str, str, Any, Any, Any, Any, str]:
    """Launch the model-card build Job and return values for the Build tab.

    Args:
        requested_run_id: Run id from the UI; when blank a new one is proposed.
        model_id: Model id or model-card URL (required).
        target_space_name: Name of the Space to create, forwarded as ``target_slug``.
        pi_model: Model name forwarded to the launcher as ``pi_model``.
        preferred_hardware: Space hardware to try first.
        allow_fixed_gpu_fallback: Whether the fallback hardware may be used.
        fallback_hardware: Space hardware used as fallback.
        implementation_mode: Implementation mode forwarded to the launcher.
        bucket_name: Name of the user's run bucket.
        profile: OAuth profile of the signed-in user, if any.
        oauth_token: OAuth token of the signed-in user, if any.

    Returns:
        A 10-tuple matching the click handler outputs: run id, job id, job URL,
        target Space, target Space URL, four button updates (job, Space,
        settings, artifacts) and the launcher result as indented JSON.

    Raises:
        gr.Error: if the user is not signed in, the model id is blank, or the
            launcher result carries no job id.
    """
    username = _profile_username(profile)
    token = _token_value(oauth_token)
    if not username or not token:
        raise gr.Error("Please sign in with Hugging Face first. OAuth profile/token is missing.")

    # Fail fast on a blank model reference instead of launching a Job that has
    # nothing to build from; pasted values often carry stray whitespace.
    model_id = (model_id or "").strip()
    if not model_id:
        raise gr.Error("Please enter a model card URL or model ID.")
    target_space_name = (target_space_name or "").strip()

    # A whitespace-only run id is treated like an empty one so the generated
    # fallback is used instead of handing blanks to the validator.
    run_id = validate_run_id((requested_run_id or "").strip() or propose_universal_run_id())
    result = launch_universal_model_card_job(
        token=token,
        username=username,
        target_slug=target_space_name,
        model_id=model_id,
        pi_model=pi_model,
        preferred_space_hardware=preferred_hardware,
        fallback_space_hardware=fallback_hardware,
        allow_fixed_gpu_fallback=allow_fixed_gpu_fallback,
        implementation_mode=implementation_mode,
        run_id=run_id,
        bucket_name=bucket_name,
    )

    # Surface a readable UI error rather than a bare KeyError when the
    # launcher result has no job id.
    job_id = result.get("job_id")
    if not job_id:
        raise gr.Error("The Job launcher did not return a job id, so the run cannot be tracked.")

    job_url = result.get("job_url") or ""
    target_space_url = result.get("target_space_url") or ""
    # Prefer the bucket reported by the launcher; otherwise derive it locally.
    bucket_source = result.get("bucket_source") or user_bucket_source(username=username, bucket_name=bucket_name)
    return (
        run_id,
        job_id,
        job_url,
        result.get("target_space") or "",
        target_space_url,
        _job_button(job_url),
        _space_button(target_space_url),
        _settings_button(target_space_url),
        _artifacts_button(run_id, bucket_source),
        json.dumps(result, indent=2),
    )


def launch_validate_existing_space_job_ui(
    requested_run_id: str,
    target_space_id: str,
    api_name: str,
    test_args_json: str,
    test_kwargs_json: str,
    expected_output_type: str,
    live_timeout_seconds: float,
    bucket_name: str,
    profile: gr.OAuthProfile | None,
    oauth_token: gr.OAuthToken | None,
) -> tuple[str, str, str, str, Any, Any, Any, Any, str]:
    """Launch the validation Job for an existing Space and return UI values.

    Args:
        requested_run_id: Run id from the UI; when blank a new one is proposed.
        target_space_id: Space reference, either ``owner/name`` or a Space URL.
        api_name: API endpoint name forwarded to the launcher.
        test_args_json: JSON text that must decode to a list (blank -> ``[]``).
        test_kwargs_json: JSON text that must decode to an object (blank -> ``{}``).
        expected_output_type: Expected output type forwarded to the launcher.
        live_timeout_seconds: Wait timeout; falsy values fall back to 1800.
        bucket_name: Name of the user's run bucket.
        profile: OAuth profile of the signed-in user, if any.
        oauth_token: OAuth token of the signed-in user, if any.

    Returns:
        A 9-tuple matching the click handler outputs: run id, job id, job URL,
        target Space URL, four button updates (job, Space, settings,
        artifacts) and the launcher result as indented JSON.

    Raises:
        gr.Error: if the user is not signed in, the Space reference is blank,
            the test JSON is invalid or has the wrong top-level type, or the
            launcher result carries no job id.
    """
    username = _profile_username(profile)
    token = _token_value(oauth_token)
    if not username or not token:
        raise gr.Error("Please sign in with Hugging Face first. OAuth profile/token is missing.")

    target_space_id = (target_space_id or "").strip()
    if not target_space_id:
        raise gr.Error("Please enter the existing target Space (owner/name or Space URL).")

    # A whitespace-only run id is treated like an empty one.
    run_id = validate_run_id((requested_run_id or "").strip() or propose_validate_run_id())

    # Validate the JSON locally so obvious mistakes never reach the Job, and
    # forward exactly the text that was validated (blank -> empty list/object).
    args_text = (test_args_json or "").strip() or "[]"
    kwargs_text = (test_kwargs_json or "").strip() or "{}"
    try:
        parsed_args = json.loads(args_text)
        parsed_kwargs = json.loads(kwargs_text)
    except (TypeError, ValueError) as exc:  # JSONDecodeError is a ValueError
        raise gr.Error(f"Invalid JSON test args/kwargs: {exc}") from exc
    if not isinstance(parsed_args, list):
        raise gr.Error("Invalid JSON test args/kwargs: test args must be a JSON list.")
    if not isinstance(parsed_kwargs, dict):
        raise gr.Error("Invalid JSON test args/kwargs: test kwargs must be a JSON object.")

    result = launch_validate_existing_space_job(
        token=token,
        username=username,
        target_space_id=target_space_id,
        api_name=api_name,
        test_args_json=args_text,
        test_kwargs_json=kwargs_text,
        expected_output_type=expected_output_type,
        live_timeout_seconds=int(live_timeout_seconds or 1800),
        run_id=run_id,
        bucket_name=bucket_name,
    )

    # Surface a readable UI error rather than a bare KeyError.
    job_id = result.get("job_id")
    if not job_id:
        raise gr.Error("The Job launcher did not return a job id, so the run cannot be tracked.")

    job_url = result.get("job_url") or ""

    # Fallback Space URL: the user may have pasted a full URL (the input
    # accepts both forms), and ``target_space`` may be present but None, so
    # only prefix the Spaces base URL onto a bare ``owner/name`` reference.
    space_ref = str(result.get("target_space") or target_space_id).strip().rstrip("/")
    if space_ref.startswith(("http://", "https://")):
        fallback_space_url = space_ref
    else:
        fallback_space_url = f"https://huggingface.co/spaces/{space_ref}"
    target_space_url = result.get("target_space_url") or fallback_space_url

    # Prefer the bucket reported by the launcher; otherwise derive it locally.
    bucket_source = result.get("bucket_source") or user_bucket_source(username=username, bucket_name=bucket_name)
    return (
        run_id,
        job_id,
        job_url,
        target_space_url,
        _job_button(job_url),
        _space_button(target_space_url),
        _settings_button(target_space_url),
        _artifacts_button(run_id, bucket_source),
        json.dumps(result, indent=2),
    )


def refresh_run_ui(
    run_id: str,
    job_id: str,
    bucket_name: str,
    profile: gr.OAuthProfile | None,
    oauth_token: gr.OAuthToken | None,
) -> tuple[str, str, str, str]:
    """Collect the current state of a run for the status sub-tabs.

    Reads the run bundle from the user's bucket and, when a job id is given,
    the job info and recent logs (logs are passed through ``redact``).

    Returns:
        ``(state_text, events_text, report_text, job_text)`` where state and
        job info are indented JSON, events are one JSON document per line and
        the report is returned as stored. Placeholder messages are used when
        state, events or report are not available yet.

    Raises:
        gr.Error: if no username or token can be derived from the OAuth inputs.
    """
    username, token = _profile_username(profile), _token_value(oauth_token)
    if not (username and token):
        raise gr.Error("Please sign in with Hugging Face first.")

    run_id = validate_run_id(run_id)
    bucket_source = user_bucket_source(username=username, bucket_name=bucket_name)
    bundle = read_run_bundle(run_id, bucket_source=bucket_source, token=token)

    # Job details are optional: without a job id only bucket data is shown.
    if job_id:
        job_info = inspect_job_safe(job_id, token=token)
        logs = redact(fetch_recent_logs_safe(job_id, token=token))
    else:
        job_info = {}
        logs = ""

    state = bundle.get("state") or {"status": "not_available_yet"}
    state_text = json.dumps(state, indent=2, ensure_ascii=False)

    event_lines = [json.dumps(event, ensure_ascii=False) for event in (bundle.get("events") or [])]
    events_text = "\n".join(event_lines)
    if not events_text:
        events_text = "No events found yet. The Job may still be scheduling."

    report_text = bundle.get("report")
    if not report_text:
        report_text = "No report found yet. Refresh after the Job has started writing to the Bucket."

    job_text = json.dumps(job_info, indent=2, ensure_ascii=False)
    if logs:
        job_text = job_text + "\n\nRecent job logs:\n" + logs

    return state_text, events_text, report_text, job_text


def build_demo() -> gr.Blocks:
    """Build the Gradio Blocks UI and wire its event handlers.

    Layout, in order: description and login button, sign-in status, run-bucket
    controls, then three top-level tabs ("Build from model card", "Validate
    existing Space", "About & limits"). Components are created in display
    order, so statement order inside this function determines the layout.

    The handler functions take ``profile`` / ``oauth_token`` parameters that
    are not listed in ``inputs``; they are expected to be supplied by Gradio's
    OAuth integration based on their type annotations.

    Returns:
        The assembled ``gr.Blocks`` app (not launched).
    """
    with gr.Blocks(title="Agentic Space Factory") as demo:
        gr.Markdown(APP_DESCRIPTION)
        gr.LoginButton()

        # Sign-in status line, filled in when the page loads.
        login_status = gr.Markdown()
        demo.load(fn=get_login_status, inputs=None, outputs=login_status)

        # --- Run storage: one bucket name shared by the Build and Validate tabs ---
        gr.Markdown("## Run storage")
        gr.Markdown(
            "Runs are stored in a private Storage Bucket under the signed-in user's namespace. "
            "Create it once here, then use the same bucket name for Build and Validate."
        )
        global_bucket_name = gr.Textbox(
            label="Run Bucket name",
            value=settings.bucket_name,
            # Show the configured default rather than a hard-coded name so the
            # hint cannot drift from the field's actual initial value.
            info=f"The app uses <your-username>/<bucket-name>. Default: {settings.bucket_name}.",
        )
        with gr.Row():
            check_bucket_btn = gr.Button("Check run bucket")
            create_bucket_btn = gr.Button("Create private run bucket", variant="primary")
        bucket_status = gr.Markdown("Sign in, then check or create your private run bucket before launching Jobs.")
        check_bucket_btn.click(fn=check_run_bucket_ui, inputs=[global_bucket_name], outputs=bucket_status)
        create_bucket_btn.click(fn=create_run_bucket_ui, inputs=[global_bucket_name], outputs=bucket_status)

        # --- Tab 1: build a private Space from a model card ---
        with gr.Tab("Build from model card"):
            gr.Markdown(
                """
Paste a Hugging Face model ID or model-card URL. The worker creates a **private** Space, asks Pi + Qwen Coder to build the best Gradio app it can, attempts ZeroGPU first, then a fixed-GPU fallback if enabled. If automatic hardware assignment fails, set the hardware manually in the generated Space settings and run the validation tab.
"""
            )
            with gr.Row():
                # A callable as `value` lets Gradio compute the initial run id itself.
                build_run_id = gr.Textbox(label="Run ID", value=propose_universal_run_id, interactive=True)
                gr.Button("Generate new run id").click(fn=propose_universal_run_id, inputs=None, outputs=build_run_id)
            model_id = gr.Textbox(
                label="Model card URL or model ID",
                value="Tongyi-MAI/Z-Image-Turbo",
                info="Examples: owner/model, https://huggingface.co/owner/model",
            )
            target_space_name = gr.Textbox(
                label="Target Space name",
                placeholder="e.g. space-factory-z-image-v1",
                info="Use a fresh name. The Space is created under your username and remains private.",
            )
            pi_model = gr.Textbox(
                label="Pi model",
                value="Qwen/Qwen3-Coder-Next",
                info="Model used by Pi through Hugging Face Inference Providers.",
            )
            implementation_mode = gr.Dropdown(
                label="Implementation mode",
                choices=["full-inference-gated", "full-inference-attempt", "safe-scaffold"],
                value="full-inference-gated",
                info="Gated mode forbids placeholder success; impossible models must produce technical blockers.",
            )
            with gr.Row():
                preferred_hw = gr.Dropdown(
                    label="Preferred Space hardware",
                    choices=["zero-a10g", "cpu-basic", "t4-small", "t4-medium", "a10g-large", "l40sx1", "a100-large", "h200"],
                    value="zero-a10g",
                    info="ZeroGPU is attempted first by the worker. If your quota is exceeded, use manual hardware selection after generation.",
                )
                allow_fallback = gr.Checkbox(label="Allow fixed GPU fallback", value=True)
                fallback_hw = gr.Dropdown(
                    label="Fallback Space hardware",
                    choices=["l40sx1", "a10g-large", "a100-large", "h200", "t4-medium"],
                    value="l40sx1",
                )

            build_btn = gr.Button("Build private Space", variant="primary")
            # The Job ID stays editable so an earlier job can be looked up via refresh.
            build_job_id = gr.Textbox(label="Job ID", interactive=True)
            build_job_url = gr.Textbox(label="Job URL", interactive=False)
            generated_space = gr.Textbox(label="Generated Space", interactive=False)
            generated_space_url = gr.Textbox(label="Generated Space URL", interactive=False)
            gr.Markdown("Quick links")
            with gr.Row():
                # Link buttons start hidden; the launch handler reveals them with real URLs.
                build_job_button = gr.Button("Open HF Job ↗", link=None, link_target="_blank", visible=False)
                build_space_button = gr.Button("Open target Space ↗", link=None, link_target="_blank", visible=False)
                build_settings_button = gr.Button("Open Space settings ↗", link=None, link_target="_blank", visible=False)
                build_artifacts_button = gr.Button("Open run artifacts ↗", link=None, link_target="_blank", visible=False)
            build_result = gr.Code(label="Launch result", language="json")

            # Output order must match the tuple returned by launch_universal_model_card_job_ui.
            build_btn.click(
                fn=launch_universal_model_card_job_ui,
                inputs=[build_run_id, model_id, target_space_name, pi_model, preferred_hw, allow_fallback, fallback_hw, implementation_mode, global_bucket_name],
                outputs=[
                    build_run_id,
                    build_job_id,
                    build_job_url,
                    generated_space,
                    generated_space_url,
                    build_job_button,
                    build_space_button,
                    build_settings_button,
                    build_artifacts_button,
                    build_result,
                ],
            )

            # Status viewers for the build run, filled by refresh_run_ui.
            build_refresh = gr.Button("Refresh build run status")
            with gr.Tab("Build state"):
                build_state = gr.Code(label="state.json", language="json")
            with gr.Tab("Build events"):
                build_events = gr.Code(label="events.jsonl", language="json")
            with gr.Tab("Build report"):
                build_report = gr.Markdown()
            with gr.Tab("Build job"):
                build_job_info = gr.Code(label="Job info/logs", language="json")

            build_refresh.click(fn=refresh_run_ui, inputs=[build_run_id, build_job_id, global_bucket_name], outputs=[build_state, build_events, build_report, build_job_info])

        # --- Tab 2: validate an already generated Space ---
        with gr.Tab("Validate existing Space"):
            gr.Markdown(
                """
Use this after the builder generated a Space, especially if you had to set the GPU manually. This job does not rerun Pi. It waits for the existing Space, calls a live generation endpoint, checks the output type, stores returned artifacts in the Bucket, measures latency, and recommends a conservative ZeroGPU duration.
"""
            )
            with gr.Row():
                validate_run_id = gr.Textbox(label="Run ID", value=propose_validate_run_id, interactive=True)
                gr.Button("Generate new validation run id").click(fn=propose_validate_run_id, inputs=None, outputs=validate_run_id)
            target_space = gr.Textbox(
                label="Existing target Space",
                placeholder="fffiloni/space-factory-... or https://huggingface.co/spaces/...",
            )
            with gr.Row():
                api_name = gr.Textbox(label="Generation API name", value="/generate")
                expected_type = gr.Dropdown(label="Expected output type", choices=["image", "video", "audio", "text", "any"], value="image")
            test_args = gr.Code(label="Test args JSON list", language="json", value='["a cinematic robot cat astronaut, detailed, studio lighting"]')
            test_kwargs = gr.Code(label="Test kwargs JSON object", language="json", value="{}")
            timeout_s = gr.Number(label="Live wait timeout seconds", value=1800, precision=0)

            validate_btn = gr.Button("Validate Space + smoke-test generation", variant="primary")
            validate_job_id = gr.Textbox(label="Job ID", interactive=True)
            validate_job_url = gr.Textbox(label="Job URL", interactive=False)
            validate_space_url = gr.Textbox(label="Target Space URL", interactive=False)
            gr.Markdown("Quick links")
            with gr.Row():
                # Link buttons start hidden; the launch handler reveals them with real URLs.
                validate_job_button = gr.Button("Open HF Job ↗", link=None, link_target="_blank", visible=False)
                validate_space_button = gr.Button("Open target Space ↗", link=None, link_target="_blank", visible=False)
                validate_settings_button = gr.Button("Open Space settings ↗", link=None, link_target="_blank", visible=False)
                validate_artifacts_button = gr.Button("Open run artifacts ↗", link=None, link_target="_blank", visible=False)
            validate_result = gr.Code(label="Launch result", language="json")

            # Output order must match the tuple returned by launch_validate_existing_space_job_ui.
            validate_btn.click(
                fn=launch_validate_existing_space_job_ui,
                inputs=[validate_run_id, target_space, api_name, test_args, test_kwargs, expected_type, timeout_s, global_bucket_name],
                outputs=[
                    validate_run_id,
                    validate_job_id,
                    validate_job_url,
                    validate_space_url,
                    validate_job_button,
                    validate_space_button,
                    validate_settings_button,
                    validate_artifacts_button,
                    validate_result,
                ],
            )

            # Status viewers for the validation run, filled by refresh_run_ui.
            validate_refresh = gr.Button("Refresh validation run status")
            with gr.Tab("Validation state"):
                validate_state = gr.Code(label="state.json", language="json")
            with gr.Tab("Validation events"):
                validate_events = gr.Code(label="events.jsonl", language="json")
            with gr.Tab("Validation report"):
                validate_report = gr.Markdown()
            with gr.Tab("Validation job"):
                validate_job_info = gr.Code(label="Job info/logs", language="json")

            validate_refresh.click(fn=refresh_run_ui, inputs=[validate_run_id, validate_job_id, global_bucket_name], outputs=[validate_state, validate_events, validate_report, validate_job_info])

        # --- Tab 3: static reference text ---
        with gr.Tab("About & limits"):
            gr.Markdown(
                """
## Result statuses

- `full_inference_success`: a live generation smoke test returned the expected output type.
- `manual_hardware_required`: the Space was generated but automatic ZeroGPU/fixed-GPU assignment failed; set hardware manually, then validate.
- `full_inference_candidate_health_passed`: the Space boots and contains inference signals, but generation was not smoke-tested yet.
- `health_only`: the Space boots, but no real inference path was validated.
- `technical_blocker`: the agent found concrete blockers such as multi-GPU requirements, missing licenses, custom CUDA, or unclear usage.
- `failed`: the build, runtime, or validation job failed.

## Hardware policy

The builder tries to create an app optimized for ZeroGPU when GPU is needed. It attempts ZeroGPU first, then a fixed-GPU fallback if enabled. Hardware assignment through OAuth may fail because of quota, billing, or permission limits; manual hardware selection is a supported path.

## What this app cannot guarantee

It cannot guarantee that every model card becomes a working Space. It cannot bypass model licenses, ZeroGPU quota, billing requirements, custom CUDA build failures, multi-GPU needs, or missing model documentation.
"""
            )

    return demo


if __name__ == "__main__":
    # Script entry point: assemble the Blocks UI, then launch it.
    app = build_demo()
    app.launch()