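# NOTE: the six lines below are Hugging Face file-viewer page residue, not program text;
# they are kept as comments so the module can be parsed.
# fffiloni's picture
# Upload 5 files
# a584da0 verified
# Raw
# History Blame Contribute Delete
# 22.4 kB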
from __future__ import annotations
import json
from typing import Any
import gradio as gr
from src.bucket import check_user_bucket, create_user_bucket, read_run_bundle
from src.config import settings, user_bucket_source
from src.jobs import (
fetch_recent_logs_safe,
inspect_job_safe,
launch_universal_model_card_job,
launch_validate_existing_space_job,
)
from src.runs import make_run_id, validate_run_id
from src.security import redact
# Markdown shown at the top of the page (passed to `gr.Markdown` in `build_demo`).
# This is an f-string: `{settings.bucket_name}` is interpolated once, when the
# module is imported, so the text reflects the configured default bucket name.
APP_DESCRIPTION = f"""
# Agentic Space Factory
Turn a Hugging Face model card into a **private, testable Gradio Space** using an agentic HF Job.
## Recommended workflow
```text
1. Build from model card
→ creates a private Space
→ attempts ZeroGPU first
→ falls back to a fixed GPU if automatic hardware assignment is available
→ otherwise marks the run as manual_hardware_required
2. If hardware had to be changed manually
→ set the GPU in the generated Space Settings
→ run Validate existing Space
→ smoke-test generation
→ measure latency
→ store the output artifact in the Bucket
```
Each launch returns quick links to open the HF Job, generated Space, Space settings, and run artifacts in new tabs.
## Honest guarantees
- Spaces are private by default.
- Nothing is published automatically.
- Runs, reports, generated files, traces, validation results, and artifacts are written to your private Bucket.
- Success is based on the deployed Space, not only generated code.
- ZeroGPU and fixed-GPU upgrades are best-effort through OAuth; manual hardware selection is an expected fallback.
## Limits
This app attempts model-card builds; it does not guarantee that every model will run. Multi-GPU models, Docker-only apps, custom CUDA/FlashAttention stacks, gated models, very large models, or models with unclear documentation may produce `technical_blocker`, `health_only`, or `manual_hardware_required` instead of a full inference success.
Run Bucket: by default each signed-in user writes to their own private bucket: `<username>/{settings.bucket_name}`. Use **Check run bucket** or **Create private run bucket** before launching Jobs.
"""
def _profile_username(profile: Any) -> str | None:
if profile is None:
return None
if isinstance(profile, dict):
return profile.get("preferred_username") or profile.get("username") or profile.get("name")
return getattr(profile, "preferred_username", None) or getattr(profile, "username", None) or getattr(profile, "name", None)
def _token_value(oauth_token: Any) -> str | None:
if oauth_token is None:
return None
if isinstance(oauth_token, str):
return oauth_token
return getattr(oauth_token, "token", None) or getattr(oauth_token, "access_token", None)
def get_login_status(profile: gr.OAuthProfile | None) -> str:
    """Build the Markdown line that tells the visitor whether they are signed in.

    Args:
        profile: OAuth profile of the current visitor, or ``None``.

    Returns:
        A sign-in reminder when no username can be resolved, otherwise a
        message naming the user and the namespace used for generated Spaces.
    """
    username = _profile_username(profile)
    if username:
        return f"Signed in as **{username}**. Generated Spaces are created under `{username}/...` and remain private."
    return "Not signed in. Use the Hugging Face login button before launching a Job."
def _safe_url(url: str | None) -> str:
return (url or "").strip()
def _run_artifacts_url(run_id: str | None, bucket_source: str | None) -> str:
if not run_id or not bucket_source:
return ""
return f"https://huggingface.co/buckets/{bucket_source}/tree/main/runs/{run_id}"
def _button_link(label: str, url: str | None):
    """Create a ``gr.update`` payload for a quick-link button.

    The button receives ``label`` as its value.  It is shown and linked only
    when ``url`` is non-empty after trimming; otherwise the link is cleared
    and the button is hidden.
    """
    target = _safe_url(url)
    has_target = bool(target)
    return gr.update(value=label, link=target if has_target else None, visible=has_target)
def _job_button(job_url: str | None):
    """Return the update for the "Open HF Job" quick-link button."""
    return _button_link(label="Open HF Job ↗", url=job_url)
def _space_button(target_space_url: str | None):
    """Return the update for the "Open target Space" quick-link button."""
    return _button_link(label="Open target Space ↗", url=target_space_url)
def _settings_button(target_space_url: str | None):
    """Return the update for the "Open Space settings" quick-link button.

    The settings page is addressed as ``<space url>/settings``.  A trailing
    slash on the Space URL is removed first so the result never contains
    ``//settings``.  An empty Space URL hides the button.
    """
    # rstrip after trimming: a URL typed or returned with a trailing "/" would
    # otherwise produce ".../owner/space//settings".
    base_url = _safe_url(target_space_url).rstrip("/")
    settings_url = f"{base_url}/settings" if base_url else ""
    return _button_link("Open Space settings ↗", settings_url)
def _artifacts_button(run_id: str | None, bucket_source: str | None):
    """Return the update for the "Open run artifacts" quick-link button."""
    artifacts_url = _run_artifacts_url(run_id, bucket_source)
    return _button_link("Open run artifacts ↗", artifacts_url)
def _format_bucket_status(status: dict[str, Any]) -> str:
source = status.get("bucket_source") or "unknown"
uri = status.get("bucket_uri") or ""
if status.get("ok"):
return (
f"✅ Run bucket ready: `{source}`\n\n"
f"Bucket URI: `{uri}`\n\n"
"New Jobs will mount this private bucket and write runs under `runs/<run_id>/`."
)
if status.get("exists") is False:
return (
f"⚠️ Run bucket not found: `{source}`\n\n"
"Click **Create private run bucket** before launching a Job, or create it manually in Hugging Face Storage Buckets."
)
return (
f"❌ Could not check run bucket: `{source}`\n\n"
f"```text\n{redact(str(status.get('error') or 'Unknown error'))}\n```"
)
def check_run_bucket_ui(
    bucket_name: str,
    profile: gr.OAuthProfile | None,
    oauth_token: gr.OAuthToken | None,
) -> str:
    """UI handler: check the signed-in user's run bucket and describe the result.

    Args:
        bucket_name: Bucket name entered in the UI.
        profile: OAuth profile of the visitor, or ``None`` when signed out.
        oauth_token: OAuth token of the visitor, or ``None`` when signed out.

    Returns:
        Markdown produced by ``_format_bucket_status``.

    Raises:
        gr.Error: If no username or no token can be resolved.
    """
    username = _profile_username(profile)
    token = _token_value(oauth_token)
    if not (username and token):
        raise gr.Error("Please sign in with Hugging Face first.")

    status = check_user_bucket(username=username, bucket_name=bucket_name, token=token)
    return _format_bucket_status(status)
def create_run_bucket_ui(
    bucket_name: str,
    profile: gr.OAuthProfile | None,
    oauth_token: gr.OAuthToken | None,
) -> str:
    """UI handler: create the signed-in user's run bucket and describe the result.

    Args:
        bucket_name: Bucket name entered in the UI.
        profile: OAuth profile of the visitor, or ``None`` when signed out.
        oauth_token: OAuth token of the visitor, or ``None`` when signed out.

    Returns:
        Markdown produced by ``_format_bucket_status``.

    Raises:
        gr.Error: If no username or no token can be resolved.
    """
    username = _profile_username(profile)
    token = _token_value(oauth_token)
    if not (username and token):
        raise gr.Error("Please sign in with Hugging Face first.")

    status = create_user_bucket(username=username, bucket_name=bucket_name, token=token)
    return _format_bucket_status(status)
def propose_universal_run_id() -> str:
    """Return a new run id from ``make_run_id`` for the ``"universal"`` kind."""
    run_id = make_run_id("universal")
    return run_id
def propose_validate_run_id() -> str:
    """Return a new run id from ``make_run_id`` for the ``"validate"`` kind."""
    run_id = make_run_id("validate")
    return run_id
def launch_universal_model_card_job_ui(
    requested_run_id: str,
    model_id: str,
    target_space_name: str,
    pi_model: str,
    preferred_hardware: str,
    allow_fixed_gpu_fallback: bool,
    fallback_hardware: str,
    implementation_mode: str,
    bucket_name: str,
    profile: gr.OAuthProfile | None,
    oauth_token: gr.OAuthToken | None,
) -> tuple[str, str, str, str, str, Any, Any, Any, Any, str]:
    """UI handler: launch the model-card build Job and return values for the UI.

    A blank ``requested_run_id`` is replaced by a freshly proposed one; the
    id is validated either way before the Job is launched.

    Returns:
        A 10-tuple: run id, job id, job URL, generated Space id, generated
        Space URL, the four quick-link button updates (job, Space, settings,
        artifacts) and the launch result as indented JSON.

    Raises:
        gr.Error: If no username or no token can be resolved.
    """
    username = _profile_username(profile)
    token = _token_value(oauth_token)
    if not (username and token):
        raise gr.Error("Please sign in with Hugging Face first. OAuth profile/token is missing.")

    run_id = validate_run_id(requested_run_id or propose_universal_run_id())

    launch_kwargs = {
        "token": token,
        "username": username,
        "target_slug": target_space_name,
        "model_id": model_id,
        "pi_model": pi_model,
        "preferred_space_hardware": preferred_hardware,
        "fallback_space_hardware": fallback_hardware,
        "allow_fixed_gpu_fallback": allow_fixed_gpu_fallback,
        "implementation_mode": implementation_mode,
        "run_id": run_id,
        "bucket_name": bucket_name,
    }
    result = launch_universal_model_card_job(**launch_kwargs)

    job_url = result.get("job_url") or ""
    space_url = result.get("target_space_url") or ""
    # Prefer the bucket reported by the launcher; otherwise derive it from the user.
    bucket_source = result.get("bucket_source") or user_bucket_source(username=username, bucket_name=bucket_name)

    job_id = result["job_id"]
    space_id = result.get("target_space") or ""
    quick_links = (
        _job_button(job_url),
        _space_button(space_url),
        _settings_button(space_url),
        _artifacts_button(run_id, bucket_source),
    )
    result_json = json.dumps(result, indent=2)
    return (run_id, job_id, job_url, space_id, space_url, *quick_links, result_json)
def launch_validate_existing_space_job_ui(
    requested_run_id: str,
    target_space_id: str,
    api_name: str,
    test_args_json: str,
    test_kwargs_json: str,
    expected_output_type: str,
    live_timeout_seconds: float,
    bucket_name: str,
    profile: gr.OAuthProfile | None,
    oauth_token: gr.OAuthToken | None,
) -> tuple[str, str, str, str, Any, Any, Any, Any, str]:
    """UI handler: launch the "validate existing Space" Job and return values for the UI.

    The smoke-test payload is checked before anything is launched:
    ``test_args_json`` must decode to a JSON list and ``test_kwargs_json`` to
    a JSON object (blank values count as ``[]`` and ``{}``).  The original
    strings, not the decoded values, are forwarded to the Job launcher.

    Args:
        requested_run_id: Run id from the UI; a blank value gets a new id.
        target_space_id: Space to validate, as entered by the user.
        api_name: Name of the generation endpoint to call.
        test_args_json: JSON text holding the positional test arguments.
        test_kwargs_json: JSON text holding the keyword test arguments.
        expected_output_type: Output type the smoke test should produce.
        live_timeout_seconds: Wait timeout; falsy values fall back to 1800.
        bucket_name: Name of the user's run bucket.
        profile: OAuth profile of the visitor, or ``None`` when signed out.
        oauth_token: OAuth token of the visitor, or ``None`` when signed out.

    Returns:
        A 9-tuple: run id, job id, job URL, target Space URL, the four
        quick-link button updates (job, Space, settings, artifacts) and the
        launch result as indented JSON.

    Raises:
        gr.Error: If the user is not signed in, the JSON cannot be decoded,
            or the decoded payload has the wrong top-level type.
    """
    username = _profile_username(profile)
    token = _token_value(oauth_token)
    if not username or not token:
        raise gr.Error("Please sign in with Hugging Face first. OAuth profile/token is missing.")
    run_id = validate_run_id(requested_run_id or propose_validate_run_id())

    # Fail fast in the UI instead of discovering a bad payload inside the Job.
    try:
        parsed_args = json.loads(test_args_json or "[]")
        parsed_kwargs = json.loads(test_kwargs_json or "{}")
    except (TypeError, ValueError, RecursionError) as exc:
        # json.JSONDecodeError is a ValueError; RecursionError covers absurdly deep nesting.
        raise gr.Error(f"Invalid JSON test args/kwargs: {exc}") from exc
    if not isinstance(parsed_args, list):
        raise gr.Error(
            f"Invalid JSON test args/kwargs: test args must be a JSON list, got {type(parsed_args).__name__}."
        )
    if not isinstance(parsed_kwargs, dict):
        raise gr.Error(
            f"Invalid JSON test args/kwargs: test kwargs must be a JSON object, got {type(parsed_kwargs).__name__}."
        )

    result = launch_validate_existing_space_job(
        token=token,
        username=username,
        target_space_id=target_space_id,
        api_name=api_name,
        test_args_json=test_args_json,
        test_kwargs_json=test_kwargs_json,
        expected_output_type=expected_output_type,
        live_timeout_seconds=int(live_timeout_seconds or 1800),
        run_id=run_id,
        bucket_name=bucket_name,
    )

    job_url = result.get("job_url") or ""
    # Without a URL from the launcher, build one from the reported (or entered) Space id.
    target_space_url = (
        result.get("target_space_url")
        or f"https://huggingface.co/spaces/{result.get('target_space', target_space_id)}"
    )
    bucket_source = result.get("bucket_source") or user_bucket_source(username=username, bucket_name=bucket_name)
    return (
        run_id,
        result["job_id"],
        job_url,
        target_space_url,
        _job_button(job_url),
        _space_button(target_space_url),
        _settings_button(target_space_url),
        _artifacts_button(run_id, bucket_source),
        json.dumps(result, indent=2),
    )
def refresh_run_ui(
    run_id: str,
    job_id: str,
    bucket_name: str,
    profile: gr.OAuthProfile | None,
    oauth_token: gr.OAuthToken | None,
) -> tuple[str, str, str, str]:
    """UI handler: read a run's bundle from the bucket and, if known, its Job status.

    Job inspection and log fetching are skipped when ``job_id`` is empty.
    Fetched logs are redacted before being appended to the job text.

    Returns:
        A 4-tuple of display strings: state JSON, one JSON document per event
        line, the report text, and the job info JSON (followed by recent logs
        when there are any).  Missing state, events or report are replaced by
        placeholder content.

    Raises:
        gr.Error: If no username or no token can be resolved.
    """
    username = _profile_username(profile)
    token = _token_value(oauth_token)
    if not (username and token):
        raise gr.Error("Please sign in with Hugging Face first.")

    run_id = validate_run_id(run_id)
    bucket_source = user_bucket_source(username=username, bucket_name=bucket_name)
    bundle = read_run_bundle(run_id, bucket_source=bucket_source, token=token)

    if job_id:
        job_info = inspect_job_safe(job_id, token=token)
        logs = redact(fetch_recent_logs_safe(job_id, token=token))
    else:
        job_info = {}
        logs = ""

    state = bundle.get("state") or {"status": "not_available_yet"}
    state_text = json.dumps(state, indent=2, ensure_ascii=False)

    events = bundle.get("events") or []
    events_text = "\n".join(json.dumps(event, ensure_ascii=False) for event in events)
    if not events_text:
        events_text = "No events found yet. The Job may still be scheduling."

    report_text = bundle.get("report")
    if not report_text:
        report_text = "No report found yet. Refresh after the Job has started writing to the Bucket."

    job_text = json.dumps(job_info, indent=2, ensure_ascii=False)
    if logs:
        job_text = job_text + "\n\nRecent job logs:\n" + logs

    return state_text, events_text, report_text, job_text
def build_demo() -> gr.Blocks:
    """Assemble the Gradio Blocks UI and wire every event handler.

    The page contains the app description, a login button with a status line,
    a "Run storage" section (bucket name plus check/create buttons) and the
    tabs "Build from model card", "Validate existing Space" and
    "About & limits".

    Returns:
        The constructed ``gr.Blocks`` app. It is not launched here.
    """
    # NOTE(review): the nesting below (what sits inside each Row/Tab) was
    # reconstructed from a flattened source — confirm against the deployed UI.
    with gr.Blocks(title="Agentic Space Factory") as demo:
        gr.Markdown(APP_DESCRIPTION)
        gr.LoginButton()
        login_status = gr.Markdown()
        # Fill the status line when the page loads. No explicit inputs are
        # given; presumably Gradio injects the OAuth profile from the handler's
        # type annotation — confirm against the Gradio OAuth docs.
        demo.load(fn=get_login_status, inputs=None, outputs=login_status)
        gr.Markdown("## Run storage")
        gr.Markdown(
            "Runs are stored in a private Storage Bucket under the signed-in user's namespace. "
            "Create it once here, then use the same bucket name for Build and Validate."
        )
        # Shared by both tabs: every launch/refresh handler receives this value.
        global_bucket_name = gr.Textbox(
            label="Run Bucket name",
            value=settings.bucket_name,
            info="The app uses <your-username>/<bucket-name>. Default: space-factory-runs.",
        )
        with gr.Row():
            check_bucket_btn = gr.Button("Check run bucket")
            create_bucket_btn = gr.Button("Create private run bucket", variant="primary")
        bucket_status = gr.Markdown("Sign in, then check or create your private run bucket before launching Jobs.")
        # Only the bucket name is an explicit input; `profile` and `oauth_token`
        # are not listed (see the OAuth note on `demo.load` above).
        check_bucket_btn.click(fn=check_run_bucket_ui, inputs=[global_bucket_name], outputs=bucket_status)
        create_bucket_btn.click(fn=create_run_bucket_ui, inputs=[global_bucket_name], outputs=bucket_status)
        # ---- Tab 1: build a Space from a model card ----
        with gr.Tab("Build from model card"):
            gr.Markdown(
                """
Paste a Hugging Face model ID or model-card URL. The worker creates a **private** Space, asks Pi + Qwen Coder to build the best Gradio app it can, attempts ZeroGPU first, then a fixed-GPU fallback if enabled. If automatic hardware assignment fails, set the hardware manually in the generated Space settings and run the validation tab.
"""
            )
            with gr.Row():
                # `value` is the function itself, not its result, so Gradio
                # computes the initial run id rather than it being fixed here.
                build_run_id = gr.Textbox(label="Run ID", value=propose_universal_run_id, interactive=True)
                gr.Button("Generate new run id").click(fn=propose_universal_run_id, inputs=None, outputs=build_run_id)
            model_id = gr.Textbox(
                label="Model card URL or model ID",
                value="Tongyi-MAI/Z-Image-Turbo",
                info="Examples: owner/model, https://huggingface.co/owner/model",
            )
            target_space_name = gr.Textbox(
                label="Target Space name",
                placeholder="e.g. space-factory-z-image-v1",
                info="Use a fresh name. The Space is created under your username and remains private.",
            )
            pi_model = gr.Textbox(
                label="Pi model",
                value="Qwen/Qwen3-Coder-Next",
                info="Model used by Pi through Hugging Face Inference Providers.",
            )
            implementation_mode = gr.Dropdown(
                label="Implementation mode",
                choices=["full-inference-gated", "full-inference-attempt", "safe-scaffold"],
                value="full-inference-gated",
                info="Gated mode forbids placeholder success; impossible models must produce technical blockers.",
            )
            with gr.Row():
                preferred_hw = gr.Dropdown(
                    label="Preferred Space hardware",
                    choices=["zero-a10g", "cpu-basic", "t4-small", "t4-medium", "a10g-large", "l40sx1", "a100-large", "h200"],
                    value="zero-a10g",
                    info="ZeroGPU is attempted first by the worker. If your quota is exceeded, use manual hardware selection after generation.",
                )
                allow_fallback = gr.Checkbox(label="Allow fixed GPU fallback", value=True)
                fallback_hw = gr.Dropdown(
                    label="Fallback Space hardware",
                    choices=["l40sx1", "a10g-large", "a100-large", "h200", "t4-medium"],
                    value="l40sx1",
                )
            build_btn = gr.Button("Build private Space", variant="primary")
            # Job ID stays editable so an existing job id can be typed in and refreshed.
            build_job_id = gr.Textbox(label="Job ID", interactive=True)
            build_job_url = gr.Textbox(label="Job URL", interactive=False)
            generated_space = gr.Textbox(label="Generated Space", interactive=False)
            generated_space_url = gr.Textbox(label="Generated Space URL", interactive=False)
            gr.Markdown("Quick links")
            with gr.Row():
                # Created hidden and without a link; the launch handler returns
                # updates that set the link and make each button visible.
                build_job_button = gr.Button("Open HF Job ↗", link=None, link_target="_blank", visible=False)
                build_space_button = gr.Button("Open target Space ↗", link=None, link_target="_blank", visible=False)
                build_settings_button = gr.Button("Open Space settings ↗", link=None, link_target="_blank", visible=False)
                build_artifacts_button = gr.Button("Open run artifacts ↗", link=None, link_target="_blank", visible=False)
            build_result = gr.Code(label="Launch result", language="json")
            # `inputs` follows the handler's parameter order and `outputs` the
            # order of its returned 10-tuple; keep all three in sync.
            build_btn.click(
                fn=launch_universal_model_card_job_ui,
                inputs=[build_run_id, model_id, target_space_name, pi_model, preferred_hw, allow_fallback, fallback_hw, implementation_mode, global_bucket_name],
                outputs=[
                    build_run_id,
                    build_job_id,
                    build_job_url,
                    generated_space,
                    generated_space_url,
                    build_job_button,
                    build_space_button,
                    build_settings_button,
                    build_artifacts_button,
                    build_result,
                ],
            )
            build_refresh = gr.Button("Refresh build run status")
            with gr.Tab("Build state"):
                build_state = gr.Code(label="state.json", language="json")
            with gr.Tab("Build events"):
                build_events = gr.Code(label="events.jsonl", language="json")
            with gr.Tab("Build report"):
                build_report = gr.Markdown()
            with gr.Tab("Build job"):
                build_job_info = gr.Code(label="Job info/logs", language="json")
            # Outputs match refresh_run_ui's (state, events, report, job) tuple.
            build_refresh.click(fn=refresh_run_ui, inputs=[build_run_id, build_job_id, global_bucket_name], outputs=[build_state, build_events, build_report, build_job_info])
        # ---- Tab 2: validate an already generated Space ----
        with gr.Tab("Validate existing Space"):
            gr.Markdown(
                """
Use this after the builder generated a Space, especially if you had to set the GPU manually. This job does not rerun Pi. It waits for the existing Space, calls a live generation endpoint, checks the output type, stores returned artifacts in the Bucket, measures latency, and recommends a conservative ZeroGPU duration.
"""
            )
            with gr.Row():
                validate_run_id = gr.Textbox(label="Run ID", value=propose_validate_run_id, interactive=True)
                gr.Button("Generate new validation run id").click(fn=propose_validate_run_id, inputs=None, outputs=validate_run_id)
            target_space = gr.Textbox(
                label="Existing target Space",
                placeholder="fffiloni/space-factory-... or https://huggingface.co/spaces/...",
            )
            with gr.Row():
                api_name = gr.Textbox(label="Generation API name", value="/generate")
                expected_type = gr.Dropdown(label="Expected output type", choices=["image", "video", "audio", "text", "any"], value="image")
            # The smoke-test payload is entered as JSON text; the launch handler
            # parses it before starting the Job.
            test_args = gr.Code(label="Test args JSON list", language="json", value='["a cinematic robot cat astronaut, detailed, studio lighting"]')
            test_kwargs = gr.Code(label="Test kwargs JSON object", language="json", value="{}")
            timeout_s = gr.Number(label="Live wait timeout seconds", value=1800, precision=0)
            validate_btn = gr.Button("Validate Space + smoke-test generation", variant="primary")
            validate_job_id = gr.Textbox(label="Job ID", interactive=True)
            validate_job_url = gr.Textbox(label="Job URL", interactive=False)
            validate_space_url = gr.Textbox(label="Target Space URL", interactive=False)
            gr.Markdown("Quick links")
            with gr.Row():
                # Hidden until the launch handler supplies links, as in the build tab.
                validate_job_button = gr.Button("Open HF Job ↗", link=None, link_target="_blank", visible=False)
                validate_space_button = gr.Button("Open target Space ↗", link=None, link_target="_blank", visible=False)
                validate_settings_button = gr.Button("Open Space settings ↗", link=None, link_target="_blank", visible=False)
                validate_artifacts_button = gr.Button("Open run artifacts ↗", link=None, link_target="_blank", visible=False)
            validate_result = gr.Code(label="Launch result", language="json")
            # `outputs` follows the order of the handler's returned 9-tuple.
            validate_btn.click(
                fn=launch_validate_existing_space_job_ui,
                inputs=[validate_run_id, target_space, api_name, test_args, test_kwargs, expected_type, timeout_s, global_bucket_name],
                outputs=[
                    validate_run_id,
                    validate_job_id,
                    validate_job_url,
                    validate_space_url,
                    validate_job_button,
                    validate_space_button,
                    validate_settings_button,
                    validate_artifacts_button,
                    validate_result,
                ],
            )
            validate_refresh = gr.Button("Refresh validation run status")
            with gr.Tab("Validation state"):
                validate_state = gr.Code(label="state.json", language="json")
            with gr.Tab("Validation events"):
                validate_events = gr.Code(label="events.jsonl", language="json")
            with gr.Tab("Validation report"):
                validate_report = gr.Markdown()
            with gr.Tab("Validation job"):
                validate_job_info = gr.Code(label="Job info/logs", language="json")
            validate_refresh.click(fn=refresh_run_ui, inputs=[validate_run_id, validate_job_id, global_bucket_name], outputs=[validate_state, validate_events, validate_report, validate_job_info])
        # ---- Tab 3: static reference text ----
        with gr.Tab("About & limits"):
            gr.Markdown(
                """
## Result statuses
- `full_inference_success`: a live generation smoke test returned the expected output type.
- `manual_hardware_required`: the Space was generated but automatic ZeroGPU/fixed-GPU assignment failed; set hardware manually, then validate.
- `full_inference_candidate_health_passed`: the Space boots and contains inference signals, but generation was not smoke-tested yet.
- `health_only`: the Space boots, but no real inference path was validated.
- `technical_blocker`: the agent found concrete blockers such as multi-GPU requirements, missing licenses, custom CUDA, or unclear usage.
- `failed`: the build, runtime, or validation job failed.
## Hardware policy
The builder tries to create an app optimized for ZeroGPU when GPU is needed. It attempts ZeroGPU first, then a fixed-GPU fallback if enabled. Hardware assignment through OAuth may fail because of quota, billing, or permission limits; manual hardware selection is a supported path.
## What this app cannot guarantee
It cannot guarantee that every model card becomes a working Space. It cannot bypass model licenses, ZeroGPU quota, billing requirements, custom CUDA build failures, multi-GPU needs, or missing model documentation.
"""
            )
    return demo
# Script entry point: build the UI and start the Gradio server.
if __name__ == "__main__":
    demo = build_demo()
    demo.launch()