agentic-space-factory-etheroi

Paused

App Files Files Community

fffiloni committed 27 days ago

Commit

47bf6fa

verified ·

1 Parent(s): e8c9edb

Upload 6 files

Browse files

Files changed (6) hide show

src/bucket.py +89 -0
src/config.py +22 -0
src/jobs.py +140 -0
src/runs.py +23 -0
src/security.py +24 -0
src/worker_payload.py +587 -0

src/bucket.py ADDED Viewed

	@@ -0,0 +1,89 @@

+from __future__ import annotations
+import json
+from dataclasses import dataclass
+from typing import Any
+from huggingface_hub import HfFileSystem
+from .config import settings
+from .security import redact
+@dataclass(frozen=True)
+class RunPaths:
+    """Bucket locations of the artifacts that belong to a single run."""
+    # Run identifier; used verbatim as the directory name under `runs/`.
+    run_id: str
+    # Bucket URI prefix; defaults to the value exposed by `settings`.
+    bucket_uri: str = settings.bucket_uri
+    @property
+    def root(self) -> str:
+        """Directory URI under which every file of this run is stored."""
+        return "{}/runs/{}".format(self.bucket_uri, self.run_id)
+    @property
+    def state(self) -> str:
+        """URI of the run's `state.json`."""
+        return self.root + "/state.json"
+    @property
+    def events(self) -> str:
+        """URI of the run's `events.jsonl`."""
+        return self.root + "/events.jsonl"
+    @property
+    def report(self) -> str:
+        """URI of the run's `report.md`."""
+        return self.root + "/report.md"
+def _fs(token: str | None = None) -> HfFileSystem:
+    """Build an `HfFileSystem` client, passing *token* through unchanged."""
+    filesystem = HfFileSystem(token=token)
+    return filesystem
+def read_text(path: str, token: str | None = None) -> str | None:
+    """Read the text file at *path* through `HfFileSystem`.
+    Returns the file content, `None` when the file does not exist, or a
+    bracketed `[Could not read ...]` message when any other error occurs.
+    """
+    filesystem = _fs(token)
+    try:
+        with filesystem.open(path, "r") as handle:
+            content = handle.read()
+    except FileNotFoundError:
+        return None
+    except Exception as error:  # noqa: BLE001 - surface readable error in UI
+        return f"[Could not read {path}: {error}]"
+    return content
+def read_json(path: str, token: str | None = None) -> dict[str, Any] | None:
+    """Read and parse the JSON object stored at *path*.
+    Returns `None` when the file is missing, empty, or could not be read.
+    Content that is not valid JSON, or that is valid JSON but not an object,
+    yields a dict with an `_error` key plus the redacted raw text under `raw`.
+    """
+    content = read_text(path, token=token)
+    # `read_text` reports read failures in-band as a "[Could not read ..." string.
+    if not content or content.startswith("[Could not read"):
+        return None
+    try:
+        parsed = json.loads(content)
+    except json.JSONDecodeError:
+        return {"_error": "Invalid JSON", "raw": redact(content)}
+    if not isinstance(parsed, dict):
+        # Honour the declared return type: a bare list/string/number is valid
+        # JSON but not the mapping this function promises.
+        return {"_error": "Expected a JSON object", "raw": redact(content)}
+    return parsed
+def read_events(run_id: str, token: str | None = None) -> list[dict[str, Any]]:
+    """Parse the run's `events.jsonl` into a list of event dicts.
+    Blank lines are skipped. A line that is not valid JSON, or that parses to
+    something other than an object, is replaced by a `parse_events` warning
+    event carrying the redacted line, so every returned item is a dict.
+    Returns an empty list when the file is missing or empty.
+    """
+    content = read_text(RunPaths(run_id).events, token=token)
+    if not content:
+        return []
+    events: list[dict[str, Any]] = []
+    for raw_line in content.splitlines():
+        line = raw_line.strip()
+        if not line:
+            continue
+        try:
+            event = json.loads(line)
+        except json.JSONDecodeError:
+            event = None
+        if isinstance(event, dict):
+            events.append(event)
+        else:
+            # Unparseable or non-object line: surface it instead of dropping it.
+            events.append({"step": "parse_events", "status": "warning", "message": redact(line)})
+    return events
+def read_run_bundle(run_id: str, token: str | None = None) -> dict[str, Any]:
+    """Gather the paths, parsed state, events and redacted report of one run."""
+    paths = RunPaths(run_id)
+    locations = {
+        "root": paths.root,
+        "state": paths.state,
+        "events": paths.events,
+        "report": paths.report,
+    }
+    state = read_json(paths.state, token=token)
+    events = read_events(run_id, token=token)
+    report_text = read_text(paths.report, token=token)
+    bundle: dict[str, Any] = {
+        "paths": locations,
+        "state": state,
+        "events": events,
+        "report": redact(report_text or ""),
+    }
+    return bundle

src/config.py ADDED Viewed

	@@ -0,0 +1,22 @@

+from __future__ import annotations
+import os
+from dataclasses import dataclass
+@dataclass(frozen=True)
+class Settings:
+    """Runtime configuration for the orchestrator Space.
+    Every field default is looked up in the process environment when this
+    class body is executed, falling back to the literal shown here.
+    """
+    # Bucket identifier, in `owner/name` form by default.
+    bucket_source: str = os.environ.get("SPACE_FACTORY_BUCKET_SOURCE", "fffiloni/space-factory-runs")
+    # Mount path used for the bucket.
+    bucket_mount: str = os.environ.get("SPACE_FACTORY_BUCKET_MOUNT", "/output")
+    # Job flavor, timeout and image settings.
+    job_flavor: str = os.environ.get("SPACE_FACTORY_JOB_FLAVOR", "cpu-basic")
+    job_timeout: str = os.environ.get("SPACE_FACTORY_JOB_TIMEOUT", "15m")
+    job_image: str = os.environ.get("SPACE_FACTORY_JOB_IMAGE", "python:3.12")
+    @property
+    def bucket_uri(self) -> str:
+        """`hf://buckets/` URI built from `bucket_source`."""
+        return "hf://buckets/{}".format(self.bucket_source)
+# Module-level instance built once at import time.
+settings = Settings()

src/jobs.py ADDED Viewed

	@@ -0,0 +1,140 @@

+from __future__ import annotations
+import re
+from typing import Any
+from huggingface_hub import Volume, fetch_job_logs, inspect_job, run_job
+from .config import settings
+from .runs import make_run_id, utc_now_iso, validate_run_id
+from .worker_payload import (
+    encoded_create_space_worker_script,
+    encoded_worker_script,
+    python_decode_and_run_command,
+)
+# Space repo name without its namespace: 2-96 characters, starting with an
+# ASCII letter or digit, followed by letters, digits, dots, underscores or dashes.
+SPACE_SLUG_RE = re.compile(r"^[A-Za-z0-9][A-Za-z0-9._-]{1,95}$")
+def _base_env(*, run_id: str, username: str, worker_script_b64: str) -> dict[str, str]:
+    """Build the environment variables shared by every launched job.
+    An empty *username* is recorded as "unknown"; `LAUNCHED_AT` is stamped
+    with the current UTC time.
+    """
+    env: dict[str, str] = dict(
+        RUN_ID=run_id,
+        HF_USERNAME=username if username else "unknown",
+        BUCKET_SOURCE=settings.bucket_source,
+        OUTPUT_ROOT=settings.bucket_mount,
+        WORKER_SCRIPT_B64=worker_script_b64,
+        LAUNCHED_AT=utc_now_iso(),
+    )
+    return env
+def _launch_job(*, token: str, env: dict[str, str]) -> Any:
+    """Start a job via `run_job` using the configured image, flavor and timeout.
+    The bucket is attached as a volume at the configured mount path and the
+    token is forwarded both as the `HF_TOKEN` secret and as the API token.
+    """
+    bucket_volume = Volume(type="bucket", source=settings.bucket_source, mount_path=settings.bucket_mount)
+    job_options: dict[str, Any] = {
+        "image": settings.job_image,
+        "command": python_decode_and_run_command(),
+        "flavor": settings.job_flavor,
+        "timeout": settings.job_timeout,
+        "env": env,
+        "secrets": {"HF_TOKEN": token},
+        "volumes": [bucket_volume],
+        "token": token,
+    }
+    return run_job(**job_options)
+def _job_result(job: Any, *, run_id: str, kind: str, extra: dict[str, Any] | None = None) -> dict[str, Any]:
+    """Summarise a launched job as a plain dict.
+    `job.id` is required; `url` and `status.stage` are read defensively and
+    default to `None`. Keys in *extra* are merged last and may override.
+    """
+    summary: dict[str, Any] = {
+        "run_id": run_id,
+        "kind": kind,
+        "job_id": job.id,
+        "job_url": getattr(job, "url", None),
+        "status": getattr(getattr(job, "status", None), "stage", None),
+        "bucket_source": settings.bucket_source,
+        "bucket_uri": settings.bucket_uri,
+    }
+    summary.update(extra or {})
+    return summary
+def launch_hello_job(*, token: str, username: str, run_id: str | None = None) -> dict[str, Any]:
+    """Launch the Phase 1 HF Job that only writes state/events/report to the bucket.
+    A supplied *run_id* is validated; otherwise a fresh `hello-...` id is
+    generated. Raises `ValueError` when *token* is empty.
+    """
+    if not token:
+        raise ValueError("Missing OAuth token. Please sign in with Hugging Face first.")
+    if run_id:
+        safe_run_id = validate_run_id(run_id)
+    else:
+        safe_run_id = make_run_id("hello")
+    job_env = _base_env(
+        run_id=safe_run_id,
+        username=username,
+        worker_script_b64=encoded_worker_script(),
+    )
+    launched = _launch_job(token=token, env=job_env)
+    return _job_result(launched, run_id=safe_run_id, kind="hello_job")
+def normalize_target_space(*, username: str, target_slug: str | None, run_id: str) -> str:
+    """Return `username/slug`, constrained to the signed-in user's namespace for V2.
+    When *target_slug* is blank, a slug is derived from *run_id*
+    (`space-factory-<run_id>`, lower-cased and cut to 80 characters). A full
+    `namespace/repo` value is accepted only if the namespace equals *username*.
+    Raises:
+        ValueError: if *username* is empty, the namespace differs from
+            *username*, or the slug does not match `SPACE_SLUG_RE`.
+    """
+    if not username:
+        # Without a namespace the result would be "/slug", which is not a repo id.
+        raise ValueError("Missing username. Please sign in with Hugging Face first.")
+    slug = (target_slug or "").strip()
+    if not slug:
+        slug = f"space-factory-{run_id}".lower()[:80]
+    # If user pasted a full repo id, only allow their own namespace in Phase 2.
+    if "/" in slug:
+        namespace, repo = slug.split("/", 1)
+        if namespace != username:
+            raise ValueError("For Phase 2, the target Space must be created in your own namespace.")
+        slug = repo
+    if not SPACE_SLUG_RE.match(slug):
+        raise ValueError("Invalid target Space name. Use letters, numbers, dots, underscores, or dashes.")
+    return f"{username}/{slug}"
+def launch_create_private_space_job(
+    *,
+    token: str,
+    username: str,
+    target_slug: str | None = None,
+    run_id: str | None = None,
+) -> dict[str, Any]:
+    """Launch the Phase 2 Job: create a private target Gradio Space and validate it live.
+    A supplied *run_id* is validated; otherwise a fresh `space-...` id is
+    generated. The resolved target repo id is handed to the worker through
+    `TARGET_SPACE_ID`. Raises `ValueError` when *token* is empty.
+    """
+    if not token:
+        raise ValueError("Missing OAuth token. Please sign in with Hugging Face first.")
+    if run_id:
+        safe_run_id = validate_run_id(run_id)
+    else:
+        safe_run_id = make_run_id("space")
+    target_space_id = normalize_target_space(username=username, target_slug=target_slug, run_id=safe_run_id)
+    job_env = {
+        **_base_env(
+            run_id=safe_run_id,
+            username=username,
+            worker_script_b64=encoded_create_space_worker_script(),
+        ),
+        "TARGET_SPACE_ID": target_space_id,
+    }
+    launched = _launch_job(token=token, env=job_env)
+    space_details = {
+        "target_space": target_space_id,
+        "target_space_url": f"https://huggingface.co/spaces/{target_space_id}",
+    }
+    return _job_result(
+        launched,
+        run_id=safe_run_id,
+        kind="create_private_space",
+        extra=space_details,
+    )
+def inspect_job_safe(job_id: str, token: str | None = None) -> dict[str, Any]:
+    """Inspect a job and return a plain-dict summary; never raises.
+    Returns `{"error": ...}` when *job_id* is empty or when the lookup (or
+    reading the result) raises. Timestamps are stringified; a timestamp that
+    is missing or `None` becomes an empty string rather than "None".
+    """
+    if not job_id:
+        return {"error": "Missing job_id"}
+    try:
+        info = inspect_job(job_id=job_id, token=token)
+        status = getattr(info, "status", None)
+        def _stamp(name: str) -> str:
+            # Treat an absent attribute and an explicit None the same way.
+            value = getattr(info, name, None)
+            return "" if value is None else str(value)
+        return {
+            "id": info.id,
+            "url": getattr(info, "url", None),
+            "stage": getattr(status, "stage", None),
+            "message": getattr(status, "message", None),
+            "flavor": getattr(info, "flavor", None),
+            "created_at": _stamp("created_at"),
+            "started_at": _stamp("started_at"),
+            "finished_at": _stamp("finished_at"),
+        }
+    except Exception as exc:  # noqa: BLE001
+        return {"error": str(exc)}
+def fetch_recent_logs_safe(job_id: str, token: str | None = None, max_lines: int = 120) -> str:
+    """Return the last *max_lines* log lines of a job, joined by newlines; never raises.
+    Returns an empty string when *job_id* is empty or *max_lines* is not
+    positive, and a "Could not fetch job logs: ..." message when fetching fails.
+    """
+    from collections import deque
+    # Guard non-positive limits explicitly: slicing with `[-0:]` would have
+    # returned the whole log instead of nothing.
+    if not job_id or max_lines <= 0:
+        return ""
+    try:
+        # A bounded deque keeps only the tail while the log iterator is consumed.
+        tail = deque(fetch_job_logs(job_id=job_id, token=token), maxlen=max_lines)
+        return "\n".join(str(line).rstrip("\n") for line in tail)
+    except Exception as exc:  # noqa: BLE001
+        return f"Could not fetch job logs: {exc}"

src/runs.py ADDED Viewed

	@@ -0,0 +1,23 @@

+from __future__ import annotations
+import re
+import uuid
+from datetime import datetime, timezone
+# A run id is 3-80 characters: one leading ASCII letter or digit followed by
+# 2-79 letters, digits, underscores, dots or dashes. The upper bound matches
+# the limit stated in `validate_run_id`'s error message.
+RUN_ID_RE = re.compile(r"^[a-zA-Z0-9][a-zA-Z0-9_.-]{2,79}$")
+def utc_now_iso() -> str:
+    """Return the current UTC time as an ISO 8601 string."""
+    return datetime.now(timezone.utc).isoformat()
+def make_run_id(prefix: str = "run") -> str:
+    """Return `<prefix>-<YYYYmmdd-HHMMSS>-<8 hex chars>` using the current UTC time and a random UUID."""
+    stamp = datetime.now(timezone.utc).strftime("%Y%m%d-%H%M%S")
+    return f"{prefix}-{stamp}-{uuid.uuid4().hex[:8]}"
+def validate_run_id(run_id: str) -> str:
+    """Return *run_id* stripped of surrounding whitespace if it is well formed.
+    Raises:
+        ValueError: if the stripped value does not match `RUN_ID_RE`
+            (this includes `None` and the empty string).
+    """
+    cleaned = (run_id or "").strip()
+    if not RUN_ID_RE.fullmatch(cleaned):
+        raise ValueError("Invalid run_id. Use 3-80 characters: letters, numbers, dots, underscores or dashes.")
+    return cleaned

src/security.py ADDED Viewed

	@@ -0,0 +1,24 @@

+from __future__ import annotations
+import re
+# Patterns are applied in order by `redact`; each match is replaced wholesale.
+SECRET_PATTERNS = [
+    # Strings shaped like `hf_` followed by 20+ token characters.
+    re.compile(r"hf_[A-Za-z0-9_\-]{20,}"),
+    # HTTP bearer credentials.
+    re.compile(r"Bearer\s+[A-Za-z0-9_\.\-]+", re.IGNORECASE),
+    # `KEY=value` / `KEY: value` pairs. The optional quote after the key also
+    # catches JSON-style `"KEY": "value"`, which the unquoted form missed.
+    re.compile(
+        r"(HF_TOKEN|OAUTH_TOKEN|ACCESS_TOKEN|AUTHORIZATION|PASSWORD|SECRET)[\"']?\s*[:=]\s*\S+",
+        re.IGNORECASE,
+    ),
+]
+def redact(text: str | None) -> str:
+    """Best-effort redaction for logs/reports shown in the UI.
+    This is intentionally conservative. It is not a complete DLP system,
+    but it protects against obvious token leaks in first-version outputs.
+    Returns an empty string for `None` or empty input; otherwise returns
+    *text* with every `SECRET_PATTERNS` match replaced by `[REDACTED]`.
+    """
+    if not text:
+        return ""
+    redacted = text
+    for pattern in SECRET_PATTERNS:
+        redacted = pattern.sub("[REDACTED]", redacted)
+    return redacted

src/worker_payload.py ADDED Viewed

	@@ -0,0 +1,587 @@

+from __future__ import annotations
+import base64
+import textwrap
+# Source of the Phase 1 "hello" worker, kept as a raw string so that
+# `encoded_worker_script` can base64-encode it. The script reads RUN_ID and
+# related variables from the environment and writes state.json, events.jsonl
+# and report.md under `<OUTPUT_ROOT>/runs/<RUN_ID>/`.
+HELLO_WORKER_SCRIPT = r'''
+import json
+import os
+from datetime import datetime, timezone
+from pathlib import Path
+def now():
+    return datetime.now(timezone.utc).isoformat()
+def write_json(path: Path, payload: dict):
+    path.parent.mkdir(parents=True, exist_ok=True)
+    path.write_text(json.dumps(payload, indent=2, ensure_ascii=False) + "\n", encoding="utf-8")
+def append_event(path: Path, step: str, status: str, message: str, data: dict | None = None):
+    path.parent.mkdir(parents=True, exist_ok=True)
+    event = {
+        "ts": now(),
+        "step": step,
+        "status": status,
+        "message": message,
+        "data": data or {},
+    }
+    line = json.dumps(event, ensure_ascii=False)
+    with path.open("a", encoding="utf-8") as f:
+        f.write(line + "\n")
+    # Keep HF Job logs useful as well as Bucket events.
+    print(line, flush=True)
+def main():
+    run_id = os.environ["RUN_ID"]
+    hf_username = os.environ.get("HF_USERNAME", "unknown")
+    bucket_source = os.environ.get("BUCKET_SOURCE", "unknown")
+    output_root = Path(os.environ.get("OUTPUT_ROOT", "/output"))
+    job_id = os.environ.get("JOB_ID")
+    accelerator = os.environ.get("ACCELERATOR") or "none"
+    cpu_cores = os.environ.get("CPU_CORES")
+    memory = os.environ.get("MEMORY")
+    has_hf_token = bool(os.environ.get("HF_TOKEN"))
+    run_dir = output_root / "runs" / run_id
+    state_path = run_dir / "state.json"
+    events_path = run_dir / "events.jsonl"
+    report_path = run_dir / "report.md"
+    append_event(events_path, "bootstrap", "started", "HF Job started")
+    append_event(
+        events_path,
+        "environment",
+        "success",
+        "Collected non-sensitive job environment metadata",
+        {
+            "job_id": job_id,
+            "accelerator": accelerator,
+            "cpu_cores": cpu_cores,
+            "memory": memory,
+            "has_hf_token": has_hf_token,
+        },
+    )
+    state = {
+        "run_id": run_id,
+        "status": "success",
+        "kind": "hello_job",
+        "message": "Hello from HF Job. OAuth → Job → Bucket write succeeded.",
+        "created_at": now(),
+        "updated_at": now(),
+        "created_by": hf_username,
+        "bucket_source": bucket_source,
+        "job_id": job_id,
+        "accelerator": accelerator,
+        "cpu_cores": cpu_cores,
+        "memory": memory,
+        "has_hf_token": has_hf_token,
+        "security_notes": [
+            "HF_TOKEN was not printed.",
+            "This run does not create or publish any repository.",
+            "The bucket should remain private.",
+        ],
+    }
+    write_json(state_path, state)
+    append_event(events_path, "state_write", "success", "Wrote state.json")
+    report = f"""# Agentic Space Factory — Hello Job Report
+Run ID: `{run_id}`
+Status: **success**
+This first worker validated the critical foundation:
+```text
+OAuth user → HF Job → mounted Storage Bucket → state/events/report write
+```
+## Non-sensitive job metadata
+- Job ID: `{job_id}`
+- User: `{hf_username}`
+- Bucket: `{bucket_source}`
+- Accelerator: `{accelerator}`
+- CPU cores: `{cpu_cores}`
+- Memory: `{memory}`
+- HF token present in job env: `{has_hf_token}`
+## Security posture
+- The token was passed as a secret and was not printed.
+- This run did not create or modify any Hugging Face repository.
+- This run did not publish anything publicly.
+## Next implementation step
+The next increment should create a private target Gradio Space and validate it with `gradio_client` before reporting success.
+"""
+    report_path.write_text(report, encoding="utf-8")
+    append_event(events_path, "report_write", "success", "Wrote report.md")
+    append_event(events_path, "done", "success", "Hello Job completed")
+if __name__ == "__main__":
+    main()
+'''
+# Source of the Phase 2 worker, kept as a raw string so that
+# `encoded_create_space_worker_script` can base64-encode it. The script
+# installs its own dependencies, creates the private Space named by
+# TARGET_SPACE_ID, uploads the generated files, polls the live Gradio API
+# until a test call passes or times out, and records state/events/report
+# under `<OUTPUT_ROOT>/runs/<RUN_ID>/`.
+CREATE_SPACE_WORKER_SCRIPT = r'''
+import json
+import os
+import re
+import subprocess
+import sys
+import time
+from datetime import datetime, timezone
+from pathlib import Path
+from textwrap import dedent
+TARGET_RE = re.compile(r"^[A-Za-z0-9][A-Za-z0-9._-]{1,95}/[A-Za-z0-9][A-Za-z0-9._-]{1,95}$")
+def now():
+    return datetime.now(timezone.utc).isoformat()
+def write_json(path: Path, payload: dict):
+    path.parent.mkdir(parents=True, exist_ok=True)
+    path.write_text(json.dumps(payload, indent=2, ensure_ascii=False) + "\n", encoding="utf-8")
+def append_event(path: Path, step: str, status: str, message: str, data: dict | None = None):
+    path.parent.mkdir(parents=True, exist_ok=True)
+    event = {"ts": now(), "step": step, "status": status, "message": message, "data": data or {}}
+    line = json.dumps(event, ensure_ascii=False)
+    with path.open("a", encoding="utf-8") as f:
+        f.write(line + "\n")
+    # Keep HF Job logs useful as well as Bucket events.
+    print(line, flush=True)
+def fail(run_dir: Path, events_path: Path, message: str, details: dict | None = None, status: str = "failed"):
+    state_path = run_dir / "state.json"
+    append_event(events_path, "failure", "failed", message, details or {})
+    write_json(state_path, {
+        "run_id": os.environ.get("RUN_ID"),
+        "kind": "create_private_space",
+        "status": status,
+        "message": message,
+        "updated_at": now(),
+        "details": details or {},
+    })
+    report = f"""# Agentic Space Factory — Private Space Creation Report
+Status: **{status}**
+{message}
+```json
+{json.dumps(details or {}, indent=2, ensure_ascii=False)}
+```
+"""
+    (run_dir / "report.md").write_text(report, encoding="utf-8")
+    raise SystemExit(1)
+def pip_install(events_path: Path):
+    append_event(events_path, "dependencies", "started", "Installing worker dependencies")
+    cmd = [sys.executable, "-m", "pip", "install", "-q", "--upgrade", "huggingface_hub>=1.0.0", "gradio_client>=2.0.0"]
+    result = subprocess.run(cmd, text=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT)
+    if result.returncode != 0:
+        append_event(events_path, "dependencies", "failed", "Dependency installation failed", {"output_tail": result.stdout[-4000:]})
+        raise RuntimeError(result.stdout)
+    append_event(events_path, "dependencies", "success", "Worker dependencies installed")
+def target_files(target_space_id: str) -> dict[str, str]:
+    app_py = dedent(f"""
+    import gradio as gr
+    def greet(name: str) -> str:
+        name = (name or "friend").strip() or "friend"
+        return f"Hello {{name}} — this private Space was generated by Agentic Space Factory."
+    demo = gr.Interface(
+        fn=greet,
+        inputs=gr.Textbox(label="Name", value="Hugging Face"),
+        outputs=gr.Textbox(label="Result"),
+        title="Generated private Space",
+        description="A minimal Gradio Space created by an HF Job, then validated through the live Gradio API.",
+        examples=[["Hugging Face"], ["Agentic Space Factory"]],
+    )
+    if __name__ == "__main__":
+        demo.launch()
+    """).strip() + "\n"
+    readme = dedent(f"""
+    ---
+    title: Generated Private Space
+    emoji: 🧪
+    colorFrom: blue
+    colorTo: purple
+    sdk: gradio
+    app_file: app.py
+    python_version: "3.11"
+    pinned: false
+    ---
+    # Generated Private Space
+    This private Space was generated by **Agentic Space Factory**.
+    Target repo: `{target_space_id}`
+    This Phase 2 version intentionally creates only a safe hello-world Gradio app.
+    Later phases will add Pi, model-card analysis, ZeroGPU templates, and automatic repair.
+    """).strip() + "\n"
+    requirements = "gradio>=5.0.0\n"
+    return {"app.py": app_py, "README.md": readme, "requirements.txt": requirements}
+def save_generated_files(run_dir: Path, files: dict[str, str]):
+    generated_dir = run_dir / "generated"
+    generated_dir.mkdir(parents=True, exist_ok=True)
+    for filename, content in files.items():
+        (generated_dir / filename).write_text(content, encoding="utf-8")
+def create_and_upload_space(api, token: str, target_space_id: str, files: dict[str, str], events_path: Path):
+    append_event(events_path, "create_space", "started", f"Creating private target Space {target_space_id}")
+    try:
+        api.create_repo(
+            repo_id=target_space_id,
+            repo_type="space",
+            space_sdk="gradio",
+            private=True,
+            exist_ok=False,
+            token=token,
+        )
+        append_event(events_path, "create_space", "success", "Private target Space created", {"target_space": target_space_id})
+    except Exception as exc:
+        # If it already exists, fail safely instead of overwriting user resources unexpectedly.
+        append_event(events_path, "create_space", "failed", "Could not create target Space", {"error": str(exc)})
+        raise
+    append_event(events_path, "upload_files", "started", "Uploading generated files to target Space")
+    for path_in_repo, content in files.items():
+        api.upload_file(
+            path_or_fileobj=content.encode("utf-8"),
+            path_in_repo=path_in_repo,
+            repo_id=target_space_id,
+            repo_type="space",
+            token=token,
+        )
+        append_event(events_path, "upload_files", "success", f"Uploaded {path_in_repo}")
+def make_gradio_client(target_space_id: str, token: str):
+    """Create a Gradio Client across gradio_client versions.
+    gradio_client 2.x uses `token=...`; older/newer docs often mention
+    `hf_token=...`; some versions expose `api_key` or `headers`. Using
+    signature introspection prevents a permanent wait loop on a TypeError.
+    """
+    import inspect
+    from gradio_client import Client
+    params = inspect.signature(Client).parameters
+    if "token" in params:
+        return Client(target_space_id, token=token)
+    if "hf_token" in params:
+        return Client(target_space_id, hf_token=token)
+    if "api_key" in params:
+        return Client(target_space_id, api_key=token)
+    if "headers" in params:
+        return Client(target_space_id, headers={"Authorization": f"Bearer {token}"})
+    # Last-resort fallback: if the process is logged in via HF_TOKEN/HF CLI,
+    # some client versions can pick credentials from the environment/cache.
+    return Client(target_space_id)
+def get_api_schema(client):
+    try:
+        return client.view_api(return_format="dict")
+    except TypeError:
+        return client.view_api()
+def extract_api_names(api_schema) -> list[str]:
+    """Best-effort extraction across gradio_client schema formats.
+    Gradio/Gradio Client versions differ: an Interface can expose `/predict`,
+    `/greet`, or another named endpoint. For the generated hello app the live
+    Job logs show `/greet`, so validation must discover endpoints instead of
+    hardcoding `/predict`.
+    """
+    names: list[str] = []
+    def add(value):
+        if not value or not isinstance(value, str):
+            return
+        name = value if value.startswith("/") else f"/{value}"
+        if name not in names:
+            names.append(name)
+    def walk(obj):
+        if isinstance(obj, dict):
+            for key, value in obj.items():
+                if key in {"api_name", "apiName"}:
+                    add(value)
+                # Some schemas use endpoint paths as keys, for example `/greet`.
+                if isinstance(key, str) and key.startswith("/"):
+                    add(key)
+                walk(value)
+        elif isinstance(obj, list):
+            for item in obj:
+                walk(item)
+    walk(api_schema)
+    return names
+def predict_with_available_endpoint(client, api_schema, value: str):
+    candidates = extract_api_names(api_schema)
+    for fallback in ["/greet", "/predict"]:
+        if fallback not in candidates:
+            candidates.append(fallback)
+    errors = []
+    for api_name in candidates:
+        try:
+            return api_name, client.predict(value, api_name=api_name)
+        except Exception as exc:
+            errors.append({"api_name": api_name, "error": str(exc)[-500:]})
+    # Last fallback for old/simple gradio_client versions where api_name may be optional.
+    try:
+        return None, client.predict(value)
+    except Exception as exc:
+        errors.append({"api_name": None, "error": str(exc)[-500:]})
+        raise RuntimeError(f"No candidate Gradio endpoint worked: {json.dumps(errors, ensure_ascii=False)}")
+def validate_live_api(target_space_id: str, token: str, events_path: Path, tests_dir: Path, timeout_seconds: int = 360):
+    tests_dir.mkdir(parents=True, exist_ok=True)
+    deadline = time.time() + timeout_seconds
+    last_error = None
+    attempt = 0
+    append_event(events_path, "api_validation", "started", "Waiting for live Gradio API to become available")
+    while time.time() < deadline:
+        attempt += 1
+        try:
+            client = make_gradio_client(target_space_id, token)
+            api_schema = get_api_schema(client)
+            api_names = extract_api_names(api_schema)
+            write_json(tests_dir / "api_schema.json", {"schema": api_schema, "api_names": api_names})
+            used_api_name, result = predict_with_available_endpoint(client, api_schema, "Agentic Space Factory")
+            result_text = str(result)
+            ok = "Agentic Space Factory" in result_text and "Hello" in result_text
+            payload = {
+                "attempt": attempt,
+                "target_space": target_space_id,
+                "api_test_passed": ok,
+                "api_name": used_api_name,
+                "discovered_api_names": api_names,
+                "result": result_text,
+                "validated_at": now(),
+            }
+            write_json(tests_dir / "test_result.json", payload)
+            if ok:
+                append_event(
+                    events_path,
+                    "api_validation",
+                    "success",
+                    "Live Gradio API test passed",
+                    {"attempt": attempt, "api_name": used_api_name, "discovered_api_names": api_names},
+                )
+                return payload
+            last_error = f"Unexpected API result from {used_api_name}: {result_text}"
+        except Exception as exc:
+            last_error = str(exc)
+            append_event(events_path, "api_validation", "waiting", "Live API not ready yet", {"attempt": attempt, "error": last_error[-1000:]})
+        time.sleep(20)
+    payload = {
+        "target_space": target_space_id,
+        "api_test_passed": False,
+        "error": last_error,
+        "validated_at": now(),
+    }
+    write_json(tests_dir / "test_result.json", payload)
+    raise RuntimeError(f"Live API validation did not pass before timeout: {last_error}")
+def main():
+    run_id = os.environ["RUN_ID"]
+    hf_username = os.environ.get("HF_USERNAME", "unknown")
+    bucket_source = os.environ.get("BUCKET_SOURCE", "unknown")
+    output_root = Path(os.environ.get("OUTPUT_ROOT", "/output"))
+    target_space_id = os.environ["TARGET_SPACE_ID"]
+    token = os.environ.get("HF_TOKEN")
+    run_dir = output_root / "runs" / run_id
+    events_path = run_dir / "events.jsonl"
+    state_path = run_dir / "state.json"
+    report_path = run_dir / "report.md"
+    target_json_path = run_dir / "target_space.json"
+    append_event(events_path, "bootstrap", "started", "Private Space creation worker started")
+    write_json(state_path, {
+        "run_id": run_id,
+        "kind": "create_private_space",
+        "status": "running",
+        "message": "Creating private target Space",
+        "target_space": target_space_id,
+        "created_by": hf_username,
+        "bucket_source": bucket_source,
+        "created_at": now(),
+        "updated_at": now(),
+    })
+    if not token:
+        fail(run_dir, events_path, "HF_TOKEN is missing from Job secrets")
+    if not TARGET_RE.match(target_space_id):
+        fail(run_dir, events_path, "Invalid TARGET_SPACE_ID", {"target_space": target_space_id})
+    if not target_space_id.startswith(f"{hf_username}/"):
+        fail(run_dir, events_path, "For Phase 2, target Space must be in the signed-in user's namespace", {"target_space": target_space_id, "username": hf_username})
+    try:
+        pip_install(events_path)
+        from huggingface_hub import HfApi
+        api = HfApi(token=token)
+        whoami = api.whoami(token=token)
+        append_event(events_path, "auth", "success", "Authenticated inside Job", {"whoami_name": whoami.get("name")})
+        files = target_files(target_space_id)
+        save_generated_files(run_dir, files)
+        append_event(events_path, "generate_files", "success", "Generated minimal Gradio Space files", {"files": list(files)})
+        create_and_upload_space(api, token, target_space_id, files, events_path)
+        write_json(target_json_path, {
+            "target_space": target_space_id,
+            "url": f"https://huggingface.co/spaces/{target_space_id}",
+            "private": True,
+            "sdk": "gradio",
+            "created_by": hf_username,
+        })
+        validation = validate_live_api(target_space_id, token, events_path, run_dir / "tests")
+        final_state = {
+            "run_id": run_id,
+            "kind": "create_private_space",
+            "status": "success",
+            "message": "Private Gradio Space created and validated through the live API.",
+            "target_space": target_space_id,
+            "target_space_url": f"https://huggingface.co/spaces/{target_space_id}",
+            "created_by": hf_username,
+            "bucket_source": bucket_source,
+            "validation": validation,
+            "updated_at": now(),
+            "security_notes": [
+                "The target Space was created as private.",
+                "The HF token was not printed or written to report files.",
+                "Success was declared only after a live Gradio API test passed.",
+            ],
+        }
+        write_json(state_path, final_state)
+        report = f"""# Agentic Space Factory — Private Space Creation Report
+Run ID: `{run_id}`
+Status: **success**
+Created private Space: [`{target_space_id}`](https://huggingface.co/spaces/{target_space_id})
+## What happened
+```text
+OAuth user → HF Job → private Space creation → file upload → live Gradio API validation → Bucket report
+```
+## Generated files
+- `app.py`
+- `requirements.txt`
+- `README.md`
+Copies are stored in:
+```text
+runs/{run_id}/generated/
+```
+## Live API validation
+```json
+{json.dumps(validation, indent=2, ensure_ascii=False)}
+```
+## Security posture
+- The target Space was created as private.
+- No token was printed or intentionally persisted.
+- Success was declared only after the live Gradio API returned the expected output.
+## Next step
+Phase 3 should introduce Pi inside the Job and ask it to modify/repair this simple Space while preserving the live API validation gate.
+"""
+        report_path.write_text(report, encoding="utf-8")
+        append_event(events_path, "report_write", "success", "Wrote report.md")
+        append_event(events_path, "done", "success", "Private Space creation worker completed")
+    except Exception as exc:
+        fail(run_dir, events_path, "Private Space creation worker failed", {"error": str(exc)})
+if __name__ == "__main__":
+    main()
+'''
+def _encode(script: str) -> str:
+    """Return *script* encoded as UTF-8 bytes and then as base64 ASCII text."""
+    raw_bytes = script.encode("utf-8")
+    return str(base64.b64encode(raw_bytes), "ascii")
+def encoded_worker_script() -> str:
+    """Return the base64-encoded Phase 1 hello worker script."""
+    hello_source = HELLO_WORKER_SCRIPT
+    return _encode(hello_source)
+def encoded_create_space_worker_script() -> str:
+    """Return the base64-encoded Phase 2 private Space creation worker script."""
+    create_space_source = CREATE_SPACE_WORKER_SCRIPT
+    return _encode(create_space_source)
+def python_decode_and_run_command() -> list[str]:
+    """Command list for `run_job`.
+    The Job image only needs Python. The inline runner decodes the base64
+    script found in `WORKER_SCRIPT_B64`, writes it to
+    `/tmp/space_factory_worker.py`, runs it with the same interpreter and
+    exits with the worker's return code.
+    """
+    runner_source = """
+        import base64, os, pathlib, subprocess, sys
+        script = base64.b64decode(os.environ['WORKER_SCRIPT_B64']).decode('utf-8')
+        path = pathlib.Path('/tmp/space_factory_worker.py')
+        path.write_text(script, encoding='utf-8')
+        raise SystemExit(subprocess.call([sys.executable, str(path)]))
+        """
+    # Strip the common indentation and the surrounding blank lines.
+    runner = textwrap.dedent(runner_source).strip()
+    command = ["python", "-c"]
+    command.append(runner)
+    return command