agentic-space-factory-etheroi

Paused

App Files Community

fffiloni committed 27 days ago

Commit

52793cb

verified ·

1 Parent(s): a8de4ea

Upload 5 files

Browse files

Files changed (3) hide show

CHANGELOG.md +6 -0
README.md +6 -1
app.py +138 -0

CHANGELOG.md CHANGED Viewed

@@ -1,3 +1,9 @@
 # Changelog
 ## V9

+## V10
+- Added Phase 10 universal model-card builder.
+- Generalized the LongCat full-inference gate to arbitrary Hugging Face models.
+- Added `INFERENCE_CONTRACT.json`/`TECHNICAL_BLOCKERS.json` contract prompts and private Space generation flow.
 # Changelog
 ## V9

README.md CHANGED Viewed

@@ -19,7 +19,7 @@ hf_oauth_scopes:
   - read-billing
 ---
-# Agentic Space Factory — V9 LongCat Full-Inference Gate
 This version validates the safe foundation for a Hugging Face-native “Agentic Space Factory”.
@@ -179,3 +179,8 @@ Key behavior:
 - hardware changes remain best-effort because OAuth tokens may create/write Spaces but fail on paid hardware changes.
 This phase is designed to distinguish “bootable scaffold” from “functional model reproduction”.

   - read-billing
 ---
+# Agentic Space Factory — V10 Universal Model-Card Builder
 This version validates the safe foundation for a Hugging Face-native “Agentic Space Factory”.
 - hardware changes remain best-effort because OAuth tokens may create/write Spaces but fail on paid hardware changes.
 This phase is designed to distinguish “bootable scaffold” from “functional model reproduction”.
+## V10 Universal builder
+Phase 10 accepts any Hugging Face model card URL or `owner/model` ID, launches Pi in a HF Job, creates a private Space, and classifies the result with a full-inference gate or technical blockers.

app.py CHANGED Viewed

@@ -16,6 +16,7 @@ from src.jobs import (
     launch_pi_gist_recipe_job,
     launch_pi_model_card_job,
     launch_runtime_recommender_job,
     launch_pi_space_smoke_job,
 )
 from src.runs import make_run_id, validate_run_id
@@ -35,6 +36,7 @@ Phase 4: HF OAuth → HF Job → Pi reads gist → uses hf CLI → private Space
 Phase 5: HF OAuth → HF Job → model-card analysis → Pi adapts template → private model Space → live API validation
 Phase 6: HF OAuth → HF Job → model-card/runtime analysis → runtime/hardware recommendation → Bucket report
 Phase 9: HF OAuth → HF Job → LongCat full-inference gate → HF Kernels/SDPA investigation → health/blocker reporting
 ```
 Configured bucket: `{settings.bucket_uri}`
@@ -93,6 +95,47 @@ def propose_longcat_run_id() -> str:
     return make_run_id("longcat")
 def launch_longcat_article_job_ui(
     requested_run_id: str,
     model_id: str,
@@ -316,6 +359,101 @@ def build_demo() -> gr.Blocks:
         demo.load(fn=get_login_status, inputs=None, outputs=login_status)
         with gr.Tab("Phase 9 — LongCat full-inference gate"):
             gr.Markdown(
                 """

     launch_pi_gist_recipe_job,
     launch_pi_model_card_job,
     launch_runtime_recommender_job,
+    launch_universal_model_card_job,
     launch_pi_space_smoke_job,
 )
 from src.runs import make_run_id, validate_run_id
 Phase 5: HF OAuth → HF Job → model-card analysis → Pi adapts template → private model Space → live API validation
 Phase 6: HF OAuth → HF Job → model-card/runtime analysis → runtime/hardware recommendation → Bucket report
 Phase 9: HF OAuth → HF Job → LongCat full-inference gate → HF Kernels/SDPA investigation → health/blocker reporting
+Phase 10: HF OAuth → HF Job → arbitrary model card → Pi/Qwen builds private Space → health/full-inference gate → Bucket traces/report
 ```
 Configured bucket: `{settings.bucket_uri}`
     return make_run_id("longcat")
+def propose_universal_run_id() -> str:
+    return make_run_id("universal")
+def launch_universal_model_card_job_ui(
+    requested_run_id: str,
+    model_id: str,
+    target_space_name: str,
+    pi_model: str,
+    preferred_hardware: str,
+    allow_fixed_gpu_fallback: bool,
+    fallback_hardware: str,
+    implementation_mode: str,
+    profile: gr.OAuthProfile | None,
+    oauth_token: gr.OAuthToken | None,
+) -> tuple[str, str, str, str, str, str]:
+    username = _profile_username(profile)
+    token = _token_value(oauth_token)
+    if not username or not token:
+        raise gr.Error("Please sign in with Hugging Face first. OAuth profile/token is missing.")
+    run_id = validate_run_id(requested_run_id or propose_universal_run_id())
+    result = launch_universal_model_card_job(
+        token=token,
+        username=username,
+        target_slug=target_space_name,
+        model_id=model_id,
+        pi_model=pi_model,
+        preferred_space_hardware=preferred_hardware,
+        fallback_space_hardware=fallback_hardware,
+        allow_fixed_gpu_fallback=allow_fixed_gpu_fallback,
+        implementation_mode=implementation_mode,
+        run_id=run_id,
+    )
+    job_url = result.get("job_url") or ""
+    target_space = result.get("target_space") or ""
+    target_url = result.get("target_space_url") or ""
+    summary = json.dumps(result, indent=2)
+    return run_id, result["job_id"], job_url, target_space, target_url, summary
 def launch_longcat_article_job_ui(
     requested_run_id: str,
     model_id: str,
         demo.load(fn=get_login_status, inputs=None, outputs=login_status)
+        with gr.Tab("Phase 10 — Universal model-card builder"):
+            gr.Markdown(
+                """
+This phase generalizes the LongCat workflow: paste any Hugging Face model card URL or `owner/model` ID, and Pi will attempt to build the best possible private Gradio Space while following the HF Spaces gist.
+It uses a strict contract: if real inference is feasible, Pi should wire it. If not, it must produce `TECHNICAL_BLOCKERS.json` and the wrapper will classify the result as health-only or technical-blocker rather than pretending it is a full success.
+Hardware requests remain best-effort because OAuth/billing/ZeroGPU quota may block automatic upgrades. You can manually set hardware while the Job is waiting.
+"""
+            )
+            with gr.Row():
+                universal_run_id_box = gr.Textbox(label="Run ID", value=propose_universal_run_id, interactive=True)
+                new_universal_run_btn = gr.Button("Generate new run id")
+            new_universal_run_btn.click(fn=propose_universal_run_id, inputs=None, outputs=universal_run_id_box)
+            universal_model_id_box = gr.Textbox(
+                label="Model card URL or model ID",
+                value="sshleifer/tiny-gpt2",
+                info="Examples: sshleifer/tiny-gpt2, runwayml/stable-diffusion-v1-5, or https://huggingface.co/owner/model",
+            )
+            universal_target_space_name = gr.Textbox(
+                label="Target Space name",
+                placeholder="e.g. space-factory-custom-model-v1",
+                info="Use a fresh name. The Space is created under your username and remains private.",
+            )
+            universal_pi_model_box = gr.Textbox(
+                label="Pi model",
+                value="Qwen/Qwen3-Coder-Next",
+                info="Model used by Pi through Hugging Face Inference Providers.",
+            )
+            universal_impl_mode = gr.Dropdown(
+                label="Implementation mode",
+                choices=["full-inference-gated", "full-inference-attempt", "safe-scaffold"],
+                value="full-inference-gated",
+                info="Gated mode forbids placeholder success; impossible models must produce TECHNICAL_BLOCKERS.json.",
+            )
+            with gr.Row():
+                universal_preferred_hw = gr.Dropdown(
+                    label="Preferred Space hardware",
+                    choices=["cpu-basic", "zero-a10g", "t4-small", "t4-medium", "a10g-large", "l40sx1", "a100-large", "h200"],
+                    value="cpu-basic",
+                    info="Best-effort request. Use CPU for small models; choose GPU if you expect heavy inference and can set it manually if OAuth cannot.",
+                )
+                universal_allow_fallback = gr.Checkbox(label="Allow fixed GPU fallback", value=False)
+                universal_fallback_hw = gr.Dropdown(
+                    label="Fallback Space hardware",
+                    choices=["l40sx1", "a10g-large", "a100-large", "h200", "t4-medium"],
+                    value="l40sx1",
+                )
+            launch_universal_btn = gr.Button("Build private Space from model card", variant="primary")
+            phase10_job_id_box = gr.Textbox(label="Job ID", interactive=True)
+            phase10_job_url_box = gr.Textbox(label="Job URL", interactive=False)
+            phase10_target_space_box = gr.Textbox(label="Target Space", interactive=False)
+            phase10_target_url_box = gr.Textbox(label="Target Space URL", interactive=False)
+            phase10_launch_result = gr.Code(label="Launch result", language="json")
+            launch_universal_btn.click(
+                fn=launch_universal_model_card_job_ui,
+                inputs=[
+                    universal_run_id_box,
+                    universal_model_id_box,
+                    universal_target_space_name,
+                    universal_pi_model_box,
+                    universal_preferred_hw,
+                    universal_allow_fallback,
+                    universal_fallback_hw,
+                    universal_impl_mode,
+                ],
+                outputs=[
+                    universal_run_id_box,
+                    phase10_job_id_box,
+                    phase10_job_url_box,
+                    phase10_target_space_box,
+                    phase10_target_url_box,
+                    phase10_launch_result,
+                ],
+            )
+            phase10_refresh_btn = gr.Button("Refresh Phase 10 run status")
+            with gr.Tab("Phase 10 state"):
+                phase10_state = gr.Code(label="state.json", language="json")
+            with gr.Tab("Phase 10 events"):
+                phase10_events = gr.Code(label="events.jsonl", language="json")
+            with gr.Tab("Phase 10 report"):
+                phase10_report = gr.Markdown()
+            with gr.Tab("Phase 10 job"):
+                phase10_job_info = gr.Code(label="Job info/logs", language="json")
+            phase10_refresh_btn.click(
+                fn=refresh_run_ui,
+                inputs=[universal_run_id_box, phase10_job_id_box],
+                outputs=[phase10_state, phase10_events, phase10_report, phase10_job_info],
+            )
         with gr.Tab("Phase 9 — LongCat full-inference gate"):
             gr.Markdown(
                 """