fffiloni committed on
Commit
52793cb
·
verified ·
1 Parent(s): a8de4ea

Upload 5 files

Browse files
Files changed (3) hide show
  1. CHANGELOG.md +6 -0
  2. README.md +6 -1
  3. app.py +138 -0
CHANGELOG.md CHANGED
@@ -1,3 +1,9 @@
 
 
 
 
 
 
1
  # Changelog
2
 
3
  ## V9
 
1
+ ## V10
2
+
3
+ - Added Phase 10 universal model-card builder.
4
+ - Generalized the LongCat full-inference gate to arbitrary Hugging Face models.
5
+ - Added `INFERENCE_CONTRACT.json`/`TECHNICAL_BLOCKERS.json` contract prompts and private Space generation flow.
6
+
7
  # Changelog
8
 
9
  ## V9
README.md CHANGED
@@ -19,7 +19,7 @@ hf_oauth_scopes:
19
  - read-billing
20
  ---
21
 
22
- # Agentic Space Factory — V9 LongCat Full-Inference Gate
23
 
24
  This version validates the safe foundation for a Hugging Face-native “Agentic Space Factory”.
25
 
@@ -179,3 +179,8 @@ Key behavior:
179
  - hardware changes remain best-effort because OAuth tokens may create/write Spaces but fail on paid hardware changes.
180
 
181
  This phase is designed to distinguish “bootable scaffold” from “functional model reproduction”.
 
 
 
 
 
 
19
  - read-billing
20
  ---
21
 
22
+ # Agentic Space Factory — V10 Universal Model-Card Builder
23
 
24
  This version validates the safe foundation for a Hugging Face-native “Agentic Space Factory”.
25
 
 
179
  - hardware changes remain best-effort because OAuth tokens may create/write Spaces but fail on paid hardware changes.
180
 
181
  This phase is designed to distinguish “bootable scaffold” from “functional model reproduction”.
182
+
183
+
184
+ ## V10 Universal builder
185
+
186
+ Phase 10 accepts any Hugging Face model card URL or `owner/model` ID, launches Pi in a HF Job, creates a private Space, and classifies the result with a full-inference gate or technical blockers.
app.py CHANGED
@@ -16,6 +16,7 @@ from src.jobs import (
16
  launch_pi_gist_recipe_job,
17
  launch_pi_model_card_job,
18
  launch_runtime_recommender_job,
 
19
  launch_pi_space_smoke_job,
20
  )
21
  from src.runs import make_run_id, validate_run_id
@@ -35,6 +36,7 @@ Phase 4: HF OAuth → HF Job → Pi reads gist → uses hf CLI → private Space
35
  Phase 5: HF OAuth → HF Job → model-card analysis → Pi adapts template → private model Space → live API validation
36
  Phase 6: HF OAuth → HF Job → model-card/runtime analysis → runtime/hardware recommendation → Bucket report
37
  Phase 9: HF OAuth → HF Job → LongCat full-inference gate → HF Kernels/SDPA investigation → health/blocker reporting
 
38
  ```
39
 
40
  Configured bucket: `{settings.bucket_uri}`
@@ -93,6 +95,47 @@ def propose_longcat_run_id() -> str:
93
  return make_run_id("longcat")
94
 
95
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
96
  def launch_longcat_article_job_ui(
97
  requested_run_id: str,
98
  model_id: str,
@@ -316,6 +359,101 @@ def build_demo() -> gr.Blocks:
316
  demo.load(fn=get_login_status, inputs=None, outputs=login_status)
317
 
318
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
319
  with gr.Tab("Phase 9 β€” LongCat full-inference gate"):
320
  gr.Markdown(
321
  """
 
16
  launch_pi_gist_recipe_job,
17
  launch_pi_model_card_job,
18
  launch_runtime_recommender_job,
19
+ launch_universal_model_card_job,
20
  launch_pi_space_smoke_job,
21
  )
22
  from src.runs import make_run_id, validate_run_id
 
36
  Phase 5: HF OAuth → HF Job → model-card analysis → Pi adapts template → private model Space → live API validation
37
  Phase 6: HF OAuth → HF Job → model-card/runtime analysis → runtime/hardware recommendation → Bucket report
38
  Phase 9: HF OAuth → HF Job → LongCat full-inference gate → HF Kernels/SDPA investigation → health/blocker reporting
39
+ Phase 10: HF OAuth → HF Job → arbitrary model card → Pi/Qwen builds private Space → health/full-inference gate → Bucket traces/report
40
  ```
41
 
42
  Configured bucket: `{settings.bucket_uri}`
 
95
  return make_run_id("longcat")
96
 
97
 
98
def propose_universal_run_id() -> str:
    """Propose a new run ID for the Phase 10 universal model-card builder.

    Returns:
        Whatever ``make_run_id`` produces when called with ``"universal"``.
    """
    run_id = make_run_id("universal")
    return run_id
100
+
101
+
102
def launch_universal_model_card_job_ui(
    requested_run_id: str,
    model_id: str,
    target_space_name: str,
    pi_model: str,
    preferred_hardware: str,
    allow_fixed_gpu_fallback: bool,
    fallback_hardware: str,
    implementation_mode: str,
    profile: gr.OAuthProfile | None,
    oauth_token: gr.OAuthToken | None,
) -> tuple[str, str, str, str, str, str]:
    """UI callback that launches the universal model-card job.

    Args:
        requested_run_id: Run ID typed by the user; when falsy, a new one is
            proposed via ``propose_universal_run_id``.
        model_id: Forwarded unchanged as ``model_id``.
        target_space_name: Forwarded unchanged as ``target_slug``.
        pi_model: Forwarded unchanged as ``pi_model``.
        preferred_hardware: Forwarded as ``preferred_space_hardware``.
        allow_fixed_gpu_fallback: Forwarded as ``allow_fixed_gpu_fallback``.
        fallback_hardware: Forwarded as ``fallback_space_hardware``.
        implementation_mode: Forwarded as ``implementation_mode``.
        profile: OAuth profile of the signed-in user, or ``None``.
        oauth_token: OAuth token of the signed-in user, or ``None``.

    Returns:
        ``(run_id, job_id, job_url, target_space, target_space_url, summary)``
        where ``summary`` is the launch result pretty-printed as JSON and the
        URL/space fields fall back to ``""`` when missing or falsy.

    Raises:
        gr.Error: If no username or token can be derived from the OAuth inputs.
        KeyError: If the launch result has no ``"job_id"`` entry.
    """
    username = _profile_username(profile)
    token = _token_value(oauth_token)
    if not (username and token):
        raise gr.Error("Please sign in with Hugging Face first. OAuth profile/token is missing.")

    # Only propose a fresh ID when the user left the field empty.
    candidate_run_id = requested_run_id if requested_run_id else propose_universal_run_id()
    run_id = validate_run_id(candidate_run_id)

    job_kwargs = {
        "token": token,
        "username": username,
        "target_slug": target_space_name,
        "model_id": model_id,
        "pi_model": pi_model,
        "preferred_space_hardware": preferred_hardware,
        "fallback_space_hardware": fallback_hardware,
        "allow_fixed_gpu_fallback": allow_fixed_gpu_fallback,
        "implementation_mode": implementation_mode,
        "run_id": run_id,
    }
    result = launch_universal_model_card_job(**job_kwargs)

    def _text_or_empty(key: str) -> str:
        # Optional fields are shown as empty strings in the UI.
        return result.get(key) or ""

    job_url = _text_or_empty("job_url")
    target_space = _text_or_empty("target_space")
    target_url = _text_or_empty("target_space_url")
    summary = json.dumps(result, indent=2)

    # "job_id" is accessed strictly, unlike the optional fields above.
    return (run_id, result["job_id"], job_url, target_space, target_url, summary)
137
+
138
+
139
  def launch_longcat_article_job_ui(
140
  requested_run_id: str,
141
  model_id: str,
 
359
  demo.load(fn=get_login_status, inputs=None, outputs=login_status)
360
 
361
 
362
+ with gr.Tab("Phase 10 β€” Universal model-card builder"):
363
+ gr.Markdown(
364
+ """
365
+ This phase generalizes the LongCat workflow: paste any Hugging Face model card URL or `owner/model` ID, and Pi will attempt to build the best possible private Gradio Space while following the HF Spaces gist.
366
+
367
+ It uses a strict contract: if real inference is feasible, Pi should wire it. If not, it must produce `TECHNICAL_BLOCKERS.json` and the wrapper will classify the result as health-only or technical-blocker rather than pretending it is a full success.
368
+
369
+ Hardware requests remain best-effort because OAuth/billing/ZeroGPU quota may block automatic upgrades. You can manually set hardware while the Job is waiting.
370
+ """
371
+ )
372
+ with gr.Row():
373
+ universal_run_id_box = gr.Textbox(label="Run ID", value=propose_universal_run_id, interactive=True)
374
+ new_universal_run_btn = gr.Button("Generate new run id")
375
+ new_universal_run_btn.click(fn=propose_universal_run_id, inputs=None, outputs=universal_run_id_box)
376
+
377
+ universal_model_id_box = gr.Textbox(
378
+ label="Model card URL or model ID",
379
+ value="sshleifer/tiny-gpt2",
380
+ info="Examples: sshleifer/tiny-gpt2, runwayml/stable-diffusion-v1-5, or https://huggingface.co/owner/model",
381
+ )
382
+ universal_target_space_name = gr.Textbox(
383
+ label="Target Space name",
384
+ placeholder="e.g. space-factory-custom-model-v1",
385
+ info="Use a fresh name. The Space is created under your username and remains private.",
386
+ )
387
+ universal_pi_model_box = gr.Textbox(
388
+ label="Pi model",
389
+ value="Qwen/Qwen3-Coder-Next",
390
+ info="Model used by Pi through Hugging Face Inference Providers.",
391
+ )
392
+ universal_impl_mode = gr.Dropdown(
393
+ label="Implementation mode",
394
+ choices=["full-inference-gated", "full-inference-attempt", "safe-scaffold"],
395
+ value="full-inference-gated",
396
+ info="Gated mode forbids placeholder success; impossible models must produce TECHNICAL_BLOCKERS.json.",
397
+ )
398
+ with gr.Row():
399
+ universal_preferred_hw = gr.Dropdown(
400
+ label="Preferred Space hardware",
401
+ choices=["cpu-basic", "zero-a10g", "t4-small", "t4-medium", "a10g-large", "l40sx1", "a100-large", "h200"],
402
+ value="cpu-basic",
403
+ info="Best-effort request. Use CPU for small models; choose GPU if you expect heavy inference and can set it manually if OAuth cannot.",
404
+ )
405
+ universal_allow_fallback = gr.Checkbox(label="Allow fixed GPU fallback", value=False)
406
+ universal_fallback_hw = gr.Dropdown(
407
+ label="Fallback Space hardware",
408
+ choices=["l40sx1", "a10g-large", "a100-large", "h200", "t4-medium"],
409
+ value="l40sx1",
410
+ )
411
+
412
+ launch_universal_btn = gr.Button("Build private Space from model card", variant="primary")
413
+ phase10_job_id_box = gr.Textbox(label="Job ID", interactive=True)
414
+ phase10_job_url_box = gr.Textbox(label="Job URL", interactive=False)
415
+ phase10_target_space_box = gr.Textbox(label="Target Space", interactive=False)
416
+ phase10_target_url_box = gr.Textbox(label="Target Space URL", interactive=False)
417
+ phase10_launch_result = gr.Code(label="Launch result", language="json")
418
+
419
+ launch_universal_btn.click(
420
+ fn=launch_universal_model_card_job_ui,
421
+ inputs=[
422
+ universal_run_id_box,
423
+ universal_model_id_box,
424
+ universal_target_space_name,
425
+ universal_pi_model_box,
426
+ universal_preferred_hw,
427
+ universal_allow_fallback,
428
+ universal_fallback_hw,
429
+ universal_impl_mode,
430
+ ],
431
+ outputs=[
432
+ universal_run_id_box,
433
+ phase10_job_id_box,
434
+ phase10_job_url_box,
435
+ phase10_target_space_box,
436
+ phase10_target_url_box,
437
+ phase10_launch_result,
438
+ ],
439
+ )
440
+
441
+ phase10_refresh_btn = gr.Button("Refresh Phase 10 run status")
442
+ with gr.Tab("Phase 10 state"):
443
+ phase10_state = gr.Code(label="state.json", language="json")
444
+ with gr.Tab("Phase 10 events"):
445
+ phase10_events = gr.Code(label="events.jsonl", language="json")
446
+ with gr.Tab("Phase 10 report"):
447
+ phase10_report = gr.Markdown()
448
+ with gr.Tab("Phase 10 job"):
449
+ phase10_job_info = gr.Code(label="Job info/logs", language="json")
450
+
451
+ phase10_refresh_btn.click(
452
+ fn=refresh_run_ui,
453
+ inputs=[universal_run_id_box, phase10_job_id_box],
454
+ outputs=[phase10_state, phase10_events, phase10_report, phase10_job_info],
455
+ )
456
+
457
  with gr.Tab("Phase 9 β€” LongCat full-inference gate"):
458
  gr.Markdown(
459
  """