Quazim0t0 commited on
Commit
a12992d
·
verified ·
1 Parent(s): 7f5d4f2

Upload trace_store.py

Browse files
Files changed (1) hide show
  1. game/trace_store.py +74 -0
game/trace_store.py ADDED
@@ -0,0 +1,74 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Persist agent traces to a Hugging Face dataset repo.

Uses `huggingface_hub.CommitScheduler`: the `traces/` folder is committed to a
dataset repo in the background every minute (only new/changed files are
uploaded). This is the recommended pattern for persisting data from a Space —
ZeroGPU Spaces have ephemeral disks, so without this every trace is lost on
restart.

Configuration (env vars):
    HF_TOKEN                 write token; on Spaces add it as a secret. Required.
                             (`HUGGING_FACE_HUB_TOKEN` is accepted as a fallback.)
    TRACES_DATASET_REPO      dataset repo id, e.g. "you/hugging-wizards-traces".
                             Defaults to "<SPACE_ID>-traces" when running on a Space.
    TRACES_DATASET_PRIVATE   "0" to create the dataset public (default private).

If the token or repo id is missing, syncing is disabled and the game runs
exactly as before (traces still land on local disk).
"""
18
+ from __future__ import annotations
19
+
20
+ import contextlib
21
+ import os
22
+
23
# Module-level sync state, shared by start() and lock().
# The scheduler object created by start(); stays None until sync is running.
_scheduler = None
# Set to True when sync could not be enabled (missing configuration or a
# failure while creating the scheduler) so start() does not try again.
_disabled = False
25
+
26
+
27
+ def _repo_id() -> str | None:
28
+ repo = os.environ.get("TRACES_DATASET_REPO")
29
+ if repo:
30
+ return repo
31
+ space = os.environ.get("SPACE_ID") # "owner/space-name" on HF Spaces
32
+ return f"{space}-traces" if space else None
33
+
34
+
35
def start(trace_dir: str):
    """Begin background sync of `trace_dir` to the dataset repo (idempotent).

    The first call reads the token from ``HF_TOKEN`` (falling back to
    ``HUGGING_FACE_HUB_TOKEN``) and the repo id from ``_repo_id()``. If either
    is missing, or creating the scheduler fails for any reason, sync is marked
    disabled and later calls return immediately without retrying.

    Args:
        trace_dir: Local folder whose ``*.json`` files are uploaded under
            ``traces/`` in the dataset repo.

    Returns:
        The ``CommitScheduler`` instance when sync is active, otherwise
        ``None``. Never raises: upload problems are reported with ``print``
        so they cannot break the game.
    """
    global _scheduler, _disabled
    # Already running, or already known to be unavailable: nothing to do.
    if _scheduler is not None or _disabled:
        return _scheduler

    token = os.environ.get("HF_TOKEN") or os.environ.get("HUGGING_FACE_HUB_TOKEN")
    repo = _repo_id()
    if not token or not repo:
        _disabled = True
        print("[trace_store] HF sync disabled "
              "(set HF_TOKEN and TRACES_DATASET_REPO to enable)")
        return None

    try:
        # Imported lazily so a missing huggingface_hub only disables sync
        # instead of failing at module import time.
        from huggingface_hub import CommitScheduler

        _scheduler = CommitScheduler(
            repo_id=repo,
            repo_type="dataset",
            folder_path=trace_dir,
            path_in_repo="traces",
            every=1,  # minutes
            # Private unless TRACES_DATASET_PRIVATE is exactly "0".
            private=os.environ.get("TRACES_DATASET_PRIVATE", "1") != "0",
            allow_patterns=["*.json"],
            token=token,
        )
        print(f"[trace_store] syncing traces to dataset {repo} every minute")
    except Exception as e:  # never let trace upload break the game
        _disabled = True
        print(f"[trace_store] HF sync disabled: {e}")
    return _scheduler
67
+
68
+
69
def lock():
    """Return the lock to hold while writing trace files.

    Holding it while writing ensures a half-written file is never committed.
    Intended to be used as ``with lock(): ...``.

    Returns:
        The active scheduler's ``lock`` attribute when syncing is running,
        otherwise a fresh no-op ``contextlib.nullcontext()``.
    """
    if _scheduler is not None:
        return _scheduler.lock
    return contextlib.nullcontext()