"""Hermes Sync: persist conversations locally and back them up to a HF Dataset.

On start-up the script restores the latest backup from a private HuggingFace
Dataset into ``DATA_DIR`` and then periodically uploads the contents of that
directory back to the dataset.
"""

import json
import os
import threading
import time
from datetime import datetime
from pathlib import Path

import schedule
# File lock guards conversations.json, which (per the original author's note)
# is read and written by both this script and a Node.js process.
from filelock import FileLock

HF_TOKEN = os.environ.get("HF_TOKEN")
HF_DATASET = os.environ.get("HF_BACKUP_DATASET", "your-username/llm-space-backup")
DATA_DIR = Path("/app/data")
SYNC_EVERY = int(os.environ.get("SYNC_INTERVAL_MINUTES", "3"))
LOCK_PATH = str(DATA_DIR / "conversations.lock")
CONVERSATIONS_FILE = "conversations.json"

# Local bookkeeping files that must never be uploaded to or restored from the
# backup: the lock file and the temp file used for atomic saves.
_SKIP_SUFFIXES = (".lock", ".tmp")

DATA_DIR.mkdir(parents=True, exist_ok=True)


def save_conversations(conversations: dict) -> None:
    """Save in-memory conversations to disk (with file lock).

    The JSON is written to a sibling temp file and then moved over the real
    file with ``os.replace``, so a crash or serialization error mid-write can
    never leave a truncated ``conversations.json`` behind.

    Args:
        conversations: Mapping to serialize. Values that are not natively
            JSON-serializable are stringified via ``default=str``.

    Raises:
        filelock.Timeout: If the lock cannot be acquired within 10 seconds.
        OSError: If the file cannot be written or replaced.
    """
    path = DATA_DIR / CONVERSATIONS_FILE
    tmp_path = path.with_name(path.name + ".tmp")
    with FileLock(LOCK_PATH, timeout=10):
        try:
            with open(tmp_path, "w", encoding="utf-8") as f:
                json.dump(conversations, f, indent=2, default=str)
            os.replace(tmp_path, path)
        finally:
            # No-op after a successful replace; cleans up after a failure.
            tmp_path.unlink(missing_ok=True)


def load_conversations() -> dict:
    """Load conversations from disk on boot (with file lock).

    Returns:
        The parsed contents of ``conversations.json``, or an empty dict when
        the file does not exist yet.

    Raises:
        filelock.Timeout: If the lock cannot be acquired within 10 seconds.
        json.JSONDecodeError: If the file exists but is not valid JSON.
    """
    path = DATA_DIR / CONVERSATIONS_FILE
    with FileLock(LOCK_PATH, timeout=10):
        # Open inside the lock instead of checking exists() beforehand, so
        # there is no window between the check and the read.
        try:
            with open(path, encoding="utf-8") as f:
                return json.load(f)
        except FileNotFoundError:
            return {}


def sync_to_hf_dataset():
    """Push backup data to HuggingFace private Dataset.

    Every regular file directly inside ``DATA_DIR`` is uploaded, except lock
    and temp files. Does nothing (beyond a warning) when ``HF_TOKEN`` is not
    set. All errors are caught and printed so the scheduler loop keeps running.
    """
    if not HF_TOKEN:
        print("⚠️ HF_TOKEN not set, skipping HF sync")
        return

    try:
        from huggingface_hub import HfApi

        api = HfApi(token=HF_TOKEN)

        # Ensure dataset exists. Best-effort: the repo may already exist or
        # the token may lack create permission, so report and carry on.
        try:
            api.create_repo(
                repo_id=HF_DATASET,
                repo_type="dataset",
                private=True,
                exist_ok=True,
            )
        except Exception as e:
            print(f"⚠️ Could not ensure dataset exists: {e}")

        for file in DATA_DIR.glob("*"):
            # glob("*") also yields directories (e.g. a download cache that
            # huggingface_hub may create under local_dir on restore);
            # upload_file only accepts regular files.
            if not file.is_file() or file.suffix in _SKIP_SUFFIXES:
                continue

            if file.name == CONVERSATIONS_FILE:
                # Snapshot under the lock so a concurrent writer cannot hand
                # us a half-written file; upload from memory so the lock is
                # not held for the duration of the network transfer.
                with FileLock(LOCK_PATH, timeout=10):
                    payload = file.read_bytes()
            else:
                payload = str(file)

            api.upload_file(
                path_or_fileobj=payload,
                path_in_repo=file.name,
                repo_id=HF_DATASET,
                repo_type="dataset",
            )

        print(f"✅ [{datetime.now().isoformat()}] Synced to HF Dataset: {HF_DATASET}")
    except Exception as e:
        print(f"❌ Sync failed: {e}")


def restore_from_hf_dataset():
    """Restore backup from HF Dataset on boot.

    Downloads every file listed in the dataset repo (except lock and temp
    files) into ``DATA_DIR``. Does nothing when ``HF_TOKEN`` is not set.
    Failures are printed rather than raised, since a missing dataset is
    expected on the first run.
    """
    if not HF_TOKEN:
        return

    try:
        # hf_hub_download is a module-level function, not an HfApi method;
        # calling it on an HfApi instance raises AttributeError.
        from huggingface_hub import hf_hub_download, list_repo_files

        files = list_repo_files(
            repo_id=HF_DATASET, repo_type="dataset", token=HF_TOKEN
        )
        for fname in files:
            if fname.endswith(_SKIP_SUFFIXES):
                continue
            hf_hub_download(
                repo_id=HF_DATASET,
                filename=fname,
                repo_type="dataset",
                local_dir=str(DATA_DIR),
                token=HF_TOKEN,
            )
        print(f"✅ Restored backup from {HF_DATASET}")
    except Exception as e:
        print(f"⚠️ Could not restore backup (first run?): {e}")


def main() -> None:
    """Restore the backup once, then sync to the dataset on a fixed schedule."""
    print("💾 Hermes Sync starting...")
    restore_from_hf_dataset()
    schedule.every(SYNC_EVERY).minutes.do(sync_to_hf_dataset)
    print(f"🔄 Syncing every {SYNC_EVERY} minutes to {HF_DATASET}")
    while True:
        schedule.run_pending()
        time.sleep(30)


if __name__ == "__main__":
    main()