"""Gradio app that selectively clones a Hugging Face model repository.

Workflow: fetch the source repository's file list, let the user uncheck the
files they do not want, duplicate the repository to the target id, then
delete the unchecked files from the target in a single commit.
"""

import gradio as gr
from huggingface_hub import HfApi, CommitOperationDelete


def _clean(value):
    """Return *value* stripped of surrounding whitespace ('' for None)."""
    return (value or "").strip()


def _format_choice_label(path):
    """Return the tree-like display label shown for one repository file path."""
    folder, sep, name = path.rpartition("/")
    if not sep:
        # File lives at the repository root.
        return f"📄 {path}"
    return f"📁 {folder}/ ├── 📄 {name}"


def fetch_files(source_repo, hf_token):
    """List the files of *source_repo* and populate the selection UI.

    Generator used as a Gradio event handler. Every yielded value is a
    4-tuple matching the handler's outputs: (update for the selection group,
    update for the checkbox group, status message, complete file list).

    Args:
        source_repo: Repository id of the source model.
        hf_token: Hugging Face access token.
    """
    token = _clean(hf_token)
    repo_id = _clean(source_repo)

    # Input validation
    if not token:
        yield gr.update(), gr.update(), "Error: Hugging Face Access Token is required.", []
        return
    if not repo_id:
        yield gr.update(), gr.update(), "Error: Source repository name is required.", []
        return

    api = HfApi(token=token)

    try:
        # Initial status update before fetching
        yield gr.update(), gr.update(), f"Status: Fetching file list from '{source_repo}'...", []

        # Fetch the flat list of files from the repository
        files = sorted(api.list_repo_files(repo_id=repo_id, repo_type="model"))

        # (display_name, value) tuples: the label is decorative, the value is
        # the real path inside the repository.
        choices = [(_format_choice_label(path), path) for path in files]

        yield (
            gr.update(visible=True),
            gr.update(choices=choices, value=files),
            f"Success: Fetched {len(files)} files. Uncheck the ones you do not want to copy.",
            files,
        )
    except Exception as e:
        # UI boundary: report any failure in the status box instead of raising.
        yield gr.update(visible=False), gr.update(), f"An error occurred: {e}", []


def clone_repository(source_repo, target_repo, hf_token, selected_files, all_files):
    """Duplicate *source_repo* into *target_repo*, then drop unselected files.

    Generator used as a Gradio event handler; each yielded string is a status
    message for the log textbox.

    Args:
        source_repo: Repository id of the source model.
        target_repo: Repository id to create.
        hf_token: Hugging Face access token.
        selected_files: Paths the user left checked (kept in the target).
        all_files: Complete path list captured by ``fetch_files``.
    """
    token = _clean(hf_token)
    source_id = _clean(source_repo)
    target_id = _clean(target_repo)

    # Same guards as fetch_files: the token or source box may have been
    # cleared between the fetch step and the clone step.
    if not token:
        yield "Error: Hugging Face Access Token is required."
        return
    if not source_id:
        yield "Error: Source repository name is required."
        return
    if not target_id:
        yield "Error: Target repository name is required."
        return

    api = HfApi(token=token)

    try:
        yield f"Status: Initiating server-side clone of '{source_repo}' to '{target_repo}'..."

        # 1. Duplicate the entire repo first.
        # NOTE(review): confirm that the installed huggingface_hub exposes
        # HfApi.duplicate_repo with these keyword arguments; if it does not,
        # the resulting error is reported through the handler below.
        api.duplicate_repo(
            from_id=source_id,
            to_id=target_id,
            repo_type="model",
        )

        # 2. Determine which files to delete based on what the user unchecked.
        # Missing inputs are treated as empty lists; sorting keeps the commit
        # operations in a deterministic order.
        files_to_delete = sorted(set(all_files or []) - set(selected_files or []))

        if files_to_delete:
            yield f"Status: Deleting {len(files_to_delete)} unselected files from '{target_repo}'..."

            # Use 'path_in_repo' instead of 'path' to comply with the HF SDK
            operations = [CommitOperationDelete(path_in_repo=path) for path in files_to_delete]

            # Execute the deletion as a single commit
            api.create_commit(
                repo_id=target_id,
                repo_type="model",
                operations=operations,
                commit_message="Clean up: Remove unselected files during selective clone",
            )

        yield f"Success! Selective clone completed. View your model here: https://huggingface.co/{target_id}"

    except Exception as e:
        # UI boundary: report any failure in the status box instead of raising.
        yield f"An error occurred: {e}"


# Notice: the 'theme' parameter is not passed to Blocks; it is given to
# launch() at the bottom of the file instead.
with gr.Blocks(title="Selective HF Model Cloner") as demo:
    gr.Markdown("# 📑 Selective Hugging Face Model Cloner")
    gr.Markdown("Fetch files from a model repository, uncheck what you don't need, and clone via high-speed server-side duplication.")

    with gr.Row():
        hf_token = gr.Textbox(
            label="Hugging Face Access Token",
            placeholder="hf_...",
            type="password",
            info="Requires 'Write' permissions.",
        )

    with gr.Row():
        source_repo = gr.Textbox(
            label="Source Model Repository",
            placeholder="e.g., meta-llama/Llama-3.2-1B",
        )
        fetch_btn = gr.Button("1. Fetch Files", variant="secondary")

    # State that keeps the complete list of fetched files, so the clone step
    # can work out which ones were unchecked.
    all_files_state = gr.State([])

    # This group remains hidden until files are successfully fetched
    with gr.Group(visible=False) as selection_group:
        gr.Markdown("### 🗂️ Repository Files")
        gr.Markdown("*All files are selected by default. Uncheck any files you do **not** want to carry over.*")

        file_checkboxes = gr.CheckboxGroup(label="Repository Tree", interactive=True)

        target_repo = gr.Textbox(
            label="Target Model Repository",
            placeholder="e.g., your-username/my-custom-llama",
        )
        clone_btn = gr.Button("2. Clone Selected Files", variant="primary")

    # Kept outside the hidden group so fetch errors are visible too.
    output_logs = gr.Textbox(label="Execution Status / Logs", interactive=False)

    # Trigger Step 1: File Fetching
    fetch_btn.click(
        fn=fetch_files,
        inputs=[source_repo, hf_token],
        outputs=[selection_group, file_checkboxes, output_logs, all_files_state],
    )

    # Trigger Step 2: Cloning Process
    clone_btn.click(
        fn=clone_repository,
        inputs=[source_repo, target_repo, hf_token, file_checkboxes, all_files_state],
        outputs=output_logs,
    )

if __name__ == "__main__":
    # The theme is passed to launch() rather than to Blocks (see note above).
    demo.queue().launch(theme=gr.themes.Soft())