import os import gradio as gr import json import time from datetime import datetime from llama_cpp import Llama from faster_whisper import WhisperModel from huggingface_hub import hf_hub_download # ===== CONFIG ===== MODELS_DIR = "/models" CONTEXT_SIZE = 4096 MODEL_REPOS = { "qwen2.5-coder-7b-instruct-q4_k_m.gguf": "bartowski/Qwen2.5-Coder-7B-Instruct-GGUF", "qwen2.5-coder-3b-instruct-q4_k_m.gguf": "Qwen/Qwen2.5-Coder-3B-Instruct-GGUF", "qwen2.5-coder-1.5b-instruct-q4_k_m.gguf": "Qwen/Qwen2.5-Coder-1.5B-Instruct-GGUF", "qwen2.5-coder-0.5b-instruct-q4_k_m.gguf": "Qwen/Qwen2.5-Coder-0.5B-Instruct-GGUF", "DeepSeek-Coder-V2-Lite-Instruct-Q4_K_M.gguf": "bartowski/DeepSeek-Coder-V2-Lite-Instruct-GGUF", } MODELS = { "๐ง DeepSeek V2 Lite (Best)": "DeepSeek-Coder-V2-Lite-Instruct-Q4_K_M.gguf", "โ๏ธ Qwen2.5 Coder 7B (Balanced)": "qwen2.5-coder-7b-instruct-q4_k_m.gguf", "๐ Qwen2.5 Coder 3B (Fast)": "qwen2.5-coder-3b-instruct-q4_k_m.gguf", "๐จ Qwen2.5 Coder 1.5B (Quick)": "qwen2.5-coder-1.5b-instruct-q4_k_m.gguf", "๐ฌ Qwen2.5 Coder 0.5B (Instant)": "qwen2.5-coder-0.5b-instruct-q4_k_m.gguf", } MODEL_INFO = { "๐ง DeepSeek V2 Lite (Best)": "๐ MoE 16B โข ~9GB โข Best quality", "โ๏ธ Qwen2.5 Coder 7B (Balanced)": "โ๏ธ Balanced โข ~4.5GB โข Recommended", "๐ Qwen2.5 Coder 3B (Fast)": "๐ Fast โข ~2GB โข Great all-rounder", "๐จ Qwen2.5 Coder 1.5B (Quick)": "๐จ Quick โข ~1GB โข Simple tasks", "๐ฌ Qwen2.5 Coder 0.5B (Instant)": "๐ฌ Instant โข ~0.3GB โข Lightning fast", } LANGUAGES = [ "Python", "JavaScript", "TypeScript", "Go", "Rust", "Java", "C++", "C#", "C", "PHP", "Ruby", "Swift", "Kotlin", "Scala", "R", "Julia", "Perl", "HTML/CSS", "SQL", "Bash", "PowerShell", "Lua" ] # ===== MODEL CACHE ===== loaded_models = {} current_model_name = None def load_model(model_name): global loaded_models, current_model_name if model_name == current_model_name and model_name in loaded_models: return loaded_models[model_name] if current_model_name and current_model_name != model_name: if current_model_name in loaded_models: del loaded_models[current_model_name] print(f"๐๏ธ Unloaded {current_model_name}") filename = MODELS.get(model_name) if not filename: return None model_path = os.path.join(MODELS_DIR, filename) # Auto-download if needed if not os.path.exists(model_path): repo_id = MODEL_REPOS.get(filename) if repo_id: print(f"โฌ๏ธ Downloading {filename}...") try: hf_hub_download(repo_id=repo_id, filename=filename, local_dir=MODELS_DIR) print(f"โ Downloaded {filename}") except Exception as e: print(f"โ Download failed: {e}") return None else: return None print(f"๐ฅ Loading {model_name}...") try: llm = Llama( model_path=model_path, n_ctx=CONTEXT_SIZE, n_threads=4, n_batch=512, verbose=False ) loaded_models[model_name] = llm current_model_name = model_name print(f"โ {model_name} loaded!") return llm except Exception as e: print(f"โ Failed to load: {e}") return None # ===== WHISPER ===== whisper_model = None def init_whisper(): global whisper_model try: print("Loading Whisper...") whisper_model = WhisperModel("tiny", device="cpu", compute_type="int8") print("โ Whisper ready!") except Exception as e: print(f"โ Whisper failed: {e}") init_whisper() # ===== HELPERS ===== def get_status(): available = [name for name, file in MODELS.items() if os.path.exists(os.path.join(MODELS_DIR, file))] if current_model_name: short = current_model_name.split('(')[0].strip().split()[-1] return f"๐ข Ready โข {len(available)}/{len(MODELS)} cached โข Active: {short}" return f"๐ก {len(available)}/{len(MODELS)} models cached" def get_model_info(model_name): return MODEL_INFO.get(model_name, "") def validate_input(text, name="Input"): if not text or not text.strip(): return False, f"โ ๏ธ {name} cannot be empty." if len(text) > 50000: return False, f"โ ๏ธ {name} too long." return True, None def transcribe_audio(audio): if not audio: return "" if not whisper_model: return "โ Whisper unavailable." try: segments, _ = whisper_model.transcribe(audio) return " ".join([s.text for s in segments]).strip() or "โ ๏ธ No speech detected." except Exception as e: return f"โ {str(e)[:50]}" def generate_response(model_name, prompt, temperature=0.7, max_tokens=2048): llm = load_model(model_name) if not llm: return "โ **Model not available.** Try selecting a different model." try: if "deepseek" in model_name.lower(): formatted = f"### Instruction:\n{prompt}\n\n### Response:\n" stop_tokens = ["### Instruction:", "### Response:"] else: formatted = f"<|im_start|>system\nYou are an expert coding assistant.<|im_end|>\n<|im_start|>user\n{prompt}<|im_end|>\n<|im_start|>assistant\n" stop_tokens = ["<|im_end|>", "<|im_start|>"] output = llm( formatted, max_tokens=max_tokens, temperature=temperature, top_p=0.9, top_k=40, repeat_penalty=1.1, stop=stop_tokens, echo=False ) response = output["choices"][0]["text"].strip() return response if response else "โ ๏ธ Empty response." except Exception as e: return f"โ **Error:** {str(e)[:100]}" def extract_code(text): if not text or "```" not in text: return text try: parts = text.split("```") if len(parts) >= 2: code = parts[1] if "\n" in code: code = code.split("\n", 1)[-1] return code.strip() except: pass return text # ===== HISTORY ===== def export_chat_history(history): if not history: return None, "โ ๏ธ No chat history to export." filename = f"/tmp/axon_chat_{datetime.now().strftime('%Y%m%d_%H%M%S')}.json" with open(filename, "w") as f: json.dump({"exported_at": datetime.now().isoformat(), "messages": history}, f, indent=2) return filename, f"โ Exported {len(history)} messages!" def export_code(code, language): if not code or not code.strip(): return None, "โ ๏ธ No code to export." ext_map = {"Python": "py", "JavaScript": "js", "TypeScript": "ts", "Go": "go", "Rust": "rs", "Java": "java", "C++": "cpp", "C#": "cs", "C": "c", "PHP": "php", "Ruby": "rb", "Swift": "swift", "Kotlin": "kt", "HTML/CSS": "html", "SQL": "sql", "Bash": "sh", "PowerShell": "ps1", "Lua": "lua"} ext = ext_map.get(language, "txt") filename = f"/tmp/axon_code_{datetime.now().strftime('%Y%m%d_%H%M%S')}.{ext}" with open(filename, "w") as f: f.write(code) return filename, f"โ Exported as .{ext}!" # ===== STREAMING ===== def chat_stream(message, history, model_name, temperature, max_tokens): history = history or [] valid, error = validate_input(message, "Message") if not valid: history.append({"role": "user", "content": message}) history.append({"role": "assistant", "content": error}) yield history return llm = load_model(model_name) if not llm: history.append({"role": "user", "content": message}) history.append({"role": "assistant", "content": "โ Model not available."}) yield history return if "deepseek" in model_name.lower(): conv = "### Instruction:\nYou are an expert coding assistant. Use markdown code blocks.\n\n" for msg in history: conv += f"{'User' if msg['role']=='user' else 'Assistant'}: {msg['content']}\n\n" conv += f"User: {message}\n\n### Response:\n" stop_tokens = ["### Instruction:", "User:"] else: conv = "<|im_start|>system\nYou are an expert coding assistant. Use markdown code blocks.<|im_end|>\n" for msg in history: conv += f"<|im_start|>{msg['role']}\n{msg['content']}<|im_end|>\n" conv += f"<|im_start|>user\n{message}<|im_end|>\n<|im_start|>assistant\n" stop_tokens = ["<|im_end|>", "<|im_start|>"] history.append({"role": "user", "content": message}) history.append({"role": "assistant", "content": ""}) try: full = "" for chunk in llm(conv, max_tokens=max_tokens, temperature=temperature, top_p=0.9, stop=stop_tokens, stream=True): full += chunk["choices"][0]["text"] history[-1]['content'] = full yield history except Exception as e: history[-1]['content'] = f"โ Error: {str(e)[:100]}" yield history def generate_stream(prompt, language, model_name, temperature, max_tokens): valid, error = validate_input(prompt, "Description") if not valid: yield error return llm = load_model(model_name) if not llm: yield "โ Model not available." return if "deepseek" in model_name.lower(): formatted = f"### Instruction:\nWrite clean {language} code with comments:\n{prompt}\n\n### Response:\n" stop_tokens = ["### Instruction:"] else: formatted = f"<|im_start|>system\nYou are an expert coder.<|im_end|>\n<|im_start|>user\nWrite clean {language} code with comments:\n{prompt}<|im_end|>\n<|im_start|>assistant\n" stop_tokens = ["<|im_end|>"] try: full = "" for chunk in llm(formatted, max_tokens=max_tokens, temperature=temperature, stop=stop_tokens, stream=True): full += chunk["choices"][0]["text"] yield extract_code(full) except Exception as e: yield f"โ {str(e)[:50]}" # ===== FEATURES ===== def explain_code(code, model_name, detail, max_tokens): valid, err = validate_input(code, "Code") if not valid: return err prompts = {"Brief": f"Explain briefly:\n{code}", "Normal": f"Explain this code:\n{code}", "Detailed": f"Detailed explanation:\n{code}"} return generate_response(model_name, prompts.get(detail, prompts["Normal"]), 0.5, max_tokens) def fix_code(code, error_msg, model_name, max_tokens): valid, err = validate_input(code, "Code") if not valid: return err return generate_response(model_name, f"Fix this code. Error: {error_msg or 'Not working'}\n\n{code}", 0.3, max_tokens) def review_code(code, model_name, max_tokens): valid, err = validate_input(code, "Code") if not valid: return err return generate_response(model_name, f"Review for bugs, performance, security:\n{code}", 0.4, max_tokens) def convert_code(code, from_lang, to_lang, model_name, max_tokens): valid, err = validate_input(code, "Code") if not valid: return err if from_lang == to_lang: return "โ ๏ธ Same language." result = generate_response(model_name, f"Convert {from_lang} to {to_lang}. Code only:\n{code}", 0.3, max_tokens) return result if result.startswith("โ") else extract_code(result) def generate_tests(code, language, framework, model_name, max_tokens): valid, err = validate_input(code, "Code") if not valid: return err result = generate_response(model_name, f"Generate {framework or 'pytest'} tests for {language}:\n{code}", 0.3, max_tokens) return result if result.startswith("โ") else extract_code(result) def document_code(code, language, style, model_name, max_tokens): valid, err = validate_input(code, "Code") if not valid: return err result = generate_response(model_name, f"Add {style.lower()} to this {language} code:\n{code}", 0.4, max_tokens) return result if style == "README" or result.startswith("โ") else extract_code(result) def optimize_code(code, language, focus, model_name, max_tokens): valid, err = validate_input(code, "Code") if not valid: return err return generate_response(model_name, f"Optimize {language} for {focus.lower()}. Explain:\n{code}", 0.3, max_tokens) def security_scan(code, model_name, max_tokens): valid, err = validate_input(code, "Code") if not valid: return err return generate_response(model_name, f"Security audit. Check for injection, auth issues, data exposure, input validation. For each: Severity, Location, Fix.\n\nCode:\n{code}", 0.3, max_tokens) def analyze_complexity(code, model_name, max_tokens): valid, err = validate_input(code, "Code") if not valid: return err return generate_response(model_name, f"Analyze time/space complexity (Big O), bottlenecks, optimizations:\n{code}", 0.4, max_tokens) def build_sql(description, db_type, model_name, max_tokens): valid, err = validate_input(description, "Description") if not valid: return err result = generate_response(model_name, f"Write {db_type} SQL for:\n{description}", 0.2, max_tokens) return result if result.startswith("โ") else extract_code(result) def build_shell(description, shell_type, model_name, max_tokens): valid, err = validate_input(description, "Description") if not valid: return err result = generate_response(model_name, f"Write {shell_type} command for:\n{description}", 0.2, max_tokens) return result if result.startswith("โ") else extract_code(result) def code_diff(code1, code2, model_name, max_tokens): v1, e1 = validate_input(code1, "Code 1") v2, e2 = validate_input(code2, "Code 2") if not v1: return e1 if not v2: return e2 return generate_response(model_name, f"Compare:\n=== CODE 1 ===\n{code1}\n\n=== CODE 2 ===\n{code2}", 0.4, max_tokens) def generate_mock_data(schema, count, format_type, model_name, max_tokens): valid, err = validate_input(schema, "Schema") if not valid: return err result = generate_response(model_name, f"Generate {count} mock entries as {format_type}:\n{schema}", 0.7, max_tokens) return result if result.startswith("โ") else extract_code(result) def interview_challenge(topic, difficulty, language, model_name, max_tokens): valid, err = validate_input(topic, "Topic") if not valid: return err return generate_response(model_name, f"Create {difficulty} {language} interview challenge about {topic}. Include problem, examples, constraints, hints, solution.", 0.6, max_tokens) def to_pseudocode(code, output_type, model_name, max_tokens): valid, err = validate_input(code, "Code") if not valid: return err prompt = f"Convert to pseudocode:\n{code}" if output_type == "Pseudocode" else f"Create Mermaid flowchart:\n{code}" return generate_response(model_name, prompt, 0.3, max_tokens) def build_cron(description, model_name, max_tokens): valid, err = validate_input(description, "Description") if not valid: return err return generate_response(model_name, f"Create cron expression for: {description}\nInclude: expression, breakdown, next 5 runs", 0.2, max_tokens) def build_regex(description, model_name, max_tokens): valid, err = validate_input(description, "Description") if not valid: return err return generate_response(model_name, f"Create regex for: {description}\nPattern, explanation, examples, Python code:", 0.3, max_tokens) def build_api(description, framework, model_name, max_tokens): valid, err = validate_input(description, "Description") if not valid: return err result = generate_response(model_name, f"Create {framework} REST endpoint:\n{description}", 0.3, max_tokens) return result if result.startswith("โ") else extract_code(result) def convert_data_format(data, from_fmt, to_fmt, model_name, max_tokens): valid, err = validate_input(data, "Data") if not valid: return err if from_fmt == to_fmt: return "โ ๏ธ Same format." result = generate_response(model_name, f"Convert {from_fmt} to {to_fmt}:\n{data}", 0.1, max_tokens) return result if result.startswith("โ") else extract_code(result) # ===== UI ===== with gr.Blocks(title="Axon v6") as demo: gr.HTML("""
AI Coding Assistant โข 5 Models โข 19 Tools โข 100% Local
Free AI coding assistant - 100% local, no API keys.
Models download automatically on first use
| Model | Size | Best For |
|---|---|---|
| ๐ง DeepSeek V2 Lite | ~9GB | Best quality |
| โ๏ธ Qwen2.5 7B | ~4.5GB | Balanced |
| ๐ Qwen2.5 3B | ~2GB | Fast |
| ๐จ Qwen2.5 1.5B | ~1GB | Quick |
| ๐ฌ Qwen2.5 0.5B | ~0.3GB | Instant |