"""Copilot orchestration: retrieval, prompt assembly, generation, and response shaping.""" from __future__ import annotations from .config import Settings, get_settings from .embeddings import create_embedding_backend from .index_store import SearchResult, VectorIndex, unique_citations from .llm import create_llm_provider from .prompts import SYSTEM_PROMPT, build_user_prompt from .schemas import ChatRequest, ChatResponse class CopilotService: """Runtime service used by both the API and Gradio UI.""" def __init__(self, settings: Settings | None = None) -> None: self.settings = settings or get_settings() self.embedder = create_embedding_backend( self.settings.embedding_backend, model_name=self.settings.embedding_model, dimensions=self.settings.embedding_dimensions, ) self.index = VectorIndex(self.settings.index_dir, self.embedder) self.llm = create_llm_provider(self.settings) def health(self) -> dict: if not self.index.loaded: self.index.load() return { "status": "ok", "index_loaded": self.index.loaded, "chunk_count": len(self.index.chunks), "embedding_backend": self.embedder.name, "llm_provider": self.llm.name, "index_dir": str(self.settings.index_dir), } def reload_index(self) -> dict: """Reload the on-disk vector index after ingestion.""" self.index.loaded = False self.index.load() return self.health() def chat(self, request: ChatRequest) -> ChatResponse: warnings: list[str] = [] results: list[SearchResult] = [] if request.retrieval_k > 0: try: results = self.index.search( query=f"{request.mode}\n{request.project_context}\n{request.message}", k=request.retrieval_k, min_score=self.settings.retrieval_min_score, ) except Exception as exc: warnings.append(f"retrieval failed: {exc}") if not results: warnings.append("No approved source context was retrieved. Answer is not grounded in the RAG corpus.") user_prompt = build_user_prompt( mode=request.mode, message=request.message, project_context=request.project_context, results=results, ) answer = self.llm.generate( system_prompt=SYSTEM_PROMPT, user_prompt=user_prompt, has_context=bool(results), ) return ChatResponse( answer=answer, citations=unique_citations(results), retrieved_chunks=[result.to_retrieved_chunk() for result in results], warnings=warnings, )