Spaces:
Sleeping
Sleeping
Upload folder using huggingface_hub
Browse files
- rag_books_mcp/app.py +34 -15
- rag_books_mcp/ingest.py +17 -0
- rag_books_mcp/tools.py +64 -12
rag_books_mcp/app.py
CHANGED
|
@@ -33,14 +33,22 @@ def _build_search_tab() -> gr.Interface:
|
|
| 33 |
value="bias-variance tradeoff",
|
| 34 |
placeholder="Consulta en lenguaje natural",
|
| 35 |
),
|
| 36 |
-
gr.Radio(
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 37 |
gr.Slider(minimum=1, maximum=10, step=1, value=5, label="top_k"),
|
| 38 |
],
|
| 39 |
outputs=gr.Markdown(label="Resultados"),
|
| 40 |
title="🔎 search_theory",
|
| 41 |
description=(
|
| 42 |
-
"Búsqueda semántica en ESL
|
| 43 |
-
"relevantes ordenados por similitud."
|
| 44 |
),
|
| 45 |
api_name="search_theory",
|
| 46 |
)
|
|
@@ -50,7 +58,7 @@ def _build_get_section_tab() -> gr.Interface:
|
|
| 50 |
return gr.Interface(
|
| 51 |
fn=get_section,
|
| 52 |
inputs=[
|
| 53 |
-
gr.Radio(choices=["esl", "islp"], value="islp", label="book"),
|
| 54 |
gr.Textbox(
|
| 55 |
label="chapter",
|
| 56 |
value="8 Tree-Based Methods",
|
|
@@ -66,8 +74,8 @@ def _build_get_section_tab() -> gr.Interface:
|
|
| 66 |
outputs=gr.Markdown(label="Sección"),
|
| 67 |
title="📑 get_section",
|
| 68 |
description=(
|
| 69 |
-
"Recupera una sección específica de ESL o
|
| 70 |
-
"por metadata, hace fallback a búsqueda semántica."
|
| 71 |
),
|
| 72 |
api_name="get_section",
|
| 73 |
)
|
|
@@ -80,7 +88,7 @@ def _build_cite_tab() -> gr.Interface:
|
|
| 80 |
gr.Textbox(
|
| 81 |
label="topic",
|
| 82 |
value="ridge regression",
|
| 83 |
-
placeholder="Tema a fundamentar",
|
| 84 |
),
|
| 85 |
gr.Radio(
|
| 86 |
choices=["brief", "medium", "deep"],
|
|
@@ -91,8 +99,9 @@ def _build_cite_tab() -> gr.Interface:
|
|
| 91 |
outputs=gr.Markdown(label="Fundamentación"),
|
| 92 |
title="📚 cite_foundation",
|
| 93 |
description=(
|
| 94 |
-
"Fundamentación teórica que cita
|
| 95 |
-
"ESL (riguroso)
|
|
|
|
| 96 |
),
|
| 97 |
api_name="cite_foundation",
|
| 98 |
)
|
|
@@ -111,16 +120,26 @@ def _build_list_topics_tab() -> gr.Interface:
|
|
| 111 |
|
| 112 |
def build_demo() -> gr.Blocks:
|
| 113 |
"""Construye la UI tabulada del MCP Server v2."""
|
| 114 |
-
with gr.Blocks(title="rag-books-mcp v2 · ESL + ISLP") as demo:
|
| 115 |
gr.Markdown(
|
| 116 |
"""
|
| 117 |
-
# 📖 RAG Books MCP v2 — ESL + ISLP
|
| 118 |
|
| 119 |
-
Servidor MCP que expone búsqueda semántica sobre
|
| 120 |
-
referencia de Statistical Learning:
|
| 121 |
|
| 122 |
- **ESL** — *The Elements of Statistical Learning* (Hastie, Tibshirani, Friedman)
|
| 123 |
- **ISLP** — *An Introduction to Statistical Learning with Python* (James, Witten, Hastie, Tibshirani)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 124 |
|
| 125 |
**v2 vs v1:** la base ChromaDB se carga desde el dataset HF
|
| 126 |
`gusdelact/rag-esl-islp-chromadb` en lugar de empaquetarla con el
|
|
@@ -129,9 +148,9 @@ def build_demo() -> gr.Blocks:
|
|
| 129 |
|
| 130 |
**Endpoint MCP:** `/gradio_api/mcp/` (streamable HTTP).
|
| 131 |
**Embeddings:** `sentence-transformers/all-MiniLM-L6-v2` (local, sin API key).
|
| 132 |
-
**Vector store:** ChromaDB con
|
| 133 |
|
| 134 |
-
La primera tool call descarga el dataset (~
|
| 135 |
son cache hit.
|
| 136 |
"""
|
| 137 |
)
|
|
|
|
| 33 |
value="bias-variance tradeoff",
|
| 34 |
placeholder="Consulta en lenguaje natural",
|
| 35 |
),
|
| 36 |
+
gr.Radio(
|
| 37 |
+
choices=["all", "both", "esl", "islp", "fes", "pdsh"],
|
| 38 |
+
value="all",
|
| 39 |
+
label="book",
|
| 40 |
+
info=(
|
| 41 |
+
"R4DS no se ofrece en este Space por su licencia CC BY-NC-ND. "
|
| 42 |
+
"Disponible solo en la variante local con RAG_CHROMA_DIR."
|
| 43 |
+
),
|
| 44 |
+
),
|
| 45 |
gr.Slider(minimum=1, maximum=10, step=1, value=5, label="top_k"),
|
| 46 |
],
|
| 47 |
outputs=gr.Markdown(label="Resultados"),
|
| 48 |
title="🔎 search_theory",
|
| 49 |
description=(
|
| 50 |
+
"Búsqueda semántica en ESL, ISLP, FES y PDSH. Devuelve los fragmentos "
|
| 51 |
+
"más relevantes ordenados por similitud."
|
| 52 |
),
|
| 53 |
api_name="search_theory",
|
| 54 |
)
|
|
|
|
| 58 |
return gr.Interface(
|
| 59 |
fn=get_section,
|
| 60 |
inputs=[
|
| 61 |
+
gr.Radio(choices=["esl", "islp", "fes", "pdsh"], value="islp", label="book"),
|
| 62 |
gr.Textbox(
|
| 63 |
label="chapter",
|
| 64 |
value="8 Tree-Based Methods",
|
|
|
|
| 74 |
outputs=gr.Markdown(label="Sección"),
|
| 75 |
title="📑 get_section",
|
| 76 |
description=(
|
| 77 |
+
"Recupera una sección específica de ESL, ISLP, FES o PDSH. Si no se "
|
| 78 |
+
"encuentra por metadata, hace fallback a búsqueda semántica."
|
| 79 |
),
|
| 80 |
api_name="get_section",
|
| 81 |
)
|
|
|
|
| 88 |
gr.Textbox(
|
| 89 |
label="topic",
|
| 90 |
value="ridge regression",
|
| 91 |
+
placeholder="Tema a fundamentar (ej: 'bagging', 'feature selection')",
|
| 92 |
),
|
| 93 |
gr.Radio(
|
| 94 |
choices=["brief", "medium", "deep"],
|
|
|
|
| 99 |
outputs=gr.Markdown(label="Fundamentación"),
|
| 100 |
title="📚 cite_foundation",
|
| 101 |
description=(
|
| 102 |
+
"Fundamentación teórica que cita los libros publicados: "
|
| 103 |
+
"ISLP (intuitivo), ESL (riguroso), FES (feature engineering) y "
|
| 104 |
+
"PDSH (código Python)."
|
| 105 |
),
|
| 106 |
api_name="cite_foundation",
|
| 107 |
)
|
|
|
|
| 120 |
|
| 121 |
def build_demo() -> gr.Blocks:
|
| 122 |
"""Construye la UI tabulada del MCP Server v2."""
|
| 123 |
+
with gr.Blocks(title="rag-books-mcp v2 · ESL + ISLP + FES + PDSH") as demo:
|
| 124 |
gr.Markdown(
|
| 125 |
"""
|
| 126 |
+
# 📖 RAG Books MCP v2 — ESL + ISLP + FES + PDSH
|
| 127 |
|
| 128 |
+
Servidor MCP que expone búsqueda semántica sobre cuatro libros de
|
| 129 |
+
referencia de Statistical Learning y Data Science:
|
| 130 |
|
| 131 |
- **ESL** — *The Elements of Statistical Learning* (Hastie, Tibshirani, Friedman)
|
| 132 |
- **ISLP** — *An Introduction to Statistical Learning with Python* (James, Witten, Hastie, Tibshirani)
|
| 133 |
+
- **FES** — *Feature Engineering and Selection* (Kuhn, Johnson)
|
| 134 |
+
- **PDSH** — *Python Data Science Handbook* (VanderPlas)
|
| 135 |
+
|
| 136 |
+
> ℹ️ **Sobre R4DS** — *R for Data Science, 2nd Ed.* (Wickham et al.)
|
| 137 |
+
> está indexado en la **variante local** del server pero **NO**
|
| 138 |
+
> en este Space. La razón es su licencia CC BY-NC-ND 3.0 US
|
| 139 |
+
> (NoDerivatives), incompatible con redistribución pública en
|
| 140 |
+
> formato vectorial. Para usarlo, corre el server localmente con
|
| 141 |
+
> `RAG_CHROMA_DIR` apuntando a tu propio `chroma_db/` con R4DS
|
| 142 |
+
> indexado. Detalles en el [DATA_CARD del dataset](https://huggingface.co/datasets/gusdelact/rag-esl-islp-chromadb).
|
| 143 |
|
| 144 |
**v2 vs v1:** la base ChromaDB se carga desde el dataset HF
|
| 145 |
`gusdelact/rag-esl-islp-chromadb` en lugar de empaquetarla con el
|
|
|
|
| 148 |
|
| 149 |
**Endpoint MCP:** `/gradio_api/mcp/` (streamable HTTP).
|
| 150 |
**Embeddings:** `sentence-transformers/all-MiniLM-L6-v2` (local, sin API key).
|
| 151 |
+
**Vector store:** ChromaDB con 3005 chunks (1093 ESL + 884 ISLP + 465 FES + 563 PDSH).
|
| 152 |
|
| 153 |
+
La primera tool call descarga el dataset (~95 MB). Las siguientes
|
| 154 |
son cache hit.
|
| 155 |
"""
|
| 156 |
)
|
rag_books_mcp/ingest.py
CHANGED
|
@@ -43,6 +43,21 @@ BOOKS_CONFIG = {
|
|
| 43 |
"collection": "pdsh_chapters",
|
| 44 |
"full_name": "Python Data Science Handbook (VanderPlas)",
|
| 45 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 46 |
}
|
| 47 |
|
| 48 |
|
|
@@ -173,6 +188,8 @@ def ingest_book(books_dir: Path, book_key: str, client: chromadb.ClientAPI, embe
|
|
| 173 |
|
| 174 |
total_chunks = 0
|
| 175 |
md_files = sorted(chapters_dir.glob("*.md"))
|
|
|
|
|
|
|
| 176 |
|
| 177 |
print(f"\n 📚 {config['full_name']}")
|
| 178 |
print(f" Archivos encontrados: {len(md_files)}")
|
|
|
|
| 43 |
"collection": "pdsh_chapters",
|
| 44 |
"full_name": "Python Data Science Handbook (VanderPlas)",
|
| 45 |
},
|
| 46 |
+
"r4ds": {
|
| 47 |
+
"dir_name": "capitulos_r4ds",
|
| 48 |
+
"collection": "r4ds_chapters",
|
| 49 |
+
"full_name": (
|
| 50 |
+
"R for Data Science, 2nd Edition "
|
| 51 |
+
"(Wickham, Çetinkaya-Rundel, Grolemund) — examples in R; "
|
| 52 |
+
"principles transfer to pandas/Python"
|
| 53 |
+
),
|
| 54 |
+
# ⚠️ R4DS está bajo licencia CC BY-NC-ND 3.0 US (NoDerivatives).
|
| 55 |
+
# `local_only=True` indica que esta colección NO debe redistribuirse
|
| 56 |
+
# como dataset HF público. `publish_chroma_dataset.py` la elimina del
|
| 57 |
+
# snapshot antes de subir al Hub. Para uso local (RAG_CHROMA_DIR) el
|
| 58 |
+
# comportamiento es transparente.
|
| 59 |
+
"local_only": True,
|
| 60 |
+
},
|
| 61 |
}
|
| 62 |
|
| 63 |
|
|
|
|
| 188 |
|
| 189 |
total_chunks = 0
|
| 190 |
md_files = sorted(chapters_dir.glob("*.md"))
|
| 191 |
+
# Skip README files (case-insensitive prefix match on the file name); every other *.md returned by the glob stays in md_files.
|
| 192 |
+
md_files = [f for f in md_files if not f.name.upper().startswith("README")]
|
| 193 |
|
| 194 |
print(f"\n 📚 {config['full_name']}")
|
| 195 |
print(f" Archivos encontrados: {len(md_files)}")
|
rag_books_mcp/tools.py
CHANGED
|
@@ -132,22 +132,30 @@ def search_theory(
|
|
| 132 |
top_k: int = 5,
|
| 133 |
) -> str:
|
| 134 |
"""
|
| 135 |
-
Busca fragmentos relevantes en los libros ESL, ISLP, FES y PDSH usando búsqueda semántica.
|
| 136 |
|
| 137 |
Args:
|
| 138 |
query (str): Consulta en lenguaje natural (ej: "bias-variance tradeoff",
|
| 139 |
-
"regularización L1 vs L2", "random forest out-of-bag error"
|
| 140 |
-
|
| 141 |
-
|
|
|
|
| 142 |
top_k (int): Número de resultados a devolver (default: 5, máximo: 10).
|
| 143 |
|
| 144 |
Returns:
|
| 145 |
str: Fragmentos relevantes con metadata (libro, capítulo, sección, similitud).
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 146 |
"""
|
| 147 |
top_k = min(max(int(top_k), 1), 10)
|
| 148 |
|
| 149 |
# "both" se mantiene para retro-compatibilidad (ESL + ISLP)
|
| 150 |
-
# "all" incluye FES también
|
| 151 |
collections_to_search = []
|
| 152 |
if book in ("esl", "both", "all"):
|
| 153 |
try:
|
|
@@ -169,6 +177,11 @@ def search_theory(
|
|
| 169 |
collections_to_search.append(("PDSH", get_collection("pdsh_chapters")))
|
| 170 |
except Exception:
|
| 171 |
pass
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 172 |
|
| 173 |
if not collections_to_search:
|
| 174 |
return (
|
|
@@ -219,7 +232,7 @@ def get_section(
|
|
| 219 |
Recupera una sección específica de un libro por referencia exacta.
|
| 220 |
|
| 221 |
Args:
|
| 222 |
-
book (str): Libro a consultar. Opciones: "esl", "islp", "fes" o "pdsh".
|
| 223 |
chapter (str): Nombre del capítulo (búsqueda parcial soportada).
|
| 224 |
section (str): Nombre de la sección dentro del capítulo (opcional).
|
| 225 |
max_chunks (int): Máximo de chunks a devolver (default: 5).
|
|
@@ -232,7 +245,7 @@ def get_section(
|
|
| 232 |
try:
|
| 233 |
collection = get_collection(collection_name)
|
| 234 |
except Exception:
|
| 235 |
-
return f"❌ Colección '{collection_name}' no encontrada. Opciones: esl, islp, fes, pdsh"
|
| 236 |
|
| 237 |
try:
|
| 238 |
if section:
|
|
@@ -292,17 +305,19 @@ def cite_foundation(
|
|
| 292 |
detail_level: str = "medium",
|
| 293 |
) -> str:
|
| 294 |
"""
|
| 295 |
-
Devuelve la fundamentación teórica para un tema citando los libros (ESL + ISLP + FES + PDSH).
|
| 296 |
|
| 297 |
Args:
|
| 298 |
topic (str): Tema a fundamentar (ej: "ridge regression", "bagging",
|
| 299 |
-
"feature engineering", "missing data imputation"
|
|
|
|
| 300 |
detail_level (str): "brief" (1-2), "medium" (3-4) o "deep" (6-8).
|
| 301 |
|
| 302 |
Returns:
|
| 303 |
str: Fundamentación teórica con citas, organizada de intuitivo (ISLP)
|
| 304 |
-
a riguroso (ESL), más prácticas de feature engineering (FES)
|
| 305 |
-
|
|
|
|
| 306 |
"""
|
| 307 |
top_k_map = {"brief": 2, "medium": 4, "deep": 8}
|
| 308 |
top_k = top_k_map.get(detail_level, 4)
|
|
@@ -375,7 +390,24 @@ def cite_foundation(
|
|
| 375 |
except Exception:
|
| 376 |
pass
|
| 377 |
|
| 378 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 379 |
return (
|
| 380 |
f"❌ No se encontró fundamentación para '{topic}'. "
|
| 381 |
"Verifica que la ingesta se haya ejecutado correctamente."
|
|
@@ -422,6 +454,20 @@ def cite_foundation(
|
|
| 422 |
f"{r['content'][:1200]}\n\n---\n"
|
| 423 |
)
|
| 424 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 425 |
output_parts.append("\n## 📚 Referencias\n")
|
| 426 |
if islp_results:
|
| 427 |
chapters = set(r["chapter"] for r in islp_results)
|
|
@@ -435,6 +481,11 @@ def cite_foundation(
|
|
| 435 |
if pdsh_results:
|
| 436 |
chapters = set(r["chapter"] for r in pdsh_results)
|
| 437 |
output_parts.append(f"- **PDSH:** {', '.join(chapters)}\n")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 438 |
|
| 439 |
return "\n".join(output_parts)
|
| 440 |
|
|
@@ -453,6 +504,7 @@ def list_available_topics() -> str:
|
|
| 453 |
("ISLP", "islp_chapters"),
|
| 454 |
("FES", "fes_chapters"),
|
| 455 |
("PDSH", "pdsh_chapters"),
|
|
|
|
| 456 |
]:
|
| 457 |
try:
|
| 458 |
collection = get_collection(collection_name)
|
|
|
|
| 132 |
top_k: int = 5,
|
| 133 |
) -> str:
|
| 134 |
"""
|
| 135 |
+
Busca fragmentos relevantes en los libros ESL, ISLP, FES, PDSH y R4DS usando búsqueda semántica.
|
| 136 |
|
| 137 |
Args:
|
| 138 |
query (str): Consulta en lenguaje natural (ej: "bias-variance tradeoff",
|
| 139 |
+
"regularización L1 vs L2", "random forest out-of-bag error",
|
| 140 |
+
"exploratory data analysis iterative cycle").
|
| 141 |
+
book (str): Libro donde buscar. Opciones: "esl", "islp", "fes", "pdsh", "r4ds",
|
| 142 |
+
"both" (ESL+ISLP, retro-compat) o "all" (los 5, default).
|
| 143 |
top_k (int): Número de resultados a devolver (default: 5, máximo: 10).
|
| 144 |
|
| 145 |
Returns:
|
| 146 |
str: Fragmentos relevantes con metadata (libro, capítulo, sección, similitud).
|
| 147 |
+
|
| 148 |
+
Nota:
|
| 149 |
+
R4DS está escrito en R (tidyverse). Sus principios de EDA, transformación
|
| 150 |
+
de datos y manipulación tabular son agnósticos del lenguaje y se traducen
|
| 151 |
+
directamente a pandas/Python; el código en R debe leerse como pseudocódigo.
|
| 152 |
+
En v2, R4DS solo está disponible cuando se usa con `RAG_CHROMA_DIR` local;
|
| 153 |
+
no se publica en el dataset HF por su licencia CC BY-NC-ND 3.0 US.
|
| 154 |
"""
|
| 155 |
top_k = min(max(int(top_k), 1), 10)
|
| 156 |
|
| 157 |
# "both" se mantiene para retro-compatibilidad (ESL + ISLP)
|
| 158 |
+
# "all" incluye FES, PDSH y R4DS también
|
| 159 |
collections_to_search = []
|
| 160 |
if book in ("esl", "both", "all"):
|
| 161 |
try:
|
|
|
|
| 177 |
collections_to_search.append(("PDSH", get_collection("pdsh_chapters")))
|
| 178 |
except Exception:
|
| 179 |
pass
|
| 180 |
+
if book in ("r4ds", "all"):
|
| 181 |
+
try:
|
| 182 |
+
collections_to_search.append(("R4DS", get_collection("r4ds_chapters")))
|
| 183 |
+
except Exception:
|
| 184 |
+
pass
|
| 185 |
|
| 186 |
if not collections_to_search:
|
| 187 |
return (
|
|
|
|
| 232 |
Recupera una sección específica de un libro por referencia exacta.
|
| 233 |
|
| 234 |
Args:
|
| 235 |
+
book (str): Libro a consultar. Opciones: "esl", "islp", "fes", "pdsh" o "r4ds".
|
| 236 |
chapter (str): Nombre del capítulo (búsqueda parcial soportada).
|
| 237 |
section (str): Nombre de la sección dentro del capítulo (opcional).
|
| 238 |
max_chunks (int): Máximo de chunks a devolver (default: 5).
|
|
|
|
| 245 |
try:
|
| 246 |
collection = get_collection(collection_name)
|
| 247 |
except Exception:
|
| 248 |
+
return f"❌ Colección '{collection_name}' no encontrada. Opciones: esl, islp, fes, pdsh, r4ds"
|
| 249 |
|
| 250 |
try:
|
| 251 |
if section:
|
|
|
|
| 305 |
detail_level: str = "medium",
|
| 306 |
) -> str:
|
| 307 |
"""
|
| 308 |
+
Devuelve la fundamentación teórica para un tema citando los libros (ESL + ISLP + FES + PDSH + R4DS).
|
| 309 |
|
| 310 |
Args:
|
| 311 |
topic (str): Tema a fundamentar (ej: "ridge regression", "bagging",
|
| 312 |
+
"feature engineering", "missing data imputation",
|
| 313 |
+
"exploratory data analysis").
|
| 314 |
detail_level (str): "brief" (1-2), "medium" (3-4) o "deep" (6-8).
|
| 315 |
|
| 316 |
Returns:
|
| 317 |
str: Fundamentación teórica con citas, organizada de intuitivo (ISLP)
|
| 318 |
+
a riguroso (ESL), más prácticas de feature engineering (FES),
|
| 319 |
+
código práctico Python (PDSH) y workflow iterativo de EDA / data
|
| 320 |
+
wrangling (R4DS, ejemplos en R).
|
| 321 |
"""
|
| 322 |
top_k_map = {"brief": 2, "medium": 4, "deep": 8}
|
| 323 |
top_k = top_k_map.get(detail_level, 4)
|
|
|
|
| 390 |
except Exception:
|
| 391 |
pass
|
| 392 |
|
| 393 |
+
r4ds_results = []
|
| 394 |
+
try:
|
| 395 |
+
r4ds_col = get_collection("r4ds_chapters")
|
| 396 |
+
res = r4ds_col.query(query_texts=[topic], n_results=top_k)
|
| 397 |
+
if res["documents"] and res["documents"][0]:
|
| 398 |
+
for doc, meta, dist in zip(
|
| 399 |
+
res["documents"][0], res["metadatas"][0], res["distances"][0]
|
| 400 |
+
):
|
| 401 |
+
r4ds_results.append({
|
| 402 |
+
"content": doc,
|
| 403 |
+
"chapter": meta.get("chapter", ""),
|
| 404 |
+
"section": meta.get("section", ""),
|
| 405 |
+
"similarity": 1 - dist,
|
| 406 |
+
})
|
| 407 |
+
except Exception:
|
| 408 |
+
pass
|
| 409 |
+
|
| 410 |
+
if not islp_results and not esl_results and not fes_results and not pdsh_results and not r4ds_results:
|
| 411 |
return (
|
| 412 |
f"❌ No se encontró fundamentación para '{topic}'. "
|
| 413 |
"Verifica que la ingesta se haya ejecutado correctamente."
|
|
|
|
| 454 |
f"{r['content'][:1200]}\n\n---\n"
|
| 455 |
)
|
| 456 |
|
| 457 |
+
if r4ds_results:
|
| 458 |
+
output_parts.append(
|
| 459 |
+
"\n## 📕 R4DS (EDA & Data Wrangling — ejemplos en R, principios universales)\n"
|
| 460 |
+
"> ⚠️ El código está en R con tidyverse. Léelo como pseudocódigo: el flujo, "
|
| 461 |
+
"las heurísticas y la filosofía iterativa de EDA se traducen directamente a "
|
| 462 |
+
"pandas/Python.\n"
|
| 463 |
+
)
|
| 464 |
+
for i, r in enumerate(r4ds_results, 1):
|
| 465 |
+
output_parts.append(
|
| 466 |
+
f"### [{i}] Cap. {r['chapter']} § {r['section']} "
|
| 467 |
+
f"(sim: {r['similarity']:.3f})\n\n"
|
| 468 |
+
f"{r['content'][:1200]}\n\n---\n"
|
| 469 |
+
)
|
| 470 |
+
|
| 471 |
output_parts.append("\n## 📚 Referencias\n")
|
| 472 |
if islp_results:
|
| 473 |
chapters = set(r["chapter"] for r in islp_results)
|
|
|
|
| 481 |
if pdsh_results:
|
| 482 |
chapters = set(r["chapter"] for r in pdsh_results)
|
| 483 |
output_parts.append(f"- **PDSH:** {', '.join(chapters)}\n")
|
| 484 |
+
if r4ds_results:
|
| 485 |
+
chapters = set(r["chapter"] for r in r4ds_results)
|
| 486 |
+
output_parts.append(
|
| 487 |
+
f"- **R4DS:** {', '.join(chapters)} _(R / tidyverse — principios transferibles a pandas)_\n"
|
| 488 |
+
)
|
| 489 |
|
| 490 |
return "\n".join(output_parts)
|
| 491 |
|
|
|
|
| 504 |
("ISLP", "islp_chapters"),
|
| 505 |
("FES", "fes_chapters"),
|
| 506 |
("PDSH", "pdsh_chapters"),
|
| 507 |
+
("R4DS", "r4ds_chapters"),
|
| 508 |
]:
|
| 509 |
try:
|
| 510 |
collection = get_collection(collection_name)
|