Spaces:
Running
Running
Claude
refactor(web): extraire les 5 routers simples (system, normalization, reports, importers, home)
f14102c unverified | """Router des importers de corpus distants : HTR-United et HuggingFace.""" | |
| from __future__ import annotations | |
| from fastapi import APIRouter, HTTPException, Query | |
| from picarones.web.models import HTRUnitedImportRequest, HuggingFaceImportRequest | |
| router = APIRouter() | |
| # ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| # HTR-United | |
| # ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| async def api_htr_united_catalogue( | |
| query: str = Query(default="", description="Recherche textuelle"), | |
| language: str = Query(default="", description="Filtre langue"), | |
| script: str = Query(default="", description="Filtre type d'Γ©criture"), | |
| ) -> dict: | |
| """Catalogue HTR-United filtrable.""" | |
| from picarones.extras.importers.htr_united import HTRUnitedCatalogue | |
| cat = HTRUnitedCatalogue.from_demo() | |
| results = cat.search( | |
| query=query, | |
| language=language or None, | |
| script=script or None, | |
| ) | |
| return { | |
| "source": cat.source, | |
| "total": len(results), | |
| "entries": [e.as_dict() for e in results], | |
| "available_languages": cat.available_languages(), | |
| "available_scripts": cat.available_scripts(), | |
| } | |
| async def api_htr_united_import(req: HTRUnitedImportRequest) -> dict: | |
| """Importe une entrΓ©e HTR-United dans ``req.output_dir``.""" | |
| from picarones.extras.importers.htr_united import ( | |
| HTRUnitedCatalogue, | |
| import_htr_united_corpus, | |
| ) | |
| cat = HTRUnitedCatalogue.from_demo() | |
| entry = cat.get_by_id(req.entry_id) | |
| if not entry: | |
| raise HTTPException( | |
| status_code=404, detail=f"EntrΓ©e non trouvΓ©e : {req.entry_id}", | |
| ) | |
| return import_htr_united_corpus( | |
| entry=entry, | |
| output_dir=req.output_dir, | |
| max_samples=req.max_samples, | |
| ) | |
| # ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| # HuggingFace Datasets | |
| # ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ | |
| async def api_huggingface_search( | |
| query: str = Query(default="", description="RequΓͺte de recherche"), | |
| language: str = Query(default="", description="Filtre langue"), | |
| tags: str = Query(default="", description="Tags sΓ©parΓ©s par des virgules"), | |
| limit: int = Query(default=20, ge=1, le=50), | |
| ) -> dict: | |
| """Recherche de datasets sur HuggingFace Hub.""" | |
| from picarones.extras.importers.huggingface import HuggingFaceImporter | |
| tag_list = [t.strip() for t in tags.split(",") if t.strip()] if tags else None | |
| importer = HuggingFaceImporter() | |
| results = importer.search( | |
| query=query, | |
| tags=tag_list, | |
| language=language or None, | |
| limit=limit, | |
| ) | |
| return { | |
| "total": len(results), | |
| "datasets": [ds.as_dict() for ds in results], | |
| } | |
| async def api_huggingface_import(req: HuggingFaceImportRequest) -> dict: | |
| """Importe un dataset HuggingFace dans ``req.output_dir``.""" | |
| from picarones.extras.importers.huggingface import HuggingFaceImporter | |
| importer = HuggingFaceImporter() | |
| return importer.import_dataset( | |
| dataset_id=req.dataset_id, | |
| output_dir=req.output_dir, | |
| split=req.split, | |
| max_samples=req.max_samples, | |
| ) | |