# Spaces:
# Sleeping
# Sleeping
# Claude
# fix(web): durcir le parsing XML (defusedxml en dépendance dure) + exceptions précises
# Page metadata kept for provenance: "de46be0 unverified" (presumably the
# commit hash and its signature status — TODO confirm).
# Build backend declaration: the package is built with setuptools.
[build-system]
requires = ["setuptools>=68.0", "wheel"]
build-backend = "setuptools.build_meta"

# Core project metadata and the runtime dependencies that are always installed.
[project]
name = "picarones"
version = "1.0.0"
description = "Plateforme de comparaison de moteurs OCR/HTR pour documents patrimoniaux"
readme = "README.md"
requires-python = ">=3.11"
license = { text = "Apache-2.0" }
authors = [{ name = "maribakulj" }]
keywords = ["ocr", "htr", "patrimoine", "benchmark", "cer", "wer", "gallica", "escriptorium", "iiif"]
classifiers = [
    "Development Status :: 4 - Beta",
    "Programming Language :: Python :: 3.11",
    "Programming Language :: Python :: 3.12",
    "License :: OSI Approved :: Apache Software License",
    "Operating System :: OS Independent",
    "Topic :: Scientific/Engineering :: Artificial Intelligence",
    "Topic :: Text Processing :: Linguistic",
    "Intended Audience :: Science/Research",
    "Natural Language :: French",
]
dependencies = [
    "click>=8.1.0",
    "jiwer>=3.0.0",
    "Pillow>=10.0.0",
    "pyyaml>=6.0.0",
    "pytesseract>=0.3.10",
    "tqdm>=4.66.0",
    "numpy>=1.24.0",
    "jinja2>=3.1.0",
    # XML parsing hardened against XXE / Billion Laughs attacks.
    # Used by ``picarones.web.corpus_utils`` to parse ALTO/PAGE files
    # when a user uploads an XML corpus.
    "defusedxml>=0.7.1",
]

# Links published with the package metadata.
[project.urls]
Homepage = "https://github.com/maribakulj/Picarones"
Documentation = "https://github.com/maribakulj/Picarones/blob/main/INSTALL.md"
Repository = "https://github.com/maribakulj/Picarones"
Changelog = "https://github.com/maribakulj/Picarones/blob/main/CHANGELOG.md"
# Quoted because the label contains a space.
"Bug Tracker" = "https://github.com/maribakulj/Picarones/issues"

[project.optional-dependencies]
# Development and testing. Repeats the packages of the `web` extra in
# addition to the pytest tooling.
dev = [
    "pytest>=7.4.0",
    "pytest-cov>=4.1.0",
    "httpx>=0.27.0",
    "fastapi>=0.111.0",
    "uvicorn[standard]>=0.29.0",
    "python-multipart>=0.0.9",
]
# FastAPI web interface
web = [
    "fastapi>=0.111.0",
    "uvicorn[standard]>=0.29.0",
    "httpx>=0.27.0",
    "python-multipart>=0.0.9",
]
# Advanced statistical tests (exact Wilcoxon, exact Friedman chi², Nemenyi).
# Without this extra, a pure-Python fallback is used (normal /
# Wilson-Hilferty approximations).
stats = ["scipy>=1.11.0"]
# Named-entity extractors (Sprint 40 — A.II.1.a of the evolution plan).
# Without this extra, picarones.core.ner_backends.SpacyEntityExtractor
# silently falls back to a degraded mode and the runner skips the NER
# computation.
ner = ["spacy>=3.7.0"]
# HuggingFace Datasets import
hf = ["datasets>=2.19.0"]
# Optional OCR engines
pero = ["pero-ocr>=0.1.0"]
kraken = ["kraken>=4.0.0"]
# LLM adapters
llm = [
    "openai>=1.0.0",
    "anthropic>=0.20.0",
    "mistralai>=1.0.0",
]
# Cloud OCR APIs
ocr-cloud = [
    "google-cloud-vision>=3.0.0",
    "boto3>=1.34.0",
    "azure-ai-formrecognizer>=3.3.0",
]
# Philological metrics for historical documents (Circle 3, phase B of the
# post-Sprint 97 refactoring effort). Today the philological modules
# (`picarones.extras.historical.*`) ship in the main package with no
# external dependency — the ``[historical]`` extra therefore adds no
# package to install. It is declared here to **document the intent**: a
# purely modern usage (no heritage use case) can ignore the
# extras/historical/ sub-package entirely, and a future split into a
# separate PyPI package ``picarones-historical`` will reuse this extra name.
historical = []
# Corpus importers from remote sources (Circle 3, phase C).
# The 6 importers (under extras/importers/, dotted
# ``picarones.extras.importers.*``) ship in the main package.
# ``[importers]`` documents the intent of a future split into a PyPI
# package ``picarones-importers``. The ``huggingface`` and ``escriptorium``
# modules emit a ``UserWarning`` on import (experimental status).
importers = []
# Convenience bundle: web, hf, llm, dev, historical and importers.
# Not included (must be requested explicitly): stats, ner, pero, kraken
# and ocr-cloud.
# NOTE(review): the previous comment claimed "every extra except cloud OCR",
# which does not match the list below — confirm whether stats/ner/pero/kraken
# are left out on purpose.
all = [
    "picarones[web,hf,llm,dev,historical,importers]",
]

# Console entry point: the `picarones` command maps to `cli` in `picarones.cli`.
[project.scripts]
picarones = "picarones.cli:cli"

# Package discovery: search from the repository root and keep only
# `picarones` and its sub-packages.
[tool.setuptools.packages.find]
where = ["."]
include = ["picarones*"]

# Non-Python files bundled with the `picarones` package. Patterns are
# relative to the package directory.
[tool.setuptools.package-data]
picarones = [
    "prompts/*.txt",
    "web/static/*.css",
    "web/static/*.js",
    "web/templates/*.j2",
    "web/templates/*.html",
    "report/templates/*.j2",
    "report/templates/*.html",
    "report/templates/*.css",
    "report/templates/*.js",
    "report/i18n/*.json",
    "core/narrative/templates/*.yaml",
    "data/*.yaml",
    "report/glossary/*.yaml",
]

# pytest defaults: collect from `tests/`, verbose output, short tracebacks.
[tool.pytest.ini_options]
testpaths = ["tests"]
addopts = "-v --tb=short"

[tool.ruff]
# Centralised configuration so that `ruff check`, `make lint` and the CI job
# produce exactly the same results without any command-line flags.
line-length = 100
target-version = "py311"

[tool.ruff.lint]
# E/W = pycodestyle, F = pyflakes. Same rule set as the original CI
# (before Sprint 22), which excluded long lines (E501) and imports that are
# not at the top of the file (E402, sometimes useful for conditional imports).
select = ["E", "W", "F"]
ignore = ["E501", "E402"]