File size: 3,735 Bytes
49cc409
 
 
 
 
 
bff1348
 
49cc409
 
 
cecde1f
bff1348
49cc409
b283975
49cc409
 
 
bff1348
49cc409
bff1348
 
 
49cc409
 
 
 
 
 
 
 
 
a0dc23e
49cc409
 
bff1348
cecde1f
 
 
 
 
bff1348
49cc409
bff1348
e224609
bff1348
7afb12e
35001ff
 
 
d733b48
 
 
 
bff1348
b4cc1c5
bff1348
 
 
 
 
 
 
001e605
bff1348
 
 
 
 
 
 
 
 
 
 
49cc409
 
 
 
 
 
 
 
8c509eb
a0dc23e
 
 
b914841
 
 
a0dc23e
 
 
 
 
d28d854
b6bdecc
76e79a0
a0dc23e
8c509eb
49cc409
 
 
a76711b
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
[build-system]
requires = ["setuptools>=68.0", "wheel"]
build-backend = "setuptools.build_meta"

[project]
name = "picarones"
version = "1.0.0"
description = "Plateforme de comparaison de moteurs OCR/HTR pour documents patrimoniaux"
readme = "README.md"
requires-python = ">=3.11"
license = { text = "Apache-2.0" }
authors = [{ name = "maribakulj" }]
keywords = ["ocr", "htr", "patrimoine", "benchmark", "cer", "wer", "gallica", "escriptorium", "iiif"]
classifiers = [
    "Development Status :: 4 - Beta",
    "Programming Language :: Python :: 3.11",
    "Programming Language :: Python :: 3.12",
    "License :: OSI Approved :: Apache Software License",
    "Operating System :: OS Independent",
    "Topic :: Scientific/Engineering :: Artificial Intelligence",
    "Topic :: Text Processing :: Linguistic",
    "Intended Audience :: Science/Research",
    "Natural Language :: French",
]
dependencies = [
    "click>=8.1.0",
    "jiwer>=3.0.0",
    "Pillow>=10.0.0",
    "pyyaml>=6.0.0",
    "pytesseract>=0.3.10",
    "tqdm>=4.66.0",
    "numpy>=1.24.0",
    "jinja2>=3.1.0",
]

[project.urls]
Homepage = "https://github.com/maribakulj/Picarones"
Documentation = "https://github.com/maribakulj/Picarones/blob/main/INSTALL.md"
Repository = "https://github.com/maribakulj/Picarones"
Changelog = "https://github.com/maribakulj/Picarones/blob/main/CHANGELOG.md"
"Bug Tracker" = "https://github.com/maribakulj/Picarones/issues"

[project.optional-dependencies]
# Développement et tests
dev = ["pytest>=7.4.0", "pytest-cov>=4.1.0", "httpx>=0.27.0", "fastapi>=0.111.0", "uvicorn[standard]>=0.29.0", "python-multipart>=0.0.9"]
# Interface web FastAPI
web = ["fastapi>=0.111.0", "uvicorn[standard]>=0.29.0", "httpx>=0.27.0", "python-multipart>=0.0.9"]
# Tests statistiques avancés (Wilcoxon exact, Friedman chi² exact, Nemenyi)
# Sinon fallback pur Python (approximations normale / Wilson-Hilferty).
stats = ["scipy>=1.11.0"]
# Extracteurs d'entités nommées (Sprint 40 — A.II.1.a du plan d'évolution).
# Sans cet extra, picarones.core.ner_backends.SpacyEntityExtractor tombe
# en mode dégradé silencieux et le runner saute le calcul NER.
ner = ["spacy>=3.7.0"]
# Import HuggingFace Datasets
hf = ["datasets>=2.19.0"]
# Moteurs OCR optionnels
pero = ["pero-ocr>=0.1.0"]
kraken = ["kraken>=4.0.0"]
# Adaptateurs LLM
llm = [
    "openai>=1.0.0",
    "anthropic>=0.20.0",
    "mistralai>=1.0.0",
]
# OCR cloud APIs
ocr-cloud = [
    "google-cloud-vision>=3.0.0",
    "boto3>=1.34.0",
    "azure-ai-formrecognizer>=3.3.0",
]
# Installation complète (tous les extras sauf les OCR cloud)
all = [
    "picarones[web,hf,llm,dev]",
]

[project.scripts]
picarones = "picarones.cli:cli"

[tool.setuptools.packages.find]
where = ["."]
include = ["picarones*"]

[tool.setuptools.package-data]
picarones = [
    "prompts/*.txt",
    "web/static/*.css",
    "web/static/*.js",
    "web/templates/*.j2",
    "web/templates/*.html",
    "report/templates/*.j2",
    "report/templates/*.html",
    "report/templates/*.css",
    "report/templates/*.js",
    "report/i18n/*.json",
    "core/narrative/templates/*.yaml",
    "data/*.yaml",
    "report/glossary/*.yaml",
]

[tool.pytest.ini_options]
testpaths = ["tests"]
addopts = "-v --tb=short"

[tool.ruff]
# Configuration centralisée pour que `ruff check`, `make lint` et le job CI
# produisent exactement les mêmes résultats sans flags en ligne de commande.
line-length = 100
target-version = "py311"

[tool.ruff.lint]
# E/W = pycodestyle, F = pyflakes. On conserve les mêmes règles que le CI
# d'origine (avant Sprint 22), qui excluait les lignes longues (E501) et les
# imports non-top (E402, parfois utiles pour imports conditionnels).
select = ["E", "W", "F"]
ignore = ["E501", "E402"]