Spaces:
Running
refactor(app): Phase A — extrait les 4 builders @staticmethod (run_orchestrator)
Browse filesSuite à la question « pas <500 ? » : correction de mon erreur
d'analyse (j'avais classé ces staticmethods en « risqué » à tort —
sans self, extraction sûre, pattern verbatim prouvé 7×).
_load_corpus / _build_pipelines / _build_views / _build_benchmark
_service → run_orchestrator_helpers/builders.py. _build_views et
_build_benchmark_service : retrait complet (call-sites → nom
module-global). _build_pipelines + _load_corpus : wrapper mince
@staticmethod conservé (tests les appellent via orch.<m>()).
Couplage white-box rattrapé (la suite l'a attrapé, 3 échecs) :
- test_run_orchestrator_feature_parity : orch._load_corpus → wrapper
- test_sprint_a14_s53 : patch resolve_adapter_class retargeté vers
run_orchestrator_helpers.builders (le symbole a migré ; le test
white-box suit, ce n'est pas masquer un bug).
run_orchestrator.py : 1316 → 731 (-44 % cumulé). Budget CI
RATCHETÉ VERS LE BAS 1050 → 775 (731+6 %, pas +15 % : signale
l'intention <500, n'entérine pas la taille). 413 app/CLI/archi
verts, lint propre (ratchet ISC/FLY/G inclus).
https://claude.ai/code/session_01EmLiMPJJuB44QHEFzDWUvF
- picarones/app/services/run_orchestrator.py +25 -196
- picarones/app/services/run_orchestrator_helpers/__init__.py +12 -0
- picarones/app/services/run_orchestrator_helpers/builders.py +234 -0
- tests/app/services/test_sprint_a14_s53_inputs_from_propagation.py +2 -2
- tests/architecture/test_file_budgets.py +6 -3
|
@@ -36,10 +36,8 @@ Anti-sur-ingénierie
|
|
| 36 |
|
| 37 |
from __future__ import annotations
|
| 38 |
|
| 39 |
-
import io
|
| 40 |
import logging
|
| 41 |
import threading
|
| 42 |
-
import zipfile
|
| 43 |
from dataclasses import dataclass, field
|
| 44 |
from pathlib import Path
|
| 45 |
from typing import Any, Callable
|
|
@@ -47,30 +45,16 @@ from typing import Any, Callable
|
|
| 47 |
logger = logging.getLogger(__name__)
|
| 48 |
|
| 49 |
from picarones.app.results import ReportRenderer, RunResult
|
| 50 |
-
from picarones.app.schemas import RunSpec
|
| 51 |
-
from picarones.app.services.benchmark_service import BenchmarkService
|
| 52 |
from picarones.app.services.dependencies import (
|
| 53 |
capture_dependencies_lock,
|
| 54 |
capture_system_binaries_lock,
|
| 55 |
)
|
| 56 |
-
from picarones.app.services.corpus_service import (
|
| 57 |
-
CorpusImportError,
|
| 58 |
-
CorpusService,
|
| 59 |
-
)
|
| 60 |
from picarones.app.services.path_security import WorkspaceManager
|
| 61 |
from picarones.app.services.registry_service import RegistryService
|
| 62 |
from picarones.domain.corpus import CorpusSpec
|
| 63 |
-
from picarones.evaluation.views import (
|
| 64 |
-
DefaultEvaluationViewExecutor,
|
| 65 |
-
build_alto_view,
|
| 66 |
-
build_search_view,
|
| 67 |
-
build_text_view,
|
| 68 |
-
)
|
| 69 |
from picarones.pipeline import (
|
| 70 |
-
CorpusRunner,
|
| 71 |
-
PipelineExecutor,
|
| 72 |
PipelineSpec,
|
| 73 |
-
PipelineStep,
|
| 74 |
)
|
| 75 |
|
| 76 |
# Helpers stateless extraits (audit prod P1 — dégonflage god-module).
|
|
@@ -80,10 +64,14 @@ from picarones.pipeline import (
|
|
| 80 |
# (donc ``monkeypatch.setattr(run_orchestrator, …)`` fonctionne aussi).
|
| 81 |
from picarones.app.services.run_orchestrator_helpers import (
|
| 82 |
_PipelineEngineProxy as _PipelineEngineProxy,
|
|
|
|
|
|
|
|
|
|
| 83 |
_default_gt_factory as _default_gt_factory,
|
| 84 |
_default_inputs_factory as _default_inputs_factory,
|
| 85 |
_filesystem_payload_loader as _filesystem_payload_loader,
|
| 86 |
_kwargs_signature as _kwargs_signature,
|
|
|
|
| 87 |
_make_context_factory as _make_context_factory,
|
| 88 |
_persist_legacy_benchmark_json as _persist_legacy_benchmark_json,
|
| 89 |
_resolve_entity_extractor as _resolve_entity_extractor,
|
|
@@ -203,7 +191,7 @@ class RunOrchestrator:
|
|
| 203 |
workspace = WorkspaceManager(self._output_dir)
|
| 204 |
|
| 205 |
# 1. Corpus.
|
| 206 |
-
corpus_spec, extracted_dir =
|
| 207 |
|
| 208 |
# 2. Registres.
|
| 209 |
registries = RegistryService.bootstrap_defaults()
|
|
@@ -215,14 +203,14 @@ class RunOrchestrator:
|
|
| 215 |
|
| 216 |
# 4. Vues canoniques. Phase B2.5 — propage normalization +
|
| 217 |
# char_exclude aux vues text_final/searchability.
|
| 218 |
-
views =
|
| 219 |
spec.views,
|
| 220 |
normalization_profile=spec.normalization_profile,
|
| 221 |
char_exclude=spec.char_exclude,
|
| 222 |
)
|
| 223 |
|
| 224 |
# 5. BenchmarkService.
|
| 225 |
-
bench =
|
| 226 |
registries=registries,
|
| 227 |
adapter_resolver=adapter_resolver,
|
| 228 |
code_version=spec.code_version,
|
|
@@ -379,12 +367,12 @@ class RunOrchestrator:
|
|
| 379 |
self._output_dir.mkdir(parents=True, exist_ok=True)
|
| 380 |
|
| 381 |
registries = RegistryService.bootstrap_defaults()
|
| 382 |
-
views =
|
| 383 |
spec.views,
|
| 384 |
normalization_profile=spec.normalization_profile,
|
| 385 |
char_exclude=spec.char_exclude,
|
| 386 |
)
|
| 387 |
-
bench =
|
| 388 |
registries=registries,
|
| 389 |
adapter_resolver=adapter_resolver,
|
| 390 |
code_version=spec.code_version,
|
|
@@ -472,112 +460,25 @@ class RunOrchestrator:
|
|
| 472 |
def _load_corpus(
|
| 473 |
spec: RunSpec, workspace: WorkspaceManager,
|
| 474 |
) -> tuple[CorpusSpec, Path]:
|
| 475 |
-
"""
|
| 476 |
-
|
| 477 |
-
|
| 478 |
-
|
| 479 |
-
|
| 480 |
-
|
| 481 |
-
zip_bytes,
|
| 482 |
-
corpus_name=spec.corpus_name or zip_path.stem,
|
| 483 |
-
metadata=spec.corpus_metadata,
|
| 484 |
-
)
|
| 485 |
-
return report.spec, report.extracted_dir
|
| 486 |
-
|
| 487 |
-
# corpus_dir : on zippe à la volée le contenu du dir et on
|
| 488 |
-
# délègue à ``CorpusService`` — réutilise toute la détection
|
| 489 |
-
# sans dupliquer la logique de classification image / GT.
|
| 490 |
-
assert spec.corpus_dir is not None # garanti par RunSpec validator
|
| 491 |
-
src_dir = Path(spec.corpus_dir)
|
| 492 |
-
if not src_dir.is_dir():
|
| 493 |
-
raise CorpusImportError(
|
| 494 |
-
f"corpus_dir n'est pas un répertoire : {src_dir!r}.",
|
| 495 |
-
)
|
| 496 |
-
buf = io.BytesIO()
|
| 497 |
-
with zipfile.ZipFile(buf, mode="w") as zf:
|
| 498 |
-
for file_path in src_dir.rglob("*"):
|
| 499 |
-
if file_path.is_file():
|
| 500 |
-
arc = file_path.relative_to(src_dir).as_posix()
|
| 501 |
-
zf.write(file_path, arcname=arc)
|
| 502 |
-
report = corpus_service.import_zip(
|
| 503 |
-
buf.getvalue(),
|
| 504 |
-
corpus_name=spec.corpus_name or src_dir.name,
|
| 505 |
-
metadata=spec.corpus_metadata,
|
| 506 |
-
)
|
| 507 |
-
return report.spec, report.extracted_dir
|
| 508 |
|
| 509 |
@staticmethod
|
| 510 |
def _build_pipelines(
|
| 511 |
spec: RunSpec,
|
| 512 |
-
) -> tuple[
|
| 513 |
-
|
| 514 |
-
|
| 515 |
-
|
| 516 |
-
|
| 517 |
-
|
| 518 |
-
|
| 519 |
-
|
| 520 |
-
|
| 521 |
-
- Deux steps avec la même ``(class, kwargs)`` partagent la
|
| 522 |
-
même instance d'adapter (cache).
|
| 523 |
-
- Deux steps avec la même ``id`` mais une ``class`` ou des
|
| 524 |
-
``kwargs`` différents reçoivent des ``adapter_name``
|
| 525 |
-
distincts (préfixés par le nom de pipeline).
|
| 526 |
-
|
| 527 |
-
C'est essentiel pour le cas où plusieurs pipelines utilisent
|
| 528 |
-
la **même classe** avec des **kwargs différents** (ex :
|
| 529 |
-
``PrecomputedTextAdapter`` instancié N fois avec
|
| 530 |
-
``source_label`` distincts).
|
| 531 |
-
"""
|
| 532 |
-
instance_cache: dict[str, Any] = {}
|
| 533 |
-
registered: dict[str, tuple[type, str]] = {}
|
| 534 |
-
name_to_class: dict[str, type] = {}
|
| 535 |
-
name_to_kwargs: dict[str, dict[str, Any]] = {}
|
| 536 |
-
|
| 537 |
-
pipeline_specs: list[PipelineSpec] = []
|
| 538 |
-
for p in spec.pipelines:
|
| 539 |
-
steps: list[PipelineStep] = []
|
| 540 |
-
for s in p.steps:
|
| 541 |
-
cls = resolve_adapter_class(s.adapter_class)
|
| 542 |
-
kwargs_sig = _kwargs_signature(s.adapter_kwargs)
|
| 543 |
-
adapter_name = s.id
|
| 544 |
-
existing = registered.get(adapter_name)
|
| 545 |
-
if existing is not None and existing != (cls, kwargs_sig):
|
| 546 |
-
adapter_name = f"{p.name}__{s.id}"
|
| 547 |
-
registered[adapter_name] = (cls, kwargs_sig)
|
| 548 |
-
name_to_class[adapter_name] = cls
|
| 549 |
-
name_to_kwargs[adapter_name] = s.adapter_kwargs
|
| 550 |
-
# ``inputs_from`` du StepSpec YAML doit être propagé au
|
| 551 |
-
# ``domain.PipelineSpec`` pour que le DAG branchant soit
|
| 552 |
-
# honoré ; sans ce passage, un DAG branchant déclaré dans
|
| 553 |
-
# le YAML serait silencieusement exécuté en linéaire.
|
| 554 |
-
steps.append(PipelineStep(
|
| 555 |
-
id=s.id,
|
| 556 |
-
kind="step",
|
| 557 |
-
adapter_name=adapter_name,
|
| 558 |
-
input_types=s.input_types,
|
| 559 |
-
output_types=s.output_types,
|
| 560 |
-
inputs_from=dict(s.inputs_from),
|
| 561 |
-
))
|
| 562 |
-
pipeline_specs.append(PipelineSpec(
|
| 563 |
-
name=p.name,
|
| 564 |
-
initial_inputs=p.initial_inputs,
|
| 565 |
-
steps=tuple(steps),
|
| 566 |
-
))
|
| 567 |
-
|
| 568 |
-
def resolver(name: str) -> Any:
|
| 569 |
-
if name not in instance_cache:
|
| 570 |
-
cls = name_to_class[name]
|
| 571 |
-
kwargs = name_to_kwargs[name]
|
| 572 |
-
instance_cache[name] = cls(**kwargs)
|
| 573 |
-
return instance_cache[name]
|
| 574 |
-
|
| 575 |
-
# Copie défensive — le manifest doit recevoir un snapshot
|
| 576 |
-
# immuable, pas la map vivante du resolver.
|
| 577 |
-
adapter_kwargs_dump = {
|
| 578 |
-
name: dict(kwargs) for name, kwargs in name_to_kwargs.items()
|
| 579 |
-
}
|
| 580 |
-
return pipeline_specs, resolver, adapter_kwargs_dump
|
| 581 |
|
| 582 |
def _execute_with_partial(
|
| 583 |
self,
|
|
@@ -628,7 +529,6 @@ class RunOrchestrator:
|
|
| 628 |
load_partial_pipeline_results,
|
| 629 |
partial_path_for_pipeline,
|
| 630 |
)
|
| 631 |
-
from picarones.domain.corpus import CorpusSpec
|
| 632 |
from picarones.domain.run_manifest import RunManifest
|
| 633 |
from picarones.pipeline.run_result import RunDocumentResult
|
| 634 |
|
|
@@ -835,77 +735,6 @@ class RunOrchestrator:
|
|
| 835 |
document_results=tuple(final_doc_results),
|
| 836 |
)
|
| 837 |
|
| 838 |
-
@staticmethod
|
| 839 |
-
def _build_views(
|
| 840 |
-
view_names: tuple[str, ...],
|
| 841 |
-
*,
|
| 842 |
-
normalization_profile: str | None = None,
|
| 843 |
-
char_exclude: str | None = None,
|
| 844 |
-
) -> list[Any]:
|
| 845 |
-
"""Map noms canoniques → vues construites.
|
| 846 |
-
|
| 847 |
-
Phase B2.5 — ``normalization_profile`` et ``char_exclude``
|
| 848 |
-
sont propagés aux vues qui les supportent (``text_final`` et
|
| 849 |
-
``searchability``). ``alto_documentary`` les ignore : ses
|
| 850 |
-
métriques structurelles n'opèrent pas sur du texte.
|
| 851 |
-
"""
|
| 852 |
-
text_view_kwargs = {
|
| 853 |
-
"normalization_profile": normalization_profile,
|
| 854 |
-
"char_exclude": char_exclude,
|
| 855 |
-
}
|
| 856 |
-
builders: dict[str, Callable[[], Any]] = {
|
| 857 |
-
"text_final": lambda: build_text_view(**text_view_kwargs),
|
| 858 |
-
"alto_documentary": build_alto_view,
|
| 859 |
-
"searchability": lambda: build_search_view(**text_view_kwargs),
|
| 860 |
-
}
|
| 861 |
-
return [builders[name]() for name in view_names]
|
| 862 |
-
|
| 863 |
-
@staticmethod
|
| 864 |
-
def _build_benchmark_service(
|
| 865 |
-
*,
|
| 866 |
-
registries: RegistryService,
|
| 867 |
-
adapter_resolver: Callable[[str], Any],
|
| 868 |
-
code_version: str,
|
| 869 |
-
cancel_event: threading.Event | None = None,
|
| 870 |
-
timeout_seconds_per_doc: float = 300.0,
|
| 871 |
-
) -> BenchmarkService:
|
| 872 |
-
"""Assemble ``BenchmarkService`` avec un loader filesystem.
|
| 873 |
-
|
| 874 |
-
Phase B2.2 — quand ``cancel_event`` est fourni, le
|
| 875 |
-
``CorpusRunner.run`` est wrappé pour injecter l'event dans
|
| 876 |
-
chaque appel. Pattern strictement copié de
|
| 877 |
-
``_benchmark_execution.py:142-149`` (legacy).
|
| 878 |
-
"""
|
| 879 |
-
pipeline_executor = PipelineExecutor(
|
| 880 |
-
adapter_resolver=adapter_resolver,
|
| 881 |
-
)
|
| 882 |
-
corpus_runner = CorpusRunner(
|
| 883 |
-
pipeline_executor,
|
| 884 |
-
max_in_flight=2,
|
| 885 |
-
timeout_seconds_per_doc=timeout_seconds_per_doc,
|
| 886 |
-
poll_interval_seconds=0.05,
|
| 887 |
-
)
|
| 888 |
-
|
| 889 |
-
if cancel_event is not None:
|
| 890 |
-
original_run = corpus_runner.run
|
| 891 |
-
|
| 892 |
-
def _runner_run_with_cancel(*args: Any, **kwargs: Any) -> Any:
|
| 893 |
-
kwargs.setdefault("cancel_event", cancel_event)
|
| 894 |
-
return original_run(*args, **kwargs)
|
| 895 |
-
|
| 896 |
-
corpus_runner.run = _runner_run_with_cancel # type: ignore[method-assign]
|
| 897 |
-
|
| 898 |
-
view_executor = DefaultEvaluationViewExecutor.from_registries(
|
| 899 |
-
registries.metrics,
|
| 900 |
-
registries.projectors,
|
| 901 |
-
_filesystem_payload_loader,
|
| 902 |
-
)
|
| 903 |
-
return BenchmarkService(
|
| 904 |
-
corpus_runner=corpus_runner,
|
| 905 |
-
view_executor=view_executor,
|
| 906 |
-
code_version=code_version,
|
| 907 |
-
)
|
| 908 |
-
|
| 909 |
|
| 910 |
__all__ = [
|
| 911 |
"OrchestrationResult",
|
|
|
|
| 36 |
|
| 37 |
from __future__ import annotations
|
| 38 |
|
|
|
|
| 39 |
import logging
|
| 40 |
import threading
|
|
|
|
| 41 |
from dataclasses import dataclass, field
|
| 42 |
from pathlib import Path
|
| 43 |
from typing import Any, Callable
|
|
|
|
| 45 |
logger = logging.getLogger(__name__)
|
| 46 |
|
| 47 |
from picarones.app.results import ReportRenderer, RunResult
|
| 48 |
+
from picarones.app.schemas import RunSpec
|
|
|
|
| 49 |
from picarones.app.services.dependencies import (
|
| 50 |
capture_dependencies_lock,
|
| 51 |
capture_system_binaries_lock,
|
| 52 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 53 |
from picarones.app.services.path_security import WorkspaceManager
|
| 54 |
from picarones.app.services.registry_service import RegistryService
|
| 55 |
from picarones.domain.corpus import CorpusSpec
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 56 |
from picarones.pipeline import (
|
|
|
|
|
|
|
| 57 |
PipelineSpec,
|
|
|
|
| 58 |
)
|
| 59 |
|
| 60 |
# Helpers stateless extraits (audit prod P1 — dégonflage god-module).
|
|
|
|
| 64 |
# (donc ``monkeypatch.setattr(run_orchestrator, …)`` fonctionne aussi).
|
| 65 |
from picarones.app.services.run_orchestrator_helpers import (
|
| 66 |
_PipelineEngineProxy as _PipelineEngineProxy,
|
| 67 |
+
_build_benchmark_service as _build_benchmark_service,
|
| 68 |
+
_build_pipelines as _build_pipelines,
|
| 69 |
+
_build_views as _build_views,
|
| 70 |
_default_gt_factory as _default_gt_factory,
|
| 71 |
_default_inputs_factory as _default_inputs_factory,
|
| 72 |
_filesystem_payload_loader as _filesystem_payload_loader,
|
| 73 |
_kwargs_signature as _kwargs_signature,
|
| 74 |
+
_load_corpus as _load_corpus,
|
| 75 |
_make_context_factory as _make_context_factory,
|
| 76 |
_persist_legacy_benchmark_json as _persist_legacy_benchmark_json,
|
| 77 |
_resolve_entity_extractor as _resolve_entity_extractor,
|
|
|
|
| 191 |
workspace = WorkspaceManager(self._output_dir)
|
| 192 |
|
| 193 |
# 1. Corpus.
|
| 194 |
+
corpus_spec, extracted_dir = _load_corpus(spec, workspace)
|
| 195 |
|
| 196 |
# 2. Registres.
|
| 197 |
registries = RegistryService.bootstrap_defaults()
|
|
|
|
| 203 |
|
| 204 |
# 4. Vues canoniques. Phase B2.5 — propage normalization +
|
| 205 |
# char_exclude aux vues text_final/searchability.
|
| 206 |
+
views = _build_views(
|
| 207 |
spec.views,
|
| 208 |
normalization_profile=spec.normalization_profile,
|
| 209 |
char_exclude=spec.char_exclude,
|
| 210 |
)
|
| 211 |
|
| 212 |
# 5. BenchmarkService.
|
| 213 |
+
bench = _build_benchmark_service(
|
| 214 |
registries=registries,
|
| 215 |
adapter_resolver=adapter_resolver,
|
| 216 |
code_version=spec.code_version,
|
|
|
|
| 367 |
self._output_dir.mkdir(parents=True, exist_ok=True)
|
| 368 |
|
| 369 |
registries = RegistryService.bootstrap_defaults()
|
| 370 |
+
views = _build_views(
|
| 371 |
spec.views,
|
| 372 |
normalization_profile=spec.normalization_profile,
|
| 373 |
char_exclude=spec.char_exclude,
|
| 374 |
)
|
| 375 |
+
bench = _build_benchmark_service(
|
| 376 |
registries=registries,
|
| 377 |
adapter_resolver=adapter_resolver,
|
| 378 |
code_version=spec.code_version,
|
|
|
|
| 460 |
def _load_corpus(
|
| 461 |
spec: RunSpec, workspace: WorkspaceManager,
|
| 462 |
) -> tuple[CorpusSpec, Path]:
|
| 463 |
+
"""Wrapper mince — délègue à
|
| 464 |
+
:func:`run_orchestrator_helpers.builders._load_corpus`
|
| 465 |
+
(audit Phase A). Conservé comme ``@staticmethod`` car un test
|
| 466 |
+
de parité l'appelle via ``orch._load_corpus(...)`` pour
|
| 467 |
+
recalculer un fingerprint de partial cohérent."""
|
| 468 |
+
return _load_corpus(spec, workspace)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 469 |
|
| 470 |
@staticmethod
|
| 471 |
def _build_pipelines(
|
| 472 |
spec: RunSpec,
|
| 473 |
+
) -> tuple[list[PipelineSpec], Callable[[str], Any], dict[str, dict[str, Any]]]:
|
| 474 |
+
"""Wrapper mince — délègue à
|
| 475 |
+
:func:`run_orchestrator_helpers.builders._build_pipelines`
|
| 476 |
+
(audit Phase A : corps extrait hors du god-module). Conservé
|
| 477 |
+
comme ``@staticmethod`` car un test l'appelle via
|
| 478 |
+
``orch._build_pipelines(spec)`` ; ``_build_pipelines`` réfère
|
| 479 |
+
ici le nom module-global réimporté (pas de récursion : la
|
| 480 |
+
méthode de classe n'est pas dans les globals de la fonction)."""
|
| 481 |
+
return _build_pipelines(spec)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 482 |
|
| 483 |
def _execute_with_partial(
|
| 484 |
self,
|
|
|
|
| 529 |
load_partial_pipeline_results,
|
| 530 |
partial_path_for_pipeline,
|
| 531 |
)
|
|
|
|
| 532 |
from picarones.domain.run_manifest import RunManifest
|
| 533 |
from picarones.pipeline.run_result import RunDocumentResult
|
| 534 |
|
|
|
|
| 735 |
document_results=tuple(final_doc_results),
|
| 736 |
)
|
| 737 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 738 |
|
| 739 |
__all__ = [
|
| 740 |
"OrchestrationResult",
|
|
@@ -6,6 +6,8 @@ en sous-modules cohésifs :
|
|
| 6 |
|
| 7 |
- :mod:`.factories` — GT / inputs / RunContext (stateless)
|
| 8 |
- :mod:`.loaders` — payload filesystem + signature kwargs
|
|
|
|
|
|
|
| 9 |
- :mod:`.legacy` — pont converter ``BenchmarkResult`` + résolution
|
| 10 |
NER + persistance JSON legacy
|
| 11 |
|
|
@@ -25,6 +27,12 @@ from picarones.app.services.run_orchestrator_helpers.loaders import (
|
|
| 25 |
_filesystem_payload_loader as _filesystem_payload_loader,
|
| 26 |
_kwargs_signature as _kwargs_signature,
|
| 27 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 28 |
from picarones.app.services.run_orchestrator_helpers.legacy import (
|
| 29 |
_PipelineEngineProxy as _PipelineEngineProxy,
|
| 30 |
_persist_legacy_benchmark_json as _persist_legacy_benchmark_json,
|
|
@@ -33,10 +41,14 @@ from picarones.app.services.run_orchestrator_helpers.legacy import (
|
|
| 33 |
|
| 34 |
__all__ = [
|
| 35 |
"_PipelineEngineProxy",
|
|
|
|
|
|
|
|
|
|
| 36 |
"_default_gt_factory",
|
| 37 |
"_default_inputs_factory",
|
| 38 |
"_filesystem_payload_loader",
|
| 39 |
"_kwargs_signature",
|
|
|
|
| 40 |
"_make_context_factory",
|
| 41 |
"_persist_legacy_benchmark_json",
|
| 42 |
"_resolve_entity_extractor",
|
|
|
|
| 6 |
|
| 7 |
- :mod:`.factories` — GT / inputs / RunContext (stateless)
|
| 8 |
- :mod:`.loaders` — payload filesystem + signature kwargs
|
| 9 |
+
- :mod:`.builders` — corpus / pipelines / vues / BenchmarkService
|
| 10 |
+
(ex-``@staticmethod`` du god-module, Phase A)
|
| 11 |
- :mod:`.legacy` — pont converter ``BenchmarkResult`` + résolution
|
| 12 |
NER + persistance JSON legacy
|
| 13 |
|
|
|
|
| 27 |
_filesystem_payload_loader as _filesystem_payload_loader,
|
| 28 |
_kwargs_signature as _kwargs_signature,
|
| 29 |
)
|
| 30 |
+
from picarones.app.services.run_orchestrator_helpers.builders import (
|
| 31 |
+
_build_benchmark_service as _build_benchmark_service,
|
| 32 |
+
_build_pipelines as _build_pipelines,
|
| 33 |
+
_build_views as _build_views,
|
| 34 |
+
_load_corpus as _load_corpus,
|
| 35 |
+
)
|
| 36 |
from picarones.app.services.run_orchestrator_helpers.legacy import (
|
| 37 |
_PipelineEngineProxy as _PipelineEngineProxy,
|
| 38 |
_persist_legacy_benchmark_json as _persist_legacy_benchmark_json,
|
|
|
|
| 41 |
|
| 42 |
__all__ = [
|
| 43 |
"_PipelineEngineProxy",
|
| 44 |
+
"_build_benchmark_service",
|
| 45 |
+
"_build_pipelines",
|
| 46 |
+
"_build_views",
|
| 47 |
"_default_gt_factory",
|
| 48 |
"_default_inputs_factory",
|
| 49 |
"_filesystem_payload_loader",
|
| 50 |
"_kwargs_signature",
|
| 51 |
+
"_load_corpus",
|
| 52 |
"_make_context_factory",
|
| 53 |
"_persist_legacy_benchmark_json",
|
| 54 |
"_resolve_entity_extractor",
|
|
@@ -0,0 +1,234 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""Constructeurs stateless du ``RunOrchestrator`` (corpus / pipelines /
|
| 2 |
+
vues / service).
|
| 3 |
+
|
| 4 |
+
Audit prod Phase A — extraction des 4 ``@staticmethod`` (sans
|
| 5 |
+
``self``) hors du god-module ``run_orchestrator.py``. Déplacement
|
| 6 |
+
verbatim, comportement strictement préservé : ``run_orchestrator``
|
| 7 |
+
réimporte ces noms (façade) et conserve un wrapper mince
|
| 8 |
+
``_build_pipelines`` (un test l'appelle via ``orch._build_pipelines``).
|
| 9 |
+
"""
|
| 10 |
+
|
| 11 |
+
from __future__ import annotations
|
| 12 |
+
|
| 13 |
+
import io
|
| 14 |
+
import threading
|
| 15 |
+
import zipfile
|
| 16 |
+
from pathlib import Path
|
| 17 |
+
from typing import Any, Callable
|
| 18 |
+
|
| 19 |
+
from picarones.app.schemas import RunSpec, resolve_adapter_class
|
| 20 |
+
from picarones.app.services.benchmark_service import BenchmarkService
|
| 21 |
+
from picarones.app.services.corpus_service import (
|
| 22 |
+
CorpusImportError,
|
| 23 |
+
CorpusService,
|
| 24 |
+
)
|
| 25 |
+
from picarones.app.services.path_security import WorkspaceManager
|
| 26 |
+
from picarones.app.services.registry_service import RegistryService
|
| 27 |
+
from picarones.domain.corpus import CorpusSpec
|
| 28 |
+
from picarones.evaluation.views import (
|
| 29 |
+
DefaultEvaluationViewExecutor,
|
| 30 |
+
build_alto_view,
|
| 31 |
+
build_search_view,
|
| 32 |
+
build_text_view,
|
| 33 |
+
)
|
| 34 |
+
from picarones.pipeline import (
|
| 35 |
+
CorpusRunner,
|
| 36 |
+
PipelineExecutor,
|
| 37 |
+
PipelineSpec,
|
| 38 |
+
PipelineStep,
|
| 39 |
+
)
|
| 40 |
+
|
| 41 |
+
from picarones.app.services.run_orchestrator_helpers.loaders import (
|
| 42 |
+
_filesystem_payload_loader,
|
| 43 |
+
_kwargs_signature,
|
| 44 |
+
)
|
| 45 |
+
|
| 46 |
+
|
| 47 |
+
def _load_corpus(
|
| 48 |
+
spec: RunSpec, workspace: WorkspaceManager,
|
| 49 |
+
) -> tuple[CorpusSpec, Path]:
|
| 50 |
+
"""Charge le corpus selon ``corpus_zip`` ou ``corpus_dir``."""
|
| 51 |
+
corpus_service = CorpusService(workspace)
|
| 52 |
+
if spec.corpus_zip is not None:
|
| 53 |
+
zip_path = Path(spec.corpus_zip)
|
| 54 |
+
zip_bytes = zip_path.read_bytes()
|
| 55 |
+
report = corpus_service.import_zip(
|
| 56 |
+
zip_bytes,
|
| 57 |
+
corpus_name=spec.corpus_name or zip_path.stem,
|
| 58 |
+
metadata=spec.corpus_metadata,
|
| 59 |
+
)
|
| 60 |
+
return report.spec, report.extracted_dir
|
| 61 |
+
|
| 62 |
+
# corpus_dir : on zippe à la volée le contenu du dir et on
|
| 63 |
+
# délègue à ``CorpusService`` — réutilise toute la détection
|
| 64 |
+
# sans dupliquer la logique de classification image / GT.
|
| 65 |
+
assert spec.corpus_dir is not None # garanti par RunSpec validator
|
| 66 |
+
src_dir = Path(spec.corpus_dir)
|
| 67 |
+
if not src_dir.is_dir():
|
| 68 |
+
raise CorpusImportError(
|
| 69 |
+
f"corpus_dir n'est pas un répertoire : {src_dir!r}.",
|
| 70 |
+
)
|
| 71 |
+
buf = io.BytesIO()
|
| 72 |
+
with zipfile.ZipFile(buf, mode="w") as zf:
|
| 73 |
+
for file_path in src_dir.rglob("*"):
|
| 74 |
+
if file_path.is_file():
|
| 75 |
+
arc = file_path.relative_to(src_dir).as_posix()
|
| 76 |
+
zf.write(file_path, arcname=arc)
|
| 77 |
+
report = corpus_service.import_zip(
|
| 78 |
+
buf.getvalue(),
|
| 79 |
+
corpus_name=spec.corpus_name or src_dir.name,
|
| 80 |
+
metadata=spec.corpus_metadata,
|
| 81 |
+
)
|
| 82 |
+
return report.spec, report.extracted_dir
|
| 83 |
+
|
| 84 |
+
|
| 85 |
+
def _build_pipelines(
|
| 86 |
+
spec: RunSpec,
|
| 87 |
+
) -> tuple[
|
| 88 |
+
list[PipelineSpec],
|
| 89 |
+
Callable[[str], Any],
|
| 90 |
+
dict[str, dict[str, Any]],
|
| 91 |
+
]:
|
| 92 |
+
"""Construit les ``PipelineSpec`` + un resolver d'adapters.
|
| 93 |
+
|
| 94 |
+
Disambiguation des steps :
|
| 95 |
+
|
| 96 |
+
- Deux steps avec la même ``(class, kwargs)`` partagent la
|
| 97 |
+
même instance d'adapter (cache).
|
| 98 |
+
- Deux steps avec la même ``id`` mais une ``class`` ou des
|
| 99 |
+
``kwargs`` différents reçoivent des ``adapter_name``
|
| 100 |
+
distincts (préfixés par le nom de pipeline).
|
| 101 |
+
|
| 102 |
+
C'est essentiel pour le cas où plusieurs pipelines utilisent
|
| 103 |
+
la **même classe** avec des **kwargs différents** (ex :
|
| 104 |
+
``PrecomputedTextAdapter`` instancié N fois avec
|
| 105 |
+
``source_label`` distincts).
|
| 106 |
+
"""
|
| 107 |
+
instance_cache: dict[str, Any] = {}
|
| 108 |
+
registered: dict[str, tuple[type, str]] = {}
|
| 109 |
+
name_to_class: dict[str, type] = {}
|
| 110 |
+
name_to_kwargs: dict[str, dict[str, Any]] = {}
|
| 111 |
+
|
| 112 |
+
pipeline_specs: list[PipelineSpec] = []
|
| 113 |
+
for p in spec.pipelines:
|
| 114 |
+
steps: list[PipelineStep] = []
|
| 115 |
+
for s in p.steps:
|
| 116 |
+
cls = resolve_adapter_class(s.adapter_class)
|
| 117 |
+
kwargs_sig = _kwargs_signature(s.adapter_kwargs)
|
| 118 |
+
adapter_name = s.id
|
| 119 |
+
existing = registered.get(adapter_name)
|
| 120 |
+
if existing is not None and existing != (cls, kwargs_sig):
|
| 121 |
+
adapter_name = f"{p.name}__{s.id}"
|
| 122 |
+
registered[adapter_name] = (cls, kwargs_sig)
|
| 123 |
+
name_to_class[adapter_name] = cls
|
| 124 |
+
name_to_kwargs[adapter_name] = s.adapter_kwargs
|
| 125 |
+
# ``inputs_from`` du StepSpec YAML doit être propagé au
|
| 126 |
+
# ``domain.PipelineSpec`` pour que le DAG branchant soit
|
| 127 |
+
# honoré ; sans ce passage, un DAG branchant déclaré dans
|
| 128 |
+
# le YAML serait silencieusement exécuté en linéaire.
|
| 129 |
+
steps.append(PipelineStep(
|
| 130 |
+
id=s.id,
|
| 131 |
+
kind="step",
|
| 132 |
+
adapter_name=adapter_name,
|
| 133 |
+
input_types=s.input_types,
|
| 134 |
+
output_types=s.output_types,
|
| 135 |
+
inputs_from=dict(s.inputs_from),
|
| 136 |
+
))
|
| 137 |
+
pipeline_specs.append(PipelineSpec(
|
| 138 |
+
name=p.name,
|
| 139 |
+
initial_inputs=p.initial_inputs,
|
| 140 |
+
steps=tuple(steps),
|
| 141 |
+
))
|
| 142 |
+
|
| 143 |
+
def resolver(name: str) -> Any:
|
| 144 |
+
if name not in instance_cache:
|
| 145 |
+
cls = name_to_class[name]
|
| 146 |
+
kwargs = name_to_kwargs[name]
|
| 147 |
+
instance_cache[name] = cls(**kwargs)
|
| 148 |
+
return instance_cache[name]
|
| 149 |
+
|
| 150 |
+
# Copie défensive — le manifest doit recevoir un snapshot
|
| 151 |
+
# immuable, pas la map vivante du resolver.
|
| 152 |
+
adapter_kwargs_dump = {
|
| 153 |
+
name: dict(kwargs) for name, kwargs in name_to_kwargs.items()
|
| 154 |
+
}
|
| 155 |
+
return pipeline_specs, resolver, adapter_kwargs_dump
|
| 156 |
+
|
| 157 |
+
|
| 158 |
+
def _build_views(
|
| 159 |
+
view_names: tuple[str, ...],
|
| 160 |
+
*,
|
| 161 |
+
normalization_profile: str | None = None,
|
| 162 |
+
char_exclude: str | None = None,
|
| 163 |
+
) -> list[Any]:
|
| 164 |
+
"""Map noms canoniques → vues construites.
|
| 165 |
+
|
| 166 |
+
Phase B2.5 — ``normalization_profile`` et ``char_exclude``
|
| 167 |
+
sont propagés aux vues qui les supportent (``text_final`` et
|
| 168 |
+
``searchability``). ``alto_documentary`` les ignore : ses
|
| 169 |
+
métriques structurelles n'opèrent pas sur du texte.
|
| 170 |
+
"""
|
| 171 |
+
text_view_kwargs = {
|
| 172 |
+
"normalization_profile": normalization_profile,
|
| 173 |
+
"char_exclude": char_exclude,
|
| 174 |
+
}
|
| 175 |
+
builders: dict[str, Callable[[], Any]] = {
|
| 176 |
+
"text_final": lambda: build_text_view(**text_view_kwargs),
|
| 177 |
+
"alto_documentary": build_alto_view,
|
| 178 |
+
"searchability": lambda: build_search_view(**text_view_kwargs),
|
| 179 |
+
}
|
| 180 |
+
return [builders[name]() for name in view_names]
|
| 181 |
+
|
| 182 |
+
|
| 183 |
+
def _build_benchmark_service(
|
| 184 |
+
*,
|
| 185 |
+
registries: RegistryService,
|
| 186 |
+
adapter_resolver: Callable[[str], Any],
|
| 187 |
+
code_version: str,
|
| 188 |
+
cancel_event: threading.Event | None = None,
|
| 189 |
+
timeout_seconds_per_doc: float = 300.0,
|
| 190 |
+
) -> BenchmarkService:
|
| 191 |
+
"""Assemble ``BenchmarkService`` avec un loader filesystem.
|
| 192 |
+
|
| 193 |
+
Phase B2.2 — quand ``cancel_event`` est fourni, le
|
| 194 |
+
``CorpusRunner.run`` est wrappé pour injecter l'event dans
|
| 195 |
+
chaque appel. Pattern strictement copié de
|
| 196 |
+
``_benchmark_execution.py:142-149`` (legacy).
|
| 197 |
+
"""
|
| 198 |
+
pipeline_executor = PipelineExecutor(
|
| 199 |
+
adapter_resolver=adapter_resolver,
|
| 200 |
+
)
|
| 201 |
+
corpus_runner = CorpusRunner(
|
| 202 |
+
pipeline_executor,
|
| 203 |
+
max_in_flight=2,
|
| 204 |
+
timeout_seconds_per_doc=timeout_seconds_per_doc,
|
| 205 |
+
poll_interval_seconds=0.05,
|
| 206 |
+
)
|
| 207 |
+
|
| 208 |
+
if cancel_event is not None:
|
| 209 |
+
original_run = corpus_runner.run
|
| 210 |
+
|
| 211 |
+
def _runner_run_with_cancel(*args: Any, **kwargs: Any) -> Any:
|
| 212 |
+
kwargs.setdefault("cancel_event", cancel_event)
|
| 213 |
+
return original_run(*args, **kwargs)
|
| 214 |
+
|
| 215 |
+
corpus_runner.run = _runner_run_with_cancel # type: ignore[method-assign]
|
| 216 |
+
|
| 217 |
+
view_executor = DefaultEvaluationViewExecutor.from_registries(
|
| 218 |
+
registries.metrics,
|
| 219 |
+
registries.projectors,
|
| 220 |
+
_filesystem_payload_loader,
|
| 221 |
+
)
|
| 222 |
+
return BenchmarkService(
|
| 223 |
+
corpus_runner=corpus_runner,
|
| 224 |
+
view_executor=view_executor,
|
| 225 |
+
code_version=code_version,
|
| 226 |
+
)
|
| 227 |
+
|
| 228 |
+
|
| 229 |
+
__all__ = [
|
| 230 |
+
"_build_benchmark_service",
|
| 231 |
+
"_build_pipelines",
|
| 232 |
+
"_build_views",
|
| 233 |
+
"_load_corpus",
|
| 234 |
+
]
|
|
@@ -59,7 +59,7 @@ def test_orchestrator_propagates_inputs_from_to_pipeline_step(
|
|
| 59 |
# inputs_from.
|
| 60 |
from unittest.mock import MagicMock, patch
|
| 61 |
with patch(
|
| 62 |
-
"picarones.app.services.
|
| 63 |
return_value=MagicMock,
|
| 64 |
):
|
| 65 |
pipeline_specs, _resolver, _kwargs = orch._build_pipelines(spec)
|
|
@@ -95,7 +95,7 @@ def test_step_without_inputs_from_yields_empty_dict(tmp_path) -> None:
|
|
| 95 |
orch = RunOrchestrator(output_dir=tmp_path / "out")
|
| 96 |
from unittest.mock import MagicMock, patch
|
| 97 |
with patch(
|
| 98 |
-
"picarones.app.services.
|
| 99 |
return_value=MagicMock,
|
| 100 |
):
|
| 101 |
pipeline_specs, _, _ = orch._build_pipelines(spec)
|
|
|
|
| 59 |
# inputs_from.
|
| 60 |
from unittest.mock import MagicMock, patch
|
| 61 |
with patch(
|
| 62 |
+
"picarones.app.services.run_orchestrator_helpers.builders.resolve_adapter_class",
|
| 63 |
return_value=MagicMock,
|
| 64 |
):
|
| 65 |
pipeline_specs, _resolver, _kwargs = orch._build_pipelines(spec)
|
|
|
|
| 95 |
orch = RunOrchestrator(output_dir=tmp_path / "out")
|
| 96 |
from unittest.mock import MagicMock, patch
|
| 97 |
with patch(
|
| 98 |
+
"picarones.app.services.run_orchestrator_helpers.builders.resolve_adapter_class",
|
| 99 |
return_value=MagicMock,
|
| 100 |
):
|
| 101 |
pipeline_specs, _, _ = orch._build_pipelines(spec)
|
|
@@ -124,9 +124,12 @@ FILE_BUDGETS: dict[str, int] = {
|
|
| 124 |
"picarones/app/services/corpus_service.py": 625, # actuel 541
|
| 125 |
"picarones/app/services/path_security.py": 470, # actuel 410
|
| 126 |
# Audit prod P1 — dégonflage du god-module : helpers extraits, puis
|
| 127 |
-
# (P1.1) ``run_orchestrator_helpers`` éclaté en sous-package
|
| 128 |
-
# (
|
| 129 |
-
|
|
|
|
|
|
|
|
|
|
| 130 |
"picarones/adapters/ocr/tesseract.py": 560, # actuel 479 — Phase B5 migration Option B (+ ALTO_XML expose)
|
| 131 |
"picarones/app/schemas/run_spec.py": 620, # actuel 530 — Phase B1 migration Option B (+90 LOC : 7 nouveaux champs + 2 validators)
|
| 132 |
"picarones/reports/html/render.py": 700, # actuel 615
|
|
|
|
| 124 |
"picarones/app/services/corpus_service.py": 625, # actuel 541
|
| 125 |
"picarones/app/services/path_security.py": 470, # actuel 410
|
| 126 |
# Audit prod P1 — dégonflage du god-module : helpers extraits, puis
|
| 127 |
+
# (P1.1) ``run_orchestrator_helpers`` éclaté en sous-package, puis
|
| 128 |
+
# (Phase A) 4 builders @staticmethod extraits → builders.py.
|
| 129 |
+
# Budget RATCHETÉ VERS LE BAS (731 + ~6 %, pas +15 % paresseux) :
|
| 130 |
+
# signale l'intention de poursuivre vers <500 (Phase B), n'entérine
|
| 131 |
+
# pas la taille actuelle comme acceptable.
|
| 132 |
+
"picarones/app/services/run_orchestrator.py": 775, # actuel ~731
|
| 133 |
"picarones/adapters/ocr/tesseract.py": 560, # actuel 479 — Phase B5 migration Option B (+ ALTO_XML expose)
|
| 134 |
"picarones/app/schemas/run_spec.py": 620, # actuel 530 — Phase B1 migration Option B (+90 LOC : 7 nouveaux champs + 2 validators)
|
| 135 |
"picarones/reports/html/render.py": 700, # actuel 615
|