Spaces:

Ma-Ri-Ba-Ku
/

Picarones

Running

Claude committed on May 19

Commit

95c8668

unverified ·

1 Parent(s): cd404f5

refactor(app): Phase A — extrait les 4 builders @staticmethod (run_orchestrator)

Suite à la question « pas <500 ? » : correction de mon erreur
d'analyse (j'avais classé ces staticmethods en « risqué » à tort —
sans self, extraction sûre, pattern verbatim prouvé 7×).

_load_corpus / _build_pipelines / _build_views /
_build_benchmark_service → run_orchestrator_helpers/builders.py. _build_views et
_build_benchmark_service : retrait complet (call-sites → nom
module-global). _build_pipelines + _load_corpus : wrapper mince
@staticmethod conservé (tests les appellent via orch.<m>()).

Couplage white-box rattrapé (la suite l'a attrapé, 3 échecs) :
- test_run_orchestrator_feature_parity : orch._load_corpus → wrapper
- test_sprint_a14_s53 : patch resolve_adapter_class retargeté vers
run_orchestrator_helpers.builders (le symbole a migré ; le test
white-box suit, ce n'est pas masquer un bug).

run_orchestrator.py : 1316 → 731 (-44 % cumulé). Budget CI
RATCHETÉ VERS LE BAS 1050 → 775 (731+6 %, pas +15 % : signale
l'intention <500, n'entérine pas la taille). 413 app/CLI/archi
verts, lint propre (ratchet ISC/FLY/G inclus).

https://claude.ai/code/session_01EmLiMPJJuB44QHEFzDWUvF

Files changed (5) hide show

picarones/app/services/run_orchestrator.py +25 -196
picarones/app/services/run_orchestrator_helpers/__init__.py +12 -0
picarones/app/services/run_orchestrator_helpers/builders.py +234 -0
tests/app/services/test_sprint_a14_s53_inputs_from_propagation.py +2 -2
tests/architecture/test_file_budgets.py +6 -3

picarones/app/services/run_orchestrator.py CHANGED Viewed

@@ -36,10 +36,8 @@ Anti-sur-ingénierie
 from __future__ import annotations
-import io
 import logging
 import threading
-import zipfile
 from dataclasses import dataclass, field
 from pathlib import Path
 from typing import Any, Callable
@@ -47,30 +45,16 @@ from typing import Any, Callable
 logger = logging.getLogger(__name__)
 from picarones.app.results import ReportRenderer, RunResult
-from picarones.app.schemas import RunSpec, resolve_adapter_class
-from picarones.app.services.benchmark_service import BenchmarkService
 from picarones.app.services.dependencies import (
     capture_dependencies_lock,
     capture_system_binaries_lock,
 )
-from picarones.app.services.corpus_service import (
-    CorpusImportError,
-    CorpusService,
-)
 from picarones.app.services.path_security import WorkspaceManager
 from picarones.app.services.registry_service import RegistryService
 from picarones.domain.corpus import CorpusSpec
-from picarones.evaluation.views import (
-    DefaultEvaluationViewExecutor,
-    build_alto_view,
-    build_search_view,
-    build_text_view,
-)
 from picarones.pipeline import (
-    CorpusRunner,
-    PipelineExecutor,
     PipelineSpec,
-    PipelineStep,
 )
 # Helpers stateless extraits (audit prod P1 — dégonflage god-module).
@@ -80,10 +64,14 @@ from picarones.pipeline import (
 # (donc ``monkeypatch.setattr(run_orchestrator, …)`` fonctionne aussi).
 from picarones.app.services.run_orchestrator_helpers import (
     _PipelineEngineProxy as _PipelineEngineProxy,
     _default_gt_factory as _default_gt_factory,
     _default_inputs_factory as _default_inputs_factory,
     _filesystem_payload_loader as _filesystem_payload_loader,
     _kwargs_signature as _kwargs_signature,
     _make_context_factory as _make_context_factory,
     _persist_legacy_benchmark_json as _persist_legacy_benchmark_json,
     _resolve_entity_extractor as _resolve_entity_extractor,
@@ -203,7 +191,7 @@ class RunOrchestrator:
         workspace = WorkspaceManager(self._output_dir)
         # 1. Corpus.
-        corpus_spec, extracted_dir = self._load_corpus(spec, workspace)
         # 2. Registres.
         registries = RegistryService.bootstrap_defaults()
@@ -215,14 +203,14 @@ class RunOrchestrator:
         # 4. Vues canoniques.  Phase B2.5 — propage normalization +
         # char_exclude aux vues text_final/searchability.
-        views = self._build_views(
             spec.views,
             normalization_profile=spec.normalization_profile,
             char_exclude=spec.char_exclude,
         )
         # 5. BenchmarkService.
-        bench = self._build_benchmark_service(
             registries=registries,
             adapter_resolver=adapter_resolver,
             code_version=spec.code_version,
@@ -379,12 +367,12 @@ class RunOrchestrator:
         self._output_dir.mkdir(parents=True, exist_ok=True)
         registries = RegistryService.bootstrap_defaults()
-        views = self._build_views(
             spec.views,
             normalization_profile=spec.normalization_profile,
             char_exclude=spec.char_exclude,
         )
-        bench = self._build_benchmark_service(
             registries=registries,
             adapter_resolver=adapter_resolver,
             code_version=spec.code_version,
@@ -472,112 +460,25 @@ class RunOrchestrator:
     def _load_corpus(
         spec: RunSpec, workspace: WorkspaceManager,
     ) -> tuple[CorpusSpec, Path]:
-        """Charge le corpus selon ``corpus_zip`` ou ``corpus_dir``."""
-        corpus_service = CorpusService(workspace)
-        if spec.corpus_zip is not None:
-            zip_path = Path(spec.corpus_zip)
-            zip_bytes = zip_path.read_bytes()
-            report = corpus_service.import_zip(
-                zip_bytes,
-                corpus_name=spec.corpus_name or zip_path.stem,
-                metadata=spec.corpus_metadata,
-            )
-            return report.spec, report.extracted_dir
-        # corpus_dir : on zippe à la volée le contenu du dir et on
-        # délègue à ``CorpusService`` — réutilise toute la détection
-        # sans dupliquer la logique de classification image / GT.
-        assert spec.corpus_dir is not None  # garanti par RunSpec validator
-        src_dir = Path(spec.corpus_dir)
-        if not src_dir.is_dir():
-            raise CorpusImportError(
-                f"corpus_dir n'est pas un répertoire : {src_dir!r}.",
-            )
-        buf = io.BytesIO()
-        with zipfile.ZipFile(buf, mode="w") as zf:
-            for file_path in src_dir.rglob("*"):
-                if file_path.is_file():
-                    arc = file_path.relative_to(src_dir).as_posix()
-                    zf.write(file_path, arcname=arc)
-        report = corpus_service.import_zip(
-            buf.getvalue(),
-            corpus_name=spec.corpus_name or src_dir.name,
-            metadata=spec.corpus_metadata,
-        )
-        return report.spec, report.extracted_dir
     @staticmethod
     def _build_pipelines(
         spec: RunSpec,
-    ) -> tuple[
-        list[PipelineSpec],
-        Callable[[str], Any],
-        dict[str, dict[str, Any]],
-    ]:
-        """Construit les ``PipelineSpec`` + un resolver d'adapters.
-        Disambiguation des steps :
-        - Deux steps avec la même ``(class, kwargs)`` partagent la
-          même instance d'adapter (cache).
-        - Deux steps avec la même ``id`` mais une ``class`` ou des
-          ``kwargs`` différents reçoivent des ``adapter_name``
-          distincts (préfixés par le nom de pipeline).
-        C'est essentiel pour le cas où plusieurs pipelines utilisent
-        la **même classe** avec des **kwargs différents** (ex :
-        ``PrecomputedTextAdapter`` instancié N fois avec
-        ``source_label`` distincts).
-        """
-        instance_cache: dict[str, Any] = {}
-        registered: dict[str, tuple[type, str]] = {}
-        name_to_class: dict[str, type] = {}
-        name_to_kwargs: dict[str, dict[str, Any]] = {}
-        pipeline_specs: list[PipelineSpec] = []
-        for p in spec.pipelines:
-            steps: list[PipelineStep] = []
-            for s in p.steps:
-                cls = resolve_adapter_class(s.adapter_class)
-                kwargs_sig = _kwargs_signature(s.adapter_kwargs)
-                adapter_name = s.id
-                existing = registered.get(adapter_name)
-                if existing is not None and existing != (cls, kwargs_sig):
-                    adapter_name = f"{p.name}__{s.id}"
-                registered[adapter_name] = (cls, kwargs_sig)
-                name_to_class[adapter_name] = cls
-                name_to_kwargs[adapter_name] = s.adapter_kwargs
-                # ``inputs_from`` du StepSpec YAML doit être propagé au
-                # ``domain.PipelineSpec`` pour que le DAG branchant soit
-                # honoré ; sans ce passage, un DAG branchant déclaré dans
-                # le YAML serait silencieusement exécuté en linéaire.
-                steps.append(PipelineStep(
-                    id=s.id,
-                    kind="step",
-                    adapter_name=adapter_name,
-                    input_types=s.input_types,
-                    output_types=s.output_types,
-                    inputs_from=dict(s.inputs_from),
-                ))
-            pipeline_specs.append(PipelineSpec(
-                name=p.name,
-                initial_inputs=p.initial_inputs,
-                steps=tuple(steps),
-            ))
-        def resolver(name: str) -> Any:
-            if name not in instance_cache:
-                cls = name_to_class[name]
-                kwargs = name_to_kwargs[name]
-                instance_cache[name] = cls(**kwargs)
-            return instance_cache[name]
-        # Copie défensive — le manifest doit recevoir un snapshot
-        # immuable, pas la map vivante du resolver.
-        adapter_kwargs_dump = {
-            name: dict(kwargs) for name, kwargs in name_to_kwargs.items()
-        }
-        return pipeline_specs, resolver, adapter_kwargs_dump
     def _execute_with_partial(
         self,
@@ -628,7 +529,6 @@ class RunOrchestrator:
             load_partial_pipeline_results,
             partial_path_for_pipeline,
         )
-        from picarones.domain.corpus import CorpusSpec
         from picarones.domain.run_manifest import RunManifest
         from picarones.pipeline.run_result import RunDocumentResult
@@ -835,77 +735,6 @@ class RunOrchestrator:
             document_results=tuple(final_doc_results),
         )
-    @staticmethod
-    def _build_views(
-        view_names: tuple[str, ...],
-        *,
-        normalization_profile: str | None = None,
-        char_exclude: str | None = None,
-    ) -> list[Any]:
-        """Map noms canoniques → vues construites.
-        Phase B2.5 — ``normalization_profile`` et ``char_exclude``
-        sont propagés aux vues qui les supportent (``text_final`` et
-        ``searchability``).  ``alto_documentary`` les ignore : ses
-        métriques structurelles n'opèrent pas sur du texte.
-        """
-        text_view_kwargs = {
-            "normalization_profile": normalization_profile,
-            "char_exclude": char_exclude,
-        }
-        builders: dict[str, Callable[[], Any]] = {
-            "text_final": lambda: build_text_view(**text_view_kwargs),
-            "alto_documentary": build_alto_view,
-            "searchability": lambda: build_search_view(**text_view_kwargs),
-        }
-        return [builders[name]() for name in view_names]
-    @staticmethod
-    def _build_benchmark_service(
-        *,
-        registries: RegistryService,
-        adapter_resolver: Callable[[str], Any],
-        code_version: str,
-        cancel_event: threading.Event | None = None,
-        timeout_seconds_per_doc: float = 300.0,
-    ) -> BenchmarkService:
-        """Assemble ``BenchmarkService`` avec un loader filesystem.
-        Phase B2.2 — quand ``cancel_event`` est fourni, le
-        ``CorpusRunner.run`` est wrappé pour injecter l'event dans
-        chaque appel.  Pattern strictement copié de
-        ``_benchmark_execution.py:142-149`` (legacy).
-        """
-        pipeline_executor = PipelineExecutor(
-            adapter_resolver=adapter_resolver,
-        )
-        corpus_runner = CorpusRunner(
-            pipeline_executor,
-            max_in_flight=2,
-            timeout_seconds_per_doc=timeout_seconds_per_doc,
-            poll_interval_seconds=0.05,
-        )
-        if cancel_event is not None:
-            original_run = corpus_runner.run
-            def _runner_run_with_cancel(*args: Any, **kwargs: Any) -> Any:
-                kwargs.setdefault("cancel_event", cancel_event)
-                return original_run(*args, **kwargs)
-            corpus_runner.run = _runner_run_with_cancel  # type: ignore[method-assign]
-        view_executor = DefaultEvaluationViewExecutor.from_registries(
-            registries.metrics,
-            registries.projectors,
-            _filesystem_payload_loader,
-        )
-        return BenchmarkService(
-            corpus_runner=corpus_runner,
-            view_executor=view_executor,
-            code_version=code_version,
-        )
 __all__ = [
     "OrchestrationResult",

 from __future__ import annotations
 import logging
 import threading
 from dataclasses import dataclass, field
 from pathlib import Path
 from typing import Any, Callable
 logger = logging.getLogger(__name__)
 from picarones.app.results import ReportRenderer, RunResult
+from picarones.app.schemas import RunSpec
 from picarones.app.services.dependencies import (
     capture_dependencies_lock,
     capture_system_binaries_lock,
 )
 from picarones.app.services.path_security import WorkspaceManager
 from picarones.app.services.registry_service import RegistryService
 from picarones.domain.corpus import CorpusSpec
 from picarones.pipeline import (
     PipelineSpec,
 )
 # Helpers stateless extraits (audit prod P1 — dégonflage god-module).
 # (donc ``monkeypatch.setattr(run_orchestrator, …)`` fonctionne aussi).
 from picarones.app.services.run_orchestrator_helpers import (
     _PipelineEngineProxy as _PipelineEngineProxy,
+    _build_benchmark_service as _build_benchmark_service,
+    _build_pipelines as _build_pipelines,
+    _build_views as _build_views,
     _default_gt_factory as _default_gt_factory,
     _default_inputs_factory as _default_inputs_factory,
     _filesystem_payload_loader as _filesystem_payload_loader,
     _kwargs_signature as _kwargs_signature,
+    _load_corpus as _load_corpus,
     _make_context_factory as _make_context_factory,
     _persist_legacy_benchmark_json as _persist_legacy_benchmark_json,
     _resolve_entity_extractor as _resolve_entity_extractor,
         workspace = WorkspaceManager(self._output_dir)
         # 1. Corpus.
+        corpus_spec, extracted_dir = _load_corpus(spec, workspace)
         # 2. Registres.
         registries = RegistryService.bootstrap_defaults()
         # 4. Vues canoniques.  Phase B2.5 — propage normalization +
         # char_exclude aux vues text_final/searchability.
+        views = _build_views(
             spec.views,
             normalization_profile=spec.normalization_profile,
             char_exclude=spec.char_exclude,
         )
         # 5. BenchmarkService.
+        bench = _build_benchmark_service(
             registries=registries,
             adapter_resolver=adapter_resolver,
             code_version=spec.code_version,
         self._output_dir.mkdir(parents=True, exist_ok=True)
         registries = RegistryService.bootstrap_defaults()
+        views = _build_views(
             spec.views,
             normalization_profile=spec.normalization_profile,
             char_exclude=spec.char_exclude,
         )
+        bench = _build_benchmark_service(
             registries=registries,
             adapter_resolver=adapter_resolver,
             code_version=spec.code_version,
     def _load_corpus(
         spec: RunSpec, workspace: WorkspaceManager,
     ) -> tuple[CorpusSpec, Path]:
+        """Wrapper mince — délègue à
+        :func:`run_orchestrator_helpers.builders._load_corpus`
+        (audit Phase A).  Conservé comme ``@staticmethod`` car un test
+        de parité l'appelle via ``orch._load_corpus(...)`` pour
+        recalculer un fingerprint de partial cohérent."""
+        return _load_corpus(spec, workspace)
     @staticmethod
     def _build_pipelines(
         spec: RunSpec,
+    ) -> tuple[list[PipelineSpec], Callable[[str], Any], dict[str, dict[str, Any]]]:
+        """Thin wrapper that delegates to
+        :func:`run_orchestrator_helpers.builders._build_pipelines`
+        (Phase A audit: the body was extracted out of the god-module).
+
+        Kept as a ``@staticmethod`` because a test calls it through
+        ``orch._build_pipelines(spec)``.  The ``_build_pipelines`` name
+        used in the body is the re-imported module-level function, so
+        there is no recursion: the class attribute is not part of the
+        function's globals.
+        """
+        return _build_pipelines(spec)
     def _execute_with_partial(
         self,
             load_partial_pipeline_results,
             partial_path_for_pipeline,
         )
         from picarones.domain.run_manifest import RunManifest
         from picarones.pipeline.run_result import RunDocumentResult
             document_results=tuple(final_doc_results),
         )
 __all__ = [
     "OrchestrationResult",

picarones/app/services/run_orchestrator_helpers/__init__.py CHANGED Viewed

@@ -6,6 +6,8 @@ en sous-modules cohésifs :
 - :mod:`.factories`  — GT / inputs / RunContext (stateless)
 - :mod:`.loaders`    — payload filesystem + signature kwargs
 - :mod:`.legacy`     — pont converter ``BenchmarkResult`` + résolution
   NER + persistance JSON legacy
@@ -25,6 +27,12 @@ from picarones.app.services.run_orchestrator_helpers.loaders import (
     _filesystem_payload_loader as _filesystem_payload_loader,
     _kwargs_signature as _kwargs_signature,
 )
 from picarones.app.services.run_orchestrator_helpers.legacy import (
     _PipelineEngineProxy as _PipelineEngineProxy,
     _persist_legacy_benchmark_json as _persist_legacy_benchmark_json,
@@ -33,10 +41,14 @@ from picarones.app.services.run_orchestrator_helpers.legacy import (
 __all__ = [
     "_PipelineEngineProxy",
     "_default_gt_factory",
     "_default_inputs_factory",
     "_filesystem_payload_loader",
     "_kwargs_signature",
     "_make_context_factory",
     "_persist_legacy_benchmark_json",
     "_resolve_entity_extractor",

 - :mod:`.factories`  — GT / inputs / RunContext (stateless)
 - :mod:`.loaders`    — payload filesystem + signature kwargs
+- :mod:`.builders`   — corpus / pipelines / vues / BenchmarkService
+  (ex-``@staticmethod`` du god-module, Phase A)
 - :mod:`.legacy`     — pont converter ``BenchmarkResult`` + résolution
   NER + persistance JSON legacy
     _filesystem_payload_loader as _filesystem_payload_loader,
     _kwargs_signature as _kwargs_signature,
 )
+from picarones.app.services.run_orchestrator_helpers.builders import (
+    _build_benchmark_service as _build_benchmark_service,
+    _build_pipelines as _build_pipelines,
+    _build_views as _build_views,
+    _load_corpus as _load_corpus,
+)
 from picarones.app.services.run_orchestrator_helpers.legacy import (
     _PipelineEngineProxy as _PipelineEngineProxy,
     _persist_legacy_benchmark_json as _persist_legacy_benchmark_json,
 __all__ = [
     "_PipelineEngineProxy",
+    "_build_benchmark_service",
+    "_build_pipelines",
+    "_build_views",
     "_default_gt_factory",
     "_default_inputs_factory",
     "_filesystem_payload_loader",
     "_kwargs_signature",
+    "_load_corpus",
     "_make_context_factory",
     "_persist_legacy_benchmark_json",
     "_resolve_entity_extractor",

picarones/app/services/run_orchestrator_helpers/builders.py ADDED Viewed

	@@ -0,0 +1,234 @@

+"""Stateless builders for ``RunOrchestrator`` (corpus / pipelines /
+views / benchmark service).
+
+Production audit, Phase A — the four ``@staticmethod`` helpers (which
+never used ``self``) were extracted from the ``run_orchestrator.py``
+god-module.  The move is verbatim and behavior is strictly preserved:
+``run_orchestrator`` re-imports these names (facade) and keeps thin
+``@staticmethod`` wrappers for ``_build_pipelines`` and ``_load_corpus``
+because tests call them through ``orch._build_pipelines(...)`` and
+``orch._load_corpus(...)``.
+"""
+from __future__ import annotations
+import io
+import threading
+import zipfile
+from pathlib import Path
+from typing import Any, Callable
+from picarones.app.schemas import RunSpec, resolve_adapter_class
+from picarones.app.services.benchmark_service import BenchmarkService
+from picarones.app.services.corpus_service import (
+    CorpusImportError,
+    CorpusService,
+)
+from picarones.app.services.path_security import WorkspaceManager
+from picarones.app.services.registry_service import RegistryService
+from picarones.domain.corpus import CorpusSpec
+from picarones.evaluation.views import (
+    DefaultEvaluationViewExecutor,
+    build_alto_view,
+    build_search_view,
+    build_text_view,
+)
+from picarones.pipeline import (
+    CorpusRunner,
+    PipelineExecutor,
+    PipelineSpec,
+    PipelineStep,
+)
+from picarones.app.services.run_orchestrator_helpers.loaders import (
+    _filesystem_payload_loader,
+    _kwargs_signature,
+)
+def _load_corpus(
+    spec: RunSpec, workspace: WorkspaceManager,
+) -> tuple[CorpusSpec, Path]:
+    """Load the corpus designated by ``corpus_zip`` or ``corpus_dir``.
+
+    When ``spec.corpus_zip`` is set, the archive bytes are read from disk
+    and handed to ``CorpusService.import_zip``.  Otherwise the content of
+    ``spec.corpus_dir`` is zipped in memory and imported the same way.
+
+    Returns the ``(spec, extracted_dir)`` pair taken from the import
+    report.  Raises ``CorpusImportError`` when ``corpus_dir`` is not a
+    directory.
+    """
+    corpus_service = CorpusService(workspace)
+    if spec.corpus_zip is not None:
+        zip_path = Path(spec.corpus_zip)
+        zip_bytes = zip_path.read_bytes()
+        report = corpus_service.import_zip(
+            zip_bytes,
+            # Without an explicit name, fall back to the archive file
+            # name stripped of its suffix.
+            corpus_name=spec.corpus_name or zip_path.stem,
+            metadata=spec.corpus_metadata,
+        )
+        return report.spec, report.extracted_dir
+    # corpus_dir: zip the directory content on the fly and delegate to
+    # ``CorpusService`` — this reuses all of its detection without
+    # duplicating the image / GT classification logic.
+    assert spec.corpus_dir is not None  # guaranteed by the RunSpec validator
+    src_dir = Path(spec.corpus_dir)
+    if not src_dir.is_dir():
+        raise CorpusImportError(
+            f"corpus_dir n'est pas un répertoire : {src_dir!r}.",
+        )
+    # The archive is built entirely in memory; nothing is written next
+    # to the source directory.
+    buf = io.BytesIO()
+    with zipfile.ZipFile(buf, mode="w") as zf:
+        for file_path in src_dir.rglob("*"):
+            # Only regular files are archived, under their POSIX path
+            # relative to ``src_dir``.
+            if file_path.is_file():
+                arc = file_path.relative_to(src_dir).as_posix()
+                zf.write(file_path, arcname=arc)
+    report = corpus_service.import_zip(
+        buf.getvalue(),
+        # Without an explicit name, fall back to the directory name.
+        corpus_name=spec.corpus_name or src_dir.name,
+        metadata=spec.corpus_metadata,
+    )
+    return report.spec, report.extracted_dir
+def _build_pipelines(
+    spec: RunSpec,
+) -> tuple[
+    list[PipelineSpec],
+    Callable[[str], Any],
+    dict[str, dict[str, Any]],
+]:
+    """Build the ``PipelineSpec`` objects plus an adapter resolver.
+
+    Step disambiguation:
+
+    - Two steps with the same ``id`` and the same ``(class, kwargs
+      signature)`` resolve to the same ``adapter_name`` and therefore
+      share one adapter instance (cached by the resolver).
+    - A step whose ``id`` is already registered with a different class
+      or different kwargs gets a distinct ``adapter_name``, prefixed
+      with its pipeline name (``"<pipeline>__<step id>"``).
+
+    This matters when several pipelines use the **same class** with
+    **different kwargs** (e.g. ``PrecomputedTextAdapter`` instantiated
+    N times with distinct ``source_label`` values).
+
+    Returns a 3-tuple ``(pipeline_specs, resolver, adapter_kwargs_dump)``:
+    the built specs, a callable mapping an ``adapter_name`` to its
+    lazily-created instance (``KeyError`` for an unknown name), and a
+    per-adapter copy of the kwargs.
+    """
+    instance_cache: dict[str, Any] = {}
+    # adapter_name -> (class, kwargs signature), used to detect clashes.
+    registered: dict[str, tuple[type, str]] = {}
+    name_to_class: dict[str, type] = {}
+    name_to_kwargs: dict[str, dict[str, Any]] = {}
+    pipeline_specs: list[PipelineSpec] = []
+    for p in spec.pipelines:
+        steps: list[PipelineStep] = []
+        for s in p.steps:
+            cls = resolve_adapter_class(s.adapter_class)
+            kwargs_sig = _kwargs_signature(s.adapter_kwargs)
+            adapter_name = s.id
+            existing = registered.get(adapter_name)
+            if existing is not None and existing != (cls, kwargs_sig):
+                # NOTE(review): the prefixed name is not re-checked
+                # against ``registered``; presumably pipeline names and
+                # step ids make it unique — confirm.
+                adapter_name = f"{p.name}__{s.id}"
+            registered[adapter_name] = (cls, kwargs_sig)
+            name_to_class[adapter_name] = cls
+            name_to_kwargs[adapter_name] = s.adapter_kwargs
+            # ``inputs_from`` from the YAML StepSpec must be propagated
+            # to the ``domain.PipelineSpec`` so that a branching DAG is
+            # honored; without it, a branching DAG declared in the YAML
+            # would silently be executed linearly.
+            steps.append(PipelineStep(
+                id=s.id,
+                kind="step",
+                adapter_name=adapter_name,
+                input_types=s.input_types,
+                output_types=s.output_types,
+                inputs_from=dict(s.inputs_from),
+            ))
+        pipeline_specs.append(PipelineSpec(
+            name=p.name,
+            initial_inputs=p.initial_inputs,
+            steps=tuple(steps),
+        ))
+    def resolver(name: str) -> Any:
+        # Instantiate each adapter on first request only, then reuse it.
+        if name not in instance_cache:
+            cls = name_to_class[name]
+            kwargs = name_to_kwargs[name]
+            instance_cache[name] = cls(**kwargs)
+        return instance_cache[name]
+    # Defensive copy — the manifest must receive a snapshot, not the
+    # live map used by the resolver.  The copy is one level deep
+    # (``dict(kwargs)``); nested values are still shared.
+    adapter_kwargs_dump = {
+        name: dict(kwargs) for name, kwargs in name_to_kwargs.items()
+    }
+    return pipeline_specs, resolver, adapter_kwargs_dump
+def _build_views(
+    view_names: tuple[str, ...],
+    *,
+    normalization_profile: str | None = None,
+    char_exclude: str | None = None,
+) -> list[Any]:
+    """Map canonical view names to built views, in the order given.
+
+    Phase B2.5 — ``normalization_profile`` and ``char_exclude`` are
+    forwarded to the views that support them (``text_final`` and
+    ``searchability``).  ``alto_documentary`` ignores them: its
+    structural metrics do not operate on text.
+
+    Raises ``KeyError`` when a name is not one of the three known views.
+    """
+    text_view_kwargs = {
+        "normalization_profile": normalization_profile,
+        "char_exclude": char_exclude,
+    }
+    # Zero-argument factories: a view is only built when its name is
+    # actually requested.
+    builders: dict[str, Callable[[], Any]] = {
+        "text_final": lambda: build_text_view(**text_view_kwargs),
+        "alto_documentary": build_alto_view,
+        "searchability": lambda: build_search_view(**text_view_kwargs),
+    }
+    return [builders[name]() for name in view_names]
+def _build_benchmark_service(
+    *,
+    registries: RegistryService,
+    adapter_resolver: Callable[[str], Any],
+    code_version: str,
+    cancel_event: threading.Event | None = None,
+    timeout_seconds_per_doc: float = 300.0,
+) -> BenchmarkService:
+    """Assemble a ``BenchmarkService`` backed by a filesystem loader.
+
+    Phase B2.2 — when ``cancel_event`` is provided, ``CorpusRunner.run``
+    is wrapped so that the event is injected into every call.  Per the
+    original author's note, the pattern is copied from
+    ``_benchmark_execution.py:142-149`` (legacy).
+    """
+    pipeline_executor = PipelineExecutor(
+        adapter_resolver=adapter_resolver,
+    )
+    corpus_runner = CorpusRunner(
+        pipeline_executor,
+        max_in_flight=2,
+        timeout_seconds_per_doc=timeout_seconds_per_doc,
+        poll_interval_seconds=0.05,
+    )
+    if cancel_event is not None:
+        # Capture the bound method before it is shadowed on the instance.
+        original_run = corpus_runner.run
+        def _runner_run_with_cancel(*args: Any, **kwargs: Any) -> Any:
+            # ``setdefault``: an explicit ``cancel_event`` passed by the
+            # caller takes precedence over the injected one.
+            kwargs.setdefault("cancel_event", cancel_event)
+            return original_run(*args, **kwargs)
+        corpus_runner.run = _runner_run_with_cancel  # type: ignore[method-assign]
+    view_executor = DefaultEvaluationViewExecutor.from_registries(
+        registries.metrics,
+        registries.projectors,
+        _filesystem_payload_loader,
+    )
+    return BenchmarkService(
+        corpus_runner=corpus_runner,
+        view_executor=view_executor,
+        code_version=code_version,
+    )
+__all__ = [
+    "_build_benchmark_service",
+    "_build_pipelines",
+    "_build_views",
+    "_load_corpus",
+]

tests/app/services/test_sprint_a14_s53_inputs_from_propagation.py CHANGED Viewed

@@ -59,7 +59,7 @@ def test_orchestrator_propagates_inputs_from_to_pipeline_step(
     # inputs_from.
     from unittest.mock import MagicMock, patch
     with patch(
-        "picarones.app.services.run_orchestrator.resolve_adapter_class",
         return_value=MagicMock,
     ):
         pipeline_specs, _resolver, _kwargs = orch._build_pipelines(spec)
@@ -95,7 +95,7 @@ def test_step_without_inputs_from_yields_empty_dict(tmp_path) -> None:
     orch = RunOrchestrator(output_dir=tmp_path / "out")
     from unittest.mock import MagicMock, patch
     with patch(
-        "picarones.app.services.run_orchestrator.resolve_adapter_class",
         return_value=MagicMock,
     ):
         pipeline_specs, _, _ = orch._build_pipelines(spec)

     # inputs_from.
     from unittest.mock import MagicMock, patch
     with patch(
+        "picarones.app.services.run_orchestrator_helpers.builders.resolve_adapter_class",
         return_value=MagicMock,
     ):
         pipeline_specs, _resolver, _kwargs = orch._build_pipelines(spec)
     orch = RunOrchestrator(output_dir=tmp_path / "out")
     from unittest.mock import MagicMock, patch
     with patch(
+        "picarones.app.services.run_orchestrator_helpers.builders.resolve_adapter_class",
         return_value=MagicMock,
     ):
         pipeline_specs, _, _ = orch._build_pipelines(spec)

tests/architecture/test_file_budgets.py CHANGED Viewed

@@ -124,9 +124,12 @@ FILE_BUDGETS: dict[str, int] = {
     "picarones/app/services/corpus_service.py": 625,      # actuel 541
     "picarones/app/services/path_security.py": 470,       # actuel 410
     # Audit prod P1 — dégonflage du god-module : helpers extraits, puis
-    # (P1.1) ``run_orchestrator_helpers`` éclaté en sous-package cohésif
-    # (factories/loaders/legacy, chacun < 400 : pas d'entrée budget).
-    "picarones/app/services/run_orchestrator.py": 1050,   # actuel ~913
     "picarones/adapters/ocr/tesseract.py": 560,          # actuel 479 — Phase B5 migration Option B (+ ALTO_XML expose)
     "picarones/app/schemas/run_spec.py": 620,             # actuel 530 — Phase B1 migration Option B (+90 LOC : 7 nouveaux champs + 2 validators)
     "picarones/reports/html/render.py": 700,           # actuel 615

     "picarones/app/services/corpus_service.py": 625,      # actuel 541
     "picarones/app/services/path_security.py": 470,       # actuel 410
     # Audit prod P1 — dégonflage du god-module : helpers extraits, puis
+    # (P1.1) ``run_orchestrator_helpers`` éclaté en sous-package, puis
+    # (Phase A) 4 builders @staticmethod extraits → builders.py.
+    # Budget RATCHETÉ VERS LE BAS (731 + ~6 %, pas +15 % paresseux) :
+    # signale l'intention de poursuivre vers <500 (Phase B), n'entérine
+    # pas la taille actuelle comme acceptable.
+    "picarones/app/services/run_orchestrator.py": 775,   # actuel ~731
     "picarones/adapters/ocr/tesseract.py": 560,          # actuel 479 — Phase B5 migration Option B (+ ALTO_XML expose)
     "picarones/app/schemas/run_spec.py": 620,             # actuel 530 — Phase B1 migration Option B (+90 LOC : 7 nouveaux champs + 2 validators)
     "picarones/reports/html/render.py": 700,           # actuel 615