Spaces:
Sleeping
Sleeping
File size: 4,967 Bytes
ea4c81b bb31829 ea4c81b bb31829 ea4c81b | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 | """Adaptateur OCR — Google Cloud Vision API.
Utilise l'API Google Cloud Vision pour la détection de texte dans des
documents (méthode ``DOCUMENT_TEXT_DETECTION``, optimisée pour les textes
denses et multilingues).
Authentification :
- Via service account JSON : variable d'environnement
``GOOGLE_APPLICATION_CREDENTIALS`` → chemin vers le fichier JSON
- Via clé API simple : variable d'environnement ``GOOGLE_API_KEY``
Le mode service account est recommandé pour la production.
"""
from __future__ import annotations
import base64
import json
import os
import urllib.error
import urllib.request
from pathlib import Path
from typing import Optional
from picarones.engines.base import BaseOCREngine
class GoogleVisionEngine(BaseOCREngine):
    """OCR engine backed by the Google Cloud Vision API.

    Authentication is read from the environment when the engine is built:
    ``GOOGLE_APPLICATION_CREDENTIALS`` selects the ``google-cloud-vision``
    SDK path; otherwise ``GOOGLE_API_KEY`` selects a direct REST call.

    Configuration
    -------------
    language_hints : list[str]
        Language hints sent with every request (e.g. ``["fr"]``, which is
        also the default).
    feature_type : str
        Detection type: ``"DOCUMENT_TEXT_DETECTION"`` (default, meant for
        dense text) or ``"TEXT_DETECTION"`` (meant for short text).
    """

    @property
    def name(self) -> str:
        """Return the identifier of this engine."""
        return "google_vision"

    def version(self) -> str:
        """Return the Vision API version targeted by this adapter."""
        return "v1"

    def __init__(self, config: Optional[dict] = None) -> None:
        """Read credentials from the environment and options from *config*.

        Parameters
        ----------
        config : dict, optional
            Engine configuration, forwarded to the base class; the keys
            ``language_hints`` and ``feature_type`` are read from
            ``self.config`` afterwards.
        """
        super().__init__(config)
        self._api_key = os.environ.get("GOOGLE_API_KEY")
        self._credentials_path = os.environ.get("GOOGLE_APPLICATION_CREDENTIALS")
        self._language_hints: list[str] = self.config.get("language_hints", ["fr"])
        self._feature_type: str = self.config.get(
            "feature_type", "DOCUMENT_TEXT_DETECTION"
        )

    def _run_ocr(self, image_path: Path) -> str:
        """Run OCR on *image_path* using whichever credentials are set.

        Raises
        ------
        RuntimeError
            If neither ``GOOGLE_APPLICATION_CREDENTIALS`` nor
            ``GOOGLE_API_KEY`` was set when the engine was created.
        """
        # Service-account credentials take priority (SDK path); the API key
        # is only used when no credentials path is configured.
        if self._credentials_path:
            return self._run_via_sdk(image_path)
        if self._api_key:
            return self._run_via_rest(image_path)
        raise RuntimeError(
            "Authentification Google Vision manquante. Définissez "
            "GOOGLE_APPLICATION_CREDENTIALS (service account JSON) "
            "ou GOOGLE_API_KEY."
        )

    def _run_via_sdk(self, image_path: Path) -> str:
        """Run OCR through the ``google-cloud-vision`` client library.

        Returns
        -------
        str
            The recognised text; ``""`` when ``TEXT_DETECTION`` yields no
            annotation.

        Raises
        ------
        RuntimeError
            If the package is not installed, or if the API response
            carries an error message.
        """
        try:
            # Imported lazily so the module loads without the optional SDK.
            from google.cloud import vision
        except ImportError as exc:
            raise RuntimeError(
                "Le package 'google-cloud-vision' n'est pas installé. "
                "Lancez : pip install google-cloud-vision"
            ) from exc

        client = vision.ImageAnnotatorClient()
        image = vision.Image(content=image_path.read_bytes())
        context = vision.ImageContext(language_hints=self._language_hints)

        is_document = self._feature_type == "DOCUMENT_TEXT_DETECTION"
        detect = (
            client.document_text_detection if is_document else client.text_detection
        )
        response = detect(image=image, image_context=context)

        # Mirror the REST path: a failed annotation must raise instead of
        # being reported as empty text.
        if response.error.message:
            raise RuntimeError(
                f"Google Vision API erreur : {response.error.message}"
            )

        if is_document:
            return response.full_text_annotation.text
        annotations = response.text_annotations
        return annotations[0].description if annotations else ""

    def _run_via_rest(self, image_path: Path) -> str:
        """Run OCR through a direct REST call authenticated by API key.

        Returns
        -------
        str
            The recognised text, or ``""`` when the response holds none.

        Raises
        ------
        RuntimeError
            On an HTTP error status, or when the first response entry
            contains an ``"error"`` object.
        """
        image_b64 = base64.b64encode(image_path.read_bytes()).decode("ascii")
        payload = {
            "requests": [
                {
                    "image": {"content": image_b64},
                    "features": [{"type": self._feature_type, "maxResults": 1}],
                    "imageContext": {"languageHints": self._language_hints},
                }
            ]
        }
        url = "https://vision.googleapis.com/v1/images:annotate"
        data = json.dumps(payload).encode("utf-8")
        req = urllib.request.Request(
            url,
            data=data,
            headers={
                "Content-Type": "application/json",
                "X-Goog-Api-Key": self._api_key,
            },
        )
        try:
            with urllib.request.urlopen(req, timeout=60) as resp:
                result = json.loads(resp.read().decode("utf-8"))
        except urllib.error.HTTPError as exc:
            # Decode leniently: an undecodable error body must not hide the
            # HTTP failure behind a UnicodeDecodeError.
            body = exc.read().decode("utf-8", errors="replace")
            raise RuntimeError(f"Google Vision API erreur {exc.code}: {body}") from exc

        responses = result.get("responses", [{}])
        if not responses:
            return ""
        first = responses[0]
        if "error" in first:
            raise RuntimeError(f"Google Vision API erreur : {first['error']}")

        if self._feature_type == "DOCUMENT_TEXT_DETECTION":
            return first.get("fullTextAnnotation", {}).get("text", "")
        texts = first.get("textAnnotations", [])
        return texts[0]["description"] if texts else ""
|