B_Asistant / app.py
Asem75's picture
Update app.py
7284cb3 verified
Raw
History Blame Contribute Delete
2.6 kB
import os
import tempfile

import docx
import easyocr
import fitz
import gradio as gr
import numpy as np
from PIL import Image
# Load the OCR engine once at import time: Arabic + English recognition,
# with GPU use disabled (gpu=False).
reader = easyocr.Reader(['ar', 'en'], gpu=False)
def _ocr_image_file(image_path):
    """Run OCR on the image stored at *image_path*; return newline-terminated text."""
    recognized = reader.readtext(image_path, detail=0, paragraph=True)
    return "\n".join(recognized) + "\n"


def process_raw_ocr(file_objs, img_input, current_text):
    """Extract raw text from uploaded files and/or an image and append it.

    Supported uploads are chosen by file extension (case-insensitive):
    ``.jpg``/``.jpeg``/``.png`` are OCR'd directly, ``.pdf`` pages are
    rendered to images and OCR'd one by one, ``.docx`` paragraphs and
    ``.txt`` contents are read as text. Any other extension is skipped.

    Args:
        file_objs: Iterable of uploads, or ``None``/empty. Each item may be
            a path (``str`` or ``os.PathLike``) or an object whose ``name``
            attribute holds the path.
        img_input: A NumPy array or a PIL image to OCR, or ``None``.
        current_text: Text already shown to the user; ``None`` is treated
            as an empty string.

    Returns:
        ``current_text`` followed by a blank line and the newly extracted
        text, with leading/trailing whitespace stripped.
    """
    pieces = []

    # Every call gets its own scratch directory, so concurrent requests never
    # overwrite each other's intermediate images, and the directory is removed
    # even when OCR or rendering raises.
    with tempfile.TemporaryDirectory() as tmp_dir:
        # 1. Uploaded files.
        for file in file_objs or []:
            if isinstance(file, (str, os.PathLike)):
                path = os.fspath(file)
            else:
                path = file.name
            ext = os.path.splitext(path)[1].lower()

            if ext in (".jpg", ".jpeg", ".png"):
                pieces.append(_ocr_image_file(path))
            elif ext == ".pdf":
                page_png = os.path.join(tmp_dir, "page.png")
                doc = fitz.open(path)
                try:
                    for page in doc:
                        # Matrix(2, 2) scales the rendering by 2 on both axes.
                        pix = page.get_pixmap(matrix=fitz.Matrix(2, 2))
                        img = Image.frombytes(
                            "RGB", [pix.width, pix.height], pix.samples
                        )
                        img.save(page_png)
                        pieces.append(_ocr_image_file(page_png))
                finally:
                    # Release the document handle even if a page fails.
                    doc.close()
            elif ext == ".docx":
                d = docx.Document(path)
                pieces.append("\n".join([p.text for p in d.paragraphs]) + "\n")
            elif ext == ".txt":
                # Undecodable bytes are dropped rather than aborting the run.
                with open(path, "r", encoding="utf-8", errors="ignore") as f:
                    pieces.append(f.read() + "\n")

        # 2. Single image / camera capture.
        if img_input is not None:
            if isinstance(img_input, np.ndarray):
                img_pil = Image.fromarray(img_input.astype('uint8'))
            else:
                img_pil = img_input
            capture_png = os.path.join(tmp_dir, "capture.png")
            img_pil.save(capture_png)
            pieces.append(_ocr_image_file(capture_png))

    text_output = "".join(pieces)
    previous = "" if current_text is None else current_text
    return f"{previous}\n\n{text_output}".strip()
# Build the Gradio UI: inputs (files, image, button) in the first column,
# the extracted-text box in the second.
with gr.Blocks() as demo:
    gr.Markdown("# 🔍 Raw OCR Engine (نسخة خام)")
    with gr.Row():
        with gr.Column():
            # Multiple files may be uploaded at once.
            f_in = gr.File(label="الملفات", file_count="multiple")
            i_in = gr.Image(label="الكاميرا")
            btn = gr.Button("استخراج النص الخام", variant="primary")
        with gr.Column():
            out = gr.Textbox(label="النص المستخرج كما هو", lines=25)
    # The textbox is passed both as an input and as the output, so each click
    # hands its current content to process_raw_ocr and shows the returned text.
    btn.click(process_raw_ocr, inputs=[f_in, i_in, out], outputs=[out])
# Start the app server.
demo.launch()