Spaces:

Asem75
/

B_Asistant

Sleeping

App Files Files Community

B_Asistant / app.py

Asem75

Update app.py

7284cb3 verified about 2 months ago

Raw

History Blame Contribute Delete

2.6 kB

	import gradio as gr
	import easyocr
	import fitz
	import docx
	import os
	import numpy as np
	from PIL import Image

	# Load the OCR engine once, at module import, so every request reuses it.
	# It is configured for Arabic and English text and runs with the GPU disabled.
	reader = easyocr.Reader(['ar', 'en'], gpu=False)

	def _ocr_image_via_tempfile(image):
	    """Run OCR on a PIL image and return the recognised text plus a newline.

	    The image is written to a uniquely named scratch PNG (instead of a fixed
	    name in the working directory) so overlapping requests cannot overwrite
	    each other's file. The scratch file is removed even if OCR fails.
	    """
	    import tempfile

	    fd, tmp_path = tempfile.mkstemp(suffix=".png")
	    os.close(fd)  # only the path is needed; PIL reopens the file itself
	    try:
	        image.save(tmp_path)
	        lines = reader.readtext(tmp_path, detail=0, paragraph=True)
	    finally:
	        os.remove(tmp_path)
	    return "\n".join(lines) + "\n"


	def _extract_file_text(path):
	    """Return the text extracted from one file, chosen by its extension.

	    Images are OCR'd directly, PDF pages are rendered and OCR'd, .docx and
	    .txt files are read as text. Any other extension yields an empty string.
	    """
	    ext = os.path.splitext(path)[1].lower()

	    if ext in (".jpg", ".jpeg", ".png"):
	        lines = reader.readtext(path, detail=0, paragraph=True)
	        return "\n".join(lines) + "\n"

	    if ext == ".pdf":
	        page_texts = []
	        doc = fitz.open(path)
	        try:
	            for page in doc:
	                # Render at 2x zoom on both axes before recognition.
	                pix = page.get_pixmap(matrix=fitz.Matrix(2, 2))
	                page_img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
	                page_texts.append(_ocr_image_via_tempfile(page_img))
	        finally:
	            # Close the document even when rendering or OCR raises.
	            doc.close()
	        return "".join(page_texts)

	    if ext == ".docx":
	        d = docx.Document(path)
	        return "\n".join(p.text for p in d.paragraphs) + "\n"

	    if ext == ".txt":
	        with open(path, "r", encoding="utf-8", errors="ignore") as fh:
	            return fh.read() + "\n"

	    return ""


	def process_raw_ocr(file_objs, img_input, current_text):
	    """Extract raw text from uploaded files and/or an image and append it.

	    Args:
	        file_objs: Iterable of uploads, or None/empty. Each item may be a
	            path string or an object whose ``name`` attribute is the path
	            (presumably what the Gradio File component supplies; confirm
	            against the installed Gradio version).
	        img_input: A NumPy array, a PIL image, or None when no image is given.
	        current_text: Text already shown in the output box; None is treated
	            as empty.

	    Returns:
	        ``current_text`` followed by a blank line and the newly extracted
	        text, with leading and trailing whitespace stripped.
	    """
	    pieces = []

	    # 1. Uploaded files
	    for file in file_objs or ():
	        # Accept both plain path strings and objects exposing ``.name``.
	        path = getattr(file, "name", file)
	        pieces.append(_extract_file_text(path))

	    # 2. Image / camera input
	    if img_input is not None:
	        if isinstance(img_input, np.ndarray):
	            img_pil = Image.fromarray(img_input.astype('uint8'))
	        else:
	            img_pil = img_input
	        pieces.append(_ocr_image_via_tempfile(img_pil))

	    previous = "" if current_text is None else current_text
	    text_output = "".join(pieces)
	    return f"{previous}\n\n{text_output}".strip()

	# Page layout: one row with two columns. The first column holds the file
	# upload, the image input and the trigger button; the second holds the result.
	with gr.Blocks() as demo:
	    gr.Markdown("# 🔍 Raw OCR Engine (نسخة خام)")
	    with gr.Row():
	        with gr.Column():
	            f_in = gr.File(label="الملفات", file_count="multiple")
	            i_in = gr.Image(label="الكاميرا")
	            btn = gr.Button("استخراج النص الخام", variant="primary")
	        with gr.Column():
	            out = gr.Textbox(label="النص المستخرج كما هو", lines=25)

	    # The textbox is passed in as an input and also receives the output, so
	    # the handler sees the text currently displayed when it builds the result.
	    btn.click(
	        fn=process_raw_ocr,
	        inputs=[f_in, i_in, out],
	        outputs=[out],
	    )

	# Start the Gradio server for this app.
	demo.launch()