import os
import tempfile

import docx
import easyocr
import fitz
import gradio as gr
import numpy as np
from PIL import Image

# Load the OCR engine once at import time: Arabic + English models, CPU only.
reader = easyocr.Reader(lang_list=['ar', 'en'], gpu=False)

def _ocr_image_file(image_path):
    """Run OCR on the image stored at *image_path* and return one text chunk.

    The recognised paragraphs are joined with newlines and a trailing newline
    is appended so consecutive chunks can be concatenated directly.
    """
    paragraphs = reader.readtext(image_path, detail=0, paragraph=True)
    return "\n".join(paragraphs) + "\n"


def _ocr_pdf(pdf_path, scratch_dir):
    """Rasterise every page of the PDF at *pdf_path* and OCR each page.

    Pages are rendered with a 2x zoom matrix and written to a PNG inside
    *scratch_dir* (the same file is overwritten for every page). The document
    is opened as a context manager so it is closed even if rendering or OCR
    raises.
    """
    page_png = os.path.join(scratch_dir, "page.png")
    chunks = []
    with fitz.open(pdf_path) as doc:
        for page in doc:
            pix = page.get_pixmap(matrix=fitz.Matrix(2, 2))
            img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
            img.save(page_png)
            chunks.append(_ocr_image_file(page_png))
    return "".join(chunks)


def process_raw_ocr(file_objs, img_input, current_text):
    """Extract raw text from uploaded files and/or an image and append it.

    Args:
        file_objs: Iterable of uploads, or ``None``. Each item may be a path
            (``str`` / ``os.PathLike``) or an object exposing the path as
            ``.name``. Handled extensions (case-insensitive): ``.jpg``,
            ``.jpeg``, ``.png`` (OCR), ``.pdf`` (page-by-page OCR), ``.docx``
            (paragraph text) and ``.txt`` (read as UTF-8, undecodable bytes
            ignored). Files with any other extension are skipped.
        img_input: A NumPy array or a PIL image to OCR, or ``None``.
        current_text: Text already shown in the output box, or ``None``.

    Returns:
        ``current_text`` followed by a blank line and the newly extracted
        text, with leading/trailing whitespace stripped.
    """
    chunks = []

    # A private per-call scratch directory replaces fixed file names in the
    # working directory: concurrent calls can no longer overwrite each other's
    # images, and the directory is removed even when OCR raises.
    with tempfile.TemporaryDirectory() as scratch_dir:
        # 1. Uploaded files.
        for file in file_objs or ():
            # Accept plain paths as well as objects that carry the path in
            # ``.name``.
            if isinstance(file, (str, os.PathLike)):
                path = os.fspath(file)
            else:
                path = file.name
            ext = os.path.splitext(path)[1].lower()

            if ext in (".jpg", ".jpeg", ".png"):
                chunks.append(_ocr_image_file(path))
            elif ext == ".pdf":
                chunks.append(_ocr_pdf(path, scratch_dir))
            elif ext == ".docx":
                document = docx.Document(path)
                chunks.append(
                    "\n".join(p.text for p in document.paragraphs) + "\n"
                )
            elif ext == ".txt":
                with open(path, "r", encoding="utf-8", errors="ignore") as f:
                    chunks.append(f.read() + "\n")

        # 2. Image / camera input.
        if img_input is not None:
            if isinstance(img_input, np.ndarray):
                camera_img = Image.fromarray(img_input.astype("uint8"))
            else:
                camera_img = img_input
            camera_png = os.path.join(scratch_dir, "camera.png")
            camera_img.save(camera_png)
            chunks.append(_ocr_image_file(camera_png))

    # Treat a missing previous value as empty instead of the literal "None".
    previous = "" if current_text is None else current_text
    return f"{previous}\n\n{''.join(chunks)}".strip()

# Build the UI: inputs and the trigger button on the left, result on the right.
with gr.Blocks() as demo:
    gr.Markdown(value="# 🔍 Raw OCR Engine (نسخة خام)")

    with gr.Row():
        # Left column: file uploads, image input and the extract button.
        with gr.Column():
            file_input = gr.File(label="الملفات", file_count="multiple")
            image_input = gr.Image(label="الكاميرا")
            extract_button = gr.Button(
                value="استخراج النص الخام",
                variant="primary",
            )

        # Right column: the extracted text.
        with gr.Column():
            output_box = gr.Textbox(label="النص المستخرج كما هو", lines=25)

    # The output box is also passed as an input so each run appends to the
    # text that is already displayed.
    extract_button.click(
        fn=process_raw_ocr,
        inputs=[file_input, image_input, output_box],
        outputs=[output_box],
    )

# Start the Gradio server.
demo.launch()