# translator / app.py
# Uploaded by chanceown; last commit "Update app.py" (7e174f6, verified).
import gradio as gr
from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
import fasttext
import urllib.request
import os
# Local filename of the fastText language-identification model.
MODEL_PATH = "lid.176.ftz"
# Where the model is fetched from when it is not present locally.
_LID_MODEL_URL = "https://dl.fbaipublicfiles.com/fasttext/supervised-models/lid.176.ftz"

if not os.path.exists(MODEL_PATH):
    # Download under a temporary name and rename only after the transfer
    # finished: an interrupted download must not leave a truncated file at
    # MODEL_PATH, because the existence check above would then skip the
    # download on every later start and loading would keep failing.
    _partial_path = MODEL_PATH + ".part"
    try:
        urllib.request.urlretrieve(_LID_MODEL_URL, _partial_path)
        os.replace(_partial_path, MODEL_PATH)
    finally:
        # No-op after a successful rename; cleans up after a failed download.
        if os.path.exists(_partial_path):
            os.remove(_partial_path)

# Loaded once at import time and shared by every detect_language() call.
lang_model = fasttext.load_model(MODEL_PATH)
# Very common Kinyarwanda words used as a safety net before asking the model.
_RW_KEYWORDS = frozenset({"muraho", "amakuru", "neza", "murakoze", "yego", "oya"})


def detect_language(text):
    """Return a language code for *text*.

    Returns "rw" when the text contains one of the Kinyarwanda keywords as a
    whole word; otherwise the top label predicted by the module-level
    ``lang_model`` with its ``__label__`` prefix removed. Returns "und"
    (undetermined) for empty/whitespace-only input or when the model yields
    no label.
    """
    normalized = (text or "").strip().lower()
    if not normalized:
        # Nothing to classify; the model is expected to return no labels for
        # empty input, which would make the indexing below raise IndexError.
        return "und"

    # Match whole words only: a plain substring test reports Kinyarwanda for
    # English words such as "royal" or "voyage" (both contain "oya").
    # Non-alphanumeric characters become separators, so "muraho!" and
    # "n'amakuru" still match.
    words = "".join(ch if ch.isalnum() else " " for ch in normalized).split()
    if not _RW_KEYWORDS.isdisjoint(words):
        return "rw"

    # The input must be a single line for predict(), so newlines are replaced.
    pred = lang_model.predict(normalized.replace("\n", " "))
    labels = pred[0]
    if len(labels) == 0:
        return "und"
    return labels[0].replace("__label__", "")
def auto_translate(text):
    """Detect the language of *text* and return English output as a string.

    Kinyarwanda input is translated to English, English input is returned
    unchanged, and any other detected language yields an explanatory message.
    Empty or whitespace-only input returns an empty string without running
    detection.
    """
    if not text or not text.strip():
        return ""

    lang = detect_language(text)
    if lang == "rw":
        return rw_to_en(text)
    if lang == "en":
        # Return the text alone: every other branch returns a str, and the
        # function feeds a single text output, so a (text, lang) tuple here
        # would be shown as a tuple instead of the text.
        return text
    return f"Detected '{lang}'. Only English and Kinyarwanda supported."
# Identifier of the seq2seq checkpoint passed to from_pretrained() below.
# NOTE(review): presumably an NLLB model fine-tuned for English/Kinyarwanda,
# judging by its name — confirm against the model card.
MODEL_ID = "mbazaNLP/Nllb_finetuned_general_en_kin"
# The tokenizer and model are created once at import time; translate() reuses
# these module-level objects for every request.
print("Loading NLLB model once… please wait.")
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_ID)
def translate(text, src_lang, tgt_lang):
    """Translate *text* from *src_lang* to *tgt_lang* with the shared model.

    Args:
        text: The text to translate.
        src_lang: Source language code understood by the tokenizer
            (e.g. "kin_Latn").
        tgt_lang: Target language code (e.g. "eng_Latn").

    Returns:
        The decoded translation with special tokens removed, or an empty
        string when *text* is empty or whitespace-only.
    """
    if not text or not text.strip():
        # Nothing to translate; avoid running generation on empty input.
        return ""

    # The source language is configured on the shared tokenizer before encoding.
    tokenizer.src_lang = src_lang
    inputs = tokenizer(text, return_tensors="pt")
    tokens = model.generate(
        **inputs,
        # Force the first generated token to be the target language code.
        forced_bos_token_id=tokenizer.convert_tokens_to_ids(tgt_lang),
        max_length=200,
        repetition_penalty=1.2,
        length_penalty=1.0,
        # No sampling and a single beam: deterministic greedy decoding.
        do_sample=False,
        num_beams=1,
    )
    # A single input was encoded, so the first decoded entry is the translation.
    return tokenizer.batch_decode(tokens, skip_special_tokens=True)[0]
# Function 1: Kinyarwanda → English
def rw_to_en(text):
    """Translate *text* from Kinyarwanda ("kin_Latn") to English ("eng_Latn")."""
    source_code, target_code = "kin_Latn", "eng_Latn"
    return translate(text, source_code, target_code)
# Function 2: English → Kinyarwanda
def en_to_rw(text):
    """Translate *text* from English ("eng_Latn") to Kinyarwanda ("kin_Latn")."""
    source_code, target_code = "eng_Latn", "kin_Latn"
    return translate(text, source_code, target_code)
# Tab labels, also used as the title of the interface inside each tab. The
# arrows were mis-decoded ("β†’") in the previous strings; they are written
# as the intended "→" here.
_TAB_NAMES = ["Kiny → English", "English → Kiny", "Auto Detect"]
# One handler per tab, in the same order as _TAB_NAMES.
_TAB_HANDLERS = (rw_to_en, en_to_rw, auto_translate)

app = gr.TabbedInterface(
    [
        gr.Interface(fn=handler, inputs="text", outputs="text", title=title)
        for handler, title in zip(_TAB_HANDLERS, _TAB_NAMES)
    ],
    tab_names=_TAB_NAMES,
)
# Start the UI as soon as the module is executed.
app.launch()