"""Gradio demo: Kinyarwanda <-> English translation with language auto-detect.

Language identification uses the fastText ``lid.176.ftz`` model (downloaded on
first run); translation uses the ``mbazaNLP/Nllb_finetuned_general_en_kin``
seq2seq checkpoint loaded through ``transformers``.
"""

import os
import re
import urllib.request

import fasttext
import gradio as gr
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

MODEL_PATH = "lid.176.ftz"
LID_MODEL_URL = (
    "https://dl.fbaipublicfiles.com/fasttext/supervised-models/lid.176.ftz"
)

# Tiny Kinyarwanda keyword safety net: if any of these appears as a whole
# word, the text is treated as Kinyarwanda without consulting fastText.
RW_KEYWORDS = frozenset(
    ["muraho", "amakuru", "neza", "murakoze", "yego", "oya"]
)


def _ensure_lid_model(path: str = MODEL_PATH, url: str = LID_MODEL_URL) -> str:
    """Download the language-ID model to *path* if it is not there yet.

    The file is fetched to a temporary ``.part`` name and moved into place
    only after the download finishes, so an interrupted download never leaves
    a truncated file that a later run would mistake for a complete model.
    """
    if not os.path.exists(path):
        partial = path + ".part"
        try:
            urllib.request.urlretrieve(url, partial)
            os.replace(partial, path)
        finally:
            # Only still present when the download or the rename failed.
            if os.path.exists(partial):
                os.remove(partial)
    return path


lang_model = fasttext.load_model(_ensure_lid_model())


def detect_language(text: str) -> str:
    """Return a short language code for *text*.

    Returns ``"rw"`` when a Kinyarwanda keyword occurs as a whole word,
    otherwise the top fastText label with its ``__label__`` prefix removed.
    Returns ``""`` when *text* is empty/whitespace or no label is predicted.
    """
    if not text:
        return ""
    # Collapse all whitespace: fastText predicts on a single line of text.
    normalized = " ".join(text.lower().split())
    if not normalized:
        return ""

    # Whole-word match; a plain substring test would fire on e.g. "royal"
    # (contains "oya") and misroute English text to the rw -> en model.
    words = set(re.findall(r"\w+", normalized))
    if words & RW_KEYWORDS:
        return "rw"

    labels = lang_model.predict(normalized)[0]
    if not labels:
        return ""
    return labels[0].replace("__label__", "")


def auto_translate(text: str) -> str:
    """Detect the language of *text* and translate Kinyarwanda to English.

    English input is returned unchanged; any other detected language yields
    an explanatory message. Always returns a single string, because the
    function feeds a single text output component.
    """
    if not text or not text.strip():
        return ""
    lang = detect_language(text)
    if lang == "rw":
        return rw_to_en(text)
    if lang == "en":
        # Previously returned ``(text, lang)``, a tuple, unlike every other
        # branch of this function.
        return text
    return f"Detected '{lang}'. Only English and Kinyarwanda supported."


MODEL_ID = "mbazaNLP/Nllb_finetuned_general_en_kin"

print("Loading NLLB model once… please wait.")
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForSeq2SeqLM.from_pretrained(MODEL_ID)


def translate(text: str, src_lang: str, tgt_lang: str) -> str:
    """Translate *text* from *src_lang* to *tgt_lang* with the NLLB model.

    *src_lang* and *tgt_lang* are tokenizer language tokens such as
    ``"kin_Latn"`` or ``"eng_Latn"``. Decoding is greedy (no sampling, one
    beam) with output capped at 200 tokens. Empty or whitespace-only input
    returns ``""`` without running the model.
    """
    if not text or not text.strip():
        return ""
    tokenizer.src_lang = src_lang
    inputs = tokenizer(text, return_tensors="pt")
    tokens = model.generate(
        **inputs,
        # Force the first generated token to be the target-language tag.
        forced_bos_token_id=tokenizer.convert_tokens_to_ids(tgt_lang),
        max_length=200,
        repetition_penalty=1.2,
        length_penalty=1.0,
        do_sample=False,
        num_beams=1,
    )
    return tokenizer.batch_decode(tokens, skip_special_tokens=True)[0]


# Function 1: Kinyarwanda → English
def rw_to_en(text: str) -> str:
    """Translate Kinyarwanda *text* to English."""
    return translate(text, "kin_Latn", "eng_Latn")


# Function 2: English → Kinyarwanda
def en_to_rw(text: str) -> str:
    """Translate English *text* to Kinyarwanda."""
    return translate(text, "eng_Latn", "kin_Latn")


app = gr.TabbedInterface(
    [
        gr.Interface(fn=rw_to_en, inputs="text", outputs="text", title="Kiny → English"),
        gr.Interface(fn=en_to_rw, inputs="text", outputs="text", title="English → Kiny"),
        gr.Interface(fn=auto_translate, inputs="text", outputs="text", title="Auto Detect"),
    ],
    tab_names=["Kiny → English", "English → Kiny", "Auto Detect"],
)

app.launch()