Spaces:

chi-vi
/

hirashiba-mt-zh-vi

Sleeping

App Files Files Community

Moleys committed on Feb 25, 2025

Commit

04aeedf

verified ·

1 Parent(s): 172828d

Update app.py

Browse files

Files changed (1) hide show

app.py +29 -6

app.py CHANGED Viewed

@@ -1,13 +1,36 @@
 import gradio as gr
-from transformers import pipeline
-# Load translation pipeline
-pipe = pipeline("translation", model="chi-vi/hirashiba-mt-tiny-zh-vi")
 def translate_text(input_text):
     lines = input_text.split('\n')  # Split the input into individual lines
-    translated_lines = [pipe(line, max_length=512)[0]['translation_text'] if line.strip() else '' for line in lines]
-    return '\n'.join(translated_lines)  # Join the lines back together with newlines
 if __name__ == '__main__':
     with gr.Blocks() as app:
@@ -18,7 +41,7 @@ if __name__ == '__main__':
                 input_text = gr.Textbox(label='Input Chinese Text', lines=5, placeholder='Enter Chinese text here...')
                 translate_button = gr.Button('Translate')
                 output_text = gr.Textbox(label='Output Vietnamese Text', lines=5, interactive=False)
         translate_button.click(
             fn=translate_text,
             inputs=input_text,

+import torch
+from transformers import AutoModelForSeq2SeqLM, AutoTokenizer
 import gradio as gr
+# Load model and tokenizer
+model_name = "chi-vi/hirashiba-mt-tiny-zh-vi"
+device = "cuda" if torch.cuda.is_available() else "cpu"
+tokenizer = AutoTokenizer.from_pretrained(model_name)
+model = AutoModelForSeq2SeqLM.from_pretrained(model_name).to(device)
 def translate_text(input_text):
     lines = input_text.split('\n')  # Split the input into individual lines
+    translated_lines = []
+    for line in lines:
+        raw_text = line.strip()
+        if not raw_text:
+            translated_lines.append('')  # Preserve blank lines
+            continue
+        # Tokenize input
+        inputs = tokenizer(raw_text, return_tensors="pt", padding=True, truncation=True).to(device)
+        # Translate with the model (no gradient computation needed)
+        with torch.no_grad():
+            output_tokens = model.generate(**inputs, max_length=512)
+        # Decode the result
+        translated_text = tokenizer.decode(output_tokens[0], skip_special_tokens=True)
+        translated_lines.append(translated_text)
+    return '\n'.join(translated_lines)
 if __name__ == '__main__':
     with gr.Blocks() as app:
                 input_text = gr.Textbox(label='Input Chinese Text', lines=5, placeholder='Enter Chinese text here...')
                 translate_button = gr.Button('Translate')
                 output_text = gr.Textbox(label='Output Vietnamese Text', lines=5, interactive=False)
         translate_button.click(
             fn=translate_text,
             inputs=input_text,