Instructions to use AXERA-TECH/HY-MT1.5-1.8B_GPTQ_INT4-AX620E with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use AXERA-TECH/HY-MT1.5-1.8B_GPTQ_INT4-AX620E with Transformers:
# Use a pipeline as a high-level helper # Warning: Pipeline type "translation" is no longer supported in transformers v5. # You must load the model directly (see below) or downgrade to v4.x with: # 'pip install "transformers<5.0.0' from transformers import pipeline pipe = pipeline("translation", model="AXERA-TECH/HY-MT1.5-1.8B_GPTQ_INT4-AX620E")# Load model directly from transformers import AutoModel model = AutoModel.from_pretrained("AXERA-TECH/HY-MT1.5-1.8B_GPTQ_INT4-AX620E", dtype="auto") - Notebooks
- Google Colab
- Kaggle
| import argparse | |
| import socket | |
| import json | |
| import requests | |
| import gradio as gr | |
| DEFAULT_LANGUAGES = [ | |
| "English", | |
| "Chinese", | |
| "Japanese", | |
| "Korean", | |
| "French", | |
| "German", | |
| "Spanish", | |
| "Italian", | |
| "Portuguese", | |
| "Russian", | |
| "Arabic", | |
| "Hindi", | |
| "Bengali", | |
| "Thai", | |
| "Vietnamese", | |
| "Indonesian", | |
| "Turkish", | |
| "Polish", | |
| "Dutch", | |
| "Swedish", | |
| "Danish", | |
| "Norwegian", | |
| "Finnish", | |
| "Greek", | |
| "Czech", | |
| "Hungarian", | |
| "Romanian", | |
| "Ukrainian", | |
| "Malay", | |
| "Filipino", | |
| "Urdu", | |
| "Hebrew", | |
| "Persian", | |
| ] | |
| def _get_ipv4_address() -> str: | |
| try: | |
| s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) | |
| s.connect(("8.8.8.8", 80)) | |
| ip = s.getsockname()[0] | |
| s.close() | |
| return ip | |
| except Exception: | |
| return "127.0.0.1" | |
| def build_prompt(source_text: str, target_language: str, use_zh_template: bool) -> str: | |
| if use_zh_template: | |
| return ( | |
| f"将以下文本翻译为{target_language},注意只需要输出翻译后的结果,不要额外解释:\n" | |
| f"{source_text}" | |
| ) | |
| return ( | |
| f"Translate the following segment into {target_language}, without additional explanation.\n" | |
| f"{source_text}" | |
| ) | |
| def create_demo(api_base: str, model_name: str): | |
| def translate_stream( | |
| text, | |
| target_language, | |
| use_zh_template, | |
| temperature, | |
| top_p, | |
| top_k, | |
| repetition_penalty, | |
| max_new_tokens, | |
| ): | |
| if not text or not text.strip(): | |
| yield "" | |
| return | |
| payload = { | |
| "model": model_name, | |
| "messages": [{"role": "user", "content": text.strip()}], | |
| "stream": True, | |
| "temperature": temperature, | |
| "top_p": top_p, | |
| "top_k": int(top_k), | |
| "repetition_penalty": repetition_penalty, | |
| "max_tokens": int(max_new_tokens), | |
| "target_language": target_language, | |
| "use_zh_template": bool(use_zh_template), | |
| } | |
| url = f"{api_base}/v1/chat/completions" | |
| with requests.post(url, json=payload, stream=True, timeout=300) as resp: | |
| resp.raise_for_status() | |
| resp.encoding = "utf-8" | |
| buffer = "" | |
| for raw_line in resp.iter_lines(decode_unicode=False): | |
| if not raw_line: | |
| continue | |
| try: | |
| line = raw_line.decode("utf-8") | |
| except Exception: | |
| line = raw_line.decode("utf-8", errors="replace") | |
| if line.startswith("data: "): | |
| data = line[len("data: "):].strip() | |
| else: | |
| data = line.strip() | |
| if data == "[DONE]": | |
| break | |
| if data: | |
| try: | |
| obj = json.loads(data) | |
| delta = obj.get("choices", [{}])[0].get("delta", {}) | |
| content = delta.get("content", "") | |
| if content: | |
| buffer += content | |
| yield buffer.strip() | |
| except Exception: | |
| continue | |
| with gr.Blocks(title="HY-MT1.5-1.8B_GPTQ_INT4 Multilingual Translation (C++ Backend)") as demo: | |
| gr.Markdown("## HY-MT1.5-1.8B_GPTQ_INT4 Multilingual Translation (C++ Backend)") | |
| with gr.Group(): | |
| input_text = gr.Textbox( | |
| label="Input Text", | |
| placeholder="Please enter the text you want to translate...", | |
| lines=6, | |
| ) | |
| with gr.Group(): | |
| with gr.Row(equal_height=True): | |
| target_language = gr.Dropdown( | |
| choices=DEFAULT_LANGUAGES, | |
| value="English", | |
| label="Target Language", | |
| ) | |
| use_zh_template = gr.Checkbox( | |
| label="Use Chinese Prompt Template", | |
| value=False, | |
| ) | |
| with gr.Group(): | |
| with gr.Row(equal_height=True): | |
| temperature = gr.Slider( | |
| minimum=0.1, | |
| maximum=1.5, | |
| value=0.7, | |
| step=0.05, | |
| label="Temperature", | |
| ) | |
| top_p = gr.Slider( | |
| minimum=0.1, | |
| maximum=1.0, | |
| value=0.6, | |
| step=0.05, | |
| label="Top-p", | |
| ) | |
| top_k = gr.Slider( | |
| minimum=1, | |
| maximum=100, | |
| value=20, | |
| step=1, | |
| label="Top-k", | |
| ) | |
| with gr.Group(): | |
| with gr.Row(equal_height=True): | |
| repetition_penalty = gr.Slider( | |
| minimum=1.0, | |
| maximum=1.5, | |
| value=1.05, | |
| step=0.01, | |
| label="Repetition Penalty", | |
| ) | |
| max_new_tokens = gr.Slider( | |
| minimum=1, | |
| maximum=1024, | |
| value=512, | |
| step=1, | |
| label="Max New Tokens", | |
| ) | |
| translate_btn = gr.Button("Translate", variant="primary") | |
| output_text = gr.Textbox( | |
| label="Translation Result", | |
| lines=6, | |
| interactive=False, | |
| ) | |
| translate_btn.click( | |
| translate_stream, | |
| inputs=[ | |
| input_text, | |
| target_language, | |
| use_zh_template, | |
| temperature, | |
| top_p, | |
| top_k, | |
| repetition_penalty, | |
| max_new_tokens, | |
| ], | |
| outputs=output_text, | |
| ) | |
| return demo | |
| def parse_args(): | |
| parser = argparse.ArgumentParser(description="HY-MT1.5-1.8B_GPTQ_INT4 Gradio Demo (C++ Backend)") | |
| parser.add_argument("--api_base", type=str, default="http://127.0.0.1:8000") | |
| parser.add_argument("--model", type=str, default="AXERA-TECH/HY-MT1.5-1.8B_GPTQ_INT4-AX620E") | |
| parser.add_argument("--server_name", type=str, default="0.0.0.0") | |
| parser.add_argument("--server_port", type=int, default=7860) | |
| return parser.parse_args() | |
| if __name__ == "__main__": | |
| args = parse_args() | |
| app = create_demo(args.api_base, args.model) | |
| ipv4 = _get_ipv4_address() | |
| print(f"* Running on local URL: http://{ipv4}:{args.server_port}") | |
| app.launch(server_name=args.server_name, server_port=args.server_port) | |