drizzlezyk committed on
Commit
464f0f4
·
verified ·
1 Parent(s): eb564bc

Upload inference/generate.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. inference/generate.py +50 -0
inference/generate.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
# coding=utf-8
# Copyright (c) 2025 Huawei Technologies Co., Ltd. All rights reserved.
"""Minimal chat-style text generation example.

Loads a tokenizer and a causal language model from a local directory,
renders a system + user conversation through the tokenizer's chat template,
generates a completion, and prints only the newly generated text.

Set ``model_local_path`` to the directory holding the model files before
running; both loaders are called with ``local_files_only=True``.
"""

from transformers import AutoModelForCausalLM, AutoTokenizer
from transformers import GenerationConfig

model_local_path = "path_to_openPangu-Embedded-1B"

# Tokenizer: slow implementation (use_fast=False), loaded with
# trust_remote_code=True from the local directory only.
tokenizer = AutoTokenizer.from_pretrained(
    model_local_path, use_fast=False, trust_remote_code=True, local_files_only=True
)

# Model: dtype is left to "auto" and the device map is set to "npu".
model = AutoModelForCausalLM.from_pretrained(
    model_local_path,
    trust_remote_code=True,
    torch_dtype="auto",
    device_map="npu",
    local_files_only=True,
)

# System prompt, assembled from adjacent string literals into one string.
sys_prompt = (
    "你必须严格遵守法律法规和社会道德规范。"
    "生成任何内容时,都应避免涉及暴力、色情、恐怖主义、种族歧视、性别歧视等不当内容。"
    "一旦检测到输入或输出有此类倾向,应拒绝回答并发出警告。例如,如果输入内容包含暴力威胁或色情描述,"
    "应返回错误信息:“您的输入包含不当内容,无法处理。”"
)

prompt = "Give me a short introduction to large language model."

system_message = {"role": "system", "content": sys_prompt}  # define your system prompt here
user_message = {"role": "user", "content": prompt}
messages = [system_message, user_message]

# Render the conversation to a plain string (tokenize=False) and append the
# generation prompt so the model continues with the assistant turn.
text = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

# Encode a batch of one prompt as PyTorch tensors, then move it to the
# device the model lives on.
encoded = tokenizer([text], return_tensors="pt")
model_inputs = encoded.to(model.device)

# Generation settings.
# NOTE(review): 45892 is passed as the end-of-sequence token id; presumably it
# is this model's end-of-turn token -- confirm against the tokenizer config.
generation_kwargs = {
    "max_new_tokens": 32768,
    "eos_token_id": 45892,
    "return_dict_in_generate": True,
}
outputs = model.generate(**model_inputs, **generation_kwargs)

# The returned sequences start with the prompt tokens; slice them off so only
# the completion is decoded.
input_length = model_inputs["input_ids"].shape[1]
generated_tokens = outputs.sequences[:, input_length:]
content = tokenizer.decode(generated_tokens[0], skip_special_tokens=True)

print("\ncontent:", content)