jmcinern committed on
Commit
a9f0d06
·
verified ·
1 Parent(s): b5eb228

Update app.py

Browse files

Set the EOS stop token based on its tokenizer ID (`<|im_end|>`) and not just its text

Files changed (1) hide show
  1. app.py +6 -2
app.py CHANGED
@@ -68,7 +68,10 @@ class ChatBot:
68
 
69
  # Tokenize
70
  inputs = self.tokenizer(prompt, return_tensors="pt").to(self.model.device)
71
-
 
 
 
72
  # Generate response
73
  with torch.no_grad():
74
  outputs = self.model.generate(
@@ -77,7 +80,8 @@ class ChatBot:
77
  temperature=0.6,
78
  do_sample=True,
79
  return_dict_in_generate=True,
80
- pad_token_id=self.tokenizer.eos_token_id
 
81
  )
82
 
83
  # Decode and clean response, with multiple debugs
 
68
 
69
  # Tokenize
70
  inputs = self.tokenizer(prompt, return_tensors="pt").to(self.model.device)
71
+
72
+ # Set EOS seen in training (per Qwen chat template)
73
+ stop_id = tokenizer.convert_tokens_to_ids("<|im_end|>")
74
+
75
  # Generate response
76
  with torch.no_grad():
77
  outputs = self.model.generate(
 
80
  temperature=0.6,
81
  do_sample=True,
82
  return_dict_in_generate=True,
83
+ pad_token_id=self.tokenizer.eos_token_id,
84
+ eos_token_id=stop_id
85
  )
86
 
87
  # Decode and clean response, with multiple debugs