Update README.md
Browse files
README.md
CHANGED
|
@@ -65,7 +65,7 @@ sampling_params = SamplingParams(max_tokens=8192)
|
|
| 65 |
# If you want to divide the GPU requirement over multiple devices, please add *e.g.* `tensor_parallel_size=2`
|
| 66 |
llm = LLM(model=model_name, tokenizer_mode="mistral", config_format="mistral", load_format="mistral")
|
| 67 |
|
| 68 |
-
prompt = "How
|
| 69 |
|
| 70 |
messages = [
|
| 71 |
{
|
|
@@ -105,7 +105,7 @@ curl --location 'http://<your-node-url>:8000/v1/chat/completions' \
|
|
| 105 |
"messages": [
|
| 106 |
{
|
| 107 |
"role": "user",
|
| 108 |
-
"content": "How
|
| 109 |
}
|
| 110 |
]
|
| 111 |
}'
|
|
@@ -159,7 +159,7 @@ from mistral_common.protocol.instruct.request import ChatCompletionRequest
|
|
| 159 |
tokenizer = MistralTokenizer.from_file(f"{mistral_models_path}/tokenizer.model.v3")
|
| 160 |
model = Transformer.from_folder(mistral_models_path)
|
| 161 |
|
| 162 |
-
completion_request = ChatCompletionRequest(messages=[UserMessage(content="How
|
| 163 |
|
| 164 |
tokens = tokenizer.encode_chat_completion(completion_request).tokens
|
| 165 |
|
|
|
|
| 65 |
# If you want to divide the GPU requirement over multiple devices, please add *e.g.* `tensor_parallel_size=2`
|
| 66 |
llm = LLM(model=model_name, tokenizer_mode="mistral", config_format="mistral", load_format="mistral")
|
| 67 |
|
| 68 |
+
prompt = "How often does the letter r occur in Mistral?"
|
| 69 |
|
| 70 |
messages = [
|
| 71 |
{
|
|
|
|
| 105 |
"messages": [
|
| 106 |
{
|
| 107 |
"role": "user",
|
| 108 |
+
"content": "How often does the letter r occur in Mistral?"
|
| 109 |
}
|
| 110 |
]
|
| 111 |
}'
|
|
|
|
| 159 |
tokenizer = MistralTokenizer.from_file(f"{mistral_models_path}/tokenizer.model.v3")
|
| 160 |
model = Transformer.from_folder(mistral_models_path)
|
| 161 |
|
| 162 |
+
completion_request = ChatCompletionRequest(messages=[UserMessage(content="How often does the letter r occur in Mistral?")])
|
| 163 |
|
| 164 |
tokens = tokenizer.encode_chat_completion(completion_request).tokens
|
| 165 |
|