---
license: mit
---

## FunctionGemma-270m-ONNX-CPU

This is a quantized FP32 ONNX model based on FunctionGemma-270m, built for x86 CPUs. You can deploy it on your CPU devices.

Note: This is an unofficial version, intended only for testing and development.

### Installation

```bash
pip install onnxruntime-genai
```

### Running

```Python
import onnxruntime_genai as og
import argparse
import os
import json
import time

model_folder = {Your FunctionGemma-270m-ONNX-CPU Path}

config = og.Config(model_folder)
model = og.Model(config)
tokenizer = og.Tokenizer(model)
tokenizer_stream = tokenizer.create_stream()

def get_current_weather(location: str, unit: str = "celsius"):
    """
    Get the current temperature at a location.

    Args:
        location: The location to get the temperature for.
        unit: The unit to return the temperature in. (choices: ["celsius", "fahrenheit"])
    """
    return 22.0

import json

messages_list = [
    {"role": "developer", "content": "You are a model that can do function calling with the following functionsdeclaration:get_current_weather{description:Gets the current weather in a given location.,parameters:{properties:{location:{description:The city and state, e.g. \"San Francisco, CA\" or \"Tokyo, JP\",type:STRING},unit:{description:The unit to return the temperature in.,enum:[celsius,fahrenheit],type:STRING}},required:[location],type:OBJECT}}"},
    {"role": "user", "content": "Hey, what's the weather in Tokyo right now?"},
]

messages = json.dumps(messages_list)

prompt = tokenizer.apply_chat_template(messages=messages, add_generation_prompt=True)

print(prompt)

params = og.GeneratorParams(model)

generator = og.Generator(model, params)

input_tokens = tokenizer.encode(prompt)

generator.append_tokens(input_tokens)

while not generator.is_done():
    generator.generate_next_token()
    new_token = generator.get_next_tokens()[0]
    print(tokenizer_stream.decode(new_token), end='', flush=True)
```