import torch
from transformers import AutoTokenizer
from optimum.intel.openvino import OVModelForCausalLM


class OpenVinoPipeline:
    """Bundle a loaded model together with its matching tokenizer.

    Attributes are plain references to whatever objects were handed to the
    constructor; no validation or copying is performed.
    """

    def __init__(self, model, tokenizer):
        self.model = model
        self.tokenizer = tokenizer


def get_openvino_pipeline(model_name: str):
    """Build an ``OpenVinoPipeline`` for a local directory or a model name.

    Args:
        model_name: Either a path to an existing directory or any other
            identifier accepted by ``from_pretrained``.

    Returns:
        An ``OpenVinoPipeline`` holding the loaded ``OVModelForCausalLM`` and
        the ``AutoTokenizer`` loaded from the same ``model_name``.

    Both loaders target the "CPU" device / fast tokenizer and share one cache
    directory: the ``HF_HOME`` environment variable when set, otherwise
    ``/tmp/huggingface``.
    """
    import os

    hf_cache = os.environ.get('HF_HOME', '/tmp/huggingface')

    if os.path.isdir(model_name):
        # Existing local directory: load from it and compile right away.
        load_options = {"compile": True}
    else:
        # Anything else is loaded without export and without eager compile.
        # NOTE(review): an earlier comment here described this path as
        # "download and export", yet export=False is what is passed --
        # confirm which of the two is intended.
        load_options = {"export": False, "compile": False}

    ov_model = OVModelForCausalLM.from_pretrained(
        model_name,
        device="CPU",
        cache_dir=hf_cache,
        **load_options,
    )
    # The tokenizer is loaded identically regardless of where the model
    # came from.
    tok = AutoTokenizer.from_pretrained(
        model_name,
        use_fast=True,
        cache_dir=hf_cache,
    )
    return OpenVinoPipeline(ov_model, tok)