# Script: load API credentials from a .env file, configure a Gemini-backed
# code agent, then send a single completion request to a Hugging Face
# Inference API model and print the reply.
import os

from dotenv import load_dotenv
from llama_index.llms.huggingface_api import HuggingFaceInferenceAPI

# Load the .env file so the credentials below are visible through os.getenv().
load_dotenv()

# Retrieve the credentials from the environment; each is None when unset.
hf_token = os.getenv("HF_TOKEN")
gemini_api_key = os.getenv("GEMINI_API_KEY")

# NOTE(review): LiteLLMModel, CodeAgent and query_market_asset are neither
# imported nor defined in the visible source (the first two look like
# smolagents names, the last like a project tool) -- confirm they are brought
# into scope elsewhere, otherwise the next statement raises NameError.
model = LiteLLMModel(
    model_id="gemini/gemini-2.5-flash",
    temperature=0.2,
    # Hand the key over explicitly so the value read above is actually used;
    # presumably None falls back to the library's own lookup -- TODO confirm.
    api_key=gemini_api_key,
)

# Agent restricted to one tool and at most five reasoning steps.
agent = CodeAgent(
    tools=[query_market_asset],
    model=model,
    max_steps=5,
)

# Hosted-inference client; max_tokens=100 keeps the reply short.
llm = HuggingFaceInferenceAPI(
    model_name="Qwen/Qwen2.5-Coder-32B-Instruct",
    temperature=0.7,
    max_tokens=100,
    token=hf_token,
    provider="auto",
)

response = llm.complete("Hello, how are you?")
print(response)
# Example reply: I am good, how can I help you today?