Text Generation
Safetensors
NemotronH_Nano_Omni_Reasoning_V3
nvidia
unsloth
nemotron-3
multimodal
conversational
custom_code
Instructions to use unsloth/NVIDIA-Nemotron-3-Nano-Omni-30B-A3B-Reasoning with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Local Apps Settings
- Unsloth Studio
How to use unsloth/NVIDIA-Nemotron-3-Nano-Omni-30B-A3B-Reasoning with Unsloth Studio:
Install Unsloth Studio (macOS, Linux, WSL)
curl -fsSL https://unsloth.ai/install.sh | sh
# Run unsloth studio
unsloth studio -H 0.0.0.0 -p 8888
# Then open http://localhost:8888 in your browser
# Search for unsloth/NVIDIA-Nemotron-3-Nano-Omni-30B-A3B-Reasoning to start chatting
Install Unsloth Studio (Windows)
irm https://unsloth.ai/install.ps1 | iex
# Run unsloth studio
unsloth studio -H 0.0.0.0 -p 8888
# Then open http://localhost:8888 in your browser
# Search for unsloth/NVIDIA-Nemotron-3-Nano-Omni-30B-A3B-Reasoning to start chatting
Using HuggingFace Spaces for Unsloth
# No setup required
# Open https://huggingface.co/spaces/unsloth/studio in your browser
# Search for unsloth/NVIDIA-Nemotron-3-Nano-Omni-30B-A3B-Reasoning to start chatting
Load model with FastModel
pip install unsloth

from unsloth import FastModel
model, tokenizer = FastModel.from_pretrained(
    model_name="unsloth/NVIDIA-Nemotron-3-Nano-Omni-30B-A3B-Reasoning",
    max_seq_length=2048,
)
File size: 3,738 Bytes
1cc1a90 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 | import torch
from transformers import AutoTokenizer, AutoConfig, AutoModelForCausalLM, AutoImageProcessor, AutoProcessor
from PIL import Image
from pathlib import Path
# Checkpoint directory and target device shared by every test in this script.
model_path = "."
device = "cuda:0"

# Load the model in bfloat16 straight onto the target device and put it in
# eval mode. trust_remote_code=True lets the checkpoint's own Python code run.
model = AutoModelForCausalLM.from_pretrained(
    model_path,
    trust_remote_code=True,
    device_map=device,
    torch_dtype=torch.bfloat16,
).eval()

# Companion objects loaded from the same checkpoint directory.
# NOTE(review): `config`, `image_processor` and `generation_config` are not
# referenced by the visible tests; they are kept so the names stay defined.
tokenizer = AutoTokenizer.from_pretrained(model_path)
config = AutoConfig.from_pretrained(model_path, trust_remote_code=True)
image_processor = AutoImageProcessor.from_pretrained(model_path, trust_remote_code=True)
processor = AutoProcessor.from_pretrained(model_path, trust_remote_code=True)

generation_config = {
    "max_new_tokens": 1024,
    "do_sample": False,
    "eos_token_id": tokenizer.eos_token_id,
}

# Image files used by the single-image test, relative to the working directory.
img_lst = [
    "images/example1a.jpeg",
    "images/example1b.jpeg",
    "images/table.png",
    "images/tech.png",
]
# --- Text-only test -------------------------------------------------------
# Sends a plain text request through the chat template and prints only the
# newly generated tokens (the prompt part of the output is sliced off).
print("=" * 50, "Text-only test", "=" * 50, sep="\n")

messages = [
    {"role": "system", "content": "/no_think"},
    {
        "role": "user",
        "content": [
            {"type": "text", "text": "Write a short haiku about the moon."},
        ],
    },
]
prompt = tokenizer.apply_chat_template(
    messages,
    tokenize=False,
    add_generation_prompt=True,
)

text_inputs = tokenizer([prompt], return_tensors="pt").to(device)
text_outputs = model.generate(
    input_ids=text_inputs.input_ids,
    attention_mask=text_inputs.attention_mask,
    max_new_tokens=64,
)

# The generated sequences start with the prompt tokens; keep only what follows.
prompt_len = text_inputs.input_ids.shape[1]
completions = tokenizer.batch_decode(
    text_outputs[:, prompt_len:],
    skip_special_tokens=True,
)
print(completions[0])
# --- Single-image test ----------------------------------------------------
# Runs the same "describe the image" prompt against every file in img_lst,
# one image per generate() call, and prints the prompt with the model output.
print("="*50)
print("Test single image")
print("="*50)

# The chat messages and rendered prompt do not depend on the image, so they
# are built once instead of on every loop iteration.
messages = [
    {"role": "system", "content": "/no_think"},
    {
        "role": "user",
        "content": [
            {
                "type": "text",
                "text": "<image>\nDescribe the image.",
            },
        ],
    }
]
prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

for img_path in img_lst:
    # Open each image exactly once and close the file handle as soon as the
    # processor has turned it into tensors. (Previously the file was opened
    # twice per iteration and never closed.)
    with Image.open(img_path) as image:
        inputs = processor(
            text=[prompt],
            images=[image],
            return_tensors="pt",
        )
    inputs = inputs.to(device)

    # Inference: generation of the output
    generated_ids = model.generate(
        pixel_values=inputs.pixel_values,
        input_ids=inputs.input_ids,
        attention_mask=inputs.attention_mask,
        max_new_tokens=128,
    )

    # Strip the prompt tokens from the front of each generated sequence.
    generated_ids_trimmed = [
        out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
    ]
    output_text = processor.batch_decode(
        generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
    )
    print(f"Prompt: {prompt}\nOutput: {output_text[0]}\n\n\n")
# --- Multi-image test -----------------------------------------------------
# Feeds two images in a single prompt (one "<image>" placeholder per image)
# and prints the model's description of both.
print("="*50)
print("Test multi-images")
print("="*50)

multi_img_lst = [
    "images/example1a.jpeg",
    "images/example1b.jpeg",
]

messages = [
    {"role": "system", "content": "/no_think"},
    {"role": "user", "content": "Image-1: <image>\nImage-2: <image>\nDescribe the two images in detail."},
]
prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)

# Open the images one by one inside the try block so that every handle that
# was successfully opened is closed, even if a later open or the processor
# call raises. (Previously the handles were never closed.)
images = []
try:
    for path in multi_img_lst:
        images.append(Image.open(path))
    inputs = processor(
        text=[prompt],
        images=images,
        return_tensors="pt",
    )
finally:
    for image in images:
        image.close()
inputs = inputs.to(device)

generated_ids = model.generate(
    pixel_values=inputs.pixel_values,
    input_ids=inputs.input_ids,
    attention_mask=inputs.attention_mask,
    max_new_tokens=1024,
)

# Strip the prompt tokens from the front of each generated sequence.
generated_ids_trimmed = [
    out_ids[len(in_ids):] for in_ids, out_ids in zip(inputs.input_ids, generated_ids)
]
output_text = processor.batch_decode(
    generated_ids_trimmed, skip_special_tokens=True, clean_up_tokenization_spaces=False
)
print(f"Output: {output_text[0]}\n")