Text Generation
Transformers
Safetensors
PyTorch
nvidia
STEM_Code / nemo-evaluator-launcher-configs /local_nvidia-nemotron-3-nano-30b-a3b-base.yaml
SuperQAI2050's picture
Duplicate from nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-Base-BF16
2c83b69
# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
#
# How to use:
#
# 1. copy this file locally or clone the repository
# 2. (optional) uncomment limit_samples in the config file to run with 10 samples for quick testing
# 3. export your HF token in the terminal (some benchmark datasets might be gated)
# 4. run `nemo-evaluator-launcher run --config path/to/local_nvidia-nemotron-3-nano-30b-a3b-base.yaml`
#
# ⚠️ WARNING:
# Always run full evaluations (without limit_samples) for actual benchmark results.
# Using a subset of samples is solely for testing configuration and setup.
# Results from such test runs should NEVER be used to compare models or
# report benchmark performance.
# Hydra-style defaults list: pull in the `local` execution group and the
# `vllm` deployment group, then apply this file's own values (`_self_`).
defaults:
  - execution: local
  - deployment: vllm
  - _self_
# Overrides for the execution group chosen in `defaults`.
# Keys must be nested under `execution`; at column 0 they would become
# unrelated top-level keys and `execution` itself would be null.
execution:
  output_dir: NVIDIA-Nemotron-3-Nano-30B-A3B-Base-BF16
  # mode: sequential  # enables sequential execution
# specify deployment arguments
# All keys below belong to the `deployment` mapping and must be indented
# beneath it; otherwise they are parsed as top-level keys.
deployment:
  image: vllm/vllm-openai:v0.12.0
  # No local checkpoint is given; the model is referenced by its Hugging Face handle.
  checkpoint_path: null
  hf_model_handle: nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-Base-BF16
  served_model_name: nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-Base-BF16
  tensor_parallel_size: 1
  data_parallel_size: 1
  # Extra command-line flags for the serving process (passed as one string).
  extra_args: "--max-model-len 262144 --mamba_ssm_cache_dtype float32 --no-enable-prefix-caching"
# specify the benchmarks to evaluate
# Nesting matters here: `env_vars`, `nemo_evaluator_config` and `tasks` are
# children of `evaluation`; `extra` is a child of `params`.
evaluation:
  env_vars:
    # NOTE(review): presumably maps the variable name used by the evaluation
    # to the name of the host environment variable - confirm against the
    # launcher documentation.
    HF_TOKEN: HF_TOKEN
  nemo_evaluator_config:  # global config settings that apply to all tasks
    config:
      params:
        max_retries: 5  # number of retries for API requests
        request_timeout: 360  # timeout for API requests in seconds
        parallelism: 4  # number of parallel requests
        # limit_samples: 10  # uncomment to limit number of samples for quick testing
        extra:
          tokenizer: nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-Base-BF16
          tokenizer_backend: huggingface
  tasks:
    - name: adlr_mmlu_pro_5_shot_base
    - name: adlr_mmlu
    - name: adlr_agieval_en_cot
    - name: adlr_humaneval_greedy
    - name: adlr_mbpp_sanitized_3_shot_greedy
    - name: adlr_gsm8k_cot_8_shot
    - name: adlr_minerva_math_nemo_4_shot
    - name: adlr_math_500_4_shot_sampled
    - name: adlr_arc_challenge_llama_25_shot
    - name: hellaswag
    - name: openbookqa
    - name: piqa
    - name: adlr_race
    - name: adlr_winogrande_5_shot
    - name: adlr_global_mmlu_lite_5_shot
    - name: adlr_mgsm_native_cot_8_shot