STEM_Code / nemo-evaluator-launcher-configs /local_nvidia-nemotron-3-nano-30b-a3b-base.yaml

SuperQAI2050

Duplicate from nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-Base-BF16

2c83b69 2 months ago

3.02 kB

	# SPDX-FileCopyrightText: Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
	# SPDX-License-Identifier: Apache-2.0
	#
	# Licensed under the Apache License, Version 2.0 (the "License");
	# you may not use this file except in compliance with the License.
	# You may obtain a copy of the License at
	#
	# http://www.apache.org/licenses/LICENSE-2.0
	#
	# Unless required by applicable law or agreed to in writing, software
	# distributed under the License is distributed on an "AS IS" BASIS,
	# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
	# See the License for the specific language governing permissions and
	# limitations under the License.
	#
	#
	# How to use:
	#
	# 1. copy this file locally or clone the repository
	# 2. (optional) uncomment limit_samples in the config file to run with 10 samples for quick testing
	# 3. export your HF token in the terminal as HF_TOKEN (some benchmark datasets might be gated)
	# 4. run `nemo-evaluator-launcher run --config path/to/local_nvidia-nemotron-3-nano-30b-a3b-base.yaml`
	#
	# ⚠️ WARNING:
	# Always run full evaluations (without limit_samples) for actual benchmark results.
	# Using a subset of samples is solely for testing configuration and setup.
	# Results from such test runs should NEVER be used to compare models or
	# report benchmark performance.
	# Config composition list: selects the `local` execution and `vllm` deployment
	# base configs. `_self_` is listed last; in a Hydra-style defaults list that
	# means the values in this file are applied after (and override) those bases
	# -- NOTE(review): confirm against the launcher documentation.
	defaults:
	  - execution: local
	  - deployment: vllm
	  - _self_

	# Execution settings. `output_dir` must be nested under `execution:`;
	# left at the same level it becomes an unrelated top-level key and
	# `execution` itself parses as null.
	execution:
	  output_dir: NVIDIA-Nemotron-3-Nano-30B-A3B-Base-BF16
	  # mode: sequential  # enables sequential execution

	# specify deployment arguments
	# (all keys below are children of `deployment:`; at the same level as the
	# parent key they would be parsed as independent top-level keys)
	deployment:
	  image: vllm/vllm-openai:v0.12.0
	  checkpoint_path: null
	  hf_model_handle: nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-Base-BF16
	  served_model_name: nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-Base-BF16
	  tensor_parallel_size: 1
	  data_parallel_size: 1
	  # NOTE(review): presumably appended verbatim to the vLLM server command
	  # line -- verify against the launcher's vllm deployment template.
	  extra_args: "--max-model-len 262144 --mamba_ssm_cache_dtype float32 --no-enable-prefix-caching"

	# specify the benchmarks to evaluate
	# NOTE(review): the nesting below was reconstructed from a flattened copy of
	# this file; confirm `extra` belongs under `params` and `tasks` under
	# `evaluation` against the launcher's config schema.
	evaluation:
	  env_vars:
	    # NOTE(review): presumably <variable seen by the evaluation>: <host
	    # variable to read it from>; export HF_TOKEN before running -- confirm.
	    HF_TOKEN: HF_TOKEN
	  nemo_evaluator_config:  # global config settings that apply to all tasks
	    config:
	      params:
	        max_retries: 5  # number of retries for API requests
	        request_timeout: 360  # timeout for API requests in seconds
	        parallelism: 4  # number of parallel requests
	        # limit_samples: 10  # uncomment to limit number of samples for quick testing
	        extra:
	          tokenizer: nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-Base-BF16
	          tokenizer_backend: huggingface
	  # one entry per benchmark task; each inherits the global settings above
	  tasks:
	    - name: adlr_mmlu_pro_5_shot_base
	    - name: adlr_mmlu
	    - name: adlr_agieval_en_cot
	    - name: adlr_humaneval_greedy
	    - name: adlr_mbpp_sanitized_3_shot_greedy
	    - name: adlr_gsm8k_cot_8_shot
	    - name: adlr_minerva_math_nemo_4_shot
	    - name: adlr_math_500_4_shot_sampled
	    - name: adlr_arc_challenge_llama_25_shot
	    - name: hellaswag
	    - name: openbookqa
	    - name: piqa
	    - name: adlr_race
	    - name: adlr_winogrande_5_shot
	    - name: adlr_global_mmlu_lite_5_shot
	    - name: adlr_mgsm_native_cot_8_shot