---
# Experiment configuration, grouped into one mapping per argument section.
# Keys inside every section are kept in alphabetical order.

# Expert clustering settings.
cluster_args:
  cluster_description: null
  cluster_method: agglomerative
  compression_ratio: 0.3
  expert_sim: ttm
  frequency_penalty: true
  linkage_method: average
  max_cluster_size: null
  multi_layer: null
  num_clusters: null
  singleton_outlier_experts: false
  singleton_super_experts: false
  softmax_temperature: null

# Dataset selection.
ds_args:
  dataset_config_name: null
  # Looks like comma-separated `<dataset>[subset]:<count>` entries -- confirm
  # the exact grammar against the loader. Quoted because the value is dense
  # with YAML indicator characters ([ ] , :); the string itself is unchanged.
  dataset_name: 'theblackcat102/evol-codealpaca-v1:171,Salesforce/xlam-function-calling-60k:171,open-r1/Mixture-of-Thoughts[code]:171,open-r1/Mixture-of-Thoughts[math]:171,open-r1/Mixture-of-Thoughts[science]:170,SWE-bench/SWE-smith-trajectories(tool):170'
  dataset_test_split: test
  shuffle: true
  split: train

# Evaluation settings.
eval_args:
  evalplus_tasks:
    - mbpp
    - humaneval
  # NOTE(review): `greedy: true` is set alongside temperature/top_k/top_p
  # below; which one takes effect depends on the consumer -- confirm.
  greedy: true
  lm_eval_tasks:
    - winogrande
    - arc_challenge
    - arc_easy
    - boolq
    - hellaswag
    - mmlu
    - openbookqa
    - rte
  min_p: 0.0
  parallel_tasks: 32
  results_dir: null
  run_evalplus: true
  run_livecodebench: true
  run_lm_eval: true
  run_math: false
  run_wildbench: false
  server_log_file_name: server.log
  temperature: 0.7
  top_k: 20
  top_p: 0.8
  use_server: true
  vllm_port: 8000

# Model selection.
model_args:
  # Absolute, machine-specific snapshot path; adjust when running elsewhere.
  model_name: /disk1/rongxiao/hf_cache/hub/models--Qwen--Qwen3.6-35B-A3B/snapshots/995ad96eacd98c81ed38be0c5b274b04031597b0
  num_experts_per_tok_override: null

# Observation (activation recording) settings.
obs_args:
  batch_size: 1
  batches_per_category: 1024
  distance_measure: angular
  model_max_length: 2048
  output_file_name: observations_qwen36_pre_reap_bnb4_paper_1024_2048_standard_streaming-pre_reap-bnb_4bit-nf4-bfloat16-dq_true.pt
  overwrite_observations: false
  record_pruning_metrics_only: true
  renormalize_router_weights: true
  return_vllm_tokens_prompt: false
  select_only_categories: null
  split_by_category: false
  truncate: false

# Quantization settings carrying the `pre_reap_` prefix.
pre_reap_quant_args:
  pre_reap_bnb_4bit_compute_dtype: bfloat16
  pre_reap_bnb_4bit_quant_type: nf4
  pre_reap_bnb_4bit_use_double_quant: true
  pre_reap_quantization_method: bnb_4bit

# Expert pruning settings.
prune_args:
  n_experts_to_prune: null
  overwrite_pruned_model: true
  # NOTE(review): the two keys below are spelled "perserve" (sic). They are
  # left untouched because the consumer presumably looks up these exact
  # names -- confirm before renaming.
  perserve_outliers: false
  perserve_super_experts: false
  prune_method: reap

# Run-level switches.
reap_args:
  debug: false
  # NOTE(review): do_eval is false although eval_args enables several
  # run_* flags; presumably evaluation is skipped for this run -- confirm.
  do_eval: false
  plot_clusters: true
  profile: false
  run_observer_only: false
  seed: 42
  smoke_test: false