# Provenance (from the Hugging Face file view this config was copied from):
#   Uploader:       RangerX
#   Commit message: Upload pre-REAP bnb4 ratio 0.3 pruned checkpoint
#   Commit:         9692517 (verified)
#   File size:      2.25 kB
---
# cluster_args: clustering options (method, linkage, similarity measure and
# target compression ratio). Keys set to null carry no value in this file;
# how the consumer treats null is not visible here.
cluster_args:
  cluster_description: null
  cluster_method: agglomerative
  compression_ratio: 0.3
  # NOTE(review): `ttm` is an identifier understood by the consumer; its
  # expansion is not defined in this file.
  expert_sim: ttm
  frequency_penalty: true
  linkage_method: average
  max_cluster_size: null
  multi_layer: null
  num_clusters: null
  singleton_outlier_experts: false
  singleton_super_experts: false
  softmax_temperature: null
# ds_args: dataset selection for the run.
ds_args:
  dataset_config_name: null
  # Comma-separated `<dataset>:<number>` entries; the six numbers sum to 1024
  # (4 x 171 + 2 x 170). Presumably a per-dataset sample count -- confirm
  # against the loader. Single-quoted because the value contains `[`, `]`,
  # `,` and `:`, which are YAML indicators in other positions; the string
  # content itself is unchanged.
  dataset_name: 'theblackcat102/evol-codealpaca-v1:171,Salesforce/xlam-function-calling-60k:171,open-r1/Mixture-of-Thoughts[code]:171,open-r1/Mixture-of-Thoughts[math]:171,open-r1/Mixture-of-Thoughts[science]:170,SWE-bench/SWE-smith-trajectories(tool):170'
  dataset_test_split: test
  shuffle: true
  split: train
# eval_args: evaluation settings (task lists, sampling parameters and
# server options).
# NOTE(review): `do_eval` is false under reap_args in this file, so these
# settings may be unused for this run -- confirm.
eval_args:
  evalplus_tasks:
    - mbpp
    - humaneval
  # NOTE(review): greedy is true while temperature / top_k / top_p are also
  # set below; which of them takes effect is decided by the consumer.
  greedy: true
  lm_eval_tasks:
    - winogrande
    - arc_challenge
    - arc_easy
    - boolq
    - hellaswag
    - mmlu
    - openbookqa
    - rte
  min_p: 0.0
  parallel_tasks: 32
  results_dir: null
  run_evalplus: true
  run_livecodebench: true
  run_lm_eval: true
  run_math: false
  run_wildbench: false
  server_log_file_name: server.log
  temperature: 0.7
  top_k: 20
  top_p: 0.8
  use_server: true
  vllm_port: 8000
# model_args: which model to load.
model_args:
  # NOTE(review): absolute path on the machine that produced this file; it
  # will not resolve elsewhere. The path follows the Hugging Face cache
  # layout (models--<org>--<name>/snapshots/<revision>), i.e. repo
  # Qwen/Qwen3.6-35B-A3B at revision
  # 995ad96eacd98c81ed38be0c5b274b04031597b0. Kept verbatim as a record of
  # the run.
  model_name: /disk1/rongxiao/hf_cache/hub/models--Qwen--Qwen3.6-35B-A3B/snapshots/995ad96eacd98c81ed38be0c5b274b04031597b0
  num_experts_per_tok_override: null
# obs_args: observation-collection settings (batching, sequence length,
# output file and which metrics are recorded).
obs_args:
  batch_size: 1
  # NOTE(review): 1024 equals the sum of the per-dataset numbers in
  # ds_args.dataset_name -- presumably related; confirm.
  batches_per_category: 1024
  distance_measure: angular
  model_max_length: 2048
  # The file name repeats values that also appear as settings in this file
  # (1024, 2048, bnb_4bit, nf4, bfloat16, double-quant true).
  output_file_name: observations_qwen36_pre_reap_bnb4_paper_1024_2048_standard_streaming-pre_reap-bnb_4bit-nf4-bfloat16-dq_true.pt
  overwrite_observations: false
  record_pruning_metrics_only: true
  renormalize_router_weights: true
  return_vllm_tokens_prompt: false
  select_only_categories: null
  split_by_category: false
  truncate: false
# pre_reap_quant_args: quantization settings carrying the `pre_reap_` prefix.
# NOTE(review): the `bnb_4bit_*` names match bitsandbytes 4-bit options
# (quant type, compute dtype, double quantization) -- presumably passed
# through to that library; confirm in the consumer.
pre_reap_quant_args:
  pre_reap_bnb_4bit_compute_dtype: bfloat16
  pre_reap_bnb_4bit_quant_type: nf4
  pre_reap_bnb_4bit_use_double_quant: true
  pre_reap_quantization_method: bnb_4bit
# prune_args: pruning settings.
prune_args:
  n_experts_to_prune: null
  overwrite_pruned_model: true
  # NOTE(review): "perserve" looks like a misspelling of "preserve", but the
  # key names are read by the consumer, so they are kept exactly as written.
  # Rename only together with the code that parses this file.
  perserve_outliers: false
  perserve_super_experts: false
  prune_method: reap
# reap_args: top-level run switches and the random seed.
reap_args:
  debug: false
  do_eval: false
  plot_clusters: true
  profile: false
  run_observer_only: false
  seed: 42
  smoke_test: false