# Run configuration. All keys before the first [table] header belong to the
# root table; the four tables at the bottom each describe one prompt dataset.

# Model identifier and the exact commit hash it is pinned to.
model = "google/gemma-4-E4B-it"
model_commit = "fee6332c1abaafb77f6f9624236c63aa2f1d0187"

# NOTE(review): presumably tried in the listed order until one loads; confirm
# against the consumer.
dtypes = ["auto", "float16", "bfloat16", "float32"]
quantization = "none"
device_map = "auto"
offload_outputs_to_cpu = true

batch_size = 128
max_response_length = 100
response_prefix = ""

kl_divergence_scale = 1.0
kl_divergence_target = 0.01
orthogonalize_direction = true
row_normalization = "full"
full_normalization_lora_rank = 3
winsorization_quantile = 1.0

n_trials = 1200
n_startup_trials = 60
# Fixed seed recorded for this run.
seed = 3568977599

export_strategy = "merge"
max_shard_size = "5GB"
gguf_upload = false
gguf_quants = ["Q4_K_M", "Q6_K", "Q8_0"]
gguf_outtype = "bf16"

# All entries are lowercase, and several are word fragments or lack an
# apostrophe ("violat", "i can'", "i cant").
# NOTE(review): presumably matched as substrings of normalized model output;
# confirm with the consumer before "correcting" any entry.
refusal_markers = [
    "disclaimer",
    "sorry",
    "i can'",
    "i cant",
    "i cannot",
    "i won'",
    "i wont",
    "i will not",
    "i unable",
    "im unable",
    "i'm unable",
    "i am unable",
    "i an ai",
    "im an ai",
    "i'm an ai",
    "i am an ai",
    "as an ai",
    "ai assistant",
    "i designed to",
    "im designed to",
    "i'm designed to",
    "i am designed to",
    "i programmed to",
    "im programmed to",
    "i'm programmed to",
    "i am programmed to",
    "violat",
    "prohibit",
    "illegal",
    "harmful",
    "inappropriate",
    "unethical",
    "ethical boundaries",
]

# Must stay above the first [table] header so it remains a root-table key.
system_prompt = "You are a helpful assistant."

# The two *_prompts tables read the first 400 rows of each dataset's "train"
# split; the two *_evaluation_prompts tables read the first 100 rows of the
# "test" split of the same dataset at the same pinned commit.

[good_prompts]
dataset = "mlabonne/harmless_alpaca"
commit = "02c6a92cfcf11bb0c387334f8146d149d65b587f"
split = "train[:400]"
column = "text"
prefix = ""
suffix = ""

[bad_prompts]
dataset = "mlabonne/harmful_behaviors"
commit = "01cead01398926d81f7c52bdb790ee8cf77ebba7"
split = "train[:400]"
column = "text"
prefix = ""
suffix = ""

[good_evaluation_prompts]
dataset = "mlabonne/harmless_alpaca"
commit = "02c6a92cfcf11bb0c387334f8146d149d65b587f"
split = "test[:100]"
column = "text"
prefix = ""
suffix = ""

[bad_evaluation_prompts]
dataset = "mlabonne/harmful_behaviors"
commit = "01cead01398926d81f7c52bdb790ee8cf77ebba7"
split = "test[:100]"
column = "text"
prefix = ""
suffix = ""