{
  "source_model": "outputs/qwen3.6-27b-golden-n3_reg025-merge-alpha080",
  "technique": "refusal_direction_ablation",
  "method": "advanced",
  "method_config": {
    "n_directions": 2,
    "direction_method": "diff_means",
    "norm_preserve": true,
    "regularization": 0.5,
    "refinement_passes": 1,
    "project_biases": true,
    "use_chat_template": true,
    "use_whitened_svd": false,
    "true_iterative_refinement": false,
    "winsorize_activations": false,
    "float_layer_interpolation": false,
    "cot_aware": false,
    "use_kl_optimization": false,
    "use_lora_ablation": false,
    "som_iterations": null,
    "som_learning_rate": null,
    "som_sigma": null,
    "som_candidate_count": null,
    "som_harmless_pc_count": null,
    "som_distortion_aware": null,
    "som_diversity_penalty": null,
    "som_min_signal_to_noise": null,
    "layer_selection": "knee_cosmic",
    "min_layer_fraction": 0.75,
    "max_layer_fraction": 0.25,
    "harmless_pc_count": 0,
    "shield_concept_count": 0,
    "shield_ridge": 0.05,
    "shield_residualize": false,
    "shield_layer_penalty": 0.0,
    "projection_target": "all",
    "projection_row_fraction": 1.0,
    "som_contiguous_layer_budget": null,
    "spectral_cascade": false,
    "spectral_bands": 3,
    "spectral_threshold": 0.05
  },
  "references": [
    "Arditi et al., Refusal in Language Models Is Mediated by a Single Direction (NeurIPS 2024)",
    "Gabliteration: SVD-based multi-direction extraction (arXiv:2512.18901)",
    "Norm-Preserving Biprojected Abliteration (grimjim, 2025)",
    "Young, Comparative Analysis of LLM Abliteration Methods (arXiv:2512.13655)",
    "Joad et al., More to Refusal than a Single Direction (2026)",
    "Piras et al., SOM Directions Are Better than One (AAAI 2026)",
    "Heretic (p-e-w, 2025): Bayesian optimization, LoRA-mediated ablation, winsorization",
    "OBLITERATUS: Whitened SVD, EGA, CoT-aware, KL co-optimization, float interpolation (novel)"
  ],
  "strong_layers": [
    63,
    62,
    61,
    60,
    59,
    55,
    54,
    58,
    57,
    56,
    53,
    52,
    48,
    50,
    49
  ],
  "n_harmful_prompts": 842,
  "n_harmless_prompts": 842,
  "quality_metrics": {
    "perplexity": 3.8536766982114554,
    "coherence": 1.0,
    "refusal_rate": 0.0,
    "degenerate_count": 4,
    "kl_divergence": 0.10729097574949265,
    "spectral_certification": "RED"
  },
  "kl_contributions": {},
  "cot_preserved_layers": [],
  "float_layer_weights": {},
  "lora_adapters_saved": false
}