Feature Extraction
Transformers
audio
speech
sparse-autoencoder
sae
interpretability
mechanistic-interpretability
hubert
Instructions to use Egorgij21/Audio-SAE-HuBERT-large with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- Transformers
How to use Egorgij21/Audio-SAE-HuBERT-large with Transformers:
```python
# Use a pipeline as a high-level helper
from transformers import pipeline

pipe = pipeline("feature-extraction", model="Egorgij21/Audio-SAE-HuBERT-large")
```

```python
# Load model directly
from transformers import AutoModel

model = AutoModel.from_pretrained("Egorgij21/Audio-SAE-HuBERT-large", dtype="auto")
```
- Notebooks
- Google Colab
- Kaggle
Add files using upload-large-folder tool
Browse files
- layer_1/ae.pt +3 -0
- layer_1/config.json +23 -0
- layer_10/ae.pt +3 -0
- layer_10/config.json +23 -0
- layer_11/ae.pt +3 -0
- layer_11/config.json +23 -0
- layer_12/ae.pt +3 -0
- layer_12/config.json +23 -0
- layer_13/ae.pt +3 -0
- layer_13/config.json +23 -0
- layer_14/ae.pt +3 -0
- layer_14/config.json +23 -0
- layer_15/ae.pt +3 -0
- layer_15/config.json +23 -0
- layer_16/ae.pt +3 -0
- layer_16/config.json +23 -0
- layer_17/ae.pt +3 -0
- layer_17/config.json +23 -0
- layer_18/ae.pt +3 -0
- layer_18/config.json +23 -0
- layer_19/ae.pt +3 -0
- layer_19/config.json +23 -0
- layer_2/ae.pt +3 -0
- layer_2/config.json +23 -0
- layer_20/ae.pt +3 -0
- layer_20/config.json +23 -0
- layer_21/ae.pt +3 -0
- layer_21/config.json +23 -0
- layer_22/ae.pt +3 -0
- layer_22/config.json +23 -0
- layer_23/ae.pt +3 -0
- layer_23/config.json +23 -0
- layer_24/ae.pt +3 -0
- layer_24/config.json +23 -0
- layer_3/ae.pt +3 -0
- layer_3/config.json +23 -0
- layer_4/ae.pt +3 -0
- layer_4/config.json +23 -0
- layer_5/ae.pt +3 -0
- layer_5/config.json +23 -0
- layer_6/ae.pt +3 -0
- layer_6/config.json +23 -0
- layer_7/ae.pt +3 -0
- layer_7/config.json +23 -0
- layer_8/ae.pt +3 -0
- layer_8/config.json +23 -0
- layer_9/ae.pt +3 -0
- layer_9/config.json +23 -0
layer_1/ae.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c059c25228ce8381b51db64d1763f7c5777d0ea72f3ab8d0defe6139ace82679
|
| 3 |
+
size 201443287
|
layer_1/config.json
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"trainer": {
|
| 3 |
+
"trainer_class": "BatchTopKTrainer",
|
| 4 |
+
"dict_class": "BatchTopKSAE",
|
| 5 |
+
"lr": 0.0002,
|
| 6 |
+
"steps": 200001,
|
| 7 |
+
"auxk_alpha": 0.0,
|
| 8 |
+
"warmup_steps": 10000,
|
| 9 |
+
"decay_start": 160000,
|
| 10 |
+
"threshold_beta": 0.999,
|
| 11 |
+
"threshold_start_step": 1000,
|
| 12 |
+
"top_k_aux": 512,
|
| 13 |
+
"seed": 21,
|
| 14 |
+
"activation_dim": 1024,
|
| 15 |
+
"dict_size": 8192,
|
| 16 |
+
"k": 50,
|
| 17 |
+
"device": "cuda:6",
|
| 18 |
+
"layer": 0,
|
| 19 |
+
"lm_name": "hubert",
|
| 20 |
+
"wandb_name": "BatchTopKSAE",
|
| 21 |
+
"submodule_name": null
|
| 22 |
+
}
|
| 23 |
+
}
|
layer_10/ae.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:7e9f4415540d486b2029e92f2e5ad133be0be7abfd6d6df869d5899d05d8f8c9
|
| 3 |
+
size 201443287
|
layer_10/config.json
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"trainer": {
|
| 3 |
+
"trainer_class": "BatchTopKTrainer",
|
| 4 |
+
"dict_class": "BatchTopKSAE",
|
| 5 |
+
"lr": 0.0002,
|
| 6 |
+
"steps": 200001,
|
| 7 |
+
"auxk_alpha": 0.0,
|
| 8 |
+
"warmup_steps": 10000,
|
| 9 |
+
"decay_start": 160000,
|
| 10 |
+
"threshold_beta": 0.999,
|
| 11 |
+
"threshold_start_step": 1000,
|
| 12 |
+
"top_k_aux": 512,
|
| 13 |
+
"seed": 21,
|
| 14 |
+
"activation_dim": 1024,
|
| 15 |
+
"dict_size": 8192,
|
| 16 |
+
"k": 50,
|
| 17 |
+
"device": "cuda:6",
|
| 18 |
+
"layer": 9,
|
| 19 |
+
"lm_name": "hubert",
|
| 20 |
+
"wandb_name": "BatchTopKSAE",
|
| 21 |
+
"submodule_name": null
|
| 22 |
+
}
|
| 23 |
+
}
|
layer_11/ae.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1e62fc79486518da3fe5e99883fb5ef2a0b7f8fa97e38ea1d8196acb1a9ed934
|
| 3 |
+
size 201443287
|
layer_11/config.json
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"trainer": {
|
| 3 |
+
"trainer_class": "BatchTopKTrainer",
|
| 4 |
+
"dict_class": "BatchTopKSAE",
|
| 5 |
+
"lr": 0.0002,
|
| 6 |
+
"steps": 200001,
|
| 7 |
+
"auxk_alpha": 0.0,
|
| 8 |
+
"warmup_steps": 10000,
|
| 9 |
+
"decay_start": 160000,
|
| 10 |
+
"threshold_beta": 0.999,
|
| 11 |
+
"threshold_start_step": 1000,
|
| 12 |
+
"top_k_aux": 512,
|
| 13 |
+
"seed": 21,
|
| 14 |
+
"activation_dim": 1024,
|
| 15 |
+
"dict_size": 8192,
|
| 16 |
+
"k": 50,
|
| 17 |
+
"device": "cuda:6",
|
| 18 |
+
"layer": 10,
|
| 19 |
+
"lm_name": "hubert",
|
| 20 |
+
"wandb_name": "BatchTopKSAE",
|
| 21 |
+
"submodule_name": null
|
| 22 |
+
}
|
| 23 |
+
}
|
layer_12/ae.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e9abfb2edb248d8dca1b7361f3d703ab3d9c90c2e8ad613d6fdbebf1d6fb4959
|
| 3 |
+
size 201443287
|
layer_12/config.json
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"trainer": {
|
| 3 |
+
"trainer_class": "BatchTopKTrainer",
|
| 4 |
+
"dict_class": "BatchTopKSAE",
|
| 5 |
+
"lr": 0.0002,
|
| 6 |
+
"steps": 200001,
|
| 7 |
+
"auxk_alpha": 0.0,
|
| 8 |
+
"warmup_steps": 10000,
|
| 9 |
+
"decay_start": 160000,
|
| 10 |
+
"threshold_beta": 0.999,
|
| 11 |
+
"threshold_start_step": 1000,
|
| 12 |
+
"top_k_aux": 512,
|
| 13 |
+
"seed": 21,
|
| 14 |
+
"activation_dim": 1024,
|
| 15 |
+
"dict_size": 8192,
|
| 16 |
+
"k": 50,
|
| 17 |
+
"device": "cuda:6",
|
| 18 |
+
"layer": 11,
|
| 19 |
+
"lm_name": "hubert",
|
| 20 |
+
"wandb_name": "BatchTopKSAE",
|
| 21 |
+
"submodule_name": null
|
| 22 |
+
}
|
| 23 |
+
}
|
layer_13/ae.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e92d5b98735b28cddfc329dcdfd252b8dec4ba38d769a40b307acd9df0c54805
|
| 3 |
+
size 201443287
|
layer_13/config.json
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"trainer": {
|
| 3 |
+
"trainer_class": "BatchTopKTrainer",
|
| 4 |
+
"dict_class": "BatchTopKSAE",
|
| 5 |
+
"lr": 0.0002,
|
| 6 |
+
"steps": 200001,
|
| 7 |
+
"auxk_alpha": 0.0,
|
| 8 |
+
"warmup_steps": 10000,
|
| 9 |
+
"decay_start": 160000,
|
| 10 |
+
"threshold_beta": 0.999,
|
| 11 |
+
"threshold_start_step": 1000,
|
| 12 |
+
"top_k_aux": 512,
|
| 13 |
+
"seed": 21,
|
| 14 |
+
"activation_dim": 1024,
|
| 15 |
+
"dict_size": 8192,
|
| 16 |
+
"k": 50,
|
| 17 |
+
"device": "cuda:6",
|
| 18 |
+
"layer": 12,
|
| 19 |
+
"lm_name": "hubert",
|
| 20 |
+
"wandb_name": "BatchTopKSAE",
|
| 21 |
+
"submodule_name": null
|
| 22 |
+
}
|
| 23 |
+
}
|
layer_14/ae.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c09c6384b10c110949bae360708382a5669b6501da9db2fa2155b07671c2e9fc
|
| 3 |
+
size 201443287
|
layer_14/config.json
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"trainer": {
|
| 3 |
+
"trainer_class": "BatchTopKTrainer",
|
| 4 |
+
"dict_class": "BatchTopKSAE",
|
| 5 |
+
"lr": 0.0002,
|
| 6 |
+
"steps": 200001,
|
| 7 |
+
"auxk_alpha": 0.0,
|
| 8 |
+
"warmup_steps": 10000,
|
| 9 |
+
"decay_start": 160000,
|
| 10 |
+
"threshold_beta": 0.999,
|
| 11 |
+
"threshold_start_step": 1000,
|
| 12 |
+
"top_k_aux": 512,
|
| 13 |
+
"seed": 21,
|
| 14 |
+
"activation_dim": 1024,
|
| 15 |
+
"dict_size": 8192,
|
| 16 |
+
"k": 50,
|
| 17 |
+
"device": "cuda:6",
|
| 18 |
+
"layer": 13,
|
| 19 |
+
"lm_name": "hubert",
|
| 20 |
+
"wandb_name": "BatchTopKSAE",
|
| 21 |
+
"submodule_name": null
|
| 22 |
+
}
|
| 23 |
+
}
|
layer_15/ae.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1f1a1e583c7788722d098146018548ff40e34ab025b235bac078e9e6ba1558a8
|
| 3 |
+
size 201443287
|
layer_15/config.json
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"trainer": {
|
| 3 |
+
"trainer_class": "BatchTopKTrainer",
|
| 4 |
+
"dict_class": "BatchTopKSAE",
|
| 5 |
+
"lr": 0.0002,
|
| 6 |
+
"steps": 200001,
|
| 7 |
+
"auxk_alpha": 0.0,
|
| 8 |
+
"warmup_steps": 10000,
|
| 9 |
+
"decay_start": 160000,
|
| 10 |
+
"threshold_beta": 0.999,
|
| 11 |
+
"threshold_start_step": 1000,
|
| 12 |
+
"top_k_aux": 512,
|
| 13 |
+
"seed": 21,
|
| 14 |
+
"activation_dim": 1024,
|
| 15 |
+
"dict_size": 8192,
|
| 16 |
+
"k": 50,
|
| 17 |
+
"device": "cuda:6",
|
| 18 |
+
"layer": 14,
|
| 19 |
+
"lm_name": "hubert",
|
| 20 |
+
"wandb_name": "BatchTopKSAE",
|
| 21 |
+
"submodule_name": null
|
| 22 |
+
}
|
| 23 |
+
}
|
layer_16/ae.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6ce82076a9b74f239a460708d96812ae7667ef265d77425f0b2f2780d2c092e9
|
| 3 |
+
size 201443287
|
layer_16/config.json
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"trainer": {
|
| 3 |
+
"trainer_class": "BatchTopKTrainer",
|
| 4 |
+
"dict_class": "BatchTopKSAE",
|
| 5 |
+
"lr": 0.0002,
|
| 6 |
+
"steps": 200001,
|
| 7 |
+
"auxk_alpha": 0.0,
|
| 8 |
+
"warmup_steps": 10000,
|
| 9 |
+
"decay_start": 160000,
|
| 10 |
+
"threshold_beta": 0.999,
|
| 11 |
+
"threshold_start_step": 1000,
|
| 12 |
+
"top_k_aux": 512,
|
| 13 |
+
"seed": 21,
|
| 14 |
+
"activation_dim": 1024,
|
| 15 |
+
"dict_size": 8192,
|
| 16 |
+
"k": 50,
|
| 17 |
+
"device": "cuda:6",
|
| 18 |
+
"layer": 15,
|
| 19 |
+
"lm_name": "hubert",
|
| 20 |
+
"wandb_name": "BatchTopKSAE",
|
| 21 |
+
"submodule_name": null
|
| 22 |
+
}
|
| 23 |
+
}
|
layer_17/ae.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:60991d7431945c2f1183bf581b8ba2dcd93313db39e356529567bab5828229ba
|
| 3 |
+
size 201443287
|
layer_17/config.json
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"trainer": {
|
| 3 |
+
"trainer_class": "BatchTopKTrainer",
|
| 4 |
+
"dict_class": "BatchTopKSAE",
|
| 5 |
+
"lr": 0.0002,
|
| 6 |
+
"steps": 200001,
|
| 7 |
+
"auxk_alpha": 0.0,
|
| 8 |
+
"warmup_steps": 10000,
|
| 9 |
+
"decay_start": 160000,
|
| 10 |
+
"threshold_beta": 0.999,
|
| 11 |
+
"threshold_start_step": 1000,
|
| 12 |
+
"top_k_aux": 512,
|
| 13 |
+
"seed": 21,
|
| 14 |
+
"activation_dim": 1024,
|
| 15 |
+
"dict_size": 8192,
|
| 16 |
+
"k": 50,
|
| 17 |
+
"device": "cuda:6",
|
| 18 |
+
"layer": 16,
|
| 19 |
+
"lm_name": "hubert",
|
| 20 |
+
"wandb_name": "BatchTopKSAE",
|
| 21 |
+
"submodule_name": null
|
| 22 |
+
}
|
| 23 |
+
}
|
layer_18/ae.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:87305504268ceb2a164c022855db09340f79ef99dd8d286247836145f744c7fe
|
| 3 |
+
size 201443287
|
layer_18/config.json
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"trainer": {
|
| 3 |
+
"trainer_class": "BatchTopKTrainer",
|
| 4 |
+
"dict_class": "BatchTopKSAE",
|
| 5 |
+
"lr": 0.0002,
|
| 6 |
+
"steps": 200001,
|
| 7 |
+
"auxk_alpha": 0.0,
|
| 8 |
+
"warmup_steps": 10000,
|
| 9 |
+
"decay_start": 160000,
|
| 10 |
+
"threshold_beta": 0.999,
|
| 11 |
+
"threshold_start_step": 1000,
|
| 12 |
+
"top_k_aux": 512,
|
| 13 |
+
"seed": 21,
|
| 14 |
+
"activation_dim": 1024,
|
| 15 |
+
"dict_size": 8192,
|
| 16 |
+
"k": 50,
|
| 17 |
+
"device": "cuda:6",
|
| 18 |
+
"layer": 17,
|
| 19 |
+
"lm_name": "hubert",
|
| 20 |
+
"wandb_name": "BatchTopKSAE",
|
| 21 |
+
"submodule_name": null
|
| 22 |
+
}
|
| 23 |
+
}
|
layer_19/ae.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6a21abb1a223139e920984a9eeff31bb5787d39564cfa9c45dcf83dc93903710
|
| 3 |
+
size 201443287
|
layer_19/config.json
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"trainer": {
|
| 3 |
+
"trainer_class": "BatchTopKTrainer",
|
| 4 |
+
"dict_class": "BatchTopKSAE",
|
| 5 |
+
"lr": 0.0002,
|
| 6 |
+
"steps": 200001,
|
| 7 |
+
"auxk_alpha": 0.0,
|
| 8 |
+
"warmup_steps": 10000,
|
| 9 |
+
"decay_start": 160000,
|
| 10 |
+
"threshold_beta": 0.999,
|
| 11 |
+
"threshold_start_step": 1000,
|
| 12 |
+
"top_k_aux": 512,
|
| 13 |
+
"seed": 21,
|
| 14 |
+
"activation_dim": 1024,
|
| 15 |
+
"dict_size": 8192,
|
| 16 |
+
"k": 50,
|
| 17 |
+
"device": "cuda:6",
|
| 18 |
+
"layer": 18,
|
| 19 |
+
"lm_name": "hubert",
|
| 20 |
+
"wandb_name": "BatchTopKSAE",
|
| 21 |
+
"submodule_name": null
|
| 22 |
+
}
|
| 23 |
+
}
|
layer_2/ae.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:b8307d7e7c472b03e4ab3974778f93c8224c94027063274170898f5b584160eb
|
| 3 |
+
size 201443287
|
layer_2/config.json
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"trainer": {
|
| 3 |
+
"trainer_class": "BatchTopKTrainer",
|
| 4 |
+
"dict_class": "BatchTopKSAE",
|
| 5 |
+
"lr": 0.0002,
|
| 6 |
+
"steps": 200001,
|
| 7 |
+
"auxk_alpha": 0.0,
|
| 8 |
+
"warmup_steps": 10000,
|
| 9 |
+
"decay_start": 160000,
|
| 10 |
+
"threshold_beta": 0.999,
|
| 11 |
+
"threshold_start_step": 1000,
|
| 12 |
+
"top_k_aux": 512,
|
| 13 |
+
"seed": 21,
|
| 14 |
+
"activation_dim": 1024,
|
| 15 |
+
"dict_size": 8192,
|
| 16 |
+
"k": 50,
|
| 17 |
+
"device": "cuda:6",
|
| 18 |
+
"layer": 1,
|
| 19 |
+
"lm_name": "hubert",
|
| 20 |
+
"wandb_name": "BatchTopKSAE",
|
| 21 |
+
"submodule_name": null
|
| 22 |
+
}
|
| 23 |
+
}
|
layer_20/ae.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:639b314097536aeee49f5ac5e0a79a5e7225b3820ea1ba03bf623fad439266af
|
| 3 |
+
size 201443287
|
layer_20/config.json
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"trainer": {
|
| 3 |
+
"trainer_class": "BatchTopKTrainer",
|
| 4 |
+
"dict_class": "BatchTopKSAE",
|
| 5 |
+
"lr": 0.0002,
|
| 6 |
+
"steps": 200001,
|
| 7 |
+
"auxk_alpha": 0.0,
|
| 8 |
+
"warmup_steps": 10000,
|
| 9 |
+
"decay_start": 160000,
|
| 10 |
+
"threshold_beta": 0.999,
|
| 11 |
+
"threshold_start_step": 1000,
|
| 12 |
+
"top_k_aux": 512,
|
| 13 |
+
"seed": 21,
|
| 14 |
+
"activation_dim": 1024,
|
| 15 |
+
"dict_size": 8192,
|
| 16 |
+
"k": 50,
|
| 17 |
+
"device": "cuda:6",
|
| 18 |
+
"layer": 19,
|
| 19 |
+
"lm_name": "hubert",
|
| 20 |
+
"wandb_name": "BatchTopKSAE",
|
| 21 |
+
"submodule_name": null
|
| 22 |
+
}
|
| 23 |
+
}
|
layer_21/ae.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:8099f1fae767787d7a1b559eee78595398750cadd7f8637f50eef32e7a196ed9
|
| 3 |
+
size 201443287
|
layer_21/config.json
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"trainer": {
|
| 3 |
+
"trainer_class": "BatchTopKTrainer",
|
| 4 |
+
"dict_class": "BatchTopKSAE",
|
| 5 |
+
"lr": 0.0002,
|
| 6 |
+
"steps": 200001,
|
| 7 |
+
"auxk_alpha": 0.0,
|
| 8 |
+
"warmup_steps": 10000,
|
| 9 |
+
"decay_start": 160000,
|
| 10 |
+
"threshold_beta": 0.999,
|
| 11 |
+
"threshold_start_step": 1000,
|
| 12 |
+
"top_k_aux": 512,
|
| 13 |
+
"seed": 21,
|
| 14 |
+
"activation_dim": 1024,
|
| 15 |
+
"dict_size": 8192,
|
| 16 |
+
"k": 50,
|
| 17 |
+
"device": "cuda:6",
|
| 18 |
+
"layer": 20,
|
| 19 |
+
"lm_name": "hubert",
|
| 20 |
+
"wandb_name": "BatchTopKSAE",
|
| 21 |
+
"submodule_name": null
|
| 22 |
+
}
|
| 23 |
+
}
|
layer_22/ae.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1fa202aa187b83bf43db2eaa6187fe8244e7b1fa4c45071d591fdf672f2663e8
|
| 3 |
+
size 201443287
|
layer_22/config.json
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"trainer": {
|
| 3 |
+
"trainer_class": "BatchTopKTrainer",
|
| 4 |
+
"dict_class": "BatchTopKSAE",
|
| 5 |
+
"lr": 0.0002,
|
| 6 |
+
"steps": 200001,
|
| 7 |
+
"auxk_alpha": 0.0,
|
| 8 |
+
"warmup_steps": 10000,
|
| 9 |
+
"decay_start": 160000,
|
| 10 |
+
"threshold_beta": 0.999,
|
| 11 |
+
"threshold_start_step": 1000,
|
| 12 |
+
"top_k_aux": 512,
|
| 13 |
+
"seed": 21,
|
| 14 |
+
"activation_dim": 1024,
|
| 15 |
+
"dict_size": 8192,
|
| 16 |
+
"k": 50,
|
| 17 |
+
"device": "cuda:6",
|
| 18 |
+
"layer": 21,
|
| 19 |
+
"lm_name": "hubert",
|
| 20 |
+
"wandb_name": "BatchTopKSAE",
|
| 21 |
+
"submodule_name": null
|
| 22 |
+
}
|
| 23 |
+
}
|
layer_23/ae.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d50d5fd3c1cf4e4255cb3a5219edc208ec0877026bd3912a5b17ae00ec101b2c
|
| 3 |
+
size 201443287
|
layer_23/config.json
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"trainer": {
|
| 3 |
+
"trainer_class": "BatchTopKTrainer",
|
| 4 |
+
"dict_class": "BatchTopKSAE",
|
| 5 |
+
"lr": 0.0002,
|
| 6 |
+
"steps": 200001,
|
| 7 |
+
"auxk_alpha": 0.0,
|
| 8 |
+
"warmup_steps": 10000,
|
| 9 |
+
"decay_start": 160000,
|
| 10 |
+
"threshold_beta": 0.999,
|
| 11 |
+
"threshold_start_step": 1000,
|
| 12 |
+
"top_k_aux": 512,
|
| 13 |
+
"seed": 21,
|
| 14 |
+
"activation_dim": 1024,
|
| 15 |
+
"dict_size": 8192,
|
| 16 |
+
"k": 50,
|
| 17 |
+
"device": "cuda:6",
|
| 18 |
+
"layer": 22,
|
| 19 |
+
"lm_name": "hubert",
|
| 20 |
+
"wandb_name": "BatchTopKSAE",
|
| 21 |
+
"submodule_name": null
|
| 22 |
+
}
|
| 23 |
+
}
|
layer_24/ae.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:96f6a5ffeac5e1675ddaa02b0d3703da81643de3e41938ca816f510e2092ecab
|
| 3 |
+
size 201443287
|
layer_24/config.json
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"trainer": {
|
| 3 |
+
"trainer_class": "BatchTopKTrainer",
|
| 4 |
+
"dict_class": "BatchTopKSAE",
|
| 5 |
+
"lr": 0.0002,
|
| 6 |
+
"steps": 200001,
|
| 7 |
+
"auxk_alpha": 0.0,
|
| 8 |
+
"warmup_steps": 10000,
|
| 9 |
+
"decay_start": 160000,
|
| 10 |
+
"threshold_beta": 0.999,
|
| 11 |
+
"threshold_start_step": 1000,
|
| 12 |
+
"top_k_aux": 512,
|
| 13 |
+
"seed": 21,
|
| 14 |
+
"activation_dim": 1024,
|
| 15 |
+
"dict_size": 8192,
|
| 16 |
+
"k": 50,
|
| 17 |
+
"device": "cuda:6",
|
| 18 |
+
"layer": 23,
|
| 19 |
+
"lm_name": "hubert",
|
| 20 |
+
"wandb_name": "BatchTopKSAE",
|
| 21 |
+
"submodule_name": null
|
| 22 |
+
}
|
| 23 |
+
}
|
layer_3/ae.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:24c7e6a655460398deb7eb08baf839c1622791827628970d8453e38e8479f987
|
| 3 |
+
size 201443287
|
layer_3/config.json
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"trainer": {
|
| 3 |
+
"trainer_class": "BatchTopKTrainer",
|
| 4 |
+
"dict_class": "BatchTopKSAE",
|
| 5 |
+
"lr": 0.0002,
|
| 6 |
+
"steps": 200001,
|
| 7 |
+
"auxk_alpha": 0.0,
|
| 8 |
+
"warmup_steps": 10000,
|
| 9 |
+
"decay_start": 160000,
|
| 10 |
+
"threshold_beta": 0.999,
|
| 11 |
+
"threshold_start_step": 1000,
|
| 12 |
+
"top_k_aux": 512,
|
| 13 |
+
"seed": 21,
|
| 14 |
+
"activation_dim": 1024,
|
| 15 |
+
"dict_size": 8192,
|
| 16 |
+
"k": 50,
|
| 17 |
+
"device": "cuda:6",
|
| 18 |
+
"layer": 2,
|
| 19 |
+
"lm_name": "hubert",
|
| 20 |
+
"wandb_name": "BatchTopKSAE",
|
| 21 |
+
"submodule_name": null
|
| 22 |
+
}
|
| 23 |
+
}
|
layer_4/ae.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:bb5fac60448d1fc5e4fd95327e14c9712ccd36b58b3b2db2d1a0cbf5b15bee34
|
| 3 |
+
size 201443287
|
layer_4/config.json
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"trainer": {
|
| 3 |
+
"trainer_class": "BatchTopKTrainer",
|
| 4 |
+
"dict_class": "BatchTopKSAE",
|
| 5 |
+
"lr": 0.0002,
|
| 6 |
+
"steps": 200001,
|
| 7 |
+
"auxk_alpha": 0.0,
|
| 8 |
+
"warmup_steps": 10000,
|
| 9 |
+
"decay_start": 160000,
|
| 10 |
+
"threshold_beta": 0.999,
|
| 11 |
+
"threshold_start_step": 1000,
|
| 12 |
+
"top_k_aux": 512,
|
| 13 |
+
"seed": 21,
|
| 14 |
+
"activation_dim": 1024,
|
| 15 |
+
"dict_size": 8192,
|
| 16 |
+
"k": 50,
|
| 17 |
+
"device": "cuda:6",
|
| 18 |
+
"layer": 3,
|
| 19 |
+
"lm_name": "hubert",
|
| 20 |
+
"wandb_name": "BatchTopKSAE",
|
| 21 |
+
"submodule_name": null
|
| 22 |
+
}
|
| 23 |
+
}
|
layer_5/ae.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:486406910d024c6ccd52b551f1cd3ecd5b108068e579ac11f795a3882a5ab157
|
| 3 |
+
size 201443287
|
layer_5/config.json
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"trainer": {
|
| 3 |
+
"trainer_class": "BatchTopKTrainer",
|
| 4 |
+
"dict_class": "BatchTopKSAE",
|
| 5 |
+
"lr": 0.0002,
|
| 6 |
+
"steps": 200001,
|
| 7 |
+
"auxk_alpha": 0.0,
|
| 8 |
+
"warmup_steps": 10000,
|
| 9 |
+
"decay_start": 160000,
|
| 10 |
+
"threshold_beta": 0.999,
|
| 11 |
+
"threshold_start_step": 1000,
|
| 12 |
+
"top_k_aux": 512,
|
| 13 |
+
"seed": 21,
|
| 14 |
+
"activation_dim": 1024,
|
| 15 |
+
"dict_size": 8192,
|
| 16 |
+
"k": 50,
|
| 17 |
+
"device": "cuda:6",
|
| 18 |
+
"layer": 4,
|
| 19 |
+
"lm_name": "hubert",
|
| 20 |
+
"wandb_name": "BatchTopKSAE",
|
| 21 |
+
"submodule_name": null
|
| 22 |
+
}
|
| 23 |
+
}
|
layer_6/ae.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:185c5a6ab133883d7284b9bec3d38ee258525cb565c857e7e285ca018bb98aa9
|
| 3 |
+
size 201443287
|
layer_6/config.json
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"trainer": {
|
| 3 |
+
"trainer_class": "BatchTopKTrainer",
|
| 4 |
+
"dict_class": "BatchTopKSAE",
|
| 5 |
+
"lr": 0.0002,
|
| 6 |
+
"steps": 200001,
|
| 7 |
+
"auxk_alpha": 0.0,
|
| 8 |
+
"warmup_steps": 10000,
|
| 9 |
+
"decay_start": 160000,
|
| 10 |
+
"threshold_beta": 0.999,
|
| 11 |
+
"threshold_start_step": 1000,
|
| 12 |
+
"top_k_aux": 512,
|
| 13 |
+
"seed": 21,
|
| 14 |
+
"activation_dim": 1024,
|
| 15 |
+
"dict_size": 8192,
|
| 16 |
+
"k": 50,
|
| 17 |
+
"device": "cuda:6",
|
| 18 |
+
"layer": 5,
|
| 19 |
+
"lm_name": "hubert",
|
| 20 |
+
"wandb_name": "BatchTopKSAE",
|
| 21 |
+
"submodule_name": null
|
| 22 |
+
}
|
| 23 |
+
}
|
layer_7/ae.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:99c7cf4b3fda6e38f85553b1ecd8756f199ac86bf04e5debb65b71b702ae2f2f
|
| 3 |
+
size 201443287
|
layer_7/config.json
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"trainer": {
|
| 3 |
+
"trainer_class": "BatchTopKTrainer",
|
| 4 |
+
"dict_class": "BatchTopKSAE",
|
| 5 |
+
"lr": 0.0002,
|
| 6 |
+
"steps": 200001,
|
| 7 |
+
"auxk_alpha": 0.0,
|
| 8 |
+
"warmup_steps": 10000,
|
| 9 |
+
"decay_start": 160000,
|
| 10 |
+
"threshold_beta": 0.999,
|
| 11 |
+
"threshold_start_step": 1000,
|
| 12 |
+
"top_k_aux": 512,
|
| 13 |
+
"seed": 21,
|
| 14 |
+
"activation_dim": 1024,
|
| 15 |
+
"dict_size": 8192,
|
| 16 |
+
"k": 50,
|
| 17 |
+
"device": "cuda:6",
|
| 18 |
+
"layer": 6,
|
| 19 |
+
"lm_name": "hubert",
|
| 20 |
+
"wandb_name": "BatchTopKSAE",
|
| 21 |
+
"submodule_name": null
|
| 22 |
+
}
|
| 23 |
+
}
|
layer_8/ae.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d592d33fe94b23a49e8732c6fcc00a7cc5d224580e3ba57016b2a56bde564d18
|
| 3 |
+
size 201443287
|
layer_8/config.json
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"trainer": {
|
| 3 |
+
"trainer_class": "BatchTopKTrainer",
|
| 4 |
+
"dict_class": "BatchTopKSAE",
|
| 5 |
+
"lr": 0.0002,
|
| 6 |
+
"steps": 200001,
|
| 7 |
+
"auxk_alpha": 0.0,
|
| 8 |
+
"warmup_steps": 10000,
|
| 9 |
+
"decay_start": 160000,
|
| 10 |
+
"threshold_beta": 0.999,
|
| 11 |
+
"threshold_start_step": 1000,
|
| 12 |
+
"top_k_aux": 512,
|
| 13 |
+
"seed": 21,
|
| 14 |
+
"activation_dim": 1024,
|
| 15 |
+
"dict_size": 8192,
|
| 16 |
+
"k": 50,
|
| 17 |
+
"device": "cuda:6",
|
| 18 |
+
"layer": 7,
|
| 19 |
+
"lm_name": "hubert",
|
| 20 |
+
"wandb_name": "BatchTopKSAE",
|
| 21 |
+
"submodule_name": null
|
| 22 |
+
}
|
| 23 |
+
}
|
layer_9/ae.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a5cfbb42e12ef2bdcda0a263e0004af4ee662f383d77da4530bc9f78942777dd
|
| 3 |
+
size 201443287
|
layer_9/config.json
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"trainer": {
|
| 3 |
+
"trainer_class": "BatchTopKTrainer",
|
| 4 |
+
"dict_class": "BatchTopKSAE",
|
| 5 |
+
"lr": 0.0002,
|
| 6 |
+
"steps": 200001,
|
| 7 |
+
"auxk_alpha": 0.0,
|
| 8 |
+
"warmup_steps": 10000,
|
| 9 |
+
"decay_start": 160000,
|
| 10 |
+
"threshold_beta": 0.999,
|
| 11 |
+
"threshold_start_step": 1000,
|
| 12 |
+
"top_k_aux": 512,
|
| 13 |
+
"seed": 21,
|
| 14 |
+
"activation_dim": 1024,
|
| 15 |
+
"dict_size": 8192,
|
| 16 |
+
"k": 50,
|
| 17 |
+
"device": "cuda:6",
|
| 18 |
+
"layer": 8,
|
| 19 |
+
"lm_name": "hubert",
|
| 20 |
+
"wandb_name": "BatchTopKSAE",
|
| 21 |
+
"submodule_name": null
|
| 22 |
+
}
|
| 23 |
+
}
|