Egorgij21 commited on
Commit
eb37d01
·
verified ·
1 Parent(s): 1cdab6f

Add files using upload-large-folder tool

Browse files
layer_1/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c059c25228ce8381b51db64d1763f7c5777d0ea72f3ab8d0defe6139ace82679
3
+ size 201443287
layer_1/config.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "BatchTopKTrainer",
4
+ "dict_class": "BatchTopKSAE",
5
+ "lr": 0.0002,
6
+ "steps": 200001,
7
+ "auxk_alpha": 0.0,
8
+ "warmup_steps": 10000,
9
+ "decay_start": 160000,
10
+ "threshold_beta": 0.999,
11
+ "threshold_start_step": 1000,
12
+ "top_k_aux": 512,
13
+ "seed": 21,
14
+ "activation_dim": 1024,
15
+ "dict_size": 8192,
16
+ "k": 50,
17
+ "device": "cuda:6",
18
+ "layer": 0,
19
+ "lm_name": "hubert",
20
+ "wandb_name": "BatchTopKSAE",
21
+ "submodule_name": null
22
+ }
23
+ }
layer_10/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7e9f4415540d486b2029e92f2e5ad133be0be7abfd6d6df869d5899d05d8f8c9
3
+ size 201443287
layer_10/config.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "BatchTopKTrainer",
4
+ "dict_class": "BatchTopKSAE",
5
+ "lr": 0.0002,
6
+ "steps": 200001,
7
+ "auxk_alpha": 0.0,
8
+ "warmup_steps": 10000,
9
+ "decay_start": 160000,
10
+ "threshold_beta": 0.999,
11
+ "threshold_start_step": 1000,
12
+ "top_k_aux": 512,
13
+ "seed": 21,
14
+ "activation_dim": 1024,
15
+ "dict_size": 8192,
16
+ "k": 50,
17
+ "device": "cuda:6",
18
+ "layer": 9,
19
+ "lm_name": "hubert",
20
+ "wandb_name": "BatchTopKSAE",
21
+ "submodule_name": null
22
+ }
23
+ }
layer_11/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1e62fc79486518da3fe5e99883fb5ef2a0b7f8fa97e38ea1d8196acb1a9ed934
3
+ size 201443287
layer_11/config.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "BatchTopKTrainer",
4
+ "dict_class": "BatchTopKSAE",
5
+ "lr": 0.0002,
6
+ "steps": 200001,
7
+ "auxk_alpha": 0.0,
8
+ "warmup_steps": 10000,
9
+ "decay_start": 160000,
10
+ "threshold_beta": 0.999,
11
+ "threshold_start_step": 1000,
12
+ "top_k_aux": 512,
13
+ "seed": 21,
14
+ "activation_dim": 1024,
15
+ "dict_size": 8192,
16
+ "k": 50,
17
+ "device": "cuda:6",
18
+ "layer": 10,
19
+ "lm_name": "hubert",
20
+ "wandb_name": "BatchTopKSAE",
21
+ "submodule_name": null
22
+ }
23
+ }
layer_12/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e9abfb2edb248d8dca1b7361f3d703ab3d9c90c2e8ad613d6fdbebf1d6fb4959
3
+ size 201443287
layer_12/config.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "BatchTopKTrainer",
4
+ "dict_class": "BatchTopKSAE",
5
+ "lr": 0.0002,
6
+ "steps": 200001,
7
+ "auxk_alpha": 0.0,
8
+ "warmup_steps": 10000,
9
+ "decay_start": 160000,
10
+ "threshold_beta": 0.999,
11
+ "threshold_start_step": 1000,
12
+ "top_k_aux": 512,
13
+ "seed": 21,
14
+ "activation_dim": 1024,
15
+ "dict_size": 8192,
16
+ "k": 50,
17
+ "device": "cuda:6",
18
+ "layer": 11,
19
+ "lm_name": "hubert",
20
+ "wandb_name": "BatchTopKSAE",
21
+ "submodule_name": null
22
+ }
23
+ }
layer_13/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e92d5b98735b28cddfc329dcdfd252b8dec4ba38d769a40b307acd9df0c54805
3
+ size 201443287
layer_13/config.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "BatchTopKTrainer",
4
+ "dict_class": "BatchTopKSAE",
5
+ "lr": 0.0002,
6
+ "steps": 200001,
7
+ "auxk_alpha": 0.0,
8
+ "warmup_steps": 10000,
9
+ "decay_start": 160000,
10
+ "threshold_beta": 0.999,
11
+ "threshold_start_step": 1000,
12
+ "top_k_aux": 512,
13
+ "seed": 21,
14
+ "activation_dim": 1024,
15
+ "dict_size": 8192,
16
+ "k": 50,
17
+ "device": "cuda:6",
18
+ "layer": 12,
19
+ "lm_name": "hubert",
20
+ "wandb_name": "BatchTopKSAE",
21
+ "submodule_name": null
22
+ }
23
+ }
layer_14/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c09c6384b10c110949bae360708382a5669b6501da9db2fa2155b07671c2e9fc
3
+ size 201443287
layer_14/config.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "BatchTopKTrainer",
4
+ "dict_class": "BatchTopKSAE",
5
+ "lr": 0.0002,
6
+ "steps": 200001,
7
+ "auxk_alpha": 0.0,
8
+ "warmup_steps": 10000,
9
+ "decay_start": 160000,
10
+ "threshold_beta": 0.999,
11
+ "threshold_start_step": 1000,
12
+ "top_k_aux": 512,
13
+ "seed": 21,
14
+ "activation_dim": 1024,
15
+ "dict_size": 8192,
16
+ "k": 50,
17
+ "device": "cuda:6",
18
+ "layer": 13,
19
+ "lm_name": "hubert",
20
+ "wandb_name": "BatchTopKSAE",
21
+ "submodule_name": null
22
+ }
23
+ }
layer_15/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1f1a1e583c7788722d098146018548ff40e34ab025b235bac078e9e6ba1558a8
3
+ size 201443287
layer_15/config.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "BatchTopKTrainer",
4
+ "dict_class": "BatchTopKSAE",
5
+ "lr": 0.0002,
6
+ "steps": 200001,
7
+ "auxk_alpha": 0.0,
8
+ "warmup_steps": 10000,
9
+ "decay_start": 160000,
10
+ "threshold_beta": 0.999,
11
+ "threshold_start_step": 1000,
12
+ "top_k_aux": 512,
13
+ "seed": 21,
14
+ "activation_dim": 1024,
15
+ "dict_size": 8192,
16
+ "k": 50,
17
+ "device": "cuda:6",
18
+ "layer": 14,
19
+ "lm_name": "hubert",
20
+ "wandb_name": "BatchTopKSAE",
21
+ "submodule_name": null
22
+ }
23
+ }
layer_16/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6ce82076a9b74f239a460708d96812ae7667ef265d77425f0b2f2780d2c092e9
3
+ size 201443287
layer_16/config.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "BatchTopKTrainer",
4
+ "dict_class": "BatchTopKSAE",
5
+ "lr": 0.0002,
6
+ "steps": 200001,
7
+ "auxk_alpha": 0.0,
8
+ "warmup_steps": 10000,
9
+ "decay_start": 160000,
10
+ "threshold_beta": 0.999,
11
+ "threshold_start_step": 1000,
12
+ "top_k_aux": 512,
13
+ "seed": 21,
14
+ "activation_dim": 1024,
15
+ "dict_size": 8192,
16
+ "k": 50,
17
+ "device": "cuda:6",
18
+ "layer": 15,
19
+ "lm_name": "hubert",
20
+ "wandb_name": "BatchTopKSAE",
21
+ "submodule_name": null
22
+ }
23
+ }
layer_17/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:60991d7431945c2f1183bf581b8ba2dcd93313db39e356529567bab5828229ba
3
+ size 201443287
layer_17/config.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "BatchTopKTrainer",
4
+ "dict_class": "BatchTopKSAE",
5
+ "lr": 0.0002,
6
+ "steps": 200001,
7
+ "auxk_alpha": 0.0,
8
+ "warmup_steps": 10000,
9
+ "decay_start": 160000,
10
+ "threshold_beta": 0.999,
11
+ "threshold_start_step": 1000,
12
+ "top_k_aux": 512,
13
+ "seed": 21,
14
+ "activation_dim": 1024,
15
+ "dict_size": 8192,
16
+ "k": 50,
17
+ "device": "cuda:6",
18
+ "layer": 16,
19
+ "lm_name": "hubert",
20
+ "wandb_name": "BatchTopKSAE",
21
+ "submodule_name": null
22
+ }
23
+ }
layer_18/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:87305504268ceb2a164c022855db09340f79ef99dd8d286247836145f744c7fe
3
+ size 201443287
layer_18/config.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "BatchTopKTrainer",
4
+ "dict_class": "BatchTopKSAE",
5
+ "lr": 0.0002,
6
+ "steps": 200001,
7
+ "auxk_alpha": 0.0,
8
+ "warmup_steps": 10000,
9
+ "decay_start": 160000,
10
+ "threshold_beta": 0.999,
11
+ "threshold_start_step": 1000,
12
+ "top_k_aux": 512,
13
+ "seed": 21,
14
+ "activation_dim": 1024,
15
+ "dict_size": 8192,
16
+ "k": 50,
17
+ "device": "cuda:6",
18
+ "layer": 17,
19
+ "lm_name": "hubert",
20
+ "wandb_name": "BatchTopKSAE",
21
+ "submodule_name": null
22
+ }
23
+ }
layer_19/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6a21abb1a223139e920984a9eeff31bb5787d39564cfa9c45dcf83dc93903710
3
+ size 201443287
layer_19/config.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "BatchTopKTrainer",
4
+ "dict_class": "BatchTopKSAE",
5
+ "lr": 0.0002,
6
+ "steps": 200001,
7
+ "auxk_alpha": 0.0,
8
+ "warmup_steps": 10000,
9
+ "decay_start": 160000,
10
+ "threshold_beta": 0.999,
11
+ "threshold_start_step": 1000,
12
+ "top_k_aux": 512,
13
+ "seed": 21,
14
+ "activation_dim": 1024,
15
+ "dict_size": 8192,
16
+ "k": 50,
17
+ "device": "cuda:6",
18
+ "layer": 18,
19
+ "lm_name": "hubert",
20
+ "wandb_name": "BatchTopKSAE",
21
+ "submodule_name": null
22
+ }
23
+ }
layer_2/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b8307d7e7c472b03e4ab3974778f93c8224c94027063274170898f5b584160eb
3
+ size 201443287
layer_2/config.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "BatchTopKTrainer",
4
+ "dict_class": "BatchTopKSAE",
5
+ "lr": 0.0002,
6
+ "steps": 200001,
7
+ "auxk_alpha": 0.0,
8
+ "warmup_steps": 10000,
9
+ "decay_start": 160000,
10
+ "threshold_beta": 0.999,
11
+ "threshold_start_step": 1000,
12
+ "top_k_aux": 512,
13
+ "seed": 21,
14
+ "activation_dim": 1024,
15
+ "dict_size": 8192,
16
+ "k": 50,
17
+ "device": "cuda:6",
18
+ "layer": 1,
19
+ "lm_name": "hubert",
20
+ "wandb_name": "BatchTopKSAE",
21
+ "submodule_name": null
22
+ }
23
+ }
layer_20/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:639b314097536aeee49f5ac5e0a79a5e7225b3820ea1ba03bf623fad439266af
3
+ size 201443287
layer_20/config.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "BatchTopKTrainer",
4
+ "dict_class": "BatchTopKSAE",
5
+ "lr": 0.0002,
6
+ "steps": 200001,
7
+ "auxk_alpha": 0.0,
8
+ "warmup_steps": 10000,
9
+ "decay_start": 160000,
10
+ "threshold_beta": 0.999,
11
+ "threshold_start_step": 1000,
12
+ "top_k_aux": 512,
13
+ "seed": 21,
14
+ "activation_dim": 1024,
15
+ "dict_size": 8192,
16
+ "k": 50,
17
+ "device": "cuda:6",
18
+ "layer": 19,
19
+ "lm_name": "hubert",
20
+ "wandb_name": "BatchTopKSAE",
21
+ "submodule_name": null
22
+ }
23
+ }
layer_21/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8099f1fae767787d7a1b559eee78595398750cadd7f8637f50eef32e7a196ed9
3
+ size 201443287
layer_21/config.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "BatchTopKTrainer",
4
+ "dict_class": "BatchTopKSAE",
5
+ "lr": 0.0002,
6
+ "steps": 200001,
7
+ "auxk_alpha": 0.0,
8
+ "warmup_steps": 10000,
9
+ "decay_start": 160000,
10
+ "threshold_beta": 0.999,
11
+ "threshold_start_step": 1000,
12
+ "top_k_aux": 512,
13
+ "seed": 21,
14
+ "activation_dim": 1024,
15
+ "dict_size": 8192,
16
+ "k": 50,
17
+ "device": "cuda:6",
18
+ "layer": 20,
19
+ "lm_name": "hubert",
20
+ "wandb_name": "BatchTopKSAE",
21
+ "submodule_name": null
22
+ }
23
+ }
layer_22/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1fa202aa187b83bf43db2eaa6187fe8244e7b1fa4c45071d591fdf672f2663e8
3
+ size 201443287
layer_22/config.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "BatchTopKTrainer",
4
+ "dict_class": "BatchTopKSAE",
5
+ "lr": 0.0002,
6
+ "steps": 200001,
7
+ "auxk_alpha": 0.0,
8
+ "warmup_steps": 10000,
9
+ "decay_start": 160000,
10
+ "threshold_beta": 0.999,
11
+ "threshold_start_step": 1000,
12
+ "top_k_aux": 512,
13
+ "seed": 21,
14
+ "activation_dim": 1024,
15
+ "dict_size": 8192,
16
+ "k": 50,
17
+ "device": "cuda:6",
18
+ "layer": 21,
19
+ "lm_name": "hubert",
20
+ "wandb_name": "BatchTopKSAE",
21
+ "submodule_name": null
22
+ }
23
+ }
layer_23/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d50d5fd3c1cf4e4255cb3a5219edc208ec0877026bd3912a5b17ae00ec101b2c
3
+ size 201443287
layer_23/config.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "BatchTopKTrainer",
4
+ "dict_class": "BatchTopKSAE",
5
+ "lr": 0.0002,
6
+ "steps": 200001,
7
+ "auxk_alpha": 0.0,
8
+ "warmup_steps": 10000,
9
+ "decay_start": 160000,
10
+ "threshold_beta": 0.999,
11
+ "threshold_start_step": 1000,
12
+ "top_k_aux": 512,
13
+ "seed": 21,
14
+ "activation_dim": 1024,
15
+ "dict_size": 8192,
16
+ "k": 50,
17
+ "device": "cuda:6",
18
+ "layer": 22,
19
+ "lm_name": "hubert",
20
+ "wandb_name": "BatchTopKSAE",
21
+ "submodule_name": null
22
+ }
23
+ }
layer_24/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:96f6a5ffeac5e1675ddaa02b0d3703da81643de3e41938ca816f510e2092ecab
3
+ size 201443287
layer_24/config.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "BatchTopKTrainer",
4
+ "dict_class": "BatchTopKSAE",
5
+ "lr": 0.0002,
6
+ "steps": 200001,
7
+ "auxk_alpha": 0.0,
8
+ "warmup_steps": 10000,
9
+ "decay_start": 160000,
10
+ "threshold_beta": 0.999,
11
+ "threshold_start_step": 1000,
12
+ "top_k_aux": 512,
13
+ "seed": 21,
14
+ "activation_dim": 1024,
15
+ "dict_size": 8192,
16
+ "k": 50,
17
+ "device": "cuda:6",
18
+ "layer": 23,
19
+ "lm_name": "hubert",
20
+ "wandb_name": "BatchTopKSAE",
21
+ "submodule_name": null
22
+ }
23
+ }
layer_3/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:24c7e6a655460398deb7eb08baf839c1622791827628970d8453e38e8479f987
3
+ size 201443287
layer_3/config.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "BatchTopKTrainer",
4
+ "dict_class": "BatchTopKSAE",
5
+ "lr": 0.0002,
6
+ "steps": 200001,
7
+ "auxk_alpha": 0.0,
8
+ "warmup_steps": 10000,
9
+ "decay_start": 160000,
10
+ "threshold_beta": 0.999,
11
+ "threshold_start_step": 1000,
12
+ "top_k_aux": 512,
13
+ "seed": 21,
14
+ "activation_dim": 1024,
15
+ "dict_size": 8192,
16
+ "k": 50,
17
+ "device": "cuda:6",
18
+ "layer": 2,
19
+ "lm_name": "hubert",
20
+ "wandb_name": "BatchTopKSAE",
21
+ "submodule_name": null
22
+ }
23
+ }
layer_4/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bb5fac60448d1fc5e4fd95327e14c9712ccd36b58b3b2db2d1a0cbf5b15bee34
3
+ size 201443287
layer_4/config.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "BatchTopKTrainer",
4
+ "dict_class": "BatchTopKSAE",
5
+ "lr": 0.0002,
6
+ "steps": 200001,
7
+ "auxk_alpha": 0.0,
8
+ "warmup_steps": 10000,
9
+ "decay_start": 160000,
10
+ "threshold_beta": 0.999,
11
+ "threshold_start_step": 1000,
12
+ "top_k_aux": 512,
13
+ "seed": 21,
14
+ "activation_dim": 1024,
15
+ "dict_size": 8192,
16
+ "k": 50,
17
+ "device": "cuda:6",
18
+ "layer": 3,
19
+ "lm_name": "hubert",
20
+ "wandb_name": "BatchTopKSAE",
21
+ "submodule_name": null
22
+ }
23
+ }
layer_5/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:486406910d024c6ccd52b551f1cd3ecd5b108068e579ac11f795a3882a5ab157
3
+ size 201443287
layer_5/config.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "BatchTopKTrainer",
4
+ "dict_class": "BatchTopKSAE",
5
+ "lr": 0.0002,
6
+ "steps": 200001,
7
+ "auxk_alpha": 0.0,
8
+ "warmup_steps": 10000,
9
+ "decay_start": 160000,
10
+ "threshold_beta": 0.999,
11
+ "threshold_start_step": 1000,
12
+ "top_k_aux": 512,
13
+ "seed": 21,
14
+ "activation_dim": 1024,
15
+ "dict_size": 8192,
16
+ "k": 50,
17
+ "device": "cuda:6",
18
+ "layer": 4,
19
+ "lm_name": "hubert",
20
+ "wandb_name": "BatchTopKSAE",
21
+ "submodule_name": null
22
+ }
23
+ }
layer_6/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:185c5a6ab133883d7284b9bec3d38ee258525cb565c857e7e285ca018bb98aa9
3
+ size 201443287
layer_6/config.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "BatchTopKTrainer",
4
+ "dict_class": "BatchTopKSAE",
5
+ "lr": 0.0002,
6
+ "steps": 200001,
7
+ "auxk_alpha": 0.0,
8
+ "warmup_steps": 10000,
9
+ "decay_start": 160000,
10
+ "threshold_beta": 0.999,
11
+ "threshold_start_step": 1000,
12
+ "top_k_aux": 512,
13
+ "seed": 21,
14
+ "activation_dim": 1024,
15
+ "dict_size": 8192,
16
+ "k": 50,
17
+ "device": "cuda:6",
18
+ "layer": 5,
19
+ "lm_name": "hubert",
20
+ "wandb_name": "BatchTopKSAE",
21
+ "submodule_name": null
22
+ }
23
+ }
layer_7/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:99c7cf4b3fda6e38f85553b1ecd8756f199ac86bf04e5debb65b71b702ae2f2f
3
+ size 201443287
layer_7/config.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "BatchTopKTrainer",
4
+ "dict_class": "BatchTopKSAE",
5
+ "lr": 0.0002,
6
+ "steps": 200001,
7
+ "auxk_alpha": 0.0,
8
+ "warmup_steps": 10000,
9
+ "decay_start": 160000,
10
+ "threshold_beta": 0.999,
11
+ "threshold_start_step": 1000,
12
+ "top_k_aux": 512,
13
+ "seed": 21,
14
+ "activation_dim": 1024,
15
+ "dict_size": 8192,
16
+ "k": 50,
17
+ "device": "cuda:6",
18
+ "layer": 6,
19
+ "lm_name": "hubert",
20
+ "wandb_name": "BatchTopKSAE",
21
+ "submodule_name": null
22
+ }
23
+ }
layer_8/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d592d33fe94b23a49e8732c6fcc00a7cc5d224580e3ba57016b2a56bde564d18
3
+ size 201443287
layer_8/config.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "BatchTopKTrainer",
4
+ "dict_class": "BatchTopKSAE",
5
+ "lr": 0.0002,
6
+ "steps": 200001,
7
+ "auxk_alpha": 0.0,
8
+ "warmup_steps": 10000,
9
+ "decay_start": 160000,
10
+ "threshold_beta": 0.999,
11
+ "threshold_start_step": 1000,
12
+ "top_k_aux": 512,
13
+ "seed": 21,
14
+ "activation_dim": 1024,
15
+ "dict_size": 8192,
16
+ "k": 50,
17
+ "device": "cuda:6",
18
+ "layer": 7,
19
+ "lm_name": "hubert",
20
+ "wandb_name": "BatchTopKSAE",
21
+ "submodule_name": null
22
+ }
23
+ }
layer_9/ae.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a5cfbb42e12ef2bdcda0a263e0004af4ee662f383d77da4530bc9f78942777dd
3
+ size 201443287
layer_9/config.json ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "trainer": {
3
+ "trainer_class": "BatchTopKTrainer",
4
+ "dict_class": "BatchTopKSAE",
5
+ "lr": 0.0002,
6
+ "steps": 200001,
7
+ "auxk_alpha": 0.0,
8
+ "warmup_steps": 10000,
9
+ "decay_start": 160000,
10
+ "threshold_beta": 0.999,
11
+ "threshold_start_step": 1000,
12
+ "top_k_aux": 512,
13
+ "seed": 21,
14
+ "activation_dim": 1024,
15
+ "dict_size": 8192,
16
+ "k": 50,
17
+ "device": "cuda:6",
18
+ "layer": 8,
19
+ "lm_name": "hubert",
20
+ "wandb_name": "BatchTopKSAE",
21
+ "submodule_name": null
22
+ }
23
+ }