Skull18500 committed on
Commit
3b4e90e
·
verified ·
1 Parent(s): 92b7e1a

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +65 -65
app.py CHANGED
@@ -1,66 +1,66 @@
"""Interactive console for a biggerbrain model (revision before this commit).

Reads one line at a time from stdin, lowercases it, and either runs one of
the built-in maintenance commands or forwards the text to
``biggerbrain.think`` as a prompt.
"""
import tiktoken
import torch
import biggerbrain as biggerbrain
import ai_extras as A_E
from safetensors.torch import save_file, load_file

model = biggerbrain.initmodel("cpu")
# If the returned object exposes `_orig_mod`, work with that inner module
# instead (presumably a torch.compile wrapper -- confirm against initmodel).
model = getattr(model, "_orig_mod", model)

while True:
    command = input("You: ").lower()

    # --- session control -------------------------------------------------
    if command == "exit" or command == "quit":
        print("Exiting the app.")
        break

    # --- model inspection / placement ------------------------------------
    if command == "print model":
        model.print_parameter_breakdown(model)
        continue

    if command == "cpu":
        model.to("cpu")
        print("Model moved to CPU.")
        continue

    # --- training --------------------------------------------------------
    if command == "train":
        # The dataset is created on the first "train" only and reused after.
        if 'pretrain_ds' not in locals():
            pretrain_ds = A_E.StreamDataset(
                bin_file="C:\\AIs\\biggerbrain2_135m\\total_dataset.bin",
                seq_len=model.sequencelength,
            )
            print(f"Dataset loaded: {len(pretrain_ds)} samples")
        model.trainingloop(
            data=pretrain_ds,
            epochs=10,
            lr=3e-4,
            batchsize=4,
            accumulation_steps=32,
            warmup_steps=5000,
        )
        continue

    # --- checkpoint loading ----------------------------------------------
    if command == "load":
        weights = load_file("best_model.safetensors")
        # strict=False: keys that do not match the model are tolerated.
        model.load_state_dict(weights, strict=False)
        print("Weights loaded!")
        continue

    # --- diagnostics -----------------------------------------------------
    if command == "check1":
        # Print the four named alpha parameters, label first, value second.
        for label, param_name in (
            ("alpha pre:", "alpha_pre"),
            ("alpha loop:", "alpha_loop"),
            ("alpha post:", "alpha_post"),
            ("alpha mem:", "alpha_mem"),
        ):
            print(label)
            print(model.get_parameter(param_name).item())
        model.debugprints = True
        model.forward_training(1)
        continue

    if command == "check2":
        print("\n--- Model Internal Stats ---")
        # Every parameter whose name contains "alpha", as a plain number.
        for name, param in model.named_parameters():
            if 'alpha' not in name:
                continue
            print(f"{name}: {param.item():.6f}")

        # Memory gate: show the bias and its sigmoid expressed as a percentage.
        if hasattr(model, 'mem_gate'):
            gate_bias = model.mem_gate.bias.item()
            gate_open_pct = torch.sigmoid(torch.tensor(gate_bias)).item() * 100
            print(f"mem_gate bias: {gate_bias:.6f} ({gate_open_pct:.2f}% open)")

        print("----------------------------\n")
        continue

    if command == "check3":
        for name, param in model.named_parameters():
            if 'alpha' not in name:
                continue
            print(f"{name} | Requires Grad: {param.requires_grad} | Device: {param.device}")
        continue

    if command == "debug":
        model.debugprints = True
        continue

    # --- anything else is a prompt for the model --------------------------
    biggerbrain.think(prompt=command, model=model, max_length=10, iter=3, top_k=10, temperature=1.0)
 
"""Interactive console for experimenting with a biggerbrain model.

Each line read from stdin is lowercased. Commands are matched on the
lowercased text with surrounding whitespace removed:

    exit / quit   leave the console
    print model   call the model's parameter breakdown
    cpu           move the model to the CPU
    train         run the training loop on the streamed dataset
    load          load weights from WEIGHTS_FILE
    check1/2/3    print diagnostic information about "alpha" parameters
    debug         turn on the model's debug prints

Any other non-blank line is forwarded to ``biggerbrain.think`` as a prompt.
"""
from pathlib import Path

import tiktoken
import torch
import biggerbrain as biggerbrain
import ai_extras as A_E
from safetensors.torch import save_file, load_file

# Checkpoint read by the "load" command.
WEIGHTS_FILE = Path("model.safetensors")
# Binary token file streamed by the "train" command.
DATASET_FILE = "C:\\AIs\\biggerbrain2_135m\\total_dataset.bin"

model = biggerbrain.initmodel("cpu")
# If the returned object exposes `_orig_mod`, work with that inner module
# instead (presumably a torch.compile wrapper -- confirm against initmodel).
model = model._orig_mod if hasattr(model, "_orig_mod") else model

# Created lazily on the first "train" command. An explicit sentinel replaces
# the old `'pretrain_ds' not in locals()` probe, which only worked because
# the script happens to run at module scope.
pretrain_ds = None

while True:
    try:
        user_input = input("You: ").lower()
    except (EOFError, KeyboardInterrupt):
        # Closed stdin or Ctrl-C at the prompt: leave cleanly, no traceback.
        print("\nExiting the app.")
        break

    # Commands are matched on the stripped text so "exit " still exits;
    # prompts are forwarded exactly as before (lowercased, not stripped).
    command = user_input.strip()
    if not command:
        # Nothing typed: re-prompt instead of sending an empty prompt.
        continue

    if command in {"exit", "quit"}:
        print("Exiting the app.")
        break

    elif command == "print model":
        model.print_parameter_breakdown(model)

    elif command == "cpu":
        model.to("cpu")
        print("Model moved to CPU.")

    elif command == "train":
        if pretrain_ds is None:
            pretrain_ds = A_E.StreamDataset(
                bin_file=DATASET_FILE,
                seq_len=model.sequencelength,
            )
            print(f"Dataset loaded: {len(pretrain_ds)} samples")
        model.trainingloop(
            data=pretrain_ds,
            epochs=10,
            lr=3e-4,
            batchsize=4,
            accumulation_steps=32,
            warmup_steps=5000,
        )

    elif command == "load":
        if not WEIGHTS_FILE.is_file():
            # A missing checkpoint should not end the interactive session.
            print(f"Weights file not found: {WEIGHTS_FILE}")
            continue
        weights = load_file(str(WEIGHTS_FILE))
        # strict=False tolerates key mismatches; report them so a partially
        # loaded checkpoint does not go unnoticed.
        load_result = model.load_state_dict(weights, strict=False)
        print("Weights loaded!")
        missing_keys = getattr(load_result, "missing_keys", None)
        unexpected_keys = getattr(load_result, "unexpected_keys", None)
        if missing_keys:
            print(f"Missing keys ({len(missing_keys)}): {', '.join(missing_keys)}")
        if unexpected_keys:
            print(f"Unexpected keys ({len(unexpected_keys)}): {', '.join(unexpected_keys)}")

    elif command == "check1":
        # Print the four named alpha parameters, label first, value second.
        for label, param_name in (
            ("alpha pre:", "alpha_pre"),
            ("alpha loop:", "alpha_loop"),
            ("alpha post:", "alpha_post"),
            ("alpha mem:", "alpha_mem"),
        ):
            print(label)
            print(model.get_parameter(param_name).item())
        model.debugprints = True
        model.forward_training(1)

    elif command == "check2":
        print("\n--- Model Internal Stats ---")
        # Every parameter whose name contains "alpha", as a plain number.
        # NOTE(review): .item() assumes each such parameter holds one element.
        for name, param in model.named_parameters():
            if "alpha" in name:
                print(f"{name}: {param.item():.6f}")

        # Memory gate: show the bias and its sigmoid expressed as a percentage.
        if hasattr(model, "mem_gate"):
            gate_bias = model.mem_gate.bias.item()
            gate_open_pct = torch.sigmoid(torch.tensor(gate_bias)).item() * 100
            print(f"mem_gate bias: {gate_bias:.6f} ({gate_open_pct:.2f}% open)")

        print("----------------------------\n")

    elif command == "check3":
        for name, param in model.named_parameters():
            if "alpha" in name:
                print(f"{name} | Requires Grad: {param.requires_grad} | Device: {param.device}")

    elif command == "debug":
        model.debugprints = True

    else:
        # NOTE(review): the prompt reaches the model lowercased, as in the
        # original script -- confirm that this is intended.
        biggerbrain.think(
            prompt=user_input,
            model=model,
            max_length=10,
            iter=3,
            top_k=10,
            temperature=1.0,
        )