Compare commits

58d4736f6d2a5c796f9ad33db3040118a6e593a4..9ab043dc7837ec94b2771022575c1e0aaabad6d0

No commits in common. "58d4736f6d2a5c796f9ad33db3040118a6e593a4" and "9ab043dc7837ec94b2771022575c1e0aaabad6d0" have entirely different histories.

12 changed files with 126 additions and 143 deletions

.gitignore (vendored) · 1 change

@@ -175,4 +175,3 @@ cython_debug/
 data/memory/brainmap.json
 /data/memory/vocab.json
 data/memory/book_progress.json
-/data/memory/journal.json

(file path not captured)

@@ -6,10 +6,10 @@ import datetime
 import logging
 from model.brainmap import get_brainmap
 from model.journal import read_journal_entries
-from model.dreams import load_dreams
+from model.memory import load_dreams
 from model.tokenizer import Tokenizer
 from model.abstraction import cluster_vocab
-from model.dreams import load_dreams
+from model.memory import load_dreams
 from model.scheduler import get_time_until_next_action, get_next_action_label
 from context.context import load_context
 from reader.reader import get_books, load_progress

main.py (path inferred)

@@ -4,9 +4,9 @@ import threading
 from dotenv import load_dotenv
 import os
 from model.trainer import train_on_message
-from model.brain import generate_response
+from model.brain import generate_response, daydream
 from model.cleanup import full_cleanup
-from model.dreamer import replay_dreams, daydream
+from model.dream_replay import replay_dreams
 from model.rehearsal import simulate_conversation
 from model.scheduler import set_next_action
 from model.reweaver import memory_reweaver_loop

model/brain.py (path inferred)

@@ -1,5 +1,14 @@
+import random
+import re
 import torch
+import torch.nn.functional as F
+from model.memory import save_dream
 from model.brain_state import model, tokenizer, DEVICE
+from model.journal import record_to_journal
+from model.trainer import train_on_message
+from context.context import get_recent_context
+
+recent_dreams = []


 @torch.inference_mode()
@@ -99,3 +108,28 @@ def score_sentence(sentence: str) -> float:
         score += 0.1  # Bonus if there's an action!
     return min(score, 1.0)

+
+def daydream():
+    model.eval()
+    seed = torch.tensor([random.randint(0, tokenizer.next_id - 1)], device=DEVICE).unsqueeze(0)
+    dream = []
+
+    for _ in range(12):
+        out = model(seed)
+        logits = out[:, -1, :]
+        probs = F.softmax(logits, dim=-1)
+        token = torch.multinomial(probs, num_samples=1)
+        dream.append(token.item())
+        seed = torch.cat([seed, token], dim=1)
+
+    sentence = tokenizer.detokenize(dream)
+    score = score_sentence(sentence)
+
+    if score > 0.5:
+        save_dream(sentence, score)
+        record_to_journal(sentence)
+        train_on_message(sentence)
+
+        if len(recent_dreams) > 10:
+            recent_dreams.pop(0)

model/brain_state.py (path inferred)

@@ -1,20 +1,12 @@
 import torch
 import torch.nn as nn
-import os
 from model.brain_architecture import TinyTransformer
 from model.tokenizer import Tokenizer

 DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
-MODEL_SAVE_PATH = "data/memory/model.pt"

 tokenizer = Tokenizer()
 VOCAB_SIZE = len(tokenizer.vocab) + 10  # with a small buffer

 model = TinyTransformer(vocab_size=VOCAB_SIZE).to(DEVICE)
-optimizer = torch.optim.Adam(model.parameters(), lr=3e-4)
-scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=500, gamma=0.95)
 loss_fn = nn.CrossEntropyLoss()
-
-def save_model():
-    os.makedirs(os.path.dirname(MODEL_SAVE_PATH), exist_ok=True)
-    torch.save(model.state_dict(), MODEL_SAVE_PATH)

model/cleanup.py (path inferred)

@@ -3,7 +3,7 @@ import json
 import os
 import time
 from model.tokenizer import VOCAB_PATH
-from model.dreams import DREAM_LOG_PATH
+from model.memory import DREAM_LOG_PATH
 from context.context import CONTEXT_FILE
 from model.brainmap import load_brainmap, save_brainmap

model/dream_replay.py (new file) · 27 additions

@@ -0,0 +1,27 @@
+import random
+from model.memory import load_dreams
+from model.trainer import train_on_message
+from model.dynamic_expand import expand_model_if_needed
+from context.context import load_context
+
+
+def replay_dreams():
+    expand_model_if_needed()
+
+    dreams = load_dreams()
+    context = load_context()
+
+    if not dreams or not context:
+        return
+
+    selected_dreams = random.sample(dreams, min(len(dreams), 5))
+    selected_contexts = random.sample(context, min(len(context), 5))
+
+    # Mix dreams and past contexts into a chaotic dream
+    all_sources = [d["sentence"] for d in selected_dreams] + [c["text"] for c in selected_contexts]
+    random.shuffle(all_sources)
+
+    mixed_sentence = " ".join(random.sample(all_sources, min(len(all_sources), 3)))
+
+    if mixed_sentence:
+        train_on_message(mixed_sentence, source="dream")
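
Note: replay_dreams() assumes load_dreams() yields dicts with a "sentence" key and load_context() yields dicts with a "text" key. A quick illustration with hypothetical payloads (the stub data below is not from the repo):

    import random

    dreams = [{"sentence": "the sky hums"}, {"sentence": "words fold into light"}]
    context = [{"text": "hello there"}, {"text": "what did you read today?"}]

    all_sources = [d["sentence"] for d in dreams] + [c["text"] for c in context]
    random.shuffle(all_sources)
    mixed = " ".join(random.sample(all_sources, min(len(all_sources), 3)))
    print(mixed)  # e.g. "hello there the sky hums words fold into light"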

model/dreamer.py (deleted; path inferred)

@@ -1,58 +0,0 @@
-import random
-import torch
-import torch.nn.functional as F
-from model.brain import model, tokenizer, DEVICE, score_sentence
-from model.trainer import train_on_message
-from model.dreams import save_dream, load_dreams
-from model.journal import record_to_journal
-from model.dynamic_expand import expand_model_if_needed
-from context.context import load_context
-
-recent_dreams = []
-
-
-def daydream():
-    model.eval()
-    seed = torch.tensor([random.randint(0, tokenizer.next_id - 1)], device=DEVICE).unsqueeze(0)
-    dream = []
-
-    for _ in range(12):
-        out = model(seed)
-        logits = out[:, -1, :]
-        probs = F.softmax(logits, dim=-1)
-        token = torch.multinomial(probs, num_samples=1)
-        dream.append(token.item())
-        seed = torch.cat([seed, token], dim=1)
-
-    sentence = tokenizer.detokenize(dream)
-    score = score_sentence(sentence)
-
-    if score > 0.5:
-        save_dream(sentence, score)
-        record_to_journal(sentence)
-        train_on_message(sentence)
-
-        if len(recent_dreams) > 10:
-            recent_dreams.pop(0)
-
-
-def replay_dreams():
-    expand_model_if_needed()
-
-    dreams = load_dreams()
-    context = load_context()
-
-    if not dreams or not context:
-        return
-
-    selected_dreams = random.sample(dreams, min(len(dreams), 5))
-    selected_contexts = random.sample(context, min(len(context), 5))
-
-    # Mix dreams and past contexts into a chaotic dream
-    all_sources = [d["sentence"] for d in selected_dreams] + [c["text"] for c in selected_contexts]
-    random.shuffle(all_sources)
-
-    mixed_sentence = " ".join(random.sample(all_sources, min(len(all_sources), 3)))
-
-    if mixed_sentence:
-        train_on_message(mixed_sentence, source="dream")

model/dynamic_expand.py (path inferred)

@@ -1,43 +1,49 @@
 import torch
 import threading
 import time
-from model.tokenizer import Tokenizer
-from model.brain_state import save_model, DEVICE, model, optimizer
-
-tokenizer = Tokenizer()
+from model.brain_architecture import TinyTransformer
+from model.brain_state import model, tokenizer, DEVICE
+
+optimizer = torch.optim.Adam(model.parameters(), lr=3e-4)
+_last_vocab_size = 0

 expand_lock = threading.Lock()
 _last_expansion_time = 0


+def get_optimizer():
+    return optimizer
+
+
 def expand_model_if_needed():
-    global _last_expansion_time
+    global model, optimizer, _last_expansion_time

     with expand_lock:
-        # Check if expansion is actually needed
-        needed_vocab_size = tokenizer.next_id
-        current_vocab_size = model.head.out_features
+        current_vocab_size = len(tokenizer.vocab) + 10
+        old_vocab_size = model.head.out_features

-        if needed_vocab_size <= current_vocab_size:
-            return  # No expansion needed
+        if current_vocab_size <= old_vocab_size:
+            return False  # No expansion needed

-        # print(f"[Expand] Expanding vocabulary: {current_vocab_size} -> {needed_vocab_size}")
+        # print(f"[Expand] Expanding model from {old_vocab_size} -> {current_vocab_size}")

-        # Expand the head layer safely without rebuilding everything
-        old_head_weight = model.head.weight.data
-        old_out_features = old_head_weight.size(0)
-        in_features = model.head.in_features
-
-        new_head = torch.nn.Linear(in_features, needed_vocab_size, bias=False)
-        new_head = new_head.to(DEVICE)
-
-        # Copy old weights into the new head
+        old_state = model.state_dict()
+        new_model = TinyTransformer(
+            vocab_size=current_vocab_size,
+            embed_dim=model.token_embed.embedding_dim,
+            depth=len(model.blocks),
+            heads=model.blocks[0].attn.heads
+        ).to(DEVICE)
+
         with torch.no_grad():
-            new_head.weight[:old_out_features] = old_head_weight
-
-        model.head = new_head
+            for name, param in new_model.named_parameters():
+                if name in old_state and old_state[name].shape == param.shape:
+                    param.copy_(old_state[name])

-        # Rebuild optimizer and scheduler
-        scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=1000, gamma=0.95)
+        model = new_model
+        optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)

         _last_expansion_time = time.time()
-        save_model()
+        # print("[Expand] Expansion complete.")
+        return True  # <<< tell trainer we expanded
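
Note: the get_optimizer() accessor matters because `from model.dynamic_expand import optimizer` binds the optimizer object once at import time; after expand_model_if_needed() swaps in a new model and reassigns optimizer, such an import would keep stepping the stale pre-expansion optimizer. Reading through the accessor always returns the current binding. A toy sketch of the pitfall (SimpleNamespace stands in for the module):

    from types import SimpleNamespace

    mod = SimpleNamespace(optimizer="adam_v1")
    held = mod.optimizer         # like `from m import optimizer`: binds the object
    mod.optimizer = "adam_v2"    # like the reassignment inside expand_model_if_needed()

    print(held)                  # adam_v1, a stale reference
    print(mod.optimizer)         # adam_v2, what get_optimizer() returns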

model/journal.py (path inferred)

@@ -1,25 +1,15 @@
 import os
-import json
+import time
+from model.trainer import train_on_message
 import random

-JOURNAL_PATH = "data/memory/journal.json"
+JOURNAL_PATH = "data/memory/journal.txt"


-def record_to_journal(entry: dict):
-    if not os.path.exists(JOURNAL_PATH):
-        with open(JOURNAL_PATH, "w", encoding="utf-8") as f:
-            json.dump([], f)
-
-    with open(JOURNAL_PATH, "r", encoding="utf-8") as f:
-        try:
-            journal = json.load(f)
-        except json.JSONDecodeError:
-            journal = []
-
-    journal.append(entry)
-
-    with open(JOURNAL_PATH, "w", encoding="utf-8") as f:
-        json.dump(journal, f, indent=2)
+def record_to_journal(thought: str):
+    os.makedirs(os.path.dirname(JOURNAL_PATH), exist_ok=True)
+    with open(JOURNAL_PATH, "a", encoding="utf-8") as f:
+        f.write(f"{time.strftime('%Y-%m-%d %H:%M:%S')} | {thought.strip()}\n")


 def read_journal_entries():
@@ -30,7 +20,7 @@ def read_journal_entries():
     return [line.split("|", 1)[-1].strip() for line in lines if "|" in line]


-def sample_journal_entries(n=5):
-    """Return up to `n` random entries from the journal."""
+def replay_journal():
     entries = read_journal_entries()
-    return random.sample(entries, min(n, len(entries)))
+    for entry in random.sample(entries, min(5, len(entries))):
+        train_on_message(entry)
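
Note: the journal changes from a JSON array to an append-only text log, one "timestamp | thought" line per entry, which matches what read_journal_entries() already parses with split("|", 1). For example:

    line = "2025-04-29 12:00:00 | the sky hums"
    text = line.split("|", 1)[-1].strip()
    print(text)  # "the sky hums"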

model/trainer.py (path inferred)

@@ -1,13 +1,14 @@
 import torch
 import time
-from model.dynamic_expand import expand_model_if_needed, _last_expansion_time, expand_lock
-from model.brain_state import model, tokenizer, DEVICE, loss_fn, optimizer, scheduler
-from model.brainmap import add_to_brainmap
-from model.journal import record_to_journal
+from model.dynamic_expand import expand_model_if_needed, _last_expansion_time, get_optimizer, expand_lock
+from model.brain_state import model, tokenizer, DEVICE, loss_fn
+from model.brainmap import add_to_brainmap, save_brainmap
+from model.tokenizer import save_vocab
 from context.context import add_to_context, get_recent_context

 LOSS_FILE = "data/logs/loss.log"
 VOCAB_GROWTH_FILE = "data/logs/vocab_growth.log"

+scheduler = torch.optim.lr_scheduler.StepLR(get_optimizer(), step_size=500, gamma=0.95)
+
 def log_vocab_growth():
@@ -25,61 +26,53 @@ def train_on_message(text: str, source: str = "user"):
     now = time.time()
     if now - _last_expansion_time < 5:
-        print("[Trainer] Skipping to stabilize after expansion.")
+        print("[Train] Skipping to stabilize after expansion.")
         return

     if not expand_lock.acquire(timeout=0.5):
-        print("[Trainer] Skipped training due to active expansion.")
+        print("[Train] Skipped training due to active expansion.")
         return

     try:
         model.train()
-        context_texts = get_recent_context(10)

-        # Augment the input with recent context
+        context_texts = get_recent_context(30)
         augmented_text = "<start> " + " ".join(context_texts + [text]) + " <end>"
         tokens = tokenizer.tokenize(augmented_text)

         if len(tokens) < 2:
-            print("[Trainer] Message too short after cleaning.")
             return

-        # Clamp any token IDs beyond the model's output size
         max_token_id = model.head.out_features - 1
-        tokens = [min(t, max_token_id) for t in tokens]
-        tokens = tokens[:128]  # Hard clamp input length
+        tokens = [t if t <= max_token_id else max_token_id for t in tokens]
+        tokens = tokens[:128]

         if len(tokens) < 2:
-            print("[Trainer] Message too short after clamping.")
             return

         input_tensor = torch.tensor(tokens[:-1], dtype=torch.long, device=DEVICE).unsqueeze(0)
         target_tensor = torch.tensor(tokens[1:], dtype=torch.long, device=DEVICE).unsqueeze(0)

+        opt = get_optimizer()
+
         output = model(input_tensor)
         loss = loss_fn(output.view(-1, output.size(-1)), target_tensor.view(-1))

         if torch.isnan(loss):
             print("[Trainer] Detected NaN loss, skipping update.")
             return

-        optimizer.zero_grad()
+        opt.zero_grad()
         loss.backward()
-        optimizer.step()
+        opt.step()
         scheduler.step()

-        # Update brainmap and context
-        add_to_brainmap(augmented_text.split())
+        log_loss(loss.item())
+        log_vocab_growth()
         add_to_context(text, source=source)
-
-        # Log training success to journal
-        record_to_journal({
-            "timestamp": time.time(),
-            "source": source,
-            "text": text,
-            "loss": round(loss.item(), 4),
-            "vocab_size": len(tokenizer.vocab)
-        })
+        add_to_brainmap(augmented_text.split())
+        save_brainmap()
+        save_vocab(tokenizer.vocab)

     finally:
         expand_lock.release()
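
Note: the rewritten clamp `[t if t <= max_token_id else max_token_id for t in tokens]` is behaviorally identical to the old `min(t, max_token_id)`: any token id beyond the head's output range collapses onto the last row until expand_model_if_needed() grows the model. A tiny illustration:

    max_token_id = 9                       # model.head.out_features - 1
    tokens = [3, 7, 12, 25]
    tokens = [t if t <= max_token_id else max_token_id for t in tokens]
    print(tokens)  # [3, 7, 9, 9]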