Fixed it so Ruby actually dreams/Journals now
parent 9ab043dc78
commit 141931c2a4

@@ -6,10 +6,10 @@ import datetime
 import logging
 from model.brainmap import get_brainmap
 from model.journal import read_journal_entries
-from model.memory import load_dreams
+from model.dreams import load_dreams
 from model.tokenizer import Tokenizer
 from model.abstraction import cluster_vocab
-from model.memory import load_dreams
+from model.dreams import load_dreams
 from model.scheduler import get_time_until_next_action, get_next_action_label
 from context.context import load_context
 from reader.reader import get_books, load_progress

main.py (4 changed lines)
@@ -4,9 +4,9 @@ import threading
 from dotenv import load_dotenv
 import os
 from model.trainer import train_on_message
-from model.brain import generate_response, daydream
+from model.brain import generate_response
 from model.cleanup import full_cleanup
-from model.dream_replay import replay_dreams
+from model.dreamer import replay_dreams, daydream
 from model.rehearsal import simulate_conversation
 from model.scheduler import set_next_action
 from model.reweaver import memory_reweaver_loop

@@ -1,14 +1,5 @@
-import random
-import re
 import torch
-import torch.nn.functional as F
-from model.memory import save_dream
 from model.brain_state import model, tokenizer, DEVICE
-from model.journal import record_to_journal
-from model.trainer import train_on_message
-from context.context import get_recent_context
 
-recent_dreams = []
-
 
 @torch.inference_mode()
@@ -108,28 +99,3 @@ def score_sentence(sentence: str) -> float:
         score += 0.1  # Bonus if there's an action!
 
     return min(score, 1.0)
-
-
-def daydream():
-    model.eval()
-    seed = torch.tensor([random.randint(0, tokenizer.next_id - 1)], device=DEVICE).unsqueeze(0)
-    dream = []
-
-    for _ in range(12):
-        out = model(seed)
-        logits = out[:, -1, :]
-        probs = F.softmax(logits, dim=-1)
-        token = torch.multinomial(probs, num_samples=1)
-        dream.append(token.item())
-        seed = torch.cat([seed, token], dim=1)
-
-    sentence = tokenizer.detokenize(dream)
-    score = score_sentence(sentence)
-
-    if score > 0.5:
-        save_dream(sentence, score)
-        record_to_journal(sentence)
-        train_on_message(sentence)
-
-    if len(recent_dreams) > 10:
-        recent_dreams.pop(0)

@@ -1,12 +1,20 @@
 import torch
 import torch.nn as nn
+import os
 from model.brain_architecture import TinyTransformer
 from model.tokenizer import Tokenizer
 
 DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+MODEL_SAVE_PATH = "data/memory/model.pt"
 
 tokenizer = Tokenizer()
 VOCAB_SIZE = len(tokenizer.vocab) + 10  # with a small buffer
 
 model = TinyTransformer(vocab_size=VOCAB_SIZE).to(DEVICE)
+optimizer = torch.optim.Adam(model.parameters(), lr=3e-4)
+scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=500, gamma=0.95)
 loss_fn = nn.CrossEntropyLoss()
+
+def save_model():
+    os.makedirs(os.path.dirname(MODEL_SAVE_PATH), exist_ok=True)
+    torch.save(model.state_dict(), MODEL_SAVE_PATH)
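The commit adds save_model() but no matching loader, so a restore step is left implicit. Below is a hypothetical sketch of what reloading the checkpoint at startup could look like; the helper name and the FileNotFoundError handling are assumptions, not part of this commit, and the load only succeeds if the saved head size matches the current VOCAB_SIZE.

import torch

from model.brain_state import model, MODEL_SAVE_PATH, DEVICE

# Hypothetical restore helper (not in this commit): reload the checkpoint
# written by save_model(). Shapes must match the current model, so a grown
# vocabulary head would need to be expanded before loading.
def load_model_if_available():
    try:
        state = torch.load(MODEL_SAVE_PATH, map_location=DEVICE)
        model.load_state_dict(state)
        print("[BrainState] Restored checkpoint from", MODEL_SAVE_PATH)
    except FileNotFoundError:
        print("[BrainState] No checkpoint yet, starting fresh.")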

@@ -3,7 +3,7 @@ import json
 import os
 import time
 from model.tokenizer import VOCAB_PATH
-from model.memory import DREAM_LOG_PATH
+from model.dreams import DREAM_LOG_PATH
 from context.context import CONTEXT_FILE
 from model.brainmap import load_brainmap, save_brainmap
 

@@ -1,27 +0,0 @@
-import random
-from model.memory import load_dreams
-from model.trainer import train_on_message
-from model.dynamic_expand import expand_model_if_needed
-from context.context import load_context
-
-
-def replay_dreams():
-    expand_model_if_needed()
-
-    dreams = load_dreams()
-    context = load_context()
-
-    if not dreams or not context:
-        return
-
-    selected_dreams = random.sample(dreams, min(len(dreams), 5))
-    selected_contexts = random.sample(context, min(len(context), 5))
-
-    # Mix dreams and past contexts into a chaotic dream
-    all_sources = [d["sentence"] for d in selected_dreams] + [c["text"] for c in selected_contexts]
-    random.shuffle(all_sources)
-
-    mixed_sentence = " ".join(random.sample(all_sources, min(len(all_sources), 3)))
-
-    if mixed_sentence:
-        train_on_message(mixed_sentence, source="dream")

model/dreamer.py (new file, 58 lines)
@@ -0,0 +1,58 @@
+import random
+import torch
+import torch.nn.functional as F
+from model.brain import model, tokenizer, DEVICE, score_sentence
+from model.trainer import train_on_message
+from model.dreams import save_dream, load_dreams
+from model.journal import record_to_journal
+from model.dynamic_expand import expand_model_if_needed
+from context.context import load_context
+
+recent_dreams = []
+
+
+def daydream():
+    model.eval()
+    seed = torch.tensor([random.randint(0, tokenizer.next_id - 1)], device=DEVICE).unsqueeze(0)
+    dream = []
+
+    for _ in range(12):
+        out = model(seed)
+        logits = out[:, -1, :]
+        probs = F.softmax(logits, dim=-1)
+        token = torch.multinomial(probs, num_samples=1)
+        dream.append(token.item())
+        seed = torch.cat([seed, token], dim=1)
+
+    sentence = tokenizer.detokenize(dream)
+    score = score_sentence(sentence)
+
+    if score > 0.5:
+        save_dream(sentence, score)
+        record_to_journal(sentence)
+        train_on_message(sentence)
+
+    if len(recent_dreams) > 10:
+        recent_dreams.pop(0)
+
+
+def replay_dreams():
+    expand_model_if_needed()
+
+    dreams = load_dreams()
+    context = load_context()
+
+    if not dreams or not context:
+        return
+
+    selected_dreams = random.sample(dreams, min(len(dreams), 5))
+    selected_contexts = random.sample(context, min(len(context), 5))
+
+    # Mix dreams and past contexts into a chaotic dream
+    all_sources = [d["sentence"] for d in selected_dreams] + [c["text"] for c in selected_contexts]
+    random.shuffle(all_sources)
+
+    mixed_sentence = " ".join(random.sample(all_sources, min(len(all_sources), 3)))
+
+    if mixed_sentence:
+        train_on_message(mixed_sentence, source="dream")
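For orientation, here is a hedged sketch of how these two functions might be driven from a background thread. The loop, its name, and the five-minute interval are illustrative assumptions; main.py's actual scheduling is not shown in this diff.

import threading
import time

from model.dreamer import daydream, replay_dreams

# Illustrative wiring only: run dreaming off the main chat loop.
def dream_loop(interval_seconds: float = 300.0):
    while True:
        daydream()        # sample a new dream, score it, maybe journal/train on it
        replay_dreams()   # remix saved dreams with recent context and train again
        time.sleep(interval_seconds)

threading.Thread(target=dream_loop, daemon=True).start()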

@@ -1,49 +1,43 @@
 import torch
 import threading
 import time
-from model.brain_architecture import TinyTransformer
-from model.brain_state import model, tokenizer, DEVICE
+from model.tokenizer import Tokenizer
+from model.brain_state import save_model, DEVICE, model, optimizer
 
-optimizer = torch.optim.Adam(model.parameters(), lr=3e-4)
+tokenizer = Tokenizer()
 
-_last_vocab_size = 0
 expand_lock = threading.Lock()
 _last_expansion_time = 0
 
 
-def get_optimizer():
-    return optimizer
-
-
 def expand_model_if_needed():
-    global model, optimizer, _last_expansion_time
+    global _last_expansion_time
 
     with expand_lock:
-        current_vocab_size = len(tokenizer.vocab) + 10
-        old_vocab_size = model.head.out_features
+        # Check if expansion is actually needed
+        needed_vocab_size = tokenizer.next_id
+        current_vocab_size = model.head.out_features
 
-        if current_vocab_size <= old_vocab_size:
-            return False  # No expansion needed
+        if needed_vocab_size <= current_vocab_size:
+            return  # ✅ No expansion needed
 
-        # print(f"[Expand] Expanding model from {old_vocab_size} -> {current_vocab_size}")
+        # print(f"[Expand] Expanding vocabulary: {current_vocab_size} -> {needed_vocab_size}")
 
-        old_state = model.state_dict()
-        new_model = TinyTransformer(
-            vocab_size=current_vocab_size,
-            embed_dim=model.token_embed.embedding_dim,
-            depth=len(model.blocks),
-            heads=model.blocks[0].attn.heads
-        ).to(DEVICE)
+        # Expand the head layer safely without rebuilding everything
+        old_head_weight = model.head.weight.data
+        old_out_features = old_head_weight.size(0)
+        in_features = model.head.in_features
 
+        new_head = torch.nn.Linear(in_features, needed_vocab_size, bias=False)
+        new_head = new_head.to(DEVICE)
+
+        # Copy old weights into the new head
         with torch.no_grad():
-            for name, param in new_model.named_parameters():
-                if name in old_state and old_state[name].shape == param.shape:
-                    param.copy_(old_state[name])
+            new_head.weight[:old_out_features] = old_head_weight
 
-        model = new_model
-        optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
+        model.head = new_head
+
+        # Rebuild optimizer and scheduler
+        scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=1000, gamma=0.95)
 
         _last_expansion_time = time.time()
-        # print("[Expand] Expansion complete.")
-        return True  # <<< tell trainer we expanded
+        save_model()
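The head expansion above boils down to copying the old output rows into a larger nn.Linear. A minimal standalone sketch (arbitrary sizes, independent of the repo) shows why the existing logits are preserved:

import torch
import torch.nn as nn

# Toy sizes for illustration only.
old_head = nn.Linear(32, 10, bias=False)   # head before the vocabulary grew
new_head = nn.Linear(32, 14, bias=False)   # head sized for the larger vocabulary

with torch.no_grad():
    # Copy the learned rows; the four new rows keep their fresh initialization.
    new_head.weight[:old_head.out_features] = old_head.weight

x = torch.randn(1, 32)
# Logits for the first 10 token IDs are unchanged after the expansion.
assert torch.allclose(new_head(x)[:, :10], old_head(x))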

@@ -1,15 +1,25 @@
 import os
-import time
-from model.trainer import train_on_message
+import json
 import random
 
-JOURNAL_PATH = "data/memory/journal.txt"
+JOURNAL_PATH = "data/memory/journal.json"
 
 
-def record_to_journal(thought: str):
-    os.makedirs(os.path.dirname(JOURNAL_PATH), exist_ok=True)
-    with open(JOURNAL_PATH, "a", encoding="utf-8") as f:
-        f.write(f"{time.strftime('%Y-%m-%d %H:%M:%S')} | {thought.strip()}\n")
+def record_to_journal(entry: dict):
+    if not os.path.exists(JOURNAL_PATH):
+        with open(JOURNAL_PATH, "w", encoding="utf-8") as f:
+            json.dump([], f)
+
+    with open(JOURNAL_PATH, "r", encoding="utf-8") as f:
+        try:
+            journal = json.load(f)
+        except json.JSONDecodeError:
+            journal = []
+
+    journal.append(entry)
+
+    with open(JOURNAL_PATH, "w", encoding="utf-8") as f:
+        json.dump(journal, f, indent=2)
 
 
 def read_journal_entries():
@@ -20,7 +30,7 @@ def read_journal_entries():
     return [line.split("|", 1)[-1].strip() for line in lines if "|" in line]
 
 
-def replay_journal():
+def sample_journal_entries(n=5):
+    """Return up to `n` random entries from the journal."""
     entries = read_journal_entries()
-    for entry in random.sample(entries, min(5, len(entries))):
-        train_on_message(entry)
+    return random.sample(entries, min(n, len(entries)))
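A hedged usage example of the new dict-based journal API follows. The field names mirror what trainer.py writes in this commit; the concrete values are placeholders.

import time

from model.journal import record_to_journal, sample_journal_entries

# Placeholder entry in the shape trainer.py now records.
record_to_journal({
    "timestamp": time.time(),
    "source": "user",
    "text": "example message",
    "loss": 1.2345,
    "vocab_size": 1000,
})

# Pull a few random entries back out (read_journal_entries() is unchanged,
# so what comes back depends on how the journal file is formatted on disk).
for entry in sample_journal_entries(n=3):
    print(entry)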

@@ -1,14 +1,13 @@
 import torch
 import time
-from model.dynamic_expand import expand_model_if_needed, _last_expansion_time, get_optimizer, expand_lock
-from model.brain_state import model, tokenizer, DEVICE, loss_fn
-from model.brainmap import add_to_brainmap, save_brainmap
-from model.tokenizer import save_vocab
+from model.dynamic_expand import expand_model_if_needed, _last_expansion_time, expand_lock
+from model.brain_state import model, tokenizer, DEVICE, loss_fn, optimizer, scheduler
+from model.brainmap import add_to_brainmap
+from model.journal import record_to_journal
 from context.context import add_to_context, get_recent_context
 
 LOSS_FILE = "data/logs/loss.log"
 VOCAB_GROWTH_FILE = "data/logs/vocab_growth.log"
-scheduler = torch.optim.lr_scheduler.StepLR(get_optimizer(), step_size=500, gamma=0.95)
 
 
 def log_vocab_growth():
@@ -26,53 +25,61 @@ def train_on_message(text: str, source: str = "user"):
 
     now = time.time()
     if now - _last_expansion_time < 5:
-        print("[Train] Skipping to stabilize after expansion.")
+        print("[Trainer] Skipping to stabilize after expansion.")
         return
 
     if not expand_lock.acquire(timeout=0.5):
-        print("[Train] Skipped training due to active expansion.")
+        print("[Trainer] Skipped training due to active expansion.")
        return
 
    try:
        model.train()
+        context_texts = get_recent_context(10)
 
-        context_texts = get_recent_context(30)
+        # Augment the input with recent context
        augmented_text = "<start> " + " ".join(context_texts + [text]) + " <end>"
 
        tokens = tokenizer.tokenize(augmented_text)
 
        if len(tokens) < 2:
+            print("[Trainer] Message too short after cleaning.")
            return
 
+        # Clamp any token IDs beyond the model's output size
        max_token_id = model.head.out_features - 1
-        tokens = [t if t <= max_token_id else max_token_id for t in tokens]
-        tokens = tokens[:128]
+        tokens = [min(t, max_token_id) for t in tokens]
+        tokens = tokens[:128]  # Hard clamp input length
 
        if len(tokens) < 2:
+            print("[Trainer] Message too short after clamping.")
            return
 
        input_tensor = torch.tensor(tokens[:-1], dtype=torch.long, device=DEVICE).unsqueeze(0)
        target_tensor = torch.tensor(tokens[1:], dtype=torch.long, device=DEVICE).unsqueeze(0)
 
-        opt = get_optimizer()
-
        output = model(input_tensor)
        loss = loss_fn(output.view(-1, output.size(-1)), target_tensor.view(-1))
        if torch.isnan(loss):
            print("[Trainer] Detected NaN loss, skipping update.")
            return
 
-        opt.zero_grad()
+        optimizer.zero_grad()
        loss.backward()
-        opt.step()
+        optimizer.step()
        scheduler.step()
 
-        log_loss(loss.item())
-        log_vocab_growth()
-        add_to_context(text, source=source)
+        # Update brainmap and context
        add_to_brainmap(augmented_text.split())
-        save_brainmap()
-        save_vocab(tokenizer.vocab)
+        add_to_context(text, source=source)
+
+        # Log training success to journal
+        record_to_journal({
+            "timestamp": time.time(),
+            "source": source,
+            "text": text,
+            "loss": round(loss.item(), 4),
+            "vocab_size": len(tokenizer.vocab)
+        })
 
    finally:
        expand_lock.release()
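The clamping step in train_on_message maps any token ID the current head cannot yet predict onto the last valid ID. A tiny illustration with made-up numbers:

# Made-up values: a head with 10 outputs can only score token IDs 0..9.
max_token_id = 10 - 1            # stands in for model.head.out_features - 1
tokens = [3, 7, 12, 5]           # 12 is out of range for this head
tokens = [min(t, max_token_id) for t in tokens]
print(tokens)                    # -> [3, 7, 9, 5]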