From 84f98247eeb4e1a8403fab7ee9e4d4c3a792357b Mon Sep 17 00:00:00 2001
From: Dani
Date: Fri, 25 Apr 2025 22:34:29 -0400
Subject: [PATCH] Added some more stuff to enhance her brain

---
 main.py               | 17 +++++++++++++++++
 model/abstraction.py  | 23 +++++++++++++++++++++++
 model/brain.py        |  4 +++-
 model/dream_replay.py | 16 ++++++++++++++++
 model/journal.py      | 26 ++++++++++++++++++++++++++
 model/rehearsal.py    | 16 ++++++++++++++++
 6 files changed, 101 insertions(+), 1 deletion(-)
 create mode 100644 model/abstraction.py
 create mode 100644 model/dream_replay.py
 create mode 100644 model/journal.py
 create mode 100644 model/rehearsal.py

diff --git a/main.py b/main.py
index 88659df..b7acfe4 100644
--- a/main.py
+++ b/main.py
@@ -6,6 +6,8 @@ import os
 from model.train import train_on_message
 from model.brain import generate_response
 from model.cleanup import full_cleanup
+from model.dream_replay import replay_dreams
+from model.rehearsal import simulate_conversation
 from reader.reader import read_books_forever
 from dashboard.dashboard import run_dashboard
 
@@ -44,9 +46,24 @@ async def background_cleanup_loop():
         await asyncio.sleep(300)  # 5 minutes
 
 
+async def dream_replay_loop():
+    while True:
+        replay_dreams()
+        await asyncio.sleep(900)  # Replay every 15 minutes
+
+
+async def rehearsal_loop():
+    while True:
+        simulate_conversation()
+        await asyncio.sleep(1200)  # Every 20 minutes
+
+
+# Launch background tasks
 loop = asyncio.get_event_loop()
 loop.create_task(read_books_forever())  # Book reader task
 loop.create_task(background_cleanup_loop())
+loop.create_task(dream_replay_loop())
+loop.create_task(rehearsal_loop())
 
 # Launch Discord bot (blocking)
 client.run(TOKEN)
diff --git a/model/abstraction.py b/model/abstraction.py
new file mode 100644
index 0000000..fb91787
--- /dev/null
+++ b/model/abstraction.py
@@ -0,0 +1,23 @@
+import torch
+from sklearn.cluster import KMeans
+from model.tokenizer import Tokenizer
+
+tokenizer = Tokenizer()
+
+
+def cluster_vocab(n_clusters=10):
+    vocab_items = list(tokenizer.vocab.items())
+    words, ids = zip(*vocab_items)
+
+    embeds = torch.nn.Embedding(len(ids), 256)  # Same as model size
+    with torch.no_grad():
+        vectors = embeds(torch.tensor(list(ids)))
+
+    kmeans = KMeans(n_clusters=n_clusters)
+    labels = kmeans.fit_predict(vectors.cpu().numpy())
+
+    clusters = {}
+    for idx, label in enumerate(labels):
+        clusters.setdefault(label, []).append(words[idx])
+
+    return clusters
diff --git a/model/brain.py b/model/brain.py
index bcfe0e0..9a99b70 100644
--- a/model/brain.py
+++ b/model/brain.py
@@ -1,10 +1,11 @@
 import torch
 import torch.nn as nn
+import torch.nn.functional as F
 import random
 from model.tokenizer import Tokenizer
-import torch.nn.functional as F
 from model.memory import save_dream
 from model.train import train_on_message
+from model.journal import record_to_journal
 
 recent_dreams = []
 
@@ -117,6 +118,7 @@ def daydream():
 
         if score > 0.45:
             save_dream(sentence, score)
+            record_to_journal(sentence)
             train_on_message(sentence)
             recent_dreams.append((score, sentence))
             if len(recent_dreams) > 10:
diff --git a/model/dream_replay.py b/model/dream_replay.py
new file mode 100644
index 0000000..8bac8bc
--- /dev/null
+++ b/model/dream_replay.py
@@ -0,0 +1,16 @@
+import random
+import torch
+from model.memory import load_dreams
+from model.train import train_on_message
+
+
+def replay_dreams():
+    dreams = load_dreams()
+    if not dreams:
+        return
+
+    selected = random.sample(dreams, min(len(dreams), 5))
+    for dream in selected:
+        text = dream["sentence"]
+        if text:
+            train_on_message(text)
diff --git a/model/journal.py b/model/journal.py
new file mode 100644
index 0000000..5495746
--- /dev/null
+++ b/model/journal.py
@@ -0,0 +1,26 @@
+import os
+import time
+from model.train import train_on_message
+import random
+
+JOURNAL_PATH = "data/memory/journal.txt"
+
+
+def record_to_journal(thought: str):
+    os.makedirs(os.path.dirname(JOURNAL_PATH), exist_ok=True)
+    with open(JOURNAL_PATH, "a", encoding="utf-8") as f:
+        f.write(f"{time.strftime('%Y-%m-%d %H:%M:%S')} | {thought.strip()}\n")
+
+
+def read_journal_entries():
+    if not os.path.exists(JOURNAL_PATH):
+        return []
+    with open(JOURNAL_PATH, "r", encoding="utf-8") as f:
+        lines = f.readlines()
+    return [line.split("|", 1)[-1].strip() for line in lines if "|" in line]
+
+
+def replay_journal():
+    entries = read_journal_entries()
+    for entry in random.sample(entries, min(5, len(entries))):
+        train_on_message(entry)
diff --git a/model/rehearsal.py b/model/rehearsal.py
new file mode 100644
index 0000000..d1f0a2b
--- /dev/null
+++ b/model/rehearsal.py
@@ -0,0 +1,16 @@
+import torch
+from model.brain import model, tokenizer, DEVICE
+from model.train import train_on_message
+
+
+def simulate_conversation():
+    seed = torch.randint(0, tokenizer.next_id, (1, 5), device=DEVICE)
+    output = model(seed)
+    preds = torch.argmax(output, dim=-1).squeeze().tolist()
+
+    if isinstance(preds, int):
+        preds = [preds]
+
+    text = tokenizer.detokenize(preds)
+    if text and len(text.split()) >= 3:
+        train_on_message(text)
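
Not part of the patch: journal.py defines replay_journal(), but nothing above
schedules it. A minimal sketch of wiring it into main.py's background tasks,
assuming the same asyncio loop pattern used for the other loops; the name
journal_replay_loop and the 1800-second interval are illustrative choices, not
taken from the repository.

    from model.journal import replay_journal

    async def journal_replay_loop():
        while True:
            replay_journal()           # retrain on a few past journal entries
            await asyncio.sleep(1800)  # e.g. every 30 minutes

    # alongside the other tasks in main.py:
    # loop.create_task(journal_replay_loop())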
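Similarly, model/abstraction.py is created here but not imported anywhere yet,
and cluster_vocab() clusters vectors from a freshly initialized (random)
nn.Embedding rather than the trained model's weights, so the groupings carry no
learned meaning unless that table is swapped for the model's own embeddings.
A minimal sketch of calling it, using only names defined in the patch; the
n_clusters value is simply the function's default.

    from model.abstraction import cluster_vocab

    clusters = cluster_vocab(n_clusters=10)  # {cluster label: [words]}
    for label, words in clusters.items():
        print(label, words[:5])              # peek at a few words per cluster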