Added some more stuff to enhance her brain

2025-04-25 22:34:29 -04:00 · 2025-04-25 22:34:29 -04:00 · 84f98247ee
commit 84f98247ee
parent e907211d48
6 changed files with 101 additions and 1 deletions
--- a/main.py
+++ b/main.py
@ -6,6 +6,8 @@ import os
 from model.train import train_on_message
 from model.brain import generate_response
 from model.cleanup import full_cleanup
+from model.dream_replay import replay_dreams
+from model.rehearsal import simulate_conversation
 from reader.reader import read_books_forever
 from dashboard.dashboard import run_dashboard

@ -44,9 +46,24 @@ async def background_cleanup_loop():
        await asyncio.sleep(300)  # 5 minutes


+async def dream_replay_loop():
+    """Replay stored dreams every 15 minutes for as long as the loop runs.
+
+    A failure inside ``replay_dreams()`` is logged and the loop keeps going;
+    without the guard a single exception would end this task for good and
+    replay would stop until the process restarts.
+    """
+    import logging  # local import: the file's import block may not have it
+
+    log = logging.getLogger(__name__)
+    while True:
+        try:
+            # NOTE(review): replay_dreams() is a plain synchronous call, so the
+            # event loop is blocked while it runs — confirm that is acceptable.
+            replay_dreams()
+        except Exception:
+            # Top-level boundary of a background task: record and carry on.
+            log.exception("dream replay failed; retrying after the next sleep")
+        await asyncio.sleep(900)  # Replay every 15 minutes
+
+
+async def rehearsal_loop():
+    """Run a simulated conversation every 20 minutes for as long as the loop runs.
+
+    A failure inside ``simulate_conversation()`` is logged and the loop keeps
+    going; without the guard a single exception would end this task for good
+    and rehearsal would stop until the process restarts.
+    """
+    import logging  # local import: the file's import block may not have it
+
+    log = logging.getLogger(__name__)
+    while True:
+        try:
+            # NOTE(review): simulate_conversation() is a plain synchronous call,
+            # so the event loop is blocked while it runs — confirm acceptable.
+            simulate_conversation()
+        except Exception:
+            # Top-level boundary of a background task: record and carry on.
+            log.exception("rehearsal failed; retrying after the next sleep")
+        await asyncio.sleep(1200)  # Every 20 minutes
+
+
+# Launch background tasks
+# NOTE(review): the tasks below are scheduled on the loop returned by
+# asyncio.get_event_loop(); they only execute if client.run() drives that
+# same loop — confirm for the Discord client version in use.
 loop = asyncio.get_event_loop()
 loop.create_task(read_books_forever())  # Book reader task
 loop.create_task(background_cleanup_loop())
+loop.create_task(dream_replay_loop())
+loop.create_task(rehearsal_loop())

 # Launch Discord bot (blocking)
 client.run(TOKEN)
--- a/model/abstraction.py
+++ b/model/abstraction.py
@ -0,0 +1,23 @@
+import torch
+from sklearn.cluster import KMeans
+from model.tokenizer import Tokenizer
+
+# Module-level tokenizer instance; cluster_vocab() reads its ``vocab``.
+tokenizer = Tokenizer()
+
+
+def cluster_vocab(n_clusters=10):
+    """Group the tokenizer's vocabulary words into k-means clusters.
+
+    Args:
+        n_clusters: Requested number of clusters. Capped at the number of
+            vocabulary entries, because KMeans cannot fit more clusters than
+            it has samples.
+
+    Returns:
+        A dict mapping each cluster label (int) to the list of words assigned
+        to that cluster. An empty vocabulary yields an empty dict.
+    """
+    vocab_items = list(tokenizer.vocab.items())
+    if not vocab_items:
+        # zip(*[]) yields nothing to unpack, so bail out before it raises.
+        return {}
+    words, ids = zip(*vocab_items)
+
+    # The table must cover the largest id, not just the entry count: ids that
+    # do not run contiguously from 0 would otherwise index out of range.
+    # NOTE(review): this is a freshly initialised embedding created on every
+    # call, not the trained model's weights — confirm that is the intent.
+    embeds = torch.nn.Embedding(max(ids) + 1, 256)  # Same as model size
+    with torch.no_grad():
+        vectors = embeds(torch.tensor(list(ids)))
+
+    kmeans = KMeans(n_clusters=min(n_clusters, len(words)))
+    labels = kmeans.fit_predict(vectors.cpu().numpy())
+
+    clusters = {}
+    for word, label in zip(words, labels):
+        # Use plain int keys rather than numpy integer scalars.
+        clusters.setdefault(int(label), []).append(word)
+
+    return clusters
--- a/model/brain.py
+++ b/model/brain.py
@ -1,10 +1,11 @@
 import torch
 import torch.nn as nn
+import torch.nn.functional as F
 import random
 from model.tokenizer import Tokenizer
-import torch.nn.functional as F
 from model.memory import save_dream
 from model.train import train_on_message
+from model.journal import record_to_journal


 recent_dreams = []
@ -117,6 +118,7 @@ def daydream():

    if score > 0.45:
        save_dream(sentence, score)
+        record_to_journal(sentence)
        train_on_message(sentence)
        recent_dreams.append((score, sentence))
        if len(recent_dreams) > 10:
--- a/model/dream_replay.py
+++ b/model/dream_replay.py
@ -0,0 +1,16 @@
+import random
+import torch
+from model.memory import load_dreams
+from model.train import train_on_message
+
+
+def replay_dreams():
+    """Retrain on up to five randomly chosen stored dreams.
+
+    Loads the saved dreams and, if there are any, passes the non-empty
+    "sentence" text of each sampled dream to ``train_on_message``.
+    """
+    stored = load_dreams()
+    if stored:
+        sample_size = min(len(stored), 5)
+        for entry in random.sample(stored, sample_size):
+            sentence = entry["sentence"]
+            if not sentence:
+                continue
+            train_on_message(sentence)
--- a/model/journal.py
+++ b/model/journal.py
@ -0,0 +1,26 @@
+import os
+import time
+from model.train import train_on_message
+import random
+
+# Journal file; record_to_journal() appends one "timestamp | thought" line per entry.
+JOURNAL_PATH = "data/memory/journal.txt"
+
+
+def record_to_journal(thought: str):
+    """Append *thought* to the journal file as one timestamped line.
+
+    Each entry is written as ``YYYY-MM-DD HH:MM:SS | text``. Line breaks
+    inside the thought are replaced with single spaces so an entry always
+    occupies exactly one line; a thought that is blank after stripping is
+    not written at all.
+
+    Args:
+        thought: Text to record.
+    """
+    # The journal is read back line by line, so an embedded newline would
+    # split one thought into several (partly separator-less) lines.
+    parts = (part.strip() for part in thought.splitlines())
+    entry = " ".join(part for part in parts if part)
+    if not entry:
+        return
+
+    os.makedirs(os.path.dirname(JOURNAL_PATH), exist_ok=True)
+    with open(JOURNAL_PATH, "a", encoding="utf-8") as f:
+        f.write(f"{time.strftime('%Y-%m-%d %H:%M:%S')} | {entry}\n")
+
+
+def read_journal_entries():
+    """Return the thought text of every journal line, in file order.
+
+    Lines without a "|" separator are ignored; for the rest, everything after
+    the first "|" is returned with surrounding whitespace removed. A missing
+    journal file yields an empty list.
+    """
+    if os.path.exists(JOURNAL_PATH):
+        with open(JOURNAL_PATH, "r", encoding="utf-8") as journal:
+            raw_lines = journal.readlines()
+    else:
+        return []
+
+    entries = []
+    for raw in raw_lines:
+        _stamp, separator, text = raw.partition("|")
+        if separator:
+            entries.append(text.strip())
+    return entries
+
+
+def replay_journal():
+    """Train on up to five journal entries picked at random."""
+    thoughts = read_journal_entries()
+    how_many = min(5, len(thoughts))
+    chosen = random.sample(thoughts, how_many)
+    for thought in chosen:
+        train_on_message(thought)
--- a/model/rehearsal.py
+++ b/model/rehearsal.py
@ -0,0 +1,16 @@
+import torch
+from model.brain import model, tokenizer, DEVICE
+from model.train import train_on_message
+
+
+def simulate_conversation():
+    """Generate a short utterance from a random seed and train on it.
+
+    Feeds five random token ids through the model, takes the arg-max token at
+    each position, detokenizes the result and, when the text has at least
+    three whitespace-separated words, passes it to ``train_on_message``.
+    Does nothing when the tokenizer has no ids to sample from.
+    """
+    if tokenizer.next_id < 1:
+        # torch.randint requires low < high; there is nothing to sample yet.
+        return
+
+    seed = torch.randint(0, tokenizer.next_id, (1, 5), device=DEVICE)
+    # Pure inference: no autograd graph is needed just to pick arg-max ids.
+    with torch.no_grad():
+        output = model(seed)
+    preds = torch.argmax(output, dim=-1).squeeze().tolist()
+
+    if isinstance(preds, int):
+        # squeeze() collapses a single prediction to a scalar.
+        preds = [preds]
+
+    text = tokenizer.detokenize(preds)
+    if text and len(text.split()) >= 3:
+        train_on_message(text)