From e907211d48fd9c52ca1ef43fe10480e299b7a794 Mon Sep 17 00:00:00 2001
From: Dani
Date: Fri, 25 Apr 2025 22:29:48 -0400
Subject: [PATCH] Added loss tracking

---
 .gitignore                     |  4 +++-
 dashboard/dashboard.py         | 15 +++++++++++++--
 dashboard/templates/index.html |  6 ++++++
 model/brain.py                 |  7 ++++---
 model/train.py                 |  8 ++++++++
 5 files changed, 34 insertions(+), 6 deletions(-)

diff --git a/.gitignore b/.gitignore
index 04d8c76..05b7fbe 100644
--- a/.gitignore
+++ b/.gitignore
@@ -170,4 +170,6 @@ cython_debug/
 .vscode/launch.json
 
 /data/books/alice_in_wonderland.txt
-/data/books/wizard_of_oz.txt
\ No newline at end of file
+/data/books/wizard_of_oz.txt
+/data/memory/context.json
+/data/memory/dreams.json
\ No newline at end of file
diff --git a/dashboard/dashboard.py b/dashboard/dashboard.py
index cb66b6d..547579a 100644
--- a/dashboard/dashboard.py
+++ b/dashboard/dashboard.py
@@ -2,22 +2,33 @@ from flask import Flask, render_template
 from model.memory import load_dreams
 from model.tokenizer import Tokenizer
 from context.context import load_context
-import threading
+import os
 
 app = Flask(__name__)
 tokenizer = Tokenizer()
 
 
+def load_loss_data():
+    path = "data/logs/loss.log"
+    if not os.path.exists(path):
+        return []
+    with open(path, "r", encoding="utf-8") as f:
+        lines = f.readlines()
+    return [float(line.strip().split(",")[1]) for line in lines[-50:]]
+
+
 @app.route("/")
 def index():
     dreams = load_dreams()
     top_dreams = dreams[:5]
     memory_size = len(load_context())
+    loss_data = load_loss_data()
     return render_template("index.html",
                            vocab_size=len(tokenizer.vocab),
                            top_dreams=top_dreams,
-                           memory_size=memory_size)
+                           memory_size=memory_size,
+                           loss_data=loss_data)
 
 
 def run_dashboard():
diff --git a/dashboard/templates/index.html b/dashboard/templates/index.html
index 5c0b5aa..e6b323b 100644
--- a/dashboard/templates/index.html
+++ b/dashboard/templates/index.html
@@ -14,5 +14,11 @@
         <li>{{ dream.score }} | {{ dream.sentence }}</li>
         {% endfor %}
     </ul>
+    <h2>📉 Recent Loss</h2>
+    <ul>
+        {% for loss in loss_data %}
+        <li>{{ loss }}</li>
+        {% endfor %}
+    </ul>
 </body>
 </html>
diff --git a/model/brain.py b/model/brain.py
index 171fb3e..bcfe0e0 100644
--- a/model/brain.py
+++ b/model/brain.py
@@ -4,6 +4,7 @@
 import random
 from model.tokenizer import Tokenizer
 import torch.nn.functional as F
 from model.memory import save_dream
+from model.train import train_on_message
 
 recent_dreams = []
@@ -57,10 +58,10 @@ class TransformerBlock(nn.Module):
 
 
 class TinyTransformer(nn.Module):
-    def __init__(self, vocab_size=VOCAB_SIZE, embed_dim=EMBED_DIM, depth=2, heads=4):
+    def __init__(self, vocab_size=VOCAB_SIZE, embed_dim=256, depth=4, heads=8):
         super().__init__()
         self.token_embed = nn.Embedding(vocab_size, embed_dim)
-        self.pos_embed = nn.Parameter(torch.randn(1, 128, embed_dim))  # max sequence length = 128
+        self.pos_embed = nn.Parameter(torch.randn(1, 128, embed_dim))
         self.blocks = nn.Sequential(*[TransformerBlock(embed_dim, heads) for _ in range(depth)])
         self.norm = nn.LayerNorm(embed_dim)
         self.head = nn.Linear(embed_dim, vocab_size)
@@ -114,7 +115,7 @@ def daydream():
     sentence = tokenizer.detokenize(dream)
     score = score_sentence(sentence)
 
-    if score > 0.3:
+    if score > 0.45:
         save_dream(sentence, score)
         train_on_message(sentence)
         recent_dreams.append((score, sentence))
diff --git a/model/train.py b/model/train.py
index 7b0eab4..23b3ac0 100644
--- a/model/train.py
+++ b/model/train.py
@@ -7,6 +7,13 @@
 from context.context import get_recent_context, add_to_context
 
 _last_thought = time.time()
 
+LOSS_FILE = "data/logs/loss.log"
+
+
+def log_loss(value: float):
+    with open(LOSS_FILE, "a", encoding="utf-8") as f:
+        f.write(f"{time.time()},{round(value, 4)}\n")
+
 
 def train_on_message(text: str):
     global _last_thought
@@ -23,6 +30,7 @@ def train_on_message(text: str):
 
     output = model(input_tensor)
     loss = loss_fn(output.view(-1, output.size(-1)), target_tensor.view(-1))
+    log_loss(loss.item())
 
     optimizer.zero_grad()
     loss.backward()
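
A side note on the logging path: `log_loss` appends to `data/logs/loss.log`, but nothing in this patch creates `data/logs/`, so the first call raises `FileNotFoundError` on a fresh checkout (the dashboard's `load_loss_data` already tolerates a missing file). A minimal directory-safe sketch, assuming the same `LOSS_FILE` path; the `os.makedirs` guard is an addition, not part of the commit:

```python
import os
import time

LOSS_FILE = "data/logs/loss.log"


def log_loss(value: float):
    # open(..., "a") creates the file but not missing parent directories,
    # so make sure data/logs/ exists before appending.
    os.makedirs(os.path.dirname(LOSS_FILE), exist_ok=True)
    with open(LOSS_FILE, "a", encoding="utf-8") as f:
        # Same record format the dashboard parses: "<unix_timestamp>,<loss>"
        f.write(f"{time.time()},{round(value, 4)}\n")
```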
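
One more caveat: the patch adds a module-level `from model.train import train_on_message` to `model/brain.py`. If `model/train.py` in turn imports anything from `model.brain` at its top (it references `model`, `loss_fn`, and `optimizer` defined elsewhere), the two modules form an import cycle that fails at startup. A common workaround, sketched under that assumption with a hypothetical helper name, is to defer the import to call time:

```python
# Hypothetical sketch for model/brain.py: only needed if model/train.py
# also imports from model.brain at module level, which would turn the new
# top-level "from model.train import train_on_message" into a cycle.

def train_dream(sentence: str, score: float):
    # Deferred import: resolved at call time, after both modules have
    # finished initializing, which breaks the circular import.
    from model.train import train_on_message
    if score > 0.45:
        train_on_message(sentence)
```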