From e907211d48fd9c52ca1ef43fe10480e299b7a794 Mon Sep 17 00:00:00 2001
From: Dani
Date: Fri, 25 Apr 2025 22:29:48 -0400
Subject: [PATCH] Added loss tracking

---
 .gitignore                     |  4 +++-
 dashboard/dashboard.py         | 15 +++++++++++++--
 dashboard/templates/index.html |  6 ++++++
 model/brain.py                 |  7 ++++---
 model/train.py                 |  8 ++++++++
 5 files changed, 34 insertions(+), 6 deletions(-)

diff --git a/.gitignore b/.gitignore
index 04d8c76..05b7fbe 100644
--- a/.gitignore
+++ b/.gitignore
@@ -170,4 +170,6 @@ cython_debug/
 .vscode/launch.json
 
 /data/books/alice_in_wonderland.txt
-/data/books/wizard_of_oz.txt
\ No newline at end of file
+/data/books/wizard_of_oz.txt
+/data/memory/context.json
+/data/memory/dreams.json
\ No newline at end of file
diff --git a/dashboard/dashboard.py b/dashboard/dashboard.py
index cb66b6d..547579a 100644
--- a/dashboard/dashboard.py
+++ b/dashboard/dashboard.py
@@ -2,22 +2,33 @@ from flask import Flask, render_template
 from model.memory import load_dreams
 from model.tokenizer import Tokenizer
 from context.context import load_context
-import threading
+import os
 
 app = Flask(__name__)
 tokenizer = Tokenizer()
 
 
+def load_loss_data():
+    path = "data/logs/loss.log"
+    if not os.path.exists(path):
+        return []
+    with open(path, "r", encoding="utf-8") as f:
+        lines = f.readlines()
+    return [float(line.strip().split(",")[1]) for line in lines[-50:]]
+
+
 @app.route("/")
 def index():
     dreams = load_dreams()
     top_dreams = dreams[:5]
     memory_size = len(load_context())
+    loss_data = load_loss_data()
     return render_template("index.html",
                            vocab_size=len(tokenizer.vocab),
                            top_dreams=top_dreams,
-                           memory_size=memory_size)
+                           memory_size=memory_size,
+                           loss_data=loss_data)
 
 
 def run_dashboard():
diff --git a/dashboard/templates/index.html b/dashboard/templates/index.html
index 5c0b5aa..e6b323b 100644
--- a/dashboard/templates/index.html
+++ b/dashboard/templates/index.html
@@ -14,5 +14,11 @@
         <li>{{ dream.score }} | {{ dream.sentence }}</li>
         {% endfor %}
     </ul>
+    <h2>📉 Recent Loss</h2>
+    <ul>
+        {% for loss in loss_data %}
+        <li>{{ loss }}</li>
+        {% endfor %}
+    </ul>
 </body>
 </html>
diff --git a/model/brain.py b/model/brain.py
index 171fb3e..bcfe0e0 100644
--- a/model/brain.py
+++ b/model/brain.py
@@ -4,6 +4,7 @@
 import random
 from model.tokenizer import Tokenizer
 import torch.nn.functional as F
 from model.memory import save_dream
+from model.train import train_on_message
 
 recent_dreams = []
@@ -57,10 +58,10 @@ class TransformerBlock(nn.Module):
 
 
 class TinyTransformer(nn.Module):
-    def __init__(self, vocab_size=VOCAB_SIZE, embed_dim=EMBED_DIM, depth=2, heads=4):
+    def __init__(self, vocab_size=VOCAB_SIZE, embed_dim=256, depth=4, heads=8):
         super().__init__()
         self.token_embed = nn.Embedding(vocab_size, embed_dim)
-        self.pos_embed = nn.Parameter(torch.randn(1, 128, embed_dim))  # max sequence length = 128
+        self.pos_embed = nn.Parameter(torch.randn(1, 128, embed_dim))
         self.blocks = nn.Sequential(*[TransformerBlock(embed_dim, heads) for _ in range(depth)])
         self.norm = nn.LayerNorm(embed_dim)
         self.head = nn.Linear(embed_dim, vocab_size)
@@ -114,7 +115,7 @@ def daydream():
     sentence = tokenizer.detokenize(dream)
     score = score_sentence(sentence)
 
-    if score > 0.3:
+    if score > 0.45:
         save_dream(sentence, score)
         train_on_message(sentence)
         recent_dreams.append((score, sentence))
diff --git a/model/train.py b/model/train.py
index 7b0eab4..23b3ac0 100644
--- a/model/train.py
+++ b/model/train.py
@@ -7,6 +7,13 @@
 from context.context import get_recent_context, add_to_context
 
 _last_thought = time.time()
 
+LOSS_FILE = "data/logs/loss.log"
+
+
+def log_loss(value: float):
+    with open(LOSS_FILE, "a", encoding="utf-8") as f:
+        f.write(f"{time.time()},{round(value, 4)}\n")
+
 
 def train_on_message(text: str):
     global _last_thought
@@ -23,6 +30,7 @@ def train_on_message(text: str):
 
     output = model(input_tensor)
     loss = loss_fn(output.view(-1, output.size(-1)), target_tensor.view(-1))
+    log_loss(loss.item())
 
     optimizer.zero_grad()
     loss.backward()
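
A side note on the logging path: `log_loss` appends to `data/logs/loss.log`, but nothing in this patch creates `data/logs/`, so the first call raises `FileNotFoundError` on a fresh checkout (the dashboard's `load_loss_data` already tolerates a missing file). A minimal directory-safe sketch, assuming the same `LOSS_FILE` path; the `os.makedirs` guard is an addition, not part of the commit:

```python
import os
import time

LOSS_FILE = "data/logs/loss.log"


def log_loss(value: float):
    # open(..., "a") creates the file but not missing parent directories,
    # so make sure data/logs/ exists before appending.
    os.makedirs(os.path.dirname(LOSS_FILE), exist_ok=True)
    with open(LOSS_FILE, "a", encoding="utf-8") as f:
        # Same record format the dashboard parses: "<unix_timestamp>,<loss>"
        f.write(f"{time.time()},{round(value, 4)}\n")
```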
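
One more caveat: the patch adds a module-level `from model.train import train_on_message` to `model/brain.py`. If `model/train.py` in turn imports anything from `model.brain` at its top (it references `model`, `loss_fn`, and `optimizer` defined elsewhere), the two modules form an import cycle that fails at startup. A common workaround, sketched under that assumption with a hypothetical helper name, is to defer the import to call time:

```python
# Hypothetical sketch for model/brain.py: only needed if model/train.py
# also imports from model.brain at module level, which would turn the new
# top-level "from model.train import train_on_message" into a cycle.

def train_dream(sentence: str, score: float):
    # Deferred import: resolved at call time, after both modules have
    # finished initializing, which breaks the circular import.
    from model.train import train_on_message
    if score > 0.45:
        train_on_message(sentence)
```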