Prevented new books from being uploaded by mistake.
Added a graph to track vocab growth.
This commit is contained in:
parent a9b4871420
commit 8d7cf38f1b

.gitignore (vendored): 3 changed lines
@@ -169,8 +169,7 @@ cython_debug/
 #.idea/
 
 .vscode/launch.json
-/data/books/alice_in_wonderland.txt
-/data/books/wizard_of_oz.txt
+/data/books/*
 /data/memory/context.json
 /data/memory/dreams.json
 data/memory/brainmap.json
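
With /data/books/* in the ignore list, any text dropped into data/books/ stays untracked, so new books can no longer be committed by mistake. Two caveats apply: files git already tracks keep being tracked until they are removed from the index (git rm --cached data/books/<file>), and an empty directory will not exist on a fresh clone. If the directory itself should be kept, a common companion pattern (an assumption about intent, not part of this commit) is a negated placeholder:

    /data/books/*
    !/data/books/.gitkeep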

In the Flask dashboard app:

@@ -10,6 +10,7 @@ from context.context import load_context
 import json
 import os
 import time
+import datetime
 
 
 app = Flask(__name__)
@@ -27,6 +28,21 @@ def load_loss_data():
     return [float(line.strip().split(",")[1]) for line in lines[-50:]]
 
 
+def load_vocab_growth():
+    path = "data/logs/vocab_growth.log"
+    if not os.path.exists(path):
+        return []
+    with open(path, "r", encoding="utf-8") as f:
+        lines = f.readlines()
+    data = []
+    for line in lines:
+        timestamp, vocab_size = line.strip().split(",")
+        # Reformat timestamp to human-readable
+        readable_time = datetime.datetime.fromtimestamp(float(timestamp)).strftime("%H:%M:%S")
+        data.append((readable_time, int(vocab_size)))
+    return data
+
+
 def update_next_cycle(seconds):
     global next_cycle_time
     next_cycle_time = time.time() + seconds
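
One thing to note: load_vocab_growth() assumes every line of the log is a well-formed "timestamp,vocab_size" pair. Because the trainer appends to the file while the dashboard reads it, a partially written final line would raise a ValueError and break the growth page. A slightly more defensive variant (a sketch, not part of this commit) skips lines that fail to parse:

    import datetime
    import os


    def load_vocab_growth(path="data/logs/vocab_growth.log"):
        if not os.path.exists(path):
            return []
        data = []
        with open(path, "r", encoding="utf-8") as f:
            for line in f:
                try:
                    timestamp, vocab_size = line.strip().split(",")
                    readable_time = datetime.datetime.fromtimestamp(
                        float(timestamp)).strftime("%H:%M:%S")
                    data.append((readable_time, int(vocab_size)))
                except ValueError:
                    continue  # torn or partial line; ignore it
        return data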
@@ -55,10 +71,13 @@ def growth():
     vocab_size = len(tokenizer.vocab)
     brainmap_size = len(get_brainmap())
     memory_size = len(load_context())
+    vocab_growth = load_vocab_growth()
+
     return render_template("growth.html",
                            vocab_size=vocab_size,
                            brainmap_size=brainmap_size,
-                           memory_size=memory_size)
+                           memory_size=memory_size,
+                           vocab_growth=vocab_growth)
 
 
 @app.route("/brainmap")
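
Note that load_loss_data() (earlier hunk) caps its output at the last 50 samples, while load_vocab_growth() re-reads and returns the entire log on every page load, so the chart payload grows for the life of the bot. A bounded variant (a sketch under the same file format, not part of this commit) keeps only the newest n lines:

    from collections import deque


    def tail_lines(path, n=200):
        # deque(maxlen=n) discards older lines as the file streams through,
        # so only the newest n lines are ever held in memory.
        with open(path, "r", encoding="utf-8") as f:
            return list(deque(f, maxlen=n))

load_vocab_growth() could then iterate tail_lines(path) instead of f.readlines().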

In the growth.html template:

@@ -2,6 +2,7 @@
 <html lang="en">
 <head>
     <meta charset="UTF-8">
+    <script src="https://cdn.jsdelivr.net/npm/chart.js"></script>
     <title>Ruby's Brain Growth</title>
     <style>
         body {
@@ -46,5 +47,61 @@
     <div class="stat">Brain Map Size: {{ brainmap_size }}</div>
     <div class="stat">Memory Entries: {{ memory_size }}</div>
 
+    <div class="divider"></div>
+
+    <h2>🧠 Vocabulary Growth Over Time</h2>
+    <canvas id="vocabChart" width="600" height="300"></canvas>
+
+    <script>
+        const ctx = document.getElementById('vocabChart').getContext('2d');
+        const vocabData = {
+            labels: [
+                {% for entry in vocab_growth %}
+                    "{{ entry[0] }}",
+                {% endfor %}
+            ],
+            datasets: [{
+                label: 'Vocab Size',
+                data: [
+                    {% for entry in vocab_growth %}
+                        {{ entry[1] }},
+                    {% endfor %}
+                ],
+                fill: true,
+                borderColor: 'rgb(75, 192, 192)',
+                backgroundColor: 'rgba(75, 192, 192, 0.2)',
+                tension: 0.3
+            }]
+        };
+
+        const vocabChart = new Chart(ctx, {
+            type: 'line',
+            data: vocabData,
+            options: {
+                scales: {
+                    x: {
+                        ticks: {
+                            autoSkip: true,
+                            maxTicksLimit: 10 // only show up to 10 x-axis labels
+                        },
+                        title: {
+                            display: true,
+                            text: 'Time'
+                        }
+                    },
+                    y: {
+                        title: {
+                            display: true,
+                            text: 'Vocabulary Size'
+                        },
+                        beginAtZero: true
+                    }
+                }
+            }
+        });
+    </script>
+
+
+
 </body>
 </html>
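
A smaller template is possible with Jinja's built-in tojson filter: since vocab_growth is a list of (label, size) pairs, it can be serialized once and split apart in JavaScript, instead of interleaving two {% for %} loops with the Chart.js config. A sketch of that alternative (same data, not part of this commit):

    <script>
        // Serialize the whole list once; tojson also handles escaping.
        const points = {{ vocab_growth | tojson }};  // [["12:01:07", 153], ...]
        const vocabData = {
            labels: points.map(p => p[0]),
            datasets: [{ label: 'Vocab Size', data: points.map(p => p[1]) }]
        };
    </script>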

In the training module:

@@ -5,6 +5,12 @@ from model.brain_state import model, tokenizer, DEVICE, loss_fn
 from context.context import add_to_context, get_recent_context
 
 LOSS_FILE = "data/logs/loss.log"
+VOCAB_GROWTH_FILE = "data/logs/vocab_growth.log"
+
+
+def log_vocab_growth():
+    with open(VOCAB_GROWTH_FILE, "a", encoding="utf-8") as f:
+        f.write(f"{time.time()},{len(tokenizer.vocab)}\n")
 
 
 def log_loss(value: float):
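
Two observations on the logger. First, it relies on time being imported at the top of the module, which is outside this hunk. Second, log_vocab_growth() runs after every trained message (next hunk), so the log grows without bound even while the vocabulary is static. A change-only variant (a sketch, assuming the same module-level names) appends a line only when the size actually moves:

    _last_logged_size = None  # module-level cache of the last size written


    def log_vocab_growth():
        global _last_logged_size
        size = len(tokenizer.vocab)
        if size == _last_logged_size:
            return  # vocab unchanged since the last write; skip the append
        _last_logged_size = size
        with open(VOCAB_GROWTH_FILE, "a", encoding="utf-8") as f:
            f.write(f"{time.time()},{size}\n")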
@@ -45,4 +51,5 @@ def train_on_message(text: str, source: str = "user"):
     opt.step()
 
     log_loss(loss.item())
+    log_vocab_growth()
     add_to_context(text, source=source)
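
With both halves wired up, a quick end-to-end check (hypothetical snippet, run from the repository root) is to append two fake samples and confirm the dashboard's loader reads them back:

    import os
    import time

    os.makedirs("data/logs", exist_ok=True)
    with open("data/logs/vocab_growth.log", "a", encoding="utf-8") as f:
        f.write(f"{time.time()},100\n")
        f.write(f"{time.time()},105\n")

    # load_vocab_growth() in the dashboard module should now end with
    # [..., ("HH:MM:SS", 100), ("HH:MM:SS", 105)]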