Prevented new books from being uploaded by mistake.

Added a graph to track vocab growth.
This commit is contained in:
Dani 2025-04-26 23:45:06 -04:00
parent a9b4871420
commit 8d7cf38f1b
4 changed files with 85 additions and 3 deletions

3
.gitignore vendored
View File

@ -169,8 +169,7 @@ cython_debug/
#.idea/
.vscode/launch.json
/data/books/alice_in_wonderland.txt
/data/books/wizard_of_oz.txt
/data/books/*
/data/memory/context.json
/data/memory/dreams.json
data/memory/brainmap.json

View File

@ -10,6 +10,7 @@ from context.context import load_context
import json
import os
import time
import datetime
app = Flask(__name__)
@ -27,6 +28,21 @@ def load_loss_data():
return [float(line.strip().split(",")[1]) for line in lines[-50:]]
def load_vocab_growth():
    """Load vocab-growth samples from the log as (time_str, vocab_size) tuples.

    Each log line has the form ``"<unix_timestamp>,<vocab_size>"`` (written by
    ``log_vocab_growth``).  Returns an empty list when the log file does not
    exist.  Blank or malformed lines are skipped instead of raising, so a
    partially written line cannot take down the dashboard page.
    """
    path = "data/logs/vocab_growth.log"
    if not os.path.exists(path):
        return []
    data = []
    with open(path, "r", encoding="utf-8") as f:
        # Stream line-by-line rather than readlines(): same result, no
        # intermediate list for a log that grows on every training step.
        for line in f:
            line = line.strip()
            if not line:
                continue
            try:
                timestamp, vocab_size = line.split(",")
                # Reformat the raw epoch timestamp to a human-readable label.
                readable_time = datetime.datetime.fromtimestamp(
                    float(timestamp)
                ).strftime("%H:%M:%S")
                data.append((readable_time, int(vocab_size)))
            except ValueError:
                # Wrong field count or non-numeric fields: skip the bad line.
                continue
    return data
def update_next_cycle(seconds):
    """Schedule the next training cycle *seconds* from now.

    Stores an absolute epoch deadline in the module-global
    ``next_cycle_time``, which the dashboard reads for its countdown.
    """
    global next_cycle_time
    deadline = time.time() + seconds
    next_cycle_time = deadline
@ -55,10 +71,13 @@ def growth():
vocab_size = len(tokenizer.vocab)
brainmap_size = len(get_brainmap())
memory_size = len(load_context())
vocab_growth = load_vocab_growth()
return render_template("growth.html",
vocab_size=vocab_size,
brainmap_size=brainmap_size,
memory_size=memory_size)
memory_size=memory_size,
vocab_growth=vocab_growth)
@app.route("/brainmap")

View File

@ -2,6 +2,7 @@
<html lang="en">
<head>
<meta charset="UTF-8">
<script src="https://cdn.jsdelivr.net/npm/chart.js"></script>
<title>Ruby's Brain Growth</title>
<style>
body {
@ -46,5 +47,61 @@
<div class="stat">Brain Map Size: {{ brainmap_size }}</div>
<div class="stat">Memory Entries: {{ memory_size }}</div>
<div class="divider"></div>
<h2>🧠 Vocabulary Growth Over Time</h2>
<canvas id="vocabChart" width="600" height="300"></canvas>
<script>
// Render the vocabulary-growth line chart using Chart.js
// (loaded from the CDN <script> tag added in <head>).
const ctx = document.getElementById('vocabChart').getContext('2d');
const vocabData = {
// X-axis labels: server-rendered time strings (entry[0] of each
// (time_str, vocab_size) tuple passed in as `vocab_growth`).
labels: [
{% for entry in vocab_growth %}
"{{ entry[0] }}",
{% endfor %}
],
datasets: [{
label: 'Vocab Size',
// Y values: vocab size (entry[1]), emitted in the same order as labels.
data: [
{% for entry in vocab_growth %}
{{ entry[1] }},
{% endfor %}
],
fill: true,
borderColor: 'rgb(75, 192, 192)',
backgroundColor: 'rgba(75, 192, 192, 0.2)',
tension: 0.3
}]
};
// Build the chart itself; labels/data come from vocabData above.
const vocabChart = new Chart(ctx, {
type: 'line',
data: vocabData,
options: {
scales: {
x: {
// Thin out the time labels so a long log stays readable.
ticks: {
autoSkip: true,
maxTicksLimit: 10 // only show up to 10 x-axis labels
},
title: {
display: true,
text: 'Time'
}
},
y: {
title: {
display: true,
text: 'Vocabulary Size'
},
beginAtZero: true
}
}
}
});
</script>
</body>
</html>

View File

@ -5,6 +5,12 @@ from model.brain_state import model, tokenizer, DEVICE, loss_fn
from context.context import add_to_context, get_recent_context
LOSS_FILE = "data/logs/loss.log"
VOCAB_GROWTH_FILE = "data/logs/vocab_growth.log"
def log_vocab_growth():
    """Append one ``"<epoch>,<vocab size>"`` sample to the vocab growth log."""
    sample = f"{time.time()},{len(tokenizer.vocab)}\n"
    with open(VOCAB_GROWTH_FILE, "a", encoding="utf-8") as log:
        log.write(sample)
def log_loss(value: float):
@ -45,4 +51,5 @@ def train_on_message(text: str, source: str = "user"):
opt.step()
log_loss(loss.item())
log_vocab_growth()
add_to_context(text, source=source)