Prevented new book files from being committed to the repository by mistake.
Added a graph to track vocab growth.
This commit is contained in:
parent
a9b4871420
commit
8d7cf38f1b
3
.gitignore
vendored
3
.gitignore
vendored
@ -169,8 +169,7 @@ cython_debug/
|
||||
#.idea/
|
||||
|
||||
.vscode/launch.json
|
||||
/data/books/alice_in_wonderland.txt
|
||||
/data/books/wizard_of_oz.txt
|
||||
/data/books/*
|
||||
/data/memory/context.json
|
||||
/data/memory/dreams.json
|
||||
data/memory/brainmap.json
|
||||
|
@ -10,6 +10,7 @@ from context.context import load_context
|
||||
import json
|
||||
import os
|
||||
import time
|
||||
import datetime
|
||||
|
||||
|
||||
app = Flask(__name__)
|
||||
@ -27,6 +28,21 @@ def load_loss_data():
|
||||
return [float(line.strip().split(",")[1]) for line in lines[-50:]]
|
||||
|
||||
|
||||
def load_vocab_growth():
    """Load the vocabulary-growth log as (time_of_day, vocab_size) pairs.

    Reads ``data/logs/vocab_growth.log``, where each line is expected to be
    ``"<unix_timestamp>,<vocab_size>"``.

    Returns:
        list[tuple[str, int]]: one ``("HH:MM:SS", vocab_size)`` entry per
        valid log line, in file order. Returns ``[]`` when the log file
        does not exist.

    Blank or malformed lines (e.g. from a partial write while the trainer
    is appending) are skipped instead of crashing the /growth page with a
    ValueError, which the previous implementation did.
    """
    path = "data/logs/vocab_growth.log"
    if not os.path.exists(path):
        return []
    data = []
    # Iterate the file lazily instead of readlines(): same order, less memory.
    with open(path, "r", encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if not line:
                continue
            try:
                timestamp, vocab_size = line.split(",")
                # Reformat the unix timestamp to a human-readable time of day.
                readable_time = datetime.datetime.fromtimestamp(
                    float(timestamp)
                ).strftime("%H:%M:%S")
                data.append((readable_time, int(vocab_size)))
            except ValueError:
                # Corrupt record (wrong field count or non-numeric value) —
                # skip it rather than take down the dashboard.
                continue
    return data
|
||||
|
||||
|
||||
def update_next_cycle(seconds):
    """Schedule the next training cycle *seconds* from now.

    Stores the absolute wall-clock deadline in the module-level
    ``next_cycle_time`` so other handlers can report the countdown.
    """
    global next_cycle_time
    next_cycle_time = seconds + time.time()
|
||||
@ -55,10 +71,13 @@ def growth():
|
||||
vocab_size = len(tokenizer.vocab)
|
||||
brainmap_size = len(get_brainmap())
|
||||
memory_size = len(load_context())
|
||||
vocab_growth = load_vocab_growth()
|
||||
|
||||
return render_template("growth.html",
|
||||
vocab_size=vocab_size,
|
||||
brainmap_size=brainmap_size,
|
||||
memory_size=memory_size)
|
||||
memory_size=memory_size,
|
||||
vocab_growth=vocab_growth)
|
||||
|
||||
|
||||
@app.route("/brainmap")
|
||||
|
@ -2,6 +2,7 @@
|
||||
<html lang="en">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<script src="https://cdn.jsdelivr.net/npm/chart.js"></script>
|
||||
<title>Ruby's Brain Growth</title>
|
||||
<style>
|
||||
body {
|
||||
@ -46,5 +47,61 @@
|
||||
<div class="stat">Brain Map Size: {{ brainmap_size }}</div>
|
||||
<div class="stat">Memory Entries: {{ memory_size }}</div>
|
||||
|
||||
<div class="divider"></div>
|
||||
|
||||
<h2>🧠 Vocabulary Growth Over Time</h2>
|
||||
<canvas id="vocabChart" width="600" height="300"></canvas>
|
||||
|
||||
<script>
|
||||
const ctx = document.getElementById('vocabChart').getContext('2d');
|
||||
const vocabData = {
|
||||
labels: [
|
||||
{% for entry in vocab_growth %}
|
||||
"{{ entry[0] }}",
|
||||
{% endfor %}
|
||||
],
|
||||
datasets: [{
|
||||
label: 'Vocab Size',
|
||||
data: [
|
||||
{% for entry in vocab_growth %}
|
||||
{{ entry[1] }},
|
||||
{% endfor %}
|
||||
],
|
||||
fill: true,
|
||||
borderColor: 'rgb(75, 192, 192)',
|
||||
backgroundColor: 'rgba(75, 192, 192, 0.2)',
|
||||
tension: 0.3
|
||||
}]
|
||||
};
|
||||
|
||||
const vocabChart = new Chart(ctx, {
|
||||
type: 'line',
|
||||
data: vocabData,
|
||||
options: {
|
||||
scales: {
|
||||
x: {
|
||||
ticks: {
|
||||
autoSkip: true,
|
||||
maxTicksLimit: 10 // only show up to 10 x-axis labels
|
||||
},
|
||||
title: {
|
||||
display: true,
|
||||
text: 'Time'
|
||||
}
|
||||
},
|
||||
y: {
|
||||
title: {
|
||||
display: true,
|
||||
text: 'Vocabulary Size'
|
||||
},
|
||||
beginAtZero: true
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
</script>
|
||||
|
||||
|
||||
|
||||
</body>
|
||||
</html>
|
||||
|
@ -5,6 +5,12 @@ from model.brain_state import model, tokenizer, DEVICE, loss_fn
|
||||
from context.context import add_to_context, get_recent_context
|
||||
|
||||
LOSS_FILE = "data/logs/loss.log"
|
||||
VOCAB_GROWTH_FILE = "data/logs/vocab_growth.log"
|
||||
|
||||
|
||||
def log_vocab_growth():
    """Append one "<unix_time>,<vocab_size>" record to the vocab-growth log.

    Each call writes a single line to ``VOCAB_GROWTH_FILE``; the /growth
    dashboard later parses these records to chart vocabulary size over time.
    """
    record = f"{time.time()},{len(tokenizer.vocab)}\n"
    with open(VOCAB_GROWTH_FILE, "a", encoding="utf-8") as log:
        log.write(record)
|
||||
|
||||
|
||||
def log_loss(value: float):
|
||||
@ -45,4 +51,5 @@ def train_on_message(text: str, source: str = "user"):
|
||||
opt.step()
|
||||
|
||||
log_loss(loss.item())
|
||||
log_vocab_growth()
|
||||
add_to_context(text, source=source)
|
||||
|
Loading…
x
Reference in New Issue
Block a user