Prevented new books from being uploaded by mistake.
Added a graph to track vocab growth.
This commit is contained in:
parent a9b4871420
commit 8d7cf38f1b

.gitignore (vendored): 3 changed lines
@@ -169,8 +169,7 @@ cython_debug/
 #.idea/
 
 .vscode/launch.json
-/data/books/alice_in_wonderland.txt
-/data/books/wizard_of_oz.txt
+/data/books/*
 /data/memory/context.json
 /data/memory/dreams.json
 data/memory/brainmap.json
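
With /data/books/* in the ignore list, any text dropped into data/books/ stays untracked, so new books can no longer be committed by mistake. Two caveats apply: files git already tracks keep being tracked until they are removed from the index (git rm --cached data/books/<file>), and an empty directory will not exist on a fresh clone. If the directory itself should be kept, a common companion pattern (an assumption about intent, not part of this commit) is a negated placeholder:

    /data/books/*
    !/data/books/.gitkeep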

In the Flask dashboard app:

@@ -10,6 +10,7 @@ from context.context import load_context
 import json
 import os
 import time
+import datetime
 
 
 app = Flask(__name__)
@@ -27,6 +28,21 @@ def load_loss_data():
     return [float(line.strip().split(",")[1]) for line in lines[-50:]]
 
 
+def load_vocab_growth():
+    path = "data/logs/vocab_growth.log"
+    if not os.path.exists(path):
+        return []
+    with open(path, "r", encoding="utf-8") as f:
+        lines = f.readlines()
+    data = []
+    for line in lines:
+        timestamp, vocab_size = line.strip().split(",")
+        # Reformat timestamp to human-readable
+        readable_time = datetime.datetime.fromtimestamp(float(timestamp)).strftime("%H:%M:%S")
+        data.append((readable_time, int(vocab_size)))
+    return data
+
+
 def update_next_cycle(seconds):
     global next_cycle_time
     next_cycle_time = time.time() + seconds
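
One thing to note: load_vocab_growth() assumes every line of the log is a well-formed "timestamp,vocab_size" pair. Because the trainer appends to the file while the dashboard reads it, a partially written final line would raise a ValueError and break the growth page. A slightly more defensive variant (a sketch, not part of this commit) skips lines that fail to parse:

    import datetime
    import os


    def load_vocab_growth(path="data/logs/vocab_growth.log"):
        if not os.path.exists(path):
            return []
        data = []
        with open(path, "r", encoding="utf-8") as f:
            for line in f:
                try:
                    timestamp, vocab_size = line.strip().split(",")
                    readable_time = datetime.datetime.fromtimestamp(
                        float(timestamp)).strftime("%H:%M:%S")
                    data.append((readable_time, int(vocab_size)))
                except ValueError:
                    continue  # torn or partial line; ignore it
        return data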
@@ -55,10 +71,13 @@ def growth():
     vocab_size = len(tokenizer.vocab)
     brainmap_size = len(get_brainmap())
     memory_size = len(load_context())
+    vocab_growth = load_vocab_growth()
+
     return render_template("growth.html",
                            vocab_size=vocab_size,
                            brainmap_size=brainmap_size,
-                           memory_size=memory_size)
+                           memory_size=memory_size,
+                           vocab_growth=vocab_growth)
 
 
 @app.route("/brainmap")
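
Note that load_loss_data() (earlier hunk) caps its output at the last 50 samples, while load_vocab_growth() re-reads and returns the entire log on every page load, so the chart payload grows for the life of the bot. A bounded variant (a sketch under the same file format, not part of this commit) keeps only the newest n lines:

    from collections import deque


    def tail_lines(path, n=200):
        # deque(maxlen=n) discards older lines as the file streams through,
        # so only the newest n lines are ever held in memory.
        with open(path, "r", encoding="utf-8") as f:
            return list(deque(f, maxlen=n))

load_vocab_growth() could then iterate tail_lines(path) instead of f.readlines().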

In the growth.html template:

@@ -2,6 +2,7 @@
 <html lang="en">
 <head>
     <meta charset="UTF-8">
+    <script src="https://cdn.jsdelivr.net/npm/chart.js"></script>
     <title>Ruby's Brain Growth</title>
     <style>
         body {
@@ -46,5 +47,61 @@
     <div class="stat">Brain Map Size: {{ brainmap_size }}</div>
     <div class="stat">Memory Entries: {{ memory_size }}</div>
 
+    <div class="divider"></div>
+
+    <h2>🧠 Vocabulary Growth Over Time</h2>
+    <canvas id="vocabChart" width="600" height="300"></canvas>
+
+    <script>
+        const ctx = document.getElementById('vocabChart').getContext('2d');
+        const vocabData = {
+            labels: [
+                {% for entry in vocab_growth %}
+                    "{{ entry[0] }}",
+                {% endfor %}
+            ],
+            datasets: [{
+                label: 'Vocab Size',
+                data: [
+                    {% for entry in vocab_growth %}
+                        {{ entry[1] }},
+                    {% endfor %}
+                ],
+                fill: true,
+                borderColor: 'rgb(75, 192, 192)',
+                backgroundColor: 'rgba(75, 192, 192, 0.2)',
+                tension: 0.3
+            }]
+        };
+
+        const vocabChart = new Chart(ctx, {
+            type: 'line',
+            data: vocabData,
+            options: {
+                scales: {
+                    x: {
+                        ticks: {
+                            autoSkip: true,
+                            maxTicksLimit: 10 // only show up to 10 x-axis labels
+                        },
+                        title: {
+                            display: true,
+                            text: 'Time'
+                        }
+                    },
+                    y: {
+                        title: {
+                            display: true,
+                            text: 'Vocabulary Size'
+                        },
+                        beginAtZero: true
+                    }
+                }
+            }
+        });
+    </script>
+
+
+
 </body>
 </html>
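
A smaller template is possible with Jinja's built-in tojson filter: since vocab_growth is a list of (label, size) pairs, it can be serialized once and split apart in JavaScript, instead of interleaving two {% for %} loops with the Chart.js config. A sketch of that alternative (same data, not part of this commit):

    <script>
        // Serialize the whole list once; tojson also handles escaping.
        const points = {{ vocab_growth | tojson }};  // [["12:01:07", 153], ...]
        const vocabData = {
            labels: points.map(p => p[0]),
            datasets: [{ label: 'Vocab Size', data: points.map(p => p[1]) }]
        };
    </script>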

In the training module:

@@ -5,6 +5,12 @@ from model.brain_state import model, tokenizer, DEVICE, loss_fn
 from context.context import add_to_context, get_recent_context
 
 LOSS_FILE = "data/logs/loss.log"
+VOCAB_GROWTH_FILE = "data/logs/vocab_growth.log"
+
+
+def log_vocab_growth():
+    with open(VOCAB_GROWTH_FILE, "a", encoding="utf-8") as f:
+        f.write(f"{time.time()},{len(tokenizer.vocab)}\n")
 
 
 def log_loss(value: float):
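
Two observations on the logger. First, it relies on time being imported at the top of the module, which is outside this hunk. Second, log_vocab_growth() runs after every trained message (next hunk), so the log grows without bound even while the vocabulary is static. A change-only variant (a sketch, assuming the same module-level names) appends a line only when the size actually moves:

    _last_logged_size = None  # module-level cache of the last size written


    def log_vocab_growth():
        global _last_logged_size
        size = len(tokenizer.vocab)
        if size == _last_logged_size:
            return  # vocab unchanged since the last write; skip the append
        _last_logged_size = size
        with open(VOCAB_GROWTH_FILE, "a", encoding="utf-8") as f:
            f.write(f"{time.time()},{size}\n")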
@@ -45,4 +51,5 @@ def train_on_message(text: str, source: str = "user"):
     opt.step()
 
     log_loss(loss.item())
+    log_vocab_growth()
     add_to_context(text, source=source)
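
With both halves wired up, a quick end-to-end check (hypothetical snippet, run from the repository root) is to append two fake samples and confirm the dashboard's loader reads them back:

    import os
    import time

    os.makedirs("data/logs", exist_ok=True)
    with open("data/logs/vocab_growth.log", "a", encoding="utf-8") as f:
        f.write(f"{time.time()},100\n")
        f.write(f"{time.time()},105\n")

    # load_vocab_growth() in the dashboard module should now end with
    # [..., ("HH:MM:SS", 100), ("HH:MM:SS", 105)]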