diff --git a/dashboard/dashboard.py b/dashboard/dashboard.py
index a89be06..6175337 100644
--- a/dashboard/dashboard.py
+++ b/dashboard/dashboard.py
@@ -59,28 +59,34 @@ def update_next_cycle(seconds):
next_cycle_time = time.time() + seconds
-@app.route("/")
-def index():
- dreams = load_dreams()
- top_dreams = dreams[:5]
- memory_size = len(load_context())
- loss_data = load_loss_data()
+def get_status_summary():
progress = load_progress()
books = get_books()
current_book = books[0] if books else None
current_line = progress.get(current_book, 0)
- next_cycle = get_time_until_next_action()
- next_action_label = get_next_action_label()
+    total_lines = 1
+    if current_book:
+        with open(f"books/{current_book}", "r", encoding="utf-8") as f:
+            total_lines = max(len(f.readlines()), 1)  # guard against empty book files
- return render_template("index.html",
- vocab_size=get_vocab_size(),
- top_dreams=top_dreams,
- memory_size=memory_size,
- loss_data=loss_data,
- current_book=current_book,
- current_line=current_line,
- next_cycle=next_cycle,
- next_action_label=next_action_label)
+ return {
+ "current_book": current_book,
+ "current_line": current_line,
+ "percent_done": round((current_line / total_lines) * 100, 2),
+ "memory_size": len(load_context()),
+ "vocab_size": get_vocab_size(),
+ "brainmap_size": len(get_brainmap()),
+ "journal_count": len(read_journal_entries()),
+ "dream": load_dreams()[-1] if load_dreams() else None,
+ "next_action_label": get_next_action_label(),
+ "next_cycle": get_time_until_next_action()
+ }
+
+
+@app.route("/")
+def index():
+ status = get_status_summary()
+ return render_template("index.html", status=status)
@app.route("/growth")
@@ -131,7 +137,10 @@ def journal():
@app.route("/concepts")
def concepts():
clusters = cluster_vocab(n_clusters=10)
- return render_template("concepts.html", clusters={i: cluster for i, cluster in enumerate(clusters)})
+ return render_template("concepts.html",
+ clusters={i: cluster for i,
+ cluster in enumerate(clusters)
+ })
@app.route("/dreams")
diff --git a/dashboard/templates/index.html b/dashboard/templates/index.html
index 4573035..d181b0a 100644
--- a/dashboard/templates/index.html
+++ b/dashboard/templates/index.html
@@ -2,40 +2,33 @@
-<head>
-    <title>Ruby's Dashboard</title>
-</head>
-<body>
-    <h1>Ruby is Running 🧠</h1>
-
-    <div class="card">
-        <h2>⏳ Next Cycle</h2>
-        <p>Next: {{ next_action_label }}</p>
-        <p>{{ next_cycle }} seconds</p>
-    </div>
-
-    <div class="card">
-        <h2>🧠 Brain Stats</h2>
-        <p>Vocabulary Size: {{ vocab_size }}</p>
-        <p>Memory Entries: {{ memory_size }}</p>
-    </div>
-
-    <div class="card">
-        <h2>📖 Current Book Progress</h2>
-        <p>Currently Reading: {{ current_book }}</p>
-        <p>Line: {{ current_line }}</p>
-        <p>{{ current_passage }}</p>
-    </div>
-
-    <div class="card">
-        <h2>🌙 Highest Scoring Dreams</h2>
-        <ul>
-            {% for dream in top_dreams %}
-            <li>{{ dream.score }} | {{ dream.sentence }}</li>
-            {% endfor %}
-        </ul>
-    </div>
-
-    <div class="card">
-        <h2>📉 Recent Loss</h2>
-        <canvas id="lossChart"></canvas>
-    </div>
-</body>
-</html>
+<head>
+    <title>Ruby Status Dashboard</title>
+</head>
+<body>
+    <h1>🧠 Ruby System Status</h1>
+
+    <div class="card">
+        <h2>📖 Current Activity</h2>
+        <p>Action: {{ status.next_action_label }}</p>
+        <p>Next in: {{ status.next_cycle }} sec</p>
+        <p>Reading: {{ status.current_book or "None" }}</p>
+        <p>Line: {{ status.current_line }} ({{ status.percent_done }}%)</p>
+    </div>
+
+    <div class="card">
+        <h2>📊 System Stats</h2>
+        <p>Vocabulary: {{ status.vocab_size }}</p>
+        <p>Memory: {{ status.memory_size }}</p>
+        <p>Brain Map: {{ status.brainmap_size }}</p>
+        <p>Journal: {{ status.journal_count }}</p>
+    </div>
+
+    <div class="card">
+        <h2>💤 Latest Dream</h2>
+        {% if status.dream %}
+        <p>Score: {{ status.dream.score }}</p>
+        <p>{{ status.dream.sentence }}</p>
+        {% else %}
+        <p>No dreams yet.</p>
+        {% endif %}
+    </div>
+</body>
+</html>
diff --git a/main.py b/main.py
index c7fd5a6..df35244 100644
--- a/main.py
+++ b/main.py
@@ -29,6 +29,13 @@ empty_response_counter = 0
async def on_ready():
print(f"Ruby is online as {client.user}.")
+    # ✅ Start async loops on Discord's own event loop
+ client.loop.create_task(read_books_forever())
+ client.loop.create_task(dream_replay_loop())
+ client.loop.create_task(background_cleanup_loop())
+ client.loop.create_task(rehearsal_loop())
+ client.loop.create_task(memory_reweaver_loop())
+
@client.event
async def on_message(message):
@@ -40,16 +47,16 @@ async def on_message(message):
if not message.content.strip():
return
- train_on_message(message.content, source="user")
+ await train_on_message(message.content, source="user")
response = generate_response()
if not response.strip():
empty_response_counter += 1
- if empty_response_counter % 10 == 0: # only every 10 failures
+ if empty_response_counter % 10 == 0:
print(f"[Brain] Skipped {empty_response_counter} empty replies so far.")
return
- empty_response_counter = 0 # reset counter when Ruby replies
+ empty_response_counter = 0
await message.channel.send(response)
@@ -57,40 +64,26 @@ async def background_cleanup_loop():
while True:
full_cleanup()
set_next_action(300, "Cleaning up")
- await asyncio.sleep(300) # 5 minutes
+ await asyncio.sleep(300)
async def dream_replay_loop():
while True:
- replay_dreams()
+ await replay_dreams()
set_next_action(90, "Dreaming new dreams")
- await asyncio.sleep(90) # Replay every 15 minutes
- daydream()
+ await asyncio.sleep(90)
+ await daydream()
async def rehearsal_loop():
while True:
- simulate_conversation()
+ await simulate_conversation()
set_next_action(120, "Practicing Conversations")
- await asyncio.sleep(120) # Every 20 minutes
-
-
-# Start Ruby's Brain Loops in a separate thread
-def start_brain_loops():
- loop = asyncio.new_event_loop()
- asyncio.set_event_loop(loop)
-
- loop.create_task(read_books_forever())
- loop.create_task(dream_replay_loop())
- loop.create_task(background_cleanup_loop())
- loop.create_task(rehearsal_loop())
- loop.create_task(memory_reweaver_loop())
-
- loop.run_forever()
+ await asyncio.sleep(120)
+# ✅ Launch dashboard in background thread
threading.Thread(target=run_dashboard, daemon=True).start()
-threading.Thread(target=start_brain_loops, daemon=True).start()
-# Launch Discord bot (blocking)
+# ✅ Launch Discord bot (this owns the event loop now)
client.run(TOKEN)
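One caveat with starting tasks in `on_ready`: discord.py can fire it again after a gateway reconnect, duplicating every loop. discord.py 2.x provides `setup_hook` for one-time startup; a minimal sketch, assuming the same loop coroutines imported in main.py:

```python
import discord

class RubyClient(discord.Client):
    async def setup_hook(self) -> None:
        # Runs once, after login but before the gateway connects, so the
        # background loops are never duplicated by reconnects.
        for coro in (read_books_forever(), dream_replay_loop(),
                     background_cleanup_loop(), rehearsal_loop(),
                     memory_reweaver_loop()):
            self.loop.create_task(coro)
```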
diff --git a/model/abstraction.py b/model/abstraction.py
index 47fafb4..97fd2af 100644
--- a/model/abstraction.py
+++ b/model/abstraction.py
@@ -4,22 +4,25 @@ from model.tokenizer import Tokenizer
tokenizer = Tokenizer()
+SPECIAL_TOKENS = {"<start>", "<end>", "<pad>", "<unk>", "<sep>"}
+
def cluster_vocab(n_clusters=10):
- vocab_items = list(tokenizer.vocab.items())
+ vocab_items = [(word, idx) for word, idx in tokenizer.vocab.items() if word not in SPECIAL_TOKENS]
- if not vocab_items:
- return [] # If vocab is empty, just return empty clusters safely
+ if len(vocab_items) < 2:
+ return [] # Not enough real words to cluster
words, ids = zip(*vocab_items)
- ids = torch.tensor(ids, dtype=torch.float32).unsqueeze(1)
- kmeans = KMeans(n_clusters=min(n_clusters, len(words)))
- labels = kmeans.fit_predict(ids)
+    # One-hot placeholder embeddings; replace with real model vectors later
+    vectors = torch.eye(len(words), dtype=torch.float32)
+
+ kmeans = KMeans(n_clusters=min(n_clusters, len(words)), n_init="auto")
+ labels = kmeans.fit_predict(vectors)
clusters = [[] for _ in range(max(labels) + 1)]
for word, label in zip(words, labels):
clusters[label].append(word)
return clusters
-
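The `torch.eye` placeholder makes every token equidistant from every other, so the KMeans output is effectively arbitrary; it only keeps the route alive. A sketch of the intended upgrade using learned embeddings — `model.embed` is an assumed attribute name, not confirmed by this diff:

```python
import torch
from sklearn.cluster import KMeans

def cluster_vocab_real(model, vocab_items, n_clusters=10):
    words = [w for w, _ in vocab_items]
    ids = torch.tensor([i for _, i in vocab_items], dtype=torch.long)
    with torch.no_grad():
        vectors = model.embed(ids).cpu().numpy()  # shape (N, embed_dim)
    labels = KMeans(n_clusters=min(n_clusters, len(words)),
                    n_init="auto").fit_predict(vectors)
    clusters = [[] for _ in range(labels.max() + 1)]
    for word, label in zip(words, labels):
        clusters[label].append(word)
    return clusters
```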
diff --git a/model/dreamer.py b/model/dreamer.py
index 1db8236..65dee35 100644
--- a/model/dreamer.py
+++ b/model/dreamer.py
@@ -11,22 +11,18 @@ from context.context import load_context
recent_dreams = []
-def daydream():
+async def daydream():
model.eval()
max_token_id = model.head.out_features - 1
seed = torch.randint(0, max_token_id + 1, (1, 1), device=DEVICE)
dream = []
- max_token_id = model.head.out_features - 1
for _ in range(12):
out = model(seed)
logits = out[:, -1, :]
probs = F.softmax(logits, dim=-1)
token = torch.multinomial(probs, num_samples=1)
-
- # CLAMP the token
token = torch.clamp(token, max=max_token_id)
-
dream.append(token.item())
seed = torch.cat([seed, token], dim=1)
@@ -36,15 +32,14 @@ def daydream():
if score > 0.5:
save_dream(sentence, score)
record_to_journal(sentence)
- train_on_message(sentence)
+ await train_on_message(sentence)
if len(recent_dreams) > 10:
recent_dreams.pop(0)
-def replay_dreams():
- expand_model_if_needed()
-
+async def replay_dreams():
+ await expand_model_if_needed()
dreams = load_dreams()
context = load_context()
@@ -54,11 +49,10 @@ def replay_dreams():
selected_dreams = random.sample(dreams, min(len(dreams), 5))
selected_contexts = random.sample(context, min(len(context), 5))
- # Mix dreams and past contexts into a chaotic dream
all_sources = [d["sentence"] for d in selected_dreams] + [c["text"] for c in selected_contexts]
random.shuffle(all_sources)
mixed_sentence = " ".join(random.sample(all_sources, min(len(all_sources), 3)))
if mixed_sentence:
- train_on_message(mixed_sentence, source="dream")
+ await train_on_message(mixed_sentence, source="dream")
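`daydream()` samples each next token with softmax plus multinomial and then clamps the ID. If sampling ever needs tuning, a temperature knob is the usual first step; a minimal sketch (the temperature parameter is an addition, not part of this diff):

```python
import torch
import torch.nn.functional as F

def sample_next_token(logits: torch.Tensor, temperature: float = 1.0) -> torch.Tensor:
    # temperature < 1 sharpens the distribution, > 1 flattens it.
    probs = F.softmax(logits / max(temperature, 1e-6), dim=-1)
    return torch.multinomial(probs, num_samples=1)
```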
diff --git a/model/dreams.py b/model/dreams.py
index 429ae1d..02e894d 100644
--- a/model/dreams.py
+++ b/model/dreams.py
@@ -7,8 +7,12 @@ DREAM_LOG_PATH = "data/memory/dreams.json"
def load_dreams():
if not os.path.exists(DREAM_LOG_PATH):
return []
- with open(DREAM_LOG_PATH, "r", encoding="utf-8") as f:
- return json.load(f)
+ try:
+ with open(DREAM_LOG_PATH, "r", encoding="utf-8") as f:
+ return json.load(f)
+ except json.JSONDecodeError:
+ print("[Dreams] Failed to parse dreams.json.")
+ return []
def save_dream(sentence: str, score: float):
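The `JSONDecodeError` guard above papers over partially written files. A minimal sketch of an atomic write that would prevent the corruption in the first place; `save_dream`'s actual body is not shown in this diff, so this is an assumed shape:

```python
import json
import os
import tempfile

def write_json_atomic(path: str, data) -> None:
    # Write to a sibling temp file, then swap it in; readers never see a
    # half-written dreams.json.
    fd, tmp = tempfile.mkstemp(dir=os.path.dirname(path) or ".")
    try:
        with os.fdopen(fd, "w", encoding="utf-8") as f:
            json.dump(data, f, ensure_ascii=False, indent=2)
        os.replace(tmp, path)  # atomic on POSIX; replaces the old file whole
    except BaseException:
        os.remove(tmp)
        raise
```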
diff --git a/model/dynamic_expand.py b/model/dynamic_expand.py
index 108380d..4f7b928 100644
--- a/model/dynamic_expand.py
+++ b/model/dynamic_expand.py
@@ -1,43 +1,37 @@
import torch
-import threading
+import asyncio
import time
from model.tokenizer import Tokenizer
from model.brain_state import save_model, DEVICE, model, optimizer
tokenizer = Tokenizer()
-expand_lock = threading.Lock()
+expand_lock = asyncio.Lock()
_last_expansion_time = 0
-def expand_model_if_needed():
+async def expand_model_if_needed():
global _last_expansion_time
- with expand_lock:
- # Check if expansion is actually needed
+ async with expand_lock:
needed_vocab_size = tokenizer.next_id
current_vocab_size = model.head.out_features
if needed_vocab_size <= current_vocab_size:
-            return  # ✅ No expansion needed
+ return
- # print(f"[Expand] Expanding vocabulary: {current_vocab_size} -> {needed_vocab_size}")
+ print(f"[Expand] Expanding vocabulary: {current_vocab_size} -> {needed_vocab_size}")
- # Expand the head layer safely without rebuilding everything
old_head_weight = model.head.weight.data
old_out_features = old_head_weight.size(0)
in_features = model.head.in_features
- new_head = torch.nn.Linear(in_features, needed_vocab_size, bias=False)
- new_head = new_head.to(DEVICE)
+ new_head = torch.nn.Linear(in_features, needed_vocab_size, bias=False).to(DEVICE)
- # Copy old weights into the new head
with torch.no_grad():
new_head.weight[:old_out_features] = old_head_weight
model.head = new_head
-
- # Rebuild optimizer and scheduler
- scheduler = torch.optim.lr_scheduler.StepLR(optimizer, step_size=1000, gamma=0.95)
+    optimizer.add_param_group({"params": new_head.parameters()})  # let the optimizer train the new head
_last_expansion_time = time.time()
save_model()
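A quick way to convince yourself the weight copy is lossless: logits for pre-existing token IDs should be identical before and after expansion. A hedged test sketch:

```python
import torch

def expansion_preserves_logits(old_head: torch.nn.Linear,
                               new_head: torch.nn.Linear) -> bool:
    # Both heads are bias-free, so copied rows must produce equal logits.
    x = torch.randn(4, old_head.in_features)
    with torch.no_grad():
        before = old_head(x)
        after = new_head(x)[..., : old_head.out_features]
    return torch.allclose(before, after)
```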
diff --git a/model/journal.py b/model/journal.py
index 44075d1..e306434 100644
--- a/model/journal.py
+++ b/model/journal.py
@@ -26,8 +26,11 @@ def read_journal_entries():
if not os.path.exists(JOURNAL_PATH):
return []
with open(JOURNAL_PATH, "r", encoding="utf-8") as f:
- lines = f.readlines()
- return [line.split("|", 1)[-1].strip() for line in lines if "|" in line]
+ try:
+ journal = json.load(f)
+ return [entry.get("text", "") for entry in journal if isinstance(entry, dict)]
+ except json.JSONDecodeError:
+ return []
def sample_journal_entries(n=5):
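Elsewhere in this diff, `record_to_journal` receives a plain sentence (model/dreamer.py) but a dict (model/trainer.py); the `isinstance` guard above silently drops any string entries. A small sketch of normalizing both shapes on read, assuming no other entry types exist:

```python
def normalize_entry(entry) -> str:
    # Dict entries come from trainer.py; bare strings from dreamer.py.
    if isinstance(entry, dict):
        return entry.get("text", "")
    return str(entry)
```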
diff --git a/model/rehearsal.py b/model/rehearsal.py
index b18f6b9..cc84095 100644
--- a/model/rehearsal.py
+++ b/model/rehearsal.py
@@ -1,30 +1,27 @@
import torch
from model.brain import model, tokenizer, DEVICE
-from model.trainer import train_on_message
from model.dynamic_expand import expand_model_if_needed
+from model.trainer import train_on_message
-def simulate_conversation():
- expand_model_if_needed()
-
+async def simulate_conversation():
+ await expand_model_if_needed()
model.eval()
max_token_id = model.head.out_features - 1
if max_token_id < 1:
- return # Safeguard if model is still too small
+ return
seed = torch.randint(0, max_token_id + 1, (1, 5), device=DEVICE)
- seed = seed[:, -128:] # Clamp sequence length
+ seed = seed[:, -128:]
output = model(seed)
-
preds = torch.argmax(output, dim=-1).squeeze().tolist()
if isinstance(preds, int):
preds = [preds]
- # π‘ Clamp predictions too
preds = [min(max(p, 0), max_token_id) for p in preds]
-
text = tokenizer.detokenize(preds)
+
if text and len(text.split()) >= 3:
- train_on_message(text)
+ await train_on_message(text)
diff --git a/model/reweaver.py b/model/reweaver.py
index ce7c192..66c72ce 100644
--- a/model/reweaver.py
+++ b/model/reweaver.py
@@ -8,7 +8,7 @@ from model.dynamic_expand import expand_model_if_needed
async def memory_reweaver_loop():
while True:
await asyncio.sleep(600) # every 10 minutes
- expand_model_if_needed()
+ await expand_model_if_needed()
context = load_context()
if not context:
@@ -18,4 +18,4 @@ async def memory_reweaver_loop():
combined_text = " ".join([s["text"] for s in selected])
if combined_text:
- train_on_message(combined_text, source="reweaver")
+ await train_on_message(combined_text, source="reweaver")
diff --git a/model/trainer.py b/model/trainer.py
index b8fb0b6..82c9709 100644
--- a/model/trainer.py
+++ b/model/trainer.py
@@ -1,6 +1,6 @@
import torch
import time
-from model.dynamic_expand import expand_model_if_needed, _last_expansion_time, expand_lock
+from model.dynamic_expand import expand_model_if_needed, _last_expansion_time
from model.brain_state import model, tokenizer, DEVICE, loss_fn, optimizer, scheduler
from model.brainmap import add_to_brainmap
from model.journal import record_to_journal
@@ -20,68 +20,56 @@ def log_loss(value: float):
f.write(f"{time.time()},{round(value, 4)}\n")
-def train_on_message(text: str, source: str = "user"):
- expand_model_if_needed()
+async def train_on_message(text: str, source: str = "user"):
+ await expand_model_if_needed()
now = time.time()
if now - _last_expansion_time < 5:
print("[Trainer] Skipping to stabilize after expansion.")
return
- if not expand_lock.acquire(timeout=0.5):
- print("[Trainer] Skipped training due to active expansion.")
+ model.train()
+ context_texts = get_recent_context(10)
+ augmented_text = " " + " ".join(context_texts + [text]) + " "
+ tokens = tokenizer.tokenize(augmented_text)
+
+ if len(tokens) < 2:
+ print("[Trainer] Message too short after cleaning.")
return
- try:
- model.train()
- context_texts = get_recent_context(10)
+ max_token_id = model.head.out_features - 1
+ tokens = [max(0, min(t, max_token_id)) for t in tokens][:128]
- # Augment the input with recent context
- augmented_text = " " + " ".join(context_texts + [text]) + " "
-
- tokens = tokenizer.tokenize(augmented_text)
-
- if len(tokens) < 2:
- print("[Trainer] Message too short after cleaning.")
+ for t in tokens:
+ if t > max_token_id or t < 0:
+ print(f"[Trainer] Invalid token ID {t} (max={max_token_id})")
return
- # Clamp any token IDs beyond the model's output size
- max_token_id = model.head.out_features - 1
- if tokenizer.next_id > model.head.out_features:
- expand_model_if_needed()
- tokens = [t if t <= max_token_id else max_token_id for t in tokens]
- tokens = tokens[:128] # Hard clamp input length
+ if len(tokens) < 2:
+ print("[Trainer] Message too short after clamping.")
+ return
- if len(tokens) < 2:
- print("[Trainer] Message too short after clamping.")
- return
+ input_tensor = torch.tensor(tokens[:-1], dtype=torch.long, device=DEVICE).unsqueeze(0)
+ target_tensor = torch.tensor(tokens[1:], dtype=torch.long, device=DEVICE).unsqueeze(0)
- input_tensor = torch.tensor(tokens[:-1], dtype=torch.long, device=DEVICE).unsqueeze(0)
- target_tensor = torch.tensor(tokens[1:], dtype=torch.long, device=DEVICE).unsqueeze(0)
+ output = model(input_tensor)
+ loss = loss_fn(output.view(-1, output.size(-1)), target_tensor.view(-1))
+ if torch.isnan(loss):
+ print("[Trainer] Detected NaN loss, skipping update.")
+ return
- output = model(input_tensor)
- loss = loss_fn(output.view(-1, output.size(-1)), target_tensor.view(-1))
- if torch.isnan(loss):
- print("[Trainer] Detected NaN loss, skipping update.")
- return
+ optimizer.zero_grad()
+ loss.backward()
+ optimizer.step()
+ scheduler.step()
- optimizer.zero_grad()
- loss.backward()
- optimizer.step()
- scheduler.step()
+ add_to_brainmap(augmented_text.split())
+ add_to_context(text, source=source)
- # Update brainmap and context
- add_to_brainmap(augmented_text.split())
- add_to_context(text, source=source)
-
- # Log training success to journal
- record_to_journal({
- "timestamp": time.time(),
- "source": source,
- "text": text,
- "loss": round(loss.item(), 4),
- "vocab_size": len(tokenizer.vocab)
- })
-
- finally:
- expand_lock.release()
+ record_to_journal({
+ "timestamp": time.time(),
+ "source": source,
+ "text": text,
+ "loss": round(loss.item(), 4),
+ "vocab_size": len(tokenizer.vocab)
+ })
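One subtlety this refactor leaves untouched: `from model.dynamic_expand import _last_expansion_time` binds the value at import time, so the `now - _last_expansion_time < 5` check always compares against the initial 0. A sketch of a fix that reads the attribute at call time instead (module import style assumed):

```python
import time
from model import dynamic_expand

def recently_expanded(window_s: float = 5.0) -> bool:
    # Attribute lookup sees updates made inside expand_model_if_needed().
    return time.time() - dynamic_expand._last_expansion_time < window_s
```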
diff --git a/reader/reader.py b/reader/reader.py
index c26ebe0..34a625a 100644
--- a/reader/reader.py
+++ b/reader/reader.py
@@ -76,7 +76,7 @@ async def read_books_forever():
paragraph += " " + line
if line[-1] in END_PUNCTUATION and len(paragraph) > PARAGRAPH_MIN_LENGTH:
- train_on_message(paragraph.strip(), source="book")
+ await train_on_message(paragraph.strip(), source="book")
paragraph = ""
await asyncio.sleep(READ_DELAY)
set_next_action(READ_DELAY, "Reading")
@@ -87,7 +87,7 @@ async def read_books_forever():
if paragraph.strip():
if len(paragraph) > PARAGRAPH_MIN_LENGTH:
- train_on_message(paragraph.strip(), source="book")
+ await train_on_message(paragraph.strip(), source="book")
await asyncio.sleep(READ_DELAY)
set_next_action(READ_DELAY, "Reading")
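For reference, the paragraph-accumulation rule `read_books_forever` uses can be exercised in isolation. A self-contained sketch; the `END_PUNCTUATION` and `PARAGRAPH_MIN_LENGTH` values are assumptions, not taken from this diff:

```python
END_PUNCTUATION = {".", "!", "?"}   # assumed
PARAGRAPH_MIN_LENGTH = 200          # assumed

def chunk_paragraphs(lines):
    """Yield paragraphs the same way read_books_forever builds them."""
    paragraph = ""
    for raw in lines:
        line = raw.strip()
        if not line:
            continue
        paragraph += " " + line
        if line[-1] in END_PUNCTUATION and len(paragraph) > PARAGRAPH_MIN_LENGTH:
            yield paragraph.strip()
            paragraph = ""
    # Flush the tail, mirroring the end-of-book branch above.
    if len(paragraph.strip()) > PARAGRAPH_MIN_LENGTH:
        yield paragraph.strip()
```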