diff --git a/.gitignore b/.gitignore
index 3b2462f..8c4ef7a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -171,5 +171,4 @@ cython_debug/
 .vscode/launch.json
 /books/*
 /memory/*
-vocab.json
-progress.json
+*.json
diff --git a/body.py b/body.py
index f0aeaf4..4efd484 100644
--- a/body.py
+++ b/body.py
@@ -1,97 +1,215 @@
+# body.py
+# flake8: noqa
 import os
-import asyncio
 import glob
-import threading
 import json
+import threading
+import asyncio
 from collections import deque
+from datetime import datetime, time, timedelta
 import logging
+
+import requests
 import discord
 from nervous_system import NervousSystem
-import dashboard  # <-- import your new Flask app
-import brain_map  # <-- import the blueprint to inject system
+from persona import Persona
+from life_log import LifeLog
+from plugin_manager import PluginManager
+import dashboard
+import brain_map
 
-# Path for progress persistence
-PROGRESS_PATH = 'progress.json'
-
-# Mute logger
-for noisy_logger in ["werkzeug", "flask", "flask.app"]:
-    logging.getLogger(noisy_logger).setLevel(logging.CRITICAL)
-
-# ─── Initialize Ruby & Discord ──────────────────────────────────────────  # noqa: E501
+# ─── Config & Paths ─────────────────────────────────────────────────────
+VOCAB_PATH = "vocab.json"
+PROGRESS_PATH = "progress.json"
+PERSONA_PATH = "persona.json"
+LIFELOG_PATH = "life_log.json"
+OPENWEATHER_KEY = os.getenv("OPENWEATHER_API_KEY")
+WEATHER_CITY = os.getenv("WEATHER_CITY", "New York")
+WEATHER_HOUR = 6  # daily at 6 AM
+
+# ─── Discord & System Setup ─────────────────────────────────────────────
 intents = discord.Intents.default()
 intents.message_content = True
 client = discord.Client(intents=intents)
 
-system = NervousSystem()
-system.history = deque(maxlen=100)
-# Load or resume vocab + embeddings
-system.sensory.load_vocab('vocab.json')
+system = NervousSystem()
+persona = Persona(path=PERSONA_PATH)
+life_log = LifeLog(path=LIFELOG_PATH)
+plugins = PluginManager()
+
+# keep last 6 turns for conversational context
+system.history = deque(maxlen=6)
+
+# load/resize vocab
+system.sensory.load_vocab(VOCAB_PATH)
 system._resize_embeddings()
-print('Loaded vocab size:', len(system.sensory.stoi))
 
-# Resume progress
+# resume book-training progress (per-book)
 if os.path.isfile(PROGRESS_PATH):
-    with open(PROGRESS_PATH, 'r', encoding='utf-8') as f:
-        data = json.load(f)
-    system.processed_lines = data.get('processed_lines', 0)
+    with open(PROGRESS_PATH, "r", encoding="utf-8") as f:
+        prog = json.load(f)
+    current_book = prog.get("current_book", 0)
+    line_offset = prog.get("line_offset", 0)
 else:
-    system.processed_lines = 0
+    current_book = 0
+    line_offset = 0
 
-# Compute total book lines
-total = sum(
-    1
-    for path in glob.glob('books/*.txt')
-    for line in open(path, encoding='utf-8')
-    if line.strip()
+book_list = sorted(glob.glob("books/*.txt"))
+total_books = len(book_list)
+
+# count lines per book
+book_line_counts = []
+for path in book_list:
+    cnt = sum(1 for line in open(path, encoding="utf-8") if line.strip())
+    book_line_counts.append(cnt)
+
+# set up overall progress counters for the dashboard
+system.total_lines = sum(book_line_counts)
+lines_done = sum(book_line_counts[:current_book]) + line_offset
+system.processed_lines = lines_done
+print(
+    f"Resuming training: book {current_book+1}/{total_books}, "
+    f"line {line_offset}/{book_line_counts[current_book] if current_book < total_books else 0}\n"
+    f"Overall progress: {system.processed_lines}/{system.total_lines} lines"
 )
-system.total_lines = total
-print(f'Resuming training at {system.processed_lines}/{system.total_lines} lines')
-
-# Inject into Flask contexts
+# inject for dashboard routes
 dashboard.system = system
 brain_map.system = system
 
-# ─── Book-training when idle ────────────────────────────────────────────  # noqa: E501
+print(
+    f"Loaded vocab {len(system.sensory.stoi)}, "
+    f"resuming at book {current_book+1}/{total_books}, "
+    f"line {line_offset}/{book_line_counts[current_book] if current_book < total_books else 0}"
+)
+
+
+def refine_diary_entry(raw: str) -> str:
+    prompt = (
+        "Here is a rough diary draft. Rewrite it as a clear, first-person "
+        "diary entry in 2–3 sentences:\n\n"
+        f"Draft:\n{raw}\n\nRefined entry:"
+    )
+    return system.generate(prompt, max_len=100, temperature=0.7, top_p=0.9)
+
+
+# ─── Seed Book-Title Diary Entries ──────────────────────────────────────
+for path in book_list:
+    title = os.path.splitext(os.path.basename(path))[0]
+    fact = f"I just finished reading “{title}.”"
+    system.train("", fact)
+    draft = system.generate(
+        f"Diary prompt: Stream-of-thought draft about reading “{title}.”",
+        max_len=100, temperature=0.9, top_p=0.95
+    )
+    entry = refine_diary_entry(draft)
+    life_log.add(entry)
+
+
+# ─── Idle Book-Training Task (per-book) ─────────────────────────────────
 async def train_books_idle():
+    global current_book, line_offset
     await client.wait_until_ready()
     await asyncio.sleep(5)
-    processed = 0
-    skip = system.processed_lines
-    for path in glob.glob('books/*.txt'):
-        with open(path, encoding='utf-8') as f:
+    # process one book at a time
+    while current_book < total_books:
+        path = book_list[current_book]
+        processed = 0
+        cnt = book_line_counts[current_book]
+        with open(path, encoding="utf-8") as f:
             for raw in f:
                 text = raw.strip()
                 if not text:
                     continue
-                if processed < skip:
+                if processed < line_offset:
                     processed += 1
                     continue
+                # train on this line
                 await asyncio.to_thread(system.train, text, text)
                 processed += 1
-                system.processed_lines = processed
+                line_offset = processed
 
-                if processed % 200 == 0 or processed == system.total_lines:
-                    system.sensory.save_vocab('vocab.json')
-                    with open(PROGRESS_PATH, 'w', encoding='utf-8') as pf:
-                        json.dump({'processed_lines': processed}, pf)
+                # checkpoint every 200 lines or at end
+                if processed % 200 == 0 or processed == cnt:
+                    system.sensory.save_vocab(VOCAB_PATH)
+                    with open(PROGRESS_PATH, "w", encoding="utf-8") as pf:
+                        json.dump({
+                            "current_book": current_book,
+                            "line_offset": line_offset
+                        }, pf, indent=2)
 
-    # Final checkpoint
-    system.sensory.save_vocab('vocab.json')
-    with open(PROGRESS_PATH, 'w', encoding='utf-8') as pf:
-        json.dump({'processed_lines': system.processed_lines}, pf)
+        # finished current book
+        print(f"Finished book {current_book+1}/{total_books}: {path}")
+        # reset for next book
+        current_book += 1
+        line_offset = 0
+        # save progress
+        with open(PROGRESS_PATH, "w", encoding="utf-8") as pf:
+            json.dump({
+                "current_book": current_book,
+                "line_offset": 0
+            }, pf, indent=2)
+        # optional small break between books
+        await asyncio.sleep(2)
+
+    # all books done; bootstrap persona
+    await asyncio.to_thread(persona.bootstrap, system)
+    print("All books trained. Persona bootstrapped:", persona.traits)
+
+
+# ─── Idle Weather-Ingestion & Diary Task ────────────────────────────────
+async def ingest_weather_idle():
+    await client.wait_until_ready()
+    # sleep until next WEATHER_HOUR
+    now = datetime.now()
+    target = datetime.combine(now.date(), time(WEATHER_HOUR))
+    if now >= target:
+        target += timedelta(days=1)
+    await asyncio.sleep((target - now).total_seconds())
+
+    while True:
+        if OPENWEATHER_KEY:
+            try:
+                url = (
+                    "https://api.openweathermap.org/data/2.5/weather"
+                    f"?q={WEATHER_CITY}&units=metric&appid={OPENWEATHER_KEY}"
+                )
+                data = requests.get(url, timeout=5).json()
+                desc = data["weather"][0]["description"]
+                temp = data["main"]["temp"]
+                fact = f"Current weather: {desc}, {temp:.1f}°C."
+                await asyncio.to_thread(system.train, "", fact)
+
+                draft = system.generate(
+                    f"Diary prompt: Draft about today’s weather: {desc}, {temp:.1f}°C.",
+                    max_len=100, temperature=0.9, top_p=0.95
+                )
+                entry = refine_diary_entry(draft)
+                life_log.add(entry)
+                print("Journaled weather:", entry)
+            except Exception:
+                pass
+        await asyncio.sleep(24 * 3600)
+
+
+# ─── Idle Self-Reflection Task ──────────────────────────────────────────
+async def reflect_idle():
+    await client.wait_until_ready()
+    while True:
+        await asyncio.sleep(600)
+        await asyncio.to_thread(persona.bootstrap, system)
+        print("Persona adapted:", persona.traits)
 
 @client.event
 async def on_ready():
-    print(f'Ruby is online as {client.user}!')
+    print(f"Ruby is online as {client.user}!")
     asyncio.create_task(train_books_idle())
+    asyncio.create_task(ingest_weather_idle())
+    asyncio.create_task(reflect_idle())
 
 @client.event
@@ -100,27 +218,53 @@ async def on_message(message: discord.Message):
         return
 
     user_text = message.content.strip()
-    reply = system.generate(user_text)
+
+    # 1) Diary: top 5 recent
+    entries = life_log.recent(5)
+    diary_sec = "### Diary Entries\n"
+    for e in reversed(entries):
+        diary_sec += f"- {e}\n"
+
+    # 2) Persona
+    persona_sec = "\n### Persona\n" + persona.summary()
+
+    # 3) Conversation
+    convo_sec = "\n### Conversation\n"
+    for turn in system.history:
+        convo_sec += f"User: {turn['user']}\nRuby: {turn['bot']}\n"
+    convo_sec += f"User: {user_text}\nRuby:"
+
+    prompt = diary_sec + persona_sec + convo_sec
+    reply = system.generate(prompt)
     await message.channel.send(reply)
-    system.history.append({'user': user_text, 'bot': reply})
-    asyncio.create_task(asyncio.to_thread(system.train, user_text, reply))
-
-
-# ─── Launch Dashboard & Bot ─────────────────────────────────────────────  # noqa: E501
+    # record & train
+    system.history.append({"user": user_text, "bot": reply})
+    asyncio.create_task(
+        asyncio.to_thread(system.train, user_text, reply)
+    )
+
+
+# ─── Silence Flask/Werkzeug request logs ────────────────────────────────
+logging.getLogger('werkzeug').setLevel(logging.ERROR)
 
 def run_dashboard():
     dashboard.app.run(
-        host='0.0.0.0', port=5000,
-        debug=False, use_reloader=False
+        host="0.0.0.0",
+        port=5000,
+        debug=False,
+        use_reloader=False
    )
 
 threading.Thread(target=run_dashboard, daemon=True).start()
-print('Dashboard available at http://127.0.0.1:5000')
+dashboard.system = system
+dashboard.persona = persona
+dashboard.life_log = life_log
+dashboard.plugins = plugins
+dashboard.brain_map = brain_map
+print("Dashboard available at http://127.0.0.1:5000")
 
-token = os.getenv('DISCORD_TOKEN')
+token = os.getenv("DISCORD_TOKEN")
 if not token:
-    raise RuntimeError('Please set the DISCORD_TOKEN environment variable')
+    raise RuntimeError("Please set DISCORD_TOKEN in env")
 client.run(token)
diff --git a/dashboard.py b/dashboard.py
index ed2f39e..29009b2 100644
--- a/dashboard.py
+++ b/dashboard.py
@@ -1,17 +1,21 @@
+# dashboard.py
+
 from flask import Flask, render_template, jsonify
+from datetime import datetime
 import brain_map
 
 app = Flask(
     __name__,
     template_folder='templates',
-    static_folder='static',
+    static_folder='static'
 )
-
-# Register the brain_map blueprint
 app.register_blueprint(brain_map.bp)
 
-# Will be injected from body.py
-system = None
+# Injected from body.py
+system = None
+persona = None
+life_log = None
+plugins = None
 
 @app.route('/')
@@ -19,6 +23,25 @@ def dashboard():
     return render_template('dashboard.html')
 
 
+@app.route('/stats')
+def stats():
+    """
+    Returns JSON with the key metrics for the dashboard to display.
+    Uses getattr to supply safe defaults if any attribute is missing.
+    """
+    if system is None:
+        return jsonify({})
+
+    return jsonify({
+        'processed_lines': getattr(system, 'processed_lines', 0),
+        'total_lines': getattr(system, 'total_lines', 0),
+        'history_len': len(getattr(system, 'history', [])),
+        'life_log_count': len(getattr(life_log, 'entries', [])),
+        'plugin_count': len(getattr(plugins, 'registry', {})),
+        'timestamp': datetime.utcnow().timestamp()
+    })
+
+
 @app.route('/progress')
 def progress():
     if system is None:
diff --git a/life_log.py b/life_log.py
new file mode 100644
index 0000000..9a6b4f1
--- /dev/null
+++ b/life_log.py
@@ -0,0 +1,28 @@
+import json
+import os
+
+
+class LifeLog:
+    """
+    Records Ruby’s diary entries over time and lets you fetch her recent reflections.
+    """
+    def __init__(self, path="life_log.json"):
+        self.path = path
+        self.entries = []
+        self.load()
+
+    def load(self):
+        if os.path.isfile(self.path):
+            with open(self.path, "r", encoding="utf-8") as f:
+                self.entries = json.load(f)
+
+    def save(self):
+        with open(self.path, "w", encoding="utf-8") as f:
+            json.dump(self.entries, f, ensure_ascii=False, indent=2)
+
+    def add(self, entry: str):
+        self.entries.append(entry)
+        self.save()
+
+    def recent(self, n=5):
+        return self.entries[-n:]
diff --git a/nervous_system.py b/nervous_system.py
index 165dcd2..050c4f5 100644
--- a/nervous_system.py
+++ b/nervous_system.py
@@ -1,106 +1,159 @@
+# nervous_system.py
+import threading
 import torch
-import torch.optim as optim
-from torch.nn import CrossEntropyLoss
+import torch.nn as nn
 import torch.nn.functional as F
+import torch.optim as optim
 from sensory import Sensory
 from brain import Brain
 
 
 class NervousSystem:
-    """Wraps the Brain, handles token growth, generation and on-the-fly training."""  # noqa: E501
+    """Wraps the Brain, handles token growth, generation, and training."""
 
     def __init__(self, device: str = "cuda"):
-        self.device = torch.device(device if torch.cuda.is_available() else "cpu")  # noqa: E501
+        self.device = torch.device(
+            device if torch.cuda.is_available() else "cpu"
+        )
         self.sensory = Sensory()
         vocab_size = len(self.sensory.stoi)
         self.brain = Brain(vocab_size).to(self.device)
+        # disable any inplace ops in the model
+        for m in self.brain.modules():
+            if hasattr(m, "inplace"):
+                m.inplace = False  # ensure no in-place ReLUs, etc.
+
         self.optimizer = optim.Adam(self.brain.parameters(), lr=1e-4)
-        self.criterion = CrossEntropyLoss(ignore_index=0)
+        self.criterion = nn.CrossEntropyLoss(ignore_index=0)
         self.meta_steps = 0
+        # ← NEW: lock to serialize all training calls
+        self._train_lock = threading.Lock()
 
     def _resize_embeddings(self) -> None:
+        """
+        Resize token & output embeddings entirely on-device
+        to match the current vocab size, avoiding any CPU↔GPU copies.
+        """
+        device = self.device
         new_size = len(self.sensory.stoi)
+
+        # ─── Resize token embeddings ────────────────────────────────────
         old_emb = self.brain.token_emb
+        old_num, dim = old_emb.num_embeddings, old_emb.embedding_dim
-        # rebuild token embeddings
-        self.brain.token_emb = torch.nn.Embedding(
-            new_size, old_emb.embedding_dim
-        ).to(self.device)
+        # allocate new on same device
+        new_emb = nn.Embedding(new_size, dim).to(device)
+        # copy existing weights
         with torch.no_grad():
-            self.brain.token_emb.weight[: old_emb.num_embeddings] = old_emb.weight  # noqa: E501
+            new_emb.weight[:old_num].copy_(old_emb.weight)
+        self.brain.token_emb = new_emb
 
-        # rebuild output head
+        # ─── Resize output head ─────────────────────────────────────────
         old_out = self.brain.fc_out
-        self.brain.fc_out = torch.nn.Linear(
-            old_emb.embedding_dim, new_size
-        ).to(self.device)
+        out_dim, old_out_feat = old_out.in_features, old_out.out_features
+
+        new_out = nn.Linear(out_dim, new_size).to(device)
         with torch.no_grad():
-            self.brain.fc_out.weight[: old_out.out_features] = old_out.weight
-            self.brain.fc_out.bias[: old_out.out_features] = old_out.bias
+            new_out.weight[:old_out_feat].copy_(old_out.weight)
+            new_out.bias[:old_out_feat].copy_(old_out.bias)
+        self.brain.fc_out = new_out
 
-    def generate(self, prompt: str, max_len: int = 50,
-                 temperature: float = 0.8, top_k: int = 50) -> str:
+    def generate(
+        self,
+        prompt: str,
+        max_len: int = 50,
+        temperature: float = 0.8,
+        top_p: float = 0.9,
+    ) -> str:
+        """Autoregressive nucleus sampling with proper cloning to avoid aliasing."""
         self.brain.eval()
-        raw_ids = self.sensory.encode(prompt, grow=False)[-self.brain.max_seq_len:]  # noqa: E501
-        out = torch.tensor(raw_ids, dtype=torch.long, device=self.device).unsqueeze(0)  # noqa: E501
+        eos_id = self.sensory.stoi.get("<eos>")
 
-        result = []
-        for _ in range(max_len):
-            logits = self.brain(out)[:, -1, :]
-            # apply temperature
-            logits = logits / temperature
-            # top-k filtering
-            values, indices = torch.topk(logits, top_k)
-            probs = F.softmax(values, dim=-1)
-            next_tok = indices[0, torch.multinomial(probs, 1)].unsqueeze(0).unsqueeze(0)  # noqa: E501
-            tok_id = next_tok.item()
-            if tok_id == self.sensory.stoi["<eos>"]:
-                break
-            result.append(tok_id)
-            out = torch.cat([out, next_tok], dim=1)
+        raw_ids = self.sensory.encode(prompt, grow=False)
+        max_ctx = self.brain.max_seq_len - 1
+        if len(raw_ids) > max_ctx:
+            raw_ids = raw_ids[-max_ctx:]
 
-        return self.sensory.decode(result)
-
-    def train(self, user_text: str, bot_text: str) -> None:
-        # 1) grow vocab on _train_ only
-        for txt in (user_text, bot_text):
-            _ = self.sensory.encode(txt, grow=True)
-        self._resize_embeddings()
-
-        # ensure <sep>
-        if "<sep>" not in self.sensory.stoi:
-            idx = len(self.sensory.stoi)
-            self.sensory.stoi["<sep>"] = idx
-            self.sensory.itos[idx] = "<sep>"
-            self._resize_embeddings()
-
-        combined = f"{user_text} <sep> {bot_text}"
-        ids = torch.tensor(
-            self.sensory.encode(combined, grow=False), dtype=torch.long, device=self.device  # noqa: E501
+        input_ids = torch.tensor(
+            raw_ids, dtype=torch.long, device=self.device
         ).unsqueeze(0)
-        if ids.size(1) < 2:
-            return
+        generated = []
+        for _ in range(max_len):
+            logits = self.brain(input_ids)[:, -1, :] / temperature
+            probs = F.softmax(logits, dim=-1)
 
-        inputs = ids[:, :-1]
-        targets = ids[:, 1:]
+            # top-p (nucleus) filtering
+            sorted_p, sorted_idx = torch.sort(probs, descending=True)
+            cum_p = torch.cumsum(sorted_p, dim=-1)
+            mask = cum_p > top_p
+            # clone the slice before writing to avoid overlap
+            mask_shift = mask[..., :-1].clone()
+            mask[..., 1:] = mask_shift
+            sorted_p[mask] = 0
+            sorted_p = sorted_p / sorted_p.sum(dim=-1, keepdim=True)
 
-        self.brain.train()
-        logits = self.brain(inputs)
-        loss = self.criterion(
-            logits.view(-1, logits.size(-1)), targets.view(-1)
-        )
-        self.optimizer.zero_grad()
-        loss.backward()
-        self.optimizer.step()
+            next_tok = sorted_idx[0, torch.multinomial(sorted_p[0], 1)]
+            t_id = next_tok.item()
 
-        # a tiny meta-learning bump
-        self.meta_steps += 1
-        if self.meta_steps % 100 == 0:
-            for g in self.optimizer.param_groups:
-                old_lr = g["lr"]
-                g["lr"] = old_lr * 1.1
-                torch.cuda.synchronize(self.device)
-                g["lr"] = old_lr
+            if eos_id is not None and t_id == eos_id:
+                break
+
+            generated.append(t_id)
+            input_ids = torch.cat([input_ids, next_tok.unsqueeze(0)], dim=1)
+
+            if input_ids.size(1) > self.brain.max_seq_len:
+                input_ids = input_ids[:, -self.brain.max_seq_len:]
+
+        return self.sensory.decode(generated)
+
+    def train(self, user_text: str, bot_text: str) -> None:
+        """
+        On-the-fly self-supervised training from a user↔bot exchange.
+        Serialized by a threading.Lock to avoid in-place grad conflicts.
+        """
+        with self._train_lock:  # ← NEW: no two threads can enter here at once
+            # 1) grow vocab & resize embeddings
+            for txt in (user_text, bot_text):
+                _ = self.sensory.encode(txt, grow=True)
+            self._resize_embeddings()
+
+            # 2) ensure a <sep> token for concatenation
+            if "<sep>" not in self.sensory.stoi:
+                idx = len(self.sensory.stoi)
+                self.sensory.stoi["<sep>"] = idx
+                self.sensory.itos[idx] = "<sep>"
+                self._resize_embeddings()
+
+            # 3) build input/output IDs
+            combined = f"{user_text} <sep> {bot_text}"
+            ids = torch.tensor(
+                self.sensory.encode(combined, grow=False),
+                dtype=torch.long,
+                device=self.device,
+            ).unsqueeze(0)
+
+            if ids.size(1) < 2:
+                return
+
+            inputs = ids[:, :-1]
+            targets = ids[:, 1:]
+
+            # 4) forward + backward + step
+            self.brain.train()
+            logits = self.brain(inputs)
+            loss = self.criterion(
+                logits.view(-1, logits.size(-1)), targets.view(-1)
+            )
+            self.optimizer.zero_grad()
+            loss.backward()
+            self.optimizer.step()
+
+            # 5) optional meta learning step adjustment
+            self.meta_steps += 1
+            if self.meta_steps % 100 == 0:
+                for g in self.optimizer.param_groups:
+                    g["lr"] *= 1.1
diff --git a/persona.py b/persona.py
new file mode 100644
index 0000000..659f510
--- /dev/null
+++ b/persona.py
@@ -0,0 +1,67 @@
+import json
+import os
+import re
+
+class Persona:
+    """
+    Learns Ruby’s persona entirely from her model by asking for JSON,
+    with no hard-coded examples or defaults.
+    """
+    def __init__(self, path="persona.json"):
+        self.path = path
+        self.traits = {}
+        self.load()
+
+    def load(self):
+        if os.path.isfile(self.path):
+            with open(self.path, "r", encoding="utf-8") as f:
+                self.traits = json.load(f)
+
+    def save(self):
+        with open(self.path, "w", encoding="utf-8") as f:
+            json.dump(self.traits, f, ensure_ascii=False, indent=2)
+
+    def summary(self) -> str:
+        if not self.traits:
+            return ""
+        name = self.traits.get("name", "")
+        age = self.traits.get("age", "")
+        hobbies = self.traits.get("hobbies", [])
+        tone = self.traits.get("tone", "")
+        hlist = ", ".join(hobbies) if isinstance(hobbies, list) else str(hobbies)
+        return f"I'm {name}, {age} years old, I love {hlist}, and speak in a {tone} tone."
+
+    def bootstrap(self, system):
+        """
+        Ask Ruby to introspect and output a JSON object with keys:
+        name, age, hobbies (array), and tone (string). No examples given.
+        """
+        prompt = (
+            "Based on all the text you have absorbed, introduce yourself by OUTPUTTING "
+            "ONLY a JSON object with these keys exactly:\n"
+            '  "name": string,\n'
+            '  "age": number,\n'
+            '  "hobbies": array of strings,\n'
+            '  "tone": string describing your speaking style\n'
+            "Do not output anything else."
+        )
+        raw = system.generate(prompt, max_len=150, temperature=0.7, top_p=0.9)
+
+        # extract the first JSON object block
+        m = re.search(r"\{.*?\}", raw, flags=re.DOTALL)
+        if not m:
+            return
+        try:
+            data = json.loads(m.group(0))
+        except json.JSONDecodeError:
+            return
+
+        # keep only expected keys
+        updated = {}
+        for key in ("name", "age", "hobbies", "tone"):
+            if key in data:
+                updated[key] = data[key]
+
+        if updated:
+            self.traits = updated
+            self.save()
diff --git a/plugin_manager.py b/plugin_manager.py
new file mode 100644
index 0000000..46cd4d9
--- /dev/null
+++ b/plugin_manager.py
@@ -0,0 +1,55 @@
+import os
+import importlib.util
+import sys
+from typing import Any, Callable, Dict
+
+PLUGINS_DIR = "plugins"
+os.makedirs(PLUGINS_DIR, exist_ok=True)
+
+
+class PluginManager:
+    """
+    Dynamically loads Python modules from plugins/ and
+    exposes their functions in a registry.
+    """
+
+    def __init__(self):
+        self.registry: Dict[str, Callable[..., Any]] = {}
+        self._load_all()
+
+    def _load_all(self):
+        """Scan plugins/ and import every .py as a module."""
+        for fname in os.listdir(PLUGINS_DIR):
+            if not fname.endswith(".py"):
+                continue
+            path = os.path.join(PLUGINS_DIR, fname)
+            name = os.path.splitext(fname)[0]
+            self._load_module(name, path)
+
+    def _load_module(self, name: str, path: str):
+        """Load a single plugin module and register its callables."""
+        spec = importlib.util.spec_from_file_location(name, path)
+        if spec and spec.loader:
+            mod = importlib.util.module_from_spec(spec)
+            sys.modules[name] = mod
+            spec.loader.exec_module(mod)  # type: ignore
+            for attr in dir(mod):
+                if attr.startswith("_"):
+                    continue
+                obj = getattr(mod, attr)
+                if callable(obj):
+                    key = f"{name}.{attr}"
+                    self.registry[key] = obj
+
+    def register_plugin(self, code: str, name: str):
+        """
+        Persist a new plugin file (name.py), load it immediately,
+        and add its functions to the registry.
+        """
+        path = os.path.join(PLUGINS_DIR, f"{name}.py")
+        with open(path, "w", encoding="utf-8") as f:
+            f.write(code)
+        # replace any existing module
+        if name in sys.modules:
+            del sys.modules[name]
+        self._load_module(name, path)
diff --git a/sensory.py b/sensory.py
index 5597211..5a1af82 100644
--- a/sensory.py
+++ b/sensory.py
@@ -1,13 +1,14 @@
-import os
 import json
+import os
 
 
 class Sensory:
     """Dynamic whitespace tokenizer that can grow (or not) its vocab."""
 
     def __init__(self):
-        self.stoi = {"<pad>": 0, "<unk>": 1}
-        self.itos = {0: "<pad>", 1: "<unk>"}
+        # ensure <pad>, <unk>, AND <eos> are present from the start
+        self.stoi = {"<pad>": 0, "<unk>": 1, "<eos>": 2}
+        self.itos = {0: "<pad>", 1: "<unk>", 2: "<eos>"}
 
     def encode(self, text: str, grow: bool = True) -> list[int]:
         ids: list[int] = []
@@ -25,24 +26,27 @@ class Sensory:
         return ids
 
     def decode(self, ids: list[int]) -> str:
-        return " ".join(self.itos.get(i, "<unk>") for i in ids)
+        out = []
+        for i in ids:
+            if i == self.stoi["<eos>"]:
+                break
+            out.append(self.itos.get(i, "<unk>"))
+        return " ".join(out)
 
     def save_vocab(self, path: str = "vocab.json") -> None:
-        """Dump stoi+itos to disk."""
-        data = {
-            "stoi": self.stoi,
-            # JSON keys must be strings
-            "itos": {str(k): v for k, v in self.itos.items()}
-        }
+        data = {"stoi": self.stoi, "itos": {str(k): v for k, v in self.itos.items()}}
         with open(path, "w", encoding="utf-8") as f:
-            json.dump(data, f, ensure_ascii=False, indent=2)
+            json.dump(data, f, indent=2)
 
     def load_vocab(self, path: str = "vocab.json") -> None:
-        """Load stoi+itos if it exists."""
        if not os.path.isfile(path):
            return
        with open(path, encoding="utf-8") as f:
            data = json.load(f)
        self.stoi = data["stoi"]
-        # convert itos keys back to int
        self.itos = {int(k): v for k, v in data["itos"].items()}
+        # if <eos> somehow got lost, re-add it
+        if "<eos>" not in self.stoi:
+            idx = len(self.stoi)
+            self.stoi["<eos>"] = idx
+            self.itos[idx] = "<eos>"
diff --git a/templates/dashboard.html b/templates/dashboard.html
index 035e625..dad7d2d 100644
--- a/templates/dashboard.html
+++ b/templates/dashboard.html
@@ -1,72 +1,148 @@

[The markup of this hunk did not survive extraction: the HTML tags were stripped, leaving only stray "+"/"-" markers and text fragments. Recoverable content — old template: a "Ruby Dashboard" heading, a "Progress: 0/0" readout, and a "Recent Interactions" panel with a "Loading…" placeholder; new template: the "Ruby Dashboard" header plus stat cards for "Training Progress" (a percentage bar and "0 / 0 lines"), "Recent Turns" ("0 turns"), "Diary Entries" ("0 entries"), and "Plugins" ("0"), followed by a "Recent Interactions" list and a "Processed Lines Over Time" chart.]
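For reviewers who want to poke at the new pieces after applying the patch, here is a minimal sketch (not part of the diff; the "greet" plugin name and a locally running dashboard on port 5000 are assumptions):

# smoke_test.py — quick manual check of the new PluginManager and /stats route.
# Hypothetical helper script: assumes body.py is already running locally so the
# Flask dashboard is listening on port 5000; the plugin name "greet" is made up.
import requests

from plugin_manager import PluginManager

pm = PluginManager()

# register_plugin() writes plugins/greet.py, imports it, and registers its callables
pm.register_plugin(
    code="def hello(name):\n    return f'Hi, {name}!'\n",
    name="greet",
)
print(pm.registry["greet.hello"]("Ruby"))  # -> Hi, Ruby!

# /stats returns the counters the dashboard cards read
stats = requests.get("http://127.0.0.1:5000/stats", timeout=5).json()
print(stats.get("processed_lines"), "/", stats.get("total_lines"), "lines trained")
print(stats.get("life_log_count"), "diary entries,", stats.get("plugin_count"), "plugins")

Note that this exercises a fresh PluginManager in the test process; the running bot picks up the new plugin the next time it constructs its own manager.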