Good starting point

Dani 2025-05-04 17:32:25 -04:00
parent 58442b86ee
commit a1a6c77e59
9 changed files with 619 additions and 2 deletions

6
.gitignore vendored

@@ -169,5 +169,7 @@ cython_debug/
 #.idea/
 .vscode/launch.json
-/content/*
+/books/*
 /memory/*
+vocab.json
+progress.json

126
body.py Normal file

@@ -0,0 +1,126 @@
import os
import asyncio
import glob
import threading
import json
from collections import deque
import logging

import discord

from nervous_system import NervousSystem
import dashboard   # <-- import your new Flask app
import brain_map   # <-- import the blueprint to inject system

# Path for progress persistence
PROGRESS_PATH = 'progress.json'

# Mute noisy web-server loggers
for noisy_logger in ["werkzeug", "flask", "flask.app"]:
    logging.getLogger(noisy_logger).setLevel(logging.CRITICAL)

# ─── Initialize Ruby & Discord ───────────────────────────────────────────────── # noqa: E501
intents = discord.Intents.default()
intents.message_content = True
client = discord.Client(intents=intents)

system = NervousSystem()
system.history = deque(maxlen=100)

# Load or resume vocab + embeddings
system.sensory.load_vocab('vocab.json')
system._resize_embeddings()
print('Loaded vocab size:', len(system.sensory.stoi))

# Resume progress
if os.path.isfile(PROGRESS_PATH):
    with open(PROGRESS_PATH, 'r', encoding='utf-8') as f:
        data = json.load(f)
    system.processed_lines = data.get('processed_lines', 0)
else:
    system.processed_lines = 0

# Compute total book lines
total = sum(
    1
    for path in glob.glob('books/*.txt')
    for line in open(path, encoding='utf-8')
    if line.strip()
)
system.total_lines = total
print(f'Resuming training at {system.processed_lines}/{system.total_lines} lines')  # noqa: E501

# Inject into Flask contexts
dashboard.system = system
brain_map.system = system


# ─── Book-training when idle ──────────────────────────────────────────────────── # noqa: E501
async def train_books_idle():
    await client.wait_until_ready()
    await asyncio.sleep(5)
    processed = 0
    skip = system.processed_lines
    for path in glob.glob('books/*.txt'):
        with open(path, encoding='utf-8') as f:
            for raw in f:
                text = raw.strip()
                if not text:
                    continue
                # fast-forward past lines finished in a previous run
                if processed < skip:
                    processed += 1
                    continue
                await asyncio.to_thread(system.train, text, text)
                processed += 1
                system.processed_lines = processed
                # checkpoint vocab + progress every 200 lines
                if processed % 200 == 0 or processed == system.total_lines:
                    system.sensory.save_vocab('vocab.json')
                    with open(PROGRESS_PATH, 'w', encoding='utf-8') as pf:
                        json.dump({'processed_lines': processed}, pf)
    # Final checkpoint
    system.sensory.save_vocab('vocab.json')
    with open(PROGRESS_PATH, 'w', encoding='utf-8') as pf:
        json.dump({'processed_lines': system.processed_lines}, pf)


@client.event
async def on_ready():
    print(f'Ruby is online as {client.user}!')
    asyncio.create_task(train_books_idle())


@client.event
async def on_message(message: discord.Message):
    if message.author == client.user or not message.content:
        return
    user_text = message.content.strip()
    reply = system.generate(user_text)
    await message.channel.send(reply)
    system.history.append({'user': user_text, 'bot': reply})
    # train on the exchange without blocking the event loop
    asyncio.create_task(asyncio.to_thread(system.train, user_text, reply))


# ─── Launch Dashboard & Bot ──────────────────────────────────────────────────── # noqa: E501
def run_dashboard():
    dashboard.app.run(
        host='0.0.0.0', port=5000,
        debug=False, use_reloader=False
    )


threading.Thread(target=run_dashboard, daemon=True).start()
print('Dashboard available at http://127.0.0.1:5000')

token = os.getenv('DISCORD_TOKEN')
if not token:
    raise RuntimeError('Please set the DISCORD_TOKEN environment variable')
client.run(token)

36
brain.py Normal file

@@ -0,0 +1,36 @@
import torch
import torch.nn as nn


class Brain(nn.Module):
    """
    Minimal Transformer-based autoregressive model.
    """
    def __init__(
        self,
        vocab_size: int,
        d_model: int = 256,
        nhead: int = 4,
        num_layers: int = 2,
        dim_feedforward: int = 512,
        max_seq_len: int = 128,
    ):
        super().__init__()
        self.token_emb = nn.Embedding(vocab_size, d_model)
        self.pos_emb = nn.Parameter(torch.zeros(1, max_seq_len, d_model))
        encoder_layer = nn.TransformerEncoderLayer(
            d_model=d_model,
            nhead=nhead,
            dim_feedforward=dim_feedforward,
            batch_first=True,
        )
        self.transformer = nn.TransformerEncoder(encoder_layer, num_layers)
        self.fc_out = nn.Linear(d_model, vocab_size)
        self.max_seq_len = max_seq_len

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        seq_len = x.size(1)
        x = self.token_emb(x) + self.pos_emb[:, :seq_len, :]
        # causal mask so each position attends only to itself and the past,
        # matching the autoregressive objective the docstring describes
        mask = torch.triu(
            torch.full((seq_len, seq_len), float('-inf'), device=x.device),
            diagonal=1,
        )
        x = self.transformer(x, mask=mask)
        return self.fc_out(x)
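
A minimal shape check for the model above (an illustrative sketch, not part of the commit; the vocab size, batch size and sequence length are arbitrary):

import torch
from brain import Brain

model = Brain(vocab_size=100)
tokens = torch.randint(0, 100, (2, 16))  # batch of 2 sequences, 16 tokens each
logits = model(tokens)
print(logits.shape)  # torch.Size([2, 16, 100]): one vocab-sized logit row per position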

92
brain_map.py Normal file

@@ -0,0 +1,92 @@
import numpy as np
from flask import Blueprint, render_template, jsonify, request

bp = Blueprint(
    'brain_map',
    __name__,
    template_folder='templates',
    static_folder='static',
)

# Will be injected from body.py
system = None


@bp.route('/graph')
def graph():
    return render_template('graph.html')


@bp.route('/data')
def data():
    if system is None:
        return jsonify({"nodes": [], "edges": []})

    # 1) embeddings → cosine sims
    emb = system.brain.token_emb.weight.detach().cpu().numpy()
    N = emb.shape[0]
    norms = np.linalg.norm(emb, axis=1, keepdims=True)
    emb_norm = emb / (norms + 1e-8)
    sim = emb_norm.dot(emb_norm.T)

    # 2) filters
    min_degree = int(request.args.get('min_degree', 1))
    max_nodes = int(request.args.get('max_nodes', 200))

    # 3) valid tokens
    items = [
        (tok, idx) for tok, idx in system.sensory.stoi.items() if 0 <= idx < N
    ]

    # 4) build undirected unique pairs & degree counts
    # (each token links to its top-3 most similar neighbours)
    deg = {int(idx): 0 for _, idx in items}
    unique_pairs = set()
    for _, raw_i in items:
        i = int(raw_i)
        count = 0
        for raw_j in np.argsort(-sim[i]):
            j = int(raw_j)
            if j == i or j not in deg:
                continue
            pair = (min(i, j), max(i, j))
            if pair in unique_pairs:
                continue
            unique_pairs.add(pair)
            deg[i] += 1
            deg[j] += 1
            count += 1
            if count >= 3:
                break

    # 5) filter & cap nodes
    filtered = [(tok, idx) for tok, idx in items if deg[int(idx)] >= min_degree]
    filtered.sort(key=lambda x: (-deg[int(x[1])], int(x[1])))
    subset = filtered[:max_nodes]
    subset_ids = {int(idx) for _, idx in subset}

    # 6) build nodes with HSL coloring (high degree → red, low → blue)
    max_deg = max((deg[idx] for _, idx in subset), default=1)
    nodes = []
    for tok, raw_idx in subset:
        idx = int(raw_idx)
        d = deg[idx]
        hue = int((1 - d / max_deg) * 240)
        nodes.append({
            'id': idx,
            'label': tok,
            'color': {
                'background': f'hsl({hue},80%,40%)',
                'border': f'hsl({hue},60%,30%)',
                'highlight': {
                    'background': f'hsl({hue},100%,50%)',
                    'border': f'hsl({hue},80%,40%)'
                }
            }
        })

    # 7) build edges
    edges = [
        {'from': a, 'to': b}
        for (a, b) in unique_pairs
        if a in subset_ids and b in subset_ids
    ]
    return jsonify({'nodes': nodes, 'edges': edges})

36
dashboard.py Normal file

@@ -0,0 +1,36 @@
from flask import Flask, render_template, jsonify

import brain_map

app = Flask(
    __name__,
    template_folder='templates',
    static_folder='static',
)

# Register the brain_map blueprint
app.register_blueprint(brain_map.bp)

# Will be injected from body.py
system = None


@app.route('/')
def dashboard():
    return render_template('dashboard.html')


@app.route('/progress')
def progress():
    if system is None:
        return jsonify({'processed': 0, 'total': 0})
    return jsonify({
        'processed': getattr(system, 'processed_lines', 0),
        'total': getattr(system, 'total_lines', 0)
    })


@app.route('/interactions')
def interactions():
    if system is None or not hasattr(system, 'history'):
        return jsonify([])
    return jsonify(list(system.history))

106
nervous_system.py Normal file

@@ -0,0 +1,106 @@
import torch
import torch.optim as optim
from torch.nn import CrossEntropyLoss
import torch.nn.functional as F

from sensory import Sensory
from brain import Brain


class NervousSystem:
    """Wraps the Brain; handles token growth, generation and on-the-fly training."""  # noqa: E501

    def __init__(self, device: str = "cuda"):
        self.device = torch.device(device if torch.cuda.is_available() else "cpu")  # noqa: E501
        self.sensory = Sensory()
        vocab_size = len(self.sensory.stoi)
        self.brain = Brain(vocab_size).to(self.device)
        self.optimizer = optim.Adam(self.brain.parameters(), lr=1e-4)
        self.criterion = CrossEntropyLoss(ignore_index=0)
        self.meta_steps = 0

    def _resize_embeddings(self) -> None:
        new_size = len(self.sensory.stoi)
        old_emb = self.brain.token_emb
        if new_size == old_emb.num_embeddings:
            return  # vocab unchanged; keep parameters and optimizer state
        # rebuild token embeddings, copying over the old rows
        self.brain.token_emb = torch.nn.Embedding(
            new_size, old_emb.embedding_dim
        ).to(self.device)
        with torch.no_grad():
            self.brain.token_emb.weight[: old_emb.num_embeddings] = old_emb.weight  # noqa: E501
        # rebuild output head
        old_out = self.brain.fc_out
        self.brain.fc_out = torch.nn.Linear(
            old_emb.embedding_dim, new_size
        ).to(self.device)
        with torch.no_grad():
            self.brain.fc_out.weight[: old_out.out_features] = old_out.weight
            self.brain.fc_out.bias[: old_out.out_features] = old_out.bias
        # re-create the optimizer so it tracks the freshly built parameters
        # (otherwise it keeps updating the orphaned old tensors)
        self.optimizer = optim.Adam(
            self.brain.parameters(), lr=self.optimizer.param_groups[0]["lr"]
        )

    def generate(self, prompt: str, max_len: int = 50,
                 temperature: float = 0.8, top_k: int = 50) -> str:
        self.brain.eval()
        raw_ids = self.sensory.encode(prompt, grow=False)[-self.brain.max_seq_len:]  # noqa: E501
        out = torch.tensor(raw_ids, dtype=torch.long, device=self.device).unsqueeze(0)  # noqa: E501
        result = []
        for _ in range(max_len):
            # keep the context inside the positional-embedding window
            window = out[:, -self.brain.max_seq_len:]
            logits = self.brain(window)[:, -1, :]
            # apply temperature
            logits = logits / temperature
            # top-k filtering (clamped so k never exceeds the vocab size)
            k = min(top_k, logits.size(-1))
            values, indices = torch.topk(logits, k)
            probs = F.softmax(values, dim=-1)
            # sample one of the top-k ids; gather keeps the (1, 1) shape
            next_tok = indices.gather(1, torch.multinomial(probs, 1))
            tok_id = next_tok.item()
            if tok_id == self.sensory.stoi.get("<eos>", -1):
                break
            result.append(tok_id)
            out = torch.cat([out, next_tok], dim=1)
        return self.sensory.decode(result)

    def train(self, user_text: str, bot_text: str) -> None:
        # 1) grow vocab on _train_ only
        for txt in (user_text, bot_text):
            _ = self.sensory.encode(txt, grow=True)
        self._resize_embeddings()
        # ensure <sep>
        if "<sep>" not in self.sensory.stoi:
            idx = len(self.sensory.stoi)
            self.sensory.stoi["<sep>"] = idx
            self.sensory.itos[idx] = "<sep>"
            self._resize_embeddings()
        combined = f"{user_text} <sep> {bot_text}"
        ids = torch.tensor(
            self.sensory.encode(combined, grow=False),
            dtype=torch.long, device=self.device
        ).unsqueeze(0)[:, : self.brain.max_seq_len]  # clamp to the positional window
        if ids.size(1) < 2:
            return
        inputs = ids[:, :-1]
        targets = ids[:, 1:]
        self.brain.train()
        logits = self.brain(inputs)
        loss = self.criterion(
            logits.view(-1, logits.size(-1)), targets.view(-1)
        )
        self.optimizer.zero_grad()
        loss.backward()
        self.optimizer.step()
        # a tiny meta-learning bump every 100 steps
        self.meta_steps += 1
        if self.meta_steps % 100 == 0:
            for g in self.optimizer.param_groups:
                old_lr = g["lr"]
                g["lr"] = old_lr * 1.1
                if self.device.type == "cuda":
                    torch.cuda.synchronize(self.device)
                g["lr"] = old_lr

48
sensory.py Normal file

@@ -0,0 +1,48 @@
import os
import json


class Sensory:
    """Dynamic whitespace tokenizer that can grow (or not) its vocab."""

    def __init__(self):
        self.stoi = {"<pad>": 0, "<unk>": 1}
        self.itos = {0: "<pad>", 1: "<unk>"}

    def encode(self, text: str, grow: bool = True) -> list[int]:
        ids: list[int] = []
        for tok in text.strip().split():
            if tok not in self.stoi:
                if grow:
                    idx = len(self.stoi)
                    self.stoi[tok] = idx
                    self.itos[idx] = tok
                else:
                    idx = self.stoi["<unk>"]
            else:
                idx = self.stoi[tok]
            ids.append(idx)
        return ids

    def decode(self, ids: list[int]) -> str:
        return " ".join(self.itos.get(i, "<unk>") for i in ids)

    def save_vocab(self, path: str = "vocab.json") -> None:
        """Dump stoi+itos to disk."""
        data = {
            "stoi": self.stoi,
            # JSON keys must be strings
            "itos": {str(k): v for k, v in self.itos.items()}
        }
        with open(path, "w", encoding="utf-8") as f:
            json.dump(data, f, ensure_ascii=False, indent=2)

    def load_vocab(self, path: str = "vocab.json") -> None:
        """Load stoi+itos if it exists."""
        if not os.path.isfile(path):
            return
        with open(path, encoding="utf-8") as f:
            data = json.load(f)
        self.stoi = data["stoi"]
        # convert itos keys back to int
        self.itos = {int(k): v for k, v in data["itos"].items()}
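
A quick round-trip of the tokenizer above (an illustrative sketch, not part of the commit; note that unseen tokens map to <unk> when grow=False):

from sensory import Sensory

s = Sensory()
ids = s.encode('hello world', grow=True)    # adds both tokens to the vocab
print(s.decode(ids))                        # -> 'hello world'
print(s.encode('hello again', grow=False))  # 'again' becomes <unk> id 1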

72
templates/dashboard.html Normal file

@@ -0,0 +1,72 @@
<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="utf-8" />
  <title>Ruby Dashboard</title>
  <style>
    body {
      background:#1e1e1e; color:#ddd;
      font-family:sans-serif; padding:20px;
    }
    h1 { color:#fff; }
    .section { margin-bottom:20px; }
    button {
      background:#333; border:1px solid #555;
      color:#ddd; padding:8px 12px;
      border-radius:4px; cursor:pointer;
    }
    #history {
      max-height:300px; overflow:auto;
      border:1px solid #444; padding:10px;
      border-radius:4px; background:#2e2e2e;
    }
    .entry { margin-bottom:8px; }
    .user { color:#8af; }
    .bot { color:#fa8; }
  </style>
</head>
<body>
  <h1>Ruby Dashboard</h1>
  <div class="section">
    <strong id="progress">Progress: 0/0</strong>
  </div>
  <div class="section">
    <h2>Recent Interactions</h2>
    <div id="history">Loading…</div>
  </div>
  <div class="section">
    <button id="load-graph">Load Brain Map</button>
  </div>
  <script>
    async function refreshProgress() {
      const { processed, total } = await fetch('/progress').then(r => r.json());
      document.getElementById('progress').textContent =
        `Progress: ${processed}/${total}`;
    }
    async function refreshHistory() {
      const hist = await fetch('/interactions').then(r => r.json());
      const div = document.getElementById('history');
      div.innerHTML = '';
      if (hist.length === 0) {
        div.textContent = 'No interactions yet.';
        return;
      }
      hist.slice(-20).forEach(({user, bot}) => {
        const e = document.createElement('div'); e.className = 'entry';
        const u = document.createElement('div'); u.className = 'user'; u.textContent = 'User: ' + user;
        const b = document.createElement('div'); b.className = 'bot'; b.textContent = 'Bot: ' + bot;
        e.append(u, b); div.appendChild(e);
      });
    }
    document.getElementById('load-graph')
      .onclick = () => window.location = '/graph';
    refreshProgress(); refreshHistory();
    setInterval(refreshProgress, 5000);
    setInterval(refreshHistory, 5000);
  </script>
</body>
</html>

99
templates/graph.html Normal file

@@ -0,0 +1,99 @@
<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="utf-8" />
  <title>Ruby Brain Map</title>
  <script src="https://unpkg.com/vis-network@9.1.2/dist/vis-network.min.js"></script>
  <style>
    html, body {
      margin: 0; padding: 0; height: 100%;
      background: #1e1e1e; color: #ddd;
      font-family: sans-serif;
    }
    #controls {
      position: absolute; top: 10px; right: 10px; z-index: 2;
      display: flex; gap: 8px; align-items: center;
    }
    #network { width: 100%; height: 100%; }
    button, input {
      background: #333; border: 1px solid #555;
      color: #ddd; padding: 4px 8px; border-radius: 4px;
      font-size: 14px; cursor: pointer;
    }
  </style>
</head>
<body>
  <div id="controls">
    <label>Min degree:
      <input id="min-degree" type="range" min="1" max="10" value="1">
    </label>
    <label>Max nodes:
      <input id="max-nodes" type="number" min="50" max="500" step="50" value="200">
    </label>
    <button id="apply-filters">Apply</button>
    <button id="fit-btn">Fit Graph</button>
  </div>
  <div id="network"></div>
  <script>
    let network, nodesDS, edgesDS;
    const filters = { min_degree: 1, max_nodes: 200 };

    document.getElementById('apply-filters').onclick = () => {
      filters.min_degree = +document.getElementById('min-degree').value;
      filters.max_nodes = +document.getElementById('max-nodes').value;
      refreshData();
    };

    async function initNetwork() {
      nodesDS = new vis.DataSet();
      edgesDS = new vis.DataSet();
      const container = document.getElementById('network');
      const options = {
        nodes: { font: { color: '#ddd' }, shape: 'dot', size: 8 },
        edges: {
          color: '#555',
          smooth: false,           // no curves
          selfReferenceSize: 0,    // disable self-loops
          arrows: { to: false, from: false }
        },
        physics: {
          enabled: true,
          stabilization: { iterations: 300, fit: true },
          barnesHut: { gravitationalConstant: -500, springLength: 150, centralGravity: 0.2 }
        },
        interaction: {
          hover: true, tooltipDelay: 200,
          zoomView: true, dragNodes: true,
          navigationButtons: true
        },
        minZoom: 0.05, maxZoom: 3
      };
      network = new vis.Network(container,
        { nodes: nodesDS, edges: edgesDS }, options);
      // freeze the layout once the initial physics run settles
      network.once('stabilizationIterationsDone', () => network.setOptions({ physics: false }));
      document.getElementById('fit-btn').onclick = () => network.fit();
    }

    async function refreshData() {
      const qs = new URLSearchParams(filters);
      const graphRaw = await fetch(`/data?${qs}&_=${Date.now()}`, { cache: 'no-store' }).then(r => r.json());
      // give edges stable ids so repeated update() calls don't duplicate them
      const edges = graphRaw.edges.map(e => ({ id: `${e.from}-${e.to}`, ...e }));
      nodesDS.update(graphRaw.nodes);
      edgesDS.update(edges);
      // prune nodes/edges the server no longer reports
      const validNodes = new Set(graphRaw.nodes.map(n => n.id));
      nodesDS.getIds().forEach(id => validNodes.has(id) || nodesDS.remove(id));
      const validEdges = new Set(edges.map(e => e.id));
      edgesDS.getIds().forEach(id => validEdges.has(id) || edgesDS.remove(id));
    }

    // start up
    initNetwork().then(() => {
      refreshData();
      setInterval(refreshData, 5000);
    });
  </script>
</body>
</html>