diff --git a/context/context.py b/context/context.py
new file mode 100644
index 0000000..e69de29
diff --git a/dashboard/dashboard.py b/dashboard/dashboard.py
new file mode 100644
index 0000000..93a7a46
--- /dev/null
+++ b/dashboard/dashboard.py
@@ -0,0 +1,17 @@
+from flask import Flask, render_template
+from model.tokenizer import load_vocab
+
+
+app = Flask(__name__)
+
+
+@app.route("/")
+def index():
+    # Pass the current vocabulary size into the template
+    vocab_size = len(load_vocab())
+    return render_template("index.html", vocab_size=vocab_size)
+
+
+def run_dashboard():
+    # No debug/reloader: the Flask reloader cannot run outside the main thread
+    app.run(host="0.0.0.0", port=5000, debug=False, use_reloader=False)
diff --git a/dashboard/templates/index.html b/dashboard/templates/index.html
new file mode 100644
index 0000000..34e30a3
--- /dev/null
+++ b/dashboard/templates/index.html
@@ -0,0 +1,10 @@
+<!DOCTYPE html>
+<html>
+<head>
+<title>Ruby Dashboard</title>
+</head>
+<body>
+Vocabulary Size: {{ vocab_size }}
+</body>
+</html>
+
diff --git a/data/memory/vocab.json b/data/memory/vocab.json
new file mode 100644
index 0000000..5c40c60
--- /dev/null
+++ b/data/memory/vocab.json
@@ -0,0 +1,8 @@
+{
+  "hi": 1,
+  "ruby": 2,
+  "how": 3,
+  "are": 4,
+  "you": 5,
+  "today": 6
+}
\ No newline at end of file
diff --git a/main.py b/main.py
new file mode 100644
index 0000000..67cf9ac
--- /dev/null
+++ b/main.py
@@ -0,0 +1,39 @@
+import discord
+import asyncio
+import threading
+from dotenv import load_dotenv
+import os
+from model.train import train_on_message
+from model.brain import generate_response
+from dashboard.dashboard import run_dashboard
+
+load_dotenv()
+TOKEN = os.getenv("DISCORD_TOKEN")
+
+intents = discord.Intents.default()
+intents.messages = True
+intents.message_content = True
+
+client = discord.Client(intents=intents)
+
+
+@client.event
+async def on_ready():
+    print(f"Ruby is online as {client.user}.")
+
+
+@client.event
+async def on_message(message):
+    if message.author.bot:
+        return
+
+    content = message.content.strip()
+    train_on_message(content)
+    response = generate_response()
+    await message.channel.send(response)
+
+# Launch Flask in background
+threading.Thread(target=run_dashboard, daemon=True).start()
+
+# Launch Discord bot (blocking)
+client.run(TOKEN)
diff --git a/model/brain.py b/model/brain.py
new file mode 100644
index 0000000..63e30c5
--- /dev/null
+++ b/model/brain.py
@@ -0,0 +1,36 @@
+import torch
+import torch.nn as nn
+import random
+from model.tokenizer import Tokenizer
+
+DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+tokenizer = Tokenizer()
+VOCAB_SIZE = 10000 # Temporary cap, grows dynamically
+EMBED_DIM = 128
+
+
+class TinyTransformer(nn.Module):
+    def __init__(self):
+        super().__init__()
+        self.embed = nn.Embedding(VOCAB_SIZE, EMBED_DIM)
+        self.ln1 = nn.LayerNorm(EMBED_DIM)
+        self.fc = nn.Linear(EMBED_DIM, VOCAB_SIZE)
+
+    def forward(self, x):
+        x = self.embed(x)
+        x = self.ln1(x)
+        return self.fc(x)
+
+
+model = TinyTransformer().to(DEVICE)
+optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
+loss_fn = nn.CrossEntropyLoss()
+
+
+def generate_response():
+    seed = torch.tensor([random.randint(0, tokenizer.next_id - 1)], device=DEVICE)
+    output = model(seed.unsqueeze(0))
+    pred = torch.argmax(output, dim=-1).squeeze().tolist()
+    if not isinstance(pred, list):
+        pred = [pred]
+    return tokenizer.detokenize(pred)
diff --git a/model/memory.py b/model/memory.py
new file mode 100644
index 0000000..e69de29
diff --git a/model/tokenizer.py b/model/tokenizer.py
new file mode 100644
index 0000000..5983b3f
--- /dev/null
+++ b/model/tokenizer.py
@@ -0,0 +1,39 @@
+import re
+import os
+import json
+
+VOCAB_PATH = "data/memory/vocab.json"
+
+
+def load_vocab():
+    if os.path.exists(VOCAB_PATH):
+        with open(VOCAB_PATH, "r", encoding="utf-8") as f:
+            return json.load(f)
+    return {}
+
+
+def save_vocab(vocab):
+    with open(VOCAB_PATH, "w", encoding="utf-8") as f:
+        json.dump(vocab, f, indent=2)
+
+
+class Tokenizer:
+    def __init__(self):
+        self.vocab = load_vocab()
+        self.reverse_vocab = {v: k for k, v in self.vocab.items()}
+        self.next_id = max(self.vocab.values(), default=0) + 1
+
+    def tokenize(self, text):
+        words = re.findall(r"\b\w+\b", text.lower())
+        tokens = []
+        for word in words:
+            if word not in self.vocab:
+                self.vocab[word] = self.next_id
+                self.reverse_vocab[self.next_id] = word
+                self.next_id += 1
+            tokens.append(self.vocab[word])
+        save_vocab(self.vocab)
+        return tokens
+
+    def detokenize(self, tokens):
+        return " ".join(self.reverse_vocab.get(t, "