Didn't sync

Dani 2025-04-24 13:17:08 -04:00
parent 9d85d969bb
commit a069e9b7dd
12 changed files with 169 additions and 0 deletions

0 context/context.py Normal file

14 dashboard/dashboard.py Normal file

@@ -0,0 +1,14 @@
from flask import Flask, render_template
import json

app = Flask(__name__)

@app.route("/")
def index():
    # Pass the current vocab size so the template's {{ vocab_size }} renders.
    try:
        with open("data/memory/vocab.json", "r", encoding="utf-8") as f:
            vocab_size = len(json.load(f))
    except FileNotFoundError:
        vocab_size = 0
    return render_template("index.html", vocab_size=vocab_size)

def run_dashboard():
    app.run(host="0.0.0.0", port=5000, debug=False, use_reloader=False)

10 dashboard/templates/index.html Normal file

@@ -0,0 +1,10 @@
<!DOCTYPE html>
<html>
<head>
<title>Ruby's Dashboard</title>
</head>
<body>
<h1>Ruby is running</h1>
<p>Vocabulary Size: {{ vocab_size }}</p>
</body>
</html>

8 data/memory/vocab.json Normal file

@@ -0,0 +1,8 @@
{
"hi": 1,
"ruby": 2,
"how": 3,
"are": 4,
"you": 5,
"today": 6
}
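
The vocab file is a flat word → id map. Tokenizer (model/tokenizer.py below) derives its next free id as max(values) + 1, so with this file on disk new words continue from id 7.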

39 main.py Normal file

@@ -0,0 +1,39 @@
import discord
import threading
from dotenv import load_dotenv
import os
from model.train import train_on_message
from model.brain import generate_response
from dashboard.dashboard import run_dashboard

load_dotenv()
TOKEN = os.getenv("DISCORD_TOKEN")

intents = discord.Intents.default()
intents.messages = True
intents.message_content = True
client = discord.Client(intents=intents)

@client.event
async def on_ready():
    print(f"Ruby is online as {client.user}.")

@client.event
async def on_message(message):
    if message.author.bot:
        return
    content = message.content.strip()
    train_on_message(content)
    response = generate_response()
    await message.channel.send(response)

# Launch Flask dashboard in a background daemon thread
threading.Thread(target=run_dashboard, daemon=True).start()

# Launch Discord bot (blocking)
client.run(TOKEN)
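
One caveat: train_on_message and generate_response are synchronous, so each message blocks the Discord event loop for the duration of a training step. A minimal sketch of a non-blocking variant using asyncio.to_thread (hypothetical; not part of this commit):

import asyncio

@client.event
async def on_message(message):
    if message.author.bot:
        return
    content = message.content.strip()
    # Push the blocking torch work onto a worker thread so the
    # event loop stays responsive while Ruby trains.
    await asyncio.to_thread(train_on_message, content)
    response = await asyncio.to_thread(generate_response)
    await message.channel.send(response)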

36 model/brain.py Normal file

@@ -0,0 +1,36 @@
import torch
import torch.nn as nn
import random
from model.tokenizer import Tokenizer

DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
tokenizer = Tokenizer()

VOCAB_SIZE = 10000  # Temporary cap; the tokenizer's vocab grows dynamically beneath it
EMBED_DIM = 128

class TinyTransformer(nn.Module):
    def __init__(self):
        super().__init__()
        self.embed = nn.Embedding(VOCAB_SIZE, EMBED_DIM)
        self.ln1 = nn.LayerNorm(EMBED_DIM)
        self.fc = nn.Linear(EMBED_DIM, VOCAB_SIZE)

    def forward(self, x):
        x = self.embed(x)
        x = self.ln1(x)
        return self.fc(x)

model = TinyTransformer().to(DEVICE)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-4)
loss_fn = nn.CrossEntropyLoss()

def generate_response():
    # Seed with one random known token id and decode the argmax prediction.
    model.eval()
    with torch.no_grad():
        seed = torch.tensor([random.randint(0, tokenizer.next_id - 1)], device=DEVICE)
        output = model(seed.unsqueeze(0))
    pred = torch.argmax(output, dim=-1).squeeze().tolist()
    if not isinstance(pred, list):
        pred = [pred]
    return tokenizer.detokenize(pred)
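
Because generate_response takes a pure argmax over a single random seed token, each seed always maps to the same one-word reply. A sketch of a temperature-sampling variant (hypothetical name generate_response_sampled; assumes it lives in this same module so model, tokenizer, random, and DEVICE are in scope):

def generate_response_sampled(temperature: float = 1.0):
    # Same single-token seeding, but sample from the softmax
    # distribution instead of taking the argmax.
    model.eval()
    with torch.no_grad():
        seed = torch.tensor([random.randint(0, tokenizer.next_id - 1)], device=DEVICE)
        logits = model(seed.unsqueeze(0))[0, -1] / temperature
        probs = torch.softmax(logits, dim=-1)
        pred = torch.multinomial(probs, num_samples=1).item()
    return tokenizer.detokenize([pred])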

0 model/memory.py Normal file

39 model/tokenizer.py Normal file

@@ -0,0 +1,39 @@
import re
import os
import json

VOCAB_PATH = "data/memory/vocab.json"

def load_vocab():
    if os.path.exists(VOCAB_PATH):
        with open(VOCAB_PATH, "r", encoding="utf-8") as f:
            return json.load(f)
    return {}

def save_vocab(vocab):
    # Make sure the memory directory exists before writing.
    os.makedirs(os.path.dirname(VOCAB_PATH), exist_ok=True)
    with open(VOCAB_PATH, "w", encoding="utf-8") as f:
        json.dump(vocab, f, indent=2)

class Tokenizer:
    def __init__(self):
        self.vocab = load_vocab()
        self.reverse_vocab = {v: k for k, v in self.vocab.items()}
        self.next_id = max(self.vocab.values(), default=0) + 1

    def tokenize(self, text):
        words = re.findall(r"\b\w+\b", text.lower())
        tokens = []
        for word in words:
            if word not in self.vocab:
                self.vocab[word] = self.next_id
                self.reverse_vocab[self.next_id] = word
                self.next_id += 1
            tokens.append(self.vocab[word])
        save_vocab(self.vocab)
        return tokens

    def detokenize(self, tokens):
        return " ".join(self.reverse_vocab.get(t, "<unk>") for t in tokens)

19 model/train.py Normal file

@@ -0,0 +1,19 @@
import torch
from model.brain import model, optimizer, loss_fn, tokenizer, DEVICE

def train_on_message(text: str):
    model.train()
    tokens = tokenizer.tokenize(text)
    if len(tokens) < 2:
        return
    # Next-token objective: inputs are tokens[:-1], targets are tokens[1:].
    input_tensor = torch.tensor(tokens[:-1], dtype=torch.long, device=DEVICE).unsqueeze(0)
    target_tensor = torch.tensor(tokens[1:], dtype=torch.long, device=DEVICE).unsqueeze(0)
    output = model(input_tensor)
    loss = loss_fn(output.view(-1, output.size(-1)), target_tensor.view(-1))
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
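
Each call is one online gradient step on a single message; there is no batching, replay, or persistence of weights here. A throwaway smoke test outside Discord might look like this (hypothetical, not in the commit):

from model.train import train_on_message
from model.brain import generate_response

for _ in range(100):
    train_on_message("hi ruby how are you today")
print(generate_response())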

0 reader/filter.py Normal file

0 reader/reader.py Normal file

4 requirements.txt Normal file

@@ -0,0 +1,4 @@
discord.py==2.3.2
python-dotenv
flask
torch
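
Setup, as implied by main.py (not spelled out in the commit): pip install -r requirements.txt, put DISCORD_TOKEN=<your bot token> in a .env file next to main.py, then run python main.py. The Flask dashboard comes up on port 5000 in a daemon thread and the Discord client blocks in the foreground.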