Started trying to make Ruby learn as we talk to her

Dan 2025-01-08 19:46:07 -05:00
parent 326a7b81d7
commit bc3e368d0d
10 changed files with 234 additions and 49 deletions

20
dataset.py Normal file

@@ -0,0 +1,20 @@
import torch
from tokenizer import simple_tokenizer, load_vocab

def create_dataset():
    vocab = load_vocab()
    data = [
        ("a", "b"),
        ("ab", "c"),
        ("abc", "d"),
    ]
    inputs = [torch.tensor(simple_tokenizer(src, vocab), dtype=torch.long) for src, tgt in data]
    targets = [torch.tensor(simple_tokenizer(tgt, vocab), dtype=torch.long) for src, tgt in data]
    return inputs, targets

if __name__ == "__main__":
    inputs, targets = create_dataset()
    print(inputs)
    print(targets)
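
For reference, with the vocab.json added in this commit ("a" → 0, "b" → 1, "c" → 2, "d" → 3), create_dataset() returns:

    inputs:  [tensor([0]), tensor([0, 1]), tensor([0, 1, 2])]
    targets: [tensor([1]), tensor([2]), tensor([3])]

i.e. each pair teaches the model to continue a prefix of the alphabet by one character.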

63
main.py

@@ -1,70 +1,35 @@
 import discord
-import requests
-import json
+from model_manager import ModelManager
 import os
 from dotenv import load_dotenv

-# Load environment variables from .env file
+# Load environment variables
 load_dotenv()

-# Replace with your bot token
-BOT_TOKEN = os.getenv('DISCORD_TOKEN')
-
-# Ollama configuration
-OLLAMA_API_URL = 'http://192.168.1.159:11434/api/generate'  # Adjust if your Ollama setup is different
-
-# Set up the Discord client
+# Discord bot setup
 intents = discord.Intents.default()
 intents.messages = True
 intents.message_content = True
 client = discord.Client(intents=intents)

+# Initialize the ModelManager
+USE_CUSTOM_MODEL = True
+OLLAMA_URL = None  # Set to your Ollama endpoint if needed
+model_manager = ModelManager(use_custom_model=USE_CUSTOM_MODEL, ollama_url=OLLAMA_URL)

-# Function to query Ollama
-def query_ollama(prompt):
-    payload = {
-        "prompt": prompt,
-        "model": "nollama/mythomax-l2-13b:Q4_K_M"  # Replace with your Ollama model
-    }
-    try:
-        response = requests.post(OLLAMA_API_URL, json=payload, stream=True)
-        if response.status_code == 200:
-            collected_response = ""
-            # Stream and parse each line of JSON from the response
-            for line in response.iter_lines(decode_unicode=True):
-                if line.strip():  # Skip empty lines
-                    try:
-                        data = json.loads(line)  # Parse each line as JSON
-                        collected_response += data.get("response", "")
-                        if data.get("done", False):
-                            break
-                    except json.JSONDecodeError as e:
-                        print(f"Error decoding JSON line: {line}, Error: {e}")
-            return collected_response.strip() or "No response from model."
-        else:
-            return f"Error: {response.status_code} - {response.text}"
-    except requests.RequestException as e:
-        return f"Error connecting to Ollama: {str(e)}"

-# Event for when the bot is ready
 @client.event
 async def on_ready():
-    print(f'We have logged in as {client.user}')
+    print(f"Logged in as {client.user}")

-# Event for when a message is sent
 @client.event
 async def on_message(message):
     # Ignore the bot's own messages
     if message.author == client.user:
         return
-    # Respond to all messages except those in DMs
-    if not isinstance(message.channel, discord.DMChannel):
-        response = query_ollama(message.content.strip())
-        await message.channel.send(response)
+    # Generate response
+    user_input = message.content
+    bot_response = model_manager.generate_response(user_input)
+    await message.channel.send(bot_response)

-# Run the bot
-client.run(BOT_TOKEN)
+client.run(os.getenv("DISCORD_TOKEN"))
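
Since the token now comes from os.getenv via load_dotenv(), a .env file next to main.py is assumed (not part of this commit; the value below is a placeholder):

    # .env
    DISCORD_TOKEN=your-bot-token-here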

11
memory_buffer.py Normal file

@@ -0,0 +1,11 @@
from collections import deque

class MemoryBuffer:
    def __init__(self, capacity=10):
        self.buffer = deque(maxlen=capacity)

    def add_interaction(self, user_input, bot_response):
        self.buffer.append((user_input, bot_response))

    def get_data(self):
        return list(self.buffer)

25
model.py Normal file

@@ -0,0 +1,25 @@
import torch
import torch.nn as nn

class TinyGPT(nn.Module):
    def __init__(self, vocab_size, embed_size, num_heads, num_layers):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embed_size)
        self.transformer = nn.Transformer(
            d_model=embed_size,
            nhead=num_heads,
            num_encoder_layers=num_layers,
            num_decoder_layers=num_layers,
            batch_first=True  # Ensures batch is the first dimension
        )
        self.fc = nn.Linear(embed_size, vocab_size)

    def forward(self, src, tgt):
        # Embed inputs
        src_embed = self.embedding(src)  # Shape: (batch_size, seq_len, embed_size)
        tgt_embed = self.embedding(tgt)  # Shape: (batch_size, seq_len, embed_size)
        # Pass through the transformer
        transformer_out = self.transformer(src_embed, tgt_embed)
        # Linear projection to vocabulary size
        output = self.fc(transformer_out)
        return output
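
As a quick sanity check (this sketch is not part of the commit), the shapes TinyGPT expects and returns; vocab_size=28 matches the vocab.json below:

    import torch
    from model import TinyGPT

    model = TinyGPT(vocab_size=28, embed_size=32, num_heads=2, num_layers=2)
    src = torch.randint(0, 28, (1, 3))  # (batch, seq_len)
    tgt = torch.randint(0, 28, (1, 3))
    out = model(src, tgt)
    print(out.shape)  # torch.Size([1, 3, 28]): per-position logits over the vocab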

62
model_manager.py Normal file

@@ -0,0 +1,62 @@
import torch
from torch.optim import Adam
from torch.nn import CrossEntropyLoss
from memory_buffer import MemoryBuffer
from model import TinyGPT
from tokenizer import simple_tokenizer, detokenizer, load_vocab

class ModelManager:
    def __init__(self, use_custom_model=True, ollama_url=None):
        self.use_custom_model = use_custom_model
        self.ollama_url = ollama_url
        self.memory = MemoryBuffer(capacity=10)  # Memory for the 10 most recent interactions
        if self.use_custom_model:
            self._load_custom_model()

    def _load_custom_model(self):
        """Load the custom GPT model."""
        self.vocab = load_vocab()
        self.model = TinyGPT(vocab_size=len(self.vocab), embed_size=32, num_heads=2, num_layers=2).cuda()
        self.model.load_state_dict(torch.load("ruby_model.pth", weights_only=True))
        self.model.eval()
        self.optimizer = Adam(self.model.parameters(), lr=0.0001)
        self.criterion = CrossEntropyLoss()

    def query_custom_model(self, input_text):
        """Generate a response using the custom GPT model."""
        tokens = torch.tensor(simple_tokenizer(input_text, self.vocab), dtype=torch.long).cuda()
        with torch.no_grad():
            output = self.model(tokens.unsqueeze(0), tokens.unsqueeze(0))
        predicted_idx = output.argmax(-1).squeeze()[-1].item()
        return detokenizer([predicted_idx], self.vocab)

    def train_on_interaction(self, user_input, bot_response):
        """Train the model on a single interaction."""
        self.model.train()
        input_tokens = torch.tensor(simple_tokenizer(user_input, self.vocab), dtype=torch.long).cuda()
        target_tokens = torch.tensor(simple_tokenizer(bot_response, self.vocab), dtype=torch.long).cuda()
        # Pad both sequences to equal length (note: pad index 0 is also the token for "a")
        max_len = max(len(input_tokens), len(target_tokens))
        input_tokens = torch.cat([input_tokens, torch.zeros(max_len - len(input_tokens), dtype=torch.long).cuda()])
        target_tokens = torch.cat([target_tokens, torch.zeros(max_len - len(target_tokens), dtype=torch.long).cuda()])
        # Perform a single training step
        self.optimizer.zero_grad()
        output = self.model(input_tokens.unsqueeze(0), target_tokens.unsqueeze(0))
        loss = self.criterion(output.view(-1, len(self.vocab)), target_tokens.view(-1))
        loss.backward()
        self.optimizer.step()
        self.model.eval()

    def generate_response(self, input_text):
        """Generate a response using the selected model."""
        if self.use_custom_model:
            bot_response = self.query_custom_model(input_text)
            self.memory.add_interaction(input_text, bot_response)
            self.train_on_interaction(input_text, bot_response)
            return bot_response
        elif self.ollama_url:
            # NOTE: query_ollama is not defined on ModelManager in this commit
            return self.query_ollama(input_text)
        else:
            raise ValueError("No valid model selected or configured.")
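
Taken together, generate_response is the "learn as we talk" loop: predict a reply, remember the exchange, then take one gradient step on it. A minimal usage sketch, assuming a CUDA device and the ruby_model.pth checkpoint (per the .cuda() and torch.load calls above):

    from model_manager import ModelManager

    manager = ModelManager(use_custom_model=True)
    reply = manager.generate_response("abc")  # a single predicted character
    print(reply)
    # Side effects: ("abc", reply) is appended to the memory buffer,
    # and the model has taken one training step on this exchange.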

BIN
ruby_model.pth Normal file

Binary file not shown.

23
test.py Normal file

@@ -0,0 +1,23 @@
import torch
from model import TinyGPT
from tokenizer import simple_tokenizer, detokenizer, load_vocab

def test_model():
    vocab = load_vocab()
    # Load the trained model
    model = TinyGPT(vocab_size=len(vocab), embed_size=32, num_heads=2, num_layers=2).cuda()
    model.load_state_dict(torch.load("ruby_model.pth", weights_only=True))
    model.eval()
    # Test input
    test_input = torch.tensor(simple_tokenizer("abc", vocab), dtype=torch.long).cuda()
    with torch.no_grad():
        output = model(test_input.unsqueeze(0), test_input.unsqueeze(0))
    predicted_idx = output.argmax(-1).squeeze()[-1].item()
    predicted_char = detokenizer([predicted_idx], vocab)
    print(f"Ruby says: {predicted_char}")

if __name__ == "__main__":
    test_model()

32
tokenizer.py Normal file

@@ -0,0 +1,32 @@
import json

# Save vocabulary
def save_vocab():
    vocab = {char: idx for idx, char in enumerate("abcdefghijklmnopqrstuvwxyz ")}
    vocab["<unk>"] = len(vocab)  # Add unknown token
    with open('vocab.json', 'w') as f:
        json.dump(vocab, f)

# Load vocabulary
def load_vocab():
    with open('vocab.json', 'r') as f:
        return json.load(f)

# Tokenizer
def simple_tokenizer(text, vocab):
    # Convert text to lowercase and map unknown characters to <unk>
    text = text.lower()
    unk_token = vocab.get("<unk>", None)
    return [vocab[char] if char in vocab else unk_token for char in text]

# Detokenizer
def detokenizer(tokens, vocab):
    reverse_vocab = {idx: char for char, idx in vocab.items()}
    return ''.join(reverse_vocab[token] for token in tokens)

if __name__ == "__main__":
    save_vocab()
    vocab = load_vocab()
    print(simple_tokenizer("hello world", vocab))

46
train.py Normal file

@@ -0,0 +1,46 @@
import torch
from torch import nn
from torch.optim import Adam
from model import TinyGPT
from dataset import create_dataset
from tokenizer import load_vocab

def pad_sequence(seq, max_len):
    """Pad a sequence to the given maximum length."""
    return torch.cat([seq, torch.zeros(max_len - len(seq), dtype=torch.long)], dim=0)

def train_model():
    vocab = load_vocab()
    inputs, targets = create_dataset()
    # Determine the maximum sequence length for padding
    max_len = max(len(seq) for seq in inputs + targets)
    # Pad inputs and targets
    inputs = [pad_sequence(seq, max_len) for seq in inputs]
    targets = [pad_sequence(seq, max_len) for seq in targets]
    # Convert to batch tensors
    inputs = torch.stack(inputs).cuda()
    targets = torch.stack(targets).cuda()
    # Model setup
    model = TinyGPT(vocab_size=len(vocab), embed_size=32, num_heads=2, num_layers=2).cuda()
    criterion = nn.CrossEntropyLoss()
    optimizer = Adam(model.parameters(), lr=0.001)
    # Training loop
    for epoch in range(100):
        optimizer.zero_grad()
        output = model(inputs, targets)
        loss = criterion(output.view(-1, len(vocab)), targets.view(-1))
        loss.backward()
        optimizer.step()
        print(f"Epoch {epoch + 1}, Loss: {loss.item()}")
    # Save the model
    torch.save(model.state_dict(), "ruby_model.pth")
    print("Model saved as ruby_model.pth")

if __name__ == "__main__":
    train_model()

1
vocab.json Normal file

@@ -0,0 +1 @@
{"a": 0, "b": 1, "c": 2, "d": 3, "e": 4, "f": 5, "g": 6, "h": 7, "i": 8, "j": 9, "k": 10, "l": 11, "m": 12, "n": 13, "o": 14, "p": 15, "q": 16, "r": 17, "s": 18, "t": 19, "u": 20, "v": 21, "w": 22, "x": 23, "y": 24, "z": 25, " ": 26, "<unk>": 27}