Started trying to make Ruby learn as we talk to her
parent 326a7b81d7
commit bc3e368d0d
20  dataset.py  Normal file
@@ -0,0 +1,20 @@
import torch
from tokenizer import simple_tokenizer, load_vocab


def create_dataset():
    vocab = load_vocab()
    data = [
        ("a", "b"),
        ("ab", "c"),
        ("abc", "d"),
    ]

    inputs = [torch.tensor(simple_tokenizer(src, vocab), dtype=torch.long) for src, tgt in data]
    targets = [torch.tensor(simple_tokenizer(tgt, vocab), dtype=torch.long) for src, tgt in data]

    return inputs, targets


if __name__ == "__main__":
    inputs, targets = create_dataset()
    print(inputs)
    print(targets)
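For orientation, here is roughly what create_dataset() returns under the vocabulary committed in vocab.json below (a hypothetical interactive check, not part of the commit):

# Hypothetical sanity check; indices follow vocab.json ("a": 0, "b": 1, "c": 2, "d": 3)
inputs, targets = create_dataset()
print(inputs)   # [tensor([0]), tensor([0, 1]), tensor([0, 1, 2])]
print(targets)  # [tensor([1]), tensor([2]), tensor([3])]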
63  main.py
@@ -1,70 +1,35 @@
 import discord
-import requests
-import json
+from model_manager import ModelManager
 import os
 from dotenv import load_dotenv
 
-# Load environment variables from .env file
+# Load environment variables
 load_dotenv()
 
-# Replace with your bot token
-BOT_TOKEN = os.getenv('DISCORD_TOKEN')
-
-# Ollama configuration
-OLLAMA_API_URL = 'http://192.168.1.159:11434/api/generate'  # Adjust if your Ollama setup is different
-
-# Set up the Discord client
+# Discord bot setup
 intents = discord.Intents.default()
 intents.messages = True
 intents.message_content = True
 
 client = discord.Client(intents=intents)
 
-# Function to query Ollama
-def query_ollama(prompt):
-    payload = {
-        "prompt": prompt,
-        "model": "nollama/mythomax-l2-13b:Q4_K_M"  # Replace with your Ollama model
-    }
-    try:
-        response = requests.post(OLLAMA_API_URL, json=payload, stream=True)
-        if response.status_code == 200:
-            collected_response = ""
-            # Stream and parse each line of JSON from the response
-            for line in response.iter_lines(decode_unicode=True):
-                if line.strip():  # Skip empty lines
-                    try:
-                        data = json.loads(line)  # Parse each line as JSON
-                        collected_response += data.get("response", "")
-                        if data.get("done", False):
-                            break
-                    except json.JSONDecodeError as e:
-                        print(f"Error decoding JSON line: {line}, Error: {e}")
-            return collected_response.strip() or "No response from model."
-        else:
-            return f"Error: {response.status_code} - {response.text}"
-    except requests.RequestException as e:
-        return f"Error connecting to Ollama: {str(e)}"
-
-# Event for when the bot is ready
+# Initialize the ModelManager
+USE_CUSTOM_MODEL = True
+OLLAMA_URL = None  # Set to your Ollama endpoint if needed
+model_manager = ModelManager(use_custom_model=USE_CUSTOM_MODEL, ollama_url=OLLAMA_URL)
+
 @client.event
 async def on_ready():
-    print(f'We have logged in as {client.user}')
+    print(f"Logged in as {client.user}")
 
 
-# Event for when a message is sent
 @client.event
 async def on_message(message):
-    # Ignore the bot's own messages
     if message.author == client.user:
         return
 
-    # Respond to all messages except those in DMs
-    if not isinstance(message.channel, discord.DMChannel):
-        response = query_ollama(message.content.strip())
-        await message.channel.send(response)
+    # Generate response
+    user_input = message.content
+    bot_response = model_manager.generate_response(user_input)
+    await message.channel.send(bot_response)
 
-# Run the bot
-client.run(BOT_TOKEN)
+client.run(os.getenv("DISCORD_TOKEN"))
11  memory_buffer.py  Normal file
@@ -0,0 +1,11 @@
from collections import deque

class MemoryBuffer:
    def __init__(self, capacity=10):
        self.buffer = deque(maxlen=capacity)

    def add_interaction(self, user_input, bot_response):
        self.buffer.append((user_input, bot_response))

    def get_data(self):
        return list(self.buffer)
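A minimal usage sketch (hypothetical, not part of the commit): because the buffer is a deque(maxlen=capacity), the oldest interaction is silently dropped once the capacity is reached.

from memory_buffer import MemoryBuffer

memory = MemoryBuffer(capacity=2)
memory.add_interaction("hi", "h")
memory.add_interaction("how are you", "o")
memory.add_interaction("bye", "b")   # evicts the oldest pair ("hi", "h")
print(memory.get_data())             # [('how are you', 'o'), ('bye', 'b')]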
25  model.py  Normal file
@@ -0,0 +1,25 @@
import torch
import torch.nn as nn

class TinyGPT(nn.Module):
    def __init__(self, vocab_size, embed_size, num_heads, num_layers):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embed_size)
        self.transformer = nn.Transformer(
            d_model=embed_size,
            nhead=num_heads,
            num_encoder_layers=num_layers,
            num_decoder_layers=num_layers,
            batch_first=True  # Ensures batch is the first dimension
        )
        self.fc = nn.Linear(embed_size, vocab_size)

    def forward(self, src, tgt):
        # Embed inputs
        src_embed = self.embedding(src)  # Shape: (batch_size, seq_len, embed_size)
        tgt_embed = self.embedding(tgt)  # Shape: (batch_size, seq_len, embed_size)
        # Pass through transformer
        transformer_out = self.transformer(src_embed, tgt_embed)
        # Linear projection to vocabulary size
        output = self.fc(transformer_out)
        return output
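A quick shape check of TinyGPT on CPU (a hypothetical sketch, not part of the commit; the sizes mirror the ones used in train.py):

import torch
from model import TinyGPT

model = TinyGPT(vocab_size=28, embed_size=32, num_heads=2, num_layers=2)
src = torch.zeros(1, 3, dtype=torch.long)  # batch of 1, sequence length 3
tgt = torch.zeros(1, 3, dtype=torch.long)
out = model(src, tgt)
print(out.shape)  # torch.Size([1, 3, 28]): per-position logits over the vocabulary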
62  model_manager.py  Normal file
@@ -0,0 +1,62 @@
import torch
from torch.optim import Adam
from torch.nn import CrossEntropyLoss
from memory_buffer import MemoryBuffer
from model import TinyGPT
from tokenizer import simple_tokenizer, detokenizer, load_vocab

class ModelManager:
    def __init__(self, use_custom_model=True, ollama_url=None):
        self.use_custom_model = use_custom_model
        self.ollama_url = ollama_url
        self.memory = MemoryBuffer(capacity=10)  # Memory for 10 recent interactions
        if self.use_custom_model:
            self._load_custom_model()

    def _load_custom_model(self):
        """Load the custom GPT model."""
        self.vocab = load_vocab()
        self.model = TinyGPT(vocab_size=len(self.vocab), embed_size=32, num_heads=2, num_layers=2).cuda()
        self.model.load_state_dict(torch.load("ruby_model.pth", weights_only=True))
        self.model.eval()
        self.optimizer = Adam(self.model.parameters(), lr=0.0001)
        self.criterion = CrossEntropyLoss()

    def query_custom_model(self, input_text):
        """Generate a response using the custom GPT model."""
        tokens = torch.tensor(simple_tokenizer(input_text, self.vocab), dtype=torch.long).cuda()
        with torch.no_grad():
            output = self.model(tokens.unsqueeze(0), tokens.unsqueeze(0))
        predicted_idx = output.argmax(-1).squeeze()[-1].item()
        return detokenizer([predicted_idx], self.vocab)

    def train_on_interaction(self, user_input, bot_response):
        """Train the model on a single interaction."""
        self.model.train()
        input_tokens = torch.tensor(simple_tokenizer(user_input, self.vocab), dtype=torch.long).cuda()
        target_tokens = torch.tensor(simple_tokenizer(bot_response, self.vocab), dtype=torch.long).cuda()

        # Padding to ensure equal lengths
        max_len = max(len(input_tokens), len(target_tokens))
        input_tokens = torch.cat([input_tokens, torch.zeros(max_len - len(input_tokens), dtype=torch.long).cuda()])
        target_tokens = torch.cat([target_tokens, torch.zeros(max_len - len(target_tokens), dtype=torch.long).cuda()])

        # Perform a single training step
        self.optimizer.zero_grad()
        output = self.model(input_tokens.unsqueeze(0), target_tokens.unsqueeze(0))
        loss = self.criterion(output.view(-1, len(self.vocab)), target_tokens.view(-1))
        loss.backward()
        self.optimizer.step()
        self.model.eval()

    def generate_response(self, input_text):
        """Generate a response using the selected model."""
        if self.use_custom_model:
            bot_response = self.query_custom_model(input_text)
            self.memory.add_interaction(input_text, bot_response)
            self.train_on_interaction(input_text, bot_response)
            return bot_response
        elif self.ollama_url:
            return self.query_ollama(input_text)
        else:
            raise ValueError("No valid model selected or configured.")
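For reference, generate_response() with use_custom_model=True does three things per message: it predicts a single next character, stores the (input, reply) pair in the MemoryBuffer, and runs one gradient step using the model's own reply as the target. A hypothetical call, not part of the commit (requires ruby_model.pth and a CUDA device):

from model_manager import ModelManager

manager = ModelManager(use_custom_model=True)
reply = manager.generate_response("hello ruby")  # one character from query_custom_model
print(reply)
print(manager.memory.get_data())                 # [('hello ruby', reply)]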
BIN  ruby_model.pth  Normal file
Binary file not shown.
23  test.py  Normal file
@@ -0,0 +1,23 @@
import torch
from model import TinyGPT
from tokenizer import simple_tokenizer, detokenizer, load_vocab

def test_model():
    vocab = load_vocab()

    # Load model
    model = TinyGPT(vocab_size=len(vocab), embed_size=32, num_heads=2, num_layers=2).cuda()
    model.load_state_dict(torch.load("ruby_model.pth", weights_only=True))  # Set weights_only=True
    model.eval()

    # Test input
    test_input = torch.tensor(simple_tokenizer("abc", vocab), dtype=torch.long).cuda()
    with torch.no_grad():
        output = model(test_input.unsqueeze(0), test_input.unsqueeze(0))
    predicted_idx = output.argmax(-1).squeeze()[-1].item()

    predicted_char = detokenizer([predicted_idx], vocab)
    print(f"Ruby says: {predicted_char}")

if __name__ == "__main__":
    test_model()
32  tokenizer.py  Normal file
@@ -0,0 +1,32 @@
import json

# Save vocabulary
def save_vocab():
    vocab = {char: idx for idx, char in enumerate("abcdefghijklmnopqrstuvwxyz ")}
    vocab["<unk>"] = len(vocab)  # Add unknown token
    with open('vocab.json', 'w') as f:
        json.dump(vocab, f)


# Load vocabulary
def load_vocab():
    with open('vocab.json', 'r') as f:
        return json.load(f)

# Tokenizer
def simple_tokenizer(text, vocab):
    # Convert text to lowercase and replace unknown characters with <unk>
    text = text.lower()
    unk_token = vocab.get("<unk>", None)
    return [vocab[char] if char in vocab else unk_token for char in text]


# Detokenizer
def detokenizer(tokens, vocab):
    reverse_vocab = {idx: char for char, idx in vocab.items()}
    return ''.join(reverse_vocab[token] for token in tokens)

if __name__ == "__main__":
    save_vocab()
    vocab = load_vocab()
    print(simple_tokenizer("hello world", vocab))
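An illustration of the character-level mapping (hypothetical session; indices follow the vocab.json committed below):

vocab = load_vocab()
print(simple_tokenizer("Hi!", vocab))  # [7, 8, 27]; input is lowercased, '!' maps to <unk>
print(detokenizer([7, 8], vocab))      # "hi"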
46  train.py  Normal file
@@ -0,0 +1,46 @@
import torch
from torch import nn
from torch.optim import Adam
from model import TinyGPT
from dataset import create_dataset
from tokenizer import load_vocab

def pad_sequence(seq, max_len):
    """Pads a sequence to the given maximum length."""
    return torch.cat([seq, torch.zeros(max_len - len(seq), dtype=torch.long)], dim=0)

def train_model():
    vocab = load_vocab()
    inputs, targets = create_dataset()

    # Determine the maximum sequence length for padding
    max_len = max(len(seq) for seq in inputs + targets)

    # Pad inputs and targets
    inputs = [pad_sequence(seq, max_len) for seq in inputs]
    targets = [pad_sequence(seq, max_len) for seq in targets]

    # Convert to batch tensors
    inputs = torch.stack(inputs).cuda()
    targets = torch.stack(targets).cuda()

    # Model setup
    model = TinyGPT(vocab_size=len(vocab), embed_size=32, num_heads=2, num_layers=2).cuda()
    criterion = nn.CrossEntropyLoss()
    optimizer = Adam(model.parameters(), lr=0.001)

    # Training loop
    for epoch in range(100):
        optimizer.zero_grad()
        output = model(inputs, targets)
        loss = criterion(output.view(-1, len(vocab)), targets.view(-1))
        loss.backward()
        optimizer.step()
        print(f"Epoch {epoch + 1}, Loss: {loss.item()}")

    # Save the model
    torch.save(model.state_dict(), "ruby_model.pth")
    print("Model saved as ruby_model.pth")

if __name__ == "__main__":
    train_model()
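With the three pairs from dataset.py, max_len is 3, so both batch tensors end up with shape (3, 3). Worth noting: sequences are padded with 0, which is also the id of "a" in vocab.json, so padding positions look the same as real "a" tokens. A hypothetical look at the padded batch, not part of the commit:

import torch
from dataset import create_dataset
from train import pad_sequence

inputs, targets = create_dataset()
max_len = max(len(seq) for seq in inputs + targets)                        # 3
print(torch.stack([pad_sequence(s, max_len) for s in inputs]).tolist())    # [[0, 0, 0], [0, 1, 0], [0, 1, 2]]
print(torch.stack([pad_sequence(s, max_len) for s in targets]).tolist())   # [[1, 0, 0], [2, 0, 0], [3, 0, 0]]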
1  vocab.json  Normal file
@@ -0,0 +1 @@
{"a": 0, "b": 1, "c": 2, "d": 3, "e": 4, "f": 5, "g": 6, "h": 7, "i": 8, "j": 9, "k": 10, "l": 11, "m": 12, "n": 13, "o": 14, "p": 15, "q": 16, "r": 17, "s": 18, "t": 19, "u": 20, "v": 21, "w": 22, "x": 23, "y": 24, "z": 25, " ": 26, "<unk>": 27}