Started trying to make Ruby learn as we talk to her
parent 326a7b81d7
commit bc3e368d0d
20  dataset.py  Normal file
@@ -0,0 +1,20 @@
import torch
from tokenizer import simple_tokenizer, load_vocab

def create_dataset():
    vocab = load_vocab()
    data = [
        ("a", "b"),
        ("ab", "c"),
        ("abc", "d"),
    ]

    inputs = [torch.tensor(simple_tokenizer(src, vocab), dtype=torch.long) for src, tgt in data]
    targets = [torch.tensor(simple_tokenizer(tgt, vocab), dtype=torch.long) for src, tgt in data]

    return inputs, targets

if __name__ == "__main__":
    inputs, targets = create_dataset()
    print(inputs)
    print(targets)
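For orientation, this is what create_dataset() hands back once the vocab.json committed further down exists; the values in the comments are worked out from that vocabulary by hand, not captured from a run.

from dataset import create_dataset

inputs, targets = create_dataset()
print(inputs)   # [tensor([0]), tensor([0, 1]), tensor([0, 1, 2])]  <- "a", "ab", "abc"
print(targets)  # [tensor([1]), tensor([2]), tensor([3])]           <- "b", "c", "d"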
63  main.py
@@ -1,70 +1,35 @@
import discord
import requests
import json
from model_manager import ModelManager
import os
from dotenv import load_dotenv

# Load environment variables from .env file
# Load environment variables
load_dotenv()

# Replace with your bot token
BOT_TOKEN = os.getenv('DISCORD_TOKEN')

# Ollama configuration
OLLAMA_API_URL = 'http://192.168.1.159:11434/api/generate'  # Adjust if your Ollama setup is different

# Set up the Discord client
# Discord bot setup
intents = discord.Intents.default()
intents.messages = True
intents.message_content = True

client = discord.Client(intents=intents)

# Initialize the ModelManager
USE_CUSTOM_MODEL = True
OLLAMA_URL = None  # Set to your Ollama endpoint if needed
model_manager = ModelManager(use_custom_model=USE_CUSTOM_MODEL, ollama_url=OLLAMA_URL)

# Function to query Ollama
def query_ollama(prompt):
    payload = {
        "prompt": prompt,
        "model": "nollama/mythomax-l2-13b:Q4_K_M"  # Replace with your Ollama model
    }
    try:
        response = requests.post(OLLAMA_API_URL, json=payload, stream=True)
        if response.status_code == 200:
            collected_response = ""
            # Stream and parse each line of JSON from the response
            for line in response.iter_lines(decode_unicode=True):
                if line.strip():  # Skip empty lines
                    try:
                        data = json.loads(line)  # Parse each line as JSON
                        collected_response += data.get("response", "")
                        if data.get("done", False):
                            break
                    except json.JSONDecodeError as e:
                        print(f"Error decoding JSON line: {line}, Error: {e}")
            return collected_response.strip() or "No response from model."
        else:
            return f"Error: {response.status_code} - {response.text}"
    except requests.RequestException as e:
        return f"Error connecting to Ollama: {str(e)}"


# Event for when the bot is ready
@client.event
async def on_ready():
    print(f'We have logged in as {client.user}')
    print(f"Logged in as {client.user}")


# Event for when a message is sent
@client.event
async def on_message(message):
    # Ignore the bot's own messages
    if message.author == client.user:
        return

    # Respond to all messages except those in DMs
    if not isinstance(message.channel, discord.DMChannel):
        response = query_ollama(message.content.strip())
        await message.channel.send(response)
    # Generate response
    user_input = message.content
    bot_response = model_manager.generate_response(user_input)

# Run the bot
client.run(BOT_TOKEN)
    await message.channel.send(bot_response)

client.run(os.getenv("DISCORD_TOKEN"))
11  memory_buffer.py  Normal file
@@ -0,0 +1,11 @@
from collections import deque

class MemoryBuffer:
    def __init__(self, capacity=10):
        self.buffer = deque(maxlen=capacity)

    def add_interaction(self, user_input, bot_response):
        self.buffer.append((user_input, bot_response))

    def get_data(self):
        return list(self.buffer)
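A quick illustration of the rolling window, with invented strings rather than anything from the commit: once the deque is full, adding a new exchange silently drops the oldest one.

from memory_buffer import MemoryBuffer

buf = MemoryBuffer(capacity=2)
buf.add_interaction("hi", "a")
buf.add_interaction("how are you", "b")
buf.add_interaction("bye", "c")  # evicts ('hi', 'a'), the oldest pair
print(buf.get_data())            # [('how are you', 'b'), ('bye', 'c')]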
25  model.py  Normal file
@@ -0,0 +1,25 @@
import torch
import torch.nn as nn

class TinyGPT(nn.Module):
    def __init__(self, vocab_size, embed_size, num_heads, num_layers):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embed_size)
        self.transformer = nn.Transformer(
            d_model=embed_size,
            nhead=num_heads,
            num_encoder_layers=num_layers,
            num_decoder_layers=num_layers,
            batch_first=True  # Ensures batch is the first dimension
        )
        self.fc = nn.Linear(embed_size, vocab_size)

    def forward(self, src, tgt):
        # Embed inputs
        src_embed = self.embedding(src)  # Shape: (batch_size, seq_len, embed_size)
        tgt_embed = self.embedding(tgt)  # Shape: (batch_size, seq_len, embed_size)
        # Pass through transformer
        transformer_out = self.transformer(src_embed, tgt_embed)
        # Linear projection to vocabulary size
        output = self.fc(transformer_out)
        return output
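As a rough shape check of the forward pass, assuming the hyperparameters used elsewhere in this commit (vocab size 28, embed size 32, 2 heads, 2 layers); the token ids are random placeholders and the model is run on CPU just to show the shapes.

import torch
from model import TinyGPT

model = TinyGPT(vocab_size=28, embed_size=32, num_heads=2, num_layers=2)
src = torch.randint(0, 28, (1, 3))  # (batch, seq_len) of token ids
tgt = torch.randint(0, 28, (1, 3))
logits = model(src, tgt)
print(logits.shape)  # torch.Size([1, 3, 28]) -- one row of vocabulary logits per target position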
62  model_manager.py  Normal file
@@ -0,0 +1,62 @@
import torch
from torch.optim import Adam
from torch.nn import CrossEntropyLoss
from memory_buffer import MemoryBuffer
from model import TinyGPT
from tokenizer import simple_tokenizer, detokenizer, load_vocab

class ModelManager:
    def __init__(self, use_custom_model=True, ollama_url=None):
        self.use_custom_model = use_custom_model
        self.ollama_url = ollama_url
        self.memory = MemoryBuffer(capacity=10)  # Memory for 10 recent interactions
        if self.use_custom_model:
            self._load_custom_model()

    def _load_custom_model(self):
        """Load the custom GPT model."""
        self.vocab = load_vocab()
        self.model = TinyGPT(vocab_size=len(self.vocab), embed_size=32, num_heads=2, num_layers=2).cuda()
        self.model.load_state_dict(torch.load("ruby_model.pth", weights_only=True))
        self.model.eval()
        self.optimizer = Adam(self.model.parameters(), lr=0.0001)
        self.criterion = CrossEntropyLoss()

    def query_custom_model(self, input_text):
        """Generate a response using the custom GPT model."""
        tokens = torch.tensor(simple_tokenizer(input_text, self.vocab), dtype=torch.long).cuda()
        with torch.no_grad():
            output = self.model(tokens.unsqueeze(0), tokens.unsqueeze(0))
        predicted_idx = output.argmax(-1).squeeze()[-1].item()
        return detokenizer([predicted_idx], self.vocab)

    def train_on_interaction(self, user_input, bot_response):
        """Train the model on a single interaction."""
        self.model.train()
        input_tokens = torch.tensor(simple_tokenizer(user_input, self.vocab), dtype=torch.long).cuda()
        target_tokens = torch.tensor(simple_tokenizer(bot_response, self.vocab), dtype=torch.long).cuda()

        # Padding to ensure equal lengths
        max_len = max(len(input_tokens), len(target_tokens))
        input_tokens = torch.cat([input_tokens, torch.zeros(max_len - len(input_tokens), dtype=torch.long).cuda()])
        target_tokens = torch.cat([target_tokens, torch.zeros(max_len - len(target_tokens), dtype=torch.long).cuda()])

        # Perform a single training step
        self.optimizer.zero_grad()
        output = self.model(input_tokens.unsqueeze(0), target_tokens.unsqueeze(0))
        loss = self.criterion(output.view(-1, len(self.vocab)), target_tokens.view(-1))
        loss.backward()
        self.optimizer.step()
        self.model.eval()

    def generate_response(self, input_text):
        """Generate a response using the selected model."""
        if self.use_custom_model:
            bot_response = self.query_custom_model(input_text)
            self.memory.add_interaction(input_text, bot_response)
            self.train_on_interaction(input_text, bot_response)
            return bot_response
        elif self.ollama_url:
            return self.query_ollama(input_text)
        else:
            raise ValueError("No valid model selected or configured.")
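How the manager is meant to be driven, as a sketch rather than anything in the commit (the input string is invented; note also that the ollama_url branch calls self.query_ollama, which this file does not yet define).

from model_manager import ModelManager

manager = ModelManager(use_custom_model=True)  # loads ruby_model.pth onto the GPU
reply = manager.generate_response("abc")       # answer, remember the exchange, then take one training step on it
print(reply)
print(manager.memory.get_data())               # the ("abc", reply) pair is now in the rolling buffer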
BIN  ruby_model.pth  Normal file
Binary file not shown.
23  test.py  Normal file
@@ -0,0 +1,23 @@
import torch
from model import TinyGPT
from tokenizer import simple_tokenizer, detokenizer, load_vocab

def test_model():
    vocab = load_vocab()

    # Load model
    model = TinyGPT(vocab_size=len(vocab), embed_size=32, num_heads=2, num_layers=2).cuda()
    model.load_state_dict(torch.load("ruby_model.pth", weights_only=True))  # Set weights_only=True
    model.eval()

    # Test input
    test_input = torch.tensor(simple_tokenizer("abc", vocab), dtype=torch.long).cuda()
    with torch.no_grad():
        output = model(test_input.unsqueeze(0), test_input.unsqueeze(0))
    predicted_idx = output.argmax(-1).squeeze()[-1].item()

    predicted_char = detokenizer([predicted_idx], vocab)
    print(f"Ruby says: {predicted_char}")

if __name__ == "__main__":
    test_model()
32  tokenizer.py  Normal file
@@ -0,0 +1,32 @@
import json

# Save vocabulary
def save_vocab():
    vocab = {char: idx for idx, char in enumerate("abcdefghijklmnopqrstuvwxyz ")}
    vocab["<unk>"] = len(vocab)  # Add unknown token
    with open('vocab.json', 'w') as f:
        json.dump(vocab, f)


# Load vocabulary
def load_vocab():
    with open('vocab.json', 'r') as f:
        return json.load(f)

# Tokenizer
def simple_tokenizer(text, vocab):
    # Convert text to lowercase and replace unknown characters with <unk>
    text = text.lower()
    unk_token = vocab.get("<unk>", None)
    return [vocab[char] if char in vocab else unk_token for char in text]


# Detokenizer
def detokenizer(tokens, vocab):
    reverse_vocab = {idx: char for char, idx in vocab.items()}
    return ''.join(reverse_vocab[token] for token in tokens)

if __name__ == "__main__":
    save_vocab()
    vocab = load_vocab()
    print(simple_tokenizer("hello world", vocab))
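A round trip under this vocabulary (the input string is made up): anything outside a-z and space falls back to the <unk> index, so detokenizing is lossy for such characters.

from tokenizer import save_vocab, load_vocab, simple_tokenizer, detokenizer

save_vocab()                       # writes vocab.json with a-z, space, and <unk>
vocab = load_vocab()
tokens = simple_tokenizer("Hi!", vocab)
print(tokens)                      # [7, 8, 27] -- 'h', 'i', then <unk> for '!'
print(detokenizer(tokens, vocab))  # 'hi<unk>'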
46  train.py  Normal file
@@ -0,0 +1,46 @@
import torch
from torch import nn
from torch.optim import Adam
from model import TinyGPT
from dataset import create_dataset
from tokenizer import load_vocab

def pad_sequence(seq, max_len):
    """Pads a sequence to the given maximum length."""
    return torch.cat([seq, torch.zeros(max_len - len(seq), dtype=torch.long)], dim=0)

def train_model():
    vocab = load_vocab()
    inputs, targets = create_dataset()

    # Determine the maximum sequence length for padding
    max_len = max(len(seq) for seq in inputs + targets)

    # Pad inputs and targets
    inputs = [pad_sequence(seq, max_len) for seq in inputs]
    targets = [pad_sequence(seq, max_len) for seq in targets]

    # Convert to batch tensors
    inputs = torch.stack(inputs).cuda()
    targets = torch.stack(targets).cuda()

    # Model setup
    model = TinyGPT(vocab_size=len(vocab), embed_size=32, num_heads=2, num_layers=2).cuda()
    criterion = nn.CrossEntropyLoss()
    optimizer = Adam(model.parameters(), lr=0.001)

    # Training loop
    for epoch in range(100):
        optimizer.zero_grad()
        output = model(inputs, targets)
        loss = criterion(output.view(-1, len(vocab)), targets.view(-1))
        loss.backward()
        optimizer.step()
        print(f"Epoch {epoch + 1}, Loss: {loss.item()}")

    # Save the model
    torch.save(model.state_dict(), "ruby_model.pth")
    print("Model saved as ruby_model.pth")

if __name__ == "__main__":
    train_model()
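One caveat worth flagging, since the committed vocabulary maps 'a' to index 0: pad_sequence pads with zeros, so padded positions are indistinguishable from the letter 'a' and the loss trains on them. A sketch of one way around that uses a dedicated padding index that CrossEntropyLoss can ignore; the <pad> token and the enlarged vocabulary are assumptions, not part of this commit.

import torch
from torch import nn

PAD_IDX = 28  # hypothetical: would require adding "<pad>": 28 to vocab.json and growing the model's vocab_size to 29

def pad_sequence(seq, max_len, pad_idx=PAD_IDX):
    """Pad with a dedicated index instead of 0, which is the token for 'a'."""
    return torch.cat([seq, torch.full((max_len - len(seq),), pad_idx, dtype=torch.long)], dim=0)

criterion = nn.CrossEntropyLoss(ignore_index=PAD_IDX)  # padded target positions no longer contribute to the loss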
1  vocab.json  Normal file
@@ -0,0 +1 @@
{"a": 0, "b": 1, "c": 2, "d": 3, "e": 4, "f": 5, "g": 6, "h": 7, "i": 8, "j": 9, "k": 10, "l": 11, "m": 12, "n": 13, "o": 14, "p": 15, "q": 16, "r": 17, "s": 18, "t": 19, "u": 20, "v": 21, "w": 22, "x": 23, "y": 24, "z": 25, " ": 26, "<unk>": 27}