diff --git a/dataset.py b/dataset.py
new file mode 100644
index 0000000..3766f5c
--- /dev/null
+++ b/dataset.py
@@ -0,0 +1,20 @@
+import torch
+from tokenizer import simple_tokenizer, load_vocab
+
+def create_dataset():
+    vocab = load_vocab()
+    data = [
+        ("a", "b"),
+        ("ab", "c"),
+        ("abc", "d"),
+    ]
+
+    inputs = [torch.tensor(simple_tokenizer(src, vocab), dtype=torch.long) for src, tgt in data]
+    targets = [torch.tensor(simple_tokenizer(tgt, vocab), dtype=torch.long) for src, tgt in data]
+
+    return inputs, targets
+
+if __name__ == "__main__":
+    inputs, targets = create_dataset()
+    print(inputs)
+    print(targets)
diff --git a/main.py b/main.py
index 70d26b7..d8b929a 100644
--- a/main.py
+++ b/main.py
@@ -1,70 +1,35 @@
 import discord
-import requests
-import json
+from model_manager import ModelManager
 import os
 from dotenv import load_dotenv
 
-# Load environment variables from .env file
+# Load environment variables
 load_dotenv()
 
-# Replace with your bot token
-BOT_TOKEN = os.getenv('DISCORD_TOKEN')
-
-# Ollama configuration
-OLLAMA_API_URL = 'http://192.168.1.159:11434/api/generate'  # Adjust if your Ollama setup is different
-
-# Set up the Discord client
+# Discord bot setup
 intents = discord.Intents.default()
 intents.messages = True
 intents.message_content = True
-
 client = discord.Client(intents=intents)
 
+# Initialize the ModelManager
+USE_CUSTOM_MODEL = True
+OLLAMA_URL = None  # Set to your Ollama endpoint if needed
+model_manager = ModelManager(use_custom_model=USE_CUSTOM_MODEL, ollama_url=OLLAMA_URL)
 
-# Function to query Ollama
-def query_ollama(prompt):
-    payload = {
-        "prompt": prompt,
-        "model": "nollama/mythomax-l2-13b:Q4_K_M"  # Replace with your Ollama model
-    }
-    try:
-        response = requests.post(OLLAMA_API_URL, json=payload, stream=True)
-        if response.status_code == 200:
-            collected_response = ""
-            # Stream and parse each line of JSON from the response
-            for line in response.iter_lines(decode_unicode=True):
-                if line.strip():  # Skip empty lines
-                    try:
-                        data = json.loads(line)  # Parse each line as JSON
-                        collected_response += data.get("response", "")
-                        if data.get("done", False):
-                            break
-                    except json.JSONDecodeError as e:
-                        print(f"Error decoding JSON line: {line}, Error: {e}")
-            return collected_response.strip() or "No response from model."
-        else:
-            return f"Error: {response.status_code} - {response.text}"
-    except requests.RequestException as e:
-        return f"Error connecting to Ollama: {str(e)}"
-
-
-# Event for when the bot is ready
 @client.event
 async def on_ready():
-    print(f'We have logged in as {client.user}')
+    print(f"Logged in as {client.user}")
 
-
-# Event for when a message is sent
 @client.event
 async def on_message(message):
-    # Ignore the bot's own messages
     if message.author == client.user:
         return
 
-    # Respond to all messages except those in DMs
-    if not isinstance(message.channel, discord.DMChannel):
-        response = query_ollama(message.content.strip())
-        await message.channel.send(response)
+    # Generate response
+    user_input = message.content
+    bot_response = model_manager.generate_response(user_input)
 
-# Run the bot
-client.run(BOT_TOKEN)
+    await message.channel.send(bot_response)
+
+client.run(os.getenv("DISCORD_TOKEN"))
diff --git a/memory_buffer.py b/memory_buffer.py
new file mode 100644
index 0000000..7e55ebe
--- /dev/null
+++ b/memory_buffer.py
@@ -0,0 +1,11 @@
+from collections import deque
+
+class MemoryBuffer:
+    def __init__(self, capacity=10):
+        self.buffer = deque(maxlen=capacity)
+
+    def add_interaction(self, user_input, bot_response):
+        self.buffer.append((user_input, bot_response))
+
+    def get_data(self):
+        return list(self.buffer)
diff --git a/model.py b/model.py
new file mode 100644
index 0000000..07c04ae
--- /dev/null
+++ b/model.py
@@ -0,0 +1,25 @@
+import torch
+import torch.nn as nn
+
+class TinyGPT(nn.Module):
+    def __init__(self, vocab_size, embed_size, num_heads, num_layers):
+        super().__init__()
+        self.embedding = nn.Embedding(vocab_size, embed_size)
+        self.transformer = nn.Transformer(
+            d_model=embed_size,
+            nhead=num_heads,
+            num_encoder_layers=num_layers,
+            num_decoder_layers=num_layers,
+            batch_first=True  # Ensures batch is the first dimension
+        )
+        self.fc = nn.Linear(embed_size, vocab_size)
+
+    def forward(self, src, tgt):
+        # Embed inputs
+        src_embed = self.embedding(src)  # Shape: (batch_size, seq_len, embed_size)
+        tgt_embed = self.embedding(tgt)  # Shape: (batch_size, seq_len, embed_size)
+        # Pass through transformer
+        transformer_out = self.transformer(src_embed, tgt_embed)
+        # Linear projection to vocabulary size
+        output = self.fc(transformer_out)
+        return output
diff --git a/model_manager.py b/model_manager.py
new file mode 100644
index 0000000..69ee4ad
--- /dev/null
+++ b/model_manager.py
@@ -0,0 +1,62 @@
+import torch
+from torch.optim import Adam
+from torch.nn import CrossEntropyLoss
+from memory_buffer import MemoryBuffer
+from model import TinyGPT
+from tokenizer import simple_tokenizer, detokenizer, load_vocab
+
+class ModelManager:
+    def __init__(self, use_custom_model=True, ollama_url=None):
+        self.use_custom_model = use_custom_model
+        self.ollama_url = ollama_url
+        self.memory = MemoryBuffer(capacity=10)  # Memory for 10 recent interactions
+        if self.use_custom_model:
+            self._load_custom_model()
+
+    def _load_custom_model(self):
+        """Load the custom GPT model."""
+        self.vocab = load_vocab()
+        self.model = TinyGPT(vocab_size=len(self.vocab), embed_size=32, num_heads=2, num_layers=2).cuda()
+        self.model.load_state_dict(torch.load("ruby_model.pth", weights_only=True))
+        self.model.eval()
+        self.optimizer = Adam(self.model.parameters(), lr=0.0001)
+        self.criterion = CrossEntropyLoss()
+
+    def query_custom_model(self, input_text):
+        """Generate a response using the custom GPT model."""
+        tokens = torch.tensor(simple_tokenizer(input_text, self.vocab), dtype=torch.long).cuda()
+        with torch.no_grad():
+            output = self.model(tokens.unsqueeze(0), tokens.unsqueeze(0))
+            predicted_idx = output.argmax(-1).squeeze()[-1].item()
+        return detokenizer([predicted_idx], self.vocab)
+
+    def train_on_interaction(self, user_input, bot_response):
+        """Train the model on a single interaction."""
+        self.model.train()
+        input_tokens = torch.tensor(simple_tokenizer(user_input, self.vocab), dtype=torch.long).cuda()
+        target_tokens = torch.tensor(simple_tokenizer(bot_response, self.vocab), dtype=torch.long).cuda()
+
+        # Padding to ensure equal lengths
+        max_len = max(len(input_tokens), len(target_tokens))
+        input_tokens = torch.cat([input_tokens, torch.zeros(max_len - len(input_tokens), dtype=torch.long).cuda()])
+        target_tokens = torch.cat([target_tokens, torch.zeros(max_len - len(target_tokens), dtype=torch.long).cuda()])
+
+        # Perform a single training step
+        self.optimizer.zero_grad()
+        output = self.model(input_tokens.unsqueeze(0), target_tokens.unsqueeze(0))
+        loss = self.criterion(output.view(-1, len(self.vocab)), target_tokens.view(-1))
+        loss.backward()
+        self.optimizer.step()
+        self.model.eval()
+
+    def generate_response(self, input_text):
+        """Generate a response using the selected model."""
+        if self.use_custom_model:
+            bot_response = self.query_custom_model(input_text)
+            self.memory.add_interaction(input_text, bot_response)
+            self.train_on_interaction(input_text, bot_response)
+            return bot_response
+        elif self.ollama_url:
+            return self.query_ollama(input_text)
+        else:
+            raise ValueError("No valid model selected or configured.")
diff --git a/ruby_model.pth b/ruby_model.pth
new file mode 100644
index 0000000..112a2ad
Binary files /dev/null and b/ruby_model.pth differ
diff --git a/test.py b/test.py
new file mode 100644
index 0000000..a6f18b6
--- /dev/null
+++ b/test.py
@@ -0,0 +1,23 @@
+import torch
+from model import TinyGPT
+from tokenizer import simple_tokenizer, detokenizer, load_vocab
+
+def test_model():
+    vocab = load_vocab()
+
+    # Load model
+    model = TinyGPT(vocab_size=len(vocab), embed_size=32, num_heads=2, num_layers=2).cuda()
+    model.load_state_dict(torch.load("ruby_model.pth", weights_only=True))  # Set weights_only=True
+    model.eval()
+
+    # Test input
+    test_input = torch.tensor(simple_tokenizer("abc", vocab), dtype=torch.long).cuda()
+    with torch.no_grad():
+        output = model(test_input.unsqueeze(0), test_input.unsqueeze(0))
+        predicted_idx = output.argmax(-1).squeeze()[-1].item()
+
+    predicted_char = detokenizer([predicted_idx], vocab)
+    print(f"Ruby says: {predicted_char}")
+
+if __name__ == "__main__":
+    test_model()
diff --git a/tokenizer.py b/tokenizer.py
new file mode 100644
index 0000000..eb4ba22
--- /dev/null
+++ b/tokenizer.py
@@ -0,0 +1,32 @@
+import json
+
+# Save vocabulary
+def save_vocab():
+    vocab = {char: idx for idx, char in enumerate("abcdefghijklmnopqrstuvwxyz ")}
+    vocab["<unk>"] = len(vocab)  # Add unknown token
+    with open('vocab.json', 'w') as f:
+        json.dump(vocab, f)
+
+
+# Load vocabulary
+def load_vocab():
+    with open('vocab.json', 'r') as f:
+        return json.load(f)
+
+# Tokenizer
+def simple_tokenizer(text, vocab):
+    # Convert text to lowercase and replace unknown characters with <unk>
+    text = text.lower()
+    unk_token = vocab.get("<unk>", None)
+    return [vocab[char] if char in vocab else unk_token for char in text]
+
+
+# Detokenizer
+def detokenizer(tokens, vocab):
+    reverse_vocab = {idx: char for char, idx in vocab.items()}
+    return ''.join(reverse_vocab[token] for token in tokens)
+
+if __name__ == "__main__":
+    save_vocab()
+    vocab = load_vocab()
+    print(simple_tokenizer("hello world", vocab))
diff --git a/train.py b/train.py
new file mode 100644
index 0000000..9a5ea2e
--- /dev/null
+++ b/train.py
@@ -0,0 +1,46 @@
+import torch
+from torch import nn
+from torch.optim import Adam
+from model import TinyGPT
+from dataset import create_dataset
+from tokenizer import load_vocab
+
+def pad_sequence(seq, max_len):
+    """Pads a sequence to the given maximum length."""
+    return torch.cat([seq, torch.zeros(max_len - len(seq), dtype=torch.long)], dim=0)
+
+def train_model():
+    vocab = load_vocab()
+    inputs, targets = create_dataset()
+
+    # Determine the maximum sequence length for padding
+    max_len = max(len(seq) for seq in inputs + targets)
+
+    # Pad inputs and targets
+    inputs = [pad_sequence(seq, max_len) for seq in inputs]
+    targets = [pad_sequence(seq, max_len) for seq in targets]
+
+    # Convert to batch tensors
+    inputs = torch.stack(inputs).cuda()
+    targets = torch.stack(targets).cuda()
+
+    # Model setup
+    model = TinyGPT(vocab_size=len(vocab), embed_size=32, num_heads=2, num_layers=2).cuda()
+    criterion = nn.CrossEntropyLoss()
+    optimizer = Adam(model.parameters(), lr=0.001)
+
+    # Training loop
+    for epoch in range(100):
+        optimizer.zero_grad()
+        output = model(inputs, targets)
+        loss = criterion(output.view(-1, len(vocab)), targets.view(-1))
+        loss.backward()
+        optimizer.step()
+        print(f"Epoch {epoch + 1}, Loss: {loss.item()}")
+
+    # Save the model
+    torch.save(model.state_dict(), "ruby_model.pth")
+    print("Model saved as ruby_model.pth")
+
+if __name__ == "__main__":
+    train_model()
diff --git a/vocab.json b/vocab.json
new file mode 100644
index 0000000..af04559
--- /dev/null
+++ b/vocab.json
@@ -0,0 +1 @@
+{"a": 0, "b": 1, "c": 2, "d": 3, "e": 4, "f": 5, "g": 6, "h": 7, "i": 8, "j": 9, "k": 10, "l": 11, "m": 12, "n": 13, "o": 14, "p": 15, "q": 16, "r": 17, "s": 18, "t": 19, "u": 20, "v": 21, "w": 22, "x": 23, "y": 24, "z": 25, " ": 26, "<unk>": 27}
\ No newline at end of file
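
For reference, a minimal end-to-end sketch of how these pieces fit together outside of Discord, assuming the files from this diff are importable, vocab.json has been generated by tokenizer.py, train.py has already produced ruby_model.pth, and a CUDA device is available; the USER_TEXT value is purely illustrative:

# Illustrative usage sketch, not part of the patch above.
from model_manager import ModelManager

manager = ModelManager(use_custom_model=True)   # loads vocab.json and ruby_model.pth onto the GPU

USER_TEXT = "abc"                                # hypothetical user message
reply = manager.generate_response(USER_TEXT)     # predicts one character, then trains on the exchange
print(reply)
print(manager.memory.get_data())                 # MemoryBuffer keeps the last 10 interactions

main.py drives the same generate_response() call for every Discord message it receives, so this snippet exercises the same custom-model path without needing a bot token.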