diff --git a/.vscode/launch.json b/.vscode/launch.json
new file mode 100644
index 0000000..eef7e53
--- /dev/null
+++ b/.vscode/launch.json
@@ -0,0 +1,15 @@
+{
+    // Use IntelliSense to learn about possible attributes.
+    // Hover to view descriptions of existing attributes.
+    // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
+    "version": "0.2.0",
+    "configurations": [
+        {
+            "name": "Phoebe",
+            "type": "debugpy",
+            "request": "launch",
+            "program": "E:\\Development\\AI Development\\Phoebe\\phoebe\\discord_bot.py",
+            "console": "integratedTerminal"
+        }
+    ]
+}
diff --git a/phoebe/discord_bot.py b/phoebe/discord_bot.py
index fe660a6..80be9e1 100644
--- a/phoebe/discord_bot.py
+++ b/phoebe/discord_bot.py
@@ -1,13 +1,39 @@
 import discord
-import os
-from dotenv import load_dotenv
 from train_gpt_model import process_message
 from gpt_model import load_model
+import torch
+from dotenv import load_dotenv
+import os
 
 load_dotenv()
 
+
 # Discord bot token
 TOKEN = os.getenv("DISCORD_TOKEN")
 
+# Load the vocabulary
+with open("vocab.txt", "r", encoding="utf-8") as f:
+    text = f.read()
+    chars = sorted(list(set(text)))
+
+# Ensure that space and other special characters are included
+required_chars = " \n\r\t"
+for char in required_chars:
+    if char not in chars:
+        chars.append(char)
+
+# Add a special token for unknown characters
+special_token = "<unk>"
+if special_token not in chars:
+    chars.append(special_token)
+
+vocab_size = len(chars)
+string_to_int = {ch: i for i, ch in enumerate(chars)}
+int_to_string = {i: ch for i, ch in enumerate(chars)}
+
+# Initialize and load the model
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+model = load_model(vocab_size, "phoebe_model.pt").to(device)
+
 # Initialize Discord client
 intents = discord.Intents.default()
 intents.message_content = True
@@ -17,7 +43,6 @@ client = discord.Client(intents=intents)
 @client.event
 async def on_ready():
     print(f"We have logged in as {client.user}")
-    load_model(5641, "phoebe_model.pt")
 
 
 @client.event
@@ -25,6 +50,9 @@ async def on_message(message):
     if message.author == client.user:
         return
 
+    # Debug: print the message content
+    print(f"Received message: '{message.content}'")
+
     # Process the message and get a response
     response = process_message(message.content)
 
diff --git a/phoebe/train_gpt_model.py b/phoebe/train_gpt_model.py
index f1fcc55..27ad604 100644
--- a/phoebe/train_gpt_model.py
+++ b/phoebe/train_gpt_model.py
@@ -1,24 +1,22 @@
 import torch
-import mmap
 import random
-from gpt_model import GPT, encode, decode
+from gpt_model import encode, decode, load_model
 
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 
 # Hyperparameters
 batch_size = 64
 block_size = 256
-max_iters = 500
-learning_rate = 2e-5
+max_iters = 3000
+learning_rate = 2e-4
 eval_iters = 250
 dropout = 0.2
 
-chars = ""
+# Load the vocabulary and encoded data
 with open("vocab.txt", "r", encoding="utf-8") as f:
     text = f.read()
     chars = sorted(list(set(text)))
 
-# Ensure that space and other special characters are included
 # Ensure that space and other special characters are included
 required_chars = " \n\r\t"
 for char in required_chars:
@@ -34,34 +32,33 @@
 vocab_size = len(chars)
 string_to_int = {ch: i for i, ch in enumerate(chars)}
 int_to_string = {i: ch for i, ch in enumerate(chars)}
 
+# Load and preprocess training and validation data from .txt files
+with open("train_split.txt", "r", encoding="utf-8") as f:
+    train_data = f.read()
 
-def get_random_chunk(split):
-    filename = "train_split.txt" if split == "train" else "eval_split.txt"
-    with open(filename, "rb") as f:
-        with mmap.mmap(f.fileno(), length=0, access=mmap.ACCESS_READ) as mm:
-            file_size = len(mm)
-            start = random.randint(0, file_size - block_size * batch_size)
-            mm.seek(start)
-            block = mm.read(block_size * batch_size - 1)
-            decoded_block = block.decode("utf-8", errors="ignore").replace(
-                "\r", ""
-            )
-            data = torch.tensor(
-                encode(decoded_block, string_to_int), dtype=torch.long
-            )
-    return data
+with open("eval_split.txt", "r", encoding="utf-8") as f:
+    val_data = f.read()
+
+train_data = torch.tensor(encode(train_data, string_to_int), dtype=torch.long)
+val_data = torch.tensor(encode(val_data, string_to_int), dtype=torch.long)
 
 
-def get_batch(split):
-    data = get_random_chunk(split)
-    ix = torch.randint(len(data) - block_size, (batch_size,))
-    x = torch.stack([data[i : i + block_size] for i in ix])
-    y = torch.stack([data[i + 1 : i + block_size + 1] for i in ix])
+def get_random_chunk(data, chunk_size):
+    start = random.randint(0, len(data) - chunk_size - 1)
+    chunk = data[start : start + chunk_size]
+    return chunk
+
+
+def get_batch(data, block_size, batch_size):
+    chunk_size = block_size * (batch_size + 1)
+    chunk = get_random_chunk(data, chunk_size)
+    x = chunk[: block_size * batch_size].view(batch_size, block_size)
+    y = chunk[1 : block_size * batch_size + 1].view(batch_size, block_size)
     x, y = x.to(device), y.to(device)
     return x, y
 
 
-model = GPT(vocab_size).to(device)
+model = load_model(vocab_size).to(device)
 
 
 @torch.no_grad()
@@ -69,9 +66,10 @@ def estimate_loss():
     out = {}
     model.eval()
     for split in ["train", "val"]:
+        data = train_data if split == "train" else val_data
         losses = torch.zeros(eval_iters)
         for k in range(eval_iters):
-            X, Y = get_batch(split)
+            X, Y = get_batch(data, block_size, batch_size)
             logits, loss = model(X, Y)
             losses[k] = loss.item()
         out[split] = losses.mean().item()
@@ -88,7 +86,7 @@ def train_model():
                 f"step {iter}: train loss {losses['train']:.3f}, "
                 f"val loss {losses['val']:.3f}"
             )
-        xb, yb = get_batch("train")
+        xb, yb = get_batch(train_data, block_size, batch_size)
         logits, loss = model(xb, yb)
         optimizer.zero_grad(set_to_none=True)
         loss.backward()
@@ -107,12 +105,15 @@ def check_input_chars(s, string_to_int):
 
 
 def process_message(message):
+    print(f"Processing message: '{message}'")  # Debug print
     if not message.strip():
+        print("Message is empty or invalid.")  # Debug print
         return "Message is empty or invalid."
 
     # Check for unknown characters
     unknown_chars = check_input_chars(message, string_to_int)
     if unknown_chars:
+        print(f"Message contains unknown characters: {unknown_chars}")
         return f"Message contains unknown characters: {unknown_chars}"
 
     encoded_text = torch.tensor(
@@ -120,11 +121,14 @@
     ).to(device)
     print(f"Encoded text shape: {encoded_text.shape}")  # Debug print
     if encoded_text.size(1) == 0:
+        print("Message could not be processed.")  # Debug print
         return "Message could not be processed."
 
     response = model.generate(encoded_text, max_new_tokens=50)
     decoded_response = decode(response[0].tolist(), int_to_string)
+    print(f"Generated response: '{decoded_response}'")  # Debug print
     return decoded_response
 
 
-# train_model()
+if __name__ == "__main__":
+    train_model()