Compare commits
No commits in common. "GPT" and "main" have entirely different histories.
dataset.py (20 lines removed)
@@ -1,20 +0,0 @@
import torch
from tokenizer import simple_tokenizer, load_vocab

def create_dataset():
    vocab = load_vocab()
    data = [
        ("a", "b"),
        ("ab", "c"),
        ("abc", "d"),
    ]

    inputs = [torch.tensor(simple_tokenizer(src, vocab), dtype=torch.long) for src, tgt in data]
    targets = [torch.tensor(simple_tokenizer(tgt, vocab), dtype=torch.long) for src, tgt in data]

    return inputs, targets

if __name__ == "__main__":
    inputs, targets = create_dataset()
    print(inputs)
    print(targets)
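For orientation, a sketch of what these three pairs tokenize to, assuming the vocab.json at the bottom of this diff ("a" -> 0 ... "d" -> 3) is already on disk; this is a hypothetical check, not part of the diff:

# Expected output given the vocab.json in this compare:
from dataset import create_dataset

inputs, targets = create_dataset()
print(inputs)   # [tensor([0]), tensor([0, 1]), tensor([0, 1, 2])]  ("a", "ab", "abc")
print(targets)  # [tensor([1]), tensor([2]), tensor([3])]           ("b", "c", "d")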
main.py (63 changed lines)
@@ -1,35 +1,70 @@
Old version (35 lines):

import discord
from model_manager import ModelManager
import os
from dotenv import load_dotenv

# Load environment variables
load_dotenv()

# Discord bot setup
intents = discord.Intents.default()
intents.messages = True
intents.message_content = True

client = discord.Client(intents=intents)

# Initialize the ModelManager
USE_CUSTOM_MODEL = True
OLLAMA_URL = None  # Set to your Ollama endpoint if needed
model_manager = ModelManager(use_custom_model=USE_CUSTOM_MODEL, ollama_url=OLLAMA_URL)


@client.event
async def on_ready():
    print(f"Logged in as {client.user}")


@client.event
async def on_message(message):
    if message.author == client.user:
        return

    # Generate response
    user_input = message.content
    bot_response = model_manager.generate_response(user_input)

    await message.channel.send(bot_response)

client.run(os.getenv("DISCORD_TOKEN"))

New version (70 lines):

import discord
import requests
import json
import os
from dotenv import load_dotenv

# Load environment variables from .env file
load_dotenv()

# Replace with your bot token
BOT_TOKEN = os.getenv('DISCORD_TOKEN')

# Ollama configuration
OLLAMA_API_URL = 'http://192.168.1.159:11434/api/generate'  # Adjust if your Ollama setup is different

# Set up the Discord client
intents = discord.Intents.default()
intents.messages = True
intents.message_content = True

client = discord.Client(intents=intents)

# Function to query Ollama
def query_ollama(prompt):
    payload = {
        "prompt": prompt,
        "model": "nollama/mythomax-l2-13b:Q4_K_M"  # Replace with your Ollama model
    }
    try:
        response = requests.post(OLLAMA_API_URL, json=payload, stream=True)
        if response.status_code == 200:
            collected_response = ""
            # Stream and parse each line of JSON from the response
            for line in response.iter_lines(decode_unicode=True):
                if line.strip():  # Skip empty lines
                    try:
                        data = json.loads(line)  # Parse each line as JSON
                        collected_response += data.get("response", "")
                        if data.get("done", False):
                            break
                    except json.JSONDecodeError as e:
                        print(f"Error decoding JSON line: {line}, Error: {e}")
            return collected_response.strip() or "No response from model."
        else:
            return f"Error: {response.status_code} - {response.text}"
    except requests.RequestException as e:
        return f"Error connecting to Ollama: {str(e)}"


# Event for when the bot is ready
@client.event
async def on_ready():
    print(f'We have logged in as {client.user}')


# Event for when a message is sent
@client.event
async def on_message(message):
    # Ignore the bot's own messages
    if message.author == client.user:
        return

    # Respond to all messages except those in DMs
    if not isinstance(message.channel, discord.DMChannel):
        response = query_ollama(message.content.strip())
        await message.channel.send(response)

# Run the bot
client.run(BOT_TOKEN)
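The new query_ollama relies on Ollama's /api/generate endpoint streaming newline-delimited JSON, each line carrying a "response" fragment and a final "done": true marker. A minimal sketch of exercising the same endpoint outside Discord, assuming the host and model tag from main.py are reachable; this script is hypothetical and not part of the diff:

# Standalone smoke test: stream one completion and print it.
# Each streamed line is a JSON object like {"response": "...", "done": false}.
import json
import requests

OLLAMA_API_URL = 'http://192.168.1.159:11434/api/generate'  # assumes the same host as main.py

payload = {"model": "nollama/mythomax-l2-13b:Q4_K_M", "prompt": "Say hello in one sentence."}
with requests.post(OLLAMA_API_URL, json=payload, stream=True) as resp:
    text = ""
    for line in resp.iter_lines(decode_unicode=True):
        if line.strip():
            chunk = json.loads(line)
            text += chunk.get("response", "")
            if chunk.get("done", False):
                break
print(text.strip())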
memory_buffer.py (11 lines removed)
@@ -1,11 +0,0 @@
from collections import deque


class MemoryBuffer:
    def __init__(self, capacity=10):
        self.buffer = deque(maxlen=capacity)

    def add_interaction(self, user_input, bot_response):
        self.buffer.append((user_input, bot_response))

    def get_data(self):
        return list(self.buffer)
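A quick sketch of the eviction behaviour this class inherits from deque(maxlen=...): once capacity is reached, the oldest interaction is dropped. The example values are made up for illustration.

buf = MemoryBuffer(capacity=2)
buf.add_interaction("hi", "h")
buf.add_interaction("hello", "e")
buf.add_interaction("hey", "y")
print(buf.get_data())  # [('hello', 'e'), ('hey', 'y')]  (the first pair was evicted)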
model.py (25 lines removed)
@@ -1,25 +0,0 @@
import torch
import torch.nn as nn


class TinyGPT(nn.Module):
    def __init__(self, vocab_size, embed_size, num_heads, num_layers):
        super().__init__()
        self.embedding = nn.Embedding(vocab_size, embed_size)
        self.transformer = nn.Transformer(
            d_model=embed_size,
            nhead=num_heads,
            num_encoder_layers=num_layers,
            num_decoder_layers=num_layers,
            batch_first=True  # Ensures batch is the first dimension
        )
        self.fc = nn.Linear(embed_size, vocab_size)

    def forward(self, src, tgt):
        # Embed inputs
        src_embed = self.embedding(src)  # Shape: (batch_size, seq_len, embed_size)
        tgt_embed = self.embedding(tgt)  # Shape: (batch_size, seq_len, embed_size)
        # Pass through transformer
        transformer_out = self.transformer(src_embed, tgt_embed)
        # Linear projection to vocabulary size
        output = self.fc(transformer_out)
        return output
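For orientation, a minimal shape check for this module, using random token IDs and untrained weights on CPU (so it does not assume CUDA); not part of the diff:

import torch
from model import TinyGPT

model = TinyGPT(vocab_size=28, embed_size=32, num_heads=2, num_layers=2)  # 28 matches the vocab.json below
src = torch.randint(0, 28, (1, 5))   # (batch, seq_len)
tgt = torch.randint(0, 28, (1, 5))
out = model(src, tgt)
print(out.shape)  # torch.Size([1, 5, 28]): one logit vector per target position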
model_manager.py (62 lines removed)
@@ -1,62 +0,0 @@
import torch
from torch.optim import Adam
from torch.nn import CrossEntropyLoss
from memory_buffer import MemoryBuffer
from model import TinyGPT
from tokenizer import simple_tokenizer, detokenizer, load_vocab


class ModelManager:
    def __init__(self, use_custom_model=True, ollama_url=None):
        self.use_custom_model = use_custom_model
        self.ollama_url = ollama_url
        self.memory = MemoryBuffer(capacity=10)  # Memory for 10 recent interactions
        if self.use_custom_model:
            self._load_custom_model()

    def _load_custom_model(self):
        """Load the custom GPT model."""
        self.vocab = load_vocab()
        self.model = TinyGPT(vocab_size=len(self.vocab), embed_size=32, num_heads=2, num_layers=2).cuda()
        self.model.load_state_dict(torch.load("ruby_model.pth", weights_only=True))
        self.model.eval()
        self.optimizer = Adam(self.model.parameters(), lr=0.0001)
        self.criterion = CrossEntropyLoss()

    def query_custom_model(self, input_text):
        """Generate a response using the custom GPT model."""
        tokens = torch.tensor(simple_tokenizer(input_text, self.vocab), dtype=torch.long).cuda()
        with torch.no_grad():
            output = self.model(tokens.unsqueeze(0), tokens.unsqueeze(0))
        predicted_idx = output.argmax(-1).squeeze()[-1].item()
        return detokenizer([predicted_idx], self.vocab)

    def train_on_interaction(self, user_input, bot_response):
        """Train the model on a single interaction."""
        self.model.train()
        input_tokens = torch.tensor(simple_tokenizer(user_input, self.vocab), dtype=torch.long).cuda()
        target_tokens = torch.tensor(simple_tokenizer(bot_response, self.vocab), dtype=torch.long).cuda()

        # Padding to ensure equal lengths
        max_len = max(len(input_tokens), len(target_tokens))
        input_tokens = torch.cat([input_tokens, torch.zeros(max_len - len(input_tokens), dtype=torch.long).cuda()])
        target_tokens = torch.cat([target_tokens, torch.zeros(max_len - len(target_tokens), dtype=torch.long).cuda()])

        # Perform a single training step
        self.optimizer.zero_grad()
        output = self.model(input_tokens.unsqueeze(0), target_tokens.unsqueeze(0))
        loss = self.criterion(output.view(-1, len(self.vocab)), target_tokens.view(-1))
        loss.backward()
        self.optimizer.step()
        self.model.eval()

    def generate_response(self, input_text):
        """Generate a response using the selected model."""
        if self.use_custom_model:
            bot_response = self.query_custom_model(input_text)
            self.memory.add_interaction(input_text, bot_response)
            self.train_on_interaction(input_text, bot_response)
            return bot_response
        elif self.ollama_url:
            return self.query_ollama(input_text)
        else:
            raise ValueError("No valid model selected or configured.")
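Taken together with the old main.py above, the custom-model path works roughly like this; a sketch that assumes a CUDA device and a trained ruby_model.pth in the working directory, not part of the diff:

from model_manager import ModelManager

mm = ModelManager(use_custom_model=True)
reply = mm.generate_response("abc")  # predicts a single character, logs it to memory,
print(reply)                         # then runs one training step on that exchange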
ruby_model.pth (binary)
Binary file not shown.
test.py (23 lines removed)
@@ -1,23 +0,0 @@
import torch
from model import TinyGPT
from tokenizer import simple_tokenizer, detokenizer, load_vocab


def test_model():
    vocab = load_vocab()

    # Load model
    model = TinyGPT(vocab_size=len(vocab), embed_size=32, num_heads=2, num_layers=2).cuda()
    model.load_state_dict(torch.load("ruby_model.pth", weights_only=True))  # Set weights_only=True
    model.eval()

    # Test input
    test_input = torch.tensor(simple_tokenizer("abc", vocab), dtype=torch.long).cuda()
    with torch.no_grad():
        output = model(test_input.unsqueeze(0), test_input.unsqueeze(0))
        predicted_idx = output.argmax(-1).squeeze()[-1].item()

    predicted_char = detokenizer([predicted_idx], vocab)
    print(f"Ruby says: {predicted_char}")


if __name__ == "__main__":
    test_model()
tokenizer.py (32 lines removed)
@@ -1,32 +0,0 @@
import json


# Save vocabulary
def save_vocab():
    vocab = {char: idx for idx, char in enumerate("abcdefghijklmnopqrstuvwxyz ")}
    vocab["<unk>"] = len(vocab)  # Add unknown token
    with open('vocab.json', 'w') as f:
        json.dump(vocab, f)


# Load vocabulary
def load_vocab():
    with open('vocab.json', 'r') as f:
        return json.load(f)


# Tokenizer
def simple_tokenizer(text, vocab):
    # Convert text to lowercase and replace unknown characters with <unk>
    text = text.lower()
    unk_token = vocab.get("<unk>", None)
    return [vocab[char] if char in vocab else unk_token for char in text]


# Detokenizer
def detokenizer(tokens, vocab):
    reverse_vocab = {idx: char for char, idx in vocab.items()}
    return ''.join(reverse_vocab[token] for token in tokens)


if __name__ == "__main__":
    save_vocab()
    vocab = load_vocab()
    print(simple_tokenizer("hello world", vocab))
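With the vocab.json in this diff, the tokenizer round-trips as sketched below; characters outside a-z and space map to the <unk> id, 27. Not part of the diff, added for reference:

from tokenizer import simple_tokenizer, detokenizer, load_vocab

vocab = load_vocab()
ids = simple_tokenizer("hello world", vocab)
print(ids)                             # [7, 4, 11, 11, 14, 26, 22, 14, 17, 11, 3]
print(detokenizer(ids, vocab))         # "hello world"
print(simple_tokenizer("hi!", vocab))  # [7, 8, 27]: "!" falls back to <unk>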
train.py (46 lines removed)
@@ -1,46 +0,0 @@
import torch
from torch import nn
from torch.optim import Adam
from model import TinyGPT
from dataset import create_dataset
from tokenizer import load_vocab


def pad_sequence(seq, max_len):
    """Pads a sequence to the given maximum length."""
    return torch.cat([seq, torch.zeros(max_len - len(seq), dtype=torch.long)], dim=0)


def train_model():
    vocab = load_vocab()
    inputs, targets = create_dataset()

    # Determine the maximum sequence length for padding
    max_len = max(len(seq) for seq in inputs + targets)

    # Pad inputs and targets
    inputs = [pad_sequence(seq, max_len) for seq in inputs]
    targets = [pad_sequence(seq, max_len) for seq in targets]

    # Convert to batch tensors
    inputs = torch.stack(inputs).cuda()
    targets = torch.stack(targets).cuda()

    # Model setup
    model = TinyGPT(vocab_size=len(vocab), embed_size=32, num_heads=2, num_layers=2).cuda()
    criterion = nn.CrossEntropyLoss()
    optimizer = Adam(model.parameters(), lr=0.001)

    # Training loop
    for epoch in range(100):
        optimizer.zero_grad()
        output = model(inputs, targets)
        loss = criterion(output.view(-1, len(vocab)), targets.view(-1))
        loss.backward()
        optimizer.step()
        print(f"Epoch {epoch + 1}, Loss: {loss.item()}")

    # Save the model
    torch.save(model.state_dict(), "ruby_model.pth")
    print("Model saved as ruby_model.pth")


if __name__ == "__main__":
    train_model()
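For reference, a minimal sketch of the tensor shapes the loss call above relies on, using random stand-ins rather than the real model; 28 is the size of the vocab.json below:

import torch

logits = torch.randn(3, 3, 28)          # stand-in for model(inputs, targets): (batch, seq_len, vocab)
labels = torch.randint(0, 28, (3, 3))   # padded target batch: (batch, seq_len)
loss = torch.nn.CrossEntropyLoss()(logits.view(-1, 28), labels.view(-1))
print(loss.item())  # CrossEntropyLoss expects (N, C) logits against (N,) class indices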
vocab.json (1 line removed)
@@ -1 +0,0 @@
{"a": 0, "b": 1, "c": 2, "d": 3, "e": 4, "f": 5, "g": 6, "h": 7, "i": 8, "j": 9, "k": 10, "l": 11, "m": 12, "n": 13, "o": 14, "p": 15, "q": 16, "r": 17, "s": 18, "t": 19, "u": 20, "v": 21, "w": 22, "x": 23, "y": 24, "z": 25, " ": 26, "<unk>": 27}