Compare commits
No commits in common. "e0ea105872187954dcd3a7716d8e00ddf0bba78e" and "main" have entirely different histories.
e0ea105872
...
main
15
.github/workflows/discord_sync.yml
vendored
Normal file
15
.github/workflows/discord_sync.yml
vendored
Normal file
@ -0,0 +1,15 @@
|
||||
name: Discord Webhook
|
||||
|
||||
on: [push]
|
||||
|
||||
jobs:
|
||||
git:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
|
||||
- uses: actions/checkout@v2
|
||||
|
||||
- name: Run Discord Webhook
|
||||
uses: johnnyhuy/actions-discord-git-webhook@main
|
||||
with:
|
||||
webhook_url: ${{ secrets.YOUR_DISCORD_WEBHOOK_URL }}
|
8
.gitignore
vendored
8
.gitignore
vendored
@ -1,4 +1,3 @@
|
||||
# ---> Python
|
||||
# Byte-compiled / optimized / DLL files
|
||||
__pycache__/
|
||||
*.py[cod]
|
||||
@ -107,8 +106,10 @@ ipython_config.py
|
||||
#pdm.lock
|
||||
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
|
||||
# in version control.
|
||||
# https://pdm.fming.dev/#use-with-ide
|
||||
# https://pdm.fming.dev/latest/usage/project/#working-with-version-control
|
||||
.pdm.toml
|
||||
.pdm-python
|
||||
.pdm-build/
|
||||
|
||||
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
||||
__pypackages__/
|
||||
@ -159,4 +160,5 @@ cython_debug/
|
||||
# and can be added to the global gitignore or merged into this file. For a more nuclear
|
||||
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
||||
#.idea/
|
||||
|
||||
/client_secret.json
|
||||
/token.json
|
15
.vscode/launch.json
vendored
15
.vscode/launch.json
vendored
@ -1,15 +0,0 @@
|
||||
{
|
||||
// Use IntelliSense to learn about possible attributes.
|
||||
// Hover to view descriptions of existing attributes.
|
||||
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
|
||||
"version": "0.2.0",
|
||||
"configurations": [
|
||||
{
|
||||
"name": "Jade",
|
||||
"type": "debugpy",
|
||||
"request": "launch",
|
||||
"program": "E:\\Development\\AI Development\\Jade\\main.py",
|
||||
"console": "integratedTerminal"
|
||||
}
|
||||
]
|
||||
}
|
9
LICENSE
9
LICENSE
@ -1,9 +0,0 @@
|
||||
MIT License
|
||||
|
||||
Copyright (c) <year> <copyright holders>
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
262
main.py
262
main.py
@ -1,10 +1,24 @@
|
||||
# main.py: Discord Bot Code
|
||||
|
||||
import discord
|
||||
import torch
|
||||
from model import JadeModel
|
||||
import os
|
||||
import sqlite3
|
||||
import time
|
||||
import torch
|
||||
import discord
|
||||
from google.oauth2.credentials import Credentials
|
||||
from google_auth_oauthlib.flow import InstalledAppFlow
|
||||
from google.auth.transport.requests import Request
|
||||
from googleapiclient.discovery import build
|
||||
from datetime import datetime, timedelta, timezone
|
||||
from model import JadeModel
|
||||
from dotenv import load_dotenv
|
||||
from collections import deque
|
||||
import uuid as uuid_lib
|
||||
import json
|
||||
|
||||
# Constants
|
||||
SCOPES = ['https://www.googleapis.com/auth/youtube.readonly']
|
||||
DATABASE_FILE = 'global_user_data.db' # Updated database file name
|
||||
CHANNEL_HANDLE = 'UCsVJcf4KbO8Vz308EKpSYxw'
|
||||
STREAM_KEYWORD = "Live"
|
||||
|
||||
# Load environment variables
|
||||
load_dotenv()
|
||||
@ -18,23 +32,247 @@ client = discord.Client(intents=intents)
|
||||
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
||||
model = JadeModel().to(device)
|
||||
|
||||
# Context management for conversation continuity
|
||||
conversation_history = deque(maxlen=5) # Store the last 5 messages for context
|
||||
training_data = [] # Store live messages for training purposes
|
||||
|
||||
# Profile Manager
|
||||
class ProfileManager:
    """SQLite-backed store for the ``global_profiles`` table.

    Each profile is keyed by a generated UUID and may carry a Discord user ID
    and/or a YouTube channel ID. Every method opens its own short-lived
    connection and closes it in a ``finally`` block so that a failing
    statement cannot leak the connection.
    """

    # Column order used when exporting a profile row in delete_user_data().
    _COLUMNS = (
        'uuid',
        'discord_user_id',
        'youtube_channel_id',
        'points',
        'last_interaction',
        'subscription_status',
        'first_seen_as_member',
        'has_opted_in',
    )

    def __init__(self, db_path=None):
        """Create the profiles table if it does not exist yet.

        Args:
            db_path: Path of the SQLite file. Defaults to ``DATABASE_FILE``.
        """
        self._db_path = DATABASE_FILE if db_path is None else db_path
        self._create_profiles_table()

    def _connect(self):
        """Open a new connection to the profile database."""
        return sqlite3.connect(self._db_path)

    def _create_profiles_table(self):
        """Create ``global_profiles`` when it is missing."""
        conn = self._connect()
        try:
            conn.execute('''
                CREATE TABLE IF NOT EXISTS global_profiles (
                    uuid TEXT PRIMARY KEY,
                    discord_user_id TEXT UNIQUE,
                    youtube_channel_id TEXT UNIQUE,
                    points INTEGER DEFAULT 0,
                    last_interaction TIMESTAMP,
                    subscription_status TEXT,
                    first_seen_as_member TIMESTAMP,
                    has_opted_in INTEGER DEFAULT 0
                )
            ''')
            conn.commit()
        finally:
            conn.close()

    def get_or_create_uuid(self, discord_id=None, youtube_id=None):
        """Return the profile UUID for the given IDs, creating a profile if needed.

        The Discord ID is looked up first; the YouTube channel ID is only
        consulted when that lookup finds nothing. When neither matches, a new
        row is inserted with both values as given.

        Args:
            discord_id: Discord user ID, or a falsy value to skip that lookup.
            youtube_id: YouTube channel ID, or a falsy value to skip that lookup.

        Returns:
            The profile UUID as a string.
        """
        lookups = (
            ("SELECT uuid FROM global_profiles WHERE discord_user_id = ?", discord_id),
            ("SELECT uuid FROM global_profiles WHERE youtube_channel_id = ?", youtube_id),
        )
        conn = self._connect()
        try:
            cursor = conn.cursor()
            for query, external_id in lookups:
                if not external_id:
                    continue
                cursor.execute(query, (external_id,))
                row = cursor.fetchone()
                if row:
                    return row[0]

            profile_uuid = str(uuid_lib.uuid4())
            cursor.execute('''
                INSERT INTO global_profiles (uuid, discord_user_id, youtube_channel_id)
                VALUES (?, ?, ?)
            ''', (profile_uuid, discord_id, youtube_id))
            conn.commit()
            return profile_uuid
        finally:
            conn.close()

    def update_subscription_status(self, youtube_id, status):
        """Set ``subscription_status`` and refresh ``last_interaction`` for a channel.

        Args:
            youtube_id: YouTube channel ID identifying the profile row.
            status: New subscription status text.
        """
        # datetime.utcnow() and sqlite3's implicit datetime adapter are both
        # deprecated since Python 3.12. Build the same naive-UTC text the
        # adapter used to store ("YYYY-MM-DD HH:MM:SS[.ffffff]") explicitly so
        # existing rows and new rows keep one format.
        now_utc = datetime.now(timezone.utc).replace(tzinfo=None).isoformat(" ")
        conn = self._connect()
        try:
            conn.execute('''
                UPDATE global_profiles
                SET subscription_status = ?, last_interaction = ?
                WHERE youtube_channel_id = ?
            ''', (status, now_utc, youtube_id))
            conn.commit()
        finally:
            conn.close()

    def delete_user_data(self, uuid):
        """Export a profile to ``deleted_user_data_<uuid>.json`` and delete its row.

        Nothing is written when no profile has the given UUID.

        Args:
            uuid: UUID of the profile to remove.
        """
        # NOTE(review): the export keeps a copy of the personal data on disk
        # after the row is removed - confirm this matches the intended GDPR
        # handling.
        column_list = ", ".join(self._COLUMNS)
        conn = self._connect()
        try:
            cursor = conn.cursor()
            # Explicit column list instead of SELECT * so the export does not
            # depend on the physical column order of the table.
            cursor.execute(
                f'SELECT {column_list} FROM global_profiles WHERE uuid = ?',
                (uuid,),
            )
            user_data = cursor.fetchone()
            if user_data:
                with open(f'deleted_user_data_{uuid}.json', 'w') as f:
                    json.dump(dict(zip(self._COLUMNS, user_data)), f)
                cursor.execute('DELETE FROM global_profiles WHERE uuid = ?', (uuid,))
            conn.commit()
        finally:
            conn.close()

    def has_opted_in(self, uuid):
        """Return True when the profile exists and ``has_opted_in`` is 1.

        Args:
            uuid: UUID of the profile to check.

        Returns:
            A real ``bool``; unknown profiles yield ``False``.
        """
        conn = self._connect()
        try:
            row = conn.execute(
                'SELECT has_opted_in FROM global_profiles WHERE uuid = ?',
                (uuid,),
            ).fetchone()
        finally:
            conn.close()
        return row is not None and row[0] == 1

    def set_opt_in(self, uuid, opted_in=True):
        """Store the opt-in flag (1 or 0) for a profile.

        Args:
            uuid: UUID of the profile to update.
            opted_in: Truthy to opt the profile in, falsy to opt it out.
        """
        conn = self._connect()
        try:
            conn.execute('''
                UPDATE global_profiles
                SET has_opted_in = ?
                WHERE uuid = ?
            ''', (1 if opted_in else 0, uuid))
            conn.commit()
        finally:
            conn.close()
|
||||
|
||||
profile_manager = ProfileManager()
|
||||
|
||||
# YouTube API Functions
|
||||
def get_authenticated_service():
    """Build an authenticated YouTube Data API v3 client.

    Credentials cached in ``token.json`` are reused, and refreshed when they
    have expired and carry a refresh token. The interactive OAuth flow on
    local port 63355 only runs when no usable cached credentials exist.
    Whatever credentials end up being used are written back to ``token.json``.

    Returns:
        The service object returned by ``build('youtube', 'v3', ...)``.
    """
    creds = None
    if os.path.exists('token.json'):
        try:
            creds = Credentials.from_authorized_user_file('token.json', SCOPES)
        except ValueError:
            # Unreadable or incomplete cache: fall through to a fresh login.
            creds = None

    if not creds or not creds.valid:
        if creds and creds.expired and creds.refresh_token:
            creds.refresh(Request())
        else:
            flow = InstalledAppFlow.from_client_secrets_file(
                'client_secret.json', SCOPES)
            creds = flow.run_local_server(port=63355)
        with open('token.json', 'w') as token:
            token.write(creds.to_json())

    return build('youtube', 'v3', credentials=creds)
|
||||
|
||||
def find_correct_live_video(youtube, channel_id, keyword):
    """Find a live video on a channel whose title contains *keyword*.

    Args:
        youtube: YouTube API client exposing ``search().list(...)``.
        channel_id: Value passed as ``channelId`` to the search request.
        keyword: Text matched case-insensitively against each video title.

    Returns:
        The ``videoId`` of the first matching search result, or ``None``
        when no live video title contains the keyword.
    """
    listing = youtube.search().list(
        part="snippet",
        channelId=channel_id,
        eventType="live",
        type="video"
    ).execute()

    for entry in listing.get('items', []):
        video_title = entry['snippet']['title']
        if keyword.lower() not in video_title.lower():
            continue
        return entry['id']['videoId']
    return None
|
||||
|
||||
def get_live_chat_id(youtube, video_id):
    """Return the active live chat ID of a video, if it has one.

    Args:
        youtube: YouTube API client exposing ``videos().list(...)``.
        video_id: ID of the video to inspect.

    Returns:
        The ``activeLiveChatId`` string, or ``None`` when the video is not
        found, carries no ``liveStreamingDetails``, or has no active chat.
    """
    response = youtube.videos().list(
        part="liveStreamingDetails",
        id=video_id
    ).execute()

    items = response.get('items', [])
    if not items:
        return None
    # A video without live-streaming details has no such key; treat that the
    # same as "no chat" instead of raising KeyError.
    details = items[0].get('liveStreamingDetails') or {}
    return details.get('activeLiveChatId')
|
||||
|
||||
def monitor_youtube_chat(youtube, live_chat_id):
    """Poll a YouTube live chat forever and record each message's author.

    For every chat message the author's profile is looked up or created, its
    subscription status is updated from ``isChatSponsor``, the message is
    printed, and - only if the profile has opted in - the
    ``(display_name, message)`` pair is appended to ``training_data``.

    Args:
        youtube: YouTube API client exposing ``liveChatMessages().list(...)``.
        live_chat_id: ID of the live chat to poll.

    Returns:
        ``False`` immediately when *live_chat_id* is falsy. Otherwise the
        function loops indefinitely and does not return.
    """
    if not live_chat_id:
        print("No valid live chat ID found.")
        return False

    next_page_token = None
    while True:
        try:
            response = youtube.liveChatMessages().list(
                liveChatId=live_chat_id,
                part="snippet,authorDetails",
                maxResults=200,
                pageToken=next_page_token
            ).execute()

            items = response.get('items')
            if items:
                for item in items:
                    author = item['authorDetails']
                    user_id = author['channelId']
                    display_name = author['displayName']
                    is_member = author['isChatSponsor']
                    message = item['snippet']['displayMessage']

                    uuid = profile_manager.get_or_create_uuid(youtube_id=user_id)
                    profile_manager.update_subscription_status(
                        user_id, "subscribed" if is_member else "none")

                    # Naive UTC timestamp: same printed form as the
                    # deprecated datetime.utcnow().
                    timestamp = datetime.now(timezone.utc).replace(tzinfo=None)
                    print(f"[{timestamp}] {display_name}: {message} (UUID: {uuid})")

                    # Add live chat message to training data if the user has opted in
                    if profile_manager.has_opted_in(uuid):
                        training_data.append((display_name, message))
            else:
                print("No new messages detected; continuing to poll...")

            # Keep the previous token when the response carries none, so the
            # next poll does not restart from the beginning of the chat and
            # record the same messages twice.
            next_page_token = response.get('nextPageToken', next_page_token)

        except Exception as e:
            # Top-level polling boundary: report and keep the monitor alive.
            print(f"Error while monitoring chat: {e}")
            time.sleep(30)  # Wait before retrying in case of an error

        time.sleep(10)  # Adjust this delay as needed
|
||||
|
||||
# Discord Event Handlers
|
||||
@client.event
async def on_ready():
    """Print the account the Discord client is logged in as."""
    print(f'We have logged in as {client.user}')
|
||||
|
||||
|
||||
@client.event
async def on_message(message):
    """Handle an incoming Discord message.

    Flow:
      1. Ignore the bot's own messages.
      2. Resolve (or create) the author's global profile UUID.
      3. Process ``!optin`` *before* the consent gate, so that a user who has
         not opted in yet is actually able to do so.
      4. Ask users who have not opted in to do so, and stop.
      5. For opted-in users only: train on the message, append it to the
         rolling conversation history, and reply with a response generated
         from that history.

    Args:
        message: The Discord message object delivered to the event handler.
    """
    if message.author == client.user:
        return

    # Link the Discord user to the correct global profile UUID
    uuid = profile_manager.get_or_create_uuid(discord_id=str(message.author.id))
    print(f"Discord Interaction: User {message.author} (UUID: {uuid})")

    # The opt-in command must be checked first; otherwise the consent gate
    # below returns early and nobody could ever opt in.
    if message.content.lower() == '!optin':
        profile_manager.set_opt_in(uuid, True)
        await message.channel.send("You have successfully opted in to data usage.")
        return

    # Ensure user has opted in before interacting
    if not profile_manager.has_opted_in(uuid):
        await message.channel.send("Please type '!optin' to confirm that you agree to data usage and interaction with this bot.")
        return

    # Train Jade with the new message (only after consent has been given)
    model.train_on_message(message.content)

    # Add the message to conversation history for context
    conversation_history.append(message.content)

    # Generate a response using Jade with context
    context = "\n".join(conversation_history)
    response = model.generate_response(context)
    if response:
        await message.channel.send(response)


# NOTE: the bot is started from main(); a module-level client.run() here
# would block before main() is ever reached.
|
||||
|
||||
# Main Function to Start Both Services
|
||||
def main():
    """Start YouTube live-chat monitoring (when a stream is live) and the Discord bot.

    The chat monitor loops forever, so it runs on a daemon thread; running it
    inline would keep ``client.run`` from ever being reached while a stream
    is live.
    """
    import threading  # only needed here, to run the chat monitor in the background

    youtube = get_authenticated_service()

    # Make sure the channel itself has a profile row. The value returned is a
    # profile UUID, not a YouTube channel ID, so it must not be passed to the
    # search call below.
    profile_manager.get_or_create_uuid(youtube_id=CHANNEL_HANDLE)

    video_id = find_correct_live_video(youtube, CHANNEL_HANDLE, STREAM_KEYWORD)
    if video_id:
        live_chat_id = get_live_chat_id(youtube, video_id)
        if live_chat_id:
            print("Monitoring YouTube live chat...")
            # The API client is handed over to the monitor thread and is not
            # used from the main thread after this point.
            threading.Thread(
                target=monitor_youtube_chat,
                args=(youtube, live_chat_id),
                name="youtube-chat-monitor",
                daemon=True,
            ).start()
        else:
            print("No live chat ID available.")
    else:
        print("Could not find the correct live stream or it is not live.")

    client.run(os.getenv('DISCORD_TOKEN'))


if __name__ == "__main__":
    main()
|
||||
|
98
model.py
98
model.py
@ -1,17 +1,14 @@
|
||||
# Suggested Refinements for Jade (Model.py)
|
||||
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
import torch.optim as optim
|
||||
import random
|
||||
import string
|
||||
import numpy as np
|
||||
import os
|
||||
from torch.cuda.amp import GradScaler, autocast
|
||||
|
||||
class JadeModel(nn.Module):
|
||||
def __init__(self):
|
||||
def __init__(self, load_model_path=None):
|
||||
super(JadeModel, self).__init__()
|
||||
# GPT-like Transformer architecture
|
||||
self.vocab_size = 256 # Character-level tokenization (ASCII range)
|
||||
self.vocab_size = 512 # Character-level tokenization (ASCII range)
|
||||
self.embedding_dim = 768 # GPT-like embedding dimension
|
||||
self.num_heads = 12 # Number of attention heads
|
||||
self.num_layers = 12 # Number of transformer layers
|
||||
@ -31,19 +28,20 @@ class JadeModel(nn.Module):
|
||||
self.fc = nn.Linear(self.embedding_dim, self.vocab_size)
|
||||
self.softmax = nn.Softmax(dim=-1)
|
||||
|
||||
# Optimizer and loss function
|
||||
self.optimizer = optim.Adam(self.parameters(), lr=0.001)
|
||||
self.criterion = nn.CrossEntropyLoss()
|
||||
|
||||
# Device setup
|
||||
self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
|
||||
self.to(self.device)
|
||||
|
||||
# Debug message to verify changes (updated unique message for each change)
|
||||
self.debug_message = "[DEBUG] Model initialized with version: Jade-Solstice-Horizon"
|
||||
print(self.debug_message)
|
||||
# Load model state if path is provided
|
||||
if load_model_path and os.path.exists(load_model_path):
|
||||
self.load_model(load_model_path)
|
||||
print(f"Model loaded from {load_model_path}")
|
||||
|
||||
def forward(self, input_ids):
|
||||
# Truncate input_ids if longer than max_position_embeddings
|
||||
if input_ids.size(1) > self.max_position_embeddings:
|
||||
input_ids = input_ids[:, -self.max_position_embeddings:]
|
||||
|
||||
# Create position ids for input sequence
|
||||
seq_length = input_ids.size(1)
|
||||
position_ids = torch.arange(0, seq_length, dtype=torch.long, device=self.device)
|
||||
@ -60,16 +58,18 @@ class JadeModel(nn.Module):
|
||||
x = self.fc(x)
|
||||
return x
|
||||
|
||||
def generate_response(self, input_text, initial_temperature=0.85, top_p=0.8, repetition_penalty=1.4, max_token_frequency=2):
|
||||
def generate_response(self, input_text, initial_temperature=0.85, top_p=0.8, repetition_penalty=1.4, max_token_frequency=2, max_length=50, min_response_length=5):
|
||||
# Convert input_text to token ids
|
||||
input_ids = self.tokenize(input_text)
|
||||
if len(input_ids) > self.max_position_embeddings:
|
||||
input_ids = input_ids[-self.max_position_embeddings:] # Truncate if too long
|
||||
input_tensor = torch.tensor(input_ids).unsqueeze(0).to(self.device)
|
||||
generated_tokens = input_ids.copy()
|
||||
recent_tokens = list(input_ids[-10:]) # Expanded recent tokens window to 10
|
||||
generated_tokens = input_ids.copy() # Start with input tokens to use as context
|
||||
temperature = initial_temperature
|
||||
recent_tokens = list(input_ids[-10:]) # Expanded recent tokens window to 10
|
||||
|
||||
with torch.no_grad():
|
||||
for i in range(50): # Generate up to 50 more tokens
|
||||
with torch.no_grad(), autocast():
|
||||
for _ in range(max_length): # Generate up to max_length more tokens
|
||||
output = self.forward(input_tensor)
|
||||
logits = output[:, -1, :] # Consider only the last token's logits
|
||||
logits = logits / (temperature + 1e-2) # Apply temperature for sampling diversity
|
||||
@ -79,9 +79,6 @@ class JadeModel(nn.Module):
|
||||
if generated_tokens.count(token) > 1:
|
||||
logits[0, token] /= (repetition_penalty + generated_tokens.count(token) * 0.02) # Frequency-based scaling for penalty
|
||||
|
||||
# Apply slight logits smoothing to avoid overly confident peaks
|
||||
logits = logits - torch.mean(logits) * 0.01
|
||||
|
||||
# Dynamic Nucleus (top-p) sampling with adjusted threshold
|
||||
sorted_logits, sorted_indices = torch.sort(logits, descending=True)
|
||||
cumulative_probs = torch.cumsum(self.softmax(sorted_logits), dim=-1)
|
||||
@ -95,18 +92,8 @@ class JadeModel(nn.Module):
|
||||
else:
|
||||
sampled_token = sorted_indices[0, 0].item() # Fallback to the most probable token if none pass the top-p threshold
|
||||
|
||||
# Enforce diversity constraint by limiting token frequency
|
||||
if generated_tokens.count(sampled_token) >= max_token_frequency:
|
||||
logits[0, sampled_token] -= 1.5 # Adjusted penalty to limit token frequency
|
||||
continue # Skip adding this token if it has reached the max frequency
|
||||
|
||||
# Stop repetition if the sampled token was recently repeated
|
||||
if len(generated_tokens) > 1 and generated_tokens[-1] == sampled_token:
|
||||
continue
|
||||
|
||||
# Add token and update state
|
||||
generated_tokens.append(sampled_token)
|
||||
recent_tokens.append(sampled_token)
|
||||
if len(recent_tokens) > 10:
|
||||
recent_tokens.pop(0) # Maintain a window of recent tokens to suppress
|
||||
|
||||
@ -116,43 +103,16 @@ class JadeModel(nn.Module):
|
||||
# Gradually decrease temperature to reduce randomness more smoothly
|
||||
temperature = max(0.75, temperature * 0.98)
|
||||
|
||||
response = self.detokenize(generated_tokens)
|
||||
print("[DEBUG] Generated response:", response) # Debug statement to verify changes
|
||||
print(f"[DEBUG] Generation loss rate (approximated): {temperature}") # Approximate loss rate
|
||||
return response
|
||||
response = self.detokenize(generated_tokens[len(input_ids):]) # Exclude the input from the response
|
||||
return response if len(response.strip()) > 0 else None
|
||||
|
||||
def load_model(self, path):
    """Load a saved state dict from *path* into this model.

    ``torch.load`` is called with ``map_location=self.device``.
    """
    self.load_state_dict(torch.load(path, map_location=self.device))
|
||||
|
||||
# Placeholder tokenization method (to be replaced with optimized tokenizer)
|
||||
def tokenize(self, text):
    """Convert *text* to a list of character code points.

    Characters whose code point is not below ``self.vocab_size`` are dropped,
    so every returned id is a valid index into a vocabulary of that size.

    Args:
        text: The string to tokenize.

    Returns:
        A list of ints, one per retained character, in input order.
    """
    # Character-level tokenizer. The unfiltered variant
    # ``[ord(c) for c in text]`` was unreachable here and, if used, would
    # emit ids >= vocab_size for characters such as emoji.
    return [ord(char) for char in text if ord(char) < self.vocab_size]
|
||||
|
||||
def detokenize(self, token_ids):
    """Join the characters whose code points are given in *token_ids*."""
    characters = map(chr, token_ids)
    return "".join(characters)
|
||||
|
||||
def train_on_message(self, message):
    """Run one next-character training step on *message*.

    The message is tokenized and split into inputs (all tokens but the last)
    and labels (all tokens but the first), so position ``i`` is trained to
    predict token ``i + 1``. Messages that tokenize to fewer than two tokens
    contain no (input, label) pair and are skipped - this includes empty
    messages, which previously raised ``IndexError``.

    Args:
        message: The text to train on.
    """
    token_ids = self.tokenize(message)
    if len(token_ids) < 2:
        return

    # forward() keeps only the last ``max_position_embeddings`` input tokens.
    # Trim here so that inputs and labels stay the same length; otherwise the
    # loss would see mismatched shapes for long messages.
    token_ids = token_ids[-(self.max_position_embeddings + 1):]

    input_tensor = torch.tensor(token_ids[:-1]).unsqueeze(0).to(self.device)
    labels_tensor = torch.tensor(token_ids[1:]).unsqueeze(0).to(self.device)

    # Training step
    self.optimizer.zero_grad()
    outputs = self.forward(input_tensor)
    loss = self.criterion(outputs.view(-1, outputs.size(-1)), labels_tensor.view(-1))
    loss.backward()
    self.optimizer.step()
    print(f"Training loss: {loss.item()}")
|
||||
|
||||
# Changes made:
|
||||
# Version: Jade-Solstice-Horizon
|
||||
# - Reverted temperature, top_p, and repetition penalty settings to be closer to Solstice.
|
||||
# - Introduced explicit stop criteria to prevent repeating tokens consecutively.
|
||||
# - Applied slight smoothing to logits to prevent high peaks and excessive repetition.
|
||||
# - Updated debug message to reflect the new version.
|
||||
|
||||
# Observations:
|
||||
# - Aimed to retain the strengths of Solstice while reducing remaining issues with repetitive tokens by adding specific repetition stop criteria.
|
||||
# Placeholder detokenization method (to be replaced with optimized tokenizer)
|
||||
def detokenize(self, tokens):
    """Turn a sequence of character code points back into a string."""
    pieces = []
    for code_point in tokens:
        pieces.append(chr(code_point))
    return ''.join(pieces)
|
||||
|
Loading…
x
Reference in New Issue
Block a user