From 567c2d5f848103d0f07d926f1174418e3cb6b423 Mon Sep 17 00:00:00 2001
From: Dan
Date: Thu, 28 Nov 2024 23:55:41 -0500
Subject: [PATCH] First Commit: Merged two projects into one

---
 .github/workflows/discord_sync.yml |  15 ++
 .gitignore                         | 164 +++++++++++++++++
 main.py                            | 278 +++++++++++++++++++++++++++++
 model.py                           | 118 ++++++++++++
 4 files changed, 575 insertions(+)
 create mode 100644 .github/workflows/discord_sync.yml
 create mode 100644 .gitignore
 create mode 100644 main.py
 create mode 100644 model.py

diff --git a/.github/workflows/discord_sync.yml b/.github/workflows/discord_sync.yml
new file mode 100644
index 0000000..6a92ee8
--- /dev/null
+++ b/.github/workflows/discord_sync.yml
@@ -0,0 +1,15 @@
+name: Discord Webhook
+
+on: [push]
+
+jobs:
+  git:
+    runs-on: ubuntu-latest
+    steps:
+
+      - uses: actions/checkout@v2
+
+      - name: Run Discord Webhook
+        uses: johnnyhuy/actions-discord-git-webhook@main
+        with:
+          webhook_url: ${{ secrets.YOUR_DISCORD_WEBHOOK_URL }}
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..050a815
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,164 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+# Usually these files are written by a python script from a template
+# before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+cover/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+.pybuilder/
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+# For a library or package, you might want to ignore these files since the code is
+# intended to run in multiple environments; otherwise, check them in:
+# .python-version
+
+# pipenv
+# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+# However, in case of collaboration, if having platform-specific dependencies or dependencies
+# having no cross-platform support, pipenv may install dependencies that don't work, or not
+# install all needed dependencies.
+#Pipfile.lock
+
+# poetry
+# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+# This is especially recommended for binary packages to ensure reproducibility, and is more
+# commonly ignored for libraries.
+# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+#poetry.lock
+
+# pdm
+# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+#pdm.lock
+# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+# in version control.
+# https://pdm.fming.dev/latest/usage/project/#working-with-version-control
+.pdm.toml
+.pdm-python
+.pdm-build/
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# Cython debug symbols
+cython_debug/
+
+# PyCharm
+# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+# and can be added to the global gitignore or merged into this file. For a more nuclear
+# option (not recommended) you can uncomment the following to ignore the entire idea folder.
+#.idea/
+/client_secret.json
+/token.json
\ No newline at end of file
diff --git a/main.py b/main.py
new file mode 100644
index 0000000..253b4d5
--- /dev/null
+++ b/main.py
@@ -0,0 +1,278 @@
+import os
+import sqlite3
+import threading
+import time
+import torch
+import discord
+from google.oauth2.credentials import Credentials
+from google_auth_oauthlib.flow import InstalledAppFlow
+from google.auth.transport.requests import Request
+from googleapiclient.discovery import build
+from datetime import datetime, timedelta, timezone
+from model import JadeModel
+from dotenv import load_dotenv
+from collections import deque
+import uuid as uuid_lib
+import json
+
+# Constants
+SCOPES = ['https://www.googleapis.com/auth/youtube.readonly']
+DATABASE_FILE = 'global_user_data.db'
+CHANNEL_HANDLE = 'UCsVJcf4KbO8Vz308EKpSYxw'  # a channel ID (UC...), not an @handle
+STREAM_KEYWORD = "Live"
+
+# Load environment variables
+load_dotenv()
+
+intents = discord.Intents.default()
+intents.messages = True
+intents.message_content = True
+client = discord.Client(intents=intents)
+
+# Initialize the model
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+model = JadeModel().to(device)
+
+# Context management for conversation continuity
+conversation_history = deque(maxlen=5)  # Store the last 5 messages for context
+training_data = []  # Store live messages for training purposes
+
+# Profile Manager
+class ProfileManager:
+    def __init__(self):
+        self._create_profiles_table()
+
+    def _create_profiles_table(self):
+        conn = sqlite3.connect(DATABASE_FILE)
+        cursor = conn.cursor()
+        cursor.execute('''
+            CREATE TABLE IF NOT EXISTS global_profiles (
+                uuid TEXT PRIMARY KEY,
+                discord_user_id TEXT UNIQUE,
+                youtube_channel_id TEXT UNIQUE,
+                points INTEGER DEFAULT 0,
+                last_interaction TIMESTAMP,
+                subscription_status TEXT,
+                first_seen_as_member TIMESTAMP,
+                has_opted_in INTEGER DEFAULT 0
+            )
+        ''')
+        conn.commit()
+        conn.close()
+
+    def get_or_create_uuid(self, discord_id=None, youtube_id=None):
+        conn = sqlite3.connect(DATABASE_FILE)
+        cursor = conn.cursor()
+        uuid = None
+
+        if discord_id:
+            cursor.execute("SELECT uuid FROM global_profiles WHERE discord_user_id = ?", (discord_id,))
+            result = cursor.fetchone()
+            if result:
+                uuid = result[0]
+
+        if not uuid and youtube_id:
+            cursor.execute("SELECT uuid FROM global_profiles WHERE youtube_channel_id = ?", (youtube_id,))
+            result = cursor.fetchone()
+            if result:
+                uuid = result[0]
+
+        if not uuid:
+            uuid = str(uuid_lib.uuid4())
+            cursor.execute('''
+                INSERT INTO global_profiles (uuid, discord_user_id, youtube_channel_id)
+                VALUES (?, ?, ?)
+            ''', (uuid, discord_id, youtube_id))
+            conn.commit()
+
+        conn.close()
+        return uuid
+    def update_subscription_status(self, youtube_id, status):
+        conn = sqlite3.connect(DATABASE_FILE)
+        cursor = conn.cursor()
+        cursor.execute('''
+            UPDATE global_profiles
+            SET subscription_status = ?, last_interaction = ?
+            WHERE youtube_channel_id = ?
+        ''', (status, datetime.now(timezone.utc), youtube_id))
+        conn.commit()
+        conn.close()
+
+    def delete_user_data(self, uuid):
+        # Export the row to JSON, then delete it, to comply with GDPR erasure requests
+        conn = sqlite3.connect(DATABASE_FILE)
+        cursor = conn.cursor()
+        cursor.execute('SELECT * FROM global_profiles WHERE uuid = ?', (uuid,))
+        user_data = cursor.fetchone()
+        if user_data:
+            with open(f'deleted_user_data_{uuid}.json', 'w') as f:
+                json.dump({
+                    'uuid': user_data[0],
+                    'discord_user_id': user_data[1],
+                    'youtube_channel_id': user_data[2],
+                    'points': user_data[3],
+                    'last_interaction': user_data[4],
+                    'subscription_status': user_data[5],
+                    'first_seen_as_member': user_data[6],
+                    'has_opted_in': user_data[7]
+                }, f)
+        cursor.execute('DELETE FROM global_profiles WHERE uuid = ?', (uuid,))
+        conn.commit()
+        conn.close()
+
+    def has_opted_in(self, uuid):
+        conn = sqlite3.connect(DATABASE_FILE)
+        cursor = conn.cursor()
+        cursor.execute('SELECT has_opted_in FROM global_profiles WHERE uuid = ?', (uuid,))
+        result = cursor.fetchone()
+        conn.close()
+        return result is not None and result[0] == 1
+
+    def set_opt_in(self, uuid, opted_in=True):
+        conn = sqlite3.connect(DATABASE_FILE)
+        cursor = conn.cursor()
+        cursor.execute('''
+            UPDATE global_profiles
+            SET has_opted_in = ?
+            WHERE uuid = ?
+        ''', (1 if opted_in else 0, uuid))
+        conn.commit()
+        conn.close()
+
+profile_manager = ProfileManager()
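+# Illustrative usage of ProfileManager (hypothetical IDs, not part of the
+# runtime flow): look up or create a profile, record consent, then honor a
+# GDPR erasure request.
+#
+#   uuid = profile_manager.get_or_create_uuid(discord_id="123456789012345678")
+#   profile_manager.set_opt_in(uuid)          # mark consent
+#   profile_manager.delete_user_data(uuid)    # export row to JSON, then delete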
+# YouTube API Functions
+def get_authenticated_service():
+    flow = InstalledAppFlow.from_client_secrets_file(
+        'client_secret.json', SCOPES)
+    creds = flow.run_local_server(port=63355)
+    with open('token.json', 'w') as token:
+        token.write(creds.to_json())
+    return build('youtube', 'v3', credentials=creds)
+
+def find_correct_live_video(youtube, channel_id, keyword):
+    request = youtube.search().list(
+        part="snippet",
+        channelId=channel_id,
+        eventType="live",
+        type="video"
+    )
+    response = request.execute()
+    items = response.get('items', [])
+    for item in items:
+        title = item['snippet']['title']
+        if keyword.lower() in title.lower():
+            return item['id']['videoId']
+    return None
+
+def get_live_chat_id(youtube, video_id):
+    request = youtube.videos().list(
+        part="liveStreamingDetails",
+        id=video_id
+    )
+    response = request.execute()
+    items = response.get('items', [])
+    if items:
+        return items[0]['liveStreamingDetails'].get('activeLiveChatId')
+    return None
+
+def monitor_youtube_chat(youtube, live_chat_id):
+    if not live_chat_id:
+        print("No valid live chat ID found.")
+        return False
+
+    next_page_token = None
+    while True:
+        try:
+            request = youtube.liveChatMessages().list(
+                liveChatId=live_chat_id,
+                part="snippet,authorDetails",
+                maxResults=200,
+                pageToken=next_page_token
+            )
+            response = request.execute()
+
+            if 'items' in response and response['items']:
+                for item in response['items']:
+                    user_id = item['authorDetails']['channelId']
+                    display_name = item['authorDetails']['displayName']
+                    is_moderator = item['authorDetails']['isChatModerator']
+                    is_member = item['authorDetails']['isChatSponsor']
+                    message = item['snippet']['displayMessage']
+
+                    uuid = profile_manager.get_or_create_uuid(youtube_id=user_id)
+                    if is_member:
+                        profile_manager.update_subscription_status(user_id, "subscribed")
+                    else:
+                        profile_manager.update_subscription_status(user_id, "none")
+
+                    print(f"[{datetime.now(timezone.utc)}] {display_name}: {message} (UUID: {uuid})")
+
+                    # Add live chat message to training data if the user has opted in
+                    if profile_manager.has_opted_in(uuid):
+                        training_data.append((display_name, message))
+
+                next_page_token = response.get('nextPageToken')
+
+            else:
+                print("No new messages detected; continuing to poll...")
+
+        except Exception as e:
+            print(f"Error while monitoring chat: {e}")
+            time.sleep(30)  # Wait before retrying in case of an error
+
+        time.sleep(10)  # Polling interval; adjust as needed
+
+# Discord Event Handlers
+@client.event
+async def on_ready():
+    print(f'We have logged in as {client.user}')
+
+@client.event
+async def on_message(message):
+    if message.author == client.user:
+        return
+
+    # Link the Discord user to the correct global profile UUID
+    uuid = profile_manager.get_or_create_uuid(discord_id=str(message.author.id))
+
+    # Handle the opt-in command before the opt-in gate; otherwise nobody could ever opt in
+    if message.content.lower() == '!optin':
+        profile_manager.set_opt_in(uuid, True)
+        await message.channel.send("You have successfully opted in to data usage.")
+        return
+
+    # Ensure the user has opted in before any other interaction
+    if not profile_manager.has_opted_in(uuid):
+        await message.channel.send("Please type '!optin' to confirm that you agree to data usage and interaction with this bot.")
+        return
+
+    # Add the message to conversation history for context
+    conversation_history.append(message.content)
+
+    # Generate a response using Jade with context
+    context = "\n".join(conversation_history)
+    response = model.generate_response(context)
+    if response:
+        await message.channel.send(response)
+
+    print(f"Discord Interaction: User {message.author} (UUID: {uuid})")
+
+# Main Function to Start Both Services
+def main():
+    youtube = get_authenticated_service()
+    # CHANNEL_HANDLE already holds a channel ID; do not map it through the
+    # profile table (get_or_create_uuid would return a UUID, not a channel ID)
+    video_id = find_correct_live_video(youtube, CHANNEL_HANDLE, STREAM_KEYWORD)
+    if video_id:
+        live_chat_id = get_live_chat_id(youtube, video_id)
+        if live_chat_id:
+            print("Monitoring YouTube live chat...")
+            # Run the blocking YouTube poller in a background thread so that
+            # client.run() below can still start the Discord bot
+            threading.Thread(target=monitor_youtube_chat,
+                             args=(youtube, live_chat_id), daemon=True).start()
+        else:
+            print("No live chat ID available.")
+    else:
+        print("Could not find the correct live stream or it is not live.")
+
+    client.run(os.getenv('DISCORD_TOKEN'))
+
+if __name__ == "__main__":
+    main()
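+# Example .env consumed by load_dotenv() above (hypothetical value):
+#
+#   DISCORD_TOKEN=your-bot-token-here
+#
+# client_secret.json and token.json are likewise expected next to main.py;
+# both are listed in .gitignore so credentials never land in the repository.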
diff --git a/model.py b/model.py
new file mode 100644
index 0000000..221dc94
--- /dev/null
+++ b/model.py
@@ -0,0 +1,118 @@
+import torch
+import torch.nn as nn
+import torch.optim as optim
+import os
+from torch.cuda.amp import GradScaler, autocast
+
+class JadeModel(nn.Module):
+    def __init__(self, load_model_path=None):
+        super(JadeModel, self).__init__()
+        # GPT-like Transformer architecture
+        self.vocab_size = 512  # Character-level tokenization (code points 0-511)
+        self.embedding_dim = 768  # GPT-like embedding dimension
+        self.num_heads = 12  # Number of attention heads
+        self.num_layers = 12  # Number of transformer layers
+        self.max_position_embeddings = 512  # Maximum sequence length
+
+        # Embedding layers
+        self.embedding = nn.Embedding(self.vocab_size, self.embedding_dim)
+        self.position_embedding = nn.Embedding(self.max_position_embeddings, self.embedding_dim)
+
+        # Transformer layers (batch_first so inputs stay [batch, seq, dim];
+        # without it, nn.TransformerEncoderLayer expects [seq, batch, dim])
+        self.transformer_layers = nn.ModuleList([
+            nn.TransformerEncoderLayer(d_model=self.embedding_dim, nhead=self.num_heads, batch_first=True)
+            for _ in range(self.num_layers)
+        ])
+
+        # Output layer
+        self.fc = nn.Linear(self.embedding_dim, self.vocab_size)
+        self.softmax = nn.Softmax(dim=-1)
+
+        # Device setup
+        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+        self.to(self.device)
+
+        # Load model state if path is provided
+        if load_model_path and os.path.exists(load_model_path):
+            self.load_model(load_model_path)
+            print(f"Model loaded from {load_model_path}")
+
+    def forward(self, input_ids):
+        # Truncate input_ids if longer than max_position_embeddings
+        if input_ids.size(1) > self.max_position_embeddings:
+            input_ids = input_ids[:, -self.max_position_embeddings:]
+
+        # Create position ids for input sequence
+        seq_length = input_ids.size(1)
+        position_ids = torch.arange(0, seq_length, dtype=torch.long, device=self.device)
+        position_ids = position_ids.unsqueeze(0).expand_as(input_ids)
+
+        # Embedding lookup
+        x = self.embedding(input_ids) + self.position_embedding(position_ids)
+
+        # Pass through transformer layers
+        for layer in self.transformer_layers:
+            x = layer(x)
+
+        # Output layer
+        x = self.fc(x)
+        return x
+
+    def generate_response(self, input_text, initial_temperature=0.85, top_p=0.8, repetition_penalty=1.4, max_token_frequency=2, max_length=50, min_response_length=5):
+        # Convert input_text to token ids
+        input_ids = self.tokenize(input_text)
+        if len(input_ids) > self.max_position_embeddings:
+            input_ids = input_ids[-self.max_position_embeddings:]  # Truncate if too long
+        input_tensor = torch.tensor(input_ids).unsqueeze(0).to(self.device)
+        generated_tokens = input_ids.copy()  # Start with input tokens to use as context
+        temperature = initial_temperature
+        recent_tokens = list(input_ids[-10:])  # Window of the 10 most recent tokens
+
+        with torch.no_grad(), autocast():
+            for _ in range(max_length):  # Generate up to max_length more tokens
+                output = self.forward(input_tensor)
+                logits = output[:, -1, :]  # Consider only the last token's logits
+                logits = logits / (temperature + 1e-2)  # Apply temperature for sampling diversity
+
+                # Apply repetition penalty with frequency-based scaling
+                # (note: dividing logits only lowers scores that are positive)
+                for token in set(generated_tokens):
+                    if generated_tokens.count(token) > 1:
+                        logits[0, token] /= (repetition_penalty + generated_tokens.count(token) * 0.02)
+
+                # Dynamic nucleus (top-p) sampling; the strict '<' also drops the
+                # token that crosses the threshold, so the head can be empty
+                sorted_logits, sorted_indices = torch.sort(logits, descending=True)
+                cumulative_probs = torch.cumsum(self.softmax(sorted_logits), dim=-1)
+                top_p_mask = cumulative_probs < top_p
+                top_p_logits = sorted_logits[top_p_mask]
+                top_p_indices = sorted_indices[top_p_mask]
+
+                if len(top_p_logits) > 1:
+                    top_p_probs = self.softmax(top_p_logits)
+                    sampled_token = top_p_indices[torch.multinomial(top_p_probs, num_samples=1).item()].item()
+                else:
+                    sampled_token = sorted_indices[0, 0].item()  # Fall back to the most probable token
+
+                # Add the token and update the recent-token window
+                generated_tokens.append(sampled_token)
+                recent_tokens.append(sampled_token)
+                if len(recent_tokens) > 10:
+                    recent_tokens.pop(0)
+
+                # Update input tensor to include the generated token
+                input_tensor = torch.tensor(generated_tokens).unsqueeze(0).to(self.device)
+
+                # Gradually decrease temperature to reduce randomness more smoothly
+                temperature = max(0.75, temperature * 0.98)
+
+        response = self.detokenize(generated_tokens[len(input_ids):])  # Exclude the input from the response
+        return response if len(response.strip()) > 0 else None
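+    # Worked example of the top-p cut above (illustrative numbers): with head
+    # probabilities [0.5, 0.3, 0.15, 0.05] and top_p=0.8, the cumulative sums
+    # are [0.5, 0.8, 0.95, 1.0]. The mask `cumulative_probs < top_p` keeps only
+    # the first token, so sampling falls through to the argmax branch; raising
+    # top_p to 0.9 keeps two tokens and renormalizes over just their logits.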
+
+    def load_model(self, path):
+        self.load_state_dict(torch.load(path, map_location=self.device))
+
+    # Placeholder tokenization method (to be replaced with an optimized tokenizer);
+    # code points are clamped so characters beyond the 512-entry vocabulary
+    # cannot index past the embedding table
+    def tokenize(self, text):
+        return [min(ord(c), self.vocab_size - 1) for c in text]
+
+    # Placeholder detokenization method (to be replaced with an optimized tokenizer)
+    def detokenize(self, tokens):
+        return ''.join([chr(t) for t in tokens])
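
A minimal usage sketch (not part of the commit) for exercising model.py as
committed; output will be gibberish until the network is actually trained:

    # demo.py -- assumes model.py from this patch is on the import path
    from model import JadeModel

    model = JadeModel()                 # picks CUDA if available, else CPU
    tokens = model.tokenize("hello")    # -> [104, 101, 108, 108, 111]
    assert model.detokenize(tokens) == "hello"

    reply = model.generate_response("hello", max_length=20)
    print(reply)                        # random characters from an untrained model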