Compare commits
No commits in common. "e0ea105872187954dcd3a7716d8e00ddf0bba78e" and "main" have entirely different histories.

.github/workflows/discord_sync.yml (vendored, new file, 15 lines)
@@ -0,0 +1,15 @@
name: Discord Webhook

on: [push]

jobs:
  git:
    runs-on: ubuntu-latest
    steps:

      - uses: actions/checkout@v2

      - name: Run Discord Webhook
        uses: johnnyhuy/actions-discord-git-webhook@main
        with:
          webhook_url: ${{ secrets.YOUR_DISCORD_WEBHOOK_URL }}
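
For context, this workflow forwards every push to a Discord channel through the webhook stored in the YOUR_DISCORD_WEBHOOK_URL repository secret. A minimal sketch of the call the action ultimately makes, using Python's requests and a placeholder URL (Discord webhooks accept a JSON body with a "content" field):

import requests

# Placeholder URL: the real one lives in the repository secret and must not be committed.
WEBHOOK_URL = "https://discord.com/api/webhooks/<id>/<token>"

def post_commit_summary(message):
    # POST a simple text message to the Discord channel behind the webhook.
    resp = requests.post(WEBHOOK_URL, json={"content": message}, timeout=10)
    resp.raise_for_status()  # surface 4xx/5xx errors from Discord

post_commit_summary("Pushed new commits to main")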

.gitignore (vendored, 8 lines changed)
@@ -1,4 +1,3 @@
# ---> Python
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]

@@ -107,8 +106,10 @@ ipython_config.py
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/#use-with-ide
# https://pdm.fming.dev/latest/usage/project/#working-with-version-control
.pdm.toml
.pdm-python
.pdm-build/

# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/

@@ -159,4 +160,5 @@ cython_debug/
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/

/client_secret.json
/token.json

.vscode/launch.json (vendored, deleted file, 15 lines)
@@ -1,15 +0,0 @@
{
    // Use IntelliSense to learn about possible attributes.
    // Hover to view descriptions of existing attributes.
    // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
    "version": "0.2.0",
    "configurations": [
        {
            "name": "Jade",
            "type": "debugpy",
            "request": "launch",
            "program": "E:\\Development\\AI Development\\Jade\\main.py",
            "console": "integratedTerminal"
        }
    ]
}

LICENSE (deleted file, 9 lines)
@@ -1,9 +0,0 @@
MIT License

Copyright (c) <year> <copyright holders>

Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.

main.py (264 lines changed)
@@ -1,10 +1,24 @@
# main.py: Discord Bot Code

import discord
import torch
from model import JadeModel
import os
import sqlite3
import time
import torch
import discord
from google.oauth2.credentials import Credentials
from google_auth_oauthlib.flow import InstalledAppFlow
from google.auth.transport.requests import Request
from googleapiclient.discovery import build
from datetime import datetime, timedelta, timezone
from model import JadeModel
from dotenv import load_dotenv
from collections import deque
import uuid as uuid_lib
import json

# Constants
SCOPES = ['https://www.googleapis.com/auth/youtube.readonly']
DATABASE_FILE = 'global_user_data.db'  # Updated database file name
CHANNEL_HANDLE = 'UCsVJcf4KbO8Vz308EKpSYxw'
STREAM_KEYWORD = "Live"

# Load environment variables
load_dotenv()
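
load_dotenv() pulls configuration from an untracked .env file, which pairs with the new .gitignore entries that keep credentials out of the repository. A minimal sketch of the expected setup, assuming a .env file that defines DISCORD_TOKEN (the variable name comes from the client.run call later in this file; the file contents are an assumption):

import os
from dotenv import load_dotenv

load_dotenv()                        # reads key=value pairs from ./.env into the environment
token = os.getenv('DISCORD_TOKEN')   # e.g. .env contains: DISCORD_TOKEN=your-bot-token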
@@ -18,23 +32,247 @@ client = discord.Client(intents=intents)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = JadeModel().to(device)

# Context management for conversation continuity
conversation_history = deque(maxlen=5)  # Store the last 5 messages for context
training_data = []  # Store live messages for training purposes

# Profile Manager
class ProfileManager:
    def __init__(self):
        self._create_profiles_table()

    def _create_profiles_table(self):
        conn = sqlite3.connect(DATABASE_FILE)
        cursor = conn.cursor()
        cursor.execute('''
            CREATE TABLE IF NOT EXISTS global_profiles (
                uuid TEXT PRIMARY KEY,
                discord_user_id TEXT UNIQUE,
                youtube_channel_id TEXT UNIQUE,
                points INTEGER DEFAULT 0,
                last_interaction TIMESTAMP,
                subscription_status TEXT,
                first_seen_as_member TIMESTAMP,
                has_opted_in INTEGER DEFAULT 0
            )
        ''')
        conn.commit()
        conn.close()

    def get_or_create_uuid(self, discord_id=None, youtube_id=None):
        conn = sqlite3.connect(DATABASE_FILE)
        cursor = conn.cursor()
        uuid = None

        if discord_id:
            cursor.execute("SELECT uuid FROM global_profiles WHERE discord_user_id = ?", (discord_id,))
            result = cursor.fetchone()
            if result:
                uuid = result[0]

        if not uuid and youtube_id:
            cursor.execute("SELECT uuid FROM global_profiles WHERE youtube_channel_id = ?", (youtube_id,))
            result = cursor.fetchone()
            if result:
                uuid = result[0]

        if not uuid:
            uuid = str(uuid_lib.uuid4())
            cursor.execute('''
                INSERT INTO global_profiles (uuid, discord_user_id, youtube_channel_id)
                VALUES (?, ?, ?)
            ''', (uuid, discord_id, youtube_id))
            conn.commit()

        conn.close()
        return uuid

    def update_subscription_status(self, youtube_id, status):
        conn = sqlite3.connect(DATABASE_FILE)
        cursor = conn.cursor()
        cursor.execute('''
            UPDATE global_profiles
            SET subscription_status = ?, last_interaction = ?
            WHERE youtube_channel_id = ?
        ''', (status, datetime.utcnow(), youtube_id))
        conn.commit()
        conn.close()

    def delete_user_data(self, uuid):
        # Delete user data to comply with GDPR
        conn = sqlite3.connect(DATABASE_FILE)
        cursor = conn.cursor()
        cursor.execute('SELECT * FROM global_profiles WHERE uuid = ?', (uuid,))
        user_data = cursor.fetchone()
        if user_data:
            with open(f'deleted_user_data_{uuid}.json', 'w') as f:
                json.dump({
                    'uuid': user_data[0],
                    'discord_user_id': user_data[1],
                    'youtube_channel_id': user_data[2],
                    'points': user_data[3],
                    'last_interaction': user_data[4],
                    'subscription_status': user_data[5],
                    'first_seen_as_member': user_data[6],
                    'has_opted_in': user_data[7]
                }, f)
            cursor.execute('DELETE FROM global_profiles WHERE uuid = ?', (uuid,))
            conn.commit()
        conn.close()

    def has_opted_in(self, uuid):
        conn = sqlite3.connect(DATABASE_FILE)
        cursor = conn.cursor()
        cursor.execute('SELECT has_opted_in FROM global_profiles WHERE uuid = ?', (uuid,))
        result = cursor.fetchone()
        conn.close()
        return result and result[0] == 1

    def set_opt_in(self, uuid, opted_in=True):
        conn = sqlite3.connect(DATABASE_FILE)
        cursor = conn.cursor()
        cursor.execute('''
            UPDATE global_profiles
            SET has_opted_in = ?
            WHERE uuid = ?
        ''', (1 if opted_in else 0, uuid))
        conn.commit()
        conn.close()

profile_manager = ProfileManager()

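ProfileManager keys every viewer to a single UUID regardless of which platform they arrive from. A short usage sketch of the get-or-create flow above (the Discord ID is made up for illustration):

pm = ProfileManager()
uuid = pm.get_or_create_uuid(discord_id="123456789012345678")   # first contact inserts a new row
again = pm.get_or_create_uuid(discord_id="123456789012345678")  # later lookups return the same UUID
assert again == uuid
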
# YouTube API Functions
def get_authenticated_service():
    flow = InstalledAppFlow.from_client_secrets_file(
        'client_secret.json', SCOPES)
    creds = flow.run_local_server(port=63355)
    with open('token.json', 'w') as token:
        token.write(creds.to_json())
    return build('youtube', 'v3', credentials=creds)

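get_authenticated_service always re-runs the browser consent flow even though it saves token.json, and the Credentials and Request imports at the top of the file are otherwise unused. A hedged sketch of the standard google-auth cached-token pattern those imports suggest (this is the library's documented usage, not code from this commit):

def get_service_with_cached_token():
    creds = None
    if os.path.exists('token.json'):
        creds = Credentials.from_authorized_user_file('token.json', SCOPES)
    if not creds or not creds.valid:
        if creds and creds.expired and creds.refresh_token:
            creds.refresh(Request())  # silent refresh instead of a new consent prompt
        else:
            flow = InstalledAppFlow.from_client_secrets_file('client_secret.json', SCOPES)
            creds = flow.run_local_server(port=63355)
        with open('token.json', 'w') as token:
            token.write(creds.to_json())
    return build('youtube', 'v3', credentials=creds)
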
def find_correct_live_video(youtube, channel_id, keyword):
    request = youtube.search().list(
        part="snippet",
        channelId=channel_id,
        eventType="live",
        type="video"
    )
    response = request.execute()
    items = response.get('items', [])
    for item in items:
        title = item['snippet']['title']
        if keyword.lower() in title.lower():
            return item['id']['videoId']
    return None

def get_live_chat_id(youtube, video_id):
    request = youtube.videos().list(
        part="liveStreamingDetails",
        id=video_id
    )
    response = request.execute()
    items = response.get('items', [])
    if items:
        return items[0]['liveStreamingDetails'].get('activeLiveChatId')
    return None

def monitor_youtube_chat(youtube, live_chat_id):
    if not live_chat_id:
        print("No valid live chat ID found.")
        return False

    next_page_token = None
    while True:
        try:
            request = youtube.liveChatMessages().list(
                liveChatId=live_chat_id,
                part="snippet,authorDetails",
                maxResults=200,
                pageToken=next_page_token
            )
            response = request.execute()

            if 'items' in response and response['items']:
                for item in response['items']:
                    user_id = item['authorDetails']['channelId']
                    display_name = item['authorDetails']['displayName']
                    is_moderator = item['authorDetails']['isChatModerator']
                    is_member = item['authorDetails']['isChatSponsor']
                    message = item['snippet']['displayMessage']

                    uuid = profile_manager.get_or_create_uuid(youtube_id=user_id)
                    if is_member:
                        profile_manager.update_subscription_status(user_id, "subscribed")
                    else:
                        profile_manager.update_subscription_status(user_id, "none")

                    print(f"[{datetime.utcnow()}] {display_name}: {message} (UUID: {uuid})")

                    # Add live chat message to training data if the user has opted in
                    if profile_manager.has_opted_in(uuid):
                        training_data.append((display_name, message))

                next_page_token = response.get('nextPageToken')

            else:
                print("No new messages detected; continuing to poll...")

        except Exception as e:
            print(f"Error while monitoring chat: {e}")
            time.sleep(30)  # Wait before retrying in case of an error

        time.sleep(10)  # Adjust this delay as needed

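monitor_youtube_chat polls on a fixed 10-second delay. liveChatMessages.list responses also carry a pollingIntervalMillis field, so the sleep could follow the API's own hint instead; a two-line sketch of that variant:

wait_ms = response.get('pollingIntervalMillis', 10000)  # API-suggested cadence, default 10 s
time.sleep(wait_ms / 1000.0)
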
# Discord Event Handlers
@client.event
async def on_ready():
    print(f'We have logged in as {client.user}')


@client.event
async def on_message(message):
    if message.author == client.user:
        return

    # Train Jade with the new message
    model.train_on_message(message.content)
    # Link the Discord user to the correct global profile UUID
    uuid = profile_manager.get_or_create_uuid(discord_id=str(message.author.id))

    # Generate a response using Jade
    response = model.generate_response(message.content)
    await message.channel.send(response)
    # Ensure user has opted in before interacting
    if not profile_manager.has_opted_in(uuid):
        await message.channel.send("Please type '!optin' to confirm that you agree to data usage and interaction with this bot.")
        return

# Start the bot with your token
client.run(os.getenv('DISCORD_TOKEN'))

    if message.content.lower() == '!optin':
        profile_manager.set_opt_in(uuid, True)
        await message.channel.send("You have successfully opted in to data usage.")
        return

    # Add the message to conversation history for context
    conversation_history.append(message.content)

    # Generate a response using Jade with context
    context = "\n".join(conversation_history)
    response = model.generate_response(context)
    if response:
        await message.channel.send(response)

    print(f"Discord Interaction: User {message.author} (UUID: {uuid})")

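Read together with the opt-in comments, the handler is meant to check consent before a message ever reaches the model. A condensed sketch of that ordering (same names as above; placing the consent check first is an editorial assumption, not code from this commit):

async def handle_message(message):
    uuid = profile_manager.get_or_create_uuid(discord_id=str(message.author.id))
    if message.content.lower() == '!optin':
        profile_manager.set_opt_in(uuid, True)
        await message.channel.send("You have successfully opted in to data usage.")
        return
    if not profile_manager.has_opted_in(uuid):
        await message.channel.send("Please type '!optin' to confirm that you agree to data usage and interaction with this bot.")
        return
    model.train_on_message(message.content)   # only trains after consent
    conversation_history.append(message.content)
    response = model.generate_response("\n".join(conversation_history))
    if response:
        await message.channel.send(response)
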
# Main Function to Start Both Services
def main():
    youtube = get_authenticated_service()
    channel_id = profile_manager.get_or_create_uuid(youtube_id=CHANNEL_HANDLE)
    video_id = find_correct_live_video(youtube, channel_id, STREAM_KEYWORD)
    if video_id:
        live_chat_id = get_live_chat_id(youtube, video_id)
        if live_chat_id:
            print("Monitoring YouTube live chat...")
            monitor_youtube_chat(youtube, live_chat_id)
        else:
            print("No live chat ID available.")
    else:
        print("Could not find the correct live stream or it is not live.")

    client.run(os.getenv('DISCORD_TOKEN'))

if __name__ == "__main__":
    main()
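
As written, monitor_youtube_chat loops forever, so client.run at the end of main() is only reached when no live stream is found. One way to genuinely start both services, per the section title, is to poll YouTube on a daemon thread while the Discord client owns the main thread; a sketch under that assumption (it also passes CHANNEL_HANDLE straight to the search call rather than a profile UUID):

import threading

def start_both_services():
    youtube = get_authenticated_service()
    video_id = find_correct_live_video(youtube, CHANNEL_HANDLE, STREAM_KEYWORD)
    if video_id:
        live_chat_id = get_live_chat_id(youtube, video_id)
        if live_chat_id:
            threading.Thread(
                target=monitor_youtube_chat,
                args=(youtube, live_chat_id),
                daemon=True,  # dies with the main thread
            ).start()
    client.run(os.getenv('DISCORD_TOKEN'))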

model.py (98 lines changed)
@@ -1,17 +1,14 @@
# Suggested Refinements for Jade (Model.py)

import torch
import torch.nn as nn
import torch.optim as optim
import random
import string
import numpy as np
import os
from torch.cuda.amp import GradScaler, autocast

class JadeModel(nn.Module):
    def __init__(self):
    def __init__(self, load_model_path=None):
        super(JadeModel, self).__init__()
        # GPT-like Transformer architecture
        self.vocab_size = 256  # Character-level tokenization (ASCII range)
        self.vocab_size = 512  # Character-level tokenization (ASCII range)
        self.embedding_dim = 768  # GPT-like embedding dimension
        self.num_heads = 12  # Number of attention heads
        self.num_layers = 12  # Number of transformer layers
@@ -31,19 +28,20 @@ class JadeModel(nn.Module):
        self.fc = nn.Linear(self.embedding_dim, self.vocab_size)
        self.softmax = nn.Softmax(dim=-1)

        # Optimizer and loss function
        self.optimizer = optim.Adam(self.parameters(), lr=0.001)
        self.criterion = nn.CrossEntropyLoss()

        # Device setup
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.to(self.device)

        # Debug message to verify changes (updated unique message for each change)
        self.debug_message = "[DEBUG] Model initialized with version: Jade-Solstice-Horizon"
        print(self.debug_message)
        # Load model state if path is provided
        if load_model_path and os.path.exists(load_model_path):
            self.load_model(load_model_path)
            print(f"Model loaded from {load_model_path}")

    def forward(self, input_ids):
        # Truncate input_ids if longer than max_position_embeddings
        if input_ids.size(1) > self.max_position_embeddings:
            input_ids = input_ids[:, -self.max_position_embeddings:]

        # Create position ids for input sequence
        seq_length = input_ids.size(1)
        position_ids = torch.arange(0, seq_length, dtype=torch.long, device=self.device)
@@ -60,16 +58,18 @@
        x = self.fc(x)
        return x

    def generate_response(self, input_text, initial_temperature=0.85, top_p=0.8, repetition_penalty=1.4, max_token_frequency=2):
    def generate_response(self, input_text, initial_temperature=0.85, top_p=0.8, repetition_penalty=1.4, max_token_frequency=2, max_length=50, min_response_length=5):
        # Convert input_text to token ids
        input_ids = self.tokenize(input_text)
        if len(input_ids) > self.max_position_embeddings:
            input_ids = input_ids[-self.max_position_embeddings:]  # Truncate if too long
        input_tensor = torch.tensor(input_ids).unsqueeze(0).to(self.device)
        generated_tokens = input_ids.copy()
        recent_tokens = list(input_ids[-10:])  # Expanded recent tokens window to 10
        generated_tokens = input_ids.copy()  # Start with input tokens to use as context
        temperature = initial_temperature
        recent_tokens = list(input_ids[-10:])  # Expanded recent tokens window to 10

        with torch.no_grad():
            for i in range(50):  # Generate up to 50 more tokens
        with torch.no_grad(), autocast():
            for _ in range(max_length):  # Generate up to max_length more tokens
                output = self.forward(input_tensor)
                logits = output[:, -1, :]  # Consider only the last token's logits
                logits = logits / (temperature + 1e-2)  # Apply temperature for sampling diversity
@@ -79,9 +79,6 @@
                    if generated_tokens.count(token) > 1:
                        logits[0, token] /= (repetition_penalty + generated_tokens.count(token) * 0.02)  # Frequency-based scaling for penalty

                # Apply slight logits smoothing to avoid overly confident peaks
                logits = logits - torch.mean(logits) * 0.01

                # Dynamic Nucleus (top-p) sampling with adjusted threshold
                sorted_logits, sorted_indices = torch.sort(logits, descending=True)
                cumulative_probs = torch.cumsum(self.softmax(sorted_logits), dim=-1)
@@ -95,18 +92,8 @@
                else:
                    sampled_token = sorted_indices[0, 0].item()  # Fallback to the most probable token if none pass the top-p threshold

                # Enforce diversity constraint by limiting token frequency
                if generated_tokens.count(sampled_token) >= max_token_frequency:
                    logits[0, sampled_token] -= 1.5  # Adjusted penalty to limit token frequency
                    continue  # Skip adding this token if it has reached the max frequency

                # Stop repetition if the sampled token was recently repeated
                if len(generated_tokens) > 1 and generated_tokens[-1] == sampled_token:
                    continue

                # Add token and update state
                generated_tokens.append(sampled_token)
                recent_tokens.append(sampled_token)
                if len(recent_tokens) > 10:
                    recent_tokens.pop(0)  # Maintain a window of recent tokens to suppress

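The block above layers temperature, a frequency-scaled repetition penalty, and nucleus (top-p) filtering. Isolated from the rest of generate_response, the top-p step keeps only the smallest set of tokens whose cumulative probability stays within the threshold; a self-contained sketch of just that step (generic PyTorch, not this file's exact code):

import torch

def sample_top_p(logits, top_p=0.8):
    # Sort probabilities, keep the smallest prefix with cumulative mass <= top_p,
    # renormalize, and sample one token id from that nucleus.
    probs = torch.softmax(logits, dim=-1)
    sorted_probs, sorted_indices = torch.sort(probs, descending=True)
    cumulative = torch.cumsum(sorted_probs, dim=-1)
    keep = cumulative <= top_p
    keep[..., 0] = True  # always keep the most probable token as a fallback
    nucleus = sorted_probs * keep
    nucleus = nucleus / nucleus.sum(dim=-1, keepdim=True)
    choice = torch.multinomial(nucleus, num_samples=1)
    return sorted_indices.gather(-1, choice).item()

print(sample_top_p(torch.randn(1, 512)))
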
@@ -116,43 +103,16 @@
                # Gradually decrease temperature to reduce randomness more smoothly
                temperature = max(0.75, temperature * 0.98)

        response = self.detokenize(generated_tokens)
        print("[DEBUG] Generated response:", response)  # Debug statement to verify changes
        print(f"[DEBUG] Generation loss rate (approximated): {temperature}")  # Approximate loss rate
        return response
        response = self.detokenize(generated_tokens[len(input_ids):])  # Exclude the input from the response
        return response if len(response.strip()) > 0 else None

    def load_model(self, path):
        self.load_state_dict(torch.load(path, map_location=self.device))

    # Placeholder tokenization method (to be replaced with optimized tokenizer)
    def tokenize(self, text):
        # Character-level tokenizer: converts text to ASCII values
        token_ids = [ord(char) for char in text if ord(char) < self.vocab_size]
        return token_ids
        return [ord(c) for c in text]

    def detokenize(self, token_ids):
        # Detokenizer to convert ASCII values back to characters
        return "".join([chr(id) for id in token_ids])

    def train_on_message(self, message):
        # Tokenize the message
        input_ids = self.tokenize(message)
        input_tensor = torch.tensor(input_ids).unsqueeze(0).to(self.device)

        # Create target labels (next character prediction task)
        labels = input_ids[1:] + [input_ids[-1]]  # Shift tokens for training
        labels_tensor = torch.tensor(labels).unsqueeze(0).to(self.device)

        # Training step
        self.optimizer.zero_grad()
        outputs = self.forward(input_tensor)
        loss = self.criterion(outputs.view(-1, outputs.size(-1)), labels_tensor.view(-1))
        loss.backward()
        self.optimizer.step()
        print(f"Training loss: {loss.item()}")

# Changes made:
# Version: Jade-Solstice-Horizon
# - Reverted temperature, top_p, and repetition penalty settings to be closer to Solstice.
# - Introduced explicit stop criteria to prevent repeating tokens consecutively.
# - Applied slight smoothing to logits to prevent high peaks and excessive repetition.
# - Updated debug message to reflect the new version.

# Observations:
# - Aimed to retain the strengths of Solstice while reducing remaining issues with repetitive tokens by adding specific repetition stop criteria.
    # Placeholder detokenization method (to be replaced with optimized tokenizer)
    def detokenize(self, tokens):
        return ''.join([chr(t) for t in tokens])
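
The character-level scheme is easiest to see on a concrete string: tokenize maps each character to its code point, and train_on_message shifts that sequence by one position to build next-character targets. A worked example using the ord/chr scheme above:

text = "Hi"
input_ids = [ord(c) for c in text]        # [72, 105]
labels = input_ids[1:] + [input_ids[-1]]  # [105, 105]: each position predicts the following character
assert ''.join(chr(t) for t in input_ids) == "Hi"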