7 changed files with 301 additions and 113 deletions
--- a/.github/workflows/discord_sync.yml
+++ b/.github/workflows/discord_sync.yml
@ -0,0 +1,15 @@
 # Posts a message to a Discord channel (via webhook) for every push.
 name: Discord Webhook
 on: [push]
 jobs:
  git:
    runs-on: ubuntu-latest
    steps:
    # v2 of the checkout action targets a retired Node runtime; use the current major.
    - uses: actions/checkout@v4
    - name: Run Discord Webhook
      # NOTE(review): "@main" tracks a mutable branch of a third-party action;
      # consider pinning to a release tag or commit SHA.
      uses: johnnyhuy/actions-discord-git-webhook@main
      with:
        # NOTE(review): the secret name looks like a template placeholder;
        # confirm a repository secret with exactly this name exists.
        webhook_url: ${{ secrets.YOUR_DISCORD_WEBHOOK_URL }}
--- a/.gitignore
+++ b/.gitignore
@ -1,4 +1,3 @@
 # ---> Python
 # Byte-compiled / optimized / DLL files
 __pycache__/
 *.py[cod]
@ -107,8 +106,10 @@ ipython_config.py
 #pdm.lock
 #   pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
 #   in version control.
-#   https://pdm.fming.dev/#use-with-ide
+#   https://pdm.fming.dev/latest/usage/project/#working-with-version-control
 .pdm.toml
 .pdm-python
 .pdm-build/
 # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
 __pypackages__/
@ -159,4 +160,5 @@ cython_debug/
 #  and can be added to the global gitignore or merged into this file.  For a more nuclear
 #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
 #.idea/
-
+/client_secret.json
 /token.json
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@ -1,15 +0,0 @@
 {
    // Use IntelliSense to learn about possible attributes.
    // Hover to view descriptions of existing attributes.
    // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
    "version": "0.2.0",
    "configurations": [
        {
            "name": "Jade",
            "type": "debugpy",
            "request": "launch",
            "program": "E:\\Development\\AI Development\\Jade\\main.py",
            "console": "integratedTerminal"
        }
    ]
 }
--- a/9
+++ b/9
@ -1,9 +0,0 @@
 MIT License
 Copyright (c) <year> <copyright holders>
 Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
 The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
--- a/README.md
+++ b/README.md
@ -1,3 +0,0 @@
 # Jade
 Jade is an active-learning AI project.
--- a/main.py
+++ b/main.py
@ -1,10 +1,24 @@
 # main.py: Discord Bot Code
 import discord
 import torch
 from model import JadeModel
 import os
 import sqlite3
 import time
 import torch
 import discord
 from google.oauth2.credentials import Credentials
 from google_auth_oauthlib.flow import InstalledAppFlow
 from google.auth.transport.requests import Request
 from googleapiclient.discovery import build
 from datetime import datetime, timedelta, timezone
 from model import JadeModel
 from dotenv import load_dotenv
 from collections import deque
 import uuid as uuid_lib
 import json
 # Constants
 # OAuth scope requested when authenticating against the YouTube Data API (read-only).
 SCOPES = ['https://www.googleapis.com/auth/youtube.readonly']
 # SQLite file that ProfileManager opens for the global_profiles table.
 DATABASE_FILE = 'global_user_data.db'  # Updated database file name
 # NOTE(review): despite the name, the value has the shape of a YouTube channel ID
 # ("UC..."), not an @handle — confirm and consider renaming.
 CHANNEL_HANDLE = 'UCsVJcf4KbO8Vz308EKpSYxw'
 # Substring (matched case-insensitively) that a live stream's title must contain.
 STREAM_KEYWORD = "Live"
 # Load environment variables
 load_dotenv()
@ -18,23 +32,247 @@ client = discord.Client(intents=intents)
 # Prefer the GPU when CUDA is available, otherwise run on the CPU.
 device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
 model = JadeModel().to(device)
 # Context management for conversation continuity
 # A single module-level deque: every Discord message that reaches the response
 # step is appended here, regardless of which user or channel it came from.
 conversation_history = deque(maxlen=5)  # Store the last 5 messages for context
 # Appended to by monitor_youtube_chat as (display_name, message) tuples.
 training_data = []  # Store live messages for training purposes
 # Profile Manager
 class ProfileManager:
    """SQLite-backed store of cross-platform user profiles.

    Each row of ``global_profiles`` links an internally generated UUID to an
    optional Discord user id and an optional YouTube channel id, together with
    points, subscription status, timestamps and an opt-in flag.

    Every method opens its own short-lived connection and always closes it,
    even when a statement raises.
    """
    def __init__(self, db_path=None):
        """Create the profiles table if it does not exist yet.

        Args:
            db_path: Path of the SQLite file. Defaults to the module-level
                ``DATABASE_FILE`` when omitted.
        """
        self._db_path = DATABASE_FILE if db_path is None else db_path
        self._create_profiles_table()
    def _connect(self):
        """Open a new connection to the profile database."""
        return sqlite3.connect(self._db_path)
    def _create_profiles_table(self):
        """Create ``global_profiles`` when it is missing (idempotent)."""
        conn = self._connect()
        try:
            conn.cursor().execute('''
                CREATE TABLE IF NOT EXISTS global_profiles (
                    uuid TEXT PRIMARY KEY,
                    discord_user_id TEXT UNIQUE,
                    youtube_channel_id TEXT UNIQUE,
                    points INTEGER DEFAULT 0,
                    last_interaction TIMESTAMP,
                    subscription_status TEXT,
                    first_seen_as_member TIMESTAMP,
                    has_opted_in INTEGER DEFAULT 0
                )
            ''')
            conn.commit()
        finally:
            conn.close()
    def get_or_create_uuid(self, discord_id=None, youtube_id=None):
        """Return the profile UUID for the given identity, creating a row if needed.

        The Discord id is looked up first; the YouTube id is only consulted when
        the Discord lookup found nothing. When neither matches, a new profile is
        inserted with whichever ids were supplied.

        Args:
            discord_id: Discord user id, or None.
            youtube_id: YouTube channel id, or None.

        Returns:
            The profile UUID as a string.
        """
        conn = self._connect()
        try:
            cursor = conn.cursor()
            uuid = None
            if discord_id:
                cursor.execute("SELECT uuid FROM global_profiles WHERE discord_user_id = ?", (discord_id,))
                result = cursor.fetchone()
                if result:
                    uuid = result[0]
            if not uuid and youtube_id:
                cursor.execute("SELECT uuid FROM global_profiles WHERE youtube_channel_id = ?", (youtube_id,))
                result = cursor.fetchone()
                if result:
                    uuid = result[0]
            if not uuid:
                uuid = str(uuid_lib.uuid4())
                cursor.execute('''
                    INSERT INTO global_profiles (uuid, discord_user_id, youtube_channel_id)
                    VALUES (?, ?, ?)
                ''', (uuid, discord_id, youtube_id))
                conn.commit()
            return uuid
        finally:
            conn.close()
    def update_subscription_status(self, youtube_id, status):
        """Set the subscription status and refresh ``last_interaction``.

        Args:
            youtube_id: YouTube channel id identifying the profile row.
            status: New value for ``subscription_status``.
        """
        # Store naive UTC as "YYYY-MM-DD HH:MM:SS[.ffffff]" text. This is the same
        # text the implicit sqlite3 datetime adapter produced, written explicitly
        # because that adapter and datetime.utcnow() are deprecated in Python 3.12.
        now_utc = datetime.now(timezone.utc).replace(tzinfo=None).isoformat(" ")
        conn = self._connect()
        try:
            conn.cursor().execute('''
                UPDATE global_profiles
                SET subscription_status = ?, last_interaction = ?
                WHERE youtube_channel_id = ?
            ''', (status, now_utc, youtube_id))
            conn.commit()
        finally:
            conn.close()
    def delete_user_data(self, uuid):
        """Remove a profile row, first exporting it to a JSON file.

        Args:
            uuid: Profile UUID whose row should be deleted.
        """
        # Delete user data to comply with GDPR
        # NOTE(review): the export below keeps a copy of the personal data on
        # disk after the row is deleted — confirm this is the intended behaviour
        # for an erasure request.
        conn = self._connect()
        try:
            cursor = conn.cursor()
            cursor.execute('SELECT * FROM global_profiles WHERE uuid = ?', (uuid,))
            user_data = cursor.fetchone()
            if user_data:
                with open(f'deleted_user_data_{uuid}.json', 'w') as f:
                    json.dump({
                        'uuid': user_data[0],
                        'discord_user_id': user_data[1],
                        'youtube_channel_id': user_data[2],
                        'points': user_data[3],
                        'last_interaction': user_data[4],
                        'subscription_status': user_data[5],
                        'first_seen_as_member': user_data[6],
                        'has_opted_in': user_data[7]
                    }, f)
            cursor.execute('DELETE FROM global_profiles WHERE uuid = ?', (uuid,))
            conn.commit()
        finally:
            conn.close()
    def has_opted_in(self, uuid):
        """Report whether the profile has opted in to data usage.

        Args:
            uuid: Profile UUID to look up.

        Returns:
            True when the row exists and ``has_opted_in`` is 1; False otherwise
            (including when no such profile exists).
        """
        conn = self._connect()
        try:
            cursor = conn.cursor()
            cursor.execute('SELECT has_opted_in FROM global_profiles WHERE uuid = ?', (uuid,))
            result = cursor.fetchone()
        finally:
            conn.close()
        # Always hand back a real bool rather than None / a row tuple.
        return bool(result and result[0] == 1)
    def set_opt_in(self, uuid, opted_in=True):
        """Record the opt-in choice for a profile.

        Args:
            uuid: Profile UUID to update.
            opted_in: True to opt in (stored as 1), False to opt out (stored as 0).
        """
        conn = self._connect()
        try:
            conn.cursor().execute('''
                UPDATE global_profiles
                SET has_opted_in = ?
                WHERE uuid = ?
            ''', (1 if opted_in else 0, uuid))
            conn.commit()
        finally:
            conn.close()
 profile_manager = ProfileManager()
 # YouTube API Functions
 def get_authenticated_service():
    """Build an authenticated YouTube Data API v3 client.

    Credentials cached in ``token.json`` are reused (and refreshed when they
    have expired and carry a refresh token). The interactive OAuth flow from
    ``client_secret.json`` only runs when no usable cached credentials exist.
    The resulting credentials are written back to ``token.json``.

    Returns:
        The service object returned by ``build('youtube', 'v3', ...)``.
    """
    creds = None
    if os.path.exists('token.json'):
        creds = Credentials.from_authorized_user_file('token.json', SCOPES)
    if not creds or not creds.valid:
        if creds and creds.expired and creds.refresh_token:
            # Silent refresh: no browser round-trip needed.
            creds.refresh(Request())
        else:
            flow = InstalledAppFlow.from_client_secrets_file(
                'client_secret.json', SCOPES)
            creds = flow.run_local_server(port=63355)
        with open('token.json', 'w') as token:
            token.write(creds.to_json())
    return build('youtube', 'v3', credentials=creds)
 def find_correct_live_video(youtube, channel_id, keyword):
    """Find the first currently-live video of a channel whose title contains ``keyword``.

    Args:
        youtube: YouTube API client exposing ``search().list(...).execute()``.
        channel_id: Value sent as the ``channelId`` search filter.
        keyword: Text to look for in each title (compared case-insensitively).

    Returns:
        The ``videoId`` of the first matching search result, or None when
        nothing matches.
    """
    search_response = youtube.search().list(
        part="snippet",
        channelId=channel_id,
        eventType="live",
        type="video"
    ).execute()
    for entry in search_response.get('items', []):
        stream_title = entry['snippet']['title']
        if keyword.lower() not in stream_title.lower():
            continue
        return entry['id']['videoId']
    return None
 def get_live_chat_id(youtube, video_id):
    """Look up the active live chat id of a video.

    Args:
        youtube: YouTube API client exposing ``videos().list(...).execute()``.
        video_id: Id of the video to query.

    Returns:
        The ``activeLiveChatId`` string, or None when the video is not found,
        carries no ``liveStreamingDetails``, or has no active chat.
    """
    response = youtube.videos().list(
        part="liveStreamingDetails",
        id=video_id
    ).execute()
    items = response.get('items', [])
    if not items:
        return None
    # The details object can be absent from the item; treat that as "no chat"
    # instead of raising KeyError.
    details = items[0].get('liveStreamingDetails') or {}
    return details.get('activeLiveChatId')
 def monitor_youtube_chat(youtube, live_chat_id):
    """Poll a YouTube live chat forever, recording each author's profile data.

    For every chat message the author's profile UUID is fetched or created,
    the subscription status is set from the ``isChatSponsor`` flag, the message
    is printed, and — for opted-in profiles — it is appended to
    ``training_data`` as ``(display_name, message)``.

    Args:
        youtube: YouTube API client exposing ``liveChatMessages().list(...)``.
        live_chat_id: Id of the live chat to poll.

    Returns:
        False immediately when ``live_chat_id`` is falsy. Otherwise the
        function loops indefinitely and does not return.
    """
    if not live_chat_id:
        print("No valid live chat ID found.")
        return False
    page_token = None
    while True:
        try:
            chat_page = youtube.liveChatMessages().list(
                liveChatId=live_chat_id,
                part="snippet,authorDetails",
                maxResults=200,
                pageToken=page_token
            ).execute()
            chat_items = chat_page.get('items')
            if not chat_items:
                print("No new messages detected; continuing to poll...")
            else:
                for chat_item in chat_items:
                    author_channel = chat_item['authorDetails']['channelId']
                    author_name = chat_item['authorDetails']['displayName']
                    # Read but not used further below.
                    author_is_moderator = chat_item['authorDetails']['isChatModerator']
                    author_is_member = chat_item['authorDetails']['isChatSponsor']
                    chat_text = chat_item['snippet']['displayMessage']
                    profile_uuid = profile_manager.get_or_create_uuid(youtube_id=author_channel)
                    membership = "subscribed" if author_is_member else "none"
                    profile_manager.update_subscription_status(author_channel, membership)
                    print(f"[{datetime.utcnow()}] {author_name}: {chat_text} (UUID: {profile_uuid})")
                    # Only opted-in users contribute to the training data.
                    if profile_manager.has_opted_in(profile_uuid):
                        training_data.append((author_name, chat_text))
                # Continue from where this page ended on the next poll.
                page_token = chat_page.get('nextPageToken')
        except Exception as e:
            print(f"Error while monitoring chat: {e}")
            time.sleep(30)  # Back off before retrying after a failure
        time.sleep(10)  # Pause between polls
 # Discord Event Handlers
@client.event
 async def on_ready():
    """Event handler registered through ``client.event``; prints the client's user."""
    print(f'We have logged in as {client.user}')
@client.event
async def on_message(message):
    """Handle an incoming Discord message.

    Flow:
      1. Ignore messages sent by the bot itself.
      2. Resolve (or create) the author's global profile UUID.
      3. If the message is the ``!optin`` command, record the opt-in and stop.
      4. If the author has not opted in, ask them to and stop.
      5. Otherwise add the message to the shared conversation history and
         reply with the model's response, if it produced one.

    Args:
        message: The Discord message object passed in by the client.
    """
    if message.author == client.user:
        return
    # Link the Discord user to the correct global profile UUID
    uuid = profile_manager.get_or_create_uuid(discord_id=str(message.author.id))
    # The opt-in command must be handled BEFORE the opt-in gate below;
    # otherwise a user who has not opted in yet can never do so.
    if message.content.strip().lower() == '!optin':
        profile_manager.set_opt_in(uuid, True)
        await message.channel.send("You have successfully opted in to data usage.")
        return
    # Ensure user has opted in before interacting
    if not profile_manager.has_opted_in(uuid):
        await message.channel.send("Please type '!optin' to confirm that you agree to data usage and interaction with this bot.")
        return
    # Add the message to conversation history for context
    conversation_history.append(message.content)
    # Generate a response using Jade with context
    # NOTE(review): generate_response and the profile lookups are synchronous
    # calls inside an async handler — confirm the blocking is acceptable.
    context = "\n".join(conversation_history)
    response = model.generate_response(context)
    if response:
        await message.channel.send(response)
    print(f"Discord Interaction: User {message.author} (UUID: {uuid})")
 # Main Function to Start Both Services
 def main():
    """Start the YouTube live-chat monitor (when a stream is live) and the Discord bot.

    The chat monitor loops forever, so it runs on a daemon thread; calling it
    inline would prevent ``client.run`` from ever being reached. The Discord
    client then runs on the main thread.
    """
    import threading  # local import: only needed here
    youtube = get_authenticated_service()
    # Search with the configured channel ID itself. A profile UUID is an
    # internal identifier and is not a valid ``channelId`` search filter.
    video_id = find_correct_live_video(youtube, CHANNEL_HANDLE, STREAM_KEYWORD)
    if video_id:
        live_chat_id = get_live_chat_id(youtube, video_id)
        if live_chat_id:
            print("Monitoring YouTube live chat...")
            # After this point only the monitor thread uses the `youtube` client.
            threading.Thread(
                target=monitor_youtube_chat,
                args=(youtube, live_chat_id),
                daemon=True,
            ).start()
        else:
            print("No live chat ID available.")
    else:
        print("Could not find the correct live stream or it is not live.")
    client.run(os.getenv('DISCORD_TOKEN'))
 if __name__ == "__main__":
    main()
--- a/model.py
+++ b/model.py
@ -1,17 +1,14 @@
 # Suggested Refinements for Jade (Model.py)
 import torch
 import torch.nn as nn
 import torch.optim as optim
-import random
+import os
-import string
+from torch.cuda.amp import GradScaler, autocast
 import numpy as np
 class JadeModel(nn.Module):
-    def __init__(self):
+    def __init__(self, load_model_path=None):
        super(JadeModel, self).__init__()
        # GPT-like Transformer architecture
-        self.vocab_size = 256  # Character-level tokenization (ASCII range)
+        self.vocab_size = 512  # Character-level tokenization (ASCII range)
        self.embedding_dim = 768  # GPT-like embedding dimension
        self.num_heads = 12  # Number of attention heads
        self.num_layers = 12  # Number of transformer layers
@ -31,19 +28,20 @@ class JadeModel(nn.Module):
        self.fc = nn.Linear(self.embedding_dim, self.vocab_size)
        self.softmax = nn.Softmax(dim=-1)
        # Optimizer and loss function
        self.optimizer = optim.Adam(self.parameters(), lr=0.001)
        self.criterion = nn.CrossEntropyLoss()
        # Device setup
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        self.to(self.device)
-        # Debug message to verify changes (updated unique message for each change)
+        # Load model state if path is provided
-        self.debug_message = "[DEBUG] Model initialized with version: Jade-Solstice-Horizon"
+        if load_model_path and os.path.exists(load_model_path):
-        print(self.debug_message)
+            self.load_model(load_model_path)
            print(f"Model loaded from {load_model_path}")
    def forward(self, input_ids):
        # Truncate input_ids if longer than max_position_embeddings
        if input_ids.size(1) > self.max_position_embeddings:
            input_ids = input_ids[:, -self.max_position_embeddings:]
        # Create position ids for input sequence
        seq_length = input_ids.size(1)
        position_ids = torch.arange(0, seq_length, dtype=torch.long, device=self.device)
@ -60,16 +58,18 @@ class JadeModel(nn.Module):
        x = self.fc(x)
        return x
-    def generate_response(self, input_text, initial_temperature=0.85, top_p=0.8, repetition_penalty=1.4, max_token_frequency=2):
+    def generate_response(self, input_text, initial_temperature=0.85, top_p=0.8, repetition_penalty=1.4, max_token_frequency=2, max_length=50, min_response_length=5):
        # Convert input_text to token ids
        input_ids = self.tokenize(input_text)
        if len(input_ids) > self.max_position_embeddings:
            input_ids = input_ids[-self.max_position_embeddings:]  # Truncate if too long
        input_tensor = torch.tensor(input_ids).unsqueeze(0).to(self.device)
-        generated_tokens = input_ids.copy()
+        generated_tokens = input_ids.copy()  # Start with input tokens to use as context
        recent_tokens = list(input_ids[-10:])  # Expanded recent tokens window to 10
        temperature = initial_temperature
        recent_tokens = list(input_ids[-10:])  # Expanded recent tokens window to 10
-        with torch.no_grad():
+        with torch.no_grad(), autocast():
-            for i in range(50):  # Generate up to 50 more tokens
+            for _ in range(max_length):  # Generate up to max_length more tokens
                output = self.forward(input_tensor)
                logits = output[:, -1, :]  # Consider only the last token's logits
                logits = logits / (temperature + 1e-2)  # Apply temperature for sampling diversity
@ -79,9 +79,6 @@ class JadeModel(nn.Module):
                    if generated_tokens.count(token) > 1:
                        logits[0, token] /= (repetition_penalty + generated_tokens.count(token) * 0.02)  # Frequency-based scaling for penalty
                # Apply slight logits smoothing to avoid overly confident peaks
                logits = logits - torch.mean(logits) * 0.01
                # Dynamic Nucleus (top-p) sampling with adjusted threshold
                sorted_logits, sorted_indices = torch.sort(logits, descending=True)
                cumulative_probs = torch.cumsum(self.softmax(sorted_logits), dim=-1)
@ -95,18 +92,8 @@ class JadeModel(nn.Module):
                else:
                    sampled_token = sorted_indices[0, 0].item()  # Fallback to the most probable token if none pass the top-p threshold
                # Enforce diversity constraint by limiting token frequency
                if generated_tokens.count(sampled_token) >= max_token_frequency:
                    logits[0, sampled_token] -= 1.5  # Adjusted penalty to limit token frequency
                    continue  # Skip adding this token if it has reached the max frequency
                # Stop repetition if the sampled token was recently repeated
                if len(generated_tokens) > 1 and generated_tokens[-1] == sampled_token:
                    continue
                # Add token and update state
                generated_tokens.append(sampled_token)
                recent_tokens.append(sampled_token)
                if len(recent_tokens) > 10:
                    recent_tokens.pop(0)  # Maintain a window of recent tokens to suppress
@ -115,44 +102,17 @@ class JadeModel(nn.Module):
                # Gradually decrease temperature to reduce randomness more smoothly
                temperature = max(0.75, temperature * 0.98)
        response = self.detokenize(generated_tokens)
        print("[DEBUG] Generated response:", response)  # Debug statement to verify changes
        print(f"[DEBUG] Generation loss rate (approximated): {temperature}")  # Approximate loss rate
        return response
        response = self.detokenize(generated_tokens[len(input_ids):])  # Exclude the input from the response
        return response if len(response.strip()) > 0 else None
    def load_model(self, path):
        """Load saved weights from ``path`` into this module.

        Args:
            path: File previously written with ``torch.save`` holding a state dict.
        """
        # NOTE(review): torch.load unpickles the file; only load checkpoints
        # from a trusted source.
        state_dict = torch.load(path, map_location=self.device)
        self.load_state_dict(state_dict)
    # Placeholder tokenization method (to be replaced with optimized tokenizer)
    def tokenize(self, text):
        """Convert text to a list of character code points.

        Args:
            text: Input string.

        Returns:
            One integer per character, in order, skipping every character whose
            code point is not below ``self.vocab_size``.
        """
        # Ids >= vocab_size fall outside the model's output range (the final
        # layer emits vocab_size logits) and presumably outside its embedding
        # table as well, so such characters are dropped rather than passed on.
        return [code for code in map(ord, text) if code < self.vocab_size]
-    def detokenize(self, token_ids):
+    # Placeholder detokenization method (to be replaced with optimized tokenizer)
-        # Detokenizer to convert ASCII values back to characters
+    def detokenize(self, tokens):
-        return "".join([chr(id) for id in token_ids])
+        return ''.join([chr(t) for t in tokens])
    def train_on_message(self, message):
        """Run one next-character training step on a single message.

        Args:
            message: Text to train on. Messages that tokenize to nothing are
                skipped without touching the optimizer.
        """
        # Tokenize the message
        input_ids = self.tokenize(message)
        if not input_ids:
            # Nothing to learn from; also avoids indexing into an empty list below.
            return
        # forward() keeps only the last max_position_embeddings tokens, so trim
        # here first — otherwise outputs and labels would differ in length.
        if len(input_ids) > self.max_position_embeddings:
            input_ids = input_ids[-self.max_position_embeddings:]
        input_tensor = torch.tensor(input_ids).unsqueeze(0).to(self.device)
        # Create target labels (next character prediction task)
        labels = input_ids[1:] + [input_ids[-1]]  # Shift tokens for training
        labels_tensor = torch.tensor(labels).unsqueeze(0).to(self.device)
        # Training step
        self.optimizer.zero_grad()
        outputs = self.forward(input_tensor)
        loss = self.criterion(outputs.view(-1, outputs.size(-1)), labels_tensor.view(-1))
        loss.backward()
        self.optimizer.step()
        print(f"Training loss: {loss.item()}")
 # Changes made:
 # Version: Jade-Solstice-Horizon
 # - Reverted temperature, top_p, and repetition penalty settings to be closer to Solstice.
 # - Introduced explicit stop criteria to prevent repeating tokens consecutively.
 # - Applied slight smoothing to logits to prevent high peaks and excessive repetition.
 # - Updated debug message to reflect the new version.
 # Observations:
 # - Aimed to retain the strengths of Solstice while reducing remaining issues with repetitive tokens by adding specific repetition stop criteria.
		`@ -1,3 +0,0 @@`
			`# Jade`

			`Jade is an active learning AI project`