Mai/src/memory/storage/sqlite_manager.py
Commit 1e4ceec820 by Mai Development
feat(04-07): implement get_conversation_metadata and get_recent_messages methods
- Added get_conversation_metadata method for retrieving comprehensive conversation metadata
- Added get_recent_messages method for retrieving recent messages by conversation
- Methods support topic analysis and engagement metrics
- Includes temporal patterns, context clues, and relationship analysis
- Follows existing SQLiteManager patterns and error handling
2026-01-28 13:12:59 -05:00
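
A minimal sketch of how the two new methods are invoked (the import path, database filename, and conversation ID below are illustrative assumptions, not values taken from this commit):

    from memory.storage.sqlite_manager import SQLiteManager  # hypothetical import path

    manager = SQLiteManager("mai_memory.db")
    recent = manager.get_recent_messages("conv-123", limit=5)
    metadata = manager.get_conversation_metadata(["conv-123"])
    manager.close()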


"""
SQLite database manager for conversation memory storage.
This module provides SQLite database operations and schema management
for storing conversations, messages, and associated metadata.
"""
import sqlite3
import threading
from datetime import datetime
from typing import Optional, Dict, Any, List
import json
import logging
# Import from existing models module
import sys
import os
sys.path.append(os.path.join(os.path.dirname(__file__), "..", ".."))
from models.conversation import Message, MessageRole, ConversationMetadata
class SQLiteManager:
"""
SQLite database manager with connection pooling and thread safety.
Manages conversations, messages, and metadata with proper indexing
and migration support for persistent storage.
"""
def __init__(self, db_path: str):
"""
Initialize SQLite manager with database path.
Args:
db_path: Path to SQLite database file
"""
self.db_path = db_path
self._local = threading.local()
self.logger = logging.getLogger(__name__)
self._initialize_database()
def _get_connection(self) -> sqlite3.Connection:
"""
Get thread-local database connection.
Returns:
SQLite connection for current thread
"""
if not hasattr(self._local, "connection"):
self._local.connection = sqlite3.connect(
self.db_path, check_same_thread=False, timeout=30.0
)
self._local.connection.row_factory = sqlite3.Row
# Enable WAL mode for better concurrency
self._local.connection.execute("PRAGMA journal_mode=WAL")
# Enable foreign key constraints
self._local.connection.execute("PRAGMA foreign_keys=ON")
# Optimize for performance
self._local.connection.execute("PRAGMA synchronous=NORMAL")
self._local.connection.execute("PRAGMA cache_size=10000")
return self._local.connection
def _initialize_database(self) -> None:
"""
Initialize database schema with all required tables.
Creates conversations, messages, and metadata tables with proper
indexing and relationships for efficient querying.
"""
conn = sqlite3.connect(self.db_path)
try:
# Enable WAL mode for better concurrency
conn.execute("PRAGMA journal_mode=WAL")
conn.execute("PRAGMA foreign_keys=ON")
# Create conversations table
conn.execute("""
CREATE TABLE IF NOT EXISTS conversations (
id TEXT PRIMARY KEY,
title TEXT,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
metadata TEXT DEFAULT '{}',
session_id TEXT,
total_messages INTEGER DEFAULT 0,
total_tokens INTEGER DEFAULT 0,
context_window_size INTEGER DEFAULT 4096,
model_history TEXT DEFAULT '[]'
)
""")
# Create messages table
conn.execute("""
CREATE TABLE IF NOT EXISTS messages (
id TEXT PRIMARY KEY,
conversation_id TEXT NOT NULL,
role TEXT NOT NULL CHECK (role IN ('user', 'assistant', 'system', 'tool_call', 'tool_result')),
content TEXT NOT NULL,
timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
token_count INTEGER DEFAULT 0,
importance_score REAL DEFAULT 0.5 CHECK (importance_score >= 0.0 AND importance_score <= 1.0),
metadata TEXT DEFAULT '{}',
embedding_id TEXT,
FOREIGN KEY (conversation_id) REFERENCES conversations(id) ON DELETE CASCADE
)
""")
# Create indexes for efficient querying
conn.execute(
"CREATE INDEX IF NOT EXISTS idx_messages_conversation_id ON messages(conversation_id)"
)
conn.execute(
"CREATE INDEX IF NOT EXISTS idx_messages_timestamp ON messages(timestamp)"
)
conn.execute(
"CREATE INDEX IF NOT EXISTS idx_messages_role ON messages(role)"
)
conn.execute(
"CREATE INDEX IF NOT EXISTS idx_conversations_created_at ON conversations(created_at)"
)
conn.execute(
"CREATE INDEX IF NOT EXISTS idx_conversations_updated_at ON conversations(updated_at)"
)
# Create metadata table for application state
conn.execute("""
CREATE TABLE IF NOT EXISTS app_metadata (
key TEXT PRIMARY KEY,
value TEXT NOT NULL,
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
)
""")
# Insert initial schema version
conn.execute("""
INSERT OR IGNORE INTO app_metadata (key, value)
VALUES ('schema_version', '1.0.0')
""")
conn.commit()
self.logger.info(f"Database initialized: {self.db_path}")
except Exception as e:
conn.rollback()
self.logger.error(f"Failed to initialize database: {e}")
raise
finally:
conn.close()
def create_conversation(
self,
conversation_id: str,
title: Optional[str] = None,
session_id: Optional[str] = None,
metadata: Optional[Dict[str, Any]] = None,
) -> None:
"""
Create a new conversation.
Args:
conversation_id: Unique conversation identifier
title: Optional conversation title
session_id: Optional session identifier
metadata: Optional metadata dictionary
"""
conn = self._get_connection()
try:
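            # Title and session_id intentionally fall back to the
            # conversation ID when not supplied.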
conn.execute(
"""
INSERT INTO conversations
(id, title, session_id, metadata)
VALUES (?, ?, ?, ?)
""",
(
conversation_id,
title or conversation_id,
session_id or conversation_id,
json.dumps(metadata or {}),
),
)
conn.commit()
self.logger.debug(f"Created conversation: {conversation_id}")
except Exception as e:
conn.rollback()
self.logger.error(f"Failed to create conversation {conversation_id}: {e}")
raise
def add_message(
self,
message_id: str,
conversation_id: str,
role: str,
content: str,
token_count: int = 0,
importance_score: float = 0.5,
metadata: Optional[Dict[str, Any]] = None,
embedding_id: Optional[str] = None,
) -> None:
"""
Add a message to a conversation.
Args:
message_id: Unique message identifier
conversation_id: Target conversation ID
role: Message role (user/assistant/system/tool_call/tool_result)
content: Message content
token_count: Estimated token count
importance_score: Importance score 0.0-1.0
metadata: Optional message metadata
embedding_id: Optional embedding reference
"""
conn = self._get_connection()
try:
# Add message
conn.execute(
"""
INSERT INTO messages
(id, conversation_id, role, content, token_count, importance_score, metadata, embedding_id)
VALUES (?, ?, ?, ?, ?, ?, ?, ?)
""",
(
message_id,
conversation_id,
role,
content,
token_count,
importance_score,
json.dumps(metadata or {}),
embedding_id,
),
)
# Update conversation stats
conn.execute(
"""
UPDATE conversations
SET
total_messages = total_messages + 1,
total_tokens = total_tokens + ?,
updated_at = CURRENT_TIMESTAMP
WHERE id = ?
""",
(token_count, conversation_id),
)
conn.commit()
self.logger.debug(
f"Added message {message_id} to conversation {conversation_id}"
)
except Exception as e:
conn.rollback()
self.logger.error(f"Failed to add message {message_id}: {e}")
raise
def get_conversation(
self, conversation_id: str, include_messages: bool = True
) -> Optional[Dict[str, Any]]:
"""
Get conversation details.
Args:
conversation_id: Conversation ID to retrieve
include_messages: Whether to include messages
Returns:
Conversation data or None if not found
"""
conn = self._get_connection()
try:
# Get conversation info
cursor = conn.execute(
"""
SELECT * FROM conversations WHERE id = ?
""",
(conversation_id,),
)
conversation = cursor.fetchone()
if not conversation:
return None
result = {
"id": conversation["id"],
"title": conversation["title"],
"created_at": conversation["created_at"],
"updated_at": conversation["updated_at"],
"metadata": json.loads(conversation["metadata"]),
"session_id": conversation["session_id"],
"total_messages": conversation["total_messages"],
"total_tokens": conversation["total_tokens"],
"context_window_size": conversation["context_window_size"],
"model_history": json.loads(conversation["model_history"]),
}
if include_messages:
cursor = conn.execute(
"""
SELECT * FROM messages
WHERE conversation_id = ?
ORDER BY timestamp ASC
""",
(conversation_id,),
)
messages = []
for row in cursor:
messages.append(
{
"id": row["id"],
"conversation_id": row["conversation_id"],
"role": row["role"],
"content": row["content"],
"timestamp": row["timestamp"],
"token_count": row["token_count"],
"importance_score": row["importance_score"],
"metadata": json.loads(row["metadata"]),
"embedding_id": row["embedding_id"],
}
)
result["messages"] = messages
return result
except Exception as e:
self.logger.error(f"Failed to get conversation {conversation_id}: {e}")
raise
def get_recent_conversations(
self, limit: int = 10, offset: int = 0
) -> List[Dict[str, Any]]:
"""
Get recent conversations.
Args:
limit: Maximum number of conversations to return
offset: Offset for pagination
Returns:
List of conversation summaries
"""
conn = self._get_connection()
try:
cursor = conn.execute(
"""
SELECT
id, title, created_at, updated_at,
total_messages, total_tokens, session_id
FROM conversations
ORDER BY updated_at DESC
LIMIT ? OFFSET ?
""",
(limit, offset),
)
conversations = []
for row in cursor:
conversations.append(
{
"id": row["id"],
"title": row["title"],
"created_at": row["created_at"],
"updated_at": row["updated_at"],
"total_messages": row["total_messages"],
"total_tokens": row["total_tokens"],
"session_id": row["session_id"],
}
)
return conversations
except Exception as e:
self.logger.error(f"Failed to get recent conversations: {e}")
raise
def get_messages_by_role(
self, conversation_id: str, role: str, limit: Optional[int] = None
) -> List[Dict[str, Any]]:
"""
Get messages from a conversation filtered by role.
Args:
conversation_id: Conversation ID
role: Message role filter
limit: Optional message limit
Returns:
List of messages
"""
conn = self._get_connection()
try:
query = """
SELECT * FROM messages
WHERE conversation_id = ? AND role = ?
ORDER BY timestamp ASC
"""
params = [conversation_id, role]
            if limit is not None:
query += " LIMIT ?"
params.append(limit)
cursor = conn.execute(query, tuple(params))
messages = []
for row in cursor:
messages.append(
{
"id": row["id"],
"conversation_id": row["conversation_id"],
"role": row["role"],
"content": row["content"],
"timestamp": row["timestamp"],
"token_count": row["token_count"],
"importance_score": row["importance_score"],
"metadata": json.loads(row["metadata"]),
"embedding_id": row["embedding_id"],
}
)
return messages
except Exception as e:
self.logger.error(f"Failed to get messages by role {role}: {e}")
raise
def get_recent_messages(
self, conversation_id: str, limit: int = 10, offset: int = 0
) -> List[Dict[str, Any]]:
"""
Get recent messages from a conversation.
Args:
conversation_id: Conversation ID
limit: Maximum number of messages to return
offset: Offset for pagination
Returns:
List of messages ordered by timestamp (newest first)
"""
conn = self._get_connection()
try:
query = """
SELECT * FROM messages
WHERE conversation_id = ?
ORDER BY timestamp DESC
LIMIT ? OFFSET ?
"""
cursor = conn.execute(query, (conversation_id, limit, offset))
messages = []
for row in cursor:
messages.append(
{
"id": row["id"],
"conversation_id": row["conversation_id"],
"role": row["role"],
"content": row["content"],
"timestamp": row["timestamp"],
"token_count": row["token_count"],
"importance_score": row["importance_score"],
"metadata": json.loads(row["metadata"]),
"embedding_id": row["embedding_id"],
}
)
return messages
except Exception as e:
self.logger.error(f"Failed to get recent messages: {e}")
raise
def get_conversation_metadata(
self, conversation_ids: List[str]
) -> Dict[str, Dict[str, Any]]:
"""
Get comprehensive metadata for specified conversations.
Args:
conversation_ids: List of conversation IDs to retrieve metadata for
Returns:
Dictionary mapping conversation_id to comprehensive metadata
"""
conn = self._get_connection()
try:
            metadata = {}
            if not conversation_ids:
                return metadata
            # Build placeholders for the IN clause; an empty ID list would
            # otherwise produce invalid SQL ("WHERE id IN ()").
            placeholders = ",".join(["?" for _ in conversation_ids])
# Get basic conversation metadata
cursor = conn.execute(
f"""
SELECT
id, title, created_at, updated_at, metadata,
session_id, total_messages, total_tokens, context_window_size,
model_history
FROM conversations
WHERE id IN ({placeholders})
ORDER BY updated_at DESC
""",
conversation_ids,
)
conversations_data = cursor.fetchall()
for conv in conversations_data:
conv_id = conv["id"]
# Parse JSON metadata fields
try:
conv_metadata = (
json.loads(conv["metadata"]) if conv["metadata"] else {}
)
model_history = (
json.loads(conv["model_history"])
if conv["model_history"]
else []
)
except json.JSONDecodeError:
conv_metadata = {}
model_history = []
# Initialize metadata structure
metadata[conv_id] = {
# Basic conversation metadata
"conversation_info": {
"id": conv_id,
"title": conv["title"],
"created_at": conv["created_at"],
"updated_at": conv["updated_at"],
"session_id": conv["session_id"],
"total_messages": conv["total_messages"],
"total_tokens": conv["total_tokens"],
"context_window_size": conv["context_window_size"],
},
# Topic information from metadata
"topic_info": {
"main_topics": conv_metadata.get("main_topics", []),
"topic_frequency": conv_metadata.get("topic_frequency", {}),
"topic_sentiment": conv_metadata.get("topic_sentiment", {}),
"primary_topic": conv_metadata.get("primary_topic", "general"),
},
# Conversation metadata
"metadata": conv_metadata,
# Model history
"model_history": model_history,
}
# Calculate engagement metrics for each conversation
for conv_id in conversation_ids:
if conv_id in metadata:
# Get message statistics
cursor = conn.execute(
"""
SELECT
role,
COUNT(*) as count,
AVG(importance_score) as avg_importance,
MIN(timestamp) as first_message,
MAX(timestamp) as last_message
FROM messages
WHERE conversation_id = ?
GROUP BY role
""",
(conv_id,),
)
role_stats = cursor.fetchall()
# Calculate engagement metrics
total_user_messages = 0
total_assistant_messages = 0
total_importance = 0
message_count = 0
first_message_time = None
last_message_time = None
for stat in role_stats:
if stat["role"] == "user":
total_user_messages = stat["count"]
elif stat["role"] == "assistant":
total_assistant_messages = stat["count"]
total_importance += stat["avg_importance"] or 0
message_count += stat["count"]
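                        # ISO-format timestamp strings compare correctly in
                        # lexicographic order, so plain < and > are safe here.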
if (
not first_message_time
or stat["first_message"] < first_message_time
):
first_message_time = stat["first_message"]
if (
not last_message_time
or stat["last_message"] > last_message_time
):
last_message_time = stat["last_message"]
# Calculate user message ratio
user_message_ratio = total_user_messages / max(1, message_count)
# Add engagement metrics
metadata[conv_id]["engagement_metrics"] = {
"message_count": message_count,
"user_message_count": total_user_messages,
"assistant_message_count": total_assistant_messages,
"user_message_ratio": user_message_ratio,
"avg_importance": total_importance / max(1, len(role_stats)),
"conversation_duration_seconds": (
(last_message_time - first_message_time).total_seconds()
if first_message_time and last_message_time
else 0
),
}
# Calculate temporal patterns
if last_message_time:
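                        # strftime('%H') yields the zero-padded hour 00-23;
                        # strftime('%w') yields 0=Sunday through 6=Saturday.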
cursor = conn.execute(
"""
SELECT
strftime('%H', timestamp) as hour,
strftime('%w', timestamp) as day_of_week,
COUNT(*) as count
FROM messages
WHERE conversation_id = ?
GROUP BY hour, day_of_week
""",
(conv_id,),
)
temporal_data = cursor.fetchall()
# Analyze temporal patterns
hour_counts = {}
day_counts = {}
for row in temporal_data:
hour = row["hour"]
day = int(row["day_of_week"])
hour_counts[hour] = hour_counts.get(hour, 0) + row["count"]
day_counts[day] = day_counts.get(day, 0) + row["count"]
# Find most common hour and day
most_common_hour = (
max(hour_counts.items(), key=lambda x: x[1])[0]
if hour_counts
else None
)
most_common_day = (
max(day_counts.items(), key=lambda x: x[1])[0]
if day_counts
else None
)
metadata[conv_id]["temporal_patterns"] = {
"most_common_hour": int(most_common_hour)
if most_common_hour
else None,
"most_common_day": most_common_day,
"hour_distribution": hour_counts,
"day_distribution": day_counts,
"last_activity": last_message_time,
}
else:
metadata[conv_id]["temporal_patterns"] = {
"most_common_hour": None,
"most_common_day": None,
"hour_distribution": {},
"day_distribution": {},
"last_activity": None,
}
# Get related conversations (same session or similar topics)
if metadata[conv_id]["conversation_info"]["session_id"]:
cursor = conn.execute(
"""
SELECT id, title, updated_at
FROM conversations
WHERE session_id = ? AND id != ?
ORDER BY updated_at DESC
LIMIT 5
""",
(
metadata[conv_id]["conversation_info"]["session_id"],
conv_id,
),
)
related = cursor.fetchall()
metadata[conv_id]["context_clues"] = {
"related_conversations": [
{
"id": r["id"],
"title": r["title"],
"updated_at": r["updated_at"],
"relationship": "same_session",
}
for r in related
]
}
else:
metadata[conv_id]["context_clues"] = {
"related_conversations": []
}
return metadata
except Exception as e:
self.logger.error(f"Failed to get conversation metadata: {e}")
raise
def update_conversation_metadata(
self, conversation_id: str, metadata: Dict[str, Any]
) -> None:
"""
Update conversation metadata.
Args:
conversation_id: Conversation ID
metadata: New metadata dictionary
"""
conn = self._get_connection()
try:
conn.execute(
"""
UPDATE conversations
SET metadata = ?, updated_at = CURRENT_TIMESTAMP
WHERE id = ?
""",
(json.dumps(metadata), conversation_id),
)
conn.commit()
self.logger.debug(f"Updated metadata for conversation {conversation_id}")
except Exception as e:
conn.rollback()
self.logger.error(f"Failed to update conversation metadata: {e}")
raise
def delete_conversation(self, conversation_id: str) -> None:
"""
Delete a conversation and all its messages.
Args:
conversation_id: Conversation ID to delete
"""
conn = self._get_connection()
try:
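            # Deleting the conversation row also removes its messages via
            # the ON DELETE CASCADE foreign key on messages.conversation_id.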
conn.execute("DELETE FROM conversations WHERE id = ?", (conversation_id,))
conn.commit()
self.logger.info(f"Deleted conversation {conversation_id}")
except Exception as e:
conn.rollback()
self.logger.error(f"Failed to delete conversation {conversation_id}: {e}")
raise
def get_database_stats(self) -> Dict[str, Any]:
"""
Get database statistics.
Returns:
Dictionary with database statistics
"""
conn = self._get_connection()
try:
stats = {}
# Conversation stats
cursor = conn.execute("SELECT COUNT(*) as count FROM conversations")
stats["total_conversations"] = cursor.fetchone()["count"]
# Message stats
cursor = conn.execute("SELECT COUNT(*) as count FROM messages")
stats["total_messages"] = cursor.fetchone()["count"]
cursor = conn.execute("SELECT SUM(token_count) as total FROM messages")
result = cursor.fetchone()
stats["total_tokens"] = result["total"] or 0
# Database size
cursor = conn.execute(
"SELECT page_count * page_size as size FROM pragma_page_count(), pragma_page_size()"
)
result = cursor.fetchone()
stats["database_size_bytes"] = result["size"] if result else 0
return stats
except Exception as e:
self.logger.error(f"Failed to get database stats: {e}")
raise
def close(self) -> None:
"""Close database connection."""
if hasattr(self._local, "connection"):
self._local.connection.close()
delattr(self._local, "connection")
self.logger.info("SQLite manager closed")
def __enter__(self):
"""Context manager entry."""
return self
def __exit__(self, exc_type, exc_val, exc_tb):
"""Context manager exit."""
self.close()
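

# Minimal usage sketch. The database path, title, and message content below
# are illustrative placeholders, not values taken from this module.
if __name__ == "__main__":
    import uuid

    manager = SQLiteManager("mai_memory.db")
    conversation_id = str(uuid.uuid4())
    manager.create_conversation(conversation_id, title="Demo conversation")
    manager.add_message(
        message_id=str(uuid.uuid4()),
        conversation_id=conversation_id,
        role="user",
        content="Hello, Mai!",
        token_count=4,
    )
    # Newest-first retrieval and the metadata bundle added in this commit.
    print(manager.get_recent_messages(conversation_id, limit=5))
    print(manager.get_conversation_metadata([conversation_id]))
    manager.close()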