Files
Lyra/lyra/database/manager.py
Dani d9c526fa5c feat: Add database setup guide and local configuration files
- Added DATABASE_SETUP.md with comprehensive guide for PostgreSQL and Redis installation on Windows
- Created .claude/settings.local.json with permission settings for pytest and database fix scripts
- Updated .gitignore to exclude .env.backup file
- Included database connection test utilities in lyra/database_setup.py
- Added environment variable configuration examples for local development
2025-09-29 16:29:18 -04:00

688 lines
24 KiB
Python

"""
Database manager for Lyra's persistent storage.
Handles database connections, transactions, and high-level data operations
with proper error handling and connection pooling.
"""
import asyncio
import logging
from contextlib import asynccontextmanager
from typing import Dict, List, Any, Optional, AsyncGenerator, Union
from datetime import datetime, timedelta
import json
from sqlalchemy import create_engine, text
from sqlalchemy.ext.asyncio import create_async_engine, AsyncSession, async_sessionmaker
from sqlalchemy.orm import sessionmaker, Session
from sqlalchemy.pool import QueuePool
import redis.asyncio as redis
from .models import (
Base, UserModel, ConversationModel, PersonalityStateModel,
EmotionalMemoryModel, KnowledgeModel, LearningProgressModel,
ThinkingProcessModel, EvolutionEventModel, SystemMetricsModel,
PersonalityAdaptationModel
)
logger = logging.getLogger(__name__)
class DatabaseManager:
    """
    Comprehensive database manager for Lyra's data persistence.

    Handles PostgreSQL (or SQLite) for structured data via SQLAlchemy —
    an async engine for normal operations plus a sync engine for admin
    work — and Redis for caching and real-time data, with an automatic
    FakeRedis fallback when no Redis server is reachable.
    """

    def __init__(
        self,
        database_url: str,
        redis_url: str = "redis://localhost:6379/0",
        pool_size: int = 20,
        max_overflow: int = 30,
        echo: bool = False
    ):
        """
        Args:
            database_url: SQLAlchemy URL in sync form (e.g. "postgresql://..."
                or "sqlite:///..."); the async driver variant is derived in
                initialize().
            redis_url: Redis connection URL.
            pool_size: Base connection pool size (ignored for SQLite).
            max_overflow: Extra connections allowed beyond pool_size.
            echo: When True, SQLAlchemy logs every emitted SQL statement.
        """
        self.database_url = database_url
        self.redis_url = redis_url
        self.pool_size = pool_size
        self.max_overflow = max_overflow
        self.echo = echo

        # Engines and session factories — populated by initialize().
        self.engine = None
        self.async_engine = None
        self.Session = None
        self.AsyncSession = None

        # Redis client (real redis.asyncio client or FakeRedis fallback).
        self.redis = None

        # Guard flag checked by the session context managers.
        self.is_connected = False

    async def initialize(self):
        """Initialize database connections and create tables.

        Raises:
            Exception: re-raised after logging if any connection or table
                creation step fails.
        """
        try:
            # Derive an async-driver URL from the configured sync URL.
            database_url = self.database_url
            if "postgresql://" in database_url:
                database_url = database_url.replace("postgresql://", "postgresql+asyncpg://")
            elif database_url.startswith("sqlite://"):
                # FIX: create_async_engine rejects the sync sqlite driver
                # ("the asyncio extension requires an async driver"), so
                # route SQLite through aiosqlite.
                database_url = database_url.replace("sqlite://", "sqlite+aiosqlite://", 1)

            # Configure engine based on database type.
            engine_kwargs = {"echo": self.echo}
            if "sqlite" in database_url:
                # SQLite doesn't support connection pooling in the same way;
                # only keep the liveness check.
                engine_kwargs.update({
                    "pool_pre_ping": True,
                })
            else:
                # PostgreSQL with connection pooling; recycle hourly to avoid
                # stale server-side connections.
                engine_kwargs.update({
                    "poolclass": QueuePool,
                    "pool_size": self.pool_size,
                    "max_overflow": self.max_overflow,
                    "pool_pre_ping": True,
                    "pool_recycle": 3600
                })
            self.async_engine = create_async_engine(database_url, **engine_kwargs)

            # Create a smaller sync engine for admin operations.
            sync_engine_kwargs = {"echo": self.echo}
            if "sqlite" not in self.database_url:
                # Only use pooling for non-SQLite databases.
                sync_engine_kwargs.update({
                    "poolclass": QueuePool,
                    "pool_size": 5,
                    "max_overflow": 10,
                    "pool_pre_ping": True
                })
            else:
                sync_engine_kwargs.update({
                    "pool_pre_ping": True
                })
            self.engine = create_engine(self.database_url, **sync_engine_kwargs)

            # Create session factories. expire_on_commit=False lets returned
            # ORM objects be used after the session commits/closes.
            self.AsyncSession = async_sessionmaker(
                self.async_engine, class_=AsyncSession, expire_on_commit=False
            )
            self.Session = sessionmaker(bind=self.engine)

            # Initialize Redis (with fallback to FakeRedis for local dev
            # environments without a Redis server).
            try:
                self.redis = redis.from_url(self.redis_url, decode_responses=True)
                # Test Redis connection
                await self.redis.ping()
                logger.info("Connected to Redis")
            except Exception as e:
                logger.warning(f"Redis connection failed, using FakeRedis: {e}")
                import fakeredis.aioredis as fakeredis
                self.redis = fakeredis.FakeRedis(decode_responses=True)

            # Create tables
            await self._create_tables()

            # Test connections
            await self._test_connections()

            self.is_connected = True
            logger.info("Database manager initialized successfully")
        except Exception as e:
            logger.error(f"Failed to initialize database manager: {e}")
            raise

    async def _create_tables(self):
        """Create database tables if they don't exist (idempotent)."""
        try:
            async with self.async_engine.begin() as conn:
                await conn.run_sync(Base.metadata.create_all)
            logger.info("Database tables created/verified")
        except Exception as e:
            logger.error(f"Failed to create tables: {e}")
            raise

    async def _test_connections(self):
        """Test database and Redis connections.

        Uses the session factory directly rather than async_session(),
        because is_connected has not been set yet during initialize().
        """
        session = self.AsyncSession()
        try:
            result = await session.execute(text("SELECT 1"))
            assert result.scalar() == 1
            await session.commit()
        except Exception as e:
            await session.rollback()
            raise
        finally:
            await session.close()

        # Test Redis
        await self.redis.ping()
        logger.info("Database connections tested successfully")

    @asynccontextmanager
    async def async_session(self) -> AsyncGenerator[AsyncSession, None]:
        """Async context manager for database sessions.

        Commits on normal exit, rolls back on exception, always closes.

        Raises:
            RuntimeError: if initialize() has not completed.
        """
        if not self.is_connected:
            raise RuntimeError("Database manager not initialized")
        session = self.AsyncSession()
        try:
            yield session
            await session.commit()
        except Exception as e:
            await session.rollback()
            logger.error(f"Database session error: {e}")
            raise
        finally:
            await session.close()

    @asynccontextmanager
    async def sync_session(self) -> AsyncGenerator[Session, None]:
        """Sync-session counterpart of async_session() (same semantics).

        Raises:
            RuntimeError: if initialize() has not completed.
        """
        if not self.is_connected:
            raise RuntimeError("Database manager not initialized")
        session = self.Session()
        try:
            yield session
            session.commit()
        except Exception as e:
            session.rollback()
            logger.error(f"Database session error: {e}")
            raise
        finally:
            session.close()

    # User management
    async def create_user(
        self,
        discord_id: str,
        username: str,
        display_name: Optional[str] = None
    ) -> UserModel:
        """Create a new user record.

        Args:
            discord_id: The user's Discord snowflake ID.
            username: Discord username.
            display_name: Optional display name; defaults to username.

        Returns:
            The persisted UserModel (refreshed, so defaults/IDs are set).
        """
        async with self.async_session() as session:
            user = UserModel(
                discord_id=discord_id,
                username=username,
                display_name=display_name or username
            )
            session.add(user)
            await session.flush()
            await session.refresh(user)
            return user

    async def get_user_by_discord_id(self, discord_id: str) -> Optional[UserModel]:
        """Get user by Discord ID, or None if no such user exists.

        NOTE(review): hydrates a detached UserModel manually from a raw SQL
        row (bypassing the ORM identity map) — the returned object is not
        tracked by any session.
        """
        async with self.async_session() as session:
            result = await session.execute(
                text("SELECT * FROM users WHERE discord_id = :discord_id"),
                {"discord_id": discord_id}
            )
            user_data = result.fetchone()
            if user_data:
                user = UserModel()
                for key, value in user_data._mapping.items():
                    setattr(user, key, value)
                return user
            return None

    async def update_user_interaction(
        self,
        user_id: str,
        satisfaction_rating: Optional[float] = None
    ):
        """Update user interaction metrics (count, timestamp, ratings).

        Silently does nothing if user_id does not exist.
        """
        async with self.async_session() as session:
            user = await session.get(UserModel, user_id)
            if user:
                user.interaction_count += 1
                user.last_interaction = datetime.utcnow()
                if satisfaction_rating is not None:
                    # Reassign (rather than mutate in place) so SQLAlchemy
                    # detects the change to the JSON column.
                    ratings = user.satisfaction_ratings or []
                    ratings.append(satisfaction_rating)
                    # Keep only last 100 ratings
                    user.satisfaction_ratings = ratings[-100:]
                await session.flush()

    # Conversation management
    async def store_conversation(
        self,
        user_id: str,
        channel_id: str,
        message_id: str,
        user_message: str,
        lyra_response: str,
        context: Dict[str, Any],
        emotional_state: Dict[str, Any],
        personality_state: Dict[str, Any],
        thinking_process: List[Dict[str, Any]],
        response_time: float,
        user_satisfaction: Optional[float] = None,
        response_quality: Optional[float] = None
    ) -> ConversationModel:
        """Store a complete conversation interaction.

        Also pushes the conversation onto the per-user Redis cache so
        get_recent_conversations() can avoid a database round trip.

        Returns:
            The persisted ConversationModel.
        """
        async with self.async_session() as session:
            conversation = ConversationModel(
                user_id=user_id,
                channel_id=channel_id,
                message_id=message_id,
                user_message=user_message,
                lyra_response=lyra_response,
                context=context,
                emotional_state=emotional_state,
                personality_state=personality_state,
                thinking_process=thinking_process,
                response_time=response_time,
                user_satisfaction=user_satisfaction,
                response_quality=response_quality
            )
            session.add(conversation)
            await session.flush()
            await session.refresh(conversation)

            # Cache recent conversation for quick access
            await self._cache_recent_conversation(conversation)
            return conversation

    async def get_recent_conversations(
        self,
        user_id: str,
        limit: int = 10
    ) -> List[ConversationModel]:
        """Get recent conversations for a user, newest first.

        Tries the Redis cache first; falls back to the database on a miss.
        """
        cached = await self._get_cached_conversations(user_id, limit)
        if cached:
            return cached

        # Fallback to database
        async with self.async_session() as session:
            result = await session.execute(
                text("""
                    SELECT * FROM conversations
                    WHERE user_id = :user_id
                    ORDER BY timestamp DESC
                    LIMIT :limit
                """),
                {"user_id": user_id, "limit": limit}
            )
            conversations = []
            for row in result.fetchall():
                conv = ConversationModel()
                for key, value in row._mapping.items():
                    setattr(conv, key, value)
                conversations.append(conv)
            return conversations

    # Personality state management
    async def store_personality_state(
        self,
        openness: float,
        conscientiousness: float,
        extraversion: float,
        agreeableness: float,
        neuroticism: float,
        myers_briggs_type: str,
        custom_traits: Dict[str, Any],
        total_interactions: int,
        adaptation_rate: float,
        emotional_maturity: float,
        trigger_event: Optional[str] = None,
        change_magnitude: Optional[float] = None
    ) -> PersonalityStateModel:
        """Store a personality state snapshot.

        Returns:
            The persisted PersonalityStateModel.
        """
        async with self.async_session() as session:
            state = PersonalityStateModel(
                openness=openness,
                conscientiousness=conscientiousness,
                extraversion=extraversion,
                agreeableness=agreeableness,
                neuroticism=neuroticism,
                myers_briggs_type=myers_briggs_type,
                custom_traits=custom_traits,
                total_interactions=total_interactions,
                adaptation_rate=adaptation_rate,
                emotional_maturity=emotional_maturity,
                trigger_event=trigger_event,
                change_magnitude=change_magnitude
            )
            session.add(state)
            await session.flush()
            await session.refresh(state)
            return state

    async def get_personality_evolution(
        self,
        days: int = 30
    ) -> List[PersonalityStateModel]:
        """Get personality evolution over time, oldest first.

        Args:
            days: Look-back window from now.
        """
        cutoff_date = datetime.utcnow() - timedelta(days=days)
        async with self.async_session() as session:
            result = await session.execute(
                text("""
                    SELECT * FROM personality_states
                    WHERE timestamp >= :cutoff_date
                    ORDER BY timestamp ASC
                """),
                {"cutoff_date": cutoff_date}
            )
            states = []
            for row in result.fetchall():
                state = PersonalityStateModel()
                for key, value in row._mapping.items():
                    setattr(state, key, value)
                states.append(state)
            return states

    # Emotional memory management
    async def store_emotional_memory(
        self,
        emotional_state: Dict[str, Any],
        dominant_emotion: str,
        emotion_intensity: float,
        emotional_valence: float,
        context: str,
        trigger: Optional[str],
        impact_score: float,
        conversation_id: Optional[str] = None,
        user_id: Optional[str] = None
    ) -> EmotionalMemoryModel:
        """Store an emotional memory.

        Returns:
            The persisted EmotionalMemoryModel.
        """
        async with self.async_session() as session:
            memory = EmotionalMemoryModel(
                emotional_state=emotional_state,
                dominant_emotion=dominant_emotion,
                emotion_intensity=emotion_intensity,
                emotional_valence=emotional_valence,
                context=context,
                trigger=trigger,
                impact_score=impact_score,
                conversation_id=conversation_id,
                user_id=user_id
            )
            session.add(memory)
            await session.flush()
            await session.refresh(memory)
            return memory

    async def get_significant_emotional_memories(
        self,
        threshold: float = 0.5,
        limit: int = 50
    ) -> List[EmotionalMemoryModel]:
        """Get emotionally significant memories, highest impact first.

        Args:
            threshold: Minimum impact_score to include.
            limit: Maximum number of memories returned.
        """
        async with self.async_session() as session:
            result = await session.execute(
                text("""
                    SELECT * FROM emotional_memories
                    WHERE impact_score >= :threshold
                    ORDER BY impact_score DESC, timestamp DESC
                    LIMIT :limit
                """),
                {"threshold": threshold, "limit": limit}
            )
            memories = []
            for row in result.fetchall():
                memory = EmotionalMemoryModel()
                for key, value in row._mapping.items():
                    setattr(memory, key, value)
                memories.append(memory)
            return memories

    # Knowledge management
    async def store_knowledge(
        self,
        title: str,
        content: str,
        category: str,
        source_type: str,
        summary: Optional[str] = None,
        subcategory: Optional[str] = None,
        source_url: Optional[str] = None,
        source_metadata: Optional[Dict[str, Any]] = None,
        quality_score: float = 0.5,
        relevance_score: float = 0.5,
        embedding_vector: Optional[List[float]] = None,
        keywords: Optional[List[str]] = None,
        related_concepts: Optional[List[str]] = None
    ) -> KnowledgeModel:
        """Store a knowledge item.

        Optional collection arguments default to empty containers rather
        than NULL so downstream consumers need no None checks.

        Returns:
            The persisted KnowledgeModel.
        """
        async with self.async_session() as session:
            knowledge = KnowledgeModel(
                title=title,
                content=content,
                summary=summary,
                category=category,
                subcategory=subcategory,
                source_type=source_type,
                source_url=source_url,
                source_metadata=source_metadata or {},
                quality_score=quality_score,
                relevance_score=relevance_score,
                embedding_vector=embedding_vector,
                keywords=keywords or [],
                related_concepts=related_concepts or []
            )
            session.add(knowledge)
            await session.flush()
            await session.refresh(knowledge)
            return knowledge

    async def search_knowledge(
        self,
        query: str,
        category: Optional[str] = None,
        min_quality: float = 0.3,
        limit: int = 20
    ) -> List[KnowledgeModel]:
        """Search knowledge by text query, best quality first.

        NOTE(review): uses ILIKE, which is PostgreSQL-specific — this query
        will not work against SQLite; in production a full-text index would
        replace this simple pattern match.
        """
        conditions = ["quality_score >= :min_quality"]
        params = {"min_quality": min_quality, "limit": limit}
        if category:
            conditions.append("category = :category")
            params["category"] = category

        # Simple text search (in production, would use full-text search)
        conditions.append("(title ILIKE :query OR content ILIKE :query)")
        params["query"] = f"%{query}%"
        query_sql = f"""
            SELECT * FROM knowledge
            WHERE {' AND '.join(conditions)}
            ORDER BY quality_score DESC, relevance_score DESC
            LIMIT :limit
        """
        async with self.async_session() as session:
            result = await session.execute(text(query_sql), params)
            knowledge_items = []
            for row in result.fetchall():
                item = KnowledgeModel()
                for key, value in row._mapping.items():
                    setattr(item, key, value)
                knowledge_items.append(item)
            return knowledge_items

    # Analytics and metrics
    async def get_conversation_analytics(
        self,
        days: int = 7
    ) -> Dict[str, Any]:
        """Get aggregate conversation analytics for the last `days` days.

        Returns:
            Dict with total_conversations, unique_users, avg_satisfaction,
            avg_quality, avg_response_time, period_days (zeros when there is
            no data in the window).
        """
        cutoff_date = datetime.utcnow() - timedelta(days=days)
        async with self.async_session() as session:
            result = await session.execute(
                text("""
                    SELECT
                        COUNT(*) as total_conversations,
                        COUNT(DISTINCT user_id) as unique_users,
                        AVG(user_satisfaction) as avg_satisfaction,
                        AVG(response_quality) as avg_quality,
                        AVG(response_time) as avg_response_time
                    FROM conversations
                    WHERE timestamp >= :cutoff_date
                """),
                {"cutoff_date": cutoff_date}
            )
            row = result.fetchone()
            return {
                "total_conversations": row.total_conversations or 0,
                "unique_users": row.unique_users or 0,
                "avg_satisfaction": float(row.avg_satisfaction or 0),
                "avg_quality": float(row.avg_quality or 0),
                "avg_response_time": float(row.avg_response_time or 0),
                "period_days": days
            }

    async def store_learning_progress(
        self,
        total_conversations: int,
        total_knowledge_items: int,
        personality_evolution_count: int,
        emotional_memories_count: int,
        avg_user_satisfaction: float,
        avg_response_quality: float,
        conversation_success_rate: float,
        knowledge_categories_mastered: List[str],
        personality_stability: float,
        emotional_maturity: float,
        social_adaptation_score: float,
        self_evolution_events: int = 0,
        conscious_personality_modifications: int = 0,
        meta_learning_instances: int = 0
    ) -> LearningProgressModel:
        """Store learning progress snapshot.

        Returns:
            The persisted LearningProgressModel.
        """
        async with self.async_session() as session:
            progress = LearningProgressModel(
                total_conversations=total_conversations,
                total_knowledge_items=total_knowledge_items,
                personality_evolution_count=personality_evolution_count,
                emotional_memories_count=emotional_memories_count,
                avg_user_satisfaction=avg_user_satisfaction,
                avg_response_quality=avg_response_quality,
                conversation_success_rate=conversation_success_rate,
                knowledge_categories_mastered=knowledge_categories_mastered,
                personality_stability=personality_stability,
                emotional_maturity=emotional_maturity,
                social_adaptation_score=social_adaptation_score,
                self_evolution_events=self_evolution_events,
                conscious_personality_modifications=conscious_personality_modifications,
                meta_learning_instances=meta_learning_instances
            )
            session.add(progress)
            await session.flush()
            await session.refresh(progress)
            return progress

    # Cache management
    async def _cache_recent_conversation(self, conversation: ConversationModel):
        """Cache recent conversation in Redis for quick access.

        Keeps the 20 most recent per user with a 1-hour TTL.
        """
        key = f"conversations:{conversation.user_id}"
        conversation_data = {
            "id": conversation.id,
            "user_message": conversation.user_message,
            "lyra_response": conversation.lyra_response,
            "timestamp": conversation.timestamp.isoformat(),
            "emotional_state": conversation.emotional_state,
            "context": conversation.context
        }
        # Add to list (keep last 20)
        await self.redis.lpush(key, json.dumps(conversation_data))
        await self.redis.ltrim(key, 0, 19)
        await self.redis.expire(key, 3600)  # 1 hour TTL

    async def _get_cached_conversations(
        self,
        user_id: str,
        limit: int
    ) -> Optional[List[ConversationModel]]:
        """Get cached conversations from Redis, or None on miss/error."""
        try:
            key = f"conversations:{user_id}"
            cached_data = await self.redis.lrange(key, 0, limit - 1)
            if not cached_data:
                return None
            conversations = []
            for data in cached_data:
                conv_dict = json.loads(data)
                conv = ConversationModel()
                # FIX: loop variable renamed from `key` so it no longer
                # shadows the Redis key above.
                for field, value in conv_dict.items():
                    if field == "timestamp":
                        setattr(conv, field, datetime.fromisoformat(value))
                    else:
                        setattr(conv, field, value)
                conversations.append(conv)
            return conversations
        except Exception as e:
            # Cache is best-effort: log and let callers fall back to the DB.
            logger.warning(f"Failed to get cached conversations: {e}")
            return None

    async def cleanup_old_data(self, days: int = 90):
        """Clean up old data to manage database size.

        Deletes unrated conversations, low-impact emotional memories, and
        system metrics older than `days` days. The async_session() context
        manager commits on exit, so no explicit commit is needed here.
        """
        cutoff_date = datetime.utcnow() - timedelta(days=days)
        async with self.async_session() as session:
            # Clean up old conversations (keep satisfaction ratings)
            await session.execute(
                text("""
                    DELETE FROM conversations
                    WHERE timestamp < :cutoff_date
                    AND user_satisfaction IS NULL
                """),
                {"cutoff_date": cutoff_date}
            )
            # Clean up low-impact emotional memories
            await session.execute(
                text("""
                    DELETE FROM emotional_memories
                    WHERE timestamp < :cutoff_date
                    AND impact_score < 0.3
                """),
                {"cutoff_date": cutoff_date}
            )
            # Clean up old system metrics
            await session.execute(
                text("""
                    DELETE FROM system_metrics
                    WHERE timestamp < :cutoff_date
                """),
                {"cutoff_date": cutoff_date}
            )
        logger.info(f"Cleaned up data older than {days} days")

    async def close(self):
        """Close database connections and mark the manager disconnected."""
        if self.async_engine:
            await self.async_engine.dispose()
        if self.engine:
            self.engine.dispose()
        if self.redis:
            # redis-py 5 renamed close() to aclose(); support both so the
            # FakeRedis fallback and older clients keep working.
            closer = getattr(self.redis, "aclose", None) or self.redis.close
            await closer()
        self.is_connected = False
        logger.info("Database manager closed")