feat(04-07): integrate SQLiteManager metadata in ContextAwareSearch

- Enhanced _calculate_topic_relevance with conversation metadata support
- Added metadata-based topic boosts for primary topics and engagement
- Incorporated temporal patterns for recent activity preference
- Updated prioritize_by_topic to use get_conversation_metadata
- Enhanced get_topic_summary with comprehensive metadata insights
- Added related conversation context and engagement metrics
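- Kept existing call signatures backward compatible (the new conversation_metadata parameter is optional); note that search_type is now "context_aware_enhanced" and the combined boost cap rises from 2.0 to 3.0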
This commit is contained in:
Mai Development
2026-01-28 13:15:17 -05:00
parent 1e4ceec820
commit 346a013a6f

View File

@@ -192,7 +192,11 @@ class ContextAwareSearch:
return context
def _calculate_topic_relevance(
self, result: SearchResult, current_topic: str, active_keywords: Set[str]
self,
result: SearchResult,
current_topic: str,
active_keywords: Set[str],
conversation_metadata: Optional[Dict[str, Any]] = None,
) -> float:
"""
Calculate topic relevance score for a search result.
@@ -201,6 +205,7 @@ class ContextAwareSearch:
result: SearchResult to score
current_topic: Current conversation topic
active_keywords: Keywords active in current conversation
conversation_metadata: Optional conversation metadata for enhanced analysis
Returns:
Topic relevance boost factor (1.0 = no boost, >1.0 = boosted)
@@ -224,8 +229,48 @@ class ContextAwareSearch:
total_keywords = len(result_keywords) or 1
keyword_boost = 1.0 + (keyword_overlap / total_keywords) * 0.3 # Max 30% boost
# Enhanced metadata-based boosts
metadata_boost = 1.0
if conversation_metadata:
# Topic information boost
topic_info = conversation_metadata.get("topic_info", {})
if topic_info.get("primary_topic") == current_topic:
metadata_boost *= 1.2 # 20% boost for matching primary topic
main_topics = topic_info.get("main_topics", [])
if current_topic in main_topics:
metadata_boost *= 1.1 # 10% boost for topic in main topics
# Engagement metrics boost
engagement = conversation_metadata.get("engagement_metrics", {})
message_count = engagement.get("message_count", 0)
avg_importance = engagement.get("avg_importance", 0)
if message_count > 10: # Substantial conversation
metadata_boost *= 1.1
if avg_importance > 0.7: # High importance
metadata_boost *= 1.15
# Temporal patterns boost (recent activity preferred)
temporal = conversation_metadata.get("temporal_patterns", {})
last_activity = temporal.get("last_activity")
if last_activity:
from datetime import datetime, timedelta
if last_activity > datetime.now() - timedelta(days=7):
metadata_boost *= 1.2 # 20% boost for recent activity
elif last_activity > datetime.now() - timedelta(days=30):
metadata_boost *= 1.1 # 10% boost for somewhat recent
# Context clues boost (related conversations)
context_clues = conversation_metadata.get("context_clues", {})
related_conversations = context_clues.get("related_conversations", [])
if related_conversations:
metadata_boost *= 1.05 # Small boost for conversations with context
# Combined boost (limited to prevent over-boosting)
combined_boost = min(2.0, topic_boost * keyword_boost)
combined_boost = min(3.0, topic_boost * keyword_boost * metadata_boost)
return float(combined_boost)
@@ -256,12 +301,44 @@ class ContextAwareSearch:
topic = current_topic or context["current_topic"]
active_keywords = context["active_keywords"]
# Get conversation metadata for enhanced analysis
conversation_metadata = {}
if conversation_id:
try:
# Extract conversation IDs from results to get their metadata
result_conversation_ids = list(
set(
[
result.conversation_id
for result in results
if result.conversation_id
]
)
)
if result_conversation_ids:
conversation_metadata = (
self.sqlite_manager.get_conversation_metadata(
result_conversation_ids
)
)
except Exception as e:
self.logger.error(f"Failed to get conversation metadata: {e}")
# Apply topic relevance scoring
scored_results = []
for result in results:
# Calculate topic relevance boost
# Get metadata for this result's conversation
result_metadata = None
if (
result.conversation_id
and result.conversation_id in conversation_metadata
):
result_metadata = conversation_metadata[result.conversation_id]
# Calculate topic relevance boost with metadata
topic_boost = self._calculate_topic_relevance(
result, topic, active_keywords
result, topic, active_keywords, result_metadata
)
# Apply boost to relevance score
@@ -269,7 +346,7 @@ class ContextAwareSearch:
# Update result with boosted score
result.relevance_score = boosted_score
result.search_type = "context_aware"
result.search_type = "context_aware_enhanced"
scored_results.append(result)
@@ -278,7 +355,8 @@ class ContextAwareSearch:
self.logger.info(
f"Prioritized {len(results)} results for topic '{topic}' "
f"with active keywords: {len(active_keywords)}"
f"with active keywords: {len(active_keywords)} and "
f"{len(conversation_metadata)} conversations with metadata"
)
return scored_results
@@ -287,23 +365,38 @@ class ContextAwareSearch:
self, conversation_id: str, limit: int = 20
) -> Dict[str, Any]:
"""
Get topic summary for a conversation.
Get topic summary for a conversation with enhanced metadata analysis.
Args:
conversation_id: ID of conversation to analyze
limit: Number of messages to analyze
Returns:
Dictionary with topic analysis
Dictionary with comprehensive topic analysis
"""
try:
# Get recent messages
# Get conversation metadata for comprehensive analysis
try:
metadata = self.sqlite_manager.get_conversation_metadata(
[conversation_id]
)
conv_metadata = metadata.get(conversation_id, {})
except Exception as e:
self.logger.error(f"Failed to get conversation metadata: {e}")
conv_metadata = {}
# Get recent messages for content analysis
messages = self.sqlite_manager.get_recent_messages(
conversation_id, limit=limit
)
if not messages:
return {"topic": "general", "keywords": [], "message_count": 0}
return {
"topic": "general",
"keywords": [],
"message_count": 0,
"metadata_enhanced": False,
}
# Combine all message content
all_text = " ".join([msg.get("content", "") for msg in messages])
@@ -318,17 +411,72 @@ class ContextAwareSearch:
msg_topic = self._classify_topic(msg.get("content", ""))
topic_distribution[msg_topic] = topic_distribution.get(msg_topic, 0) + 1
return {
# Build enhanced summary with metadata
summary = {
"primary_topic": topic,
"all_keywords": keywords,
"message_count": len(messages),
"topic_distribution": topic_distribution,
"recent_focus": topic if len(messages) >= 5 else "general",
"metadata_enhanced": bool(conv_metadata),
}
# Add metadata-enhanced insights if available
if conv_metadata:
# Topic information from metadata
topic_info = conv_metadata.get("topic_info", {})
summary["stored_topics"] = {
"main_topics": topic_info.get("main_topics", []),
"primary_topic": topic_info.get("primary_topic", "general"),
"topic_frequency": topic_info.get("topic_frequency", {}),
"topic_sentiment": topic_info.get("topic_sentiment", {}),
}
# Engagement insights
engagement = conv_metadata.get("engagement_metrics", {})
summary["engagement_insights"] = {
"total_messages": engagement.get("message_count", 0),
"user_message_ratio": engagement.get("user_message_ratio", 0),
"avg_importance": engagement.get("avg_importance", 0),
"conversation_duration_minutes": engagement.get(
"conversation_duration_seconds", 0
)
/ 60,
}
# Temporal patterns
temporal = conv_metadata.get("temporal_patterns", {})
if temporal.get("most_common_hour") is not None:
summary["temporal_patterns"] = {
"most_active_hour": temporal.get("most_common_hour"),
"most_active_day": temporal.get("most_common_day"),
"last_activity": temporal.get("last_activity"),
}
# Context clues
context_clues = conv_metadata.get("context_clues", {})
related_conversations = context_clues.get("related_conversations", [])
if related_conversations:
summary["related_contexts"] = [
{
"id": rel["id"],
"title": rel["title"],
"relationship": rel["relationship"],
}
for rel in related_conversations[:3] # Top 3 related
]
return summary
except Exception as e:
self.logger.error(f"Failed to get topic summary: {e}")
return {"topic": "general", "keywords": [], "message_count": 0}
return {
"topic": "general",
"keywords": [],
"message_count": 0,
"metadata_enhanced": False,
"error": str(e),
}
def suggest_related_topics(self, query: str, limit: int = 3) -> List[str]:
"""