feat(04-07): integrate SQLiteManager metadata in ContextAwareSearch
- Enhanced _calculate_topic_relevance with conversation metadata support
- Added metadata-based topic boosts for primary topics and engagement
- Incorporated temporal patterns for recent activity preference
- Updated prioritize_by_topic to use get_conversation_metadata
- Enhanced get_topic_summary with comprehensive metadata insights
- Added related conversation context and engagement metrics
- Maintained backward compatibility with existing functionality
This commit is contained in:
@@ -192,7 +192,11 @@ class ContextAwareSearch:
|
|||||||
return context
|
return context
|
||||||
|
|
||||||
def _calculate_topic_relevance(
|
def _calculate_topic_relevance(
|
||||||
self, result: SearchResult, current_topic: str, active_keywords: Set[str]
|
self,
|
||||||
|
result: SearchResult,
|
||||||
|
current_topic: str,
|
||||||
|
active_keywords: Set[str],
|
||||||
|
conversation_metadata: Optional[Dict[str, Any]] = None,
|
||||||
) -> float:
|
) -> float:
|
||||||
"""
|
"""
|
||||||
Calculate topic relevance score for a search result.
|
Calculate topic relevance score for a search result.
|
||||||
@@ -201,6 +205,7 @@ class ContextAwareSearch:
|
|||||||
result: SearchResult to score
|
result: SearchResult to score
|
||||||
current_topic: Current conversation topic
|
current_topic: Current conversation topic
|
||||||
active_keywords: Keywords active in current conversation
|
active_keywords: Keywords active in current conversation
|
||||||
|
conversation_metadata: Optional conversation metadata for enhanced analysis
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Topic relevance boost factor (1.0 = no boost, >1.0 = boosted)
|
Topic relevance boost factor (1.0 = no boost, >1.0 = boosted)
|
||||||
@@ -224,8 +229,48 @@ class ContextAwareSearch:
|
|||||||
total_keywords = len(result_keywords) or 1
|
total_keywords = len(result_keywords) or 1
|
||||||
keyword_boost = 1.0 + (keyword_overlap / total_keywords) * 0.3 # Max 30% boost
|
keyword_boost = 1.0 + (keyword_overlap / total_keywords) * 0.3 # Max 30% boost
|
||||||
|
|
||||||
|
# Enhanced metadata-based boosts
|
||||||
|
metadata_boost = 1.0
|
||||||
|
|
||||||
|
if conversation_metadata:
|
||||||
|
# Topic information boost
|
||||||
|
topic_info = conversation_metadata.get("topic_info", {})
|
||||||
|
if topic_info.get("primary_topic") == current_topic:
|
||||||
|
metadata_boost *= 1.2 # 20% boost for matching primary topic
|
||||||
|
|
||||||
|
main_topics = topic_info.get("main_topics", [])
|
||||||
|
if current_topic in main_topics:
|
||||||
|
metadata_boost *= 1.1 # 10% boost for topic in main topics
|
||||||
|
|
||||||
|
# Engagement metrics boost
|
||||||
|
engagement = conversation_metadata.get("engagement_metrics", {})
|
||||||
|
message_count = engagement.get("message_count", 0)
|
||||||
|
avg_importance = engagement.get("avg_importance", 0)
|
||||||
|
|
||||||
|
if message_count > 10: # Substantial conversation
|
||||||
|
metadata_boost *= 1.1
|
||||||
|
if avg_importance > 0.7: # High importance
|
||||||
|
metadata_boost *= 1.15
|
||||||
|
|
||||||
|
# Temporal patterns boost (recent activity preferred)
|
||||||
|
temporal = conversation_metadata.get("temporal_patterns", {})
|
||||||
|
last_activity = temporal.get("last_activity")
|
||||||
|
if last_activity:
|
||||||
|
from datetime import datetime, timedelta
|
||||||
|
|
||||||
|
if last_activity > datetime.now() - timedelta(days=7):
|
||||||
|
metadata_boost *= 1.2 # 20% boost for recent activity
|
||||||
|
elif last_activity > datetime.now() - timedelta(days=30):
|
||||||
|
metadata_boost *= 1.1 # 10% boost for somewhat recent
|
||||||
|
|
||||||
|
# Context clues boost (related conversations)
|
||||||
|
context_clues = conversation_metadata.get("context_clues", {})
|
||||||
|
related_conversations = context_clues.get("related_conversations", [])
|
||||||
|
if related_conversations:
|
||||||
|
metadata_boost *= 1.05 # Small boost for conversations with context
|
||||||
|
|
||||||
# Combined boost (limited to prevent over-boosting)
|
# Combined boost (limited to prevent over-boosting)
|
||||||
combined_boost = min(2.0, topic_boost * keyword_boost)
|
combined_boost = min(3.0, topic_boost * keyword_boost * metadata_boost)
|
||||||
|
|
||||||
return float(combined_boost)
|
return float(combined_boost)
|
||||||
|
|
||||||
@@ -256,12 +301,44 @@ class ContextAwareSearch:
|
|||||||
topic = current_topic or context["current_topic"]
|
topic = current_topic or context["current_topic"]
|
||||||
active_keywords = context["active_keywords"]
|
active_keywords = context["active_keywords"]
|
||||||
|
|
||||||
|
# Get conversation metadata for enhanced analysis
|
||||||
|
conversation_metadata = {}
|
||||||
|
if conversation_id:
|
||||||
|
try:
|
||||||
|
# Extract conversation IDs from results to get their metadata
|
||||||
|
result_conversation_ids = list(
|
||||||
|
set(
|
||||||
|
[
|
||||||
|
result.conversation_id
|
||||||
|
for result in results
|
||||||
|
if result.conversation_id
|
||||||
|
]
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
|
if result_conversation_ids:
|
||||||
|
conversation_metadata = (
|
||||||
|
self.sqlite_manager.get_conversation_metadata(
|
||||||
|
result_conversation_ids
|
||||||
|
)
|
||||||
|
)
|
||||||
|
except Exception as e:
|
||||||
|
self.logger.error(f"Failed to get conversation metadata: {e}")
|
||||||
|
|
||||||
# Apply topic relevance scoring
|
# Apply topic relevance scoring
|
||||||
scored_results = []
|
scored_results = []
|
||||||
for result in results:
|
for result in results:
|
||||||
# Calculate topic relevance boost
|
# Get metadata for this result's conversation
|
||||||
|
result_metadata = None
|
||||||
|
if (
|
||||||
|
result.conversation_id
|
||||||
|
and result.conversation_id in conversation_metadata
|
||||||
|
):
|
||||||
|
result_metadata = conversation_metadata[result.conversation_id]
|
||||||
|
|
||||||
|
# Calculate topic relevance boost with metadata
|
||||||
topic_boost = self._calculate_topic_relevance(
|
topic_boost = self._calculate_topic_relevance(
|
||||||
result, topic, active_keywords
|
result, topic, active_keywords, result_metadata
|
||||||
)
|
)
|
||||||
|
|
||||||
# Apply boost to relevance score
|
# Apply boost to relevance score
|
||||||
@@ -269,7 +346,7 @@ class ContextAwareSearch:
|
|||||||
|
|
||||||
# Update result with boosted score
|
# Update result with boosted score
|
||||||
result.relevance_score = boosted_score
|
result.relevance_score = boosted_score
|
||||||
result.search_type = "context_aware"
|
result.search_type = "context_aware_enhanced"
|
||||||
|
|
||||||
scored_results.append(result)
|
scored_results.append(result)
|
||||||
|
|
||||||
@@ -278,7 +355,8 @@ class ContextAwareSearch:
|
|||||||
|
|
||||||
self.logger.info(
|
self.logger.info(
|
||||||
f"Prioritized {len(results)} results for topic '{topic}' "
|
f"Prioritized {len(results)} results for topic '{topic}' "
|
||||||
f"with active keywords: {len(active_keywords)}"
|
f"with active keywords: {len(active_keywords)} and "
|
||||||
|
f"{len(conversation_metadata)} conversations with metadata"
|
||||||
)
|
)
|
||||||
|
|
||||||
return scored_results
|
return scored_results
|
||||||
@@ -287,23 +365,38 @@ class ContextAwareSearch:
|
|||||||
self, conversation_id: str, limit: int = 20
|
self, conversation_id: str, limit: int = 20
|
||||||
) -> Dict[str, Any]:
|
) -> Dict[str, Any]:
|
||||||
"""
|
"""
|
||||||
Get topic summary for a conversation.
|
Get topic summary for a conversation with enhanced metadata analysis.
|
||||||
|
|
||||||
Args:
|
Args:
|
||||||
conversation_id: ID of conversation to analyze
|
conversation_id: ID of conversation to analyze
|
||||||
limit: Number of messages to analyze
|
limit: Number of messages to analyze
|
||||||
|
|
||||||
Returns:
|
Returns:
|
||||||
Dictionary with topic analysis
|
Dictionary with comprehensive topic analysis
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
# Get recent messages
|
# Get conversation metadata for comprehensive analysis
|
||||||
|
try:
|
||||||
|
metadata = self.sqlite_manager.get_conversation_metadata(
|
||||||
|
[conversation_id]
|
||||||
|
)
|
||||||
|
conv_metadata = metadata.get(conversation_id, {})
|
||||||
|
except Exception as e:
|
||||||
|
self.logger.error(f"Failed to get conversation metadata: {e}")
|
||||||
|
conv_metadata = {}
|
||||||
|
|
||||||
|
# Get recent messages for content analysis
|
||||||
messages = self.sqlite_manager.get_recent_messages(
|
messages = self.sqlite_manager.get_recent_messages(
|
||||||
conversation_id, limit=limit
|
conversation_id, limit=limit
|
||||||
)
|
)
|
||||||
|
|
||||||
if not messages:
|
if not messages:
|
||||||
return {"topic": "general", "keywords": [], "message_count": 0}
|
return {
|
||||||
|
"topic": "general",
|
||||||
|
"keywords": [],
|
||||||
|
"message_count": 0,
|
||||||
|
"metadata_enhanced": False,
|
||||||
|
}
|
||||||
|
|
||||||
# Combine all message content
|
# Combine all message content
|
||||||
all_text = " ".join([msg.get("content", "") for msg in messages])
|
all_text = " ".join([msg.get("content", "") for msg in messages])
|
||||||
@@ -318,17 +411,72 @@ class ContextAwareSearch:
|
|||||||
msg_topic = self._classify_topic(msg.get("content", ""))
|
msg_topic = self._classify_topic(msg.get("content", ""))
|
||||||
topic_distribution[msg_topic] = topic_distribution.get(msg_topic, 0) + 1
|
topic_distribution[msg_topic] = topic_distribution.get(msg_topic, 0) + 1
|
||||||
|
|
||||||
return {
|
# Build enhanced summary with metadata
|
||||||
|
summary = {
|
||||||
"primary_topic": topic,
|
"primary_topic": topic,
|
||||||
"all_keywords": keywords,
|
"all_keywords": keywords,
|
||||||
"message_count": len(messages),
|
"message_count": len(messages),
|
||||||
"topic_distribution": topic_distribution,
|
"topic_distribution": topic_distribution,
|
||||||
"recent_focus": topic if len(messages) >= 5 else "general",
|
"recent_focus": topic if len(messages) >= 5 else "general",
|
||||||
|
"metadata_enhanced": bool(conv_metadata),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# Add metadata-enhanced insights if available
|
||||||
|
if conv_metadata:
|
||||||
|
# Topic information from metadata
|
||||||
|
topic_info = conv_metadata.get("topic_info", {})
|
||||||
|
summary["stored_topics"] = {
|
||||||
|
"main_topics": topic_info.get("main_topics", []),
|
||||||
|
"primary_topic": topic_info.get("primary_topic", "general"),
|
||||||
|
"topic_frequency": topic_info.get("topic_frequency", {}),
|
||||||
|
"topic_sentiment": topic_info.get("topic_sentiment", {}),
|
||||||
|
}
|
||||||
|
|
||||||
|
# Engagement insights
|
||||||
|
engagement = conv_metadata.get("engagement_metrics", {})
|
||||||
|
summary["engagement_insights"] = {
|
||||||
|
"total_messages": engagement.get("message_count", 0),
|
||||||
|
"user_message_ratio": engagement.get("user_message_ratio", 0),
|
||||||
|
"avg_importance": engagement.get("avg_importance", 0),
|
||||||
|
"conversation_duration_minutes": engagement.get(
|
||||||
|
"conversation_duration_seconds", 0
|
||||||
|
)
|
||||||
|
/ 60,
|
||||||
|
}
|
||||||
|
|
||||||
|
# Temporal patterns
|
||||||
|
temporal = conv_metadata.get("temporal_patterns", {})
|
||||||
|
if temporal.get("most_common_hour") is not None:
|
||||||
|
summary["temporal_patterns"] = {
|
||||||
|
"most_active_hour": temporal.get("most_common_hour"),
|
||||||
|
"most_active_day": temporal.get("most_common_day"),
|
||||||
|
"last_activity": temporal.get("last_activity"),
|
||||||
|
}
|
||||||
|
|
||||||
|
# Context clues
|
||||||
|
context_clues = conv_metadata.get("context_clues", {})
|
||||||
|
related_conversations = context_clues.get("related_conversations", [])
|
||||||
|
if related_conversations:
|
||||||
|
summary["related_contexts"] = [
|
||||||
|
{
|
||||||
|
"id": rel["id"],
|
||||||
|
"title": rel["title"],
|
||||||
|
"relationship": rel["relationship"],
|
||||||
|
}
|
||||||
|
for rel in related_conversations[:3] # Top 3 related
|
||||||
|
]
|
||||||
|
|
||||||
|
return summary
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.logger.error(f"Failed to get topic summary: {e}")
|
self.logger.error(f"Failed to get topic summary: {e}")
|
||||||
return {"topic": "general", "keywords": [], "message_count": 0}
|
return {
|
||||||
|
"topic": "general",
|
||||||
|
"keywords": [],
|
||||||
|
"message_count": 0,
|
||||||
|
"metadata_enhanced": False,
|
||||||
|
"error": str(e),
|
||||||
|
}
|
||||||
|
|
||||||
def suggest_related_topics(self, query: str, limit: int = 3) -> List[str]:
|
def suggest_related_topics(self, query: str, limit: int = 3) -> List[str]:
|
||||||
"""
|
"""
|
||||||
|
|||||||
Reference in New Issue
Block a user