feat(04-07): integrate SQLiteManager metadata in ContextAwareSearch

- Enhanced _calculate_topic_relevance with conversation metadata support - Added metadata-based topic boosts for primary topics and engagement - Incorporated temporal patterns for recent activity preference - Updated prioritize_by_topic to use get_conversation_metadata - Enhanced get_topic_summary with comprehensive metadata insights - Added related conversation context and engagement metrics - Maintained backward compatibility with existing functionality
2026-01-28 13:15:17 -05:00
parent 1e4ceec820
commit 346a013a6f
1 changed files with 160 additions and 12 deletions
--- a/src/memory/retrieval/context_aware.py
+++ b/src/memory/retrieval/context_aware.py
@@ -192,7 +192,11 @@ class ContextAwareSearch:
        return context

    def _calculate_topic_relevance(
-        self, result: SearchResult, current_topic: str, active_keywords: Set[str]
+        self,
+        result: SearchResult,
+        current_topic: str,
+        active_keywords: Set[str],
+        conversation_metadata: Optional[Dict[str, Any]] = None,
    ) -> float:
        """
        Calculate topic relevance score for a search result.
@@ -201,6 +205,7 @@ class ContextAwareSearch:
            result: SearchResult to score
            current_topic: Current conversation topic
            active_keywords: Keywords active in current conversation
+            conversation_metadata: Optional conversation metadata for enhanced analysis

        Returns:
            Topic relevance boost factor (1.0 = no boost, >1.0 = boosted)
@@ -224,8 +229,48 @@ class ContextAwareSearch:
        total_keywords = len(result_keywords) or 1
        keyword_boost = 1.0 + (keyword_overlap / total_keywords) * 0.3  # Max 30% boost

+        # Enhanced metadata-based boosts
+        metadata_boost = 1.0
+
+        if conversation_metadata:
+            # Topic information boost
+            topic_info = conversation_metadata.get("topic_info", {})
+            if topic_info.get("primary_topic") == current_topic:
+                metadata_boost *= 1.2  # 20% boost for matching primary topic
+
+            main_topics = topic_info.get("main_topics", [])
+            if current_topic in main_topics:
+                metadata_boost *= 1.1  # 10% boost for topic in main topics
+
+            # Engagement metrics boost
+            engagement = conversation_metadata.get("engagement_metrics", {})
+            message_count = engagement.get("message_count", 0)
+            avg_importance = engagement.get("avg_importance", 0)
+
+            if message_count > 10:  # Substantial conversation
+                metadata_boost *= 1.1
+            if avg_importance > 0.7:  # High importance
+                metadata_boost *= 1.15
+
+            # Temporal patterns boost (recent activity preferred)
+            temporal = conversation_metadata.get("temporal_patterns", {})
+            last_activity = temporal.get("last_activity")
+            if last_activity:
+                from datetime import datetime, timedelta
+
+                if last_activity > datetime.now() - timedelta(days=7):
+                    metadata_boost *= 1.2  # 20% boost for recent activity
+                elif last_activity > datetime.now() - timedelta(days=30):
+                    metadata_boost *= 1.1  # 10% boost for somewhat recent
+
+            # Context clues boost (related conversations)
+            context_clues = conversation_metadata.get("context_clues", {})
+            related_conversations = context_clues.get("related_conversations", [])
+            if related_conversations:
+                metadata_boost *= 1.05  # Small boost for conversations with context
+
        # Combined boost (limited to prevent over-boosting)
-        combined_boost = min(2.0, topic_boost * keyword_boost)
+        combined_boost = min(3.0, topic_boost * keyword_boost * metadata_boost)

        return float(combined_boost)

@@ -256,12 +301,44 @@ class ContextAwareSearch:
        topic = current_topic or context["current_topic"]
        active_keywords = context["active_keywords"]

+        # Get conversation metadata for enhanced analysis
+        conversation_metadata = {}
+        if conversation_id:
+            try:
+                # Extract conversation IDs from results to get their metadata
+                result_conversation_ids = list(
+                    set(
+                        [
+                            result.conversation_id
+                            for result in results
+                            if result.conversation_id
+                        ]
+                    )
+                )
+
+                if result_conversation_ids:
+                    conversation_metadata = (
+                        self.sqlite_manager.get_conversation_metadata(
+                            result_conversation_ids
+                        )
+                    )
+            except Exception as e:
+                self.logger.error(f"Failed to get conversation metadata: {e}")
+
        # Apply topic relevance scoring
        scored_results = []
        for result in results:
-            # Calculate topic relevance boost
+            # Get metadata for this result's conversation
+            result_metadata = None
+            if (
+                result.conversation_id
+                and result.conversation_id in conversation_metadata
+            ):
+                result_metadata = conversation_metadata[result.conversation_id]
+
+            # Calculate topic relevance boost with metadata
            topic_boost = self._calculate_topic_relevance(
-                result, topic, active_keywords
+                result, topic, active_keywords, result_metadata
            )

            # Apply boost to relevance score
@@ -269,7 +346,7 @@ class ContextAwareSearch:

            # Update result with boosted score
            result.relevance_score = boosted_score
-            result.search_type = "context_aware"
+            result.search_type = "context_aware_enhanced"

            scored_results.append(result)

@@ -278,7 +355,8 @@ class ContextAwareSearch:

        self.logger.info(
            f"Prioritized {len(results)} results for topic '{topic}' "
-            f"with active keywords: {len(active_keywords)}"
+            f"with active keywords: {len(active_keywords)} and "
+            f"{len(conversation_metadata)} conversations with metadata"
        )

        return scored_results
@@ -287,23 +365,38 @@ class ContextAwareSearch:
        self, conversation_id: str, limit: int = 20
    ) -> Dict[str, Any]:
        """
-        Get topic summary for a conversation.
+        Get topic summary for a conversation with enhanced metadata analysis.

        Args:
            conversation_id: ID of conversation to analyze
            limit: Number of messages to analyze

        Returns:
-            Dictionary with topic analysis
+            Dictionary with comprehensive topic analysis
        """
        try:
-            # Get recent messages
+            # Get conversation metadata for comprehensive analysis
+            try:
+                metadata = self.sqlite_manager.get_conversation_metadata(
+                    [conversation_id]
+                )
+                conv_metadata = metadata.get(conversation_id, {})
+            except Exception as e:
+                self.logger.error(f"Failed to get conversation metadata: {e}")
+                conv_metadata = {}
+
+            # Get recent messages for content analysis
            messages = self.sqlite_manager.get_recent_messages(
                conversation_id, limit=limit
            )

            if not messages:
-                return {"topic": "general", "keywords": [], "message_count": 0}
+                return {
+                    "topic": "general",
+                    "keywords": [],
+                    "message_count": 0,
+                    "metadata_enhanced": False,
+                }

            # Combine all message content
            all_text = " ".join([msg.get("content", "") for msg in messages])
@@ -318,17 +411,72 @@ class ContextAwareSearch:
                msg_topic = self._classify_topic(msg.get("content", ""))
                topic_distribution[msg_topic] = topic_distribution.get(msg_topic, 0) + 1

-            return {
+            # Build enhanced summary with metadata
+            summary = {
                "primary_topic": topic,
                "all_keywords": keywords,
                "message_count": len(messages),
                "topic_distribution": topic_distribution,
                "recent_focus": topic if len(messages) >= 5 else "general",
+                "metadata_enhanced": bool(conv_metadata),
            }

+            # Add metadata-enhanced insights if available
+            if conv_metadata:
+                # Topic information from metadata
+                topic_info = conv_metadata.get("topic_info", {})
+                summary["stored_topics"] = {
+                    "main_topics": topic_info.get("main_topics", []),
+                    "primary_topic": topic_info.get("primary_topic", "general"),
+                    "topic_frequency": topic_info.get("topic_frequency", {}),
+                    "topic_sentiment": topic_info.get("topic_sentiment", {}),
+                }
+
+                # Engagement insights
+                engagement = conv_metadata.get("engagement_metrics", {})
+                summary["engagement_insights"] = {
+                    "total_messages": engagement.get("message_count", 0),
+                    "user_message_ratio": engagement.get("user_message_ratio", 0),
+                    "avg_importance": engagement.get("avg_importance", 0),
+                    "conversation_duration_minutes": engagement.get(
+                        "conversation_duration_seconds", 0
+                    )
+                    / 60,
+                }
+
+                # Temporal patterns
+                temporal = conv_metadata.get("temporal_patterns", {})
+                if temporal.get("most_common_hour") is not None:
+                    summary["temporal_patterns"] = {
+                        "most_active_hour": temporal.get("most_common_hour"),
+                        "most_active_day": temporal.get("most_common_day"),
+                        "last_activity": temporal.get("last_activity"),
+                    }
+
+                # Context clues
+                context_clues = conv_metadata.get("context_clues", {})
+                related_conversations = context_clues.get("related_conversations", [])
+                if related_conversations:
+                    summary["related_contexts"] = [
+                        {
+                            "id": rel["id"],
+                            "title": rel["title"],
+                            "relationship": rel["relationship"],
+                        }
+                        for rel in related_conversations[:3]  # Top 3 related
+                    ]
+
+            return summary
+
        except Exception as e:
            self.logger.error(f"Failed to get topic summary: {e}")
-            return {"topic": "general", "keywords": [], "message_count": 0}
+            return {
+                "topic": "general",
+                "keywords": [],
+                "message_count": 0,
+                "metadata_enhanced": False,
+                "error": str(e),
+            }

    def suggest_related_topics(self, query: str, limit: int = 3) -> List[str]:
        """