Initial commit: Clean slate for Mai project

2026-01-26 22:40:49 -05:00
commit 7c98aec306
70 changed files with 28199 additions and 0 deletions
--- a/.mai/config/memory.yaml
+++ b/.mai/config/memory.yaml
@@ -0,0 +1,171 @@
+# Memory System Configuration for Mai
+
+# Compression settings
+compression:
+  # Triggers for automatic compression
+  thresholds:
+    message_count: 50          # Compress once this many messages accumulate
+    age_days: 30               # Compress conversations older than this many days
+    memory_limit_mb: 500       # Compress when memory usage exceeds this many MB
+  
+  # AI summarization configuration
+  summarization:
+    model: "llama2"                           # Model to use for summarization
+    preserve_elements:                         # Elements to preserve in compression
+      - "preferences"                           # User preferences and choices
+      - "decisions"                             # Important decisions made
+      - "patterns"                              # Interaction and topic patterns
+      - "key_facts"                             # Critical information and facts
+    min_quality_score: 0.7                      # Minimum acceptable summary quality
+    max_summary_length: 1000                    # Maximum summary length in characters
+    context_messages: 30                         # Messages to include for context
+  
+  # Adaptive weighting parameters
+  adaptive_weighting:
+    importance_decay_days: 90                    # Days for importance decay
+    pattern_weight: 1.5                          # Weight for pattern preservation
+    technical_weight: 1.2                        # Weight for technical conversations
+    planning_weight: 1.3                         # Weight for planning conversations
+    recency_boost: 1.2                           # Boost for recent messages
+    keyword_boost: 1.5                           # Boost for preference keywords
+  
+  # Compression strategy settings
+  strategy:
+    keep_recent_count: 10                          # Recent messages to always keep
+    max_patterns_extracted: 5                      # Maximum patterns to extract
+    topic_extraction_method: "keyword"              # Method for topic extraction
+    pattern_confidence_threshold: 0.6               # Minimum confidence for pattern extraction
+
+# Context retrieval settings
+retrieval:
+  # Search configuration
+  search:
+    similarity_threshold: 0.7                     # Minimum similarity for semantic search
+    max_results: 5                               # Maximum search results to return
+    include_content: false                          # Include full content in results
+    
+  # Multi-faceted search weights (the four values below sum to 1.0)
+  weights:
+    semantic_similarity: 0.4                        # Weight for semantic similarity
+    keyword_match: 0.3                             # Weight for keyword matching
+    recency_weight: 0.2                            # Weight for recency
+    user_pattern_weight: 0.1                       # Weight for user patterns
+  
+  # Adaptive search settings
+  adaptive:
+    conversation_type_detection: true              # Automatically detect conversation type
+    weight_adjustment: true                        # Adjust weights based on context
+    context_window_limit: 2000                     # Token limit for context retrieval
+    
+  # Performance tuning
+  performance:
+    cache_search_results: true                     # Cache frequent searches
+    cache_ttl_seconds: 300                         # Cache time-to-live in seconds
+    parallel_search: false                          # Enable parallel search (experimental)
+    max_search_time_ms: 1000                      # Maximum search time in milliseconds
+
+# Pattern extraction settings
+patterns:
+  # Granularity levels
+  extraction_granularity:
+    fine:                                        # Detailed extraction for important conversations
+      message_sample_size: 50
+      pattern_confidence: 0.8
+    medium:                                       # Standard extraction
+      message_sample_size: 30
+      pattern_confidence: 0.7
+    coarse:                                        # Broad extraction for old conversations
+      message_sample_size: 20
+      pattern_confidence: 0.6
+  
+  # Pattern types to extract
+  types:
+    user_preferences:
+      enabled: true
+      keywords:
+        - "prefer"
+        - "like"
+        - "want"
+        - "should"
+        - "don't like"
+        - "avoid"
+      confidence_threshold: 0.7
+      
+    interaction_patterns:
+      enabled: true
+      metrics:
+        - "message_length_ratio"
+        - "response_time_pattern"
+        - "question_frequency"
+        - "clarification_requests"
+      
+    topic_preferences:
+      enabled: true
+      max_topics: 10
+      min_topic_frequency: 3
+      
+    emotional_patterns:
+      enabled: false                              # Disabled for now; planned as a future enhancement
+      sentiment_analysis: false
+      
+    decision_patterns:
+      enabled: true
+      decision_keywords:
+        - "decided"
+        - "chose"
+        - "selected"
+        - "agreed"
+        - "rejected"
+
+# Memory management settings
+management:
+  # Storage limits and cleanup
+  storage:
+    max_conversation_age_days: 365              # Maximum conversation age in days before review
+    auto_cleanup: false                           # Enable automatic cleanup
+    backup_before_cleanup: true                   # Backup before cleanup
+    
+  # User control settings
+  user_control:
+    allow_conversation_deletion: true            # Allow users to delete conversations
+    grace_period_days: 7                         # Recovery grace period in days
+    bulk_operations: true                         # Allow bulk operations
+    
+  # Privacy settings
+  privacy:
+    anonymize_patterns: false                     # Anonymize extracted patterns
+    pattern_retention_days: 180                   # Days to keep extracted patterns
+    encrypt_sensitive_topics: true                 # Encrypt sensitive topic patterns
+
+# Performance and monitoring
+performance:
+  # Resource limits
+  resources:
+    max_memory_usage_mb: 200                    # Maximum memory (MB) for compression
+    max_cpu_usage_percent: 80                    # Maximum CPU usage
+    max_compression_time_seconds: 30              # Maximum time per compression
+    
+  # Background processing
+  background:
+    enable_background_compression: true           # Run compression in background
+    compression_interval_hours: 6                 # Hours between compression checks
+    batch_size: 5                                # Conversations per batch
+    
+  # Monitoring and metrics
+  monitoring:
+    track_compression_stats: true                # Track compression statistics
+    log_compression_events: true                 # Log compression operations
+    performance_metrics_retention_days: 30       # Days to keep performance metrics
+
+# Development and debugging
+debug:
+  # Debug settings
+  enabled: false                                   # Enable debug mode
+  log_compression_details: false                   # Log detailed compression info
+  save_intermediate_results: false                 # Save intermediate compression results
+  
+  # Testing settings
+  testing:
+    mock_summarization: false                     # Use mock summarization for testing
+    force_compression_threshold: false             # When true, force compression (testing only)
+    disable_pattern_extraction: false               # Disable pattern extraction for testing