From 0716291d9d875d4d8064159fdf7b8762592967ea Mon Sep 17 00:00:00 2001
From: Dani <dsapelli@yahoo.com>
Date: Tue, 15 Apr 2025 21:17:31 -0400
Subject: [PATCH] Added coherence fix

---
 trainer.py | 34 ++++++++++++++++++++++++++++------
 1 file changed, 28 insertions(+), 6 deletions(-)
diff --git a/trainer.py b/trainer.py
index 967ff67..3dae0e9 100644
--- a/trainer.py
+++ b/trainer.py
@@ -114,7 +114,7 @@ class RubyTrainer:
 
         new_tokens = input_ids.squeeze(0).tolist()[1:]
         return self.tokenizer.detokenize([t for t in new_tokens if t != self.tokenizer.vocab["<END>"]])
-    
+
     def dream(self, log_path="logs/messages.log", max_lines=50):
         print("[DREAM] Ruby is dreaming...")
 
@@ -147,16 +147,23 @@ class RubyTrainer:
 
             if not raw or len(raw.strip().split()) < 4:
                 continue
+            for _ in range(rounds):
+                raw = self.generate_reply()
+                if not raw or len(raw.strip().split()) < 4:
+                    continue
 
-            rephrased = self.self_rephrase(raw)
-            if len(rephrased.split()) >= len(raw.split()) and rephrased.strip().endswith("."):
-                final = rephrased
-            else:
-                final = raw
+                rephrased = self.self_rephrase(raw)
+                final = rephrased if (
+                len(rephrased.split()) >= len(raw.split()) and rephrased.strip().endswith(".")
+            ) else raw
 
             self.train_on_tokens_from_text(final)
             thoughts.append(final)
 
+            if self.is_coherent(final):
+                with open("logs/core_dreams.txt", "a", encoding="utf-8") as f:
+                    f.write(final.strip() + "\n")
+
         with open(log_output, "a", encoding="utf-8") as f:
             for t in thoughts:
                 f.write(f"[DREAM] {t}\n")
@@ -186,3 +193,18 @@ class RubyTrainer:
 
         for line in core_memories:
             self.train_on_tokens_from_text(line)
+
+    def is_coherent(self, text: str) -> bool:
+        """Heuristic: >=5 unique words, a pronoun, a common verb, and a final period."""
+        pronouns = {"i", "you", "they", "we", "it"}
+        verbs = {"am", "are", "is", "was", "want", "feel", "know", "see", "learn", "change"}
+        vocabulary = set(text.lower().split())
+
+        # Fewer than five distinct whitespace-separated tokens: reject outright.
+        if len(vocabulary) < 5:
+            return False
+        # Need at least one exact-match pronoun AND one exact-match verb token.
+        if vocabulary.isdisjoint(pronouns) or vocabulary.isdisjoint(verbs):
+            return False
+        # Finally, the stripped text has to end with a period.
+        return text.strip().endswith(".")