From 7d1f2ac3faa891c6ff0b3f80f2b83e9d0eb748fa Mon Sep 17 00:00:00 2001
From: Dani <dsapelli@yahoo.com>
Date: Wed, 16 Apr 2025 11:20:36 -0400
Subject: [PATCH] Giving Ruby an internal scoring system

---
 .gitignore |  3 ++-
 trainer.py | 63 +++++++++++++++++++++++++++++++++++++++++++-----------
 2 files changed, 53 insertions(+), 13 deletions(-)

diff --git a/.gitignore b/.gitignore
index b95683b..52752b6 100644
--- a/.gitignore
+++ b/.gitignore
@@ -168,4 +168,5 @@ cython_debug/
 #  option (not recommended) you can uncomment the following to ignore the entire idea folder.
 #.idea/
 
-/tokenizer_vocab.txt
\ No newline at end of file
+/tokenizer_vocab.txt
+/logs/core_dreams.txt
\ No newline at end of file
diff --git a/trainer.py b/trainer.py
index 3dae0e9..d1c5135 100644
--- a/trainer.py
+++ b/trainer.py
@@ -153,16 +153,25 @@ class RubyTrainer:
                     continue
 
                 rephrased = self.self_rephrase(raw)
-                final = rephrased if (
-                len(rephrased.split()) >= len(raw.split()) and rephrased.strip().endswith(".")
-            ) else raw
+
+                score_raw = self.score_sentence(raw)
+                score_re = self.score_sentence(rephrased)
+
+                final = rephrased if score_re >= score_raw else raw
 
             self.train_on_tokens_from_text(final)
             thoughts.append(final)
 
-            if self.is_coherent(final):
-                with open("logs/core_dreams.txt", "a", encoding="utf-8") as f:
-                    f.write(final.strip() + "\n")
+            if self.is_reinforceable(final):
+                if self.score_sentence(final) >= 3.0:
+                    self.train_on_tokens_from_text(final)
+                    thoughts.append(final)
+                    with open("logs/core_dreams.txt", "a", encoding="utf-8") as f:
+                        f.write(final.strip() + "\n")
+                else:
+                    print(f"[SKIP] Sentence too weak to reinforce: {final}")
+            else:
+                print(f"[SKIP] Rejected malformed dream: {final}")
 
         with open(log_output, "a", encoding="utf-8") as f:
             for t in thoughts:
@@ -194,17 +203,47 @@ class RubyTrainer:
         for line in core_memories:
             self.train_on_tokens_from_text(line)
 
-    def is_coherent(self, text: str) -> bool:
+    def is_reinforceable(self, text: str) -> bool:
         words = text.lower().split()
+        if len(words) < 6:
+            return False
+
         unique = set(words)
-
-        if len(unique) < 5:
+        if not any(p in unique for p in ["i", "you", "they", "we", "it"]):
             return False
 
-        if not any(w in unique for w in ["i", "you", "they", "we", "it"]):
+        if not any(v in unique for v in ["am", "are", "is", "was", "want", "feel", "see", "learn", "made", "change", "dream", "understand"]):
             return False
 
-        if not any(w in unique for w in ["am", "are", "is", "was", "want", "feel", "know", "see", "learn", "change"]):
+        if not text.strip().endswith((".", "?")):
             return False
 
-        return text.strip().endswith(".")
+        word_counts = {w: words.count(w) for w in set(words)}
+        if any(count >= 4 for count in word_counts.values()):
+            return False
+
+        return True
+
+    def score_sentence(self, text: str) -> float:
+        words = text.lower().split()
+        if not words:
+            return 0.0
+
+        score = 0
+
+        if len(words) >= 6:
+            score += 1
+
+        if text.strip().endswith((".", "?")):
+            score += 1
+
+        if any(w in words for w in ["i", "you", "they", "we", "it"]):
+            score += 1
+
+        if any(w in words for w in ["am", "are", "is", "was", "feel", "learn", "speak", "change", "remember"]):
+            score += 1
+
+        if len(set(words)) > len(words) * 0.75:
+            score += 1  # diversity bonus
+
+        return score  # max 5.0