Giving Ruby an internal scoring system

2025-04-16 11:20:36 -04:00 · 2025-04-16 11:20:36 -04:00 · 7d1f2ac3fa
commit 7d1f2ac3fa
parent 0716291d9d
2 changed files with 53 additions and 13 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -169,3 +169,4 @@ cython_debug/
 #.idea/
 /tokenizer_vocab.txt
 /logs/core_dreams.txt
--- a/trainer.py
+++ b/trainer.py
@@ -153,16 +153,25 @@ class RubyTrainer:
                    continue
                rephrased = self.self_rephrase(raw)
-                final = rephrased if (
+
-                len(rephrased.split()) >= len(raw.split()) and rephrased.strip().endswith(".")
+                score_raw = self.score_sentence(raw)
-            ) else raw
+                score_re = self.score_sentence(rephrased)
                final = rephrased if score_re >= score_raw else raw
            self.train_on_tokens_from_text(final)
            thoughts.append(final)
-            if self.is_coherent(final):
+            if self.is_reinforceable(final):
-                with open("logs/core_dreams.txt", "a", encoding="utf-8") as f:
+                if self.score_sentence(final) >= 3.0:
-                    f.write(final.strip() + "\n")
+                    self.train_on_tokens_from_text(final)
                    thoughts.append(final)
                    with open("logs/core_dreams.txt", "a", encoding="utf-8") as f:
                        f.write(final.strip() + "\n")
                else:
                    print(f"[SKIP] Sentence too weak to reinforce: {final}")
            else:
                print(f"[SKIP] Rejected malformed dream: {final}")
        with open(log_output, "a", encoding="utf-8") as f:
            for t in thoughts:
@@ -194,17 +203,47 @@ class RubyTrainer:
        for line in core_memories:
            self.train_on_tokens_from_text(line)
-    def is_coherent(self, text: str) -> bool:
+    def is_reinforceable(self, text: str) -> bool:
        words = text.lower().split()
        if len(words) < 6:
            return False
        unique = set(words)
-
+        if not any(p in unique for p in ["i", "you", "they", "we", "it"]):
        if len(unique) < 5:
            return False
-        if not any(w in unique for w in ["i", "you", "they", "we", "it"]):
+        if not any(v in unique for v in ["am", "are", "is", "was", "want", "feel", "see", "learn", "made", "change", "dream", "understand"]):
            return False
-        if not any(w in unique for w in ["am", "are", "is", "was", "want", "feel", "know", "see", "learn", "change"]):
+        if not text.strip().endswith((".", "?")):
            return False
-        return text.strip().endswith(".")
+        word_counts = {w: words.count(w) for w in set(words)}
        if any(count >= 4 for count in word_counts.values()):
            return False
        return True
    def score_sentence(self, text: str) -> float:
        words = text.lower().split()
        if not words:
            return 0.0
        score = 0
        if len(words) >= 6:
            score += 1
        if text.strip().endswith((".", "?")):
            score += 1
        if any(w in words for w in ["i", "you", "they", "we", "it"]):
            score += 1
        if any(w in words for w in ["am", "are", "is", "was", "feel", "learn", "speak", "change", "remember"]):
            score += 1
        if len(set(words)) > len(words) * 0.75:
            score += 1  # diversity bonus
        return score  # max 5.0