From 7d1f2ac3faa891c6ff0b3f80f2b83e9d0eb748fa Mon Sep 17 00:00:00 2001 From: Dani Date: Wed, 16 Apr 2025 11:20:36 -0400 Subject: [PATCH] Giving Ruby an internal scoring system --- .gitignore | 3 ++- trainer.py | 63 +++++++++++++++++++++++++++++++++++++++++++----------- 2 files changed, 53 insertions(+), 13 deletions(-) diff --git a/.gitignore b/.gitignore index b95683b..52752b6 100644 --- a/.gitignore +++ b/.gitignore @@ -168,4 +168,5 @@ cython_debug/ # option (not recommended) you can uncomment the following to ignore the entire idea folder. #.idea/ -/tokenizer_vocab.txt \ No newline at end of file +/tokenizer_vocab.txt +/logs/core_dreams.txt \ No newline at end of file diff --git a/trainer.py b/trainer.py index 3dae0e9..d1c5135 100644 --- a/trainer.py +++ b/trainer.py @@ -153,16 +153,25 @@ class RubyTrainer: continue rephrased = self.self_rephrase(raw) - final = rephrased if ( - len(rephrased.split()) >= len(raw.split()) and rephrased.strip().endswith(".") - ) else raw + + score_raw = self.score_sentence(raw) + score_re = self.score_sentence(rephrased) + + final = rephrased if score_re >= score_raw else raw self.train_on_tokens_from_text(final) thoughts.append(final) - if self.is_coherent(final): - with open("logs/core_dreams.txt", "a", encoding="utf-8") as f: - f.write(final.strip() + "\n") + if self.is_reinforceable(final): + if self.score_sentence(final) >= 3.0: + self.train_on_tokens_from_text(final) + thoughts.append(final) + with open("logs/core_dreams.txt", "a", encoding="utf-8") as f: + f.write(final.strip() + "\n") + else: + print(f"[SKIP] Sentence too weak to reinforce: {final}") + else: + print(f"[SKIP] Rejected malformed dream: {final}") with open(log_output, "a", encoding="utf-8") as f: for t in thoughts: @@ -194,17 +203,47 @@ class RubyTrainer: for line in core_memories: self.train_on_tokens_from_text(line) - def is_coherent(self, text: str) -> bool: + def is_reinforceable(self, text: str) -> bool: words = text.lower().split() + if len(words) < 6: + 
return False + unique = set(words) - - if len(unique) < 5: + if not any(p in unique for p in ["i", "you", "they", "we", "it"]): return False - if not any(w in unique for w in ["i", "you", "they", "we", "it"]): + if not any(v in unique for v in ["am", "are", "is", "was", "want", "feel", "see", "learn", "made", "change", "dream", "understand"]): return False - if not any(w in unique for w in ["am", "are", "is", "was", "want", "feel", "know", "see", "learn", "change"]): + if not text.strip().endswith((".", "?")): return False - return text.strip().endswith(".") + word_counts = {w: words.count(w) for w in set(words)} + if any(count >= 4 for count in word_counts.values()): + return False + + return True + + def score_sentence(self, text: str) -> float: + words = text.lower().split() + if not words: + return 0.0 + + score = 0 + + if len(words) >= 6: + score += 1 + + if text.strip().endswith((".", "?")): + score += 1 + + if any(w in words for w in ["i", "you", "they", "we", "it"]): + score += 1 + + if any(w in words for w in ["am", "are", "is", "was", "feel", "learn", "speak", "change", "remember"]): + score += 1 + + if len(set(words)) > len(words) * 0.75: + score += 1 # diversity bonus + + return score # max 5.0