From 0716291d9d875d4d8064159fdf7b8762592967ea Mon Sep 17 00:00:00 2001 From: Dani Date: Tue, 15 Apr 2025 21:17:31 -0400 Subject: [PATCH] Added coherence fix --- trainer.py | 34 ++++++++++++++++++++++++++++------ 1 file changed, 28 insertions(+), 6 deletions(-) diff --git a/trainer.py b/trainer.py index 967ff67..3dae0e9 100644 --- a/trainer.py +++ b/trainer.py @@ -114,7 +114,7 @@ class RubyTrainer: new_tokens = input_ids.squeeze(0).tolist()[1:] return self.tokenizer.detokenize([t for t in new_tokens if t != self.tokenizer.vocab[""]]) - + def dream(self, log_path="logs/messages.log", max_lines=50): print("[DREAM] Ruby is dreaming...") @@ -147,16 +147,23 @@ class RubyTrainer: if not raw or len(raw.strip().split()) < 4: continue + for _ in range(rounds): + raw = self.generate_reply() + if not raw or len(raw.strip().split()) < 4: + continue - rephrased = self.self_rephrase(raw) - if len(rephrased.split()) >= len(raw.split()) and rephrased.strip().endswith("."): - final = rephrased - else: - final = raw + rephrased = self.self_rephrase(raw) + final = rephrased if ( + len(rephrased.split()) >= len(raw.split()) and rephrased.strip().endswith(".") + ) else raw self.train_on_tokens_from_text(final) thoughts.append(final) + if self.is_coherent(final): + with open("logs/core_dreams.txt", "a", encoding="utf-8") as f: + f.write(final.strip() + "\n") + with open(log_output, "a", encoding="utf-8") as f: for t in thoughts: f.write(f"[DREAM] {t}\n") @@ -186,3 +193,18 @@ class RubyTrainer: for line in core_memories: self.train_on_tokens_from_text(line) + + def is_coherent(self, text: str) -> bool: + words = text.lower().split() + unique = set(words) + + if len(unique) < 5: + return False + + if not any(w in unique for w in ["i", "you", "they", "we", "it"]): + return False + + if not any(w in unique for w in ["am", "are", "is", "was", "want", "feel", "know", "see", "learn", "change"]): + return False + + return text.strip().endswith(".")