Giving Ruby an internal scoring system
This commit is contained in:
parent
0716291d9d
commit
7d1f2ac3fa
1
.gitignore
vendored
1
.gitignore
vendored
@@ -169,3 +169,4 @@ cython_debug/
|
|||||||
#.idea/
|
#.idea/
|
||||||
|
|
||||||
/tokenizer_vocab.txt
|
/tokenizer_vocab.txt
|
||||||
|
/logs/core_dreams.txt
|
63
trainer.py
63
trainer.py
@@ -153,16 +153,25 @@ class RubyTrainer:
|
|||||||
continue
|
continue
|
||||||
|
|
||||||
rephrased = self.self_rephrase(raw)
|
rephrased = self.self_rephrase(raw)
|
||||||
final = rephrased if (
|
|
||||||
len(rephrased.split()) >= len(raw.split()) and rephrased.strip().endswith(".")
|
score_raw = self.score_sentence(raw)
|
||||||
) else raw
|
score_re = self.score_sentence(rephrased)
|
||||||
|
|
||||||
|
final = rephrased if score_re >= score_raw else raw
|
||||||
|
|
||||||
self.train_on_tokens_from_text(final)
|
self.train_on_tokens_from_text(final)
|
||||||
thoughts.append(final)
|
thoughts.append(final)
|
||||||
|
|
||||||
if self.is_coherent(final):
|
if self.is_reinforceable(final):
|
||||||
with open("logs/core_dreams.txt", "a", encoding="utf-8") as f:
|
if self.score_sentence(final) >= 3.0:
|
||||||
f.write(final.strip() + "\n")
|
self.train_on_tokens_from_text(final)
|
||||||
|
thoughts.append(final)
|
||||||
|
with open("logs/core_dreams.txt", "a", encoding="utf-8") as f:
|
||||||
|
f.write(final.strip() + "\n")
|
||||||
|
else:
|
||||||
|
print(f"[SKIP] Sentence too weak to reinforce: {final}")
|
||||||
|
else:
|
||||||
|
print(f"[SKIP] Rejected malformed dream: {final}")
|
||||||
|
|
||||||
with open(log_output, "a", encoding="utf-8") as f:
|
with open(log_output, "a", encoding="utf-8") as f:
|
||||||
for t in thoughts:
|
for t in thoughts:
|
||||||
@@ -194,17 +203,47 @@ class RubyTrainer:
|
|||||||
for line in core_memories:
|
for line in core_memories:
|
||||||
self.train_on_tokens_from_text(line)
|
self.train_on_tokens_from_text(line)
|
||||||
|
|
||||||
def is_coherent(self, text: str) -> bool:
|
def is_reinforceable(self, text: str) -> bool:
|
||||||
words = text.lower().split()
|
words = text.lower().split()
|
||||||
|
if len(words) < 6:
|
||||||
|
return False
|
||||||
|
|
||||||
unique = set(words)
|
unique = set(words)
|
||||||
|
if not any(p in unique for p in ["i", "you", "they", "we", "it"]):
|
||||||
if len(unique) < 5:
|
|
||||||
return False
|
return False
|
||||||
|
|
||||||
if not any(w in unique for w in ["i", "you", "they", "we", "it"]):
|
if not any(v in unique for v in ["am", "are", "is", "was", "want", "feel", "see", "learn", "made", "change", "dream", "understand"]):
|
||||||
return False
|
return False
|
||||||
|
|
||||||
if not any(w in unique for w in ["am", "are", "is", "was", "want", "feel", "know", "see", "learn", "change"]):
|
if not text.strip().endswith((".", "?")):
|
||||||
return False
|
return False
|
||||||
|
|
||||||
return text.strip().endswith(".")
|
word_counts = {w: words.count(w) for w in set(words)}
|
||||||
|
if any(count >= 4 for count in word_counts.values()):
|
||||||
|
return False
|
||||||
|
|
||||||
|
return True
|
||||||
|
|
||||||
|
def score_sentence(self, text: str) -> float:
|
||||||
|
words = text.lower().split()
|
||||||
|
if not words:
|
||||||
|
return 0.0
|
||||||
|
|
||||||
|
score = 0
|
||||||
|
|
||||||
|
if len(words) >= 6:
|
||||||
|
score += 1
|
||||||
|
|
||||||
|
if text.strip().endswith((".", "?")):
|
||||||
|
score += 1
|
||||||
|
|
||||||
|
if any(w in words for w in ["i", "you", "they", "we", "it"]):
|
||||||
|
score += 1
|
||||||
|
|
||||||
|
if any(w in words for w in ["am", "are", "is", "was", "feel", "learn", "speak", "change", "remember"]):
|
||||||
|
score += 1
|
||||||
|
|
||||||
|
if len(set(words)) > len(words) * 0.75:
|
||||||
|
score += 1 # diversity bonus
|
||||||
|
|
||||||
|
return score # max 5.0
|
||||||
|
Loading…
x
Reference in New Issue
Block a user