75 lines
2.8 KiB
Python
75 lines
2.8 KiB
Python
import os
|
|
import asyncio
|
|
from datetime import datetime
|
|
|
|
|
|
class BookReader:
    """Read a text file line by line and feed qualifying lines to a trainer.

    The index of the next line to read is persisted to ``state_path`` after
    every line, so a new instance resumes where the previous one stopped.

    The ``trainer`` object must provide ``score_sentence(text)``,
    ``is_reinforceable(text)`` and ``train_on_tokens_from_text(text)``.
    """

    def __init__(
        self,
        trainer,
        book_path,
        state_path="readstate.txt",
        log_path="logs/read.log",
        interval=15,
    ):
        """Set up the reader and restore any saved position.

        Args:
            trainer: Object used to score and train on passages.
            book_path: Path of the text file to read.
            state_path: File holding the index of the next line to read.
            log_path: File that read entries are appended to.
            interval: Delay passed to ``asyncio.sleep`` after each line.
        """
        self.trainer = trainer
        self.book_path = book_path
        self.state_path = state_path
        self.log_path = log_path
        self.interval = interval
        self.last_sentence = ""
        self.total_lines = 0

        # A bare file name has an empty dirname, and os.makedirs("") raises
        # FileNotFoundError, so only create the directory when there is one.
        log_dir = os.path.dirname(self.log_path)
        if log_dir:
            os.makedirs(log_dir, exist_ok=True)

        self.current_line = self._load_state()

        if os.path.exists(self.book_path):
            with open(self.book_path, "r", encoding="utf-8", errors="ignore") as f:
                # Count lines while streaming instead of materialising them.
                self.total_lines = sum(1 for _ in f)

    def _load_state(self) -> int:
        """Return the saved line index, or 0 if it is missing or unusable."""
        try:
            with open(self.state_path, "r", encoding="utf-8") as f:
                saved = int(f.read().strip())
        except (OSError, ValueError):
            # Missing/unreadable file or non-integer content: start over.
            return 0
        # A negative value would index the book from its end; treat as 0.
        return max(saved, 0)

    def _save_state(self):
        """Overwrite the state file with the current line index."""
        with open(self.state_path, "w", encoding="utf-8") as f:
            f.write(str(self.current_line))

    def _log_read(self, text: str, score: float, tag: str = "Book"):
        """Append one timestamped ``(tag) score | text`` entry to the log."""
        with open(self.log_path, "a", encoding="utf-8") as f:
            f.write(f"[{datetime.utcnow().isoformat()}] ({tag}) {score:.2f} | {text.strip()}\n")

    async def start_reading(self):
        """Read the book from the saved position until its last line.

        Each line with at least five whitespace-separated words that passes
        ``_is_valid`` is scored by the trainer. After every line (qualifying
        or not) the position is advanced and saved, then the coroutine sleeps
        for ``interval``. Returns immediately if the book file is missing.
        """
        if not os.path.exists(self.book_path):
            print(f"[BOOK] File not found: {self.book_path}")
            return

        with open(self.book_path, "r", encoding="utf-8", errors="ignore") as f:
            lines = f.readlines()
        self.total_lines = len(lines)

        print(f"[BOOK] Starting to read {self.book_path} from line {self.current_line}...")

        while self.current_line < self.total_lines:
            passage = lines[self.current_line].strip()

            if len(passage.split()) >= 5 and self._is_valid(passage):
                score = self.trainer.score_sentence(passage)
                if self.trainer.is_reinforceable(passage) and score >= 2.5:
                    self.trainer.train_on_tokens_from_text(passage)
                    # NOTE(review): logging and last_sentence are assumed to
                    # apply only to passages that were trained on - confirm.
                    self._log_read(passage, score)
                    self.last_sentence = passage

            self.current_line += 1
            # Persist after every line so an interrupted run can resume.
            self._save_state()
            await asyncio.sleep(self.interval)

        print("[BOOK] Finished reading the book.")

    def _is_valid(self, text: str) -> bool:
        """Return True if every character is printable or whitespace."""
        return all(c.isprintable() or c.isspace() for c in text)

    def progress(self) -> dict:
        """Return a snapshot of the reading position.

        Keys: ``book`` (file name), ``line``, ``total``, ``percent``
        (0.0 when the total is unknown, never above 100.0) and
        ``last_sentence``.
        """
        if self.total_lines:
            # A saved position past the end of a shorter book must not
            # report more than 100 percent.
            percent = min(100.0, round(100 * self.current_line / self.total_lines, 2))
        else:
            percent = 0.0
        return {
            "book": os.path.basename(self.book_path),
            "line": self.current_line,
            "total": self.total_lines,
            "percent": percent,
            "last_sentence": self.last_sentence,
        }
|