import asyncio
import os
from datetime import datetime, timezone


class BookReader:
    """Feed a text file to a trainer one line at a time, resumably.

    The reader keeps its position (a zero-based line index) in a small state
    file so that a later instance can pick up where an earlier one stopped,
    and appends every passage it trains on to a log file.

    The ``trainer`` object must provide ``score_sentence(text)``,
    ``is_reinforceable(text)`` and ``train_on_tokens_from_text(text)``; those
    are the only members this class calls on it.
    """

    def __init__(self, trainer, book_path, state_path="readstate.txt",
                 log_path="logs/read.log", interval=15):
        """Set up paths, restore the saved position and count the book's lines.

        Args:
            trainer: Object used to score and train on passages.
            book_path: Path of the text file to read.
            state_path: File holding the index of the next line to read.
            log_path: File that trained passages are appended to.
            interval: Seconds to sleep after each line in ``start_reading``.
        """
        self.trainer = trainer
        self.book_path = book_path
        self.state_path = state_path
        self.log_path = log_path
        self.interval = interval
        self.last_sentence = ""
        self.total_lines = 0

        # os.makedirs("") raises FileNotFoundError, so only create the log
        # directory when log_path actually has a directory component.
        log_dir = os.path.dirname(self.log_path)
        if log_dir:
            os.makedirs(log_dir, exist_ok=True)

        self.current_line = self._load_state()

        if os.path.exists(self.book_path):
            with open(self.book_path, "r", encoding="utf-8", errors="ignore") as f:
                # Count lines lazily instead of loading the whole book.
                self.total_lines = sum(1 for _ in f)

    def _load_state(self) -> int:
        """Return the saved line index, or 0 if it is missing or unusable."""
        try:
            with open(self.state_path, "r", encoding="utf-8") as f:
                saved = int(f.read().strip())
        except (OSError, ValueError):
            # Missing/unreadable file or non-integer content: start over.
            return 0
        # A negative index would make lines[current_line] read from the END
        # of the book, so treat it like a corrupt state file.
        return max(saved, 0)

    def _save_state(self):
        """Overwrite the state file with the current line index."""
        with open(self.state_path, "w", encoding="utf-8") as f:
            f.write(str(self.current_line))

    def _log_read(self, text: str, score: float, tag: str = "Book"):
        """Append one ``[timestamp] (tag) score | text`` entry to the log."""
        # Aware "now" converted back to naive keeps the historical timestamp
        # format (no "+00:00" suffix) without the deprecated utcnow().
        stamp = datetime.now(timezone.utc).replace(tzinfo=None).isoformat()
        with open(self.log_path, "a", encoding="utf-8") as f:
            f.write(f"[{stamp}] ({tag}) {score:.2f} | {text.strip()}\n")

    async def start_reading(self):
        """Read the book from the saved position to the end.

        Each line is stripped; lines with at least five whitespace-separated
        words that pass ``_is_valid`` are scored, and those the trainer deems
        reinforceable with a score of 2.5 or more are trained on and logged.
        After every line (trained or skipped) the position is persisted and
        the coroutine sleeps for ``interval`` seconds.

        Returns early, after printing a message, if the book does not exist.
        """
        if not os.path.exists(self.book_path):
            print(f"[BOOK] File not found: {self.book_path}")
            return

        with open(self.book_path, "r", encoding="utf-8", errors="ignore") as f:
            lines = f.readlines()
        # Refresh the count: the file may have changed since __init__.
        self.total_lines = len(lines)

        print(f"[BOOK] Starting to read {self.book_path} from line {self.current_line}...")
        while self.current_line < self.total_lines:
            passage = lines[self.current_line].strip()
            if len(passage.split()) >= 5 and self._is_valid(passage):
                score = self.trainer.score_sentence(passage)
                if self.trainer.is_reinforceable(passage) and score >= 2.5:
                    self.trainer.train_on_tokens_from_text(passage)
                    # NOTE(review): only trained passages are logged and
                    # remembered here -- confirm this nesting is intended.
                    self._log_read(passage, score)
                    self.last_sentence = passage
            self.current_line += 1
            # Persist after every line so an interruption loses at most one.
            self._save_state()
            await asyncio.sleep(self.interval)
        print("[BOOK] Finished reading the book.")

    def _is_valid(self, text: str) -> bool:
        """Return True if every character is printable or whitespace."""
        return all(c.isprintable() or c.isspace() for c in text)

    def progress(self) -> dict:
        """Return a snapshot of the reading position.

        Keys: ``book`` (file name only), ``line``, ``total``, ``percent``
        (rounded to two decimals, ``0.0`` when the book has no lines) and
        ``last_sentence``.
        """
        if self.total_lines:
            percent = round(100 * self.current_line / self.total_lines, 2)
        else:
            percent = 0.0
        return {
            "book": os.path.basename(self.book_path),
            "line": self.current_line,
            "total": self.total_lines,
            "percent": percent,
            "last_sentence": self.last_sentence,
        }