"""Background book reader.

Feeds paragraphs from the ``.txt`` files in ``BOOK_DIR`` to
``train_on_message`` and records how far each book has been read in
``PROGRESS_FILE`` so that a restart can resume where it left off.

The progress file is a JSON object of the form
``{"progress": {book_name: next_line_index}, "completed": [book_name, ...]}``.
"""

import os
import asyncio
import json
from ego.tokenizer import save_vocab, Tokenizer
from brain.brainmap import save_brainmap
from ego.trainer import train_on_message
from utils.scheduler import set_next_action
from reader.filter import is_valid_line

BOOK_DIR = "data/books"
PROGRESS_FILE = "data/memory/book_progress.json"
READ_DELAY = 0.2  # seconds between paragraphs
BOOK_BREAK_DELAY = 120  # seconds to nap after finishing a book
NO_BOOKS_DELAY = 60  # seconds to wait before rescanning an empty library
PARAGRAPH_MIN_LENGTH = 20
END_PUNCTUATION = {".", "!", "?"}

tokenizer = Tokenizer()


def get_books():
    """Return the names of the ``.txt`` files in ``BOOK_DIR``, sorted.

    ``os.listdir`` returns entries in arbitrary order; sorting makes the
    reading order deterministic across runs and platforms.

    Raises:
        OSError: if ``BOOK_DIR`` cannot be listed (e.g. it does not exist).
    """
    return sorted(name for name in os.listdir(BOOK_DIR) if name.endswith(".txt"))


def _empty_progress():
    """Return a fresh, empty progress record."""
    return {"progress": {}, "completed": []}


def load_progress():
    """Load the reading progress record from ``PROGRESS_FILE``.

    Returns:
        The stored dict, or an empty record when the file is missing, empty,
        unreadable, not valid JSON, or does not have the expected structure.
    """
    try:
        with open(PROGRESS_FILE, "r", encoding="utf-8") as f:
            raw = f.read().strip()
    except FileNotFoundError:
        # No progress saved yet; not worth reporting.
        return _empty_progress()
    except (OSError, ValueError) as e:
        # ValueError covers UnicodeDecodeError raised while decoding the file.
        print(f"[Reader] Failed to load progress file: {e}")
        return _empty_progress()

    if not raw:
        return _empty_progress()

    try:
        data = json.loads(raw)
    except ValueError as e:  # json.JSONDecodeError is a ValueError subclass
        print(f"[Reader] Failed to load progress file: {e}")
        return _empty_progress()

    # Callers use dict/list operations on the record, so reject valid JSON of
    # the wrong shape instead of failing later.
    well_formed = (
        isinstance(data, dict)
        and isinstance(data.get("progress", {}), dict)
        and isinstance(data.get("completed", []), list)
    )
    if not well_formed:
        print("[Reader] Failed to load progress file: unexpected structure")
        return _empty_progress()
    return data


def save_progress(prog):
    """Write the progress record ``prog`` to ``PROGRESS_FILE`` as JSON.

    The data is written to a temporary sibling file and then moved into place
    with ``os.replace`` so that an interrupted write cannot leave a truncated
    progress file behind. The parent directory is created if needed.
    """
    directory = os.path.dirname(PROGRESS_FILE)
    if directory:
        os.makedirs(directory, exist_ok=True)

    tmp_path = PROGRESS_FILE + ".tmp"
    with open(tmp_path, "w", encoding="utf-8") as f:
        json.dump(prog, f, indent=2)
    os.replace(tmp_path, PROGRESS_FILE)


async def _train_on_paragraph(text):
    """Train on one paragraph, then pause and announce the next read."""
    await train_on_message(text, source="book")
    await asyncio.sleep(READ_DELAY)
    set_next_action(READ_DELAY, "Reading")


async def read_books_forever():
    """Read every book in ``BOOK_DIR`` paragraph by paragraph, forever.

    Non-blank lines are accumulated until one ends with a character from
    ``END_PUNCTUATION`` and the accumulated text is long enough; the paragraph
    is then passed to ``train_on_message`` and the position in the book is
    saved. After each book the vocabulary and brainmap are saved and the
    reader naps for ``BOOK_BREAK_DELAY`` seconds. Once every book is
    completed, progress is reset and the library is rescanned.

    This coroutine never returns.
    """
    books = get_books()
    progress_data = load_progress()
    progress = progress_data.setdefault("progress", {})
    completed_books = progress_data.setdefault("completed", [])

    while True:
        if not books:
            # Without this wait the loop would spin without ever awaiting and
            # starve every other task on the event loop.
            print(f"[Reader] No books found in {BOOK_DIR}. Waiting...")
            await asyncio.sleep(NO_BOOKS_DELAY)
            books = get_books()
            continue

        available_books = [b for b in books if b not in completed_books]
        if not available_books:
            print("[Reader] All books completed. Resetting progress.")
            progress = {}
            completed_books = []
            progress_data = {"progress": progress, "completed": completed_books}
            save_progress(progress_data)
            # Rescan so added or removed books are noticed on the next cycle.
            books = get_books()
            continue

        for book in available_books:
            path = os.path.join(BOOK_DIR, book)
            try:
                with open(path, "r", encoding="utf-8") as f:
                    lines = f.readlines()
            except (OSError, UnicodeDecodeError) as e:
                # Skip the book for this cycle; marking it completed lets the
                # cycle finish, and the sleep guarantees the loop yields.
                print(f"[Reader] Skipping {book}: {e}")
                completed_books.append(book)
                await asyncio.sleep(READ_DELAY)
                continue

            idx = progress.get(book, 0)
            if not isinstance(idx, int) or idx < 0:
                idx = 0  # ignore a corrupt stored position

            parts = []
            while idx < len(lines):
                line = lines[idx].strip()
                idx += 1
                if not line:
                    continue  # Ignore blank lines
                parts.append(line)
                if line[-1] not in END_PUNCTUATION:
                    continue
                text = " ".join(parts)
                if len(text) < PARAGRAPH_MIN_LENGTH:
                    continue  # too short; keep accumulating

                await _train_on_paragraph(text)
                parts = []
                # Save only at paragraph boundaries so a restart never resumes
                # in the middle of a paragraph.
                progress[book] = idx
                progress_data["progress"] = progress
                save_progress(progress_data)

            # Trailing text that never reached end punctuation.
            if parts:
                tail = " ".join(parts)
                if len(tail) >= PARAGRAPH_MIN_LENGTH:
                    await _train_on_paragraph(tail)

            print(f"[Reader] Finished reading {book}. Taking a break to dream...")
            save_vocab(tokenizer.vocab)
            save_brainmap()

            # Record completion before the nap so a shutdown during the nap
            # does not cause the end of the book to be read again.
            if book not in completed_books:
                completed_books.append(book)
            progress_data["completed"] = completed_books
            save_progress(progress_data)

            await asyncio.sleep(BOOK_BREAK_DELAY)  # nap after each book