"""Feed paragraphs from plain-text books to the trainer, resuming across runs.

Books are the ``.txt`` files in ``BOOK_DIR``. For each book the index of the
next unread line is stored in ``PROGRESS_FILE`` so reading can resume where it
stopped.
"""

import asyncio
import json
import os

from model.trainer import train_on_message
from model.scheduler import set_next_action
from reader.filter import is_valid_line  # not used below; kept in case other code relies on it
from utils.unicleaner import clean_unicode

BOOK_DIR = "data/books"
PROGRESS_FILE = "data/memory/book_progress.json"
READ_DELAY = 0.2  # seconds to pause after each trained paragraph (and between idle passes)
PARAGRAPH_MIN_LENGTH = 20  # a buffered paragraph must be longer than this to be trained


def get_books():
    """Return the names of the ``.txt`` files in ``BOOK_DIR``, sorted by name.

    Returns an empty list when ``BOOK_DIR`` does not exist. Sorting makes the
    reading order deterministic; ``os.listdir`` gives no ordering guarantee.
    """
    try:
        names = os.listdir(BOOK_DIR)
    except FileNotFoundError:
        return []
    return sorted(name for name in names if name.endswith(".txt"))


def load_progress():
    """Load the saved reading progress.

    Returns:
        The object decoded from ``PROGRESS_FILE`` (written by
        ``save_progress`` as a mapping of book file name to next line index),
        or an empty dict when the file does not exist yet.

    Raises:
        json.JSONDecodeError: if the file exists but is not valid JSON.
    """
    try:
        with open(PROGRESS_FILE, "r", encoding="utf-8") as f:
            return json.load(f)
    except FileNotFoundError:
        return {}


def save_progress(prog):
    """Persist ``prog`` to ``PROGRESS_FILE`` as indented JSON.

    The data is written to a temporary sibling file and then moved into place
    with ``os.replace`` so an interrupted write cannot leave a truncated
    progress file behind. The parent directory is created when missing.
    """
    directory = os.path.dirname(PROGRESS_FILE)
    if directory:
        os.makedirs(directory, exist_ok=True)
    tmp_path = PROGRESS_FILE + ".tmp"
    with open(tmp_path, "w", encoding="utf-8") as f:
        json.dump(prog, f, indent=2)
    os.replace(tmp_path, PROGRESS_FILE)


def _train_paragraph(paragraph):
    """Train on the buffered paragraph if it is long enough.

    The length check is made on the raw buffer (including the separator space
    added before each line). Returns True when the paragraph was trained.
    """
    if len(paragraph) <= PARAGRAPH_MIN_LENGTH:
        return False
    train_on_message(clean_unicode(paragraph.strip()), source="book")
    return True


async def _pause_after_training():
    """Sleep for ``READ_DELAY`` seconds, then report the next action to the scheduler."""
    await asyncio.sleep(READ_DELAY)
    set_next_action(READ_DELAY, "Reading")


async def read_books_forever():
    """Read every book paragraph by paragraph and train on each paragraph, forever.

    Lines are joined with single spaces until a blank line is reached. If the
    buffered text is longer than ``PARAGRAPH_MIN_LENGTH`` it is cleaned with
    ``clean_unicode`` and passed to ``train_on_message``; shorter text stays
    in the buffer and is merged with the lines that follow. Whatever remains
    at the end of a book is trained if long enough, otherwise dropped.

    Progress is saved only when no partial paragraph is buffered, so resuming
    after a restart never starts in the middle of a paragraph. The book list
    is re-scanned on every pass, so new books (or text appended to a finished
    book) are picked up. This coroutine never returns.
    """
    progress = load_progress()

    while True:
        for book in get_books():
            path = os.path.join(BOOK_DIR, book)
            try:
                with open(path, "r", encoding="utf-8") as f:
                    lines = f.readlines()
            except FileNotFoundError:
                # The book was removed between listing and opening.
                continue

            idx = progress.get(book, 0)
            if idx >= len(lines):
                # Already read to the end; nothing new to train on.
                continue

            paragraph = ""
            while idx < len(lines):
                line = lines[idx].strip()
                idx += 1

                if line:
                    paragraph += " " + line
                    continue

                # Blank line: paragraph boundary.
                trained = _train_paragraph(paragraph)
                if trained:
                    paragraph = ""
                if not paragraph:
                    # Safe resume point: nothing is buffered. Saved before
                    # sleeping so a cancellation during the pause cannot
                    # cause the paragraph to be trained twice.
                    progress[book] = idx
                    save_progress(progress)
                if trained:
                    await _pause_after_training()

            # Train the last paragraph if any, then mark the book as read.
            trained = _train_paragraph(paragraph)
            progress[book] = idx
            save_progress(progress)
            if trained:
                await _pause_after_training()

        # Always yield once per pass. Without this, a pass with nothing left
        # to read (or no books at all) never awaits and the loop would block
        # the event loop forever.
        await asyncio.sleep(READ_DELAY)