101 lines
3.3 KiB
Python
101 lines
3.3 KiB
Python
import os
|
|
import asyncio
|
|
import json
|
|
from ego.tokenizer import save_vocab, Tokenizer
|
|
from brain.brainmap import save_brainmap
|
|
from ego.trainer import train_on_message
|
|
from utils.scheduler import set_next_action
|
|
from reader.filter import is_valid_line
|
|
|
|
# Directory that is scanned for ``.txt`` books.
BOOK_DIR = "content/books"

# JSON file holding the per-book line index and the list of completed books.
PROGRESS_FILE = "memory/book_progress.json"

# Seconds to pause after each paragraph that has been trained on.
READ_DELAY = 0.2

# A buffered paragraph is only trained on once it is longer than this.
PARAGRAPH_MIN_LENGTH = 20

# A line ending in one of these characters may close the current paragraph.
END_PUNCTUATION = set(".!?")

# Module-level tokenizer; its ``vocab`` is saved after each finished book.
tokenizer = Tokenizer()
|
|
|
|
|
|
def get_books(book_dir=None):
    """Return the names of the ``.txt`` files in the book directory, sorted.

    ``os.listdir`` yields entries in arbitrary order, so the result is sorted
    to give a stable reading order across runs and platforms. Entries that
    merely end in ``.txt`` but are not regular files (e.g. a directory) are
    skipped, since the reader opens every returned name as a text file.

    Args:
        book_dir: Directory to scan. Defaults to the module-level ``BOOK_DIR``.

    Returns:
        A sorted list of file names (not full paths).

    Raises:
        OSError: If the directory does not exist or cannot be listed.
    """
    directory = BOOK_DIR if book_dir is None else book_dir
    return sorted(
        name
        for name in os.listdir(directory)
        if name.endswith(".txt")
        and os.path.isfile(os.path.join(directory, name))
    )
|
|
|
|
|
|
def load_progress(path=None):
    """Load the saved reading progress, falling back to an empty state.

    The returned dict always has a ``"progress"`` dict and a ``"completed"``
    list, so callers can use it without further validation. Any other keys
    found in the file are preserved.

    Args:
        path: Progress file to read. Defaults to the module-level
            ``PROGRESS_FILE``.

    Returns:
        The parsed progress dict, or ``{"progress": {}, "completed": []}``
        when the file is missing, empty, unreadable, or not a JSON object.
    """
    target = PROGRESS_FILE if path is None else path

    # EAFP: opening directly avoids the exists()/open() race.
    try:
        with open(target, "r", encoding="utf-8") as f:
            raw = f.read().strip()
    except FileNotFoundError:
        # First run: no progress has been saved yet.
        return {"progress": {}, "completed": []}
    except (OSError, ValueError) as e:
        # ValueError covers UnicodeDecodeError from a badly encoded file.
        print(f"[Reader] Failed to load progress file: {e}")
        return {"progress": {}, "completed": []}

    if not raw:
        return {"progress": {}, "completed": []}

    try:
        data = json.loads(raw)
    except (ValueError, RecursionError) as e:
        # json.JSONDecodeError is a ValueError subclass.
        print(f"[Reader] Failed to load progress file: {e}")
        return {"progress": {}, "completed": []}

    if not isinstance(data, dict):
        print("[Reader] Failed to load progress file: expected a JSON object")
        return {"progress": {}, "completed": []}

    # Repair missing or mistyped sections instead of letting callers crash.
    if not isinstance(data.get("progress"), dict):
        data["progress"] = {}
    if not isinstance(data.get("completed"), list):
        data["completed"] = []
    return data
|
|
|
|
|
|
def save_progress(prog, path=None):
    """Write the progress dict to disk as indented JSON, atomically.

    The data is first written to a temporary sibling file and then moved over
    the target with ``os.replace``. Because this function is called very
    often while reading, an interruption in the middle of a plain in-place
    write would leave a truncated file and lose all progress; with the
    replace step the previous file stays intact until the new one is complete.

    Args:
        prog: JSON-serialisable progress data.
        path: Destination file. Defaults to the module-level
            ``PROGRESS_FILE``.

    Raises:
        OSError: If the directory or file cannot be written.
        TypeError: If ``prog`` is not JSON-serialisable.
    """
    target = PROGRESS_FILE if path is None else path

    # Create the parent directory on first use instead of failing.
    parent = os.path.dirname(target)
    if parent:
        os.makedirs(parent, exist_ok=True)

    tmp_path = f"{target}.tmp"
    try:
        with open(tmp_path, "w", encoding="utf-8") as f:
            json.dump(prog, f, indent=2)
        os.replace(tmp_path, target)
    except BaseException:
        # Best-effort cleanup of the temp file; the original error is what
        # matters, so it is re-raised unchanged.
        try:
            os.remove(tmp_path)
        except OSError:
            pass
        raise
|
|
|
|
|
|
async def _train_on_paragraph(text):
    """Train on one paragraph, pause for READ_DELAY, then call set_next_action."""
    await train_on_message(text, source="book")
    await asyncio.sleep(READ_DELAY)
    set_next_action(READ_DELAY, "Reading")


async def read_books_forever():
    """Read every book in ``BOOK_DIR`` in an endless loop, training on paragraphs.

    Lines of a book are joined with single spaces into a paragraph buffer.
    The buffer is passed to ``train_on_message`` once a line ends in one of
    ``END_PUNCTUATION`` and the buffer has reached ``PARAGRAPH_MIN_LENGTH``.
    Blank lines are ignored. After each trained paragraph the current line
    index is checkpointed through ``save_progress`` so reading can resume
    after a restart. When a book is finished, the vocab and brainmap are
    saved, the book is recorded as completed, and the coroutine naps. Once
    every book is completed the progress is reset and reading starts over.

    This coroutine never returns; cancel the task to stop it.
    """
    nap_seconds = 120  # pause after each book and between scans when idle

    progress_data = load_progress()
    progress = progress_data.get("progress", {})
    completed_books = progress_data.get("completed", [])

    while True:
        # Re-scan on every pass so books added while running are picked up.
        books = get_books()
        if not books:
            # Nothing to read. Sleeping here hands control back to the event
            # loop; without an await this loop would spin forever.
            await asyncio.sleep(nap_seconds)
            continue

        available_books = [b for b in books if b not in completed_books]

        if not available_books:
            print("[Reader] All books completed. Resetting progress.")
            progress_data = {"progress": {}, "completed": []}
            save_progress(progress_data)
            available_books = books
            progress = {}
            completed_books = []

        read_any = False
        for book in available_books:
            path = os.path.join(BOOK_DIR, book)
            try:
                with open(path, "r", encoding="utf-8") as f:
                    lines = f.readlines()
            except FileNotFoundError:
                # The book vanished after the directory was listed.
                continue
            read_any = True

            idx = progress.get(book, 0)
            paragraph = ""

            while idx < len(lines):
                line = lines[idx].strip()
                idx += 1

                if not line:
                    continue  # Ignore blank lines

                paragraph = f"{paragraph} {line}" if paragraph else line

                if (
                    line[-1] in END_PUNCTUATION
                    and len(paragraph) >= PARAGRAPH_MIN_LENGTH
                ):
                    await _train_on_paragraph(paragraph)
                    paragraph = ""

                    # Checkpoint only at paragraph boundaries: resuming from
                    # a mid-paragraph index would drop the buffered lines.
                    progress[book] = idx
                    progress_data["progress"] = progress
                    save_progress(progress_data)

            # Leftover text at end of file that never hit end punctuation.
            if paragraph and len(paragraph) >= PARAGRAPH_MIN_LENGTH:
                await _train_on_paragraph(paragraph)

            print(f"[Reader] Finished reading {book}. Taking a break to dream...")
            save_vocab(tokenizer.vocab)
            save_brainmap()

            # Record completion before the nap so an interruption while
            # sleeping does not cause the book to be processed again.
            completed_books.append(book)
            # dict.fromkeys removes duplicates while keeping reading order.
            progress_data["completed"] = list(dict.fromkeys(completed_books))
            save_progress(progress_data)

            await asyncio.sleep(nap_seconds)  # 2 minute nap after each book

        if not read_any:
            # Every listed book was missing; wait instead of busy-looping.
            await asyncio.sleep(nap_seconds)
|