# Ruby/reader/reader.py
# (File-viewer metadata captured with this source: 66 lines, 1.9 KiB, Python.)

import asyncio
import json
import logging
import os

from model.trainer import train_on_message
from model.scheduler import set_next_action
from reader.filter import is_valid_line
# Directory that is listed for book files; only names ending in ".txt" are used.
BOOK_DIR = "data/books"
# JSON file holding reading progress: book filename -> index of the next line to read.
PROGRESS_FILE = "data/memory/book_progress.json"
READ_DELAY = 0.2 # seconds the reader sleeps at a paragraph break (also passed to set_next_action)
# Length threshold, in characters, a paragraph must reach before it is passed to train_on_message.
PARAGRAPH_MIN_LENGTH = 20
def get_books():
    """Return the names of the ``.txt`` files in ``BOOK_DIR``, sorted by name.

    Only regular files are returned, so a directory whose name happens to end
    in ``.txt`` is never handed to a caller that will try to open it.

    Returns:
        list[str]: File names (not full paths) in ascending order. The list is
        empty when ``BOOK_DIR`` does not exist or holds no matching files.
    """
    try:
        with os.scandir(BOOK_DIR) as entries:
            # Sort because directory listing order is arbitrary; a stable
            # order keeps the reading sequence the same across runs.
            return sorted(
                entry.name
                for entry in entries
                if entry.is_file() and entry.name.endswith(".txt")
            )
    except FileNotFoundError:
        # No book directory yet simply means there is nothing to read.
        return []
def load_progress():
    """Load the saved reading progress from ``PROGRESS_FILE``.

    Returns:
        dict: The decoded JSON object stored in the file. An empty dict is
        returned when the file does not exist, cannot be decoded as UTF-8
        JSON, or does not contain a JSON object.
    """
    try:
        with open(PROGRESS_FILE, "r", encoding="utf-8") as f:
            data = json.load(f)
    except FileNotFoundError:
        # First run: nothing has been read yet.
        return {}
    except (json.JSONDecodeError, UnicodeDecodeError):
        # A truncated or corrupt file should not stop the reader; start over.
        logging.getLogger(__name__).warning(
            "Could not parse %s; starting with empty progress", PROGRESS_FILE
        )
        return {}

    if not isinstance(data, dict):
        # Callers use dict methods on the result, so reject any other payload.
        logging.getLogger(__name__).warning(
            "%s does not contain a JSON object; starting with empty progress",
            PROGRESS_FILE,
        )
        return {}
    return data
def save_progress(prog):
    """Write the reading progress to ``PROGRESS_FILE`` as indented JSON.

    The data is written to a temporary sibling file first and then moved over
    the real file, so an interruption mid-write cannot leave a truncated
    progress file behind. The parent directory is created when missing.

    Args:
        prog: JSON-serialisable progress data to persist.
    """
    directory = os.path.dirname(PROGRESS_FILE)
    if directory:
        os.makedirs(directory, exist_ok=True)

    tmp_path = PROGRESS_FILE + ".tmp"
    with open(tmp_path, "w", encoding="utf-8") as f:
        json.dump(prog, f, indent=2)
    # os.replace swaps the file in a single step on the same filesystem.
    os.replace(tmp_path, PROGRESS_FILE)
async def read_books_forever():
    """Loop over the books forever, passing completed paragraphs to the trainer.

    The book list (``get_books``) and the saved progress (``load_progress``)
    are loaded once, before the loop starts. For each book, reading resumes at
    the saved line index. Non-blank lines are stripped and joined with single
    spaces; a blank line closes the current paragraph. A paragraph whose text
    is at least ``PARAGRAPH_MIN_LENGTH`` characters long is passed to
    ``train_on_message(text, source="book")``; shorter ones are dropped.

    Progress is saved at every blank line and at the end of each book, before
    the pause that follows, so a restart resumes at a paragraph boundary.
    After each blank line and after a trained trailing paragraph the coroutine
    sleeps ``READ_DELAY`` seconds and then calls
    ``set_next_action(READ_DELAY, "Reading")``.

    This coroutine never returns; it ends only through cancellation or an
    exception raised by one of the calls it makes.
    """
    books = get_books()
    progress = load_progress()

    while True:
        for book in books:
            path = os.path.join(BOOK_DIR, book)
            try:
                with open(path, "r", encoding="utf-8") as f:
                    lines = f.readlines()
            except FileNotFoundError:
                # The book disappeared after the initial listing; skip it.
                continue

            idx = progress.get(book, 0)
            pending = []  # stripped lines of the paragraph being built

            while idx < len(lines):
                line = lines[idx].strip()
                idx += 1
                if line:
                    pending.append(line)
                    continue

                # Blank line: the paragraph collected so far is complete.
                text = " ".join(pending)
                pending.clear()
                if len(text) >= PARAGRAPH_MIN_LENGTH:
                    train_on_message(text, source="book")

                # Persist before sleeping so that a cancellation during the
                # pause does not cause the paragraph to be trained again.
                progress[book] = idx
                save_progress(progress)
                await asyncio.sleep(READ_DELAY)
                set_next_action(READ_DELAY, "Reading")

            # Handle a trailing paragraph when the book does not end with a
            # blank line.
            text = " ".join(pending)
            tail_trained = len(text) >= PARAGRAPH_MIN_LENGTH
            if tail_trained:
                train_on_message(text, source="book")

            if progress.get(book, 0) != idx:
                progress[book] = idx
                save_progress(progress)

            if tail_trained:
                await asyncio.sleep(READ_DELAY)
                set_next_action(READ_DELAY, "Reading")

        # Once every book has been read to its end (or there are no books),
        # nothing above awaits. Yield here so the loop cannot spin and starve
        # the event loop.
        await asyncio.sleep(READ_DELAY)