# File viewer header (paste residue): 67 lines, 2.0 KiB, Python.
import asyncio
import json
import logging
import os

from model.scheduler import set_next_action
from model.trainer import train_on_message
from reader.filter import is_valid_line
|
|
|
|
# Directory that is scanned for ``.txt`` book files.
BOOK_DIR = "data/books"
# JSON file holding, per book file name, the index of the next line to read.
PROGRESS_FILE = "data/memory/book_progress.json"
READ_DELAY = 10  # seconds between lines
|
|
|
|
|
|
def get_books():
    """Return the names of the ``.txt`` book files found in ``BOOK_DIR``.

    Only regular files are returned, so a sub-directory whose name happens
    to end in ``.txt`` is skipped instead of failing later when it is opened.
    The result is sorted because ``os.listdir`` yields names in arbitrary
    order; sorting makes the reading order deterministic across runs.

    Returns:
        list[str]: File names (not full paths), sorted ascending.

    Raises:
        OSError: If ``BOOK_DIR`` cannot be listed (e.g. it does not exist).
    """
    return sorted(
        name
        for name in os.listdir(BOOK_DIR)
        if name.endswith(".txt")
        and os.path.isfile(os.path.join(BOOK_DIR, name))
    )
|
|
|
|
|
|
def load_progress():
    """Load the saved reading progress from ``PROGRESS_FILE``.

    The file is opened directly rather than checked with ``os.path.exists``
    first, so there is no window in which it can disappear between the check
    and the read.

    Returns:
        dict: The decoded JSON object. An empty dict is returned when the
        file does not exist, cannot be decoded (for example because it is
        empty or truncated), or does not contain a JSON object; the last two
        cases are logged as warnings.
    """
    try:
        with open(PROGRESS_FILE, "r", encoding="utf-8") as f:
            data = json.load(f)
    except FileNotFoundError:
        # Nothing saved yet: start from scratch.
        return {}
    except (json.JSONDecodeError, UnicodeDecodeError) as exc:
        # An unreadable progress file should not stop the reader; start over.
        logging.getLogger(__name__).warning(
            "Ignoring unreadable progress file %s: %s", PROGRESS_FILE, exc
        )
        return {}

    if not isinstance(data, dict):
        # Callers use dict methods on the result, so reject other JSON types.
        logging.getLogger(__name__).warning(
            "Ignoring progress file %s: expected a JSON object", PROGRESS_FILE
        )
        return {}
    return data
|
|
|
|
|
|
def save_progress(prog):
    """Write the reading progress to ``PROGRESS_FILE`` as indented JSON.

    The data is first written to a sibling ``.tmp`` file and then moved into
    place with ``os.replace``, so an interruption in the middle of a write
    cannot leave a truncated progress file behind. The parent directory is
    created when it does not exist yet.

    Args:
        prog: JSON-serialisable progress data to store.

    Raises:
        OSError: If the directory or file cannot be written.
        TypeError: If ``prog`` is not JSON-serialisable.
    """
    directory = os.path.dirname(PROGRESS_FILE)
    if directory:
        os.makedirs(directory, exist_ok=True)

    tmp_path = PROGRESS_FILE + ".tmp"
    with open(tmp_path, "w", encoding="utf-8") as f:
        json.dump(prog, f, indent=2)
    # Swap the finished file in over the old one in a single step.
    os.replace(tmp_path, PROGRESS_FILE)
|
|
|
|
|
|
async def read_books_forever():
    """Read the books in ``BOOK_DIR`` line by line, forever.

    The list of books and the saved progress are loaded once at start-up.
    Each book is resumed from its saved line index. For every line read:

    * the new index is stored in the progress data and saved immediately,
    * the stripped line is buffered if ``is_valid_line`` accepts it,
    * once three lines are buffered they are joined with spaces and passed
      to ``train_on_message`` with ``source="book"``,
    * ``set_next_action`` is called and the coroutine sleeps ``READ_DELAY``
      seconds.

    Lines still buffered at the end of a book are trained on as well, so a
    batch never mixes text from two books.

    When a full pass over the books reads nothing (no books, or every book
    already finished) the coroutine sleeps ``READ_DELAY`` seconds before the
    next pass. Without that pause the outer loop would never await and would
    block the event loop.

    This coroutine never returns; cancel the task to stop it.
    """
    books = get_books()
    progress = load_progress()
    buffered_lines = []

    while True:
        read_any_line = False

        for book in books:
            path = os.path.join(BOOK_DIR, book)
            if not os.path.exists(path):
                # The book was removed after the initial scan.
                continue

            with open(path, "r", encoding="utf-8") as f:
                lines = f.readlines()

            idx = progress.get(book, 0)
            while idx < len(lines):
                read_any_line = True
                line = lines[idx].strip()
                idx += 1
                # Persist progress before training on the line.
                progress[book] = idx
                save_progress(progress)

                if is_valid_line(line):
                    buffered_lines.append(line)

                # If we have enough lines buffered, combine and train
                if len(buffered_lines) >= 3:
                    _train_on_buffered(buffered_lines)

                set_next_action(READ_DELAY, "Reading")
                await asyncio.sleep(READ_DELAY)

            # End of a book: train whatever lines are left buffered
            if buffered_lines:
                _train_on_buffered(buffered_lines)

        if not read_any_line:
            # Nothing left to read in this pass: yield to the event loop
            # instead of spinning through the books in a tight loop.
            await asyncio.sleep(READ_DELAY)


def _train_on_buffered(buffered_lines):
    """Train on the buffered lines joined by spaces, then empty the buffer."""
    combined_text = " ".join(buffered_lines)
    train_on_message(combined_text, source="book")
    buffered_lines.clear()
|