Started Phase 4
This commit is contained in:
parent
2cf713ca97
commit
67c9e63fa3
2
.gitignore
vendored
2
.gitignore
vendored
@ -169,3 +169,5 @@ cython_debug/
|
|||||||
#.idea/
|
#.idea/
|
||||||
|
|
||||||
.vscode/launch.json
|
.vscode/launch.json
|
||||||
|
/data/books/alice_in_wonderland.txt
|
||||||
|
/data/books/wizard_of_oz.txt
|
3
main.py
3
main.py
@ -35,5 +35,8 @@ async def on_message(message):
|
|||||||
# Launch Flask in background
|
# Launch Flask in background
|
||||||
threading.Thread(target=run_dashboard, daemon=True).start()
|
threading.Thread(target=run_dashboard, daemon=True).start()
|
||||||
|
|
||||||
|
loop = asyncio.get_event_loop()
|
||||||
|
loop.create_task(read_books_forever()) # Book reader task
|
||||||
|
|
||||||
# Launch Discord bot (blocking)
|
# Launch Discord bot (blocking)
|
||||||
client.run(TOKEN)
|
client.run(TOKEN)
|
||||||
|
@ -55,7 +55,7 @@ def daydream():
|
|||||||
seed = torch.tensor([random.randint(0, tokenizer.next_id - 1)], device=DEVICE).unsqueeze(0)
|
seed = torch.tensor([random.randint(0, tokenizer.next_id - 1)], device=DEVICE).unsqueeze(0)
|
||||||
dream = []
|
dream = []
|
||||||
|
|
||||||
for _ in range(12): # generate 12-word thought
|
for _ in range(12):
|
||||||
out = model(seed)
|
out = model(seed)
|
||||||
logits = out[:, -1, :]
|
logits = out[:, -1, :]
|
||||||
probs = F.softmax(logits, dim=-1)
|
probs = F.softmax(logits, dim=-1)
|
||||||
@ -68,6 +68,7 @@ def daydream():
|
|||||||
|
|
||||||
if score > 0.3:
|
if score > 0.3:
|
||||||
save_dream(sentence, score)
|
save_dream(sentence, score)
|
||||||
|
train_on_message(sentence)
|
||||||
recent_dreams.append((score, sentence))
|
recent_dreams.append((score, sentence))
|
||||||
if len(recent_dreams) > 10:
|
if len(recent_dreams) > 10:
|
||||||
recent_dreams.pop(0)
|
recent_dreams.pop(0)
|
||||||
|
@ -0,0 +1,12 @@
|
|||||||
|
import re
|
||||||
|
|
||||||
|
|
||||||
|
def is_valid_line(text: str) -> bool:
    """Decide whether a line of text is worth using.

    Surrounding whitespace is ignored. A line is accepted only when all of
    the following hold:

    * it is at least 10 characters long after stripping,
    * it contains at least one ASCII letter,
    * it contains neither the literal ``<EFBFBD>`` marker nor the Unicode
      replacement character (U+FFFD).

    Args:
        text: The raw line to check.

    Returns:
        ``True`` when the line passes every check, ``False`` otherwise.
    """
    stripped = text.strip()
    long_enough = len(stripped) >= 10
    has_letter = re.search(r"[a-zA-Z]", stripped) is not None
    # Both markers are treated as signs of a badly decoded line.
    garbled = "<EFBFBD>" in stripped or "\ufffd" in stripped
    return long_enough and has_letter and not garbled
|
@ -0,0 +1,48 @@
|
|||||||
|
import os
|
||||||
|
import asyncio
|
||||||
|
from model.train import train_on_message
|
||||||
|
from reader.filter import is_valid_line
|
||||||
|
|
||||||
|
# Directory that is scanned for ``.txt`` book files.
BOOK_DIR = "data/books"

# JSON file in which the reading progress is stored.
PROGRESS_FILE = "data/memory/book_progress.json"

# Pause between lines, in seconds.
READ_DELAY = 10
|
||||||
|
|
||||||
|
|
||||||
|
def get_books(book_dir=None):
    """Return the names of the ``.txt`` files in the book directory.

    Args:
        book_dir: Directory to scan. Defaults to ``BOOK_DIR`` when omitted.

    Returns:
        A list of file names (not full paths) ending in ``.txt``, sorted
        alphabetically. ``os.listdir`` makes no ordering guarantee, so the
        names are sorted to keep the reading order stable across runs and
        platforms.

    Raises:
        OSError: If the directory cannot be listed (for example, it does
            not exist).
    """
    if book_dir is None:
        book_dir = BOOK_DIR
    return sorted(name for name in os.listdir(book_dir) if name.endswith(".txt"))
|
||||||
|
|
||||||
|
|
||||||
|
def load_progress(path=None):
    """Load the saved reading progress from disk.

    Args:
        path: JSON file to read. Defaults to ``PROGRESS_FILE`` when omitted.

    Returns:
        The JSON object stored in the file as a ``dict``. An empty ``dict``
        is returned when the file does not exist, cannot be parsed as JSON,
        or does not contain a JSON object, so the caller starts from scratch
        instead of crashing.
    """
    # Imported locally because the module's import block does not bring in json.
    import json

    if path is None:
        path = PROGRESS_FILE
    try:
        with open(path, "r", encoding="utf-8") as f:
            data = json.load(f)
    except FileNotFoundError:
        return {}
    except ValueError:
        # Covers json.JSONDecodeError and UnicodeDecodeError: a truncated or
        # corrupt progress file should not take the reader down.
        return {}
    return data if isinstance(data, dict) else {}
|
||||||
|
|
||||||
|
|
||||||
|
def save_progress(prog, path=None):
    """Write the reading progress to disk as indented JSON.

    The data is first written to a sibling ``.tmp`` file and then moved over
    the target with ``os.replace``, so an interruption mid-write cannot leave
    a half-written progress file behind. The parent directory is created if
    it does not exist yet.

    Args:
        prog: JSON-serialisable progress mapping to store.
        path: Destination file. Defaults to ``PROGRESS_FILE`` when omitted.
    """
    # Imported locally because the module's import block does not bring in json.
    import json

    if path is None:
        path = PROGRESS_FILE

    folder = os.path.dirname(path)
    if folder:
        os.makedirs(folder, exist_ok=True)

    tmp_path = f"{path}.tmp"
    with open(tmp_path, "w", encoding="utf-8") as f:
        json.dump(prog, f, indent=2)
    os.replace(tmp_path, path)
|
||||||
|
|
||||||
|
|
||||||
|
async def read_books_forever():
    """Feed book lines to the trainer one at a time, resuming across restarts.

    The list of books and the saved progress are loaded once at start-up.
    Each book is then read from its saved line index; after every line the
    new index is saved, and lines accepted by ``is_valid_line`` are passed to
    ``train_on_message``. The coroutine never returns.
    """
    books = get_books()
    progress = load_progress()

    while True:
        for book in books:
            path = os.path.join(BOOK_DIR, book)
            if not os.path.exists(path):
                continue

            # errors="replace" turns undecodable bytes into U+FFFD instead of
            # raising; is_valid_line rejects lines containing that character.
            with open(path, "r", encoding="utf-8", errors="replace") as f:
                lines = f.readlines()

            idx = progress.get(book, 0)
            while idx < len(lines):
                line = lines[idx].strip()
                idx += 1
                # Persist the position before training so a crash during
                # training does not replay the same line on restart.
                progress[book] = idx
                save_progress(progress)

                if is_valid_line(line):
                    train_on_message(line)
                    # NOTE(review): the original indentation was lost; this
                    # assumes the delay applies only after a trained line.
                    # Confirm against the real file.
                    await asyncio.sleep(READ_DELAY)
                else:
                    # Yield so a long run of skipped lines cannot starve
                    # other tasks on the event loop.
                    await asyncio.sleep(0)

        # Once every book is exhausted (or there are none), the loops above
        # never await. Without this pause the outer loop would spin forever
        # and block the event loop.
        await asyncio.sleep(READ_DELAY)
|
Loading…
x
Reference in New Issue
Block a user