diff --git a/.gitignore b/.gitignore
index 637313c..5d73656 100644
--- a/.gitignore
+++ b/.gitignore
@@ -170,9 +170,5 @@ cython_debug/
 .vscode/launch.json
 
 /data/books/*
-/data/memory/context.json
-/data/memory/dreams.json
-data/memory/brainmap.json
-/data/memory/vocab.json
-data/memory/book_progress.json
-/data/memory/journal.json
\ No newline at end of file
+/data/memory/*
+/data/logs/*
\ No newline at end of file
diff --git a/model/dreamer.py b/model/dreamer.py
index cfc7bf2..1db8236 100644
--- a/model/dreamer.py
+++ b/model/dreamer.py
@@ -13,14 +13,19 @@
 recent_dreams = []
 
 def daydream():
     model.eval()
-    seed = torch.tensor([random.randint(0, tokenizer.next_id - 1)], device=DEVICE).unsqueeze(0)
+    # Seed from the model's own output vocabulary so the id is always valid
+    # for the embedding/head, even if tokenizer.next_id has outgrown the model.
+    max_token_id = model.head.out_features - 1
+    seed = torch.randint(0, max_token_id + 1, (1, 1), device=DEVICE)
     dream = []
 
     for _ in range(12):
         out = model(seed)
         logits = out[:, -1, :]
         probs = F.softmax(logits, dim=-1)
         token = torch.multinomial(probs, num_samples=1)
+
+        # Defensive clamp: keeps sampled ids inside the embedding range.
+        token = torch.clamp(token, max=max_token_id)
+
         dream.append(token.item())
         seed = torch.cat([seed, token], dim=1)
diff --git a/model/trainer.py b/model/trainer.py
index d525642..b8fb0b6 100644
--- a/model/trainer.py
+++ b/model/trainer.py
@@ -47,7 +47,10 @@ def train_on_message(text: str, source: str = "user"):
 
     # Clamp any token IDs beyond the model's output size
     max_token_id = model.head.out_features - 1
+    if tokenizer.next_id > model.head.out_features:
+        expand_model_if_needed()
+        max_token_id = model.head.out_features - 1  # head grew; refresh the bound
     tokens = [min(t, max_token_id) for t in tokens]
     tokens = tokens[:128]  # Hard clamp input length
 
     if len(tokens) < 2: