From 26fbf85a90a3d973b45c81b61d4b78581d2d7c80 Mon Sep 17 00:00:00 2001
From: Dani
Date: Sun, 27 Apr 2025 19:59:03 -0400
Subject: [PATCH] Updated context and increased her brain capacity

---
 model/brain_architecture.py | 4 ++--
 model/trainer.py            | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/model/brain_architecture.py b/model/brain_architecture.py
index 974e4cd..e600d3f 100644
--- a/model/brain_architecture.py
+++ b/model/brain_architecture.py
@@ -45,10 +45,10 @@ class TransformerBlock(nn.Module):
 
 
 class TinyTransformer(nn.Module):
-    def __init__(self, vocab_size, embed_dim=256, depth=4, heads=8):
+    def __init__(self, vocab_size, embed_dim=512, depth=4, heads=16):
         super().__init__()
         self.token_embed = nn.Embedding(vocab_size, embed_dim)
-        self.pos_embed = nn.Parameter(torch.randn(1, 128, embed_dim))
+        self.pos_embed = nn.Parameter(torch.randn(1, 256, embed_dim))
         self.blocks = nn.Sequential(*[TransformerBlock(embed_dim, heads) for _ in range(depth)])
         self.norm = nn.LayerNorm(embed_dim)
         self.head = nn.Linear(embed_dim, vocab_size)
diff --git a/model/trainer.py b/model/trainer.py
index 48d45dc..cc70982 100644
--- a/model/trainer.py
+++ b/model/trainer.py
@@ -35,7 +35,7 @@ def train_on_message(text: str, source: str = "user"):
     try:
         model.train()
-        context_texts = get_recent_context(10)
+        context_texts = get_recent_context(30)
         augmented_text = " " + " ".join(context_texts + [text]) + " "
         tokens = tokenizer.tokenize(augmented_text)