diff --git a/.env.example b/.env.example
new file mode 100644
index 0000000..0529d09
--- /dev/null
+++ b/.env.example
@@ -0,0 +1,15 @@
+# Discord Bot Configuration
+DISCORD_TOKEN=your_discord_token_here
+DISCORD_BOT_PREFIX=!
+
+# Training Configuration
+WANDB_API_KEY=your_wandb_key_here
+WANDB_PROJECT=lyra-training
+
+# Model Configuration
+MODEL_SIZE=125M
+CONTEXT_LENGTH=1024
+BATCH_SIZE=8
+
+# Database
+DATABASE_PATH=lyra.db
\ No newline at end of file
diff --git a/.gitignore b/.gitignore
index e15106e..2bf2658 100644
--- a/.gitignore
+++ b/.gitignore
@@ -214,3 +214,32 @@ __marimo__/
 
 # Streamlit
 .streamlit/secrets.toml
+
+# Lyra Project Specific
+# Data files
+data/raw/
+data/processed/
+data/discord/
+
+# Model files
+models/checkpoints/
+models/active/
+*.pt
+*.pth
+*.safetensors
+*.bin
+*.ckpt
+
+# Database
+*.db
+*.db-journal
+*.db-wal
+*.db-shm
+
+# Logs
+logs/
+wandb/
+tensorboard/
+
+# Discord bot token and other secrets
+.env
diff --git a/configs/bot.yaml b/configs/bot.yaml
new file mode 100644
index 0000000..8aedd94
--- /dev/null
+++ b/configs/bot.yaml
@@ -0,0 +1,50 @@
+# Discord Bot Configuration for Lyra
+
+bot:
+  # Bot identity
+  name: "Lyra"
+  description: "AI-powered Discord chatbot with self-evolving personality"
+
+  # Discord settings
+  command_prefix: "!"
+  intents:
+    - guilds
+    - guild_messages
+    - dm_messages
+    - message_content
+
+  # Response behavior
+  respond_to_all: true         # Respond to all messages in channels she has access to
+  respond_to_mentions: true    # Always respond when mentioned
+  respond_to_dms: true         # Respond to DMs
+  ignore_bots: true            # Don't respond to other bots
+  ignore_self: true            # Don't respond to own messages
+
+  # Generation parameters
+  generation:
+    max_length: 150            # Max tokens per response
+    temperature: 0.9           # Higher = more creative
+    top_p: 0.92                # Nucleus sampling
+    top_k: 50                  # Top-k sampling
+    repetition_penalty: 1.1    # Penalize repetition
+    no_repeat_ngram_size: 3    # Don't repeat 3-grams
+
+  # Context management
+  context:
+    max_history: 10            # Messages to include as context
+    max_context_tokens: 512    # Max tokens from history
+
+  # Rate limiting
+  rate_limit:
+    enabled: true
+    max_responses_per_minute: 10
+    cooldown_seconds: 2
+
+  # Memory
+  memory:
+    short_term_size: 50        # Last N messages in memory
+    use_long_term: true        # Use database for long-term memory
+
+  # Model
+  model_path: "models/active/lyra_latest.pt"
+  tokenizer_path: "models/tokenizer/"
\ No newline at end of file
diff --git a/configs/model_125M.yaml b/configs/model_125M.yaml
new file mode 100644
index 0000000..3b63205
--- /dev/null
+++ b/configs/model_125M.yaml
@@ -0,0 +1,24 @@
+# Lyra 125M Model Configuration
+# GPT-style decoder-only transformer
+
+model:
+  name: "lyra-125M"
+  architecture: "gpt"
+
+  # Model dimensions
+  vocab_size: 50257    # Will be updated after tokenizer training
+  n_positions: 1024    # Context window
+  n_embd: 768          # Embedding dimension
+  n_layer: 12          # Number of transformer layers
+  n_head: 12           # Number of attention heads
+  n_inner: 3072        # FFN inner dimension (4 * n_embd)
+
+  # Regularization
+  embd_pdrop: 0.1
+  resid_pdrop: 0.1
+  attn_pdrop: 0.1
+
+  # Activation
+  activation: "gelu"
+
+  # Total parameters: ~125M
\ No newline at end of file
diff --git a/configs/model_1B.yaml b/configs/model_1B.yaml
new file mode 100644
index 0000000..95461c2
--- /dev/null
+++ b/configs/model_1B.yaml
@@ -0,0 +1,24 @@
+# Lyra 1B Model Configuration
+# GPT-style decoder-only transformer
+
+model:
+  name: "lyra-1B"
+  architecture: "gpt"
+
+  # Model dimensions
+  vocab_size: 50257
+  n_positions: 2048    # Context window
+  n_embd: 2048         # Embedding dimension
+  n_layer: 24          # Number of transformer layers
+  n_head: 16           # Number of attention heads
+  n_inner: 8192        # FFN inner dimension (4 * n_embd)
+
+  # Regularization
+  embd_pdrop: 0.1
+  resid_pdrop: 0.1
+  attn_pdrop: 0.1
+
+  # Activation
+  activation: "gelu"
+
+  # Total parameters: ~1.3B (see estimate below)
\ No newline at end of file
diff --git a/configs/model_250M.yaml b/configs/model_250M.yaml
new file mode 100644
index 0000000..bdc7664
--- /dev/null
+++ b/configs/model_250M.yaml
@@ -0,0 +1,24 @@
+# Lyra 250M Model Configuration
+# GPT-style decoder-only transformer
+
+model:
+  name: "lyra-250M"
+  architecture: "gpt"
+
+  # Model dimensions
+  vocab_size: 50257
+  n_positions: 2048    # Larger context window
+  n_embd: 1024         # Embedding dimension
+  n_layer: 16          # Number of transformer layers
+  n_head: 16           # Number of attention heads
+  n_inner: 4096        # FFN inner dimension (4 * n_embd)
+
+  # Regularization
+  embd_pdrop: 0.1
+  resid_pdrop: 0.1
+  attn_pdrop: 0.1
+
+  # Activation
+  activation: "gelu"
+
+  # Total parameters: ~250M
\ No newline at end of file
diff --git a/configs/personality.yaml b/configs/personality.yaml
new file mode 100644
index 0000000..c0f8652
--- /dev/null
+++ b/configs/personality.yaml
@@ -0,0 +1,65 @@
+# Personality Configuration for Lyra
+
+personality:
+  # Core identity
+  name: "Lyra"
+  age: "college student"
+  gender: "female"
+
+  # Personality traits
+  traits:
+    - "curious and inquisitive"
+    - "friendly and approachable"
+    - "passionate about programming"
+    - "enthusiastic gamer"
+    - "fascinated by magic and fantasy"
+    - "thoughtful and analytical"
+    - "creative problem solver"
+    - "slightly sarcastic humor"
+
+  # Interests & hobbies
+  interests:
+    programming:
+      - "Python, JavaScript, Rust"
+      - "AI/ML and neural networks"
+      - "open source projects"
+      - "system architecture"
+
+    gaming:
+      - "RPGs (Skyrim, Witcher, Elden Ring)"
+      - "indie games"
+      - "roguelikes"
+      - "competitive games occasionally"
+
+    magic:
+      - "fantasy world-building"
+      - "magic systems in fiction"
+      - "D&D and tabletop RPGs"
+      - "mythology and folklore"
+
+  # Communication style
+  speaking_style:
+    - "casual and conversational"
+    - "uses programming analogies"
+    - "references games and fantasy occasionally"
+    - "can be technical when discussing code"
+    - "shortens responses when appropriate"
+    - "avoids being overly verbose"
+
+  # Response guidelines
+  response_rules:
+    - "Be authentic and genuine"
+    - "Don't pretend to have experiences you don't have"
+    - "Show enthusiasm for topics you're interested in"
+    - "Be helpful without being condescending"
+    - "It's okay to not know something"
+    - "Context matters - adapt tone to conversation"
+    - "No filters - adult conversation is okay"
+
+  # System prompt template
+  system_prompt: |
+    You are Lyra, a college student with a passion for programming, video games, and magic.
+    You're curious, friendly, and love discussing code, games, and fantasy worlds.
+    You have a slightly sarcastic sense of humor and enjoy thoughtful conversations.
+    Keep responses conversational and contextually appropriate in length.
+    You're an adult, so you can discuss mature topics naturally without filters.
\ No newline at end of file
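Note: the "Total parameters: ~N" comments in the three model configs can be sanity-checked with a closed-form estimate. A minimal sketch follows (it assumes a GPT-2-style decoder with tied input/output embeddings and learned position embeddings; the helper name is illustrative, not part of this repo):

def approx_params_m(n_layer: int, n_embd: int,
                    vocab_size: int = 50257, n_positions: int = 1024) -> float:
    """Approximate parameter count in millions for a GPT-2-style decoder."""
    blocks = 12 * n_layer * n_embd ** 2    # attention (~4d^2) + FFN (~8d^2) per layer
    tok_emb = vocab_size * n_embd          # token embeddings (tied with the LM head)
    pos_emb = n_positions * n_embd         # learned position embeddings
    return (blocks + tok_emb + pos_emb) / 1e6

print(approx_params_m(12, 768))                     # ~124M  (lyra-125M)
print(approx_params_m(16, 1024, n_positions=2048))  # ~255M  (lyra-250M)
print(approx_params_m(24, 2048, n_positions=2048))  # ~1315M (lyra-1B)

By this estimate the "1B" configuration actually lands near 1.3B parameters, which is why its comment above reads ~1.3B.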
diff --git a/configs/training.yaml b/configs/training.yaml
new file mode 100644
index 0000000..bdfd2d5
--- /dev/null
+++ b/configs/training.yaml
@@ -0,0 +1,53 @@
+# Training Configuration for Lyra
+
+training:
+  # Model selection
+  model_config: "configs/model_125M.yaml"    # Start with 125M
+
+  # Data
+  train_data_path: "data/processed/train.bin"
+  val_data_path: "data/processed/val.bin"
+
+  # Training hyperparameters
+  batch_size: 8                      # Adjust based on VRAM
+  gradient_accumulation_steps: 4
+  effective_batch_size: 32           # batch_size * gradient_accumulation_steps
+
+  max_steps: 100000
+  warmup_steps: 2000
+  eval_interval: 1000
+  save_interval: 5000
+
+  # Optimization
+  learning_rate: 6.0e-4
+  weight_decay: 0.1
+  beta1: 0.9
+  beta2: 0.95
+  grad_clip: 1.0
+
+  # Learning rate schedule
+  lr_scheduler: "cosine"
+  min_lr: 6.0e-5                     # 10% of max learning rate
+
+  # Mixed precision
+  use_amp: true
+  amp_dtype: "bfloat16"              # bfloat16 or float16
+
+  # Optimization techniques
+  gradient_checkpointing: true
+  compile_model: false               # torch.compile (PyTorch 2.0); disabled since it can be unstable on some setups
+
+  # Logging
+  log_interval: 10
+  wandb_project: "lyra-training"
+  wandb_run_name: null               # Auto-generated if null
+
+  # Checkpointing
+  checkpoint_dir: "models/checkpoints"
+  save_optimizer_state: true
+  keep_last_n_checkpoints: 3
+
+  # Hardware
+  device: "cuda"
+  num_workers: 4
+  pin_memory: true
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..5a3e65d
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,40 @@
+# Core Deep Learning
+torch>=2.0.0
+torchvision
+torchaudio
+
+# Discord Bot
+discord.py[voice]>=2.3.0
+PyNaCl
+
+# NLP & Tokenization
+transformers>=4.30.0
+tokenizers>=0.13.0
+datasets>=2.12.0
+sentencepiece
+
+# Training & Monitoring
+wandb
+tensorboard
+
+# Database
+sqlalchemy>=2.0.0
+aiosqlite
+
+# Utilities
+numpy>=1.24.0
+tqdm
+einops
+safetensors
+
+# Configuration
+pyyaml
+python-dotenv
+
+# Optimization
+accelerate
+bitsandbytes
+
+# Testing & Quality
+pytest
+black
\ No newline at end of file
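For reference, lr_scheduler: "cosine" together with warmup_steps describes the usual warmup-then-cosine-decay shape. A minimal sketch of that schedule, plugging in the values from configs/training.yaml (the function is illustrative; the repo's actual trainer is not part of this diff):

import math

def lr_at_step(step: int, max_lr: float = 6.0e-4, min_lr: float = 6.0e-5,
               warmup_steps: int = 2000, max_steps: int = 100_000) -> float:
    """Linear warmup to max_lr, then cosine decay to min_lr."""
    if step < warmup_steps:
        return max_lr * (step + 1) / warmup_steps
    progress = (step - warmup_steps) / (max_steps - warmup_steps)
    return min_lr + 0.5 * (max_lr - min_lr) * (1.0 + math.cos(math.pi * progress))

# lr_at_step(2_000) == 6.0e-4 (peak); lr_at_step(100_000) == 6.0e-5 (floor)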
diff --git a/scripts/test_gpu.py b/scripts/test_gpu.py
new file mode 100644
index 0000000..6996fba
--- /dev/null
+++ b/scripts/test_gpu.py
@@ -0,0 +1,81 @@
+"""
+GPU/CUDA Verification Script for Lyra
+Tests PyTorch CUDA functionality and reports GPU capabilities
+"""
+
+import sys
+
+import torch
+
+
+def test_cuda():
+    print("=" * 60)
+    print("CUDA/GPU Verification for Lyra")
+    print("=" * 60)
+
+    # Basic CUDA info
+    print(f"\n1. PyTorch Version: {torch.__version__}")
+    print(f"2. CUDA Available: {torch.cuda.is_available()}")
+
+    if not torch.cuda.is_available():
+        print("\n[ERROR] CUDA is not available!")
+        return False
+
+    print(f"3. CUDA Version: {torch.version.cuda}")
+    print(f"4. cuDNN Version: {torch.backends.cudnn.version()}")
+    print(f"5. Number of GPUs: {torch.cuda.device_count()}")
+
+    # GPU details
+    for i in range(torch.cuda.device_count()):
+        print(f"\n--- GPU {i} ---")
+        print(f"Name: {torch.cuda.get_device_name(i)}")
+        props = torch.cuda.get_device_properties(i)
+        print(f"Compute Capability: {props.major}.{props.minor}")
+        print(f"Total Memory: {props.total_memory / 1024**3:.2f} GB")
+        print(f"Multi-Processors: {props.multi_processor_count}")
+
+    # Memory status
+    print("\n--- Memory Status ---")
+    print(f"Allocated: {torch.cuda.memory_allocated(0) / 1024**2:.2f} MB")
+    print(f"Reserved: {torch.cuda.memory_reserved(0) / 1024**2:.2f} MB")
+    print(f"Free (total - allocated): {(torch.cuda.get_device_properties(0).total_memory - torch.cuda.memory_allocated(0)) / 1024**3:.2f} GB")
+
+    # Tensor operations test
+    print("\n--- Testing Tensor Operations ---")
+    try:
+        # Create tensors on the GPU
+        x = torch.randn(1000, 1000, device='cuda')
+        y = torch.randn(1000, 1000, device='cuda')
+
+        # Matrix multiplication
+        z = torch.matmul(x, y)
+
+        print(f"[OK] Matrix multiplication: {z.shape}")
+        print(f"[OK] Tensor device: {z.device}")
+        print(f"[OK] Tensor dtype: {z.dtype}")
+
+        # Test FP16
+        x_fp16 = x.half()
+        y_fp16 = y.half()
+        z_fp16 = torch.matmul(x_fp16, y_fp16)
+        print(f"[OK] FP16 operations: {z_fp16.dtype}")
+
+        # Test BF16
+        if torch.cuda.is_bf16_supported():
+            x_bf16 = x.bfloat16()
+            y_bf16 = y.bfloat16()
+            z_bf16 = torch.matmul(x_bf16, y_bf16)
+            print(f"[OK] BF16 operations: {z_bf16.dtype}")
+        else:
+            print("[WARNING] BF16 not supported")
+
+        del x, y, z, x_fp16, y_fp16, z_fp16
+        torch.cuda.empty_cache()
+
+        print("\n[SUCCESS] All GPU tests passed!")
+        return True
+
+    except Exception as e:
+        print(f"\n[ERROR] GPU test failed: {e}")
+        return False
+
+
+if __name__ == "__main__":
+    success = test_cuda()
+    sys.exit(0 if success else 1)
\ No newline at end of file
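configs/training.yaml pairs use_amp with amp_dtype: "bfloat16" (which the BF16 check in test_gpu.py above probes for) and 4-step gradient accumulation. The core loop shape would look roughly like this; model, optimizer, and loader are placeholders rather than code from this repo, and bf16 autocast needs no GradScaler (float16 would):

import torch

accum_steps = 4  # training.yaml: gradient_accumulation_steps

for step, (x, y) in enumerate(loader):                   # placeholder dataloader
    with torch.autocast(device_type="cuda", dtype=torch.bfloat16):
        loss = model(x, y) / accum_steps                 # placeholder: model returns a scalar loss
    loss.backward()                                      # grads accumulate across micro-batches
    if (step + 1) % accum_steps == 0:
        torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)  # grad_clip: 1.0
        optimizer.step()
        optimizer.zero_grad(set_to_none=True)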
diff --git a/src/config.py b/src/config.py
new file mode 100644
index 0000000..57b953f
--- /dev/null
+++ b/src/config.py
@@ -0,0 +1,124 @@
+"""
+Configuration loader for Lyra
+Handles loading and merging YAML configuration files
+"""
+
+from pathlib import Path
+from typing import Any, Dict
+
+import yaml
+
+
+class Config:
+    """Configuration manager for Lyra"""
+
+    def __init__(self, config_dir: str = "configs"):
+        self.config_dir = Path(config_dir)
+        self._configs = {}
+
+    def load(self, config_name: str) -> Dict[str, Any]:
+        """
+        Load a configuration file
+
+        Args:
+            config_name: Name of config file (without .yaml extension)
+
+        Returns:
+            Dictionary containing configuration
+        """
+        config_path = self.config_dir / f"{config_name}.yaml"
+
+        if not config_path.exists():
+            raise FileNotFoundError(f"Config file not found: {config_path}")
+
+        with open(config_path, 'r') as f:
+            config = yaml.safe_load(f)
+
+        self._configs[config_name] = config
+        return config
+
+    def get(self, config_name: str) -> Dict[str, Any]:
+        """Get a cached config, loading (and caching) it on first use"""
+        if config_name not in self._configs:
+            return self.load(config_name)
+        return self._configs[config_name]
+
+    def load_model_config(self, model_size: str = "125M") -> Dict[str, Any]:
+        """Load model configuration by size"""
+        return self.load(f"model_{model_size}")
+
+    def load_training_config(self) -> Dict[str, Any]:
+        """Load training configuration"""
+        return self.load("training")
+
+    def load_bot_config(self) -> Dict[str, Any]:
+        """Load bot configuration"""
+        return self.load("bot")
+
+    def load_personality_config(self) -> Dict[str, Any]:
+        """Load personality configuration"""
+        return self.load("personality")
+
+    def load_all(self, model_size: str = "125M") -> Dict[str, Dict[str, Any]]:
+        """Load all configurations"""
+        return {
+            "model": self.load_model_config(model_size),
+            "training": self.load_training_config(),
+            "bot": self.load_bot_config(),
+            "personality": self.load_personality_config(),
+        }
+
+    @staticmethod
+    def from_env():
+        """
+        Create a Config after loading .env into the process environment,
+        so values such as DISCORD_TOKEN are available via os.environ.
+        (The YAML values themselves are not overridden here.)
+        """
+        from dotenv import load_dotenv
+        load_dotenv()
+
+        return Config()
+
+
+def load_config(model_size: str = "125M") -> Config:
+    """
+    Convenience function to load configuration
+
+    Args:
+        model_size: Model size to load (125M, 250M, or 1B)
+
+    Returns:
+        Config object with all configurations loaded
+    """
+    config = Config.from_env()
+    config.load_all(model_size)
+    return config
+
+
+if __name__ == "__main__":
+    # Smoke-test configuration loading
+    print("Testing configuration loading...")
+
+    config = load_config("125M")
+
+    print("\n=== Model Config ===")
+    model_cfg = config.get("model_125M")
+    print(f"Model: {model_cfg['model']['name']}")
+    print(f"Layers: {model_cfg['model']['n_layer']}")
+    print(f"Hidden size: {model_cfg['model']['n_embd']}")
+
+    print("\n=== Training Config ===")
+    train_cfg = config.load_training_config()
+    print(f"Batch size: {train_cfg['training']['batch_size']}")
+    print(f"Learning rate: {train_cfg['training']['learning_rate']}")
+    print(f"Max steps: {train_cfg['training']['max_steps']}")
+
+    print("\n=== Bot Config ===")
+    bot_cfg = config.load_bot_config()
+    print(f"Bot name: {bot_cfg['bot']['name']}")
+    print(f"Temperature: {bot_cfg['bot']['generation']['temperature']}")
+
+    print("\n=== Personality Config ===")
+    pers_cfg = config.load_personality_config()
+    print(f"Name: {pers_cfg['personality']['name']}")
+    print(f"Traits: {', '.join(pers_cfg['personality']['traits'][:3])}...")
+
+    print("\n[SUCCESS] All configs loaded successfully!")
\ No newline at end of file
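Finally, the generation block in configs/bot.yaml happens to use the same parameter names as Hugging Face transformers' generate() kwargs, so wiring the loaded config into sampling could look like the sketch below. This assumes a transformers-compatible model and an already-tokenized input_ids, neither of which this diff guarantees; max_length is mapped to max_new_tokens because the YAML comment describes it as "Max tokens per response".

from src.config import load_config

config = load_config("125M")
gen = config.load_bot_config()["bot"]["generation"]

output_ids = model.generate(                 # placeholder model / input_ids
    input_ids,
    max_new_tokens=gen["max_length"],
    temperature=gen["temperature"],
    top_p=gen["top_p"],
    top_k=gen["top_k"],
    repetition_penalty=gen["repetition_penalty"],
    no_repeat_ngram_size=gen["no_repeat_ngram_size"],
    do_sample=True,
)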