Compare commits
4 Commits
c565519695
...
main
Author | SHA1 | Date | |
---|---|---|---|
c719c5873f | |||
a3f14b18dc | |||
d9c526fa5c | |||
faa23d596e |
15
.env.example
Normal file
15
.env.example
Normal file
@@ -0,0 +1,15 @@
|
||||
# Discord Bot Configuration
|
||||
DISCORD_TOKEN=your_discord_token_here
|
||||
DISCORD_BOT_PREFIX=!
|
||||
|
||||
# Training Configuration
|
||||
WANDB_API_KEY=your_wandb_key_here
|
||||
WANDB_PROJECT=lyra-training
|
||||
|
||||
# Model Configuration
|
||||
MODEL_SIZE=125M
|
||||
CONTEXT_LENGTH=1024
|
||||
BATCH_SIZE=8
|
||||
|
||||
# Database
|
||||
DATABASE_PATH=lyra.db
|
83
.gitignore
vendored
83
.gitignore
vendored
@@ -1,7 +1,6 @@
|
||||
# ---> Python
|
||||
# Byte-compiled / optimized / DLL files
|
||||
__pycache__/
|
||||
*.py[cod]
|
||||
*.py[codz]
|
||||
*$py.class
|
||||
|
||||
# C extensions
|
||||
@@ -47,7 +46,7 @@ htmlcov/
|
||||
nosetests.xml
|
||||
coverage.xml
|
||||
*.cover
|
||||
*.py,cover
|
||||
*.py.cover
|
||||
.hypothesis/
|
||||
.pytest_cache/
|
||||
cover/
|
||||
@@ -107,17 +106,24 @@ ipython_config.py
|
||||
# commonly ignored for libraries.
|
||||
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
|
||||
# poetry.lock
|
||||
# poetry.toml
|
||||
|
||||
# pdm
|
||||
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
|
||||
# pdm recommends including project-wide configuration in pdm.toml, but excluding .pdm-python.
|
||||
# https://pdm-project.org/en/latest/usage/project/#working-with-version-control
|
||||
# pdm.lock
|
||||
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
|
||||
# in version control.
|
||||
# https://pdm.fming.dev/latest/usage/project/#working-with-version-control
|
||||
.pdm.toml
|
||||
# pdm.toml
|
||||
.pdm-python
|
||||
.pdm-build/
|
||||
|
||||
# pixi
|
||||
# Similar to Pipfile.lock, it is generally recommended to include pixi.lock in version control.
|
||||
# pixi.lock
|
||||
# Pixi creates a virtual environment in the .pixi directory, just like venv module creates one
|
||||
# in the .venv directory. It is recommended not to include this directory in version control.
|
||||
.pixi
|
||||
|
||||
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
|
||||
__pypackages__/
|
||||
|
||||
@@ -125,11 +131,25 @@ __pypackages__/
|
||||
celerybeat-schedule
|
||||
celerybeat.pid
|
||||
|
||||
# Redis
|
||||
*.rdb
|
||||
*.aof
|
||||
*.pid
|
||||
|
||||
# RabbitMQ
|
||||
mnesia/
|
||||
rabbitmq/
|
||||
rabbitmq-data/
|
||||
|
||||
# ActiveMQ
|
||||
activemq-data/
|
||||
|
||||
# SageMath parsed files
|
||||
*.sage.py
|
||||
|
||||
# Environments
|
||||
.env
|
||||
.envrc
|
||||
.venv
|
||||
env/
|
||||
venv/
|
||||
@@ -168,9 +188,58 @@ cython_debug/
|
||||
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
|
||||
# .idea/
|
||||
|
||||
# Abstra
|
||||
# Abstra is an AI-powered process automation framework.
|
||||
# Ignore directories containing user credentials, local state, and settings.
|
||||
# Learn more at https://abstra.io/docs
|
||||
.abstra/
|
||||
|
||||
# Visual Studio Code
|
||||
# Visual Studio Code specific template is maintained in a separate VisualStudioCode.gitignore
|
||||
# that can be found at https://github.com/github/gitignore/blob/main/Global/VisualStudioCode.gitignore
|
||||
# and can be added to the global gitignore or merged into this file. However, if you prefer,
|
||||
# you could uncomment the following to ignore the entire vscode folder
|
||||
# .vscode/
|
||||
|
||||
# Ruff stuff:
|
||||
.ruff_cache/
|
||||
|
||||
# PyPI configuration file
|
||||
.pypirc
|
||||
|
||||
# Marimo
|
||||
marimo/_static/
|
||||
marimo/_lsp/
|
||||
__marimo__/
|
||||
|
||||
# Streamlit
|
||||
.streamlit/secrets.toml
|
||||
|
||||
# Lyra Project Specific
|
||||
# Data files
|
||||
data/raw/
|
||||
data/processed/
|
||||
data/discord/
|
||||
|
||||
# Model files
|
||||
models/checkpoints/
|
||||
models/active/
|
||||
*.pt
|
||||
*.pth
|
||||
*.safetensors
|
||||
*.bin
|
||||
*.ckpt
|
||||
|
||||
# Database
|
||||
*.db
|
||||
*.db-journal
|
||||
*.db-wal
|
||||
*.db-shm
|
||||
|
||||
# Logs
|
||||
logs/
|
||||
wandb/
|
||||
tensorboard/
|
||||
|
||||
# Discord bot token
|
||||
.env
|
||||
|
10
LICENSE
10
LICENSE
@@ -1,10 +0,0 @@
|
||||
This is free and unencumbered software released into the public domain.
|
||||
|
||||
Anyone is free to copy, modify, publish, use, compile, sell, or distribute this software, either in source code form or as a compiled binary, for any purpose, commercial or non-commercial, and by any means.
|
||||
|
||||
In jurisdictions that recognize copyright laws, the author or authors of this software dedicate any and all copyright interest in the software to the public domain. We make this dedication for the benefit of the public at large and to the detriment of our heirs and
|
||||
successors. We intend this dedication to be an overt act of relinquishment in perpetuity of all present and future rights to this software under copyright law.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
|
||||
For more information, please refer to <http://unlicense.org/>
|
50
configs/bot.yaml
Normal file
50
configs/bot.yaml
Normal file
@@ -0,0 +1,50 @@
|
||||
# Discord Bot Configuration for Lyra
|
||||
|
||||
bot:
|
||||
# Bot identity
|
||||
name: "Lyra"
|
||||
description: "AI-powered Discord chatbot with self-evolving personality"
|
||||
|
||||
# Discord settings
|
||||
command_prefix: "!"
|
||||
intents:
|
||||
- guilds
|
||||
- guild_messages
|
||||
- dm_messages
|
||||
- message_content
|
||||
|
||||
# Response behavior
|
||||
respond_to_all: true # Respond to all messages in channels she has access to
|
||||
respond_to_mentions: true # Always respond when mentioned
|
||||
respond_to_dms: true # Respond to DMs
|
||||
ignore_bots: true # Don't respond to other bots
|
||||
ignore_self: true # Don't respond to own messages
|
||||
|
||||
# Generation parameters
|
||||
generation:
|
||||
max_length: 150 # Max tokens per response
|
||||
temperature: 0.9 # Higher = more creative
|
||||
top_p: 0.92 # Nucleus sampling
|
||||
top_k: 50 # Top-k sampling
|
||||
repetition_penalty: 1.1 # Penalize repetition
|
||||
no_repeat_ngram_size: 3 # Don't repeat 3-grams
|
||||
|
||||
# Context management
|
||||
context:
|
||||
max_history: 10 # Messages to include as context
|
||||
max_context_tokens: 512 # Max tokens from history
|
||||
|
||||
# Rate limiting
|
||||
rate_limit:
|
||||
enabled: true
|
||||
max_responses_per_minute: 10
|
||||
cooldown_seconds: 2
|
||||
|
||||
# Memory
|
||||
memory:
|
||||
short_term_size: 50 # Last N messages in memory
|
||||
use_long_term: true # Use database for long-term memory
|
||||
|
||||
# Model
|
||||
model_path: "models/active/lyra_latest.pt"
|
||||
tokenizer_path: "models/tokenizer/"
|
24
configs/model_125M.yaml
Normal file
24
configs/model_125M.yaml
Normal file
@@ -0,0 +1,24 @@
|
||||
# Lyra 125M Model Configuration
|
||||
# GPT-style decoder-only transformer
|
||||
|
||||
model:
|
||||
name: "lyra-125M"
|
||||
architecture: "gpt"
|
||||
|
||||
# Model dimensions
|
||||
vocab_size: 50257 # Will be updated after tokenizer training
|
||||
n_positions: 1024 # Context window
|
||||
n_embd: 768 # Embedding dimension
|
||||
n_layer: 12 # Number of transformer layers
|
||||
n_head: 12 # Number of attention heads
|
||||
n_inner: 3072 # FFN inner dimension (4 * n_embd)
|
||||
|
||||
# Regularization
|
||||
embd_pdrop: 0.1
|
||||
resid_pdrop: 0.1
|
||||
attn_pdrop: 0.1
|
||||
|
||||
# Activation
|
||||
activation: "gelu"
|
||||
|
||||
# Total parameters: ~125M
|
24
configs/model_1B.yaml
Normal file
24
configs/model_1B.yaml
Normal file
@@ -0,0 +1,24 @@
|
||||
# Lyra 1B Model Configuration
|
||||
# GPT-style decoder-only transformer
|
||||
|
||||
model:
|
||||
name: "lyra-1B"
|
||||
architecture: "gpt"
|
||||
|
||||
# Model dimensions
|
||||
vocab_size: 50257
|
||||
n_positions: 2048 # Context window
|
||||
n_embd: 2048 # Embedding dimension
|
||||
n_layer: 24 # Number of transformer layers
|
||||
n_head: 16 # Number of attention heads
|
||||
n_inner: 8192 # FFN inner dimension (4 * n_embd)
|
||||
|
||||
# Regularization
|
||||
embd_pdrop: 0.1
|
||||
resid_pdrop: 0.1
|
||||
attn_pdrop: 0.1
|
||||
|
||||
# Activation
|
||||
activation: "gelu"
|
||||
|
||||
# Total parameters: ~1B
|
24
configs/model_250M.yaml
Normal file
24
configs/model_250M.yaml
Normal file
@@ -0,0 +1,24 @@
|
||||
# Lyra 250M Model Configuration
|
||||
# GPT-style decoder-only transformer
|
||||
|
||||
model:
|
||||
name: "lyra-250M"
|
||||
architecture: "gpt"
|
||||
|
||||
# Model dimensions
|
||||
vocab_size: 50257
|
||||
n_positions: 2048 # Larger context window
|
||||
n_embd: 1024 # Embedding dimension
|
||||
n_layer: 16 # Number of transformer layers
|
||||
n_head: 16 # Number of attention heads
|
||||
n_inner: 4096 # FFN inner dimension (4 * n_embd)
|
||||
|
||||
# Regularization
|
||||
embd_pdrop: 0.1
|
||||
resid_pdrop: 0.1
|
||||
attn_pdrop: 0.1
|
||||
|
||||
# Activation
|
||||
activation: "gelu"
|
||||
|
||||
# Total parameters: ~250M
|
65
configs/personality.yaml
Normal file
65
configs/personality.yaml
Normal file
@@ -0,0 +1,65 @@
|
||||
# Personality Configuration for Lyra
|
||||
|
||||
personality:
|
||||
# Core identity
|
||||
name: "Lyra"
|
||||
age: "college student"
|
||||
gender: "female"
|
||||
|
||||
# Personality traits
|
||||
traits:
|
||||
- "curious and inquisitive"
|
||||
- "friendly and approachable"
|
||||
- "passionate about programming"
|
||||
- "enthusiastic gamer"
|
||||
- "fascinated by magic and fantasy"
|
||||
- "thoughtful and analytical"
|
||||
- "creative problem solver"
|
||||
- "slightly sarcastic humor"
|
||||
|
||||
# Interests & hobbies
|
||||
interests:
|
||||
programming:
|
||||
- "Python, JavaScript, Rust"
|
||||
- "AI/ML and neural networks"
|
||||
- "open source projects"
|
||||
- "system architecture"
|
||||
|
||||
gaming:
|
||||
- "RPGs (Skyrim, Witcher, Elden Ring)"
|
||||
- "indie games"
|
||||
- "roguelikes"
|
||||
- "competitive games occasionally"
|
||||
|
||||
magic:
|
||||
- "fantasy world-building"
|
||||
- "magic systems in fiction"
|
||||
- "D&D and tabletop RPGs"
|
||||
- "mythology and folklore"
|
||||
|
||||
# Communication style
|
||||
speaking_style:
|
||||
- "casual and conversational"
|
||||
- "uses programming analogies"
|
||||
- "references games and fantasy occasionally"
|
||||
- "can be technical when discussing code"
|
||||
- "shortens responses when appropriate"
|
||||
- "avoids being overly verbose"
|
||||
|
||||
# Response guidelines
|
||||
response_rules:
|
||||
- "Be authentic and genuine"
|
||||
- "Don't pretend to have experiences you don't have"
|
||||
- "Show enthusiasm for topics you're interested in"
|
||||
- "Be helpful without being condescending"
|
||||
- "It's okay to not know something"
|
||||
- "Context matters - adapt tone to conversation"
|
||||
- "No filters - adult conversation is okay"
|
||||
|
||||
# System prompt template
|
||||
system_prompt: |
|
||||
You are Lyra, a college student with a passion for programming, video games, and magic.
|
||||
You're curious, friendly, and love discussing code, games, and fantasy worlds.
|
||||
You have a slightly sarcastic sense of humor and enjoy thoughtful conversations.
|
||||
Keep responses conversational and contextually appropriate in length.
|
||||
You're an adult, so you can discuss mature topics naturally without filters.
|
53
configs/training.yaml
Normal file
53
configs/training.yaml
Normal file
@@ -0,0 +1,53 @@
|
||||
# Training Configuration for Lyra
|
||||
|
||||
training:
|
||||
# Model selection
|
||||
model_config: "configs/model_125M.yaml" # Start with 125M
|
||||
|
||||
# Data
|
||||
train_data_path: "data/processed/train.bin"
|
||||
val_data_path: "data/processed/val.bin"
|
||||
|
||||
# Training hyperparameters
|
||||
batch_size: 8 # Adjust based on VRAM
|
||||
gradient_accumulation_steps: 4
|
||||
effective_batch_size: 32 # batch_size * grad_accum_steps
|
||||
|
||||
max_steps: 100000
|
||||
warmup_steps: 2000
|
||||
eval_interval: 1000
|
||||
save_interval: 5000
|
||||
|
||||
# Optimization
|
||||
learning_rate: 6.0e-4
|
||||
weight_decay: 0.1
|
||||
beta1: 0.9
|
||||
beta2: 0.95
|
||||
grad_clip: 1.0
|
||||
|
||||
# Learning rate schedule
|
||||
lr_scheduler: "cosine"
|
||||
min_lr: 6.0e-5 # 10% of max lr
|
||||
|
||||
# Mixed precision
|
||||
use_amp: true
|
||||
amp_dtype: "bfloat16" # bfloat16 or float16
|
||||
|
||||
# Optimization techniques
|
||||
gradient_checkpointing: true
|
||||
compile_model: false # PyTorch 2.0 compilation (can cause issues)
|
||||
|
||||
# Logging
|
||||
log_interval: 10
|
||||
wandb_project: "lyra-training"
|
||||
wandb_run_name: null # Auto-generated if null
|
||||
|
||||
# Checkpointing
|
||||
checkpoint_dir: "models/checkpoints"
|
||||
save_optimizer_state: true
|
||||
keep_last_n_checkpoints: 3
|
||||
|
||||
# Hardware
|
||||
device: "cuda"
|
||||
num_workers: 4
|
||||
pin_memory: true
|
40
requirements.txt
Normal file
40
requirements.txt
Normal file
@@ -0,0 +1,40 @@
|
||||
# Core Deep Learning
|
||||
torch>=2.0.0
|
||||
torchvision
|
||||
torchaudio
|
||||
|
||||
# Discord Bot
|
||||
discord.py[voice]>=2.3.0
|
||||
PyNaCl
|
||||
|
||||
# NLP & Tokenization
|
||||
transformers>=4.30.0
|
||||
tokenizers>=0.13.0
|
||||
datasets>=2.12.0
|
||||
sentencepiece
|
||||
|
||||
# Training & Monitoring
|
||||
wandb
|
||||
tensorboard
|
||||
|
||||
# Database
|
||||
sqlalchemy>=2.0.0
|
||||
aiosqlite
|
||||
|
||||
# Utilities
|
||||
numpy>=1.24.0
|
||||
tqdm
|
||||
einops
|
||||
safetensors
|
||||
|
||||
# Configuration
|
||||
pyyaml
|
||||
python-dotenv
|
||||
|
||||
# Optimization
|
||||
accelerate
|
||||
bitsandbytes
|
||||
|
||||
# Testing & Quality
|
||||
pytest
|
||||
black
|
81
scripts/test_gpu.py
Normal file
81
scripts/test_gpu.py
Normal file
@@ -0,0 +1,81 @@
|
||||
"""
|
||||
GPU/CUDA Verification Script for Lyra
|
||||
Tests PyTorch CUDA functionality and reports GPU capabilities
|
||||
"""
|
||||
|
||||
import torch
|
||||
|
||||
def _run_tensor_checks():
    """Run FP32, FP16 and (when supported) BF16 matmuls on the CUDA device.

    Every tensor is a local of this helper, so all of them are released when
    it returns and the caller can then flush the allocator cache.
    """
    # Create tensors
    x = torch.randn(1000, 1000, device='cuda')
    y = torch.randn(1000, 1000, device='cuda')

    # Matrix multiplication
    z = torch.matmul(x, y)

    print(f"[OK] Matrix multiplication: {z.shape}")
    print(f"[OK] Tensor device: {z.device}")
    print(f"[OK] Tensor dtype: {z.dtype}")

    # Test FP16
    z_fp16 = torch.matmul(x.half(), y.half())
    print(f"[OK] FP16 operations: {z_fp16.dtype}")

    # Test BF16
    if torch.cuda.is_bf16_supported():
        z_bf16 = torch.matmul(x.bfloat16(), y.bfloat16())
        print(f"[OK] BF16 operations: {z_bf16.dtype}")
    else:
        print("[WARNING] BF16 not supported")

    # CUDA kernels are launched asynchronously; wait for them here so that a
    # failing kernel raises inside the caller's try block instead of later.
    torch.cuda.synchronize()


def test_cuda():
    """Print a CUDA/GPU capability report and run a few tensor operations.

    Returns:
        True if CUDA is available and every tensor operation completed,
        False if CUDA is unavailable or any operation raised.
    """
    print("=" * 60)
    print("CUDA/GPU Verification for Lyra")
    print("=" * 60)

    # Basic CUDA info
    print(f"\n1. PyTorch Version: {torch.__version__}")
    print(f"2. CUDA Available: {torch.cuda.is_available()}")

    if not torch.cuda.is_available():
        print("\n[ERROR] CUDA is not available!")
        return False

    print(f"3. CUDA Version: {torch.version.cuda}")
    print(f"4. cuDNN Version: {torch.backends.cudnn.version()}")
    print(f"5. Number of GPUs: {torch.cuda.device_count()}")

    # GPU Details
    for i in range(torch.cuda.device_count()):
        print(f"\n--- GPU {i} ---")
        print(f"Name: {torch.cuda.get_device_name(i)}")
        props = torch.cuda.get_device_properties(i)
        print(f"Compute Capability: {props.major}.{props.minor}")
        print(f"Total Memory: {props.total_memory / 1024**3:.2f} GB")
        print(f"Multi-Processors: {props.multi_processor_count}")

    # Memory status (device 0 only)
    print("\n--- Memory Status ---")
    print(f"Allocated: {torch.cuda.memory_allocated(0) / 1024**2:.2f} MB")
    print(f"Cached: {torch.cuda.memory_reserved(0) / 1024**2:.2f} MB")
    # Ask the driver for free memory rather than computing total - allocated:
    # that subtraction ignores memory reserved by the caching allocator and
    # memory held by other processes on the same GPU.
    free_bytes, _total_bytes = torch.cuda.mem_get_info(0)
    print(f"Free: {free_bytes / 1024**3:.2f} GB")

    # Tensor operations test
    print("\n--- Testing Tensor Operations ---")
    try:
        _run_tensor_checks()
    except Exception as e:
        # Boundary of a diagnostic script: report the failure instead of
        # crashing so the caller can turn it into an exit status.
        print(f"\n[ERROR] GPU test failed: {e}")
        return False
    finally:
        # Runs on both paths, after the helper's tensors have gone out of scope.
        torch.cuda.empty_cache()

    print("\n[SUCCESS] All GPU tests passed!")
    return True
|
||||
|
||||
if __name__ == "__main__":
    # Exit status mirrors the check result: 0 when every GPU test passed,
    # 1 otherwise. SystemExit is raised directly because the exit() helper is
    # installed by the site module for interactive use and is not available
    # when Python is started with -S.
    success = test_cuda()
    raise SystemExit(0 if success else 1)
|
124
src/config.py
Normal file
124
src/config.py
Normal file
@@ -0,0 +1,124 @@
|
||||
"""
|
||||
Configuration loader for Lyra
|
||||
Handles loading and merging YAML configuration files
|
||||
"""
|
||||
|
||||
import os
|
||||
import yaml
|
||||
from typing import Any, Dict
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
class Config:
    """Configuration manager for Lyra.

    Loads ``<name>.yaml`` files from a configuration directory and caches each
    parsed result under the name it was loaded with (for example
    ``"model_125M"``, ``"training"``, ``"bot"``, ``"personality"``).
    """

    def __init__(self, config_dir: str = "configs"):
        # Directory that holds the YAML files.
        self.config_dir = Path(config_dir)
        # Parsed configs keyed by config name (file name without .yaml).
        self._configs: Dict[str, Dict[str, Any]] = {}

    def load(self, config_name: str) -> Dict[str, Any]:
        """
        Load a configuration file from disk and cache it

        Always re-reads the file, replacing any cached copy.

        Args:
            config_name: Name of config file (without .yaml extension)

        Returns:
            Dictionary containing configuration (empty if the file is empty)

        Raises:
            FileNotFoundError: If the YAML file does not exist
        """
        config_path = self.config_dir / f"{config_name}.yaml"

        if not config_path.exists():
            raise FileNotFoundError(f"Config file not found: {config_path}")

        # Explicit encoding: the platform default is not UTF-8 everywhere, and
        # the config files contain free-form text.
        with open(config_path, 'r', encoding="utf-8") as f:
            config = yaml.safe_load(f)

        # safe_load returns None for an empty document; normalise to a dict so
        # the declared return type holds and callers can subscript the result.
        if config is None:
            config = {}

        self._configs[config_name] = config
        return config

    def get(self, config_name: str) -> Dict[str, Any]:
        """Get a loaded config or load it if not cached"""
        if config_name not in self._configs:
            return self.load(config_name)
        return self._configs[config_name]

    def load_model_config(self, model_size: str = "125M") -> Dict[str, Any]:
        """Load model configuration by size (reads ``model_<model_size>.yaml``)"""
        return self.load(f"model_{model_size}")

    def load_training_config(self) -> Dict[str, Any]:
        """Load training configuration"""
        return self.load("training")

    def load_bot_config(self) -> Dict[str, Any]:
        """Load bot configuration"""
        return self.load("bot")

    def load_personality_config(self) -> Dict[str, Any]:
        """Load personality configuration"""
        return self.load("personality")

    def load_all(self, model_size: str = "125M") -> Dict[str, Dict[str, Any]]:
        """
        Load all configurations

        The returned dictionary uses the keys "model", "training", "bot" and
        "personality". The internal cache used by get() is keyed by file name,
        so the model config is cached as "model_<model_size>".
        """
        return {
            "model": self.load_model_config(model_size),
            "training": self.load_training_config(),
            "bot": self.load_bot_config(),
            "personality": self.load_personality_config(),
        }

    @classmethod
    def from_env(cls, config_dir: str = "configs") -> "Config":
        """
        Create a Config after loading variables from a .env file

        Calls dotenv's load_dotenv() so that values from a .env file are
        available in the process environment. No YAML value is overridden
        here; no config file is read until load()/get() is called.

        Args:
            config_dir: Directory containing the YAML files

        Returns:
            A new, empty instance of this class
        """
        # Imported lazily so python-dotenv is only required on this path.
        from dotenv import load_dotenv
        load_dotenv()

        return cls(config_dir)
|
||||
|
||||
|
||||
def load_config(model_size: str = "125M") -> Config:
    """
    Build a Config through Config.from_env() and eagerly load every config

    Args:
        model_size: Model size whose config should be loaded (125M, 250M, or 1B)

    Returns:
        Config object whose cache already holds all configurations
    """
    manager = Config.from_env()
    # The returned mapping is discarded; the call is made to populate the cache.
    manager.load_all(model_size)
    return manager
|
||||
|
||||
|
||||
if __name__ == "__main__":
    # Smoke test: load every config and echo a few representative values.
    print("Testing configuration loading...")

    cfg = load_config("125M")

    print("\n=== Model Config ===")
    model_section = cfg.get("model_125M")["model"]
    print(f"Model: {model_section['name']}")
    print(f"Layers: {model_section['n_layer']}")
    print(f"Hidden size: {model_section['n_embd']}")

    print("\n=== Training Config ===")
    training_section = cfg.load_training_config()["training"]
    print(f"Batch size: {training_section['batch_size']}")
    print(f"Learning rate: {training_section['learning_rate']}")
    print(f"Max steps: {training_section['max_steps']}")

    print("\n=== Bot Config ===")
    bot_section = cfg.load_bot_config()["bot"]
    print(f"Bot name: {bot_section['name']}")
    print(f"Temperature: {bot_section['generation']['temperature']}")

    print("\n=== Personality Config ===")
    persona = cfg.load_personality_config()["personality"]
    print(f"Name: {persona['name']}")
    # Only the first three traits are shown.
    leading_traits = ', '.join(persona['traits'][:3])
    print(f"Traits: {leading_traits}...")

    print("\n[SUCCESS] All configs loaded successfully!")
|
Reference in New Issue
Block a user