# NOVA/.github/workflows/ci.yml

# Display name of this workflow.
name: NOVA CI

# Triggers: pushes to main or dev, and pull requests that target main.
on:
  push:
    branches:
      - main
      - dev
  pull_request:
    branches:
      - main

jobs:
  # Lint, format-check, type-check and unit-test the packages on every
  # OS / Python combination listed in the matrix.
  test:
    runs-on: ${{ matrix.os }}
    strategy:
      matrix:
        os: [ubuntu-latest, windows-latest]
        # Quoted so YAML keeps the versions as strings ('3.10' must not
        # collapse to the float 3.1).
        python-version: ['3.10', '3.11']

    steps:
      - uses: actions/checkout@v3

      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v4
        with:
          python-version: ${{ matrix.python-version }}

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements.txt
          pip install pytest pytest-cov ruff black mypy

      - name: Lint with ruff
        run: ruff check nova_core/ nova_tokenizer/ nova_train/ nova_evo/ nova_chat/ nova_data/

      - name: Format check with black
        run: black --check nova_core/ nova_tokenizer/ nova_train/ nova_evo/ nova_chat/ nova_data/

      # Advisory only: a mypy failure is reported but does not fail the job.
      # continue-on-error replaces the former trailing `|| true`, which
      # depended on a POSIX `true` command being available; this job also
      # runs on windows-latest, where the default shell is PowerShell and
      # `true` is not a built-in command.
      - name: Type check with mypy
        continue-on-error: true
        run: mypy nova_core/ --ignore-missing-imports

      - name: Test with pytest
        run: pytest tests/ -v --cov=nova_core --cov=nova_tokenizer --cov=nova_train

      # Only one matrix cell (ubuntu-latest, Python 3.10) uploads coverage.
      - name: Upload coverage
        uses: codecov/codecov-action@v3
        if: matrix.os == 'ubuntu-latest' && matrix.python-version == '3.10'

  # End-to-end smoke test on Linux: initialise the project, train a small
  # tokenizer on the toy dataset, load it back, and run the data pipeline's
  # license verification.
  smoke-test:
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v3

      - name: Set up Python 3.10
        uses: actions/setup-python@v4
        with:
          python-version: "3.10"

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements.txt

      - name: Initialize NOVA
        run: python scripts/cli.py init

      - name: Train tokenizer (smoke test)
        # Folded scalar: the lines below are joined with single spaces into
        # one command line.
        run: >-
          python scripts/cli.py tokenizer train
          --input data/toy_dataset/toy.txt
          --output test_tokenizer
          --vocab-size 1000

      # NOTE(review): assumes the training step above writes
      # test_tokenizer.model - confirm against the CLI.
      - name: Test tokenizer
        run: |
          python -c "from nova_tokenizer import NovaTokenizer; t = NovaTokenizer('test_tokenizer.model'); print('Vocab size:', len(t)); print('Encoded:', t.encode('Hello world'))"

      - name: Data pipeline smoke test
        run: |
          python -c "from nova_data import DataPipeline; p = DataPipeline(); p.verify_licenses()"

  # Packaging check: build the distribution, install the resulting wheel,
  # and confirm that the core packages import from the installed copy.
  build-check:
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v3

      - name: Set up Python 3.10
        uses: actions/setup-python@v4
        with:
          python-version: "3.10"

      # Installs the `build` front end, then runs it in the repository root.
      - name: Build package
        run: |
          python -m pip install --upgrade pip build
          python -m build

      # Installs whatever wheel(s) the build step left in dist/ and imports
      # three of the packages as a sanity check.
      - name: Check package
        run: |
          python -m pip install dist/*.whl
          python -c "import nova_core; import nova_tokenizer; import nova_train"