Compare commits

...

4 Commits

SHA1        Message                                           Date
cbad993a0a  feat: torch data loader                           2024-12-30 22:53:59 -05:00
4ba02e9963  fix: training pipeline quits unused uci workers   2024-12-30 19:15:56 -05:00
174519bec5  fix: training pipeline bugs                       2024-12-30 18:37:49 -05:00
            - invalid integer cast
            - "resuming" message when not resuming
9a2a770afb  chore: move gitignore into nnue/                  2024-12-30 18:00:28 -05:00
7 changed files with 93 additions and 11 deletions

.gitignore vendored (2 changes)
View File

@@ -1,4 +1,2 @@
 /target
 TODO.txt
-nnue/batches
-nnue/venv

nnue/.gitignore vendored, new file (4 changes)
View File

@@ -0,0 +1,4 @@
+batches/
+venv/
+train_data/
+__pycache__/

View File

@@ -6,15 +6,19 @@ The network is trained on both self-play games, and its games on Lichess.
 Both of these sources provide games in PGN format.
 
 This folder includes the following scripts:
-- `batch_pgn_data.py`: Combine and convert big PGN files into small chunked files.
-- `process_pgn_data.py`: Convert PGN data into a format suitable for training.
+- `s1_batch_pgn_data.py`: Combine and convert big PGN files into small chunked files.
+- `s2_process_pgn_data.py`: Convert PGN data into a format suitable for training.
 
 Example training pipeline:
 ```bash
 # chunk all the PGN files in `games/`. outputs by default to `batches/batch%d.pgn`.
-./batch_pgn_data.py games/*.pgn
+./s1_batch_pgn_data.py games/*.pgn
 
-# analyze batches 0 to 20 to turn them into training data. outputs by default to train_data/batch%d.tsv.gz.
+# analyze batches to turn them into training data. outputs by default to train_data/batch%d.tsv.gz.
 # set max-workers to the number of hardware threads / cores you have.
-./process_pgn_data.py --engine ../target/release/chess_inator --max-workers 8 batches/batch{0..20}.pgn
+# this is the longest part.
+./s2_process_pgn_data.py --engine ../target/release/chess_inator --max-workers 8 batches/batch*.pgn
 
 # combine all processed data into a single training set file.
 zcat train_data/*.tsv.gz | gzip > combined_training.tsv.gz
 ```
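Note: the row layout of those TSV files (FEN, multi-hot board string, centipawn eval, WDL) is an assumption inferred from the tuple that `s2_process_pgn_data.py` puts on its output queue below, not a documented format. A minimal sketch to peek at the combined training set:

```python
# peek at the first row of the combined training set produced above.
# assumed columns: fen, multi-hot board string, centipawn eval, wdl
# (inferred from s2_process_pgn_data.py's output queue; the first
# line may be a header row, depending on how the TSVs were written).
import gzip

with gzip.open("combined_training.tsv.gz", "rt") as f:
    for line in f:
        fen, board, cp_eval, wdl = line.rstrip("\n").split("\t")
        print(fen, cp_eval, wdl, len(board))
        break
```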

View File

@@ -18,11 +18,9 @@ import itertools
 from pathlib import Path
 
-"""Games to include per file in output."""
-
 parser = argparse.ArgumentParser()
 parser.add_argument("files", nargs="+", type=Path)
-parser.add_argument("--batch-size", type=int, help="Number of games to save in each output file.", default=8)
+parser.add_argument("--batch-size", type=int, help="Number of games to save in each output file. Set this to two to four times the amount of concurrent workers used in the processing step.", default=8)
 parser.add_argument("--output-folder", type=Path, help="Folder to save batched games in.", default=Path("batches"))
 args = parser.parse_args()

View File

@@ -160,6 +160,8 @@ async def worker(game_generator: AsyncIterator[pgn.Game]) -> None:
         await output_queue.put((board.fen(), tensor, int(eval_abs), wdl))
 
+    await engine.quit()
+
 
 async def analyse_games(file: Path):
     """Task that manages reading PGNs and analyzing them."""
@@ -219,6 +221,7 @@ async def main():
         if skipped:
             logging.info("Resuming at file '%s'.", file)
             skipped = False
+        else:
+            logging.info("Reading file '%s'.", file)
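The `await engine.quit()` added above is the "quits unused uci workers" fix: each worker owns one UCI engine process, and it now shuts that engine down when its game stream is exhausted rather than leaving an idle process behind. A minimal sketch of the pattern with python-chess (the engine path, depth limit, and queue plumbing are illustrative assumptions, not the script's actual structure):

```python
# sketch: one asyncio worker per UCI engine, engine always quit on exit.
import asyncio
import chess
import chess.engine

ENGINE_PATH = "../target/release/chess_inator"  # assumed, per the README

async def worker(queue: asyncio.Queue) -> None:
    _, engine = await chess.engine.popen_uci(ENGINE_PATH)
    try:
        while (board := await queue.get()) is not None:
            info = await engine.analyse(board, chess.engine.Limit(depth=8))
            print(board.fen(), info.get("score"))
    finally:
        await engine.quit()  # the fix: no idle engine outlives its worker

async def main() -> None:
    queue: asyncio.Queue = asyncio.Queue()
    await queue.put(chess.Board())
    await queue.put(None)  # sentinel: no more work
    await worker(queue)

asyncio.run(main())
```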

nnue/s3_train_neural_net.py, new executable file (75 changes)
View File

@@ -0,0 +1,75 @@
#!/usr/bin/env python

"""Train the NNUE weights."""

import torch
import pandas as pd
import numpy as np
from torch.utils.data import Dataset, DataLoader
from pathlib import Path
from dataclasses import dataclass


################################
################################
## Data loading / parsing
################################
################################


@dataclass
class Position:
    """Single board position."""

    fen: str
    """Normal board representation."""

    board: torch.Tensor
    """Multi-hot board representation."""

    cp_eval: np.double
    """Centipawn evaluation (white perspective)."""

    expected_points: np.double
    """
    Points expected to be gained for white from the game, based on centipawn evaluation.

    - 0: black win
    - 0.5: draw
    - 1: white win
    """


def sigmoid(x):
    """Calculate sigmoid of `x`, using scaling constant `K`."""
    K = 150
    return 1 / (1 + np.exp(-K * x / 400))


class ChessPositionDataset(Dataset):
    def __init__(self, data_file: Path):
        self.data = pd.read_csv(data_file, delimiter="\t")

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        row = self.data.iloc[idx]
        eval = np.double(row.iloc[2])
        return Position(
            fen=row.iloc[0],
            board=torch.as_tensor([1 if c == "1" else 0 for c in row.iloc[1]]),
            cp_eval=eval,
            expected_points=sigmoid(eval/100),
        )


if __name__ == "__main__":
    full_dataset = ChessPositionDataset(Path("combined_training.tsv.gz"))

    train_dataset, test_dataset = torch.utils.data.random_split(full_dataset, [0.8, 0.2])

    train_dataloader = DataLoader(train_dataset, batch_size=64, shuffle=True)
    test_dataloader = DataLoader(test_dataset, batch_size=64, shuffle=True)
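For intuition on `expected_points`: with K = 150 the sigmoid maps a +100 cp evaluation (x = 1 pawn) to 1 / (1 + e^(-150/400)) ≈ 0.59 expected points for white, and 0 cp to exactly 0.5. A short usage sketch for the dataset class (assumes `combined_training.tsv.gz` from the pipeline above exists, and imports the class from this script's module name):

```python
# inspect one sample from the dataset defined above.
from pathlib import Path

from s3_train_neural_net import ChessPositionDataset

ds = ChessPositionDataset(Path("combined_training.tsv.gz"))
pos = ds[0]
print(pos.fen)                           # board as FEN
print(pos.cp_eval, pos.expected_points)  # centipawns and sigmoid-mapped points
print(pos.board.shape)                   # multi-hot tensor dimensions
```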

View File

@@ -397,7 +397,7 @@ pub fn eval_metrics(board: &Board) -> EvalMetrics {
     let king_distance_eval =
         -advantage * i32::try_from(king_distance).unwrap() * max(7 - phase, 0) / 100;
 
-    let eval = pst_eval + king_distance_eval;
+    let eval = (pst_eval + king_distance_eval).clamp(i16::MIN.into(), i16::MAX.into());
 
     EvalMetrics {
         pst_eval,