feat: web client bot tuning
This commit is contained in:
parent
7a760980ba
commit
813cc3448a
4 changed files with 508 additions and 75 deletions
|
|
@ -1,4 +1,4 @@
|
||||||
use trictrac_store::{Board, CheckerMove, Color, GameState, MoveRules, Stage, TurnStage};
|
use trictrac_store::{Board, CheckerMove, Color, Dice, GameState, MoveRules, Stage, TurnStage};
|
||||||
|
|
||||||
use super::types::{PlayerAction, PreGameRollState};
|
use super::types::{PlayerAction, PreGameRollState};
|
||||||
|
|
||||||
|
|
@ -45,13 +45,42 @@ pub fn bot_decide(game: &GameState, pgr: Option<&PreGameRollState>) -> Option<Pl
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Score a candidate move sequence from the bot's (Black) perspective.
|
/// Score a candidate bot move sequence using depth-1 expectiminimax.
|
||||||
|
/// For each of the 21 possible opponent dice pairs, the opponent picks the move that
|
||||||
|
/// minimises the bot's score; we average those minima weighted by dice probability.
|
||||||
/// `m1` and `m2` are in mirrored (White) space, as returned by MoveRules for Color::Black.
|
/// `m1` and `m2` are in mirrored (White) space, as returned by MoveRules for Color::Black.
|
||||||
fn score_seq(board: &Board, m1: &CheckerMove, m2: &CheckerMove) -> f32 {
|
fn score_seq(board: &Board, m1: &CheckerMove, m2: &CheckerMove) -> f32 {
|
||||||
let mut b = board.mirror();
|
// Apply bot's moves on the mirrored board, then restore normal coordinates → B1.
|
||||||
let _ = b.move_checker(&Color::White, *m1);
|
let mut b_mirror = board.mirror();
|
||||||
let _ = b.move_checker(&Color::White, *m2);
|
let _ = b_mirror.move_checker(&Color::White, *m1);
|
||||||
evaluate(&b)
|
let _ = b_mirror.move_checker(&Color::White, *m2);
|
||||||
|
let b1 = b_mirror.mirror();
|
||||||
|
|
||||||
|
// Expectiminimax: sum over all 21 distinct dice pairs, weighted by probability (out of 36).
|
||||||
|
// Non-doubles have probability 2/36 each; doubles 1/36 each.
|
||||||
|
let mut total = 0.0f32;
|
||||||
|
for d1 in 1u8..=6 {
|
||||||
|
for d2 in d1..=6 {
|
||||||
|
let weight = if d1 == d2 { 1.0f32 } else { 2.0f32 };
|
||||||
|
let opp_rules = MoveRules::new(&Color::White, &b1, Dice { values: (d1, d2) });
|
||||||
|
let opp_seqs = opp_rules.get_possible_moves_sequences(true, vec![]);
|
||||||
|
let min_score = if opp_seqs.is_empty() {
|
||||||
|
evaluate(&b1.mirror())
|
||||||
|
} else {
|
||||||
|
opp_seqs
|
||||||
|
.iter()
|
||||||
|
.map(|(om1, om2)| {
|
||||||
|
let mut b2 = b1.clone();
|
||||||
|
let _ = b2.move_checker(&Color::White, *om1);
|
||||||
|
let _ = b2.move_checker(&Color::White, *om2);
|
||||||
|
evaluate(&b2.mirror())
|
||||||
|
})
|
||||||
|
.fold(f32::INFINITY, f32::min)
|
||||||
|
};
|
||||||
|
total += weight * min_score;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
total // proportional to expected score; dividing by 36 doesn't affect move ordering
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Evaluate a board position from White's perspective (call after mirroring for Black).
|
/// Evaluate a board position from White's perspective (call after mirroring for Black).
|
||||||
|
|
@ -61,11 +90,19 @@ fn evaluate(board: &Board) -> f32 {
|
||||||
let white_fields = board.get_color_fields(Color::White);
|
let white_fields = board.get_color_fields(Color::White);
|
||||||
let black_fields = board.get_color_fields(Color::Black);
|
let black_fields = board.get_color_fields(Color::Black);
|
||||||
|
|
||||||
|
// Bonus if rest corner filled (tuned: 6.0)
|
||||||
|
let corner_field = board.get_color_corner(&Color::White);
|
||||||
|
let (corner_count, _color) = board.get_field_checkers(corner_field).unwrap();
|
||||||
|
if corner_count > 0 {
|
||||||
|
score += 6.0;
|
||||||
|
}
|
||||||
|
|
||||||
// Quarter fill progress — quarters 1-6, 7-12, 19-24.
|
// Quarter fill progress — quarters 1-6, 7-12, 19-24.
|
||||||
// Quarter 13-18 is skipped: field 13 is the opponent's rest corner so White can never fill it.
|
// Quarter 13-18 is skipped: field 13 is the opponent's rest corner so White can never fill it.
|
||||||
|
// quarter_filled tuned to 5.5 (was 8.0), quarter_progress kept at 0.3.
|
||||||
for &q in &[1usize, 7, 19] {
|
for &q in &[1usize, 7, 19] {
|
||||||
if board.is_quarter_filled(Color::White, q) {
|
if board.is_quarter_filled(Color::White, q) {
|
||||||
score += 8.0;
|
score += 5.5;
|
||||||
} else {
|
} else {
|
||||||
let missing = board.get_quarter_filling_candidate(Color::White);
|
let missing = board.get_quarter_filling_candidate(Color::White);
|
||||||
score += (6 - missing.len().min(6)) as f32 * 0.3;
|
score += (6 - missing.len().min(6)) as f32 * 0.3;
|
||||||
|
|
@ -81,12 +118,8 @@ fn evaluate(board: &Board) -> f32 {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Exit zone progress: reward checkers already in fields 19-24.
|
// Exit zone progress: tuned to 0.0 — mid-game jan-filling dominates.
|
||||||
for (field, count) in &white_fields {
|
// (term kept here as a reminder; re-enable when bearing-off phase is reached)
|
||||||
if *field >= 19 {
|
|
||||||
score += count.abs() as f32 * 0.3;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
score
|
score
|
||||||
}
|
}
|
||||||
|
|
|
||||||
62
devenv.lock
62
devenv.lock
|
|
@ -17,62 +17,6 @@
|
||||||
"type": "github"
|
"type": "github"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
"flake-compat": {
|
|
||||||
"flake": false,
|
|
||||||
"locked": {
|
|
||||||
"lastModified": 1767039857,
|
|
||||||
"owner": "NixOS",
|
|
||||||
"repo": "flake-compat",
|
|
||||||
"rev": "5edf11c44bc78a0d334f6334cdaf7d60d732daab",
|
|
||||||
"type": "github"
|
|
||||||
},
|
|
||||||
"original": {
|
|
||||||
"owner": "NixOS",
|
|
||||||
"repo": "flake-compat",
|
|
||||||
"type": "github"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"git-hooks": {
|
|
||||||
"inputs": {
|
|
||||||
"flake-compat": "flake-compat",
|
|
||||||
"gitignore": "gitignore",
|
|
||||||
"nixpkgs": [
|
|
||||||
"nixpkgs"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"locked": {
|
|
||||||
"lastModified": 1778507602,
|
|
||||||
"owner": "cachix",
|
|
||||||
"repo": "git-hooks.nix",
|
|
||||||
"rev": "61ab0e80d9c7ab14c256b5b453d8b3fb0189ba0a",
|
|
||||||
"type": "github"
|
|
||||||
},
|
|
||||||
"original": {
|
|
||||||
"owner": "cachix",
|
|
||||||
"repo": "git-hooks.nix",
|
|
||||||
"type": "github"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"gitignore": {
|
|
||||||
"inputs": {
|
|
||||||
"nixpkgs": [
|
|
||||||
"git-hooks",
|
|
||||||
"nixpkgs"
|
|
||||||
]
|
|
||||||
},
|
|
||||||
"locked": {
|
|
||||||
"lastModified": 1762808025,
|
|
||||||
"owner": "hercules-ci",
|
|
||||||
"repo": "gitignore.nix",
|
|
||||||
"rev": "cb5e3fdca1de58ccbc3ef53de65bd372b48f567c",
|
|
||||||
"type": "github"
|
|
||||||
},
|
|
||||||
"original": {
|
|
||||||
"owner": "hercules-ci",
|
|
||||||
"repo": "gitignore.nix",
|
|
||||||
"type": "github"
|
|
||||||
}
|
|
||||||
},
|
|
||||||
"nixpkgs": {
|
"nixpkgs": {
|
||||||
"locked": {
|
"locked": {
|
||||||
"lastModified": 1779102034,
|
"lastModified": 1779102034,
|
||||||
|
|
@ -108,12 +52,8 @@
|
||||||
"root": {
|
"root": {
|
||||||
"inputs": {
|
"inputs": {
|
||||||
"devenv": "devenv",
|
"devenv": "devenv",
|
||||||
"git-hooks": "git-hooks",
|
|
||||||
"nixpkgs": "nixpkgs",
|
"nixpkgs": "nixpkgs",
|
||||||
"nixpkgs-cmake3": "nixpkgs-cmake3",
|
"nixpkgs-cmake3": "nixpkgs-cmake3"
|
||||||
"pre-commit-hooks": [
|
|
||||||
"git-hooks"
|
|
||||||
]
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
|
|
||||||
|
|
@ -22,3 +22,7 @@ transpose = "0.2.2"
|
||||||
[[bin]]
|
[[bin]]
|
||||||
name = "random_game"
|
name = "random_game"
|
||||||
path = "src/bin/random_game.rs"
|
path = "src/bin/random_game.rs"
|
||||||
|
|
||||||
|
[[bin]]
|
||||||
|
name = "weight_tuner"
|
||||||
|
path = "src/bin/weight_tuner.rs"
|
||||||
|
|
|
||||||
456
store/src/bin/weight_tuner.rs
Normal file
456
store/src/bin/weight_tuner.rs
Normal file
|
|
@ -0,0 +1,456 @@
|
||||||
|
//! Weight tuner for the trictrac heuristic bot.
|
||||||
|
//!
|
||||||
|
//! Uses self-play (greedy heuristic with candidate weights vs current champion weights)
|
||||||
|
//! to measure win-rate signal. Since both bots are similarly capable, small weight
|
||||||
|
//! differences produce a gradient near 50%, unlike vs-random where the heuristic wins
|
||||||
|
//! ~100% regardless of weights.
|
||||||
|
//!
|
||||||
|
//! Algorithm: coordinate-descent hill-climbing. For each weight, probe +step and -step;
|
||||||
|
//! accept the change that pushes the challenger win-rate above 50%. Halve step when no
|
||||||
|
//! weight in the current pass improved. Stop when step < min_step.
|
||||||
|
//!
|
||||||
|
//! Each win-rate estimate runs `n_games` games with the challenger as White AND as Black
|
||||||
|
//! (total 2×n_games), eliminating first-move bias.
|
||||||
|
//!
|
||||||
|
//! Usage:
|
||||||
|
//! cargo run --release --bin weight_tuner -- [--games <N>] [--seed <u64>] [--step <f32>] [--min-step <f32>] [--margin <f32>] [--vs-random]
|
||||||
|
//!
|
||||||
|
//! Prints the best weights at the end; paste them into bot_local.rs.
|
||||||
|
|
||||||
|
use std::borrow::Cow;
|
||||||
|
use std::time::Instant;
|
||||||
|
|
||||||
|
use trictrac_store::{
|
||||||
|
training_common::sample_valid_action, Board, CheckerMove, Color, DiceRoller, GameEvent,
|
||||||
|
GameState, MoveRules, Stage, TurnStage,
|
||||||
|
};
|
||||||
|
|
||||||
|
// ── Weights ───────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
/// Tunable coefficients of the board-evaluation heuristic.
#[derive(Clone, Debug, PartialEq)]
struct Weights {
    /// Bonus if the rest corner (field 12 for White) is occupied.
    corner_filled: f32,
    /// Bonus per fully filled quarter.
    quarter_filled: f32,
    /// Bonus per non-missing checker in the most-promising unfilled quarter.
    quarter_progress: f32,
    /// Penalty per exposed singleton (opponent checker at higher field).
    singleton_penalty: f32,
    /// Bonus per checker already in fields 19-24.
    exit_zone: f32,
}

/// Display names of the weights, in the same order as the indices accepted by
/// `Weights::get` and `Weights::with`.
const WEIGHT_NAMES: [&str; 5] = [
    "corner_filled",
    "quarter_filled",
    "quarter_progress",
    "singleton_penalty",
    "exit_zone",
];

impl Weights {
    /// Starting point of the search.
    ///
    /// NOTE(review): these are described as the hard-coded values from bot_local.rs;
    /// confirm they still match that file before treating them as the current champion.
    fn initial() -> Self {
        Self {
            corner_filled: 5.0,
            quarter_filled: 8.0,
            quarter_progress: 0.3,
            singleton_penalty: 0.5,
            exit_zone: 0.3,
        }
    }

    /// Returns the weight at position `i` (same order as `WEIGHT_NAMES`).
    ///
    /// # Panics
    /// Panics if `i` is not in `0..5`.
    fn get(&self, i: usize) -> f32 {
        let values = [
            self.corner_filled,
            self.quarter_filled,
            self.quarter_progress,
            self.singleton_penalty,
            self.exit_zone,
        ];
        match values.get(i) {
            Some(&value) => value,
            None => panic!("weight index out of range"),
        }
    }

    /// Returns a copy of `self` in which the weight at position `i` is replaced by `v`.
    ///
    /// # Panics
    /// Panics if `i` is not in `0..5`.
    fn with(&self, i: usize, v: f32) -> Self {
        let mut updated = self.clone();
        let slot = match i {
            0 => &mut updated.corner_filled,
            1 => &mut updated.quarter_filled,
            2 => &mut updated.quarter_progress,
            3 => &mut updated.singleton_penalty,
            4 => &mut updated.exit_zone,
            _ => panic!("weight index out of range"),
        };
        *slot = v;
        updated
    }
}
|
||||||
|
|
||||||
|
// ── Evaluation ────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
/// Evaluate a board from White's perspective.
|
||||||
|
/// Mirrors evaluate() in bot_local.rs with parameterised weights.
|
||||||
|
fn evaluate(board: &Board, w: &Weights) -> f32 {
|
||||||
|
let mut score = 0.0f32;
|
||||||
|
|
||||||
|
let white_fields = board.get_color_fields(Color::White);
|
||||||
|
let black_fields = board.get_color_fields(Color::Black);
|
||||||
|
|
||||||
|
let corner_field = board.get_color_corner(&Color::White);
|
||||||
|
let (corner_count, _) = board.get_field_checkers(corner_field).unwrap();
|
||||||
|
if corner_count > 0 {
|
||||||
|
score += w.corner_filled;
|
||||||
|
}
|
||||||
|
|
||||||
|
for &q in &[1usize, 7, 19] {
|
||||||
|
if board.is_quarter_filled(Color::White, q) {
|
||||||
|
score += w.quarter_filled;
|
||||||
|
} else {
|
||||||
|
let missing = board.get_quarter_filling_candidate(Color::White);
|
||||||
|
score += (6 - missing.len().min(6)) as f32 * w.quarter_progress;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let max_black_field = black_fields.iter().map(|(f, _)| *f).max().unwrap_or(0);
|
||||||
|
for (f, count) in &white_fields {
|
||||||
|
if *count == 1 && *f < max_black_field {
|
||||||
|
score -= w.singleton_penalty;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for (field, count) in &white_fields {
|
||||||
|
if *field >= 19 {
|
||||||
|
score += count.abs() as f32 * w.exit_zone;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
score
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Greedy score for a move sequence.
|
||||||
|
/// `m1`, `m2` are in the MoveRules output space for `color` (mirrored White space for Black).
|
||||||
|
fn score_seq(board: &Board, m1: &CheckerMove, m2: &CheckerMove, color: Color, w: &Weights) -> f32 {
|
||||||
|
// MoveRules for Black mirrors the board; sequences are in White space after mirror.
|
||||||
|
// Replicate: use the mirrored board for Black, original for White.
|
||||||
|
let mut b = if color == Color::White { board.clone() } else { board.mirror() };
|
||||||
|
let _ = b.move_checker(&Color::White, *m1);
|
||||||
|
let _ = b.move_checker(&Color::White, *m2);
|
||||||
|
evaluate(&b, w)
|
||||||
|
}
|
||||||
|
|
||||||
|
// ── Bot actions ───────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
/// Pick the greedy best move for the heuristic bot with the given color and weights.
|
||||||
|
/// Returns a GameEvent::Move with moves in the game's (non-mirrored) coordinate space.
|
||||||
|
fn heuristic_action(state: &GameState, color: Color, weights: &Weights) -> GameEvent {
|
||||||
|
let rules = MoveRules::new(&color, &state.board, state.dice);
|
||||||
|
let seqs = rules.get_possible_moves_sequences(true, vec![]);
|
||||||
|
let (m1, m2) = seqs
|
||||||
|
.iter()
|
||||||
|
.max_by(|(a1, a2), (b1, b2)| {
|
||||||
|
score_seq(&state.board, a1, a2, color, weights)
|
||||||
|
.partial_cmp(&score_seq(&state.board, b1, b2, color, weights))
|
||||||
|
.unwrap_or(std::cmp::Ordering::Equal)
|
||||||
|
})
|
||||||
|
.copied()
|
||||||
|
.unwrap_or_default();
|
||||||
|
// MoveRules for Black returns moves in mirrored (White) space — mirror back.
|
||||||
|
let (m1, m2) = if color == Color::Black { (m1.mirror(), m2.mirror()) } else { (m1, m2) };
|
||||||
|
GameEvent::Move { player_id: state.active_player_id, moves: (m1, m2) }
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Pick a uniformly random move for the random bot (used only in --vs-random mode).
|
||||||
|
fn random_action(state: &GameState) -> GameEvent {
|
||||||
|
let view: Cow<GameState> = Cow::Owned(state.mirror());
|
||||||
|
if let Some(action) = sample_valid_action(&view) {
|
||||||
|
if let Some(event) = action.to_event(&view) {
|
||||||
|
return event.get_mirror(false);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
GameEvent::Move {
|
||||||
|
player_id: state.active_player_id,
|
||||||
|
moves: (CheckerMove::default(), CheckerMove::default()),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// ── Game simulation ───────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
const MAX_STEPS: usize = 8_000;
|
||||||
|
|
||||||
|
/// Simulate one self-play game.
|
||||||
|
/// Player 1 (White) uses `weights_p1`, player 2 (Black) uses `weights_p2`.
|
||||||
|
/// Returns the winner's player_id, or None on truncation.
|
||||||
|
fn run_selfplay_game(
|
||||||
|
weights_p1: &Weights,
|
||||||
|
weights_p2: &Weights,
|
||||||
|
roller: &mut DiceRoller,
|
||||||
|
) -> Option<u64> {
|
||||||
|
let mut state = GameState::new_with_players("Bot1", "Bot2");
|
||||||
|
let mut steps = 0;
|
||||||
|
|
||||||
|
while state.stage != Stage::Ended {
|
||||||
|
steps += 1;
|
||||||
|
if steps > MAX_STEPS {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
|
||||||
|
match state.turn_stage {
|
||||||
|
TurnStage::RollDice => {
|
||||||
|
let _ = state.consume(&GameEvent::Roll { player_id: state.active_player_id });
|
||||||
|
let dice = roller.roll();
|
||||||
|
let _ = state
|
||||||
|
.consume(&GameEvent::RollResult { player_id: state.active_player_id, dice });
|
||||||
|
}
|
||||||
|
_ => {
|
||||||
|
let event = if state.active_player_id == 1 {
|
||||||
|
heuristic_action(&state, Color::White, weights_p1)
|
||||||
|
} else {
|
||||||
|
heuristic_action(&state, Color::Black, weights_p2)
|
||||||
|
};
|
||||||
|
if state.consume(&event).is_err() {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
state.determine_winner()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Estimate challenger's win rate against champion via self-play.
|
||||||
|
/// Runs n_games with challenger as White and n_games with challenger as Black
|
||||||
|
/// to eliminate first-move bias. Returns fraction of games won by challenger.
|
||||||
|
fn self_play_win_rate(
|
||||||
|
challenger: &Weights,
|
||||||
|
champion: &Weights,
|
||||||
|
n_games: usize,
|
||||||
|
roller: &mut DiceRoller,
|
||||||
|
) -> f32 {
|
||||||
|
let mut challenger_wins = 0usize;
|
||||||
|
let total = n_games * 2;
|
||||||
|
|
||||||
|
for _ in 0..n_games {
|
||||||
|
// Challenger as White (player 1)
|
||||||
|
if run_selfplay_game(challenger, champion, roller) == Some(1) {
|
||||||
|
challenger_wins += 1;
|
||||||
|
}
|
||||||
|
// Challenger as Black (player 2)
|
||||||
|
if run_selfplay_game(champion, challenger, roller) == Some(2) {
|
||||||
|
challenger_wins += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
challenger_wins as f32 / total as f32
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Win rate of the heuristic bot (player 1 / White) against the random bot.
|
||||||
|
/// Useful as a sanity check, but not suitable for hill-climbing (win rate ≈ 100%).
|
||||||
|
fn vs_random_win_rate(weights: &Weights, n_games: usize, roller: &mut DiceRoller) -> f32 {
|
||||||
|
let mut wins = 0usize;
|
||||||
|
for _ in 0..n_games {
|
||||||
|
let mut state = GameState::new_with_players("Heuristic", "Random");
|
||||||
|
let mut steps = 0;
|
||||||
|
while state.stage != Stage::Ended {
|
||||||
|
steps += 1;
|
||||||
|
if steps > MAX_STEPS {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
match state.turn_stage {
|
||||||
|
TurnStage::RollDice => {
|
||||||
|
let _ = state.consume(&GameEvent::Roll { player_id: state.active_player_id });
|
||||||
|
let dice = roller.roll();
|
||||||
|
let _ = state.consume(&GameEvent::RollResult {
|
||||||
|
player_id: state.active_player_id,
|
||||||
|
dice,
|
||||||
|
});
|
||||||
|
}
|
||||||
|
_ => {
|
||||||
|
let event = if state.active_player_id == 1 {
|
||||||
|
heuristic_action(&state, Color::White, weights)
|
||||||
|
} else {
|
||||||
|
random_action(&state)
|
||||||
|
};
|
||||||
|
let _ = state.consume(&event);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if state.determine_winner() == Some(1) {
|
||||||
|
wins += 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
wins as f32 / n_games as f32
|
||||||
|
}
|
||||||
|
|
||||||
|
// ── Hill-climbing ─────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
/// Coordinate-descent hill-climbing via self-play.
|
||||||
|
///
|
||||||
|
/// Compares each candidate (champion ± step on one weight) against the current
|
||||||
|
/// champion. Accepts the candidate if its self-play win rate exceeds `0.5 + margin`
|
||||||
|
/// (default 0.52 ≈ 2σ at N=150 games, i.e. N=300 total trials).
|
||||||
|
/// Halves step when a full pass produces no improvement; stops when step < min_step.
|
||||||
|
fn hill_climb(
|
||||||
|
initial: Weights,
|
||||||
|
n_games: usize,
|
||||||
|
initial_step: f32,
|
||||||
|
min_step: f32,
|
||||||
|
margin: f32,
|
||||||
|
roller: &mut DiceRoller,
|
||||||
|
) -> Weights {
|
||||||
|
let threshold = 0.5 + margin;
|
||||||
|
let mut champion = initial;
|
||||||
|
let mut step = initial_step;
|
||||||
|
|
||||||
|
println!("Initial weights: {:?}", champion);
|
||||||
|
println!("Acceptance threshold: >{:.0}% (margin={:.3})", threshold * 100.0, margin);
|
||||||
|
println!();
|
||||||
|
|
||||||
|
let mut iteration = 0usize;
|
||||||
|
while step >= min_step {
|
||||||
|
let mut improved = false;
|
||||||
|
iteration += 1;
|
||||||
|
|
||||||
|
for i in 0..5 {
|
||||||
|
// Probe +step (clamped to non-negative).
|
||||||
|
let up = champion.with(i, (champion.get(i) + step).max(0.0));
|
||||||
|
let wr_up = self_play_win_rate(&up, &champion, n_games, roller);
|
||||||
|
|
||||||
|
// Probe -step.
|
||||||
|
let dn = champion.with(i, (champion.get(i) - step).max(0.0));
|
||||||
|
let wr_dn = self_play_win_rate(&dn, &champion, n_games, roller);
|
||||||
|
|
||||||
|
let best_wr = wr_up.max(wr_dn);
|
||||||
|
if best_wr >= threshold {
|
||||||
|
let (accepted, wr_accepted) =
|
||||||
|
if wr_up >= wr_dn { (up, wr_up) } else { (dn, wr_dn) };
|
||||||
|
let dir = if wr_up >= wr_dn { '+' } else { '-' };
|
||||||
|
println!(
|
||||||
|
" iter {:3} {} {}{:.3} self-play win {:.1}% {:?}",
|
||||||
|
iteration,
|
||||||
|
WEIGHT_NAMES[i],
|
||||||
|
dir,
|
||||||
|
step,
|
||||||
|
wr_accepted * 100.0,
|
||||||
|
accepted
|
||||||
|
);
|
||||||
|
champion = accepted;
|
||||||
|
improved = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if !improved {
|
||||||
|
step *= 0.5;
|
||||||
|
println!(
|
||||||
|
" iter {:3} no improvement at step {:.3} → halving to {:.3}",
|
||||||
|
iteration,
|
||||||
|
step * 2.0,
|
||||||
|
step
|
||||||
|
);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
champion
|
||||||
|
}
|
||||||
|
|
||||||
|
// ── CLI args ──────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
/// Command-line configuration of the tuner.
struct Args {
    /// Value of `--games`; `main` reports it as "games/eval".
    n_games: usize,
    /// Value of `--seed`, handed to the dice roller; `None` when not given.
    seed: Option<u64>,
    /// Value of `--step`: the first step size of the hill-climb.
    initial_step: f32,
    /// Value of `--min-step`: the hill-climb stops once the step drops below it.
    min_step: f32,
    /// Value of `--margin`: a candidate must reach a win rate of `0.5 + margin`.
    margin: f32,
    /// Set by `--vs-random`: run the vs-random sanity check instead of hill-climbing.
    vs_random: bool,
}

/// Parses the value following `flag`. Prints a warning to stderr and returns `None`
/// when the value is missing or cannot be parsed as `T`.
fn flag_value<T: std::str::FromStr>(flag: &str, raw: Option<&String>) -> Option<T> {
    match raw {
        None => {
            eprintln!("warning: {} expects a value; keeping the current setting", flag);
            None
        }
        Some(text) => match text.parse::<T>() {
            Ok(value) => Some(value),
            Err(_) => {
                eprintln!(
                    "warning: invalid value {:?} for {}; keeping the current setting",
                    text, flag
                );
                None
            }
        },
    }
}

/// Like `flag_value::<f32>`, but additionally rejects values that are not finite or
/// not strictly positive.
fn positive_flag_value(flag: &str, raw: Option<&String>) -> Option<f32> {
    let value: f32 = flag_value(flag, raw)?;
    if value.is_finite() && value > 0.0 {
        Some(value)
    } else {
        eprintln!(
            "warning: {} must be a positive number (got {}); keeping the current setting",
            flag, value
        );
        None
    }
}

/// Parses an argument vector whose first element is the program name.
///
/// Recognised flags: `--games <N>`, `--seed <u64>`, `--step <f32>`, `--min-step <f32>`,
/// `--margin <f32>` and `--vs-random`. A flag that takes a value always consumes the
/// following argument. Missing, malformed or out-of-range values leave the setting
/// unchanged and produce a warning on stderr, as do unrecognised arguments.
fn parse_args_from(args: &[String]) -> Args {
    let mut parsed = Args {
        n_games: 200,
        seed: None,
        initial_step: 2.0,
        min_step: 0.1,
        // At N=200 games × 2 directions = 400 total trials, σ ≈ sqrt(0.25/400) ≈ 2.5%.
        // margin=0.03 ≈ 1.2σ: catches real improvements while filtering most noise.
        margin: 0.03,
        vs_random: false,
    };

    let mut rest = args.iter().skip(1);
    while let Some(flag) = rest.next() {
        match flag.as_str() {
            "--games" => match flag_value::<usize>(flag, rest.next()) {
                // Zero games would make every win rate 0/0.
                Some(0) => eprintln!(
                    "warning: --games must be at least 1; keeping {}",
                    parsed.n_games
                ),
                Some(n) => parsed.n_games = n,
                None => {}
            },
            "--seed" => {
                if let Some(seed) = flag_value::<u64>(flag, rest.next()) {
                    parsed.seed = Some(seed);
                }
            }
            "--step" => {
                if let Some(step) = positive_flag_value(flag, rest.next()) {
                    parsed.initial_step = step;
                }
            }
            "--min-step" => {
                if let Some(step) = positive_flag_value(flag, rest.next()) {
                    parsed.min_step = step;
                }
            }
            "--margin" => match flag_value::<f32>(flag, rest.next()) {
                Some(margin) if margin.is_finite() => parsed.margin = margin,
                Some(margin) => eprintln!(
                    "warning: --margin must be finite (got {}); keeping {}",
                    margin, parsed.margin
                ),
                None => {}
            },
            "--vs-random" => parsed.vs_random = true,
            other => eprintln!("warning: ignoring unrecognised argument {:?}", other),
        }
    }

    parsed
}

/// Reads the tuner configuration from the process arguments.
///
/// Defaults: 200 games, no seed, step 2.0, min-step 0.1, margin 0.03, hill-climbing mode.
fn parse_args() -> Args {
    let argv: Vec<String> = std::env::args().collect();
    parse_args_from(&argv)
}
|
||||||
|
|
||||||
|
// ── Main ──────────────────────────────────────────────────────────────────────
|
||||||
|
|
||||||
|
fn main() {
|
||||||
|
let args = parse_args();
|
||||||
|
|
||||||
|
println!("=== Trictrac weight tuner ===");
|
||||||
|
println!("mode : {}", if args.vs_random { "vs-random (no hill-climbing)" } else { "self-play hill-climbing" });
|
||||||
|
println!("games/eval : {}", args.n_games);
|
||||||
|
println!("seed : {:?}", args.seed);
|
||||||
|
if !args.vs_random {
|
||||||
|
println!("step range : {:.3} → {:.3}", args.initial_step, args.min_step);
|
||||||
|
println!("margin : >{:.0}%", (0.5 + args.margin) * 100.0);
|
||||||
|
}
|
||||||
|
println!();
|
||||||
|
|
||||||
|
let mut roller = DiceRoller::new(args.seed);
|
||||||
|
let t0 = Instant::now();
|
||||||
|
|
||||||
|
if args.vs_random {
|
||||||
|
let wr = vs_random_win_rate(&Weights::initial(), args.n_games, &mut roller);
|
||||||
|
println!("vs-random win rate: {:.1}% ({} games)", wr * 100.0, args.n_games);
|
||||||
|
println!("Elapsed: {:.1} s", t0.elapsed().as_secs_f64());
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
let best = hill_climb(
|
||||||
|
Weights::initial(),
|
||||||
|
args.n_games,
|
||||||
|
args.initial_step,
|
||||||
|
args.min_step,
|
||||||
|
args.margin,
|
||||||
|
&mut roller,
|
||||||
|
);
|
||||||
|
|
||||||
|
let elapsed = t0.elapsed();
|
||||||
|
println!();
|
||||||
|
println!("=== Optimised weights (paste into bot_local.rs) ===");
|
||||||
|
println!(" corner_filled: {}", best.corner_filled);
|
||||||
|
println!(" quarter_filled: {}", best.quarter_filled);
|
||||||
|
println!(" quarter_progress: {}", best.quarter_progress);
|
||||||
|
println!(" singleton_penalty: {}", best.singleton_penalty);
|
||||||
|
println!(" exit_zone: {}", best.exit_zone);
|
||||||
|
println!();
|
||||||
|
println!("Elapsed: {:.1} s", elapsed.as_secs_f64());
|
||||||
|
}
|
||||||
Loading…
Add table
Add a link
Reference in a new issue