//! Weight tuner for the trictrac heuristic bot. //! //! Uses self-play (greedy heuristic with candidate weights vs current champion weights) //! to measure win-rate signal. Since both bots are similarly capable, small weight //! differences produce a gradient near 50%, unlike vs-random where the heuristic wins //! ~100% regardless of weights. //! //! Algorithm: coordinate-descent hill-climbing. For each weight, probe +step and -step; //! accept the change that pushes the challenger win-rate above 50%. Halve step when no //! weight in the current pass improved. Stop when step < min_step. //! //! Each win-rate estimate runs `n_games` games with the challenger as White AND as Black //! (total 2×n_games), eliminating first-move bias. //! //! Usage: //! cargo run --release --bin weight_tuner -- [--games ] [--seed ] [--step ] [--min-step ] //! //! Prints the best weights at the end; paste them into bot_local.rs. use std::borrow::Cow; use std::time::Instant; use trictrac_store::{ training_common::sample_valid_action, Board, CheckerMove, Color, DiceRoller, GameEvent, GameState, MoveRules, Stage, TurnStage, }; // ── Weights ─────────────────────────────────────────────────────────────────── #[derive(Clone, Debug, PartialEq)] struct Weights { corner_filled: f32, // bonus if rest corner (field 12 for White) is occupied quarter_filled: f32, // bonus per fully filled quarter quarter_progress: f32, // bonus per non-missing checker in the most-promising unfilled quarter singleton_penalty: f32, // penalty per exposed singleton (opponent checker at higher field) exit_zone: f32, // bonus per checker already in fields 19-24 } const WEIGHT_NAMES: [&str; 5] = [ "corner_filled", "quarter_filled", "quarter_progress", "singleton_penalty", "exit_zone", ]; impl Weights { fn initial() -> Self { // Current hard-coded values from bot_local.rs Self { corner_filled: 5.0, quarter_filled: 8.0, quarter_progress: 0.3, singleton_penalty: 0.5, exit_zone: 0.3, } } fn get(&self, i: usize) -> f32 { match i { 0 => self.corner_filled, 1 => self.quarter_filled, 2 => self.quarter_progress, 3 => self.singleton_penalty, 4 => self.exit_zone, _ => panic!("weight index out of range"), } } fn with(&self, i: usize, v: f32) -> Self { let mut w = self.clone(); match i { 0 => w.corner_filled = v, 1 => w.quarter_filled = v, 2 => w.quarter_progress = v, 3 => w.singleton_penalty = v, 4 => w.exit_zone = v, _ => panic!("weight index out of range"), } w } } // ── Evaluation ──────────────────────────────────────────────────────────────── /// Evaluate a board from White's perspective. /// Mirrors evaluate() in bot_local.rs with parameterised weights. fn evaluate(board: &Board, w: &Weights) -> f32 { let mut score = 0.0f32; let white_fields = board.get_color_fields(Color::White); let black_fields = board.get_color_fields(Color::Black); let corner_field = board.get_color_corner(&Color::White); let (corner_count, _) = board.get_field_checkers(corner_field).unwrap(); if corner_count > 0 { score += w.corner_filled; } for &q in &[1usize, 7, 19] { if board.is_quarter_filled(Color::White, q) { score += w.quarter_filled; } else { let missing = board.get_quarter_filling_candidate(Color::White); score += (6 - missing.len().min(6)) as f32 * w.quarter_progress; } } let max_black_field = black_fields.iter().map(|(f, _)| *f).max().unwrap_or(0); for (f, count) in &white_fields { if *count == 1 && *f < max_black_field { score -= w.singleton_penalty; } } for (field, count) in &white_fields { if *field >= 19 { score += count.abs() as f32 * w.exit_zone; } } score } /// Greedy score for a move sequence. /// `m1`, `m2` are in the MoveRules output space for `color` (mirrored White space for Black). fn score_seq(board: &Board, m1: &CheckerMove, m2: &CheckerMove, color: Color, w: &Weights) -> f32 { // MoveRules for Black mirrors the board; sequences are in White space after mirror. // Replicate: use the mirrored board for Black, original for White. let mut b = if color == Color::White { board.clone() } else { board.mirror() }; let _ = b.move_checker(&Color::White, *m1); let _ = b.move_checker(&Color::White, *m2); evaluate(&b, w) } // ── Bot actions ─────────────────────────────────────────────────────────────── /// Pick the greedy best move for the heuristic bot with the given color and weights. /// Returns a GameEvent::Move with moves in the game's (non-mirrored) coordinate space. fn heuristic_action(state: &GameState, color: Color, weights: &Weights) -> GameEvent { let rules = MoveRules::new(&color, &state.board, state.dice); let seqs = rules.get_possible_moves_sequences(true, vec![]); let (m1, m2) = seqs .iter() .max_by(|(a1, a2), (b1, b2)| { score_seq(&state.board, a1, a2, color, weights) .partial_cmp(&score_seq(&state.board, b1, b2, color, weights)) .unwrap_or(std::cmp::Ordering::Equal) }) .copied() .unwrap_or_default(); // MoveRules for Black returns moves in mirrored (White) space — mirror back. let (m1, m2) = if color == Color::Black { (m1.mirror(), m2.mirror()) } else { (m1, m2) }; GameEvent::Move { player_id: state.active_player_id, moves: (m1, m2) } } /// Pick a uniformly random move for the random bot (used only in --vs-random mode). fn random_action(state: &GameState) -> GameEvent { let view: Cow = Cow::Owned(state.mirror()); if let Some(action) = sample_valid_action(&view) { if let Some(event) = action.to_event(&view) { return event.get_mirror(false); } } GameEvent::Move { player_id: state.active_player_id, moves: (CheckerMove::default(), CheckerMove::default()), } } // ── Game simulation ─────────────────────────────────────────────────────────── const MAX_STEPS: usize = 8_000; /// Simulate one self-play game. /// Player 1 (White) uses `weights_p1`, player 2 (Black) uses `weights_p2`. /// Returns the winner's player_id, or None on truncation. fn run_selfplay_game( weights_p1: &Weights, weights_p2: &Weights, roller: &mut DiceRoller, ) -> Option { let mut state = GameState::new_with_players("Bot1", "Bot2"); let mut steps = 0; while state.stage != Stage::Ended { steps += 1; if steps > MAX_STEPS { return None; } match state.turn_stage { TurnStage::RollDice => { let _ = state.consume(&GameEvent::Roll { player_id: state.active_player_id }); let dice = roller.roll(); let _ = state .consume(&GameEvent::RollResult { player_id: state.active_player_id, dice }); } _ => { let event = if state.active_player_id == 1 { heuristic_action(&state, Color::White, weights_p1) } else { heuristic_action(&state, Color::Black, weights_p2) }; if state.consume(&event).is_err() { return None; } } } } state.determine_winner() } /// Estimate challenger's win rate against champion via self-play. /// Runs n_games with challenger as White and n_games with challenger as Black /// to eliminate first-move bias. Returns fraction of games won by challenger. fn self_play_win_rate( challenger: &Weights, champion: &Weights, n_games: usize, roller: &mut DiceRoller, ) -> f32 { let mut challenger_wins = 0usize; let total = n_games * 2; for _ in 0..n_games { // Challenger as White (player 1) if run_selfplay_game(challenger, champion, roller) == Some(1) { challenger_wins += 1; } // Challenger as Black (player 2) if run_selfplay_game(champion, challenger, roller) == Some(2) { challenger_wins += 1; } } challenger_wins as f32 / total as f32 } /// Win rate of the heuristic bot (player 1 / White) against the random bot. /// Useful as a sanity check, but not suitable for hill-climbing (win rate ≈ 100%). fn vs_random_win_rate(weights: &Weights, n_games: usize, roller: &mut DiceRoller) -> f32 { let mut wins = 0usize; for _ in 0..n_games { let mut state = GameState::new_with_players("Heuristic", "Random"); let mut steps = 0; while state.stage != Stage::Ended { steps += 1; if steps > MAX_STEPS { break; } match state.turn_stage { TurnStage::RollDice => { let _ = state.consume(&GameEvent::Roll { player_id: state.active_player_id }); let dice = roller.roll(); let _ = state.consume(&GameEvent::RollResult { player_id: state.active_player_id, dice, }); } _ => { let event = if state.active_player_id == 1 { heuristic_action(&state, Color::White, weights) } else { random_action(&state) }; let _ = state.consume(&event); } } } if state.determine_winner() == Some(1) { wins += 1; } } wins as f32 / n_games as f32 } // ── Hill-climbing ───────────────────────────────────────────────────────────── /// Coordinate-descent hill-climbing via self-play. /// /// Compares each candidate (champion ± step on one weight) against the current /// champion. Accepts the candidate if its self-play win rate exceeds `0.5 + margin` /// (default 0.52 ≈ 2σ at N=150 games, i.e. N=300 total trials). /// Halves step when a full pass produces no improvement; stops when step < min_step. fn hill_climb( initial: Weights, n_games: usize, initial_step: f32, min_step: f32, margin: f32, roller: &mut DiceRoller, ) -> Weights { let threshold = 0.5 + margin; let mut champion = initial; let mut step = initial_step; println!("Initial weights: {:?}", champion); println!("Acceptance threshold: >{:.0}% (margin={:.3})", threshold * 100.0, margin); println!(); let mut iteration = 0usize; while step >= min_step { let mut improved = false; iteration += 1; for i in 0..5 { // Probe +step (clamped to non-negative). let up = champion.with(i, (champion.get(i) + step).max(0.0)); let wr_up = self_play_win_rate(&up, &champion, n_games, roller); // Probe -step. let dn = champion.with(i, (champion.get(i) - step).max(0.0)); let wr_dn = self_play_win_rate(&dn, &champion, n_games, roller); let best_wr = wr_up.max(wr_dn); if best_wr >= threshold { let (accepted, wr_accepted) = if wr_up >= wr_dn { (up, wr_up) } else { (dn, wr_dn) }; let dir = if wr_up >= wr_dn { '+' } else { '-' }; println!( " iter {:3} {} {}{:.3} self-play win {:.1}% {:?}", iteration, WEIGHT_NAMES[i], dir, step, wr_accepted * 100.0, accepted ); champion = accepted; improved = true; } } if !improved { step *= 0.5; println!( " iter {:3} no improvement at step {:.3} → halving to {:.3}", iteration, step * 2.0, step ); } } champion } // ── CLI args ────────────────────────────────────────────────────────────────── struct Args { n_games: usize, seed: Option, initial_step: f32, min_step: f32, margin: f32, vs_random: bool, } fn parse_args() -> Args { let args: Vec = std::env::args().collect(); let mut n_games = 200usize; let mut seed: Option = None; let mut initial_step = 2.0f32; let mut min_step = 0.1f32; // At N=200 games × 2 directions = 400 total trials, σ ≈ sqrt(0.25/400) ≈ 2.5%. // margin=0.03 ≈ 1.2σ: catches real improvements while filtering most noise. let mut margin = 0.03f32; let mut vs_random = false; let mut i = 1; while i < args.len() { match args[i].as_str() { "--games" => { i += 1; if let Some(v) = args.get(i).and_then(|s| s.parse().ok()) { n_games = v; } } "--seed" => { i += 1; seed = args.get(i).and_then(|s| s.parse().ok()); } "--step" => { i += 1; if let Some(v) = args.get(i).and_then(|s| s.parse().ok()) { initial_step = v; } } "--min-step" => { i += 1; if let Some(v) = args.get(i).and_then(|s| s.parse().ok()) { min_step = v; } } "--margin" => { i += 1; if let Some(v) = args.get(i).and_then(|s| s.parse().ok()) { margin = v; } } "--vs-random" => vs_random = true, _ => {} } i += 1; } Args { n_games, seed, initial_step, min_step, margin, vs_random } } // ── Main ────────────────────────────────────────────────────────────────────── fn main() { let args = parse_args(); println!("=== Trictrac weight tuner ==="); println!("mode : {}", if args.vs_random { "vs-random (no hill-climbing)" } else { "self-play hill-climbing" }); println!("games/eval : {}", args.n_games); println!("seed : {:?}", args.seed); if !args.vs_random { println!("step range : {:.3} → {:.3}", args.initial_step, args.min_step); println!("margin : >{:.0}%", (0.5 + args.margin) * 100.0); } println!(); let mut roller = DiceRoller::new(args.seed); let t0 = Instant::now(); if args.vs_random { let wr = vs_random_win_rate(&Weights::initial(), args.n_games, &mut roller); println!("vs-random win rate: {:.1}% ({} games)", wr * 100.0, args.n_games); println!("Elapsed: {:.1} s", t0.elapsed().as_secs_f64()); return; } let best = hill_climb( Weights::initial(), args.n_games, args.initial_step, args.min_step, args.margin, &mut roller, ); let elapsed = t0.elapsed(); println!(); println!("=== Optimised weights (paste into bot_local.rs) ==="); println!(" corner_filled: {}", best.corner_filled); println!(" quarter_filled: {}", best.quarter_filled); println!(" quarter_progress: {}", best.quarter_progress); println!(" singleton_penalty: {}", best.singleton_penalty); println!(" exit_zone: {}", best.exit_zone); println!(); println!("Elapsed: {:.1} s", elapsed.as_secs_f64()); }