From 813cc3448a15f0a39388c9f5a994392a5dee1d31 Mon Sep 17 00:00:00 2001
From: Henri Bourcereau <henri.bourcereau@gmail.com>
Date: Sun, 21 Jun 2026 13:44:24 +0200
Subject: [PATCH] feat: web client bot tuning

---
 clients/web/src/game/trictrac/bot_local.rs |  59 ++-
 devenv.lock                                |  64 +--
 store/Cargo.toml                           |   4 +
 store/src/bin/weight_tuner.rs              | 456 +++++++++++++++++++++
 4 files changed, 508 insertions(+), 75 deletions(-)
 create mode 100644 store/src/bin/weight_tuner.rs

diff --git a/clients/web/src/game/trictrac/bot_local.rs b/clients/web/src/game/trictrac/bot_local.rs
index 6161fe2..8a817fe 100644
--- a/clients/web/src/game/trictrac/bot_local.rs
+++ b/clients/web/src/game/trictrac/bot_local.rs
@@ -1,4 +1,4 @@
-use trictrac_store::{Board, CheckerMove, Color, GameState, MoveRules, Stage, TurnStage};
+use trictrac_store::{Board, CheckerMove, Color, Dice, GameState, MoveRules, Stage, TurnStage};
 
 use super::types::{PlayerAction, PreGameRollState};
 
@@ -45,13 +45,42 @@ pub fn bot_decide(game: &GameState, pgr: Option<&PreGameRollState>) -> Option<Pl
     }
 }
 
-/// Score a candidate move sequence from the bot's (Black) perspective.
+/// Score a candidate bot move sequence using depth-1 expectiminimax.
+/// For each of the 21 possible opponent dice pairs, the opponent picks the move that
+/// minimises the bot's score; we average those minima weighted by dice probability.
 /// `m1` and `m2` are in mirrored (White) space, as returned by MoveRules for Color::Black.
 fn score_seq(board: &Board, m1: &CheckerMove, m2: &CheckerMove) -> f32 {
-    let mut b = board.mirror();
-    let _ = b.move_checker(&Color::White, *m1);
-    let _ = b.move_checker(&Color::White, *m2);
-    evaluate(&b)
+    // Apply bot's moves on the mirrored board, then restore normal coordinates → B1.
+    let mut b_mirror = board.mirror();
+    let _ = b_mirror.move_checker(&Color::White, *m1);
+    let _ = b_mirror.move_checker(&Color::White, *m2);
+    let b1 = b_mirror.mirror();
+
+    // Expectiminimax: sum over all 21 distinct dice pairs, weighted by probability (out of 36).
+    // Non-doubles have probability 2/36 each; doubles 1/36 each.
+    let mut total = 0.0f32;
+    for d1 in 1u8..=6 {
+        for d2 in d1..=6 {
+            let weight = if d1 == d2 { 1.0f32 } else { 2.0f32 };
+            let opp_rules = MoveRules::new(&Color::White, &b1, Dice { values: (d1, d2) });
+            let opp_seqs = opp_rules.get_possible_moves_sequences(true, vec![]);
+            let min_score = if opp_seqs.is_empty() {
+                evaluate(&b1.mirror())
+            } else {
+                opp_seqs
+                    .iter()
+                    .map(|(om1, om2)| {
+                        let mut b2 = b1.clone();
+                        let _ = b2.move_checker(&Color::White, *om1);
+                        let _ = b2.move_checker(&Color::White, *om2);
+                        evaluate(&b2.mirror())
+                    })
+                    .fold(f32::INFINITY, f32::min)
+            };
+            total += weight * min_score;
+        }
+    }
+    total // proportional to expected score; dividing by 36 doesn't affect move ordering
 }
 
 /// Evaluate a board position from White's perspective (call after mirroring for Black).
@@ -61,11 +90,19 @@ fn evaluate(board: &Board) -> f32 {
     let white_fields = board.get_color_fields(Color::White);
     let black_fields = board.get_color_fields(Color::Black);
 
+    // Bonus if rest corner filled (tuned: 6.0)
+    let corner_field = board.get_color_corner(&Color::White);
+    let (corner_count, _color) = board.get_field_checkers(corner_field).unwrap();
+    if corner_count > 0 {
+        score += 6.0;
+    }
+
     // Quarter fill progress — quarters 1-6, 7-12, 19-24.
     // Quarter 13-18 is skipped: field 13 is the opponent's rest corner so White can never fill it.
+    // quarter_filled tuned to 5.5 (was 8.0), quarter_progress kept at 0.3.
     for &q in &[1usize, 7, 19] {
         if board.is_quarter_filled(Color::White, q) {
-            score += 8.0;
+            score += 5.5;
         } else {
             let missing = board.get_quarter_filling_candidate(Color::White);
             score += (6 - missing.len().min(6)) as f32 * 0.3;
@@ -81,12 +118,8 @@ fn evaluate(board: &Board) -> f32 {
         }
     }
 
-    // Exit zone progress: reward checkers already in fields 19-24.
-    for (field, count) in &white_fields {
-        if *field >= 19 {
-            score += count.abs() as f32 * 0.3;
-        }
-    }
+    // Exit zone progress: tuned to 0.0 — mid-game jan-filling dominates.
+    // (term kept here as a reminder; re-enable when bearing-off phase is reached)
 
     score
 }
diff --git a/devenv.lock b/devenv.lock
index 3f0905b..e6e8ef6 100644
--- a/devenv.lock
+++ b/devenv.lock
@@ -17,62 +17,6 @@
         "type": "github"
       }
     },
-    "flake-compat": {
-      "flake": false,
-      "locked": {
-        "lastModified": 1767039857,
-        "owner": "NixOS",
-        "repo": "flake-compat",
-        "rev": "5edf11c44bc78a0d334f6334cdaf7d60d732daab",
-        "type": "github"
-      },
-      "original": {
-        "owner": "NixOS",
-        "repo": "flake-compat",
-        "type": "github"
-      }
-    },
-    "git-hooks": {
-      "inputs": {
-        "flake-compat": "flake-compat",
-        "gitignore": "gitignore",
-        "nixpkgs": [
-          "nixpkgs"
-        ]
-      },
-      "locked": {
-        "lastModified": 1778507602,
-        "owner": "cachix",
-        "repo": "git-hooks.nix",
-        "rev": "61ab0e80d9c7ab14c256b5b453d8b3fb0189ba0a",
-        "type": "github"
-      },
-      "original": {
-        "owner": "cachix",
-        "repo": "git-hooks.nix",
-        "type": "github"
-      }
-    },
-    "gitignore": {
-      "inputs": {
-        "nixpkgs": [
-          "git-hooks",
-          "nixpkgs"
-        ]
-      },
-      "locked": {
-        "lastModified": 1762808025,
-        "owner": "hercules-ci",
-        "repo": "gitignore.nix",
-        "rev": "cb5e3fdca1de58ccbc3ef53de65bd372b48f567c",
-        "type": "github"
-      },
-      "original": {
-        "owner": "hercules-ci",
-        "repo": "gitignore.nix",
-        "type": "github"
-      }
-    },
     "nixpkgs": {
       "locked": {
         "lastModified": 1779102034,
@@ -108,15 +52,11 @@
     "root": {
       "inputs": {
         "devenv": "devenv",
-        "git-hooks": "git-hooks",
         "nixpkgs": "nixpkgs",
-        "nixpkgs-cmake3": "nixpkgs-cmake3",
-        "pre-commit-hooks": [
-          "git-hooks"
-        ]
+        "nixpkgs-cmake3": "nixpkgs-cmake3"
       }
     }
   },
   "root": "root",
   "version": 7
-}
+}
\ No newline at end of file
diff --git a/store/Cargo.toml b/store/Cargo.toml
index 92b1b84..863cda3 100644
--- a/store/Cargo.toml
+++ b/store/Cargo.toml
@@ -22,3 +22,7 @@ transpose = "0.2.2"
 [[bin]]
 name = "random_game"
 path = "src/bin/random_game.rs"
+
+[[bin]]
+name = "weight_tuner"
+path = "src/bin/weight_tuner.rs"
diff --git a/store/src/bin/weight_tuner.rs b/store/src/bin/weight_tuner.rs
new file mode 100644
index 0000000..43229b5
--- /dev/null
+++ b/store/src/bin/weight_tuner.rs
@@ -0,0 +1,456 @@
+//! Weight tuner for the trictrac heuristic bot.
+//!
+//! Uses self-play (greedy heuristic with candidate weights vs current champion weights)
+//! to measure win-rate signal. Since both bots are similarly capable, small weight
+//! differences produce a gradient near 50%, unlike vs-random where the heuristic wins
+//! ~100% regardless of weights.
+//!
+//! Algorithm: coordinate-descent hill-climbing. For each weight, probe +step and -step;
+//! accept the change that pushes the challenger win-rate above 50%. Halve step when no
+//! weight in the current pass improved. Stop when step < min_step.
+//!
+//! Each win-rate estimate runs `n_games` games with the challenger as White AND as Black
+//! (total 2×n_games), eliminating first-move bias.
+//!
+//! Usage:
+//!   cargo run --release --bin weight_tuner -- [--games <N>] [--seed <u64>] [--step <f32>] [--min-step <f32>]
+//!
+//! Prints the best weights at the end; paste them into bot_local.rs.
+
+use std::borrow::Cow;
+use std::time::Instant;
+
+use trictrac_store::{
+    training_common::sample_valid_action, Board, CheckerMove, Color, DiceRoller, GameEvent,
+    GameState, MoveRules, Stage, TurnStage,
+};
+
+// ── Weights ───────────────────────────────────────────────────────────────────
+
+#[derive(Clone, Debug, PartialEq)]
+struct Weights {
+    corner_filled: f32,     // bonus if rest corner (field 12 for White) is occupied
+    quarter_filled: f32,    // bonus per fully filled quarter
+    quarter_progress: f32,  // bonus per non-missing checker in the most-promising unfilled quarter
+    singleton_penalty: f32, // penalty per exposed singleton (opponent checker at higher field)
+    exit_zone: f32,         // bonus per checker already in fields 19-24
+}
+
+const WEIGHT_NAMES: [&str; 5] = [
+    "corner_filled",
+    "quarter_filled",
+    "quarter_progress",
+    "singleton_penalty",
+    "exit_zone",
+];
+
+impl Weights {
+    fn initial() -> Self {
+        // Current hard-coded values from bot_local.rs
+        Self {
+            corner_filled: 5.0,
+            quarter_filled: 8.0,
+            quarter_progress: 0.3,
+            singleton_penalty: 0.5,
+            exit_zone: 0.3,
+        }
+    }
+
+    fn get(&self, i: usize) -> f32 {
+        match i {
+            0 => self.corner_filled,
+            1 => self.quarter_filled,
+            2 => self.quarter_progress,
+            3 => self.singleton_penalty,
+            4 => self.exit_zone,
+            _ => panic!("weight index out of range"),
+        }
+    }
+
+    fn with(&self, i: usize, v: f32) -> Self {
+        let mut w = self.clone();
+        match i {
+            0 => w.corner_filled = v,
+            1 => w.quarter_filled = v,
+            2 => w.quarter_progress = v,
+            3 => w.singleton_penalty = v,
+            4 => w.exit_zone = v,
+            _ => panic!("weight index out of range"),
+        }
+        w
+    }
+}
+
+// ── Evaluation ────────────────────────────────────────────────────────────────
+
+/// Evaluate a board from White's perspective.
+/// Mirrors evaluate() in bot_local.rs with parameterised weights.
+fn evaluate(board: &Board, w: &Weights) -> f32 {
+    let mut score = 0.0f32;
+
+    let white_fields = board.get_color_fields(Color::White);
+    let black_fields = board.get_color_fields(Color::Black);
+
+    let corner_field = board.get_color_corner(&Color::White);
+    let (corner_count, _) = board.get_field_checkers(corner_field).unwrap();
+    if corner_count > 0 {
+        score += w.corner_filled;
+    }
+
+    for &q in &[1usize, 7, 19] {
+        if board.is_quarter_filled(Color::White, q) {
+            score += w.quarter_filled;
+        } else {
+            let missing = board.get_quarter_filling_candidate(Color::White);
+            score += (6 - missing.len().min(6)) as f32 * w.quarter_progress;
+        }
+    }
+
+    let max_black_field = black_fields.iter().map(|(f, _)| *f).max().unwrap_or(0);
+    for (f, count) in &white_fields {
+        if *count == 1 && *f < max_black_field {
+            score -= w.singleton_penalty;
+        }
+    }
+
+    for (field, count) in &white_fields {
+        if *field >= 19 {
+            score += count.abs() as f32 * w.exit_zone;
+        }
+    }
+
+    score
+}
+
+/// Greedy score for a move sequence.
+/// `m1`, `m2` are in the MoveRules output space for `color` (mirrored White space for Black).
+fn score_seq(board: &Board, m1: &CheckerMove, m2: &CheckerMove, color: Color, w: &Weights) -> f32 {
+    // MoveRules for Black mirrors the board; sequences are in White space after mirror.
+    // Replicate: use the mirrored board for Black, original for White.
+    let mut b = if color == Color::White { board.clone() } else { board.mirror() };
+    let _ = b.move_checker(&Color::White, *m1);
+    let _ = b.move_checker(&Color::White, *m2);
+    evaluate(&b, w)
+}
+
+// ── Bot actions ───────────────────────────────────────────────────────────────
+
+/// Pick the greedy best move for the heuristic bot with the given color and weights.
+/// Returns a GameEvent::Move with moves in the game's (non-mirrored) coordinate space.
+fn heuristic_action(state: &GameState, color: Color, weights: &Weights) -> GameEvent {
+    let rules = MoveRules::new(&color, &state.board, state.dice);
+    let seqs = rules.get_possible_moves_sequences(true, vec![]);
+    let (m1, m2) = seqs
+        .iter()
+        .max_by(|(a1, a2), (b1, b2)| {
+            score_seq(&state.board, a1, a2, color, weights)
+                .partial_cmp(&score_seq(&state.board, b1, b2, color, weights))
+                .unwrap_or(std::cmp::Ordering::Equal)
+        })
+        .copied()
+        .unwrap_or_default();
+    // MoveRules for Black returns moves in mirrored (White) space — mirror back.
+    let (m1, m2) = if color == Color::Black { (m1.mirror(), m2.mirror()) } else { (m1, m2) };
+    GameEvent::Move { player_id: state.active_player_id, moves: (m1, m2) }
+}
+
+/// Pick a uniformly random move for the random bot (used only in --vs-random mode).
+fn random_action(state: &GameState) -> GameEvent {
+    let view: Cow<GameState> = Cow::Owned(state.mirror());
+    if let Some(action) = sample_valid_action(&view) {
+        if let Some(event) = action.to_event(&view) {
+            return event.get_mirror(false);
+        }
+    }
+    GameEvent::Move {
+        player_id: state.active_player_id,
+        moves: (CheckerMove::default(), CheckerMove::default()),
+    }
+}
+
+// ── Game simulation ───────────────────────────────────────────────────────────
+
+const MAX_STEPS: usize = 8_000;
+
+/// Simulate one self-play game.
+/// Player 1 (White) uses `weights_p1`, player 2 (Black) uses `weights_p2`.
+/// Returns the winner's player_id, or None on truncation.
+fn run_selfplay_game(
+    weights_p1: &Weights,
+    weights_p2: &Weights,
+    roller: &mut DiceRoller,
+) -> Option<u64> {
+    let mut state = GameState::new_with_players("Bot1", "Bot2");
+    let mut steps = 0;
+
+    while state.stage != Stage::Ended {
+        steps += 1;
+        if steps > MAX_STEPS {
+            return None;
+        }
+
+        match state.turn_stage {
+            TurnStage::RollDice => {
+                let _ = state.consume(&GameEvent::Roll { player_id: state.active_player_id });
+                let dice = roller.roll();
+                let _ = state
+                    .consume(&GameEvent::RollResult { player_id: state.active_player_id, dice });
+            }
+            _ => {
+                let event = if state.active_player_id == 1 {
+                    heuristic_action(&state, Color::White, weights_p1)
+                } else {
+                    heuristic_action(&state, Color::Black, weights_p2)
+                };
+                if state.consume(&event).is_err() {
+                    return None;
+                }
+            }
+        }
+    }
+
+    state.determine_winner()
+}
+
+/// Estimate challenger's win rate against champion via self-play.
+/// Runs n_games with challenger as White and n_games with challenger as Black
+/// to eliminate first-move bias. Returns fraction of games won by challenger.
+fn self_play_win_rate(
+    challenger: &Weights,
+    champion: &Weights,
+    n_games: usize,
+    roller: &mut DiceRoller,
+) -> f32 {
+    let mut challenger_wins = 0usize;
+    let total = n_games * 2;
+
+    for _ in 0..n_games {
+        // Challenger as White (player 1)
+        if run_selfplay_game(challenger, champion, roller) == Some(1) {
+            challenger_wins += 1;
+        }
+        // Challenger as Black (player 2)
+        if run_selfplay_game(champion, challenger, roller) == Some(2) {
+            challenger_wins += 1;
+        }
+    }
+
+    challenger_wins as f32 / total as f32
+}
+
+/// Win rate of the heuristic bot (player 1 / White) against the random bot.
+/// Useful as a sanity check, but not suitable for hill-climbing (win rate ≈ 100%).
+fn vs_random_win_rate(weights: &Weights, n_games: usize, roller: &mut DiceRoller) -> f32 {
+    let mut wins = 0usize;
+    for _ in 0..n_games {
+        let mut state = GameState::new_with_players("Heuristic", "Random");
+        let mut steps = 0;
+        while state.stage != Stage::Ended {
+            steps += 1;
+            if steps > MAX_STEPS {
+                break;
+            }
+            match state.turn_stage {
+                TurnStage::RollDice => {
+                    let _ = state.consume(&GameEvent::Roll { player_id: state.active_player_id });
+                    let dice = roller.roll();
+                    let _ = state.consume(&GameEvent::RollResult {
+                        player_id: state.active_player_id,
+                        dice,
+                    });
+                }
+                _ => {
+                    let event = if state.active_player_id == 1 {
+                        heuristic_action(&state, Color::White, weights)
+                    } else {
+                        random_action(&state)
+                    };
+                    let _ = state.consume(&event);
+                }
+            }
+        }
+        if state.determine_winner() == Some(1) {
+            wins += 1;
+        }
+    }
+    wins as f32 / n_games as f32
+}
+
+// ── Hill-climbing ─────────────────────────────────────────────────────────────
+
+/// Coordinate-descent hill-climbing via self-play.
+///
+/// Compares each candidate (champion ± step on one weight) against the current
+/// champion. Accepts the candidate if its self-play win rate exceeds `0.5 + margin`
+/// (default 0.52 ≈ 2σ at N=150 games, i.e. N=300 total trials).
+/// Halves step when a full pass produces no improvement; stops when step < min_step.
+fn hill_climb(
+    initial: Weights,
+    n_games: usize,
+    initial_step: f32,
+    min_step: f32,
+    margin: f32,
+    roller: &mut DiceRoller,
+) -> Weights {
+    let threshold = 0.5 + margin;
+    let mut champion = initial;
+    let mut step = initial_step;
+
+    println!("Initial weights: {:?}", champion);
+    println!("Acceptance threshold: >{:.0}%  (margin={:.3})", threshold * 100.0, margin);
+    println!();
+
+    let mut iteration = 0usize;
+    while step >= min_step {
+        let mut improved = false;
+        iteration += 1;
+
+        for i in 0..5 {
+            // Probe +step (clamped to non-negative).
+            let up = champion.with(i, (champion.get(i) + step).max(0.0));
+            let wr_up = self_play_win_rate(&up, &champion, n_games, roller);
+
+            // Probe -step.
+            let dn = champion.with(i, (champion.get(i) - step).max(0.0));
+            let wr_dn = self_play_win_rate(&dn, &champion, n_games, roller);
+
+            let best_wr = wr_up.max(wr_dn);
+            if best_wr >= threshold {
+                let (accepted, wr_accepted) =
+                    if wr_up >= wr_dn { (up, wr_up) } else { (dn, wr_dn) };
+                let dir = if wr_up >= wr_dn { '+' } else { '-' };
+                println!(
+                    "  iter {:3}  {} {}{:.3}  self-play win {:.1}%  {:?}",
+                    iteration,
+                    WEIGHT_NAMES[i],
+                    dir,
+                    step,
+                    wr_accepted * 100.0,
+                    accepted
+                );
+                champion = accepted;
+                improved = true;
+            }
+        }
+
+        if !improved {
+            step *= 0.5;
+            println!(
+                "  iter {:3}  no improvement at step {:.3} → halving to {:.3}",
+                iteration,
+                step * 2.0,
+                step
+            );
+        }
+    }
+
+    champion
+}
+
+// ── CLI args ──────────────────────────────────────────────────────────────────
+
+struct Args {
+    n_games: usize,
+    seed: Option<u64>,
+    initial_step: f32,
+    min_step: f32,
+    margin: f32,
+    vs_random: bool,
+}
+
+fn parse_args() -> Args {
+    let args: Vec<String> = std::env::args().collect();
+    let mut n_games = 200usize;
+    let mut seed: Option<u64> = None;
+    let mut initial_step = 2.0f32;
+    let mut min_step = 0.1f32;
+    // At N=200 games × 2 directions = 400 total trials, σ ≈ sqrt(0.25/400) ≈ 2.5%.
+    // margin=0.03 ≈ 1.2σ: catches real improvements while filtering most noise.
+    let mut margin = 0.03f32;
+    let mut vs_random = false;
+
+    let mut i = 1;
+    while i < args.len() {
+        match args[i].as_str() {
+            "--games" => {
+                i += 1;
+                if let Some(v) = args.get(i).and_then(|s| s.parse().ok()) {
+                    n_games = v;
+                }
+            }
+            "--seed" => {
+                i += 1;
+                seed = args.get(i).and_then(|s| s.parse().ok());
+            }
+            "--step" => {
+                i += 1;
+                if let Some(v) = args.get(i).and_then(|s| s.parse().ok()) {
+                    initial_step = v;
+                }
+            }
+            "--min-step" => {
+                i += 1;
+                if let Some(v) = args.get(i).and_then(|s| s.parse().ok()) {
+                    min_step = v;
+                }
+            }
+            "--margin" => {
+                i += 1;
+                if let Some(v) = args.get(i).and_then(|s| s.parse().ok()) {
+                    margin = v;
+                }
+            }
+            "--vs-random" => vs_random = true,
+            _ => {}
+        }
+        i += 1;
+    }
+
+    Args { n_games, seed, initial_step, min_step, margin, vs_random }
+}
+
+// ── Main ──────────────────────────────────────────────────────────────────────
+
+fn main() {
+    let args = parse_args();
+
+    println!("=== Trictrac weight tuner ===");
+    println!("mode       : {}", if args.vs_random { "vs-random (no hill-climbing)" } else { "self-play hill-climbing" });
+    println!("games/eval : {}", args.n_games);
+    println!("seed       : {:?}", args.seed);
+    if !args.vs_random {
+        println!("step range : {:.3} → {:.3}", args.initial_step, args.min_step);
+        println!("margin     : >{:.0}%", (0.5 + args.margin) * 100.0);
+    }
+    println!();
+
+    let mut roller = DiceRoller::new(args.seed);
+    let t0 = Instant::now();
+
+    if args.vs_random {
+        let wr = vs_random_win_rate(&Weights::initial(), args.n_games, &mut roller);
+        println!("vs-random win rate: {:.1}%  ({} games)", wr * 100.0, args.n_games);
+        println!("Elapsed: {:.1} s", t0.elapsed().as_secs_f64());
+        return;
+    }
+
+    let best = hill_climb(
+        Weights::initial(),
+        args.n_games,
+        args.initial_step,
+        args.min_step,
+        args.margin,
+        &mut roller,
+    );
+
+    let elapsed = t0.elapsed();
+    println!();
+    println!("=== Optimised weights (paste into bot_local.rs) ===");
+    println!("  corner_filled:     {}", best.corner_filled);
+    println!("  quarter_filled:    {}", best.quarter_filled);
+    println!("  quarter_progress:  {}", best.quarter_progress);
+    println!("  singleton_penalty: {}", best.singleton_penalty);
+    println!("  exit_zone:         {}", best.exit_zone);
+    println!();
+    println!("Elapsed: {:.1} s", elapsed.as_secs_f64());
+}