From a31d2c1f30398ce412211905d3d3683e7b28f780 Mon Sep 17 00:00:00 2001 From: Henri Bourcereau Date: Sat, 7 Mar 2026 20:12:59 +0100 Subject: [PATCH] feat(spiel_bot): init crate & implements `GameEnv` trait + `TrictracEnv` --- Cargo.lock | 9 + Cargo.toml | 2 +- spiel_bot/Cargo.toml | 9 + spiel_bot/src/env/mod.rs | 121 ++++++++ spiel_bot/src/env/trictrac.rs | 535 ++++++++++++++++++++++++++++++++++ spiel_bot/src/lib.rs | 1 + 6 files changed, 676 insertions(+), 1 deletion(-) create mode 100644 spiel_bot/Cargo.toml create mode 100644 spiel_bot/src/env/mod.rs create mode 100644 spiel_bot/src/env/trictrac.rs create mode 100644 spiel_bot/src/lib.rs diff --git a/Cargo.lock b/Cargo.lock index a43261e..d1f5a20 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -5891,6 +5891,15 @@ dependencies = [ "windows-sys 0.60.2", ] +[[package]] +name = "spiel_bot" +version = "0.1.0" +dependencies = [ + "anyhow", + "rand 0.9.2", + "trictrac-store", +] + [[package]] name = "spin" version = "0.10.0" diff --git a/Cargo.toml b/Cargo.toml index b9e6d45..4c2eb15 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,4 +1,4 @@ [workspace] resolver = "2" -members = ["client_cli", "bot", "store"] +members = ["client_cli", "bot", "store", "spiel_bot"] diff --git a/spiel_bot/Cargo.toml b/spiel_bot/Cargo.toml new file mode 100644 index 0000000..2459f51 --- /dev/null +++ b/spiel_bot/Cargo.toml @@ -0,0 +1,9 @@ +[package] +name = "spiel_bot" +version = "0.1.0" +edition = "2021" + +[dependencies] +trictrac-store = { path = "../store" } +anyhow = "1" +rand = "0.9" diff --git a/spiel_bot/src/env/mod.rs b/spiel_bot/src/env/mod.rs new file mode 100644 index 0000000..42b4ae0 --- /dev/null +++ b/spiel_bot/src/env/mod.rs @@ -0,0 +1,121 @@ +//! Game environment abstraction — the minimal "Rust OpenSpiel". +//! +//! A `GameEnv` describes the rules of a two-player, zero-sum game that may +//! contain stochastic (chance) nodes. Algorithms such as AlphaZero, DQN, +//! and PPO interact with a game exclusively through this trait. +//! 
+//! # Node taxonomy
+//!
+//! Every game position belongs to one of four categories, returned by
+//! [`GameEnv::current_player`]:
+//!
+//! | [`Player`] | Meaning |
+//! |-----------|---------|
+//! | `P1` | Player 1 (index 0) must choose an action |
+//! | `P2` | Player 2 (index 1) must choose an action |
+//! | `Chance` | A stochastic event must be sampled (dice roll, card draw…) |
+//! | `Terminal` | The game is over; [`GameEnv::returns`] is meaningful |
+//!
+//! # Perspective convention
+//!
+//! [`GameEnv::observation`] always returns the board from *the requested
+//! player's* point of view. Callers pass `pov = 0` for Player 1 and
+//! `pov = 1` for Player 2. The implementation is responsible for any
+//! mirroring required (e.g. Trictrac always reasons from White's side).
+
+pub mod trictrac;
+pub use trictrac::TrictracEnv;
+
+/// Who controls the current game node.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum Player {
+    /// Player 1 (index 0) is to move.
+    P1,
+    /// Player 2 (index 1) is to move.
+    P2,
+    /// A stochastic event (dice roll, etc.) must be resolved.
+    Chance,
+    /// The game is over.
+    Terminal,
+}
+
+impl Player {
+    /// Returns the player index (0 or 1) if this is a decision node,
+    /// or `None` for `Chance` / `Terminal`.
+    pub fn index(self) -> Option<usize> {
+        match self {
+            Player::P1 => Some(0),
+            Player::P2 => Some(1),
+            _ => None,
+        }
+    }
+
+    pub fn is_decision(self) -> bool {
+        matches!(self, Player::P1 | Player::P2)
+    }
+
+    pub fn is_chance(self) -> bool {
+        self == Player::Chance
+    }
+
+    pub fn is_terminal(self) -> bool {
+        self == Player::Terminal
+    }
+}
+
+/// Trait that completely describes a two-player zero-sum game.
+///
+/// Implementors must be cheaply cloneable (the type is used as a stateless
+/// factory; the mutable game state lives in `Self::State`).
+pub trait GameEnv: Clone + Send + Sync + 'static {
+    /// The mutable game state. Must be `Clone` so MCTS can copy
+    /// game trees without touching the environment.
+    type State: Clone + Send + Sync;
+
+    // ── State creation ────────────────────────────────────────────────────
+
+    /// Create a fresh game state at the initial position.
+    fn new_game(&self) -> Self::State;
+
+    // ── Node queries ──────────────────────────────────────────────────────
+
+    /// Classify the current node.
+    fn current_player(&self, s: &Self::State) -> Player;
+
+    /// Legal action indices at a decision node (`current_player` is `P1`/`P2`).
+    ///
+    /// The returned indices are in `[0, action_space())`.
+    /// The result is unspecified (may panic or return empty) when called at a
+    /// `Chance` or `Terminal` node.
+    fn legal_actions(&self, s: &Self::State) -> Vec<usize>;
+
+    // ── State mutation ────────────────────────────────────────────────────
+
+    /// Apply a player action. `action` must be a value returned by
+    /// [`legal_actions`] for the current state.
+    fn apply(&self, s: &mut Self::State, action: usize);
+
+    /// Sample and apply a stochastic outcome. Must only be called when
+    /// `current_player(s) == Player::Chance`.
+    fn apply_chance<R: rand::Rng>(&self, s: &mut Self::State, rng: &mut R);
+
+    // ── Observation ───────────────────────────────────────────────────────
+
+    /// Observation tensor from player `pov`'s perspective (0 = P1, 1 = P2).
+    /// The returned slice has exactly [`obs_size()`] elements, all in `[0, 1]`.
+    fn observation(&self, s: &Self::State, pov: usize) -> Vec<f32>;
+
+    /// Number of floats returned by [`observation`].
+    fn obs_size(&self) -> usize;
+
+    /// Total number of distinct action indices (the policy head output size).
+    fn action_space(&self) -> usize;
+
+    // ── Terminal values ───────────────────────────────────────────────────
+
+    /// Game outcome for each player, or `None` if the game is not over.
+    ///
+    /// Values are in `[-1, 1]`: `+1.0` = win, `-1.0` = loss, `0.0` = draw.
+    /// Index 0 = Player 1, index 1 = Player 2.
+ fn returns(&self, s: &Self::State) -> Option<[f32; 2]>; +} diff --git a/spiel_bot/src/env/trictrac.rs b/spiel_bot/src/env/trictrac.rs new file mode 100644 index 0000000..99ba058 --- /dev/null +++ b/spiel_bot/src/env/trictrac.rs @@ -0,0 +1,535 @@ +//! [`GameEnv`] implementation for Trictrac. +//! +//! # Game flow (schools_enabled = false) +//! +//! With scoring schools disabled (the standard training configuration), +//! `MarkPoints` and `MarkAdvPoints` stages are never reached — the engine +//! applies them automatically inside `RollResult` and `Move`. The only +//! four stages that actually occur are: +//! +//! | `TurnStage` | [`Player`] kind | Handled by | +//! |-------------|-----------------|------------| +//! | `RollDice` | `Chance` | [`apply_chance`] | +//! | `RollWaiting` | `Chance` | [`apply_chance`] | +//! | `HoldOrGoChoice` | `P1`/`P2` | [`apply`] | +//! | `Move` | `P1`/`P2` | [`apply`] | +//! +//! # Perspective +//! +//! The Trictrac engine always reasons from White's perspective. Player 1 is +//! White; Player 2 is Black. When Player 2 is active, the board is mirrored +//! before computing legal actions / the observation tensor, and the resulting +//! event is mirrored back before being applied to the real state. This +//! mirrors the pattern used in `cxxengine.rs` and `random_game.rs`. + +use trictrac_store::{ + training_common::{get_valid_action_indices, TrictracAction, ACTION_SPACE_SIZE}, + Dice, GameEvent, GameState, Stage, TurnStage, +}; + +use super::{GameEnv, Player}; + +/// Stateless factory that produces Trictrac [`GameState`] environments. +/// +/// Schools (`schools_enabled`) are always disabled — scoring is automatic. 
+#[derive(Clone, Debug, Default)]
+pub struct TrictracEnv;
+
+impl GameEnv for TrictracEnv {
+    type State = GameState;
+
+    // ── State creation ────────────────────────────────────────────────────
+
+    fn new_game(&self) -> GameState {
+        GameState::new_with_players("P1", "P2")
+    }
+
+    // ── Node queries ──────────────────────────────────────────────────────
+
+    fn current_player(&self, s: &GameState) -> Player {
+        if s.stage == Stage::Ended {
+            return Player::Terminal;
+        }
+        match s.turn_stage {
+            TurnStage::RollDice | TurnStage::RollWaiting => Player::Chance,
+            _ => {
+                if s.active_player_id == 1 {
+                    Player::P1
+                } else {
+                    Player::P2
+                }
+            }
+        }
+    }
+
+    /// Returns the legal action indices for the active player.
+    ///
+    /// The board is automatically mirrored for Player 2 so that the engine
+    /// always reasons from White's perspective. The returned indices are
+    /// identical in meaning for both players (checker ordinals are
+    /// perspective-relative).
+    ///
+    /// # Panics
+    ///
+    /// Panics in debug builds if called at a `Chance` or `Terminal` node.
+    fn legal_actions(&self, s: &GameState) -> Vec<usize> {
+        debug_assert!(
+            self.current_player(s).is_decision(),
+            "legal_actions called at a non-decision node (turn_stage={:?})",
+            s.turn_stage
+        );
+        let indices = if s.active_player_id == 2 {
+            get_valid_action_indices(&s.mirror())
+        } else {
+            get_valid_action_indices(s)
+        };
+        indices.unwrap_or_default()
+    }
+
+    // ── State mutation ────────────────────────────────────────────────────
+
+    /// Apply a player action index to the game state.
+    ///
+    /// For Player 2, the action is decoded against the mirrored board and
+    /// the resulting event is un-mirrored before being applied.
+    ///
+    /// # Panics
+    ///
+    /// Panics in debug builds if `action` cannot be decoded or does not
+    /// produce a valid event for the current state.
+    fn apply(&self, s: &mut GameState, action: usize) {
+        let needs_mirror = s.active_player_id == 2;
+
+        let event = if needs_mirror {
+            let view = s.mirror();
+            TrictracAction::from_action_index(action)
+                .and_then(|a| a.to_event(&view))
+                .map(|e| e.get_mirror(false))
+        } else {
+            TrictracAction::from_action_index(action).and_then(|a| a.to_event(s))
+        };
+
+        match event {
+            Some(e) => {
+                s.consume(&e).expect("apply: consume failed for valid action");
+            }
+            None => {
+                panic!("apply: action index {action} produced no event in state {s}");
+            }
+        }
+    }
+
+    /// Sample dice and advance through a chance node.
+    ///
+    /// Handles both `RollDice` (triggers the roll mechanism, then samples
+    /// dice) and `RollWaiting` (only samples dice) in a single call so that
+    /// callers never need to distinguish the two.
+    ///
+    /// # Panics
+    ///
+    /// Panics in debug builds if called at a non-Chance node.
+    fn apply_chance<R: rand::Rng>(&self, s: &mut GameState, rng: &mut R) {
+        debug_assert!(
+            self.current_player(s).is_chance(),
+            "apply_chance called at a non-Chance node (turn_stage={:?})",
+            s.turn_stage
+        );
+
+        // Step 1: RollDice → RollWaiting (player initiates the roll).
+        if s.turn_stage == TurnStage::RollDice {
+            s.consume(&GameEvent::Roll {
+                player_id: s.active_player_id,
+            })
+            .expect("apply_chance: Roll event failed");
+        }
+
+        // Step 2: RollWaiting → Move / HoldOrGoChoice / Ended.
+        // With schools_enabled=false, point marking is automatic inside consume().
+        let dice = Dice {
+            values: (rng.random_range(1u8..=6), rng.random_range(1u8..=6)),
+        };
+        s.consume(&GameEvent::RollResult {
+            player_id: s.active_player_id,
+            dice,
+        })
+        .expect("apply_chance: RollResult event failed");
+    }
+
+    // ── Observation ───────────────────────────────────────────────────────
+
+    fn observation(&self, s: &GameState, pov: usize) -> Vec<f32> {
+        if pov == 0 {
+            s.to_tensor()
+        } else {
+            s.mirror().to_tensor()
+        }
+    }
+
+    fn obs_size(&self) -> usize {
+        217
+    }
+
+    fn action_space(&self) -> usize {
+        ACTION_SPACE_SIZE
+    }
+
+    // ── Terminal values ───────────────────────────────────────────────────
+
+    /// Returns `Some([r1, r2])` when the game is over, `None` otherwise.
+    ///
+    /// The winner (higher cumulative score) receives `+1.0`; the loser
+    /// receives `-1.0`; an exact tie gives `0.0` each. A cumulative score
+    /// is `holes × 12 + points`.
+    fn returns(&self, s: &GameState) -> Option<[f32; 2]> {
+        if s.stage != Stage::Ended {
+            return None;
+        }
+        let score = |id: u64| -> i32 {
+            s.players
+                .get(&id)
+                .map(|p| p.holes as i32 * 12 + p.points as i32)
+                .unwrap_or(0)
+        };
+        let s1 = score(1);
+        let s2 = score(2);
+        Some(match s1.cmp(&s2) {
+            std::cmp::Ordering::Greater => [1.0, -1.0],
+            std::cmp::Ordering::Less => [-1.0, 1.0],
+            std::cmp::Ordering::Equal => [0.0, 0.0],
+        })
+    }
+}
+
+// ── Tests ─────────────────────────────────────────────────────────────────────
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use rand::{rngs::SmallRng, Rng, SeedableRng};
+
+    fn env() -> TrictracEnv {
+        TrictracEnv
+    }
+
+    fn seeded_rng(seed: u64) -> SmallRng {
+        SmallRng::seed_from_u64(seed)
+    }
+
+    // ── Initial state ─────────────────────────────────────────────────────
+
+    #[test]
+    fn new_game_is_chance_node() {
+        let e = env();
+        let s = e.new_game();
+        // A fresh game starts at RollDice — a Chance node.
+ assert_eq!(e.current_player(&s), Player::Chance); + assert!(e.returns(&s).is_none()); + } + + #[test] + fn new_game_is_not_terminal() { + let e = env(); + let s = e.new_game(); + assert_ne!(e.current_player(&s), Player::Terminal); + assert!(e.returns(&s).is_none()); + } + + // ── Chance nodes ────────────────────────────────────────────────────── + + #[test] + fn apply_chance_reaches_decision_node() { + let e = env(); + let mut s = e.new_game(); + let mut rng = seeded_rng(1); + + // A single chance step must yield a decision node (or end the game, + // which only happens after 12 holes — impossible on the first roll). + e.apply_chance(&mut s, &mut rng); + let p = e.current_player(&s); + assert!( + p.is_decision(), + "expected decision node after first roll, got {p:?}" + ); + } + + #[test] + fn apply_chance_from_rollwaiting() { + // Check that apply_chance works when called mid-way (at RollWaiting). + let e = env(); + let mut s = e.new_game(); + assert_eq!(s.turn_stage, TurnStage::RollDice); + + // Manually advance to RollWaiting. 
+ s.consume(&GameEvent::Roll { player_id: s.active_player_id }) + .unwrap(); + assert_eq!(s.turn_stage, TurnStage::RollWaiting); + + let mut rng = seeded_rng(2); + e.apply_chance(&mut s, &mut rng); + + let p = e.current_player(&s); + assert!(p.is_decision() || p.is_terminal()); + } + + // ── Legal actions ───────────────────────────────────────────────────── + + #[test] + fn legal_actions_nonempty_after_roll() { + let e = env(); + let mut s = e.new_game(); + let mut rng = seeded_rng(3); + + e.apply_chance(&mut s, &mut rng); + assert!(e.current_player(&s).is_decision()); + + let actions = e.legal_actions(&s); + assert!( + !actions.is_empty(), + "legal_actions must be non-empty at a decision node" + ); + } + + #[test] + fn legal_actions_within_action_space() { + let e = env(); + let mut s = e.new_game(); + let mut rng = seeded_rng(4); + + e.apply_chance(&mut s, &mut rng); + for &a in e.legal_actions(&s).iter() { + assert!( + a < e.action_space(), + "action {a} out of bounds (action_space={})", + e.action_space() + ); + } + } + + // ── Observations ────────────────────────────────────────────────────── + + #[test] + fn observation_has_correct_size() { + let e = env(); + let mut s = e.new_game(); + let mut rng = seeded_rng(5); + e.apply_chance(&mut s, &mut rng); + + assert_eq!(e.observation(&s, 0).len(), e.obs_size()); + assert_eq!(e.observation(&s, 1).len(), e.obs_size()); + } + + #[test] + fn observation_values_in_unit_interval() { + let e = env(); + let mut s = e.new_game(); + let mut rng = seeded_rng(6); + e.apply_chance(&mut s, &mut rng); + + for (pov, obs) in [(0, e.observation(&s, 0)), (1, e.observation(&s, 1))] { + for (i, &v) in obs.iter().enumerate() { + assert!( + v >= 0.0 && v <= 1.0, + "pov={pov}: obs[{i}] = {v} is outside [0,1]" + ); + } + } + } + + #[test] + fn p1_and_p2_observations_differ() { + // The board is mirrored for P2, so the two observations should differ + // whenever there are checkers in non-symmetric positions (always true + // in a real 
game after a few moves). + let e = env(); + let mut s = e.new_game(); + let mut rng = seeded_rng(7); + + // Advance far enough that the board is non-trivial. + for _ in 0..6 { + while e.current_player(&s).is_chance() { + e.apply_chance(&mut s, &mut rng); + } + if e.current_player(&s).is_terminal() { + break; + } + let actions = e.legal_actions(&s); + e.apply(&mut s, actions[0]); + } + + if !e.current_player(&s).is_terminal() { + let obs0 = e.observation(&s, 0); + let obs1 = e.observation(&s, 1); + assert_ne!(obs0, obs1, "P1 and P2 observations should differ on a non-symmetric board"); + } + } + + // ── Applying actions ────────────────────────────────────────────────── + + #[test] + fn apply_changes_state() { + let e = env(); + let mut s = e.new_game(); + let mut rng = seeded_rng(8); + + e.apply_chance(&mut s, &mut rng); + assert!(e.current_player(&s).is_decision()); + + let before = s.clone(); + let action = e.legal_actions(&s)[0]; + e.apply(&mut s, action); + + assert_ne!( + before.turn_stage, s.turn_stage, + "state must change after apply" + ); + } + + #[test] + fn apply_all_legal_actions_do_not_panic() { + // Verify that every action returned by legal_actions can be applied + // without panicking (on several independent copies of the same state). + let e = env(); + let mut s = e.new_game(); + let mut rng = seeded_rng(9); + + e.apply_chance(&mut s, &mut rng); + assert!(e.current_player(&s).is_decision()); + + for action in e.legal_actions(&s) { + let mut copy = s.clone(); + e.apply(&mut copy, action); // must not panic + } + } + + // ── Full game ───────────────────────────────────────────────────────── + + /// Run a complete game with random actions through the `GameEnv` trait + /// and verify that: + /// - The game terminates. + /// - `returns()` is `Some` at the end. + /// - The outcome is valid: scores sum to 0 (zero-sum) or each player's + /// score is ±1 / 0. + /// - No step panics. 
+ #[test] + fn full_random_game_terminates() { + let e = env(); + let mut s = e.new_game(); + let mut rng = seeded_rng(42); + let max_steps = 50_000; + + for step in 0..max_steps { + match e.current_player(&s) { + Player::Terminal => break, + Player::Chance => e.apply_chance(&mut s, &mut rng), + Player::P1 | Player::P2 => { + let actions = e.legal_actions(&s); + assert!(!actions.is_empty(), "step {step}: empty legal actions at decision node"); + let idx = rng.random_range(0..actions.len()); + e.apply(&mut s, actions[idx]); + } + } + assert!(step < max_steps - 1, "game did not terminate within {max_steps} steps"); + } + + let result = e.returns(&s); + assert!(result.is_some(), "returns() must be Some at Terminal"); + + let [r1, r2] = result.unwrap(); + let sum = r1 + r2; + assert!( + (sum.abs() < 1e-5) || (sum - 0.0).abs() < 1e-5, + "game must be zero-sum: r1={r1}, r2={r2}, sum={sum}" + ); + assert!( + r1.abs() <= 1.0 && r2.abs() <= 1.0, + "returns must be in [-1,1]: r1={r1}, r2={r2}" + ); + } + + /// Run multiple games with different seeds to stress-test for panics. + #[test] + fn multiple_games_no_panic() { + let e = env(); + let max_steps = 20_000; + + for seed in 0..10u64 { + let mut s = e.new_game(); + let mut rng = seeded_rng(seed); + + for _ in 0..max_steps { + match e.current_player(&s) { + Player::Terminal => break, + Player::Chance => e.apply_chance(&mut s, &mut rng), + Player::P1 | Player::P2 => { + let actions = e.legal_actions(&s); + let idx = rng.random_range(0..actions.len()); + e.apply(&mut s, actions[idx]); + } + } + } + } + } + + // ── Returns ─────────────────────────────────────────────────────────── + + #[test] + fn returns_none_mid_game() { + let e = env(); + let mut s = e.new_game(); + let mut rng = seeded_rng(11); + + // Advance a few steps but do not finish the game. 
+ for _ in 0..4 { + match e.current_player(&s) { + Player::Terminal => break, + Player::Chance => e.apply_chance(&mut s, &mut rng), + Player::P1 | Player::P2 => { + let actions = e.legal_actions(&s); + e.apply(&mut s, actions[0]); + } + } + } + + if !e.current_player(&s).is_terminal() { + assert!( + e.returns(&s).is_none(), + "returns() must be None before the game ends" + ); + } + } + + // ── Player 2 actions ────────────────────────────────────────────────── + + /// Verify that Player 2 (Black) can take actions without panicking, + /// and that the state advances correctly. + #[test] + fn player2_can_act() { + let e = env(); + let mut s = e.new_game(); + let mut rng = seeded_rng(12); + + // Keep stepping until Player 2 gets a turn. + let max_steps = 5_000; + let mut p2_acted = false; + + for _ in 0..max_steps { + match e.current_player(&s) { + Player::Terminal => break, + Player::Chance => e.apply_chance(&mut s, &mut rng), + Player::P2 => { + let actions = e.legal_actions(&s); + assert!(!actions.is_empty()); + e.apply(&mut s, actions[0]); + p2_acted = true; + break; + } + Player::P1 => { + let actions = e.legal_actions(&s); + e.apply(&mut s, actions[0]); + } + } + } + + assert!(p2_acted, "Player 2 never got a turn in {max_steps} steps"); + } +} diff --git a/spiel_bot/src/lib.rs b/spiel_bot/src/lib.rs new file mode 100644 index 0000000..3d7924f --- /dev/null +++ b/spiel_bot/src/lib.rs @@ -0,0 +1 @@ +pub mod env;