feat(spiel_bot): init crate & implement GameEnv trait + TrictracEnv

This commit is contained in:
Henri Bourcereau 2026-03-07 20:12:59 +01:00
parent a6644e3c9d
commit df05a43022
6 changed files with 676 additions and 1 deletions

9
Cargo.lock generated
View file

@ -5891,6 +5891,15 @@ dependencies = [
"windows-sys 0.60.2",
]
[[package]]
name = "spiel_bot"
version = "0.1.0"
dependencies = [
"anyhow",
"rand 0.9.2",
"trictrac-store",
]
[[package]]
name = "spin"
version = "0.10.0"

View file

@ -1,4 +1,4 @@
[workspace]
resolver = "2"
members = ["client_cli", "bot", "store"]
members = ["client_cli", "bot", "store", "spiel_bot"]

9
spiel_bot/Cargo.toml Normal file
View file

@ -0,0 +1,9 @@
[package]
name = "spiel_bot"
version = "0.1.0"
edition = "2021"
[dependencies]
trictrac-store = { path = "../store" }
anyhow = "1"
rand = "0.9"

121
spiel_bot/src/env/mod.rs vendored Normal file
View file

@ -0,0 +1,121 @@
//! Game environment abstraction — the minimal "Rust OpenSpiel".
//!
//! A `GameEnv` describes the rules of a two-player, zero-sum game that may
//! contain stochastic (chance) nodes. Algorithms such as AlphaZero, DQN,
//! and PPO interact with a game exclusively through this trait.
//!
//! # Node taxonomy
//!
//! Every game position belongs to one of four categories, returned by
//! [`GameEnv::current_player`]:
//!
//! | [`Player`] | Meaning |
//! |-----------|---------|
//! | `P1` | Player 1 (index 0) must choose an action |
//! | `P2` | Player 2 (index 1) must choose an action |
//! | `Chance` | A stochastic event must be sampled (dice roll, card draw…) |
//! | `Terminal` | The game is over; [`GameEnv::returns`] is meaningful |
//!
//! # Perspective convention
//!
//! [`GameEnv::observation`] always returns the board from *the requested
//! player's* point of view. Callers pass `pov = 0` for Player 1 and
//! `pov = 1` for Player 2. The implementation is responsible for any
//! mirroring required (e.g. Trictrac always reasons from White's side).
pub mod trictrac;
pub use trictrac::TrictracEnv;
/// Who controls the current game node.
///
/// `Hash` is derived alongside `Eq` so node kinds can be used directly as
/// `HashMap`/`HashSet` keys (e.g. bucketing search statistics per node type).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Player {
    /// Player 1 (index 0) is to move.
    P1,
    /// Player 2 (index 1) is to move.
    P2,
    /// A stochastic event (dice roll, etc.) must be resolved.
    Chance,
    /// The game is over.
    Terminal,
}
impl Player {
    /// The 0-based index of the player to move, if any.
    ///
    /// `P1` maps to `0` and `P2` to `1`; chance and terminal nodes have no
    /// controlling player and yield `None`.
    pub fn index(self) -> Option<usize> {
        match self {
            Player::P1 => Some(0),
            Player::P2 => Some(1),
            Player::Chance | Player::Terminal => None,
        }
    }

    /// True when one of the two players must choose an action.
    pub fn is_decision(self) -> bool {
        self.index().is_some()
    }

    /// True at a stochastic (dice-roll) node.
    pub fn is_chance(self) -> bool {
        matches!(self, Player::Chance)
    }

    /// True once the game has ended.
    pub fn is_terminal(self) -> bool {
        matches!(self, Player::Terminal)
    }
}
/// Trait that completely describes a two-player zero-sum game.
///
/// Implementors must be cheaply cloneable (the type is used as a stateless
/// factory; the mutable game state lives in `Self::State`).
pub trait GameEnv: Clone + Send + Sync + 'static {
    /// The mutable game state. Must be `Clone` so MCTS can copy
    /// game trees without touching the environment.
    type State: Clone + Send + Sync;

    // ── State creation ────────────────────────────────────────────────────

    /// Create a fresh game state at the initial position.
    fn new_game(&self) -> Self::State;

    // ── Node queries ──────────────────────────────────────────────────────

    /// Classify the current node (see the module docs for the taxonomy).
    fn current_player(&self, s: &Self::State) -> Player;

    /// Legal action indices at a decision node (`current_player` is `P1`/`P2`).
    ///
    /// The returned indices are in `[0, action_space())`.
    /// The result is unspecified (may panic or return empty) when called at a
    /// `Chance` or `Terminal` node.
    fn legal_actions(&self, s: &Self::State) -> Vec<usize>;

    // ── State mutation ────────────────────────────────────────────────────

    /// Apply a player action. `action` must be a value returned by
    /// [`Self::legal_actions`] for the current state.
    fn apply(&self, s: &mut Self::State, action: usize);

    /// Sample and apply a stochastic outcome. Must only be called when
    /// `current_player(s) == Player::Chance`.
    fn apply_chance<R: rand::Rng>(&self, s: &mut Self::State, rng: &mut R);

    // ── Observation ───────────────────────────────────────────────────────

    /// Observation tensor from player `pov`'s perspective (0 = P1, 1 = P2).
    /// The returned slice has exactly [`Self::obs_size`] elements, all in `[0, 1]`.
    fn observation(&self, s: &Self::State, pov: usize) -> Vec<f32>;

    /// Number of floats returned by [`Self::observation`].
    fn obs_size(&self) -> usize;

    /// Total number of distinct action indices (the policy head output size).
    fn action_space(&self) -> usize;

    // ── Terminal values ───────────────────────────────────────────────────

    /// Game outcome for each player, or `None` if the game is not over.
    ///
    /// Values are in `[-1, 1]`: `+1.0` = win, `-1.0` = loss, `0.0` = draw.
    /// Index 0 = Player 1, index 1 = Player 2.
    fn returns(&self, s: &Self::State) -> Option<[f32; 2]>;
}

535
spiel_bot/src/env/trictrac.rs vendored Normal file
View file

@ -0,0 +1,535 @@
//! [`GameEnv`] implementation for Trictrac.
//!
//! # Game flow (schools_enabled = false)
//!
//! With scoring schools disabled (the standard training configuration),
//! `MarkPoints` and `MarkAdvPoints` stages are never reached — the engine
//! applies them automatically inside `RollResult` and `Move`. The only
//! four stages that actually occur are:
//!
//! | `TurnStage` | [`Player`] kind | Handled by |
//! |-------------|-----------------|------------|
//! | `RollDice` | `Chance` | [`apply_chance`] |
//! | `RollWaiting` | `Chance` | [`apply_chance`] |
//! | `HoldOrGoChoice` | `P1`/`P2` | [`apply`] |
//! | `Move` | `P1`/`P2` | [`apply`] |
//!
//! # Perspective
//!
//! The Trictrac engine always reasons from White's perspective. Player 1 is
//! White; Player 2 is Black. When Player 2 is active, the board is mirrored
//! before computing legal actions / the observation tensor, and the resulting
//! event is mirrored back before being applied to the real state. This
//! mirrors the pattern used in `cxxengine.rs` and `random_game.rs`.
use trictrac_store::{
training_common::{get_valid_action_indices, TrictracAction, ACTION_SPACE_SIZE},
Dice, GameEvent, GameState, Stage, TurnStage,
};
use super::{GameEnv, Player};
/// Stateless factory that produces Trictrac [`GameState`] environments.
///
/// Schools (`schools_enabled`) are always disabled — scoring is automatic.
/// The struct carries no data: all mutable game state lives in the
/// [`GameState`] values it creates, so cloning the environment is free.
#[derive(Clone, Debug, Default)]
pub struct TrictracEnv;
impl GameEnv for TrictracEnv {
    type State = GameState;

    // ── State creation ────────────────────────────────────────────────────
    /// Start a fresh two-player game (Player 1 = White, Player 2 = Black).
    fn new_game(&self) -> GameState {
        GameState::new_with_players("P1", "P2")
    }

    // ── Node queries ──────────────────────────────────────────────────────
    /// Map the engine's `Stage`/`TurnStage` pair onto the [`Player`]
    /// taxonomy: an ended game is `Terminal`, both dice stages are `Chance`,
    /// and every other stage is a decision node for the active player.
    fn current_player(&self, s: &GameState) -> Player {
        if s.stage == Stage::Ended {
            Player::Terminal
        } else if matches!(s.turn_stage, TurnStage::RollDice | TurnStage::RollWaiting) {
            Player::Chance
        } else if s.active_player_id == 1 {
            Player::P1
        } else {
            Player::P2
        }
    }

    /// Returns the legal action indices for the active player.
    ///
    /// The board is automatically mirrored for Player 2 so that the engine
    /// always reasons from White's perspective. The returned indices are
    /// identical in meaning for both players (checker ordinals are
    /// perspective-relative).
    ///
    /// # Panics
    ///
    /// Panics in debug builds if called at a `Chance` or `Terminal` node.
    fn legal_actions(&self, s: &GameState) -> Vec<usize> {
        debug_assert!(
            self.current_player(s).is_decision(),
            "legal_actions called at a non-decision node (turn_stage={:?})",
            s.turn_stage
        );
        // Black reasons on a mirrored board; White uses the state as-is.
        let maybe_indices = match s.active_player_id {
            2 => get_valid_action_indices(&s.mirror()),
            _ => get_valid_action_indices(s),
        };
        maybe_indices.unwrap_or_default()
    }

    // ── State mutation ────────────────────────────────────────────────────
    /// Apply a player action index to the game state.
    ///
    /// For Player 2, the action is decoded against the mirrored board and
    /// the resulting event is un-mirrored before being applied.
    ///
    /// # Panics
    ///
    /// Panics in debug builds if `action` cannot be decoded or does not
    /// produce a valid event for the current state.
    fn apply(&self, s: &mut GameState, action: usize) {
        let decoded = TrictracAction::from_action_index(action);
        let event = if s.active_player_id == 2 {
            // Decode against Black's mirrored view, then flip the event back
            // so it applies to the real (White-perspective) state.
            let view = s.mirror();
            decoded
                .and_then(|a| a.to_event(&view))
                .map(|e| e.get_mirror(false))
        } else {
            decoded.and_then(|a| a.to_event(s))
        };
        if let Some(e) = event {
            s.consume(&e).expect("apply: consume failed for valid action");
        } else {
            panic!("apply: action index {action} produced no event in state {s}");
        }
    }

    /// Sample dice and advance through a chance node.
    ///
    /// Handles both `RollDice` (triggers the roll mechanism, then samples
    /// dice) and `RollWaiting` (only samples dice) in a single call so that
    /// callers never need to distinguish the two.
    ///
    /// # Panics
    ///
    /// Panics in debug builds if called at a non-Chance node.
    fn apply_chance<R: rand::Rng>(&self, s: &mut GameState, rng: &mut R) {
        debug_assert!(
            self.current_player(s).is_chance(),
            "apply_chance called at a non-Chance node (turn_stage={:?})",
            s.turn_stage
        );
        // Step 1: RollDice → RollWaiting (player initiates the roll).
        if s.turn_stage == TurnStage::RollDice {
            let roll = GameEvent::Roll {
                player_id: s.active_player_id,
            };
            s.consume(&roll).expect("apply_chance: Roll event failed");
        }
        // Step 2: RollWaiting → Move / HoldOrGoChoice / Ended.
        // With schools_enabled=false, point marking is automatic inside consume().
        let values = (rng.random_range(1u8..=6), rng.random_range(1u8..=6));
        let result = GameEvent::RollResult {
            player_id: s.active_player_id,
            dice: Dice { values },
        };
        s.consume(&result)
            .expect("apply_chance: RollResult event failed");
    }

    // ── Observation ───────────────────────────────────────────────────────
    /// Board tensor from `pov`'s side: White sees the raw state, any other
    /// pov gets the mirrored board.
    fn observation(&self, s: &GameState, pov: usize) -> Vec<f32> {
        match pov {
            0 => s.to_tensor(),
            _ => s.mirror().to_tensor(),
        }
    }

    /// Length of the observation tensor. Must stay in sync with the length
    /// of `GameState::to_tensor()` — TODO confirm against the store crate.
    fn obs_size(&self) -> usize {
        217
    }

    fn action_space(&self) -> usize {
        ACTION_SPACE_SIZE
    }

    // ── Terminal values ───────────────────────────────────────────────────
    /// Returns `Some([r1, r2])` when the game is over, `None` otherwise.
    ///
    /// The winner (higher cumulative score) receives `+1.0`; the loser
    /// receives `-1.0`; an exact tie gives `0.0` each. A cumulative score
    /// is `holes × 12 + points`.
    fn returns(&self, s: &GameState) -> Option<[f32; 2]> {
        use std::cmp::Ordering;
        if s.stage != Stage::Ended {
            return None;
        }
        let score_of = |id: u64| -> i32 {
            s.players
                .get(&id)
                .map_or(0, |p| p.holes as i32 * 12 + p.points as i32)
        };
        let white = score_of(1);
        let black = score_of(2);
        Some(match white.cmp(&black) {
            Ordering::Greater => [1.0, -1.0],
            Ordering::Less => [-1.0, 1.0],
            Ordering::Equal => [0.0, 0.0],
        })
    }
}
// ── Tests ─────────────────────────────────────────────────────────────────────
#[cfg(test)]
mod tests {
    use super::*;
    use rand::{rngs::SmallRng, Rng, SeedableRng};

    /// Shorthand for the unit-struct environment under test.
    fn env() -> TrictracEnv {
        TrictracEnv
    }

    /// Deterministic RNG so every test run is reproducible.
    fn seeded_rng(seed: u64) -> SmallRng {
        SmallRng::seed_from_u64(seed)
    }

    // ── Initial state ─────────────────────────────────────────────────────
    #[test]
    fn new_game_is_chance_node() {
        let e = env();
        let s = e.new_game();
        // A fresh game starts at RollDice — a Chance node.
        assert_eq!(e.current_player(&s), Player::Chance);
        assert!(e.returns(&s).is_none());
    }

    #[test]
    fn new_game_is_not_terminal() {
        let e = env();
        let s = e.new_game();
        assert_ne!(e.current_player(&s), Player::Terminal);
        assert!(e.returns(&s).is_none());
    }

    // ── Chance nodes ──────────────────────────────────────────────────────
    #[test]
    fn apply_chance_reaches_decision_node() {
        let e = env();
        let mut s = e.new_game();
        let mut rng = seeded_rng(1);
        // A single chance step must yield a decision node (or end the game,
        // which only happens after 12 holes — impossible on the first roll).
        e.apply_chance(&mut s, &mut rng);
        let p = e.current_player(&s);
        assert!(
            p.is_decision(),
            "expected decision node after first roll, got {p:?}"
        );
    }

    #[test]
    fn apply_chance_from_rollwaiting() {
        // Check that apply_chance works when called mid-way (at RollWaiting).
        let e = env();
        let mut s = e.new_game();
        assert_eq!(s.turn_stage, TurnStage::RollDice);
        // Manually advance to RollWaiting.
        s.consume(&GameEvent::Roll { player_id: s.active_player_id })
            .unwrap();
        assert_eq!(s.turn_stage, TurnStage::RollWaiting);
        let mut rng = seeded_rng(2);
        e.apply_chance(&mut s, &mut rng);
        let p = e.current_player(&s);
        assert!(p.is_decision() || p.is_terminal());
    }

    // ── Legal actions ─────────────────────────────────────────────────────
    #[test]
    fn legal_actions_nonempty_after_roll() {
        let e = env();
        let mut s = e.new_game();
        let mut rng = seeded_rng(3);
        e.apply_chance(&mut s, &mut rng);
        assert!(e.current_player(&s).is_decision());
        let actions = e.legal_actions(&s);
        assert!(
            !actions.is_empty(),
            "legal_actions must be non-empty at a decision node"
        );
    }

    #[test]
    fn legal_actions_within_action_space() {
        let e = env();
        let mut s = e.new_game();
        let mut rng = seeded_rng(4);
        e.apply_chance(&mut s, &mut rng);
        for &a in e.legal_actions(&s).iter() {
            assert!(
                a < e.action_space(),
                "action {a} out of bounds (action_space={})",
                e.action_space()
            );
        }
    }

    // ── Observations ──────────────────────────────────────────────────────
    #[test]
    fn observation_has_correct_size() {
        let e = env();
        let mut s = e.new_game();
        let mut rng = seeded_rng(5);
        e.apply_chance(&mut s, &mut rng);
        assert_eq!(e.observation(&s, 0).len(), e.obs_size());
        assert_eq!(e.observation(&s, 1).len(), e.obs_size());
    }

    #[test]
    fn observation_values_in_unit_interval() {
        let e = env();
        let mut s = e.new_game();
        let mut rng = seeded_rng(6);
        e.apply_chance(&mut s, &mut rng);
        for (pov, obs) in [(0, e.observation(&s, 0)), (1, e.observation(&s, 1))] {
            for (i, &v) in obs.iter().enumerate() {
                assert!(
                    v >= 0.0 && v <= 1.0,
                    "pov={pov}: obs[{i}] = {v} is outside [0,1]"
                );
            }
        }
    }

    #[test]
    fn p1_and_p2_observations_differ() {
        // The board is mirrored for P2, so the two observations should differ
        // whenever there are checkers in non-symmetric positions (always true
        // in a real game after a few moves).
        let e = env();
        let mut s = e.new_game();
        let mut rng = seeded_rng(7);
        // Advance far enough that the board is non-trivial.
        for _ in 0..6 {
            while e.current_player(&s).is_chance() {
                e.apply_chance(&mut s, &mut rng);
            }
            if e.current_player(&s).is_terminal() {
                break;
            }
            let actions = e.legal_actions(&s);
            e.apply(&mut s, actions[0]);
        }
        if !e.current_player(&s).is_terminal() {
            let obs0 = e.observation(&s, 0);
            let obs1 = e.observation(&s, 1);
            assert_ne!(obs0, obs1, "P1 and P2 observations should differ on a non-symmetric board");
        }
    }

    // ── Applying actions ──────────────────────────────────────────────────
    #[test]
    fn apply_changes_state() {
        let e = env();
        let mut s = e.new_game();
        let mut rng = seeded_rng(8);
        e.apply_chance(&mut s, &mut rng);
        assert!(e.current_player(&s).is_decision());
        let before = s.clone();
        let action = e.legal_actions(&s)[0];
        e.apply(&mut s, action);
        assert_ne!(
            before.turn_stage, s.turn_stage,
            "state must change after apply"
        );
    }

    #[test]
    fn apply_all_legal_actions_do_not_panic() {
        // Verify that every action returned by legal_actions can be applied
        // without panicking (on several independent copies of the same state).
        let e = env();
        let mut s = e.new_game();
        let mut rng = seeded_rng(9);
        e.apply_chance(&mut s, &mut rng);
        assert!(e.current_player(&s).is_decision());
        for action in e.legal_actions(&s) {
            let mut copy = s.clone();
            e.apply(&mut copy, action); // must not panic
        }
    }

    // ── Full game ─────────────────────────────────────────────────────────
    /// Run a complete game with random actions through the `GameEnv` trait
    /// and verify that:
    /// - The game terminates within the step budget.
    /// - `returns()` is `Some` at the end.
    /// - The outcome is zero-sum and each return lies in [-1, 1].
    /// - No step panics.
    #[test]
    fn full_random_game_terminates() {
        let e = env();
        let mut s = e.new_game();
        let mut rng = seeded_rng(42);
        let max_steps = 50_000;
        for step in 0..max_steps {
            match e.current_player(&s) {
                Player::Terminal => break,
                Player::Chance => e.apply_chance(&mut s, &mut rng),
                Player::P1 | Player::P2 => {
                    let actions = e.legal_actions(&s);
                    assert!(!actions.is_empty(), "step {step}: empty legal actions at decision node");
                    let idx = rng.random_range(0..actions.len());
                    e.apply(&mut s, actions[idx]);
                }
            }
        }
        // Checked after the loop so a game ending on the final allowed step
        // still passes (the old in-loop counter assert rejected that case).
        assert!(
            e.current_player(&s).is_terminal(),
            "game did not terminate within {max_steps} steps"
        );
        let result = e.returns(&s);
        assert!(result.is_some(), "returns() must be Some at Terminal");
        let [r1, r2] = result.unwrap();
        let sum = r1 + r2;
        // Single zero-sum check (the original or-ed two identical conditions).
        assert!(
            sum.abs() < 1e-5,
            "game must be zero-sum: r1={r1}, r2={r2}, sum={sum}"
        );
        assert!(
            r1.abs() <= 1.0 && r2.abs() <= 1.0,
            "returns must be in [-1,1]: r1={r1}, r2={r2}"
        );
    }

    /// Run multiple games with different seeds to stress-test for panics.
    #[test]
    fn multiple_games_no_panic() {
        let e = env();
        let max_steps = 20_000;
        for seed in 0..10u64 {
            let mut s = e.new_game();
            let mut rng = seeded_rng(seed);
            for _ in 0..max_steps {
                match e.current_player(&s) {
                    Player::Terminal => break,
                    Player::Chance => e.apply_chance(&mut s, &mut rng),
                    Player::P1 | Player::P2 => {
                        let actions = e.legal_actions(&s);
                        let idx = rng.random_range(0..actions.len());
                        e.apply(&mut s, actions[idx]);
                    }
                }
            }
        }
    }

    // ── Returns ───────────────────────────────────────────────────────────
    #[test]
    fn returns_none_mid_game() {
        let e = env();
        let mut s = e.new_game();
        let mut rng = seeded_rng(11);
        // Advance a few steps but do not finish the game.
        for _ in 0..4 {
            match e.current_player(&s) {
                Player::Terminal => break,
                Player::Chance => e.apply_chance(&mut s, &mut rng),
                Player::P1 | Player::P2 => {
                    let actions = e.legal_actions(&s);
                    e.apply(&mut s, actions[0]);
                }
            }
        }
        if !e.current_player(&s).is_terminal() {
            assert!(
                e.returns(&s).is_none(),
                "returns() must be None before the game ends"
            );
        }
    }

    // ── Player 2 actions ──────────────────────────────────────────────────
    /// Verify that Player 2 (Black) can take actions without panicking,
    /// and that the state advances correctly.
    #[test]
    fn player2_can_act() {
        let e = env();
        let mut s = e.new_game();
        let mut rng = seeded_rng(12);
        // Keep stepping until Player 2 gets a turn.
        let max_steps = 5_000;
        let mut p2_acted = false;
        for _ in 0..max_steps {
            match e.current_player(&s) {
                Player::Terminal => break,
                Player::Chance => e.apply_chance(&mut s, &mut rng),
                Player::P2 => {
                    let actions = e.legal_actions(&s);
                    assert!(!actions.is_empty());
                    e.apply(&mut s, actions[0]);
                    p2_acted = true;
                    break;
                }
                Player::P1 => {
                    let actions = e.legal_actions(&s);
                    e.apply(&mut s, actions[0]);
                }
            }
        }
        assert!(p2_acted, "Player 2 never got a turn in {max_steps} steps");
    }
}

1
spiel_bot/src/lib.rs Normal file
View file

@ -0,0 +1 @@
pub mod env;