feat(spiel_bot): init crate & implement GameEnv trait + TrictracEnv

This commit is contained in:
Henri Bourcereau 2026-03-07 20:12:59 +01:00
parent a6644e3c9d
commit df05a43022
6 changed files with 676 additions and 1 deletions

9
Cargo.lock generated
View file

@ -5891,6 +5891,15 @@ dependencies = [
"windows-sys 0.60.2",
]
[[package]]
name = "spiel_bot"
version = "0.1.0"
dependencies = [
"anyhow",
"rand 0.9.2",
"trictrac-store",
]
[[package]]
name = "spin"
version = "0.10.0"

View file

@ -1,4 +1,4 @@
[workspace]
resolver = "2"
members = ["client_cli", "bot", "store"]
members = ["client_cli", "bot", "store", "spiel_bot"]

9
spiel_bot/Cargo.toml Normal file
View file

@ -0,0 +1,9 @@
[package]
name = "spiel_bot"
version = "0.1.0"
edition = "2021"
[dependencies]
trictrac-store = { path = "../store" }
anyhow = "1"
rand = "0.9"

121
spiel_bot/src/env/mod.rs vendored Normal file
View file

@ -0,0 +1,121 @@
//! Game environment abstraction — the minimal "Rust OpenSpiel".
//!
//! A `GameEnv` describes the rules of a two-player, zero-sum game that may
//! contain stochastic (chance) nodes. Algorithms such as AlphaZero, DQN,
//! and PPO interact with a game exclusively through this trait.
//!
//! # Node taxonomy
//!
//! Every game position belongs to one of four categories, returned by
//! [`GameEnv::current_player`]:
//!
//! | [`Player`] | Meaning |
//! |-----------|---------|
//! | `P1` | Player 1 (index 0) must choose an action |
//! | `P2` | Player 2 (index 1) must choose an action |
//! | `Chance` | A stochastic event must be sampled (dice roll, card draw…) |
//! | `Terminal` | The game is over; [`GameEnv::returns`] is meaningful |
//!
//! # Perspective convention
//!
//! [`GameEnv::observation`] always returns the board from *the requested
//! player's* point of view. Callers pass `pov = 0` for Player 1 and
//! `pov = 1` for Player 2. The implementation is responsible for any
//! mirroring required (e.g. Trictrac always reasons from White's side).
pub mod trictrac;
pub use trictrac::TrictracEnv;
/// Who controls the current game node.
///
/// `Hash` is derived alongside `Eq` so node kinds can be used directly as
/// `HashMap`/`HashSet` keys (e.g. bucketing search statistics per node type).
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum Player {
    /// Player 1 (index 0) is to move.
    P1,
    /// Player 2 (index 1) is to move.
    P2,
    /// A stochastic event (dice roll, etc.) must be resolved.
    Chance,
    /// The game is over.
    Terminal,
}
impl Player {
    /// The 0-based index of the player to move, if any.
    ///
    /// `P1` maps to `0` and `P2` to `1`; chance and terminal nodes have no
    /// controlling player and yield `None`.
    pub fn index(self) -> Option<usize> {
        match self {
            Player::P1 => Some(0),
            Player::P2 => Some(1),
            Player::Chance | Player::Terminal => None,
        }
    }

    /// True when one of the two players must choose an action.
    pub fn is_decision(self) -> bool {
        self.index().is_some()
    }

    /// True at a stochastic (dice-roll) node.
    pub fn is_chance(self) -> bool {
        matches!(self, Player::Chance)
    }

    /// True once the game has ended.
    pub fn is_terminal(self) -> bool {
        matches!(self, Player::Terminal)
    }
}
/// Trait that completely describes a two-player zero-sum game.
///
/// Implementors must be cheaply cloneable (the type is used as a stateless
/// factory; the mutable game state lives in `Self::State`).
pub trait GameEnv: Clone + Send + Sync + 'static {
    /// The mutable game state. Must be `Clone` so MCTS can copy
    /// game trees without touching the environment.
    type State: Clone + Send + Sync;

    // ── State creation ────────────────────────────────────────────────────

    /// Create a fresh game state at the initial position.
    fn new_game(&self) -> Self::State;

    // ── Node queries ──────────────────────────────────────────────────────

    /// Classify the current node (see the module docs for the taxonomy).
    fn current_player(&self, s: &Self::State) -> Player;

    /// Legal action indices at a decision node (`current_player` is `P1`/`P2`).
    ///
    /// The returned indices are in `[0, action_space())`.
    /// The result is unspecified (may panic or return empty) when called at a
    /// `Chance` or `Terminal` node.
    fn legal_actions(&self, s: &Self::State) -> Vec<usize>;

    // ── State mutation ────────────────────────────────────────────────────

    /// Apply a player action. `action` must be a value returned by
    /// [`Self::legal_actions`] for the current state.
    fn apply(&self, s: &mut Self::State, action: usize);

    /// Sample and apply a stochastic outcome. Must only be called when
    /// `current_player(s) == Player::Chance`.
    fn apply_chance<R: rand::Rng>(&self, s: &mut Self::State, rng: &mut R);

    // ── Observation ───────────────────────────────────────────────────────

    /// Observation tensor from player `pov`'s perspective (0 = P1, 1 = P2).
    /// The returned slice has exactly [`Self::obs_size`] elements, all in `[0, 1]`.
    fn observation(&self, s: &Self::State, pov: usize) -> Vec<f32>;

    /// Number of floats returned by [`Self::observation`].
    fn obs_size(&self) -> usize;

    /// Total number of distinct action indices (the policy head output size).
    fn action_space(&self) -> usize;

    // ── Terminal values ───────────────────────────────────────────────────

    /// Game outcome for each player, or `None` if the game is not over.
    ///
    /// Values are in `[-1, 1]`: `+1.0` = win, `-1.0` = loss, `0.0` = draw.
    /// Index 0 = Player 1, index 1 = Player 2.
    fn returns(&self, s: &Self::State) -> Option<[f32; 2]>;
}

535
spiel_bot/src/env/trictrac.rs vendored Normal file
View file

@ -0,0 +1,535 @@
//! [`GameEnv`] implementation for Trictrac.
//!
//! # Game flow (schools_enabled = false)
//!
//! With scoring schools disabled (the standard training configuration),
//! `MarkPoints` and `MarkAdvPoints` stages are never reached — the engine
//! applies them automatically inside `RollResult` and `Move`. The only
//! four stages that actually occur are:
//!
//! | `TurnStage` | [`Player`] kind | Handled by |
//! |-------------|-----------------|------------|
//! | `RollDice` | `Chance` | [`apply_chance`] |
//! | `RollWaiting` | `Chance` | [`apply_chance`] |
//! | `HoldOrGoChoice` | `P1`/`P2` | [`apply`] |
//! | `Move` | `P1`/`P2` | [`apply`] |
//!
//! # Perspective
//!
//! The Trictrac engine always reasons from White's perspective. Player 1 is
//! White; Player 2 is Black. When Player 2 is active, the board is mirrored
//! before computing legal actions / the observation tensor, and the resulting
//! event is mirrored back before being applied to the real state. This
//! mirrors the pattern used in `cxxengine.rs` and `random_game.rs`.
use trictrac_store::{
training_common::{get_valid_action_indices, TrictracAction, ACTION_SPACE_SIZE},
Dice, GameEvent, GameState, Stage, TurnStage,
};
use super::{GameEnv, Player};
/// Stateless factory that produces Trictrac [`GameState`] environments.
///
/// Schools (`schools_enabled`) are always disabled — scoring is automatic.
/// The struct carries no data: all mutable game state lives in the
/// [`GameState`] values it creates, so cloning the environment is free.
#[derive(Clone, Debug, Default)]
pub struct TrictracEnv;
impl GameEnv for TrictracEnv {
    type State = GameState;

    // ── State creation ────────────────────────────────────────────────────
    /// Start a fresh two-player game (Player 1 = White, Player 2 = Black).
    fn new_game(&self) -> GameState {
        GameState::new_with_players("P1", "P2")
    }

    // ── Node queries ──────────────────────────────────────────────────────
    /// Map the engine's `Stage`/`TurnStage` pair onto the [`Player`]
    /// taxonomy: an ended game is `Terminal`, both dice stages are `Chance`,
    /// and every other stage is a decision node for the active player.
    fn current_player(&self, s: &GameState) -> Player {
        if s.stage == Stage::Ended {
            Player::Terminal
        } else if matches!(s.turn_stage, TurnStage::RollDice | TurnStage::RollWaiting) {
            Player::Chance
        } else if s.active_player_id == 1 {
            Player::P1
        } else {
            Player::P2
        }
    }

    /// Returns the legal action indices for the active player.
    ///
    /// The board is automatically mirrored for Player 2 so that the engine
    /// always reasons from White's perspective. The returned indices are
    /// identical in meaning for both players (checker ordinals are
    /// perspective-relative).
    ///
    /// # Panics
    ///
    /// Panics in debug builds if called at a `Chance` or `Terminal` node.
    fn legal_actions(&self, s: &GameState) -> Vec<usize> {
        debug_assert!(
            self.current_player(s).is_decision(),
            "legal_actions called at a non-decision node (turn_stage={:?})",
            s.turn_stage
        );
        // Black reasons on a mirrored board; White uses the state as-is.
        let maybe_indices = match s.active_player_id {
            2 => get_valid_action_indices(&s.mirror()),
            _ => get_valid_action_indices(s),
        };
        maybe_indices.unwrap_or_default()
    }

    // ── State mutation ────────────────────────────────────────────────────
    /// Apply a player action index to the game state.
    ///
    /// For Player 2, the action is decoded against the mirrored board and
    /// the resulting event is un-mirrored before being applied.
    ///
    /// # Panics
    ///
    /// Panics in debug builds if `action` cannot be decoded or does not
    /// produce a valid event for the current state.
    fn apply(&self, s: &mut GameState, action: usize) {
        let decoded = TrictracAction::from_action_index(action);
        let event = if s.active_player_id == 2 {
            // Decode against Black's mirrored view, then flip the event back
            // so it applies to the real (White-perspective) state.
            let view = s.mirror();
            decoded
                .and_then(|a| a.to_event(&view))
                .map(|e| e.get_mirror(false))
        } else {
            decoded.and_then(|a| a.to_event(s))
        };
        if let Some(e) = event {
            s.consume(&e).expect("apply: consume failed for valid action");
        } else {
            panic!("apply: action index {action} produced no event in state {s}");
        }
    }

    /// Sample dice and advance through a chance node.
    ///
    /// Handles both `RollDice` (triggers the roll mechanism, then samples
    /// dice) and `RollWaiting` (only samples dice) in a single call so that
    /// callers never need to distinguish the two.
    ///
    /// # Panics
    ///
    /// Panics in debug builds if called at a non-Chance node.
    fn apply_chance<R: rand::Rng>(&self, s: &mut GameState, rng: &mut R) {
        debug_assert!(
            self.current_player(s).is_chance(),
            "apply_chance called at a non-Chance node (turn_stage={:?})",
            s.turn_stage
        );
        // Step 1: RollDice → RollWaiting (player initiates the roll).
        if s.turn_stage == TurnStage::RollDice {
            let roll = GameEvent::Roll {
                player_id: s.active_player_id,
            };
            s.consume(&roll).expect("apply_chance: Roll event failed");
        }
        // Step 2: RollWaiting → Move / HoldOrGoChoice / Ended.
        // With schools_enabled=false, point marking is automatic inside consume().
        let values = (rng.random_range(1u8..=6), rng.random_range(1u8..=6));
        let result = GameEvent::RollResult {
            player_id: s.active_player_id,
            dice: Dice { values },
        };
        s.consume(&result)
            .expect("apply_chance: RollResult event failed");
    }

    // ── Observation ───────────────────────────────────────────────────────
    /// Board tensor from `pov`'s side: White sees the raw state, any other
    /// pov gets the mirrored board.
    fn observation(&self, s: &GameState, pov: usize) -> Vec<f32> {
        match pov {
            0 => s.to_tensor(),
            _ => s.mirror().to_tensor(),
        }
    }

    /// Length of the observation tensor. Must stay in sync with the length
    /// of `GameState::to_tensor()` — TODO confirm against the store crate.
    fn obs_size(&self) -> usize {
        217
    }

    fn action_space(&self) -> usize {
        ACTION_SPACE_SIZE
    }

    // ── Terminal values ───────────────────────────────────────────────────
    /// Returns `Some([r1, r2])` when the game is over, `None` otherwise.
    ///
    /// The winner (higher cumulative score) receives `+1.0`; the loser
    /// receives `-1.0`; an exact tie gives `0.0` each. A cumulative score
    /// is `holes × 12 + points`.
    fn returns(&self, s: &GameState) -> Option<[f32; 2]> {
        use std::cmp::Ordering;
        if s.stage != Stage::Ended {
            return None;
        }
        let score_of = |id: u64| -> i32 {
            s.players
                .get(&id)
                .map_or(0, |p| p.holes as i32 * 12 + p.points as i32)
        };
        let white = score_of(1);
        let black = score_of(2);
        Some(match white.cmp(&black) {
            Ordering::Greater => [1.0, -1.0],
            Ordering::Less => [-1.0, 1.0],
            Ordering::Equal => [0.0, 0.0],
        })
    }
}
// ── Tests ─────────────────────────────────────────────────────────────────────
#[cfg(test)]
mod tests {
    use super::*;
    use rand::{rngs::SmallRng, Rng, SeedableRng};

    /// Shorthand for the unit-struct environment under test.
    fn env() -> TrictracEnv {
        TrictracEnv
    }

    /// Deterministic RNG so every test run is reproducible.
    fn seeded_rng(seed: u64) -> SmallRng {
        SmallRng::seed_from_u64(seed)
    }

    // ── Initial state ─────────────────────────────────────────────────────
    #[test]
    fn new_game_is_chance_node() {
        let e = env();
        let s = e.new_game();
        // A fresh game starts at RollDice — a Chance node.
        assert_eq!(e.current_player(&s), Player::Chance);
        assert!(e.returns(&s).is_none());
    }

    #[test]
    fn new_game_is_not_terminal() {
        let e = env();
        let s = e.new_game();
        assert_ne!(e.current_player(&s), Player::Terminal);
        assert!(e.returns(&s).is_none());
    }

    // ── Chance nodes ──────────────────────────────────────────────────────
    #[test]
    fn apply_chance_reaches_decision_node() {
        let e = env();
        let mut s = e.new_game();
        let mut rng = seeded_rng(1);
        // A single chance step must yield a decision node (or end the game,
        // which only happens after 12 holes — impossible on the first roll).
        e.apply_chance(&mut s, &mut rng);
        let p = e.current_player(&s);
        assert!(
            p.is_decision(),
            "expected decision node after first roll, got {p:?}"
        );
    }

    #[test]
    fn apply_chance_from_rollwaiting() {
        // Check that apply_chance works when called mid-way (at RollWaiting).
        let e = env();
        let mut s = e.new_game();
        assert_eq!(s.turn_stage, TurnStage::RollDice);
        // Manually advance to RollWaiting.
        s.consume(&GameEvent::Roll { player_id: s.active_player_id })
            .unwrap();
        assert_eq!(s.turn_stage, TurnStage::RollWaiting);
        let mut rng = seeded_rng(2);
        e.apply_chance(&mut s, &mut rng);
        let p = e.current_player(&s);
        assert!(p.is_decision() || p.is_terminal());
    }

    // ── Legal actions ─────────────────────────────────────────────────────
    #[test]
    fn legal_actions_nonempty_after_roll() {
        let e = env();
        let mut s = e.new_game();
        let mut rng = seeded_rng(3);
        e.apply_chance(&mut s, &mut rng);
        assert!(e.current_player(&s).is_decision());
        let actions = e.legal_actions(&s);
        assert!(
            !actions.is_empty(),
            "legal_actions must be non-empty at a decision node"
        );
    }

    #[test]
    fn legal_actions_within_action_space() {
        let e = env();
        let mut s = e.new_game();
        let mut rng = seeded_rng(4);
        e.apply_chance(&mut s, &mut rng);
        for &a in e.legal_actions(&s).iter() {
            assert!(
                a < e.action_space(),
                "action {a} out of bounds (action_space={})",
                e.action_space()
            );
        }
    }

    // ── Observations ──────────────────────────────────────────────────────
    #[test]
    fn observation_has_correct_size() {
        let e = env();
        let mut s = e.new_game();
        let mut rng = seeded_rng(5);
        e.apply_chance(&mut s, &mut rng);
        assert_eq!(e.observation(&s, 0).len(), e.obs_size());
        assert_eq!(e.observation(&s, 1).len(), e.obs_size());
    }

    #[test]
    fn observation_values_in_unit_interval() {
        let e = env();
        let mut s = e.new_game();
        let mut rng = seeded_rng(6);
        e.apply_chance(&mut s, &mut rng);
        for (pov, obs) in [(0, e.observation(&s, 0)), (1, e.observation(&s, 1))] {
            for (i, &v) in obs.iter().enumerate() {
                assert!(
                    v >= 0.0 && v <= 1.0,
                    "pov={pov}: obs[{i}] = {v} is outside [0,1]"
                );
            }
        }
    }

    #[test]
    fn p1_and_p2_observations_differ() {
        // The board is mirrored for P2, so the two observations should differ
        // whenever there are checkers in non-symmetric positions (always true
        // in a real game after a few moves).
        let e = env();
        let mut s = e.new_game();
        let mut rng = seeded_rng(7);
        // Advance far enough that the board is non-trivial.
        for _ in 0..6 {
            while e.current_player(&s).is_chance() {
                e.apply_chance(&mut s, &mut rng);
            }
            if e.current_player(&s).is_terminal() {
                break;
            }
            let actions = e.legal_actions(&s);
            e.apply(&mut s, actions[0]);
        }
        if !e.current_player(&s).is_terminal() {
            let obs0 = e.observation(&s, 0);
            let obs1 = e.observation(&s, 1);
            assert_ne!(obs0, obs1, "P1 and P2 observations should differ on a non-symmetric board");
        }
    }

    // ── Applying actions ──────────────────────────────────────────────────
    #[test]
    fn apply_changes_state() {
        let e = env();
        let mut s = e.new_game();
        let mut rng = seeded_rng(8);
        e.apply_chance(&mut s, &mut rng);
        assert!(e.current_player(&s).is_decision());
        let before = s.clone();
        let action = e.legal_actions(&s)[0];
        e.apply(&mut s, action);
        assert_ne!(
            before.turn_stage, s.turn_stage,
            "state must change after apply"
        );
    }

    #[test]
    fn apply_all_legal_actions_do_not_panic() {
        // Verify that every action returned by legal_actions can be applied
        // without panicking (on several independent copies of the same state).
        let e = env();
        let mut s = e.new_game();
        let mut rng = seeded_rng(9);
        e.apply_chance(&mut s, &mut rng);
        assert!(e.current_player(&s).is_decision());
        for action in e.legal_actions(&s) {
            let mut copy = s.clone();
            e.apply(&mut copy, action); // must not panic
        }
    }

    // ── Full game ─────────────────────────────────────────────────────────
    /// Run a complete game with random actions through the `GameEnv` trait
    /// and verify that:
    /// - The game terminates within the step budget.
    /// - `returns()` is `Some` at the end.
    /// - The outcome is zero-sum and each return lies in [-1, 1].
    /// - No step panics.
    #[test]
    fn full_random_game_terminates() {
        let e = env();
        let mut s = e.new_game();
        let mut rng = seeded_rng(42);
        let max_steps = 50_000;
        for step in 0..max_steps {
            match e.current_player(&s) {
                Player::Terminal => break,
                Player::Chance => e.apply_chance(&mut s, &mut rng),
                Player::P1 | Player::P2 => {
                    let actions = e.legal_actions(&s);
                    assert!(!actions.is_empty(), "step {step}: empty legal actions at decision node");
                    let idx = rng.random_range(0..actions.len());
                    e.apply(&mut s, actions[idx]);
                }
            }
        }
        // Checked after the loop so a game ending on the final allowed step
        // still passes (the old in-loop counter assert rejected that case).
        assert!(
            e.current_player(&s).is_terminal(),
            "game did not terminate within {max_steps} steps"
        );
        let result = e.returns(&s);
        assert!(result.is_some(), "returns() must be Some at Terminal");
        let [r1, r2] = result.unwrap();
        let sum = r1 + r2;
        // Single zero-sum check (the original or-ed two identical conditions).
        assert!(
            sum.abs() < 1e-5,
            "game must be zero-sum: r1={r1}, r2={r2}, sum={sum}"
        );
        assert!(
            r1.abs() <= 1.0 && r2.abs() <= 1.0,
            "returns must be in [-1,1]: r1={r1}, r2={r2}"
        );
    }

    /// Run multiple games with different seeds to stress-test for panics.
    #[test]
    fn multiple_games_no_panic() {
        let e = env();
        let max_steps = 20_000;
        for seed in 0..10u64 {
            let mut s = e.new_game();
            let mut rng = seeded_rng(seed);
            for _ in 0..max_steps {
                match e.current_player(&s) {
                    Player::Terminal => break,
                    Player::Chance => e.apply_chance(&mut s, &mut rng),
                    Player::P1 | Player::P2 => {
                        let actions = e.legal_actions(&s);
                        let idx = rng.random_range(0..actions.len());
                        e.apply(&mut s, actions[idx]);
                    }
                }
            }
        }
    }

    // ── Returns ───────────────────────────────────────────────────────────
    #[test]
    fn returns_none_mid_game() {
        let e = env();
        let mut s = e.new_game();
        let mut rng = seeded_rng(11);
        // Advance a few steps but do not finish the game.
        for _ in 0..4 {
            match e.current_player(&s) {
                Player::Terminal => break,
                Player::Chance => e.apply_chance(&mut s, &mut rng),
                Player::P1 | Player::P2 => {
                    let actions = e.legal_actions(&s);
                    e.apply(&mut s, actions[0]);
                }
            }
        }
        if !e.current_player(&s).is_terminal() {
            assert!(
                e.returns(&s).is_none(),
                "returns() must be None before the game ends"
            );
        }
    }

    // ── Player 2 actions ──────────────────────────────────────────────────
    /// Verify that Player 2 (Black) can take actions without panicking,
    /// and that the state advances correctly.
    #[test]
    fn player2_can_act() {
        let e = env();
        let mut s = e.new_game();
        let mut rng = seeded_rng(12);
        // Keep stepping until Player 2 gets a turn.
        let max_steps = 5_000;
        let mut p2_acted = false;
        for _ in 0..max_steps {
            match e.current_player(&s) {
                Player::Terminal => break,
                Player::Chance => e.apply_chance(&mut s, &mut rng),
                Player::P2 => {
                    let actions = e.legal_actions(&s);
                    assert!(!actions.is_empty());
                    e.apply(&mut s, actions[0]);
                    p2_acted = true;
                    break;
                }
                Player::P1 => {
                    let actions = e.legal_actions(&s);
                    e.apply(&mut s, actions[0]);
                }
            }
        }
        assert!(p2_acted, "Player 2 never got a turn in {max_steps} steps");
    }
}

1
spiel_bot/src/lib.rs Normal file
View file

@ -0,0 +1 @@
pub mod env;