//! [`GameEnv`] implementation for Trictrac. //! //! # Game flow (schools_enabled = false) //! //! With scoring schools disabled (the standard training configuration), //! `MarkPoints` and `MarkAdvPoints` stages are never reached — the engine //! applies them automatically inside `RollResult` and `Move`. The only //! four stages that actually occur are: //! //! | `TurnStage` | [`Player`] kind | Handled by | //! |-------------|-----------------|------------| //! | `RollDice` | `Chance` | [`apply_chance`] | //! | `RollWaiting` | `Chance` | [`apply_chance`] | //! | `HoldOrGoChoice` | `P1`/`P2` | [`apply`] | //! | `Move` | `P1`/`P2` | [`apply`] | //! //! # Perspective //! //! The Trictrac engine always reasons from White's perspective. Player 1 is //! White; Player 2 is Black. When Player 2 is active, the board is mirrored //! before computing legal actions / the observation tensor, and the resulting //! event is mirrored back before being applied to the real state. This //! mirrors the pattern used in `cxxengine.rs` and `random_game.rs`. use trictrac_store::{ training_common::{get_valid_action_indices, TrictracAction, ACTION_SPACE_SIZE}, Dice, GameEvent, GameState, Stage, TurnStage, }; use super::{GameEnv, Player}; /// Stateless factory that produces Trictrac [`GameState`] environments. /// /// Schools (`schools_enabled`) are always disabled — scoring is automatic. #[derive(Clone, Debug, Default)] pub struct TrictracEnv; impl GameEnv for TrictracEnv { type State = GameState; // ── State creation ──────────────────────────────────────────────────── fn new_game(&self) -> GameState { GameState::new_with_players("P1", "P2") } // ── Node queries ────────────────────────────────────────────────────── fn current_player(&self, s: &GameState) -> Player { if s.stage == Stage::Ended { return Player::Terminal; } match s.turn_stage { TurnStage::RollDice | TurnStage::RollWaiting => Player::Chance, _ => { if s.active_player_id == 1 { Player::P1 } else { Player::P2 } } } } /// Returns the legal action indices for the active player. /// /// The board is automatically mirrored for Player 2 so that the engine /// always reasons from White's perspective. The returned indices are /// identical in meaning for both players (checker ordinals are /// perspective-relative). /// /// # Panics /// /// Panics in debug builds if called at a `Chance` or `Terminal` node. fn legal_actions(&self, s: &GameState) -> Vec { debug_assert!( self.current_player(s).is_decision(), "legal_actions called at a non-decision node (turn_stage={:?})", s.turn_stage ); let indices = if s.active_player_id == 2 { get_valid_action_indices(&s.mirror()) } else { get_valid_action_indices(s) }; indices.unwrap_or_default() } // ── State mutation ──────────────────────────────────────────────────── /// Apply a player action index to the game state. /// /// For Player 2, the action is decoded against the mirrored board and /// the resulting event is un-mirrored before being applied. /// /// # Panics /// /// Panics in debug builds if `action` cannot be decoded or does not /// produce a valid event for the current state. fn apply(&self, s: &mut GameState, action: usize) { let needs_mirror = s.active_player_id == 2; let event = if needs_mirror { let view = s.mirror(); TrictracAction::from_action_index(action) .and_then(|a| a.to_event(&view)) .map(|e| e.get_mirror(false)) } else { TrictracAction::from_action_index(action).and_then(|a| a.to_event(s)) }; match event { Some(e) => { s.consume(&e).expect("apply: consume failed for valid action"); } None => { panic!("apply: action index {action} produced no event in state {s}"); } } } /// Sample dice and advance through a chance node. /// /// Handles both `RollDice` (triggers the roll mechanism, then samples /// dice) and `RollWaiting` (only samples dice) in a single call so that /// callers never need to distinguish the two. /// /// # Panics /// /// Panics in debug builds if called at a non-Chance node. fn apply_chance(&self, s: &mut GameState, rng: &mut R) { debug_assert!( self.current_player(s).is_chance(), "apply_chance called at a non-Chance node (turn_stage={:?})", s.turn_stage ); // Step 1: RollDice → RollWaiting (player initiates the roll). if s.turn_stage == TurnStage::RollDice { s.consume(&GameEvent::Roll { player_id: s.active_player_id, }) .expect("apply_chance: Roll event failed"); } // Step 2: RollWaiting → Move / HoldOrGoChoice / Ended. // With schools_enabled=false, point marking is automatic inside consume(). let dice = Dice { values: (rng.random_range(1u8..=6), rng.random_range(1u8..=6)), }; s.consume(&GameEvent::RollResult { player_id: s.active_player_id, dice, }) .expect("apply_chance: RollResult event failed"); } // ── Observation ─────────────────────────────────────────────────────── fn observation(&self, s: &GameState, pov: usize) -> Vec { if pov == 0 { s.to_tensor() } else { s.mirror().to_tensor() } } fn obs_size(&self) -> usize { 217 } fn action_space(&self) -> usize { ACTION_SPACE_SIZE } // ── Terminal values ─────────────────────────────────────────────────── /// Returns `Some([r1, r2])` when the game is over, `None` otherwise. /// /// The winner (higher cumulative score) receives `+1.0`; the loser /// receives `-1.0`; an exact tie gives `0.0` each. A cumulative score /// is `holes × 12 + points`. fn returns(&self, s: &GameState) -> Option<[f32; 2]> { if s.stage != Stage::Ended { return None; } let score = |id: u64| -> i32 { s.players .get(&id) .map(|p| p.holes as i32 * 12 + p.points as i32) .unwrap_or(0) }; let s1 = score(1); let s2 = score(2); Some(match s1.cmp(&s2) { std::cmp::Ordering::Greater => [1.0, -1.0], std::cmp::Ordering::Less => [-1.0, 1.0], std::cmp::Ordering::Equal => [0.0, 0.0], }) } } // ── Tests ───────────────────────────────────────────────────────────────────── #[cfg(test)] mod tests { use super::*; use rand::{rngs::SmallRng, Rng, SeedableRng}; fn env() -> TrictracEnv { TrictracEnv } fn seeded_rng(seed: u64) -> SmallRng { SmallRng::seed_from_u64(seed) } // ── Initial state ───────────────────────────────────────────────────── #[test] fn new_game_is_chance_node() { let e = env(); let s = e.new_game(); // A fresh game starts at RollDice — a Chance node. assert_eq!(e.current_player(&s), Player::Chance); assert!(e.returns(&s).is_none()); } #[test] fn new_game_is_not_terminal() { let e = env(); let s = e.new_game(); assert_ne!(e.current_player(&s), Player::Terminal); assert!(e.returns(&s).is_none()); } // ── Chance nodes ────────────────────────────────────────────────────── #[test] fn apply_chance_reaches_decision_node() { let e = env(); let mut s = e.new_game(); let mut rng = seeded_rng(1); // A single chance step must yield a decision node (or end the game, // which only happens after 12 holes — impossible on the first roll). e.apply_chance(&mut s, &mut rng); let p = e.current_player(&s); assert!( p.is_decision(), "expected decision node after first roll, got {p:?}" ); } #[test] fn apply_chance_from_rollwaiting() { // Check that apply_chance works when called mid-way (at RollWaiting). let e = env(); let mut s = e.new_game(); assert_eq!(s.turn_stage, TurnStage::RollDice); // Manually advance to RollWaiting. s.consume(&GameEvent::Roll { player_id: s.active_player_id }) .unwrap(); assert_eq!(s.turn_stage, TurnStage::RollWaiting); let mut rng = seeded_rng(2); e.apply_chance(&mut s, &mut rng); let p = e.current_player(&s); assert!(p.is_decision() || p.is_terminal()); } // ── Legal actions ───────────────────────────────────────────────────── #[test] fn legal_actions_nonempty_after_roll() { let e = env(); let mut s = e.new_game(); let mut rng = seeded_rng(3); e.apply_chance(&mut s, &mut rng); assert!(e.current_player(&s).is_decision()); let actions = e.legal_actions(&s); assert!( !actions.is_empty(), "legal_actions must be non-empty at a decision node" ); } #[test] fn legal_actions_within_action_space() { let e = env(); let mut s = e.new_game(); let mut rng = seeded_rng(4); e.apply_chance(&mut s, &mut rng); for &a in e.legal_actions(&s).iter() { assert!( a < e.action_space(), "action {a} out of bounds (action_space={})", e.action_space() ); } } // ── Observations ────────────────────────────────────────────────────── #[test] fn observation_has_correct_size() { let e = env(); let mut s = e.new_game(); let mut rng = seeded_rng(5); e.apply_chance(&mut s, &mut rng); assert_eq!(e.observation(&s, 0).len(), e.obs_size()); assert_eq!(e.observation(&s, 1).len(), e.obs_size()); } #[test] fn observation_values_in_unit_interval() { let e = env(); let mut s = e.new_game(); let mut rng = seeded_rng(6); e.apply_chance(&mut s, &mut rng); for (pov, obs) in [(0, e.observation(&s, 0)), (1, e.observation(&s, 1))] { for (i, &v) in obs.iter().enumerate() { assert!( v >= 0.0 && v <= 1.0, "pov={pov}: obs[{i}] = {v} is outside [0,1]" ); } } } #[test] fn p1_and_p2_observations_differ() { // The board is mirrored for P2, so the two observations should differ // whenever there are checkers in non-symmetric positions (always true // in a real game after a few moves). let e = env(); let mut s = e.new_game(); let mut rng = seeded_rng(7); // Advance far enough that the board is non-trivial. for _ in 0..6 { while e.current_player(&s).is_chance() { e.apply_chance(&mut s, &mut rng); } if e.current_player(&s).is_terminal() { break; } let actions = e.legal_actions(&s); e.apply(&mut s, actions[0]); } if !e.current_player(&s).is_terminal() { let obs0 = e.observation(&s, 0); let obs1 = e.observation(&s, 1); assert_ne!(obs0, obs1, "P1 and P2 observations should differ on a non-symmetric board"); } } // ── Applying actions ────────────────────────────────────────────────── #[test] fn apply_changes_state() { let e = env(); let mut s = e.new_game(); let mut rng = seeded_rng(8); e.apply_chance(&mut s, &mut rng); assert!(e.current_player(&s).is_decision()); let before = s.clone(); let action = e.legal_actions(&s)[0]; e.apply(&mut s, action); assert_ne!( before.turn_stage, s.turn_stage, "state must change after apply" ); } #[test] fn apply_all_legal_actions_do_not_panic() { // Verify that every action returned by legal_actions can be applied // without panicking (on several independent copies of the same state). let e = env(); let mut s = e.new_game(); let mut rng = seeded_rng(9); e.apply_chance(&mut s, &mut rng); assert!(e.current_player(&s).is_decision()); for action in e.legal_actions(&s) { let mut copy = s.clone(); e.apply(&mut copy, action); // must not panic } } // ── Full game ───────────────────────────────────────────────────────── /// Run a complete game with random actions through the `GameEnv` trait /// and verify that: /// - The game terminates. /// - `returns()` is `Some` at the end. /// - The outcome is valid: scores sum to 0 (zero-sum) or each player's /// score is ±1 / 0. /// - No step panics. #[test] fn full_random_game_terminates() { let e = env(); let mut s = e.new_game(); let mut rng = seeded_rng(42); let max_steps = 50_000; for step in 0..max_steps { match e.current_player(&s) { Player::Terminal => break, Player::Chance => e.apply_chance(&mut s, &mut rng), Player::P1 | Player::P2 => { let actions = e.legal_actions(&s); assert!(!actions.is_empty(), "step {step}: empty legal actions at decision node"); let idx = rng.random_range(0..actions.len()); e.apply(&mut s, actions[idx]); } } assert!(step < max_steps - 1, "game did not terminate within {max_steps} steps"); } let result = e.returns(&s); assert!(result.is_some(), "returns() must be Some at Terminal"); let [r1, r2] = result.unwrap(); let sum = r1 + r2; assert!( (sum.abs() < 1e-5) || (sum - 0.0).abs() < 1e-5, "game must be zero-sum: r1={r1}, r2={r2}, sum={sum}" ); assert!( r1.abs() <= 1.0 && r2.abs() <= 1.0, "returns must be in [-1,1]: r1={r1}, r2={r2}" ); } /// Run multiple games with different seeds to stress-test for panics. #[test] fn multiple_games_no_panic() { let e = env(); let max_steps = 20_000; for seed in 0..10u64 { let mut s = e.new_game(); let mut rng = seeded_rng(seed); for _ in 0..max_steps { match e.current_player(&s) { Player::Terminal => break, Player::Chance => e.apply_chance(&mut s, &mut rng), Player::P1 | Player::P2 => { let actions = e.legal_actions(&s); let idx = rng.random_range(0..actions.len()); e.apply(&mut s, actions[idx]); } } } } } // ── Returns ─────────────────────────────────────────────────────────── #[test] fn returns_none_mid_game() { let e = env(); let mut s = e.new_game(); let mut rng = seeded_rng(11); // Advance a few steps but do not finish the game. for _ in 0..4 { match e.current_player(&s) { Player::Terminal => break, Player::Chance => e.apply_chance(&mut s, &mut rng), Player::P1 | Player::P2 => { let actions = e.legal_actions(&s); e.apply(&mut s, actions[0]); } } } if !e.current_player(&s).is_terminal() { assert!( e.returns(&s).is_none(), "returns() must be None before the game ends" ); } } // ── Player 2 actions ────────────────────────────────────────────────── /// Verify that Player 2 (Black) can take actions without panicking, /// and that the state advances correctly. #[test] fn player2_can_act() { let e = env(); let mut s = e.new_game(); let mut rng = seeded_rng(12); // Keep stepping until Player 2 gets a turn. let max_steps = 5_000; let mut p2_acted = false; for _ in 0..max_steps { match e.current_player(&s) { Player::Terminal => break, Player::Chance => e.apply_chance(&mut s, &mut rng), Player::P2 => { let actions = e.legal_actions(&s); assert!(!actions.is_empty()); e.apply(&mut s, actions[0]); p2_acted = true; break; } Player::P1 => { let actions = e.legal_actions(&s); e.apply(&mut s, actions[0]); } } } assert!(p2_acted, "Player 2 never got a turn in {max_steps} steps"); } }