trictrac/spiel_bot/src/env/trictrac.rs

535 lines
19 KiB
Rust
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

//! [`GameEnv`] implementation for Trictrac.
//!
//! # Game flow (schools_enabled = false)
//!
//! With scoring schools disabled (the standard training configuration),
//! `MarkPoints` and `MarkAdvPoints` stages are never reached — the engine
//! applies them automatically inside `RollResult` and `Move`. The only
//! four stages that actually occur are:
//!
//! | `TurnStage` | [`Player`] kind | Handled by |
//! |-------------|-----------------|------------|
//! | `RollDice` | `Chance` | [`apply_chance`] |
//! | `RollWaiting` | `Chance` | [`apply_chance`] |
//! | `HoldOrGoChoice` | `P1`/`P2` | [`apply`] |
//! | `Move` | `P1`/`P2` | [`apply`] |
//!
//! # Perspective
//!
//! The Trictrac engine always reasons from White's perspective. Player 1 is
//! White; Player 2 is Black. When Player 2 is active, the board is mirrored
//! before computing legal actions, and the resulting event is mirrored back
//! before being applied to the real state. The observation tensor is mirrored
//! according to the requested point of view (`pov`), independently of which
//! player is active. This mirrors the pattern used in `cxxengine.rs` and
//! `random_game.rs`.
use trictrac_store::{
training_common::{get_valid_action_indices, TrictracAction, ACTION_SPACE_SIZE},
Dice, GameEvent, GameState, Stage, TurnStage,
};
use super::{GameEnv, Player};
/// Zero-sized, stateless factory for Trictrac [`GameState`] environments.
///
/// The environment carries no configuration of its own: scoring schools
/// (`schools_enabled`) are always off, so points are marked automatically.
#[derive(Debug, Default, Clone)]
pub struct TrictracEnv;
impl GameEnv for TrictracEnv {
type State = GameState;
// ── State creation ────────────────────────────────────────────────────
/// Creates a fresh game state with two players named `"P1"` and `"P2"`.
fn new_game(&self) -> GameState {
    let (first_name, second_name) = ("P1", "P2");
    GameState::new_with_players(first_name, second_name)
}
// ── Node queries ──────────────────────────────────────────────────────
/// Classifies the node `s` as terminal, chance, or a decision for P1/P2.
///
/// * `Stage::Ended` takes precedence and yields [`Player::Terminal`].
/// * `RollDice` and `RollWaiting` turn stages yield [`Player::Chance`].
/// * Any other turn stage is a decision node: [`Player::P1`] when the
///   active player id is `1`, [`Player::P2`] for every other id.
fn current_player(&self, s: &GameState) -> Player {
    if s.stage == Stage::Ended {
        Player::Terminal
    } else if matches!(
        s.turn_stage,
        TurnStage::RollDice | TurnStage::RollWaiting
    ) {
        Player::Chance
    } else if s.active_player_id == 1 {
        Player::P1
    } else {
        Player::P2
    }
}
/// Lists the action indices the active player may legally choose.
///
/// When Player 2 (id `2`) is active the query runs against a mirrored copy
/// of the state, so the engine keeps reasoning from White's perspective.
/// The indices therefore mean the same thing for both players (checker
/// ordinals are perspective-relative).
///
/// If the engine reports no valid indices, an empty vector is returned.
///
/// # Panics
///
/// Panics in debug builds if called at a `Chance` or `Terminal` node.
fn legal_actions(&self, s: &GameState) -> Vec<usize> {
    debug_assert!(
        self.current_player(s).is_decision(),
        "legal_actions called at a non-decision node (turn_stage={:?})",
        s.turn_stage
    );
    // Only materialise the mirrored board when Black is to play; otherwise
    // borrow the real state directly.
    let mirrored;
    let view: &GameState = if s.active_player_id == 2 {
        mirrored = s.mirror();
        &mirrored
    } else {
        s
    };
    get_valid_action_indices(view).unwrap_or_default()
}
// ── State mutation ────────────────────────────────────────────────────
/// Apply a player action index to the game state.
///
/// The index is decoded into a [`TrictracAction`], which is then converted
/// into a [`GameEvent`] and consumed by `s`. For Player 2 (id `2`) the
/// conversion runs against the mirrored board and the resulting event is
/// mirrored back before being applied to the real state.
///
/// # Panics
///
/// Panics in **all** build profiles (these are plain `panic!`/`expect`
/// calls, not debug assertions) if:
///
/// * `action` cannot be decoded or produces no event for the current
///   state, or
/// * the engine rejects the resulting event in `consume`.
fn apply(&self, s: &mut GameState, action: usize) {
    // Decoding does not depend on the perspective, so do it once up front.
    let decoded = TrictracAction::from_action_index(action);
    let event = if s.active_player_id == 2 {
        // Black: build the event on the mirrored board, then un-mirror it.
        let view = s.mirror();
        decoded
            .and_then(|a| a.to_event(&view))
            .map(|e| e.get_mirror(false))
    } else {
        decoded.and_then(|a| a.to_event(s))
    };
    match event {
        Some(e) => {
            s.consume(&e)
                .expect("apply: consume failed for valid action");
        }
        None => {
            panic!("apply: action index {action} produced no event in state {s}");
        }
    }
}
/// Resolve a chance node by rolling two dice drawn from `rng`.
///
/// Works from either chance stage: at `RollDice` a `Roll` event is consumed
/// first, and in both cases a `RollResult` event carrying two values drawn
/// uniformly from `1..=6` is then consumed. Callers therefore never have to
/// tell `RollDice` and `RollWaiting` apart.
///
/// # Panics
///
/// Panics in debug builds if called at a non-Chance node. Panics in any
/// build if the engine rejects the `Roll` or `RollResult` event.
fn apply_chance<R: rand::Rng>(&self, s: &mut GameState, rng: &mut R) {
    debug_assert!(
        self.current_player(s).is_chance(),
        "apply_chance called at a non-Chance node (turn_stage={:?})",
        s.turn_stage
    );
    // Phase 1 (RollDice only): the active player initiates the roll, which
    // moves the state on to RollWaiting.
    if matches!(s.turn_stage, TurnStage::RollDice) {
        let roller = s.active_player_id;
        s.consume(&GameEvent::Roll { player_id: roller })
            .expect("apply_chance: Roll event failed");
    }
    // Phase 2: deliver the dice. With schools disabled, point marking is
    // handled automatically inside consume().
    let first_die = rng.random_range(1u8..=6);
    let second_die = rng.random_range(1u8..=6);
    let outcome = GameEvent::RollResult {
        player_id: s.active_player_id,
        dice: Dice {
            values: (first_die, second_die),
        },
    };
    s.consume(&outcome)
        .expect("apply_chance: RollResult event failed");
}
// ── Observation ───────────────────────────────────────────────────────
/// Encodes the state as a tensor from the requested point of view.
///
/// `pov == 0` encodes `s` as-is; any other value encodes the mirrored
/// state. (Presumably `0` is Player 1 / White and `1` is Player 2 / Black —
/// confirm against the `GameEnv` trait contract.)
fn observation(&self, s: &GameState, pov: usize) -> Vec<f32> {
    match pov {
        0 => s.to_tensor(),
        _ => s.mirror().to_tensor(),
    }
}
/// Length of the observation vector; the tests in this file check that
/// `observation` returns exactly this many values.
fn obs_size(&self) -> usize {
    const OBSERVATION_LEN: usize = 217;
    OBSERVATION_LEN
}
/// Size of the discrete action space, taken from the engine's
/// `ACTION_SPACE_SIZE` constant. The tests in this file check that every
/// index returned by `legal_actions` is strictly below this value.
fn action_space(&self) -> usize {
ACTION_SPACE_SIZE
}
// ── Terminal values ───────────────────────────────────────────────────
/// Final rewards `[r1, r2]` once the game has ended, `None` before that.
///
/// Each player's cumulative score is `holes × 12 + points` (a player
/// missing from the state counts as `0`). The player with the higher score
/// gets `+1.0` and the other `-1.0`; equal scores give `0.0` to both.
fn returns(&self, s: &GameState) -> Option<[f32; 2]> {
    use std::cmp::Ordering;

    let finished = s.stage == Stage::Ended;
    if !finished {
        return None;
    }
    let total = |id: u64| -> i32 {
        match s.players.get(&id) {
            Some(p) => p.holes as i32 * 12 + p.points as i32,
            None => 0,
        }
    };
    let first_total = total(1);
    let second_total = total(2);
    let outcome = match first_total.cmp(&second_total) {
        Ordering::Less => [-1.0, 1.0],
        Ordering::Equal => [0.0, 0.0],
        Ordering::Greater => [1.0, -1.0],
    };
    Some(outcome)
}
}
// ── Tests ─────────────────────────────────────────────────────────────────────
#[cfg(test)]
mod tests {
use super::*;
use rand::{rngs::SmallRng, Rng, SeedableRng};
fn env() -> TrictracEnv {
TrictracEnv
}
fn seeded_rng(seed: u64) -> SmallRng {
SmallRng::seed_from_u64(seed)
}
// ── Initial state ─────────────────────────────────────────────────────
/// A brand-new game must sit on a chance node and have no result yet.
#[test]
fn new_game_is_chance_node() {
    let game_env = env();
    let fresh = game_env.new_game();
    // The opening turn stage is RollDice, which is reported as Chance.
    assert_eq!(game_env.current_player(&fresh), Player::Chance);
    assert_eq!(game_env.returns(&fresh), None);
}
/// A brand-new game is never terminal and yields no returns.
#[test]
fn new_game_is_not_terminal() {
    let game_env = env();
    let fresh = game_env.new_game();
    let who = game_env.current_player(&fresh);
    assert_ne!(who, Player::Terminal);
    assert_eq!(game_env.returns(&fresh), None);
}
// ── Chance nodes ──────────────────────────────────────────────────────
/// One chance step from the initial state must land on a decision node.
#[test]
fn apply_chance_reaches_decision_node() {
    let game_env = env();
    let mut rng = seeded_rng(1);
    let mut state = game_env.new_game();
    // The game can only end after 12 holes, which a single opening roll
    // cannot produce, so the next node has to be a player decision.
    game_env.apply_chance(&mut state, &mut rng);
    let p = game_env.current_player(&state);
    assert!(
        p.is_decision(),
        "expected decision node after first roll, got {p:?}"
    );
}
/// `apply_chance` must also work when entered half-way, at `RollWaiting`.
#[test]
fn apply_chance_from_rollwaiting() {
    let game_env = env();
    let mut state = game_env.new_game();
    assert_eq!(state.turn_stage, TurnStage::RollDice);
    // Push the state to RollWaiting by hand, bypassing apply_chance.
    let roller = state.active_player_id;
    state
        .consume(&GameEvent::Roll { player_id: roller })
        .unwrap();
    assert_eq!(state.turn_stage, TurnStage::RollWaiting);
    let mut rng = seeded_rng(2);
    game_env.apply_chance(&mut state, &mut rng);
    let next = game_env.current_player(&state);
    assert!(next.is_decision() || next.is_terminal());
}
// ── Legal actions ─────────────────────────────────────────────────────
/// After the opening roll the active player must have at least one move.
#[test]
fn legal_actions_nonempty_after_roll() {
    let game_env = env();
    let mut rng = seeded_rng(3);
    let mut state = game_env.new_game();
    game_env.apply_chance(&mut state, &mut rng);
    assert!(game_env.current_player(&state).is_decision());
    let available = game_env.legal_actions(&state);
    assert!(
        !available.is_empty(),
        "legal_actions must be non-empty at a decision node"
    );
}
/// Every legal action index must be strictly below `action_space()`.
#[test]
fn legal_actions_within_action_space() {
    let game_env = env();
    let mut rng = seeded_rng(4);
    let mut state = game_env.new_game();
    game_env.apply_chance(&mut state, &mut rng);
    let limit = game_env.action_space();
    for a in game_env.legal_actions(&state) {
        assert!(
            a < limit,
            "action {a} out of bounds (action_space={})",
            limit
        );
    }
}
// ── Observations ──────────────────────────────────────────────────────
/// Both points of view must produce observations of length `obs_size()`.
#[test]
fn observation_has_correct_size() {
    let game_env = env();
    let mut rng = seeded_rng(5);
    let mut state = game_env.new_game();
    game_env.apply_chance(&mut state, &mut rng);
    for pov in 0..2 {
        assert_eq!(game_env.observation(&state, pov).len(), game_env.obs_size());
    }
}
/// Every observation feature, from either point of view, lies in `[0, 1]`.
#[test]
fn observation_values_in_unit_interval() {
    let game_env = env();
    let mut rng = seeded_rng(6);
    let mut state = game_env.new_game();
    game_env.apply_chance(&mut state, &mut rng);
    for pov in 0..2 {
        let features = game_env.observation(&state, pov);
        for (i, v) in features.iter().copied().enumerate() {
            assert!(
                (0.0..=1.0).contains(&v),
                "pov={pov}: obs[{i}] = {v} is outside [0,1]"
            );
        }
    }
}
/// Observations for the two points of view should differ once the board is
/// no longer symmetric (the second view is mirrored).
#[test]
fn p1_and_p2_observations_differ() {
    let game_env = env();
    let mut rng = seeded_rng(7);
    let mut state = game_env.new_game();
    // Play up to six decisions (always the first legal action) so the board
    // leaves its initial, trivial layout.
    for _turn in 0..6 {
        while game_env.current_player(&state).is_chance() {
            game_env.apply_chance(&mut state, &mut rng);
        }
        if game_env.current_player(&state).is_terminal() {
            break;
        }
        let first_legal = game_env.legal_actions(&state)[0];
        game_env.apply(&mut state, first_legal);
    }
    // Nothing to compare if the game happened to end already.
    if game_env.current_player(&state).is_terminal() {
        return;
    }
    let white_view = game_env.observation(&state, 0);
    let black_view = game_env.observation(&state, 1);
    assert_ne!(white_view, black_view, "P1 and P2 observations should differ on a non-symmetric board");
}
// ── Applying actions ──────────────────────────────────────────────────
/// Applying a legal action must move the game to a different turn stage.
#[test]
fn apply_changes_state() {
    let game_env = env();
    let mut rng = seeded_rng(8);
    let mut state = game_env.new_game();
    game_env.apply_chance(&mut state, &mut rng);
    assert!(game_env.current_player(&state).is_decision());
    let chosen = game_env.legal_actions(&state)[0];
    // Snapshot taken before the action so the turn stages can be compared.
    let snapshot = state.clone();
    game_env.apply(&mut state, chosen);
    assert_ne!(
        snapshot.turn_stage, state.turn_stage,
        "state must change after apply"
    );
}
/// Each action reported as legal must be applicable without panicking.
#[test]
fn apply_all_legal_actions_do_not_panic() {
    let game_env = env();
    let mut rng = seeded_rng(9);
    let mut state = game_env.new_game();
    game_env.apply_chance(&mut state, &mut rng);
    assert!(game_env.current_player(&state).is_decision());
    // Every action is tried on its own clone so they all start from the
    // same position.
    game_env.legal_actions(&state).into_iter().for_each(|action| {
        let mut scratch = state.clone();
        game_env.apply(&mut scratch, action); // must not panic
    });
}
// ── Full game ─────────────────────────────────────────────────────────
/// Run a complete game with random actions through the `GameEnv` trait
/// and verify that:
/// - No step panics and every decision node offers at least one action.
/// - The game reaches a terminal node within `max_steps` steps.
/// - `returns()` is `Some` at the end.
/// - The outcome is zero-sum and each player's return lies in `[-1, 1]`.
#[test]
fn full_random_game_terminates() {
    let e = env();
    let mut s = e.new_game();
    let mut rng = seeded_rng(42);
    let max_steps = 50_000;
    for step in 0..max_steps {
        match e.current_player(&s) {
            Player::Terminal => break,
            Player::Chance => e.apply_chance(&mut s, &mut rng),
            Player::P1 | Player::P2 => {
                let actions = e.legal_actions(&s);
                assert!(!actions.is_empty(), "step {step}: empty legal actions at decision node");
                let idx = rng.random_range(0..actions.len());
                e.apply(&mut s, actions[idx]);
            }
        }
    }
    // Check termination once, after the loop: this also accepts a game that
    // ends exactly on the last permitted step.
    assert_eq!(
        e.current_player(&s),
        Player::Terminal,
        "game did not terminate within {max_steps} steps"
    );
    let [r1, r2] = e
        .returns(&s)
        .expect("returns() must be Some at Terminal");
    let sum = r1 + r2;
    assert!(
        sum.abs() < 1e-5,
        "game must be zero-sum: r1={r1}, r2={r2}, sum={sum}"
    );
    assert!(
        r1.abs() <= 1.0 && r2.abs() <= 1.0,
        "returns must be in [-1,1]: r1={r1}, r2={r2}"
    );
}
/// Stress test: play ten differently seeded random games (each capped at
/// `max_steps` steps) and make sure none of them panics.
#[test]
fn multiple_games_no_panic() {
    let game_env = env();
    let max_steps = 20_000;
    for seed in 0..10u64 {
        let mut rng = seeded_rng(seed);
        let mut state = game_env.new_game();
        let mut steps_left = max_steps;
        while steps_left > 0 {
            steps_left -= 1;
            match game_env.current_player(&state) {
                Player::Terminal => break,
                Player::Chance => game_env.apply_chance(&mut state, &mut rng),
                Player::P1 | Player::P2 => {
                    let options = game_env.legal_actions(&state);
                    let pick = rng.random_range(0..options.len());
                    game_env.apply(&mut state, options[pick]);
                }
            }
        }
    }
}
// ── Returns ───────────────────────────────────────────────────────────
/// `returns()` must stay `None` while the game is still in progress.
#[test]
fn returns_none_mid_game() {
    let game_env = env();
    let mut rng = seeded_rng(11);
    let mut state = game_env.new_game();
    // Take at most four steps: enough to get going, not enough to finish.
    for _step in 0..4 {
        match game_env.current_player(&state) {
            Player::Terminal => break,
            Player::Chance => game_env.apply_chance(&mut state, &mut rng),
            Player::P1 | Player::P2 => {
                let first_legal = game_env.legal_actions(&state)[0];
                game_env.apply(&mut state, first_legal);
            }
        }
    }
    // Only meaningful if the game has not ended in those few steps.
    if game_env.current_player(&state).is_terminal() {
        return;
    }
    assert!(
        game_env.returns(&state).is_none(),
        "returns() must be None before the game ends"
    );
}
// ── Player 2 actions ──────────────────────────────────────────────────
/// Play until Player 2 (Black) reaches a decision node, then check that it
/// has legal actions and that applying the first one does not panic.
#[test]
fn player2_can_act() {
    let game_env = env();
    let mut rng = seeded_rng(12);
    let mut state = game_env.new_game();
    let max_steps = 5_000;
    let mut p2_acted = false;
    // Step forward (first legal action for P1) until P2 has moved once.
    for _ in 0..max_steps {
        let mover = game_env.current_player(&state);
        match mover {
            Player::Terminal => break,
            Player::Chance => game_env.apply_chance(&mut state, &mut rng),
            Player::P1 => {
                let options = game_env.legal_actions(&state);
                game_env.apply(&mut state, options[0]);
            }
            Player::P2 => {
                let options = game_env.legal_actions(&state);
                assert!(!options.is_empty());
                game_env.apply(&mut state, options[0]);
                p2_acted = true;
                break;
            }
        }
    }
    assert!(p2_acted, "Player 2 never got a turn in {max_steps} steps");
}
}