fix(spiel_bot): mcts: evaluate with the network after crossing a chance node instead of reusing cached children

This commit is contained in:
Henri Bourcereau 2026-03-07 22:18:59 +01:00
parent b0ae4db2d9
commit 519dfe67ad
2 changed files with 21 additions and 3 deletions

View file

@ -401,8 +401,12 @@ mod tests {
};
let root = run_mcts(&env, &state, &ZeroEval(514), &config, &mut r);
assert!(root.n > 0);
// root.n = 1 (expansion) + n_simulations (one backup per simulation).
assert_eq!(root.n, 1 + config.n_simulations as u32);
// Children visit counts may sum to less than n_simulations when some
// simulations cross a chance node at depth 1 (turn ends after one move)
// and evaluate with the network directly without updating child.n.
let total: u32 = root.children.iter().map(|(_, c)| c.n).sum();
assert_eq!(total, 5);
assert!(total <= config.n_simulations as u32);
}
}

View file

@ -138,8 +138,14 @@ pub(super) fn simulate<E: GameEnv>(
// ── Apply action + advance through any chance nodes ───────────────────
let mut next_state = state;
env.apply(&mut next_state, action);
// Track whether we crossed a chance node (dice roll) on the way down.
// If we did, the child's cached legal actions are for a *different* dice
// outcome and must not be reused — evaluate with the network directly.
let mut crossed_chance = false;
while env.current_player(&next_state).is_chance() {
env.apply_chance(&mut next_state, rng);
crossed_chance = true;
}
let next_cp = env.current_player(&next_state);
@ -153,7 +159,15 @@ pub(super) fn simulate<E: GameEnv>(
returns[player_idx]
} else {
let child_player = next_cp.index().unwrap();
let v = if child.expanded {
let v = if crossed_chance {
// Outcome sampling: after dice, evaluate the resulting position
// directly with the network. Do NOT build the tree across chance
// boundaries — the dice change which actions are legal, so any
// previously cached children would be for a different outcome.
let obs = env.observation(&next_state, child_player);
let (_, value) = evaluator.evaluate(&obs);
value
} else if child.expanded {
simulate(child, next_state, env, evaluator, config, rng, child_player)
} else {
expand::<E>(child, &next_state, env, evaluator, child_player)