fix: --n-sim training parameter

2026-03-11 22:17:03 +01:00 · 2026-03-11 22:17:03 +01:00 · e80dade303
commit e80dade303
parent e7d13c9a02
2 changed files with 10 additions and 4 deletions
--- a/spiel_bot/src/mcts/search.rs
+++ b/spiel_bot/src/mcts/search.rs
@@ -166,6 +166,12 @@ pub(super) fn simulate<E: GameEnv>(
            // previously cached children would be for a different outcome.
            let obs = env.observation(&next_state, child_player);
            let (_, value) = evaluator.evaluate(&obs);
+            // Record the visit so that PUCT and mcts_policy use real counts.
+            // Without this, child.n stays 0 for every simulation in games where
+            // every player action is immediately followed by a chance node (e.g.
+            // Trictrac), causing mcts_policy to always return a uniform policy.
+            child.n += 1;
+            child.w += value;
            value
        } else if child.expanded {
            simulate(child, next_state, env, evaluator, config, rng, child_player)