refact: add cargo "python" feature for pyo3

todo
fix: --n-sim training parameter
2026-03-25 16:06:12 +01:00 · 2026-03-25 16:06:12 +01:00 · 2026-03-25 16:06:11 +01:00 · 2026-03-25 16:05:12 +01:00
8 changed files with 25 additions and 12 deletions
--- a/README.md
+++ b/README.md
@@ -24,7 +24,7 @@ Training of AI bots is the work in progress.

 - game rules and game state are implemented in the _store/_ folder.
 - the command-line application is implemented in _client_cli/_; it allows you to play against a bot, or to have two bots play against each other
- the bots algorithms and the training of their models are implemented in the _bot/_ folder
+- the bots' algorithms and the training of their models are implemented in the _bot/_ and _spiel_bot/_ folders.

 ### _store_ package

--- a/bot/Cargo.toml
+++ b/bot/Cargo.toml
@@ -13,7 +13,7 @@ path = "src/burnrl/main.rs"
 pretty_assertions = "1.4.0"
 serde = { version = "1.0", features = ["derive"] }
 serde_json = "1.0"
-trictrac-store = { path = "../store" }
+trictrac-store = { path = "../store", features = ["python"] }
 rand = "0.9"
 env_logger = "0.10"
 burn = { version = "0.20", features = ["ndarray", "autodiff"] }
--- a/client_cli/Cargo.toml
+++ b/client_cli/Cargo.toml
@@ -13,7 +13,7 @@ bincode = "1.3.3"
 pico-args = "0.5.0"
 pretty_assertions = "1.4.0"
 renet = "0.0.13"
-trictrac-store = { path = "../store" }
+trictrac-store = { path = "../store", features = ["python"] }
 trictrac-bot   = { path = "../bot" }
 spiel_bot      = { path = "../spiel_bot" }
 itertools = "0.13.0"
--- a/spiel_bot/Cargo.toml
+++ b/spiel_bot/Cargo.toml
@@ -4,7 +4,7 @@ version = "0.1.0"
 edition = "2021"

 [dependencies]
-trictrac-store = { path = "../store" }
+trictrac-store = { path = "../store", features = ["python"] }
 trictrac-bot   = { path = "../bot" }
 anyhow         = "1"
 rand           = "0.9"
--- a/spiel_bot/src/mcts/search.rs
+++ b/spiel_bot/src/mcts/search.rs
@@ -156,7 +156,13 @@ pub(super) fn simulate<E: GameEnv>(
        let returns = env
            .returns(&next_state)
            .expect("terminal node must have returns");
-        returns[player_idx]
+        let v = returns[player_idx];
+        // Update child stats so PUCT and mcts_policy count terminal visits.
+        // Store from player_idx's perspective so child.q() is directly usable
+        // by the parent's PUCT selection (high = good for the selecting player).
+        child.n += 1;
+        child.w += v;
+        v
    } else {
        let child_player = next_cp.index().unwrap();
        let v = if crossed_chance {
@@ -166,12 +172,13 @@ pub(super) fn simulate<E: GameEnv>(
            // previously cached children would be for a different outcome.
            let obs = env.observation(&next_state, child_player);
            let (_, value) = evaluator.evaluate(&obs);
-            // Record the visit so that PUCT and mcts_policy use real counts.
-            // Without this, child.n stays 0 for every simulation in games where
-            // every player action is immediately followed by a chance node (e.g.
-            // Trictrac), causing mcts_policy to always return a uniform policy.
+            // Store from player_idx's (parent's) perspective so PUCT works correctly.
+            // `value` is from child_player's POV; negate when child is the opponent
+            // so that child.q() = expected return for the player CHOOSING this child.
+            // Without the negation, root would maximise the opponent's Q-value and
+            // systematically pick the worst action.
            child.n += 1;
-            child.w += value;
+            child.w += if child_player == player_idx { value } else { -value };
            value
        } else if child.expanded {
            simulate(child, next_state, env, evaluator, config, rng, child_player)
--- a/store/Cargo.toml
+++ b/store/Cargo.toml
@@ -12,6 +12,10 @@ name = "trictrac_store"
 # "staticlib" → used by the C++ OpenSpiel game (cxxengine)
 crate-type = ["cdylib", "rlib", "staticlib"]

+[features]
+# Enable Python bindings (required for maturin / AI training). Not available on wasm32.
+python = ["pyo3"]
+
 [dependencies]
 anyhow = "1.0"
 base64 = "0.21.7"
@@ -20,7 +24,7 @@ cxx = "1.0"
 log = "0.4.20"
 merge = "0.1.0"
 # generate python lib (with maturin) to be used in AI training
-pyo3 = { version = "0.23", features = ["extension-module", "abi3-py38"] }
+pyo3 = { version = "0.23", features = ["extension-module", "abi3-py38"], optional = true }
 rand = "0.9"
 serde = { version = "1.0", features = ["derive"] }
 transpose = "0.2.2"
--- a/store/src/lib.rs
+++ b/store/src/lib.rs
@@ -20,6 +20,7 @@ pub use dice::{Dice, DiceRoller};
 pub mod training_common;

 // python interface "trictrac_engine" (for AI training..)
+#[cfg(feature = "python")]
 mod pyengine;

 // C++ interface via cxx.rs (for OpenSpiel C++ integration)
--- a/store/src/player.rs
+++ b/store/src/player.rs
@@ -1,3 +1,4 @@
+#[cfg(feature = "python")]
 use pyo3::prelude::*;
 use serde::{Deserialize, Serialize};
 use std::fmt;
@@ -5,7 +6,7 @@ use std::fmt;
 // This just makes it easier to dissern between a player id and any ol' u64
 pub type PlayerId = u64;

-#[pyclass(eq, eq_int)]
+#[cfg_attr(feature = "python", pyclass(eq, eq_int))]
 #[derive(Copy, Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
 pub enum Color {
    White,
Author	SHA1	Message	Date
Henri Bourcereau	0b06c62fd9	refact: add cargo "python" feature for pyo3	2026-03-25 16:06:12 +01:00
Henri Bourcereau	00f23543a5	todo	2026-03-25 16:06:12 +01:00
Henri Bourcereau	cf50784a23	fix: --n-sim training parameter	2026-03-25 16:06:11 +01:00
Henri Bourcereau	ad30d09311	feat(spiel_bot): cli spiel_bot strategy	2026-03-25 16:05:12 +01:00