wip reduction TrictracAction

2025-08-12 17:56:41 +02:00 · 2025-08-12 17:56:41 +02:00 · ec6ae26d38
parent 5370eb4307
commit ec6ae26d38
9 changed files with 418 additions and 48 deletions
--- a/bot/src/dqn/burnrl/environment.rs
+++ b/bot/src/dqn/burnrl/environment.rs
@ -59,7 +59,7 @@ impl Action for TrictracAction {
    }

    fn size() -> usize {
-        1252
+        514
    }
 }

@ -288,8 +288,8 @@ impl TrictracEnvironment {
            }
            TrictracAction::Move {
                dice_order,
-                from1,
-                from2,
+                checker1,
+                checker2,
            } => {
                // Effectuer un mouvement
                let (dice1, dice2) = if dice_order {
@ -297,7 +297,21 @@ impl TrictracEnvironment {
                } else {
                    (self.game.dice.values.1, self.game.dice.values.0)
                };
+
+                let color = &store::Color::White;
+                let from1 = self
+                    .game
+                    .board
+                    .get_checker_field(color, checker1 as u8)
+                    .unwrap_or(0);
                let mut to1 = from1 + dice1 as usize;
+                let checker_move1 = store::CheckerMove::new(from1, to1).unwrap_or_default();
+
+                let mut tmp_board = self.game.board.clone();
+                tmp_board.move_checker(color, checker_move1);
+                let from2 = tmp_board
+                    .get_checker_field(color, checker2 as u8)
+                    .unwrap_or(0);
                let mut to2 = from2 + dice2 as usize;

                // Gestion prise de coin par puissance
--- a/bot/src/dqn/burnrl_valid/environment.rs
+++ b/bot/src/dqn/burnrl_valid/environment.rs
@ -1,4 +1,4 @@
-use crate::dqn::dqn_common;
+use crate::dqn::dqn_common_big;
 use burn::{prelude::Backend, tensor::Tensor};
 use burn_rl::base::{Action, Environment, Snapshot, State};
 use rand::{thread_rng, Rng};
@ -205,16 +205,16 @@ impl TrictracEnvironment {
    const REWARD_RATIO: f32 = 1.0;

    /// Convertit une action burn-rl vers une action Trictrac
-    pub fn convert_action(action: TrictracAction) -> Option<dqn_common::TrictracAction> {
-        dqn_common::TrictracAction::from_action_index(action.index.try_into().unwrap())
+    pub fn convert_action(action: TrictracAction) -> Option<dqn_common_big::TrictracAction> {
+        dqn_common_big::TrictracAction::from_action_index(action.index.try_into().unwrap())
    }

    /// Convertit l'index d'une action au sein des actions valides vers une action Trictrac
    fn convert_valid_action_index(
        &self,
        action: TrictracAction,
-    ) -> Option<dqn_common::TrictracAction> {
-        use dqn_common::get_valid_actions;
+    ) -> Option<dqn_common_big::TrictracAction> {
+        use dqn_common_big::get_valid_actions;

        // Obtenir les actions valides dans le contexte actuel
        let valid_actions = get_valid_actions(&self.game);
@ -231,10 +231,10 @@ impl TrictracEnvironment {
    /// Exécute une action Trictrac dans le jeu
    // fn execute_action(
    //     &mut self,
-    //     action: dqn_common::TrictracAction,
+    //     action: dqn_common_big::TrictracAction,
    // ) -> Result<f32, Box<dyn std::error::Error>> {
-    fn execute_action(&mut self, action: dqn_common::TrictracAction) -> (f32, bool) {
-        use dqn_common::TrictracAction;
+    fn execute_action(&mut self, action: dqn_common_big::TrictracAction) -> (f32, bool) {
+        use dqn_common_big::TrictracAction;

        let mut reward = 0.0;
        let mut is_rollpoint = false;
--- a/bot/src/dqn/dqn_common.rs
+++ b/bot/src/dqn/dqn_common.rs
@ -13,8 +13,8 @@ pub enum TrictracAction {
    /// Effectuer un mouvement de pions
    Move {
        dice_order: bool, // true = utiliser dice[0] en premier, false = dice[1] en premier
-        from1: usize,     // position de départ du premier pion (0-24)
-        from2: usize,     // position de départ du deuxième pion (0-24)
+        checker1: usize, // premier pion à déplacer en numérotant depuis la colonne de départ (0-15) 0 : aucun pion
+        checker2: usize, // deuxième pion (0-15)
    },
    // Marquer les points : à activer si support des écoles
    // Mark,
@ -28,19 +28,19 @@ impl TrictracAction {
            TrictracAction::Go => 1,
            TrictracAction::Move {
                dice_order,
-                from1,
-                from2,
+                checker1,
+                checker2,
            } => {
                // Encoder les mouvements dans l'espace d'actions
                // Indices 2+ pour les mouvements
-                // de 2 à 1251 (2 à  626 pour dé 1 en premier, 627 à 1251 pour dé 2 en premier)
+                // de 2 à 513 (2 à  257 pour dé 1 en premier, 258 à 513 pour dé 2 en premier)
                let mut start = 2;
                if !dice_order {
-                    // 25 * 25 = 625
-                    start += 625;
+                    // 16 * 16 = 256
+                    start += 256;
                }
-                start + from1 * 25 + from2
-            } // TrictracAction::Mark => 1252,
+                start + checker1 * 16 + checker2
+            } // TrictracAction::Mark => 514,
        }
    }

@ -48,15 +48,15 @@ impl TrictracAction {
    pub fn from_action_index(index: usize) -> Option<TrictracAction> {
        match index {
            0 => Some(TrictracAction::Roll),
-            // 1252 => Some(TrictracAction::Mark),
            1 => Some(TrictracAction::Go),
-            i if i >= 3 => {
-                let move_code = i - 3;
-                let (dice_order, from1, from2) = Self::decode_move(move_code);
+            // 514 => Some(TrictracAction::Mark),
+            i if i >= 2 => {
+                let move_code = i - 2;
+                let (dice_order, checker1, checker2) = Self::decode_move(move_code);
                Some(TrictracAction::Move {
                    dice_order,
-                    from1,
-                    from2,
+                    checker1,
+                    checker2,
                })
            }
            _ => None,
@ -66,13 +66,13 @@ impl TrictracAction {
    /// Décode un entier en paire de mouvements
    fn decode_move(code: usize) -> (bool, usize, usize) {
        let mut encoded = code;
-        let dice_order = code < 626;
+        let dice_order = code < 256;
        if !dice_order {
-            encoded -= 625
+            encoded -= 256
        }
-        let from1 = encoded / 25;
-        let from2 = 1 + encoded % 25;
-        (dice_order, from1, from2)
+        let checker1 = encoded / 16;
+        let checker2 = encoded % 16; // exact inverse of `checker1 * 16 + checker2` (no +1 offset)
+        (dice_order, checker1, checker2)
    }

    /// Retourne la taille de l'espace d'actions total
@ -80,7 +80,7 @@ impl TrictracAction {
        // 1 (Roll) + 1 (Go) + mouvements possibles
        // Pour les mouvements : 2*25*25 = 1250 (choix du dé + position 0-24 pour chaque from)
        // Mais on peut optimiser en limitant aux positions valides (1-24)
-        2 + (2 * 25 * 25) // = 1252
+        2 + (2 * 16 * 16) // = 514
    }

    // pub fn to_game_event(&self, player_id: PlayerId, dice: Dice) -> GameEvent {
@ -136,7 +136,8 @@ pub fn get_valid_actions(game_state: &crate::GameState) -> Vec<TrictracAction> {
                    valid_actions.push(checker_moves_to_trictrac_action(
                        &move1,
                        &move2,
-                        &game_state.dice,
+                        &color,
+                        &game_state,
                    ));
                }
            }
@ -150,7 +151,8 @@ pub fn get_valid_actions(game_state: &crate::GameState) -> Vec<TrictracAction> {
                    valid_actions.push(checker_moves_to_trictrac_action(
                        &move1,
                        &move2,
-                        &game_state.dice,
+                        &color,
+                        &game_state,
                    ));
                }
            }
@ -164,12 +166,14 @@ pub fn get_valid_actions(game_state: &crate::GameState) -> Vec<TrictracAction> {
 fn checker_moves_to_trictrac_action(
    move1: &CheckerMove,
    move2: &CheckerMove,
-    dice: &Dice,
+    color: &store::Color,
+    state: &crate::GameState,
 ) -> TrictracAction {
    let to1 = move1.get_to();
    let to2 = move2.get_to();
    let from1 = move1.get_from();
    let from2 = move2.get_from();
+    let dice = state.dice;

    let mut diff_move1 = if to1 > 0 {
        // Mouvement sans sortie
@ -203,10 +207,17 @@ fn checker_moves_to_trictrac_action(
        // prise par puissance
        diff_move1 += 1;
    }
+    let dice_order = diff_move1 == dice.values.0 as usize;
+
+    let checker1 = state.board.get_field_checker(color, from1) as usize;
+    let mut tmp_board = state.board.clone();
+    // should not raise an error for a valid action
+    tmp_board.move_checker(color, *move1);
+    let checker2 = tmp_board.get_field_checker(color, from2) as usize;
    TrictracAction::Move {
-        dice_order: diff_move1 == dice.values.0 as usize,
-        from1: move1.get_from(),
-        from2: move2.get_from(),
+        dice_order,
+        checker1,
+        checker2,
    }
 }

@ -235,8 +246,8 @@ mod tests {
    fn to_action_index() {
        let action = TrictracAction::Move {
            dice_order: true,
-            from1: 3,
-            from2: 4,
+            checker1: 3,
+            checker2: 4,
        };
        let index = action.to_action_index();
        assert_eq!(Some(action), TrictracAction::from_action_index(index));
@ -247,8 +258,8 @@ mod tests {
    fn from_action_index() {
        let action = TrictracAction::Move {
            dice_order: true,
-            from1: 3,
-            from2: 4,
+            checker1: 3,
+            checker2: 4,
        };
        assert_eq!(Some(action), TrictracAction::from_action_index(81));
    }
--- a/bot/src/dqn/dqn_common_big.rs
+++ b/bot/src/dqn/dqn_common_big.rs
@ -0,0 +1,255 @@
+use std::cmp::{max, min};
+
+use serde::{Deserialize, Serialize};
+use store::{CheckerMove, Dice};
+
+/// Possible action types in the game
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
+pub enum TrictracAction {
+    /// Roll the dice
+    Roll,
+    /// Keep playing after winning a hole
+    Go,
+    /// Move checkers
+    Move {
+        dice_order: bool, // true = use dice[0] first, false = use dice[1] first
+        from1: usize,     // starting position of the first checker (0-24)
+        from2: usize,     // starting position of the second checker (0-24)
+    },
+    // Mark points: to be enabled once "écoles" are supported
+    // Mark,
+}
+
+impl TrictracAction {
+    /// Number of `(from1, from2)` combinations available for one dice order (25 * 25).
+    const MOVES_PER_DICE_ORDER: usize = 25 * 25;
+
+    /// Encodes an action as an index for the neural network.
+    ///
+    /// Layout: 0 = `Roll`, 1 = `Go`, 2..=626 = moves playing die 1 first,
+    /// 627..=1251 = moves playing die 2 first. Inside each range the offset is
+    /// `from1 * 25 + from2`.
+    pub fn to_action_index(&self) -> usize {
+        match self {
+            TrictracAction::Roll => 0,
+            TrictracAction::Go => 1,
+            TrictracAction::Move {
+                dice_order,
+                from1,
+                from2,
+            } => {
+                let start = if *dice_order {
+                    2
+                } else {
+                    2 + Self::MOVES_PER_DICE_ORDER
+                };
+                start + from1 * 25 + from2
+            } // TrictracAction::Mark => 1252,
+        }
+    }
+
+    /// Decodes an action index into a `TrictracAction`.
+    ///
+    /// This is the exact inverse of [`Self::to_action_index`] for `from1` and
+    /// `from2` in `0..=24`. Returns `None` when `index` lies outside the action
+    /// space.
+    pub fn from_action_index(index: usize) -> Option<TrictracAction> {
+        match index {
+            0 => Some(TrictracAction::Roll),
+            1 => Some(TrictracAction::Go),
+            // 1252 => Some(TrictracAction::Mark),
+            // Moves start at index 2, so the move code is `index - 2`. The previous
+            // `index - 3` / `1 + code % 25` pairing skipped index 2 and could not
+            // represent `from2 == 0`.
+            i if i < Self::action_space_size() => {
+                let (dice_order, from1, from2) = Self::decode_move(i - 2);
+                Some(TrictracAction::Move {
+                    dice_order,
+                    from1,
+                    from2,
+                })
+            }
+            _ => None,
+        }
+    }
+
+    /// Decodes a move code (action index minus 2) into `(dice_order, from1, from2)`.
+    fn decode_move(code: usize) -> (bool, usize, usize) {
+        // The first 625 codes play die 1 first, the next 625 play die 2 first.
+        let dice_order = code < Self::MOVES_PER_DICE_ORDER;
+        let encoded = if dice_order {
+            code
+        } else {
+            code - Self::MOVES_PER_DICE_ORDER
+        };
+        (dice_order, encoded / 25, encoded % 25)
+    }
+
+    /// Returns the total size of the action space.
+    pub fn action_space_size() -> usize {
+        // 1 (Roll) + 1 (Go) + possible moves
+        // Moves: 2*25*25 = 1250 (die choice + position 0-24 for each `from`)
+        // This could be reduced further by keeping only valid positions (1-24)
+        2 + 2 * Self::MOVES_PER_DICE_ORDER // = 1252
+    }
+
+    // pub fn to_game_event(&self, player_id: PlayerId, dice: Dice) -> GameEvent {
+    //     match action {
+    //         TrictracAction::Roll => Some(GameEvent::Roll { player_id }),
+    //         TrictracAction::Mark => Some(GameEvent::Mark { player_id, points }),
+    //         TrictracAction::Go => Some(GameEvent::Go { player_id }),
+    //         TrictracAction::Move {
+    //             dice_order,
+    //             from1,
+    //             from2,
+    //         } => {
+    //             // Perform a move
+    //             let checker_move1 = store::CheckerMove::new(move1.0, move1.1).unwrap_or_default();
+    //             let checker_move2 = store::CheckerMove::new(move2.0, move2.1).unwrap_or_default();
+    //
+    //             Some(GameEvent::Move {
+    //                 player_id: self.agent_player_id,
+    //                 moves: (checker_move1, checker_move2),
+    //             })
+    //         }
+    //     };
+    // }
+}
+
+/// Lists the actions that are valid in the current game state.
+///
+/// Returns an empty list when the active player has no color.
+pub fn get_valid_actions(game_state: &crate::GameState) -> Vec<TrictracAction> {
+    use store::TurnStage;
+
+    let mut actions = Vec::new();
+
+    let color = match game_state.player_color_by_id(&game_state.active_player_id) {
+        Some(color) => color,
+        None => return actions,
+    };
+
+    // Appends one `Move` action per playable sequence of two checker moves.
+    let append_moves = |actions: &mut Vec<TrictracAction>| {
+        let rules = store::MoveRules::new(&color, &game_state.board, game_state.dice);
+        let sequences = rules.get_possible_moves_sequences(true, vec![]);
+
+        // checker_moves_to_trictrac_action must be adapted before Black can be handled
+        assert_eq!(color, store::Color::White);
+        actions.extend(sequences.into_iter().map(|(first, second)| {
+            checker_moves_to_trictrac_action(&first, &second, &game_state.dice)
+        }));
+    };
+
+    match game_state.turn_stage {
+        TurnStage::RollDice | TurnStage::RollWaiting => actions.push(TrictracAction::Roll),
+        TurnStage::MarkPoints | TurnStage::MarkAdvPoints => {
+            // actions.push(TrictracAction::Mark);
+        }
+        TurnStage::HoldOrGoChoice => {
+            actions.push(TrictracAction::Go);
+            // Moving is also allowed at this stage
+            append_moves(&mut actions);
+        }
+        TurnStage::Move => append_moves(&mut actions),
+    }
+
+    actions
+}
+
+/// Converts a pair of checker moves into a `TrictracAction::Move`.
+///
+/// `dice_order` is `true` when the die value attributed to `move1` equals
+/// `dice.values.0`; `from1` / `from2` are the origins of the two moves.
+///
+/// Valid only for White player
+fn checker_moves_to_trictrac_action(
+    move1: &CheckerMove,
+    move2: &CheckerMove,
+    dice: &Dice,
+) -> TrictracAction {
+    let to1 = move1.get_to();
+    let to2 = move2.get_to();
+    let from1 = move1.get_from();
+    let from2 = move2.get_from();
+
+    // Die value consumed by the first move.
+    let mut diff_move1 = if to1 > 0 {
+        // Move that stays on the board (no exit)
+        to1 - from1
+    } else {
+        // exit: the distance is unknown, so the die value is used
+        if to2 > 0 {
+            // only move 1 exits: it uses whichever die move 2 did not use
+            let dice2 = to2 - from2;
+            if dice2 == dice.values.0 as usize {
+                dice.values.1 as usize
+            } else {
+                dice.values.0 as usize
+            }
+        } else {
+            // both moves exit: the checker starting further back gets the larger die
+            if from1 < from2 {
+                max(dice.values.0, dice.values.1) as usize
+            } else {
+                min(dice.values.0, dice.values.1) as usize
+            }
+        }
+    };
+
+    // adjust diff_move1 when the move is made "par puissance" (by power)
+    let rest_field = 12;
+    if to1 == rest_field
+        && to2 == rest_field
+        && max(dice.values.0 as usize, dice.values.1 as usize) + min(from1, from2) != rest_field
+    {
+        // corner taken by power: presumably the checkers stop one field short of
+        // where the dice would take them — TODO confirm against MoveRules
+        diff_move1 += 1;
+    }
+    TrictracAction::Move {
+        dice_order: diff_move1 == dice.values.0 as usize,
+        from1: move1.get_from(),
+        from2: move2.get_from(),
+    }
+}
+
+/// Returns the action-space indices of the currently valid actions.
+pub fn get_valid_action_indices(game_state: &crate::GameState) -> Vec<usize> {
+    let actions = get_valid_actions(game_state);
+    actions.iter().map(TrictracAction::to_action_index).collect()
+}
+
+/// Picks one of the currently valid actions at random, or `None` if there is none.
+pub fn sample_valid_action(game_state: &crate::GameState) -> Option<TrictracAction> {
+    use rand::{seq::SliceRandom, thread_rng};
+
+    get_valid_actions(game_state)
+        .choose(&mut thread_rng())
+        .cloned()
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// Move shared by both tests: die 1 first, origins 3 and 4.
+    fn sample_move() -> TrictracAction {
+        TrictracAction::Move {
+            dice_order: true,
+            from1: 3,
+            from2: 4,
+        }
+    }
+
+    #[test]
+    fn to_action_index() {
+        let index = sample_move().to_action_index();
+        assert_eq!(Some(sample_move()), TrictracAction::from_action_index(index));
+        assert_eq!(81, index);
+    }
+
+    #[test]
+    fn from_action_index() {
+        assert_eq!(Some(sample_move()), TrictracAction::from_action_index(81));
+    }
+}
--- a/bot/src/dqn/mod.rs
+++ b/bot/src/dqn/mod.rs
@ -1,5 +1,6 @@
 pub mod burnrl;
 pub mod dqn_common;
+pub mod dqn_common_big;
 pub mod simple;

 pub mod burnrl_valid;
--- a/bot/src/dqn/simple/dqn_trainer.rs
+++ b/bot/src/dqn/simple/dqn_trainer.rs
@ -6,7 +6,7 @@ use std::collections::VecDeque;
 use store::{GameEvent, MoveRules, PointsRules, Stage, TurnStage};

 use super::dqn_model::{DqnConfig, SimpleNeuralNetwork};
-use crate::dqn::dqn_common::{get_valid_actions, TrictracAction};
+use crate::dqn::dqn_common_big::{get_valid_actions, TrictracAction};

 /// Expérience pour le buffer de replay
 #[derive(Debug, Clone, Serialize, Deserialize)]
--- a/bot/src/strategy/dqn.rs
+++ b/bot/src/strategy/dqn.rs
@ -3,7 +3,7 @@ use log::info;
 use std::path::Path;
 use store::MoveRules;

-use crate::dqn::dqn_common::{get_valid_actions, sample_valid_action, TrictracAction};
+use crate::dqn::dqn_common_big::{get_valid_actions, sample_valid_action, TrictracAction};
 use crate::dqn::simple::dqn_model::SimpleNeuralNetwork;

 /// Stratégie DQN pour le bot - ne fait que charger et utiliser un modèle pré-entraîné
--- a/bot/src/strategy/dqnburn.rs
+++ b/bot/src/strategy/dqnburn.rs
@ -117,8 +117,8 @@ impl BotStrategy for DqnBurnStrategy {
        // Utiliser le DQN pour choisir le mouvement
        if let Some(TrictracAction::Move {
            dice_order,
-            from1,
-            from2,
+            checker1,
+            checker2,
        }) = self.get_dqn_action()
        {
            let dicevals = self.game.dice.values;
@ -128,15 +128,33 @@ impl BotStrategy for DqnBurnStrategy {
                (dicevals.1, dicevals.0)
            };

+            let from1 = self
+                .game
+                .board
+                .get_checker_field(&self.color, checker1 as u8)
+                .unwrap_or(0);
+
            if from1 == 0 {
                // empty move
                dice1 = 0;
            }
-            let mut to1 = from1 + dice1 as usize;
-            if 24 < to1 {
+            let mut to1 = if self.color == Color::White {
+                from1 + dice1 as usize
+            } else {
+                from1.saturating_sub(dice1 as usize) // usize cannot go below 0; 0 already means exit
+            };
+            if 24 < to1 {
                // sortie
                to1 = 0;
            }
+
+            let checker_move1 = store::CheckerMove::new(from1, to1).unwrap_or_default();
+
+            let mut tmp_board = self.game.board.clone();
+            tmp_board.move_checker(&self.color, checker_move1);
+            let from2 = tmp_board
+                .get_checker_field(&self.color, checker2 as u8)
+                .unwrap_or(0);
            if from2 == 0 {
                // empty move
                dice2 = 0;
@ -147,6 +165,13 @@ impl BotStrategy for DqnBurnStrategy {
                to2 = 0;
            }

+            // Gestion prise de coin par puissance
+            let opp_rest_field = 13;
+            if to1 == opp_rest_field && to2 == opp_rest_field {
+                to1 -= 1;
+                to2 -= 1;
+            }
+
            let checker_move1 = CheckerMove::new(from1, to1).unwrap_or_default();
            let checker_move2 = CheckerMove::new(from2, to2).unwrap_or_default();

--- a/store/src/board.rs
+++ b/store/src/board.rs
@ -158,6 +158,42 @@ impl Board {
            .unsigned_abs()
    }

+    /// Returns the rank of the last white checker standing on `field`, counting
+    /// white checkers from field 1 upwards, or 0 when `field` holds no white checker.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `color` is not `Color::White`.
+    pub fn get_field_checker(&self, color: &Color, field: Field) -> u8 {
+        assert_eq!(color, &Color::White); // Black would need mirrored handling
+        let mut rank: u8 = 0;
+        for (index, &count) in self.positions.iter().enumerate() {
+            // only positive entries are white checkers
+            if count <= 0 {
+                continue;
+            }
+            rank += count as u8;
+            if index + 1 == field {
+                return rank;
+            }
+        }
+        0
+    }
+
+    /// Returns the field holding the `checker_pos`-th white checker, counting
+    /// white checkers from field 1 upwards.
+    ///
+    /// Returns `None` when `checker_pos` is 0 or exceeds the number of white
+    /// checkers on the board.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `color` is not `Color::White`.
+    pub fn get_checker_field(&self, color: &Color, checker_pos: u8) -> Option<Field> {
+        assert_eq!(color, &Color::White); // Black would need mirrored handling
+        if checker_pos == 0 {
+            return None;
+        }
+        let mut seen: u8 = 0;
+        self.positions
+            .iter()
+            .position(|&count| {
+                // only positive entries are white checkers
+                if count > 0 {
+                    seen += count as u8;
+                }
+                // stop at the first field whose running total reaches the checker
+                checker_pos <= seen
+            })
+            .map(|index| index + 1)
+    }
+
    pub fn to_vec(&self) -> Vec<i8> {
        self.positions.to_vec()
    }
@ -721,4 +757,32 @@ mod tests {
        );
        assert_eq!(vec![2], board.get_quarter_filling_candidate(Color::White));
    }
+
+    #[test]
+    fn get_checker_field() {
+        let white = Color::White;
+        let mut board = Board::new();
+        board.set_positions(
+            &white,
+            [
+                3, 1, 2, 2, 3, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+            ],
+        );
+        // running totals per field: 3, 4, 6, 8, 11, 13
+        let expectations = [(0, None), (5, Some(3)), (6, Some(3)), (14, None)];
+        for &(checker, field) in expectations.iter() {
+            assert_eq!(field, board.get_checker_field(&white, checker));
+        }
+    }
+
+    #[test]
+    fn get_field_checker() {
+        let white = Color::White;
+        let mut board = Board::new();
+        board.set_positions(
+            &white,
+            [
+                3, 1, 2, 2, 3, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+            ],
+        );
+        // running totals per field: 3, 4, 6, ...
+        let expectations = [(2, 4), (3, 6)];
+        for &(field, rank) in expectations.iter() {
+            assert_eq!(rank, board.get_field_checker(&white, field));
+        }
+    }
 }