From ec6ae26d380f6a992423d9fd115c9fd228938624 Mon Sep 17 00:00:00 2001 From: Henri Bourcereau Date: Tue, 12 Aug 2025 17:56:41 +0200 Subject: [PATCH] wip reduction TrictracAction --- bot/src/dqn/burnrl/environment.rs | 20 +- bot/src/dqn/burnrl_valid/environment.rs | 16 +- bot/src/dqn/dqn_common.rs | 73 ++++--- bot/src/dqn/dqn_common_big.rs | 255 ++++++++++++++++++++++++ bot/src/dqn/mod.rs | 1 + bot/src/dqn/simple/dqn_trainer.rs | 2 +- bot/src/strategy/dqn.rs | 2 +- bot/src/strategy/dqnburn.rs | 33 ++- store/src/board.rs | 64 ++++++ 9 files changed, 418 insertions(+), 48 deletions(-) create mode 100644 bot/src/dqn/dqn_common_big.rs diff --git a/bot/src/dqn/burnrl/environment.rs b/bot/src/dqn/burnrl/environment.rs index a774b12..e634200 100644 --- a/bot/src/dqn/burnrl/environment.rs +++ b/bot/src/dqn/burnrl/environment.rs @@ -59,7 +59,7 @@ impl Action for TrictracAction { } fn size() -> usize { - 1252 + 514 } } @@ -288,8 +288,8 @@ impl TrictracEnvironment { } TrictracAction::Move { dice_order, - from1, - from2, + checker1, + checker2, } => { // Effectuer un mouvement let (dice1, dice2) = if dice_order { @@ -297,7 +297,21 @@ impl TrictracEnvironment { } else { (self.game.dice.values.1, self.game.dice.values.0) }; + + let color = &store::Color::White; + let from1 = self + .game + .board + .get_checker_field(color, checker1 as u8) + .unwrap_or(0); let mut to1 = from1 + dice1 as usize; + let checker_move1 = store::CheckerMove::new(from1, to1).unwrap_or_default(); + + let mut tmp_board = self.game.board.clone(); + tmp_board.move_checker(color, checker_move1); + let from2 = tmp_board + .get_checker_field(color, checker2 as u8) + .unwrap_or(0); let mut to2 = from2 + dice2 as usize; // Gestion prise de coin par puissance diff --git a/bot/src/dqn/burnrl_valid/environment.rs b/bot/src/dqn/burnrl_valid/environment.rs index 93e6c14..200aa49 100644 --- a/bot/src/dqn/burnrl_valid/environment.rs +++ b/bot/src/dqn/burnrl_valid/environment.rs @@ -1,4 +1,4 @@ -use crate::dqn::dqn_common; +use crate::dqn::dqn_common_big; use burn::{prelude::Backend, tensor::Tensor}; use burn_rl::base::{Action, Environment, Snapshot, State}; use rand::{thread_rng, Rng}; @@ -205,16 +205,16 @@ impl TrictracEnvironment { const REWARD_RATIO: f32 = 1.0; /// Convertit une action burn-rl vers une action Trictrac - pub fn convert_action(action: TrictracAction) -> Option { - dqn_common::TrictracAction::from_action_index(action.index.try_into().unwrap()) + pub fn convert_action(action: TrictracAction) -> Option { + dqn_common_big::TrictracAction::from_action_index(action.index.try_into().unwrap()) } /// Convertit l'index d'une action au sein des actions valides vers une action Trictrac fn convert_valid_action_index( &self, action: TrictracAction, - ) -> Option { - use dqn_common::get_valid_actions; + ) -> Option { + use dqn_common_big::get_valid_actions; // Obtenir les actions valides dans le contexte actuel let valid_actions = get_valid_actions(&self.game); @@ -231,10 +231,10 @@ impl TrictracEnvironment { /// Exécute une action Trictrac dans le jeu // fn execute_action( // &mut self, - // action: dqn_common::TrictracAction, + // action: dqn_common_big::TrictracAction, // ) -> Result> { - fn execute_action(&mut self, action: dqn_common::TrictracAction) -> (f32, bool) { - use dqn_common::TrictracAction; + fn execute_action(&mut self, action: dqn_common_big::TrictracAction) -> (f32, bool) { + use dqn_common_big::TrictracAction; let mut reward = 0.0; let mut is_rollpoint = false; diff --git a/bot/src/dqn/dqn_common.rs b/bot/src/dqn/dqn_common.rs index 2da4aa5..a5661a0 100644 --- a/bot/src/dqn/dqn_common.rs +++ b/bot/src/dqn/dqn_common.rs @@ -13,8 +13,8 @@ pub enum TrictracAction { /// Effectuer un mouvement de pions Move { dice_order: bool, // true = utiliser dice[0] en premier, false = dice[1] en premier - from1: usize, // position de départ du premier pion (0-24) - from2: usize, // position de départ du deuxième pion (0-24) + checker1: usize, // premier pion à déplacer en numérotant depuis la colonne de départ (0-15) 0 : aucun pion + checker2: usize, // deuxième pion (0-15) }, // Marquer les points : à activer si support des écoles // Mark, @@ -28,19 +28,19 @@ impl TrictracAction { TrictracAction::Go => 1, TrictracAction::Move { dice_order, - from1, - from2, + checker1, + checker2, } => { // Encoder les mouvements dans l'espace d'actions // Indices 2+ pour les mouvements - // de 2 à 1251 (2 à 626 pour dé 1 en premier, 627 à 1251 pour dé 2 en premier) + // de 2 à 513 (2 à 257 pour dé 1 en premier, 258 à 513 pour dé 2 en premier) let mut start = 2; if !dice_order { - // 25 * 25 = 625 - start += 625; + // 16 * 16 = 256 + start += 256; } - start + from1 * 25 + from2 - } // TrictracAction::Mark => 1252, + start + checker1 * 16 + checker2 + } // TrictracAction::Mark => 514, } } @@ -48,15 +48,15 @@ impl TrictracAction { pub fn from_action_index(index: usize) -> Option { match index { 0 => Some(TrictracAction::Roll), - // 1252 => Some(TrictracAction::Mark), 1 => Some(TrictracAction::Go), - i if i >= 3 => { - let move_code = i - 3; - let (dice_order, from1, from2) = Self::decode_move(move_code); + // 514 => Some(TrictracAction::Mark), + i if i >= 2 => { + let move_code = i - 2; + let (dice_order, checker1, checker2) = Self::decode_move(move_code); Some(TrictracAction::Move { dice_order, - from1, - from2, + checker1, + checker2, }) } _ => None, @@ -66,13 +66,13 @@ impl TrictracAction { /// Décode un entier en paire de mouvements fn decode_move(code: usize) -> (bool, usize, usize) { let mut encoded = code; - let dice_order = code < 626; + let dice_order = code < 256; if !dice_order { - encoded -= 625 + encoded -= 256 } - let from1 = encoded / 25; - let from2 = 1 + encoded % 25; - (dice_order, from1, from2) + let checker1 = encoded / 16; + let checker2 = 1 + encoded % 16; + (dice_order, checker1, checker2) } /// Retourne la taille de l'espace d'actions total @@ -80,7 +80,7 @@ impl TrictracAction { // 1 (Roll) + 1 (Go) + mouvements possibles // Pour les mouvements : 2*25*25 = 1250 (choix du dé + position 0-24 pour chaque from) // Mais on peut optimiser en limitant aux positions valides (1-24) - 2 + (2 * 25 * 25) // = 1252 + 2 + (2 * 16 * 16) // = 514 } // pub fn to_game_event(&self, player_id: PlayerId, dice: Dice) -> GameEvent { @@ -136,7 +136,8 @@ pub fn get_valid_actions(game_state: &crate::GameState) -> Vec { valid_actions.push(checker_moves_to_trictrac_action( &move1, &move2, - &game_state.dice, + &color, + &game_state, )); } } @@ -150,7 +151,8 @@ pub fn get_valid_actions(game_state: &crate::GameState) -> Vec { valid_actions.push(checker_moves_to_trictrac_action( &move1, &move2, - &game_state.dice, + &color, + &game_state, )); } } @@ -164,12 +166,14 @@ pub fn get_valid_actions(game_state: &crate::GameState) -> Vec { fn checker_moves_to_trictrac_action( move1: &CheckerMove, move2: &CheckerMove, - dice: &Dice, + color: &store::Color, + state: &crate::GameState, ) -> TrictracAction { let to1 = move1.get_to(); let to2 = move2.get_to(); let from1 = move1.get_from(); let from2 = move2.get_from(); + let dice = state.dice; let mut diff_move1 = if to1 > 0 { // Mouvement sans sortie @@ -203,10 +207,17 @@ fn checker_moves_to_trictrac_action( // prise par puissance diff_move1 += 1; } + let dice_order = diff_move1 == dice.values.0 as usize; + + let checker1 = state.board.get_field_checker(color, from1) as usize; + let mut tmp_board = state.board.clone(); + // should not raise an error for a valid action + tmp_board.move_checker(color, *move1); + let checker2 = tmp_board.get_field_checker(color, from2) as usize; TrictracAction::Move { - dice_order: diff_move1 == dice.values.0 as usize, - from1: move1.get_from(), - from2: move2.get_from(), + dice_order, + checker1, + checker2, } } @@ -235,8 +246,8 @@ mod tests { fn to_action_index() { let action = TrictracAction::Move { dice_order: true, - from1: 3, - from2: 4, + checker1: 3, + checker2: 4, }; let index = action.to_action_index(); assert_eq!(Some(action), TrictracAction::from_action_index(index)); @@ -247,8 +258,8 @@ mod tests { fn from_action_index() { let action = TrictracAction::Move { dice_order: true, - from1: 3, - from2: 4, + checker1: 3, + checker2: 4, }; assert_eq!(Some(action), TrictracAction::from_action_index(81)); } diff --git a/bot/src/dqn/dqn_common_big.rs b/bot/src/dqn/dqn_common_big.rs new file mode 100644 index 0000000..2da4aa5 --- /dev/null +++ b/bot/src/dqn/dqn_common_big.rs @@ -0,0 +1,255 @@ +use std::cmp::{max, min}; + +use serde::{Deserialize, Serialize}; +use store::{CheckerMove, Dice}; + +/// Types d'actions possibles dans le jeu +#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)] +pub enum TrictracAction { + /// Lancer les dés + Roll, + /// Continuer après avoir gagné un trou + Go, + /// Effectuer un mouvement de pions + Move { + dice_order: bool, // true = utiliser dice[0] en premier, false = dice[1] en premier + from1: usize, // position de départ du premier pion (0-24) + from2: usize, // position de départ du deuxième pion (0-24) + }, + // Marquer les points : à activer si support des écoles + // Mark, +} + +impl TrictracAction { + /// Encode une action en index pour le réseau de neurones + pub fn to_action_index(&self) -> usize { + match self { + TrictracAction::Roll => 0, + TrictracAction::Go => 1, + TrictracAction::Move { + dice_order, + from1, + from2, + } => { + // Encoder les mouvements dans l'espace d'actions + // Indices 2+ pour les mouvements + // de 2 à 1251 (2 à 626 pour dé 1 en premier, 627 à 1251 pour dé 2 en premier) + let mut start = 2; + if !dice_order { + // 25 * 25 = 625 + start += 625; + } + start + from1 * 25 + from2 + } // TrictracAction::Mark => 1252, + } + } + + /// Décode un index d'action en TrictracAction + pub fn from_action_index(index: usize) -> Option { + match index { + 0 => Some(TrictracAction::Roll), + // 1252 => Some(TrictracAction::Mark), + 1 => Some(TrictracAction::Go), + i if i >= 3 => { + let move_code = i - 3; + let (dice_order, from1, from2) = Self::decode_move(move_code); + Some(TrictracAction::Move { + dice_order, + from1, + from2, + }) + } + _ => None, + } + } + + /// Décode un entier en paire de mouvements + fn decode_move(code: usize) -> (bool, usize, usize) { + let mut encoded = code; + let dice_order = code < 626; + if !dice_order { + encoded -= 625 + } + let from1 = encoded / 25; + let from2 = 1 + encoded % 25; + (dice_order, from1, from2) + } + + /// Retourne la taille de l'espace d'actions total + pub fn action_space_size() -> usize { + // 1 (Roll) + 1 (Go) + mouvements possibles + // Pour les mouvements : 2*25*25 = 1250 (choix du dé + position 0-24 pour chaque from) + // Mais on peut optimiser en limitant aux positions valides (1-24) + 2 + (2 * 25 * 25) // = 1252 + } + + // pub fn to_game_event(&self, player_id: PlayerId, dice: Dice) -> GameEvent { + // match action { + // TrictracAction::Roll => Some(GameEvent::Roll { player_id }), + // TrictracAction::Mark => Some(GameEvent::Mark { player_id, points }), + // TrictracAction::Go => Some(GameEvent::Go { player_id }), + // TrictracAction::Move { + // dice_order, + // from1, + // from2, + // } => { + // // Effectuer un mouvement + // let checker_move1 = store::CheckerMove::new(move1.0, move1.1).unwrap_or_default(); + // let checker_move2 = store::CheckerMove::new(move2.0, move2.1).unwrap_or_default(); + // + // Some(GameEvent::Move { + // player_id: self.agent_player_id, + // moves: (checker_move1, checker_move2), + // }) + // } + // }; + // } +} + +/// Obtient les actions valides pour l'état de jeu actuel +pub fn get_valid_actions(game_state: &crate::GameState) -> Vec { + use store::TurnStage; + + let mut valid_actions = Vec::new(); + + let active_player_id = game_state.active_player_id; + let player_color = game_state.player_color_by_id(&active_player_id); + + if let Some(color) = player_color { + match game_state.turn_stage { + TurnStage::RollDice | TurnStage::RollWaiting => { + valid_actions.push(TrictracAction::Roll); + } + TurnStage::MarkPoints | TurnStage::MarkAdvPoints => { + // valid_actions.push(TrictracAction::Mark); + } + TurnStage::HoldOrGoChoice => { + valid_actions.push(TrictracAction::Go); + + // Ajoute aussi les mouvements possibles + let rules = store::MoveRules::new(&color, &game_state.board, game_state.dice); + let possible_moves = rules.get_possible_moves_sequences(true, vec![]); + + // Modififier checker_moves_to_trictrac_action si on doit gérer Black + assert_eq!(color, store::Color::White); + for (move1, move2) in possible_moves { + valid_actions.push(checker_moves_to_trictrac_action( + &move1, + &move2, + &game_state.dice, + )); + } + } + TurnStage::Move => { + let rules = store::MoveRules::new(&color, &game_state.board, game_state.dice); + let possible_moves = rules.get_possible_moves_sequences(true, vec![]); + + // Modififier checker_moves_to_trictrac_action si on doit gérer Black + assert_eq!(color, store::Color::White); + for (move1, move2) in possible_moves { + valid_actions.push(checker_moves_to_trictrac_action( + &move1, + &move2, + &game_state.dice, + )); + } + } + } + } + + valid_actions +} + +// Valid only for White player +fn checker_moves_to_trictrac_action( + move1: &CheckerMove, + move2: &CheckerMove, + dice: &Dice, +) -> TrictracAction { + let to1 = move1.get_to(); + let to2 = move2.get_to(); + let from1 = move1.get_from(); + let from2 = move2.get_from(); + + let mut diff_move1 = if to1 > 0 { + // Mouvement sans sortie + to1 - from1 + } else { + // sortie, on utilise la valeur du dé + if to2 > 0 { + // sortie pour le mouvement 1 uniquement + let dice2 = to2 - from2; + if dice2 == dice.values.0 as usize { + dice.values.1 as usize + } else { + dice.values.0 as usize + } + } else { + // double sortie + if from1 < from2 { + max(dice.values.0, dice.values.1) as usize + } else { + min(dice.values.0, dice.values.1) as usize + } + } + }; + + // modification de diff_move1 si on est dans le cas d'un mouvement par puissance + let rest_field = 12; + if to1 == rest_field + && to2 == rest_field + && max(dice.values.0 as usize, dice.values.1 as usize) + min(from1, from2) != rest_field + { + // prise par puissance + diff_move1 += 1; + } + TrictracAction::Move { + dice_order: diff_move1 == dice.values.0 as usize, + from1: move1.get_from(), + from2: move2.get_from(), + } +} + +/// Retourne les indices des actions valides +pub fn get_valid_action_indices(game_state: &crate::GameState) -> Vec { + get_valid_actions(game_state) + .into_iter() + .map(|action| action.to_action_index()) + .collect() +} + +/// Sélectionne une action valide aléatoire +pub fn sample_valid_action(game_state: &crate::GameState) -> Option { + use rand::{seq::SliceRandom, thread_rng}; + + let valid_actions = get_valid_actions(game_state); + let mut rng = thread_rng(); + valid_actions.choose(&mut rng).cloned() +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn to_action_index() { + let action = TrictracAction::Move { + dice_order: true, + from1: 3, + from2: 4, + }; + let index = action.to_action_index(); + assert_eq!(Some(action), TrictracAction::from_action_index(index)); + assert_eq!(81, index); + } + + #[test] + fn from_action_index() { + let action = TrictracAction::Move { + dice_order: true, + from1: 3, + from2: 4, + }; + assert_eq!(Some(action), TrictracAction::from_action_index(81)); + } +} diff --git a/bot/src/dqn/mod.rs b/bot/src/dqn/mod.rs index 7f1572e..ab75746 100644 --- a/bot/src/dqn/mod.rs +++ b/bot/src/dqn/mod.rs @@ -1,5 +1,6 @@ pub mod burnrl; pub mod dqn_common; +pub mod dqn_common_big; pub mod simple; pub mod burnrl_valid; diff --git a/bot/src/dqn/simple/dqn_trainer.rs b/bot/src/dqn/simple/dqn_trainer.rs index 78e6dc7..9a42083 100644 --- a/bot/src/dqn/simple/dqn_trainer.rs +++ b/bot/src/dqn/simple/dqn_trainer.rs @@ -6,7 +6,7 @@ use std::collections::VecDeque; use store::{GameEvent, MoveRules, PointsRules, Stage, TurnStage}; use super::dqn_model::{DqnConfig, SimpleNeuralNetwork}; -use crate::dqn::dqn_common::{get_valid_actions, TrictracAction}; +use crate::dqn::dqn_common_big::{get_valid_actions, TrictracAction}; /// Expérience pour le buffer de replay #[derive(Debug, Clone, Serialize, Deserialize)] diff --git a/bot/src/strategy/dqn.rs b/bot/src/strategy/dqn.rs index cf24684..20ce0d5 100644 --- a/bot/src/strategy/dqn.rs +++ b/bot/src/strategy/dqn.rs @@ -3,7 +3,7 @@ use log::info; use std::path::Path; use store::MoveRules; -use crate::dqn::dqn_common::{get_valid_actions, sample_valid_action, TrictracAction}; +use crate::dqn::dqn_common_big::{get_valid_actions, sample_valid_action, TrictracAction}; use crate::dqn::simple::dqn_model::SimpleNeuralNetwork; /// Stratégie DQN pour le bot - ne fait que charger et utiliser un modèle pré-entraîné diff --git a/bot/src/strategy/dqnburn.rs b/bot/src/strategy/dqnburn.rs index 4fc0c06..6532adb 100644 --- a/bot/src/strategy/dqnburn.rs +++ b/bot/src/strategy/dqnburn.rs @@ -117,8 +117,8 @@ impl BotStrategy for DqnBurnStrategy { // Utiliser le DQN pour choisir le mouvement if let Some(TrictracAction::Move { dice_order, - from1, - from2, + checker1, + checker2, }) = self.get_dqn_action() { let dicevals = self.game.dice.values; @@ -128,15 +128,33 @@ impl BotStrategy for DqnBurnStrategy { (dicevals.1, dicevals.0) }; + let from1 = self + .game + .board + .get_checker_field(&self.color, checker1 as u8) + .unwrap_or(0); + if from1 == 0 { // empty move dice1 = 0; } - let mut to1 = from1 + dice1 as usize; - if 24 < to1 { + let mut to1 = if self.color == Color::White { + from1 + dice1 as usize + } else { + from1 - dice1 as usize + }; + if 24 < to1 || to1 < 0 { // sortie to1 = 0; } + + let checker_move1 = store::CheckerMove::new(from1, to1).unwrap_or_default(); + + let mut tmp_board = self.game.board.clone(); + tmp_board.move_checker(&self.color, checker_move1); + let from2 = tmp_board + .get_checker_field(&self.color, checker2 as u8) + .unwrap_or(0); if from2 == 0 { // empty move dice2 = 0; @@ -147,6 +165,13 @@ impl BotStrategy for DqnBurnStrategy { to2 = 0; } + // Gestion prise de coin par puissance + let opp_rest_field = 13; + if to1 == opp_rest_field && to2 == opp_rest_field { + to1 -= 1; + to2 -= 1; + } + let checker_move1 = CheckerMove::new(from1, to1).unwrap_or_default(); let checker_move2 = CheckerMove::new(from2, to2).unwrap_or_default(); diff --git a/store/src/board.rs b/store/src/board.rs index a838f10..4740f2d 100644 --- a/store/src/board.rs +++ b/store/src/board.rs @@ -158,6 +158,42 @@ impl Board { .unsigned_abs() } + // get the number of the last checker in a field + pub fn get_field_checker(&self, color: &Color, field: Field) -> u8 { + assert_eq!(color, &Color::White); // sinon ajouter la gestion des noirs avec mirror + let mut total_count: u8 = 0; + for (i, checker_count) in self.positions.iter().enumerate() { + // count white checkers (checker_count > 0) + if *checker_count > 0 { + total_count += *checker_count as u8; + if field == i + 1 { + return total_count; + } + } + } + 0 + } + + // get the field of the nth checker + pub fn get_checker_field(&self, color: &Color, checker_pos: u8) -> Option { + assert_eq!(color, &Color::White); // sinon ajouter la gestion des noirs avec mirror + if checker_pos == 0 { + return None; + } + let mut total_count: u8 = 0; + for (i, checker_count) in self.positions.iter().enumerate() { + // count white checkers (checker_count > 0) + if *checker_count > 0 { + total_count += *checker_count as u8; + } + // return the current field if it contains the checker + if checker_pos <= total_count { + return Some(i + 1); + } + } + None + } + pub fn to_vec(&self) -> Vec { self.positions.to_vec() } @@ -721,4 +757,32 @@ mod tests { ); assert_eq!(vec![2], board.get_quarter_filling_candidate(Color::White)); } + + #[test] + fn get_checker_field() { + let mut board = Board::new(); + board.set_positions( + &Color::White, + [ + 3, 1, 2, 2, 3, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ], + ); + assert_eq!(None, board.get_checker_field(&Color::White, 0)); + assert_eq!(Some(3), board.get_checker_field(&Color::White, 5)); + assert_eq!(Some(3), board.get_checker_field(&Color::White, 6)); + assert_eq!(None, board.get_checker_field(&Color::White, 14)); + } + + #[test] + fn get_field_checker() { + let mut board = Board::new(); + board.set_positions( + &Color::White, + [ + 3, 1, 2, 2, 3, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + ], + ); + assert_eq!(4, board.get_field_checker(&Color::White, 2)); + assert_eq!(6, board.get_field_checker(&Color::White, 3)); + } }