trictrac/bot/src/training_common.rs

271 lines
9.1 KiB
Rust
Raw Normal View History

2025-06-11 17:31:35 +02:00
use std::cmp::{max, min};
2025-06-08 21:20:04 +02:00
2025-05-26 20:44:35 +02:00
use serde::{Deserialize, Serialize};
2025-08-17 15:59:53 +02:00
use store::CheckerMove;
2025-06-01 20:00:15 +02:00
/// Types d'actions possibles dans le jeu
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub enum TrictracAction {
/// Lancer les dés
Roll,
/// Continuer après avoir gagné un trou
Go,
/// Effectuer un mouvement de pions
2025-06-01 20:21:38 +02:00
Move {
2025-06-08 21:20:04 +02:00
dice_order: bool, // true = utiliser dice[0] en premier, false = dice[1] en premier
2025-08-12 17:56:41 +02:00
checker1: usize, // premier pion à déplacer en numérotant depuis la colonne de départ (0-15) 0 : aucun pion
checker2: usize, // deuxième pion (0-15)
},
2025-06-11 17:31:35 +02:00
// Marquer les points : à activer si support des écoles
// Mark,
}
2025-06-01 20:00:15 +02:00
impl TrictracAction {
    /// Index of the first `Move` action; indices 0 and 1 are `Roll` and `Go`.
    const FIRST_MOVE_INDEX: usize = 2;
    /// Number of values each checker field can take (0-15).
    const CHECKER_SLOTS: usize = 16;
    /// Number of `(checker1, checker2)` pairs for one dice order: 16 * 16 = 256.
    const MOVES_PER_DICE_ORDER: usize = Self::CHECKER_SLOTS * Self::CHECKER_SLOTS;

    /// Encodes an action as an index for the neural network.
    ///
    /// Layout:
    /// - `0`: `Roll`
    /// - `1`: `Go`
    /// - `2..=257`: `Move` with `dice_order == true` (first die used first)
    /// - `258..=513`: `Move` with `dice_order == false` (second die used first)
    ///
    /// Within a dice-order range the offset is `checker1 * 16 + checker2`.
    /// `checker1` and `checker2` are expected to be in `0..16`; this method
    /// does not check that, and larger values alias other indices.
    pub fn to_action_index(&self) -> usize {
        match self {
            TrictracAction::Roll => 0,
            TrictracAction::Go => 1,
            TrictracAction::Move {
                dice_order,
                checker1,
                checker2,
            } => {
                let base = if *dice_order {
                    Self::FIRST_MOVE_INDEX
                } else {
                    Self::FIRST_MOVE_INDEX + Self::MOVES_PER_DICE_ORDER
                };
                base + *checker1 * Self::CHECKER_SLOTS + *checker2
            } // TrictracAction::Mark => 514,
        }
    }

    /// Decodes an action index into a `TrictracAction`.
    ///
    /// Returns `None` when `index` is outside the action space, i.e. when
    /// `index >= action_space_size()`. Without that bound an out-of-range
    /// index would decode to a `Move` whose `checker1` exceeds 15.
    pub fn from_action_index(index: usize) -> Option<TrictracAction> {
        match index {
            0 => Some(TrictracAction::Roll),
            1 => Some(TrictracAction::Go),
            // 514 => Some(TrictracAction::Mark),
            i if i < Self::action_space_size() => {
                let (dice_order, checker1, checker2) =
                    Self::decode_move(i - Self::FIRST_MOVE_INDEX);
                Some(TrictracAction::Move {
                    dice_order,
                    checker1,
                    checker2,
                })
            }
            _ => None,
        }
    }

    /// Decodes a move code (an action index minus 2) into
    /// `(dice_order, checker1, checker2)`.
    ///
    /// Codes below 256 have `dice_order == true`; for the others 256 is
    /// subtracted first. The remainder splits into `checker1 = code / 16` and
    /// `checker2 = code % 16`. The caller must pass a code below 512.
    fn decode_move(code: usize) -> (bool, usize, usize) {
        let dice_order = code < Self::MOVES_PER_DICE_ORDER;
        let encoded = if dice_order {
            code
        } else {
            code - Self::MOVES_PER_DICE_ORDER
        };
        (
            dice_order,
            encoded / Self::CHECKER_SLOTS,
            encoded % Self::CHECKER_SLOTS,
        )
    }

    /// Returns the total size of the action space.
    pub fn action_space_size() -> usize {
        // 1 (Roll) + 1 (Go) + moves.
        // Moves: 2 dice orders * 16 values for checker1 * 16 values for checker2.
        Self::FIRST_MOVE_INDEX + 2 * Self::MOVES_PER_DICE_ORDER // = 514
    }

    // Disabled draft of a conversion to `GameEvent`, kept for reference.
    //
    // pub fn to_game_event(&self, player_id: PlayerId, dice: Dice) -> GameEvent {
    //     match action {
    //         TrictracAction::Roll => Some(GameEvent::Roll { player_id }),
    //         TrictracAction::Mark => Some(GameEvent::Mark { player_id, points }),
    //         TrictracAction::Go => Some(GameEvent::Go { player_id }),
    //         TrictracAction::Move {
    //             dice_order,
    //             from1,
    //             from2,
    //         } => {
    //             // Perform a move
    //             let checker_move1 = store::CheckerMove::new(move1.0, move1.1).unwrap_or_default();
    //             let checker_move2 = store::CheckerMove::new(move2.0, move2.1).unwrap_or_default();
    //
    //             Some(GameEvent::Move {
    //                 player_id: self.agent_player_id,
    //                 moves: (checker_move1, checker_move2),
    //             })
    //         }
    //     };
    // }
}
2025-05-26 20:44:35 +02:00
2025-06-01 20:00:15 +02:00
/// Obtient les actions valides pour l'état de jeu actuel
pub fn get_valid_actions(game_state: &crate::GameState) -> Vec<TrictracAction> {
use store::TurnStage;
2025-06-01 20:21:38 +02:00
2025-06-01 20:00:15 +02:00
let mut valid_actions = Vec::new();
2025-06-01 20:21:38 +02:00
2025-06-01 20:00:15 +02:00
let active_player_id = game_state.active_player_id;
let player_color = game_state.player_color_by_id(&active_player_id);
2025-06-01 20:21:38 +02:00
2025-06-01 20:00:15 +02:00
if let Some(color) = player_color {
match game_state.turn_stage {
2025-08-16 21:47:12 +02:00
TurnStage::RollDice => {
2025-06-01 20:00:15 +02:00
valid_actions.push(TrictracAction::Roll);
}
2025-08-16 21:47:12 +02:00
TurnStage::MarkPoints | TurnStage::MarkAdvPoints | TurnStage::RollWaiting => {
2025-06-11 17:31:35 +02:00
// valid_actions.push(TrictracAction::Mark);
2025-08-16 21:47:12 +02:00
panic!(
"get_valid_actions not implemented for turn stage {:?}",
game_state.turn_stage
);
2025-06-01 20:00:15 +02:00
}
TurnStage::HoldOrGoChoice => {
valid_actions.push(TrictracAction::Go);
2025-06-01 20:21:38 +02:00
2025-06-11 17:31:35 +02:00
// Ajoute aussi les mouvements possibles
let rules = store::MoveRules::new(&color, &game_state.board, game_state.dice);
2025-06-01 20:00:15 +02:00
let possible_moves = rules.get_possible_moves_sequences(true, vec![]);
2025-06-01 20:21:38 +02:00
2025-06-11 17:31:35 +02:00
// Modififier checker_moves_to_trictrac_action si on doit gérer Black
assert_eq!(color, store::Color::White);
2025-06-01 20:00:15 +02:00
for (move1, move2) in possible_moves {
2025-06-11 17:31:35 +02:00
valid_actions.push(checker_moves_to_trictrac_action(
2025-08-16 21:47:12 +02:00
&move1, &move2, &color, game_state,
2025-06-11 17:31:35 +02:00
));
2025-06-01 20:00:15 +02:00
}
}
TurnStage::Move => {
let rules = store::MoveRules::new(&color, &game_state.board, game_state.dice);
2025-06-01 20:00:15 +02:00
let possible_moves = rules.get_possible_moves_sequences(true, vec![]);
2025-06-01 20:21:38 +02:00
2025-06-11 17:31:35 +02:00
// Modififier checker_moves_to_trictrac_action si on doit gérer Black
assert_eq!(color, store::Color::White);
2025-06-01 20:00:15 +02:00
for (move1, move2) in possible_moves {
2025-06-11 17:31:35 +02:00
valid_actions.push(checker_moves_to_trictrac_action(
2025-08-16 21:47:12 +02:00
&move1, &move2, &color, game_state,
2025-06-11 17:31:35 +02:00
));
2025-06-01 20:00:15 +02:00
}
}
}
}
2025-06-01 20:21:38 +02:00
2025-08-18 17:44:01 +02:00
if valid_actions.is_empty() {
panic!("empty valid_actions for state {game_state}");
}
2025-06-01 20:00:15 +02:00
valid_actions
}
2025-06-11 17:31:35 +02:00
// Valid only for White player
fn checker_moves_to_trictrac_action(
move1: &CheckerMove,
move2: &CheckerMove,
2025-08-12 17:56:41 +02:00
color: &store::Color,
state: &crate::GameState,
2025-06-11 17:31:35 +02:00
) -> TrictracAction {
let to1 = move1.get_to();
let to2 = move2.get_to();
let from1 = move1.get_from();
let from2 = move2.get_from();
2025-08-12 17:56:41 +02:00
let dice = state.dice;
2025-06-11 17:31:35 +02:00
let mut diff_move1 = if to1 > 0 {
// Mouvement sans sortie
to1 - from1
} else {
// sortie, on utilise la valeur du dé
if to2 > 0 {
// sortie pour le mouvement 1 uniquement
let dice2 = to2 - from2;
if dice2 == dice.values.0 as usize {
dice.values.1 as usize
} else {
dice.values.0 as usize
}
} else {
// double sortie
if from1 < from2 {
max(dice.values.0, dice.values.1) as usize
} else {
min(dice.values.0, dice.values.1) as usize
}
}
};
// modification de diff_move1 si on est dans le cas d'un mouvement par puissance
let rest_field = 12;
if to1 == rest_field
&& to2 == rest_field
&& max(dice.values.0 as usize, dice.values.1 as usize) + min(from1, from2) != rest_field
{
// prise par puissance
diff_move1 += 1;
}
2025-08-12 17:56:41 +02:00
let dice_order = diff_move1 == dice.values.0 as usize;
let checker1 = state.board.get_field_checker(color, from1) as usize;
let mut tmp_board = state.board.clone();
// should not raise an error for a valid action
2025-08-17 15:59:53 +02:00
let move_res = tmp_board.move_checker(color, *move1);
if move_res.is_err() {
panic!("error while moving checker {move_res:?}");
}
2025-08-12 17:56:41 +02:00
let checker2 = tmp_board.get_field_checker(color, from2) as usize;
2025-06-11 17:31:35 +02:00
TrictracAction::Move {
2025-08-12 17:56:41 +02:00
dice_order,
checker1,
checker2,
2025-06-11 17:31:35 +02:00
}
}
2025-06-01 20:00:15 +02:00
/// Retourne les indices des actions valides
pub fn get_valid_action_indices(game_state: &crate::GameState) -> Vec<usize> {
get_valid_actions(game_state)
.into_iter()
.map(|action| action.to_action_index())
.collect()
}
/// Sélectionne une action valide aléatoire
pub fn sample_valid_action(game_state: &crate::GameState) -> Option<TrictracAction> {
2025-06-01 20:21:38 +02:00
use rand::{seq::SliceRandom, thread_rng};
2025-06-01 20:00:15 +02:00
let valid_actions = get_valid_actions(game_state);
let mut rng = thread_rng();
valid_actions.choose(&mut rng).cloned()
}
2025-07-26 09:37:54 +02:00
#[cfg(test)]
mod tests {
    use super::*;

    /// Move shared by both tests; its index is 2 + 3 * 16 + 4 = 54.
    fn sample_move() -> TrictracAction {
        TrictracAction::Move {
            dice_order: true,
            checker1: 3,
            checker2: 4,
        }
    }

    /// Encoding the sample move gives index 54 and decodes back to the same move.
    #[test]
    fn to_action_index() {
        let action = sample_move();
        let index = action.to_action_index();
        assert_eq!(Some(action), TrictracAction::from_action_index(index));
        assert_eq!(54, index);
    }

    /// Index 54 decodes to the sample move.
    #[test]
    fn from_action_index() {
        assert_eq!(Some(sample_move()), TrictracAction::from_action_index(54));
    }
}