wip reduction TrictracAction
This commit is contained in:
parent
5370eb4307
commit
ec6ae26d38
|
|
@ -59,7 +59,7 @@ impl Action for TrictracAction {
|
||||||
}
|
}
|
||||||
|
|
||||||
fn size() -> usize {
|
fn size() -> usize {
|
||||||
1252
|
514
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -288,8 +288,8 @@ impl TrictracEnvironment {
|
||||||
}
|
}
|
||||||
TrictracAction::Move {
|
TrictracAction::Move {
|
||||||
dice_order,
|
dice_order,
|
||||||
from1,
|
checker1,
|
||||||
from2,
|
checker2,
|
||||||
} => {
|
} => {
|
||||||
// Effectuer un mouvement
|
// Effectuer un mouvement
|
||||||
let (dice1, dice2) = if dice_order {
|
let (dice1, dice2) = if dice_order {
|
||||||
|
|
@ -297,7 +297,21 @@ impl TrictracEnvironment {
|
||||||
} else {
|
} else {
|
||||||
(self.game.dice.values.1, self.game.dice.values.0)
|
(self.game.dice.values.1, self.game.dice.values.0)
|
||||||
};
|
};
|
||||||
|
|
||||||
|
let color = &store::Color::White;
|
||||||
|
let from1 = self
|
||||||
|
.game
|
||||||
|
.board
|
||||||
|
.get_checker_field(color, checker1 as u8)
|
||||||
|
.unwrap_or(0);
|
||||||
let mut to1 = from1 + dice1 as usize;
|
let mut to1 = from1 + dice1 as usize;
|
||||||
|
let checker_move1 = store::CheckerMove::new(from1, to1).unwrap_or_default();
|
||||||
|
|
||||||
|
let mut tmp_board = self.game.board.clone();
|
||||||
|
tmp_board.move_checker(color, checker_move1);
|
||||||
|
let from2 = tmp_board
|
||||||
|
.get_checker_field(color, checker2 as u8)
|
||||||
|
.unwrap_or(0);
|
||||||
let mut to2 = from2 + dice2 as usize;
|
let mut to2 = from2 + dice2 as usize;
|
||||||
|
|
||||||
// Gestion prise de coin par puissance
|
// Gestion prise de coin par puissance
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,4 @@
|
||||||
use crate::dqn::dqn_common;
|
use crate::dqn::dqn_common_big;
|
||||||
use burn::{prelude::Backend, tensor::Tensor};
|
use burn::{prelude::Backend, tensor::Tensor};
|
||||||
use burn_rl::base::{Action, Environment, Snapshot, State};
|
use burn_rl::base::{Action, Environment, Snapshot, State};
|
||||||
use rand::{thread_rng, Rng};
|
use rand::{thread_rng, Rng};
|
||||||
|
|
@ -205,16 +205,16 @@ impl TrictracEnvironment {
|
||||||
const REWARD_RATIO: f32 = 1.0;
|
const REWARD_RATIO: f32 = 1.0;
|
||||||
|
|
||||||
/// Convertit une action burn-rl vers une action Trictrac
|
/// Convertit une action burn-rl vers une action Trictrac
|
||||||
pub fn convert_action(action: TrictracAction) -> Option<dqn_common::TrictracAction> {
|
pub fn convert_action(action: TrictracAction) -> Option<dqn_common_big::TrictracAction> {
|
||||||
dqn_common::TrictracAction::from_action_index(action.index.try_into().unwrap())
|
dqn_common_big::TrictracAction::from_action_index(action.index.try_into().unwrap())
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Convertit l'index d'une action au sein des actions valides vers une action Trictrac
|
/// Convertit l'index d'une action au sein des actions valides vers une action Trictrac
|
||||||
fn convert_valid_action_index(
|
fn convert_valid_action_index(
|
||||||
&self,
|
&self,
|
||||||
action: TrictracAction,
|
action: TrictracAction,
|
||||||
) -> Option<dqn_common::TrictracAction> {
|
) -> Option<dqn_common_big::TrictracAction> {
|
||||||
use dqn_common::get_valid_actions;
|
use dqn_common_big::get_valid_actions;
|
||||||
|
|
||||||
// Obtenir les actions valides dans le contexte actuel
|
// Obtenir les actions valides dans le contexte actuel
|
||||||
let valid_actions = get_valid_actions(&self.game);
|
let valid_actions = get_valid_actions(&self.game);
|
||||||
|
|
@ -231,10 +231,10 @@ impl TrictracEnvironment {
|
||||||
/// Exécute une action Trictrac dans le jeu
|
/// Exécute une action Trictrac dans le jeu
|
||||||
// fn execute_action(
|
// fn execute_action(
|
||||||
// &mut self,
|
// &mut self,
|
||||||
// action: dqn_common::TrictracAction,
|
// action: dqn_common_big::TrictracAction,
|
||||||
// ) -> Result<f32, Box<dyn std::error::Error>> {
|
// ) -> Result<f32, Box<dyn std::error::Error>> {
|
||||||
fn execute_action(&mut self, action: dqn_common::TrictracAction) -> (f32, bool) {
|
fn execute_action(&mut self, action: dqn_common_big::TrictracAction) -> (f32, bool) {
|
||||||
use dqn_common::TrictracAction;
|
use dqn_common_big::TrictracAction;
|
||||||
|
|
||||||
let mut reward = 0.0;
|
let mut reward = 0.0;
|
||||||
let mut is_rollpoint = false;
|
let mut is_rollpoint = false;
|
||||||
|
|
|
||||||
|
|
@ -13,8 +13,8 @@ pub enum TrictracAction {
|
||||||
/// Effectuer un mouvement de pions
|
/// Effectuer un mouvement de pions
|
||||||
Move {
|
Move {
|
||||||
dice_order: bool, // true = utiliser dice[0] en premier, false = dice[1] en premier
|
dice_order: bool, // true = utiliser dice[0] en premier, false = dice[1] en premier
|
||||||
from1: usize, // position de départ du premier pion (0-24)
|
checker1: usize, // premier pion à déplacer en numérotant depuis la colonne de départ (0-15) 0 : aucun pion
|
||||||
from2: usize, // position de départ du deuxième pion (0-24)
|
checker2: usize, // deuxième pion (0-15)
|
||||||
},
|
},
|
||||||
// Marquer les points : à activer si support des écoles
|
// Marquer les points : à activer si support des écoles
|
||||||
// Mark,
|
// Mark,
|
||||||
|
|
@ -28,19 +28,19 @@ impl TrictracAction {
|
||||||
TrictracAction::Go => 1,
|
TrictracAction::Go => 1,
|
||||||
TrictracAction::Move {
|
TrictracAction::Move {
|
||||||
dice_order,
|
dice_order,
|
||||||
from1,
|
checker1,
|
||||||
from2,
|
checker2,
|
||||||
} => {
|
} => {
|
||||||
// Encoder les mouvements dans l'espace d'actions
|
// Encoder les mouvements dans l'espace d'actions
|
||||||
// Indices 2+ pour les mouvements
|
// Indices 2+ pour les mouvements
|
||||||
// de 2 à 1251 (2 à 626 pour dé 1 en premier, 627 à 1251 pour dé 2 en premier)
|
// de 2 à 513 (2 à 257 pour dé 1 en premier, 258 à 513 pour dé 2 en premier)
|
||||||
let mut start = 2;
|
let mut start = 2;
|
||||||
if !dice_order {
|
if !dice_order {
|
||||||
// 25 * 25 = 625
|
// 16 * 16 = 256
|
||||||
start += 625;
|
start += 256;
|
||||||
}
|
}
|
||||||
start + from1 * 25 + from2
|
start + checker1 * 16 + checker2
|
||||||
} // TrictracAction::Mark => 1252,
|
} // TrictracAction::Mark => 514,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -48,15 +48,15 @@ impl TrictracAction {
|
||||||
pub fn from_action_index(index: usize) -> Option<TrictracAction> {
|
pub fn from_action_index(index: usize) -> Option<TrictracAction> {
|
||||||
match index {
|
match index {
|
||||||
0 => Some(TrictracAction::Roll),
|
0 => Some(TrictracAction::Roll),
|
||||||
// 1252 => Some(TrictracAction::Mark),
|
|
||||||
1 => Some(TrictracAction::Go),
|
1 => Some(TrictracAction::Go),
|
||||||
i if i >= 3 => {
|
// 514 => Some(TrictracAction::Mark),
|
||||||
let move_code = i - 3;
|
i if i >= 2 => {
|
||||||
let (dice_order, from1, from2) = Self::decode_move(move_code);
|
let move_code = i - 2;
|
||||||
|
let (dice_order, checker1, checker2) = Self::decode_move(move_code);
|
||||||
Some(TrictracAction::Move {
|
Some(TrictracAction::Move {
|
||||||
dice_order,
|
dice_order,
|
||||||
from1,
|
checker1,
|
||||||
from2,
|
checker2,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
_ => None,
|
_ => None,
|
||||||
|
|
@ -66,13 +66,13 @@ impl TrictracAction {
|
||||||
/// Décode un entier en paire de mouvements
|
/// Décode un entier en paire de mouvements
|
||||||
fn decode_move(code: usize) -> (bool, usize, usize) {
|
fn decode_move(code: usize) -> (bool, usize, usize) {
|
||||||
let mut encoded = code;
|
let mut encoded = code;
|
||||||
let dice_order = code < 626;
|
let dice_order = code < 256;
|
||||||
if !dice_order {
|
if !dice_order {
|
||||||
encoded -= 625
|
encoded -= 256
|
||||||
}
|
}
|
||||||
let from1 = encoded / 25;
|
let checker1 = encoded / 16;
|
||||||
let from2 = 1 + encoded % 25;
|
let checker2 = 1 + encoded % 16;
|
||||||
(dice_order, from1, from2)
|
(dice_order, checker1, checker2)
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Retourne la taille de l'espace d'actions total
|
/// Retourne la taille de l'espace d'actions total
|
||||||
|
|
@ -80,7 +80,7 @@ impl TrictracAction {
|
||||||
// 1 (Roll) + 1 (Go) + mouvements possibles
|
// 1 (Roll) + 1 (Go) + mouvements possibles
|
||||||
// Pour les mouvements : 2*25*25 = 1250 (choix du dé + position 0-24 pour chaque from)
|
// Pour les mouvements : 2*25*25 = 1250 (choix du dé + position 0-24 pour chaque from)
|
||||||
// Mais on peut optimiser en limitant aux positions valides (1-24)
|
// Mais on peut optimiser en limitant aux positions valides (1-24)
|
||||||
2 + (2 * 25 * 25) // = 1252
|
2 + (2 * 16 * 16) // = 514
|
||||||
}
|
}
|
||||||
|
|
||||||
// pub fn to_game_event(&self, player_id: PlayerId, dice: Dice) -> GameEvent {
|
// pub fn to_game_event(&self, player_id: PlayerId, dice: Dice) -> GameEvent {
|
||||||
|
|
@ -136,7 +136,8 @@ pub fn get_valid_actions(game_state: &crate::GameState) -> Vec<TrictracAction> {
|
||||||
valid_actions.push(checker_moves_to_trictrac_action(
|
valid_actions.push(checker_moves_to_trictrac_action(
|
||||||
&move1,
|
&move1,
|
||||||
&move2,
|
&move2,
|
||||||
&game_state.dice,
|
&color,
|
||||||
|
&game_state,
|
||||||
));
|
));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -150,7 +151,8 @@ pub fn get_valid_actions(game_state: &crate::GameState) -> Vec<TrictracAction> {
|
||||||
valid_actions.push(checker_moves_to_trictrac_action(
|
valid_actions.push(checker_moves_to_trictrac_action(
|
||||||
&move1,
|
&move1,
|
||||||
&move2,
|
&move2,
|
||||||
&game_state.dice,
|
&color,
|
||||||
|
&game_state,
|
||||||
));
|
));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -164,12 +166,14 @@ pub fn get_valid_actions(game_state: &crate::GameState) -> Vec<TrictracAction> {
|
||||||
fn checker_moves_to_trictrac_action(
|
fn checker_moves_to_trictrac_action(
|
||||||
move1: &CheckerMove,
|
move1: &CheckerMove,
|
||||||
move2: &CheckerMove,
|
move2: &CheckerMove,
|
||||||
dice: &Dice,
|
color: &store::Color,
|
||||||
|
state: &crate::GameState,
|
||||||
) -> TrictracAction {
|
) -> TrictracAction {
|
||||||
let to1 = move1.get_to();
|
let to1 = move1.get_to();
|
||||||
let to2 = move2.get_to();
|
let to2 = move2.get_to();
|
||||||
let from1 = move1.get_from();
|
let from1 = move1.get_from();
|
||||||
let from2 = move2.get_from();
|
let from2 = move2.get_from();
|
||||||
|
let dice = state.dice;
|
||||||
|
|
||||||
let mut diff_move1 = if to1 > 0 {
|
let mut diff_move1 = if to1 > 0 {
|
||||||
// Mouvement sans sortie
|
// Mouvement sans sortie
|
||||||
|
|
@ -203,10 +207,17 @@ fn checker_moves_to_trictrac_action(
|
||||||
// prise par puissance
|
// prise par puissance
|
||||||
diff_move1 += 1;
|
diff_move1 += 1;
|
||||||
}
|
}
|
||||||
|
let dice_order = diff_move1 == dice.values.0 as usize;
|
||||||
|
|
||||||
|
let checker1 = state.board.get_field_checker(color, from1) as usize;
|
||||||
|
let mut tmp_board = state.board.clone();
|
||||||
|
// should not raise an error for a valid action
|
||||||
|
tmp_board.move_checker(color, *move1);
|
||||||
|
let checker2 = tmp_board.get_field_checker(color, from2) as usize;
|
||||||
TrictracAction::Move {
|
TrictracAction::Move {
|
||||||
dice_order: diff_move1 == dice.values.0 as usize,
|
dice_order,
|
||||||
from1: move1.get_from(),
|
checker1,
|
||||||
from2: move2.get_from(),
|
checker2,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -235,8 +246,8 @@ mod tests {
|
||||||
fn to_action_index() {
|
fn to_action_index() {
|
||||||
let action = TrictracAction::Move {
|
let action = TrictracAction::Move {
|
||||||
dice_order: true,
|
dice_order: true,
|
||||||
from1: 3,
|
checker1: 3,
|
||||||
from2: 4,
|
checker2: 4,
|
||||||
};
|
};
|
||||||
let index = action.to_action_index();
|
let index = action.to_action_index();
|
||||||
assert_eq!(Some(action), TrictracAction::from_action_index(index));
|
assert_eq!(Some(action), TrictracAction::from_action_index(index));
|
||||||
|
|
@ -247,8 +258,8 @@ mod tests {
|
||||||
fn from_action_index() {
|
fn from_action_index() {
|
||||||
let action = TrictracAction::Move {
|
let action = TrictracAction::Move {
|
||||||
dice_order: true,
|
dice_order: true,
|
||||||
from1: 3,
|
checker1: 3,
|
||||||
from2: 4,
|
checker2: 4,
|
||||||
};
|
};
|
||||||
assert_eq!(Some(action), TrictracAction::from_action_index(81));
|
assert_eq!(Some(action), TrictracAction::from_action_index(81));
|
||||||
}
|
}
|
||||||
|
|
|
||||||
255
bot/src/dqn/dqn_common_big.rs
Normal file
255
bot/src/dqn/dqn_common_big.rs
Normal file
|
|
@ -0,0 +1,255 @@
|
||||||
|
use std::cmp::{max, min};
|
||||||
|
|
||||||
|
use serde::{Deserialize, Serialize};
|
||||||
|
use store::{CheckerMove, Dice};
|
||||||
|
|
||||||
|
/// Kinds of actions available in the game
|
||||||
|
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
|
||||||
|
pub enum TrictracAction {
|
||||||
|
/// Roll the dice
|
||||||
|
Roll,
|
||||||
|
/// Keep playing after winning a hole
|
||||||
|
Go,
|
||||||
|
/// Move checkers
|
||||||
|
Move {
|
||||||
|
dice_order: bool, // true = use dice[0] first, false = use dice[1] first
|
||||||
|
from1: usize, // starting position of the first checker (0-24)
|
||||||
|
||||||
|
from2: usize, // starting position of the second checker (0-24)
|
||||||
|
},
|
||||||
|
// Mark points: to be enabled once "écoles" are supported
|
||||||
|
// Mark,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl TrictracAction {
|
||||||
|
/// Encode une action en index pour le réseau de neurones
|
||||||
|
pub fn to_action_index(&self) -> usize {
|
||||||
|
match self {
|
||||||
|
TrictracAction::Roll => 0,
|
||||||
|
TrictracAction::Go => 1,
|
||||||
|
TrictracAction::Move {
|
||||||
|
dice_order,
|
||||||
|
from1,
|
||||||
|
from2,
|
||||||
|
} => {
|
||||||
|
// Encoder les mouvements dans l'espace d'actions
|
||||||
|
// Indices 2+ pour les mouvements
|
||||||
|
// de 2 à 1251 (2 à 626 pour dé 1 en premier, 627 à 1251 pour dé 2 en premier)
|
||||||
|
let mut start = 2;
|
||||||
|
if !dice_order {
|
||||||
|
// 25 * 25 = 625
|
||||||
|
start += 625;
|
||||||
|
}
|
||||||
|
start + from1 * 25 + from2
|
||||||
|
} // TrictracAction::Mark => 1252,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Décode un index d'action en TrictracAction
|
||||||
|
pub fn from_action_index(index: usize) -> Option<TrictracAction> {
|
||||||
|
match index {
|
||||||
|
0 => Some(TrictracAction::Roll),
|
||||||
|
// 1252 => Some(TrictracAction::Mark),
|
||||||
|
1 => Some(TrictracAction::Go),
|
||||||
|
i if i >= 3 => {
|
||||||
|
let move_code = i - 3;
|
||||||
|
let (dice_order, from1, from2) = Self::decode_move(move_code);
|
||||||
|
Some(TrictracAction::Move {
|
||||||
|
dice_order,
|
||||||
|
from1,
|
||||||
|
from2,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
_ => None,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Décode un entier en paire de mouvements
|
||||||
|
fn decode_move(code: usize) -> (bool, usize, usize) {
|
||||||
|
let mut encoded = code;
|
||||||
|
let dice_order = code < 626;
|
||||||
|
if !dice_order {
|
||||||
|
encoded -= 625
|
||||||
|
}
|
||||||
|
let from1 = encoded / 25;
|
||||||
|
let from2 = 1 + encoded % 25;
|
||||||
|
(dice_order, from1, from2)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Retourne la taille de l'espace d'actions total
|
||||||
|
pub fn action_space_size() -> usize {
|
||||||
|
// 1 (Roll) + 1 (Go) + mouvements possibles
|
||||||
|
// Pour les mouvements : 2*25*25 = 1250 (choix du dé + position 0-24 pour chaque from)
|
||||||
|
// Mais on peut optimiser en limitant aux positions valides (1-24)
|
||||||
|
2 + (2 * 25 * 25) // = 1252
|
||||||
|
}
|
||||||
|
|
||||||
|
// pub fn to_game_event(&self, player_id: PlayerId, dice: Dice) -> GameEvent {
|
||||||
|
// match action {
|
||||||
|
// TrictracAction::Roll => Some(GameEvent::Roll { player_id }),
|
||||||
|
// TrictracAction::Mark => Some(GameEvent::Mark { player_id, points }),
|
||||||
|
// TrictracAction::Go => Some(GameEvent::Go { player_id }),
|
||||||
|
// TrictracAction::Move {
|
||||||
|
// dice_order,
|
||||||
|
// from1,
|
||||||
|
// from2,
|
||||||
|
// } => {
|
||||||
|
// // Effectuer un mouvement
|
||||||
|
// let checker_move1 = store::CheckerMove::new(move1.0, move1.1).unwrap_or_default();
|
||||||
|
// let checker_move2 = store::CheckerMove::new(move2.0, move2.1).unwrap_or_default();
|
||||||
|
//
|
||||||
|
// Some(GameEvent::Move {
|
||||||
|
// player_id: self.agent_player_id,
|
||||||
|
// moves: (checker_move1, checker_move2),
|
||||||
|
// })
|
||||||
|
// }
|
||||||
|
// };
|
||||||
|
// }
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Obtient les actions valides pour l'état de jeu actuel
|
||||||
|
pub fn get_valid_actions(game_state: &crate::GameState) -> Vec<TrictracAction> {
|
||||||
|
use store::TurnStage;
|
||||||
|
|
||||||
|
let mut valid_actions = Vec::new();
|
||||||
|
|
||||||
|
let active_player_id = game_state.active_player_id;
|
||||||
|
let player_color = game_state.player_color_by_id(&active_player_id);
|
||||||
|
|
||||||
|
if let Some(color) = player_color {
|
||||||
|
match game_state.turn_stage {
|
||||||
|
TurnStage::RollDice | TurnStage::RollWaiting => {
|
||||||
|
valid_actions.push(TrictracAction::Roll);
|
||||||
|
}
|
||||||
|
TurnStage::MarkPoints | TurnStage::MarkAdvPoints => {
|
||||||
|
// valid_actions.push(TrictracAction::Mark);
|
||||||
|
}
|
||||||
|
TurnStage::HoldOrGoChoice => {
|
||||||
|
valid_actions.push(TrictracAction::Go);
|
||||||
|
|
||||||
|
// Ajoute aussi les mouvements possibles
|
||||||
|
let rules = store::MoveRules::new(&color, &game_state.board, game_state.dice);
|
||||||
|
let possible_moves = rules.get_possible_moves_sequences(true, vec![]);
|
||||||
|
|
||||||
|
// Modififier checker_moves_to_trictrac_action si on doit gérer Black
|
||||||
|
assert_eq!(color, store::Color::White);
|
||||||
|
for (move1, move2) in possible_moves {
|
||||||
|
valid_actions.push(checker_moves_to_trictrac_action(
|
||||||
|
&move1,
|
||||||
|
&move2,
|
||||||
|
&game_state.dice,
|
||||||
|
));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
TurnStage::Move => {
|
||||||
|
let rules = store::MoveRules::new(&color, &game_state.board, game_state.dice);
|
||||||
|
let possible_moves = rules.get_possible_moves_sequences(true, vec![]);
|
||||||
|
|
||||||
|
// Modififier checker_moves_to_trictrac_action si on doit gérer Black
|
||||||
|
assert_eq!(color, store::Color::White);
|
||||||
|
for (move1, move2) in possible_moves {
|
||||||
|
valid_actions.push(checker_moves_to_trictrac_action(
|
||||||
|
&move1,
|
||||||
|
&move2,
|
||||||
|
&game_state.dice,
|
||||||
|
));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
valid_actions
|
||||||
|
}
|
||||||
|
|
||||||
|
// Valid only for White player
|
||||||
|
fn checker_moves_to_trictrac_action(
|
||||||
|
move1: &CheckerMove,
|
||||||
|
move2: &CheckerMove,
|
||||||
|
dice: &Dice,
|
||||||
|
) -> TrictracAction {
|
||||||
|
let to1 = move1.get_to();
|
||||||
|
let to2 = move2.get_to();
|
||||||
|
let from1 = move1.get_from();
|
||||||
|
let from2 = move2.get_from();
|
||||||
|
|
||||||
|
let mut diff_move1 = if to1 > 0 {
|
||||||
|
// Mouvement sans sortie
|
||||||
|
to1 - from1
|
||||||
|
} else {
|
||||||
|
// sortie, on utilise la valeur du dé
|
||||||
|
if to2 > 0 {
|
||||||
|
// sortie pour le mouvement 1 uniquement
|
||||||
|
let dice2 = to2 - from2;
|
||||||
|
if dice2 == dice.values.0 as usize {
|
||||||
|
dice.values.1 as usize
|
||||||
|
} else {
|
||||||
|
dice.values.0 as usize
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// double sortie
|
||||||
|
if from1 < from2 {
|
||||||
|
max(dice.values.0, dice.values.1) as usize
|
||||||
|
} else {
|
||||||
|
min(dice.values.0, dice.values.1) as usize
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
// modification de diff_move1 si on est dans le cas d'un mouvement par puissance
|
||||||
|
let rest_field = 12;
|
||||||
|
if to1 == rest_field
|
||||||
|
&& to2 == rest_field
|
||||||
|
&& max(dice.values.0 as usize, dice.values.1 as usize) + min(from1, from2) != rest_field
|
||||||
|
{
|
||||||
|
// prise par puissance
|
||||||
|
diff_move1 += 1;
|
||||||
|
}
|
||||||
|
TrictracAction::Move {
|
||||||
|
dice_order: diff_move1 == dice.values.0 as usize,
|
||||||
|
from1: move1.get_from(),
|
||||||
|
from2: move2.get_from(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Retourne les indices des actions valides
|
||||||
|
pub fn get_valid_action_indices(game_state: &crate::GameState) -> Vec<usize> {
|
||||||
|
get_valid_actions(game_state)
|
||||||
|
.into_iter()
|
||||||
|
.map(|action| action.to_action_index())
|
||||||
|
.collect()
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Sélectionne une action valide aléatoire
|
||||||
|
pub fn sample_valid_action(game_state: &crate::GameState) -> Option<TrictracAction> {
|
||||||
|
use rand::{seq::SliceRandom, thread_rng};
|
||||||
|
|
||||||
|
let valid_actions = get_valid_actions(game_state);
|
||||||
|
let mut rng = thread_rng();
|
||||||
|
valid_actions.choose(&mut rng).cloned()
|
||||||
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
    use super::*;

    /// Move used by both tests: dice[0] first, from fields 3 and 4.
    fn sample_move() -> TrictracAction {
        TrictracAction::Move {
            dice_order: true,
            from1: 3,
            from2: 4,
        }
    }

    #[test]
    fn to_action_index() {
        let action = sample_move();
        let index = action.to_action_index();
        assert_eq!(Some(action), TrictracAction::from_action_index(index));
        assert_eq!(81, index);
    }

    #[test]
    fn from_action_index() {
        assert_eq!(
            Some(sample_move()),
            TrictracAction::from_action_index(81)
        );
    }
}
|
||||||
|
|
@ -1,5 +1,6 @@
|
||||||
pub mod burnrl;
|
pub mod burnrl;
|
||||||
pub mod dqn_common;
|
pub mod dqn_common;
|
||||||
|
pub mod dqn_common_big;
|
||||||
pub mod simple;
|
pub mod simple;
|
||||||
|
|
||||||
pub mod burnrl_valid;
|
pub mod burnrl_valid;
|
||||||
|
|
|
||||||
|
|
@ -6,7 +6,7 @@ use std::collections::VecDeque;
|
||||||
use store::{GameEvent, MoveRules, PointsRules, Stage, TurnStage};
|
use store::{GameEvent, MoveRules, PointsRules, Stage, TurnStage};
|
||||||
|
|
||||||
use super::dqn_model::{DqnConfig, SimpleNeuralNetwork};
|
use super::dqn_model::{DqnConfig, SimpleNeuralNetwork};
|
||||||
use crate::dqn::dqn_common::{get_valid_actions, TrictracAction};
|
use crate::dqn::dqn_common_big::{get_valid_actions, TrictracAction};
|
||||||
|
|
||||||
/// Expérience pour le buffer de replay
|
/// Expérience pour le buffer de replay
|
||||||
#[derive(Debug, Clone, Serialize, Deserialize)]
|
#[derive(Debug, Clone, Serialize, Deserialize)]
|
||||||
|
|
|
||||||
|
|
@ -3,7 +3,7 @@ use log::info;
|
||||||
use std::path::Path;
|
use std::path::Path;
|
||||||
use store::MoveRules;
|
use store::MoveRules;
|
||||||
|
|
||||||
use crate::dqn::dqn_common::{get_valid_actions, sample_valid_action, TrictracAction};
|
use crate::dqn::dqn_common_big::{get_valid_actions, sample_valid_action, TrictracAction};
|
||||||
use crate::dqn::simple::dqn_model::SimpleNeuralNetwork;
|
use crate::dqn::simple::dqn_model::SimpleNeuralNetwork;
|
||||||
|
|
||||||
/// Stratégie DQN pour le bot - ne fait que charger et utiliser un modèle pré-entraîné
|
/// Stratégie DQN pour le bot - ne fait que charger et utiliser un modèle pré-entraîné
|
||||||
|
|
|
||||||
|
|
@ -117,8 +117,8 @@ impl BotStrategy for DqnBurnStrategy {
|
||||||
// Utiliser le DQN pour choisir le mouvement
|
// Utiliser le DQN pour choisir le mouvement
|
||||||
if let Some(TrictracAction::Move {
|
if let Some(TrictracAction::Move {
|
||||||
dice_order,
|
dice_order,
|
||||||
from1,
|
checker1,
|
||||||
from2,
|
checker2,
|
||||||
}) = self.get_dqn_action()
|
}) = self.get_dqn_action()
|
||||||
{
|
{
|
||||||
let dicevals = self.game.dice.values;
|
let dicevals = self.game.dice.values;
|
||||||
|
|
@ -128,15 +128,33 @@ impl BotStrategy for DqnBurnStrategy {
|
||||||
(dicevals.1, dicevals.0)
|
(dicevals.1, dicevals.0)
|
||||||
};
|
};
|
||||||
|
|
||||||
|
let from1 = self
|
||||||
|
.game
|
||||||
|
.board
|
||||||
|
.get_checker_field(&self.color, checker1 as u8)
|
||||||
|
.unwrap_or(0);
|
||||||
|
|
||||||
if from1 == 0 {
|
if from1 == 0 {
|
||||||
// empty move
|
// empty move
|
||||||
dice1 = 0;
|
dice1 = 0;
|
||||||
}
|
}
|
||||||
let mut to1 = from1 + dice1 as usize;
|
let mut to1 = if self.color == Color::White {
|
||||||
if 24 < to1 {
|
from1 + dice1 as usize
|
||||||
|
} else {
|
||||||
|
from1 - dice1 as usize
|
||||||
|
};
|
||||||
|
if 24 < to1 || to1 < 0 {
|
||||||
// sortie
|
// sortie
|
||||||
to1 = 0;
|
to1 = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
let checker_move1 = store::CheckerMove::new(from1, to1).unwrap_or_default();
|
||||||
|
|
||||||
|
let mut tmp_board = self.game.board.clone();
|
||||||
|
tmp_board.move_checker(&self.color, checker_move1);
|
||||||
|
let from2 = tmp_board
|
||||||
|
.get_checker_field(&self.color, checker2 as u8)
|
||||||
|
.unwrap_or(0);
|
||||||
if from2 == 0 {
|
if from2 == 0 {
|
||||||
// empty move
|
// empty move
|
||||||
dice2 = 0;
|
dice2 = 0;
|
||||||
|
|
@ -147,6 +165,13 @@ impl BotStrategy for DqnBurnStrategy {
|
||||||
to2 = 0;
|
to2 = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Gestion prise de coin par puissance
|
||||||
|
let opp_rest_field = 13;
|
||||||
|
if to1 == opp_rest_field && to2 == opp_rest_field {
|
||||||
|
to1 -= 1;
|
||||||
|
to2 -= 1;
|
||||||
|
}
|
||||||
|
|
||||||
let checker_move1 = CheckerMove::new(from1, to1).unwrap_or_default();
|
let checker_move1 = CheckerMove::new(from1, to1).unwrap_or_default();
|
||||||
let checker_move2 = CheckerMove::new(from2, to2).unwrap_or_default();
|
let checker_move2 = CheckerMove::new(from2, to2).unwrap_or_default();
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -158,6 +158,42 @@ impl Board {
|
||||||
.unsigned_abs()
|
.unsigned_abs()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// get the number of the last checker in a field
|
||||||
|
pub fn get_field_checker(&self, color: &Color, field: Field) -> u8 {
|
||||||
|
assert_eq!(color, &Color::White); // sinon ajouter la gestion des noirs avec mirror
|
||||||
|
let mut total_count: u8 = 0;
|
||||||
|
for (i, checker_count) in self.positions.iter().enumerate() {
|
||||||
|
// count white checkers (checker_count > 0)
|
||||||
|
if *checker_count > 0 {
|
||||||
|
total_count += *checker_count as u8;
|
||||||
|
if field == i + 1 {
|
||||||
|
return total_count;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
0
|
||||||
|
}
|
||||||
|
|
||||||
|
// get the field of the nth checker
|
||||||
|
pub fn get_checker_field(&self, color: &Color, checker_pos: u8) -> Option<Field> {
|
||||||
|
assert_eq!(color, &Color::White); // sinon ajouter la gestion des noirs avec mirror
|
||||||
|
if checker_pos == 0 {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
let mut total_count: u8 = 0;
|
||||||
|
for (i, checker_count) in self.positions.iter().enumerate() {
|
||||||
|
// count white checkers (checker_count > 0)
|
||||||
|
if *checker_count > 0 {
|
||||||
|
total_count += *checker_count as u8;
|
||||||
|
}
|
||||||
|
// return the current field if it contains the checker
|
||||||
|
if checker_pos <= total_count {
|
||||||
|
return Some(i + 1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
None
|
||||||
|
}
|
||||||
|
|
||||||
pub fn to_vec(&self) -> Vec<i8> {
|
pub fn to_vec(&self) -> Vec<i8> {
|
||||||
self.positions.to_vec()
|
self.positions.to_vec()
|
||||||
}
|
}
|
||||||
|
|
@ -721,4 +757,32 @@ mod tests {
|
||||||
);
|
);
|
||||||
assert_eq!(vec![2], board.get_quarter_filling_candidate(Color::White));
|
assert_eq!(vec![2], board.get_quarter_filling_candidate(Color::White));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
fn get_checker_field() {
    let mut board = Board::new();
    // 13 white checkers spread over the first six fields.
    board.set_positions(
        &Color::White,
        [
            3, 1, 2, 2, 3, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        ],
    );
    let field_of = |checker: u8| board.get_checker_field(&Color::White, checker);
    assert_eq!(None, field_of(0));
    assert_eq!(Some(3), field_of(5));
    assert_eq!(Some(3), field_of(6));
    assert_eq!(None, field_of(14));
}
|
||||||
|
|
||||||
|
#[test]
fn get_field_checker() {
    let mut board = Board::new();
    // 13 white checkers spread over the first six fields.
    board.set_positions(
        &Color::White,
        [
            3, 1, 2, 2, 3, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
        ],
    );
    let last_checker_on = |field| board.get_field_checker(&Color::White, field);
    assert_eq!(4, last_checker_on(2));
    assert_eq!(6, last_checker_on(3));
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue