dqn trainer
This commit is contained in:
parent
7507ea5d78
commit
dc197fbc6f
|
|
@ -1,4 +1,4 @@
|
||||||
use std::cmp::max;
|
use std::cmp::{max, min};
|
||||||
|
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
use store::{CheckerMove, Dice, GameEvent, PlayerId};
|
use store::{CheckerMove, Dice, GameEvent, PlayerId};
|
||||||
|
|
@ -8,8 +8,6 @@ use store::{CheckerMove, Dice, GameEvent, PlayerId};
|
||||||
pub enum TrictracAction {
|
pub enum TrictracAction {
|
||||||
/// Lancer les dés
|
/// Lancer les dés
|
||||||
Roll,
|
Roll,
|
||||||
/// Marquer les points
|
|
||||||
Mark,
|
|
||||||
/// Continuer après avoir gagné un trou
|
/// Continuer après avoir gagné un trou
|
||||||
Go,
|
Go,
|
||||||
/// Effectuer un mouvement de pions
|
/// Effectuer un mouvement de pions
|
||||||
|
|
@ -18,6 +16,8 @@ pub enum TrictracAction {
|
||||||
from1: usize, // position de départ du premier pion (0-24)
|
from1: usize, // position de départ du premier pion (0-24)
|
||||||
from2: usize, // position de départ du deuxième pion (0-24)
|
from2: usize, // position de départ du deuxième pion (0-24)
|
||||||
},
|
},
|
||||||
|
// Marquer les points : à activer si support des écoles
|
||||||
|
// Mark,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl TrictracAction {
|
impl TrictracAction {
|
||||||
|
|
@ -25,22 +25,22 @@ impl TrictracAction {
|
||||||
pub fn to_action_index(&self) -> usize {
|
pub fn to_action_index(&self) -> usize {
|
||||||
match self {
|
match self {
|
||||||
TrictracAction::Roll => 0,
|
TrictracAction::Roll => 0,
|
||||||
TrictracAction::Mark => 1,
|
TrictracAction::Go => 1,
|
||||||
TrictracAction::Go => 2,
|
|
||||||
TrictracAction::Move {
|
TrictracAction::Move {
|
||||||
dice_order,
|
dice_order,
|
||||||
from1,
|
from1,
|
||||||
from2,
|
from2,
|
||||||
} => {
|
} => {
|
||||||
// Encoder les mouvements dans l'espace d'actions
|
// Encoder les mouvements dans l'espace d'actions
|
||||||
// Indices 3+ pour les mouvements
|
// Indices 2+ pour les mouvements
|
||||||
let mut start = 3;
|
// de 2 à 1251 (2 à 626 pour dé 1 en premier, 627 à 1251 pour dé 2 en premier)
|
||||||
|
let mut start = 2;
|
||||||
if !dice_order {
|
if !dice_order {
|
||||||
// 25 * 25 = 625
|
// 25 * 25 = 625
|
||||||
start += 625;
|
start += 625;
|
||||||
}
|
}
|
||||||
start + from1 * 25 + from2
|
start + from1 * 25 + from2
|
||||||
}
|
} // TrictracAction::Mark => 1252,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -48,8 +48,8 @@ impl TrictracAction {
|
||||||
pub fn from_action_index(index: usize) -> Option<TrictracAction> {
|
pub fn from_action_index(index: usize) -> Option<TrictracAction> {
|
||||||
match index {
|
match index {
|
||||||
0 => Some(TrictracAction::Roll),
|
0 => Some(TrictracAction::Roll),
|
||||||
1 => Some(TrictracAction::Mark),
|
// 1252 => Some(TrictracAction::Mark),
|
||||||
2 => Some(TrictracAction::Go),
|
1 => Some(TrictracAction::Go),
|
||||||
i if i >= 3 => {
|
i if i >= 3 => {
|
||||||
let move_code = i - 3;
|
let move_code = i - 3;
|
||||||
let (dice_order, from1, from2) = Self::decode_move(move_code);
|
let (dice_order, from1, from2) = Self::decode_move(move_code);
|
||||||
|
|
@ -77,10 +77,10 @@ impl TrictracAction {
|
||||||
|
|
||||||
/// Retourne la taille de l'espace d'actions total
|
/// Retourne la taille de l'espace d'actions total
|
||||||
pub fn action_space_size() -> usize {
|
pub fn action_space_size() -> usize {
|
||||||
// 1 (Roll) + 1 (Mark) + 1 (Go) + mouvements possibles
|
// 1 (Roll) + 1 (Go) + mouvements possibles
|
||||||
// Pour les mouvements : 2*25*25 = 1250 (choix du dé + position 0-24 pour chaque from)
|
// Pour les mouvements : 2*25*25 = 1250 (choix du dé + position 0-24 pour chaque from)
|
||||||
// Mais on peut optimiser en limitant aux positions valides (1-24)
|
// Mais on peut optimiser en limitant aux positions valides (1-24)
|
||||||
3 + (2 * 25 * 25) // = 1253
|
2 + (2 * 25 * 25) // = 1252
|
||||||
}
|
}
|
||||||
|
|
||||||
// pub fn to_game_event(&self, player_id: PlayerId, dice: Dice) -> GameEvent {
|
// pub fn to_game_event(&self, player_id: PlayerId, dice: Dice) -> GameEvent {
|
||||||
|
|
@ -273,35 +273,37 @@ pub fn get_valid_actions(game_state: &crate::GameState) -> Vec<TrictracAction> {
|
||||||
valid_actions.push(TrictracAction::Roll);
|
valid_actions.push(TrictracAction::Roll);
|
||||||
}
|
}
|
||||||
TurnStage::MarkPoints | TurnStage::MarkAdvPoints => {
|
TurnStage::MarkPoints | TurnStage::MarkAdvPoints => {
|
||||||
valid_actions.push(TrictracAction::Mark);
|
// valid_actions.push(TrictracAction::Mark);
|
||||||
}
|
}
|
||||||
TurnStage::HoldOrGoChoice => {
|
TurnStage::HoldOrGoChoice => {
|
||||||
valid_actions.push(TrictracAction::Go);
|
valid_actions.push(TrictracAction::Go);
|
||||||
|
|
||||||
// Ajouter aussi les mouvements possibles
|
// Ajoute aussi les mouvements possibles
|
||||||
let rules = store::MoveRules::new(&color, &game_state.board, game_state.dice);
|
let rules = store::MoveRules::new(&color, &game_state.board, game_state.dice);
|
||||||
let possible_moves = rules.get_possible_moves_sequences(true, vec![]);
|
let possible_moves = rules.get_possible_moves_sequences(true, vec![]);
|
||||||
|
|
||||||
|
// Modifier checker_moves_to_trictrac_action si on doit gérer Black
|
||||||
|
assert_eq!(color, store::Color::White);
|
||||||
for (move1, move2) in possible_moves {
|
for (move1, move2) in possible_moves {
|
||||||
let diff_move1 = move1.get_to() - move1.get_from();
|
valid_actions.push(checker_moves_to_trictrac_action(
|
||||||
valid_actions.push(TrictracAction::Move {
|
&move1,
|
||||||
dice_order: diff_move1 == game_state.dice.values.0 as usize,
|
&move2,
|
||||||
from1: move1.get_from(),
|
&game_state.dice,
|
||||||
from2: move2.get_from(),
|
));
|
||||||
});
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
TurnStage::Move => {
|
TurnStage::Move => {
|
||||||
let rules = store::MoveRules::new(&color, &game_state.board, game_state.dice);
|
let rules = store::MoveRules::new(&color, &game_state.board, game_state.dice);
|
||||||
let possible_moves = rules.get_possible_moves_sequences(true, vec![]);
|
let possible_moves = rules.get_possible_moves_sequences(true, vec![]);
|
||||||
|
|
||||||
|
// Modifier checker_moves_to_trictrac_action si on doit gérer Black
|
||||||
|
assert_eq!(color, store::Color::White);
|
||||||
for (move1, move2) in possible_moves {
|
for (move1, move2) in possible_moves {
|
||||||
let diff_move1 = move1.get_to() - move1.get_from();
|
valid_actions.push(checker_moves_to_trictrac_action(
|
||||||
valid_actions.push(TrictracAction::Move {
|
&move1,
|
||||||
dice_order: diff_move1 == game_state.dice.values.0 as usize,
|
&move2,
|
||||||
from1: move1.get_from(),
|
&game_state.dice,
|
||||||
from2: move2.get_from(),
|
));
|
||||||
});
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -310,6 +312,56 @@ pub fn get_valid_actions(game_state: &crate::GameState) -> Vec<TrictracAction> {
|
||||||
valid_actions
|
valid_actions
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Valid only for White player
|
||||||
|
fn checker_moves_to_trictrac_action(
|
||||||
|
move1: &CheckerMove,
|
||||||
|
move2: &CheckerMove,
|
||||||
|
dice: &Dice,
|
||||||
|
) -> TrictracAction {
|
||||||
|
let to1 = move1.get_to();
|
||||||
|
let to2 = move2.get_to();
|
||||||
|
let from1 = move1.get_from();
|
||||||
|
let from2 = move2.get_from();
|
||||||
|
|
||||||
|
let mut diff_move1 = if to1 > 0 {
|
||||||
|
// Mouvement sans sortie
|
||||||
|
to1 - from1
|
||||||
|
} else {
|
||||||
|
// sortie, on utilise la valeur du dé
|
||||||
|
if to2 > 0 {
|
||||||
|
// sortie pour le mouvement 1 uniquement
|
||||||
|
let dice2 = to2 - from2;
|
||||||
|
if dice2 == dice.values.0 as usize {
|
||||||
|
dice.values.1 as usize
|
||||||
|
} else {
|
||||||
|
dice.values.0 as usize
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// double sortie
|
||||||
|
if from1 < from2 {
|
||||||
|
max(dice.values.0, dice.values.1) as usize
|
||||||
|
} else {
|
||||||
|
min(dice.values.0, dice.values.1) as usize
|
||||||
|
}
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
// modification de diff_move1 si on est dans le cas d'un mouvement par puissance
|
||||||
|
let rest_field = 12;
|
||||||
|
if to1 == rest_field
|
||||||
|
&& to2 == rest_field
|
||||||
|
&& max(dice.values.0 as usize, dice.values.1 as usize) + min(from1, from2) != rest_field
|
||||||
|
{
|
||||||
|
// prise par puissance
|
||||||
|
diff_move1 += 1;
|
||||||
|
}
|
||||||
|
TrictracAction::Move {
|
||||||
|
dice_order: diff_move1 == dice.values.0 as usize,
|
||||||
|
from1: move1.get_from(),
|
||||||
|
from2: move2.get_from(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/// Retourne les indices des actions valides
|
/// Retourne les indices des actions valides
|
||||||
pub fn get_valid_action_indices(game_state: &crate::GameState) -> Vec<usize> {
|
pub fn get_valid_action_indices(game_state: &crate::GameState) -> Vec<usize> {
|
||||||
get_valid_actions(game_state)
|
get_valid_actions(game_state)
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,4 @@
|
||||||
use crate::{Color, GameState, PlayerId};
|
use crate::{CheckerMove, Color, GameState, PlayerId};
|
||||||
use rand::prelude::SliceRandom;
|
use rand::prelude::SliceRandom;
|
||||||
use rand::{thread_rng, Rng};
|
use rand::{thread_rng, Rng};
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
|
|
@ -251,14 +251,15 @@ impl TrictracEnv {
|
||||||
player_id: self.agent_player_id,
|
player_id: self.agent_player_id,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
TrictracAction::Mark { points } => {
|
// TrictracAction::Mark => {
|
||||||
// Marquer des points
|
// // Marquer des points
|
||||||
reward += 0.1 * points as f32;
|
// let points = self.game_state.
|
||||||
Some(GameEvent::Mark {
|
// reward += 0.1 * points as f32;
|
||||||
player_id: self.agent_player_id,
|
// Some(GameEvent::Mark {
|
||||||
points,
|
// player_id: self.agent_player_id,
|
||||||
})
|
// points,
|
||||||
}
|
// })
|
||||||
|
// }
|
||||||
TrictracAction::Go => {
|
TrictracAction::Go => {
|
||||||
// Continuer après avoir gagné un trou
|
// Continuer après avoir gagné un trou
|
||||||
reward += 0.2;
|
reward += 0.2;
|
||||||
|
|
@ -272,8 +273,23 @@ impl TrictracEnv {
|
||||||
from2,
|
from2,
|
||||||
} => {
|
} => {
|
||||||
// Effectuer un mouvement
|
// Effectuer un mouvement
|
||||||
let checker_move1 = store::CheckerMove::new(move1.0, move1.1).unwrap_or_default();
|
let (dice1, dice2) = if dice_order {
|
||||||
let checker_move2 = store::CheckerMove::new(move2.0, move2.1).unwrap_or_default();
|
(self.game_state.dice.values.0, self.game_state.dice.values.1)
|
||||||
|
} else {
|
||||||
|
(self.game_state.dice.values.1, self.game_state.dice.values.0)
|
||||||
|
};
|
||||||
|
let mut to1 = from1 + dice1 as usize;
|
||||||
|
let mut to2 = from2 + dice2 as usize;
|
||||||
|
|
||||||
|
// Gestion prise de coin par puissance
|
||||||
|
let opp_rest_field = 13;
|
||||||
|
if to1 == opp_rest_field && to2 == opp_rest_field {
|
||||||
|
to1 -= 1;
|
||||||
|
to2 -= 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
let checker_move1 = store::CheckerMove::new(from1, to1).unwrap_or_default();
|
||||||
|
let checker_move2 = store::CheckerMove::new(from2, to2).unwrap_or_default();
|
||||||
|
|
||||||
reward += 0.2;
|
reward += 0.2;
|
||||||
Some(GameEvent::Move {
|
Some(GameEvent::Move {
|
||||||
|
|
@ -360,7 +376,9 @@ impl TrictracEnv {
|
||||||
|
|
||||||
// Stratégie simple : choix aléatoire
|
// Stratégie simple : choix aléatoire
|
||||||
let mut rng = thread_rng();
|
let mut rng = thread_rng();
|
||||||
let choosen_move = *possible_moves.choose(&mut rng).unwrap();
|
let choosen_move = *possible_moves
|
||||||
|
.choose(&mut rng)
|
||||||
|
.unwrap_or(&(CheckerMove::default(), CheckerMove::default()));
|
||||||
|
|
||||||
GameEvent::Move {
|
GameEvent::Move {
|
||||||
player_id: self.opponent_player_id,
|
player_id: self.opponent_player_id,
|
||||||
|
|
@ -443,7 +461,6 @@ impl DqnTrainer {
|
||||||
for episode in 1..=episodes {
|
for episode in 1..=episodes {
|
||||||
let reward = self.train_episode();
|
let reward = self.train_episode();
|
||||||
|
|
||||||
print!(".");
|
|
||||||
if episode % 100 == 0 {
|
if episode % 100 == 0 {
|
||||||
println!(
|
println!(
|
||||||
"Épisode {}/{}: Récompense = {:.2}, Epsilon = {:.3}, Steps = {}",
|
"Épisode {}/{}: Récompense = {:.2}, Epsilon = {:.3}, Steps = {}",
|
||||||
|
|
|
||||||
|
|
@ -757,6 +757,7 @@ mod tests {
|
||||||
#[test]
|
#[test]
|
||||||
fn hold_or_go() {
|
fn hold_or_go() {
|
||||||
let mut game_state = init_test_gamestate(TurnStage::MarkPoints);
|
let mut game_state = init_test_gamestate(TurnStage::MarkPoints);
|
||||||
|
game_state.schools_enabled = true;
|
||||||
let pid = game_state.active_player_id;
|
let pid = game_state.active_player_id;
|
||||||
game_state.consume(
|
game_state.consume(
|
||||||
&(GameEvent::Mark {
|
&(GameEvent::Mark {
|
||||||
|
|
@ -782,6 +783,7 @@ mod tests {
|
||||||
|
|
||||||
// Hold
|
// Hold
|
||||||
let mut game_state = init_test_gamestate(TurnStage::MarkPoints);
|
let mut game_state = init_test_gamestate(TurnStage::MarkPoints);
|
||||||
|
game_state.schools_enabled = true;
|
||||||
let pid = game_state.active_player_id;
|
let pid = game_state.active_player_id;
|
||||||
game_state.consume(
|
game_state.consume(
|
||||||
&(GameEvent::Mark {
|
&(GameEvent::Mark {
|
||||||
|
|
@ -802,6 +804,6 @@ mod tests {
|
||||||
assert_ne!(game_state.active_player_id, pid);
|
assert_ne!(game_state.active_player_id, pid);
|
||||||
assert_eq!(game_state.players.get(&pid).unwrap().points, 1);
|
assert_eq!(game_state.players.get(&pid).unwrap().points, 1);
|
||||||
assert_eq!(game_state.get_active_player().unwrap().points, 0);
|
assert_eq!(game_state.get_active_player().unwrap().points, 0);
|
||||||
assert_eq!(game_state.turn_stage, TurnStage::RollDice);
|
assert_eq!(game_state.turn_stage, TurnStage::MarkAdvPoints);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue