wip fix workflow
This commit is contained in:
parent
f6ec3ef5ae
commit
5f33737c1b
|
|
@ -36,18 +36,20 @@ impl BotStrategy for DefaultStrategy {
|
||||||
}
|
}
|
||||||
|
|
||||||
fn calculate_points(&self) -> u8 {
|
fn calculate_points(&self) -> u8 {
|
||||||
let dice_roll_count = self
|
// let dice_roll_count = self
|
||||||
.get_game()
|
// .get_game()
|
||||||
.players
|
// .players
|
||||||
.get(&self.player_id)
|
// .get(&self.player_id)
|
||||||
.unwrap()
|
// .unwrap()
|
||||||
.dice_roll_count;
|
// .dice_roll_count;
|
||||||
let points_rules = PointsRules::new(&Color::White, &self.game.board, self.game.dice);
|
// let points_rules = PointsRules::new(&Color::White, &self.game.board, self.game.dice);
|
||||||
points_rules.get_points(dice_roll_count).0
|
// points_rules.get_points(dice_roll_count).0
|
||||||
|
self.game.dice_points.0
|
||||||
}
|
}
|
||||||
|
|
||||||
fn calculate_adv_points(&self) -> u8 {
|
fn calculate_adv_points(&self) -> u8 {
|
||||||
self.calculate_points()
|
// self.calculate_points()
|
||||||
|
self.game.dice_points.1
|
||||||
}
|
}
|
||||||
|
|
||||||
fn choose_go(&self) -> bool {
|
fn choose_go(&self) -> bool {
|
||||||
|
|
|
||||||
|
|
@ -2,7 +2,9 @@ use crate::{BotStrategy, CheckerMove, Color, GameState, PlayerId, PointsRules};
|
||||||
use std::path::Path;
|
use std::path::Path;
|
||||||
use store::MoveRules;
|
use store::MoveRules;
|
||||||
|
|
||||||
use super::dqn_common::{SimpleNeuralNetwork, TrictracAction, get_valid_actions, sample_valid_action};
|
use super::dqn_common::{
|
||||||
|
get_valid_actions, sample_valid_action, SimpleNeuralNetwork, TrictracAction,
|
||||||
|
};
|
||||||
|
|
||||||
/// Stratégie DQN pour le bot - ne fait que charger et utiliser un modèle pré-entraîné
|
/// Stratégie DQN pour le bot - ne fait que charger et utiliser un modèle pré-entraîné
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
|
|
@ -42,18 +44,18 @@ impl DqnStrategy {
|
||||||
if let Some(ref model) = self.model {
|
if let Some(ref model) = self.model {
|
||||||
let state = self.game.to_vec_float();
|
let state = self.game.to_vec_float();
|
||||||
let valid_actions = get_valid_actions(&self.game);
|
let valid_actions = get_valid_actions(&self.game);
|
||||||
|
|
||||||
if valid_actions.is_empty() {
|
if valid_actions.is_empty() {
|
||||||
return None;
|
return None;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Obtenir les Q-values pour toutes les actions
|
// Obtenir les Q-values pour toutes les actions
|
||||||
let q_values = model.forward(&state);
|
let q_values = model.forward(&state);
|
||||||
|
|
||||||
// Trouver la meilleure action valide
|
// Trouver la meilleure action valide
|
||||||
let mut best_action = &valid_actions[0];
|
let mut best_action = &valid_actions[0];
|
||||||
let mut best_q_value = f32::NEG_INFINITY;
|
let mut best_q_value = f32::NEG_INFINITY;
|
||||||
|
|
||||||
for action in &valid_actions {
|
for action in &valid_actions {
|
||||||
let action_index = action.to_action_index();
|
let action_index = action.to_action_index();
|
||||||
if action_index < q_values.len() {
|
if action_index < q_values.len() {
|
||||||
|
|
@ -64,7 +66,7 @@ impl DqnStrategy {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Some(best_action.clone())
|
Some(best_action.clone())
|
||||||
} else {
|
} else {
|
||||||
// Fallback : action aléatoire valide
|
// Fallback : action aléatoire valide
|
||||||
|
|
@ -91,26 +93,11 @@ impl BotStrategy for DqnStrategy {
|
||||||
}
|
}
|
||||||
|
|
||||||
fn calculate_points(&self) -> u8 {
|
fn calculate_points(&self) -> u8 {
|
||||||
// Utiliser le DQN pour choisir le nombre de points à marquer
|
self.game.dice_points.0
|
||||||
if let Some(action) = self.get_dqn_action() {
|
|
||||||
if let TrictracAction::Mark { points } = action {
|
|
||||||
return points;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Fallback : utiliser la méthode standard
|
|
||||||
let dice_roll_count = self
|
|
||||||
.get_game()
|
|
||||||
.players
|
|
||||||
.get(&self.player_id)
|
|
||||||
.unwrap()
|
|
||||||
.dice_roll_count;
|
|
||||||
let points_rules = PointsRules::new(&self.color, &self.game.board, self.game.dice);
|
|
||||||
points_rules.get_points(dice_roll_count).0
|
|
||||||
}
|
}
|
||||||
|
|
||||||
fn calculate_adv_points(&self) -> u8 {
|
fn calculate_adv_points(&self) -> u8 {
|
||||||
self.calculate_points()
|
self.game.dice_points.1
|
||||||
}
|
}
|
||||||
|
|
||||||
fn choose_go(&self) -> bool {
|
fn choose_go(&self) -> bool {
|
||||||
|
|
@ -126,24 +113,55 @@ impl BotStrategy for DqnStrategy {
|
||||||
fn choose_move(&self) -> (CheckerMove, CheckerMove) {
|
fn choose_move(&self) -> (CheckerMove, CheckerMove) {
|
||||||
// Utiliser le DQN pour choisir le mouvement
|
// Utiliser le DQN pour choisir le mouvement
|
||||||
if let Some(action) = self.get_dqn_action() {
|
if let Some(action) = self.get_dqn_action() {
|
||||||
if let TrictracAction::Move { move1, move2 } = action {
|
if let TrictracAction::Move {
|
||||||
let checker_move1 = CheckerMove::new(move1.0, move1.1).unwrap_or_default();
|
dice_order,
|
||||||
let checker_move2 = CheckerMove::new(move2.0, move2.1).unwrap_or_default();
|
from1,
|
||||||
|
from2,
|
||||||
|
} = action
|
||||||
|
{
|
||||||
|
let dicevals = self.game.dice.values;
|
||||||
|
let (mut dice1, mut dice2) = if dice_order {
|
||||||
|
(dicevals.0, dicevals.1)
|
||||||
|
} else {
|
||||||
|
(dicevals.1, dicevals.0)
|
||||||
|
};
|
||||||
|
|
||||||
|
if from1 == 0 {
|
||||||
|
// empty move
|
||||||
|
dice1 = 0;
|
||||||
|
}
|
||||||
|
let mut to1 = from1 + dice1 as usize;
|
||||||
|
if 24 < to1 {
|
||||||
|
// sortie
|
||||||
|
to1 = 0;
|
||||||
|
}
|
||||||
|
if from2 == 0 {
|
||||||
|
// empty move
|
||||||
|
dice2 = 0;
|
||||||
|
}
|
||||||
|
let mut to2 = from2 + dice2 as usize;
|
||||||
|
if 24 < to2 {
|
||||||
|
// sortie
|
||||||
|
to2 = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
let checker_move1 = CheckerMove::new(from1, to1).unwrap_or_default();
|
||||||
|
let checker_move2 = CheckerMove::new(from2, to2).unwrap_or_default();
|
||||||
|
|
||||||
let chosen_move = if self.color == Color::White {
|
let chosen_move = if self.color == Color::White {
|
||||||
(checker_move1, checker_move2)
|
(checker_move1, checker_move2)
|
||||||
} else {
|
} else {
|
||||||
(checker_move1.mirror(), checker_move2.mirror())
|
(checker_move1.mirror(), checker_move2.mirror())
|
||||||
};
|
};
|
||||||
|
|
||||||
return chosen_move;
|
return chosen_move;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Fallback : utiliser la stratégie par défaut
|
// Fallback : utiliser la stratégie par défaut
|
||||||
let rules = MoveRules::new(&self.color, &self.game.board, self.game.dice);
|
let rules = MoveRules::new(&self.color, &self.game.board, self.game.dice);
|
||||||
let possible_moves = rules.get_possible_moves_sequences(true, vec![]);
|
let possible_moves = rules.get_possible_moves_sequences(true, vec![]);
|
||||||
|
|
||||||
let chosen_move = *possible_moves
|
let chosen_move = *possible_moves
|
||||||
.first()
|
.first()
|
||||||
.unwrap_or(&(CheckerMove::default(), CheckerMove::default()));
|
.unwrap_or(&(CheckerMove::default(), CheckerMove::default()));
|
||||||
|
|
@ -155,4 +173,3 @@ impl BotStrategy for DqnStrategy {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -52,7 +52,7 @@ impl TrictracAction {
|
||||||
2 => Some(TrictracAction::Go),
|
2 => Some(TrictracAction::Go),
|
||||||
i if i >= 3 => {
|
i if i >= 3 => {
|
||||||
let move_code = i - 3;
|
let move_code = i - 3;
|
||||||
let (dice_order, from1, from2) = decode_move(move_code);
|
let (dice_order, from1, from2) = Self::decode_move(move_code);
|
||||||
Some(TrictracAction::Move {
|
Some(TrictracAction::Move {
|
||||||
dice_order,
|
dice_order,
|
||||||
from1,
|
from1,
|
||||||
|
|
@ -83,28 +83,27 @@ impl TrictracAction {
|
||||||
3 + (2 * 25 * 25) // = 1253
|
3 + (2 * 25 * 25) // = 1253
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn to_game_event(&self, player_id: PlayerId, dice: Dice) -> GameEvent {
|
// pub fn to_game_event(&self, player_id: PlayerId, dice: Dice) -> GameEvent {
|
||||||
match action {
|
// match action {
|
||||||
TrictracAction::Roll => Some(GameEvent::Roll { player_id }),
|
// TrictracAction::Roll => Some(GameEvent::Roll { player_id }),
|
||||||
TrictracAction::Mark => Some(GameEvent::Mark { player_id, points }),
|
// TrictracAction::Mark => Some(GameEvent::Mark { player_id, points }),
|
||||||
TrictracAction::Go => Some(GameEvent::Go { player_id }),
|
// TrictracAction::Go => Some(GameEvent::Go { player_id }),
|
||||||
TrictracAction::Move {
|
// TrictracAction::Move {
|
||||||
dice_order,
|
// dice_order,
|
||||||
from1,
|
// from1,
|
||||||
from2,
|
// from2,
|
||||||
} => {
|
// } => {
|
||||||
// Effectuer un mouvement
|
// // Effectuer un mouvement
|
||||||
let checker_move1 = store::CheckerMove::new(move1.0, move1.1).unwrap_or_default();
|
// let checker_move1 = store::CheckerMove::new(move1.0, move1.1).unwrap_or_default();
|
||||||
let checker_move2 = store::CheckerMove::new(move2.0, move2.1).unwrap_or_default();
|
// let checker_move2 = store::CheckerMove::new(move2.0, move2.1).unwrap_or_default();
|
||||||
|
//
|
||||||
reward += 0.2;
|
// Some(GameEvent::Move {
|
||||||
Some(GameEvent::Move {
|
// player_id: self.agent_player_id,
|
||||||
player_id: self.agent_player_id,
|
// moves: (checker_move1, checker_move2),
|
||||||
moves: (checker_move1, checker_move2),
|
// })
|
||||||
})
|
// }
|
||||||
}
|
// };
|
||||||
};
|
// }
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Configuration pour l'agent DQN
|
/// Configuration pour l'agent DQN
|
||||||
|
|
|
||||||
25
doc/workflow.md
Normal file
25
doc/workflow.md
Normal file
|
|
@ -0,0 +1,25 @@
|
||||||
|
# Workflow
|
||||||
|
|
||||||
|
@startuml
|
||||||
|
|
||||||
|
state c <<choice>>
|
||||||
|
state haswon <<choice>>
|
||||||
|
state MarkPoints #lightblue
|
||||||
|
state MarkAdvPoints #lightblue
|
||||||
|
note right of MarkPoints : automatic 'Mark' transition\nwhen no school
|
||||||
|
note right of MarkAdvPoints : automatic 'Mark' transition\nwhen no school
|
||||||
|
|
||||||
|
[*] -> RollDice : BeginGame
|
||||||
|
RollDice --> RollWaiting : Roll (current player)
|
||||||
|
RollWaiting --> MarkPoints : RollResult (engine)
|
||||||
|
MarkPoints --> c : Mark (current player)
|
||||||
|
c --> HoldOrGoChoice : [new hole]
|
||||||
|
c --> [*] : [has won]
|
||||||
|
c --> Move : [not new hole]
|
||||||
|
HoldOrGoChoice --> RollDice : Go
|
||||||
|
HoldOrGoChoice --> MarkAdvPoints : Move
|
||||||
|
Move --> MarkAdvPoints : Move
|
||||||
|
MarkAdvPoints --> haswon : Mark (adversary)
|
||||||
|
haswon --> RollDice : [has not won]
|
||||||
|
haswon --> [*] : [has won]
|
||||||
|
@enduml
|
||||||
|
|
@ -71,7 +71,7 @@ pub struct GameState {
|
||||||
/// last dice pair rolled
|
/// last dice pair rolled
|
||||||
pub dice: Dice,
|
pub dice: Dice,
|
||||||
/// players points computed for the last dice pair rolled
|
/// players points computed for the last dice pair rolled
|
||||||
dice_points: (u8, u8),
|
pub dice_points: (u8, u8),
|
||||||
pub dice_moves: (CheckerMove, CheckerMove),
|
pub dice_moves: (CheckerMove, CheckerMove),
|
||||||
pub dice_jans: PossibleJans,
|
pub dice_jans: PossibleJans,
|
||||||
/// true if player needs to roll first
|
/// true if player needs to roll first
|
||||||
|
|
@ -505,13 +505,7 @@ impl GameState {
|
||||||
self.players.remove(player_id);
|
self.players.remove(player_id);
|
||||||
}
|
}
|
||||||
Roll { player_id: _ } => {
|
Roll { player_id: _ } => {
|
||||||
// Opponent has moved, we can mark pending points earned during opponent's turn
|
self.turn_stage = TurnStage::RollWaiting;
|
||||||
let new_hole = self.mark_points(self.active_player_id, self.dice_points.1);
|
|
||||||
if new_hole && self.get_active_player().unwrap().holes > 12 {
|
|
||||||
self.stage = Stage::Ended;
|
|
||||||
} else {
|
|
||||||
self.turn_stage = TurnStage::RollWaiting;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
RollResult { player_id: _, dice } => {
|
RollResult { player_id: _, dice } => {
|
||||||
self.dice = *dice;
|
self.dice = *dice;
|
||||||
|
|
@ -534,23 +528,25 @@ impl GameState {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Mark { player_id, points } => {
|
Mark { player_id, points } => {
|
||||||
let new_hole = self.mark_points(*player_id, *points);
|
if self.schools_enabled {
|
||||||
if new_hole {
|
let new_hole = self.mark_points(*player_id, *points);
|
||||||
if self.get_active_player().unwrap().holes > 12 {
|
if new_hole {
|
||||||
self.stage = Stage::Ended;
|
if self.get_active_player().unwrap().holes > 12 {
|
||||||
|
self.stage = Stage::Ended;
|
||||||
|
} else {
|
||||||
|
self.turn_stage = if self.turn_stage == TurnStage::MarkAdvPoints {
|
||||||
|
TurnStage::RollDice
|
||||||
|
} else {
|
||||||
|
TurnStage::HoldOrGoChoice
|
||||||
|
};
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
self.turn_stage = if self.turn_stage == TurnStage::MarkAdvPoints {
|
self.turn_stage = if self.turn_stage == TurnStage::MarkAdvPoints {
|
||||||
TurnStage::RollDice
|
TurnStage::RollDice
|
||||||
} else {
|
} else {
|
||||||
TurnStage::HoldOrGoChoice
|
TurnStage::Move
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
} else {
|
|
||||||
self.turn_stage = if self.turn_stage == TurnStage::MarkAdvPoints {
|
|
||||||
TurnStage::RollDice
|
|
||||||
} else {
|
|
||||||
TurnStage::Move
|
|
||||||
};
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Go { player_id: _ } => self.new_pick_up(),
|
Go { player_id: _ } => self.new_pick_up(),
|
||||||
|
|
@ -563,6 +559,11 @@ impl GameState {
|
||||||
self.turn_stage = if self.schools_enabled {
|
self.turn_stage = if self.schools_enabled {
|
||||||
TurnStage::MarkAdvPoints
|
TurnStage::MarkAdvPoints
|
||||||
} else {
|
} else {
|
||||||
|
// The player has moved, we can mark its opponent's points (which is now the current player)
|
||||||
|
let new_hole = self.mark_points(self.active_player_id, self.dice_points.1);
|
||||||
|
if new_hole && self.get_active_player().unwrap().holes > 12 {
|
||||||
|
self.stage = Stage::Ended;
|
||||||
|
}
|
||||||
TurnStage::RollDice
|
TurnStage::RollDice
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
@ -571,12 +572,6 @@ impl GameState {
|
||||||
self.history.push(valid_event.clone());
|
self.history.push(valid_event.clone());
|
||||||
}
|
}
|
||||||
|
|
||||||
fn dice_points(&self) -> u8 {
|
|
||||||
let player = self.players.get(&self.active_player_id).unwrap();
|
|
||||||
let points_rules = PointsRules::new(&player.color, &self.board, self.dice);
|
|
||||||
let (points, adv_points) = points_rules.get_points(player.dice_roll_count);
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Set a new pick up ('relevé') after a player won a hole and choose to 'go',
|
/// Set a new pick up ('relevé') after a player won a hole and choose to 'go',
|
||||||
/// or after a player has borne off (taken his men off the board)
|
/// or after a player has borne off (taken his men off the board)
|
||||||
fn new_pick_up(&mut self) {
|
fn new_pick_up(&mut self) {
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue