From 5f33737c1bda8182c3190f58d5f1977926088a8d Mon Sep 17 00:00:00 2001 From: Henri Bourcereau Date: Mon, 9 Jun 2025 20:17:00 +0200 Subject: [PATCH] wip fix workflow --- bot/src/strategy/default.rs | 20 +++++---- bot/src/strategy/dqn.rs | 79 +++++++++++++++++++++------------- bot/src/strategy/dqn_common.rs | 45 ++++++++++--------- doc/workflow.md | 25 +++++++++++ store/src/game.rs | 45 +++++++++---------- 5 files changed, 126 insertions(+), 88 deletions(-) create mode 100644 doc/workflow.md diff --git a/bot/src/strategy/default.rs b/bot/src/strategy/default.rs index 98e8322..81aa5f1 100644 --- a/bot/src/strategy/default.rs +++ b/bot/src/strategy/default.rs @@ -36,18 +36,20 @@ impl BotStrategy for DefaultStrategy { } fn calculate_points(&self) -> u8 { - let dice_roll_count = self - .get_game() - .players - .get(&self.player_id) - .unwrap() - .dice_roll_count; - let points_rules = PointsRules::new(&Color::White, &self.game.board, self.game.dice); - points_rules.get_points(dice_roll_count).0 + // let dice_roll_count = self + // .get_game() + // .players + // .get(&self.player_id) + // .unwrap() + // .dice_roll_count; + // let points_rules = PointsRules::new(&Color::White, &self.game.board, self.game.dice); + // points_rules.get_points(dice_roll_count).0 + self.game.dice_points.0 } fn calculate_adv_points(&self) -> u8 { - self.calculate_points() + // self.calculate_points() + self.game.dice_points.1 } fn choose_go(&self) -> bool { diff --git a/bot/src/strategy/dqn.rs b/bot/src/strategy/dqn.rs index d2fc9ed..779ce3d 100644 --- a/bot/src/strategy/dqn.rs +++ b/bot/src/strategy/dqn.rs @@ -2,7 +2,9 @@ use crate::{BotStrategy, CheckerMove, Color, GameState, PlayerId, PointsRules}; use std::path::Path; use store::MoveRules; -use super::dqn_common::{SimpleNeuralNetwork, TrictracAction, get_valid_actions, sample_valid_action}; +use super::dqn_common::{ + get_valid_actions, sample_valid_action, SimpleNeuralNetwork, TrictracAction, +}; /// Stratégie DQN pour le bot - ne fait que 
charger et utiliser un modèle pré-entraîné #[derive(Debug)] @@ -42,18 +44,18 @@ impl DqnStrategy { if let Some(ref model) = self.model { let state = self.game.to_vec_float(); let valid_actions = get_valid_actions(&self.game); - + if valid_actions.is_empty() { return None; } - + // Obtenir les Q-values pour toutes les actions let q_values = model.forward(&state); - + // Trouver la meilleure action valide let mut best_action = &valid_actions[0]; let mut best_q_value = f32::NEG_INFINITY; - + for action in &valid_actions { let action_index = action.to_action_index(); if action_index < q_values.len() { @@ -64,7 +66,7 @@ impl DqnStrategy { } } } - + Some(best_action.clone()) } else { // Fallback : action aléatoire valide @@ -91,26 +93,11 @@ impl BotStrategy for DqnStrategy { } fn calculate_points(&self) -> u8 { - // Utiliser le DQN pour choisir le nombre de points à marquer - if let Some(action) = self.get_dqn_action() { - if let TrictracAction::Mark { points } = action { - return points; - } - } - - // Fallback : utiliser la méthode standard - let dice_roll_count = self - .get_game() - .players - .get(&self.player_id) - .unwrap() - .dice_roll_count; - let points_rules = PointsRules::new(&self.color, &self.game.board, self.game.dice); - points_rules.get_points(dice_roll_count).0 + self.game.dice_points.0 } fn calculate_adv_points(&self) -> u8 { - self.calculate_points() + self.game.dice_points.1 } fn choose_go(&self) -> bool { @@ -126,24 +113,55 @@ impl BotStrategy for DqnStrategy { fn choose_move(&self) -> (CheckerMove, CheckerMove) { // Utiliser le DQN pour choisir le mouvement if let Some(action) = self.get_dqn_action() { - if let TrictracAction::Move { move1, move2 } = action { - let checker_move1 = CheckerMove::new(move1.0, move1.1).unwrap_or_default(); - let checker_move2 = CheckerMove::new(move2.0, move2.1).unwrap_or_default(); - + if let TrictracAction::Move { + dice_order, + from1, + from2, + } = action + { + let dicevals = self.game.dice.values; + let (mut 
dice1, mut dice2) = if dice_order { + (dicevals.0, dicevals.1) + } else { + (dicevals.1, dicevals.0) + }; + + if from1 == 0 { + // empty move + dice1 = 0; + } + let mut to1 = from1 + dice1 as usize; + if 24 < to1 { + // sortie + to1 = 0; + } + if from2 == 0 { + // empty move + dice2 = 0; + } + let mut to2 = from2 + dice2 as usize; + if 24 < to2 { + // sortie + to2 = 0; + } + + let checker_move1 = CheckerMove::new(from1, to1).unwrap_or_default(); + let checker_move2 = CheckerMove::new(from2, to2).unwrap_or_default(); + let chosen_move = if self.color == Color::White { (checker_move1, checker_move2) } else { (checker_move1.mirror(), checker_move2.mirror()) }; - + return chosen_move; } } - + // Fallback : utiliser la stratégie par défaut let rules = MoveRules::new(&self.color, &self.game.board, self.game.dice); let possible_moves = rules.get_possible_moves_sequences(true, vec![]); - + let chosen_move = *possible_moves .first() .unwrap_or(&(CheckerMove::default(), CheckerMove::default())); @@ -155,4 +173,3 @@ impl BotStrategy for DqnStrategy { } } } - diff --git a/bot/src/strategy/dqn_common.rs b/bot/src/strategy/dqn_common.rs index 5cf30d5..3191b4b 100644 --- a/bot/src/strategy/dqn_common.rs +++ b/bot/src/strategy/dqn_common.rs @@ -52,7 +52,7 @@ impl TrictracAction { 2 => Some(TrictracAction::Go), i if i >= 3 => { let move_code = i - 3; - let (dice_order, from1, from2) = decode_move(move_code); + let (dice_order, from1, from2) = Self::decode_move(move_code); Some(TrictracAction::Move { dice_order, from1, @@ -83,28 +83,27 @@ impl TrictracAction { 3 + (2 * 25 * 25) // = 1253 } - pub fn to_game_event(&self, player_id: PlayerId, dice: Dice) -> GameEvent { - match action { - TrictracAction::Roll => Some(GameEvent::Roll { player_id }), - TrictracAction::Mark => Some(GameEvent::Mark { player_id, points }), - TrictracAction::Go => Some(GameEvent::Go { player_id }), - TrictracAction::Move { - dice_order, - from1, - from2, - } => { - // Effectuer un mouvement - let 
checker_move1 = store::CheckerMove::new(move1.0, move1.1).unwrap_or_default(); - let checker_move2 = store::CheckerMove::new(move2.0, move2.1).unwrap_or_default(); - - reward += 0.2; - Some(GameEvent::Move { - player_id: self.agent_player_id, - moves: (checker_move1, checker_move2), - }) - } - }; - } + // pub fn to_game_event(&self, player_id: PlayerId, dice: Dice) -> GameEvent { + // match action { + // TrictracAction::Roll => Some(GameEvent::Roll { player_id }), + // TrictracAction::Mark => Some(GameEvent::Mark { player_id, points }), + // TrictracAction::Go => Some(GameEvent::Go { player_id }), + // TrictracAction::Move { + // dice_order, + // from1, + // from2, + // } => { + // // Effectuer un mouvement + // let checker_move1 = store::CheckerMove::new(move1.0, move1.1).unwrap_or_default(); + // let checker_move2 = store::CheckerMove::new(move2.0, move2.1).unwrap_or_default(); + // + // Some(GameEvent::Move { + // player_id: self.agent_player_id, + // moves: (checker_move1, checker_move2), + // }) + // } + // }; + // } } /// Configuration pour l'agent DQN diff --git a/doc/workflow.md b/doc/workflow.md new file mode 100644 index 0000000..2139332 --- /dev/null +++ b/doc/workflow.md @@ -0,0 +1,25 @@ +# Workflow + +@startuml + +state c <<choice>> +state haswon <<choice>> +state MarkPoints #lightblue +state MarkAdvPoints #lightblue +note right of MarkPoints : automatic 'Mark' transition\nwhen no school +note right of MarkAdvPoints : automatic 'Mark' transition\nwhen no school + +[*] -> RollDice : BeginGame +RollDice --> RollWaiting : Roll (current player) +RollWaiting --> MarkPoints : RollResult (engine) +MarkPoints --> c : Mark (current player) +c --> HoldOrGoChoice : [new hole] +c --> [*] : [has won] +c --> Move : [not new hole] +HoldOrGoChoice --> RollDice : Go +HoldOrGoChoice --> MarkAdvPoints : Move +Move --> MarkAdvPoints : Move +MarkAdvPoints --> haswon : Mark (adversary) +haswon --> RollDice : [has not won] +haswon --> [*] : [has won] +@enduml diff --git 
a/store/src/game.rs b/store/src/game.rs index 90e905b..ed77519 100644 --- a/store/src/game.rs +++ b/store/src/game.rs @@ -71,7 +71,7 @@ pub struct GameState { /// last dice pair rolled pub dice: Dice, /// players points computed for the last dice pair rolled - dice_points: (u8, u8), + pub dice_points: (u8, u8), pub dice_moves: (CheckerMove, CheckerMove), pub dice_jans: PossibleJans, /// true if player needs to roll first @@ -505,13 +505,7 @@ impl GameState { self.players.remove(player_id); } Roll { player_id: _ } => { - // Opponent has moved, we can mark pending points earned during opponent's turn - let new_hole = self.mark_points(self.active_player_id, self.dice_points.1); - if new_hole && self.get_active_player().unwrap().holes > 12 { - self.stage = Stage::Ended; - } else { - self.turn_stage = TurnStage::RollWaiting; - } + self.turn_stage = TurnStage::RollWaiting; } RollResult { player_id: _, dice } => { self.dice = *dice; @@ -534,23 +528,25 @@ impl GameState { } } Mark { player_id, points } => { - let new_hole = self.mark_points(*player_id, *points); - if new_hole { - if self.get_active_player().unwrap().holes > 12 { - self.stage = Stage::Ended; + if self.schools_enabled { + let new_hole = self.mark_points(*player_id, *points); + if new_hole { + if self.get_active_player().unwrap().holes > 12 { + self.stage = Stage::Ended; + } else { + self.turn_stage = if self.turn_stage == TurnStage::MarkAdvPoints { + TurnStage::RollDice + } else { + TurnStage::HoldOrGoChoice + }; + } } else { self.turn_stage = if self.turn_stage == TurnStage::MarkAdvPoints { TurnStage::RollDice } else { - TurnStage::HoldOrGoChoice + TurnStage::Move }; } - } else { - self.turn_stage = if self.turn_stage == TurnStage::MarkAdvPoints { - TurnStage::RollDice - } else { - TurnStage::Move - }; } } Go { player_id: _ } => self.new_pick_up(), @@ -563,6 +559,11 @@ impl GameState { self.turn_stage = if self.schools_enabled { TurnStage::MarkAdvPoints } else { + // The player has moved, we can mark its 
opponent's points (which is now the current player) + let new_hole = self.mark_points(self.active_player_id, self.dice_points.1); + if new_hole && self.get_active_player().unwrap().holes > 12 { + self.stage = Stage::Ended; + } TurnStage::RollDice }; } @@ -571,12 +572,6 @@ impl GameState { self.history.push(valid_event.clone()); } - fn dice_points(&self) -> u8 { - let player = self.players.get(&self.active_player_id).unwrap(); - let points_rules = PointsRules::new(&player.color, &self.board, self.dice); - let (points, adv_points) = points_rules.get_points(player.dice_roll_count); - } - /// Set a new pick up ('relevé') after a player won a hole and choose to 'go', /// or after a player has bore off (took of his men off the board) fn new_pick_up(&mut self) {