From 1e773671d9006c2021604762922e97da16edc26c Mon Sep 17 00:00:00 2001 From: Henri Bourcereau Date: Sat, 3 Jan 2026 18:28:05 +0100 Subject: [PATCH] bot train burnrl reward opponent --- bot/src/burnrl/environment.rs | 12 +++++++----- bot/src/burnrl/environment_big.rs | 9 +++++---- bot/src/training_common.rs | 2 ++ bot/src/training_common_big.rs | 2 ++ store/src/game.rs | 4 ++++ 5 files changed, 20 insertions(+), 9 deletions(-) diff --git a/bot/src/burnrl/environment.rs b/bot/src/burnrl/environment.rs index 50daf11..84c8311 100644 --- a/bot/src/burnrl/environment.rs +++ b/bot/src/burnrl/environment.rs @@ -6,10 +6,10 @@ use burn_rl::base::{Action, Environment, Snapshot, State}; use rand::{thread_rng, Rng}; use store::{GameEvent, GameState, PlayerId, PointsRules, Stage, TurnStage}; -const ERROR_REWARD: f32 = -1.12121; -const REWARD_VALID_MOVE: f32 = 1.12121; -const REWARD_RATIO: f32 = 0.01; -const WIN_POINTS: f32 = 1.0; +const ERROR_REWARD: f32 = -1.0012121; +const REWARD_VALID_MOVE: f32 = 1.0012121; +const REWARD_RATIO: f32 = 0.1; +const WIN_POINTS: f32 = 100.0; /// État du jeu Trictrac pour burn-rl #[derive(Debug, Clone, Copy)] @@ -285,7 +285,7 @@ impl TrictracEnvironment { if let Some(event) = action.to_event(&self.game) { if self.game.validate(&event) { self.game.consume(&event); - reward += REWARD_VALID_MOVE; + // reward += REWARD_VALID_MOVE; // Simuler le résultat des dés après un Roll if matches!(action, TrictracAction::Roll) { let mut rng = thread_rng(); @@ -312,9 +312,11 @@ impl TrictracEnvironment { // on annule les précédents reward // et on indique une valeur reconnaissable pour statistiques reward = ERROR_REWARD; + self.game.mark_points_for_bot_training(self.opponent_id, 1); } } else { reward = ERROR_REWARD; + self.game.mark_points_for_bot_training(self.opponent_id, 1); } (reward, is_rollpoint) diff --git a/bot/src/burnrl/environment_big.rs b/bot/src/burnrl/environment_big.rs index 1bba2bd..40d5a74 100644 --- a/bot/src/burnrl/environment_big.rs +++ 
b/bot/src/burnrl/environment_big.rs @@ -4,10 +4,10 @@ use burn_rl::base::{Action, Environment, Snapshot, State}; use rand::{thread_rng, Rng}; use store::{GameEvent, GameState, PlayerId, PointsRules, Stage, TurnStage}; -const ERROR_REWARD: f32 = -2.12121; -const REWARD_VALID_MOVE: f32 = 2.12121; -const REWARD_RATIO: f32 = 0.01; -const WIN_POINTS: f32 = 0.1; +const ERROR_REWARD: f32 = -1.00012121; +const REWARD_VALID_MOVE: f32 = 1.00012121; +const REWARD_RATIO: f32 = 0.1; +const WIN_POINTS: f32 = 100.0; /// État du jeu Trictrac pour burn-rl #[derive(Debug, Clone, Copy)] @@ -352,6 +352,7 @@ impl TrictracEnvironment { // on annule les précédents reward // et on indique une valeur reconnaissable pour statistiques reward = ERROR_REWARD; + self.game.mark_points_for_bot_training(self.opponent_id, 1); } } diff --git a/bot/src/training_common.rs b/bot/src/training_common.rs index 5d8e870..750b2ae 100644 --- a/bot/src/training_common.rs +++ b/bot/src/training_common.rs @@ -1,3 +1,5 @@ +/// training_common.rs : environnement avec espace d'actions optimisé +/// (514 au lieu de 1252 pour training_common_big.rs) use std::cmp::{max, min}; use std::fmt::{Debug, Display, Formatter}; diff --git a/bot/src/training_common_big.rs b/bot/src/training_common_big.rs index 9f8bae4..d7e5bf1 100644 --- a/bot/src/training_common_big.rs +++ b/bot/src/training_common_big.rs @@ -1,3 +1,5 @@ +/// training_common_big.rs : environnement avec espace d'actions non optimisé +/// (1252 au lieu de 514 pour training_common.rs) use std::cmp::{max, min}; use serde::{Deserialize, Serialize}; diff --git a/store/src/game.rs b/store/src/game.rs index 09ea3f3..4814349 100644 --- a/store/src/game.rs +++ b/store/src/game.rs @@ -742,6 +742,10 @@ impl GameState { }); } + pub fn mark_points_for_bot_training(&mut self, player_id: PlayerId, points: u8) -> bool { + self.mark_points(player_id, points) + } + fn mark_points(&mut self, player_id: PlayerId, points: u8) -> bool { // Update player points and holes let mut 
new_hole = false;