bot train burnrl reward opponent

2026-01-03 18:28:05 +01:00 · 2026-01-03 18:28:05 +01:00 · 1e773671d9
parent 883ebf9bc1
commit 1e773671d9
5 changed files with 20 additions and 9 deletions
--- a/bot/src/burnrl/environment.rs
+++ b/bot/src/burnrl/environment.rs
@ -6,10 +6,10 @@ use burn_rl::base::{Action, Environment, Snapshot, State};
 use rand::{thread_rng, Rng};
 use store::{GameEvent, GameState, PlayerId, PointsRules, Stage, TurnStage};

-const ERROR_REWARD: f32 = -1.12121;
-const REWARD_VALID_MOVE: f32 = 1.12121;
-const REWARD_RATIO: f32 = 0.01;
-const WIN_POINTS: f32 = 1.0;
+const ERROR_REWARD: f32 = -1.0012121;
+const REWARD_VALID_MOVE: f32 = 1.0012121;
+const REWARD_RATIO: f32 = 0.1;
+const WIN_POINTS: f32 = 100.0;

 /// État du jeu Trictrac pour burn-rl
 #[derive(Debug, Clone, Copy)]
@ -285,7 +285,7 @@ impl TrictracEnvironment {
        if let Some(event) = action.to_event(&self.game) {
            if self.game.validate(&event) {
                self.game.consume(&event);
-                reward += REWARD_VALID_MOVE;
+                // reward += REWARD_VALID_MOVE;
                // Simuler le résultat des dés après un Roll
                if matches!(action, TrictracAction::Roll) {
                    let mut rng = thread_rng();
@ -312,9 +312,11 @@ impl TrictracEnvironment {
                // on annule les précédents reward
                // et on indique une valeur reconnaissable pour statistiques
                reward = ERROR_REWARD;
+                self.game.mark_points_for_bot_training(self.opponent_id, 1);
            }
        } else {
            reward = ERROR_REWARD;
+            self.game.mark_points_for_bot_training(self.opponent_id, 1);
        }

        (reward, is_rollpoint)
--- a/bot/src/burnrl/environment_big.rs
+++ b/bot/src/burnrl/environment_big.rs
@ -4,10 +4,10 @@ use burn_rl::base::{Action, Environment, Snapshot, State};
 use rand::{thread_rng, Rng};
 use store::{GameEvent, GameState, PlayerId, PointsRules, Stage, TurnStage};

-const ERROR_REWARD: f32 = -2.12121;
-const REWARD_VALID_MOVE: f32 = 2.12121;
-const REWARD_RATIO: f32 = 0.01;
-const WIN_POINTS: f32 = 0.1;
+const ERROR_REWARD: f32 = -1.00012121;
+const REWARD_VALID_MOVE: f32 = 1.00012121;
+const REWARD_RATIO: f32 = 0.1;
+const WIN_POINTS: f32 = 100.0;

 /// État du jeu Trictrac pour burn-rl
 #[derive(Debug, Clone, Copy)]
@ -352,6 +352,7 @@ impl TrictracEnvironment {
                // on annule les précédents reward
                // et on indique une valeur reconnaissable pour statistiques
                reward = ERROR_REWARD;
+                self.game.mark_points_for_bot_training(self.opponent_id, 1);
            }
        }

--- a/bot/src/training_common.rs
+++ b/bot/src/training_common.rs
@ -1,3 +1,5 @@
+/// training_common.rs : environnement avec espace d'actions optimisé
+/// (514 au lieu de 1252 pour training_common_big.rs)
 use std::cmp::{max, min};
 use std::fmt::{Debug, Display, Formatter};

--- a/bot/src/training_common_big.rs
+++ b/bot/src/training_common_big.rs
@ -1,3 +1,5 @@
+/// training_common_big.rs : environnement avec espace d'actions non optimisé
+/// (1252 au lieu de 514 pour training_common.rs)
 use std::cmp::{max, min};

 use serde::{Deserialize, Serialize};
--- a/store/src/game.rs
+++ b/store/src/game.rs
@ -742,6 +742,10 @@ impl GameState {
        });
    }

+    pub fn mark_points_for_bot_training(&mut self, player_id: PlayerId, points: u8) -> bool {
+        self.mark_points(player_id, points)
+    }
+
    fn mark_points(&mut self, player_id: PlayerId, points: u8) -> bool {
        // Update player points and holes
        let mut new_hole = false;