From 1e773671d9006c2021604762922e97da16edc26c Mon Sep 17 00:00:00 2001
From: Henri Bourcereau <henri.bourcereau@gmail.com>
Date: Sat, 3 Jan 2026 18:28:05 +0100
Subject: [PATCH] bot train burnrl reward opponent

---
 bot/src/burnrl/environment.rs     | 12 +++++++-----
 bot/src/burnrl/environment_big.rs |  9 +++++----
 bot/src/training_common.rs        |  2 ++
 bot/src/training_common_big.rs    |  2 ++
 store/src/game.rs                 |  4 ++++
 5 files changed, 20 insertions(+), 9 deletions(-)

diff --git a/bot/src/burnrl/environment.rs b/bot/src/burnrl/environment.rs
index 50daf11..84c8311 100644
--- a/bot/src/burnrl/environment.rs
+++ b/bot/src/burnrl/environment.rs
@@ -6,10 +6,10 @@ use burn_rl::base::{Action, Environment, Snapshot, State};
 use rand::{thread_rng, Rng};
 use store::{GameEvent, GameState, PlayerId, PointsRules, Stage, TurnStage};
 
-const ERROR_REWARD: f32 = -1.12121;
-const REWARD_VALID_MOVE: f32 = 1.12121;
-const REWARD_RATIO: f32 = 0.01;
-const WIN_POINTS: f32 = 1.0;
+const ERROR_REWARD: f32 = -1.0012121;
+const REWARD_VALID_MOVE: f32 = 1.0012121;
+const REWARD_RATIO: f32 = 0.1;
+const WIN_POINTS: f32 = 100.0;
 
 /// État du jeu Trictrac pour burn-rl
 #[derive(Debug, Clone, Copy)]
@@ -285,7 +285,7 @@ impl TrictracEnvironment {
         if let Some(event) = action.to_event(&self.game) {
             if self.game.validate(&event) {
                 self.game.consume(&event);
-                reward += REWARD_VALID_MOVE;
+                // reward += REWARD_VALID_MOVE;
                 // Simuler le résultat des dés après un Roll
                 if matches!(action, TrictracAction::Roll) {
                     let mut rng = thread_rng();
@@ -312,9 +312,11 @@ impl TrictracEnvironment {
                 // on annule les précédents reward
                 // et on indique une valeur reconnaissable pour statistiques
                 reward = ERROR_REWARD;
+                self.game.mark_points_for_bot_training(self.opponent_id, 1);
             }
         } else {
             reward = ERROR_REWARD;
+            self.game.mark_points_for_bot_training(self.opponent_id, 1);
         }
 
         (reward, is_rollpoint)
diff --git a/bot/src/burnrl/environment_big.rs b/bot/src/burnrl/environment_big.rs
index 1bba2bd..40d5a74 100644
--- a/bot/src/burnrl/environment_big.rs
+++ b/bot/src/burnrl/environment_big.rs
@@ -4,10 +4,10 @@ use burn_rl::base::{Action, Environment, Snapshot, State};
 use rand::{thread_rng, Rng};
 use store::{GameEvent, GameState, PlayerId, PointsRules, Stage, TurnStage};
 
-const ERROR_REWARD: f32 = -2.12121;
-const REWARD_VALID_MOVE: f32 = 2.12121;
-const REWARD_RATIO: f32 = 0.01;
-const WIN_POINTS: f32 = 0.1;
+const ERROR_REWARD: f32 = -1.00012121;
+const REWARD_VALID_MOVE: f32 = 1.00012121;
+const REWARD_RATIO: f32 = 0.1;
+const WIN_POINTS: f32 = 100.0;
 
 /// État du jeu Trictrac pour burn-rl
 #[derive(Debug, Clone, Copy)]
@@ -352,6 +352,7 @@ impl TrictracEnvironment {
                 // on annule les précédents reward
                 // et on indique une valeur reconnaissable pour statistiques
                 reward = ERROR_REWARD;
+                self.game.mark_points_for_bot_training(self.opponent_id, 1);
             }
         }
 
diff --git a/bot/src/training_common.rs b/bot/src/training_common.rs
index 5d8e870..750b2ae 100644
--- a/bot/src/training_common.rs
+++ b/bot/src/training_common.rs
@@ -1,3 +1,5 @@
+/// training_common.rs : environnement avec espace d'actions optimisé
+/// (514 au lieu de 1252 pour training_common_big.rs)
 use std::cmp::{max, min};
 use std::fmt::{Debug, Display, Formatter};
 
diff --git a/bot/src/training_common_big.rs b/bot/src/training_common_big.rs
index 9f8bae4..d7e5bf1 100644
--- a/bot/src/training_common_big.rs
+++ b/bot/src/training_common_big.rs
@@ -1,3 +1,5 @@
+/// training_common_big.rs : environnement avec espace d'actions non optimisé
+/// (1252 au lieu de 514 pour training_common.rs)
 use std::cmp::{max, min};
 
 use serde::{Deserialize, Serialize};
diff --git a/store/src/game.rs b/store/src/game.rs
index 09ea3f3..4814349 100644
--- a/store/src/game.rs
+++ b/store/src/game.rs
@@ -742,6 +742,10 @@ impl GameState {
         });
     }
 
+    pub fn mark_points_for_bot_training(&mut self, player_id: PlayerId, points: u8) -> bool {
+        self.mark_points(player_id, points) // pass-through to the private `mark_points`
+    }
+
     fn mark_points(&mut self, player_id: PlayerId, points: u8) -> bool {
         // Update player points and holes
         let mut new_hole = false;