bot train burnrl reward opponent

This commit is contained in:
Henri Bourcereau 2026-01-03 18:28:05 +01:00
parent 883ebf9bc1
commit 1e773671d9
5 changed files with 20 additions and 9 deletions

View file

@ -6,10 +6,10 @@ use burn_rl::base::{Action, Environment, Snapshot, State};
use rand::{thread_rng, Rng};
use store::{GameEvent, GameState, PlayerId, PointsRules, Stage, TurnStage};
const ERROR_REWARD: f32 = -1.12121;
const REWARD_VALID_MOVE: f32 = 1.12121;
const REWARD_RATIO: f32 = 0.01;
const WIN_POINTS: f32 = 1.0;
/// Reward assigned when the chosen action cannot be turned into a valid event.
/// It overwrites (rather than adds to) any reward accumulated earlier in the
/// step; the unusual fractional digits make it easy to spot in statistics.
/// NOTE(review): the literal has more digits than an `f32` can hold exactly.
const ERROR_REWARD: f32 = -1.0012121;
/// Bonus for an accepted move.
/// NOTE(review): the visible `reward += REWARD_VALID_MOVE` is commented out in
/// this change — confirm whether this constant is still used elsewhere.
const REWARD_VALID_MOVE: f32 = 1.0012121;
/// NOTE(review): presumably a scaling factor applied to game points when
/// computing the reward — its use is not visible here; confirm.
const REWARD_RATIO: f32 = 0.1;
/// NOTE(review): presumably the reward magnitude attached to winning — its use
/// is not visible here; confirm.
const WIN_POINTS: f32 = 100.0;
/// État du jeu Trictrac pour burn-rl
#[derive(Debug, Clone, Copy)]
@ -285,7 +285,7 @@ impl TrictracEnvironment {
if let Some(event) = action.to_event(&self.game) {
if self.game.validate(&event) {
self.game.consume(&event);
reward += REWARD_VALID_MOVE;
// reward += REWARD_VALID_MOVE;
// Simuler le résultat des dés après un Roll
if matches!(action, TrictracAction::Roll) {
let mut rng = thread_rng();
@ -312,9 +312,11 @@ impl TrictracEnvironment {
// on annule les précédents reward
// et on indique une valeur reconnaissable pour statistiques
reward = ERROR_REWARD;
self.game.mark_points_for_bot_training(self.opponent_id, 1);
}
} else {
reward = ERROR_REWARD;
self.game.mark_points_for_bot_training(self.opponent_id, 1);
}
(reward, is_rollpoint)

View file

@ -4,10 +4,10 @@ use burn_rl::base::{Action, Environment, Snapshot, State};
use rand::{thread_rng, Rng};
use store::{GameEvent, GameState, PlayerId, PointsRules, Stage, TurnStage};
const ERROR_REWARD: f32 = -2.12121;
const REWARD_VALID_MOVE: f32 = 2.12121;
const REWARD_RATIO: f32 = 0.01;
const WIN_POINTS: f32 = 0.1;
/// Reward assigned when the chosen action is invalid. It overwrites (rather
/// than adds to) any reward accumulated earlier in the step; the unusual
/// fractional digits make it easy to spot in statistics.
/// NOTE(review): the literal has more digits than an `f32` can hold exactly.
const ERROR_REWARD: f32 = -1.00012121;
/// NOTE(review): presumably the bonus for an accepted move — its use is not
/// visible here; confirm it is still referenced.
const REWARD_VALID_MOVE: f32 = 1.00012121;
/// NOTE(review): presumably a scaling factor applied to game points when
/// computing the reward — its use is not visible here; confirm.
const REWARD_RATIO: f32 = 0.1;
/// NOTE(review): presumably the reward magnitude attached to winning — its use
/// is not visible here; confirm.
const WIN_POINTS: f32 = 100.0;
/// État du jeu Trictrac pour burn-rl
#[derive(Debug, Clone, Copy)]
@ -352,6 +352,7 @@ impl TrictracEnvironment {
// on annule les précédents reward
// et on indique une valeur reconnaissable pour statistiques
reward = ERROR_REWARD;
self.game.mark_points_for_bot_training(self.opponent_id, 1);
}
}

View file

@ -1,3 +1,5 @@
/// training_common.rs : environnement avec espace d'actions optimisé
/// (514 au lieu de 1252 pour training_common_big.rs)
use std::cmp::{max, min};
use std::fmt::{Debug, Display, Formatter};

View file

@ -1,3 +1,5 @@
/// training_common_big.rs : environnement avec espace d'actions non optimisé
/// (1252 au lieu de 514 pour training_common.rs)
use std::cmp::{max, min};
use serde::{Deserialize, Serialize};

View file

@ -742,6 +742,10 @@ impl GameState {
});
}
/// Public wrapper around the private `mark_points`, exposed so bot-training
/// code can credit points to a player directly.
///
/// Forwards `player_id` and `points` unchanged and returns whatever
/// `mark_points` returns.
/// NOTE(review): the returned flag presumably reports whether a new hole was
/// scored — confirm against `mark_points`.
pub fn mark_points_for_bot_training(&mut self, player_id: PlayerId, points: u8) -> bool {
self.mark_points(player_id, points)
}
fn mark_points(&mut self, player_id: PlayerId, points: u8) -> bool {
// Update player points and holes
let mut new_hole = false;