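WIP: debug tracing in TrictracEnvironment (has_played / need_roll flags, print! traces in the step and opponent loops, temporary panic after 10 good moves, opponent point calculation re-enabled)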

This commit is contained in:
Henri Bourcereau 2025-08-16 11:13:31 +02:00
parent d313cb6151
commit 56d155b911

View file

@ -168,6 +168,7 @@ impl Environment for TrictracEnvironment {
let is_rollpoint;
// Exécuter l'action si c'est le tour de l'agent DQN
let mut has_played = false;
if self.game.active_player_id == self.active_player_id {
if let Some(action) = trictrac_action {
(reward, is_rollpoint) = self.execute_action(action);
@ -175,6 +176,7 @@ impl Environment for TrictracEnvironment {
self.pointrolls_count += 1;
}
if reward != Self::ERROR_REWARD {
has_played = true;
self.goodmoves_count += 1;
}
} else {
@ -184,7 +186,18 @@ impl Environment for TrictracEnvironment {
}
// Faire jouer l'adversaire (stratégie simple)
if has_played {
print!(
"?({},{:?}) ",
self.game.active_player_id, self.game.turn_stage
);
if self.goodmoves_count > 10 {
println!("{:?}", self.game.history);
panic!("end debug");
}
}
while self.game.active_player_id == self.opponent_id && self.game.stage != Stage::Ended {
print!(":");
reward += self.play_opponent_if_needed();
}
@ -260,11 +273,13 @@ impl TrictracEnvironment {
let mut reward = 0.0;
let mut is_rollpoint = false;
let mut need_roll = false;
let event = match action {
TrictracAction::Roll => {
// Lancer les dés
reward += 0.1;
need_roll = true;
Some(GameEvent::Roll {
player_id: self.active_player_id,
})
@ -323,7 +338,8 @@ impl TrictracEnvironment {
self.game.consume(&event);
// Simuler le résultat des dés après un Roll
if matches!(action, TrictracAction::Roll) {
// if matches!(action, TrictracAction::Roll) {
if need_roll {
let mut rng = thread_rng();
let dice_values = (rng.gen_range(1..=6), rng.gen_range(1..=6));
let dice_event = GameEvent::RollResult {
@ -332,6 +348,7 @@ impl TrictracEnvironment {
values: dice_values,
},
};
print!("o");
if self.game.validate(&dice_event) {
self.game.consume(&dice_event);
let (points, adv_points) = self.game.dice_points;
@ -380,7 +397,7 @@ impl TrictracEnvironment {
TurnStage::RollWaiting => {
let mut rng = thread_rng();
let dice_values = (rng.gen_range(1..=6), rng.gen_range(1..=6));
// calculate_points = true; // comment to replicate burnrl_before
calculate_points = true; // comment to replicate burnrl_before
GameEvent::RollResult {
player_id: self.opponent_id,
dice: store::Dice {
@ -432,7 +449,9 @@ impl TrictracEnvironment {
if self.game.validate(&event) {
self.game.consume(&event);
print!(".");
if calculate_points {
print!("x");
let dice_roll_count = self
.game
.players
@ -443,7 +462,11 @@ impl TrictracEnvironment {
PointsRules::new(&opponent_color, &self.game.board, self.game.dice);
let (points, adv_points) = points_rules.get_points(dice_roll_count);
// Récompense proportionnelle aux points
reward -= Self::REWARD_RATIO * (points - adv_points) as f32;
let adv_reward = Self::REWARD_RATIO * (points - adv_points) as f32;
reward -= adv_reward;
// if adv_reward != 0.0 {
// println!("info: opponent : {adv_reward} -> {reward}");
// }
}
}
}