wip debug
This commit is contained in:
parent
d313cb6151
commit
56d155b911
|
|
@ -168,6 +168,7 @@ impl Environment for TrictracEnvironment {
|
||||||
let is_rollpoint;
|
let is_rollpoint;
|
||||||
|
|
||||||
// Exécuter l'action si c'est le tour de l'agent DQN
|
// Exécuter l'action si c'est le tour de l'agent DQN
|
||||||
|
let mut has_played = false;
|
||||||
if self.game.active_player_id == self.active_player_id {
|
if self.game.active_player_id == self.active_player_id {
|
||||||
if let Some(action) = trictrac_action {
|
if let Some(action) = trictrac_action {
|
||||||
(reward, is_rollpoint) = self.execute_action(action);
|
(reward, is_rollpoint) = self.execute_action(action);
|
||||||
|
|
@ -175,6 +176,7 @@ impl Environment for TrictracEnvironment {
|
||||||
self.pointrolls_count += 1;
|
self.pointrolls_count += 1;
|
||||||
}
|
}
|
||||||
if reward != Self::ERROR_REWARD {
|
if reward != Self::ERROR_REWARD {
|
||||||
|
has_played = true;
|
||||||
self.goodmoves_count += 1;
|
self.goodmoves_count += 1;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
|
@ -184,7 +186,18 @@ impl Environment for TrictracEnvironment {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Faire jouer l'adversaire (stratégie simple)
|
// Faire jouer l'adversaire (stratégie simple)
|
||||||
|
if has_played {
|
||||||
|
print!(
|
||||||
|
"?({},{:?}) ",
|
||||||
|
self.game.active_player_id, self.game.turn_stage
|
||||||
|
);
|
||||||
|
if self.goodmoves_count > 10 {
|
||||||
|
println!("{:?}", self.game.history);
|
||||||
|
panic!("end debug");
|
||||||
|
}
|
||||||
|
}
|
||||||
while self.game.active_player_id == self.opponent_id && self.game.stage != Stage::Ended {
|
while self.game.active_player_id == self.opponent_id && self.game.stage != Stage::Ended {
|
||||||
|
print!(":");
|
||||||
reward += self.play_opponent_if_needed();
|
reward += self.play_opponent_if_needed();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -260,11 +273,13 @@ impl TrictracEnvironment {
|
||||||
|
|
||||||
let mut reward = 0.0;
|
let mut reward = 0.0;
|
||||||
let mut is_rollpoint = false;
|
let mut is_rollpoint = false;
|
||||||
|
let mut need_roll = false;
|
||||||
|
|
||||||
let event = match action {
|
let event = match action {
|
||||||
TrictracAction::Roll => {
|
TrictracAction::Roll => {
|
||||||
// Lancer les dés
|
// Lancer les dés
|
||||||
reward += 0.1;
|
reward += 0.1;
|
||||||
|
need_roll = true;
|
||||||
Some(GameEvent::Roll {
|
Some(GameEvent::Roll {
|
||||||
player_id: self.active_player_id,
|
player_id: self.active_player_id,
|
||||||
})
|
})
|
||||||
|
|
@ -323,7 +338,8 @@ impl TrictracEnvironment {
|
||||||
self.game.consume(&event);
|
self.game.consume(&event);
|
||||||
|
|
||||||
// Simuler le résultat des dés après un Roll
|
// Simuler le résultat des dés après un Roll
|
||||||
if matches!(action, TrictracAction::Roll) {
|
// if matches!(action, TrictracAction::Roll) {
|
||||||
|
if need_roll {
|
||||||
let mut rng = thread_rng();
|
let mut rng = thread_rng();
|
||||||
let dice_values = (rng.gen_range(1..=6), rng.gen_range(1..=6));
|
let dice_values = (rng.gen_range(1..=6), rng.gen_range(1..=6));
|
||||||
let dice_event = GameEvent::RollResult {
|
let dice_event = GameEvent::RollResult {
|
||||||
|
|
@ -332,6 +348,7 @@ impl TrictracEnvironment {
|
||||||
values: dice_values,
|
values: dice_values,
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
|
print!("o");
|
||||||
if self.game.validate(&dice_event) {
|
if self.game.validate(&dice_event) {
|
||||||
self.game.consume(&dice_event);
|
self.game.consume(&dice_event);
|
||||||
let (points, adv_points) = self.game.dice_points;
|
let (points, adv_points) = self.game.dice_points;
|
||||||
|
|
@ -380,7 +397,7 @@ impl TrictracEnvironment {
|
||||||
TurnStage::RollWaiting => {
|
TurnStage::RollWaiting => {
|
||||||
let mut rng = thread_rng();
|
let mut rng = thread_rng();
|
||||||
let dice_values = (rng.gen_range(1..=6), rng.gen_range(1..=6));
|
let dice_values = (rng.gen_range(1..=6), rng.gen_range(1..=6));
|
||||||
// calculate_points = true; // comment to replicate burnrl_before
|
calculate_points = true; // comment to replicate burnrl_before
|
||||||
GameEvent::RollResult {
|
GameEvent::RollResult {
|
||||||
player_id: self.opponent_id,
|
player_id: self.opponent_id,
|
||||||
dice: store::Dice {
|
dice: store::Dice {
|
||||||
|
|
@ -432,7 +449,9 @@ impl TrictracEnvironment {
|
||||||
|
|
||||||
if self.game.validate(&event) {
|
if self.game.validate(&event) {
|
||||||
self.game.consume(&event);
|
self.game.consume(&event);
|
||||||
|
print!(".");
|
||||||
if calculate_points {
|
if calculate_points {
|
||||||
|
print!("x");
|
||||||
let dice_roll_count = self
|
let dice_roll_count = self
|
||||||
.game
|
.game
|
||||||
.players
|
.players
|
||||||
|
|
@ -443,7 +462,11 @@ impl TrictracEnvironment {
|
||||||
PointsRules::new(&opponent_color, &self.game.board, self.game.dice);
|
PointsRules::new(&opponent_color, &self.game.board, self.game.dice);
|
||||||
let (points, adv_points) = points_rules.get_points(dice_roll_count);
|
let (points, adv_points) = points_rules.get_points(dice_roll_count);
|
||||||
// Récompense proportionnelle aux points
|
// Récompense proportionnelle aux points
|
||||||
reward -= Self::REWARD_RATIO * (points - adv_points) as f32;
|
let adv_reward = Self::REWARD_RATIO * (points - adv_points) as f32;
|
||||||
|
reward -= adv_reward;
|
||||||
|
// if adv_reward != 0.0 {
|
||||||
|
// println!("info: opponent : {adv_reward} -> {reward}");
|
||||||
|
// }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue