fix: train bot opponent rewards
This commit is contained in:
parent
ac14341cf9
commit
86a67ae66a
|
|
@ -17,7 +17,7 @@ train() {
|
||||||
}
|
}
|
||||||
|
|
||||||
plot() {
|
plot() {
|
||||||
NAME=$(ls -rt "$LOGS_DIR" | tail -n 1)
|
NAME=$(ls -rt "$LOGS_DIR" | grep -v "png" | tail -n 1)
|
||||||
LOGS="$LOGS_DIR/$NAME"
|
LOGS="$LOGS_DIR/$NAME"
|
||||||
cfgs=$(head -n $CFG_SIZE "$LOGS")
|
cfgs=$(head -n $CFG_SIZE "$LOGS")
|
||||||
for cfg in $cfgs; do
|
for cfg in $cfgs; do
|
||||||
|
|
@ -31,8 +31,19 @@ plot() {
|
||||||
feedgnuplot --lines --points --unset grid --title "adv = $OPPONENT ; density = $dense_size ; decay = $eps_decay ; max steps = $max_steps" --terminal $PLOT_EXT >"$LOGS_DIR/$OPPONENT-$dense_size-$eps_decay-$max_steps-$NAME.$PLOT_EXT"
|
feedgnuplot --lines --points --unset grid --title "adv = $OPPONENT ; density = $dense_size ; decay = $eps_decay ; max steps = $max_steps" --terminal $PLOT_EXT >"$LOGS_DIR/$OPPONENT-$dense_size-$eps_decay-$max_steps-$NAME.$PLOT_EXT"
|
||||||
}
|
}
|
||||||
|
|
||||||
|
avg() {
|
||||||
|
NAME=$(ls -rt "$LOGS_DIR" | grep -v "png" | tail -n 1)
|
||||||
|
LOGS="$LOGS_DIR/$NAME"
|
||||||
|
echo $LOGS
|
||||||
|
tail -n +$((CFG_SIZE + 2)) "$LOGS" |
|
||||||
|
grep -v "info:" |
|
||||||
|
awk -F '[ ,]' '{print $5}' | awk '{ sum += $1; n++ } END { if (n > 0) print sum / n; }'
|
||||||
|
}
|
||||||
|
|
||||||
if [ "$1" = "plot" ]; then
|
if [ "$1" = "plot" ]; then
|
||||||
plot
|
plot
|
||||||
|
elif [ "$1" = "avg" ]; then
|
||||||
|
avg
|
||||||
else
|
else
|
||||||
train
|
train
|
||||||
fi
|
fi
|
||||||
|
|
|
||||||
|
|
@ -386,6 +386,8 @@ impl TrictracEnvironment {
|
||||||
*strategy.get_mut_game() = self.game.clone();
|
*strategy.get_mut_game() = self.game.clone();
|
||||||
|
|
||||||
// Exécuter l'action selon le turn_stage
|
// Exécuter l'action selon le turn_stage
|
||||||
|
let mut calculate_points = false;
|
||||||
|
let opponent_color = store::Color::Black;
|
||||||
let event = match self.game.turn_stage {
|
let event = match self.game.turn_stage {
|
||||||
TurnStage::RollDice => GameEvent::Roll {
|
TurnStage::RollDice => GameEvent::Roll {
|
||||||
player_id: self.opponent_id,
|
player_id: self.opponent_id,
|
||||||
|
|
@ -393,6 +395,7 @@ impl TrictracEnvironment {
|
||||||
TurnStage::RollWaiting => {
|
TurnStage::RollWaiting => {
|
||||||
let mut rng = thread_rng();
|
let mut rng = thread_rng();
|
||||||
let dice_values = (rng.gen_range(1..=6), rng.gen_range(1..=6));
|
let dice_values = (rng.gen_range(1..=6), rng.gen_range(1..=6));
|
||||||
|
calculate_points = true;
|
||||||
GameEvent::RollResult {
|
GameEvent::RollResult {
|
||||||
player_id: self.opponent_id,
|
player_id: self.opponent_id,
|
||||||
dice: store::Dice {
|
dice: store::Dice {
|
||||||
|
|
@ -401,7 +404,6 @@ impl TrictracEnvironment {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
TurnStage::MarkPoints => {
|
TurnStage::MarkPoints => {
|
||||||
let opponent_color = store::Color::Black;
|
|
||||||
let dice_roll_count = self
|
let dice_roll_count = self
|
||||||
.game
|
.game
|
||||||
.players
|
.players
|
||||||
|
|
@ -410,12 +412,9 @@ impl TrictracEnvironment {
|
||||||
.dice_roll_count;
|
.dice_roll_count;
|
||||||
let points_rules =
|
let points_rules =
|
||||||
PointsRules::new(&opponent_color, &self.game.board, self.game.dice);
|
PointsRules::new(&opponent_color, &self.game.board, self.game.dice);
|
||||||
let (points, adv_points) = points_rules.get_points(dice_roll_count);
|
|
||||||
reward -= Self::REWARD_RATIO * (points - adv_points) as f32; // Récompense proportionnelle aux points
|
|
||||||
|
|
||||||
GameEvent::Mark {
|
GameEvent::Mark {
|
||||||
player_id: self.opponent_id,
|
player_id: self.opponent_id,
|
||||||
points,
|
points: points_rules.get_points(dice_roll_count).0,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
TurnStage::MarkAdvPoints => {
|
TurnStage::MarkAdvPoints => {
|
||||||
|
|
@ -428,11 +427,10 @@ impl TrictracEnvironment {
|
||||||
.dice_roll_count;
|
.dice_roll_count;
|
||||||
let points_rules =
|
let points_rules =
|
||||||
PointsRules::new(&opponent_color, &self.game.board, self.game.dice);
|
PointsRules::new(&opponent_color, &self.game.board, self.game.dice);
|
||||||
let points = points_rules.get_points(dice_roll_count).1;
|
|
||||||
// pas de reward : déjà comptabilisé lors du tour de blanc
|
// pas de reward : déjà comptabilisé lors du tour de blanc
|
||||||
GameEvent::Mark {
|
GameEvent::Mark {
|
||||||
player_id: self.opponent_id,
|
player_id: self.opponent_id,
|
||||||
points,
|
points: points_rules.get_points(dice_roll_count).1,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
TurnStage::HoldOrGoChoice => {
|
TurnStage::HoldOrGoChoice => {
|
||||||
|
|
@ -449,6 +447,19 @@ impl TrictracEnvironment {
|
||||||
|
|
||||||
if self.game.validate(&event) {
|
if self.game.validate(&event) {
|
||||||
self.game.consume(&event);
|
self.game.consume(&event);
|
||||||
|
if calculate_points {
|
||||||
|
let dice_roll_count = self
|
||||||
|
.game
|
||||||
|
.players
|
||||||
|
.get(&self.opponent_id)
|
||||||
|
.unwrap()
|
||||||
|
.dice_roll_count;
|
||||||
|
let points_rules =
|
||||||
|
PointsRules::new(&opponent_color, &self.game.board, self.game.dice);
|
||||||
|
let (points, adv_points) = points_rules.get_points(dice_roll_count);
|
||||||
|
// Récompense proportionnelle aux points
|
||||||
|
reward -= Self::REWARD_RATIO * (points - adv_points) as f32;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
reward
|
reward
|
||||||
|
|
|
||||||
|
|
@ -156,17 +156,26 @@ impl Environment for TrictracEnvironment {
|
||||||
if self.game.active_player_id == self.active_player_id {
|
if self.game.active_player_id == self.active_player_id {
|
||||||
if let Some(action) = trictrac_action {
|
if let Some(action) = trictrac_action {
|
||||||
(reward, is_rollpoint) = self.execute_action(action);
|
(reward, is_rollpoint) = self.execute_action(action);
|
||||||
|
// if reward != 0.0 {
|
||||||
|
// println!("info: self rew {reward}");
|
||||||
|
// }
|
||||||
if is_rollpoint {
|
if is_rollpoint {
|
||||||
self.pointrolls_count += 1;
|
self.pointrolls_count += 1;
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
// Action non convertible, pénalité
|
// Action non convertible, pénalité
|
||||||
|
println!("info: action non convertible -> -1 {trictrac_action:?}");
|
||||||
reward = -1.0;
|
reward = -1.0;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Faire jouer l'adversaire (stratégie simple)
|
// Faire jouer l'adversaire (stratégie simple)
|
||||||
while self.game.active_player_id == self.opponent_id && self.game.stage != Stage::Ended {
|
while self.game.active_player_id == self.opponent_id && self.game.stage != Stage::Ended {
|
||||||
|
// let op_rew = self.play_opponent_if_needed();
|
||||||
|
// if op_rew != 0.0 {
|
||||||
|
// println!("info: op rew {op_rew}");
|
||||||
|
// }
|
||||||
|
// reward += op_rew;
|
||||||
reward += self.play_opponent_if_needed();
|
reward += self.play_opponent_if_needed();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -322,6 +331,7 @@ impl TrictracEnvironment {
|
||||||
// Pénalité pour action invalide
|
// Pénalité pour action invalide
|
||||||
// on annule les précédents reward
|
// on annule les précédents reward
|
||||||
// et on indique une valeur reconnaissable pour statistiques
|
// et on indique une valeur reconnaissable pour statistiques
|
||||||
|
println!("info: action invalide -> err_reward");
|
||||||
reward = Self::ERROR_REWARD;
|
reward = Self::ERROR_REWARD;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -346,6 +356,8 @@ impl TrictracEnvironment {
|
||||||
*strategy.get_mut_game() = self.game.clone();
|
*strategy.get_mut_game() = self.game.clone();
|
||||||
|
|
||||||
// Exécuter l'action selon le turn_stage
|
// Exécuter l'action selon le turn_stage
|
||||||
|
let mut calculate_points = false;
|
||||||
|
let opponent_color = store::Color::Black;
|
||||||
let event = match self.game.turn_stage {
|
let event = match self.game.turn_stage {
|
||||||
TurnStage::RollDice => GameEvent::Roll {
|
TurnStage::RollDice => GameEvent::Roll {
|
||||||
player_id: self.opponent_id,
|
player_id: self.opponent_id,
|
||||||
|
|
@ -353,6 +365,7 @@ impl TrictracEnvironment {
|
||||||
TurnStage::RollWaiting => {
|
TurnStage::RollWaiting => {
|
||||||
let mut rng = thread_rng();
|
let mut rng = thread_rng();
|
||||||
let dice_values = (rng.gen_range(1..=6), rng.gen_range(1..=6));
|
let dice_values = (rng.gen_range(1..=6), rng.gen_range(1..=6));
|
||||||
|
calculate_points = true;
|
||||||
GameEvent::RollResult {
|
GameEvent::RollResult {
|
||||||
player_id: self.opponent_id,
|
player_id: self.opponent_id,
|
||||||
dice: store::Dice {
|
dice: store::Dice {
|
||||||
|
|
@ -361,7 +374,6 @@ impl TrictracEnvironment {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
TurnStage::MarkPoints => {
|
TurnStage::MarkPoints => {
|
||||||
let opponent_color = store::Color::Black;
|
|
||||||
let dice_roll_count = self
|
let dice_roll_count = self
|
||||||
.game
|
.game
|
||||||
.players
|
.players
|
||||||
|
|
@ -371,15 +383,12 @@ impl TrictracEnvironment {
|
||||||
let points_rules =
|
let points_rules =
|
||||||
PointsRules::new(&opponent_color, &self.game.board, self.game.dice);
|
PointsRules::new(&opponent_color, &self.game.board, self.game.dice);
|
||||||
let (points, adv_points) = points_rules.get_points(dice_roll_count);
|
let (points, adv_points) = points_rules.get_points(dice_roll_count);
|
||||||
reward -= Self::REWARD_RATIO * (points - adv_points) as f32; // Récompense proportionnelle aux points
|
|
||||||
|
|
||||||
GameEvent::Mark {
|
GameEvent::Mark {
|
||||||
player_id: self.opponent_id,
|
player_id: self.opponent_id,
|
||||||
points,
|
points,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
TurnStage::MarkAdvPoints => {
|
TurnStage::MarkAdvPoints => {
|
||||||
let opponent_color = store::Color::Black;
|
|
||||||
let dice_roll_count = self
|
let dice_roll_count = self
|
||||||
.game
|
.game
|
||||||
.players
|
.players
|
||||||
|
|
@ -409,6 +418,19 @@ impl TrictracEnvironment {
|
||||||
|
|
||||||
if self.game.validate(&event) {
|
if self.game.validate(&event) {
|
||||||
self.game.consume(&event);
|
self.game.consume(&event);
|
||||||
|
if calculate_points {
|
||||||
|
let dice_roll_count = self
|
||||||
|
.game
|
||||||
|
.players
|
||||||
|
.get(&self.opponent_id)
|
||||||
|
.unwrap()
|
||||||
|
.dice_roll_count;
|
||||||
|
let points_rules =
|
||||||
|
PointsRules::new(&opponent_color, &self.game.board, self.game.dice);
|
||||||
|
let (points, adv_points) = points_rules.get_points(dice_roll_count);
|
||||||
|
reward -= Self::REWARD_RATIO * (points - adv_points) as f32;
|
||||||
|
// Récompense proportionnelle aux points
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
reward
|
reward
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue