chore(refact): move training_common from bot to store crate
This commit is contained in:
parent
257665c546
commit
453c363334
12 changed files with 38 additions and 39 deletions
2
Cargo.lock
generated
2
Cargo.lock
generated
|
|
@ -6621,7 +6621,7 @@ dependencies = [
|
||||||
"log",
|
"log",
|
||||||
"merge",
|
"merge",
|
||||||
"pyo3",
|
"pyo3",
|
||||||
"rand 0.8.5",
|
"rand 0.9.2",
|
||||||
"serde",
|
"serde",
|
||||||
"transpose",
|
"transpose",
|
||||||
]
|
]
|
||||||
|
|
|
||||||
|
|
@ -2,3 +2,4 @@ import trictrac_store
|
||||||
|
|
||||||
game = trictrac_store.TricTrac()
|
game = trictrac_store.TricTrac()
|
||||||
print(game.get_active_player_id())
|
print(game.get_active_player_id())
|
||||||
|
print(game.get_legal_actions())
|
||||||
|
|
|
||||||
|
|
@ -1,9 +1,9 @@
|
||||||
use std::io::Write;
|
use std::io::Write;
|
||||||
|
|
||||||
use crate::training_common;
|
|
||||||
use burn::{prelude::Backend, tensor::Tensor};
|
use burn::{prelude::Backend, tensor::Tensor};
|
||||||
use burn_rl::base::{Action, Environment, Snapshot, State};
|
use burn_rl::base::{Action, Environment, Snapshot, State};
|
||||||
use rand::{rng, Rng};
|
use rand::{rng, Rng};
|
||||||
|
use trictrac_store::training_common;
|
||||||
use trictrac_store::{GameEvent, GameState, PlayerId, PointsRules, Stage, TurnStage};
|
use trictrac_store::{GameEvent, GameState, PlayerId, PointsRules, Stage, TurnStage};
|
||||||
|
|
||||||
const ERROR_REWARD: f32 = -1.0012121;
|
const ERROR_REWARD: f32 = -1.0012121;
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,7 @@
|
||||||
use crate::training_common;
|
|
||||||
use burn::{prelude::Backend, tensor::Tensor};
|
use burn::{prelude::Backend, tensor::Tensor};
|
||||||
use burn_rl::base::{Action, Environment, Snapshot, State};
|
use burn_rl::base::{Action, Environment, Snapshot, State};
|
||||||
use rand::{rng, Rng};
|
use rand::{rng, Rng};
|
||||||
|
use trictrac_store::training_common;
|
||||||
use trictrac_store::{GameEvent, GameState, PlayerId, PointsRules, Stage, TurnStage};
|
use trictrac_store::{GameEvent, GameState, PlayerId, PointsRules, Stage, TurnStage};
|
||||||
|
|
||||||
const ERROR_REWARD: f32 = -1.0012121;
|
const ERROR_REWARD: f32 = -1.0012121;
|
||||||
|
|
|
||||||
|
|
@ -1,15 +1,16 @@
|
||||||
pub mod burnrl;
|
pub mod burnrl;
|
||||||
pub mod strategy;
|
pub mod strategy;
|
||||||
pub mod training_common;
|
|
||||||
pub mod trictrac_board;
|
pub mod trictrac_board;
|
||||||
|
|
||||||
use log::debug;
|
use log::debug;
|
||||||
use trictrac_store::{CheckerMove, Color, GameEvent, GameState, PlayerId, PointsRules, Stage, TurnStage};
|
|
||||||
pub use strategy::default::DefaultStrategy;
|
pub use strategy::default::DefaultStrategy;
|
||||||
pub use strategy::dqnburn::DqnBurnStrategy;
|
pub use strategy::dqnburn::DqnBurnStrategy;
|
||||||
pub use strategy::erroneous_moves::ErroneousStrategy;
|
pub use strategy::erroneous_moves::ErroneousStrategy;
|
||||||
pub use strategy::random::RandomStrategy;
|
pub use strategy::random::RandomStrategy;
|
||||||
pub use strategy::stable_baselines3::StableBaselines3Strategy;
|
pub use strategy::stable_baselines3::StableBaselines3Strategy;
|
||||||
|
use trictrac_store::{
|
||||||
|
CheckerMove, Color, GameEvent, GameState, PlayerId, PointsRules, Stage, TurnStage,
|
||||||
|
};
|
||||||
|
|
||||||
pub trait BotStrategy: std::fmt::Debug {
|
pub trait BotStrategy: std::fmt::Debug {
|
||||||
fn get_game(&self) -> &GameState;
|
fn get_game(&self) -> &GameState;
|
||||||
|
|
|
||||||
|
|
@ -8,7 +8,9 @@ use trictrac_store::MoveRules;
|
||||||
|
|
||||||
use crate::burnrl::algos::dqn;
|
use crate::burnrl::algos::dqn;
|
||||||
use crate::burnrl::environment;
|
use crate::burnrl::environment;
|
||||||
use crate::training_common::{get_valid_action_indices, sample_valid_action, TrictracAction};
|
use trictrac_store::training_common::{
|
||||||
|
get_valid_action_indices, sample_valid_action, TrictracAction,
|
||||||
|
};
|
||||||
|
|
||||||
type DqnBurnNetwork = dqn::Net<NdArray<ElemType>>;
|
type DqnBurnNetwork = dqn::Net<NdArray<ElemType>>;
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1,5 +1,4 @@
|
||||||
// https://docs.rs/board-game/ implementation
|
// https://docs.rs/board-game/ implementation
|
||||||
use crate::training_common::{get_valid_actions, TrictracAction};
|
|
||||||
use board_game::board::{
|
use board_game::board::{
|
||||||
Board as BoardGameBoard, BoardDone, BoardMoves, Outcome, PlayError, Player as BoardGamePlayer,
|
Board as BoardGameBoard, BoardDone, BoardMoves, Outcome, PlayError, Player as BoardGamePlayer,
|
||||||
};
|
};
|
||||||
|
|
@ -8,6 +7,7 @@ use internal_iterator::InternalIterator;
|
||||||
use std::fmt;
|
use std::fmt;
|
||||||
use std::hash::Hash;
|
use std::hash::Hash;
|
||||||
use std::ops::ControlFlow;
|
use std::ops::ControlFlow;
|
||||||
|
use trictrac_store::training_common::{get_valid_actions, TrictracAction};
|
||||||
use trictrac_store::Color;
|
use trictrac_store::Color;
|
||||||
|
|
||||||
#[derive(Clone, Debug, Eq, PartialEq, Hash)]
|
#[derive(Clone, Debug, Eq, PartialEq, Hash)]
|
||||||
|
|
|
||||||
|
|
@ -18,6 +18,6 @@ log = "0.4.20"
|
||||||
merge = "0.1.0"
|
merge = "0.1.0"
|
||||||
# generate python lib (with maturin) to be used in AI training
|
# generate python lib (with maturin) to be used in AI training
|
||||||
pyo3 = { version = "0.23", features = ["extension-module", "abi3-py38"] }
|
pyo3 = { version = "0.23", features = ["extension-module", "abi3-py38"] }
|
||||||
rand = "0.8.5"
|
rand = "0.9"
|
||||||
serde = { version = "1.0", features = ["derive"] }
|
serde = { version = "1.0", features = ["derive"] }
|
||||||
transpose = "0.2.2"
|
transpose = "0.2.2"
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,4 @@
|
||||||
use rand::distributions::{Distribution, Uniform};
|
use rand::distr::{Distribution, Uniform};
|
||||||
use rand::{rngs::StdRng, SeedableRng};
|
use rand::{rngs::StdRng, SeedableRng};
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
|
|
||||||
|
|
@ -17,7 +17,7 @@ impl DiceRoller {
|
||||||
pub fn new(opt_seed: Option<u64>) -> Self {
|
pub fn new(opt_seed: Option<u64>) -> Self {
|
||||||
Self {
|
Self {
|
||||||
rng: match opt_seed {
|
rng: match opt_seed {
|
||||||
None => StdRng::from_rng(rand::thread_rng()).unwrap(),
|
None => StdRng::from_rng(&mut rand::rng()),
|
||||||
Some(seed) => SeedableRng::seed_from_u64(seed),
|
Some(seed) => SeedableRng::seed_from_u64(seed),
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
|
@ -26,7 +26,7 @@ impl DiceRoller {
|
||||||
/// Roll the dices which generates two random numbers between 1 and 6, replicating a perfect
|
/// Roll the dices which generates two random numbers between 1 and 6, replicating a perfect
|
||||||
/// dice. We use the operating system's random number generator.
|
/// dice. We use the operating system's random number generator.
|
||||||
pub fn roll(&mut self) -> Dice {
|
pub fn roll(&mut self) -> Dice {
|
||||||
let between = Uniform::new_inclusive(1, 6);
|
let between = Uniform::new_inclusive(1, 6).expect("1 > 6 !?");
|
||||||
|
|
||||||
let v = (between.sample(&mut self.rng), between.sample(&mut self.rng));
|
let v = (between.sample(&mut self.rng), between.sample(&mut self.rng));
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -17,5 +17,7 @@ pub use board::CheckerMove;
|
||||||
mod dice;
|
mod dice;
|
||||||
pub use dice::{Dice, DiceRoller};
|
pub use dice::{Dice, DiceRoller};
|
||||||
|
|
||||||
|
pub mod training_common;
|
||||||
|
|
||||||
// python interface "trictrac_engine" (for AI training..)
|
// python interface "trictrac_engine" (for AI training..)
|
||||||
mod pyengine;
|
mod pyengine;
|
||||||
|
|
|
||||||
|
|
@ -8,6 +8,7 @@ use crate::game::{GameEvent, GameState, Stage, TurnStage};
|
||||||
use crate::game_rules_moves::MoveRules;
|
use crate::game_rules_moves::MoveRules;
|
||||||
use crate::game_rules_points::PointsRules;
|
use crate::game_rules_points::PointsRules;
|
||||||
use crate::player::{Color, PlayerId};
|
use crate::player::{Color, PlayerId};
|
||||||
|
use crate::training_common::get_valid_action_indices;
|
||||||
|
|
||||||
#[pyclass]
|
#[pyclass]
|
||||||
struct TricTrac {
|
struct TricTrac {
|
||||||
|
|
@ -49,12 +50,9 @@ impl TricTrac {
|
||||||
self.game_state.active_player_id - 1
|
self.game_state.active_player_id - 1
|
||||||
}
|
}
|
||||||
|
|
||||||
// fn get_legal_actions(&self) -> Vec<usize> {
|
fn get_legal_actions(&self) -> Vec<usize> {
|
||||||
// get_valid_actions(&self.game_state)
|
get_valid_action_indices(&self.game_state)
|
||||||
// .into_iter()
|
}
|
||||||
// .map(|action| action.to_action_index())
|
|
||||||
// .collect()
|
|
||||||
// }
|
|
||||||
|
|
||||||
/// Lance les dés ou utilise la séquence prédéfinie
|
/// Lance les dés ou utilise la séquence prédéfinie
|
||||||
fn roll_dice(&mut self) -> PyResult<(u8, u8)> {
|
fn roll_dice(&mut self) -> PyResult<(u8, u8)> {
|
||||||
|
|
|
||||||
|
|
@ -3,8 +3,8 @@
|
||||||
use std::cmp::{max, min};
|
use std::cmp::{max, min};
|
||||||
use std::fmt::{Debug, Display, Formatter};
|
use std::fmt::{Debug, Display, Formatter};
|
||||||
|
|
||||||
|
use crate::{CheckerMove, GameEvent, GameState};
|
||||||
use serde::{Deserialize, Serialize};
|
use serde::{Deserialize, Serialize};
|
||||||
use trictrac_store::{CheckerMove, GameEvent, GameState};
|
|
||||||
|
|
||||||
// 1 (Roll) + 1 (Go) + 512 (mouvements possibles)
|
// 1 (Roll) + 1 (Go) + 512 (mouvements possibles)
|
||||||
// avec 512 = 2 (choix du dé) * 16 * 16 (choix de la dame 0-15 pour chaque from)
|
// avec 512 = 2 (choix du dé) * 16 * 16 (choix de la dame 0-15 pour chaque from)
|
||||||
|
|
@ -94,14 +94,13 @@ impl TrictracAction {
|
||||||
(state.dice.values.1, state.dice.values.0)
|
(state.dice.values.1, state.dice.values.0)
|
||||||
};
|
};
|
||||||
|
|
||||||
let color = &trictrac_store::Color::White;
|
let color = &crate::Color::White;
|
||||||
let from1 = state
|
let from1 = state
|
||||||
.board
|
.board
|
||||||
.get_checker_field(color, *checker1 as u8)
|
.get_checker_field(color, *checker1 as u8)
|
||||||
.unwrap_or(0);
|
.unwrap_or(0);
|
||||||
let mut to1 = from1 + dice1 as usize;
|
let mut to1 = from1 + dice1 as usize;
|
||||||
let checker_move1 =
|
let checker_move1 = CheckerMove::new(from1, to1).unwrap_or_default();
|
||||||
trictrac_store::CheckerMove::new(from1, to1).unwrap_or_default();
|
|
||||||
|
|
||||||
let mut tmp_board = state.board.clone();
|
let mut tmp_board = state.board.clone();
|
||||||
let move_result = tmp_board.move_checker(color, checker_move1);
|
let move_result = tmp_board.move_checker(color, checker_move1);
|
||||||
|
|
@ -121,10 +120,8 @@ impl TrictracAction {
|
||||||
to2 -= 1;
|
to2 -= 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
let checker_move1 =
|
let checker_move1 = CheckerMove::new(from1, to1).unwrap_or_default();
|
||||||
trictrac_store::CheckerMove::new(from1, to1).unwrap_or_default();
|
let checker_move2 = CheckerMove::new(from2, to2).unwrap_or_default();
|
||||||
let checker_move2 =
|
|
||||||
trictrac_store::CheckerMove::new(from2, to2).unwrap_or_default();
|
|
||||||
|
|
||||||
Some(GameEvent::Move {
|
Some(GameEvent::Move {
|
||||||
player_id: state.active_player_id,
|
player_id: state.active_player_id,
|
||||||
|
|
@ -182,8 +179,8 @@ impl TrictracAction {
|
||||||
// from2,
|
// from2,
|
||||||
// } => {
|
// } => {
|
||||||
// // Effectuer un mouvement
|
// // Effectuer un mouvement
|
||||||
// let checker_move1 = trictrac_store::CheckerMove::new(move1.0, move1.1).unwrap_or_default();
|
// let checker_move1 = CheckerMove::new(move1.0, move1.1).unwrap_or_default();
|
||||||
// let checker_move2 = trictrac_store::CheckerMove::new(move2.0, move2.1).unwrap_or_default();
|
// let checker_move2 = CheckerMove::new(move2.0, move2.1).unwrap_or_default();
|
||||||
//
|
//
|
||||||
// Some(GameEvent::Move {
|
// Some(GameEvent::Move {
|
||||||
// player_id: self.agent_player_id,
|
// player_id: self.agent_player_id,
|
||||||
|
|
@ -195,8 +192,8 @@ impl TrictracAction {
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Obtient les actions valides pour l'état de jeu actuel
|
/// Obtient les actions valides pour l'état de jeu actuel
|
||||||
pub fn get_valid_actions(game_state: &crate::GameState) -> Vec<TrictracAction> {
|
pub fn get_valid_actions(game_state: &GameState) -> Vec<TrictracAction> {
|
||||||
use trictrac_store::TurnStage;
|
use crate::TurnStage;
|
||||||
|
|
||||||
let mut valid_actions = Vec::new();
|
let mut valid_actions = Vec::new();
|
||||||
|
|
||||||
|
|
@ -219,12 +216,11 @@ pub fn get_valid_actions(game_state: &crate::GameState) -> Vec<TrictracAction> {
|
||||||
valid_actions.push(TrictracAction::Go);
|
valid_actions.push(TrictracAction::Go);
|
||||||
|
|
||||||
// Ajoute aussi les mouvements possibles
|
// Ajoute aussi les mouvements possibles
|
||||||
let rules =
|
let rules = crate::MoveRules::new(&color, &game_state.board, game_state.dice);
|
||||||
trictrac_store::MoveRules::new(&color, &game_state.board, game_state.dice);
|
|
||||||
let possible_moves = rules.get_possible_moves_sequences(true, vec![]);
|
let possible_moves = rules.get_possible_moves_sequences(true, vec![]);
|
||||||
|
|
||||||
// Modififier checker_moves_to_trictrac_action si on doit gérer Black
|
// Modififier checker_moves_to_trictrac_action si on doit gérer Black
|
||||||
assert_eq!(color, trictrac_store::Color::White);
|
assert_eq!(color, crate::Color::White);
|
||||||
for (move1, move2) in possible_moves {
|
for (move1, move2) in possible_moves {
|
||||||
valid_actions.push(checker_moves_to_trictrac_action(
|
valid_actions.push(checker_moves_to_trictrac_action(
|
||||||
&move1, &move2, &color, game_state,
|
&move1, &move2, &color, game_state,
|
||||||
|
|
@ -232,8 +228,7 @@ pub fn get_valid_actions(game_state: &crate::GameState) -> Vec<TrictracAction> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
TurnStage::Move => {
|
TurnStage::Move => {
|
||||||
let rules =
|
let rules = crate::MoveRules::new(&color, &game_state.board, game_state.dice);
|
||||||
trictrac_store::MoveRules::new(&color, &game_state.board, game_state.dice);
|
|
||||||
let mut possible_moves = rules.get_possible_moves_sequences(true, vec![]);
|
let mut possible_moves = rules.get_possible_moves_sequences(true, vec![]);
|
||||||
if possible_moves.is_empty() {
|
if possible_moves.is_empty() {
|
||||||
// Empty move
|
// Empty move
|
||||||
|
|
@ -241,7 +236,7 @@ pub fn get_valid_actions(game_state: &crate::GameState) -> Vec<TrictracAction> {
|
||||||
}
|
}
|
||||||
|
|
||||||
// Modififier checker_moves_to_trictrac_action si on doit gérer Black
|
// Modififier checker_moves_to_trictrac_action si on doit gérer Black
|
||||||
assert_eq!(color, trictrac_store::Color::White);
|
assert_eq!(color, crate::Color::White);
|
||||||
for (move1, move2) in possible_moves {
|
for (move1, move2) in possible_moves {
|
||||||
valid_actions.push(checker_moves_to_trictrac_action(
|
valid_actions.push(checker_moves_to_trictrac_action(
|
||||||
&move1, &move2, &color, game_state,
|
&move1, &move2, &color, game_state,
|
||||||
|
|
@ -261,8 +256,8 @@ pub fn get_valid_actions(game_state: &crate::GameState) -> Vec<TrictracAction> {
|
||||||
fn checker_moves_to_trictrac_action(
|
fn checker_moves_to_trictrac_action(
|
||||||
move1: &CheckerMove,
|
move1: &CheckerMove,
|
||||||
move2: &CheckerMove,
|
move2: &CheckerMove,
|
||||||
color: &trictrac_store::Color,
|
color: &crate::Color,
|
||||||
state: &crate::GameState,
|
state: &GameState,
|
||||||
) -> TrictracAction {
|
) -> TrictracAction {
|
||||||
let to1 = move1.get_to();
|
let to1 = move1.get_to();
|
||||||
let to2 = move2.get_to();
|
let to2 = move2.get_to();
|
||||||
|
|
@ -320,7 +315,7 @@ fn checker_moves_to_trictrac_action(
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Retourne les indices des actions valides
|
/// Retourne les indices des actions valides
|
||||||
pub fn get_valid_action_indices(game_state: &crate::GameState) -> Vec<usize> {
|
pub fn get_valid_action_indices(game_state: &GameState) -> Vec<usize> {
|
||||||
get_valid_actions(game_state)
|
get_valid_actions(game_state)
|
||||||
.into_iter()
|
.into_iter()
|
||||||
.map(|action| action.to_action_index())
|
.map(|action| action.to_action_index())
|
||||||
|
|
@ -328,7 +323,7 @@ pub fn get_valid_action_indices(game_state: &crate::GameState) -> Vec<usize> {
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Sélectionne une action valide aléatoire
|
/// Sélectionne une action valide aléatoire
|
||||||
pub fn sample_valid_action(game_state: &crate::GameState) -> Option<TrictracAction> {
|
pub fn sample_valid_action(game_state: &GameState) -> Option<TrictracAction> {
|
||||||
use rand::{prelude::IndexedRandom, rng};
|
use rand::{prelude::IndexedRandom, rng};
|
||||||
|
|
||||||
let valid_actions = get_valid_actions(game_state);
|
let valid_actions = get_valid_actions(game_state);
|
||||||
Loading…
Add table
Add a link
Reference in a new issue