feat: bot all algos

Henri Bourcereau 2025-08-21 17:39:45 +02:00
parent 0c58490f87
commit 8f41cc1412
18 changed files with 929 additions and 39 deletions

View file

@@ -1,10 +1,9 @@
-#!/usr/bin/env sh
+#!/usr/bin/env bash
 ROOT="$(cd "$(dirname "$0")" && pwd)/../.."
 LOGS_DIR="$ROOT/bot/models/logs"
 CFG_SIZE=17
-ALGO="sac"
 BINBOT=burn_train
 # BINBOT=train_ppo_burn
 # BINBOT=train_dqn_burn
@@ -15,6 +14,7 @@ OPPONENT="random"
 PLOT_EXT="png"
 train() {
+  ALGO=$1
   cargo build --release --bin=$BINBOT
   NAME="$(date +%Y-%m-%d_%H:%M:%S)"
   LOGS="$LOGS_DIR/$ALGO/$NAME.out"
@@ -23,6 +23,7 @@ train() {
 }
 plot() {
+  ALGO=$1
   NAME=$(ls -rt "$LOGS_DIR/$ALGO" | tail -n 1)
   LOGS="$LOGS_DIR/$ALGO/$NAME"
   cfgs=$(head -n $CFG_SIZE "$LOGS")
@@ -37,8 +38,14 @@ plot() {
   feedgnuplot --lines --points --unset grid --title "adv = $OPPONENT ; density = $dense_size ; decay = $eps_decay ; max steps = $max_steps" --terminal $PLOT_EXT >"$LOGS_DIR/$ALGO/$NAME.$PLOT_EXT"
 }
-if [ "$1" = "plot" ]; then
-  plot
+if [[ -z "$1" ]]; then
+  echo "Usage : train [plot] <algo>"
+elif [ "$1" = "plot" ]; then
+  if [[ -z "$2" ]]; then
+    echo "Usage : train [plot] <algo>"
+  else
+    plot $2
+  fi
 else
-  train
+  train $1
 fi
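With the algorithm now passed as an argument, the script is invoked with an algorithm name (a usage sketch based on the new "Usage : train [plot] <algo>" message; "sac" is just one of the algorithm names added in this commit):

  ./bot/scripts/train.sh sac        # train; logs are written under bot/models/logs/sac/
  ./bot/scripts/train.sh plot sac   # plot the most recent sac log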

View file

@@ -0,0 +1,9 @@
pub mod dqn;
pub mod dqn_big;
pub mod dqn_valid;
pub mod ppo;
pub mod ppo_big;
pub mod ppo_valid;
pub mod sac;
pub mod sac_big;
pub mod sac_valid;

View file

@@ -161,8 +161,7 @@ pub fn run<
         save_model(&model_with_loaded_weights, path);
     }
-    let valid_agent = agent.valid(model);
-    valid_agent
+    agent.valid(model)
 }
 pub fn save_model(model: &Net<NdArray<ElemType>>, path: &String) {
@@ -190,4 +189,3 @@ pub fn load_model(dense_size: usize, path: &String) -> Option<Net<NdArray<ElemType>>> {
     })
     .ok()
 }

View file

@@ -0,0 +1,191 @@
use crate::burnrl::environment_big::TrictracEnvironment;
use crate::burnrl::utils::Config;
use burn::backend::{ndarray::NdArrayDevice, NdArray};
use burn::module::Module;
use burn::nn::{Initializer, Linear, LinearConfig};
use burn::optim::AdamWConfig;
use burn::record::{CompactRecorder, Recorder};
use burn::tensor::activation::{relu, softmax};
use burn::tensor::backend::{AutodiffBackend, Backend};
use burn::tensor::Tensor;
use burn_rl::agent::{PPOModel, PPOOutput, PPOTrainingConfig, PPO};
use burn_rl::base::{Action, Agent, ElemType, Environment, Memory, Model, State};
use std::env;
use std::fs;
use std::time::SystemTime;
#[derive(Module, Debug)]
pub struct Net<B: Backend> {
linear: Linear<B>,
linear_actor: Linear<B>,
linear_critic: Linear<B>,
}
impl<B: Backend> Net<B> {
#[allow(unused)]
pub fn new(input_size: usize, dense_size: usize, output_size: usize) -> Self {
let initializer = Initializer::XavierUniform { gain: 1.0 };
Self {
linear: LinearConfig::new(input_size, dense_size)
.with_initializer(initializer.clone())
.init(&Default::default()),
linear_actor: LinearConfig::new(dense_size, output_size)
.with_initializer(initializer.clone())
.init(&Default::default()),
linear_critic: LinearConfig::new(dense_size, 1)
.with_initializer(initializer)
.init(&Default::default()),
}
}
}
impl<B: Backend> Model<B, Tensor<B, 2>, PPOOutput<B>, Tensor<B, 2>> for Net<B> {
fn forward(&self, input: Tensor<B, 2>) -> PPOOutput<B> {
let layer_0_output = relu(self.linear.forward(input));
let policies = softmax(self.linear_actor.forward(layer_0_output.clone()), 1);
let values = self.linear_critic.forward(layer_0_output);
PPOOutput::<B>::new(policies, values)
}
fn infer(&self, input: Tensor<B, 2>) -> Tensor<B, 2> {
let layer_0_output = relu(self.linear.forward(input));
softmax(self.linear_actor.forward(layer_0_output.clone()), 1)
}
}
impl<B: Backend> PPOModel<B> for Net<B> {}
#[allow(unused)]
const MEMORY_SIZE: usize = 512;
type MyAgent<E, B> = PPO<E, B, Net<B>>;
#[allow(unused)]
pub fn run<
E: Environment + AsMut<TrictracEnvironment>,
B: AutodiffBackend<InnerBackend = NdArray>,
>(
conf: &Config,
visualized: bool,
// ) -> PPO<E, B, Net<B>> {
) -> impl Agent<E> {
let mut env = E::new(visualized);
env.as_mut().max_steps = conf.max_steps;
let mut model = Net::<B>::new(
<<E as Environment>::StateType as State>::size(),
conf.dense_size,
<<E as Environment>::ActionType as Action>::size(),
);
let agent = MyAgent::default();
let config = PPOTrainingConfig {
gamma: conf.gamma,
lambda: conf.lambda,
epsilon_clip: conf.epsilon_clip,
critic_weight: conf.critic_weight,
entropy_weight: conf.entropy_weight,
learning_rate: conf.learning_rate,
epochs: conf.epochs,
batch_size: conf.batch_size,
clip_grad: Some(burn::grad_clipping::GradientClippingConfig::Value(
conf.clip_grad,
)),
};
let mut optimizer = AdamWConfig::new()
.with_grad_clipping(config.clip_grad.clone())
.init();
let mut memory = Memory::<E, B, MEMORY_SIZE>::default();
for episode in 0..conf.num_episodes {
let mut episode_done = false;
let mut episode_reward = 0.0;
let mut episode_duration = 0_usize;
let mut now = SystemTime::now();
env.reset();
while !episode_done {
let state = env.state();
if let Some(action) = MyAgent::<E, _>::react_with_model(&state, &model) {
let snapshot = env.step(action);
episode_reward += <<E as Environment>::RewardType as Into<ElemType>>::into(
snapshot.reward().clone(),
);
memory.push(
state,
*snapshot.state(),
action,
snapshot.reward().clone(),
snapshot.done(),
);
episode_duration += 1;
episode_done = snapshot.done() || episode_duration >= conf.max_steps;
}
}
println!(
"{{\"episode\": {episode}, \"reward\": {episode_reward:.4}, \"steps count\": {episode_duration}, \"duration\": {}}}",
now.elapsed().unwrap().as_secs(),
);
now = SystemTime::now();
model = MyAgent::train::<MEMORY_SIZE>(model, &memory, &mut optimizer, &config);
memory.clear();
}
if let Some(path) = &conf.save_path {
let device = NdArrayDevice::default();
let recorder = CompactRecorder::new();
let tmp_path = env::temp_dir().join("tmp_model.mpk");
// Save the trained model (backend B) to a temporary file
recorder
.record(model.clone().into_record(), tmp_path.clone())
.expect("Failed to save temporary model");
// Create a new model instance with the target backend (NdArray)
let model_to_save: Net<NdArray<ElemType>> = Net::new(
<<E as Environment>::StateType as State>::size(),
conf.dense_size,
<<E as Environment>::ActionType as Action>::size(),
);
// Load the record from the temporary file into the new model
let record = recorder
.load(tmp_path.clone(), &device)
.expect("Failed to load temporary model");
let model_with_loaded_weights = model_to_save.load_record(record);
// Clean up the temporary file
fs::remove_file(tmp_path).expect("Failed to remove temporary model file");
save_model(&model_with_loaded_weights, path);
}
agent.valid(model)
}
pub fn save_model(model: &Net<NdArray<ElemType>>, path: &String) {
let recorder = CompactRecorder::new();
let model_path = format!("{path}.mpk");
println!("info: Modèle de validation sauvegardé : {model_path}");
recorder
.record(model.clone().into_record(), model_path.into())
.unwrap();
}
pub fn load_model(dense_size: usize, path: &String) -> Option<Net<NdArray<ElemType>>> {
let model_path = format!("{path}.mpk");
// println!("Chargement du modèle depuis : {model_path}");
CompactRecorder::new()
.load(model_path.into(), &NdArrayDevice::default())
.map(|record| {
Net::new(
<TrictracEnvironment as Environment>::StateType::size(),
dense_size,
<TrictracEnvironment as Environment>::ActionType::size(),
)
.load_record(record)
})
.ok()
}

View file

@@ -0,0 +1,191 @@
use crate::burnrl::environment_valid::TrictracEnvironment;
use crate::burnrl::utils::Config;
use burn::backend::{ndarray::NdArrayDevice, NdArray};
use burn::module::Module;
use burn::nn::{Initializer, Linear, LinearConfig};
use burn::optim::AdamWConfig;
use burn::record::{CompactRecorder, Recorder};
use burn::tensor::activation::{relu, softmax};
use burn::tensor::backend::{AutodiffBackend, Backend};
use burn::tensor::Tensor;
use burn_rl::agent::{PPOModel, PPOOutput, PPOTrainingConfig, PPO};
use burn_rl::base::{Action, Agent, ElemType, Environment, Memory, Model, State};
use std::env;
use std::fs;
use std::time::SystemTime;
#[derive(Module, Debug)]
pub struct Net<B: Backend> {
linear: Linear<B>,
linear_actor: Linear<B>,
linear_critic: Linear<B>,
}
impl<B: Backend> Net<B> {
#[allow(unused)]
pub fn new(input_size: usize, dense_size: usize, output_size: usize) -> Self {
let initializer = Initializer::XavierUniform { gain: 1.0 };
Self {
linear: LinearConfig::new(input_size, dense_size)
.with_initializer(initializer.clone())
.init(&Default::default()),
linear_actor: LinearConfig::new(dense_size, output_size)
.with_initializer(initializer.clone())
.init(&Default::default()),
linear_critic: LinearConfig::new(dense_size, 1)
.with_initializer(initializer)
.init(&Default::default()),
}
}
}
impl<B: Backend> Model<B, Tensor<B, 2>, PPOOutput<B>, Tensor<B, 2>> for Net<B> {
fn forward(&self, input: Tensor<B, 2>) -> PPOOutput<B> {
let layer_0_output = relu(self.linear.forward(input));
let policies = softmax(self.linear_actor.forward(layer_0_output.clone()), 1);
let values = self.linear_critic.forward(layer_0_output);
PPOOutput::<B>::new(policies, values)
}
fn infer(&self, input: Tensor<B, 2>) -> Tensor<B, 2> {
let layer_0_output = relu(self.linear.forward(input));
softmax(self.linear_actor.forward(layer_0_output.clone()), 1)
}
}
impl<B: Backend> PPOModel<B> for Net<B> {}
#[allow(unused)]
const MEMORY_SIZE: usize = 512;
type MyAgent<E, B> = PPO<E, B, Net<B>>;
#[allow(unused)]
pub fn run<
E: Environment + AsMut<TrictracEnvironment>,
B: AutodiffBackend<InnerBackend = NdArray>,
>(
conf: &Config,
visualized: bool,
// ) -> PPO<E, B, Net<B>> {
) -> impl Agent<E> {
let mut env = E::new(visualized);
env.as_mut().max_steps = conf.max_steps;
let mut model = Net::<B>::new(
<<E as Environment>::StateType as State>::size(),
conf.dense_size,
<<E as Environment>::ActionType as Action>::size(),
);
let agent = MyAgent::default();
let config = PPOTrainingConfig {
gamma: conf.gamma,
lambda: conf.lambda,
epsilon_clip: conf.epsilon_clip,
critic_weight: conf.critic_weight,
entropy_weight: conf.entropy_weight,
learning_rate: conf.learning_rate,
epochs: conf.epochs,
batch_size: conf.batch_size,
clip_grad: Some(burn::grad_clipping::GradientClippingConfig::Value(
conf.clip_grad,
)),
};
let mut optimizer = AdamWConfig::new()
.with_grad_clipping(config.clip_grad.clone())
.init();
let mut memory = Memory::<E, B, MEMORY_SIZE>::default();
for episode in 0..conf.num_episodes {
let mut episode_done = false;
let mut episode_reward = 0.0;
let mut episode_duration = 0_usize;
let mut now = SystemTime::now();
env.reset();
while !episode_done {
let state = env.state();
if let Some(action) = MyAgent::<E, _>::react_with_model(&state, &model) {
let snapshot = env.step(action);
episode_reward += <<E as Environment>::RewardType as Into<ElemType>>::into(
snapshot.reward().clone(),
);
memory.push(
state,
*snapshot.state(),
action,
snapshot.reward().clone(),
snapshot.done(),
);
episode_duration += 1;
episode_done = snapshot.done() || episode_duration >= conf.max_steps;
}
}
println!(
"{{\"episode\": {episode}, \"reward\": {episode_reward:.4}, \"steps count\": {episode_duration}, \"duration\": {}}}",
now.elapsed().unwrap().as_secs(),
);
now = SystemTime::now();
model = MyAgent::train::<MEMORY_SIZE>(model, &memory, &mut optimizer, &config);
memory.clear();
}
if let Some(path) = &conf.save_path {
let device = NdArrayDevice::default();
let recorder = CompactRecorder::new();
let tmp_path = env::temp_dir().join("tmp_model.mpk");
// Save the trained model (backend B) to a temporary file
recorder
.record(model.clone().into_record(), tmp_path.clone())
.expect("Failed to save temporary model");
// Create a new model instance with the target backend (NdArray)
let model_to_save: Net<NdArray<ElemType>> = Net::new(
<<E as Environment>::StateType as State>::size(),
conf.dense_size,
<<E as Environment>::ActionType as Action>::size(),
);
// Load the record from the temporary file into the new model
let record = recorder
.load(tmp_path.clone(), &device)
.expect("Failed to load temporary model");
let model_with_loaded_weights = model_to_save.load_record(record);
// Clean up the temporary file
fs::remove_file(tmp_path).expect("Failed to remove temporary model file");
save_model(&model_with_loaded_weights, path);
}
agent.valid(model)
}
pub fn save_model(model: &Net<NdArray<ElemType>>, path: &String) {
let recorder = CompactRecorder::new();
let model_path = format!("{path}.mpk");
println!("info: Modèle de validation sauvegardé : {model_path}");
recorder
.record(model.clone().into_record(), model_path.into())
.unwrap();
}
pub fn load_model(dense_size: usize, path: &String) -> Option<Net<NdArray<ElemType>>> {
let model_path = format!("{path}.mpk");
// println!("Chargement du modèle depuis : {model_path}");
CompactRecorder::new()
.load(model_path.into(), &NdArrayDevice::default())
.map(|record| {
Net::new(
<TrictracEnvironment as Environment>::StateType::size(),
dense_size,
<TrictracEnvironment as Environment>::ActionType::size(),
)
.load_record(record)
})
.ok()
}

View file

@@ -0,0 +1,222 @@
use crate::burnrl::environment_big::TrictracEnvironment;
use crate::burnrl::utils::{soft_update_linear, Config};
use burn::backend::{ndarray::NdArrayDevice, NdArray};
use burn::module::Module;
use burn::nn::{Linear, LinearConfig};
use burn::optim::AdamWConfig;
use burn::record::{CompactRecorder, Recorder};
use burn::tensor::activation::{relu, softmax};
use burn::tensor::backend::{AutodiffBackend, Backend};
use burn::tensor::Tensor;
use burn_rl::agent::{SACActor, SACCritic, SACNets, SACOptimizer, SACTrainingConfig, SAC};
use burn_rl::base::{Action, Agent, ElemType, Environment, Memory, Model, State};
use std::time::SystemTime;
#[derive(Module, Debug)]
pub struct Actor<B: Backend> {
linear_0: Linear<B>,
linear_1: Linear<B>,
linear_2: Linear<B>,
}
impl<B: Backend> Actor<B> {
pub fn new(input_size: usize, dense_size: usize, output_size: usize) -> Self {
Self {
linear_0: LinearConfig::new(input_size, dense_size).init(&Default::default()),
linear_1: LinearConfig::new(dense_size, dense_size).init(&Default::default()),
linear_2: LinearConfig::new(dense_size, output_size).init(&Default::default()),
}
}
}
impl<B: Backend> Model<B, Tensor<B, 2>, Tensor<B, 2>> for Actor<B> {
fn forward(&self, input: Tensor<B, 2>) -> Tensor<B, 2> {
let layer_0_output = relu(self.linear_0.forward(input));
let layer_1_output = relu(self.linear_1.forward(layer_0_output));
softmax(self.linear_2.forward(layer_1_output), 1)
}
fn infer(&self, input: Tensor<B, 2>) -> Tensor<B, 2> {
self.forward(input)
}
}
impl<B: Backend> SACActor<B> for Actor<B> {}
#[derive(Module, Debug)]
pub struct Critic<B: Backend> {
linear_0: Linear<B>,
linear_1: Linear<B>,
linear_2: Linear<B>,
}
impl<B: Backend> Critic<B> {
pub fn new(input_size: usize, dense_size: usize, output_size: usize) -> Self {
Self {
linear_0: LinearConfig::new(input_size, dense_size).init(&Default::default()),
linear_1: LinearConfig::new(dense_size, dense_size).init(&Default::default()),
linear_2: LinearConfig::new(dense_size, output_size).init(&Default::default()),
}
}
fn consume(self) -> (Linear<B>, Linear<B>, Linear<B>) {
(self.linear_0, self.linear_1, self.linear_2)
}
}
impl<B: Backend> Model<B, Tensor<B, 2>, Tensor<B, 2>> for Critic<B> {
fn forward(&self, input: Tensor<B, 2>) -> Tensor<B, 2> {
let layer_0_output = relu(self.linear_0.forward(input));
let layer_1_output = relu(self.linear_1.forward(layer_0_output));
self.linear_2.forward(layer_1_output)
}
fn infer(&self, input: Tensor<B, 2>) -> Tensor<B, 2> {
self.forward(input)
}
}
impl<B: Backend> SACCritic<B> for Critic<B> {
fn soft_update(this: Self, that: &Self, tau: ElemType) -> Self {
let (linear_0, linear_1, linear_2) = this.consume();
Self {
linear_0: soft_update_linear(linear_0, &that.linear_0, tau),
linear_1: soft_update_linear(linear_1, &that.linear_1, tau),
linear_2: soft_update_linear(linear_2, &that.linear_2, tau),
}
}
}
#[allow(unused)]
const MEMORY_SIZE: usize = 4096;
type MyAgent<E, B> = SAC<E, B, Actor<B>>;
#[allow(unused)]
pub fn run<
E: Environment + AsMut<TrictracEnvironment>,
B: AutodiffBackend<InnerBackend = NdArray>,
>(
conf: &Config,
visualized: bool,
) -> impl Agent<E> {
let mut env = E::new(visualized);
env.as_mut().max_steps = conf.max_steps;
let state_dim = <<E as Environment>::StateType as State>::size();
let action_dim = <<E as Environment>::ActionType as Action>::size();
let actor = Actor::<B>::new(state_dim, conf.dense_size, action_dim);
let critic_1 = Critic::<B>::new(state_dim, conf.dense_size, action_dim);
let critic_2 = Critic::<B>::new(state_dim, conf.dense_size, action_dim);
let mut nets = SACNets::<B, Actor<B>, Critic<B>>::new(actor, critic_1, critic_2);
let mut agent = MyAgent::default();
let config = SACTrainingConfig {
gamma: conf.gamma,
tau: conf.tau,
learning_rate: conf.learning_rate,
min_probability: conf.min_probability,
batch_size: conf.batch_size,
clip_grad: Some(burn::grad_clipping::GradientClippingConfig::Value(
conf.clip_grad,
)),
};
let mut memory = Memory::<E, B, MEMORY_SIZE>::default();
let optimizer_config = AdamWConfig::new().with_grad_clipping(config.clip_grad.clone());
let mut optimizer = SACOptimizer::new(
optimizer_config.clone().init(),
optimizer_config.clone().init(),
optimizer_config.clone().init(),
optimizer_config.init(),
);
let mut step = 0_usize;
for episode in 0..conf.num_episodes {
let mut episode_done = false;
let mut episode_reward = 0.0;
let mut episode_duration = 0_usize;
let mut state = env.state();
let mut now = SystemTime::now();
while !episode_done {
if let Some(action) = MyAgent::<E, _>::react_with_model(&state, &nets.actor) {
let snapshot = env.step(action);
episode_reward += <<E as Environment>::RewardType as Into<ElemType>>::into(
snapshot.reward().clone(),
);
memory.push(
state,
*snapshot.state(),
action,
snapshot.reward().clone(),
snapshot.done(),
);
if config.batch_size < memory.len() {
nets = agent.train::<MEMORY_SIZE, _>(nets, &memory, &mut optimizer, &config);
}
step += 1;
episode_duration += 1;
if snapshot.done() || episode_duration >= conf.max_steps {
env.reset();
episode_done = true;
println!(
"{{\"episode\": {episode}, \"reward\": {episode_reward:.4}, \"steps count\": {episode_duration}, \"duration\": {}}}",
now.elapsed().unwrap().as_secs()
);
now = SystemTime::now();
} else {
state = *snapshot.state();
}
}
}
}
let valid_agent = agent.valid(nets.actor);
if let Some(path) = &conf.save_path {
if let Some(model) = valid_agent.model() {
save_model(model, path);
}
}
valid_agent
}
pub fn save_model(model: &Actor<NdArray<ElemType>>, path: &String) {
let recorder = CompactRecorder::new();
let model_path = format!("{path}.mpk");
println!("info: Modèle de validation sauvegardé : {model_path}");
recorder
.record(model.clone().into_record(), model_path.into())
.unwrap();
}
pub fn load_model(dense_size: usize, path: &String) -> Option<Actor<NdArray<ElemType>>> {
let model_path = format!("{path}.mpk");
// println!("Chargement du modèle depuis : {model_path}");
CompactRecorder::new()
.load(model_path.into(), &NdArrayDevice::default())
.map(|record| {
Actor::new(
<TrictracEnvironment as Environment>::StateType::size(),
dense_size,
<TrictracEnvironment as Environment>::ActionType::size(),
)
.load_record(record)
})
.ok()
}

View file

@@ -0,0 +1,222 @@
use crate::burnrl::environment_valid::TrictracEnvironment;
use crate::burnrl::utils::{soft_update_linear, Config};
use burn::backend::{ndarray::NdArrayDevice, NdArray};
use burn::module::Module;
use burn::nn::{Linear, LinearConfig};
use burn::optim::AdamWConfig;
use burn::record::{CompactRecorder, Recorder};
use burn::tensor::activation::{relu, softmax};
use burn::tensor::backend::{AutodiffBackend, Backend};
use burn::tensor::Tensor;
use burn_rl::agent::{SACActor, SACCritic, SACNets, SACOptimizer, SACTrainingConfig, SAC};
use burn_rl::base::{Action, Agent, ElemType, Environment, Memory, Model, State};
use std::time::SystemTime;
#[derive(Module, Debug)]
pub struct Actor<B: Backend> {
linear_0: Linear<B>,
linear_1: Linear<B>,
linear_2: Linear<B>,
}
impl<B: Backend> Actor<B> {
pub fn new(input_size: usize, dense_size: usize, output_size: usize) -> Self {
Self {
linear_0: LinearConfig::new(input_size, dense_size).init(&Default::default()),
linear_1: LinearConfig::new(dense_size, dense_size).init(&Default::default()),
linear_2: LinearConfig::new(dense_size, output_size).init(&Default::default()),
}
}
}
impl<B: Backend> Model<B, Tensor<B, 2>, Tensor<B, 2>> for Actor<B> {
fn forward(&self, input: Tensor<B, 2>) -> Tensor<B, 2> {
let layer_0_output = relu(self.linear_0.forward(input));
let layer_1_output = relu(self.linear_1.forward(layer_0_output));
softmax(self.linear_2.forward(layer_1_output), 1)
}
fn infer(&self, input: Tensor<B, 2>) -> Tensor<B, 2> {
self.forward(input)
}
}
impl<B: Backend> SACActor<B> for Actor<B> {}
#[derive(Module, Debug)]
pub struct Critic<B: Backend> {
linear_0: Linear<B>,
linear_1: Linear<B>,
linear_2: Linear<B>,
}
impl<B: Backend> Critic<B> {
pub fn new(input_size: usize, dense_size: usize, output_size: usize) -> Self {
Self {
linear_0: LinearConfig::new(input_size, dense_size).init(&Default::default()),
linear_1: LinearConfig::new(dense_size, dense_size).init(&Default::default()),
linear_2: LinearConfig::new(dense_size, output_size).init(&Default::default()),
}
}
fn consume(self) -> (Linear<B>, Linear<B>, Linear<B>) {
(self.linear_0, self.linear_1, self.linear_2)
}
}
impl<B: Backend> Model<B, Tensor<B, 2>, Tensor<B, 2>> for Critic<B> {
fn forward(&self, input: Tensor<B, 2>) -> Tensor<B, 2> {
let layer_0_output = relu(self.linear_0.forward(input));
let layer_1_output = relu(self.linear_1.forward(layer_0_output));
self.linear_2.forward(layer_1_output)
}
fn infer(&self, input: Tensor<B, 2>) -> Tensor<B, 2> {
self.forward(input)
}
}
impl<B: Backend> SACCritic<B> for Critic<B> {
fn soft_update(this: Self, that: &Self, tau: ElemType) -> Self {
let (linear_0, linear_1, linear_2) = this.consume();
Self {
linear_0: soft_update_linear(linear_0, &that.linear_0, tau),
linear_1: soft_update_linear(linear_1, &that.linear_1, tau),
linear_2: soft_update_linear(linear_2, &that.linear_2, tau),
}
}
}
#[allow(unused)]
const MEMORY_SIZE: usize = 4096;
type MyAgent<E, B> = SAC<E, B, Actor<B>>;
#[allow(unused)]
pub fn run<
E: Environment + AsMut<TrictracEnvironment>,
B: AutodiffBackend<InnerBackend = NdArray>,
>(
conf: &Config,
visualized: bool,
) -> impl Agent<E> {
let mut env = E::new(visualized);
env.as_mut().max_steps = conf.max_steps;
let state_dim = <<E as Environment>::StateType as State>::size();
let action_dim = <<E as Environment>::ActionType as Action>::size();
let actor = Actor::<B>::new(state_dim, conf.dense_size, action_dim);
let critic_1 = Critic::<B>::new(state_dim, conf.dense_size, action_dim);
let critic_2 = Critic::<B>::new(state_dim, conf.dense_size, action_dim);
let mut nets = SACNets::<B, Actor<B>, Critic<B>>::new(actor, critic_1, critic_2);
let mut agent = MyAgent::default();
let config = SACTrainingConfig {
gamma: conf.gamma,
tau: conf.tau,
learning_rate: conf.learning_rate,
min_probability: conf.min_probability,
batch_size: conf.batch_size,
clip_grad: Some(burn::grad_clipping::GradientClippingConfig::Value(
conf.clip_grad,
)),
};
let mut memory = Memory::<E, B, MEMORY_SIZE>::default();
let optimizer_config = AdamWConfig::new().with_grad_clipping(config.clip_grad.clone());
let mut optimizer = SACOptimizer::new(
optimizer_config.clone().init(),
optimizer_config.clone().init(),
optimizer_config.clone().init(),
optimizer_config.init(),
);
let mut step = 0_usize;
for episode in 0..conf.num_episodes {
let mut episode_done = false;
let mut episode_reward = 0.0;
let mut episode_duration = 0_usize;
let mut state = env.state();
let mut now = SystemTime::now();
while !episode_done {
if let Some(action) = MyAgent::<E, _>::react_with_model(&state, &nets.actor) {
let snapshot = env.step(action);
episode_reward += <<E as Environment>::RewardType as Into<ElemType>>::into(
snapshot.reward().clone(),
);
memory.push(
state,
*snapshot.state(),
action,
snapshot.reward().clone(),
snapshot.done(),
);
if config.batch_size < memory.len() {
nets = agent.train::<MEMORY_SIZE, _>(nets, &memory, &mut optimizer, &config);
}
step += 1;
episode_duration += 1;
if snapshot.done() || episode_duration >= conf.max_steps {
env.reset();
episode_done = true;
println!(
"{{\"episode\": {episode}, \"reward\": {episode_reward:.4}, \"steps count\": {episode_duration}, \"duration\": {}}}",
now.elapsed().unwrap().as_secs()
);
now = SystemTime::now();
} else {
state = *snapshot.state();
}
}
}
}
let valid_agent = agent.valid(nets.actor);
if let Some(path) = &conf.save_path {
if let Some(model) = valid_agent.model() {
save_model(model, path);
}
}
valid_agent
}
pub fn save_model(model: &Actor<NdArray<ElemType>>, path: &String) {
let recorder = CompactRecorder::new();
let model_path = format!("{path}.mpk");
println!("info: Modèle de validation sauvegardé : {model_path}");
recorder
.record(model.clone().into_record(), model_path.into())
.unwrap();
}
pub fn load_model(dense_size: usize, path: &String) -> Option<Actor<NdArray<ElemType>>> {
let model_path = format!("{path}.mpk");
// println!("Chargement du modèle depuis : {model_path}");
CompactRecorder::new()
.load(model_path.into(), &NdArrayDevice::default())
.map(|record| {
Actor::new(
<TrictracEnvironment as Environment>::StateType::size(),
dense_size,
<TrictracEnvironment as Environment>::ActionType::size(),
)
.load_record(record)
})
.ok()
}

View file

@@ -1,8 +1,10 @@
+use bot::burnrl::algos::{
+    dqn, dqn_big, dqn_valid, ppo, ppo_big, ppo_valid, sac, sac_big, sac_valid,
+};
 use bot::burnrl::environment::TrictracEnvironment;
 use bot::burnrl::environment_big::TrictracEnvironment as TrictracEnvironmentBig;
 use bot::burnrl::environment_valid::TrictracEnvironment as TrictracEnvironmentValid;
 use bot::burnrl::utils::{demo_model, Config};
-use bot::burnrl::{dqn_big_model, dqn_model, dqn_valid_model, ppo_model, sac_model};
 use burn::backend::{Autodiff, NdArray};
 use burn_rl::base::ElemType;
 use std::env;
@@ -51,9 +53,9 @@ fn main() {
     match algo.as_str() {
         "dqn" => {
-            let _agent = dqn_model::run::<TrictracEnvironment, Backend>(&conf, false);
+            let _agent = dqn::run::<TrictracEnvironment, Backend>(&conf, false);
             println!("> Chargement du modèle pour test");
-            let loaded_model = dqn_model::load_model(conf.dense_size, &path);
+            let loaded_model = dqn::load_model(conf.dense_size, &path);
             let loaded_agent: burn_rl::agent::DQN<TrictracEnvironment, _, _> =
                 burn_rl::agent::DQN::new(loaded_model.unwrap());
@@ -61,33 +63,87 @@ fn main() {
             demo_model(loaded_agent);
         }
         "dqn_big" => {
-            let _agent = dqn_big_model::run::<TrictracEnvironmentBig, Backend>(&conf, false);
+            let _agent = dqn_big::run::<TrictracEnvironmentBig, Backend>(&conf, false);
+            println!("> Chargement du modèle pour test");
+            let loaded_model = dqn_big::load_model(conf.dense_size, &path);
+            let loaded_agent: burn_rl::agent::DQN<TrictracEnvironmentBig, _, _> =
+                burn_rl::agent::DQN::new(loaded_model.unwrap());
+            println!("> Test avec le modèle chargé");
+            demo_model(loaded_agent);
         }
         "dqn_valid" => {
-            let _agent = dqn_valid_model::run::<TrictracEnvironmentValid, Backend>(&conf, false);
+            let _agent = dqn_valid::run::<TrictracEnvironmentValid, Backend>(&conf, false);
+            println!("> Chargement du modèle pour test");
+            let loaded_model = dqn_valid::load_model(conf.dense_size, &path);
+            let loaded_agent: burn_rl::agent::DQN<TrictracEnvironmentValid, _, _> =
+                burn_rl::agent::DQN::new(loaded_model.unwrap());
+            println!("> Test avec le modèle chargé");
+            demo_model(loaded_agent);
         }
         "sac" => {
-            let _agent = sac_model::run::<TrictracEnvironment, Backend>(&conf, false);
+            let _agent = sac::run::<TrictracEnvironment, Backend>(&conf, false);
             println!("> Chargement du modèle pour test");
-            let loaded_model = sac_model::load_model(conf.dense_size, &path);
+            let loaded_model = sac::load_model(conf.dense_size, &path);
             let loaded_agent: burn_rl::agent::SAC<TrictracEnvironment, _, _> =
                 burn_rl::agent::SAC::new(loaded_model.unwrap());
             println!("> Test avec le modèle chargé");
             demo_model(loaded_agent);
         }
-        "ppo" => {
-            let _agent = ppo_model::run::<TrictracEnvironment, Backend>(&conf, false);
+        "sac_big" => {
+            let _agent = sac_big::run::<TrictracEnvironmentBig, Backend>(&conf, false);
             println!("> Chargement du modèle pour test");
-            let loaded_model = ppo_model::load_model(conf.dense_size, &path);
+            let loaded_model = sac_big::load_model(conf.dense_size, &path);
+            let loaded_agent: burn_rl::agent::SAC<TrictracEnvironmentBig, _, _> =
+                burn_rl::agent::SAC::new(loaded_model.unwrap());
+            println!("> Test avec le modèle chargé");
+            demo_model(loaded_agent);
+        }
+        "sac_valid" => {
+            let _agent = sac_valid::run::<TrictracEnvironmentValid, Backend>(&conf, false);
+            println!("> Chargement du modèle pour test");
+            let loaded_model = sac_valid::load_model(conf.dense_size, &path);
+            let loaded_agent: burn_rl::agent::SAC<TrictracEnvironmentValid, _, _> =
+                burn_rl::agent::SAC::new(loaded_model.unwrap());
+            println!("> Test avec le modèle chargé");
+            demo_model(loaded_agent);
+        }
+        "ppo" => {
+            let _agent = ppo::run::<TrictracEnvironment, Backend>(&conf, false);
+            println!("> Chargement du modèle pour test");
+            let loaded_model = ppo::load_model(conf.dense_size, &path);
             let loaded_agent: burn_rl::agent::PPO<TrictracEnvironment, _, _> =
                 burn_rl::agent::PPO::new(loaded_model.unwrap());
             println!("> Test avec le modèle chargé");
             demo_model(loaded_agent);
         }
+        "ppo_big" => {
+            let _agent = ppo_big::run::<TrictracEnvironmentBig, Backend>(&conf, false);
+            println!("> Chargement du modèle pour test");
+            let loaded_model = ppo_big::load_model(conf.dense_size, &path);
+            let loaded_agent: burn_rl::agent::PPO<TrictracEnvironmentBig, _, _> =
+                burn_rl::agent::PPO::new(loaded_model.unwrap());
+            println!("> Test avec le modèle chargé");
+            demo_model(loaded_agent);
+        }
+        "ppo_valid" => {
+            let _agent = ppo_valid::run::<TrictracEnvironmentValid, Backend>(&conf, false);
+            println!("> Chargement du modèle pour test");
+            let loaded_model = ppo_valid::load_model(conf.dense_size, &path);
+            let loaded_agent: burn_rl::agent::PPO<TrictracEnvironmentValid, _, _> =
+                burn_rl::agent::PPO::new(loaded_model.unwrap());
+            println!("> Test avec le modèle chargé");
+            demo_model(loaded_agent);
+        }
         &_ => {
-            dbg!("unknown algo {algo}");
+            println!("unknown algo {algo}");
         }
     }
 }

View file

@@ -1,9 +1,5 @@
-pub mod dqn_big_model;
-pub mod dqn_model;
-pub mod dqn_valid_model;
+pub mod algos;
 pub mod environment;
 pub mod environment_big;
 pub mod environment_valid;
-pub mod ppo_model;
-pub mod sac_model;
 pub mod utils;

View file

@@ -6,11 +6,11 @@ use crate::{BotStrategy, CheckerMove, Color, GameState, PlayerId};
 use log::info;
 use store::MoveRules;
-use crate::burnrl::dqn_model;
+use crate::burnrl::algos::dqn;
 use crate::burnrl::environment;
 use crate::training_common::{get_valid_action_indices, sample_valid_action, TrictracAction};
-type DqnBurnNetwork = dqn_model::Net<NdArray<ElemType>>;
+type DqnBurnNetwork = dqn::Net<NdArray<ElemType>>;
 /// Stratégie DQN pour le bot - ne fait que charger et utiliser un modèle pré-entraîné
 #[derive(Debug)]
@@ -40,7 +40,7 @@ impl DqnBurnStrategy {
     pub fn new_with_model(model_path: &String) -> Self {
         info!("Loading model {model_path:?}");
         let mut strategy = Self::new();
-        strategy.model = dqn_model::load_model(256, model_path);
+        strategy.model = dqn::load_model(256, model_path);
         strategy
     }

View file

@@ -25,13 +25,13 @@ pythonlib:
 trainsimple:
     cargo build --release --bin=train_dqn_simple
     LD_LIBRARY_PATH=./target/release ./target/release/train_dqn_simple | tee /tmp/train.out
-trainbot:
+trainbot algo:
     #python ./store/python/trainModel.py
     # cargo run --bin=train_dqn # ok
     # ./bot/scripts/trainValid.sh
-    ./bot/scripts/train.sh
-plottrainbot:
-    ./bot/scripts/train.sh plot
+    ./bot/scripts/train.sh {{algo}}
+plottrainbot algo:
+    ./bot/scripts/train.sh plot {{algo}}
 debugtrainbot:
     cargo build --bin=train_dqn_burn
     RUST_BACKTRACE=1 LD_LIBRARY_PATH=./target/debug ./target/debug/train_dqn_burn
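Since both recipes now take the algorithm as a parameter, example invocations (assuming the recipe names above) look like:

  just trainbot sac        # runs ./bot/scripts/train.sh sac
  just plottrainbot sac    # runs ./bot/scripts/train.sh plot sac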

View file

@@ -271,7 +271,7 @@ impl Board {
             .map(|cells| {
                 cells
                     .into_iter()
-                    .map(|cell| format!("{:>5}", cell))
+                    .map(|cell| format!("{cell:>5}"))
                     .collect::<Vec<String>>()
                     .join("")
             })
@@ -282,7 +282,7 @@ impl Board {
             .map(|cells| {
                 cells
                     .into_iter()
-                    .map(|cell| format!("{:>5}", cell))
+                    .map(|cell| format!("{cell:>5}"))
                     .collect::<Vec<String>>()
                     .join("")
             })

View file

@@ -244,7 +244,7 @@ impl GameState {
         pos_bits.push_str(&white_bits);
         pos_bits.push_str(&black_bits);
-        pos_bits = format!("{:0>108}", pos_bits);
+        pos_bits = format!("{pos_bits:0>108}");
         // println!("{}", pos_bits);
         let pos_u8 = pos_bits
             .as_bytes()
@@ -647,9 +647,7 @@ impl GameState {
     fn inc_roll_count(&mut self, player_id: PlayerId) {
         self.players.get_mut(&player_id).map(|p| {
-            if p.dice_roll_count < u8::MAX {
-                p.dice_roll_count += 1;
-            }
+            p.dice_roll_count = p.dice_roll_count.saturating_add(1);
             p
         });
     }

View file

@@ -603,7 +603,7 @@ mod tests {
         );
         let points_rules = PointsRules::new(&Color::Black, &board, Dice { values: (2, 4) });
         let jans = points_rules.get_result_jans(8);
-        assert!(jans.0.len() > 0);
+        assert!(!jans.0.is_empty());
     }
     #[test]
@@ -628,7 +628,7 @@ mod tests {
                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, -2,
             ],
         );
-        let mut rules = PointsRules::new(&Color::Black, &board, Dice { values: (2, 3) });
+        let rules = PointsRules::new(&Color::Black, &board, Dice { values: (2, 3) });
         assert_eq!(12, rules.get_points(5).0);
         // Battre à vrai une dame située dans la table des grands jans : 2 + 2 = 4