trictrac/bot/src/dqn/burnrl/main.rs

54 lines
2.2 KiB
Rust
Raw Normal View History

2025-08-08 17:07:34 +02:00
use bot::dqn::burnrl::{
dqn_model, environment,
utils::{demo_model, load_model, save_model},
};
use burn::backend::{Autodiff, NdArray};
2025-07-23 21:16:28 +02:00
use burn_rl::agent::DQN;
2025-08-08 17:07:34 +02:00
use burn_rl::base::ElemType;
2025-07-08 21:58:15 +02:00
/// Backend type parameter handed to `dqn_model::run`: `NdArray` over `ElemType`, wrapped in `Autodiff`.
type Backend = Autodiff<NdArray<ElemType>>;
/// Environment type parameter handed to `dqn_model::run`.
type Env = environment::TrictracEnvironment;
fn main() {
2025-08-03 20:32:06 +02:00
// println!("> Entraînement");
2025-08-10 15:32:41 +02:00
// See also MEMORY_SIZE in dqn_model.rs : 8192
let conf = dqn_model::DqnConfig {
2025-08-10 17:45:53 +02:00
// defaults
num_episodes: 40, // 40
min_steps: 500.0, // 1000 min of max steps by episode (mise à jour par la fonction)
max_steps: 3000, // 1000 max steps by episode
dense_size: 256, // 128 neural network complexity (default 128)
eps_start: 0.9, // 0.9 epsilon initial value (0.9 => more exploration)
eps_end: 0.05, // 0.05
2025-08-10 15:32:41 +02:00
// eps_decay higher = epsilon decrease slower
// used in : epsilon = eps_end + (eps_start - eps_end) * e^(-step / eps_decay);
// epsilon is updated at the start of each episode
2025-08-10 17:45:53 +02:00
eps_decay: 2000.0, // 1000 ?
2025-08-10 15:32:41 +02:00
2025-08-10 17:45:53 +02:00
gamma: 0.999, // 0.999 discount factor. Plus élevé = encourage stratégies à long terme
tau: 0.005, // 0.005 soft update rate. Taux de mise à jour du réseau cible. Plus bas = adaptation
2025-08-10 15:32:41 +02:00
// plus lente moins sensible aux coups de chance
2025-08-10 17:45:53 +02:00
learning_rate: 0.001, // 0.001 taille du pas. Bas : plus lent, haut : risque de ne jamais
2025-08-10 15:32:41 +02:00
// converger
2025-08-10 17:45:53 +02:00
batch_size: 32, // 32 nombre d'expériences passées sur lesquelles pour calcul de l'erreur moy.
clip_grad: 100.0, // 100 limite max de correction à apporter au gradient (default 100)
};
2025-08-10 15:32:41 +02:00
println!("{conf}----------");
let agent = dqn_model::run::<Env, Backend>(&conf, false); //true);
2025-07-08 21:58:15 +02:00
2025-07-23 21:16:28 +02:00
let valid_agent = agent.valid();
2025-07-23 21:28:29 +02:00
println!("> Sauvegarde du modèle de validation");
2025-08-08 17:07:34 +02:00
let path = "models/burn_dqn_40".to_string();
2025-07-26 09:37:54 +02:00
save_model(valid_agent.model().as_ref().unwrap(), &path);
2025-07-23 21:52:32 +02:00
println!("> Chargement du modèle pour test");
let loaded_model = load_model(conf.dense_size, &path);
2025-08-08 21:31:38 +02:00
let loaded_agent = DQN::new(loaded_model.unwrap());
2025-07-23 21:52:32 +02:00
println!("> Test avec le modèle chargé");
2025-07-26 09:37:54 +02:00
demo_model(loaded_agent);
2025-07-23 21:16:28 +02:00
}