feat: load bot training hyperparameters from a confy configuration file

This commit is contained in:
Henri Bourcereau 2025-08-22 09:24:01 +02:00
parent 8f41cc1412
commit e1b8d7e679
4 changed files with 53 additions and 38 deletions

View file

@ -23,3 +23,4 @@ env_logger = "0.10"
burn = { version = "0.17", features = ["ndarray", "autodiff"] }
burn-rl = { git = "https://github.com/yunjhongwu/burn-rl-examples.git", package = "burn-rl" }
log = "0.4.20"
confy = "1.0.0"

View file

@ -17,38 +17,12 @@ fn main() {
// let dir_path = &args[2];
let path = format!("bot/models/burnrl_{algo}");
let conf = Config {
save_path: Some(path.clone()),
num_episodes: 30, // 40
max_steps: 1000, // 1000 max steps by episode
dense_size: 256, // 128 neural network complexity (default 128)
gamma: 0.9999, // 0.999 discount factor. Plus élevé = encourage stratégies à long terme
tau: 0.0005, // 0.005 soft update rate. Taux de mise à jour du réseau cible. Plus bas = adaptation
// plus lente moins sensible aux coups de chance
learning_rate: 0.001, // 0.001 taille du pas. Bas : plus lent, haut : risque de ne jamais
// converger
batch_size: 128, // 32 nombre d'expériences passées sur lesquelles pour calcul de l'erreur moy.
clip_grad: 70.0, // 100 limite max de correction à apporter au gradient (default 100)
// SAC
min_probability: 1e-9,
// DQN
eps_start: 0.9, // 0.9 epsilon initial value (0.9 => more exploration)
eps_end: 0.05, // 0.05
// eps_decay higher = epsilon decrease slower
// used in : epsilon = eps_end + (eps_start - eps_end) * e^(-step / eps_decay);
// epsilon is updated at the start of each episode
eps_decay: 2000.0, // 1000 ?
// PPO
lambda: 0.95,
epsilon_clip: 0.2,
critic_weight: 0.5,
entropy_weight: 0.01,
epochs: 8,
};
println!(
"info: loading configuration from file {:?}",
confy::get_configuration_file_path("trictrac_bot", None).unwrap()
);
let mut conf: Config = confy::load("trictrac_bot", None).expect("Could not load config");
conf.save_path = Some(path.clone());
println!("{conf}----------");
match algo.as_str() {

View file

@ -3,28 +3,39 @@ use burn::nn::Linear;
use burn::tensor::backend::Backend;
use burn::tensor::Tensor;
use burn_rl::base::{Agent, ElemType, Environment};
use serde::{Deserialize, Serialize};
#[derive(Serialize, Deserialize)]
pub struct Config {
pub save_path: Option<String>,
pub max_steps: usize,
pub max_steps: usize, // max steps by episode
pub num_episodes: usize,
pub dense_size: usize,
pub dense_size: usize, // neural network complexity
// discount factor. Plus élevé = encourage stratégies à long terme
pub gamma: f32,
// soft update rate. Taux de mise à jour du réseau cible. Plus bas = adaptation plus lente moins sensible aux coups de chance
pub tau: f32,
// taille du pas. Bas : plus lent, haut : risque de ne jamais converger
pub learning_rate: f32,
// nombre d'expériences passées sur lesquelles porte le calcul de l'erreur moyenne
pub batch_size: usize,
// limite max de correction à apporter au gradient (default 100)
pub clip_grad: f32,
// for SAC
// ---- for SAC
pub min_probability: f32,
// for DQN
// ---- for DQN
// epsilon initial value (0.9 => more exploration)
pub eps_start: f64,
pub eps_end: f64,
// eps_decay higher = epsilon decrease slower
// used in : epsilon = eps_end + (eps_start - eps_end) * e^(-step / eps_decay);
// epsilon is updated at the start of each episode
pub eps_decay: f64,
// for PPO
// ---- for PPO
pub lambda: f32,
pub epsilon_clip: f32,
pub critic_weight: f32,