diff --git a/Cargo.lock b/Cargo.lock index d504e2c..3708d45 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -320,6 +320,7 @@ version = "0.1.0" dependencies = [ "burn", "burn-rl", + "confy", "env_logger 0.10.0", "log", "pretty_assertions", @@ -917,7 +918,7 @@ checksum = "fe6d2e5af09e8c8ad56c969f2157a3d4238cebc7c55f0a517728c38f7b200f81" dependencies = [ "serde", "termcolor", - "unicode-width 0.1.14", + "unicode-width 0.2.0", ] [[package]] @@ -964,6 +965,18 @@ dependencies = [ "crossbeam-utils", ] +[[package]] +name = "confy" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f29222b549d4e3ded127989d523da9e928918d0d0d7f7c1690b439d0d538bae9" +dependencies = [ + "directories", + "serde", + "thiserror 2.0.12", + "toml", +] + [[package]] name = "constant_time_eq" version = "0.1.5" @@ -1524,6 +1537,15 @@ dependencies = [ "subtle", ] +[[package]] +name = "directories" +version = "6.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "16f5094c54661b38d03bd7e50df373292118db60b585c08a411c6d840017fe7d" +dependencies = [ + "dirs-sys 0.5.0", +] + [[package]] name = "dirs" version = "5.0.1" @@ -4784,9 +4806,16 @@ dependencies = [ "serde", "serde_spanned", "toml_datetime", + "toml_write", "winnow", ] +[[package]] +name = "toml_write" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5d99f8c9a7727884afe522e9bd5edbfc91a3312b36a77b5fb8926e4c31a41801" + [[package]] name = "torch-sys" version = "0.19.0" diff --git a/bot/Cargo.toml b/bot/Cargo.toml index 2de6307..fe918bd 100644 --- a/bot/Cargo.toml +++ b/bot/Cargo.toml @@ -23,3 +23,4 @@ env_logger = "0.10" burn = { version = "0.17", features = ["ndarray", "autodiff"] } burn-rl = { git = "https://github.com/yunjhongwu/burn-rl-examples.git", package = "burn-rl" } log = "0.4.20" +confy = "1.0.0" diff --git a/bot/src/burnrl/main.rs b/bot/src/burnrl/main.rs index d289dd6..f7608a3 100644 --- a/bot/src/burnrl/main.rs +++ b/bot/src/burnrl/main.rs @@ -17,38 +17,12 @@ fn main() { // let dir_path = &args[2]; let path = format!("bot/models/burnrl_{algo}"); - let conf = Config { - save_path: Some(path.clone()), - num_episodes: 30, // 40 - max_steps: 1000, // 1000 max steps by episode - dense_size: 256, // 128 neural network complexity (default 128) - - gamma: 0.9999, // 0.999 discount factor. Plus élevé = encourage stratégies à long terme - tau: 0.0005, // 0.005 soft update rate. Taux de mise à jour du réseau cible. Plus bas = adaptation - // plus lente moins sensible aux coups de chance - learning_rate: 0.001, // 0.001 taille du pas. Bas : plus lent, haut : risque de ne jamais - // converger - batch_size: 128, // 32 nombre d'expériences passées sur lesquelles pour calcul de l'erreur moy. - clip_grad: 70.0, // 100 limite max de correction à apporter au gradient (default 100) - - // SAC - min_probability: 1e-9, - - // DQN - eps_start: 0.9, // 0.9 epsilon initial value (0.9 => more exploration) - eps_end: 0.05, // 0.05 - // eps_decay higher = epsilon decrease slower - // used in : epsilon = eps_end + (eps_start - eps_end) * e^(-step / eps_decay); - // epsilon is updated at the start of each episode - eps_decay: 2000.0, // 1000 ? - - // PPO - lambda: 0.95, - epsilon_clip: 0.2, - critic_weight: 0.5, - entropy_weight: 0.01, - epochs: 8, - }; + println!( + "info: loading configuration from file {:?}", + confy::get_configuration_file_path("trictrac_bot", None).unwrap() + ); + let mut conf: Config = confy::load("trictrac_bot", None).expect("Could not load config"); + conf.save_path = Some(path.clone()); println!("{conf}----------"); match algo.as_str() { diff --git a/bot/src/burnrl/utils.rs b/bot/src/burnrl/utils.rs index 21c6cec..9233819 100644 --- a/bot/src/burnrl/utils.rs +++ b/bot/src/burnrl/utils.rs @@ -3,28 +3,39 @@ use burn::nn::Linear; use burn::tensor::backend::Backend; use burn::tensor::Tensor; use burn_rl::base::{Agent, ElemType, Environment}; +use serde::{Deserialize, Serialize}; +#[derive(Serialize, Deserialize)] pub struct Config { pub save_path: Option, - pub max_steps: usize, + pub max_steps: usize, // max steps by episode pub num_episodes: usize, - pub dense_size: usize, + pub dense_size: usize, // neural network complexity + // discount factor. Plus élevé = encourage stratégies à long terme pub gamma: f32, + // soft update rate. Taux de mise à jour du réseau cible. Plus bas = adaptation plus lente moins sensible aux coups de chance pub tau: f32, + // taille du pas. Bas : plus lent, haut : risque de ne jamais pub learning_rate: f32, + // nombre d'expériences passées sur lesquelles pour calcul de l'erreur moy. pub batch_size: usize, + // limite max de correction à apporter au gradient (default 100) pub clip_grad: f32, - // for SAC + // ---- for SAC pub min_probability: f32, - // for DQN + // ---- for DQN + // epsilon initial value (0.9 => more exploration) pub eps_start: f64, pub eps_end: f64, + // eps_decay higher = epsilon decrease slower + // used in : epsilon = eps_end + (eps_start - eps_end) * e^(-step / eps_decay); + // epsilon is updated at the start of each episode pub eps_decay: f64, - // for PPO + // ---- for PPO pub lambda: f32, pub epsilon_clip: f32, pub critic_weight: f32,