feat: bot training configuration file
This commit is contained in:
parent
8f41cc1412
commit
e1b8d7e679
31
Cargo.lock
generated
31
Cargo.lock
generated
|
|
@ -320,6 +320,7 @@ version = "0.1.0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"burn",
|
"burn",
|
||||||
"burn-rl",
|
"burn-rl",
|
||||||
|
"confy",
|
||||||
"env_logger 0.10.0",
|
"env_logger 0.10.0",
|
||||||
"log",
|
"log",
|
||||||
"pretty_assertions",
|
"pretty_assertions",
|
||||||
|
|
@ -917,7 +918,7 @@ checksum = "fe6d2e5af09e8c8ad56c969f2157a3d4238cebc7c55f0a517728c38f7b200f81"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"serde",
|
"serde",
|
||||||
"termcolor",
|
"termcolor",
|
||||||
"unicode-width 0.1.14",
|
"unicode-width 0.2.0",
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
|
|
@ -964,6 +965,18 @@ dependencies = [
|
||||||
"crossbeam-utils",
|
"crossbeam-utils",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "confy"
|
||||||
|
version = "1.0.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "f29222b549d4e3ded127989d523da9e928918d0d0d7f7c1690b439d0d538bae9"
|
||||||
|
dependencies = [
|
||||||
|
"directories",
|
||||||
|
"serde",
|
||||||
|
"thiserror 2.0.12",
|
||||||
|
"toml",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "constant_time_eq"
|
name = "constant_time_eq"
|
||||||
version = "0.1.5"
|
version = "0.1.5"
|
||||||
|
|
@ -1524,6 +1537,15 @@ dependencies = [
|
||||||
"subtle",
|
"subtle",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "directories"
|
||||||
|
version = "6.0.0"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "16f5094c54661b38d03bd7e50df373292118db60b585c08a411c6d840017fe7d"
|
||||||
|
dependencies = [
|
||||||
|
"dirs-sys 0.5.0",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "dirs"
|
name = "dirs"
|
||||||
version = "5.0.1"
|
version = "5.0.1"
|
||||||
|
|
@ -4784,9 +4806,16 @@ dependencies = [
|
||||||
"serde",
|
"serde",
|
||||||
"serde_spanned",
|
"serde_spanned",
|
||||||
"toml_datetime",
|
"toml_datetime",
|
||||||
|
"toml_write",
|
||||||
"winnow",
|
"winnow",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "toml_write"
|
||||||
|
version = "0.1.2"
|
||||||
|
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||||
|
checksum = "5d99f8c9a7727884afe522e9bd5edbfc91a3312b36a77b5fb8926e4c31a41801"
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "torch-sys"
|
name = "torch-sys"
|
||||||
version = "0.19.0"
|
version = "0.19.0"
|
||||||
|
|
|
||||||
|
|
@ -23,3 +23,4 @@ env_logger = "0.10"
|
||||||
burn = { version = "0.17", features = ["ndarray", "autodiff"] }
|
burn = { version = "0.17", features = ["ndarray", "autodiff"] }
|
||||||
burn-rl = { git = "https://github.com/yunjhongwu/burn-rl-examples.git", package = "burn-rl" }
|
burn-rl = { git = "https://github.com/yunjhongwu/burn-rl-examples.git", package = "burn-rl" }
|
||||||
log = "0.4.20"
|
log = "0.4.20"
|
||||||
|
confy = "1.0.0"
|
||||||
|
|
|
||||||
|
|
@ -17,38 +17,12 @@ fn main() {
|
||||||
// let dir_path = &args[2];
|
// let dir_path = &args[2];
|
||||||
|
|
||||||
let path = format!("bot/models/burnrl_{algo}");
|
let path = format!("bot/models/burnrl_{algo}");
|
||||||
let conf = Config {
|
println!(
|
||||||
save_path: Some(path.clone()),
|
"info: loading configuration from file {:?}",
|
||||||
num_episodes: 30, // 40
|
confy::get_configuration_file_path("trictrac_bot", None).unwrap()
|
||||||
max_steps: 1000, // 1000 max steps by episode
|
);
|
||||||
dense_size: 256, // 128 neural network complexity (default 128)
|
let mut conf: Config = confy::load("trictrac_bot", None).expect("Could not load config");
|
||||||
|
conf.save_path = Some(path.clone());
|
||||||
gamma: 0.9999, // 0.999 discount factor. Plus élevé = encourage stratégies à long terme
|
|
||||||
tau: 0.0005, // 0.005 soft update rate. Taux de mise à jour du réseau cible. Plus bas = adaptation
|
|
||||||
// plus lente moins sensible aux coups de chance
|
|
||||||
learning_rate: 0.001, // 0.001 taille du pas. Bas : plus lent, haut : risque de ne jamais
|
|
||||||
// converger
|
|
||||||
batch_size: 128, // 32 nombre d'expériences passées sur lesquelles pour calcul de l'erreur moy.
|
|
||||||
clip_grad: 70.0, // 100 limite max de correction à apporter au gradient (default 100)
|
|
||||||
|
|
||||||
// SAC
|
|
||||||
min_probability: 1e-9,
|
|
||||||
|
|
||||||
// DQN
|
|
||||||
eps_start: 0.9, // 0.9 epsilon initial value (0.9 => more exploration)
|
|
||||||
eps_end: 0.05, // 0.05
|
|
||||||
// eps_decay higher = epsilon decrease slower
|
|
||||||
// used in : epsilon = eps_end + (eps_start - eps_end) * e^(-step / eps_decay);
|
|
||||||
// epsilon is updated at the start of each episode
|
|
||||||
eps_decay: 2000.0, // 1000 ?
|
|
||||||
|
|
||||||
// PPO
|
|
||||||
lambda: 0.95,
|
|
||||||
epsilon_clip: 0.2,
|
|
||||||
critic_weight: 0.5,
|
|
||||||
entropy_weight: 0.01,
|
|
||||||
epochs: 8,
|
|
||||||
};
|
|
||||||
println!("{conf}----------");
|
println!("{conf}----------");
|
||||||
|
|
||||||
match algo.as_str() {
|
match algo.as_str() {
|
||||||
|
|
|
||||||
|
|
@ -3,28 +3,39 @@ use burn::nn::Linear;
|
||||||
use burn::tensor::backend::Backend;
|
use burn::tensor::backend::Backend;
|
||||||
use burn::tensor::Tensor;
|
use burn::tensor::Tensor;
|
||||||
use burn_rl::base::{Agent, ElemType, Environment};
|
use burn_rl::base::{Agent, ElemType, Environment};
|
||||||
|
use serde::{Deserialize, Serialize};
|
||||||
|
|
||||||
|
#[derive(Serialize, Deserialize)]
|
||||||
pub struct Config {
|
pub struct Config {
|
||||||
pub save_path: Option<String>,
|
pub save_path: Option<String>,
|
||||||
pub max_steps: usize,
|
pub max_steps: usize, // max steps by episode
|
||||||
pub num_episodes: usize,
|
pub num_episodes: usize,
|
||||||
pub dense_size: usize,
|
pub dense_size: usize, // neural network complexity
|
||||||
|
|
||||||
|
// discount factor. Higher = encourages long-term strategies
|
||||||
pub gamma: f32,
|
pub gamma: f32,
|
||||||
|
// soft update rate of the target network. Lower = slower adaptation, less sensitive to lucky streaks
|
||||||
pub tau: f32,
|
pub tau: f32,
|
||||||
|
// step size. Low: slower learning; high: risk of never converging
|
||||||
pub learning_rate: f32,
|
pub learning_rate: f32,
|
||||||
|
// number of past experiences used to compute the mean error
|
||||||
pub batch_size: usize,
|
pub batch_size: usize,
|
||||||
|
// maximum correction applied to the gradient (default 100)
|
||||||
pub clip_grad: f32,
|
pub clip_grad: f32,
|
||||||
|
|
||||||
// for SAC
|
// ---- for SAC
|
||||||
pub min_probability: f32,
|
pub min_probability: f32,
|
||||||
|
|
||||||
// for DQN
|
// ---- for DQN
|
||||||
|
// epsilon initial value (0.9 => more exploration)
|
||||||
pub eps_start: f64,
|
pub eps_start: f64,
|
||||||
pub eps_end: f64,
|
pub eps_end: f64,
|
||||||
|
// higher eps_decay = epsilon decreases more slowly
|
||||||
|
// used in : epsilon = eps_end + (eps_start - eps_end) * e^(-step / eps_decay);
|
||||||
|
// epsilon is updated at the start of each episode
|
||||||
pub eps_decay: f64,
|
pub eps_decay: f64,
|
||||||
|
|
||||||
// for PPO
|
// ---- for PPO
|
||||||
pub lambda: f32,
|
pub lambda: f32,
|
||||||
pub epsilon_clip: f32,
|
pub epsilon_clip: f32,
|
||||||
pub critic_weight: f32,
|
pub critic_weight: f32,
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue