refact dqn simple
parent 1b58ca4ccc
commit a19c5d8596

bot/Cargo.toml
@@ -10,8 +10,8 @@ name = "train_dqn_burn"
 path = "src/dqn/burnrl/main.rs"
 
 [[bin]]
-name = "train_dqn"
-path = "src/bin/train_dqn.rs"
+name = "train_dqn_simple"
+path = "src/dqn/simple/main.rs"
 
 [dependencies]
 pretty_assertions = "1.4.0"
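The simple trainer's `[[bin]]` target is renamed from `train_dqn` to `train_dqn_simple`, and its entry point moves from `src/bin/train_dqn.rs` into the `src/dqn/simple/` tree, mirroring the `train_dqn_burn` target at `src/dqn/burnrl/main.rs` shown in the context above.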
bot/src/dqn/dqn_common.rs
@@ -106,157 +106,6 @@ impl TrictracAction {
     // }
 }
 
-/// Configuration for the DQN agent
-#[derive(Debug, Clone, Serialize, Deserialize)]
-pub struct DqnConfig {
-    pub state_size: usize,
-    pub hidden_size: usize,
-    pub num_actions: usize,
-    pub learning_rate: f64,
-    pub gamma: f64,
-    pub epsilon: f64,
-    pub epsilon_decay: f64,
-    pub epsilon_min: f64,
-    pub replay_buffer_size: usize,
-    pub batch_size: usize,
-}
-
-impl Default for DqnConfig {
-    fn default() -> Self {
-        Self {
-            state_size: 36,
-            hidden_size: 512, // Increase the size to handle the enlarged action space
-            num_actions: TrictracAction::action_space_size(),
-            learning_rate: 0.001,
-            gamma: 0.99,
-            epsilon: 0.1,
-            epsilon_decay: 0.995,
-            epsilon_min: 0.01,
-            replay_buffer_size: 10000,
-            batch_size: 32,
-        }
-    }
-}
-
-/// Simplified DQN neural network (basic weight matrices)
-#[derive(Debug, Clone, Serialize, Deserialize)]
-pub struct SimpleNeuralNetwork {
-    pub weights1: Vec<Vec<f32>>,
-    pub biases1: Vec<f32>,
-    pub weights2: Vec<Vec<f32>>,
-    pub biases2: Vec<f32>,
-    pub weights3: Vec<Vec<f32>>,
-    pub biases3: Vec<f32>,
-}
-
-impl SimpleNeuralNetwork {
-    pub fn new(input_size: usize, hidden_size: usize, output_size: usize) -> Self {
-        use rand::{thread_rng, Rng};
-        let mut rng = thread_rng();
-
-        // Random weight initialization with Xavier/Glorot
-        let scale1 = (2.0 / input_size as f32).sqrt();
-        let weights1 = (0..hidden_size)
-            .map(|_| {
-                (0..input_size)
-                    .map(|_| rng.gen_range(-scale1..scale1))
-                    .collect()
-            })
-            .collect();
-        let biases1 = vec![0.0; hidden_size];
-
-        let scale2 = (2.0 / hidden_size as f32).sqrt();
-        let weights2 = (0..hidden_size)
-            .map(|_| {
-                (0..hidden_size)
-                    .map(|_| rng.gen_range(-scale2..scale2))
-                    .collect()
-            })
-            .collect();
-        let biases2 = vec![0.0; hidden_size];
-
-        let scale3 = (2.0 / hidden_size as f32).sqrt();
-        let weights3 = (0..output_size)
-            .map(|_| {
-                (0..hidden_size)
-                    .map(|_| rng.gen_range(-scale3..scale3))
-                    .collect()
-            })
-            .collect();
-        let biases3 = vec![0.0; output_size];
-
-        Self {
-            weights1,
-            biases1,
-            weights2,
-            biases2,
-            weights3,
-            biases3,
-        }
-    }
-
-    pub fn forward(&self, input: &[f32]) -> Vec<f32> {
-        // First layer
-        let mut layer1: Vec<f32> = self.biases1.clone();
-        for (i, neuron_weights) in self.weights1.iter().enumerate() {
-            for (j, &weight) in neuron_weights.iter().enumerate() {
-                if j < input.len() {
-                    layer1[i] += input[j] * weight;
-                }
-            }
-            layer1[i] = layer1[i].max(0.0); // ReLU
-        }
-
-        // Second layer
-        let mut layer2: Vec<f32> = self.biases2.clone();
-        for (i, neuron_weights) in self.weights2.iter().enumerate() {
-            for (j, &weight) in neuron_weights.iter().enumerate() {
-                if j < layer1.len() {
-                    layer2[i] += layer1[j] * weight;
-                }
-            }
-            layer2[i] = layer2[i].max(0.0); // ReLU
-        }
-
-        // Output layer
-        let mut output: Vec<f32> = self.biases3.clone();
-        for (i, neuron_weights) in self.weights3.iter().enumerate() {
-            for (j, &weight) in neuron_weights.iter().enumerate() {
-                if j < layer2.len() {
-                    output[i] += layer2[j] * weight;
-                }
-            }
-        }
-
-        output
-    }
-
-    pub fn get_best_action(&self, input: &[f32]) -> usize {
-        let q_values = self.forward(input);
-        q_values
-            .iter()
-            .enumerate()
-            .max_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap())
-            .map(|(index, _)| index)
-            .unwrap_or(0)
-    }
-
-    pub fn save<P: AsRef<std::path::Path>>(
-        &self,
-        path: P,
-    ) -> Result<(), Box<dyn std::error::Error>> {
-        let data = serde_json::to_string_pretty(self)?;
-        std::fs::write(path, data)?;
-        Ok(())
-    }
-
-    pub fn load<P: AsRef<std::path::Path>>(path: P) -> Result<Self, Box<dyn std::error::Error>> {
-        let data = std::fs::read_to_string(path)?;
-        let network = serde_json::from_str(&data)?;
-        Ok(network)
-    }
-}
-
 /// Gets the valid actions for the current game state
 pub fn get_valid_actions(game_state: &crate::GameState) -> Vec<TrictracAction> {
     use store::TurnStage;
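Aside: the weight initialization in `SimpleNeuralNetwork::new` (removed above, re-added verbatim in the new module below) draws each weight uniformly from [-s, s] with s = sqrt(2 / fan_in) per layer. Despite the "Xavier/Glorot" comment, sqrt(2 / fan_in) is the He (Kaiming) scale, the usual choice for the ReLU activations this network applies; a uniform Xavier/Glorot draw would instead use sqrt(6 / (fan_in + fan_out)).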
bot/src/dqn/simple/dqn_model.rs (new file, 154 lines)
@@ -0,0 +1,154 @@
+use crate::dqn::dqn_common::TrictracAction;
+use serde::{Deserialize, Serialize};
+
+/// Configuration for the DQN agent
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct DqnConfig {
+    pub state_size: usize,
+    pub hidden_size: usize,
+    pub num_actions: usize,
+    pub learning_rate: f64,
+    pub gamma: f64,
+    pub epsilon: f64,
+    pub epsilon_decay: f64,
+    pub epsilon_min: f64,
+    pub replay_buffer_size: usize,
+    pub batch_size: usize,
+}
+
+impl Default for DqnConfig {
+    fn default() -> Self {
+        Self {
+            state_size: 36,
+            hidden_size: 512, // Increase the size to handle the enlarged action space
+            num_actions: TrictracAction::action_space_size(),
+            learning_rate: 0.001,
+            gamma: 0.99,
+            epsilon: 0.1,
+            epsilon_decay: 0.995,
+            epsilon_min: 0.01,
+            replay_buffer_size: 10000,
+            batch_size: 32,
+        }
+    }
+}
+
+/// Simplified DQN neural network (basic weight matrices)
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct SimpleNeuralNetwork {
+    pub weights1: Vec<Vec<f32>>,
+    pub biases1: Vec<f32>,
+    pub weights2: Vec<Vec<f32>>,
+    pub biases2: Vec<f32>,
+    pub weights3: Vec<Vec<f32>>,
+    pub biases3: Vec<f32>,
+}
+
+impl SimpleNeuralNetwork {
+    pub fn new(input_size: usize, hidden_size: usize, output_size: usize) -> Self {
+        use rand::{thread_rng, Rng};
+        let mut rng = thread_rng();
+
+        // Random weight initialization with Xavier/Glorot
+        let scale1 = (2.0 / input_size as f32).sqrt();
+        let weights1 = (0..hidden_size)
+            .map(|_| {
+                (0..input_size)
+                    .map(|_| rng.gen_range(-scale1..scale1))
+                    .collect()
+            })
+            .collect();
+        let biases1 = vec![0.0; hidden_size];
+
+        let scale2 = (2.0 / hidden_size as f32).sqrt();
+        let weights2 = (0..hidden_size)
+            .map(|_| {
+                (0..hidden_size)
+                    .map(|_| rng.gen_range(-scale2..scale2))
+                    .collect()
+            })
+            .collect();
+        let biases2 = vec![0.0; hidden_size];
+
+        let scale3 = (2.0 / hidden_size as f32).sqrt();
+        let weights3 = (0..output_size)
+            .map(|_| {
+                (0..hidden_size)
+                    .map(|_| rng.gen_range(-scale3..scale3))
+                    .collect()
+            })
+            .collect();
+        let biases3 = vec![0.0; output_size];
+
+        Self {
+            weights1,
+            biases1,
+            weights2,
+            biases2,
+            weights3,
+            biases3,
+        }
+    }
+
+    pub fn forward(&self, input: &[f32]) -> Vec<f32> {
+        // First layer
+        let mut layer1: Vec<f32> = self.biases1.clone();
+        for (i, neuron_weights) in self.weights1.iter().enumerate() {
+            for (j, &weight) in neuron_weights.iter().enumerate() {
+                if j < input.len() {
+                    layer1[i] += input[j] * weight;
+                }
+            }
+            layer1[i] = layer1[i].max(0.0); // ReLU
+        }
+
+        // Second layer
+        let mut layer2: Vec<f32> = self.biases2.clone();
+        for (i, neuron_weights) in self.weights2.iter().enumerate() {
+            for (j, &weight) in neuron_weights.iter().enumerate() {
+                if j < layer1.len() {
+                    layer2[i] += layer1[j] * weight;
+                }
+            }
+            layer2[i] = layer2[i].max(0.0); // ReLU
+        }
+
+        // Output layer
+        let mut output: Vec<f32> = self.biases3.clone();
+        for (i, neuron_weights) in self.weights3.iter().enumerate() {
+            for (j, &weight) in neuron_weights.iter().enumerate() {
+                if j < layer2.len() {
+                    output[i] += layer2[j] * weight;
+                }
+            }
+        }
+
+        output
+    }
+
+    pub fn get_best_action(&self, input: &[f32]) -> usize {
+        let q_values = self.forward(input);
+        q_values
+            .iter()
+            .enumerate()
+            .max_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap())
+            .map(|(index, _)| index)
+            .unwrap_or(0)
+    }
+
+    pub fn save<P: AsRef<std::path::Path>>(
+        &self,
+        path: P,
+    ) -> Result<(), Box<dyn std::error::Error>> {
+        let data = serde_json::to_string_pretty(self)?;
+        std::fs::write(path, data)?;
+        Ok(())
+    }
+
+    pub fn load<P: AsRef<std::path::Path>>(path: P) -> Result<Self, Box<dyn std::error::Error>> {
+        let data = std::fs::read_to_string(path)?;
+        let network = serde_json::from_str(&data)?;
+        Ok(network)
+    }
+}
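For orientation, a minimal sketch of how the relocated API fits together; the `model.json` path and the zeroed state vector are placeholders for illustration, and a real caller would encode a `GameState` into the 36-float input:

use bot::dqn::simple::dqn_model::{DqnConfig, SimpleNeuralNetwork};

fn demo() -> Result<(), Box<dyn std::error::Error>> {
    let config = DqnConfig::default(); // state_size = 36, hidden_size = 512
    let net = SimpleNeuralNetwork::new(config.state_size, config.hidden_size, config.num_actions);

    let state = vec![0.0_f32; config.state_size]; // placeholder state encoding
    let q_values = net.forward(&state);     // one Q-value per action
    let best = net.get_best_action(&state); // argmax over the Q-values
    assert_eq!(q_values.len(), config.num_actions);
    assert!(best < config.num_actions);

    net.save("model.json")?;                // JSON round-trip via serde
    let _restored = SimpleNeuralNetwork::load("model.json")?;
    Ok(())
}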
bot/src/dqn/simple/dqn_trainer.rs
@@ -5,7 +5,8 @@ use serde::{Deserialize, Serialize};
 use std::collections::VecDeque;
 use store::{GameEvent, MoveRules, PointsRules, Stage, TurnStage};
 
-use crate::dqn::dqn_common::{get_valid_actions, DqnConfig, SimpleNeuralNetwork, TrictracAction};
+use super::dqn_model::{DqnConfig, SimpleNeuralNetwork};
+use crate::dqn::dqn_common::{get_valid_actions, TrictracAction};
 
 /// Experience for the replay buffer
 #[derive(Debug, Clone, Serialize, Deserialize)]
bot/src/dqn/simple/main.rs
@@ -1,4 +1,5 @@
-use bot::dqn::dqn_common::{DqnConfig, TrictracAction};
+use bot::dqn::dqn_common::TrictracAction;
+use bot::dqn::simple::dqn_model::DqnConfig;
 use bot::dqn::simple::dqn_trainer::DqnTrainer;
 use std::env;
bot/src/dqn/simple/mod.rs
@@ -1 +1,2 @@
+pub mod dqn_model;
 pub mod dqn_trainer;
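The `simple` module now exposes both halves of the refactor: `dqn_model` holds the `DqnConfig` and `SimpleNeuralNetwork` moved out of `dqn_common`, and `dqn_trainer` keeps the training loop, while `dqn_common` retains only the shared action helpers (`TrictracAction`, `get_valid_actions`, `sample_valid_action`).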
@@ -3,9 +3,8 @@ use log::info;
 use std::path::Path;
 use store::MoveRules;
 
-use crate::dqn::dqn_common::{
-    get_valid_actions, sample_valid_action, SimpleNeuralNetwork, TrictracAction,
-};
+use crate::dqn::dqn_common::{get_valid_actions, sample_valid_action, TrictracAction};
+use crate::dqn::simple::dqn_model::SimpleNeuralNetwork;
 
 /// DQN strategy for the bot - only loads and uses a pre-trained model
 #[derive(Debug)]
justfile
@@ -22,8 +22,8 @@ pythonlib:
     maturin build -m store/Cargo.toml --release
     pip install --no-deps --force-reinstall --prefix .devenv/state/venv target/wheels/*.whl
 trainsimple:
-    cargo build --release --bin=train_dqn
-    LD_LIBRARY_PATH=./target/release ./target/release/train_dqn | tee /tmp/train.out
+    cargo build --release --bin=train_dqn_simple
+    LD_LIBRARY_PATH=./target/release ./target/release/train_dqn_simple | tee /tmp/train.out
 trainbot:
     #python ./store/python/trainModel.py
     # cargo run --bin=train_dqn # ok
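`just trainsimple` therefore builds and runs the renamed `train_dqn_simple` binary, still teeing its output to /tmp/train.out. Note that the commented-out `cargo run --bin=train_dqn` line under `trainbot` still refers to the old binary name.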