Compare commits
159 commits: 44c040b414 ... 2ef1f7ee50
Commit SHA1s, in page order (the author and date columns were not preserved in this capture):

2ef1f7ee50 73cc6ee67e f2a89f60bc 866ba611a6 e1b8d7e679 8f41cc1412 0c58490f87 afeb3561e0
18e85744d6 97167ff389 088124fad1 fcd50bc0f2 e66921fcce 2499c3377f a7aa087b18 1dc29d0ff0
db9560dfac 47a8502b63 c1e99a5f35 56d155b911 d313cb6151 93624c425d 86a67ae66a ac14341cf9
cfc19e6064 ec6ae26d38 5370eb4307 bfd2a4ed47 4353ba2bd1 1fb04209f5 778ac1817b e4b3092018
5b02293221 17d29b8633 a19c5d8596 1b58ca4ccc bf820ecc4e b02ce8d185 dc80243a1a 12004ec4f3
fa9c02084a fc9733b729 744a70cf1d c0d42a0c45 28c2aa836f ad5ae17168 2e0a874879 ad58c0ec60
fd269b491d 3e1775428d cb30fd3229 b92c9eb7ff 1e18b784d1 f3fc053dbd 6fa8a31cc7 c6d33555ec
354dcfd341 b98a135749 6a7b1cbebc f05094b2d4 cf93255f03 a06b47628e cf1175e497 dcd97d1df1
5b133cfe0a dc197fbc6f 7507ea5d78 bae0632f82 ebe98ca229 f7eea0ed02 a2e54bc449 ab959fa27b
480b2ff427 3d01e8fe06 4fd1f00af0 27fc08c47d ab770f3a34 899a690869 8368b0d837 12f53d00ca
883d799edb 59c80c66e4 52e400ca4b 33e77ff095 6478f5043d 38100a61b2 e95b25a9bc 0edcb3e810
53f95c0fe4 ff5ff74282 a3bcdb8912 447ec8cc58 2fe2f66c13 71992ccf7c 14021ff09b e9f4940c40
37cc50ed2a 25acc86059 5762187b04 4ee2b02a41 5d4401a1fe 94021137a3 7848bfcbca acab0b0593
c5321e6186 fbd5976d88 82e34e78af b624629cad fb3e2678fc 5741a6894b 18bd87e68f fc58768006
3d41517909 17605efe76 08fd067a95 4c17148315 4f118319ed 6d785dcb94 a3278cbe1a 5b1e46826c
8be3de3ede 5fd11f7ef2 90c3db3ba7 41b07d5ce2 97f8678645 41f8b0ac6e fb5e954b85 bfd58cc399
1af5694b71 310d96f3bc 6be25f699c a2b700db7c 9bf5cedfa0 0df394c0b1 b528fa3ac6 152d2673f7
4b3bae7caf b9f381c970 b7255e9d16 07ec82ae7c aff489f134 7790c07dcc e43a742c1e 86220f6408
3c3c6d8458 84b5ab6a5f 104200888c a62e91a233 33ad73103b 7d3124030f 50a4038aa6 2139de2fcd
6a0dc9395a 6ceefe01ab dff9f7f3e1 f61d2044f8 24ddcce233 80d4c256c0 a886526fcf
.gitignore (vendored; 9 changed lines — the added/removed markers were stripped in this capture, so the hunk is shown unmarked)

```
@@ -1,13 +1,14 @@
# Generated by Cargo
# will have compiled files and executables
/target/
/target

# These are backup files generated by rustfmt
**/*.rs.bk

.devenv*
devenv.local.nix


# Added by cargo

/target
# generated by samply rust profiler
profile.json
bot/models

```

(file name not preserved in this capture — a one-line file was deleted)

```diff
@@ -1 +0,0 @@
-/nix/store/qp1vc91wm7s7vz337hpv6zimpc4gcvf0-pre-commit-config.json
```
Cargo.lock (generated; 6336 changed lines) — file diff suppressed because it is too large.
Workspace manifest (the file header was lost in this capture; the `[workspace]` content identifies it as the root Cargo.toml). The old and new line counts in the hunk header match the reconstruction below exactly:

```diff
@@ -1,10 +1,4 @@
 [workspace]
 resolver = "2"
 
-members = [
-    "client",
-    "client_tui",
-    "client_cli",
-    "server",
-    "store"
-]
+members = ["client_tui", "client_cli", "bot", "server", "store"]
```
Makefile (deleted; 11 lines — recipe indentation restored, since make requires tabs)

```diff
@@ -1,11 +0,0 @@
-shell:
-	devenv shell
-	# nix develop
-startserver:
-	RUST_LOG=trictrac_server cargo run --bin trictrac-server
-startclient1:
-	RUST_LOG=trictrac_client cargo run --bin=trictrac-client Titi
-startclient2:
-	RUST_LOG=trictrac_client cargo run --bin=trictrac-client Titu
-startclienttui:
-	RUST_LOG=trictrac_client cargo run --bin=client_tui Tutu
```
bot/Cargo.toml (new file, 28 lines)

```toml
[package]
name = "bot"
version = "0.1.0"
edition = "2021"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[[bin]]
name = "burn_train"
path = "src/burnrl/main.rs"

[[bin]]
name = "train_dqn_simple"
path = "src/dqn_simple/main.rs"

[dependencies]
pretty_assertions = "1.4.0"
serde = { version = "1.0", features = ["derive"] }
serde_json = "1.0"
store = { path = "../store" }
rand = "0.8"
env_logger = "0.10"
burn = { version = "0.17", features = ["ndarray", "autodiff"] }
burn-rl = { git = "https://github.com/yunjhongwu/burn-rl-examples.git", package = "burn-rl" }
log = "0.4.20"
confy = "1.0.0"
board-game = "0.8.2"
internal-iterator = "0.2.3"
```
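The manifest declares two standalone training binaries. As a quick sketch of how they are invoked (standard Cargo behavior, nothing project-specific assumed):

```bash
# Runs src/burnrl/main.rs, the burn-rl based trainer
cargo run --release --bin burn_train

# Runs src/dqn_simple/main.rs, the simple DQN trainer
cargo run --release --bin train_dqn_simple
```

The train.sh script below wraps the first of these.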
bot/scripts/train.sh (new executable file, 50 lines; indentation restored)

```bash
#!/usr/bin/env bash

ROOT="$(cd "$(dirname "$0")" && pwd)/../.."
LOGS_DIR="$ROOT/bot/models/logs"

CFG_SIZE=17
BINBOT=burn_train
# BINBOT=train_ppo_burn
# BINBOT=train_dqn_burn
# BINBOT=train_dqn_burn_big
# BINBOT=train_dqn_burn_before
OPPONENT="random"

PLOT_EXT="png"

train() {
    ALGO=$1
    cargo build --release --bin=$BINBOT
    NAME="$(date +%Y-%m-%d_%H:%M:%S)"
    LOGS="$LOGS_DIR/$ALGO/$NAME.out"
    mkdir -p "$LOGS_DIR/$ALGO"
    LD_LIBRARY_PATH="$ROOT/target/release" "$ROOT/target/release/$BINBOT" $ALGO | tee "$LOGS"
}

plot() {
    ALGO=$1
    NAME=$(ls -rt "$LOGS_DIR/$ALGO" | grep -v png | tail -n 1)
    LOGS="$LOGS_DIR/$ALGO/$NAME"
    cfgs=$(grep -v "info:" "$LOGS" | head -n $CFG_SIZE)
    for cfg in $cfgs; do
        eval "$cfg"
    done

    tail -n +$((CFG_SIZE + 2)) "$LOGS" |
        grep -v "info:" |
        awk -F '[ ,]' '{print $5}' |
        feedgnuplot --lines --points --unset grid --title "adv = $OPPONENT ; density = $dense_size ; decay = $eps_decay ; max steps = $max_steps" --terminal $PLOT_EXT >"$LOGS_DIR/$ALGO/$NAME.$PLOT_EXT"
}

if [[ -z "$1" ]]; then
    echo "Usage : train [plot] <algo>"
elif [ "$1" = "plot" ]; then
    if [[ -z "$2" ]]; then
        echo "Usage : train [plot] <algo>"
    else
        plot $2
    fi
else
    train $1
fi
```
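Putting the argument handling together, a hedged usage sketch (`dqn` is only a placeholder algorithm name here; valid values are whatever the `burn_train` binary accepts as its first argument):

```bash
# Train: builds the release binary, runs it, and tees output to bot/models/logs/dqn/<timestamp>.out
./bot/scripts/train.sh dqn

# Plot the latest log for that algorithm (requires feedgnuplot on PATH)
./bot/scripts/train.sh plot dqn
```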
bot/scripts/trainValid.sh (new executable file, 49 lines; indentation restored)

```bash
#!/usr/bin/env sh

ROOT="$(cd "$(dirname "$0")" && pwd)/../.."
LOGS_DIR="$ROOT/bot/models/logs"

CFG_SIZE=11
OPPONENT="random"

PLOT_EXT="png"

train() {
    cargo build --release --bin=train_dqn_burn_valid
    NAME="trainValid_$(date +%Y-%m-%d_%H:%M:%S)"
    LOGS="$LOGS_DIR/$NAME.out"
    mkdir -p "$LOGS_DIR"
    LD_LIBRARY_PATH="$ROOT/target/release" "$ROOT/target/release/train_dqn_burn_valid" | tee "$LOGS"
}

plot() {
    NAME=$(ls -rt "$LOGS_DIR" | grep -v "png" | tail -n 1)
    LOGS="$LOGS_DIR/$NAME"
    cfgs=$(head -n $CFG_SIZE "$LOGS")
    for cfg in $cfgs; do
        eval "$cfg"
    done

    # tail -n +$((CFG_SIZE + 2)) "$LOGS"
    tail -n +$((CFG_SIZE + 2)) "$LOGS" |
        grep -v "info:" |
        awk -F '[ ,]' '{print $5}' |
        feedgnuplot --lines --points --unset grid --title "adv = $OPPONENT ; density = $dense_size ; decay = $eps_decay ; max steps = $max_steps" --terminal $PLOT_EXT >"$LOGS_DIR/$OPPONENT-$dense_size-$eps_decay-$max_steps-$NAME.$PLOT_EXT"
}

avg() {
    NAME=$(ls -rt "$LOGS_DIR" | grep -v "png" | tail -n 1)
    LOGS="$LOGS_DIR/$NAME"
    echo $LOGS
    tail -n +$((CFG_SIZE + 2)) "$LOGS" |
        grep -v "info:" |
        awk -F '[ ,]' '{print $5}' | awk '{ sum += $1; n++ } END { if (n > 0) print sum / n; }'
}

if [ "$1" = "plot" ]; then
    plot
elif [ "$1" = "avg" ]; then
    avg
else
    train
fi
```
bot/src/burnrl/algos/dqn.rs (new file, 195 lines; French log strings translated)

```rust
use crate::burnrl::environment::TrictracEnvironment;
use crate::burnrl::utils::{soft_update_linear, Config};
use burn::backend::{ndarray::NdArrayDevice, NdArray};
use burn::module::Module;
use burn::nn::{Linear, LinearConfig};
use burn::optim::AdamWConfig;
use burn::record::{CompactRecorder, Recorder};
use burn::tensor::activation::relu;
use burn::tensor::backend::{AutodiffBackend, Backend};
use burn::tensor::Tensor;
use burn_rl::agent::DQN;
use burn_rl::agent::{DQNModel, DQNTrainingConfig};
use burn_rl::base::{Action, Agent, ElemType, Environment, Memory, Model, State};
use std::time::SystemTime;

/// Q-network: three fully-connected layers with ReLU activations (including the output).
#[derive(Module, Debug)]
pub struct Net<B: Backend> {
    linear_0: Linear<B>,
    linear_1: Linear<B>,
    linear_2: Linear<B>,
}

impl<B: Backend> Net<B> {
    #[allow(unused)]
    pub fn new(input_size: usize, dense_size: usize, output_size: usize) -> Self {
        Self {
            linear_0: LinearConfig::new(input_size, dense_size).init(&Default::default()),
            linear_1: LinearConfig::new(dense_size, dense_size).init(&Default::default()),
            linear_2: LinearConfig::new(dense_size, output_size).init(&Default::default()),
        }
    }

    fn consume(self) -> (Linear<B>, Linear<B>, Linear<B>) {
        (self.linear_0, self.linear_1, self.linear_2)
    }
}

impl<B: Backend> Model<B, Tensor<B, 2>, Tensor<B, 2>> for Net<B> {
    fn forward(&self, input: Tensor<B, 2>) -> Tensor<B, 2> {
        let layer_0_output = relu(self.linear_0.forward(input));
        let layer_1_output = relu(self.linear_1.forward(layer_0_output));

        relu(self.linear_2.forward(layer_1_output))
    }

    fn infer(&self, input: Tensor<B, 2>) -> Tensor<B, 2> {
        self.forward(input)
    }
}

impl<B: Backend> DQNModel<B> for Net<B> {
    fn soft_update(this: Self, that: &Self, tau: ElemType) -> Self {
        let (linear_0, linear_1, linear_2) = this.consume();

        Self {
            linear_0: soft_update_linear(linear_0, &that.linear_0, tau),
            linear_1: soft_update_linear(linear_1, &that.linear_1, tau),
            linear_2: soft_update_linear(linear_2, &that.linear_2, tau),
        }
    }
}

#[allow(unused)]
const MEMORY_SIZE: usize = 8192;

type MyAgent<E, B> = DQN<E, B, Net<B>>;

/// Trains a DQN agent on the Trictrac environment and returns it in validation mode.
#[allow(unused)]
// pub fn run<E: Environment + AsMut<TrictracEnvironment>, B: AutodiffBackend>(
pub fn run<
    E: Environment + AsMut<TrictracEnvironment>,
    B: AutodiffBackend<InnerBackend = NdArray>,
>(
    conf: &Config,
    visualized: bool,
    // ) -> DQN<E, B, Net<B>> {
) -> impl Agent<E> {
    let mut env = E::new(visualized);
    // env.as_mut().min_steps = conf.min_steps;
    env.as_mut().max_steps = conf.max_steps;

    let model = Net::<B>::new(
        <<E as Environment>::StateType as State>::size(),
        conf.dense_size,
        <<E as Environment>::ActionType as Action>::size(),
    );

    let mut agent = MyAgent::new(model);

    // let config = DQNTrainingConfig::default();
    let config = DQNTrainingConfig {
        gamma: conf.gamma,
        tau: conf.tau,
        learning_rate: conf.learning_rate,
        batch_size: conf.batch_size,
        clip_grad: Some(burn::grad_clipping::GradientClippingConfig::Value(
            conf.clip_grad,
        )),
    };

    let mut memory = Memory::<E, B, MEMORY_SIZE>::default();

    let mut optimizer = AdamWConfig::new()
        .with_grad_clipping(config.clip_grad.clone())
        .init();

    let mut policy_net = agent.model().as_ref().unwrap().clone();

    let mut step = 0_usize;

    for episode in 0..conf.num_episodes {
        let mut episode_done = false;
        let mut episode_reward: ElemType = 0.0;
        let mut episode_duration = 0_usize;
        let mut state = env.state();
        let mut now = SystemTime::now();

        while !episode_done {
            let eps_threshold = conf.eps_end
                + (conf.eps_start - conf.eps_end) * f64::exp(-(step as f64) / conf.eps_decay);
            let action =
                DQN::<E, B, Net<B>>::react_with_exploration(&policy_net, state, eps_threshold);
            let snapshot = env.step(action);

            episode_reward +=
                <<E as Environment>::RewardType as Into<ElemType>>::into(snapshot.reward().clone());

            memory.push(
                state,
                *snapshot.state(),
                action,
                snapshot.reward().clone(),
                snapshot.done(),
            );

            if config.batch_size < memory.len() {
                policy_net =
                    agent.train::<MEMORY_SIZE>(policy_net, &memory, &mut optimizer, &config);
            }

            step += 1;
            episode_duration += 1;

            if snapshot.done() || episode_duration >= conf.max_steps {
                let envmut = env.as_mut();
                let goodmoves_ratio = ((envmut.goodmoves_count as f32 / episode_duration as f32)
                    * 100.0)
                    .round() as u32;
                println!(
                    "{{\"episode\": {episode}, \"reward\": {episode_reward:.4}, \"steps count\": {episode_duration}, \"epsilon\": {eps_threshold:.3}, \"goodmoves\": {}, \"ratio\": {}%, \"rollpoints\":{}, \"duration\": {}}}",
                    envmut.goodmoves_count,
                    goodmoves_ratio,
                    envmut.pointrolls_count,
                    now.elapsed().unwrap().as_secs(),
                );
                env.reset();
                episode_done = true;
                now = SystemTime::now();
            } else {
                state = *snapshot.state();
            }
        }
    }
    let valid_agent = agent.valid();
    if let Some(path) = &conf.save_path {
        save_model(valid_agent.model().as_ref().unwrap(), path);
    }
    valid_agent
}

pub fn save_model(model: &Net<NdArray<ElemType>>, path: &String) {
    let recorder = CompactRecorder::new();
    let model_path = format!("{path}.mpk");
    println!("info: Validation model saved: {model_path}");
    recorder
        .record(model.clone().into_record(), model_path.into())
        .unwrap();
}

pub fn load_model(dense_size: usize, path: &String) -> Option<Net<NdArray<ElemType>>> {
    let model_path = format!("{path}.mpk");
    // println!("Loading model from: {model_path}");

    CompactRecorder::new()
        .load(model_path.into(), &NdArrayDevice::default())
        .map(|record| {
            Net::new(
                <TrictracEnvironment as Environment>::StateType::size(),
                dense_size,
                <TrictracEnvironment as Environment>::ActionType::size(),
            )
            .load_record(record)
        })
        .ok()
}
```
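The exploration schedule in `run` above is a standard exponential ε-decay over the global step counter; in terms of the `Config` fields it computes

$$\varepsilon(\mathrm{step}) = \varepsilon_{\mathrm{end}} + (\varepsilon_{\mathrm{start}} - \varepsilon_{\mathrm{end}})\, e^{-\mathrm{step}/\mathrm{eps\_decay}}$$

so exploration starts near `eps_start` and settles toward `eps_end` as training progresses.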
bot/src/burnrl/algos/dqn_big.rs (new file, 194 lines; identical structure to dqn.rs but trained against the `environment_big` variant; French log strings translated)

```rust
use crate::burnrl::environment_big::TrictracEnvironment;
use crate::burnrl::utils::{soft_update_linear, Config};
use burn::backend::{ndarray::NdArrayDevice, NdArray};
use burn::module::Module;
use burn::nn::{Linear, LinearConfig};
use burn::optim::AdamWConfig;
use burn::record::{CompactRecorder, Recorder};
use burn::tensor::activation::relu;
use burn::tensor::backend::{AutodiffBackend, Backend};
use burn::tensor::Tensor;
use burn_rl::agent::DQN;
use burn_rl::agent::{DQNModel, DQNTrainingConfig};
use burn_rl::base::{Action, Agent, ElemType, Environment, Memory, Model, State};
use std::time::SystemTime;

#[derive(Module, Debug)]
pub struct Net<B: Backend> {
    linear_0: Linear<B>,
    linear_1: Linear<B>,
    linear_2: Linear<B>,
}

impl<B: Backend> Net<B> {
    #[allow(unused)]
    pub fn new(input_size: usize, dense_size: usize, output_size: usize) -> Self {
        Self {
            linear_0: LinearConfig::new(input_size, dense_size).init(&Default::default()),
            linear_1: LinearConfig::new(dense_size, dense_size).init(&Default::default()),
            linear_2: LinearConfig::new(dense_size, output_size).init(&Default::default()),
        }
    }

    fn consume(self) -> (Linear<B>, Linear<B>, Linear<B>) {
        (self.linear_0, self.linear_1, self.linear_2)
    }
}

impl<B: Backend> Model<B, Tensor<B, 2>, Tensor<B, 2>> for Net<B> {
    fn forward(&self, input: Tensor<B, 2>) -> Tensor<B, 2> {
        let layer_0_output = relu(self.linear_0.forward(input));
        let layer_1_output = relu(self.linear_1.forward(layer_0_output));

        relu(self.linear_2.forward(layer_1_output))
    }

    fn infer(&self, input: Tensor<B, 2>) -> Tensor<B, 2> {
        self.forward(input)
    }
}

impl<B: Backend> DQNModel<B> for Net<B> {
    fn soft_update(this: Self, that: &Self, tau: ElemType) -> Self {
        let (linear_0, linear_1, linear_2) = this.consume();

        Self {
            linear_0: soft_update_linear(linear_0, &that.linear_0, tau),
            linear_1: soft_update_linear(linear_1, &that.linear_1, tau),
            linear_2: soft_update_linear(linear_2, &that.linear_2, tau),
        }
    }
}

#[allow(unused)]
const MEMORY_SIZE: usize = 8192;

type MyAgent<E, B> = DQN<E, B, Net<B>>;

#[allow(unused)]
// pub fn run<E: Environment + AsMut<TrictracEnvironment>, B: AutodiffBackend>(
pub fn run<
    E: Environment + AsMut<TrictracEnvironment>,
    B: AutodiffBackend<InnerBackend = NdArray>,
>(
    conf: &Config,
    visualized: bool,
    // ) -> DQN<E, B, Net<B>> {
) -> impl Agent<E> {
    let mut env = E::new(visualized);
    env.as_mut().max_steps = conf.max_steps;

    let model = Net::<B>::new(
        <<E as Environment>::StateType as State>::size(),
        conf.dense_size,
        <<E as Environment>::ActionType as Action>::size(),
    );

    let mut agent = MyAgent::new(model);

    // let config = DQNTrainingConfig::default();
    let config = DQNTrainingConfig {
        gamma: conf.gamma,
        tau: conf.tau,
        learning_rate: conf.learning_rate,
        batch_size: conf.batch_size,
        clip_grad: Some(burn::grad_clipping::GradientClippingConfig::Value(
            conf.clip_grad,
        )),
    };

    let mut memory = Memory::<E, B, MEMORY_SIZE>::default();

    let mut optimizer = AdamWConfig::new()
        .with_grad_clipping(config.clip_grad.clone())
        .init();

    let mut policy_net = agent.model().as_ref().unwrap().clone();

    let mut step = 0_usize;

    for episode in 0..conf.num_episodes {
        let mut episode_done = false;
        let mut episode_reward: ElemType = 0.0;
        let mut episode_duration = 0_usize;
        let mut state = env.state();
        let mut now = SystemTime::now();

        while !episode_done {
            let eps_threshold = conf.eps_end
                + (conf.eps_start - conf.eps_end) * f64::exp(-(step as f64) / conf.eps_decay);
            let action =
                DQN::<E, B, Net<B>>::react_with_exploration(&policy_net, state, eps_threshold);
            let snapshot = env.step(action);

            episode_reward +=
                <<E as Environment>::RewardType as Into<ElemType>>::into(snapshot.reward().clone());

            memory.push(
                state,
                *snapshot.state(),
                action,
                snapshot.reward().clone(),
                snapshot.done(),
            );

            if config.batch_size < memory.len() {
                policy_net =
                    agent.train::<MEMORY_SIZE>(policy_net, &memory, &mut optimizer, &config);
            }

            step += 1;
            episode_duration += 1;

            if snapshot.done() || episode_duration >= conf.max_steps {
                let envmut = env.as_mut();
                let goodmoves_ratio = ((envmut.goodmoves_count as f32 / episode_duration as f32)
                    * 100.0)
                    .round() as u32;
                println!(
                    "{{\"episode\": {episode}, \"reward\": {episode_reward:.4}, \"steps count\": {episode_duration}, \"epsilon\": {eps_threshold:.3}, \"goodmoves\": {}, \"ratio\": {}%, \"rollpoints\":{}, \"duration\": {}}}",
                    envmut.goodmoves_count,
                    goodmoves_ratio,
                    envmut.pointrolls_count,
                    now.elapsed().unwrap().as_secs(),
                );
                env.reset();
                episode_done = true;
                now = SystemTime::now();
            } else {
                state = *snapshot.state();
            }
        }
    }
    let valid_agent = agent.valid();
    if let Some(path) = &conf.save_path {
        save_model(valid_agent.model().as_ref().unwrap(), path);
    }
    valid_agent
}

pub fn save_model(model: &Net<NdArray<ElemType>>, path: &String) {
    let recorder = CompactRecorder::new();
    let model_path = format!("{path}.mpk");
    println!("info: Validation model saved: {model_path}");
    recorder
        .record(model.clone().into_record(), model_path.into())
        .unwrap();
}

pub fn load_model(dense_size: usize, path: &String) -> Option<Net<NdArray<ElemType>>> {
    let model_path = format!("{path}.mpk");
    // println!("Loading model from: {model_path}");

    CompactRecorder::new()
        .load(model_path.into(), &NdArrayDevice::default())
        .map(|record| {
            Net::new(
                <TrictracEnvironment as Environment>::StateType::size(),
                dense_size,
                <TrictracEnvironment as Environment>::ActionType::size(),
            )
            .load_record(record)
        })
        .ok()
}
```
bot/src/burnrl/algos/dqn_valid.rs (new file, 189 lines; same as dqn.rs but against `environment_valid`, without the good-moves statistics; French log strings translated)

```rust
use crate::burnrl::environment_valid::TrictracEnvironment;
use crate::burnrl::utils::{soft_update_linear, Config};
use burn::backend::{ndarray::NdArrayDevice, NdArray};
use burn::module::Module;
use burn::nn::{Linear, LinearConfig};
use burn::optim::AdamWConfig;
use burn::record::{CompactRecorder, Recorder};
use burn::tensor::activation::relu;
use burn::tensor::backend::{AutodiffBackend, Backend};
use burn::tensor::Tensor;
use burn_rl::agent::DQN;
use burn_rl::agent::{DQNModel, DQNTrainingConfig};
use burn_rl::base::{Action, Agent, ElemType, Environment, Memory, Model, State};
use std::time::SystemTime;

#[derive(Module, Debug)]
pub struct Net<B: Backend> {
    linear_0: Linear<B>,
    linear_1: Linear<B>,
    linear_2: Linear<B>,
}

impl<B: Backend> Net<B> {
    #[allow(unused)]
    pub fn new(input_size: usize, dense_size: usize, output_size: usize) -> Self {
        Self {
            linear_0: LinearConfig::new(input_size, dense_size).init(&Default::default()),
            linear_1: LinearConfig::new(dense_size, dense_size).init(&Default::default()),
            linear_2: LinearConfig::new(dense_size, output_size).init(&Default::default()),
        }
    }

    fn consume(self) -> (Linear<B>, Linear<B>, Linear<B>) {
        (self.linear_0, self.linear_1, self.linear_2)
    }
}

impl<B: Backend> Model<B, Tensor<B, 2>, Tensor<B, 2>> for Net<B> {
    fn forward(&self, input: Tensor<B, 2>) -> Tensor<B, 2> {
        let layer_0_output = relu(self.linear_0.forward(input));
        let layer_1_output = relu(self.linear_1.forward(layer_0_output));

        relu(self.linear_2.forward(layer_1_output))
    }

    fn infer(&self, input: Tensor<B, 2>) -> Tensor<B, 2> {
        self.forward(input)
    }
}

impl<B: Backend> DQNModel<B> for Net<B> {
    fn soft_update(this: Self, that: &Self, tau: ElemType) -> Self {
        let (linear_0, linear_1, linear_2) = this.consume();

        Self {
            linear_0: soft_update_linear(linear_0, &that.linear_0, tau),
            linear_1: soft_update_linear(linear_1, &that.linear_1, tau),
            linear_2: soft_update_linear(linear_2, &that.linear_2, tau),
        }
    }
}

#[allow(unused)]
const MEMORY_SIZE: usize = 8192;

type MyAgent<E, B> = DQN<E, B, Net<B>>;

#[allow(unused)]
// pub fn run<E: Environment + AsMut<TrictracEnvironment>, B: AutodiffBackend>(
pub fn run<
    E: Environment + AsMut<TrictracEnvironment>,
    B: AutodiffBackend<InnerBackend = NdArray>,
>(
    conf: &Config,
    visualized: bool,
    // ) -> DQN<E, B, Net<B>> {
) -> impl Agent<E> {
    let mut env = E::new(visualized);
    env.as_mut().max_steps = conf.max_steps;

    let model = Net::<B>::new(
        <<E as Environment>::StateType as State>::size(),
        conf.dense_size,
        <<E as Environment>::ActionType as Action>::size(),
    );

    let mut agent = MyAgent::new(model);

    // let config = DQNTrainingConfig::default();
    let config = DQNTrainingConfig {
        gamma: conf.gamma,
        tau: conf.tau,
        learning_rate: conf.learning_rate,
        batch_size: conf.batch_size,
        clip_grad: Some(burn::grad_clipping::GradientClippingConfig::Value(
            conf.clip_grad,
        )),
    };

    let mut memory = Memory::<E, B, MEMORY_SIZE>::default();

    let mut optimizer = AdamWConfig::new()
        .with_grad_clipping(config.clip_grad.clone())
        .init();

    let mut policy_net = agent.model().as_ref().unwrap().clone();

    let mut step = 0_usize;

    for episode in 0..conf.num_episodes {
        let mut episode_done = false;
        let mut episode_reward: ElemType = 0.0;
        let mut episode_duration = 0_usize;
        let mut state = env.state();
        let mut now = SystemTime::now();

        while !episode_done {
            let eps_threshold = conf.eps_end
                + (conf.eps_start - conf.eps_end) * f64::exp(-(step as f64) / conf.eps_decay);
            let action =
                DQN::<E, B, Net<B>>::react_with_exploration(&policy_net, state, eps_threshold);
            let snapshot = env.step(action);

            episode_reward +=
                <<E as Environment>::RewardType as Into<ElemType>>::into(snapshot.reward().clone());

            memory.push(
                state,
                *snapshot.state(),
                action,
                snapshot.reward().clone(),
                snapshot.done(),
            );

            if config.batch_size < memory.len() {
                policy_net =
                    agent.train::<MEMORY_SIZE>(policy_net, &memory, &mut optimizer, &config);
            }

            step += 1;
            episode_duration += 1;

            if snapshot.done() || episode_duration >= conf.max_steps {
                let envmut = env.as_mut();
                println!(
                    "{{\"episode\": {episode}, \"reward\": {episode_reward:.4}, \"steps count\": {episode_duration}, \"epsilon\": {eps_threshold:.3}, \"rollpoints\":{}, \"duration\": {}}}",
                    envmut.pointrolls_count,
                    now.elapsed().unwrap().as_secs(),
                );
                env.reset();
                episode_done = true;
                now = SystemTime::now();
            } else {
                state = *snapshot.state();
            }
        }
    }
    let valid_agent = agent.valid();
    if let Some(path) = &conf.save_path {
        save_model(valid_agent.model().as_ref().unwrap(), path);
    }
    valid_agent
}

pub fn save_model(model: &Net<NdArray<ElemType>>, path: &String) {
    let recorder = CompactRecorder::new();
    let model_path = format!("{path}.mpk");
    println!("info: Validation model saved: {model_path}");
    recorder
        .record(model.clone().into_record(), model_path.into())
        .unwrap();
}

pub fn load_model(dense_size: usize, path: &String) -> Option<Net<NdArray<ElemType>>> {
    let model_path = format!("{path}.mpk");
    // println!("Loading model from: {model_path}");

    CompactRecorder::new()
        .load(model_path.into(), &NdArrayDevice::default())
        .map(|record| {
            Net::new(
                <TrictracEnvironment as Environment>::StateType::size(),
                dense_size,
                <TrictracEnvironment as Environment>::ActionType::size(),
            )
            .load_record(record)
        })
        .ok()
}
```
bot/src/burnrl/algos/mod.rs (new file, 9 lines)

```rust
pub mod dqn;
pub mod dqn_big;
pub mod dqn_valid;
pub mod ppo;
pub mod ppo_big;
pub mod ppo_valid;
pub mod sac;
pub mod sac_big;
pub mod sac_valid;
```
bot/src/burnrl/algos/ppo.rs (new file, 191 lines; French log strings translated)

```rust
use crate::burnrl::environment::TrictracEnvironment;
use crate::burnrl::utils::Config;
use burn::backend::{ndarray::NdArrayDevice, NdArray};
use burn::module::Module;
use burn::nn::{Initializer, Linear, LinearConfig};
use burn::optim::AdamWConfig;
use burn::record::{CompactRecorder, Recorder};
use burn::tensor::activation::{relu, softmax};
use burn::tensor::backend::{AutodiffBackend, Backend};
use burn::tensor::Tensor;
use burn_rl::agent::{PPOModel, PPOOutput, PPOTrainingConfig, PPO};
use burn_rl::base::{Action, Agent, ElemType, Environment, Memory, Model, State};
use std::env;
use std::fs;
use std::time::SystemTime;

/// Shared trunk with separate actor (policy) and critic (value) heads.
#[derive(Module, Debug)]
pub struct Net<B: Backend> {
    linear: Linear<B>,
    linear_actor: Linear<B>,
    linear_critic: Linear<B>,
}

impl<B: Backend> Net<B> {
    #[allow(unused)]
    pub fn new(input_size: usize, dense_size: usize, output_size: usize) -> Self {
        let initializer = Initializer::XavierUniform { gain: 1.0 };
        Self {
            linear: LinearConfig::new(input_size, dense_size)
                .with_initializer(initializer.clone())
                .init(&Default::default()),
            linear_actor: LinearConfig::new(dense_size, output_size)
                .with_initializer(initializer.clone())
                .init(&Default::default()),
            linear_critic: LinearConfig::new(dense_size, 1)
                .with_initializer(initializer)
                .init(&Default::default()),
        }
    }
}

impl<B: Backend> Model<B, Tensor<B, 2>, PPOOutput<B>, Tensor<B, 2>> for Net<B> {
    fn forward(&self, input: Tensor<B, 2>) -> PPOOutput<B> {
        let layer_0_output = relu(self.linear.forward(input));
        let policies = softmax(self.linear_actor.forward(layer_0_output.clone()), 1);
        let values = self.linear_critic.forward(layer_0_output);

        PPOOutput::<B>::new(policies, values)
    }

    fn infer(&self, input: Tensor<B, 2>) -> Tensor<B, 2> {
        let layer_0_output = relu(self.linear.forward(input));
        softmax(self.linear_actor.forward(layer_0_output.clone()), 1)
    }
}

impl<B: Backend> PPOModel<B> for Net<B> {}
#[allow(unused)]
const MEMORY_SIZE: usize = 512;

type MyAgent<E, B> = PPO<E, B, Net<B>>;

/// Trains a PPO agent; the rollout memory is cleared after every episode's update.
#[allow(unused)]
pub fn run<
    E: Environment + AsMut<TrictracEnvironment>,
    B: AutodiffBackend<InnerBackend = NdArray>,
>(
    conf: &Config,
    visualized: bool,
    // ) -> PPO<E, B, Net<B>> {
) -> impl Agent<E> {
    let mut env = E::new(visualized);
    env.as_mut().max_steps = conf.max_steps;

    let mut model = Net::<B>::new(
        <<E as Environment>::StateType as State>::size(),
        conf.dense_size,
        <<E as Environment>::ActionType as Action>::size(),
    );
    let agent = MyAgent::default();
    let config = PPOTrainingConfig {
        gamma: conf.gamma,
        lambda: conf.lambda,
        epsilon_clip: conf.epsilon_clip,
        critic_weight: conf.critic_weight,
        entropy_weight: conf.entropy_weight,
        learning_rate: conf.learning_rate,
        epochs: conf.epochs,
        batch_size: conf.batch_size,
        clip_grad: Some(burn::grad_clipping::GradientClippingConfig::Value(
            conf.clip_grad,
        )),
    };

    let mut optimizer = AdamWConfig::new()
        .with_grad_clipping(config.clip_grad.clone())
        .init();
    let mut memory = Memory::<E, B, MEMORY_SIZE>::default();
    for episode in 0..conf.num_episodes {
        let mut episode_done = false;
        let mut episode_reward = 0.0;
        let mut episode_duration = 0_usize;
        let mut now = SystemTime::now();

        env.reset();
        while !episode_done {
            let state = env.state();
            if let Some(action) = MyAgent::<E, _>::react_with_model(&state, &model) {
                let snapshot = env.step(action);
                episode_reward += <<E as Environment>::RewardType as Into<ElemType>>::into(
                    snapshot.reward().clone(),
                );

                memory.push(
                    state,
                    *snapshot.state(),
                    action,
                    snapshot.reward().clone(),
                    snapshot.done(),
                );

                episode_duration += 1;
                episode_done = snapshot.done() || episode_duration >= conf.max_steps;
            }
        }
        println!(
            "{{\"episode\": {episode}, \"reward\": {episode_reward:.4}, \"steps count\": {episode_duration}, \"duration\": {}}}",
            now.elapsed().unwrap().as_secs(),
        );

        now = SystemTime::now();
        model = MyAgent::train::<MEMORY_SIZE>(model, &memory, &mut optimizer, &config);
        memory.clear();
    }

    if let Some(path) = &conf.save_path {
        let device = NdArrayDevice::default();
        let recorder = CompactRecorder::new();
        let tmp_path = env::temp_dir().join("tmp_model.mpk");

        // Save the trained model (backend B) to a temporary file
        recorder
            .record(model.clone().into_record(), tmp_path.clone())
            .expect("Failed to save temporary model");

        // Create a new model instance with the target backend (NdArray)
        let model_to_save: Net<NdArray<ElemType>> = Net::new(
            <<E as Environment>::StateType as State>::size(),
            conf.dense_size,
            <<E as Environment>::ActionType as Action>::size(),
        );

        // Load the record from the temporary file into the new model
        let record = recorder
            .load(tmp_path.clone(), &device)
            .expect("Failed to load temporary model");
        let model_with_loaded_weights = model_to_save.load_record(record);

        // Clean up the temporary file
        fs::remove_file(tmp_path).expect("Failed to remove temporary model file");

        save_model(&model_with_loaded_weights, path);
    }
    agent.valid(model)
}

pub fn save_model(model: &Net<NdArray<ElemType>>, path: &String) {
    let recorder = CompactRecorder::new();
    let model_path = format!("{path}.mpk");
    println!("info: Validation model saved: {model_path}");
    recorder
        .record(model.clone().into_record(), model_path.into())
        .unwrap();
}

pub fn load_model(dense_size: usize, path: &String) -> Option<Net<NdArray<ElemType>>> {
    let model_path = format!("{path}.mpk");
    // println!("Loading model from: {model_path}");

    CompactRecorder::new()
        .load(model_path.into(), &NdArrayDevice::default())
        .map(|record| {
            Net::new(
                <TrictracEnvironment as Environment>::StateType::size(),
                dense_size,
                <TrictracEnvironment as Environment>::ActionType::size(),
            )
            .load_record(record)
        })
        .ok()
}
```
bot/src/burnrl/algos/ppo_big.rs (new file, 191 lines; identical to ppo.rs except for the `environment_big` import; French log strings translated)

```rust
use crate::burnrl::environment_big::TrictracEnvironment;
use crate::burnrl::utils::Config;
use burn::backend::{ndarray::NdArrayDevice, NdArray};
use burn::module::Module;
use burn::nn::{Initializer, Linear, LinearConfig};
use burn::optim::AdamWConfig;
use burn::record::{CompactRecorder, Recorder};
use burn::tensor::activation::{relu, softmax};
use burn::tensor::backend::{AutodiffBackend, Backend};
use burn::tensor::Tensor;
use burn_rl::agent::{PPOModel, PPOOutput, PPOTrainingConfig, PPO};
use burn_rl::base::{Action, Agent, ElemType, Environment, Memory, Model, State};
use std::env;
use std::fs;
use std::time::SystemTime;

#[derive(Module, Debug)]
pub struct Net<B: Backend> {
    linear: Linear<B>,
    linear_actor: Linear<B>,
    linear_critic: Linear<B>,
}

impl<B: Backend> Net<B> {
    #[allow(unused)]
    pub fn new(input_size: usize, dense_size: usize, output_size: usize) -> Self {
        let initializer = Initializer::XavierUniform { gain: 1.0 };
        Self {
            linear: LinearConfig::new(input_size, dense_size)
                .with_initializer(initializer.clone())
                .init(&Default::default()),
            linear_actor: LinearConfig::new(dense_size, output_size)
                .with_initializer(initializer.clone())
                .init(&Default::default()),
            linear_critic: LinearConfig::new(dense_size, 1)
                .with_initializer(initializer)
                .init(&Default::default()),
        }
    }
}

impl<B: Backend> Model<B, Tensor<B, 2>, PPOOutput<B>, Tensor<B, 2>> for Net<B> {
    fn forward(&self, input: Tensor<B, 2>) -> PPOOutput<B> {
        let layer_0_output = relu(self.linear.forward(input));
        let policies = softmax(self.linear_actor.forward(layer_0_output.clone()), 1);
        let values = self.linear_critic.forward(layer_0_output);

        PPOOutput::<B>::new(policies, values)
    }

    fn infer(&self, input: Tensor<B, 2>) -> Tensor<B, 2> {
        let layer_0_output = relu(self.linear.forward(input));
        softmax(self.linear_actor.forward(layer_0_output.clone()), 1)
    }
}

impl<B: Backend> PPOModel<B> for Net<B> {}
#[allow(unused)]
const MEMORY_SIZE: usize = 512;

type MyAgent<E, B> = PPO<E, B, Net<B>>;

#[allow(unused)]
pub fn run<
    E: Environment + AsMut<TrictracEnvironment>,
    B: AutodiffBackend<InnerBackend = NdArray>,
>(
    conf: &Config,
    visualized: bool,
    // ) -> PPO<E, B, Net<B>> {
) -> impl Agent<E> {
    let mut env = E::new(visualized);
    env.as_mut().max_steps = conf.max_steps;

    let mut model = Net::<B>::new(
        <<E as Environment>::StateType as State>::size(),
        conf.dense_size,
        <<E as Environment>::ActionType as Action>::size(),
    );
    let agent = MyAgent::default();
    let config = PPOTrainingConfig {
        gamma: conf.gamma,
        lambda: conf.lambda,
        epsilon_clip: conf.epsilon_clip,
        critic_weight: conf.critic_weight,
        entropy_weight: conf.entropy_weight,
        learning_rate: conf.learning_rate,
        epochs: conf.epochs,
        batch_size: conf.batch_size,
        clip_grad: Some(burn::grad_clipping::GradientClippingConfig::Value(
            conf.clip_grad,
        )),
    };

    let mut optimizer = AdamWConfig::new()
        .with_grad_clipping(config.clip_grad.clone())
        .init();
    let mut memory = Memory::<E, B, MEMORY_SIZE>::default();
    for episode in 0..conf.num_episodes {
        let mut episode_done = false;
        let mut episode_reward = 0.0;
        let mut episode_duration = 0_usize;
        let mut now = SystemTime::now();

        env.reset();
        while !episode_done {
            let state = env.state();
            if let Some(action) = MyAgent::<E, _>::react_with_model(&state, &model) {
                let snapshot = env.step(action);
                episode_reward += <<E as Environment>::RewardType as Into<ElemType>>::into(
                    snapshot.reward().clone(),
                );

                memory.push(
                    state,
                    *snapshot.state(),
                    action,
                    snapshot.reward().clone(),
                    snapshot.done(),
                );

                episode_duration += 1;
                episode_done = snapshot.done() || episode_duration >= conf.max_steps;
            }
        }
        println!(
            "{{\"episode\": {episode}, \"reward\": {episode_reward:.4}, \"steps count\": {episode_duration}, \"duration\": {}}}",
            now.elapsed().unwrap().as_secs(),
        );

        now = SystemTime::now();
        model = MyAgent::train::<MEMORY_SIZE>(model, &memory, &mut optimizer, &config);
        memory.clear();
    }

    if let Some(path) = &conf.save_path {
        let device = NdArrayDevice::default();
        let recorder = CompactRecorder::new();
        let tmp_path = env::temp_dir().join("tmp_model.mpk");

        // Save the trained model (backend B) to a temporary file
        recorder
            .record(model.clone().into_record(), tmp_path.clone())
            .expect("Failed to save temporary model");

        // Create a new model instance with the target backend (NdArray)
        let model_to_save: Net<NdArray<ElemType>> = Net::new(
            <<E as Environment>::StateType as State>::size(),
            conf.dense_size,
            <<E as Environment>::ActionType as Action>::size(),
        );

        // Load the record from the temporary file into the new model
        let record = recorder
            .load(tmp_path.clone(), &device)
            .expect("Failed to load temporary model");
        let model_with_loaded_weights = model_to_save.load_record(record);

        // Clean up the temporary file
        fs::remove_file(tmp_path).expect("Failed to remove temporary model file");

        save_model(&model_with_loaded_weights, path);
    }
    agent.valid(model)
}

pub fn save_model(model: &Net<NdArray<ElemType>>, path: &String) {
    let recorder = CompactRecorder::new();
    let model_path = format!("{path}.mpk");
    println!("info: Validation model saved: {model_path}");
    recorder
        .record(model.clone().into_record(), model_path.into())
        .unwrap();
}

pub fn load_model(dense_size: usize, path: &String) -> Option<Net<NdArray<ElemType>>> {
    let model_path = format!("{path}.mpk");
    // println!("Loading model from: {model_path}");

    CompactRecorder::new()
        .load(model_path.into(), &NdArrayDevice::default())
        .map(|record| {
            Net::new(
                <TrictracEnvironment as Environment>::StateType::size(),
                dense_size,
                <TrictracEnvironment as Environment>::ActionType::size(),
            )
            .load_record(record)
        })
        .ok()
}
```
bot/src/burnrl/algos/ppo_valid.rs (new file, 191 lines; identical to ppo.rs except for the `environment_valid` import; French log strings translated)

```rust
use crate::burnrl::environment_valid::TrictracEnvironment;
use crate::burnrl::utils::Config;
use burn::backend::{ndarray::NdArrayDevice, NdArray};
use burn::module::Module;
use burn::nn::{Initializer, Linear, LinearConfig};
use burn::optim::AdamWConfig;
use burn::record::{CompactRecorder, Recorder};
use burn::tensor::activation::{relu, softmax};
use burn::tensor::backend::{AutodiffBackend, Backend};
use burn::tensor::Tensor;
use burn_rl::agent::{PPOModel, PPOOutput, PPOTrainingConfig, PPO};
use burn_rl::base::{Action, Agent, ElemType, Environment, Memory, Model, State};
use std::env;
use std::fs;
use std::time::SystemTime;

#[derive(Module, Debug)]
pub struct Net<B: Backend> {
    linear: Linear<B>,
    linear_actor: Linear<B>,
    linear_critic: Linear<B>,
}

impl<B: Backend> Net<B> {
    #[allow(unused)]
    pub fn new(input_size: usize, dense_size: usize, output_size: usize) -> Self {
        let initializer = Initializer::XavierUniform { gain: 1.0 };
        Self {
            linear: LinearConfig::new(input_size, dense_size)
                .with_initializer(initializer.clone())
                .init(&Default::default()),
            linear_actor: LinearConfig::new(dense_size, output_size)
                .with_initializer(initializer.clone())
                .init(&Default::default()),
            linear_critic: LinearConfig::new(dense_size, 1)
                .with_initializer(initializer)
                .init(&Default::default()),
        }
    }
}

impl<B: Backend> Model<B, Tensor<B, 2>, PPOOutput<B>, Tensor<B, 2>> for Net<B> {
    fn forward(&self, input: Tensor<B, 2>) -> PPOOutput<B> {
        let layer_0_output = relu(self.linear.forward(input));
        let policies = softmax(self.linear_actor.forward(layer_0_output.clone()), 1);
        let values = self.linear_critic.forward(layer_0_output);

        PPOOutput::<B>::new(policies, values)
    }

    fn infer(&self, input: Tensor<B, 2>) -> Tensor<B, 2> {
        let layer_0_output = relu(self.linear.forward(input));
        softmax(self.linear_actor.forward(layer_0_output.clone()), 1)
    }
}

impl<B: Backend> PPOModel<B> for Net<B> {}
#[allow(unused)]
const MEMORY_SIZE: usize = 512;

type MyAgent<E, B> = PPO<E, B, Net<B>>;

#[allow(unused)]
pub fn run<
    E: Environment + AsMut<TrictracEnvironment>,
    B: AutodiffBackend<InnerBackend = NdArray>,
>(
    conf: &Config,
    visualized: bool,
    // ) -> PPO<E, B, Net<B>> {
) -> impl Agent<E> {
    let mut env = E::new(visualized);
    env.as_mut().max_steps = conf.max_steps;

    let mut model = Net::<B>::new(
        <<E as Environment>::StateType as State>::size(),
        conf.dense_size,
        <<E as Environment>::ActionType as Action>::size(),
    );
    let agent = MyAgent::default();
    let config = PPOTrainingConfig {
        gamma: conf.gamma,
        lambda: conf.lambda,
        epsilon_clip: conf.epsilon_clip,
        critic_weight: conf.critic_weight,
        entropy_weight: conf.entropy_weight,
        learning_rate: conf.learning_rate,
        epochs: conf.epochs,
        batch_size: conf.batch_size,
        clip_grad: Some(burn::grad_clipping::GradientClippingConfig::Value(
            conf.clip_grad,
        )),
    };

    let mut optimizer = AdamWConfig::new()
        .with_grad_clipping(config.clip_grad.clone())
        .init();
    let mut memory = Memory::<E, B, MEMORY_SIZE>::default();
    for episode in 0..conf.num_episodes {
        let mut episode_done = false;
        let mut episode_reward = 0.0;
        let mut episode_duration = 0_usize;
        let mut now = SystemTime::now();

        env.reset();
        while !episode_done {
            let state = env.state();
            if let Some(action) = MyAgent::<E, _>::react_with_model(&state, &model) {
                let snapshot = env.step(action);
                episode_reward += <<E as Environment>::RewardType as Into<ElemType>>::into(
                    snapshot.reward().clone(),
                );

                memory.push(
                    state,
                    *snapshot.state(),
                    action,
                    snapshot.reward().clone(),
                    snapshot.done(),
                );

                episode_duration += 1;
                episode_done = snapshot.done() || episode_duration >= conf.max_steps;
            }
        }
        println!(
            "{{\"episode\": {episode}, \"reward\": {episode_reward:.4}, \"steps count\": {episode_duration}, \"duration\": {}}}",
            now.elapsed().unwrap().as_secs(),
        );

        now = SystemTime::now();
        model = MyAgent::train::<MEMORY_SIZE>(model, &memory, &mut optimizer, &config);
        memory.clear();
    }

    if let Some(path) = &conf.save_path {
        let device = NdArrayDevice::default();
        let recorder = CompactRecorder::new();
        let tmp_path = env::temp_dir().join("tmp_model.mpk");

        // Save the trained model (backend B) to a temporary file
        recorder
            .record(model.clone().into_record(), tmp_path.clone())
            .expect("Failed to save temporary model");

        // Create a new model instance with the target backend (NdArray)
        let model_to_save: Net<NdArray<ElemType>> = Net::new(
            <<E as Environment>::StateType as State>::size(),
            conf.dense_size,
            <<E as Environment>::ActionType as Action>::size(),
        );

        // Load the record from the temporary file into the new model
        let record = recorder
            .load(tmp_path.clone(), &device)
            .expect("Failed to load temporary model");
        let model_with_loaded_weights = model_to_save.load_record(record);

        // Clean up the temporary file
        fs::remove_file(tmp_path).expect("Failed to remove temporary model file");

        save_model(&model_with_loaded_weights, path);
    }
    agent.valid(model)
}

pub fn save_model(model: &Net<NdArray<ElemType>>, path: &String) {
    let recorder = CompactRecorder::new();
    let model_path = format!("{path}.mpk");
    println!("info: Validation model saved: {model_path}");
    recorder
        .record(model.clone().into_record(), model_path.into())
        .unwrap();
}

pub fn load_model(dense_size: usize, path: &String) -> Option<Net<NdArray<ElemType>>> {
    let model_path = format!("{path}.mpk");
    // println!("Loading model from: {model_path}");

    CompactRecorder::new()
        .load(model_path.into(), &NdArrayDevice::default())
        .map(|record| {
            Net::new(
                <TrictracEnvironment as Environment>::StateType::size(),
                dense_size,
                <TrictracEnvironment as Environment>::ActionType::size(),
            )
            .load_record(record)
        })
        .ok()
}
```
221
bot/src/burnrl/algos/sac.rs
Normal file
221
bot/src/burnrl/algos/sac.rs
Normal file
|
|
@ -0,0 +1,221 @@
|
|||
use crate::burnrl::environment::TrictracEnvironment;
|
||||
use crate::burnrl::utils::{soft_update_linear, Config};
|
||||
use burn::backend::{ndarray::NdArrayDevice, NdArray};
|
||||
use burn::module::Module;
|
||||
use burn::nn::{Linear, LinearConfig};
|
||||
use burn::optim::AdamWConfig;
|
||||
use burn::record::{CompactRecorder, Recorder};
|
||||
use burn::tensor::activation::{relu, softmax};
|
||||
use burn::tensor::backend::{AutodiffBackend, Backend};
|
||||
use burn::tensor::Tensor;
|
||||
use burn_rl::agent::{SACActor, SACCritic, SACNets, SACOptimizer, SACTrainingConfig, SAC};
|
||||
use burn_rl::base::{Action, Agent, ElemType, Environment, Memory, Model, State};
|
||||
use std::time::SystemTime;
|
||||
|
||||
#[derive(Module, Debug)]
|
||||
pub struct Actor<B: Backend> {
|
||||
linear_0: Linear<B>,
|
||||
linear_1: Linear<B>,
|
||||
linear_2: Linear<B>,
|
||||
}
|
||||
|
||||
impl<B: Backend> Actor<B> {
|
||||
pub fn new(input_size: usize, dense_size: usize, output_size: usize) -> Self {
|
||||
Self {
|
||||
linear_0: LinearConfig::new(input_size, dense_size).init(&Default::default()),
|
||||
linear_1: LinearConfig::new(dense_size, dense_size).init(&Default::default()),
|
||||
linear_2: LinearConfig::new(dense_size, output_size).init(&Default::default()),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<B: Backend> Model<B, Tensor<B, 2>, Tensor<B, 2>> for Actor<B> {
|
||||
fn forward(&self, input: Tensor<B, 2>) -> Tensor<B, 2> {
|
||||
let layer_0_output = relu(self.linear_0.forward(input));
|
||||
let layer_1_output = relu(self.linear_1.forward(layer_0_output));
|
||||
|
||||
softmax(self.linear_2.forward(layer_1_output), 1)
|
||||
}
|
||||
|
||||
fn infer(&self, input: Tensor<B, 2>) -> Tensor<B, 2> {
|
||||
self.forward(input)
|
||||
}
|
||||
}
|
||||
|
||||
impl<B: Backend> SACActor<B> for Actor<B> {}
|
||||
|
||||
#[derive(Module, Debug)]
|
||||
pub struct Critic<B: Backend> {
|
||||
linear_0: Linear<B>,
|
||||
linear_1: Linear<B>,
|
||||
linear_2: Linear<B>,
|
||||
}
|
||||
|
||||
impl<B: Backend> Critic<B> {
|
||||
pub fn new(input_size: usize, dense_size: usize, output_size: usize) -> Self {
|
||||
Self {
|
||||
linear_0: LinearConfig::new(input_size, dense_size).init(&Default::default()),
|
||||
linear_1: LinearConfig::new(dense_size, dense_size).init(&Default::default()),
|
||||
linear_2: LinearConfig::new(dense_size, output_size).init(&Default::default()),
|
||||
}
|
||||
}
|
||||
|
||||
fn consume(self) -> (Linear<B>, Linear<B>, Linear<B>) {
|
||||
(self.linear_0, self.linear_1, self.linear_2)
|
||||
}
|
||||
}
|
||||
|
||||
impl<B: Backend> Model<B, Tensor<B, 2>, Tensor<B, 2>> for Critic<B> {
|
||||
fn forward(&self, input: Tensor<B, 2>) -> Tensor<B, 2> {
|
||||
let layer_0_output = relu(self.linear_0.forward(input));
|
||||
let layer_1_output = relu(self.linear_1.forward(layer_0_output));
|
||||
|
||||
self.linear_2.forward(layer_1_output)
|
||||
}
|
||||
|
||||
fn infer(&self, input: Tensor<B, 2>) -> Tensor<B, 2> {
|
||||
self.forward(input)
|
||||
}
|
||||
}
|
||||
|
||||
impl<B: Backend> SACCritic<B> for Critic<B> {
|
||||
fn soft_update(this: Self, that: &Self, tau: ElemType) -> Self {
|
||||
let (linear_0, linear_1, linear_2) = this.consume();
|
||||
|
||||
Self {
|
||||
linear_0: soft_update_linear(linear_0, &that.linear_0, tau),
|
||||
linear_1: soft_update_linear(linear_1, &that.linear_1, tau),
|
||||
linear_2: soft_update_linear(linear_2, &that.linear_2, tau),
|
||||
}
|
||||
}
|
||||
}
#[allow(unused)]
const MEMORY_SIZE: usize = 4096;

type MyAgent<E, B> = SAC<E, B, Actor<B>>;

#[allow(unused)]
pub fn run<
    E: Environment + AsMut<TrictracEnvironment>,
    B: AutodiffBackend<InnerBackend = NdArray>,
>(
    conf: &Config,
    visualized: bool,
) -> impl Agent<E> {
    let mut env = E::new(visualized);
    env.as_mut().max_steps = conf.max_steps;
    let state_dim = <<E as Environment>::StateType as State>::size();
    let action_dim = <<E as Environment>::ActionType as Action>::size();

    let actor = Actor::<B>::new(state_dim, conf.dense_size, action_dim);
    let critic_1 = Critic::<B>::new(state_dim, conf.dense_size, action_dim);
    let critic_2 = Critic::<B>::new(state_dim, conf.dense_size, action_dim);
    let mut nets = SACNets::<B, Actor<B>, Critic<B>>::new(actor, critic_1, critic_2);

    let mut agent = MyAgent::default();

    let config = SACTrainingConfig {
        gamma: conf.gamma,
        tau: conf.tau,
        learning_rate: conf.learning_rate,
        min_probability: conf.min_probability,
        batch_size: conf.batch_size,
        clip_grad: Some(burn::grad_clipping::GradientClippingConfig::Value(
            conf.clip_grad,
        )),
    };

    let mut memory = Memory::<E, B, MEMORY_SIZE>::default();

    let optimizer_config = AdamWConfig::new().with_grad_clipping(config.clip_grad.clone());

    let mut optimizer = SACOptimizer::new(
        optimizer_config.clone().init(),
        optimizer_config.clone().init(),
        optimizer_config.clone().init(),
        optimizer_config.init(),
    );

    let mut step = 0_usize;

    for episode in 0..conf.num_episodes {
        let mut episode_done = false;
        let mut episode_reward = 0.0;
        let mut episode_duration = 0_usize;
        let mut state = env.state();
        let mut now = SystemTime::now();

        while !episode_done {
            if let Some(action) = MyAgent::<E, _>::react_with_model(&state, &nets.actor) {
                let snapshot = env.step(action);

                episode_reward += <<E as Environment>::RewardType as Into<ElemType>>::into(
                    snapshot.reward().clone(),
                );

                memory.push(
                    state,
                    *snapshot.state(),
                    action,
                    snapshot.reward().clone(),
                    snapshot.done(),
                );

                if config.batch_size < memory.len() {
                    nets = agent.train::<MEMORY_SIZE, _>(nets, &memory, &mut optimizer, &config);
                }

                step += 1;
                episode_duration += 1;

                if snapshot.done() || episode_duration >= conf.max_steps {
                    env.reset();
                    episode_done = true;

                    println!(
                        "{{\"episode\": {episode}, \"reward\": {episode_reward:.4}, \"steps count\": {episode_duration}, \"duration\": {}}}",
                        now.elapsed().unwrap().as_secs()
                    );
                    now = SystemTime::now();
                } else {
                    state = *snapshot.state();
                }
            }
        }
    }

    let valid_agent = agent.valid(nets.actor);
    if let Some(path) = &conf.save_path {
        if let Some(model) = valid_agent.model() {
            save_model(model, path);
        }
    }
    valid_agent
}

pub fn save_model(model: &Actor<NdArray<ElemType>>, path: &String) {
    let recorder = CompactRecorder::new();
    let model_path = format!("{path}.mpk");
    println!("info: validation model saved: {model_path}");
    recorder
        .record(model.clone().into_record(), model_path.into())
        .unwrap();
}

pub fn load_model(dense_size: usize, path: &String) -> Option<Actor<NdArray<ElemType>>> {
    let model_path = format!("{path}.mpk");
    // println!("Loading model from: {model_path}");

    CompactRecorder::new()
        .load(model_path.into(), &NdArrayDevice::default())
        .map(|record| {
            Actor::new(
                <TrictracEnvironment as Environment>::StateType::size(),
                dense_size,
                <TrictracEnvironment as Environment>::ActionType::size(),
            )
            .load_record(record)
        })
        .ok()
}
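For reference, a minimal round trip through the two helpers above; the "models/sac" path and dense_size = 256 are hypothetical values, only the `save_model`/`load_model` signatures come from this file:

// Hypothetical usage; "models/sac" and 256 are made-up example values.
let path = String::from("models/sac");
if let Some(actor) = load_model(256, &path) {
    // `actor` is an Actor<NdArray<ElemType>> ready for infer();
    // save_model writes it back through CompactRecorder as "{path}.mpk".
    save_model(&actor, &path);
}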

222
bot/src/burnrl/algos/sac_big.rs
Normal file

@@ -0,0 +1,222 @@
use crate::burnrl::environment_big::TrictracEnvironment;
use crate::burnrl::utils::{soft_update_linear, Config};
use burn::backend::{ndarray::NdArrayDevice, NdArray};
use burn::module::Module;
use burn::nn::{Linear, LinearConfig};
use burn::optim::AdamWConfig;
use burn::record::{CompactRecorder, Recorder};
use burn::tensor::activation::{relu, softmax};
use burn::tensor::backend::{AutodiffBackend, Backend};
use burn::tensor::Tensor;
use burn_rl::agent::{SACActor, SACCritic, SACNets, SACOptimizer, SACTrainingConfig, SAC};
use burn_rl::base::{Action, Agent, ElemType, Environment, Memory, Model, State};
use std::time::SystemTime;

#[derive(Module, Debug)]
pub struct Actor<B: Backend> {
    linear_0: Linear<B>,
    linear_1: Linear<B>,
    linear_2: Linear<B>,
}

impl<B: Backend> Actor<B> {
    pub fn new(input_size: usize, dense_size: usize, output_size: usize) -> Self {
        Self {
            linear_0: LinearConfig::new(input_size, dense_size).init(&Default::default()),
            linear_1: LinearConfig::new(dense_size, dense_size).init(&Default::default()),
            linear_2: LinearConfig::new(dense_size, output_size).init(&Default::default()),
        }
    }
}

impl<B: Backend> Model<B, Tensor<B, 2>, Tensor<B, 2>> for Actor<B> {
    fn forward(&self, input: Tensor<B, 2>) -> Tensor<B, 2> {
        let layer_0_output = relu(self.linear_0.forward(input));
        let layer_1_output = relu(self.linear_1.forward(layer_0_output));

        softmax(self.linear_2.forward(layer_1_output), 1)
    }

    fn infer(&self, input: Tensor<B, 2>) -> Tensor<B, 2> {
        self.forward(input)
    }
}

impl<B: Backend> SACActor<B> for Actor<B> {}

#[derive(Module, Debug)]
pub struct Critic<B: Backend> {
    linear_0: Linear<B>,
    linear_1: Linear<B>,
    linear_2: Linear<B>,
}

impl<B: Backend> Critic<B> {
    pub fn new(input_size: usize, dense_size: usize, output_size: usize) -> Self {
        Self {
            linear_0: LinearConfig::new(input_size, dense_size).init(&Default::default()),
            linear_1: LinearConfig::new(dense_size, dense_size).init(&Default::default()),
            linear_2: LinearConfig::new(dense_size, output_size).init(&Default::default()),
        }
    }

    fn consume(self) -> (Linear<B>, Linear<B>, Linear<B>) {
        (self.linear_0, self.linear_1, self.linear_2)
    }
}

impl<B: Backend> Model<B, Tensor<B, 2>, Tensor<B, 2>> for Critic<B> {
    fn forward(&self, input: Tensor<B, 2>) -> Tensor<B, 2> {
        let layer_0_output = relu(self.linear_0.forward(input));
        let layer_1_output = relu(self.linear_1.forward(layer_0_output));

        self.linear_2.forward(layer_1_output)
    }

    fn infer(&self, input: Tensor<B, 2>) -> Tensor<B, 2> {
        self.forward(input)
    }
}

impl<B: Backend> SACCritic<B> for Critic<B> {
    fn soft_update(this: Self, that: &Self, tau: ElemType) -> Self {
        let (linear_0, linear_1, linear_2) = this.consume();

        Self {
            linear_0: soft_update_linear(linear_0, &that.linear_0, tau),
            linear_1: soft_update_linear(linear_1, &that.linear_1, tau),
            linear_2: soft_update_linear(linear_2, &that.linear_2, tau),
        }
    }
}

#[allow(unused)]
const MEMORY_SIZE: usize = 4096;

type MyAgent<E, B> = SAC<E, B, Actor<B>>;

#[allow(unused)]
pub fn run<
    E: Environment + AsMut<TrictracEnvironment>,
    B: AutodiffBackend<InnerBackend = NdArray>,
>(
    conf: &Config,
    visualized: bool,
) -> impl Agent<E> {
    let mut env = E::new(visualized);
    env.as_mut().max_steps = conf.max_steps;
    let state_dim = <<E as Environment>::StateType as State>::size();
    let action_dim = <<E as Environment>::ActionType as Action>::size();

    let actor = Actor::<B>::new(state_dim, conf.dense_size, action_dim);
    let critic_1 = Critic::<B>::new(state_dim, conf.dense_size, action_dim);
    let critic_2 = Critic::<B>::new(state_dim, conf.dense_size, action_dim);
    let mut nets = SACNets::<B, Actor<B>, Critic<B>>::new(actor, critic_1, critic_2);

    let mut agent = MyAgent::default();

    let config = SACTrainingConfig {
        gamma: conf.gamma,
        tau: conf.tau,
        learning_rate: conf.learning_rate,
        min_probability: conf.min_probability,
        batch_size: conf.batch_size,
        clip_grad: Some(burn::grad_clipping::GradientClippingConfig::Value(
            conf.clip_grad,
        )),
    };

    let mut memory = Memory::<E, B, MEMORY_SIZE>::default();

    let optimizer_config = AdamWConfig::new().with_grad_clipping(config.clip_grad.clone());

    let mut optimizer = SACOptimizer::new(
        optimizer_config.clone().init(),
        optimizer_config.clone().init(),
        optimizer_config.clone().init(),
        optimizer_config.init(),
    );

    let mut step = 0_usize;

    for episode in 0..conf.num_episodes {
        let mut episode_done = false;
        let mut episode_reward = 0.0;
        let mut episode_duration = 0_usize;
        let mut state = env.state();
        let mut now = SystemTime::now();

        while !episode_done {
            if let Some(action) = MyAgent::<E, _>::react_with_model(&state, &nets.actor) {
                let snapshot = env.step(action);

                episode_reward += <<E as Environment>::RewardType as Into<ElemType>>::into(
                    snapshot.reward().clone(),
                );

                memory.push(
                    state,
                    *snapshot.state(),
                    action,
                    snapshot.reward().clone(),
                    snapshot.done(),
                );

                if config.batch_size < memory.len() {
                    nets = agent.train::<MEMORY_SIZE, _>(nets, &memory, &mut optimizer, &config);
                }

                step += 1;
                episode_duration += 1;

                if snapshot.done() || episode_duration >= conf.max_steps {
                    env.reset();
                    episode_done = true;

                    println!(
                        "{{\"episode\": {episode}, \"reward\": {episode_reward:.4}, \"steps count\": {episode_duration}, \"duration\": {}}}",
                        now.elapsed().unwrap().as_secs()
                    );
                    now = SystemTime::now();
                } else {
                    state = *snapshot.state();
                }
            }
        }
    }

    let valid_agent = agent.valid(nets.actor);
    if let Some(path) = &conf.save_path {
        if let Some(model) = valid_agent.model() {
            save_model(model, path);
        }
    }
    valid_agent
}

pub fn save_model(model: &Actor<NdArray<ElemType>>, path: &String) {
    let recorder = CompactRecorder::new();
    let model_path = format!("{path}.mpk");
    println!("info: validation model saved: {model_path}");
    recorder
        .record(model.clone().into_record(), model_path.into())
        .unwrap();
}

pub fn load_model(dense_size: usize, path: &String) -> Option<Actor<NdArray<ElemType>>> {
    let model_path = format!("{path}.mpk");
    // println!("Loading model from: {model_path}");

    CompactRecorder::new()
        .load(model_path.into(), &NdArrayDevice::default())
        .map(|record| {
            Actor::new(
                <TrictracEnvironment as Environment>::StateType::size(),
                dense_size,
                <TrictracEnvironment as Environment>::ActionType::size(),
            )
            .load_record(record)
        })
        .ok()
}

222
bot/src/burnrl/algos/sac_valid.rs
Normal file

@@ -0,0 +1,222 @@
use crate::burnrl::environment_valid::TrictracEnvironment;
use crate::burnrl::utils::{soft_update_linear, Config};
use burn::backend::{ndarray::NdArrayDevice, NdArray};
use burn::module::Module;
use burn::nn::{Linear, LinearConfig};
use burn::optim::AdamWConfig;
use burn::record::{CompactRecorder, Recorder};
use burn::tensor::activation::{relu, softmax};
use burn::tensor::backend::{AutodiffBackend, Backend};
use burn::tensor::Tensor;
use burn_rl::agent::{SACActor, SACCritic, SACNets, SACOptimizer, SACTrainingConfig, SAC};
use burn_rl::base::{Action, Agent, ElemType, Environment, Memory, Model, State};
use std::time::SystemTime;

#[derive(Module, Debug)]
pub struct Actor<B: Backend> {
    linear_0: Linear<B>,
    linear_1: Linear<B>,
    linear_2: Linear<B>,
}

impl<B: Backend> Actor<B> {
    pub fn new(input_size: usize, dense_size: usize, output_size: usize) -> Self {
        Self {
            linear_0: LinearConfig::new(input_size, dense_size).init(&Default::default()),
            linear_1: LinearConfig::new(dense_size, dense_size).init(&Default::default()),
            linear_2: LinearConfig::new(dense_size, output_size).init(&Default::default()),
        }
    }
}

impl<B: Backend> Model<B, Tensor<B, 2>, Tensor<B, 2>> for Actor<B> {
    fn forward(&self, input: Tensor<B, 2>) -> Tensor<B, 2> {
        let layer_0_output = relu(self.linear_0.forward(input));
        let layer_1_output = relu(self.linear_1.forward(layer_0_output));

        softmax(self.linear_2.forward(layer_1_output), 1)
    }

    fn infer(&self, input: Tensor<B, 2>) -> Tensor<B, 2> {
        self.forward(input)
    }
}

impl<B: Backend> SACActor<B> for Actor<B> {}

#[derive(Module, Debug)]
pub struct Critic<B: Backend> {
    linear_0: Linear<B>,
    linear_1: Linear<B>,
    linear_2: Linear<B>,
}

impl<B: Backend> Critic<B> {
    pub fn new(input_size: usize, dense_size: usize, output_size: usize) -> Self {
        Self {
            linear_0: LinearConfig::new(input_size, dense_size).init(&Default::default()),
            linear_1: LinearConfig::new(dense_size, dense_size).init(&Default::default()),
            linear_2: LinearConfig::new(dense_size, output_size).init(&Default::default()),
        }
    }

    fn consume(self) -> (Linear<B>, Linear<B>, Linear<B>) {
        (self.linear_0, self.linear_1, self.linear_2)
    }
}

impl<B: Backend> Model<B, Tensor<B, 2>, Tensor<B, 2>> for Critic<B> {
    fn forward(&self, input: Tensor<B, 2>) -> Tensor<B, 2> {
        let layer_0_output = relu(self.linear_0.forward(input));
        let layer_1_output = relu(self.linear_1.forward(layer_0_output));

        self.linear_2.forward(layer_1_output)
    }

    fn infer(&self, input: Tensor<B, 2>) -> Tensor<B, 2> {
        self.forward(input)
    }
}

impl<B: Backend> SACCritic<B> for Critic<B> {
    fn soft_update(this: Self, that: &Self, tau: ElemType) -> Self {
        let (linear_0, linear_1, linear_2) = this.consume();

        Self {
            linear_0: soft_update_linear(linear_0, &that.linear_0, tau),
            linear_1: soft_update_linear(linear_1, &that.linear_1, tau),
            linear_2: soft_update_linear(linear_2, &that.linear_2, tau),
        }
    }
}

#[allow(unused)]
const MEMORY_SIZE: usize = 4096;

type MyAgent<E, B> = SAC<E, B, Actor<B>>;

#[allow(unused)]
pub fn run<
    E: Environment + AsMut<TrictracEnvironment>,
    B: AutodiffBackend<InnerBackend = NdArray>,
>(
    conf: &Config,
    visualized: bool,
) -> impl Agent<E> {
    let mut env = E::new(visualized);
    env.as_mut().max_steps = conf.max_steps;
    let state_dim = <<E as Environment>::StateType as State>::size();
    let action_dim = <<E as Environment>::ActionType as Action>::size();

    let actor = Actor::<B>::new(state_dim, conf.dense_size, action_dim);
    let critic_1 = Critic::<B>::new(state_dim, conf.dense_size, action_dim);
    let critic_2 = Critic::<B>::new(state_dim, conf.dense_size, action_dim);
    let mut nets = SACNets::<B, Actor<B>, Critic<B>>::new(actor, critic_1, critic_2);

    let mut agent = MyAgent::default();

    let config = SACTrainingConfig {
        gamma: conf.gamma,
        tau: conf.tau,
        learning_rate: conf.learning_rate,
        min_probability: conf.min_probability,
        batch_size: conf.batch_size,
        clip_grad: Some(burn::grad_clipping::GradientClippingConfig::Value(
            conf.clip_grad,
        )),
    };

    let mut memory = Memory::<E, B, MEMORY_SIZE>::default();

    let optimizer_config = AdamWConfig::new().with_grad_clipping(config.clip_grad.clone());

    let mut optimizer = SACOptimizer::new(
        optimizer_config.clone().init(),
        optimizer_config.clone().init(),
        optimizer_config.clone().init(),
        optimizer_config.init(),
    );

    let mut step = 0_usize;

    for episode in 0..conf.num_episodes {
        let mut episode_done = false;
        let mut episode_reward = 0.0;
        let mut episode_duration = 0_usize;
        let mut state = env.state();
        let mut now = SystemTime::now();

        while !episode_done {
            if let Some(action) = MyAgent::<E, _>::react_with_model(&state, &nets.actor) {
                let snapshot = env.step(action);

                episode_reward += <<E as Environment>::RewardType as Into<ElemType>>::into(
                    snapshot.reward().clone(),
                );

                memory.push(
                    state,
                    *snapshot.state(),
                    action,
                    snapshot.reward().clone(),
                    snapshot.done(),
                );

                if config.batch_size < memory.len() {
                    nets = agent.train::<MEMORY_SIZE, _>(nets, &memory, &mut optimizer, &config);
                }

                step += 1;
                episode_duration += 1;

                if snapshot.done() || episode_duration >= conf.max_steps {
                    env.reset();
                    episode_done = true;

                    println!(
                        "{{\"episode\": {episode}, \"reward\": {episode_reward:.4}, \"steps count\": {episode_duration}, \"duration\": {}}}",
                        now.elapsed().unwrap().as_secs()
                    );
                    now = SystemTime::now();
                } else {
                    state = *snapshot.state();
                }
            }
        }
    }

    let valid_agent = agent.valid(nets.actor);
    if let Some(path) = &conf.save_path {
        if let Some(model) = valid_agent.model() {
            save_model(model, path);
        }
    }
    valid_agent
}

pub fn save_model(model: &Actor<NdArray<ElemType>>, path: &String) {
    let recorder = CompactRecorder::new();
    let model_path = format!("{path}.mpk");
    println!("info: validation model saved: {model_path}");
    recorder
        .record(model.clone().into_record(), model_path.into())
        .unwrap();
}

pub fn load_model(dense_size: usize, path: &String) -> Option<Actor<NdArray<ElemType>>> {
    let model_path = format!("{path}.mpk");
    // println!("Loading model from: {model_path}");

    CompactRecorder::new()
        .load(model_path.into(), &NdArrayDevice::default())
        .map(|record| {
            Actor::new(
                <TrictracEnvironment as Environment>::StateType::size(),
                dense_size,
                <TrictracEnvironment as Environment>::ActionType::size(),
            )
            .load_record(record)
        })
        .ok()
}

424
bot/src/burnrl/environment.rs
Normal file

@@ -0,0 +1,424 @@
use std::io::Write;

use crate::training_common;
use burn::{prelude::Backend, tensor::Tensor};
use burn_rl::base::{Action, Environment, Snapshot, State};
use rand::{thread_rng, Rng};
use store::{GameEvent, GameState, PlayerId, PointsRules, Stage, TurnStage};

const ERROR_REWARD: f32 = -1.12121;
const REWARD_VALID_MOVE: f32 = 1.12121;
const REWARD_RATIO: f32 = 0.01;
const WIN_POINTS: f32 = 1.0;

/// Trictrac game state for burn-rl
#[derive(Debug, Clone, Copy)]
pub struct TrictracState {
    pub data: [i8; 36], // vector representation of the game state
}

impl State for TrictracState {
    type Data = [i8; 36];

    fn to_tensor<B: Backend>(&self) -> Tensor<B, 1> {
        Tensor::from_floats(self.data, &B::Device::default())
    }

    fn size() -> usize {
        36
    }
}

impl TrictracState {
    /// Converts a GameState into a TrictracState
    pub fn from_game_state(game_state: &GameState) -> Self {
        let state_vec = game_state.to_vec();
        let mut data = [0; 36];

        // Copy the data, making sure we do not overrun the buffer
        let copy_len = state_vec.len().min(36);
        data[..copy_len].copy_from_slice(&state_vec[..copy_len]);

        TrictracState { data }
    }
}

/// Possible Trictrac actions for burn-rl
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct TrictracAction {
    // u32 as required by the burn_rl::base::Action type
    pub index: u32,
}

impl Action for TrictracAction {
    fn random() -> Self {
        use rand::{thread_rng, Rng};
        let mut rng = thread_rng();
        TrictracAction {
            index: rng.gen_range(0..Self::size() as u32),
        }
    }

    fn enumerate() -> Vec<Self> {
        (0..Self::size() as u32)
            .map(|index| TrictracAction { index })
            .collect()
    }

    fn size() -> usize {
        514
    }
}
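The 514-slot flat action space is decoded by `training_common::TrictracAction::from_action_index`, which is not part of this diff. One plausible layout, judging from the `Move { dice_order, from1, from2 }` variant used in the sibling environments, is 2 fixed actions (Roll, Go) plus 2 × 16 × 16 move encodings (dice order × 16 origin choices per die, 15 points plus "no checker"). The decoder below is a hypothetical sketch of that layout, not the committed implementation:

// Hypothetical decoder sketch; the real one lives in training_common.
fn decode_action_index(index: usize) -> Option<(bool, usize, usize)> {
    match index {
        0 | 1 => None, // Roll and Go would be dedicated actions, handled apart
        i => {
            let m = i - 2;              // 0..512 move encodings
            let dice_order = m < 256;   // which die is played first
            let from1 = (m % 256) / 16; // first origin choice (0 = no checker)
            let from2 = m % 16;         // second origin choice (0 = no checker)
            Some((dice_order, from1, from2))
        }
    }
}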
impl From<u32> for TrictracAction {
    fn from(index: u32) -> Self {
        TrictracAction { index }
    }
}

impl From<TrictracAction> for u32 {
    fn from(action: TrictracAction) -> u32 {
        action.index
    }
}

/// Trictrac environment for burn-rl
#[derive(Debug)]
pub struct TrictracEnvironment {
    pub game: GameState,
    active_player_id: PlayerId,
    opponent_id: PlayerId,
    current_state: TrictracState,
    episode_reward: f32,
    pub step_count: usize,
    pub best_ratio: f32,
    pub max_steps: usize,
    pub pointrolls_count: usize,
    pub goodmoves_count: usize,
    pub goodmoves_ratio: f32,
    pub visualized: bool,
}

impl Environment for TrictracEnvironment {
    type StateType = TrictracState;
    type ActionType = TrictracAction;
    type RewardType = f32;

    fn new(visualized: bool) -> Self {
        let mut game = GameState::new(false);

        // Add the two players
        game.init_player("DQN Agent");
        game.init_player("Opponent");
        let player1_id = 1;
        let player2_id = 2;

        // Start the game
        game.consume(&GameEvent::BeginGame { goes_first: 1 });

        let current_state = TrictracState::from_game_state(&game);
        TrictracEnvironment {
            game,
            active_player_id: player1_id,
            opponent_id: player2_id,
            current_state,
            episode_reward: 0.0,
            step_count: 0,
            best_ratio: 0.0,
            max_steps: 2000,
            pointrolls_count: 0,
            goodmoves_count: 0,
            goodmoves_ratio: 0.0,
            visualized,
        }
    }

    fn state(&self) -> Self::StateType {
        self.current_state
    }

    fn reset(&mut self) -> Snapshot<Self> {
        // Reset the game
        let history = self.game.history.clone();
        self.game = GameState::new(false);
        self.game.init_player("DQN Agent");
        self.game.init_player("Opponent");

        // Start the game
        self.game.consume(&GameEvent::BeginGame { goes_first: 1 });

        self.current_state = TrictracState::from_game_state(&self.game);
        self.episode_reward = 0.0;
        self.goodmoves_ratio = if self.step_count == 0 {
            0.0
        } else {
            self.goodmoves_count as f32 / self.step_count as f32
        };
        self.best_ratio = self.best_ratio.max(self.goodmoves_ratio);
        // Dump the game history when the good-move ratio collapses after a good run
        let _warning = if self.best_ratio > 0.7 && self.goodmoves_ratio < 0.1 {
            let path = "bot/models/logs/debug.log";
            if let Ok(mut out) = std::fs::File::create(path) {
                write!(out, "{history:?}").expect("could not write history log");
            }
            "!!!!"
        } else {
            ""
        };
        // println!(
        //     "info: correct moves: {} ({}%) {}",
        //     self.goodmoves_count,
        //     (100.0 * self.goodmoves_ratio).round() as u32,
        //     warning
        // );
        self.step_count = 0;
        self.pointrolls_count = 0;
        self.goodmoves_count = 0;

        Snapshot::new(self.current_state, 0.0, false)
    }

    fn step(&mut self, action: Self::ActionType) -> Snapshot<Self> {
        self.step_count += 1;

        // Convert the burn-rl action into a Trictrac action
        let trictrac_action = Self::convert_action(action);

        let mut reward = 0.0;
        let is_rollpoint;

        // Execute the action if it is the DQN agent's turn
        if self.game.active_player_id == self.active_player_id {
            if let Some(action) = trictrac_action {
                (reward, is_rollpoint) = self.execute_action(action);
                if is_rollpoint {
                    self.pointrolls_count += 1;
                }
                if reward != ERROR_REWARD {
                    self.goodmoves_count += 1;
                }
            } else {
                // Action not convertible: penalty
                panic!("action not convertible");
                //reward = -0.5;
            }
        }

        // Let the opponent play (simple strategy)
        while self.game.active_player_id == self.opponent_id && self.game.stage != Stage::Ended {
            reward += self.play_opponent_if_needed();
        }

        // Check whether the game is over
        // let max_steps = self.max_steps;
        // let max_steps = self.min_steps
        //     + (self.max_steps as f32 - self.min_steps)
        //         * f32::exp((self.goodmoves_ratio - 1.0) / 0.25);
        let done = self.game.stage == Stage::Ended || self.game.determine_winner().is_some();

        if done {
            // Final reward based on the outcome
            if let Some(winner_id) = self.game.determine_winner() {
                if winner_id == self.active_player_id {
                    reward += WIN_POINTS; // win
                } else {
                    reward -= WIN_POINTS; // loss
                }
            }
        }
        let terminated = done || self.step_count >= self.max_steps;
        // let terminated = done || self.step_count >= max_steps.round() as usize;

        // Update the state
        self.current_state = TrictracState::from_game_state(&self.game);
        self.episode_reward += reward;

        if self.visualized && terminated {
            println!(
                "Episode finished. Total reward: {:.2}, steps: {}",
                self.episode_reward, self.step_count
            );
        }

        Snapshot::new(self.current_state, reward, terminated)
    }
}

impl TrictracEnvironment {
    /// Converts a burn-rl action into a Trictrac action
    pub fn convert_action(action: TrictracAction) -> Option<training_common::TrictracAction> {
        training_common::TrictracAction::from_action_index(action.index.try_into().unwrap())
    }

    /// Converts an index into the currently valid actions into a Trictrac action
    #[allow(dead_code)]
    fn convert_valid_action_index(
        &self,
        action: TrictracAction,
        game_state: &GameState,
    ) -> Option<training_common::TrictracAction> {
        use training_common::get_valid_actions;

        // Get the actions that are valid in the current context
        let valid_actions = get_valid_actions(game_state);

        if valid_actions.is_empty() {
            return None;
        }

        // Map the action index onto a valid action
        let action_index = (action.index as usize) % valid_actions.len();
        Some(valid_actions[action_index].clone())
    }

    /// Executes a Trictrac action in the game
    // fn execute_action(
    //     &mut self,
    //     action: training_common::TrictracAction,
    // ) -> Result<f32, Box<dyn std::error::Error>> {
    fn execute_action(&mut self, action: training_common::TrictracAction) -> (f32, bool) {
        use training_common::TrictracAction;

        let mut reward = 0.0;
        let mut is_rollpoint = false;

        // Apply the event if it is valid
        if let Some(event) = action.to_event(&self.game) {
            if self.game.validate(&event) {
                self.game.consume(&event);
                reward += REWARD_VALID_MOVE;
                // Simulate the dice result after a Roll
                if matches!(action, TrictracAction::Roll) {
                    let mut rng = thread_rng();
                    let dice_values = (rng.gen_range(1..=6), rng.gen_range(1..=6));
                    let dice_event = GameEvent::RollResult {
                        player_id: self.active_player_id,
                        dice: store::Dice {
                            values: dice_values,
                        },
                    };
                    if self.game.validate(&dice_event) {
                        self.game.consume(&dice_event);
                        let (points, adv_points) = self.game.dice_points;
                        // Reward proportional to the points scored
                        reward += REWARD_RATIO * (points as f32 - adv_points as f32);
                        if points > 0 {
                            is_rollpoint = true;
                            // println!("info: rolled for {reward}");
                        }
                    }
                }
            } else {
                // Penalty for an invalid action:
                // cancel the rewards accumulated so far
                // and use a recognizable sentinel value for statistics
                reward = ERROR_REWARD;
            }
        } else {
            reward = ERROR_REWARD;
        }

        (reward, is_rollpoint)
    }

    /// Plays the opponent's turn with a simple strategy
    fn play_opponent_if_needed(&mut self) -> f32 {
        let mut reward = 0.0;

        // If it is the opponent's turn, play automatically
        if self.game.active_player_id == self.opponent_id && self.game.stage != Stage::Ended {
            // Use the default strategy for the opponent
            use crate::BotStrategy;

            let mut strategy = crate::strategy::random::RandomStrategy::default();
            strategy.set_player_id(self.opponent_id);
            if let Some(color) = self.game.player_color_by_id(&self.opponent_id) {
                strategy.set_color(color);
            }
            *strategy.get_mut_game() = self.game.clone();

            // Pick the event to apply according to the turn_stage
            let mut calculate_points = false;
            let opponent_color = store::Color::Black;
            let event = match self.game.turn_stage {
                TurnStage::RollDice => GameEvent::Roll {
                    player_id: self.opponent_id,
                },
                TurnStage::RollWaiting => {
                    let mut rng = thread_rng();
                    let dice_values = (rng.gen_range(1..=6), rng.gen_range(1..=6));
                    calculate_points = true;
                    GameEvent::RollResult {
                        player_id: self.opponent_id,
                        dice: store::Dice {
                            values: dice_values,
                        },
                    }
                }
                TurnStage::MarkPoints => {
                    let dice_roll_count = self
                        .game
                        .players
                        .get(&self.opponent_id)
                        .unwrap()
                        .dice_roll_count;
                    let points_rules =
                        PointsRules::new(&opponent_color, &self.game.board, self.game.dice);
                    GameEvent::Mark {
                        player_id: self.opponent_id,
                        points: points_rules.get_points(dice_roll_count).0,
                    }
                }
                TurnStage::MarkAdvPoints => {
                    let opponent_color = store::Color::Black;
                    let dice_roll_count = self
                        .game
                        .players
                        .get(&self.opponent_id)
                        .unwrap()
                        .dice_roll_count;
                    let points_rules =
                        PointsRules::new(&opponent_color, &self.game.board, self.game.dice);
                    // no reward: already accounted for during White's turn
                    GameEvent::Mark {
                        player_id: self.opponent_id,
                        points: points_rules.get_points(dice_roll_count).1,
                    }
                }
                TurnStage::HoldOrGoChoice => {
                    // Simple strategy: always keep going
                    GameEvent::Go {
                        player_id: self.opponent_id,
                    }
                }
                TurnStage::Move => GameEvent::Move {
                    player_id: self.opponent_id,
                    moves: strategy.choose_move(),
                },
            };

            if self.game.validate(&event) {
                self.game.consume(&event);
                if calculate_points {
                    let dice_roll_count = self
                        .game
                        .players
                        .get(&self.opponent_id)
                        .unwrap()
                        .dice_roll_count;
                    let points_rules =
                        PointsRules::new(&opponent_color, &self.game.board, self.game.dice);
                    let (points, adv_points) = points_rules.get_points(dice_roll_count);
                    // Reward proportional to the points scored
                    reward -= REWARD_RATIO * (points as f32 - adv_points as f32);
                }
            }
        }
        reward
    }
}

impl AsMut<TrictracEnvironment> for TrictracEnvironment {
    fn as_mut(&mut self) -> &mut Self {
        self
    }
}
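Scale note on the constants in this environment: with REWARD_RATIO = 0.01, a roll scoring 4 points against 2 for the opponent contributes 0.01 × (4 − 2) = 0.02, so per-roll point differences stay roughly two orders of magnitude below the ±1.12121 valid/invalid move signal and the ±1.0 end-of-game bonus.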

469
bot/src/burnrl/environment_big.rs
Normal file

@@ -0,0 +1,469 @@
use crate::training_common_big;
use burn::{prelude::Backend, tensor::Tensor};
use burn_rl::base::{Action, Environment, Snapshot, State};
use rand::{thread_rng, Rng};
use store::{GameEvent, GameState, PlayerId, PointsRules, Stage, TurnStage};

const ERROR_REWARD: f32 = -2.12121;
const REWARD_VALID_MOVE: f32 = 2.12121;
const REWARD_RATIO: f32 = 0.01;
const WIN_POINTS: f32 = 0.1;

/// Trictrac game state for burn-rl
#[derive(Debug, Clone, Copy)]
pub struct TrictracState {
    pub data: [i8; 36], // vector representation of the game state
}

impl State for TrictracState {
    type Data = [i8; 36];

    fn to_tensor<B: Backend>(&self) -> Tensor<B, 1> {
        Tensor::from_floats(self.data, &B::Device::default())
    }

    fn size() -> usize {
        36
    }
}

impl TrictracState {
    /// Converts a GameState into a TrictracState
    pub fn from_game_state(game_state: &GameState) -> Self {
        let state_vec = game_state.to_vec();
        let mut data = [0; 36];

        // Copy the data, making sure we do not overrun the buffer
        let copy_len = state_vec.len().min(36);
        data[..copy_len].copy_from_slice(&state_vec[..copy_len]);

        TrictracState { data }
    }
}

/// Possible Trictrac actions for burn-rl
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct TrictracAction {
    // u32 as required by the burn_rl::base::Action type
    pub index: u32,
}

impl Action for TrictracAction {
    fn random() -> Self {
        use rand::{thread_rng, Rng};
        let mut rng = thread_rng();
        TrictracAction {
            index: rng.gen_range(0..Self::size() as u32),
        }
    }

    fn enumerate() -> Vec<Self> {
        (0..Self::size() as u32)
            .map(|index| TrictracAction { index })
            .collect()
    }

    fn size() -> usize {
        1252
    }
}

impl From<u32> for TrictracAction {
    fn from(index: u32) -> Self {
        TrictracAction { index }
    }
}

impl From<TrictracAction> for u32 {
    fn from(action: TrictracAction) -> u32 {
        action.index
    }
}

/// Trictrac environment for burn-rl
#[derive(Debug)]
pub struct TrictracEnvironment {
    pub game: GameState,
    active_player_id: PlayerId,
    opponent_id: PlayerId,
    current_state: TrictracState,
    episode_reward: f32,
    pub step_count: usize,
    pub max_steps: usize,
    pub pointrolls_count: usize,
    pub goodmoves_count: usize,
    pub goodmoves_ratio: f32,
    pub visualized: bool,
}

impl Environment for TrictracEnvironment {
    type StateType = TrictracState;
    type ActionType = TrictracAction;
    type RewardType = f32;

    fn new(visualized: bool) -> Self {
        let mut game = GameState::new(false);

        // Add the two players
        game.init_player("DQN Agent");
        game.init_player("Opponent");
        let player1_id = 1;
        let player2_id = 2;

        // Start the game
        game.consume(&GameEvent::BeginGame { goes_first: 1 });

        let current_state = TrictracState::from_game_state(&game);
        TrictracEnvironment {
            game,
            active_player_id: player1_id,
            opponent_id: player2_id,
            current_state,
            episode_reward: 0.0,
            step_count: 0,
            max_steps: 2000,
            pointrolls_count: 0,
            goodmoves_count: 0,
            goodmoves_ratio: 0.0,
            visualized,
        }
    }

    fn state(&self) -> Self::StateType {
        self.current_state
    }

    fn reset(&mut self) -> Snapshot<Self> {
        // Reset the game
        self.game = GameState::new(false);
        self.game.init_player("DQN Agent");
        self.game.init_player("Opponent");

        // Start the game
        self.game.consume(&GameEvent::BeginGame { goes_first: 1 });

        self.current_state = TrictracState::from_game_state(&self.game);
        self.episode_reward = 0.0;
        self.goodmoves_ratio = if self.step_count == 0 {
            0.0
        } else {
            self.goodmoves_count as f32 / self.step_count as f32
        };
        println!(
            "info: correct moves: {} ({}%)",
            self.goodmoves_count,
            (100.0 * self.goodmoves_ratio).round() as u32
        );
        self.step_count = 0;
        self.pointrolls_count = 0;
        self.goodmoves_count = 0;

        Snapshot::new(self.current_state, 0.0, false)
    }

    fn step(&mut self, action: Self::ActionType) -> Snapshot<Self> {
        self.step_count += 1;

        // Convert the burn-rl action into a Trictrac action
        let trictrac_action = Self::convert_action(action);

        let mut reward = 0.0;
        let is_rollpoint;

        // Execute the action if it is the DQN agent's turn
        if self.game.active_player_id == self.active_player_id {
            if let Some(action) = trictrac_action {
                (reward, is_rollpoint) = self.execute_action(action);
                if is_rollpoint {
                    self.pointrolls_count += 1;
                }
                if reward != ERROR_REWARD {
                    self.goodmoves_count += 1;
                    // println!("{str_action}");
                }
            } else {
                // Action not convertible: penalty
                reward = -0.5;
            }
        }

        // Let the opponent play (simple strategy)
        while self.game.active_player_id == self.opponent_id && self.game.stage != Stage::Ended {
            // print!(":");
            reward += self.play_opponent_if_needed();
        }

        // Check whether the game is over
        // let max_steps = self.max_steps;
        // let max_steps = self.min_steps
        //     + (self.max_steps as f32 - self.min_steps)
        //         * f32::exp((self.goodmoves_ratio - 1.0) / 0.25);
        let done = self.game.stage == Stage::Ended || self.game.determine_winner().is_some();

        if done {
            // Final reward based on the outcome
            if let Some(winner_id) = self.game.determine_winner() {
                if winner_id == self.active_player_id {
                    reward += WIN_POINTS; // win
                } else {
                    reward -= WIN_POINTS; // loss
                }
            }
        }
        let terminated = done || self.step_count >= self.max_steps;

        // Update the state
        self.current_state = TrictracState::from_game_state(&self.game);
        self.episode_reward += reward;
        if self.visualized && terminated {
            println!(
                "Episode finished. Total reward: {:.2}, steps: {}",
                self.episode_reward, self.step_count
            );
        }

        Snapshot::new(self.current_state, reward, terminated)
    }
}

impl TrictracEnvironment {
    /// Converts a burn-rl action into a Trictrac action
    pub fn convert_action(action: TrictracAction) -> Option<training_common_big::TrictracAction> {
        training_common_big::TrictracAction::from_action_index(action.index.try_into().unwrap())
    }

    /// Converts an index into the currently valid actions into a Trictrac action
    #[allow(dead_code)]
    fn convert_valid_action_index(
        &self,
        action: TrictracAction,
        game_state: &GameState,
    ) -> Option<training_common_big::TrictracAction> {
        use training_common_big::get_valid_actions;

        // Get the actions that are valid in the current context
        let valid_actions = get_valid_actions(game_state);

        if valid_actions.is_empty() {
            return None;
        }

        // Map the action index onto a valid action
        let action_index = (action.index as usize) % valid_actions.len();
        Some(valid_actions[action_index].clone())
    }

    /// Executes a Trictrac action in the game
    // fn execute_action(
    //     &mut self,
    //     action: training_common_big::TrictracAction,
    // ) -> Result<f32, Box<dyn std::error::Error>> {
    fn execute_action(&mut self, action: training_common_big::TrictracAction) -> (f32, bool) {
        use training_common_big::TrictracAction;

        let mut reward = 0.0;
        let mut is_rollpoint = false;
        let mut need_roll = false;

        let event = match action {
            TrictracAction::Roll => {
                // Roll the dice
                need_roll = true;
                Some(GameEvent::Roll {
                    player_id: self.active_player_id,
                })
            }
            // TrictracAction::Mark => {
            //     // Mark points
            //     let points = self.game.
            //     reward += 0.1 * points as f32;
            //     Some(GameEvent::Mark {
            //         player_id: self.active_player_id,
            //         points,
            //     })
            // }
            TrictracAction::Go => {
                // Keep going after winning a hole
                Some(GameEvent::Go {
                    player_id: self.active_player_id,
                })
            }
            TrictracAction::Move {
                dice_order,
                from1,
                from2,
            } => {
                // Perform a move
                let (dice1, dice2) = if dice_order {
                    (self.game.dice.values.0, self.game.dice.values.1)
                } else {
                    (self.game.dice.values.1, self.game.dice.values.0)
                };
                let mut to1 = from1 + dice1 as usize;
                let mut to2 = from2 + dice2 as usize;

                // Handle taking the opponent's corner "par puissance" (by power)
                let opp_rest_field = 13;
                if to1 == opp_rest_field && to2 == opp_rest_field {
                    to1 -= 1;
                    to2 -= 1;
                }

                let checker_move1 = store::CheckerMove::new(from1, to1).unwrap_or_default();
                let checker_move2 = store::CheckerMove::new(from2, to2).unwrap_or_default();

                Some(GameEvent::Move {
                    player_id: self.active_player_id,
                    moves: (checker_move1, checker_move2),
                })
            }
        };

        // Apply the event if it is valid
        if let Some(event) = event {
            if self.game.validate(&event) {
                self.game.consume(&event);
                reward += REWARD_VALID_MOVE;
                // Simulate the dice result after a Roll
                // if matches!(action, TrictracAction::Roll) {
                if need_roll {
                    let mut rng = thread_rng();
                    let dice_values = (rng.gen_range(1..=6), rng.gen_range(1..=6));
                    let dice_event = GameEvent::RollResult {
                        player_id: self.active_player_id,
                        dice: store::Dice {
                            values: dice_values,
                        },
                    };
                    // print!("o");
                    if self.game.validate(&dice_event) {
                        self.game.consume(&dice_event);
                        let (points, adv_points) = self.game.dice_points;
                        // Reward proportional to the points scored
                        reward += REWARD_RATIO * (points - adv_points) as f32;
                        if points > 0 {
                            is_rollpoint = true;
                            // println!("info: rolled for {reward}");
                        }
                    }
                }
            } else {
                // Penalty for an invalid action:
                // cancel the rewards accumulated so far
                // and use a recognizable sentinel value for statistics
                reward = ERROR_REWARD;
            }
        }

        (reward, is_rollpoint)
    }

    /// Plays the opponent's turn with a simple strategy
    fn play_opponent_if_needed(&mut self) -> f32 {
        // print!("z?");
        let mut reward = 0.0;

        // If it is the opponent's turn, play automatically
        if self.game.active_player_id == self.opponent_id && self.game.stage != Stage::Ended {
            // Use the default strategy for the opponent
            use crate::BotStrategy;

            let mut strategy = crate::strategy::random::RandomStrategy::default();
            strategy.set_player_id(self.opponent_id);
            if let Some(color) = self.game.player_color_by_id(&self.opponent_id) {
                strategy.set_color(color);
            }
            *strategy.get_mut_game() = self.game.clone();

            // Pick the event to apply according to the turn_stage
            let mut calculate_points = false;
            let opponent_color = store::Color::Black;
            let event = match self.game.turn_stage {
                TurnStage::RollDice => GameEvent::Roll {
                    player_id: self.opponent_id,
                },
                TurnStage::RollWaiting => {
                    let mut rng = thread_rng();
                    let dice_values = (rng.gen_range(1..=6), rng.gen_range(1..=6));
                    calculate_points = true; // comment this out to replicate burnrl_before
                    GameEvent::RollResult {
                        player_id: self.opponent_id,
                        dice: store::Dice {
                            values: dice_values,
                        },
                    }
                }
                TurnStage::MarkPoints => {
                    panic!("in play_opponent_if_needed > TurnStage::MarkPoints");
                    // let dice_roll_count = self
                    //     .game
                    //     .players
                    //     .get(&self.opponent_id)
                    //     .unwrap()
                    //     .dice_roll_count;
                    // let points_rules =
                    //     PointsRules::new(&opponent_color, &self.game.board, self.game.dice);
                    // GameEvent::Mark {
                    //     player_id: self.opponent_id,
                    //     points: points_rules.get_points(dice_roll_count).0,
                    // }
                }
                TurnStage::MarkAdvPoints => {
                    let dice_roll_count = self
                        .game
                        .players
                        .get(&self.opponent_id)
                        .unwrap()
                        .dice_roll_count;
                    let points_rules =
                        PointsRules::new(&opponent_color, &self.game.board, self.game.dice);
                    // no reward: already accounted for during White's turn
                    GameEvent::Mark {
                        player_id: self.opponent_id,
                        points: points_rules.get_points(dice_roll_count).1,
                    }
                }
                TurnStage::HoldOrGoChoice => {
                    // Simple strategy: always keep going
                    GameEvent::Go {
                        player_id: self.opponent_id,
                    }
                }
                TurnStage::Move => GameEvent::Move {
                    player_id: self.opponent_id,
                    moves: strategy.choose_move(),
                },
            };

            if self.game.validate(&event) {
                self.game.consume(&event);
                // print!(".");
                if calculate_points {
                    // print!("x");
                    let dice_roll_count = self
                        .game
                        .players
                        .get(&self.opponent_id)
                        .unwrap()
                        .dice_roll_count;
                    let points_rules =
                        PointsRules::new(&opponent_color, &self.game.board, self.game.dice);
                    let (points, adv_points) = points_rules.get_points(dice_roll_count);
                    // Reward proportional to the points scored
                    let adv_reward = REWARD_RATIO * (points - adv_points) as f32;
                    reward -= adv_reward;
                    // if adv_reward != 0.0 {
                    //     println!("info: opponent : {adv_reward} -> {reward}");
                    // }
                }
            }
        }
        reward
    }
}

impl AsMut<TrictracEnvironment> for TrictracEnvironment {
    fn as_mut(&mut self) -> &mut Self {
        self
    }
}

443
bot/src/burnrl/environment_valid.rs
Normal file

@@ -0,0 +1,443 @@
use crate::training_common_big;
|
||||
use burn::{prelude::Backend, tensor::Tensor};
|
||||
use burn_rl::base::{Action, Environment, Snapshot, State};
|
||||
use rand::{thread_rng, Rng};
|
||||
use store::{GameEvent, GameState, PlayerId, PointsRules, Stage, TurnStage};
|
||||
|
||||
/// État du jeu Trictrac pour burn-rl
|
||||
#[derive(Debug, Clone, Copy)]
|
||||
pub struct TrictracState {
|
||||
pub data: [i8; 36], // Représentation vectorielle de l'état du jeu
|
||||
}
|
||||
|
||||
impl State for TrictracState {
|
||||
type Data = [i8; 36];
|
||||
|
||||
fn to_tensor<B: Backend>(&self) -> Tensor<B, 1> {
|
||||
Tensor::from_floats(self.data, &B::Device::default())
|
||||
}
|
||||
|
||||
fn size() -> usize {
|
||||
36
|
||||
}
|
||||
}
|
||||
|
||||
impl TrictracState {
|
||||
/// Convertit un GameState en TrictracState
|
||||
pub fn from_game_state(game_state: &GameState) -> Self {
|
||||
let state_vec = game_state.to_vec();
|
||||
let mut data = [0; 36];
|
||||
|
||||
// Copier les données en s'assurant qu'on ne dépasse pas la taille
|
||||
let copy_len = state_vec.len().min(36);
|
||||
data[..copy_len].copy_from_slice(&state_vec[..copy_len]);
|
||||
|
||||
TrictracState { data }
|
||||
}
|
||||
}
|
||||
|
||||
/// Actions possibles dans Trictrac pour burn-rl
|
||||
#[derive(Debug, Clone, Copy, PartialEq)]
|
||||
pub struct TrictracAction {
|
||||
// u32 as required by burn_rl::base::Action type
|
||||
pub index: u32,
|
||||
}
|
||||
|
||||
impl Action for TrictracAction {
|
||||
fn random() -> Self {
|
||||
use rand::{thread_rng, Rng};
|
||||
let mut rng = thread_rng();
|
||||
TrictracAction {
|
||||
index: rng.gen_range(0..Self::size() as u32),
|
||||
}
|
||||
}
|
||||
|
||||
fn enumerate() -> Vec<Self> {
|
||||
(0..Self::size() as u32)
|
||||
.map(|index| TrictracAction { index })
|
||||
.collect()
|
||||
}
|
||||
|
||||
fn size() -> usize {
|
||||
// état avec le plus de choix : mouvement
|
||||
// choix premier dé : 16 (15 pions + aucun pion), choix deuxième dé 16, x2 ordre dé
|
||||
64
|
||||
}
|
||||
}
|
||||
|
||||
impl From<u32> for TrictracAction {
|
||||
fn from(index: u32) -> Self {
|
||||
TrictracAction { index }
|
||||
}
|
||||
}
|
||||
|
||||
impl From<TrictracAction> for u32 {
|
||||
fn from(action: TrictracAction) -> u32 {
|
||||
action.index
|
||||
}
|
||||
}
|
||||
|
||||
/// Environnement Trictrac pour burn-rl
|
||||
#[derive(Debug)]
|
||||
pub struct TrictracEnvironment {
|
||||
pub game: GameState,
|
||||
active_player_id: PlayerId,
|
||||
opponent_id: PlayerId,
|
||||
current_state: TrictracState,
|
||||
episode_reward: f32,
|
||||
pub step_count: usize,
|
||||
pub max_steps: usize,
|
||||
pub pointrolls_count: usize,
|
||||
pub visualized: bool,
|
||||
}
|
||||
|
||||
impl Environment for TrictracEnvironment {
|
||||
type StateType = TrictracState;
|
||||
type ActionType = TrictracAction;
|
||||
type RewardType = f32;
|
||||
|
||||
fn new(visualized: bool) -> Self {
|
||||
let mut game = GameState::new(false);
|
||||
|
||||
// Ajouter deux joueurs
|
||||
game.init_player("DQN Agent");
|
||||
game.init_player("Opponent");
|
||||
let player1_id = 1;
|
||||
let player2_id = 2;
|
||||
|
||||
// Commencer la partie
|
||||
game.consume(&GameEvent::BeginGame { goes_first: 1 });
|
||||
|
||||
let current_state = TrictracState::from_game_state(&game);
|
||||
TrictracEnvironment {
|
||||
game,
|
||||
active_player_id: player1_id,
|
||||
opponent_id: player2_id,
|
||||
current_state,
|
||||
episode_reward: 0.0,
|
||||
step_count: 0,
|
||||
max_steps: 2000,
|
||||
pointrolls_count: 0,
|
||||
visualized,
|
||||
}
|
||||
}
|
||||
|
||||
fn state(&self) -> Self::StateType {
|
||||
self.current_state
|
||||
}
|
||||
|
||||
fn reset(&mut self) -> Snapshot<Self> {
|
||||
// Réinitialiser le jeu
|
||||
self.game = GameState::new(false);
|
||||
self.game.init_player("DQN Agent");
|
||||
self.game.init_player("Opponent");
|
||||
|
||||
// Commencer la partie
|
||||
self.game.consume(&GameEvent::BeginGame { goes_first: 1 });
|
||||
|
||||
self.current_state = TrictracState::from_game_state(&self.game);
|
||||
self.episode_reward = 0.0;
|
||||
self.step_count = 0;
|
||||
self.pointrolls_count = 0;
|
||||
|
||||
Snapshot::new(self.current_state, 0.0, false)
|
||||
}
|
||||
|
||||
fn step(&mut self, action: Self::ActionType) -> Snapshot<Self> {
|
||||
self.step_count += 1;
|
||||
|
||||
// Convertir l'action burn-rl vers une action Trictrac
|
||||
// let trictrac_action = Self::convert_action(action);
|
||||
let trictrac_action = self.convert_valid_action_index(action);
|
||||
let mut reward = 0.0;
|
||||
let is_rollpoint: bool;
|
||||
|
||||
// Exécuter l'action si c'est le tour de l'agent DQN
|
||||
if self.game.active_player_id == self.active_player_id {
|
||||
if let Some(action) = trictrac_action {
|
||||
(reward, is_rollpoint) = self.execute_action(action);
|
||||
// if reward != 0.0 {
|
||||
// println!("info: self rew {reward}");
|
||||
// }
|
||||
if is_rollpoint {
|
||||
self.pointrolls_count += 1;
|
||||
}
|
||||
} else {
|
||||
// Action non convertible, pénalité
|
||||
println!("info: action non convertible -> -1 {trictrac_action:?}");
|
||||
reward = -1.0;
|
||||
}
|
||||
}
|
||||
|
||||
// Faire jouer l'adversaire (stratégie simple)
|
||||
while self.game.active_player_id == self.opponent_id && self.game.stage != Stage::Ended {
|
||||
// let op_rew = self.play_opponent_if_needed();
|
||||
// if op_rew != 0.0 {
|
||||
// println!("info: op rew {op_rew}");
|
||||
// }
|
||||
// reward += op_rew;
|
||||
reward += self.play_opponent_if_needed();
|
||||
}
|
||||
|
||||
// Vérifier si la partie est terminée
|
||||
let done = self.game.stage == Stage::Ended || self.game.determine_winner().is_some();
|
||||
|
||||
if done {
|
||||
// Récompense finale basée sur le résultat
|
||||
if let Some(winner_id) = self.game.determine_winner() {
|
||||
if winner_id == self.active_player_id {
|
||||
reward += 100.0; // Victoire
|
||||
} else {
|
||||
reward -= 100.0; // Défaite
|
||||
}
|
||||
}
|
||||
}
|
||||
let terminated = done || self.step_count >= self.max_steps;
|
||||
|
||||
// Mettre à jour l'état
|
||||
self.current_state = TrictracState::from_game_state(&self.game);
|
||||
self.episode_reward += reward;
|
||||
|
||||
if self.visualized && terminated {
|
||||
println!(
|
||||
"Episode terminé. Récompense totale: {:.2}, Étapes: {}",
|
||||
self.episode_reward, self.step_count
|
||||
);
|
||||
}
|
||||
|
||||
Snapshot::new(self.current_state, reward, terminated)
|
||||
}
|
||||
}
|
||||
|
||||
impl TrictracEnvironment {
|
||||
const ERROR_REWARD: f32 = -1.12121;
|
||||
const REWARD_RATIO: f32 = 1.0;
|
||||
|
||||
/// Convertit une action burn-rl vers une action Trictrac
|
||||
pub fn convert_action(action: TrictracAction) -> Option<training_common_big::TrictracAction> {
|
||||
training_common_big::TrictracAction::from_action_index(action.index.try_into().unwrap())
|
||||
}
|
||||
|
||||
/// Convertit l'index d'une action au sein des actions valides vers une action Trictrac
|
||||
fn convert_valid_action_index(
|
||||
&self,
|
||||
action: TrictracAction,
|
||||
) -> Option<training_common_big::TrictracAction> {
|
||||
use training_common_big::get_valid_actions;
|
||||
|
||||
// Obtenir les actions valides dans le contexte actuel
|
||||
let valid_actions = get_valid_actions(&self.game);
|
||||
|
||||
if valid_actions.is_empty() {
|
||||
return None;
|
||||
}
|
||||
|
||||
// Mapper l'index d'action sur une action valide
|
||||
let action_index = (action.index as usize) % valid_actions.len();
|
||||
Some(valid_actions[action_index].clone())
|
||||
}

    /// Executes a Trictrac action in the game
    // fn execute_action(
    //     &mut self,
    //     action: training_common_big::TrictracAction,
    // ) -> Result<f32, Box<dyn std::error::Error>> {
    fn execute_action(&mut self, action: training_common_big::TrictracAction) -> (f32, bool) {
        use training_common_big::TrictracAction;

        let mut reward = 0.0;
        let mut is_rollpoint = false;

        let event = match action {
            TrictracAction::Roll => {
                // Roll the dice
                Some(GameEvent::Roll {
                    player_id: self.active_player_id,
                })
            }
            // TrictracAction::Mark => {
            //     // Mark points
            //     let points = self.game.
            //     reward += 0.1 * points as f32;
            //     Some(GameEvent::Mark {
            //         player_id: self.active_player_id,
            //         points,
            //     })
            // }
            TrictracAction::Go => {
                // Continue after winning a hole
                Some(GameEvent::Go {
                    player_id: self.active_player_id,
                })
            }
            TrictracAction::Move {
                dice_order,
                from1,
                from2,
            } => {
                // Perform a move
                let (dice1, dice2) = if dice_order {
                    (self.game.dice.values.0, self.game.dice.values.1)
                } else {
                    (self.game.dice.values.1, self.game.dice.values.0)
                };
                let mut to1 = from1 + dice1 as usize;
                let mut to2 = from2 + dice2 as usize;

                // Handle taking the opponent's corner by power ("prise de coin par puissance")
                let opp_rest_field = 13;
                if to1 == opp_rest_field && to2 == opp_rest_field {
                    to1 -= 1;
                    to2 -= 1;
                }

                let checker_move1 = store::CheckerMove::new(from1, to1).unwrap_or_default();
                let checker_move2 = store::CheckerMove::new(from2, to2).unwrap_or_default();

                Some(GameEvent::Move {
                    player_id: self.active_player_id,
                    moves: (checker_move1, checker_move2),
                })
            }
        };

        // Apply the event if it is valid
        if let Some(event) = event {
            if self.game.validate(&event) {
                self.game.consume(&event);

                // Simulate the dice result after a Roll
                if matches!(action, TrictracAction::Roll) {
                    let mut rng = thread_rng();
                    let dice_values = (rng.gen_range(1..=6), rng.gen_range(1..=6));
                    let dice_event = GameEvent::RollResult {
                        player_id: self.active_player_id,
                        dice: store::Dice {
                            values: dice_values,
                        },
                    };
                    if self.game.validate(&dice_event) {
                        self.game.consume(&dice_event);
                        // Reward proportional to the points scored
                        // (cast before subtracting to avoid u8 underflow)
                        let (points, adv_points) = self.game.dice_points;
                        reward += Self::REWARD_RATIO * (points as f32 - adv_points as f32);
                        if points > 0 {
                            is_rollpoint = true;
                            // println!("info: rolled for {reward}");
                        }
                    }
                }
            } else {
                // Penalty for an invalid action:
                // discard the rewards accumulated so far
                // and use a recognizable value for statistics
                println!("info: invalid action -> err_reward");
                reward = Self::ERROR_REWARD;
            }
        }

        (reward, is_rollpoint)
    }

    /// Has the opponent play using a simple strategy
    fn play_opponent_if_needed(&mut self) -> f32 {
        let mut reward = 0.0;

        // If it is the opponent's turn, play automatically
        if self.game.active_player_id == self.opponent_id && self.game.stage != Stage::Ended {
            // Use the random strategy for the opponent
            use crate::BotStrategy;

            let mut strategy = crate::strategy::random::RandomStrategy::default();
            strategy.set_player_id(self.opponent_id);
            if let Some(color) = self.game.player_color_by_id(&self.opponent_id) {
                strategy.set_color(color);
            }
            *strategy.get_mut_game() = self.game.clone();

            // Execute the action matching the turn_stage
            let mut calculate_points = false;
            let opponent_color = store::Color::Black;
            let event = match self.game.turn_stage {
                TurnStage::RollDice => GameEvent::Roll {
                    player_id: self.opponent_id,
                },
                TurnStage::RollWaiting => {
                    let mut rng = thread_rng();
                    let dice_values = (rng.gen_range(1..=6), rng.gen_range(1..=6));
                    calculate_points = true;
                    GameEvent::RollResult {
                        player_id: self.opponent_id,
                        dice: store::Dice {
                            values: dice_values,
                        },
                    }
                }
                TurnStage::MarkPoints => {
                    let dice_roll_count = self
                        .game
                        .players
                        .get(&self.opponent_id)
                        .unwrap()
                        .dice_roll_count;
                    let points_rules =
                        PointsRules::new(&opponent_color, &self.game.board, self.game.dice);
                    GameEvent::Mark {
                        player_id: self.opponent_id,
                        points: points_rules.get_points(dice_roll_count).0,
                    }
                }
                TurnStage::MarkAdvPoints => {
                    let dice_roll_count = self
                        .game
                        .players
                        .get(&self.opponent_id)
                        .unwrap()
                        .dice_roll_count;
                    let points_rules =
                        PointsRules::new(&opponent_color, &self.game.board, self.game.dice);
                    let points = points_rules.get_points(dice_roll_count).1;
                    // no reward here: already accounted for during White's turn
                    GameEvent::Mark {
                        player_id: self.opponent_id,
                        points,
                    }
                }
                TurnStage::HoldOrGoChoice => {
                    // Simple strategy: always keep going
                    GameEvent::Go {
                        player_id: self.opponent_id,
                    }
                }
                TurnStage::Move => GameEvent::Move {
                    player_id: self.opponent_id,
                    moves: strategy.choose_move(),
                },
            };

            if self.game.validate(&event) {
                self.game.consume(&event);
                if calculate_points {
                    let dice_roll_count = self
                        .game
                        .players
                        .get(&self.opponent_id)
                        .unwrap()
                        .dice_roll_count;
                    let points_rules =
                        PointsRules::new(&opponent_color, &self.game.board, self.game.dice);
                    let (points, adv_points) = points_rules.get_points(dice_roll_count);
                    // Reward proportional to the points scored
                    // (cast before subtracting to avoid u8 underflow)
                    reward -= Self::REWARD_RATIO * (points as f32 - adv_points as f32);
                }
            }
        }
        reward
    }
}

impl AsMut<TrictracEnvironment> for TrictracEnvironment {
    fn as_mut(&mut self) -> &mut Self {
        self
    }
}
123 bot/src/burnrl/main.rs Normal file
@@ -0,0 +1,123 @@
use bot::burnrl::algos::{
    dqn, dqn_big, dqn_valid, ppo, ppo_big, ppo_valid, sac, sac_big, sac_valid,
};
use bot::burnrl::environment::TrictracEnvironment;
use bot::burnrl::environment_big::TrictracEnvironment as TrictracEnvironmentBig;
use bot::burnrl::environment_valid::TrictracEnvironment as TrictracEnvironmentValid;
use bot::burnrl::utils::{demo_model, Config};
use burn::backend::{Autodiff, NdArray};
use burn_rl::base::ElemType;
use std::env;

type Backend = Autodiff<NdArray<ElemType>>;

fn main() {
    let args: Vec<String> = env::args().collect();
    if args.len() < 2 {
        eprintln!("usage: {} <algo>", args[0]);
        std::process::exit(1);
    }
    let algo = &args[1];
    // let dir_path = &args[2];

    let path = format!("bot/models/burnrl_{algo}");
    println!(
        "info: loading configuration from file {:?}",
        confy::get_configuration_file_path("trictrac_bot", None).unwrap()
    );
    let mut conf: Config = confy::load("trictrac_bot", None).expect("Could not load config");
    conf.save_path = Some(path.clone());
    println!("{conf}----------");

    match algo.as_str() {
        "dqn" => {
            let _agent = dqn::run::<TrictracEnvironment, Backend>(&conf, false);
            println!("> Loading the model for testing");
            let loaded_model = dqn::load_model(conf.dense_size, &path);
            let loaded_agent: burn_rl::agent::DQN<TrictracEnvironment, _, _> =
                burn_rl::agent::DQN::new(loaded_model.unwrap());

            println!("> Testing with the loaded model");
            demo_model(loaded_agent);
        }
        "dqn_big" => {
            let _agent = dqn_big::run::<TrictracEnvironmentBig, Backend>(&conf, false);
            println!("> Loading the model for testing");
            let loaded_model = dqn_big::load_model(conf.dense_size, &path);
            let loaded_agent: burn_rl::agent::DQN<TrictracEnvironmentBig, _, _> =
                burn_rl::agent::DQN::new(loaded_model.unwrap());

            println!("> Testing with the loaded model");
            demo_model(loaded_agent);
        }
        "dqn_valid" => {
            let _agent = dqn_valid::run::<TrictracEnvironmentValid, Backend>(&conf, false);
            println!("> Loading the model for testing");
            let loaded_model = dqn_valid::load_model(conf.dense_size, &path);
            let loaded_agent: burn_rl::agent::DQN<TrictracEnvironmentValid, _, _> =
                burn_rl::agent::DQN::new(loaded_model.unwrap());

            println!("> Testing with the loaded model");
            demo_model(loaded_agent);
        }
        "sac" => {
            let _agent = sac::run::<TrictracEnvironment, Backend>(&conf, false);
            println!("> Loading the model for testing");
            let loaded_model = sac::load_model(conf.dense_size, &path);
            let loaded_agent: burn_rl::agent::SAC<TrictracEnvironment, _, _> =
                burn_rl::agent::SAC::new(loaded_model.unwrap());

            println!("> Testing with the loaded model");
            demo_model(loaded_agent);
        }
        "sac_big" => {
            let _agent = sac_big::run::<TrictracEnvironmentBig, Backend>(&conf, false);
            println!("> Loading the model for testing");
            let loaded_model = sac_big::load_model(conf.dense_size, &path);
            let loaded_agent: burn_rl::agent::SAC<TrictracEnvironmentBig, _, _> =
                burn_rl::agent::SAC::new(loaded_model.unwrap());

            println!("> Testing with the loaded model");
            demo_model(loaded_agent);
        }
        "sac_valid" => {
            let _agent = sac_valid::run::<TrictracEnvironmentValid, Backend>(&conf, false);
            println!("> Loading the model for testing");
            let loaded_model = sac_valid::load_model(conf.dense_size, &path);
            let loaded_agent: burn_rl::agent::SAC<TrictracEnvironmentValid, _, _> =
                burn_rl::agent::SAC::new(loaded_model.unwrap());

            println!("> Testing with the loaded model");
            demo_model(loaded_agent);
        }
        "ppo" => {
            let _agent = ppo::run::<TrictracEnvironment, Backend>(&conf, false);
            println!("> Loading the model for testing");
            let loaded_model = ppo::load_model(conf.dense_size, &path);
            let loaded_agent: burn_rl::agent::PPO<TrictracEnvironment, _, _> =
                burn_rl::agent::PPO::new(loaded_model.unwrap());

            println!("> Testing with the loaded model");
            demo_model(loaded_agent);
        }
        "ppo_big" => {
            let _agent = ppo_big::run::<TrictracEnvironmentBig, Backend>(&conf, false);
            println!("> Loading the model for testing");
            let loaded_model = ppo_big::load_model(conf.dense_size, &path);
            let loaded_agent: burn_rl::agent::PPO<TrictracEnvironmentBig, _, _> =
                burn_rl::agent::PPO::new(loaded_model.unwrap());

            println!("> Testing with the loaded model");
            demo_model(loaded_agent);
        }
        "ppo_valid" => {
            let _agent = ppo_valid::run::<TrictracEnvironmentValid, Backend>(&conf, false);
            println!("> Loading the model for testing");
            let loaded_model = ppo_valid::load_model(conf.dense_size, &path);
            let loaded_agent: burn_rl::agent::PPO<TrictracEnvironmentValid, _, _> =
                burn_rl::agent::PPO::new(loaded_model.unwrap());

            println!("> Testing with the loaded model");
            demo_model(loaded_agent);
        }
        &_ => {
            println!("unknown algo {algo}");
        }
    }
}
5 bot/src/burnrl/mod.rs Normal file
@@ -0,0 +1,5 @@
pub mod algos;
pub mod environment;
pub mod environment_big;
pub mod environment_valid;
pub mod utils;
132 bot/src/burnrl/utils.rs Normal file
@@ -0,0 +1,132 @@
use burn::module::{Param, ParamId};
use burn::nn::Linear;
use burn::tensor::backend::Backend;
use burn::tensor::Tensor;
use burn_rl::base::{Agent, ElemType, Environment};
use serde::{Deserialize, Serialize};

#[derive(Serialize, Deserialize)]
pub struct Config {
    pub save_path: Option<String>,
    pub max_steps: usize, // max steps per episode
    pub num_episodes: usize,
    pub dense_size: usize, // neural network complexity

    // discount factor; higher values favor long-term strategies
    pub gamma: f32,
    // soft update rate for the target network; lower = slower adaptation,
    // less sensitive to lucky streaks
    pub tau: f32,
    // step size; low = slower learning, high = risk of never converging
    pub learning_rate: f32,
    // number of past experiences used to compute the mean error
    pub batch_size: usize,
    // maximum correction applied to the gradient (default: 100)
    pub clip_grad: f32,

    // ---- for SAC
    pub min_probability: f32,

    // ---- for DQN
    // epsilon initial value (0.9 => more exploration)
    pub eps_start: f64,
    pub eps_end: f64,
    // higher eps_decay = epsilon decreases more slowly
    // used in: epsilon = eps_end + (eps_start - eps_end) * e^(-step / eps_decay)
    // epsilon is updated at the start of each episode
    // (see the worked example after this struct)
    pub eps_decay: f64,

    // ---- for PPO
    pub lambda: f32,
    pub epsilon_clip: f32,
    pub critic_weight: f32,
    pub entropy_weight: f32,
    pub epochs: usize,
}
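
// Worked example of the epsilon schedule above (using the defaults below,
// eps_start = 0.9, eps_end = 0.05, eps_decay = 1000): at step 1000,
// epsilon = 0.05 + 0.85 * e^(-1) ≈ 0.36; at step 3000 it is ≈ 0.09.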

impl Default for Config {
    fn default() -> Self {
        Self {
            save_path: None,
            max_steps: 2000,
            num_episodes: 1000,
            dense_size: 256,
            gamma: 0.999,
            tau: 0.005,
            learning_rate: 0.001,
            batch_size: 32,
            clip_grad: 100.0,
            min_probability: 1e-9,
            eps_start: 0.9,
            eps_end: 0.05,
            eps_decay: 1000.0,
            lambda: 0.95,
            epsilon_clip: 0.2,
            critic_weight: 0.5,
            entropy_weight: 0.01,
            epochs: 8,
        }
    }
}

impl std::fmt::Display for Config {
    fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result {
        let mut s = String::new();
        s.push_str(&format!("max_steps={:?}\n", self.max_steps));
        s.push_str(&format!("num_episodes={:?}\n", self.num_episodes));
        s.push_str(&format!("dense_size={:?}\n", self.dense_size));
        s.push_str(&format!("eps_start={:?}\n", self.eps_start));
        s.push_str(&format!("eps_end={:?}\n", self.eps_end));
        s.push_str(&format!("eps_decay={:?}\n", self.eps_decay));
        s.push_str(&format!("gamma={:?}\n", self.gamma));
        s.push_str(&format!("tau={:?}\n", self.tau));
        s.push_str(&format!("learning_rate={:?}\n", self.learning_rate));
        s.push_str(&format!("batch_size={:?}\n", self.batch_size));
        s.push_str(&format!("clip_grad={:?}\n", self.clip_grad));
        s.push_str(&format!("min_probability={:?}\n", self.min_probability));
        s.push_str(&format!("lambda={:?}\n", self.lambda));
        s.push_str(&format!("epsilon_clip={:?}\n", self.epsilon_clip));
        s.push_str(&format!("critic_weight={:?}\n", self.critic_weight));
        s.push_str(&format!("entropy_weight={:?}\n", self.entropy_weight));
        s.push_str(&format!("epochs={:?}\n", self.epochs));
        write!(f, "{s}")
    }
}

pub fn demo_model<E: Environment>(agent: impl Agent<E>) {
    let mut env = E::new(true);
    let mut state = env.state();
    let mut done = false;
    while !done {
        if let Some(action) = agent.react(&state) {
            let snapshot = env.step(action);
            state = *snapshot.state();
            done = snapshot.done();
        }
    }
}
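
// Note: demo_model loops until the environment reports done; if `react` ever
// returned None here the loop would spin without stepping, so agents are
// expected to always return an action for a valid state.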

fn soft_update_tensor<const N: usize, B: Backend>(
    this: &Param<Tensor<B, N>>,
    that: &Param<Tensor<B, N>>,
    tau: ElemType,
) -> Param<Tensor<B, N>> {
    let that_weight = that.val();
    let this_weight = this.val();
    // Polyak averaging: new = (1 - tau) * current + tau * incoming
    let new_weight = this_weight * (1.0 - tau) + that_weight * tau;

    Param::initialized(ParamId::new(), new_weight)
}

pub fn soft_update_linear<B: Backend>(
    this: Linear<B>,
    that: &Linear<B>,
    tau: ElemType,
) -> Linear<B> {
    let weight = soft_update_tensor(&this.weight, &that.weight, tau);
    let bias = match (&this.bias, &that.bias) {
        (Some(this_bias), Some(that_bias)) => Some(soft_update_tensor(this_bias, that_bias, tau)),
        _ => None,
    };

    Linear::<B> { weight, bias }
}
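
// Usage sketch (hypothetical field name `linear1`): blending a target
// network's layer toward the online network's, as in a DQN soft update:
//     target.linear1 = soft_update_linear(target.linear1, &online.linear1, 0.005);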
153 bot/src/dqn_simple/dqn_model.rs Normal file
@@ -0,0 +1,153 @@
use crate::training_common_big::TrictracAction;
use serde::{Deserialize, Serialize};

/// Configuration for the DQN agent
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct DqnConfig {
    pub state_size: usize,
    pub hidden_size: usize,
    pub num_actions: usize,
    pub learning_rate: f64,
    pub gamma: f64,
    pub epsilon: f64,
    pub epsilon_decay: f64,
    pub epsilon_min: f64,
    pub replay_buffer_size: usize,
    pub batch_size: usize,
}

impl Default for DqnConfig {
    fn default() -> Self {
        Self {
            state_size: 36,
            hidden_size: 512, // larger size to handle the enlarged action space
            num_actions: TrictracAction::action_space_size(),
            learning_rate: 0.001,
            gamma: 0.99,
            epsilon: 0.1,
            epsilon_decay: 0.995,
            epsilon_min: 0.01,
            replay_buffer_size: 10000,
            batch_size: 32,
        }
    }
}

/// Simplified DQN neural network (plain weight matrices)
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct SimpleNeuralNetwork {
    pub weights1: Vec<Vec<f32>>,
    pub biases1: Vec<f32>,
    pub weights2: Vec<Vec<f32>>,
    pub biases2: Vec<f32>,
    pub weights3: Vec<Vec<f32>>,
    pub biases3: Vec<f32>,
}

impl SimpleNeuralNetwork {
    pub fn new(input_size: usize, hidden_size: usize, output_size: usize) -> Self {
        use rand::{thread_rng, Rng};
        let mut rng = thread_rng();

        // Random weight initialization with He scaling (sqrt(2 / fan_in)),
        // suited to the ReLU activations below
        let scale1 = (2.0 / input_size as f32).sqrt();
        let weights1 = (0..hidden_size)
            .map(|_| {
                (0..input_size)
                    .map(|_| rng.gen_range(-scale1..scale1))
                    .collect()
            })
            .collect();
        let biases1 = vec![0.0; hidden_size];

        let scale2 = (2.0 / hidden_size as f32).sqrt();
        let weights2 = (0..hidden_size)
            .map(|_| {
                (0..hidden_size)
                    .map(|_| rng.gen_range(-scale2..scale2))
                    .collect()
            })
            .collect();
        let biases2 = vec![0.0; hidden_size];

        let scale3 = (2.0 / hidden_size as f32).sqrt();
        let weights3 = (0..output_size)
            .map(|_| {
                (0..hidden_size)
                    .map(|_| rng.gen_range(-scale3..scale3))
                    .collect()
            })
            .collect();
        let biases3 = vec![0.0; output_size];

        Self {
            weights1,
            biases1,
            weights2,
            biases2,
            weights3,
            biases3,
        }
    }

    pub fn forward(&self, input: &[f32]) -> Vec<f32> {
        // First layer
        let mut layer1: Vec<f32> = self.biases1.clone();
        for (i, neuron_weights) in self.weights1.iter().enumerate() {
            for (j, &weight) in neuron_weights.iter().enumerate() {
                if j < input.len() {
                    layer1[i] += input[j] * weight;
                }
            }
            layer1[i] = layer1[i].max(0.0); // ReLU
        }

        // Second layer
        let mut layer2: Vec<f32> = self.biases2.clone();
        for (i, neuron_weights) in self.weights2.iter().enumerate() {
            for (j, &weight) in neuron_weights.iter().enumerate() {
                if j < layer1.len() {
                    layer2[i] += layer1[j] * weight;
                }
            }
            layer2[i] = layer2[i].max(0.0); // ReLU
        }

        // Output layer (raw Q-values, no activation)
        let mut output: Vec<f32> = self.biases3.clone();
        for (i, neuron_weights) in self.weights3.iter().enumerate() {
            for (j, &weight) in neuron_weights.iter().enumerate() {
                if j < layer2.len() {
                    output[i] += layer2[j] * weight;
                }
            }
        }

        output
    }
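
    // Shape sketch for forward() above: with state_size = 36 and
    // hidden_size = 512, a 36-float input goes through two 512-wide ReLU
    // layers to `num_actions` raw Q-values; one pass costs roughly
    // 36*512 + 512*512 + 512*num_actions multiply-adds.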

    pub fn get_best_action(&self, input: &[f32]) -> usize {
        let q_values = self.forward(input);
        q_values
            .iter()
            .enumerate()
            .max_by(|(_, a), (_, b)| a.partial_cmp(b).unwrap())
            .map(|(index, _)| index)
            .unwrap_or(0)
    }

    pub fn save<P: AsRef<std::path::Path>>(
        &self,
        path: P,
    ) -> Result<(), Box<dyn std::error::Error>> {
        let data = serde_json::to_string_pretty(self)?;
        std::fs::write(path, data)?;
        Ok(())
    }

    pub fn load<P: AsRef<std::path::Path>>(path: P) -> Result<Self, Box<dyn std::error::Error>> {
        let data = std::fs::read_to_string(path)?;
        let network = serde_json::from_str(&data)?;
        Ok(network)
    }
}
494 bot/src/dqn_simple/dqn_trainer.rs Normal file
@@ -0,0 +1,494 @@
use crate::{CheckerMove, Color, GameState, PlayerId};
use rand::prelude::SliceRandom;
use rand::{thread_rng, Rng};
use serde::{Deserialize, Serialize};
use std::collections::VecDeque;
use store::{GameEvent, MoveRules, PointsRules, Stage, TurnStage};

use super::dqn_model::{DqnConfig, SimpleNeuralNetwork};
use crate::training_common_big::{get_valid_actions, TrictracAction};

/// Experience stored in the replay buffer
#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct Experience {
    pub state: Vec<f32>,
    pub action: TrictracAction,
    pub reward: f32,
    pub next_state: Vec<f32>,
    pub done: bool,
}

/// Replay buffer storing past experiences
#[derive(Debug)]
pub struct ReplayBuffer {
    buffer: VecDeque<Experience>,
    capacity: usize,
}

impl ReplayBuffer {
    pub fn new(capacity: usize) -> Self {
        Self {
            buffer: VecDeque::with_capacity(capacity),
            capacity,
        }
    }

    pub fn push(&mut self, experience: Experience) {
        if self.buffer.len() >= self.capacity {
            self.buffer.pop_front();
        }
        self.buffer.push_back(experience);
    }

    pub fn sample(&self, batch_size: usize) -> Vec<Experience> {
        let mut rng = thread_rng();
        let len = self.buffer.len();
        if len < batch_size {
            return self.buffer.iter().cloned().collect();
        }

        let mut batch = Vec::with_capacity(batch_size);
        for _ in 0..batch_size {
            let idx = rng.gen_range(0..len);
            batch.push(self.buffer[idx].clone());
        }
        batch
    }

    pub fn is_empty(&self) -> bool {
        self.buffer.is_empty()
    }

    pub fn len(&self) -> usize {
        self.buffer.len()
    }
}
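
// Note: sample() above draws each index independently, i.e. with replacement,
// so a batch may contain duplicate experiences; this keeps the buffer simple
// at the cost of slightly correlated batches.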

/// DQN agent for reinforcement learning
#[derive(Debug)]
pub struct DqnAgent {
    config: DqnConfig,
    model: SimpleNeuralNetwork,
    target_model: SimpleNeuralNetwork,
    replay_buffer: ReplayBuffer,
    epsilon: f64,
    step_count: usize,
}

impl DqnAgent {
    pub fn new(config: DqnConfig) -> Self {
        let model =
            SimpleNeuralNetwork::new(config.state_size, config.hidden_size, config.num_actions);
        let target_model = model.clone();
        let replay_buffer = ReplayBuffer::new(config.replay_buffer_size);
        let epsilon = config.epsilon;

        Self {
            config,
            model,
            target_model,
            replay_buffer,
            epsilon,
            step_count: 0,
        }
    }

    pub fn select_action(&mut self, game_state: &GameState, state: &[f32]) -> TrictracAction {
        let valid_actions = get_valid_actions(game_state);

        if valid_actions.is_empty() {
            // Fallback if no valid action is available
            return TrictracAction::Roll;
        }

        let mut rng = thread_rng();
        if rng.gen::<f64>() < self.epsilon {
            // Exploration: random valid action
            valid_actions
                .choose(&mut rng)
                .cloned()
                .unwrap_or(TrictracAction::Roll)
        } else {
            // Exploitation: best valid action according to the model
            let q_values = self.model.forward(state);

            let mut best_action = &valid_actions[0];
            let mut best_q_value = f32::NEG_INFINITY;

            for action in &valid_actions {
                let action_index = action.to_action_index();
                if action_index < q_values.len() {
                    let q_value = q_values[action_index];
                    if q_value > best_q_value {
                        best_q_value = q_value;
                        best_action = action;
                    }
                }
            }

            best_action.clone()
        }
    }

    pub fn store_experience(&mut self, experience: Experience) {
        self.replay_buffer.push(experience);
    }

    pub fn train(&mut self) {
        if self.replay_buffer.len() < self.config.batch_size {
            return;
        }

        // For now, training is simulated by updating epsilon only;
        // a complete implementation would run backpropagation here
        self.epsilon = (self.epsilon * self.config.epsilon_decay).max(self.config.epsilon_min);
        self.step_count += 1;

        // Update the target model every 100 steps
        if self.step_count % 100 == 0 {
            self.target_model = self.model.clone();
        }
    }

    pub fn save_model<P: AsRef<std::path::Path>>(
        &self,
        path: P,
    ) -> Result<(), Box<dyn std::error::Error>> {
        self.model.save(path)
    }

    pub fn get_epsilon(&self) -> f64 {
        self.epsilon
    }

    pub fn get_step_count(&self) -> usize {
        self.step_count
    }
}

/// Trictrac environment for training
#[derive(Debug)]
pub struct TrictracEnv {
    pub game_state: GameState,
    pub agent_player_id: PlayerId,
    pub opponent_player_id: PlayerId,
    pub agent_color: Color,
    pub max_steps: usize,
    pub current_step: usize,
}

impl Default for TrictracEnv {
    fn default() -> Self {
        let mut game_state = GameState::new(false);
        game_state.init_player("agent");
        game_state.init_player("opponent");

        Self {
            game_state,
            agent_player_id: 1,
            opponent_player_id: 2,
            agent_color: Color::White,
            max_steps: 1000,
            current_step: 0,
        }
    }
}

impl TrictracEnv {
    pub fn reset(&mut self) -> Vec<f32> {
        self.game_state = GameState::new(false);
        self.game_state.init_player("agent");
        self.game_state.init_player("opponent");

        // Start the game
        self.game_state.consume(&GameEvent::BeginGame {
            goes_first: self.agent_player_id,
        });

        self.current_step = 0;
        self.game_state.to_vec_float()
    }

    pub fn step(&mut self, action: TrictracAction) -> (Vec<f32>, f32, bool) {
        let mut reward = 0.0;

        // Apply the agent's action
        if self.game_state.active_player_id == self.agent_player_id {
            reward += self.apply_agent_action(action);
        }

        // Let the opponent play (simple strategy)
        while self.game_state.active_player_id == self.opponent_player_id
            && self.game_state.stage != Stage::Ended
        {
            reward += self.play_opponent_turn();
        }

        // Check whether the game is over
        let done = self.game_state.stage == Stage::Ended
            || self.game_state.determine_winner().is_some()
            || self.current_step >= self.max_steps;

        // Final reward if the game is over
        if done {
            if let Some(winner) = self.game_state.determine_winner() {
                if winner == self.agent_player_id {
                    reward += 100.0; // bonus for winning
                } else {
                    reward -= 50.0; // penalty for losing
                }
            }
        }

        self.current_step += 1;
        let next_state = self.game_state.to_vec_float();
        (next_state, reward, done)
    }
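
    // Reward shaping summary for one step: the terminal bonus above is
    // +100.0 for a win and -50.0 for a loss, on top of the per-action
    // rewards applied in apply_agent_action below (+0.1 for Roll,
    // +0.2 for Go/Move, -2.0 for an invalid action).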

    fn apply_agent_action(&mut self, action: TrictracAction) -> f32 {
        let mut reward = 0.0;

        let event = match action {
            TrictracAction::Roll => {
                // Roll the dice
                reward += 0.1;
                Some(GameEvent::Roll {
                    player_id: self.agent_player_id,
                })
            }
            // TrictracAction::Mark => {
            //     // Mark points
            //     let points = self.game_state.
            //     reward += 0.1 * points as f32;
            //     Some(GameEvent::Mark {
            //         player_id: self.agent_player_id,
            //         points,
            //     })
            // }
            TrictracAction::Go => {
                // Continue after winning a hole
                reward += 0.2;
                Some(GameEvent::Go {
                    player_id: self.agent_player_id,
                })
            }
            TrictracAction::Move {
                dice_order,
                from1,
                from2,
            } => {
                // Perform a move
                let (dice1, dice2) = if dice_order {
                    (self.game_state.dice.values.0, self.game_state.dice.values.1)
                } else {
                    (self.game_state.dice.values.1, self.game_state.dice.values.0)
                };
                let mut to1 = from1 + dice1 as usize;
                let mut to2 = from2 + dice2 as usize;

                // Handle taking the opponent's corner by power ("prise de coin par puissance")
                let opp_rest_field = 13;
                if to1 == opp_rest_field && to2 == opp_rest_field {
                    to1 -= 1;
                    to2 -= 1;
                }

                let checker_move1 = store::CheckerMove::new(from1, to1).unwrap_or_default();
                let checker_move2 = store::CheckerMove::new(from2, to2).unwrap_or_default();

                reward += 0.2;
                Some(GameEvent::Move {
                    player_id: self.agent_player_id,
                    moves: (checker_move1, checker_move2),
                })
            }
        };

        // Apply the event if it is valid
        if let Some(event) = event {
            if self.game_state.validate(&event) {
                self.game_state.consume(&event);

                // Simulate the dice result after a Roll
                if matches!(action, TrictracAction::Roll) {
                    let mut rng = thread_rng();
                    let dice_values = (rng.gen_range(1..=6), rng.gen_range(1..=6));
                    let dice_event = GameEvent::RollResult {
                        player_id: self.agent_player_id,
                        dice: store::Dice {
                            values: dice_values,
                        },
                    };
                    if self.game_state.validate(&dice_event) {
                        self.game_state.consume(&dice_event);
                    }
                }
            } else {
                // Penalty for an invalid action
                reward -= 2.0;
            }
        }

        reward
    }

    // TODO: use the default bot strategy
    fn play_opponent_turn(&mut self) -> f32 {
        let mut reward = 0.0;
        let event = match self.game_state.turn_stage {
            TurnStage::RollDice => GameEvent::Roll {
                player_id: self.opponent_player_id,
            },
            TurnStage::RollWaiting => {
                let mut rng = thread_rng();
                let dice_values = (rng.gen_range(1..=6), rng.gen_range(1..=6));
                GameEvent::RollResult {
                    player_id: self.opponent_player_id,
                    dice: store::Dice {
                        values: dice_values,
                    },
                }
            }
            TurnStage::MarkAdvPoints | TurnStage::MarkPoints => {
                let opponent_color = self.agent_color.opponent_color();
                let dice_roll_count = self
                    .game_state
                    .players
                    .get(&self.opponent_player_id)
                    .unwrap()
                    .dice_roll_count;
                let points_rules = PointsRules::new(
                    &opponent_color,
                    &self.game_state.board,
                    self.game_state.dice,
                );
                let (points, adv_points) = points_rules.get_points(dice_roll_count);
                // Reward proportional to the points scored
                // (cast before subtracting to avoid u8 underflow)
                reward -= 0.3 * (points as f32 - adv_points as f32);

                GameEvent::Mark {
                    player_id: self.opponent_player_id,
                    points,
                }
            }
            TurnStage::Move => {
                let opponent_color = self.agent_color.opponent_color();
                let rules = MoveRules::new(
                    &opponent_color,
                    &self.game_state.board,
                    self.game_state.dice,
                );
                let possible_moves = rules.get_possible_moves_sequences(true, vec![]);

                // Simple strategy: random choice
                let mut rng = thread_rng();
                let choosen_move = *possible_moves
                    .choose(&mut rng)
                    .unwrap_or(&(CheckerMove::default(), CheckerMove::default()));

                GameEvent::Move {
                    player_id: self.opponent_player_id,
                    moves: if opponent_color == Color::White {
                        choosen_move
                    } else {
                        (choosen_move.0.mirror(), choosen_move.1.mirror())
                    },
                }
            }
            TurnStage::HoldOrGoChoice => {
                // Simple strategy: always keep going
                GameEvent::Go {
                    player_id: self.opponent_player_id,
                }
            }
        };
        if self.game_state.validate(&event) {
            self.game_state.consume(&event);
        }
        reward
    }
}

/// Trainer for the DQN model
pub struct DqnTrainer {
    agent: DqnAgent,
    env: TrictracEnv,
}

impl DqnTrainer {
    pub fn new(config: DqnConfig) -> Self {
        Self {
            agent: DqnAgent::new(config),
            env: TrictracEnv::default(),
        }
    }

    pub fn train_episode(&mut self) -> f32 {
        let mut total_reward = 0.0;
        let mut state = self.env.reset();
        // let mut step_count = 0;

        loop {
            // step_count += 1;
            let action = self.agent.select_action(&self.env.game_state, &state);
            let (next_state, reward, done) = self.env.step(action.clone());
            total_reward += reward;

            let experience = Experience {
                state: state.clone(),
                action,
                reward,
                next_state: next_state.clone(),
                done,
            };
            self.agent.store_experience(experience);
            self.agent.train();

            if done {
                break;
            }
            // if step_count % 100 == 0 {
            //     println!("{:?}", next_state);
            // }
            state = next_state;
        }

        total_reward
    }

    pub fn train(
        &mut self,
        episodes: usize,
        save_every: usize,
        model_path: &str,
    ) -> Result<(), Box<dyn std::error::Error>> {
        println!("Starting DQN training for {episodes} episodes");

        for episode in 1..=episodes {
            let reward = self.train_episode();

            if episode % 100 == 0 {
                println!(
                    "Episode {}/{}: reward = {:.2}, epsilon = {:.3}, steps = {}",
                    episode,
                    episodes,
                    reward,
                    self.agent.get_epsilon(),
                    self.agent.get_step_count()
                );
            }

            if episode % save_every == 0 {
                let save_path = format!("{model_path}_episode_{episode}.json");
                self.agent.save_model(&save_path)?;
                println!("Model saved: {save_path}");
            }
        }

        // Save the final model
        let final_path = format!("{model_path}_final.json");
        self.agent.save_model(&final_path)?;
        println!("Final model saved: {final_path}");

        Ok(())
    }
}
109 bot/src/dqn_simple/main.rs Normal file
@@ -0,0 +1,109 @@
use bot::dqn_simple::dqn_model::DqnConfig;
use bot::dqn_simple::dqn_trainer::DqnTrainer;
use bot::training_common::TrictracAction;
use std::env;

fn main() -> Result<(), Box<dyn std::error::Error>> {
    env_logger::init();

    let args: Vec<String> = env::args().collect();

    // Default parameters
    let mut episodes = 1000;
    let mut model_path = "models/dqn_model".to_string();
    let mut save_every = 100;

    // Parse the command-line arguments
    let mut i = 1;
    while i < args.len() {
        match args[i].as_str() {
            "--episodes" => {
                if i + 1 < args.len() {
                    episodes = args[i + 1].parse().unwrap_or(1000);
                    i += 2;
                } else {
                    eprintln!("Error: --episodes requires a value");
                    std::process::exit(1);
                }
            }
            "--model-path" => {
                if i + 1 < args.len() {
                    model_path = args[i + 1].clone();
                    i += 2;
                } else {
                    eprintln!("Error: --model-path requires a value");
                    std::process::exit(1);
                }
            }
            "--save-every" => {
                if i + 1 < args.len() {
                    save_every = args[i + 1].parse().unwrap_or(100);
                    i += 2;
                } else {
                    eprintln!("Error: --save-every requires a value");
                    std::process::exit(1);
                }
            }
            "--help" | "-h" => {
                print_help();
                std::process::exit(0);
            }
            _ => {
                eprintln!("Unknown argument: {}", args[i]);
                print_help();
                std::process::exit(1);
            }
        }
    }

    // Create the models directory if it does not exist
    std::fs::create_dir_all("models")?;

    println!("DQN training configuration:");
    println!("  Episodes: {episodes}");
    println!("  Model path: {model_path}");
    println!("  Saving every {save_every} episodes");
    println!();

    // DQN configuration
    let config = DqnConfig {
        state_size: 36, // state.to_vec size
        hidden_size: 256,
        num_actions: TrictracAction::action_space_size(),
        learning_rate: 0.001,
        gamma: 0.99,
        epsilon: 0.9, // start with more exploration
        epsilon_decay: 0.995,
        epsilon_min: 0.01,
        replay_buffer_size: 10000,
        batch_size: 32,
    };

    // Create and run the trainer
    let mut trainer = DqnTrainer::new(config);
    trainer.train(episodes, save_every, &model_path)?;

    println!("Training completed successfully!");
    println!("To use the trained model:");
    println!("  cargo run --bin=client_cli -- --bot dqn:{model_path}_final.json,dummy");

    Ok(())
}

fn print_help() {
    println!("DQN trainer for Trictrac");
    println!();
    println!("USAGE:");
    println!("  cargo run --bin=train_dqn [OPTIONS]");
    println!();
    println!("OPTIONS:");
    println!("  --episodes <NUM>     Number of training episodes (default: 1000)");
    println!("  --model-path <PATH>  Base path for saving the models (default: models/dqn_model)");
    println!("  --save-every <NUM>   Save the model every N episodes (default: 100)");
    println!("  -h, --help           Show this help");
    println!();
    println!("EXAMPLES:");
    println!("  cargo run --bin=train_dqn");
    println!("  cargo run --bin=train_dqn -- --episodes 5000 --save-every 500");
    println!("  cargo run --bin=train_dqn -- --model-path models/my_model --episodes 2000");
}
2 bot/src/dqn_simple/mod.rs Normal file
@@ -0,0 +1,2 @@
pub mod dqn_model;
pub mod dqn_trainer;
191 bot/src/lib.rs Normal file
@@ -0,0 +1,191 @@
pub mod burnrl;
pub mod dqn_simple;
pub mod strategy;
pub mod training_common;
pub mod training_common_big;
pub mod trictrac_board;

use log::debug;
use store::{CheckerMove, Color, GameEvent, GameState, PlayerId, PointsRules, Stage, TurnStage};
pub use strategy::default::DefaultStrategy;
pub use strategy::dqn::DqnStrategy;
pub use strategy::dqnburn::DqnBurnStrategy;
pub use strategy::erroneous_moves::ErroneousStrategy;
pub use strategy::random::RandomStrategy;
pub use strategy::stable_baselines3::StableBaselines3Strategy;

pub trait BotStrategy: std::fmt::Debug {
    fn get_game(&self) -> &GameState;
    fn get_mut_game(&mut self) -> &mut GameState;
    fn calculate_points(&self) -> u8;
    fn calculate_adv_points(&self) -> u8;
    fn choose_move(&self) -> (CheckerMove, CheckerMove);
    fn choose_go(&self) -> bool;
    fn set_player_id(&mut self, player_id: PlayerId);
    fn set_color(&mut self, color: Color);
    fn init_players(&mut self) {
        self.get_mut_game().init_player("p1");
        self.get_mut_game().init_player("p2");
    }
}

#[derive(Debug)]
pub struct Bot {
    pub player_id: PlayerId,
    strategy: Box<dyn BotStrategy>,
    color: Color,
    // schools_enabled: bool,
}

impl Default for Bot {
    fn default() -> Self {
        let strategy = DefaultStrategy::default();
        Self {
            player_id: 1,
            strategy: Box::new(strategy),
            color: Color::White,
            // schools_enabled: false,
        }
    }
}

impl Bot {
    /// Initializes a bot
    // pub fn new(mut strategy: Box<dyn BotStrategy>, color: Color, schools_enabled: bool) -> Self {
    pub fn new(mut strategy: Box<dyn BotStrategy>, color: Color) -> Self {
        // let game = strategy.get_mut_game();
        strategy.init_players();
        let player_id = match color {
            Color::White => 1,
            Color::Black => 2,
        };
        // strategy.set_player_id(player_id);
        // strategy.set_color(color);
        Self {
            player_id,
            strategy,
            color,
            // schools_enabled: false,
        }
    }

    pub fn handle_event(&mut self, event: &GameEvent) -> Option<GameEvent> {
        debug!(">>>> {:?} BOT handle", self.color);
        let game = self.strategy.get_mut_game();
        // The bot's internal game state always plays White:
        // mirror incoming events when the bot actually plays Black
        let internal_event = if self.color == Color::Black {
            &event.get_mirror()
        } else {
            event
        };

        let init_player_points = game.who_plays().map(|p| (p.points, p.holes));
        let turn_stage = game.turn_stage;
        game.consume(internal_event);
        if game.stage == Stage::Ended {
            debug!("<<<< end {:?} BOT handle", self.color);
            return None;
        }
        let active_player_id = if self.color == Color::Black {
            if game.active_player_id == 1 {
                2
            } else {
                1
            }
        } else {
            game.active_player_id
        };
        if active_player_id == self.player_id {
            let player_points = game.who_plays().map(|p| (p.points, p.holes));
            if self.color == Color::Black {
                debug!("  input (internal) evt : {internal_event:?}, points : {init_player_points:?}, stage : {turn_stage:?}");
            }
            let internal_event = match game.turn_stage {
                TurnStage::MarkAdvPoints => Some(GameEvent::Mark {
                    player_id: 1,
                    points: self.strategy.calculate_adv_points(),
                }),
                TurnStage::RollDice => Some(GameEvent::Roll { player_id: 1 }),
                TurnStage::MarkPoints => Some(GameEvent::Mark {
                    player_id: 1,
                    points: self.strategy.calculate_points(),
                }),
                TurnStage::Move => Some(GameEvent::Move {
                    player_id: 1,
                    moves: self.strategy.choose_move(),
                }),
                TurnStage::HoldOrGoChoice => {
                    if self.strategy.choose_go() {
                        Some(GameEvent::Go { player_id: 1 })
                    } else {
                        Some(GameEvent::Move {
                            player_id: 1,
                            moves: self.strategy.choose_move(),
                        })
                    }
                }
                _ => None,
            };
            return if self.color == Color::Black {
                debug!("  bot (internal) evt : {internal_event:?} ; points : {player_points:?}");
                debug!("<<<< end {:?} BOT handle", self.color);
                internal_event.map(|evt| evt.get_mirror())
            } else {
                debug!("<<<< end {:?} BOT handle", self.color);
                internal_event
            };
        }
        debug!("<<<< end {:?} BOT handle", self.color);
        None
    }
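
    // Usage sketch: the caller feeds every incoming GameEvent to the bot and
    // forwards any returned event back to the game, e.g.
    //     if let Some(reply) = bot.handle_event(&event) { /* send reply */ }
    // (the dispatch mechanism is up to the caller).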

    // Only used in the tests below
    pub fn get_state(&self) -> &GameState {
        self.strategy.get_game()
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use store::{Dice, Stage};

    #[test]
    fn test_new() {
        let bot = Bot::new(Box::new(DefaultStrategy::default()), Color::Black);
        // let bot = Bot::new(Box::new(DefaultStrategy::default()), Color::Black, false);
        assert_eq!(bot.get_state().stage, Stage::PreGame);
    }

    #[test]
    fn test_handle_event() {
        let mut bot = Bot::new(Box::new(DefaultStrategy::default()), Color::Black);
        // let mut bot = Bot::new(Box::new(DefaultStrategy::default()), Color::Black, false);
        let mut event = bot.handle_event(&GameEvent::BeginGame { goes_first: 2 });
        assert_eq!(event, Some(GameEvent::Roll { player_id: 2 }));
        assert_eq!(bot.get_state().active_player_id, 1); // bot internal active_player_id for black
        event = bot.handle_event(&GameEvent::RollResult {
            player_id: 2,
            dice: Dice { values: (2, 3) },
        });
        assert_eq!(
            event,
            Some(GameEvent::Move {
                player_id: 2,
                moves: (
                    CheckerMove::new(24, 21).unwrap(),
                    CheckerMove::new(24, 22).unwrap()
                )
            })
        );

        event = bot.handle_event(&GameEvent::BeginGame { goes_first: 1 });
        assert_eq!(event, None);

        assert_eq!(bot.get_state().active_player_id, 2); // internal active_player_id
        bot.handle_event(&GameEvent::RollResult {
            player_id: 1,
            dice: Dice { values: (2, 3) },
        });
        assert_eq!(bot.get_state().turn_stage, TurnStage::Move);
    }
}
79 bot/src/strategy/client.rs Normal file
@@ -0,0 +1,79 @@
use crate::{BotStrategy, CheckerMove, Color, GameState, PlayerId, PointsRules};

#[derive(Debug)]
pub struct ClientStrategy {
    pub game: GameState,
    pub player_id: PlayerId,
    pub color: Color,
}

impl Default for ClientStrategy {
    fn default() -> Self {
        let game = GameState::default();
        Self {
            game,
            player_id: 1,
            color: Color::White,
        }
    }
}

impl BotStrategy for ClientStrategy {
    fn get_game(&self) -> &GameState {
        &self.game
    }
    fn get_mut_game(&mut self) -> &mut GameState {
        &mut self.game
    }

    fn set_player_id(&mut self, player_id: PlayerId) {
        self.player_id = player_id;
    }

    fn set_color(&mut self, color: Color) {
        self.color = color;
    }

    fn calculate_points(&self) -> u8 {
        let dice_roll_count = self
            .get_game()
            .players
            .get(&self.player_id)
            .unwrap()
            .dice_roll_count;
        let points_rules = PointsRules::new(&Color::White, &self.game.board, self.game.dice);
        points_rules.get_points(dice_roll_count).0
    }

    fn calculate_adv_points(&self) -> u8 {
        let dice_roll_count = self
            .get_game()
            .players
            .get(&self.player_id)
            .unwrap()
            .dice_roll_count;
        let points_rules = PointsRules::new(&Color::White, &self.game.board, self.game.dice);
        points_rules.get_points(dice_roll_count).1
    }

    fn choose_go(&self) -> bool {
        true
    }

    fn choose_move(&self) -> (CheckerMove, CheckerMove) {
        let (dice1, dice2) = match self.color {
            Color::White => (self.game.dice.values.0 as i8, self.game.dice.values.1 as i8),
            Color::Black => (
                -(self.game.dice.values.0 as i8),
                -(self.game.dice.values.1 as i8),
            ),
        };

        let fields = self.game.board.get_color_fields(self.color);
        let first_field = fields.first().unwrap();
        (
            CheckerMove::new(first_field.0, (first_field.0 as i8 + dice1) as usize).unwrap(),
            CheckerMove::new(first_field.0, (first_field.0 as i8 + dice2) as usize).unwrap(),
        )
    }
}
86 bot/src/strategy/default.rs Normal file
@@ -0,0 +1,86 @@
use crate::{BotStrategy, CheckerMove, Color, GameState, PlayerId};
use store::MoveRules;

#[derive(Debug)]
pub struct DefaultStrategy {
    pub game: GameState,
    pub player_id: PlayerId,
    pub color: Color,
}

impl Default for DefaultStrategy {
    fn default() -> Self {
        let game = GameState::default();
        Self {
            game,
            player_id: 1,
            color: Color::White,
        }
    }
}

impl BotStrategy for DefaultStrategy {
    fn get_game(&self) -> &GameState {
        &self.game
    }
    fn get_mut_game(&mut self) -> &mut GameState {
        &mut self.game
    }

    fn set_color(&mut self, color: Color) {
        self.color = color;
    }

    fn set_player_id(&mut self, player_id: PlayerId) {
        self.player_id = player_id;
    }

    fn calculate_points(&self) -> u8 {
        // let dice_roll_count = self
        //     .get_game()
        //     .players
        //     .get(&self.player_id)
        //     .unwrap()
        //     .dice_roll_count;
        // let points_rules = PointsRules::new(&Color::White, &self.game.board, self.game.dice);
        // points_rules.get_points(dice_roll_count).0
        self.game.dice_points.0
    }

    fn calculate_adv_points(&self) -> u8 {
        // self.calculate_points()
        self.game.dice_points.1
    }

    fn choose_go(&self) -> bool {
        true
    }

    fn choose_move(&self) -> (CheckerMove, CheckerMove) {
        // Take the first legal move sequence; the internal board plays White,
        // so mirror the moves when the bot plays Black
        let rules = MoveRules::new(&self.color, &self.game.board, self.game.dice);
        let possible_moves = rules.get_possible_moves_sequences(true, vec![]);
        let choosen_move = *possible_moves
            .first()
            .unwrap_or(&(CheckerMove::default(), CheckerMove::default()));
        if self.color == Color::White {
            choosen_move
        } else {
            (choosen_move.0.mirror(), choosen_move.1.mirror())
        }

        // let (dice1, dice2) = match self.color {
        //     Color::White => (self.game.dice.values.0 as i8, self.game.dice.values.1 as i8),
        //     Color::Black => (
        //         0 - self.game.dice.values.0 as i8,
        //         0 - self.game.dice.values.1 as i8,
        //     ),
        // };
        //
        // let fields = self.game.board.get_color_fields(self.color);
        // let first_field = fields.first().unwrap();
        // (
        //     CheckerMove::new(first_field.0, (first_field.0 as i8 + dice1) as usize).unwrap(),
        //     CheckerMove::new(first_field.0, (first_field.0 as i8 + dice2) as usize).unwrap(),
        // )
    }
}
174 bot/src/strategy/dqn.rs Normal file
@@ -0,0 +1,174 @@
use crate::{BotStrategy, CheckerMove, Color, GameState, PlayerId};
use log::info;
use std::path::Path;
use store::MoveRules;

use crate::dqn_simple::dqn_model::SimpleNeuralNetwork;
use crate::training_common_big::{get_valid_actions, sample_valid_action, TrictracAction};

/// DQN strategy for the bot - only loads and uses a pre-trained model
#[derive(Debug)]
pub struct DqnStrategy {
    pub game: GameState,
    pub player_id: PlayerId,
    pub color: Color,
    pub model: Option<SimpleNeuralNetwork>,
}

impl Default for DqnStrategy {
    fn default() -> Self {
        Self {
            game: GameState::default(),
            player_id: 1,
            color: Color::White,
            model: None,
        }
    }
}

impl DqnStrategy {
    pub fn new() -> Self {
        Self::default()
    }

    pub fn new_with_model<P: AsRef<Path> + std::fmt::Debug>(model_path: P) -> Self {
        let mut strategy = Self::new();
        if let Ok(model) = SimpleNeuralNetwork::load(&model_path) {
            info!("Loading model {model_path:?}");
            strategy.model = Some(model);
        }
        strategy
    }

    /// Uses the DQN model to choose a valid action
    fn get_dqn_action(&self) -> Option<TrictracAction> {
        if let Some(ref model) = self.model {
            let state = self.game.to_vec_float();
            let valid_actions = get_valid_actions(&self.game);

            if valid_actions.is_empty() {
                return None;
            }

            // Get the Q-values for all actions
            let q_values = model.forward(&state);

            // Find the best valid action
            let mut best_action = &valid_actions[0];
            let mut best_q_value = f32::NEG_INFINITY;

            for action in &valid_actions {
                let action_index = action.to_action_index();
                if action_index < q_values.len() {
                    let q_value = q_values[action_index];
                    if q_value > best_q_value {
                        best_q_value = q_value;
                        best_action = action;
                    }
                }
            }

            Some(best_action.clone())
        } else {
            // Fallback: random valid action
            sample_valid_action(&self.game)
        }
    }
}
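
// Note: DqnStrategy scores only the actions returned by get_valid_actions,
// while DqnBurnStrategy (below) masks invalid entries of the full Q-value
// vector to -infinity; both end up taking the argmax over valid actions.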

impl BotStrategy for DqnStrategy {
    fn get_game(&self) -> &GameState {
        &self.game
    }

    fn get_mut_game(&mut self) -> &mut GameState {
        &mut self.game
    }

    fn set_color(&mut self, color: Color) {
        self.color = color;
    }

    fn set_player_id(&mut self, player_id: PlayerId) {
        self.player_id = player_id;
    }

    fn calculate_points(&self) -> u8 {
        self.game.dice_points.0
    }

    fn calculate_adv_points(&self) -> u8 {
        self.game.dice_points.1
    }

    fn choose_go(&self) -> bool {
        // Use the DQN to decide whether to keep going
        if let Some(action) = self.get_dqn_action() {
            matches!(action, TrictracAction::Go)
        } else {
            // Fallback: always keep going
            true
        }
    }

    fn choose_move(&self) -> (CheckerMove, CheckerMove) {
        // Use the DQN to choose the move
        if let Some(TrictracAction::Move {
            dice_order,
            from1,
            from2,
        }) = self.get_dqn_action()
        {
            let dicevals = self.game.dice.values;
            let (mut dice1, mut dice2) = if dice_order {
                (dicevals.0, dicevals.1)
            } else {
                (dicevals.1, dicevals.0)
            };

            if from1 == 0 {
                // empty move
                dice1 = 0;
            }
            let mut to1 = from1 + dice1 as usize;
            if 24 < to1 {
                // bearing off (sortie)
                to1 = 0;
            }
            if from2 == 0 {
                // empty move
                dice2 = 0;
            }
            let mut to2 = from2 + dice2 as usize;
            if 24 < to2 {
                // bearing off (sortie)
                to2 = 0;
            }

            let checker_move1 = CheckerMove::new(from1, to1).unwrap_or_default();
            let checker_move2 = CheckerMove::new(from2, to2).unwrap_or_default();

            let chosen_move = if self.color == Color::White {
                (checker_move1, checker_move2)
            } else {
                (checker_move1.mirror(), checker_move2.mirror())
            };

            return chosen_move;
        }

        // Fallback: use the default strategy
        let rules = MoveRules::new(&self.color, &self.game.board, self.game.dice);
        let possible_moves = rules.get_possible_moves_sequences(true, vec![]);

        let chosen_move = *possible_moves
            .first()
            .unwrap_or(&(CheckerMove::default(), CheckerMove::default()));

        if self.color == Color::White {
            chosen_move
        } else {
            (chosen_move.0.mirror(), chosen_move.1.mirror())
        }
    }
}
220 bot/src/strategy/dqnburn.rs Normal file
@@ -0,0 +1,220 @@
use burn::backend::NdArray;
use burn::tensor::cast::ToElement;
use burn_rl::base::{ElemType, Model, State};

use crate::{BotStrategy, CheckerMove, Color, GameState, PlayerId};
use log::info;
use store::MoveRules;

use crate::burnrl::algos::dqn;
use crate::burnrl::environment;
use crate::training_common::{get_valid_action_indices, sample_valid_action, TrictracAction};

type DqnBurnNetwork = dqn::Net<NdArray<ElemType>>;

/// DQN strategy for the bot - only loads and uses a pre-trained model
#[derive(Debug)]
pub struct DqnBurnStrategy {
    pub game: GameState,
    pub player_id: PlayerId,
    pub color: Color,
    pub model: Option<DqnBurnNetwork>,
}

impl Default for DqnBurnStrategy {
    fn default() -> Self {
        Self {
            game: GameState::default(),
            player_id: 1,
            color: Color::White,
            model: None,
        }
    }
}

impl DqnBurnStrategy {
    pub fn new() -> Self {
        Self::default()
    }

    pub fn new_with_model(model_path: &String) -> Self {
        info!("Loading model {model_path:?}");
        let mut strategy = Self::new();
        strategy.model = dqn::load_model(256, model_path);
        strategy
    }
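
    // Note: the hard-coded 256 above is the network's dense_size and
    // presumably has to match the dense_size the model was trained with
    // (burnrl/main.rs passes conf.dense_size when loading).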

    /// Uses the DQN model to choose a valid action
    fn get_dqn_action(&self) -> Option<TrictracAction> {
        if let Some(ref model) = self.model {
            let state = environment::TrictracState::from_game_state(&self.game);
            let valid_actions_indices = get_valid_action_indices(&self.game);
            if valid_actions_indices.is_empty() {
                return None; // no valid actions, end of episode
            }

            // Get the Q-values for all actions
            let q_values = model.infer(state.to_tensor().unsqueeze());

            // Set the Q-values of invalid actions to the lowest value.
            // Note: masking by value equality also masks any valid action that
            // happens to share a Q-value with an invalid one; masking by index
            // would be more precise.
            let mut masked_q_values = q_values.clone();
            let q_values_vec: Vec<f32> = q_values.into_data().into_vec().unwrap();
            for (index, q_value) in q_values_vec.iter().enumerate() {
                if !valid_actions_indices.contains(&index) {
                    masked_q_values = masked_q_values.clone().mask_fill(
                        masked_q_values.clone().equal_elem(*q_value),
                        f32::NEG_INFINITY,
                    );
                }
            }
            // Get the best action (highest Q-value)
            let action_index = masked_q_values.argmax(1).into_scalar().to_u32();
            environment::TrictracEnvironment::convert_action(environment::TrictracAction::from(
                action_index,
            ))
        } else {
            // Fallback: random valid action
            sample_valid_action(&self.game)
        }
    }
}

impl BotStrategy for DqnBurnStrategy {
    fn get_game(&self) -> &GameState {
        &self.game
    }

    fn get_mut_game(&mut self) -> &mut GameState {
        &mut self.game
    }

    fn set_color(&mut self, color: Color) {
        self.color = color;
    }

    fn set_player_id(&mut self, player_id: PlayerId) {
        self.player_id = player_id;
    }

    fn calculate_points(&self) -> u8 {
        self.game.dice_points.0
    }

    fn calculate_adv_points(&self) -> u8 {
        self.game.dice_points.1
    }

    fn choose_go(&self) -> bool {
        // Use the DQN to decide whether to keep going
        if let Some(action) = self.get_dqn_action() {
            matches!(action, TrictracAction::Go)
        } else {
            // Fallback: always keep going
            true
        }
    }

    fn choose_move(&self) -> (CheckerMove, CheckerMove) {
        // Use the DQN to choose the move
        if let Some(TrictracAction::Move {
            dice_order,
            checker1,
            checker2,
        }) = self.get_dqn_action()
        {
            let dicevals = self.game.dice.values;
            let (mut dice1, mut dice2) = if dice_order {
                (dicevals.0, dicevals.1)
            } else {
                (dicevals.1, dicevals.0)
            };

            assert_eq!(self.color, Color::White);
            let from1 = self
                .game
                .board
                .get_checker_field(&self.color, checker1 as u8)
                .unwrap_or(0);

            if from1 == 0 {
                // empty move
                dice1 = 0;
            }
            let mut to1 = from1;
            if self.color == Color::White {
                to1 += dice1 as usize;
                if 24 < to1 {
                    // bearing off (sortie)
                    to1 = 0;
                }
            } else {
                let fto1 = to1 as i16 - dice1 as i16;
                to1 = if fto1 < 0 { 0 } else { fto1 as usize };
            }

            let checker_move1 = store::CheckerMove::new(from1, to1).unwrap_or_default();

            let mut tmp_board = self.game.board.clone();
            let move_res = tmp_board.move_checker(&self.color, checker_move1);
            if move_res.is_err() {
                panic!("could not move {move_res:?}");
            }
            let from2 = tmp_board
                .get_checker_field(&self.color, checker2 as u8)
                .unwrap_or(0);
            if from2 == 0 {
                // empty move
                dice2 = 0;
            }
            let mut to2 = from2;
            if self.color == Color::White {
                to2 += dice2 as usize;
                if 24 < to2 {
                    // bearing off (sortie)
                    to2 = 0;
                }
            } else {
                let fto2 = to2 as i16 - dice2 as i16;
                to2 = if fto2 < 0 { 0 } else { fto2 as usize };
            }

            // Handle taking the opponent's corner by power ("prise de coin par puissance")
            let opp_rest_field = if self.color == Color::White { 13 } else { 12 };
            if to1 == opp_rest_field && to2 == opp_rest_field {
                if self.color == Color::White {
                    to1 -= 1;
                    to2 -= 1;
                } else {
                    to1 += 1;
                    to2 += 1;
                }
            }

            let checker_move1 = CheckerMove::new(from1, to1).unwrap_or_default();
            let checker_move2 = CheckerMove::new(from2, to2).unwrap_or_default();

            let chosen_move = if self.color == Color::White {
                (checker_move1, checker_move2)
            } else {
                // XXX : really ?
                (checker_move1.mirror(), checker_move2.mirror())
            };

            return chosen_move;
        }

        // Fallback: use the default strategy
        let rules = MoveRules::new(&self.color, &self.game.board, self.game.dice);
        let possible_moves = rules.get_possible_moves_sequences(true, vec![]);

        let chosen_move = *possible_moves
            .first()
            .unwrap_or(&(CheckerMove::default(), CheckerMove::default()));
|
||||
if self.color == Color::White {
|
||||
chosen_move
|
||||
} else {
|
||||
(chosen_move.0.mirror(), chosen_move.1.mirror())
|
||||
}
|
||||
}
|
||||
}
|
||||
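Side note on the masking step above: `mask_fill` combined with `equal_elem` matches by value, so a valid action that happens to share the exact same Q-value as an invalid one would be masked too. A minimal sketch of index-based masking that avoids this edge case, assuming the Q-values have already been copied into a plain `Vec<f32>` as in `q_values_vec` above (the function name is hypothetical):

    /// Pick the valid action index with the highest Q-value.
    /// `q_values` holds one entry per action; `valid_indices` lists the legal actions.
    fn argmax_valid(q_values: &[f32], valid_indices: &[usize]) -> Option<usize> {
        valid_indices
            .iter()
            .copied()
            // ignore indices outside the Q-value vector
            .filter(|&i| i < q_values.len())
            // compare by Q-value; total_cmp gives a total order even with NaN
            .max_by(|&a, &b| q_values[a].total_cmp(&q_values[b]))
    }

The returned index could then be converted to a `TrictracAction` exactly as in the tensor-based version above.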
62 bot/src/strategy/erroneous_moves.rs Normal file

@ -0,0 +1,62 @@

use crate::{BotStrategy, CheckerMove, Color, GameState, PlayerId, PointsRules};

#[derive(Debug)]
pub struct ErroneousStrategy {
    pub game: GameState,
    pub player_id: PlayerId,
    pub color: Color,
}

impl Default for ErroneousStrategy {
    fn default() -> Self {
        let game = GameState::default();
        Self {
            game,
            player_id: 2,
            color: Color::Black,
        }
    }
}

impl BotStrategy for ErroneousStrategy {
    fn get_game(&self) -> &GameState {
        &self.game
    }
    fn get_mut_game(&mut self) -> &mut GameState {
        &mut self.game
    }

    fn set_color(&mut self, color: Color) {
        self.color = color;
    }

    fn set_player_id(&mut self, player_id: PlayerId) {
        self.player_id = player_id;
    }

    fn calculate_points(&self) -> u8 {
        let dice_roll_count = self
            .get_game()
            .players
            .get(&self.player_id)
            .unwrap()
            .dice_roll_count;
        let points_rules = PointsRules::new(&Color::White, &self.game.board, self.game.dice);
        points_rules.get_points(dice_roll_count).0
    }

    fn calculate_adv_points(&self) -> u8 {
        self.calculate_points()
    }

    fn choose_go(&self) -> bool {
        true
    }

    fn choose_move(&self) -> (CheckerMove, CheckerMove) {
        (
            CheckerMove::new(1, 10).unwrap(),
            CheckerMove::new(2, 7).unwrap(),
        )
    }
}
7 bot/src/strategy/mod.rs Normal file

@ -0,0 +1,7 @@

pub mod client;
pub mod default;
pub mod dqn;
pub mod dqnburn;
pub mod erroneous_moves;
pub mod random;
pub mod stable_baselines3;
67 bot/src/strategy/random.rs Normal file

@ -0,0 +1,67 @@

use crate::{BotStrategy, CheckerMove, Color, GameState, PlayerId};
use store::MoveRules;

#[derive(Debug)]
pub struct RandomStrategy {
    pub game: GameState,
    pub player_id: PlayerId,
    pub color: Color,
}

impl Default for RandomStrategy {
    fn default() -> Self {
        let game = GameState::default();
        Self {
            game,
            player_id: 1,
            color: Color::White,
        }
    }
}

impl BotStrategy for RandomStrategy {
    fn get_game(&self) -> &GameState {
        &self.game
    }
    fn get_mut_game(&mut self) -> &mut GameState {
        &mut self.game
    }

    fn set_color(&mut self, color: Color) {
        self.color = color;
    }

    fn set_player_id(&mut self, player_id: PlayerId) {
        self.player_id = player_id;
    }

    fn calculate_points(&self) -> u8 {
        self.game.dice_points.0
    }

    fn calculate_adv_points(&self) -> u8 {
        self.game.dice_points.1
    }

    fn choose_go(&self) -> bool {
        true
    }

    fn choose_move(&self) -> (CheckerMove, CheckerMove) {
        let rules = MoveRules::new(&self.color, &self.game.board, self.game.dice);
        let possible_moves = rules.get_possible_moves_sequences(true, vec![]);

        use rand::{seq::SliceRandom, thread_rng};
        let mut rng = thread_rng();
        // pick a random move among the possible sequences
        let chosen_move = possible_moves
            .choose(&mut rng)
            .cloned()
            .unwrap_or((CheckerMove::default(), CheckerMove::default()));

        if self.color == Color::White {
            chosen_move
        } else {
            (chosen_move.0.mirror(), chosen_move.1.mirror())
        }
    }
}
272 bot/src/strategy/stable_baselines3.rs Normal file

@ -0,0 +1,272 @@

use crate::{BotStrategy, CheckerMove, Color, GameState, PlayerId, PointsRules};
use serde::{Deserialize, Serialize};
use std::fs::File;
use std::io::Read;
use std::io::Write;
use std::path::Path;
use std::process::Command;
use store::MoveRules;

#[derive(Debug)]
pub struct StableBaselines3Strategy {
    pub game: GameState,
    pub player_id: PlayerId,
    pub color: Color,
    pub model_path: String,
}

impl Default for StableBaselines3Strategy {
    fn default() -> Self {
        let game = GameState::default();
        Self {
            game,
            player_id: 2,
            color: Color::Black,
            model_path: "models/trictrac_ppo.zip".to_string(),
        }
    }
}

#[derive(Serialize, Deserialize)]
struct GameStateJson {
    board: Vec<i8>,
    active_player: u8,
    dice: [u8; 2],
    white_points: u8,
    white_holes: u8,
    black_points: u8,
    black_holes: u8,
    turn_stage: u8,
}

#[derive(Deserialize)]
struct ActionJson {
    action_type: u8,
    from1: usize,
    to1: usize,
    from2: usize,
    to2: usize,
}

impl StableBaselines3Strategy {
    pub fn new(model_path: &str) -> Self {
        let game = GameState::default();
        Self {
            game,
            player_id: 2,
            color: Color::Black,
            model_path: model_path.to_string(),
        }
    }

    fn get_state_as_json(&self) -> GameStateJson {
        // Convert the game state into a format compatible with our Python model
        let mut board = vec![0; 24];

        // Fill in the positions of the white checkers (positive values)
        for (pos, count) in self.game.board.get_color_fields(Color::White) {
            if pos < 24 {
                board[pos] = count;
            }
        }

        // Fill in the positions of the black checkers (negative values)
        for (pos, count) in self.game.board.get_color_fields(Color::Black) {
            if pos < 24 {
                board[pos] = -count;
            }
        }

        // Convert the turn stage to an integer
        let turn_stage = match self.game.turn_stage {
            store::TurnStage::RollDice => 0,
            store::TurnStage::RollWaiting => 1,
            store::TurnStage::MarkPoints => 2,
            store::TurnStage::HoldOrGoChoice => 3,
            store::TurnStage::Move => 4,
            store::TurnStage::MarkAdvPoints => 5,
        };

        // Fetch the players' points and holes
        let white_points = self.game.players.get(&1).map_or(0, |p| p.points);
        let white_holes = self.game.players.get(&1).map_or(0, |p| p.holes);
        let black_points = self.game.players.get(&2).map_or(0, |p| p.points);
        let black_holes = self.game.players.get(&2).map_or(0, |p| p.holes);

        // Build the JSON object
        GameStateJson {
            board,
            active_player: self.game.active_player_id as u8,
            dice: [self.game.dice.values.0, self.game.dice.values.1],
            white_points,
            white_holes,
            black_points,
            black_holes,
            turn_stage,
        }
    }

    fn predict_action(&self) -> Option<ActionJson> {
        // Serialize the game state to JSON
        let state_json = self.get_state_as_json();
        let state_str = serde_json::to_string(&state_json).unwrap();

        // Write the state to a temporary file
        let temp_input_path = "temp_state.json";
        let mut file = File::create(temp_input_path).ok()?;
        file.write_all(state_str.as_bytes()).ok()?;

        // Run the Python script to make a prediction
        let output_path = "temp_action.json";
        let python_script = format!(
            r#"
import sys
import json
import numpy as np
from stable_baselines3 import PPO
import torch

# Load the model
model = PPO.load("{}")

# Read the game state
with open("temp_state.json", "r") as f:
    state_dict = json.load(f)

# Convert to the observation format expected by the model
observation = {{
    'board': np.array(state_dict['board'], dtype=np.int8),
    'active_player': state_dict['active_player'],
    'dice': np.array(state_dict['dice'], dtype=np.int32),
    'white_points': state_dict['white_points'],
    'white_holes': state_dict['white_holes'],
    'black_points': state_dict['black_points'],
    'black_holes': state_dict['black_holes'],
    'turn_stage': state_dict['turn_stage'],
}}

# Predict the action
action, _ = model.predict(observation)

# Convert the action into a readable format
action_dict = {{
    'action_type': int(action[0]),
    'from1': int(action[1]),
    'to1': int(action[2]),
    'from2': int(action[3]),
    'to2': int(action[4]),
}}

# Write the action to a file
with open("{}", "w") as f:
    json.dump(action_dict, f)
"#,
            self.model_path, output_path
        );

        let temp_script_path = "temp_predict.py";
        let mut script_file = File::create(temp_script_path).ok()?;
        script_file.write_all(python_script.as_bytes()).ok()?;

        // Run the Python script
        let status = Command::new("python").arg(temp_script_path).status().ok()?;

        if !status.success() {
            return None;
        }

        // Read the prediction
        if Path::new(output_path).exists() {
            let mut file = File::open(output_path).ok()?;
            let mut contents = String::new();
            file.read_to_string(&mut contents).ok()?;

            // Clean up the temporary files
            std::fs::remove_file(temp_input_path).ok();
            std::fs::remove_file(temp_script_path).ok();
            std::fs::remove_file(output_path).ok();

            // Parse the prediction
            let action: ActionJson = serde_json::from_str(&contents).ok()?;
            Some(action)
        } else {
            None
        }
    }
}

impl BotStrategy for StableBaselines3Strategy {
    fn get_game(&self) -> &GameState {
        &self.game
    }

    fn get_mut_game(&mut self) -> &mut GameState {
        &mut self.game
    }

    fn set_color(&mut self, color: Color) {
        self.color = color;
    }

    fn set_player_id(&mut self, player_id: PlayerId) {
        self.player_id = player_id;
    }

    fn calculate_points(&self) -> u8 {
        // Only use the model's prediction if it is a "mark" action (1)
        if let Some(action) = self.predict_action() {
            if action.action_type == 1 {
                // Mark the points computed by the model (the dice sum is used as a proxy here)
                return self.game.dice.values.0 + self.game.dice.values.1;
            }
        }

        // Fall back to the standard method if the prediction fails
        let dice_roll_count = self
            .get_game()
            .players
            .get(&self.player_id)
            .unwrap()
            .dice_roll_count;
        let points_rules = PointsRules::new(&Color::White, &self.game.board, self.game.dice);
        points_rules.get_points(dice_roll_count).0
    }

    fn calculate_adv_points(&self) -> u8 {
        self.calculate_points()
    }

    fn choose_go(&self) -> bool {
        // Only use the model's prediction if it is a "go" action (2)
        if let Some(action) = self.predict_action() {
            return action.action_type == 2;
        }

        // Fall back to the standard method if the prediction fails
        true
    }

    fn choose_move(&self) -> (CheckerMove, CheckerMove) {
        // Only use the model's prediction if it is a "move" action (0)
        if let Some(action) = self.predict_action() {
            if action.action_type == 0 {
                let move1 = CheckerMove::new(action.from1, action.to1).unwrap_or_default();
                let move2 = CheckerMove::new(action.from2, action.to2).unwrap_or_default();
                return (move1, move2);
            }
        }

        // Fall back to the standard method if the prediction fails
        let rules = MoveRules::new(&self.color, &self.game.board, self.game.dice);
        let possible_moves = rules.get_possible_moves_sequences(true, vec![]);
        let chosen_move = *possible_moves
            .first()
            .unwrap_or(&(CheckerMove::default(), CheckerMove::default()));

        if self.color == Color::White {
            chosen_move
        } else {
            (chosen_move.0.mirror(), chosen_move.1.mirror())
        }
    }
}
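For orientation, a sketch of how this strategy could be exercised on its own, using only the constructor and trait methods defined above (the model path is a hypothetical example):

    use bot::{BotStrategy, StableBaselines3Strategy};

    fn demo() {
        // Hypothetical path to an SB3 PPO model trained on the matching observation space
        let strategy = StableBaselines3Strategy::new("models/my_custom_ppo.zip");
        // choose_move shells out to Python via the temporary JSON files shown above,
        // and falls back to the first legal move sequence if the prediction fails
        let (move1, move2) = strategy.choose_move();
        println!("bot plays {move1:?} then {move2:?}");
    }

In a real game the runner would first feed events into the strategy's `GameState` so that the serialized observation reflects the current position.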
351 bot/src/training_common.rs Normal file

@ -0,0 +1,351 @@

use std::cmp::{max, min};
use std::fmt::{Debug, Display, Formatter};

use serde::{Deserialize, Serialize};
use store::{CheckerMove, GameEvent, GameState};

/// The action types available in the game
#[derive(Debug, Copy, Clone, Eq, Serialize, Deserialize, PartialEq)]
pub enum TrictracAction {
    /// Roll the dice
    Roll,
    /// Go on after winning a hole
    Go,
    /// Perform a checkers move
    Move {
        dice_order: bool, // true = use dice[0] first, false = dice[1] first
        checker1: usize, // first checker to move, numbered from the starting column (0-15); 0 = no checker
        checker2: usize, // second checker (0-15)
    },
    // Mark the points: to enable if schools are supported
    // Mark,
}

impl Display for TrictracAction {
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        let s = format!("{self:?}");
        // Note: the debug representation is written out reversed
        writeln!(f, "{}", s.chars().rev().collect::<String>())?;
        Ok(())
    }
}

impl TrictracAction {
    /// Encode an action as an index for the neural network
    pub fn to_action_index(&self) -> usize {
        match self {
            TrictracAction::Roll => 0,
            TrictracAction::Go => 1,
            TrictracAction::Move {
                dice_order,
                checker1,
                checker2,
            } => {
                // Encode the moves in the action space.
                // Indices 2 and up are used for moves:
                // 2 to 513 (2 to 257 with die 1 first, 258 to 513 with die 2 first)
                let mut start = 2;
                if !dice_order {
                    // 16 * 16 = 256
                    start += 256;
                }
                start + checker1 * 16 + checker2
            } // TrictracAction::Mark => 514,
        }
    }

    pub fn to_event(&self, state: &GameState) -> Option<GameEvent> {
        match self {
            TrictracAction::Roll => {
                // Roll the dice
                Some(GameEvent::Roll {
                    player_id: state.active_player_id,
                })
            }
            // TrictracAction::Mark => {
            //     // Mark points
            //     let points = self.game.
            //     Some(GameEvent::Mark {
            //         player_id: self.active_player_id,
            //         points,
            //     })
            // }
            TrictracAction::Go => {
                // Go on after winning a hole
                Some(GameEvent::Go {
                    player_id: state.active_player_id,
                })
            }
            TrictracAction::Move {
                dice_order,
                checker1,
                checker2,
            } => {
                // Perform a move
                let (dice1, dice2) = if *dice_order {
                    (state.dice.values.0, state.dice.values.1)
                } else {
                    (state.dice.values.1, state.dice.values.0)
                };

                let color = &store::Color::White;
                let from1 = state
                    .board
                    .get_checker_field(color, *checker1 as u8)
                    .unwrap_or(0);
                let mut to1 = from1 + dice1 as usize;
                let checker_move1 = store::CheckerMove::new(from1, to1).unwrap_or_default();

                let mut tmp_board = state.board.clone();
                let move_result = tmp_board.move_checker(color, checker_move1);
                if move_result.is_err() {
                    None
                    // panic!("Error while moving checker {move_result:?}")
                } else {
                    let from2 = tmp_board
                        .get_checker_field(color, *checker2 as u8)
                        .unwrap_or(0);
                    let mut to2 = from2 + dice2 as usize;

                    // Handle taking the corner "by power"
                    let opp_rest_field = 13;
                    if to1 == opp_rest_field && to2 == opp_rest_field {
                        to1 -= 1;
                        to2 -= 1;
                    }

                    let checker_move1 = store::CheckerMove::new(from1, to1).unwrap_or_default();
                    let checker_move2 = store::CheckerMove::new(from2, to2).unwrap_or_default();

                    Some(GameEvent::Move {
                        player_id: state.active_player_id,
                        moves: (checker_move1, checker_move2),
                    })
                }
            }
        }
    }

    /// Decode an action index into a TrictracAction
    pub fn from_action_index(index: usize) -> Option<TrictracAction> {
        match index {
            0 => Some(TrictracAction::Roll),
            1 => Some(TrictracAction::Go),
            // 514 => Some(TrictracAction::Mark),
            i if i >= 2 => {
                let move_code = i - 2;
                let (dice_order, checker1, checker2) = Self::decode_move(move_code);
                Some(TrictracAction::Move {
                    dice_order,
                    checker1,
                    checker2,
                })
            }
            _ => None,
        }
    }

    /// Decode an integer into a move pair
    fn decode_move(code: usize) -> (bool, usize, usize) {
        let mut encoded = code;
        let dice_order = code < 256;
        if !dice_order {
            encoded -= 256
        }
        let checker1 = encoded / 16;
        let checker2 = encoded % 16;
        (dice_order, checker1, checker2)
    }

    /// Return the size of the whole action space
    pub fn action_space_size() -> usize {
        // 1 (Roll) + 1 (Go) + possible moves
        // For the moves: 2 * 16 * 16 = 512 (die order choice + checker index 0-15 for each checker)
        2 + (2 * 16 * 16) // = 514
    }

    // pub fn to_game_event(&self, player_id: PlayerId, dice: Dice) -> GameEvent {
    //     match action {
    //         TrictracAction::Roll => Some(GameEvent::Roll { player_id }),
    //         TrictracAction::Mark => Some(GameEvent::Mark { player_id, points }),
    //         TrictracAction::Go => Some(GameEvent::Go { player_id }),
    //         TrictracAction::Move {
    //             dice_order,
    //             from1,
    //             from2,
    //         } => {
    //             // Perform a move
    //             let checker_move1 = store::CheckerMove::new(move1.0, move1.1).unwrap_or_default();
    //             let checker_move2 = store::CheckerMove::new(move2.0, move2.1).unwrap_or_default();
    //
    //             Some(GameEvent::Move {
    //                 player_id: self.agent_player_id,
    //                 moves: (checker_move1, checker_move2),
    //             })
    //         }
    //     };
    // }
}

/// Get the valid actions for the current game state
pub fn get_valid_actions(game_state: &crate::GameState) -> Vec<TrictracAction> {
    use store::TurnStage;

    let mut valid_actions = Vec::new();

    let active_player_id = game_state.active_player_id;
    let player_color = game_state.player_color_by_id(&active_player_id);

    if let Some(color) = player_color {
        match game_state.turn_stage {
            TurnStage::RollDice => {
                valid_actions.push(TrictracAction::Roll);
            }
            TurnStage::MarkPoints | TurnStage::MarkAdvPoints | TurnStage::RollWaiting => {
                // valid_actions.push(TrictracAction::Mark);
                panic!(
                    "get_valid_actions not implemented for turn stage {:?}",
                    game_state.turn_stage
                );
            }
            TurnStage::HoldOrGoChoice => {
                valid_actions.push(TrictracAction::Go);

                // Also add the possible moves
                let rules = store::MoveRules::new(&color, &game_state.board, game_state.dice);
                let possible_moves = rules.get_possible_moves_sequences(true, vec![]);

                // Adjust checker_moves_to_trictrac_action if Black must be supported
                assert_eq!(color, store::Color::White);
                for (move1, move2) in possible_moves {
                    valid_actions.push(checker_moves_to_trictrac_action(
                        &move1, &move2, &color, game_state,
                    ));
                }
            }
            TurnStage::Move => {
                let rules = store::MoveRules::new(&color, &game_state.board, game_state.dice);
                let possible_moves = rules.get_possible_moves_sequences(true, vec![]);

                // Adjust checker_moves_to_trictrac_action if Black must be supported
                assert_eq!(color, store::Color::White);
                for (move1, move2) in possible_moves {
                    valid_actions.push(checker_moves_to_trictrac_action(
                        &move1, &move2, &color, game_state,
                    ));
                }
            }
        }
    }

    if valid_actions.is_empty() {
        panic!("empty valid_actions for state {game_state}");
    }
    valid_actions
}

// Valid only for the White player
fn checker_moves_to_trictrac_action(
    move1: &CheckerMove,
    move2: &CheckerMove,
    color: &store::Color,
    state: &crate::GameState,
) -> TrictracAction {
    let to1 = move1.get_to();
    let to2 = move2.get_to();
    let from1 = move1.get_from();
    let from2 = move2.get_from();
    let dice = state.dice;

    let mut diff_move1 = if to1 > 0 {
        // Move without bearing off
        to1 - from1
    } else {
        // Bearing off: use the die value
        if to2 > 0 {
            // bearing off for move 1 only
            let dice2 = to2 - from2;
            if dice2 == dice.values.0 as usize {
                dice.values.1 as usize
            } else {
                dice.values.0 as usize
            }
        } else {
            // both checkers bear off
            if from1 < from2 {
                max(dice.values.0, dice.values.1) as usize
            } else {
                min(dice.values.0, dice.values.1) as usize
            }
        }
    };

    // Adjust diff_move1 if this move takes the corner by power
    let rest_field = 12;
    if to1 == rest_field
        && to2 == rest_field
        && max(dice.values.0 as usize, dice.values.1 as usize) + min(from1, from2) != rest_field
    {
        // corner taken by power
        diff_move1 += 1;
    }
    let dice_order = diff_move1 == dice.values.0 as usize;

    let checker1 = state.board.get_field_checker(color, from1) as usize;
    let mut tmp_board = state.board.clone();
    // should not raise an error for a valid action
    let move_res = tmp_board.move_checker(color, *move1);
    if move_res.is_err() {
        panic!("error while moving checker {move_res:?}");
    }
    let checker2 = tmp_board.get_field_checker(color, from2) as usize;
    TrictracAction::Move {
        dice_order,
        checker1,
        checker2,
    }
}

/// Return the indices of the valid actions
pub fn get_valid_action_indices(game_state: &crate::GameState) -> Vec<usize> {
    get_valid_actions(game_state)
        .into_iter()
        .map(|action| action.to_action_index())
        .collect()
}

/// Pick a random valid action
pub fn sample_valid_action(game_state: &crate::GameState) -> Option<TrictracAction> {
    use rand::{seq::SliceRandom, thread_rng};

    let valid_actions = get_valid_actions(game_state);
    let mut rng = thread_rng();
    valid_actions.choose(&mut rng).cloned()
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn to_action_index() {
        let action = TrictracAction::Move {
            dice_order: true,
            checker1: 3,
            checker2: 4,
        };
        let index = action.to_action_index();
        assert_eq!(Some(action), TrictracAction::from_action_index(index));
        assert_eq!(54, index);
    }

    #[test]
    fn from_action_index() {
        let action = TrictracAction::Move {
            dice_order: true,
            checker1: 3,
            checker2: 4,
        };
        assert_eq!(Some(action), TrictracAction::from_action_index(54));
    }
}
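To make the index layout above concrete: with `dice_order = true` the offset is 2, so `Move { dice_order: true, checker1: 3, checker2: 4 }` encodes to 2 + 3 * 16 + 4 = 54, which is exactly what the unit test asserts. A standalone round-trip check over the whole 514-entry action space (a sketch using only the functions defined above):

    #[test]
    fn action_index_round_trip() {
        // Every index below action_space_size must decode, and re-encode to itself
        for index in 0..TrictracAction::action_space_size() {
            let action = TrictracAction::from_action_index(index)
                .expect("every index in the action space should decode");
            assert_eq!(index, action.to_action_index());
        }
    }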
262 bot/src/training_common_big.rs Normal file

@ -0,0 +1,262 @@

use std::cmp::{max, min};

use serde::{Deserialize, Serialize};
use store::{CheckerMove, Dice};

/// The action types available in the game
#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
pub enum TrictracAction {
    /// Roll the dice
    Roll,
    /// Go on after winning a hole
    Go,
    /// Perform a checkers move
    Move {
        dice_order: bool, // true = use dice[0] first, false = dice[1] first
        from1: usize, // starting position of the first checker (0-24)
        from2: usize, // starting position of the second checker (0-24)
    },
    // Mark the points: to enable if schools are supported
    // Mark,
}

impl TrictracAction {
    /// Encode an action as an index for the neural network
    pub fn to_action_index(&self) -> usize {
        match self {
            TrictracAction::Roll => 0,
            TrictracAction::Go => 1,
            TrictracAction::Move {
                dice_order,
                from1,
                from2,
            } => {
                // Encode the moves in the action space.
                // Indices 2 and up are used for moves:
                // 2 to 1251 (2 to 626 with die 1 first, 627 to 1251 with die 2 first)
                let mut start = 2;
                if !dice_order {
                    // 25 * 25 = 625
                    start += 625;
                }
                start + from1 * 25 + from2
            } // TrictracAction::Mark => 1252,
        }
    }

    /// Decode an action index into a TrictracAction
    pub fn from_action_index(index: usize) -> Option<TrictracAction> {
        match index {
            0 => Some(TrictracAction::Roll),
            // 1252 => Some(TrictracAction::Mark),
            1 => Some(TrictracAction::Go),
            // Note: decoding subtracts 3 where encoding adds 2; together with the
            // `1 +` in decode_move the offsets cancel in the interior of the range
            // (the unit test below round-trips index 81) but not at its boundaries.
            i if i >= 3 => {
                let move_code = i - 3;
                let (dice_order, from1, from2) = Self::decode_move(move_code);
                Some(TrictracAction::Move {
                    dice_order,
                    from1,
                    from2,
                })
            }
            _ => None,
        }
    }

    /// Decode an integer into a move pair
    fn decode_move(code: usize) -> (bool, usize, usize) {
        let mut encoded = code;
        let dice_order = code < 626;
        if !dice_order {
            encoded -= 625
        }
        let from1 = encoded / 25;
        let from2 = 1 + encoded % 25;
        (dice_order, from1, from2)
    }

    /// Return the size of the whole action space
    pub fn action_space_size() -> usize {
        // 1 (Roll) + 1 (Go) + possible moves
        // For the moves: 2*25*25 = 1250 (die order choice + position 0-24 for each from)
        // This could be optimized by restricting to the valid positions (1-24)
        2 + (2 * 25 * 25) // = 1252
    }

    // pub fn to_game_event(&self, player_id: PlayerId, dice: Dice) -> GameEvent {
    //     match action {
    //         TrictracAction::Roll => Some(GameEvent::Roll { player_id }),
    //         TrictracAction::Mark => Some(GameEvent::Mark { player_id, points }),
    //         TrictracAction::Go => Some(GameEvent::Go { player_id }),
    //         TrictracAction::Move {
    //             dice_order,
    //             from1,
    //             from2,
    //         } => {
    //             // Perform a move
    //             let checker_move1 = store::CheckerMove::new(move1.0, move1.1).unwrap_or_default();
    //             let checker_move2 = store::CheckerMove::new(move2.0, move2.1).unwrap_or_default();
    //
    //             Some(GameEvent::Move {
    //                 player_id: self.agent_player_id,
    //                 moves: (checker_move1, checker_move2),
    //             })
    //         }
    //     };
    // }
}

/// Get the valid actions for the current game state
pub fn get_valid_actions(game_state: &crate::GameState) -> Vec<TrictracAction> {
    use store::TurnStage;

    let mut valid_actions = Vec::new();

    let active_player_id = game_state.active_player_id;
    let player_color = game_state.player_color_by_id(&active_player_id);

    if let Some(color) = player_color {
        match game_state.turn_stage {
            TurnStage::RollDice => {
                valid_actions.push(TrictracAction::Roll);
            }
            TurnStage::MarkPoints | TurnStage::MarkAdvPoints | TurnStage::RollWaiting => {
                panic!(
                    "get_valid_actions not implemented for turn stage {:?}",
                    game_state.turn_stage
                );
                // valid_actions.push(TrictracAction::Mark);
            }
            TurnStage::HoldOrGoChoice => {
                valid_actions.push(TrictracAction::Go);

                // Also add the possible moves
                let rules = store::MoveRules::new(&color, &game_state.board, game_state.dice);
                let possible_moves = rules.get_possible_moves_sequences(true, vec![]);

                // Adjust checker_moves_to_trictrac_action if Black must be supported
                assert_eq!(color, store::Color::White);
                for (move1, move2) in possible_moves {
                    valid_actions.push(checker_moves_to_trictrac_action(
                        &move1,
                        &move2,
                        &game_state.dice,
                    ));
                }
            }
            TurnStage::Move => {
                let rules = store::MoveRules::new(&color, &game_state.board, game_state.dice);
                let possible_moves = rules.get_possible_moves_sequences(true, vec![]);

                // Adjust checker_moves_to_trictrac_action if Black must be supported
                assert_eq!(color, store::Color::White);
                for (move1, move2) in possible_moves {
                    valid_actions.push(checker_moves_to_trictrac_action(
                        &move1,
                        &move2,
                        &game_state.dice,
                    ));
                }
            }
        }
    }

    if valid_actions.is_empty() {
        panic!("empty valid_actions for state {game_state}");
    }
    valid_actions
}

// Valid only for the White player
fn checker_moves_to_trictrac_action(
    move1: &CheckerMove,
    move2: &CheckerMove,
    dice: &Dice,
) -> TrictracAction {
    let to1 = move1.get_to();
    let to2 = move2.get_to();
    let from1 = move1.get_from();
    let from2 = move2.get_from();

    let mut diff_move1 = if to1 > 0 {
        // Move without bearing off
        to1 - from1
    } else {
        // Bearing off: use the die value
        if to2 > 0 {
            // bearing off for move 1 only
            let dice2 = to2 - from2;
            if dice2 == dice.values.0 as usize {
                dice.values.1 as usize
            } else {
                dice.values.0 as usize
            }
        } else {
            // both checkers bear off
            if from1 < from2 {
                max(dice.values.0, dice.values.1) as usize
            } else {
                min(dice.values.0, dice.values.1) as usize
            }
        }
    };

    // Adjust diff_move1 if this move takes the corner by power
    let rest_field = 12;
    if to1 == rest_field
        && to2 == rest_field
        && max(dice.values.0 as usize, dice.values.1 as usize) + min(from1, from2) != rest_field
    {
        // corner taken by power
        diff_move1 += 1;
    }
    TrictracAction::Move {
        dice_order: diff_move1 == dice.values.0 as usize,
        from1: move1.get_from(),
        from2: move2.get_from(),
    }
}

/// Return the indices of the valid actions
pub fn get_valid_action_indices(game_state: &crate::GameState) -> Vec<usize> {
    get_valid_actions(game_state)
        .into_iter()
        .map(|action| action.to_action_index())
        .collect()
}

/// Pick a random valid action
pub fn sample_valid_action(game_state: &crate::GameState) -> Option<TrictracAction> {
    use rand::{seq::SliceRandom, thread_rng};

    let valid_actions = get_valid_actions(game_state);
    let mut rng = thread_rng();
    valid_actions.choose(&mut rng).cloned()
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn to_action_index() {
        let action = TrictracAction::Move {
            dice_order: true,
            from1: 3,
            from2: 4,
        };
        let index = action.to_action_index();
        assert_eq!(Some(action), TrictracAction::from_action_index(index));
        assert_eq!(81, index);
    }

    #[test]
    fn from_action_index() {
        let action = TrictracAction::Move {
            dice_order: true,
            from1: 3,
            from2: 4,
        };
        assert_eq!(Some(action), TrictracAction::from_action_index(81));
    }
}
149 bot/src/trictrac_board.rs Normal file

@ -0,0 +1,149 @@

// https://docs.rs/board-game/ implementation
use crate::training_common::{get_valid_actions, TrictracAction};
use board_game::board::{
    Board as BoardGameBoard, BoardDone, BoardMoves, Outcome, PlayError, Player as BoardGamePlayer,
};
use board_game::impl_unit_symmetry_board;
use internal_iterator::InternalIterator;
use std::fmt;
use std::ops::ControlFlow;
use store::Color;

#[derive(Clone, Debug, Eq, PartialEq)]
pub struct TrictracBoard(crate::GameState);

impl Default for TrictracBoard {
    fn default() -> Self {
        TrictracBoard(crate::GameState::new_with_players("white", "black"))
    }
}

impl fmt::Display for TrictracBoard {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        self.0.fmt(f)
    }
}

impl_unit_symmetry_board!(TrictracBoard);

impl BoardGameBoard for TrictracBoard {
    // impl TrictracBoard {
    type Move = TrictracAction;

    fn next_player(&self) -> BoardGamePlayer {
        self.0
            .who_plays()
            .map(|p| {
                if p.color == Color::Black {
                    BoardGamePlayer::B
                } else {
                    BoardGamePlayer::A
                }
            })
            .unwrap_or(BoardGamePlayer::A)
    }

    fn is_available_move(&self, mv: Self::Move) -> Result<bool, BoardDone> {
        self.check_done()?;
        let is_valid = mv
            .to_event(&self.0)
            .map(|evt| self.0.validate(&evt))
            .unwrap_or(false);
        Ok(is_valid)
    }

    fn play(&mut self, mv: Self::Move) -> Result<(), PlayError> {
        self.check_can_play(mv)?;
        self.0.consume(&mv.to_event(&self.0).unwrap());
        Ok(())
    }

    fn outcome(&self) -> Option<Outcome> {
        if self.0.stage == crate::Stage::Ended {
            self.0.determine_winner().map(|player_id| {
                Outcome::WonBy(if player_id == 1 {
                    BoardGamePlayer::A
                } else {
                    BoardGamePlayer::B
                })
            })
        } else {
            None
        }
    }

    fn can_lose_after_move() -> bool {
        true
    }
}

impl<'a> BoardMoves<'a, TrictracBoard> for TrictracBoard {
    type AllMovesIterator = TrictracAllMovesIterator;
    type AvailableMovesIterator = TrictracAvailableMovesIterator<'a>;

    fn all_possible_moves() -> Self::AllMovesIterator {
        TrictracAllMovesIterator::default()
    }

    fn available_moves(&'a self) -> Result<Self::AvailableMovesIterator, BoardDone> {
        TrictracAvailableMovesIterator::new(self)
    }
}

#[derive(Debug, Clone)]
pub struct TrictracAllMovesIterator;

impl Default for TrictracAllMovesIterator {
    fn default() -> Self {
        TrictracAllMovesIterator
    }
}

impl InternalIterator for TrictracAllMovesIterator {
    type Item = TrictracAction;

    fn try_for_each<R, F: FnMut(Self::Item) -> ControlFlow<R>>(self, mut f: F) -> ControlFlow<R> {
        f(TrictracAction::Roll)?;
        f(TrictracAction::Go)?;
        for dice_order in [false, true] {
            for checker1 in 0..16 {
                for checker2 in 0..16 {
                    f(TrictracAction::Move {
                        dice_order,
                        checker1,
                        checker2,
                    })?;
                }
            }
        }

        ControlFlow::Continue(())
    }
}

#[derive(Debug, Clone)]
pub struct TrictracAvailableMovesIterator<'a> {
    board: &'a TrictracBoard,
}

impl<'a> TrictracAvailableMovesIterator<'a> {
    pub fn new(board: &'a TrictracBoard) -> Result<Self, BoardDone> {
        board.check_done()?;
        Ok(TrictracAvailableMovesIterator { board })
    }

    pub fn board(&self) -> &'a TrictracBoard {
        self.board
    }
}

impl InternalIterator for TrictracAvailableMovesIterator<'_> {
    type Item = TrictracAction;

    fn try_for_each<R, F>(self, f: F) -> ControlFlow<R>
    where
        F: FnMut(Self::Item) -> ControlFlow<R>,
    {
        get_valid_actions(&self.board.0).into_iter().try_for_each(f)
    }
}
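A sketch of how this adapter can be driven through the `board-game` traits, using only the methods defined above, and assuming `find` from the internal-iterator crate behaves like its std counterpart (the "first available move" policy is purely illustrative):

    use board_game::board::{Board, BoardMoves};
    use internal_iterator::InternalIterator;

    fn play_first_available(board: &mut TrictracBoard) {
        // Run until the game reports an outcome
        while board.outcome().is_none() {
            // Take the first legal action for the player to move
            let mv = board
                .available_moves()
                .expect("board is not done while outcome() is None")
                .find(|_| true)
                .expect("get_valid_actions never returns an empty list");
            board.play(mv).expect("move came from available_moves");
        }
    }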
(image diff: 3 binary image files changed; dimensions and sizes unchanged before/after: 2.9 MiB, 8.6 KiB, 5.4 KiB)
client_cli/Cargo.toml

@ -12,3 +12,7 @@ pico-args = "0.5.0"
 pretty_assertions = "1.4.0"
 renet = "0.0.13"
 store = { path = "../store" }
+bot = { path = "../bot" }
+itertools = "0.13.0"
+env_logger = "0.11.6"
+log = "0.4.20"
client_cli/src/app.rs

@ -1,68 +1,169 @@
-use pretty_assertions::assert_eq;
-use store::{CheckerMove, GameEvent, GameState, PlayerId};
+use bot::{
+    BotStrategy, DefaultStrategy, DqnBurnStrategy, DqnStrategy, ErroneousStrategy, RandomStrategy,
+    StableBaselines3Strategy,
+};
+use itertools::Itertools;
+
+use crate::game_runner::GameRunner;
+use store::{CheckerMove, GameEvent, GameState, Stage, TurnStage};
+
+#[derive(Debug, Default)]
+pub struct AppArgs {
+    pub seed: Option<u32>,
+    pub bot: Option<String>,
+}

 // Application.
 #[derive(Debug, Default)]
 pub struct App {
     // should the application exit?
     pub should_quit: bool,
-    pub game: GameState,
-    first_move: Option<CheckerMove>,
-    player_id: Option<PlayerId>,
+    pub schools_enabled: bool,
+    pub game: GameRunner,
 }

 impl App {
     // Constructs a new instance of [`App`].
-    pub fn new() -> Self {
-        // Self::default()
-
-        let mut state = GameState::default();
-        // local : player
-        let player_id: Option<PlayerId> = state.init_player("myself");
-        state.init_player("adversary");
-        println!("player_id ? {:?}", player_id);
+    pub fn new(args: AppArgs) -> Self {
+        let bot_strategies: Vec<Box<dyn BotStrategy>> = args
+            .bot
+            .as_deref()
+            .map(|str_bots| {
+                str_bots
+                    .split(",")
+                    .filter_map(|s| match s.trim() {
+                        "dummy" => {
+                            Some(Box::new(DefaultStrategy::default()) as Box<dyn BotStrategy>)
+                        }
+                        "random" => {
+                            Some(Box::new(RandomStrategy::default()) as Box<dyn BotStrategy>)
+                        }
+                        "erroneous" => {
+                            Some(Box::new(ErroneousStrategy::default()) as Box<dyn BotStrategy>)
+                        }
+                        "ai" => Some(Box::new(StableBaselines3Strategy::default())
+                            as Box<dyn BotStrategy>),
+                        "dqn" => Some(Box::new(DqnStrategy::default()) as Box<dyn BotStrategy>),
+                        "dqnburn" => {
+                            Some(Box::new(DqnBurnStrategy::default()) as Box<dyn BotStrategy>)
+                        }
+                        s if s.starts_with("ai:") => {
+                            let path = s.trim_start_matches("ai:");
+                            Some(Box::new(StableBaselines3Strategy::new(path))
+                                as Box<dyn BotStrategy>)
+                        }
+                        s if s.starts_with("dqn:") => {
+                            let path = s.trim_start_matches("dqn:");
+                            Some(Box::new(DqnStrategy::new_with_model(path))
+                                as Box<dyn BotStrategy>)
+                        }
+                        s if s.starts_with("dqnburn:") => {
+                            let path = s.trim_start_matches("dqnburn:");
+                            Some(Box::new(DqnBurnStrategy::new_with_model(&path.to_string()))
+                                as Box<dyn BotStrategy>)
+                        }
+                        _ => None,
+                    })
+                    .collect()
+            })
+            .unwrap_or_default();
+        let schools_enabled = false;
+        let should_quit = bot_strategies.len() > 1;
         Self {
-            game: state,
-            should_quit: false,
-            first_move: None,
-            player_id,
+            game: GameRunner::new(schools_enabled, bot_strategies, args.seed.map(|s| s as u64)),
+            should_quit,
+            schools_enabled,
         }
     }

     fn get_my_player(&mut self) {}

     // Constructs a new instance of [`App`].
     pub fn start(&mut self) {
-        self.game = GameState::new();
+        self.game.state = GameState::new(self.schools_enabled);
     }

     pub fn input(&mut self, input: &str) {
-        println!("'{}'", input);
+        // println!("'{}'", input);
         match input {
             "state" => self.show_state(),
+            "history" => self.show_history(),
             "quit" => self.quit(),
+            // run bots game (when two bots)
+            "bots" => self.bots_all(),
+            "" => self.bots_next_step(),
+            // play (when one bot)
             "roll" => self.roll_dice(),
+            "go" => self.go(),
             _ => self.add_move(input),
         }
+        println!("{}", self.display());
     }

+    // --- 2 bots game actions
+
+    fn bots_all(&mut self) {}
+
+    fn bots_next_step(&mut self) {}
+
     // Set running to false to quit the application.
     pub fn quit(&mut self) {
         self.should_quit = true;
     }

     pub fn show_state(&self) {
         println!("{:?}", self.game.state)
     }

+    pub fn show_history(&self) {
+        for hist in self.game.state.history.iter() {
+            println!("{hist:?}\n");
+        }
+    }
+
     fn roll_dice(&mut self) {
-        if self.player_id.is_none() {
+        if self.game.player_id.is_none() {
             println!("player_id not set ");
             return;
         }
-        self.game.consume(&GameEvent::Roll {
-            player_id: self.player_id.unwrap(),
+        if self.game.state.turn_stage != TurnStage::RollDice {
+            println!("Not in the dice roll stage");
+            return;
+        }
+        let dice = self.game.dice_roller.roll();
+
+        // get correct points for these board and dice
+        // let points_rules = PointsRules::new(
+        //     &self
+        //         .game
+        //         .state
+        //         .player_color_by_id(&self.game.player_id.unwrap())
+        //         .unwrap(),
+        //     &self.game.state.board,
+        //     dice,
+        // );
+        self.game.handle_event(&GameEvent::Roll {
+            player_id: self.game.player_id.unwrap(),
+        });
+        self.game.handle_event(&GameEvent::RollResult {
+            player_id: self.game.player_id.unwrap(),
+            dice,
         });
     }

+    fn go(&mut self) {
+        if self.game.player_id.is_none() {
+            println!("player_id not set ");
+            return;
+        }
+        if self.game.state.turn_stage != TurnStage::HoldOrGoChoice {
+            println!("Not in position to go");
+            return;
+        }
+        self.game.handle_event(&GameEvent::Go {
+            player_id: self.game.player_id.unwrap(),
+        });
+    }
+
     fn add_move(&mut self, input: &str) {
-        if self.player_id.is_none() {
+        if self.game.player_id.is_none() {
             println!("player_id not set ");
             return;
         }
@ -71,34 +172,81 @@ impl App {
             .map(|str| str.parse().unwrap_or(0))
             .collect();
         if positions.len() == 2 && positions[0] != 0 && positions[1] != 0 {
-            let checker_move = CheckerMove::new(positions[0], positions[1]);
-            if checker_move.is_ok() {
-                if self.first_move.is_some() {
+            if let Ok(checker_move) = CheckerMove::new(positions[0], positions[1]) {
+                // if checker_move.is_ok() {
+                if self.game.first_move.is_some() {
                     let move_event = GameEvent::Move {
-                        player_id: self.player_id.unwrap(),
-                        moves: (self.first_move.unwrap(), checker_move.unwrap()),
+                        player_id: self.game.player_id.unwrap(),
+                        moves: (self.game.first_move.unwrap(), checker_move),
                     };
-                    if !self.game.validate(&move_event) {
+                    if !self.game.state.validate(&move_event) {
                         println!("Move invalid");
-                        self.first_move = None;
+                        self.game.first_move = None;
                         return;
                     }
-                    self.game.consume(&move_event);
-                    self.first_move = None;
+                    self.game.handle_event(&move_event);
+                    self.game.first_move = None;
                 } else {
-                    self.first_move = Some(checker_move.unwrap());
+                    self.game.first_move = Some(checker_move);
                 }
                 return;
             }
         }
-        println!("invalid move : {}", input);
+        println!("invalid move : {input}");
     }

     pub fn display(&mut self) -> String {
+        let winner = self
+            .game
+            .state
+            .determine_winner()
+            .and_then(|id| self.game.state.players.get(&id));
+        let str_won: String = winner
+            .map(|p| {
+                let mut name = " winner: ".to_owned();
+                name.push_str(&p.name);
+                name
+            })
+            .unwrap_or("".to_owned());
         let mut output = "-------------------------------".to_owned();
-        output = output + "\nRolled dice : " + &self.game.dices.to_display_string();
-        output = output + "\n-------------------------------";
-        output = output + "\n" + &self.game.board.to_display_grid(9);
+        output += format!(
+            "\n{:?}{} > {} > {:?}",
+            self.game.state.stage,
+            str_won,
+            self.game
+                .state
+                .who_plays()
+                .map(|pl| &pl.name)
+                .unwrap_or(&"?".to_owned()),
+            self.game.state.turn_stage
+        )
+        .as_str();
+
+        output = output + "\nRolled dice : " + &self.game.state.dice.to_display_string();
+
+        if self.game.state.stage != Stage::PreGame {
+            output = output + "\nRolled dice jans : " + &format!("{:?}", self.game.state.dice_jans);
+            output = output
+                + "\nLast move : "
+                + &self.game.state.dice_moves.0.to_display_string()
+                + ", "
+                + &self.game.state.dice_moves.1.to_display_string();
+
+            // display players points
+            output += format!("\n\n{:<11} :: {:<5} :: {}", "Player", "holes", "points").as_str();
+
+            for player_id in self.game.state.players.keys().sorted() {
+                let player = &self.game.state.players[player_id];
+                output += format!(
+                    "\n{}. {:<8} :: {:<5} :: {}",
+                    &player_id, &player.name, &player.holes, &player.points,
+                )
+                .as_str();
+            }
+        }
+
+        output += "\n-------------------------------\n";
+        output += &self.game.state.board.to_display_grid(9);
         output
     }
 }
@ -106,10 +254,12 @@ impl App {
 #[cfg(test)]
 mod tests {
     use super::*;
+    use pretty_assertions::assert_eq;

     #[test]
     fn test_display() {
         let expected = "-------------------------------
+PreGame > ? > RollDice
 Rolled dice : 0 & 0
 -------------------------------
@ -124,7 +274,7 @@ Rolled dice : 0 & 0
 | | | X |
 | | | X |
 | | | 15 |
-|----------------------------- | | ------------------------------|
+|------------------------------ | | -----------------------------|
 | | | 15 |
 | | | O |
 | | | O |
@ -144,11 +294,19 @@ Rolled dice : 0 & 0
     #[test]
     fn test_move() {
         let expected = "-------------------------------
-Rolled dice : 0 & 0
+InGame > myself > RollDice
+Rolled dice : 4 & 6
+Rolled dice jans : {}
+Last move : CheckerMove { from: 24, to: 18 } , CheckerMove { from: 24, to: 20 }
+
+Player :: holes :: points
+1. myself :: 0 :: 0
+2. bot :: 0 :: 0
 -------------------------------

 13 14 15 16 17 18 19 20 21 22 23 24
 ----------------------------------------------------------------
 | X | | X X |
 | | | X |
 | | | X |
 | | | X |
@ -156,9 +314,8 @@ Rolled dice : 0 & 0
 | | | X |
 | | | X |
 | | | X |
-| | | X |
 | | | 15 |
-|----------------------------- | | ------------------------------|
-| | | 13 |
+|------------------------------ | | -----------------------------|
 | | | 13 |
 | | | O |
 | | | O |
@ -171,9 +328,14 @@ Rolled dice : 0 & 0
 ----------------------------------------------------------------
  12 11 10 9 8 7 6 5 4 3 2 1
 ";
-        let mut app = App::new();
+        let mut app = App::new(AppArgs {
+            seed: Some(1327),
+            bot: Some("dummy".into()),
+        });
+        println!("before : {}", app.display());
         app.input("roll");
         app.input("1 3");
+        app.input("1 4");
+        app.input("1 5");
         self::assert_eq!(app.display(), expected);
     }
 }
131 client_cli/src/game_runner.rs Normal file

@ -0,0 +1,131 @@

use bot::{Bot, BotStrategy};
use log::{debug, error};
use store::{CheckerMove, DiceRoller, GameEvent, GameState, PlayerId, TurnStage};

// Application Game
#[derive(Debug, Default)]
pub struct GameRunner {
    pub state: GameState,
    pub dice_roller: DiceRoller,
    pub first_move: Option<CheckerMove>,
    pub player_id: Option<PlayerId>,
    bots: Vec<Bot>,
}

impl GameRunner {
    // Constructs a new instance of [`GameRunner`].
    pub fn new(
        schools_enabled: bool,
        bot_strategies: Vec<Box<dyn BotStrategy>>,
        seed: Option<u64>,
    ) -> Self {
        let mut state = GameState::new(schools_enabled);
        // local : player
        let player_id: Option<PlayerId> = if bot_strategies.len() > 1 {
            None
        } else {
            state.init_player("myself")
        };

        // bots
        let bots: Vec<Bot> = bot_strategies
            .into_iter()
            .map(|strategy| {
                let bot_id: PlayerId = state.init_player("bot").unwrap();
                let bot_color = state.player_color_by_id(&bot_id).unwrap();
                Bot::new(strategy, bot_color)
            })
            .collect();
        // let bot_strategy = Box::new(DefaultStrategy::default());
        // let bot: Bot = Bot::new(bot_strategy, bot_color, schools_enabled);
        // let bot: Bot = Bot::new(bot_strategy, bot_color);

        let first_player_id = if bots.len() > 1 {
            bots[0].player_id
        } else {
            player_id.unwrap()
        };
        let mut game = Self {
            state,
            dice_roller: DiceRoller::new(seed),
            first_move: None,
            player_id,
            bots,
        };
        game.handle_event(&GameEvent::BeginGame {
            goes_first: first_player_id,
        });
        game
    }

    pub fn handle_event(&mut self, event: &GameEvent) -> Option<GameEvent> {
        if event == &GameEvent::PlayError {
            return None;
        }
        let valid_event = if self.state.validate(event) {
            debug!(
                "--------------- new valid event {event:?} (stage {:?}) -----------",
                self.state.turn_stage
            );
            self.state.consume(event);
            debug!(
                " --> stage {:?} ; active player points {:?}",
                self.state.turn_stage,
                self.state.who_plays().map(|p| p.points)
            );
            event
        } else {
            debug!("{}", self.state);
            error!("event not valid : {event:?}");
            // panic!("crash and burn {} \nevt not valid {event:?}", self.state);
            &GameEvent::PlayError
        };

        // chain all successive bot actions
        if self.bots.is_empty() {
            return None;
        }

        // Collect bot actions to avoid borrow conflicts
        let bot_events: Vec<GameEvent> = self
            .bots
            .iter_mut()
            .filter_map(|bot| bot.handle_event(valid_event))
            .collect();

        // if bot_events.len() > 1 {
        //     println!(
        //         "There might be a problem : 2 bots events : {:?}",
        //         bot_events
        //     );
        // }

        let mut next_event = None;
        for bot_event in bot_events {
            let bot_result_event = self.handle_event(&bot_event);
            if let Some(bot_id) = bot_event.player_id() {
                next_event = if self.bot_needs_dice_roll(bot_id) {
                    let dice = self.dice_roller.roll();
                    self.handle_event(&GameEvent::RollResult {
                        player_id: bot_id,
                        dice,
                    })
                } else {
                    bot_result_event
                };
            }
        }

        if let Some(winner) = self.state.determine_winner() {
            next_event = Some(store::GameEvent::EndGame {
                reason: store::EndGameReason::PlayerWon { winner },
            });
        }

        next_event
    }

    fn bot_needs_dice_roll(&self, bot_id: PlayerId) -> bool {
        self.state.active_player_id == bot_id && self.state.turn_stage == TurnStage::RollWaiting
    }
}
|
|
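For context, a minimal sketch of how this runner can be driven from a caller. `DefaultStrategy` (from bot/src/default.rs), the assumption that player ids are assigned from 1, and the shape of the feedback loop are assumptions, not the exact CLI code:

```rust
// Sketch: let two default bots play against each other.
// GameRunner::new already injects GameEvent::BeginGame, so the bots start
// reacting immediately; afterwards every event the runner returns is fed
// back into handle_event until an EndGame event comes back.
let strategies: Vec<Box<dyn BotStrategy>> = vec![
    Box::new(DefaultStrategy::default()),
    Box::new(DefaultStrategy::default()),
];
let mut runner = GameRunner::new(false, strategies, Some(42));

// Assumed: ids are assigned from 1, so the first bot got id 1.
let mut pending = Some(GameEvent::Roll { player_id: 1 });
while let Some(event) = pending {
    if matches!(event, GameEvent::EndGame { .. }) {
        break;
    }
    pending = runner.handle_event(&event);
}
```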
@@ -1,13 +1,48 @@
// Application.
pub mod app;
mod game_runner;

use anyhow::Result;
-use app::App;
+use app::{App, AppArgs};
use std::io;

+// see pico-args example at https://github.com/RazrFalcon/pico-args/blob/master/examples/app.rs
+const HELP: &str = "\
+Trictrac CLI
+
+USAGE:
+  trictrac-cli [OPTIONS]
+
+FLAGS:
+  -h, --help            Prints help information
+
+OPTIONS:
+  --seed SEED           Sets the random generator seed
+  --bot STRATEGY        Adds a bot player with strategy STRATEGY; a second bot may be added to play against the first: --bot STRATEGY1,STRATEGY2
+                        Available strategies:
+                        - dummy: default strategy, selects the first valid move
+                        - ai: AI strategy using the default model at models/trictrac_ppo.zip
+                        - ai:/path/to/model.zip: AI strategy using a custom model
+                        - dqn: DQN strategy using the native Rust implementation with Burn
+                        - dqn:/path/to/model: DQN strategy using a custom model
+
+ARGS:
+  <INPUT>
+";

fn main() -> Result<()> {
    env_logger::init();
+    let args = match parse_args() {
+        Ok(v) => v,
+        Err(e) => {
+            eprintln!("Error: {e}.");
+            std::process::exit(1);
+        }
+    };
+    // println!("{:#?}", args);

    // Create an application.
-    let mut app = App::new();
+    let mut app = App::new(args);

    // Start the main loop.
    while !app.should_quit {

@@ -17,5 +52,38 @@ fn main() -> Result<()> {
        app.input(input.trim());
    }

+    // display the app's final state
+    println!("{}", app.display());
+
    Ok(())
}
+
+fn parse_args() -> Result<AppArgs, pico_args::Error> {
+    let mut pargs = pico_args::Arguments::from_env();
+
+    // Help has a higher priority and should be handled separately.
+    if pargs.contains(["-h", "--help"]) {
+        print!("{HELP}");
+        std::process::exit(0);
+    }
+
+    let args = AppArgs {
+        // Parses an optional value that implements `FromStr`.
+        seed: pargs.opt_value_from_str("--seed")?,
+        bot: pargs.opt_value_from_str("--bot")?,
+        // Parses an optional value from `&str` using a specified function.
+        // width: pargs.opt_value_from_fn("--width", parse_width)?.unwrap_or(10),
+    };
+
+    // It's up to the caller what to do with the remaining arguments.
+    let remaining = pargs.finish();
+    if !remaining.is_empty() {
+        eprintln!("Warning: unused arguments left: {remaining:?}.");
+    }
+
+    Ok(args)
+}
+
+// fn parse_width(s: &str) -> Result<u32, &'static str> {
+//     s.parse().map_err(|_| "not a number")
+// }
@@ -6,9 +6,9 @@ edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

[dependencies]
-anyhow = "1.0.75"
+anyhow = "1.0.89"
bincode = "1.3.3"
-crossterm = "0.27.0"
-ratatui = "0.24.0"
-renet = "0.0.13"
+crossterm = "0.28.1"
+ratatui = "0.28.1"
+# renet = "0.0.13"
store = { path = "../store" }
@@ -25,6 +25,6 @@ pub fn render(app: &mut App, f: &mut Frame) {
        )
        .style(Style::default().fg(Color::Yellow))
        .alignment(Alignment::Center),
-        f.size(),
+        f.area(),
    )
}
103 devenv.lock
@@ -3,11 +3,10 @@
    "devenv": {
      "locked": {
        "dir": "src/modules",
-        "lastModified": 1698243190,
-        "narHash": "sha256-n+SbyNQRhUcaZoU00d+7wi17HJpw/kAUrXOL4zRcqE8=",
+        "lastModified": 1753667201,
        "owner": "cachix",
        "repo": "devenv",
-        "rev": "86f476f7edb86159fd20764489ab4e4df6edb4b6",
+        "rev": "4d584d7686a50387f975879788043e55af9f0ad4",
        "type": "github"
      },
      "original": {
@@ -20,11 +19,10 @@
    "flake-compat": {
      "flake": false,
      "locked": {
-        "lastModified": 1673956053,
-        "narHash": "sha256-4gtG9iQuiKITOjNQQeQIpoIB6b16fm+504Ch3sNKLd8=",
+        "lastModified": 1747046372,
        "owner": "edolstra",
        "repo": "flake-compat",
-        "rev": "35bb57c0c8d8b62bbfd284272c928ceb64ddbde9",
+        "rev": "9100a0f413b0c601e0533d1d94ffd501ce2e7885",
        "type": "github"
      },
      "original": {
@@ -33,37 +31,39 @@
        "type": "github"
      }
    },
-    "flake-utils": {
+    "git-hooks": {
      "inputs": {
-        "systems": "systems"
+        "flake-compat": "flake-compat",
+        "gitignore": "gitignore",
+        "nixpkgs": [
+          "nixpkgs"
+        ]
      },
      "locked": {
-        "lastModified": 1685518550,
-        "narHash": "sha256-o2d0KcvaXzTrPRIo0kOLV0/QXHhDQ5DTi+OxcjO8xqY=",
-        "owner": "numtide",
-        "repo": "flake-utils",
-        "rev": "a1720a10a6cfe8234c0e93907ffe81be440f4cef",
+        "lastModified": 1750779888,
+        "owner": "cachix",
+        "repo": "git-hooks.nix",
+        "rev": "16ec914f6fb6f599ce988427d9d94efddf25fe6d",
        "type": "github"
      },
      "original": {
-        "owner": "numtide",
-        "repo": "flake-utils",
+        "owner": "cachix",
+        "repo": "git-hooks.nix",
        "type": "github"
      }
    },
    "gitignore": {
      "inputs": {
        "nixpkgs": [
-          "pre-commit-hooks",
+          "git-hooks",
          "nixpkgs"
        ]
      },
      "locked": {
-        "lastModified": 1660459072,
-        "narHash": "sha256-8DFJjXG8zqoONA1vXtgeKXy68KdJL5UaXR8NtVMUbx8=",
+        "lastModified": 1709087332,
        "owner": "hercules-ci",
        "repo": "gitignore.nix",
-        "rev": "a20de23b925fd8264fd7fad6454652e142fd7f73",
+        "rev": "637db329424fd7e46cf4185293b9cc8c88c95394",
        "type": "github"
      },
      "original": {
@@ -74,11 +74,10 @@
    },
    "nixpkgs": {
      "locked": {
-        "lastModified": 1698553279,
-        "narHash": "sha256-T/9P8yBSLcqo/v+FTOBK+0rjzjPMctVymZydbvR/Fak=",
+        "lastModified": 1753432016,
        "owner": "NixOS",
        "repo": "nixpkgs",
-        "rev": "90e85bc7c1a6fc0760a94ace129d3a1c61c3d035",
+        "rev": "6027c30c8e9810896b92429f0092f624f7b1aace",
        "type": "github"
      },
      "original": {
@@ -88,66 +87,14 @@
        "type": "github"
      }
    },
-    "nixpkgs-stable": {
-      "locked": {
-        "lastModified": 1685801374,
-        "narHash": "sha256-otaSUoFEMM+LjBI1XL/xGB5ao6IwnZOXc47qhIgJe8U=",
-        "owner": "NixOS",
-        "repo": "nixpkgs",
-        "rev": "c37ca420157f4abc31e26f436c1145f8951ff373",
-        "type": "github"
-      },
-      "original": {
-        "owner": "NixOS",
-        "ref": "nixos-23.05",
-        "repo": "nixpkgs",
-        "type": "github"
-      }
-    },
-    "pre-commit-hooks": {
-      "inputs": {
-        "flake-compat": "flake-compat",
-        "flake-utils": "flake-utils",
-        "gitignore": "gitignore",
-        "nixpkgs": [
-          "nixpkgs"
-        ],
-        "nixpkgs-stable": "nixpkgs-stable"
-      },
-      "locked": {
-        "lastModified": 1698227354,
-        "narHash": "sha256-Fi5H9jbaQLmLw9qBi/mkR33CoFjNbobo5xWdX4tKz1Q=",
-        "owner": "cachix",
-        "repo": "pre-commit-hooks.nix",
-        "rev": "bd38df3d508dfcdff52cd243d297f218ed2257bf",
-        "type": "github"
-      },
-      "original": {
-        "owner": "cachix",
-        "repo": "pre-commit-hooks.nix",
-        "type": "github"
-      }
-    },
    "root": {
      "inputs": {
        "devenv": "devenv",
+        "git-hooks": "git-hooks",
        "nixpkgs": "nixpkgs",
-        "pre-commit-hooks": "pre-commit-hooks"
-      }
-    },
-    "systems": {
-      "locked": {
-        "lastModified": 1681028828,
-        "narHash": "sha256-Vy1rq5AaRuLzOxct8nz4T6wlgyUR7zLU309k9mBC768=",
-        "owner": "nix-systems",
-        "repo": "default",
-        "rev": "da67096a3b9bf56a91d16901293e51ba5b49a27e",
-        "type": "github"
-      },
-      "original": {
-        "owner": "nix-systems",
-        "repo": "default",
-        "type": "github"
+        "pre-commit-hooks": [
+          "git-hooks"
+        ]
      }
    }
  },
29 devenv.nix
@@ -1,16 +1,27 @@
{ pkgs, ... }:

{
  # https://devenv.sh/basics/
  # env.GREET = "devenv";

  packages = [
+    # for burn-rs
+    pkgs.SDL2_gfx
+    # (sdl2-sys compilation)
+    pkgs.cmake
+    pkgs.libffi
+    pkgs.wayland-scanner
+
+    # dev tools
+    pkgs.samply # code profiler
+    pkgs.feedgnuplot # to visualize bot training results
+
    # for bevy
-    pkgs.alsaLib
+    pkgs.alsa-lib
    pkgs.udev

    # bevy fast compile
-    pkgs.clang pkgs.lld
+    pkgs.clang
+    pkgs.lld

    # copied from https://github.com/mmai/Hyperspeedcube/blob/develop/devenv.nix
    # TODO: remove whatever is unneeded
@@ -27,16 +38,12 @@
    pkgs.xorg.libXi
    pkgs.xorg.libX11

-    pkgs.vulkan-headers pkgs.vulkan-loader
+    pkgs.vulkan-headers
+    pkgs.vulkan-loader
    # ------------ end of copied section

  ];

-  # enterShell = ''
-  #   hello
-  #   git --version
-  # '';
-
  # https://devenv.sh/languages/
  languages.rust.enable = true;

@@ -44,7 +51,7 @@
  # scripts.hello.exec = "echo hello from $GREET";

  # https://devenv.sh/pre-commit-hooks/
-  pre-commit.hooks.shellcheck.enable = true;
+  # pre-commit.hooks.shellcheck.enable = true;

  # https://devenv.sh/processes/
  # processes.ping.exec = "ping example.com";
@@ -1,34 +1,59 @@
# Backlog

position in the tutorial:

## DONE

### Epic: basic game

Store

- event handling
- checker moves
- dice rolls
- physically possible moves

## TODO

### stack overflow

- <https://crates.io/crates/backtrace-on-stack-overflow>
- <https://users.rust-lang.org/t/how-to-diagnose-a-stack-overflow-issues-cause/17320/11>
- <https://www.reddit.com/r/rust/comments/1d8lxtd/debugging_stack_overflows/>

Ways to limit the stack: reduce the stack size before launching the binary from the command line:

```sh
ulimit -s 6144 # limit the stack to 6 MB
# just trainbot
RUST_BACKTRACE=1 LD_LIBRARY_PATH=./target/debug ./target/debug/train_dqn_burn
ulimit -s unlimited # back to the default
```
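An in-program alternative, as a sketch: spawn the training on a thread whose stack size is set explicitly, which gives the same budget as `ulimit -s` without touching the shell. `train_dqn_burn::run` is a hypothetical entry point standing in for the real training code:

```rust
use std::thread;

fn main() {
    // Same 6 MB budget as `ulimit -s 6144`, but set from inside the program.
    let handle = thread::Builder::new()
        .stack_size(6 * 1024 * 1024)
        .spawn(|| {
            // Hypothetical entry point; replace with the real training function.
            // train_dqn_burn::run();
        })
        .expect("failed to spawn the training thread");
    handle.join().expect("training thread panicked");
}
```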

- burn bot
  - train = `just trainbot`
  - training duration depending on the params?
  - save
  - load and run against the default bot
  - many configs, save models per config
  - retrain against itself?

### Doc

Cheatsheet: tree of situations and rule precedence

### Epic: basic game

- moves allowed by the rules (they may be validated physically when playing with schools)
- automatic points computation (no schools)

Server

-

Client

- tui client (ratatui)
- desktop client (bevy)
- web client

### Epic: game with schools

- moving the points markers: physical validation
- school & counter-school declaration events

### Epic: Bot

- OpenAI gym
  - gymnasium doc <https://gymnasium.farama.org/introduction/basic_usage/>
  - Rust implementation for OpenAI gym <https://github.com/MathisWellmann/gym-rs>
  - Backgammon (?) <https://github.com/dellalibera/gym-backgammon>
143 doc/diagrammes.md Normal file
@@ -0,0 +1,143 @@
# Architecture diagrams

## Class / struct diagram

This diagram shows the static relationships between the main components.

@startuml

!theme vibrant

package "client_cli" {
  class GameRunner {
    - state: GameState
    - bots: Vec<Bot>
    + new(Vec<Box<dyn BotStrategy>>)
    + handle_event(&GameEvent)
  }
}

package "bot" {
  class Bot {
    - strategy: Box<dyn BotStrategy>
    + new(Box<dyn BotStrategy>)
    + handle_event(&GameEvent): Option<GameEvent>
  }

  interface BotStrategy {
    + choose_move(): (CheckerMove, CheckerMove)
    + get_game(): &GameState
    ' ... other methods
  }

  class DefaultStrategy
  class DqnStrategy
  class ErroneousStrategy
}

package "store" {
  class GameState {
    + stage: Stage
    + turn_stage: TurnStage
    + board: Board
    + active_player_id: PlayerId
    ' ...
    + validate(&GameEvent): bool
    + consume(&GameEvent)
  }

  class GameEvent
}

GameRunner "1" *-- "1..2" Bot : contains
Bot "1" *-- "1" BotStrategy : uses
BotStrategy <|.. DefaultStrategy : implements
BotStrategy <|.. DqnStrategy : implements
BotStrategy <|.. ErroneousStrategy : implements

GameRunner ..> GameState : depends on
GameRunner ..> GameEvent : handles
Bot ..> GameState : depends on
Bot ..> GameEvent : processes and emits
BotStrategy ..> GameState : analyzes
@enduml
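As a Rust counterpart to the diagram above, a minimal sketch of the `BotStrategy` interface. Only `choose_move` and `get_game` appear in the diagram; everything else a real strategy needs is an assumption:

```rust
use store::{CheckerMove, GameState};

/// Sketch of the strategy interface shown in the class diagram.
/// The real trait in bot/ may carry more methods (the diagram itself
/// says "... other methods").
pub trait BotStrategy {
    /// Pick the pair of checker moves to play in the current state.
    fn choose_move(&self) -> (CheckerMove, CheckerMove);
    /// Read access to the strategy's view of the game.
    fn get_game(&self) -> &GameState;
}
```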

## Sequence diagram: a bot's game loop

This diagram shows the dynamic interactions during a turn where a bot has to play.

@startuml
!theme vibrant
autonumber

participant "main (client_cli)" as Main
participant "runner: GameRunner" as Runner
participant "bot: Bot" as Bot
participant "strategy: BotStrategy" as Strategy
participant "state: GameState" as GameState

Main -> Runner: new(strategies, seed)
activate Runner
Runner -> GameState: new()
activate GameState
GameState --> Runner: state
deactivate GameState
Runner -> Main: runner
deactivate Runner

... Main game loop ...

Main -> Runner: handle_event(event)
activate Runner

Runner -> GameState: validate(event)
activate GameState
GameState --> Runner: bool
deactivate GameState

Runner -> GameState: consume(event)
activate GameState
deactivate GameState

Runner -> Bot: handle_event(event)
activate Bot

note right of Bot: The bot checks whether it is its turn to play

Bot -> Strategy: get_mut_game()
activate Strategy
Strategy --> Bot: &mut GameState
deactivate Strategy

' Assume it is the bot's turn to play a move
Bot -> Strategy: choose_move()
activate Strategy
Strategy -> GameState: reads the state (board, dice, etc.)
activate GameState
deactivate GameState
Strategy --> Bot: moves
deactivate Strategy

Bot --> Runner: Some(GameEvent::Move)
deactivate Bot

Runner -> Runner: handle_event(GameEvent::Move)
note right of Runner: Recursive call to process the bot's move

Runner -> GameState: validate(GameEvent::Move)
activate GameState
GameState --> Runner: true
deactivate GameState

Runner -> GameState: consume(GameEvent::Move)
activate GameState
note right of GameState: The game state is updated\n(checkers moved, active player switched)
deactivate GameState

Runner --> Main: Option<GameEvent> (or None)
deactivate Runner

@enduml
775 doc/refs/claudeAIquestion.md Normal file
@@ -0,0 +1,775 @@
# Description

I am developing a TricTrac game (<https://fr.wikipedia.org/wiki/Trictrac>) in the Rust language.
For now I am focusing on the simple command-line application, so ignore the 'client_bevy', 'client_tui', and 'server' folders, which will only be used for future iterations.

The game rules and the state of a game are implemented in 'store'; the command-line application is implemented in 'client_cli' and already lets you play against a bot, or have two bots play against each other.
Bot strategies are implemented in the 'bot' folder.

More precisely, the game state is defined by the GameState struct in store/src/game.rs; its to_string_id() method encodes this state compactly into a string, but without the history of played moves. fmt::Display is also implemented, for a more readable textual representation.

'client_cli/src/game_runner.rs' contains the logic for making two bots play against each other.
'bot/src/default.rs' contains the code of a basic bot strategy: it computes the list of valid moves (with the get_possible_moves_sequences method of store::MoveRules) and simply plays the first one in the list.

I would now like to add a stronger bot strategy by training an agent/bot with OpenAI Gym.

To benefit from the libraries available in Python, such as 'Stable-Baselines3', I used PyO3 to call my Rust code from Python. The exposed Rust code lives in 'store/src/engine.rs'; for now it only contains example code to make sure the Python/Rust bridge works, but everything remains to be implemented: the game state, the available moves, etc.

The Python class 'store/python/trictracEnv.py' is also nearly empty; for now it only contains a skeleton with functions to implement.

Can you help me modify engine.rs and trictracEnv.py so that an agent (bot) can be trained?

I would like to reuse what I have already implemented in Rust: the state of a game and the rules, and the function returning the list of moves available to a player from a game state (get_possible_moves_sequences), but I am not sure how to go about it.

## Claude's answer

I see that I am not allowed to use the Edit tool. I will show you the changes needed for the two main files.

Here is the content to use for `store/src/engine.rs`:
```rust
//! # Expose trictrac game state and rules in a python module
use pyo3::prelude::*;
use pyo3::types::PyDict;

use crate::board::CheckerMove;
use crate::dice::Dice;
use crate::game::{GameEvent, GameState, Stage, TurnStage};
use crate::game_rules_moves::MoveRules;
use crate::game_rules_points::PointsRules;
use crate::player::{Color, PlayerId};

#[pyclass]
struct TricTrac {
    game_state: GameState,
    dice_roll_sequence: Vec<(u8, u8)>,
    current_dice_index: usize,
}

#[pymethods]
impl TricTrac {
    #[new]
    fn new() -> Self {
        let mut game_state = GameState::new(false); // schools_enabled = false

        // Initialize the 2 players
        game_state.init_player("player1");
        game_state.init_player("bot");

        // Start the game with player 1
        game_state.consume(&GameEvent::BeginGame { goes_first: 1 });

        TricTrac {
            game_state,
            dice_roll_sequence: Vec::new(),
            current_dice_index: 0,
        }
    }

    /// Get the game state as a compact string
    fn get_state_id(&self) -> String {
        self.game_state.to_string_id()
    }

    /// Get the game state as a dictionary, to ease training
    fn get_state_dict(&self) -> PyResult<Py<PyDict>> {
        Python::with_gil(|py| {
            let state_dict = PyDict::new(py);

            // Essential information about the game state
            state_dict.set_item("active_player", self.game_state.active_player_id)?;
            state_dict.set_item("stage", format!("{:?}", self.game_state.stage))?;
            state_dict.set_item("turn_stage", format!("{:?}", self.game_state.turn_stage))?;

            // Dice
            let (dice1, dice2) = self.game_state.dice.values;
            state_dict.set_item("dice", (dice1, dice2))?;

            // Player points
            if let Some(white_player) = self.game_state.get_white_player() {
                state_dict.set_item("white_points", white_player.points)?;
                state_dict.set_item("white_holes", white_player.holes)?;
            }

            if let Some(black_player) = self.game_state.get_black_player() {
                state_dict.set_item("black_points", black_player.points)?;
                state_dict.set_item("black_holes", black_player.holes)?;
            }

            // Checker positions
            let white_positions = self.get_checker_positions(Color::White);
            let black_positions = self.get_checker_positions(Color::Black);

            state_dict.set_item("white_positions", white_positions)?;
            state_dict.set_item("black_positions", black_positions)?;

            // Compact state id, for state comparisons
            state_dict.set_item("state_id", self.game_state.to_string_id())?;

            Ok(state_dict.into())
        })
    }

    /// Return the checker positions for a given player
    fn get_checker_positions(&self, color: Color) -> Vec<(usize, i8)> {
        self.game_state.board.get_color_fields(color)
    }

    /// Get the list of legal moves as (from, to) pairs
    fn get_available_moves(&self) -> Vec<((usize, usize), (usize, usize))> {
        // The agent always plays the active player
        let color = self.game_state.player_color_by_id(&self.game_state.active_player_id).unwrap_or(Color::White);

        // If this is not the moment to move checkers, return an empty list
        if self.game_state.turn_stage != TurnStage::Move && self.game_state.turn_stage != TurnStage::HoldOrGoChoice {
            return vec![];
        }

        let rules = MoveRules::new(&color, &self.game_state.board, self.game_state.dice);
        let possible_moves = rules.get_possible_moves_sequences(true, vec![]);

        // Convert the CheckerMove moves into (from, to) tuples for Python
        possible_moves.into_iter()
            .map(|(move1, move2)| (
                (move1.get_from(), move1.get_to()),
                (move2.get_from(), move2.get_to())
            )).collect()
    }

    /// Play a move ((from1, to1), (from2, to2))
    fn play_move(&mut self, moves: ((usize, usize), (usize, usize))) -> bool {
        let ((from1, to1), (from2, to2)) = moves;

        // Check that it is the player's turn to move
        if self.game_state.turn_stage != TurnStage::Move && self.game_state.turn_stage != TurnStage::HoldOrGoChoice {
            return false;
        }

        let move1 = CheckerMove::new(from1, to1).unwrap_or_default();
        let move2 = CheckerMove::new(from2, to2).unwrap_or_default();

        let event = GameEvent::Move {
            player_id: self.game_state.active_player_id,
            moves: (move1, move2),
        };

        // Check whether the move is valid
        if !self.game_state.validate(&event) {
            return false;
        }

        // Execute the move
        self.game_state.consume(&event);

        // If the other player must roll the dice now, simulate that roll
        if self.game_state.turn_stage == TurnStage::RollDice {
            self.roll_dice();
        }

        true
    }

    /// Roll the dice (either randomly, or using a predefined sequence)
    fn roll_dice(&mut self) -> (u8, u8) {
        // Check that this is the right moment to roll the dice
        if self.game_state.turn_stage != TurnStage::RollDice && self.game_state.turn_stage != TurnStage::RollWaiting {
            return self.game_state.dice.values;
        }

        // Simulate a dice roll
        let dice_values = if !self.dice_roll_sequence.is_empty() && self.current_dice_index < self.dice_roll_sequence.len() {
            // Use the predefined sequence
            let dice = self.dice_roll_sequence[self.current_dice_index];
            self.current_dice_index += 1;
            dice
        } else {
            // Generate randomly
            (
                (1 + (rand::random::<u8>() % 6)),
                (1 + (rand::random::<u8>() % 6))
            )
        };

        // Send the appropriate events
        let roll_event = GameEvent::Roll {
            player_id: self.game_state.active_player_id,
        };

        if self.game_state.validate(&roll_event) {
            self.game_state.consume(&roll_event);
        }

        let roll_result_event = GameEvent::RollResult {
            player_id: self.game_state.active_player_id,
            dice: Dice { values: dice_values },
        };

        if self.game_state.validate(&roll_result_event) {
            self.game_state.consume(&roll_result_event);
        }

        dice_values
    }

    /// Mark points
    fn mark_points(&mut self, points: u8) -> bool {
        // Check that this is the right moment to mark points
        if self.game_state.turn_stage != TurnStage::MarkPoints && self.game_state.turn_stage != TurnStage::MarkAdvPoints {
            return false;
        }

        let event = GameEvent::Mark {
            player_id: self.game_state.active_player_id,
            points,
        };

        // Check whether the event is valid
        if !self.game_state.validate(&event) {
            return false;
        }

        // Execute the event
        self.game_state.consume(&event);

        // If the other player must roll the dice now, simulate that roll
        if self.game_state.turn_stage == TurnStage::RollDice {
            self.roll_dice();
        }

        true
    }

    /// Choose to continue (Go) after winning a hole
    fn choose_go(&mut self) -> bool {
        // Check that this is the right moment to choose to continue
        if self.game_state.turn_stage != TurnStage::HoldOrGoChoice {
            return false;
        }

        let event = GameEvent::Go {
            player_id: self.game_state.active_player_id,
        };

        // Check whether the event is valid
        if !self.game_state.validate(&event) {
            return false;
        }

        // Execute the event
        self.game_state.consume(&event);

        // Simulate the dice roll for the next turn
        self.roll_dice();

        true
    }

    /// Compute the maximum points the active player can get with the current dice
    fn calculate_points(&self) -> u8 {
        let active_player = self.game_state.players.get(&self.game_state.active_player_id);

        if let Some(player) = active_player {
            let dice_roll_count = player.dice_roll_count;
            let color = player.color;

            let points_rules = PointsRules::new(&color, &self.game_state.board, self.game_state.dice);
            let (points, _) = points_rules.get_points(dice_roll_count);

            points
        } else {
            0
        }
    }

    /// Reset the game
    fn reset(&mut self) {
        self.game_state = GameState::new(false);

        // Initialize the 2 players
        self.game_state.init_player("player1");
        self.game_state.init_player("bot");

        // Start the game with player 1
        self.game_state.consume(&GameEvent::BeginGame { goes_first: 1 });

        // Reset the dice sequence index
        self.current_dice_index = 0;
    }

    /// Check whether the game is over
    fn is_done(&self) -> bool {
        self.game_state.stage == Stage::Ended || self.game_state.determine_winner().is_some()
    }

    /// Get the winner of the game
    fn get_winner(&self) -> Option<PlayerId> {
        self.game_state.determine_winner()
    }

    /// Get a player's score (number of holes)
    fn get_score(&self, player_id: PlayerId) -> i32 {
        if let Some(player) = self.game_state.players.get(&player_id) {
            player.holes as i32
        } else {
            -1
        }
    }

    /// Get the active player's id
    fn get_active_player_id(&self) -> PlayerId {
        self.game_state.active_player_id
    }

    /// Set a dice sequence to use (for reproducibility)
    fn set_dice_sequence(&mut self, sequence: Vec<(u8, u8)>) {
        self.dice_roll_sequence = sequence;
        self.current_dice_index = 0;
    }

    /// Display the game state (for debugging)
    fn __str__(&self) -> String {
        format!("{}", self.game_state)
    }
}

/// A Python module implemented in Rust. The name of this function must match
/// the `lib.name` setting in the `Cargo.toml`, else Python will not be able to
/// import the module.
#[pymodule]
fn trictrac(m: &Bound<'_, PyModule>) -> PyResult<()> {
    m.add_class::<TricTrac>()?;

    Ok(())
}
```

And for `store/python/trictracEnv.py`:
```python
import gym
import numpy as np
from gym import spaces
import trictrac  # Rust module exposed via PyO3
from typing import Dict, List, Tuple, Optional, Any, Union

class TricTracEnv(gym.Env):
    """OpenAI Gym environment for the Trictrac game"""

    metadata = {"render.modes": ["human"]}

    def __init__(self, opponent_strategy="random"):
        super(TricTracEnv, self).__init__()

        # Instantiate the game
        self.game = trictrac.TricTrac()

        # Opponent strategy
        self.opponent_strategy = opponent_strategy

        # Constants
        self.MAX_FIELD = 24  # number of fields on the board
        self.MAX_CHECKERS = 15  # maximum number of checkers per player

        # Observation space definition
        # Format:
        # - white checker positions (24)
        # - black checker positions (24)
        # - active player (1: white, 2: black) (1)
        # - dice values (2)
        # - each player's points (2)
        # - each player's holes (2)
        # - game phase (1)
        self.observation_space = spaces.Dict({
            'board': spaces.Box(low=-self.MAX_CHECKERS, high=self.MAX_CHECKERS, shape=(self.MAX_FIELD,), dtype=np.int8),
            'active_player': spaces.Discrete(3),  # 0: no player, 1: white, 2: black
            'dice': spaces.MultiDiscrete([7, 7]),  # dice values (1-6)
            'white_points': spaces.Discrete(13),  # white player's points (0-12)
            'white_holes': spaces.Discrete(13),  # white player's holes (0-12)
            'black_points': spaces.Discrete(13),  # black player's points (0-12)
            'black_holes': spaces.Discrete(13),  # black player's holes (0-12)
            'turn_stage': spaces.Discrete(6),  # stage of the turn
        })

        # Action space definition
        # Format:
        # - action type: 0=move, 1=mark, 2=go
        # - move: (from1, to1, from2, to2) or zeros
        self.action_space = spaces.Dict({
            'action_type': spaces.Discrete(3),
            'move': spaces.MultiDiscrete([self.MAX_FIELD + 1, self.MAX_FIELD + 1,
                                          self.MAX_FIELD + 1, self.MAX_FIELD + 1])
        })

        # Current state
        self.state = self._get_observation()

        # State history, to avoid dead-end situations
        self.state_history = []

        # For debugging and training
        self.steps_taken = 0
        self.max_steps = 1000  # cap to avoid endless games

    def reset(self):
        """Reset the environment and return the initial state"""
        self.game.reset()
        self.state = self._get_observation()
        self.state_history = []
        self.steps_taken = 0
        return self.state

    def step(self, action):
        """
        Execute an action and return (state, reward, done, info)

        Action format:
        {
            'action_type': 0/1/2,  # 0=move, 1=mark, 2=go
            'move': [from1, to1, from2, to2]  # only used when action_type=0
        }
        """
        action_type = action['action_type']
        reward = 0
        done = False
        info = {}

        # Check that the action is valid for the agent player (id=1)
        player_id = self.game.get_active_player_id()
        is_agent_turn = player_id == 1  # the agent always plays player 1

        if is_agent_turn:
            # Execute the action according to its type
            if action_type == 0:  # Move
                from1, to1, from2, to2 = action['move']
                move_made = self.game.play_move(((from1, to1), (from2, to2)))
                if not move_made:
                    # Penalize invalid moves
                    reward -= 2.0
                    info['invalid_move'] = True
                else:
                    # Small bonus for a valid move
                    reward += 0.1
            elif action_type == 1:  # Mark
                points = self.game.calculate_points()
                marked = self.game.mark_points(points)
                if not marked:
                    # Penalize invalid actions
                    reward -= 2.0
                    info['invalid_mark'] = True
                else:
                    # Bonus for marking points
                    reward += 0.1 * points
            elif action_type == 2:  # Go
                go_made = self.game.choose_go()
                if not go_made:
                    # Penalize invalid actions
                    reward -= 2.0
                    info['invalid_go'] = True
                else:
                    # Small bonus for the valid action
                    reward += 0.1
        else:
            # Opponent's turn
            self._play_opponent_turn()

        # Check whether the game is over
        if self.game.is_done():
            done = True
            winner = self.game.get_winner()
            if winner == 1:
                # Bonus when the agent wins
                reward += 10.0
                info['winner'] = 'agent'
            else:
                # Penalty when the opponent wins
                reward -= 5.0
                info['winner'] = 'opponent'

        # Reward based on the holes progression
        agent_holes = self.game.get_score(1)
        opponent_holes = self.game.get_score(2)
        reward += 0.5 * (agent_holes - opponent_holes)

        # Update the state
        new_state = self._get_observation()

        # Check for repeated states
        if self._is_state_repeating(new_state):
            reward -= 0.2  # small penalty to avoid loops
            info['repeating_state'] = True

        # Add the state to the history
        self.state_history.append(self._get_state_id())

        # Cap the length of games
        self.steps_taken += 1
        if self.steps_taken >= self.max_steps:
            done = True
            info['timeout'] = True

            # Compare the scores on timeout
            if agent_holes > opponent_holes:
                reward += 5.0
                info['winner'] = 'agent'
            elif opponent_holes > agent_holes:
                reward -= 2.0
                info['winner'] = 'opponent'

        self.state = new_state
        return self.state, reward, done, info

    def _play_opponent_turn(self):
        """Simulate the opponent's turn with the chosen strategy"""
        player_id = self.game.get_active_player_id()

        # Loop while it is still the opponent's turn
        while player_id == 2 and not self.game.is_done():
            # Action according to the stage of the turn
            state_dict = self._get_state_dict()
            turn_stage = state_dict.get('turn_stage')

            if turn_stage == 'RollDice' or turn_stage == 'RollWaiting':
                self.game.roll_dice()
            elif turn_stage == 'MarkPoints' or turn_stage == 'MarkAdvPoints':
                points = self.game.calculate_points()
                self.game.mark_points(points)
            elif turn_stage == 'HoldOrGoChoice':
                # Simple strategy: always continue (Go)
                self.game.choose_go()
            elif turn_stage == 'Move':
                available_moves = self.game.get_available_moves()
                if available_moves:
                    if self.opponent_strategy == "random":
                        # Pick a move at random
                        move = available_moves[np.random.randint(0, len(available_moves))]
                    else:
                        # By default, take the first valid move
                        move = available_moves[0]
                    self.game.play_move(move)

            # Refresh the active player's id
            player_id = self.game.get_active_player_id()

    def _get_observation(self):
        """Convert the game state into a format usable for reinforcement learning"""
        state_dict = self._get_state_dict()

        # Build an array representing the board
        board = np.zeros(self.MAX_FIELD, dtype=np.int8)

        # Fill in the white checker positions (positive values)
        white_positions = state_dict.get('white_positions', [])
        for pos, count in white_positions:
            if 1 <= pos <= self.MAX_FIELD:
                board[pos-1] = count

        # Fill in the black checker positions (negative values)
        black_positions = state_dict.get('black_positions', [])
        for pos, count in black_positions:
            if 1 <= pos <= self.MAX_FIELD:
                board[pos-1] = -count

        # Build the full observation
        observation = {
            'board': board,
            'active_player': state_dict.get('active_player', 0),
            'dice': np.array([
                state_dict.get('dice', (1, 1))[0],
                state_dict.get('dice', (1, 1))[1]
            ]),
            'white_points': state_dict.get('white_points', 0),
            'white_holes': state_dict.get('white_holes', 0),
            'black_points': state_dict.get('black_points', 0),
            'black_holes': state_dict.get('black_holes', 0),
            'turn_stage': self._turn_stage_to_int(state_dict.get('turn_stage', 'RollDice')),
        }

        return observation

    def _get_state_dict(self) -> Dict:
        """Fetch the game state as a dictionary from the Rust module"""
        return self.game.get_state_dict()

    def _get_state_id(self) -> str:
        """Fetch the unique identifier of the current state"""
        return self.game.get_state_id()

    def _is_state_repeating(self, new_state) -> bool:
        """Check whether the state repeats too often"""
        state_id = self.game.get_state_id()
        # Count the occurrences of the state in the recent history
        count = sum(1 for s in self.state_history[-10:] if s == state_id)
        return count >= 3  # considered repetitive if the state appears 3 times or more

    def _turn_stage_to_int(self, turn_stage: str) -> int:
        """Convert the turn stage into an integer for the observation"""
        stages = {
            'RollDice': 0,
            'RollWaiting': 1,
            'MarkPoints': 2,
            'HoldOrGoChoice': 3,
            'Move': 4,
            'MarkAdvPoints': 5
        }
        return stages.get(turn_stage, 0)

    def render(self, mode="human"):
        """Display the current state of the game"""
        if mode == "human":
            print(str(self.game))
            print(f"Current state: {self._get_state_id()}")

            # Display the possible actions
            if self.game.get_active_player_id() == 1:
                turn_stage = self._get_state_dict().get('turn_stage')
                print(f"Stage: {turn_stage}")

                if turn_stage == 'Move' or turn_stage == 'HoldOrGoChoice':
                    print("Possible moves:")
                    moves = self.game.get_available_moves()
                    for i, move in enumerate(moves):
                        print(f"  {i}: {move}")

                if turn_stage == 'HoldOrGoChoice':
                    print("Option: Go (continue)")

    def get_action_mask(self):
        """Return a mask of the valid actions in the current state"""
        state_dict = self._get_state_dict()
        turn_stage = state_dict.get('turn_stage')

        # Default mask (every action is invalid)
        mask = {
            'action_type': np.zeros(3, dtype=bool),
            'move': np.zeros((self.MAX_FIELD + 1, self.MAX_FIELD + 1,
                              self.MAX_FIELD + 1, self.MAX_FIELD + 1), dtype=bool)
        }

        if self.game.get_active_player_id() != 1:
            return mask  # not the agent's turn

        # Enable the valid action types according to the stage of the turn
        if turn_stage == 'Move' or turn_stage == 'HoldOrGoChoice':
            mask['action_type'][0] = True  # enable the move action

            # Enable the valid moves
            valid_moves = self.game.get_available_moves()
            for ((from1, to1), (from2, to2)) in valid_moves:
                mask['move'][from1, to1, from2, to2] = True

        if turn_stage == 'MarkPoints' or turn_stage == 'MarkAdvPoints':
            mask['action_type'][1] = True  # enable the mark-points action

        if turn_stage == 'HoldOrGoChoice':
            mask['action_type'][2] = True  # enable the continue (Go) action

        return mask

    def sample_valid_action(self):
        """Sample a valid action according to the action mask"""
        mask = self.get_action_mask()

        # Find the valid action types
        valid_action_types = np.where(mask['action_type'])[0]

        if len(valid_action_types) == 0:
            # No valid action (not the agent's turn)
            return {
                'action_type': 0,
                'move': np.zeros(4, dtype=np.int32)
            }

        # Pick an action type
        action_type = np.random.choice(valid_action_types)

        action = {
            'action_type': action_type,
            'move': np.zeros(4, dtype=np.int32)
        }

        # If it is a move, select a valid move
        if action_type == 0:
            valid_moves = np.where(mask['move'])
            if len(valid_moves[0]) > 0:
                # Select a valid move at random
                idx = np.random.randint(0, len(valid_moves[0]))
                from1 = valid_moves[0][idx]
                to1 = valid_moves[1][idx]
                from2 = valid_moves[2][idx]
                to2 = valid_moves[3][idx]
                action['move'] = np.array([from1, to1, from2, to2], dtype=np.int32)

        return action

    def close(self):
        """Clean up resources when the environment is closed"""
        pass

# Example usage with Stable-Baselines3
def example_usage():
    from stable_baselines3 import PPO
    from stable_baselines3.common.vec_env import DummyVecEnv

    # Wrapper function to create the environment
    def make_env():
        return TricTracEnv()

    # Create a vectorized environment (can be parallelized)
    env = DummyVecEnv([make_env])

    # Create the model
    model = PPO("MultiInputPolicy", env, verbose=1)

    # Train the model
    model.learn(total_timesteps=10000)

    # Save the model
    model.save("trictrac_ppo")

    print("Training finished and model saved")

if __name__ == "__main__":
    # Test the environment
    env = TricTracEnv()
    obs = env.reset()

    print("Environment initialized")
    env.render()

    # Play a few random moves
    for _ in range(10):
        action = env.sample_valid_action()
        obs, reward, done, info = env.step(action)

        print(f"\nAction: {action}")
        print(f"Reward: {reward}")
        print(f"Info: {info}")
        env.render()

        if done:
            print("Game over!")
            break

    env.close()
```

These implementations give you a Gym environment for your Trictrac game that:

1. Exposes the game state in a format suited to reinforcement learning
2. Handles the flow of a game and the opponent's turn
3. Defines a reward system for training the agent
4. Provides action masks to guarantee that only valid actions are selected
5. Includes examples for the integration with Stable-Baselines3

This code will probably need a few adjustments depending on how your Rust code is organized, but it should give you a solid base to start from.

To use this implementation:

1. Update `engine.rs` to expose the API needed by Python
2. Compile the Rust library with maturin: `maturin build -m store/Cargo.toml --release`
3. Implement `trictracEnv.py` with the Gym environment
4. Create a training script that uses Stable-Baselines3 or another reinforcement learning library

Would you like me to explain some parts of the implementation in more detail, or help you solve specific problems?
268 doc/refs/claudeAIquestionOnlyRust.md Normal file
@@ -0,0 +1,268 @@
# Description

I am developing a TricTrac game (<https://fr.wikipedia.org/wiki/Trictrac>) in the Rust language.
For now, ignore the 'client_bevy', 'client_tui', and 'server' folders, which will only be used for future iterations.

The game rules and the state of a game are implemented in 'store'; the command-line application is implemented in 'client_cli' and lets you play against a bot, or have two bots play against each other.
Bot strategies are implemented in the 'bot' folder.

The game state is defined by the GameState struct in store/src/game.rs; its to_string_id() method encodes this state compactly into a string, but without the history of played moves. fmt::Display is also implemented, for a more readable textual representation.

'client_cli/src/game_runner.rs' contains the logic for making two bots play against each other.
'bot/src/default.rs' contains the code of a basic bot strategy: it computes the list of valid moves (with the get_possible_moves_sequences method of store::MoveRules) and simply plays the first one in the list.

In a previous session you helped me add a stronger bot strategy by training an agent/bot with OpenAI Gym, using PyO3 and Stable-Baselines3. You provided the code for `store/src/engine.rs` and `store/python/trictracEnv.py`.

It works, but I would now like to move to an all-Rust solution.
I was thinking of using burn and the DQN algorithm implemented in sb3-burn (<https://github.com/will-maclean/sb3-burn>). Can you help me with that?

----- end of the implementation:

Perfect! The game now works with the DQN strategy! The DQN bot even won the game against the dummy bot.

Summary

I managed to migrate the implementation from a Python/PyO3/Stable-Baselines3 solution to an all-Rust DQN solution. Here is what was accomplished:

✅ What works now:

1. Complete removal of PyO3 from the store module, to avoid conflicts
2. A native DQN implementation in Rust (see the sketch after this list) with:

- a simplified neural network (3 layers, ReLU)
- Xavier/Glorot weight initialization
- a replay buffer for the experience
- an epsilon-greedy strategy
- model saving/loading as JSON

3. Full CLI integration with the commands:

- --bot dqn : DQN with the default configuration
- --bot dqn:/path/to/model : DQN with a custom model

4. Successful compilation, without the problematic Burn dependencies
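A minimal sketch of the replay buffer named in point 2 above; the struct and method names are assumptions, the real dqn_common.rs may differ:

```rust
use rand::seq::SliceRandom;

/// One stored experience step (state encoding assumed to be a f32 vector).
struct Transition {
    state: Vec<f32>,
    action: usize,
    reward: f32,
    next_state: Vec<f32>,
    done: bool,
}

/// Fixed-capacity experience replay buffer with uniform sampling.
struct ReplayBuffer {
    capacity: usize,
    memory: Vec<Transition>,
}

impl ReplayBuffer {
    fn new(capacity: usize) -> Self {
        Self { capacity, memory: Vec::with_capacity(capacity) }
    }

    /// Store a transition, evicting the oldest one once full.
    fn push(&mut self, t: Transition) {
        if self.memory.len() == self.capacity {
            self.memory.remove(0);
        }
        self.memory.push(t);
    }

    /// Uniformly sample a training batch.
    fn sample(&self, batch_size: usize) -> Vec<&Transition> {
        let mut rng = rand::thread_rng();
        self.memory.choose_multiple(&mut rng, batch_size).collect()
    }
}
```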
🔄 Points to improve later:

1. Full training: the train() method currently only simulates training. Complete backpropagation remains to be implemented.
2. A more sophisticated action space: the action space is simplified (3 actions). It could be extended to cover all possible moves.
3. Reward system: the reward system is basic and could be enriched.
4. Migration to Burn: once the compatibility problems are solved, we can migrate to Burn for better performance.

The DQN foundation is now functional and entirely in Rust! 🎉

---

Your implementation starts with an untrained bot and it trains as it plays; the model is not trained separately? We start with an untrained model and it trains while the bot plays?

I would like the current dqn.rs file to contain only the code needed to load a trained model and play against opponents (that is, the implementation of BotStrategy). So extract the code that trains the model into another file, and add a command to launch the training.

---

● Perfect! Now let's test a short training run:

● Bash(cd /home/henri/travaux/programmes/trictrac && cargo run --bin=train_dqn -- --episodes 10 --save-every 5)…
⎿ Error: Command timed out after 2m 0.0s warning: unused import: `DqnConfig`
 --> bot/src/strategy/dqn.rs:5:25

5 | use super::dqn_common::{DqnConfig, SimpleNeuralNetwork, game_state_to_vector};
  |                         ^^^^^^^^^

= note: `#[warn(unused_imports)]` on by default

warning: unused import: `store::MoveRules`
 --> bot/src/strategy/erroneous_moves.rs:2:5
… +175 lines (ctrl+r to see all)

● There is a problem in the training logic - there seems to be an infinite loop with "rules not followed" errors. The TrictracEnv logic must be fixed. The problem is probably that the environment does not handle the stages of the game correctly.

● Update Todos
⎿ ☒ Analyze the existing PyO3/Stable-Baselines3 code
  ☒ Examine the current Python Gym environment
  ☒ Create a native DQN environment in Rust
  ☒ Implement the DQN bot strategy in Rust
  ☒ Add the Burn and sb3-burn dependencies
  ☒ Restructure the DQN code
  ☒ Create a separate training module
  ☒ Add a training command

● Perfect! I restructured the whole DQN code as requested:

✅ Restructuring completed successfully

📁 Code structure:

1. dqn.rs - contains only the DqnStrategy used to load and play a pre-trained model
2. dqn_common.rs - shared code (neural network, configuration, utilities)
3. dqn_trainer.rs - full training logic with environment and agent
4. train_dqn - dedicated binary to launch the training

🚀 Available commands:

To play with a DQN model:
cargo run --bin=client_cli -- --bot dqn,dummy # without a model (fallback)
cargo run --bin=client_cli -- --bot dqn:/path/to/model.json,dummy # with a model

To train a DQN model:
cargo run --bin=train_dqn # default training (1000 episodes)
cargo run --bin=train_dqn -- --help # see the options
cargo run --bin=train_dqn -- --episodes 5000 --save-every 500

⚠️ Note on the training environment:

Training revealed a problem in the environment logic (an infinite loop with "rules not followed"). That is expected for a first implementation - the training environment needs adjustments to handle the Trictrac rules properly.

The code is now cleanly split between:

- usage (a simple strategy that loads a model)
- training (a full environment with the learning logic)

---

● ✅ Extended action space implemented successfully!

I fully extended the DQN action space to cover every possible choice in the Trictrac game:

🎯 New action space:

The TrictracAction enum comprises:

- Roll - roll the dice (index 0)
- Mark { points } - mark points (indices 1-13 for 0-12 points)
- Go - continue after winning a hole (index 14)
- Move { move1, move2 } - checker moves (indices 15+)
  - encodes (from, to) for both checkers
  - total space: 331,791 possible actions
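As a consistency check on that total (the exact encoding is inferred from the indices above): Roll, the 13 Mark values and Go give 15 fixed actions, and if each checker move is encoded as a (from, to) pair over the 24 points, a single move has 24 × 24 = 576 encodings and a pair of moves 576² = 331,776, so 15 + 331,776 = 331,791.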
🧠 Smart improvements:

1. Valid-action masking - the DQN only considers actions that are legal in the current state
2. Compact encoding - moves encoded as a single integer, for efficiency
3. Automatic decoding - transparent conversion between indices and actions
4. Adaptive strategy - the DQN strategy now uses the right action type depending on the context

🔧 Features added:

- get_valid_actions() - gets the valid actions for the current state
- TrictracAction::to_action_index() / from_action_index() - two-way conversion
- sample_valid_action() - random selection of a valid action
- a widened neural network (512 hidden neurons) to handle the extended space

📊 Usage in the game:

The DQN bot can now:

- choose the exact number of points to mark (not just the maximum)
- select specific moves among all possible combinations
- decide intelligently between "Go" and the alternative moves

The action space is now perfectly aligned with the real complexity of the Trictrac game! 🎲

---

To reduce the size of the action space, can we take the game state into account when choosing actions?
That would mean that to choose a move, for example, since we know the dice values we only have to choose which die is played first (2 choices), then the starting position of the first checker (25 choices), and the starting position of the second checker (25 choices), the destination being deduced from the dice values.

....

● Perfect! I implemented the compact, context-based action space. Here are the major improvements:

✅ Drastic reduction of the action space:

- Before: 331,791 possible actions at every step
- Now: 1-200 actions depending on the game context

✅ Contextual actions:

- Roll: only when possible (1 action)
- Mark: only the valid points 0-12 (≤13 actions)
- Go: always 1 action
- MoveChoice: 2 dice orders × ~10 starting positions × ~10 positions = ~200 actions instead of 331,791

✅ Modular architecture:

- CompactAction: simplified actions based on the context
- to_trictrac_action(): automatic conversion to the full actions
- get_valid_compact_actions(): contextual generation of the valid actions
- context_action_space_size(): dynamic size of the action space

The approach uses exactly your suggestion: for moves, instead of encoding every possible combination, we choose:

1. the dice order (at most 2 choices, when the dice differ)
2. the starting positions (only those holding checkers)
3. the destinations are computed automatically from the rules

This shrinks the action space by 99.94% while keeping the full richness of the game! A sketch of this encoding is given below.
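A minimal sketch of what this compact encoding could look like. The variant names come from the summary above; the field types and indexing logic are assumptions:

```rust
/// Sketch of the context-dependent action space described above.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum CompactAction {
    /// Roll the dice (only valid in the roll stages).
    Roll,
    /// Mark `points` points (0..=12), only when marking is expected.
    Mark { points: u8 },
    /// Keep the hole / continue.
    Go,
    /// A move choice: which die is played first, and the starting field
    /// of each checker (1..=24, or 0 for "no move"). The destinations
    /// are derived from the dice values and the rules.
    MoveChoice {
        high_die_first: bool,
        first_from: u8,
        second_from: u8,
    },
}

impl CompactAction {
    /// Index the action inside its *contextual* space: the caller knows
    /// the current turn stage, so indices that overlap across variants
    /// are never ambiguous.
    pub fn to_context_index(self) -> usize {
        match self {
            CompactAction::Roll | CompactAction::Go => 0,
            CompactAction::Mark { points } => points as usize,
            CompactAction::MoveChoice { high_die_first, first_from, second_from } => {
                // Upper bound 2 x 25 x 25 = 1250 slots; in practice only
                // the starting fields holding checkers are generated,
                // hence the ~200 valid actions quoted above.
                let order = usize::from(high_die_first);
                order * 25 * 25 + first_from as usize * 25 + second_from as usize
            }
        }
    }
}
```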

---

● Perfect! I created the train_burn_rl.rs file that uses your fixed Burn-RL environment.

To launch training with Burn-RL:

cargo run --bin=train_burn_rl

Or with custom options:

cargo run --bin=train_burn_rl -- --episodes 2000 --save-every 200 --max-steps 1000

Characteristics of this trainer:

✅ Uses the Burn-RL environment you fixed
✅ Contextual actions via get_valid_actions()
✅ A simple epsilon-greedy policy to start with
✅ Detailed statistics with moving averages
✅ Flexible configuration via CLI arguments
✅ Progressive logging to follow the training

Available options:

- --episodes : number of episodes (default: 1000)
- --save-every : stats display frequency (default: 100)
- --max-steps : max number of steps per episode (default: 500)
- --help : full help

This trainer serves as a base for testing the Burn-RL environment. Once everything works well, a real DQN agent with neural networks can be plugged in. A sketch of the epsilon-greedy policy it starts from is given below.
||||
---
|
||||
|
||||
Session où je n'ai cessé de recevoir ce message :
|
||||
|
||||
⎿ API Error (429 {"type":"error","error":{"type":"rate_limit_error","message":"This request would exceed the rate limit for your organization (813e6b21-ec6f-44c3-a7f0-408244105e5c) of 20,000 input tokens per minute. For details, refer to: <https://docs.anthropic.com/en/api/rate-limits>. You can see the response headers for current usage. Please reduce the prompt length or the maximum tokens requested, or try again later. You may also contact sales at <https://www.anthropic.com/contact-sales> to discuss your options for a rate limit increase."}}) · Retrying in 391 seconds… (attempt 1/10)
|
||||
|
||||
✶ Coaching… (403s · ↑ 382 tokens · esc to interrupt)
|
||||
|
||||
Pour à la fin de la session avoir ce résumé :
|
||||
|
||||
Total cost: $0.95
|
||||
Total duration (API): 1h 24m 22.8s
|
||||
Total duration (wall): 1h 43m 3.5s
|
||||
Total code changes: 746 lines added, 0 lines removed
|
||||
Token usage by model:
|
||||
claude-3-5-haiku: 18.8k input, 443 output, 0 cache read, 0 cache write
|
||||
claude-sonnet: 10 input, 666 output, 0 cache read, 245.6k cache write
|
||||
|
||||
Mais pourtant 2 millions indiqués dans la page usage : <https://console.anthropic.com/usage>, et 7.88 dollars de consommés sur <https://console.anthropic.com/cost>.
|
||||
|
||||
I just had a claude code session in which I kept having this error, even if the agent didn't seem to read a lot of files : API Error (429 {"type":"error","error":{"type":"rate_limit_error","message":"This request would exceed the rate limit for your organization (813e6b21-ec6f-44c3-a7f0-408244105e5c) of 20,000 input tokens per minute.
|
||||
|
||||
at the end of the session the token usage and cost indicated was this :
|
||||
|
||||
Total cost: $0.95
|
||||
Total duration (API): 1h 24m 22.8s
|
||||
Total duration (wall): 1h 43m 3.5s
|
||||
Total code changes: 746 lines added, 0 lines removed
|
||||
Token usage by model:
|
||||
claude-3-5-haiku: 18.8k input, 443 output, 0 cache read, 0 cache write
|
||||
claude-sonnet: 10 input, 666 output, 0 cache read, 245.6k cache write
|
||||
|
||||
but the usage on the /usage page was 2,073,698 token in, and the cost on the /cost page was $7.90.
|
||||
|
||||
When looking at the costs csv file, it seems that it is the "input cache write 5m" that consumed nearly all the tokens ( $7,71 ). Is it a bug ?
|
||||
106
doc/refs/dqn-burn.md
Normal file
106
doc/refs/dqn-burn.md
Normal file
|
|
@ -0,0 +1,106 @@
|
|||
# DQN avec burn-rl
|
||||
|
||||
## Paramètre d'entraînement dans dqn/burnrl/dqn_model.rs
|
||||
|
||||
Ces constantes sont des hyperparamètres, c'est-à-dire des réglages que l'on fixe avant l'entraînement et qui conditionnent la manière dont le modèle va apprendre.
|
||||
|
||||
MEMORY_SIZE
|
||||
|
||||
- Ce que c'est : La taille de la "mémoire de rejeu" (Replay Memory/Buffer).
|
||||
- À quoi ça sert : L'agent interagit avec l'environnement (le jeu de TricTrac) et stocke ses expériences (un état, l'action prise, la récompense obtenue, et l'état suivant) dans cette mémoire. Pour s'entraîner, au
|
||||
lieu d'utiliser uniquement la dernière expérience, il pioche un lot (batch) d'expériences aléatoires dans cette mémoire.
|
||||
- Pourquoi c'est important :
|
||||
1. Décorrélation : Ça casse la corrélation entre les expériences successives, ce qui rend l'entraînement plus stable et efficace.
|
||||
2. Réutilisation : Une même expérience peut être utilisée plusieurs fois pour l'entraînement, ce qui améliore l'efficacité des données.
|
||||
- Dans votre code : const MEMORY_SIZE: usize = 4096; signifie que l'agent gardera en mémoire les 4096 dernières transitions.
|
||||
|
||||
DENSE_SIZE
|
||||
|
||||
- Ce que c'est : La taille des couches cachées du réseau de neurones. "Dense" signifie que chaque neurone d'une couche est connecté à tous les neurones de la couche suivante.
|
||||
- À quoi ça sert : C'est la "capacité de réflexion" de votre agent. Le réseau de neurones (ici, Net) prend l'état du jeu en entrée, le fait passer à travers des couches de calcul (de taille DENSE_SIZE), et sort une
|
||||
estimation de la qualité de chaque action possible.
|
||||
- Pourquoi c'est important :
|
||||
- Une valeur trop petite : le modèle ne sera pas assez "intelligent" pour apprendre les stratégies complexes du TricTrac.
|
||||
- Une valeur trop grande : l'entraînement sera plus lent et le modèle pourrait "sur-apprendre" (overfitting), c'est-à-dire devenir très bon sur les situations vues en entraînement mais incapable de généraliser
|
||||
sur de nouvelles situations.
|
||||
- Dans votre code : const DENSE_SIZE: usize = 128; définit que les couches cachées du réseau auront 128 neurones.
|
||||
|
||||
EPS_START, EPS_END et EPS_DECAY
|
||||
|
||||
Ces trois constantes gèrent la stratégie d'exploration de l'agent, appelée "epsilon-greedy". Le but est de trouver un équilibre entre :
|
||||
|
||||
- L'Exploitation : Jouer le coup que le modèle pense être le meilleur.
|
||||
- L'Exploration : Jouer un coup au hasard pour découvrir de nouvelles stratégies, potentiellement meilleures.
|
||||
|
||||
epsilon (ε) est la probabilité de faire un choix aléatoire (explorer).
|
||||
|
||||
- `EPS_START` (Epsilon de départ) :
|
||||
|
||||
- Ce que c'est : La valeur d'epsilon au tout début de l'entraînement.
|
||||
- Rôle : Au début, le modèle ne sait rien. Il est donc crucial qu'il explore beaucoup pour accumuler des expériences variées. Une valeur élevée (proche de 1.0) est typique.
|
||||
- Dans votre code : const EPS_START: f64 = 0.9; signifie qu'au début, l'agent a 90% de chances de jouer un coup au hasard.
|
||||
|
||||
- `EPS_END` (Epsilon final) :
|
||||
|
||||
- Ce que c'est : La valeur minimale d'epsilon, atteinte après un certain nombre d'étapes.
|
||||
- Rôle : Même après un long entraînement, on veut conserver une petite part d'exploration pour éviter que l'agent ne se fige dans une stratégie sous-optimale.
|
||||
- Dans votre code : const EPS_END: f64 = 0.05; signifie qu'à la fin, l'agent explorera encore avec 5% de probabilité.
|
||||
|
||||
- `EPS_DECAY` (Décroissance d'epsilon) :
|
||||
- Ce que c'est : Contrôle la vitesse à laquelle epsilon passe de EPS_START à EPS_END.
|
||||
- Rôle : C'est un facteur de "lissage" dans la formule de décroissance exponentielle. Plus cette valeur est élevée, plus la décroissance est lente, et donc plus l'agent passera de temps à explorer.
|
||||
- Dans votre code : const EPS_DECAY: f64 = 1000.0; est utilisé dans la formule EPS_END + (EPS_START - EPS_END) \* f64::exp(-(step as f64) / EPS_DECAY); pour faire diminuer progressivement la valeur d'epsilon à
|
||||
chaque étape (step) de l'entraînement.
|
||||
|
||||
En résumé, ces constantes définissent l'architecture du "cerveau" de votre bot (DENSE*SIZE), sa mémoire à court terme (MEMORY_SIZE), et comment il apprend à équilibrer entre suivre sa stratégie et en découvrir de
|
||||
nouvelles (EPS*\*).
|
||||
|
||||
## Paramètres DQNTrainingConfig
|
||||
|
||||
1. `gamma` (Facteur d'actualisation / _Discount Factor_)
|
||||
|
||||
- À quoi ça sert ? Ça détermine l'importance des récompenses futures. Une valeur proche de 1 (ex: 0.99)
|
||||
indique à l'agent qu'une récompense obtenue dans le futur est presque aussi importante qu'une
|
||||
récompense immédiate. Il sera donc "patient" et capable de faire des sacrifices à court terme pour un
|
||||
gain plus grand plus tard.
|
||||
- Intuition : Un gamma de 0 rendrait l'agent "myope", ne se souciant que du prochain coup. Un gamma de
|
||||
0.99 l'encourage à élaborer des stratégies à long terme.
|
||||
|
||||
2. `tau` (Taux de mise à jour douce / _Soft Update Rate_)
|
||||
|
||||
- À quoi ça sert ? Pour stabiliser l'apprentissage, les algorithmes DQN utilisent souvent deux réseaux
|
||||
: un réseau principal qui apprend vite et un "réseau cible" (copie du premier) qui évolue lentement.
|
||||
tau contrôle la vitesse à laquelle les connaissances du réseau principal sont transférées vers le
|
||||
réseau cible.
|
||||
- Intuition : Une petite valeur (ex: 0.005) signifie que le réseau cible, qui sert de référence stable,
|
||||
ne se met à jour que très progressivement. C'est comme un "mentor" qui n'adopte pas immédiatement
|
||||
toutes les nouvelles idées de son "élève", ce qui évite de déstabiliser tout l'apprentissage sur un
|
||||
coup de chance (ou de malchance).
|
||||
|
||||
3. `learning_rate` (Taux d'apprentissage)
|
||||
|
||||
- À quoi ça sert ? C'est peut-être le plus classique des hyperparamètres. Il définit la "taille du
|
||||
pas" lors de la correction des erreurs. Après chaque prédiction, l'agent compare le résultat à ce
|
||||
qui s'est passé et ajuste ses poids. Le learning_rate détermine l'ampleur de cet ajustement.
|
||||
- Intuition : Trop élevé, et l'agent risque de sur-corriger et de ne jamais converger (comme chercher
|
||||
le fond d'une vallée en faisant des pas de géant). Trop bas, et l'apprentissage sera extrêmement
|
||||
lent.
|
||||
|
||||
4. `batch_size` (Taille du lot)
|
||||
|
||||
- À quoi ça sert ? L'agent apprend de ses expériences passées, qu'il stocke dans une "mémoire". Pour
|
||||
chaque session d'entraînement, au lieu d'apprendre d'une seule expérience, il en pioche un lot
|
||||
(batch) au hasard (ex: 32 expériences). Il calcule l'erreur moyenne sur ce lot pour mettre à jour
|
||||
ses poids.
|
||||
- Intuition : Apprendre sur un lot plutôt que sur une seule expérience rend l'apprentissage plus
|
||||
stable et plus général. L'agent se base sur une "moyenne" de situations plutôt que sur un cas
|
||||
particulier qui pourrait être une anomalie.
|
||||
|
||||
5. `clip_grad` (Plafonnement du gradient / _Gradient Clipping_)
|
||||
- À quoi ça sert ? C'est une sécurité pour éviter le problème des "gradients qui explosent". Parfois,
|
||||
une expérience très inattendue peut produire une erreur de prédiction énorme, ce qui entraîne une
|
||||
correction (un "gradient") démesurément grande. Une telle correction peut anéantir tout ce que le
|
||||
réseau a appris.
|
||||
- Intuition : clip_grad impose une limite. Si la correction à apporter dépasse un certain seuil, elle
|
||||
est ramenée à cette valeur maximale. C'est un garde-fou qui dit : "OK, on a fait une grosse erreur,
|
||||
mais on va corriger calmement, sans tout casser".
|
||||
19
doc/refs/geminiQuestions.md
Normal file
19
doc/refs/geminiQuestions.md
Normal file
|
|
@ -0,0 +1,19 @@
|
|||
# Description du projet
|
||||
|
||||
Je développe un jeu de TricTrac (<https://fr.wikipedia.org/wiki/Trictrac>) dans le langage rust.
|
||||
Pour le moment je me concentre sur l'application en ligne de commande simple, donc ne t'occupe pas des dossiers 'client_bevy', 'client_tui', et 'server' qui ne seront utilisés que pour de prochaines évolutions.
|
||||
|
||||
Les règles du jeu et l'état d'une partie sont implémentées dans 'store', l'application ligne de commande est implémentée dans 'client_cli', elle permet déjà de jouer contre un bot, ou de faire jouer deux bots l'un contre l'autre.
|
||||
Les stratégies de bots sont implémentées dans le dossier 'bot'.
|
||||
|
||||
Plus précisément, l'état du jeu est défini par le struct GameState dans store/src/game.rs, la méthode to_string_id() permet de coder cet état de manière compacte dans une chaîne de caractères, mais il n'y a pas l'historique des coups joués. Il y a aussi fmt::Display d'implémenté pour une representation textuelle plus lisible.
|
||||
|
||||
'client_cli/src/game_runner.rs' contient la logique permettant de faire jouer deux bots l'un contre l'autre.
|
||||
'bot/src/strategy/default.rs' contient le code d'une stratégie de bot basique : il détermine la liste des mouvements valides (avec la méthode get_possible_moves_sequences de store::MoveRules) et joue simplement le premier de la liste.
|
||||
|
||||
Je cherche maintenant à ajouter des stratégies de bot plus fortes en entrainant un agent/bot par reinforcement learning.
|
||||
J'utilise la bibliothèque burn (<https://burn.dev/>).
|
||||
|
||||
Une version utilisant l'algorithme DQN peut être lancée avec `cargo run --bin=burn_train -- dqn`). Elle effectue un entraînement, sauvegarde les données du modèle obtenu puis recharge le modèle depuis le disque pour tester l'agent. L'entraînement est fait dans la fonction 'run' du fichier bot/src/burnrl/dqn_model.rs, la sauvegarde du modèle dans la fonction 'save_model' et le chargement dans la fonction 'load_model'.
|
||||
|
||||
J'essaie de faire l'équivalent avec les algorithmes PPO (fichier bot/src/burnrl/ppo_model.rs) et SAC (fichier bot/src/burnrl/sac_model.rs) : les fonctions 'run' sont implémentées mais pas les fonctions 'save_model' et 'load_model'. Peux-tu les implémenter ?
|
||||
|
|
@ -1,22 +1,22 @@
|
|||
# Inspirations
|
||||
|
||||
tools
|
||||
|
||||
- config clippy ?
|
||||
- bacon : tests runner (ou loom ?)
|
||||
|
||||
## Rust libs
|
||||
|
||||
cf. https://blessed.rs/crates
|
||||
cf. <https://blessed.rs/crates>
|
||||
|
||||
nombres aléatoires avec seed : https://richard.dallaway.com/posts/2021-01-04-repeat-resume/
|
||||
nombres aléatoires avec seed : <https://richard.dallaway.com/posts/2021-01-04-repeat-resume/>
|
||||
|
||||
- cli : https://lib.rs/crates/pico-args ( ou clap )
|
||||
- cli : <https://lib.rs/crates/pico-args> ( ou clap )
|
||||
- reseau async : tokio
|
||||
- web serveur : axum (uses tokio)
|
||||
- https://fasterthanli.me/series/updating-fasterthanli-me-for-2022/part-2#the-opinions-of-axum-also-nice-error-handling
|
||||
- <https://fasterthanli.me/series/updating-fasterthanli-me-for-2022/part-2#the-opinions-of-axum-also-nice-error-handling>
|
||||
- db : sqlx
|
||||
|
||||
|
||||
- eyre, color-eyre (Results)
|
||||
- tracing (logging)
|
||||
- rayon ( sync <-> parallel )
|
||||
|
|
@ -24,23 +24,29 @@ nombres aléatoires avec seed : https://richard.dallaway.com/posts/2021-01-04-re
|
|||
- front : yew + tauri
|
||||
- egui
|
||||
|
||||
- https://docs.rs/board-game/latest/board_game/
|
||||
- <https://docs.rs/board-game/latest/board_game/>
|
||||
|
||||
## network games
|
||||
|
||||
- <https://www.mattkeeter.com/projects/pont/>
|
||||
- <https://github.com/jackadamson/onitama> (wasm, rooms)
|
||||
- <https://github.com/UkoeHB/renet2>
|
||||
|
||||
## Others
|
||||
- plugins avec https://github.com/extism/extism
|
||||
|
||||
- plugins avec <https://github.com/extism/extism>
|
||||
|
||||
## Backgammon existing projects
|
||||
|
||||
* go : https://bgammon.org/blog/20240101-hello-world/
|
||||
- protocole de communication : https://code.rocket9labs.com/tslocum/bgammon/src/branch/main/PROTOCOL.md
|
||||
* ocaml : https://github.com/jacobhilton/backgammon?tab=readme-ov-file
|
||||
cli example : https://www.jacobh.co.uk/backgammon/
|
||||
* lib rust backgammon
|
||||
- https://github.com/carlostrub/backgammon
|
||||
- https://github.com/marktani/backgammon
|
||||
* network webtarot
|
||||
* front ?
|
||||
|
||||
- go : <https://bgammon.org/blog/20240101-hello-world/>
|
||||
- protocole de communication : <https://code.rocket9labs.com/tslocum/bgammon/src/branch/main/PROTOCOL.md>
|
||||
- ocaml : <https://github.com/jacobhilton/backgammon?tab=readme-ov-file>
|
||||
cli example : <https://www.jacobh.co.uk/backgammon/>
|
||||
- lib rust backgammon
|
||||
- <https://github.com/carlostrub/backgammon>
|
||||
- <https://github.com/marktani/backgammon>
|
||||
- network webtarot
|
||||
- front ?
|
||||
|
||||
## cli examples
|
||||
|
||||
|
|
@ -74,7 +80,9 @@ Player O estimates that they have a 90.6111% chance of winning.
|
|||
|
||||
Os borne off: none
|
||||
24 23 22 21 20 19 18 17 16 15 14 13
|
||||
-------------------------------------------------------------------
|
||||
|
||||
---
|
||||
|
||||
| v v v v v v | | v v v v v v |
|
||||
| | | |
|
||||
| X O O O | | O O O |
|
||||
|
|
@ -96,7 +104,9 @@ Os borne off: none
|
|||
| X X X | | X O O |
|
||||
| | | |
|
||||
| ^ ^ ^ ^ ^ ^ | | ^ ^ ^ ^ ^ ^ |
|
||||
-------------------------------------------------------------------
|
||||
|
||||
---
|
||||
|
||||
1 2 3 4 5 6 7 8 9 10 11 12
|
||||
Xs borne off: none
|
||||
|
||||
|
|
@ -116,4 +126,3 @@ You can also enter these commands:
|
|||
<enter> - toggle between showing the current and last moves
|
||||
help - show this help text
|
||||
quit - abandon game
|
||||
|
||||
|
|
|
|||
|
|
@ -6,16 +6,56 @@ cargo init
|
|||
cargo add pico-args
|
||||
```
|
||||
|
||||
Organisation store / server / client selon https://herluf-ba.github.io/making-a-turn-based-multiplayer-game-in-rust-01-whats-a-turn-based-game-anyway
|
||||
Organisation store / server / client selon <https://herluf-ba.github.io/making-a-turn-based-multiplayer-game-in-rust-01-whats-a-turn-based-game-anyway>
|
||||
|
||||
_store_ est la bibliothèque contenant le _reducer_ qui transforme l'état du jeu en fonction des évènements. Elle est utilisée par le _server_ et le _client_. Seuls les évènements sont transmis entre clients et serveur.
|
||||
|
||||
## Config neovim debugger launchers
|
||||
|
||||
Cela se passe dans la config neovim (lua/plugins/overrides.lua)
|
||||
|
||||
## Organisation du store
|
||||
|
||||
lib
|
||||
|
||||
- game::GameState
|
||||
- error
|
||||
- dice
|
||||
- board
|
||||
- user
|
||||
- user
|
||||
|
||||
## Algorithme de détermination des coups
|
||||
|
||||
- strategy::choose_move
|
||||
|
||||
- GameRules.get_possible_moves_sequences(with_excedents: bool)
|
||||
- get_possible_moves_sequences_by_dices(dice_max, dice_min, with_excedents, false);
|
||||
- get_possible_moves_sequences_by_dices(dice_min, dice_max, with_excedents, true);
|
||||
- has_checkers_outside_last_quarter() ok
|
||||
- board.get_possible_moves ok
|
||||
- check_corner_rules(&(first_move, second_move)) ok
|
||||
|
||||
- handle_event
|
||||
- state.validate (ok)
|
||||
- rules.moves_follow_rules (ok)
|
||||
- moves_possible ok
|
||||
- moves_follows_dices ok
|
||||
- moves_allowed (ok)
|
||||
- check_corner_rules ok
|
||||
- can_take_corner_by_effect ok
|
||||
- get_possible_moves_sequences -> cf. l.15
|
||||
- check_exit_rules
|
||||
- get_possible_moves_sequences(without exedents) -> cf l.15
|
||||
- get_quarter_filling_moves_sequences
|
||||
- get_possible_moves_sequences -> cf l.15
|
||||
- state.consume (RollResult) (ok)
|
||||
- get_rollresult_jans -> points_rules.get_result_jans (ok)
|
||||
- get_jans (ok)
|
||||
- get_jans_by_ordered_dice (ok)
|
||||
- get_jans_by_ordered_dice ( dices.poped )
|
||||
- move_rules.get_scoring_quarter_filling_moves_sequences (ok)
|
||||
- get_quarter_filling_moves_sequences cf l.8 (ok)
|
||||
- board.get_quarter_filling_candidate -> is_quarter_fillable ok
|
||||
- move_rules.get_possible_moves_sequence -> cf l.15
|
||||
- get_jans_points -> jan.get_points ok
|
||||
|
|
|
|||
417
doc/refs/outputs.md
Normal file
417
doc/refs/outputs.md
Normal file
|
|
@ -0,0 +1,417 @@
|
|||
# Outputs
|
||||
|
||||
## 50 episodes - 1000 steps max - desktop
|
||||
|
||||
{"episode": 0, "reward": -1798.7162, "steps count": 1000, "duration": 11}
|
||||
{"episode": 1, "reward": -1794.8162, "steps count": 1000, "duration": 32}
|
||||
{"episode": 2, "reward": -1387.7109, "steps count": 1000, "duration": 58}
|
||||
{"episode": 3, "reward": -42.5005, "steps count": 1000, "duration": 82}
|
||||
{"episode": 4, "reward": -48.2005, "steps count": 1000, "duration": 109}
|
||||
{"episode": 5, "reward": 1.2000, "steps count": 1000, "duration": 141}
|
||||
{"episode": 6, "reward": 8.8000, "steps count": 1000, "duration": 184}
|
||||
{"episode": 7, "reward": 6.9002, "steps count": 1000, "duration": 219}
|
||||
{"episode": 8, "reward": 16.5001, "steps count": 1000, "duration": 248}
|
||||
{"episode": 9, "reward": -2.6000, "steps count": 1000, "duration": 281}
|
||||
{"episode": 10, "reward": 3.0999, "steps count": 1000, "duration": 324}
|
||||
{"episode": 11, "reward": -34.7004, "steps count": 1000, "duration": 497}
|
||||
{"episode": 12, "reward": -15.7998, "steps count": 1000, "duration": 466}
|
||||
{"episode": 13, "reward": 6.9000, "steps count": 1000, "duration": 496}
|
||||
{"episode": 14, "reward": 6.3000, "steps count": 1000, "duration": 540}
|
||||
{"episode": 15, "reward": -2.6000, "steps count": 1000, "duration": 581}
|
||||
{"episode": 16, "reward": -33.0003, "steps count": 1000, "duration": 641}
|
||||
{"episode": 17, "reward": -36.8000, "steps count": 1000, "duration": 665}
|
||||
{"episode": 18, "reward": -10.1997, "steps count": 1000, "duration": 753}
|
||||
{"episode": 19, "reward": -88.1014, "steps count": 1000, "duration": 837}
|
||||
{"episode": 20, "reward": -57.5002, "steps count": 1000, "duration": 881}
|
||||
{"episode": 21, "reward": -17.7997, "steps count": 1000, "duration": 1159}
|
||||
{"episode": 22, "reward": -25.4000, "steps count": 1000, "duration": 1235}
|
||||
{"episode": 23, "reward": -104.4013, "steps count": 995, "duration": 1290}
|
||||
{"episode": 24, "reward": -268.6004, "steps count": 1000, "duration": 1322}
|
||||
{"episode": 25, "reward": -743.6052, "steps count": 1000, "duration": 1398}
|
||||
{"episode": 26, "reward": -821.5029, "steps count": 1000, "duration": 1427}
|
||||
{"episode": 27, "reward": -211.5993, "steps count": 1000, "duration": 1409}
|
||||
{"episode": 28, "reward": -276.1974, "steps count": 1000, "duration": 1463}
|
||||
{"episode": 29, "reward": -222.9980, "steps count": 1000, "duration": 1509}
|
||||
{"episode": 30, "reward": -298.9973, "steps count": 1000, "duration": 1560}
|
||||
{"episode": 31, "reward": -164.0011, "steps count": 1000, "duration": 1752}
|
||||
{"episode": 32, "reward": -221.0990, "steps count": 1000, "duration": 1807}
|
||||
{"episode": 33, "reward": -260.9996, "steps count": 1000, "duration": 1730}
|
||||
{"episode": 34, "reward": -420.5959, "steps count": 1000, "duration": 1767}
|
||||
{"episode": 35, "reward": -407.2964, "steps count": 1000, "duration": 1815}
|
||||
{"episode": 36, "reward": -291.2966, "steps count": 1000, "duration": 1870}
|
||||
|
||||
thread 'main' has overflowed its stack
|
||||
fatal runtime error: stack overflow, aborting
|
||||
error: Recipe `trainbot` was terminated on line 24 by signal 6
|
||||
|
||||
## 50 episodes - 700 steps max - desktop
|
||||
|
||||
const MEMORY_SIZE: usize = 4096;
|
||||
const DENSE_SIZE: usize = 128;
|
||||
const EPS_DECAY: f64 = 1000.0;
|
||||
const EPS_START: f64 = 0.9;
|
||||
const EPS_END: f64 = 0.05;
|
||||
|
||||
> Entraînement
|
||||
> {"episode": 0, "reward": -862.8993, "steps count": 700, "duration": 6}
|
||||
> {"episode": 1, "reward": -418.8971, "steps count": 700, "duration": 13}
|
||||
> {"episode": 2, "reward": -64.9999, "steps count": 453, "duration": 14}
|
||||
> {"episode": 3, "reward": -142.8002, "steps count": 700, "duration": 31}
|
||||
> {"episode": 4, "reward": -74.4004, "steps count": 700, "duration": 45}
|
||||
> {"episode": 5, "reward": -40.2002, "steps count": 700, "duration": 58}
|
||||
> {"episode": 6, "reward": -21.1998, "steps count": 700, "duration": 70}
|
||||
> {"episode": 7, "reward": 99.7000, "steps count": 642, "duration": 79}
|
||||
> {"episode": 8, "reward": -5.9999, "steps count": 700, "duration": 99}
|
||||
> {"episode": 9, "reward": -7.8999, "steps count": 700, "duration": 118}
|
||||
> {"episode": 10, "reward": 92.5000, "steps count": 624, "duration": 117}
|
||||
> {"episode": 11, "reward": -17.1998, "steps count": 700, "duration": 144}
|
||||
> {"episode": 12, "reward": 1.7000, "steps count": 700, "duration": 157}
|
||||
> {"episode": 13, "reward": -7.9000, "steps count": 700, "duration": 172}
|
||||
> {"episode": 14, "reward": -7.9000, "steps count": 700, "duration": 196}
|
||||
> {"episode": 15, "reward": -2.8000, "steps count": 700, "duration": 214}
|
||||
> {"episode": 16, "reward": 16.8002, "steps count": 700, "duration": 250}
|
||||
> {"episode": 17, "reward": -47.7001, "steps count": 700, "duration": 272}
|
||||
> k{"episode": 18, "reward": -13.6000, "steps count": 700, "duration": 288}
|
||||
> {"episode": 19, "reward": -79.9002, "steps count": 700, "duration": 304}
|
||||
> {"episode": 20, "reward": -355.5985, "steps count": 700, "duration": 317}
|
||||
> {"episode": 21, "reward": -205.5001, "steps count": 700, "duration": 333}
|
||||
> {"episode": 22, "reward": -207.3974, "steps count": 700, "duration": 348}
|
||||
> {"episode": 23, "reward": -161.7999, "steps count": 700, "duration": 367}
|
||||
|
||||
---
|
||||
|
||||
const MEMORY_SIZE: usize = 8192;
|
||||
const DENSE_SIZE: usize = 128;
|
||||
const EPS_DECAY: f64 = 10000.0;
|
||||
const EPS_START: f64 = 0.9;
|
||||
const EPS_END: f64 = 0.05;
|
||||
|
||||
> Entraînement
|
||||
> {"episode": 0, "reward": -1119.9921, "steps count": 700, "duration": 6}
|
||||
> {"episode": 1, "reward": -928.6963, "steps count": 700, "duration": 13}
|
||||
> {"episode": 2, "reward": -364.5009, "steps count": 380, "duration": 11}
|
||||
> {"episode": 3, "reward": -797.5981, "steps count": 700, "duration": 28}
|
||||
> {"episode": 4, "reward": -577.5994, "steps count": 599, "duration": 34}
|
||||
> {"episode": 5, "reward": -725.2992, "steps count": 700, "duration": 49}
|
||||
> {"episode": 6, "reward": -638.8995, "steps count": 700, "duration": 59}
|
||||
> {"episode": 7, "reward": -1039.1932, "steps count": 700, "duration": 73}
|
||||
> field invalid : White, 3, Board { positions: [13, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, -1, -2, 0, -11] }
|
||||
|
||||
thread 'main' panicked at store/src/game.rs:556:65:
|
||||
called `Result::unwrap()` on an `Err` value: FieldInvalid
|
||||
note: run with `RUST_BACKTRACE=1` environment variable to display a backtrace
|
||||
error: Recipe `trainbot` failed on line 27 with exit code 101
|
||||
|
||||
---
|
||||
|
||||
# [allow(unused)]
|
||||
|
||||
const MEMORY_SIZE: usize = 8192;
|
||||
const DENSE_SIZE: usize = 256;
|
||||
const EPS_DECAY: f64 = 10000.0;
|
||||
const EPS_START: f64 = 0.9;
|
||||
const EPS_END: f64 = 0.05;
|
||||
|
||||
> Entraînement
|
||||
> {"episode": 0, "reward": -1102.6925, "steps count": 700, "duration": 9}
|
||||
> field invalid : White, 6, Board { positions: [14, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, -1, 0, 0, -13] }
|
||||
|
||||
thread 'main' panicked at store/src/game.rs:556:65:
|
||||
called `Result::unwrap()` on an `Err` value: FieldInvalid
|
||||
note: run with `RUST_BACKTRACE=1` environment variable to display a backtrace
|
||||
error: Recipe `trainbot` failed on line 27 with exit code 101
|
||||
|
||||
---
|
||||
|
||||
const MEMORY_SIZE: usize = 8192;
|
||||
const DENSE_SIZE: usize = 256;
|
||||
const EPS_DECAY: f64 = 1000.0;
|
||||
const EPS_START: f64 = 0.9;
|
||||
const EPS_END: f64 = 0.05;
|
||||
|
||||
> Entraînement
|
||||
> {"episode": 0, "reward": -1116.2921, "steps count": 700, "duration": 9}
|
||||
> {"episode": 1, "reward": -1116.2922, "steps count": 700, "duration": 18}
|
||||
> {"episode": 2, "reward": -1119.9921, "steps count": 700, "duration": 29}
|
||||
> {"episode": 3, "reward": -1089.1927, "steps count": 700, "duration": 41}
|
||||
> {"episode": 4, "reward": -1116.2921, "steps count": 700, "duration": 53}
|
||||
> {"episode": 5, "reward": -684.8043, "steps count": 700, "duration": 66}
|
||||
> {"episode": 6, "reward": 0.3000, "steps count": 700, "duration": 80}
|
||||
> {"episode": 7, "reward": 2.0000, "steps count": 700, "duration": 96}
|
||||
> {"episode": 8, "reward": 30.9001, "steps count": 700, "duration": 112}
|
||||
> {"episode": 9, "reward": 0.3000, "steps count": 700, "duration": 128}
|
||||
> {"episode": 10, "reward": 0.3000, "steps count": 700, "duration": 141}
|
||||
> {"episode": 11, "reward": 8.8000, "steps count": 700, "duration": 155}
|
||||
> {"episode": 12, "reward": 7.1000, "steps count": 700, "duration": 169}
|
||||
> {"episode": 13, "reward": 17.3001, "steps count": 700, "duration": 190}
|
||||
> {"episode": 14, "reward": -107.9005, "steps count": 700, "duration": 210}
|
||||
> {"episode": 15, "reward": 7.1001, "steps count": 700, "duration": 236}
|
||||
> {"episode": 16, "reward": 17.3001, "steps count": 700, "duration": 268}
|
||||
> {"episode": 17, "reward": 7.1000, "steps count": 700, "duration": 283}
|
||||
> {"episode": 18, "reward": -5.9000, "steps count": 700, "duration": 300}
|
||||
> {"episode": 19, "reward": -36.8009, "steps count": 700, "duration": 316}
|
||||
> {"episode": 20, "reward": 19.0001, "steps count": 700, "duration": 332}
|
||||
> {"episode": 21, "reward": 113.3000, "steps count": 461, "duration": 227}
|
||||
> field invalid : White, 1, Board { positions: [0, 2, 2, 0, 2, 4, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -3, -7, -2, -1, 0, -1, -1] }
|
||||
|
||||
thread 'main' panicked at store/src/game.rs:556:65:
|
||||
called `Result::unwrap()` on an `Err` value: FieldInvalid
|
||||
note: run with `RUST_BACKTRACE=1` environment variable to display a backtrace
|
||||
error: Recipe `trainbot` failed on line 27 with exit code 101
|
||||
|
||||
---
|
||||
|
||||
num_episodes: 50,
|
||||
// memory_size: 8192, // must be set in dqn_model.rs with the MEMORY_SIZE constant
|
||||
// max_steps: 700, // must be set in environment.rs with the MAX_STEPS constant
|
||||
dense_size: 256, // neural network complexity
|
||||
eps_start: 0.9, // epsilon initial value (0.9 => more exploration)
|
||||
eps_end: 0.05,
|
||||
eps_decay: 1000.0,
|
||||
|
||||
> Entraînement
|
||||
> {"episode": 0, "reward": -1118.8921, "steps count": 700, "duration": 9}
|
||||
> {"episode": 1, "reward": -1119.9921, "steps count": 700, "duration": 17}
|
||||
> {"episode": 2, "reward": -1118.8921, "steps count": 700, "duration": 28}
|
||||
> {"episode": 3, "reward": -283.5977, "steps count": 700, "duration": 41}
|
||||
> {"episode": 4, "reward": -23.4998, "steps count": 700, "duration": 54}
|
||||
> {"episode": 5, "reward": -31.9999, "steps count": 700, "duration": 68}
|
||||
> {"episode": 6, "reward": 2.0000, "steps count": 700, "duration": 82}
|
||||
> {"episode": 7, "reward": 109.3000, "steps count": 192, "duration": 26}
|
||||
> {"episode": 8, "reward": -4.8000, "steps count": 700, "duration": 102}
|
||||
> {"episode": 9, "reward": 15.6001, "steps count": 700, "duration": 124}
|
||||
> {"episode": 10, "reward": 15.6002, "steps count": 700, "duration": 144}
|
||||
> {"episode": 11, "reward": -65.7008, "steps count": 700, "duration": 162}
|
||||
> {"episode": 12, "reward": 19.0002, "steps count": 700, "duration": 182}
|
||||
> {"episode": 13, "reward": 20.7001, "steps count": 700, "duration": 197}
|
||||
> {"episode": 14, "reward": 12.2002, "steps count": 700, "duration": 229}
|
||||
> {"episode": 15, "reward": -32.0007, "steps count": 700, "duration": 242}
|
||||
> {"episode": 16, "reward": 10.5000, "steps count": 700, "duration": 287}
|
||||
> {"episode": 17, "reward": 24.1001, "steps count": 700, "duration": 318}
|
||||
> {"episode": 18, "reward": 25.8002, "steps count": 700, "duration": 335}
|
||||
> {"episode": 19, "reward": 29.2001, "steps count": 700, "duration": 367}
|
||||
> {"episode": 20, "reward": 9.1000, "steps count": 700, "duration": 366}
|
||||
> {"episode": 21, "reward": 3.7001, "steps count": 700, "duration": 398}
|
||||
> {"episode": 22, "reward": 10.5000, "steps count": 700, "duration": 417}
|
||||
> {"episode": 23, "reward": 10.5000, "steps count": 700, "duration": 438}
|
||||
> {"episode": 24, "reward": 13.9000, "steps count": 700, "duration": 444}
|
||||
> {"episode": 25, "reward": 7.1000, "steps count": 700, "duration": 486}
|
||||
> {"episode": 26, "reward": 12.2001, "steps count": 700, "duration": 499}
|
||||
> {"episode": 27, "reward": 8.8001, "steps count": 700, "duration": 554}
|
||||
> {"episode": 28, "reward": -6.5000, "steps count": 700, "duration": 608}
|
||||
> {"episode": 29, "reward": -3.1000, "steps count": 700, "duration": 633}
|
||||
> {"episode": 30, "reward": -32.0001, "steps count": 700, "duration": 696}
|
||||
> {"episode": 31, "reward": 22.4002, "steps count": 700, "duration": 843}
|
||||
> {"episode": 32, "reward": -77.9004, "steps count": 700, "duration": 817}
|
||||
> {"episode": 33, "reward": -368.5993, "steps count": 700, "duration": 827}
|
||||
> {"episode": 34, "reward": -254.6986, "steps count": 700, "duration": 852}
|
||||
> {"episode": 35, "reward": -433.1992, "steps count": 700, "duration": 884}
|
||||
> {"episode": 36, "reward": -521.6010, "steps count": 700, "duration": 905}
|
||||
> {"episode": 37, "reward": -71.1004, "steps count": 700, "duration": 930}
|
||||
> {"episode": 38, "reward": -251.0004, "steps count": 700, "duration": 956}
|
||||
> {"episode": 39, "reward": -594.7045, "steps count": 700, "duration": 982}
|
||||
> {"episode": 40, "reward": -154.4001, "steps count": 700, "duration": 1008}
|
||||
> {"episode": 41, "reward": -171.3994, "steps count": 700, "duration": 1033}
|
||||
> {"episode": 42, "reward": -118.7004, "steps count": 700, "duration": 1059}
|
||||
> {"episode": 43, "reward": -137.4003, "steps count": 700, "duration": 1087}
|
||||
|
||||
thread 'main' has overflowed its stack
|
||||
fatal runtime error: stack overflow, aborting
|
||||
error: Recipe `trainbot` was terminated on line 27 by signal 6
|
||||
|
||||
---
|
||||
|
||||
num_episodes: 40,
|
||||
// memory_size: 8192, // must be set in dqn_model.rs with the MEMORY_SIZE constant
|
||||
// max_steps: 1500, // must be set in environment.rs with the MAX_STEPS constant
|
||||
dense_size: 256, // neural network complexity
|
||||
eps_start: 0.9, // epsilon initial value (0.9 => more exploration)
|
||||
eps_end: 0.05,
|
||||
eps_decay: 1000.0,
|
||||
|
||||
> Entraînement
|
||||
> {"episode": 0, "reward": -2399.9993, "steps count": 1500, "duration": 31}
|
||||
> {"episode": 1, "reward": -2061.6736, "steps count": 1500, "duration": 81}
|
||||
> {"episode": 2, "reward": -48.9010, "steps count": 1500, "duration": 145}
|
||||
> {"episode": 3, "reward": 3.8000, "steps count": 1500, "duration": 215}
|
||||
> {"episode": 4, "reward": -6.3999, "steps count": 1500, "duration": 302}
|
||||
> {"episode": 5, "reward": 20.8004, "steps count": 1500, "duration": 374}
|
||||
> {"episode": 6, "reward": 49.6992, "steps count": 1500, "duration": 469}
|
||||
> {"episode": 7, "reward": 29.3002, "steps count": 1500, "duration": 597}
|
||||
> {"episode": 8, "reward": 34.3999, "steps count": 1500, "duration": 710}
|
||||
> {"episode": 9, "reward": 115.3003, "steps count": 966, "duration": 515}
|
||||
> {"episode": 10, "reward": 25.9004, "steps count": 1500, "duration": 852}
|
||||
> {"episode": 11, "reward": -122.0007, "steps count": 1500, "duration": 1017}
|
||||
> {"episode": 12, "reward": -274.9966, "steps count": 1500, "duration": 1073}
|
||||
> {"episode": 13, "reward": 54.8994, "steps count": 651, "duration": 518}
|
||||
> {"episode": 14, "reward": -439.8978, "steps count": 1500, "duration": 1244}
|
||||
> {"episode": 15, "reward": -506.1997, "steps count": 1500, "duration": 1676}
|
||||
> {"episode": 16, "reward": -829.5031, "steps count": 1500, "duration": 1855}
|
||||
> {"episode": 17, "reward": -545.2961, "steps count": 1500, "duration": 1892}
|
||||
> {"episode": 18, "reward": -795.2026, "steps count": 1500, "duration": 2008}
|
||||
> {"episode": 19, "reward": -637.1031, "steps count": 1500, "duration": 2124}
|
||||
> {"episode": 20, "reward": -989.6997, "steps count": 1500, "duration": 2241}
|
||||
|
||||
thread 'main' has overflowed its stack
|
||||
fatal runtime error: stack overflow, aborting
|
||||
error: Recipe `trainbot` was terminated on line 27 by signal 6
|
||||
|
||||
---
|
||||
|
||||
num_episodes: 40,
|
||||
// memory_size: 8192, // must be set in dqn_model.rs with the MEMORY_SIZE constant
|
||||
// max_steps: 1000, // must be set in environment.rs with the MAX_STEPS constant
|
||||
dense_size: 256, // neural network complexity
|
||||
eps_start: 0.9, // epsilon initial value (0.9 => more exploration)
|
||||
eps_end: 0.05,
|
||||
eps_decay: 10000.0,
|
||||
|
||||
> Entraînement
|
||||
> {"episode": 0, "reward": -1598.8848, "steps count": 1000, "duration": 16}
|
||||
> {"episode": 1, "reward": -1531.9866, "steps count": 1000, "duration": 34}
|
||||
> {"episode": 2, "reward": -515.6000, "steps count": 530, "duration": 25}
|
||||
> {"episode": 3, "reward": -396.1008, "steps count": 441, "duration": 27}
|
||||
> {"episode": 4, "reward": -540.6996, "steps count": 605, "duration": 43}
|
||||
> {"episode": 5, "reward": -976.0975, "steps count": 1000, "duration": 89}
|
||||
> {"episode": 6, "reward": -1014.2944, "steps count": 1000, "duration": 117}
|
||||
> {"episode": 7, "reward": -806.7012, "steps count": 1000, "duration": 140}
|
||||
> {"episode": 8, "reward": -1276.6891, "steps count": 1000, "duration": 166}
|
||||
> {"episode": 9, "reward": -1554.3855, "steps count": 1000, "duration": 197}
|
||||
> {"episode": 10, "reward": -1178.3925, "steps count": 1000, "duration": 219}
|
||||
> {"episode": 11, "reward": -1457.4869, "steps count": 1000, "duration": 258}
|
||||
> {"episode": 12, "reward": -1475.8882, "steps count": 1000, "duration": 291}
|
||||
|
||||
---
|
||||
|
||||
num_episodes: 40,
|
||||
// memory_size: 8192, // must be set in dqn_model.rs with the MEMORY_SIZE constant
|
||||
// max_steps: 1000, // must be set in environment.rs with the MAX_STEPS constant
|
||||
dense_size: 256, // neural network complexity
|
||||
eps_start: 0.9, // epsilon initial value (0.9 => more exploration)
|
||||
eps_end: 0.05,
|
||||
eps_decay: 3000.0,
|
||||
|
||||
> Entraînement
|
||||
> {"episode": 0, "reward": -1598.8848, "steps count": 1000, "duration": 15}
|
||||
> {"episode": 1, "reward": -1599.9847, "steps count": 1000, "duration": 33}
|
||||
> {"episode": 2, "reward": -751.7018, "steps count": 1000, "duration": 57}
|
||||
> {"episode": 3, "reward": -402.8979, "steps count": 1000, "duration": 81}
|
||||
> {"episode": 4, "reward": -289.2985, "steps count": 1000, "duration": 108}
|
||||
> {"episode": 5, "reward": -231.4988, "steps count": 1000, "duration": 140}
|
||||
> {"episode": 6, "reward": -138.0006, "steps count": 1000, "duration": 165}
|
||||
> {"episode": 7, "reward": -145.0998, "steps count": 1000, "duration": 200}
|
||||
> {"episode": 8, "reward": -60.4005, "steps count": 1000, "duration": 236}
|
||||
> {"episode": 9, "reward": -35.7999, "steps count": 1000, "duration": 276}
|
||||
> {"episode": 10, "reward": -42.2002, "steps count": 1000, "duration": 313}
|
||||
> {"episode": 11, "reward": 69.0002, "steps count": 874, "duration": 300}
|
||||
> {"episode": 12, "reward": 93.2000, "steps count": 421, "duration": 153}
|
||||
> {"episode": 13, "reward": -324.9010, "steps count": 866, "duration": 364}
|
||||
> {"episode": 14, "reward": -1331.3883, "steps count": 1000, "duration": 478}
|
||||
> {"episode": 15, "reward": -1544.5859, "steps count": 1000, "duration": 514}
|
||||
> {"episode": 16, "reward": -1599.9847, "steps count": 1000, "duration": 552}
|
||||
|
||||
---
|
||||
|
||||
Nouveaux points...
|
||||
|
||||
num_episodes: 40,
|
||||
// memory_size: 8192, // must be set in dqn_model.rs with the MEMORY_SIZE constant
|
||||
// max_steps: 1000, // must be set in environment.rs with the MAX_STEPS constant
|
||||
dense_size: 256, // neural network complexity
|
||||
eps_start: 0.9, // epsilon initial value (0.9 => more exploration)
|
||||
eps_end: 0.05,
|
||||
eps_decay: 3000.0,
|
||||
|
||||
> Entraînement
|
||||
> {"episode": 0, "reward": -1798.1161, "steps count": 1000, "duration": 15}
|
||||
> {"episode": 1, "reward": -1800.0162, "steps count": 1000, "duration": 34}
|
||||
> {"episode": 2, "reward": -1718.6151, "steps count": 1000, "duration": 57}
|
||||
> {"episode": 3, "reward": -1369.5055, "steps count": 1000, "duration": 82}
|
||||
> {"episode": 4, "reward": -321.5974, "steps count": 1000, "duration": 115}
|
||||
> {"episode": 5, "reward": -213.2988, "steps count": 1000, "duration": 148}
|
||||
> {"episode": 6, "reward": -175.4995, "steps count": 1000, "duration": 172}
|
||||
> {"episode": 7, "reward": -126.1011, "steps count": 1000, "duration": 203}
|
||||
> {"episode": 8, "reward": -105.1011, "steps count": 1000, "duration": 242}
|
||||
> {"episode": 9, "reward": -46.3007, "steps count": 1000, "duration": 281}
|
||||
> {"episode": 10, "reward": -57.7006, "steps count": 1000, "duration": 323}
|
||||
> {"episode": 11, "reward": -15.7997, "steps count": 1000, "duration": 354}
|
||||
> {"episode": 12, "reward": -38.6999, "steps count": 1000, "duration": 414}
|
||||
> {"episode": 13, "reward": 10.7002, "steps count": 1000, "duration": 513}
|
||||
> {"episode": 14, "reward": -10.1999, "steps count": 1000, "duration": 585}
|
||||
> {"episode": 15, "reward": -8.3000, "steps count": 1000, "duration": 644}
|
||||
> {"episode": 16, "reward": -463.4984, "steps count": 973, "duration": 588}
|
||||
> {"episode": 17, "reward": -148.8951, "steps count": 1000, "duration": 646}
|
||||
> {"episode": 18, "reward": 3.0999, "steps count": 1000, "duration": 676}
|
||||
> {"episode": 19, "reward": -12.0999, "steps count": 1000, "duration": 753}
|
||||
> {"episode": 20, "reward": 6.9000, "steps count": 1000, "duration": 801}
|
||||
> {"episode": 21, "reward": 14.5001, "steps count": 1000, "duration": 850}
|
||||
> {"episode": 22, "reward": -19.6999, "steps count": 1000, "duration": 937}
|
||||
> {"episode": 23, "reward": 83.0000, "steps count": 456, "duration": 532}
|
||||
> {"episode": 24, "reward": -13.9998, "steps count": 1000, "duration": 1236}
|
||||
> {"episode": 25, "reward": 25.9003, "steps count": 1000, "duration": 1264}
|
||||
> {"episode": 26, "reward": 1.2002, "steps count": 1000, "duration": 1349}
|
||||
> {"episode": 27, "reward": 3.1000, "steps count": 1000, "duration": 1364}
|
||||
> {"episode": 28, "reward": -6.4000, "steps count": 1000, "duration": 1392}
|
||||
> {"episode": 29, "reward": -4.4998, "steps count": 1000, "duration": 1444}
|
||||
> {"episode": 30, "reward": 3.1000, "steps count": 1000, "duration": 1611}
|
||||
|
||||
thread 'main' has overflowed its stack
|
||||
fatal runtime error: stack overflow, aborting
|
||||
|
||||
---
|
||||
|
||||
num_episodes: 40,
|
||||
// memory_size: 8192, // must be set in dqn_model.rs with the MEMORY_SIZE constant
|
||||
// max_steps: 700, // must be set in environment.rs with the MAX_STEPS constant
|
||||
dense_size: 256, // neural network complexity
|
||||
eps_start: 0.9, // epsilon initial value (0.9 => more exploration)
|
||||
eps_end: 0.05,
|
||||
eps_decay: 3000.0,
|
||||
|
||||
{"episode": 0, "reward": -1256.1014, "steps count": 700, "duration": 9}
|
||||
{"episode": 1, "reward": -1256.1013, "steps count": 700, "duration": 20}
|
||||
{"episode": 2, "reward": -1256.1014, "steps count": 700, "duration": 31}
|
||||
{"episode": 3, "reward": -1258.7015, "steps count": 700, "duration": 44}
|
||||
{"episode": 4, "reward": -1206.8009, "steps count": 700, "duration": 56}
|
||||
{"episode": 5, "reward": -473.2974, "steps count": 700, "duration": 68}
|
||||
{"episode": 6, "reward": -285.2984, "steps count": 700, "duration": 82}
|
||||
{"episode": 7, "reward": -332.6987, "steps count": 700, "duration": 103}
|
||||
{"episode": 8, "reward": -359.2984, "steps count": 700, "duration": 114}
|
||||
{"episode": 9, "reward": -118.7008, "steps count": 700, "duration": 125}
|
||||
{"episode": 10, "reward": -83.9004, "steps count": 700, "duration": 144}
|
||||
{"episode": 11, "reward": -68.7006, "steps count": 700, "duration": 165}
|
||||
{"episode": 12, "reward": -49.7002, "steps count": 700, "duration": 180}
|
||||
{"episode": 13, "reward": -68.7002, "steps count": 700, "duration": 204}
|
||||
{"episode": 14, "reward": -38.3001, "steps count": 700, "duration": 223}
|
||||
{"episode": 15, "reward": -19.2999, "steps count": 700, "duration": 240}
|
||||
{"episode": 16, "reward": -19.1998, "steps count": 700, "duration": 254}
|
||||
{"episode": 17, "reward": -21.1999, "steps count": 700, "duration": 250}
|
||||
{"episode": 18, "reward": -26.8998, "steps count": 700, "duration": 280}
|
||||
{"episode": 19, "reward": -11.6999, "steps count": 700, "duration": 301}
|
||||
{"episode": 20, "reward": -13.5998, "steps count": 700, "duration": 317}
|
||||
{"episode": 21, "reward": 5.4000, "steps count": 700, "duration": 334}
|
||||
{"episode": 22, "reward": 3.5000, "steps count": 700, "duration": 353}
|
||||
{"episode": 23, "reward": 13.0000, "steps count": 700, "duration": 374}
|
||||
{"episode": 24, "reward": 7.3001, "steps count": 700, "duration": 391}
|
||||
{"episode": 25, "reward": -4.1000, "steps count": 700, "duration": 408}
|
||||
{"episode": 26, "reward": -17.3998, "steps count": 700, "duration": 437}
|
||||
{"episode": 27, "reward": 11.1001, "steps count": 700, "duration": 480}
|
||||
{"episode": 28, "reward": -4.1000, "steps count": 700, "duration": 505}
|
||||
{"episode": 29, "reward": -13.5999, "steps count": 700, "duration": 522}
|
||||
{"episode": 30, "reward": -0.3000, "steps count": 700, "duration": 540}
|
||||
{"episode": 31, "reward": -15.4998, "steps count": 700, "duration": 572}
|
||||
{"episode": 32, "reward": 14.9001, "steps count": 700, "duration": 630}
|
||||
{"episode": 33, "reward": -4.1000, "steps count": 700, "duration": 729}
|
||||
{"episode": 34, "reward": 5.4000, "steps count": 700, "duration": 777}
|
||||
{"episode": 35, "reward": 7.3000, "steps count": 700, "duration": 748}
|
||||
{"episode": 36, "reward": 9.2001, "steps count": 700, "duration": 767}
|
||||
{"episode": 37, "reward": 13.0001, "steps count": 700, "duration": 791}
|
||||
{"episode": 38, "reward": -13.5999, "steps count": 700, "duration": 813}
|
||||
{"episode": 39, "reward": 26.3002, "steps count": 700, "duration": 838}
|
||||
|
||||
> Sauvegarde du modèle de validation
|
||||
> Modèle de validation sauvegardé : models/burn_dqn_50_model.mpk
|
||||
> Chargement du modèle pour test
|
||||
> Chargement du modèle depuis : models/burn_dqn_50_model.mpk
|
||||
> Test avec le modèle chargé
|
||||
> Episode terminé. Récompense totale: 70.00, Étapes: 700
|
||||
|
|
@ -37,7 +37,7 @@ Si on peut remplir le coin de l'adversaire avec deux dames (ce qui est interdit)
|
|||
|
||||
Si on a le choix entre remplir son coin directement ou par puissance, on doit obligatoirement le faire directement.
|
||||
|
||||
Tant que l'adversaire à la possibilité de remplir un cadran, on ne peut pas jouer dans ce cadran.
|
||||
Tant que l'adversaire à la possibilité de remplir un des deux cadrans de son côté (son jeu "ordinaire", par opposition au jeu "de retour"), on ne peut pas jouer dans ce cadran.
|
||||
|
||||
Quand on joue dans ses propres deux cadrans, on dit qu'on joue le jeu ordinaire
|
||||
Quand on joue dans les deux cadrans de l'adversaire, on dit qu'on joue le jeu de retour
|
||||
|
|
@ -64,6 +64,7 @@ Si on doit passer par une case occupée par deux dames adverses ou plus pour att
|
|||
|
||||
Remarques
|
||||
- on peut "passer" sur une dame adverse (donc battue) pour battre une seconde dame adverse (avec la somme des deux dés).
|
||||
- comme pour les déplacements, il est possible de passer par le coin de repos vide de l'adversaire pour battre à vrai une dame en "tout d'une" (c'est s'arrêter sur le coin de repos qui est interdit)
|
||||
- même s'il ne reste que deux dames dans son coin de repos (et qu'en théorie elle ne peuvent en sortir qu'en même temps), elles peuvent tout de même battre une dame adverse (à vrai et à faux). En revanche elles ne peuvent pas participer au battage du coin adverse (cf. prochain paragraphe).
|
||||
|
||||
Autre jan de récompense :
|
||||
|
|
|
|||
172
doc/store.puml
Normal file
172
doc/store.puml
Normal file
|
|
@ -0,0 +1,172 @@
|
|||
@startuml
|
||||
|
||||
class "CheckerMove" {
|
||||
- from: Field
|
||||
- to: Field
|
||||
+ to_display_string()
|
||||
+ new(from: Field, to: Field)
|
||||
+ mirror()
|
||||
+ chain(cmove: Self)
|
||||
+ get_from()
|
||||
+ get_to()
|
||||
+ is_exit()
|
||||
+ doable_with_dice(dice: usize)
|
||||
}
|
||||
|
||||
class "Board" {
|
||||
- positions: [i8;24]
|
||||
+ new()
|
||||
+ mirror()
|
||||
+ set_positions(positions: [ i8 ; 24 ])
|
||||
+ count_checkers(color: Color, from: Field, to: Field)
|
||||
+ to_vec()
|
||||
+ to_gnupg_pos_id()
|
||||
+ to_display_grid(col_size: usize)
|
||||
+ set(color: & Color, field: Field, amount: i8)
|
||||
+ blocked(color: & Color, field: Field)
|
||||
+ passage_blocked(color: & Color, field: Field)
|
||||
+ get_field_checkers(field: Field)
|
||||
+ get_checkers_color(field: Field)
|
||||
+ is_field_in_small_jan(field: Field)
|
||||
+ get_color_fields(color: Color)
|
||||
+ get_color_corner(color: & Color)
|
||||
+ get_possible_moves(color: Color, dice: u8, with_excedants: bool, check_rest_corner_exit: bool, forbid_exits: bool)
|
||||
+ passage_possible(color: & Color, cmove: & CheckerMove)
|
||||
+ move_possible(color: & Color, cmove: & CheckerMove)
|
||||
+ any_quarter_filled(color: Color)
|
||||
+ is_quarter_filled(color: Color, field: Field)
|
||||
+ get_quarter_filling_candidate(color: Color)
|
||||
+ is_quarter_fillable(color: Color, field: Field)
|
||||
- get_quarter_fields(field: Field)
|
||||
+ move_checker(color: & Color, cmove: CheckerMove)
|
||||
+ remove_checker(color: & Color, field: Field)
|
||||
+ add_checker(color: & Color, field: Field)
|
||||
}
|
||||
|
||||
class "MoveRules" {
|
||||
+ board: Board
|
||||
+ dice: Dice
|
||||
+ new(color: & Color, board: & Board, dice: Dice)
|
||||
+ set_board(color: & Color, board: & Board)
|
||||
- get_board_from_color(color: & Color, board: & Board)
|
||||
+ moves_follow_rules(moves: & ( CheckerMove , CheckerMove ))
|
||||
- moves_possible(moves: & ( CheckerMove , CheckerMove ))
|
||||
- moves_follows_dices(moves: & ( CheckerMove , CheckerMove ))
|
||||
- get_move_compatible_dices(cmove: & CheckerMove)
|
||||
+ moves_allowed(moves: & ( CheckerMove , CheckerMove ))
|
||||
- check_opponent_can_fill_quarter_rule(moves: & ( CheckerMove , CheckerMove ))
|
||||
- check_must_fill_quarter_rule(moves: & ( CheckerMove , CheckerMove ))
|
||||
- check_corner_rules(moves: & ( CheckerMove , CheckerMove ))
|
||||
- has_checkers_outside_last_quarter()
|
||||
- check_exit_rules(moves: & ( CheckerMove , CheckerMove ))
|
||||
+ get_possible_moves_sequences(with_excedents: bool, ignored_rules: Vec < TricTracRule >)
|
||||
+ get_scoring_quarter_filling_moves_sequences()
|
||||
- get_sequence_origin_from_destination(sequence: ( CheckerMove , CheckerMove ), destination: Field)
|
||||
+ get_quarter_filling_moves_sequences()
|
||||
- get_possible_moves_sequences_by_dices(dice1: u8, dice2: u8, with_excedents: bool, ignore_empty: bool, ignored_rules: Vec < TricTracRule >)
|
||||
- _get_direct_exit_moves(state: & GameState)
|
||||
- is_move_by_puissance(moves: & ( CheckerMove , CheckerMove ))
|
||||
- can_take_corner_by_effect()
|
||||
}
|
||||
|
||||
class "DiceRoller" {
|
||||
- rng: StdRng
|
||||
+ new(opt_seed: Option < u64 >)
|
||||
+ roll()
|
||||
}
|
||||
|
||||
class "Dice" {
|
||||
+ values: (u8,u8)
|
||||
+ to_bits_string()
|
||||
+ to_display_string()
|
||||
+ is_double()
|
||||
}
|
||||
|
||||
class "GameState" {
|
||||
+ stage: Stage
|
||||
+ turn_stage: TurnStage
|
||||
+ board: Board
|
||||
+ active_player_id: PlayerId
|
||||
+ players: HashMap<PlayerId,Player>
|
||||
+ history: Vec<GameEvent>
|
||||
+ dice: Dice
|
||||
+ dice_points: (u8,u8)
|
||||
+ dice_moves: (CheckerMove,CheckerMove)
|
||||
+ dice_jans: PossibleJans
|
||||
- roll_first: bool
|
||||
+ schools_enabled: bool
|
||||
+ new(schools_enabled: bool)
|
||||
- set_schools_enabled(schools_enabled: bool)
|
||||
- get_active_player()
|
||||
- get_opponent_id()
|
||||
+ to_vec_float()
|
||||
+ to_vec()
|
||||
+ to_string_id()
|
||||
+ who_plays()
|
||||
+ get_white_player()
|
||||
+ get_black_player()
|
||||
+ player_id_by_color(color: Color)
|
||||
+ player_id(player: & Player)
|
||||
+ player_color_by_id(player_id: & PlayerId)
|
||||
+ validate(event: & GameEvent)
|
||||
+ init_player(player_name: & str)
|
||||
- add_player(player_id: PlayerId, player: Player)
|
||||
+ switch_active_player()
|
||||
+ consume(valid_event: & GameEvent)
|
||||
- new_pick_up()
|
||||
- get_rollresult_jans(dice: & Dice)
|
||||
+ determine_winner()
|
||||
- inc_roll_count(player_id: PlayerId)
|
||||
- mark_points(player_id: PlayerId, points: u8)
|
||||
}
|
||||
|
||||
class "Player" {
|
||||
+ name: String
|
||||
+ color: Color
|
||||
+ points: u8
|
||||
+ holes: u8
|
||||
+ can_bredouille: bool
|
||||
+ can_big_bredouille: bool
|
||||
+ dice_roll_count: u8
|
||||
+ new(name: String, color: Color)
|
||||
+ to_bits_string()
|
||||
+ to_vec()
|
||||
}
|
||||
|
||||
class "PointsRules" {
|
||||
+ board: Board
|
||||
+ dice: Dice
|
||||
+ move_rules: MoveRules
|
||||
+ new(color: & Color, board: & Board, dice: Dice)
|
||||
+ set_dice(dice: Dice)
|
||||
+ update_positions(positions: [ i8 ; 24 ])
|
||||
- get_jans(board_ini: & Board, dice_rolls_count: u8)
|
||||
+ get_jans_points(jans: HashMap < Jan , Vec < ( CheckerMove , CheckerMove ) > >)
|
||||
+ get_points(dice_rolls_count: u8)
|
||||
+ get_result_jans(dice_rolls_count: u8)
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
"MoveRules" <-- "Board"
|
||||
"MoveRules" <-- "Dice"
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
"GameState" <-- "Board"
|
||||
"HashMap<PlayerId,Player>" <-- "Player"
|
||||
"GameState" <-- "HashMap<PlayerId,Player>"
|
||||
"GameState" <-- "Dice"
|
||||
|
||||
|
||||
|
||||
|
||||
"PointsRules" <-- "Board"
|
||||
"PointsRules" <-- "Dice"
|
||||
"PointsRules" <-- "MoveRules"
|
||||
|
||||
@enduml
|
||||
|
|
@ -1,3 +1,5 @@
|
|||
# traité
|
||||
|
||||
En 12 chapitres (trous) de 12 sous-chapitres (points / niveaux de compréhension) ?
|
||||
|
||||
Célébration -> s'inspirer du _petit traité invitant à la découverte de l'art subtil du go_
|
||||
|
|
@ -30,3 +32,9 @@ L'encyclopédie
|
|||
- le dictionnaire du trictrac
|
||||
- fabriquer un boîtier/plateau de jeu
|
||||
- jouer en ligne
|
||||
|
||||
## rêveries
|
||||
|
||||
Trictrac : un domaine grand et complexe, un univers dans lequel on peut s'absorber. Un jeu geek parfait. Qui a la noblesse d'avoir été populaire, qui a la noblesse de règles nécessitant apprentissage et presque companionage.
|
||||
Pourquoi s'investir dans ce genre d'activité ? Toucher un absolu. Sauver de la mort une pépite du passé. Entrer dans le monde des morts comme Orphée ou Ulysse ?
|
||||
Et maîtriser un vocabulaire, des gestes, des règles affinées au fil des siècles.
|
||||
|
|
|
|||
|
|
@ -2,3 +2,4 @@
|
|||
|
||||
Dames : checkers / men
|
||||
cases : points
|
||||
cadrant : quarter
|
||||
|
|
|
|||
25
doc/workflow.md
Normal file
25
doc/workflow.md
Normal file
|
|
@ -0,0 +1,25 @@
|
|||
# Workflow
|
||||
|
||||
@startuml
|
||||
|
||||
state c <<choice>>
|
||||
state haswon <<choice>>
|
||||
state MarkPoints #lightblue
|
||||
state MarkAdvPoints #lightblue
|
||||
note right of MarkPoints : automatic 'Mark' transition\nwhen no school
|
||||
note right of MarkAdvPoints : automatic 'Mark' transition\nwhen no school
|
||||
|
||||
[*] -> RollDice : BeginGame
|
||||
RollDice --> RollWaiting : Roll (current player)
|
||||
RollWaiting --> MarkPoints : RollResult (engine)
|
||||
MarkPoints --> c : Mark (current player)
|
||||
c --> HoldHorGoChoice : [new hole]
|
||||
c --> [*] : [has won]
|
||||
c --> Move : [not new hole]
|
||||
HoldHorGoChoice --> RollDice : Go
|
||||
HoldHorGoChoice --> MarkAdvPoints : Move
|
||||
Move --> MarkAdvPoints : Move
|
||||
MarkAdvPoints --> haswon : Mark (adversary)
|
||||
haswon --> RollDice : [has not won]
|
||||
haswon --> [*] : [has won]
|
||||
@enduml
|
||||
41
justfile
Normal file
41
justfile
Normal file
|
|
@ -0,0 +1,41 @@
|
|||
#!/usr/bin/env -S just --justfile
# ^ A shebang isn't required, but allows a justfile to be executed
# like a script, with `./justfile test`, for example.

doc:
    cargo doc --no-deps

shell:
    devenv shell

runcli:
    RUST_LOG=info cargo run --bin=client_cli

runclibots:
    cargo run --bin=client_cli -- --bot random,dqnburn:./bot/models/burnrl_dqn_40.mpk
    #cargo run --bin=client_cli -- --bot dqn:./bot/models/dqn_model_final.json,dummy
    # RUST_LOG=info cargo run --bin=client_cli -- --bot dummy,dqn

match:
    cargo build --release --bin=client_cli
    LD_LIBRARY_PATH=./target/release ./target/release/client_cli -- --bot dummy,dqn

profile:
    echo '1' | sudo tee /proc/sys/kernel/perf_event_paranoid
    cargo build --profile profiling
    samply record ./target/profiling/client_cli --bot dummy,dummy

pythonlib:
    maturin build -m store/Cargo.toml --release
    pip install --no-deps --force-reinstall --prefix .devenv/state/venv target/wheels/*.whl

trainsimple:
    cargo build --release --bin=train_dqn_simple
    LD_LIBRARY_PATH=./target/release ./target/release/train_dqn_simple | tee /tmp/train.out

trainbot algo:
    #python ./store/python/trainModel.py
    # cargo run --bin=train_dqn # ok
    # ./bot/scripts/trainValid.sh
    ./bot/scripts/train.sh {{algo}}

plottrainbot algo:
    ./bot/scripts/train.sh plot {{algo}}

debugtrainbot:
    cargo build --bin=train_dqn_burn
    RUST_BACKTRACE=1 LD_LIBRARY_PATH=./target/debug ./target/debug/train_dqn_burn

profiletrainbot:
    echo '1' | sudo tee /proc/sys/kernel/perf_event_paranoid
    cargo build --profile profiling --bin=train_dqn_burn
    LD_LIBRARY_PATH=./target/profiling samply record ./target/profiling/train_dqn_burn
@@ -1,4 +1,3 @@
-use bincode;
use log::{info, trace, warn};
use std::net::{IpAddr, Ipv4Addr, SocketAddr, UdpSocket};
use std::thread;
@@ -44,7 +43,7 @@ fn main() {
        .unwrap();
    let mut transport = NetcodeServerTransport::new(current_time, server_config, socket).unwrap();

-   trace!("❂ TricTrac server listening on {}", SERVER_ADDR);
+   trace!("❂ TricTrac server listening on {SERVER_ADDR}");

    let mut game_state = store::GameState::default();
    let mut last_updated = Instant::now();
@@ -81,7 +80,7 @@ fn main() {
                    // Tell all players that a new player has joined
                    server.broadcast_message(0, bincode::serialize(&event).unwrap());

-                   info!("🎉 Client {} connected.", client_id);
+                   info!("🎉 Client {client_id} connected.");
                    // In TicTacTussle the game can begin once two players have joined
                    if game_state.players.len() == 2 {
                        let event = store::GameEvent::BeginGame {
@@ -92,14 +91,17 @@ fn main() {
                        trace!("The game has begun");
                    }
                }
-               ServerEvent::ClientDisconnected { client_id, reason: _ } => {
+               ServerEvent::ClientDisconnected {
+                   client_id,
+                   reason: _,
+               } => {
                    // First consume a disconnect event
                    let event = store::GameEvent::PlayerDisconnected {
                        player_id: client_id,
                    };
                    game_state.consume(&event);
                    server.broadcast_message(0, bincode::serialize(&event).unwrap());
-                   info!("Client {} disconnected", client_id);
+                   info!("Client {client_id} disconnected");

                    // Then end the game, since tic tac toe can't go on with a single player
                    let event = store::GameEvent::EndGame {
@@ -122,7 +124,7 @@ fn main() {
                if let Ok(event) = bincode::deserialize::<store::GameEvent>(&message) {
                    if game_state.validate(&event) {
                        game_state.consume(&event);
-                       trace!("Player {} sent:\n\t{:#?}", client_id, event);
+                       trace!("Player {client_id} sent:\n\t{event:#?}");
                        server.broadcast_message(0, bincode::serialize(&event).unwrap());

                        // Determine if a player has won the game
@@ -133,7 +135,7 @@ fn main() {
                            server.broadcast_message(0, bincode::serialize(&event).unwrap());
                        }
                    } else {
-                       warn!("Player {} sent invalid event:\n\t{:#?}", client_id, event);
+                       warn!("Player {client_id} sent invalid event:\n\t{event:#?}");
                    }
                }
            }
@@ -5,9 +5,16 @@ edition = "2021"

# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

+[lib]
+name = "store"
+# Only "rlib" is needed for other Rust crates to use this library
+crate-type = ["rlib"]

[dependencies]
base64 = "0.21.7"
# provides macros for creating log messages to be used by a logger (for example env_logger)
log = "0.4.20"
merge = "0.1.0"
rand = "0.8.5"
serde = { version = "1.0", features = ["derive"] }
transpose = "0.2.2"
@@ -1,4 +1,4 @@
-use crate::player::{Color, Player};
+use crate::player::Color;
use crate::Error;
use serde::{Deserialize, Serialize};
use std::cmp;
@@ -6,13 +6,16 @@ use std::fmt;

/// field (aka 'point') position on the board (from 0 to 24, 0 being 'outside')
pub type Field = usize;
pub type FieldWithCount = (Field, i8);

-#[derive(Debug, Copy, Clone, Serialize, PartialEq, Deserialize)]
+#[derive(Debug, Copy, Clone, Serialize, PartialEq, Eq, Deserialize)]
pub struct CheckerMove {
    from: Field,
    to: Field,
}

pub const EMPTY_MOVE: CheckerMove = CheckerMove { from: 0, to: 0 };

fn transpose(matrix: Vec<Vec<String>>) -> Vec<Vec<String>> {
    let num_cols = matrix.first().unwrap().len();
    let mut row_iters: Vec<_> = matrix.into_iter().map(Vec::into_iter).collect();
@@ -26,20 +29,41 @@ fn transpose(matrix: Vec<Vec<String>>) -> Vec<Vec<String>> {
    out
}

impl Default for CheckerMove {
    fn default() -> Self {
        EMPTY_MOVE
    }
}

impl CheckerMove {
    pub fn to_display_string(self) -> String {
        format!("{self:?} ")
    }

    pub fn new(from: Field, to: Field) -> Result<Self, Error> {
        // println!("from {} to {}", from, to);
        // check if the field is on the board
        // we allow 0 for 'to', which represents the exit of a checker,
-       if from < 1 || 24 < from || 24 < to {
+       // and (0, 0), which represents the absence of a move (when there is only one checker left on the board)
+       if ((from, to) != (0, 0)) && (!(1..25).contains(&from) || 24 < to) {
            return Err(Error::FieldInvalid);
        }
        // check that the destination is after the origin field
-       if to < from && to != 0 {
-           return Err(Error::MoveInvalid);
-       }
+       // --> not applicable for black moves
+       // if to < from && to != 0 {
+       //     return Err(Error::MoveInvalid);
+       // }
        Ok(Self { from, to })
    }

    /// Get the mirrored CheckerMove (i.e. swap colors)
    pub fn mirror(&self) -> Self {
        let from = if self.from == 0 { 0 } else { 25 - self.from };
        let to = if self.to == 0 { 0 } else { 25 - self.to };
        Self { from, to }
    }

    // Construct the move resulting from two successive moves
    pub fn chain(self, cmove: Self) -> Result<Self, Error> {
        if self.to != cmove.from {
@@ -58,10 +82,19 @@ impl CheckerMove {
    pub fn get_to(&self) -> Field {
        self.to
    }

    pub fn is_exit(&self) -> bool {
        self.to == 0 && self != &EMPTY_MOVE
    }

    pub fn doable_with_dice(&self, dice: usize) -> bool {
        (self.to == 0 && 25 - self.from <= dice)
            || (self.from < self.to && self.to - self.from == dice)
    }
}

/// Represents the Tric Trac board
-#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct Board {
    positions: [i8; 24],
}
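A minimal sketch of how the `CheckerMove` API above fits together. The `demo` function and its values are illustrative only, assuming the code runs inside the `store` crate:

```rust
use crate::board::{CheckerMove, EMPTY_MOVE};
use crate::Error;

fn demo() -> Result<(), Error> {
    // A regular move, playable with a 4:
    let m = CheckerMove::new(1, 5)?;
    assert!(m.doable_with_dice(4));

    // `to == 0` encodes bearing a checker off the board:
    let exit = CheckerMove::new(20, 0)?;
    assert!(exit.is_exit());
    assert!(exit.doable_with_dice(5)); // 25 - 20 = 5: an exact exit

    // Mirroring swaps colors: point p becomes 25 - p, exits stay exits:
    assert_eq!(exit.mirror(), CheckerMove::new(5, 0)?);

    // (0, 0) is the explicit "no move" value:
    assert_eq!(CheckerMove::default(), EMPTY_MOVE);
    assert!(!EMPTY_MOVE.is_exit());

    // Out-of-board origins are rejected:
    assert!(CheckerMove::new(0, 5).is_err());
    Ok(())
}
```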
@@ -81,7 +114,7 @@ impl fmt::Display for Board {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let mut s = String::new();
        s.push_str(&format!("{:?}", self.positions));
-       write!(f, "{}", s)
+       write!(f, "{s}")
    }
}
@@ -91,12 +124,86 @@ impl Board {
        Board::default()
    }

    /// Get the mirrored board (i.e. swap colors)
    pub fn mirror(&self) -> Self {
        let mut positions = self.positions.map(|c| 0 - c);
        positions.reverse();
        Board { positions }
    }

    /// Globally set pieces on the board (for tests)
    pub fn set_positions(&mut self, color: &Color, positions: [i8; 24]) {
        let mut new_positions = positions;
        if color == &Color::Black {
            new_positions = new_positions.map(|c| 0 - c);
            new_positions.reverse();
        }
        self.positions = new_positions;
    }

    pub fn count_checkers(&self, color: Color, from: Field, to: Field) -> u8 {
        if to == 0 || from == 0 {
            return 0;
        }
        self.positions[(from - 1)..to]
            .iter()
            .filter(|count| {
                if color == Color::White {
                    **count > 0
                } else {
                    **count < 0
                }
            })
            .sum::<i8>()
            .unsigned_abs()
    }

    // get the number of the last checker in a field
    pub fn get_field_checker(&self, color: &Color, field: Field) -> u8 {
        assert_eq!(color, &Color::White); // otherwise, add handling of Black via mirror()
        let mut total_count: u8 = 0;
        for (i, checker_count) in self.positions.iter().enumerate() {
            // count white checkers (checker_count > 0)
            if *checker_count > 0 {
                total_count += *checker_count as u8;
                if field == i + 1 {
                    return total_count;
                }
            }
        }
        0
    }

    // get the field of the nth checker
    pub fn get_checker_field(&self, color: &Color, checker_pos: u8) -> Option<Field> {
        assert_eq!(color, &Color::White); // otherwise, add handling of Black via mirror()
        if checker_pos == 0 {
            return None;
        }
        let mut total_count: u8 = 0;
        for (i, checker_count) in self.positions.iter().enumerate() {
            // count white checkers (checker_count > 0)
            if *checker_count > 0 {
                total_count += *checker_count as u8;
            }
            // return the current field if it contains the checker
            if checker_pos <= total_count {
                return Some(i + 1);
            }
        }
        None
    }

    pub fn to_vec(&self) -> Vec<i8> {
        self.positions.to_vec()
    }
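A rough usage sketch of the helpers above (assuming the default board places all fifteen white men on point 1 and all fifteen black men on point 24, as the unit tests further down suggest):

```rust
fn demo() {
    // The default board is color-symmetric, so it is its own mirror:
    let board = Board::new();
    assert_eq!(board.mirror(), board);

    // count_checkers counts one color inside an inclusive field range:
    assert_eq!(board.count_checkers(Color::White, 1, 12), 15);
    assert_eq!(board.count_checkers(Color::Black, 13, 24), 15);
}
```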
    // maybe todo : operate on bits (cf. https://github.com/bungogood/bkgm/blob/a2fb3f395243bcb0bc9f146df73413f73f5ea1e0/src/position.rs#L217)
    pub fn to_gnupg_pos_id(&self) -> String {
        // Pieces placement -> 77 bits (24 + 23 + 30 max)
        // inspired by https://www.gnu.org/software/gnubg/manual/html_node/A-technical-description-of-the-Position-ID.html
        // - white positions
-       let white_board = self.positions.clone();
+       let white_board = self.positions;
        let mut pos_bits = white_board.iter().fold(vec![], |acc, nb| {
            let mut new_acc = acc.clone();
            if *nb > 0 {
@@ -108,7 +215,7 @@ impl Board {
        });

        // - black positions
-       let mut black_board = self.positions.clone();
+       let mut black_board = self.positions;
        black_board.reverse();
        let mut pos_black_bits = black_board.iter().fold(vec![], |acc, nb| {
            let mut new_acc = acc.clone();
@@ -164,7 +271,7 @@ impl Board {
            .map(|cells| {
                cells
                    .into_iter()
-                   .map(|cell| format!("{:>5}", cell))
+                   .map(|cell| format!("{cell:>5}"))
                    .collect::<Vec<String>>()
                    .join("")
            })
@@ -175,7 +282,7 @@ impl Board {
            .map(|cells| {
                cells
                    .into_iter()
-                   .map(|cell| format!("{:>5}", cell))
+                   .map(|cell| format!("{cell:>5}"))
                    .collect::<Vec<String>>()
                    .join("")
            })
@@ -187,17 +294,16 @@ impl Board {
            .to_owned();
        for mut line in upper {
            // add middle bar
-           line.replace_range(30..30, "| |");
+           line.replace_range(31..31, "| |");
            output = output + " |" + &line + " |\n";
        }
-       output = output + " |----------------------------- | | ------------------------------|\n";
+       output += " |------------------------------ | | -----------------------------|\n";
        for mut line in lower {
            // add middle bar
-           line.replace_range(30..30, "| |");
+           line.replace_range(31..31, "| |");
            output = output + " |" + &line + " |\n";
        }
-       output = output
-           + " ----------------------------------------------------------------
+       output += " ----------------------------------------------------------------
  12 11 10 9 8 7 6 5 4 3 2 1 \n";
        output
    }
@@ -249,6 +355,13 @@ impl Board {

    /// Check if a field is blocked for a player
    pub fn blocked(&self, color: &Color, field: Field) -> Result<bool, Error> {
        // the square is blocked on the opponent rest corner
        // let opp_corner_field = if color == &Color::White { 13 } else { 12 };
        self.passage_blocked(color, field)
        // .map(|blocked| blocked || opp_corner_field == field)
    }

    pub fn passage_blocked(&self, color: &Color, field: Field) -> Result<bool, Error> {
        if 24 < field {
            return Err(Error::FieldInvalid);
        }
@@ -258,42 +371,55 @@ impl Board {
            return Ok(false);
        }

-       // the square is blocked on the opponent rest corner or if there are opponent's men on the square
-       match color {
-           Color::White => {
-               if field == 13 || self.positions[field - 1] < 0 {
-                   Ok(true)
-               } else {
-                   Ok(false)
-               }
-           }
-           Color::Black => {
-               if field == 12 || self.positions[23 - field] > 1 {
-                   Ok(true)
-               } else {
-                   Ok(false)
-               }
-           }
-       }
+       // the square is blocked if there are opponent's men on the square
+       let blocked = if color == &Color::White {
+           self.positions[field - 1] < 0
+       } else {
+           self.positions[field - 1] > 0
+       };
+       Ok(blocked)
    }

    pub fn get_field_checkers(&self, field: Field) -> Result<(u8, Option<&Color>), Error> {
-       if field < 1 || field > 24 {
+       if !(1..25).contains(&field) {
            return Err(Error::FieldInvalid);
        }
        let checkers_count = self.positions[field - 1];
-       let color = if checkers_count < 0 {
-           Some(&Color::Black)
-       } else if checkers_count > 0 {
-           Some(&Color::White)
-       } else {
-           None
-       };
-       Ok((checkers_count.abs() as u8, color))
+       let color = match checkers_count.cmp(&0) {
+           cmp::Ordering::Less => Some(&Color::Black),
+           cmp::Ordering::Greater => Some(&Color::White),
+           cmp::Ordering::Equal => None,
+       };
+       Ok((checkers_count.unsigned_abs(), color))
    }

    pub fn get_checkers_color(&self, field: Field) -> Result<Option<&Color>, Error> {
-       self.get_field_checkers(field).map(|(count, color)| color)
+       self.get_field_checkers(field).map(|(_count, color)| color)
    }

    pub fn is_field_in_small_jan(field: Field) -> bool {
        !(7..=18).contains(&field)
    }

    /// returns the list of Fields containing Checkers of the Color
    pub fn get_color_fields(&self, color: Color) -> Vec<(usize, i8)> {
        match color {
            Color::White => self
                .positions
                .iter()
                .enumerate()
                .filter(|&(_, count)| *count > 0)
                .map(|(i, count)| (i + 1, *count))
                .collect(),
            Color::Black => self
                .positions
                .iter()
                .enumerate()
                .filter(|&(_, count)| *count < 0)
                .rev()
                .map(|(i, count)| (i + 1, (0 - count)))
                .collect(),
        }
    }
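A quick illustration of the blocking predicate on the default board. Note that in the version above the rest-corner special case is commented out, so only opposing men block a field (values illustrative):

```rust
fn demo() {
    let board = Board::new();
    // Point 1 holds the fifteen white men: blocked for Black, free for White.
    assert!(board.blocked(&Color::Black, 1).unwrap());
    assert!(!board.blocked(&Color::White, 1).unwrap());
    // An empty point blocks nobody.
    assert!(!board.blocked(&Color::White, 5).unwrap());
    // Fields beyond 24 are rejected.
    assert!(board.blocked(&Color::White, 28).is_err());
}
```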
    // Get the corner field for the color
@@ -305,11 +431,166 @@ impl Board {
        }
    }

    pub fn get_possible_moves(
        &self,
        color: Color,
        dice: u8,
        with_excedants: bool,
        check_rest_corner_exit: bool,
        forbid_exits: bool,
    ) -> Vec<CheckerMove> {
        let mut moves = Vec::new();

        let get_dest = |from| {
            if color == Color::White {
                if from + dice as i32 == 25 {
                    0
                } else {
                    from + dice as i32
                }
            } else {
                from - dice as i32
            }
        };

        for (field, count) in self.get_color_fields(color) {
            // check rest corner exit
            if field == self.get_color_corner(&color) && count == 2 && check_rest_corner_exit {
                continue;
            }
            let mut dest = get_dest(field as i32);
            if dest == 0 && forbid_exits {
                continue;
            }
            if !(0..25).contains(&dest) {
                if with_excedants && !forbid_exits {
                    dest = 0;
                } else {
                    continue;
                }
            }
            if let Ok(cmove) = CheckerMove::new(field, dest.unsigned_abs() as usize) {
                if let Ok(false) = self.blocked(&color, dest.unsigned_abs() as usize) {
                    moves.push(cmove);
                }
            }
        }
        moves
    }

    pub fn passage_possible(&self, color: &Color, cmove: &CheckerMove) -> bool {
        !self.passage_blocked(color, cmove.to).unwrap_or(true)
    }

    pub fn move_possible(&self, color: &Color, cmove: &CheckerMove) -> bool {
        let blocked = self.blocked(color, cmove.to).unwrap_or(true);
        // Check if there is a player's checker on the 'from' square
        let has_checker = self.get_checkers_color(cmove.from).unwrap_or(None) == Some(color);
-       has_checker && !blocked
+       (has_checker && !blocked) || cmove == &EMPTY_MOVE
    }

    /// Returns whether any quarter is filled by the color
    pub fn any_quarter_filled(&self, color: Color) -> bool {
        [1, 7, 13, 19]
            .iter()
            .any(|field| self.is_quarter_filled(color, *field))
    }

    /// Returns whether the quarter containing `field` is filled by the `color`
    pub fn is_quarter_filled(&self, color: Color, field: Field) -> bool {
        let fields = self.get_quarter_fields(field);
        !fields.iter().any(|field| {
            if color == Color::White {
                self.positions[field - 1] < 2
            } else {
                self.positions[field - 1] > -2
            }
        })
    }

    pub fn get_quarter_filling_candidate(&self, color: Color) -> Vec<Field> {
        let mut missing = vec![];
        for quarter in [1..7, 7..13, 13..19, 19..25] {
            missing = vec![];
            for field in quarter {
                let field_count = if color == Color::Black {
                    0 - self.positions[field - 1]
                } else {
                    self.positions[field - 1]
                };
                if field_count < 0 {
                    // opponent checker found : this quarter cannot be filled
                    missing = vec![];
                    continue;
                }
                if field_count == 0 {
                    // two checkers are missing on this field
                    missing.push(field);
                    missing.push(field);
                } else if field_count == 1 {
                    missing.push(field);
                }
            }
            if missing.len() < 3 {
                // fillable quarter found (no more than two missing checkers)
                if let Some(field) = missing.first() {
                    // We check that there are sufficient checkers left to fill the quarter
                    if !self.is_quarter_fillable(color, *field) {
                        missing = vec![];
                    }
                }
                // there will be no other fillable quarter
                break;
            }
        }
        missing
    }

    /// Returns whether the `color` player can still fill the quarter containing the `field`
    /// * `color` - color of the player
    /// * `field` - field belonging to the quarter
    pub fn is_quarter_fillable(&self, color: Color, field: Field) -> bool {
        let fields = self.get_quarter_fields(field);

        // opponent rest corner
        if color == Color::White && fields.contains(&13)
            || color == Color::Black && fields.contains(&12)
        {
            return false;
        }

        // is there a sufficient number of checkers on or before each field?
        for field in fields {
            // Number of checkers needed before this field (included):
            // 2 checkers per field, from the beginning of the quarter
            let mut field_pos = field % 6;
            if field_pos == 0 {
                field_pos = 6;
            }
            if color == Color::Black {
                field_pos = 7 - field_pos;
            }
            let needed = 2 * field_pos;

            let (from, to) = if color == Color::White {
                (1, field)
            } else {
                (field, 24)
            };
            if self.count_checkers(color, from, to) < needed as u8 {
                return false;
            }
        }
        true
    }

    /// Returns the 6 fields of the quarter containing the `field`
    fn get_quarter_fields(&self, field: Field) -> [Field; 6] {
        if field == 0 {
            return [0; 6];
        }
        let min = 1 + ((field - 1) / 6) * 6;
        core::array::from_fn(|i| i + min)
    }
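To make the quarter arithmetic concrete: `get_quarter_fields` maps a field to the six points of its quarter via `min = 1 + ((field - 1) / 6) * 6`, so field 10 gives 7..=12. A sketch of the public helpers, with illustrative positions:

```rust
fn demo() {
    let mut board = Board::new();
    // Fill White's first quarter: at least two men on each of points 1..=6.
    board.set_positions(
        &Color::White,
        [2, 2, 2, 2, 2, 5, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -15],
    );
    assert!(board.is_quarter_filled(Color::White, 3)); // field 3 lies in 1..=6
    assert!(board.any_quarter_filled(Color::White));
    assert!(!board.is_quarter_filled(Color::Black, 3));
}
```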
    pub fn move_checker(&mut self, color: &Color, cmove: CheckerMove) -> Result<(), Error> {
@@ -319,38 +600,47 @@ impl Board {
    }

    pub fn remove_checker(&mut self, color: &Color, field: Field) -> Result<(), Error> {
        if field == 0 {
            return Ok(());
        }
        let checker_color = self.get_checkers_color(field)?;
        if Some(color) != checker_color {
            println!("field invalid : {color:?}, {field:?}, {self:?}");
            return Err(Error::FieldInvalid);
        }
-       self.positions[field - 1] -= 1;
+       let unit = match color {
+           Color::White => 1,
+           Color::Black => -1,
+       };
+       self.positions[field - 1] -= unit;
        Ok(())
    }

    pub fn add_checker(&mut self, color: &Color, field: Field) -> Result<(), Error> {
-       let checker_color = self.get_checkers_color(field)?;
-       // error if the field contains the other color
-       if None != checker_color && Some(color) != checker_color {
-           return Err(Error::FieldInvalid);
-       }
-       self.positions[field - 1] += 1;
+       // Exit
+       if field == 0 {
+           return Ok(());
+       }
+
+       // let checker_color = self.get_checkers_color(field)?;
+       let (count, checker_color) = self.get_field_checkers(field)?;
+       // error if the field contains the other color
+       if checker_color.is_some() && Some(color) != checker_color {
+           return if count > 1 {
+               Err(Error::FieldBlocked)
+           } else {
+               Err(Error::FieldBlockedByOne)
+           };
+       }
+       let unit = match color {
+           Color::White => 1,
+           Color::Black => -1,
+       };
+       self.positions[field - 1] += unit;
        Ok(())
    }
}

/// Trait to move checkers
pub trait Move {
    /// Move a checker
    fn move_checker(&mut self, player: &Player, dice: u8, from: Field) -> Result<&mut Self, Error>
    where
        Self: Sized;

    /// Move permitted
    fn move_permitted(&mut self, player: &Player, dice: u8) -> Result<&mut Self, Error>
    where
        Self: Sized;
}

// Unit Tests
#[cfg(test)]
mod tests {
@@ -364,7 +654,7 @@ mod tests {
    #[test]
    fn blocked_outofrange() -> Result<(), Error> {
        let board = Board::new();
-       assert!(!board.blocked(&Color::White, 0).is_err());
+       assert!(board.blocked(&Color::White, 0).is_ok());
        assert!(board.blocked(&Color::White, 28).is_err());
        Ok(())
    }
@@ -404,7 +694,95 @@ mod tests {
    #[test]
    fn set_wrong_amount1() {
        let mut board = Board::new();
-       let player = Player::new("".into(), Color::White);
        assert!(board.set(&Color::White, 23, -3).is_err());
    }

    #[test]
    fn move_possible() {
        let board = Board::new();
        assert!(board.move_possible(&Color::White, &EMPTY_MOVE));
    }

    #[test]
    fn get_color_fields() {
        let board = Board::new();
        assert_eq!(board.get_color_fields(Color::White), vec![(1, 15)]);
        assert_eq!(board.get_color_fields(Color::Black), vec![(24, 15)]);
    }

    #[test]
    fn is_quarter_fillable() {
        let mut board = Board::new();
        board.set_positions(
            &Color::White,
            [
                15, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -15,
            ],
        );
        assert!(board.is_quarter_fillable(Color::Black, 1));
        assert!(!board.is_quarter_fillable(Color::Black, 12));
        assert!(board.is_quarter_fillable(Color::Black, 13));
        assert!(board.is_quarter_fillable(Color::Black, 24));
        assert!(board.is_quarter_fillable(Color::White, 1));
        assert!(board.is_quarter_fillable(Color::White, 12));
        assert!(!board.is_quarter_fillable(Color::White, 13));
        assert!(board.is_quarter_fillable(Color::White, 24));
        board.set_positions(
            &Color::White,
            [
                5, 0, 0, 0, 0, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -8, 0, 0, 0, 0, 0, -5,
            ],
        );
        assert!(board.is_quarter_fillable(Color::Black, 13));
        assert!(!board.is_quarter_fillable(Color::Black, 24));
        assert!(!board.is_quarter_fillable(Color::White, 1));
        assert!(board.is_quarter_fillable(Color::White, 12));
        board.set_positions(
            &Color::White,
            [
                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 5, 0, 0, 0, 0, 0, 0, -12, 0, 0, 0, 0, 1, 0,
            ],
        );
        assert!(board.is_quarter_fillable(Color::Black, 16));
    }

    #[test]
    fn get_quarter_filling_candidate() {
        let mut board = Board::new();
        board.set_positions(
            &Color::White,
            [
                3, 1, 2, 2, 3, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            ],
        );
        assert_eq!(vec![2], board.get_quarter_filling_candidate(Color::White));
    }

    #[test]
    fn get_checker_field() {
        let mut board = Board::new();
        board.set_positions(
            &Color::White,
            [
                3, 1, 2, 2, 3, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            ],
        );
        assert_eq!(None, board.get_checker_field(&Color::White, 0));
        assert_eq!(Some(3), board.get_checker_field(&Color::White, 5));
        assert_eq!(Some(3), board.get_checker_field(&Color::White, 6));
        assert_eq!(None, board.get_checker_field(&Color::White, 14));
    }

    #[test]
    fn get_field_checker() {
        let mut board = Board::new();
        board.set_positions(
            &Color::White,
            [
                3, 1, 2, 2, 3, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            ],
        );
        assert_eq!(4, board.get_field_checker(&Color::White, 2));
        assert_eq!(6, board.get_field_checker(&Color::White, 3));
    }
}
@@ -1,34 +1,56 @@
use crate::Error;
use rand::distributions::{Distribution, Uniform};
use rand::{rngs::StdRng, SeedableRng};
use serde::{Deserialize, Serialize};

/// Rolls the two dice
#[derive(Debug)]
pub struct DiceRoller {
    rng: StdRng,
}

impl Default for DiceRoller {
    fn default() -> Self {
        Self::new(None)
    }
}

impl DiceRoller {
    pub fn new(opt_seed: Option<u64>) -> Self {
        Self {
            rng: match opt_seed {
                None => StdRng::from_rng(rand::thread_rng()).unwrap(),
                Some(seed) => SeedableRng::seed_from_u64(seed),
            },
        }
    }

    /// Roll the dice, generating two random numbers between 1 and 6,
    /// replicating perfect dice.
    pub fn roll(&mut self) -> Dice {
        let between = Uniform::new_inclusive(1, 6);

        let v = (between.sample(&mut self.rng), between.sample(&mut self.rng));

        Dice { values: (v.0, v.1) }
    }

    // Heads or tails
    // pub fn coin(self) -> bool {
    //     let between = Uniform::new_inclusive(1, 2);
    //     let mut rng = rand::thread_rng();
    //     between.sample(&mut rng) == 1
    // }
}

/// Represents the two dice
///
-/// Trictrac is always played with two dices.
-#[derive(Debug, Clone, Copy, Serialize, PartialEq, Deserialize, Default)]
-pub struct Dices {
+/// Trictrac is always played with two dice.
+#[derive(Debug, Clone, Copy, Serialize, PartialEq, Eq, Deserialize, Default)]
+pub struct Dice {
    /// The two dice values
    pub values: (u8, u8),
}
-impl Dices {
-    /// Roll the dices which generates two random numbers between 1 and 6, replicating a perfect
-    /// dice. We use the operating system's random number generator.
-    pub fn roll(self) -> Self {
-        let between = Uniform::new_inclusive(1, 6);
-        let mut rng = rand::thread_rng();
-
-        let v = (between.sample(&mut rng), between.sample(&mut rng));
-
-        Dices { values: (v.0, v.1) }
-    }
-
-    /// Heads or tails
-    pub fn coin(self) -> bool {
-        let between = Uniform::new_inclusive(1, 2);
-        let mut rng = rand::thread_rng();
-        between.sample(&mut rng) == 1
-    }
+impl Dice {
    pub fn to_bits_string(self) -> String {
        format!("{:0>3b}{:0>3b}", self.values.0, self.values.1)
    }
@@ -37,6 +59,10 @@ impl Dices {
        format!("{} & {}", self.values.0, self.values.1)
    }

    pub fn is_double(&self) -> bool {
        self.values.0 == self.values.1
    }

    // pub fn to_bits(self) -> [bool;6] {
    //     self.to_bits_string().into_bytes().iter().map(|strbit| *strbit == '1' as u8).collect()
    // }
@@ -49,26 +75,32 @@ impl Dices {
    // }
}

/// Trait to roll the dice
pub trait Roll {
    /// Roll the dice
    fn roll(&mut self) -> &mut Self;
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_roll() {
-       let dices = Dices::default().roll();
-       assert!(dices.values.0 >= 1 && dices.values.0 <= 6);
-       assert!(dices.values.1 >= 1 && dices.values.1 <= 6);
+       let dice = DiceRoller::default().roll();
+       assert!(dice.values.0 >= 1 && dice.values.0 <= 6);
+       assert!(dice.values.1 >= 1 && dice.values.1 <= 6);
    }

    #[test]
    fn test_seed() {
        let seed = Some(123);
        let dice1 = DiceRoller::new(seed).roll();
        let dice2 = DiceRoller::new(seed).roll();
        let dice3 = DiceRoller::new(seed).roll();
        assert!(dice1.values.0 == dice2.values.0);
        assert!(dice1.values.0 == dice3.values.0);
        assert!(dice1.values.1 == dice2.values.1);
        assert!(dice1.values.1 == dice3.values.1);
    }

    #[test]
    fn test_to_bits_string() {
-       let dices = Dices { values: (4, 2) };
-       assert!(dices.to_bits_string() == "100010");
+       let dice = Dice { values: (4, 2) };
+       assert!(dice.to_bits_string() == "100010");
    }
}
@@ -14,6 +14,8 @@ pub enum Error {
    PlayerInvalid,
    /// Field blocked
    FieldBlocked,
    /// Field blocked by a single opponent checker
    FieldBlockedByOne,
    /// Invalid field
    FieldInvalid,
    /// Not your turn
@@ -40,6 +42,7 @@ impl fmt::Display for Error {
            Error::PlayerInvalid => write!(f, "Invalid player"),
            Error::DoublingNotPermitted => write!(f, "Doubling not permitted"),
            Error::FieldBlocked => write!(f, "Field blocked"),
            Error::FieldBlockedByOne => write!(f, "Field blocked by one opponent"),
            Error::FieldInvalid => write!(f, "Invalid field"),
            Error::NotYourTurn => write!(f, "Not your turn"),
            Error::MoveInvalid => write!(f, "Invalid move"),
@@ -1,10 +1,10 @@
//! # Play a TricTrac Game
-use crate::board::{Board, CheckerMove, Field, Move};
-use crate::dice::{Dices, Roll};
+use crate::board::{Board, CheckerMove};
+use crate::dice::Dice;
+use crate::game_rules_moves::MoveRules;
+use crate::game_rules_points::{PointsRules, PossibleJans};
use crate::player::{Color, Player, PlayerId};
use crate::Error;
-use log::{error, info};
-use std::cmp;
+use log::{debug, error};

// use itertools::Itertools;
use serde::{Deserialize, Serialize};
@@ -25,12 +25,42 @@ pub enum Stage {
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum TurnStage {
    RollDice,
    RollWaiting,
    MarkPoints,
    HoldOrGoChoice,
    Move,
    MarkAdvPoints,
}

impl From<u8> for TurnStage {
    fn from(item: u8) -> Self {
        match item {
            0 => TurnStage::RollWaiting,
            1 => TurnStage::RollDice,
            2 => TurnStage::MarkPoints,
            3 => TurnStage::HoldOrGoChoice,
            4 => TurnStage::Move,
            5 => TurnStage::MarkAdvPoints,
            _ => TurnStage::RollWaiting,
        }
    }
}

impl From<TurnStage> for u8 {
    fn from(stage: TurnStage) -> u8 {
        match stage {
            TurnStage::RollWaiting => 0,
            TurnStage::RollDice => 1,
            TurnStage::MarkPoints => 2,
            TurnStage::HoldOrGoChoice => 3,
            TurnStage::Move => 4,
            TurnStage::MarkAdvPoints => 5,
        }
    }
}
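The two `From` impls above are inverses of each other on valid discriminants; a round-trip sketch:

```rust
fn demo() {
    assert_eq!(TurnStage::from(4u8), TurnStage::Move);
    assert_eq!(u8::from(TurnStage::Move), 4);
    // Unknown discriminants fall back to RollWaiting:
    assert_eq!(TurnStage::from(42u8), TurnStage::RollWaiting);
}
```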
/// Represents a TricTrac game
-#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct GameState {
    pub stage: Stage,
    pub turn_stage: TurnStage,
@@ -39,19 +69,30 @@ pub struct GameState {
    pub players: HashMap<PlayerId, Player>,
    pub history: Vec<GameEvent>,
    /// last dice pair rolled
-   pub dices: Dices,
+   pub dice: Dice,
+   /// players' points computed for the last dice pair rolled
+   pub dice_points: (u8, u8),
+   pub dice_moves: (CheckerMove, CheckerMove),
+   pub dice_jans: PossibleJans,
    /// true if player needs to roll first
    roll_first: bool,
+   // NOTE: add to a Settings struct if other fields are needed
+   pub schools_enabled: bool,
}

// implement Display trait
impl fmt::Display for GameState {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        let mut s = String::new();
-       s.push_str(&format!("Dices: {:?}\n", self.dices));
        s.push_str(&format!(
            "Stage: {:?} / {:?}\n",
            self.stage, self.turn_stage
        ));
+       s.push_str(&format!("Dice: {:?}\n", self.dice));
        // s.push_str(&format!("Who plays: {}\n", self.who_plays().map(|player| &player.name ).unwrap_or("")));
        s.push_str(&format!("Board: {:?}\n", self.board));
-       write!(f, "{}", s)
+       // s.push_str(&format!("History: {:?}\n", self.history));
+       write!(f, "{s}")
    }
}
@@ -64,22 +105,113 @@ impl Default for GameState {
            active_player_id: 0,
            players: HashMap::new(),
            history: Vec::new(),
-           dices: Dices::default(),
+           dice: Dice::default(),
+           dice_points: (0, 0),
+           dice_moves: (CheckerMove::default(), CheckerMove::default()),
+           dice_jans: PossibleJans::default(),
            roll_first: true,
+           schools_enabled: false,
        }
    }
}

impl GameState {
    /// Create a new default game
-   pub fn new() -> Self {
-       GameState::default()
+   pub fn new(schools_enabled: bool) -> Self {
+       let mut gs = GameState::default();
+       gs.set_schools_enabled(schools_enabled);
+       gs
    }

    pub fn new_with_players(p1_name: &str, p2_name: &str) -> Self {
        let mut game = Self::default();
        if let Some(p1) = game.init_player(p1_name) {
            game.init_player(p2_name);
            game.consume(&GameEvent::BeginGame { goes_first: p1 });
        }
        game
    }

    fn set_schools_enabled(&mut self, schools_enabled: bool) {
        self.schools_enabled = schools_enabled;
    }

    fn get_active_player(&self) -> Option<&Player> {
        self.players.get(&self.active_player_id)
    }

    fn get_opponent_id(&self) -> Option<PlayerId> {
        self.players
            .keys()
            .copied()
            .filter(|k| k != &self.active_player_id)
            .collect::<Vec<PlayerId>>()
            .first()
            .copied()
    }

    // -------------------------------------------------------------------------
    // accessors
    // -------------------------------------------------------------------------

    pub fn to_vec_float(&self) -> Vec<f32> {
        self.to_vec().iter().map(|&x| x as f32).collect()
    }

    /// Get state as a vector (to be used for bot training input) :
    /// length = 36
    /// i8 for board positions, with negative values for Black
    pub fn to_vec(&self) -> Vec<i8> {
        let state_len = 36;
        let mut state = Vec::with_capacity(state_len);

        // length = 24
        state.extend(self.board.to_vec());

        // active player -> length = 1
        // white : 0 (false)
        // black : 1 (true)
        state.push(
            self.who_plays()
                .map(|player| if player.color == Color::Black { 1 } else { 0 })
                .unwrap_or(0), // White by default
        );

        // step -> length = 1
        let turn_stage: u8 = self.turn_stage.into();
        state.push(turn_stage as i8);

        // dice roll -> length = 2
        state.push(self.dice.values.0 as i8);
        state.push(self.dice.values.1 as i8);

        // points, holes, bredouille, grande bredouille: length = 4 x 2 players = 8
        let white_player: Vec<i8> = self
            .get_white_player()
            .unwrap()
            .to_vec()
            .iter()
            .map(|&x| x as i8)
            .collect();
        state.extend(white_player);
        let black_player: Vec<i8> = self
            .get_black_player()
            .unwrap()
            .to_vec()
            .iter()
            .map(|&x| x as i8)
            .collect();
        state.extend(black_player);

        // ensure state has length state_len
        state.truncate(state_len);
        while state.len() < state_len {
            state.push(0);
        }
        state
    }
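Layout sketch for the 36-entry vector, with index ranges read off the code above (`demo` is illustrative; it assumes `board`, `dice` and `turn_stage` are public fields, as elsewhere in the struct):

```rust
fn demo() {
    let game = GameState::new_with_players("p1", "p2");
    let v = game.to_vec();
    assert_eq!(v.len(), 36);
    // 0..24  : board positions (negative counts are Black)
    // 24     : active color (0 = White, 1 = Black)
    // 25     : turn stage discriminant
    // 26..28 : the two dice values
    // 28..36 : points/holes/bredouille stats, White then Black
    assert_eq!(v[25] as u8, u8::from(game.turn_stage));
    assert_eq!(v[26] as u8, game.dice.values.0);
}
```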
    /// Calculate game state id :
    pub fn to_string_id(&self) -> String {
        // Pieces placement -> 77 bits (24 + 23 + 30 max)
@@ -100,16 +232,19 @@ impl GameState {
                .unwrap_or('0'), // White by default
        );

-       // step -> 2 bits
+       // step -> 3 bits
        let step_bits = match self.turn_stage {
-           TurnStage::RollDice => "01",
-           TurnStage::MarkPoints => "01",
-           TurnStage::Move => "10",
+           TurnStage::RollWaiting => "000",
+           TurnStage::RollDice => "001",
+           TurnStage::MarkPoints => "010",
+           TurnStage::HoldOrGoChoice => "011",
+           TurnStage::Move => "100",
+           TurnStage::MarkAdvPoints => "101",
        };
        pos_bits.push_str(step_bits);

        // dice roll -> 6 bits
-       let dice_bits = self.dices.to_bits_string();
+       let dice_bits = self.dice.to_bits_string();
        pos_bits.push_str(&dice_bits);

        // points: 10 bits x 2 players = 20 bits
@@ -118,7 +253,7 @@ impl GameState {
        pos_bits.push_str(&white_bits);
        pos_bits.push_str(&black_bits);

-       pos_bits = format!("{:0>108}", pos_bits);
+       pos_bits = format!("{pos_bits:0>108}");
        // println!("{}", pos_bits);
        let pos_u8 = pos_bits
            .as_bytes()
@@ -130,7 +265,7 @@ impl GameState {
    }

    pub fn who_plays(&self) -> Option<&Player> {
-       self.players.get(&self.active_player_id)
+       self.get_active_player()
    }

    pub fn get_white_player(&self) -> Option<&Player> {
@@ -165,6 +300,14 @@ impl GameState {
            .next()
    }

    pub fn player_color_by_id(&self, player_id: &PlayerId) -> Option<Color> {
        self.players
            .iter()
            .filter(|(id, _)| *id == player_id)
            .map(|(_, player)| player.color)
            .next()
    }

    // ----------------------------------------------------------------------------------
    // Rules checks
    // ----------------------------------------------------------------------------------
|
@ -184,15 +327,14 @@ impl GameState {
|
|||
return false;
|
||||
}
|
||||
}
|
||||
EndGame { reason } => match reason {
|
||||
EndGameReason::PlayerWon { winner: _ } => {
|
||||
EndGame { reason } => {
|
||||
if let EndGameReason::PlayerWon { winner: _ } = reason {
|
||||
// Check that the game has started before someone wins it
|
||||
if self.stage != Stage::InGame {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
_ => {}
|
||||
},
|
||||
}
|
||||
PlayerJoined { player_id, name: _ } => {
|
||||
// Check that there isn't another player with the same id
|
||||
if self.players.contains_key(player_id) {
|
||||
|
|
@@ -214,8 +356,13 @@ impl GameState {
                if self.active_player_id != *player_id {
                    return false;
                }
+               // Check the turn stage
+               if self.turn_stage != TurnStage::RollDice {
+                   error!("bad stage {:?}", self.turn_stage);
+                   return false;
+               }
-           Mark { player_id, points } => {
+           }
+           RollResult { player_id, dice: _ } => {
                // Check player exists
                if !self.players.contains_key(player_id) {
                    return false;
@@ -224,11 +371,39 @@ impl GameState {
                if self.active_player_id != *player_id {
                    return false;
                }
+               // Check the turn stage
+               if self.turn_stage != TurnStage::RollWaiting {
+                   error!("bad stage {:?}", self.turn_stage);
+                   return false;
+               }
-           Move { player_id, moves } => {
+           }
+           Mark {
+               player_id,
+               points: _,
+           } => {
                // Check player exists
                if !self.players.contains_key(player_id) {
                    error!("Player {} unknown", player_id);
                    return false;
                }
                // Check player is currently the one making their move
                if self.active_player_id != *player_id {
                    return false;
                }

                // Check points are correct
                // let (board, moves) = if *color == Color::Black {
                //     (board.mirror(), (moves.0.mirror(), moves.1.mirror()))
                // } else {
                //     (board.clone(), *moves)
                // };
                // let rules_points: u8 = self.get_points().iter().map(|r| r.0).sum();
                // if rules_points != *points {
                //     return false;
                // }
            }
            Go { player_id } => {
                if !self.players.contains_key(player_id) {
                    error!("Player {player_id} unknown");
                    return false;
                }
                // Check player is currently the one making their move
|
@ -236,22 +411,51 @@ impl GameState {
|
|||
error!("Player not active : {}", self.active_player_id);
|
||||
return false;
|
||||
}
|
||||
// Check the player can leave (ie the game is in the KeepOrLeaveChoice stage)
|
||||
if self.turn_stage != TurnStage::HoldOrGoChoice {
|
||||
error!("bad stage {:?}", self.turn_stage);
|
||||
error!(
|
||||
"black player points : {:?}",
|
||||
self.get_black_player()
|
||||
.map(|player| (player.points, player.holes))
|
||||
);
|
||||
// error!("history {:?}", self.history);
|
||||
return false;
|
||||
}
|
||||
}
|
||||
Move { player_id, moves } => {
|
||||
// Check player exists
|
||||
if !self.players.contains_key(player_id) {
|
||||
error!("Player {player_id} unknown");
|
||||
return false;
|
||||
}
|
||||
// Check player is currently the one making their move
|
||||
if self.active_player_id != *player_id {
|
||||
error!("Player not active : {}", self.active_player_id);
|
||||
return false;
|
||||
}
|
||||
// Check the turn stage
|
||||
if self.turn_stage != TurnStage::Move
|
||||
&& self.turn_stage != TurnStage::HoldOrGoChoice
|
||||
{
|
||||
error!("bad stage {:?}", self.turn_stage);
|
||||
return false;
|
||||
}
|
||||
let color = &self.players[player_id].color;
|
||||
|
||||
// Check moves possibles on the board
|
||||
if !self.moves_possible(color, moves) {
|
||||
let rules = MoveRules::new(color, &self.board, self.dice);
|
||||
let moves = if *color == Color::Black {
|
||||
(moves.0.mirror(), moves.1.mirror())
|
||||
} else {
|
||||
*moves
|
||||
};
|
||||
if !rules.moves_follow_rules(&moves) {
|
||||
error!("rules not followed ");
|
||||
return false;
|
||||
}
|
||||
|
||||
// Check moves conforms to the dices
|
||||
if !self.moves_follows_dices(color, moves) {
|
||||
return false;
|
||||
}
|
||||
|
||||
// Check move is allowed by the rules (to desactivate when playing with schools)
|
||||
if !self.moves_allowed(color, moves) {
|
||||
return false;
|
||||
}
|
||||
PlayError => {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@@ -259,77 +463,6 @@ impl GameState {
        true
    }

-   fn moves_possible(&self, color: &Color, moves: &(CheckerMove, CheckerMove)) -> bool {
-       // Check move is physically possible
-       if !self.board.move_possible(color, &moves.0) {
-           return false;
-       }
-
-       // Chained move: "tout d'une"
-       let chained_move = moves.0.chain(moves.1);
-       if chained_move.is_ok() {
-           if !self.board.move_possible(color, &chained_move.unwrap()) {
-               return false;
-           }
-       } else if !self.board.move_possible(color, &moves.1) {
-           return false;
-       }
-       true
-   }
-
-   fn moves_follows_dices(&self, color: &Color, moves: &(CheckerMove, CheckerMove)) -> bool {
-       let (dice1, dice2) = self.dices.values;
-       let (move1, move2): &(CheckerMove, CheckerMove) = moves.into();
-       let dist1 = (move1.get_to() - move1.get_from()) as u8;
-       let dist2 = (move2.get_to() - move2.get_from()) as u8;
-       print!("{}, {}, {}, {}", dist1, dist2, dice1, dice2);
-       // basic : same number
-       if cmp::min(dist1, dist2) != cmp::min(dice1, dice2)
-           || cmp::max(dist1, dist2) != cmp::max(dice1, dice2)
-       {
-           return false;
-       }
-       // taking the corner "par puissance"
-       // exits
-       // no rule was broken
-       true
-   }
-
-   fn moves_allowed(&self, color: &Color, moves: &(CheckerMove, CheckerMove)) -> bool {
-       // ------- corner rules ----------
-       let corner_field: Field = self.board.get_color_corner(color);
-       let (corner_count, _color) = self.board.get_field_checkers(corner_field).unwrap();
-       let (from0, to0, from1, to1) = (
-           moves.0.get_from(),
-           moves.0.get_to(),
-           moves.1.get_from(),
-           moves.1.get_to(),
-       );
-       // 2 checkers must go at the same time on an empty corner
-       if (to0 == corner_field || to1 == corner_field) && (to0 != to1) && corner_count == 0 {
-           return false;
-       }
-
-       // the last 2 checkers of a corner must leave at the same time
-       if (from0 == corner_field || from1 == corner_field) && (from0 != from1) && corner_count == 2
-       {
-           return false;
-       }
-
-       // ------- exit rules ----------
-       // -- all men must be in the returning jan
-       // -- if a man can exit, it must exit
-       // -- priority:
-       //    - a man sitting on the point matching the die
-       //    - a man farther from the exit than that point ("point défaillant")
-       //    - a man nearer than that point ("point excédant")
-
-       // --- fill the quarter when possible ----
-       // --- forbidden to play in a quarter the opponent can still fill ----
-       // no rule was broken
-       true
-   }

    // ----------------------------------------------------------------------------------
    // State updates
    // ----------------------------------------------------------------------------------
@@ -341,7 +474,6 @@ impl GameState {
        }

        let player_id = self.players.len() + 1;
-       println!("player_id {}", player_id);
        let color = if player_id == 1 {
            Color::White
        } else {
@@ -352,6 +484,7 @@ impl GameState {
        Some(player_id as PlayerId)
    }

+   #[cfg(test)]
    fn add_player(&mut self, player_id: PlayerId, player: Player) {
        self.players.insert(player_id, player);
    }
@@ -372,12 +505,24 @@ impl GameState {
        match valid_event {
            BeginGame { goes_first } => {
                self.active_player_id = *goes_first;
+               // if self.who_plays().is_none() {
+               //     let active_color = match self.dice.coin() {
+               //         false => Color::Black,
+               //         true => Color::White,
+               //     };
+               //     let color_player_id = self.player_id_by_color(active_color);
+               //     if color_player_id.is_some() {
+               //         self.active_player_id = *color_player_id.unwrap();
+               //     }
+               // }
                self.stage = Stage::InGame;
                self.turn_stage = TurnStage::RollDice;
            }
-           EndGame { reason: _ } => self.stage = Stage::Ended,
+           EndGame { reason: _ } => {
+               self.stage = Stage::Ended;
+           }
            PlayerJoined { player_id, name } => {
-               let color = if self.players.len() > 0 {
+               let color = if !self.players.is_empty() {
                    Color::White
                } else {
                    Color::Black
@@ -391,6 +536,7 @@ impl GameState {
                        points: 0,
                        can_bredouille: true,
                        can_big_bredouille: true,
+                       dice_roll_count: 0,
                    },
                );
            }
@@ -398,52 +544,186 @@ impl GameState {
                self.players.remove(player_id);
            }
            Roll { player_id: _ } => {
-               self.roll();
-               self.turn_stage = TurnStage::MarkPoints;
+               self.turn_stage = TurnStage::RollWaiting;
            }
-           Mark { player_id, points } => {
-               self.mark_points(*player_id, *points);
-               if self.stage != Stage::Ended {
+           RollResult { player_id: _, dice } => {
+               self.dice = *dice;
+               self.inc_roll_count(self.active_player_id);
+               self.turn_stage = TurnStage::MarkPoints;
+               (self.dice_jans, self.dice_points) = self.get_rollresult_jans(dice);
+               debug!("points from result : {:?}", self.dice_points);
+               if !self.schools_enabled {
+                   // Schools are not enabled: we mark points automatically;
+                   // the points earned by the opponent will be marked on its turn
+                   let new_hole = self.mark_points(self.active_player_id, self.dice_points.0);
+                   if new_hole {
+                       let holes_count = self.get_active_player().unwrap().holes;
+                       debug!("new hole -> {holes_count:?}");
+                       if holes_count > 12 {
+                           self.stage = Stage::Ended;
+                       } else {
+                           self.turn_stage = TurnStage::HoldOrGoChoice;
+                       }
+                   } else {
+                       self.turn_stage = TurnStage::Move;
+                   }
+               }
+           }
+           Mark { player_id, points } => {
+               if self.schools_enabled {
+                   let new_hole = self.mark_points(*player_id, *points);
+                   if new_hole {
+                       if self.get_active_player().unwrap().holes > 12 {
+                           self.stage = Stage::Ended;
+                       } else {
+                           self.turn_stage = if self.turn_stage == TurnStage::MarkAdvPoints {
+                               TurnStage::RollDice
+                           } else {
+                               TurnStage::HoldOrGoChoice
+                           };
+                       }
+                   } else {
+                       self.turn_stage = if self.turn_stage == TurnStage::MarkAdvPoints {
+                           TurnStage::RollDice
+                       } else {
+                           TurnStage::Move
+                       };
+                   }
+               }
+           }
+           Go { player_id: _ } => self.new_pick_up(),
            Move { player_id, moves } => {
                let player = self.players.get(player_id).unwrap();
                self.board.move_checker(&player.color, moves.0).unwrap();
                self.board.move_checker(&player.color, moves.1).unwrap();
-               self.active_player_id = self
-                   .players
-                   .keys()
-                   .find(|id| *id != player_id)
-                   .unwrap()
-                   .clone();
+               self.dice_moves = *moves;
+               self.active_player_id = *self.players.keys().find(|id| *id != player_id).unwrap();
+               self.turn_stage = if self.schools_enabled {
+                   TurnStage::MarkAdvPoints
+               } else {
+                   // The player has moved: we can now mark the opponent's points
+                   // (the opponent is now the current player)
+                   let new_hole = self.mark_points(self.active_player_id, self.dice_points.1);
+                   if new_hole && self.get_active_player().unwrap().holes > 12 {
+                       self.stage = Stage::Ended;
+                   }
+                   TurnStage::RollDice
+               };
            }
            PlayError => {}
        }
        self.history.push(valid_event.clone());
    }
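A hypothetical event flow with schools disabled (player ids 1 and 2 as allocated by `init_player`; `consume` applies events without validating them). After `RollResult`, the engine marks points itself:

```rust
fn demo() {
    let mut game = GameState::new_with_players("p1", "p2");
    game.consume(&GameEvent::Roll { player_id: 1 });
    assert_eq!(game.turn_stage, TurnStage::RollWaiting);
    game.consume(&GameEvent::RollResult {
        player_id: 1,
        dice: Dice { values: (3, 1) },
    });
    // MarkPoints resolves automatically when schools are off:
    assert!(matches!(
        game.turn_stage,
        TurnStage::Move | TurnStage::HoldOrGoChoice
    ));
}
```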
    /// Set up a new pick-up ('relevé') after a player won a hole and chose to 'go',
    /// or after a player has borne off (taken all of his men off the board)
    fn new_pick_up(&mut self) {
        self.players.iter_mut().for_each(|(_id, p)| {
            // reset points
            p.points = 0;
            // reset dice_roll_count
            p.dice_roll_count = 0;
            // reset bredouille
            p.can_bredouille = true;
            // XXX : switch colors
            // disabled for now because the move checks fail, cf. https://code.rhumbs.fr/henri/trictrac/issues/31
            // p.color = p.color.opponent_color();
        });
        // active player = the player who bore off his men or who chose to go (he therefore plays two games in a row)
        self.turn_stage = TurnStage::RollDice;
        // reset board
        self.board = Board::new();
    }

    fn get_rollresult_jans(&self, dice: &Dice) -> (PossibleJans, (u8, u8)) {
        let player = &self.players.get(&self.active_player_id).unwrap();
        debug!(
            "get rollresult for {:?} {:?} {:?} (roll count {:?})",
            player.color, self.board, dice, player.dice_roll_count
        );
        let points_rules = PointsRules::new(&player.color, &self.board, *dice);
        points_rules.get_result_jans(player.dice_roll_count)
    }

    /// Determines if someone has won the game
    pub fn determine_winner(&self) -> Option<PlayerId> {
-       None
+       // A player has won if he has got 12 holes
+       self.players
+           .iter()
+           .filter(|(_, p)| p.holes > 11)
+           .map(|(id, _)| *id)
+           .next()
    }

-   fn mark_points(&mut self, player_id: PlayerId, points: u8) {
-       todo!()
-   }
+   fn inc_roll_count(&mut self, player_id: PlayerId) {
+       self.players.get_mut(&player_id).map(|p| {
+           p.dice_roll_count = p.dice_roll_count.saturating_add(1);
+           p
+       });
+   }

    fn mark_points(&mut self, player_id: PlayerId, points: u8) -> bool {
        // Update player points and holes
        let mut new_hole = false;
        self.players.get_mut(&player_id).map(|p| {
            let sum_points = p.points + points;
            let jeux = sum_points / 12;
            let holes = match (jeux, p.can_bredouille) {
                (0, _) => 0,
                (_, false) => 2 * jeux - 1,
                (_, true) => 2 * jeux,
            };

            new_hole = holes > 0;
            if new_hole {
                p.can_bredouille = true;
            }
            p.points = sum_points % 12;
            p.holes += holes;

            // if points > 0 && p.holes > 15 {
            if points > 0 {
                debug!(
                    "player {player_id:?} holes : {:?} (+{holes:?}) points : {:?} (+{points:?} - {jeux:?})",
                    p.holes, p.points
                )
            }
            p
        });

        // Opponent updates
        let maybe_op = if player_id == self.active_player_id {
            self.get_opponent_id()
        } else {
            Some(player_id)
        };
        if let Some(opp_id) = maybe_op {
            if points > 0 {
                self.players.get_mut(&opp_id).map(|opponent| {
                    // Cancel the opponent's bredouille
                    opponent.can_bredouille = false;
                    // Reset the opponent's points if the player finished a hole
                    if new_hole {
                        opponent.points = 0;
                        opponent.can_bredouille = true;
                    }
                    opponent
                });
            }
        }

        new_hole
    }
}
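The hole bookkeeping above reduces to a small formula: 12 points make one 'jeu', worth two holes while the bredouille holds, otherwise 2 * jeux - 1, and leftover points carry over. A standalone sketch of that arithmetic (not the crate's API):

```rust
fn holes_won(current_points: u8, marked: u8, can_bredouille: bool) -> (u8, u8) {
    let sum = current_points + marked;
    let jeux = sum / 12;
    let holes = match (jeux, can_bredouille) {
        (0, _) => 0,
        (_, false) => 2 * jeux - 1,
        (_, true) => 2 * jeux,
    };
    (holes, sum % 12) // (holes gained, points carried over)
}

fn demo() {
    assert_eq!(holes_won(8, 16, true), (4, 0)); // two jeux en bredouille -> 4 holes
    assert_eq!(holes_won(8, 16, false), (3, 0)); // bredouille broken -> 3 holes
    assert_eq!(holes_won(5, 4, true), (0, 9)); // under 12 points: no hole yet
}
```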
/// The reasons why a game could end
-#[derive(Debug, Clone, Copy, Serialize, PartialEq, Deserialize)]
+#[derive(Debug, Clone, Copy, Serialize, PartialEq, Eq, Deserialize)]
pub enum EndGameReason {
    // In tic tac toe it doesn't make sense to keep playing when one of the players disconnects.
    // Note that it might make sense to keep playing in some other game (like Team Fight Tactics for instance).
    PlayerLeft { player_id: PlayerId },
    PlayerWon { winner: PlayerId },
}

/// An event that progresses the GameState forward
-#[derive(Debug, Clone, Serialize, PartialEq, Deserialize)]
+#[derive(Debug, Clone, Serialize, PartialEq, Eq, Deserialize)]
pub enum GameEvent {
    BeginGame {
        goes_first: PlayerId,
@ -461,80 +741,94 @@ pub enum GameEvent {
    Roll {
        player_id: PlayerId,
    },
    RollResult {
        player_id: PlayerId,
        dice: Dice,
    },
    Mark {
        player_id: PlayerId,
        points: u8,
    },
    Go {
        player_id: PlayerId,
    },
    Move {
        player_id: PlayerId,
        moves: (CheckerMove, CheckerMove),
    },
    PlayError,
}

impl Roll for GameState {
    fn roll(&mut self) -> &mut Self {
        self.dices = self.dices.roll();
        if self.who_plays().is_none() {
            let active_color = match self.dices.coin() {
                false => Color::Black,
                true => Color::White,
            };
            let color_player_id = self.player_id_by_color(active_color);
            if color_player_id.is_some() {
                self.active_player_id = *color_player_id.unwrap();
            }
        }
        self
impl GameEvent {
    pub fn player_id(&self) -> Option<PlayerId> {
        match self {
            Self::PlayerJoined { player_id, name: _ } => Some(*player_id),
            Self::PlayerDisconnected { player_id } => Some(*player_id),
            Self::Roll { player_id } => Some(*player_id),
            Self::RollResult { player_id, dice: _ } => Some(*player_id),
            Self::Mark {
                player_id,
                points: _,
            } => Some(*player_id),
            Self::Go { player_id } => Some(*player_id),
            Self::Move {
                player_id,
                moves: _,
            } => Some(*player_id),
            _ => None,
        }
    }

impl Move for GameState {
    fn move_checker(&mut self, player: &Player, dice: u8, from: usize) -> Result<&mut Self, Error> {
        // check that the move is permitted
        let _ = self.move_permitted(player, dice)?;

        // remove the checker from its old position
        self.board.set(&player.color, from, -1)?;

        // move the checker to its new position; if it reaches the off position, set it off
        let new_position = from as i8 - dice as i8;
        if new_position < 0 {
            // self.board.set_off(player, 1)?;
    pub fn get_mirror(&self) -> Self {
        // let mut mirror = self.clone();
        let mirror_player_id = if let Some(player_id) = self.player_id() {
            if player_id == 1 {
                2
            } else {
            // self.board.set(player, new_position as usize, 1)?;
                1
            }
        } else {
            0
        };

        // switch to the other player if all dice have been consumed
        self.switch_active_player();
        self.roll_first = true;

        Ok(self)
        match self {
            Self::PlayerJoined { player_id: _, name } => Self::PlayerJoined {
                player_id: mirror_player_id,
                name: name.clone(),
            },
            Self::PlayerDisconnected { player_id: _ } => GameEvent::PlayerDisconnected {
                player_id: mirror_player_id,
            },
            Self::Roll { player_id: _ } => GameEvent::Roll {
                player_id: mirror_player_id,
            },
            Self::RollResult { player_id: _, dice } => GameEvent::RollResult {
                player_id: mirror_player_id,
                dice: *dice,
            },
            Self::Mark {
                player_id: _,
                points,
            } => GameEvent::Mark {
                player_id: mirror_player_id,
                points: *points,
            },
            Self::Go { player_id: _ } => GameEvent::Go {
                player_id: mirror_player_id,
            },
            Self::Move {
                player_id: _,
                moves: (move1, move2),
            } => Self::Move {
                player_id: mirror_player_id,
                moves: (move1.mirror(), move2.mirror()),
            },
            Self::BeginGame { goes_first } => GameEvent::BeginGame {
                goes_first: (if *goes_first == 1 { 2 } else { 1 }),
            },
            Self::EndGame { reason } => GameEvent::EndGame { reason: *reason },
            Self::PlayError => GameEvent::PlayError,
        }

    /// Implements checks to validate that the player is allowed to move
    fn move_permitted(&mut self, player: &Player, dice: u8) -> Result<&mut Self, Error> {
        let maybe_player_id = self.player_id(&player);
        // check that the player is allowed to move
        if maybe_player_id != Some(&self.active_player_id) {
            return Err(Error::NotYourTurn);
        }

        // if the player is nobody, they cannot play and must roll first
        if maybe_player_id.is_none() {
            return Err(Error::RollFirst);
        }

        // check whether the player has to roll first
        if self.roll_first {
            return Err(Error::RollFirst);
        }

        // check that the dice value has actually been rolled
        if dice != self.dices.values.0 && dice != self.dices.values.1 {
            return Err(Error::DiceInvalid);
        }

        Ok(self)
    }
}
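get_mirror lets a client replay the event log from the opposite seat: player ids 1 and 2 are swapped and each CheckerMove is reflected across the board. A hypothetical usage sketch (assuming the CheckerMove::new and mirror signatures shown elsewhere in this diff):

// Hypothetical: mirroring a Move event swaps the player id
// and reflects both checker moves.
let event = GameEvent::Move {
    player_id: 1,
    moves: (
        CheckerMove::new(1, 5).unwrap(),
        CheckerMove::new(5, 9).unwrap(),
    ),
};
let mirrored = event.get_mirror();
// mirrored.player_id() == Some(2), and each move is move.mirror()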

@ -542,64 +836,73 @@ impl Move for GameState
mod tests {
    use super::*;

    #[test]
    fn test_to_string_id() {
    fn init_test_gamestate(turn: TurnStage) -> GameState {
        let mut state = GameState::default();
        state.add_player(1, Player::new("player1".into(), Color::White));
        state.add_player(2, Player::new("player2".into(), Color::Black));
        state.active_player_id = 1;
        state.turn_stage = turn;
        state
    }

    #[test]
    fn to_string_id() {
        let state = init_test_gamestate(TurnStage::RollDice);
        let string_id = state.to_string_id();
        // println!("string_id : {}", string_id);
        assert!(string_id == "Dz8+AAAAAT8/MAAAAAQAADAD");
        assert_eq!(string_id, "Hz88AAAAAz8/IAAAAAQAADAD");
    }

    #[test]
    fn test_moves_possible() {
        let mut state = GameState::default();
        let player1 = Player::new("player1".into(), Color::White);
        let player_id = 1;
        state.add_player(player_id, player1);
        state.add_player(2, Player::new("player2".into(), Color::Black));
        state.consume(&GameEvent::BeginGame {
            goes_first: player_id,
        });
    fn hold_or_go() {
        let mut game_state = init_test_gamestate(TurnStage::MarkPoints);
        game_state.schools_enabled = true;
        let pid = game_state.active_player_id;
        game_state.consume(
            &(GameEvent::Mark {
                player_id: pid,
                points: 13,
            }),
        );
        let player = game_state.get_active_player().unwrap();
        assert_eq!(player.points, 1);
        assert_eq!(player.holes, 2); // because can bredouille
        assert_eq!(game_state.turn_stage, TurnStage::HoldOrGoChoice);

        // Chained moves
        // Go
        game_state.consume(
            &(GameEvent::Go {
                player_id: game_state.active_player_id,
            }),
        );
        assert_eq!(game_state.active_player_id, pid);
        let player = game_state.get_active_player().unwrap();
        assert_eq!(player.points, 0);
        assert_eq!(game_state.turn_stage, TurnStage::RollDice);

        // Hold
        let mut game_state = init_test_gamestate(TurnStage::MarkPoints);
        game_state.schools_enabled = true;
        let pid = game_state.active_player_id;
        game_state.consume(
            &(GameEvent::Mark {
                player_id: pid,
                points: 13,
            }),
        );
        let moves = (
            CheckerMove::new(1, 5).unwrap(),
            CheckerMove::new(5, 9).unwrap(),
            CheckerMove::new(1, 3).unwrap(),
            CheckerMove::new(1, 3).unwrap(),
        );
        assert!(state.moves_possible(&Color::White, &moves));

        // not chained moves
        let moves = (
            CheckerMove::new(1, 5).unwrap(),
            CheckerMove::new(6, 9).unwrap(),
        game_state.consume(
            &(GameEvent::Move {
                player_id: game_state.active_player_id,
                moves,
            }),
        );
        assert!(!state.moves_possible(&Color::White, &moves));
    }

    #[test]
    fn test_moves_follow_dices() {
        let mut state = GameState::default();
        let player1 = Player::new("player1".into(), Color::White);
        let player_id = 1;
        state.add_player(player_id, player1);
        state.add_player(2, Player::new("player2".into(), Color::Black));
        state.consume(&GameEvent::BeginGame {
            goes_first: player_id,
        });
        state.consume(&GameEvent::Roll { player_id });
        let dices = state.dices.values;
        let moves = (
            CheckerMove::new(1, (1 + dices.0).into()).unwrap(),
            CheckerMove::new((1 + dices.0).into(), (1 + dices.0 + dices.1).into()).unwrap(),
        );
        assert!(state.moves_follows_dices(&Color::White, &moves));

        let badmoves = (
            CheckerMove::new(1, (2 + dices.0).into()).unwrap(),
            CheckerMove::new((1 + dices.0).into(), (1 + dices.0 + dices.1).into()).unwrap(),
        );
        assert!(!state.moves_follows_dices(&Color::White, &badmoves));
        assert_ne!(game_state.active_player_id, pid);
        assert_eq!(game_state.players.get(&pid).unwrap().points, 1);
        assert_eq!(game_state.get_active_player().unwrap().points, 0);
        assert_eq!(game_state.turn_stage, TurnStage::MarkAdvPoints);
    }
}

1297 store/src/game_rules_moves.rs Normal file
File diff suppressed because it is too large
885 store/src/game_rules_points.rs Normal file

@ -0,0 +1,885 @@
use crate::board::{Board, FieldWithCount, EMPTY_MOVE};
use crate::dice::Dice;
use crate::game_rules_moves::MoveRules;
use crate::player::Color;
use crate::CheckerMove;
use crate::Error;

use log::debug;
use serde::{Deserialize, Serialize};
use std::cmp;
use std::collections::HashMap;

#[allow(clippy::enum_variant_names)]
#[derive(PartialEq, Eq, Hash, Clone, Debug, Serialize, Deserialize)]
pub enum Jan {
    FilledQuarter,
    TrueHitSmallJan,
    TrueHitBigJan,
    TrueHitOpponentCorner,
    FirstPlayerToExit,
    SixTables,
    TwoTables,
    Mezeas,

    FalseHitSmallJan,
    FalseHitBigJan,
    ContreTwoTables,
    ContreMezeas,
    HelplessMan,
}

impl Jan {
    pub fn get_points(&self, is_double: bool) -> i8 {
        match self {
            Self::FalseHitSmallJan | Self::ContreTwoTables | Self::ContreMezeas => {
                if is_double {
                    -6
                } else {
                    -4
                }
            }
            Self::FalseHitBigJan | Self::HelplessMan => {
                if is_double {
                    -4
                } else {
                    -2
                }
            }
            Self::TrueHitBigJan => {
                if is_double {
                    4
                } else {
                    2
                }
            }
            _ => {
                if is_double {
                    6
                } else {
                    4
                }
            }
        }
    }
}
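In short, get_points encodes a fixed tariff: most winning jans score 4 (6 on a double), a true hit in the big-jan table scores 2 (4 on a double), and the negative variants charge the same magnitudes to the adversary. A quick check (hypothetical test inside this module, not in the commit):

#[test]
fn jan_points_tariff() {
    assert_eq!(Jan::FilledQuarter.get_points(false), 4);
    assert_eq!(Jan::FilledQuarter.get_points(true), 6);
    assert_eq!(Jan::TrueHitBigJan.get_points(true), 4);
    assert_eq!(Jan::HelplessMan.get_points(false), -2);
}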

pub type PossibleJans = HashMap<Jan, Vec<(CheckerMove, CheckerMove)>>;

pub trait PossibleJansMethods {
    fn push(&mut self, jan: Jan, cmoves: (CheckerMove, CheckerMove));
    fn merge(&mut self, other: Self);
    // fn get_points(&self) -> u8;
}

impl PossibleJansMethods for PossibleJans {
    fn push(&mut self, jan: Jan, cmoves: (CheckerMove, CheckerMove)) {
        if let Some(ways) = self.get_mut(&jan) {
            if !ways.contains(&cmoves) {
                ways.push(cmoves);
            }
        } else {
            self.insert(jan, [cmoves].into());
        }
    }

    fn merge(&mut self, other: Self) {
        for (jan, cmoves_list) in other {
            for cmoves in cmoves_list {
                // for a given move, a true hit takes priority over a false hit
                match jan {
                    Jan::FalseHitBigJan => {
                        let mut has_true_hit = false;
                        if let Some(true_moves) = self.get(&Jan::TrueHitBigJan) {
                            has_true_hit = true_moves.contains(&cmoves);
                        }
                        if !has_true_hit {
                            self.push(jan.clone(), cmoves);
                        }
                    }
                    Jan::FalseHitSmallJan => {
                        let mut has_true_hit = false;
                        if let Some(true_moves) = self.get(&Jan::TrueHitSmallJan) {
                            has_true_hit = true_moves.contains(&cmoves);
                        }
                        if !has_true_hit {
                            self.push(jan.clone(), cmoves);
                        }
                    }
                    Jan::TrueHitBigJan => {
                        if let Some(false_moves) = self.get_mut(&Jan::FalseHitBigJan) {
                            false_moves.retain(|fmoves| *fmoves != cmoves);
                        }
                        self.push(jan.clone(), cmoves);
                    }
                    Jan::TrueHitSmallJan => {
                        if let Some(false_moves) = self.get_mut(&Jan::FalseHitSmallJan) {
                            false_moves.retain(|fmoves| *fmoves != cmoves);
                        }
                        self.push(jan.clone(), cmoves);
                    }
                    _ => {
                        self.push(jan.clone(), cmoves);
                    }
                }
            }
        }
    }
}
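merge is not a plain HashMap union: when the same pair of moves appears as both a true hit and a false hit, the true hit wins regardless of insertion order. A hypothetical illustration inside this module (assuming CheckerMove is Copy, as its use elsewhere in the diff suggests):

// Hypothetical sketch of the priority rule in merge.
let mv = (CheckerMove::new(1, 3).unwrap(), EMPTY_MOVE);
let mut jans = PossibleJans::default();
jans.push(Jan::FalseHitSmallJan, mv);

let mut other = PossibleJans::default();
other.push(Jan::TrueHitSmallJan, mv);

jans.merge(other);
// The false-hit entry for `mv` has been retained out; only the true hit keeps it.
assert!(jans.get(&Jan::FalseHitSmallJan).map_or(true, |v| v.is_empty()));
assert_eq!(jans.get(&Jan::TrueHitSmallJan).unwrap().len(), 1);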

/// PointsRules always considers that the current player is White:
/// use the board's 'mirror' function when the player is Black
#[derive(Default)]
pub struct PointsRules {
    pub board: Board,
    pub dice: Dice,
    pub move_rules: MoveRules,
}

impl PointsRules {
    /// Mirror the board if the color is Black
    pub fn new(color: &Color, board: &Board, dice: Dice) -> Self {
        let board = if *color == Color::Black {
            board.mirror()
        } else {
            board.clone()
        };
        // the board is already mirrored for Black, so we can pretend the color is White
        let move_rules = MoveRules::new(&Color::White, &board, dice);
        // let move_rules = MoveRules::new(color, &board, dice);

        // let move_rules = MoveRules::new(color, &self.board, dice, moves);
        Self {
            board,
            dice,
            move_rules,
        }
    }
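Because of this normalization, callers never branch on color when scoring: they hand the real board to PointsRules::new and read points for the mirrored-to-White position. A short usage sketch (modeled on the shape of the tests further down, not a new API):

// Scoring for Black: new() mirrors the board internally,
// so the same White-only jan detection applies.
let board = Board::new();
let rules = PointsRules::new(&Color::Black, &board, Dice { values: (2, 3) });
let (points, adv_points) = rules.get_points(5);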

    pub fn set_dice(&mut self, dice: Dice) {
        self.dice = dice;
        self.move_rules.dice = dice;
    }

    pub fn update_positions(&mut self, color: &Color, positions: [i8; 24]) {
        self.board.set_positions(color, positions);
        self.move_rules.board.set_positions(color, positions);
    }

    fn get_jans(&self, board_ini: &Board, dice_rolls_count: u8) -> PossibleJans {
        let dices = &[self.dice.values.0, self.dice.values.1];
        let dices_reversed = &[self.dice.values.1, self.dice.values.0];
        let dice1 = self.dice.values.0 as usize;
        let dice2 = self.dice.values.1 as usize;

        // « JAN DE RÉCOMPENSE » (reward jans)
        // true hit on a checker in the big-jan table
        // true hit on a checker in the small-jan table
        let mut jans = get_jans_by_ordered_dice(board_ini, dices, None, false);
        let jans_revert_dices = get_jans_by_ordered_dice(board_ini, dices_reversed, None, false);
        jans.merge(jans_revert_dices);

        // true hit on the opponent's rest corner
        let corner_field = board_ini.get_color_corner(&Color::White);
        let adv_corner_field = board_ini.get_color_corner(&Color::Black);
        let (adv_corner_count, _color) = board_ini.get_field_checkers(adv_corner_field).unwrap();
        if adv_corner_count == 0 {
            let from0 = adv_corner_field - self.dice.values.0 as usize;
            let from1 = adv_corner_field - self.dice.values.1 as usize;

            let (from0_count, _from0_color) = board_ini.get_field_checkers(from0).unwrap();
            let (from1_count, _from1_color) = board_ini.get_field_checkers(from1).unwrap();
            let hit_moves = vec![(
                CheckerMove::new(from0, adv_corner_field).unwrap(),
                CheckerMove::new(from1, adv_corner_field).unwrap(),
            )];

            if from0 == from1 {
                // double
                if from0_count > if from0 == corner_field { 3 } else { 1 } {
                    jans.insert(Jan::TrueHitOpponentCorner, hit_moves);
                }
            } else {
                // simple roll
                if from0_count > if from0 == corner_field { 2 } else { 0 }
                    && from1_count > if from1 == corner_field { 2 } else { 0 }
                {
                    jans.insert(Jan::TrueHitOpponentCorner, hit_moves);
                }
            }
        }

        // « JAN DE REMPLISSAGE » (filling jans)
        // making a small jan, a big jan or a return jan
        let filling_moves_sequences = self
            .move_rules
            .get_scoring_quarter_filling_moves_sequences();
        if !filling_moves_sequences.is_empty() {
            jans.insert(Jan::FilledQuarter, filling_moves_sequences);
        }

        // « AUTRE » (other)
        // being the first to bear off all one's checkers
        let mut checkers = board_ini.get_color_fields(Color::White);
        checkers.sort_by(|a, b| b.0.cmp(&a.0));
        let checkers_count = checkers.iter().fold(0, |acc, (_f, count)| acc + count);
        if checkers_count < 3 {
            let mut farthest = 24;
            let mut next_farthest = 24;
            if let Some((field, count)) = checkers.first() {
                farthest = *field;
                if *count > 1 {
                    next_farthest = *field;
                } else if let Some((field, _count)) = checkers.get(1) {
                    next_farthest = *field;
                }
            }

            if farthest + cmp::max(self.dice.values.0, self.dice.values.1) as usize > 23
                && next_farthest + cmp::min(self.dice.values.0, self.dice.values.1) as usize > 23
            {
                let exit_moves = vec![(
                    CheckerMove::new(farthest, 0).unwrap(),
                    if checkers_count > 1 {
                        CheckerMove::new(next_farthest, 0).unwrap()
                    } else {
                        CheckerMove::new(0, 0).unwrap()
                    },
                )];

                jans.insert(Jan::FirstPlayerToExit, exit_moves);
            }
        }

        // « JANS RARES » (rare jans)
        // jan of 3 throws, or of 6 tables
        if dice_rolls_count == 3 {
            // we should have 4 fields occupied by exactly one checker each
            let fields_with_single: Vec<&(usize, i8)> =
                checkers.iter().filter(|(_, c)| c == &1).collect();
            if fields_with_single.len() == 4 {
                let checkers_fields: Vec<usize> = checkers.iter().map(|(f, _)| *f).collect();
                let mut missing_for_6tables: Vec<usize> = Vec::from([2, 3, 4, 5, 6, 7])
                    .into_iter()
                    .filter(|f| !checkers_fields.contains(f))
                    .collect();
                if missing_for_6tables.len() == 2 {
                    // the dice must allow moving two checkers from the talon to the 2 empty
                    // fields
                    let mut dice_to: Vec<usize> = vec![
                        1 + self.dice.values.0 as usize,
                        1 + self.dice.values.1 as usize,
                    ];
                    missing_for_6tables.sort();
                    dice_to.sort();
                    if dice_to == missing_for_6tables {
                        let moves = vec![(
                            CheckerMove::new(1, missing_for_6tables[0]).unwrap(),
                            CheckerMove::new(1, missing_for_6tables[1]).unwrap(),
                        )];
                        jans.insert(Jan::SixTables, moves);
                    }
                }
            }
        }

        // jans requiring that exactly two checkers have left the talon
        let (talon, candidates): (Vec<FieldWithCount>, Vec<FieldWithCount>) =
            checkers.iter().partition(|(field, _count)| field == &1);
        let candidates_fields = candidates.iter().fold(vec![], |mut acc, (f, c)| {
            acc.extend_from_slice(&vec![*f; *c as usize]);
            acc
        });
        if !talon.is_empty() && talon[0].1 == 13 && candidates_fields.len() == 2 {
            let field1 = candidates_fields[0];
            let field2 = candidates_fields[1];

            // jan of two tables and contre jan of two tables
            let jan = if adv_corner_count == 0 {
                Jan::TwoTables
            } else {
                Jan::ContreTwoTables
            };
            if (field1 + dice1 == 12 && field2 + dice2 == 13)
                || (field1 + dice2 == 12 && field2 + dice1 == 13)
            {
                let moves = vec![(
                    CheckerMove::new(field1, 12).unwrap(),
                    CheckerMove::new(field2, 13).unwrap(),
                )];
                jans.insert(jan, moves);
            } else if (field1 + dice1 == 13 && field2 + dice2 == 12)
                || (field1 + dice2 == 13 && field2 + dice1 == 12)
            {
                let moves = vec![(
                    CheckerMove::new(field1, 13).unwrap(),
                    CheckerMove::new(field2, 12).unwrap(),
                )];
                jans.insert(jan, moves);
            }

            // jan de Mezeas and contre jan de Mezeas
            let jan = if adv_corner_count == 0 {
                Jan::Mezeas
            } else {
                Jan::ContreMezeas
            };
            if field1 == 12 && field2 == 12 && (dice1 == 1 || dice2 == 1) {
                let moves = vec![(
                    CheckerMove::new(field1, field1 + dice1).unwrap(),
                    CheckerMove::new(field2, field2 + dice2).unwrap(),
                )];
                jans.insert(jan, moves);
            }
        }

        // "jan qui ne peut": unplayable dice
        let poss = self.move_rules.get_possible_moves_sequences(true, vec![]);
        let moves = poss.iter().fold(vec![], |mut acc, (m1, m2)| {
            acc.push(*m1);
            acc.push(*m2);
            acc
        });
        let moves_dice1: Vec<&CheckerMove> =
            moves.iter().filter(|m| m.doable_with_dice(dice1)).collect();
        let moves_dice2: Vec<&CheckerMove> =
            moves.iter().filter(|m| m.doable_with_dice(dice2)).collect();
        if poss.is_empty() {
            jans.insert(
                Jan::HelplessMan,
                vec![(CheckerMove::default(), CheckerMove::default())],
            );
        } else if moves_dice1.is_empty() || moves_dice2.is_empty() {
            jans.insert(Jan::HelplessMan, vec![poss[0]]);
        }

        jans
    }

    pub fn get_jans_points(&self, jans: HashMap<Jan, Vec<(CheckerMove, CheckerMove)>>) -> (u8, u8) {
        let (points, adv_points) = jans
            .into_iter()
            .fold((0, 0), |acc: (i8, i8), (jan, moves)| {
                // println!("get_points : {:?}", jan);
                let is_double = if jan == Jan::HelplessMan {
                    moves[0] == (CheckerMove::default(), CheckerMove::default())
                } else {
                    self.dice.is_double()
                };
                let points = jan.get_points(is_double) * (moves.len() as i8);
                if points < 0 {
                    (acc.0, acc.1 - points)
                } else {
                    (acc.0 + points, acc.1)
                }
            });

        (points as u8, adv_points as u8)
    }
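Note the sign convention in this fold: a jan with a negative tariff is not subtracted from the scorer's total but credited to the adversary's side of the returned tuple. A minimal sketch with hypothetical values, inside this module:

// Hypothetical: one false hit in the big-jan table on non-double dice
// is worth -2, which get_jans_points routes to the adversary.
let mut rules = PointsRules::default();
rules.set_dice(Dice { values: (2, 4) });
let mut jans = PossibleJans::default();
jans.push(Jan::FalseHitBigJan, (CheckerMove::new(1, 3).unwrap(), EMPTY_MOVE));
assert_eq!(rules.get_jans_points(jans), (0, 2));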

    pub fn get_points(&self, dice_rolls_count: u8) -> (u8, u8) {
        let jans = self.get_jans(&self.board, dice_rolls_count);
        self.get_jans_points(jans)
    }

    pub fn get_result_jans(&self, dice_rolls_count: u8) -> (PossibleJans, (u8, u8)) {
        let jans = self.get_jans(&self.board, dice_rolls_count);
        debug!("jans : {jans:?}");
        let points_jans = jans.clone();
        (jans, self.get_jans_points(points_jans))
    }
}

fn get_jans_by_ordered_dice(
    board_ini: &Board,
    // dices: &Vec<u8>,
    dices: &[u8],
    only_from: Option<usize>,
    only_false_hit: bool,
) -> PossibleJans {
    let mut jans = PossibleJans::default();
    let mut dices: Vec<u8> = dices.to_vec();
    if let Some(dice) = dices.pop() {
        let color = Color::White;
        let mut board = board_ini.clone();
        let corner_field = board.get_color_corner(&color);
        let adv_corner_field = board.get_color_corner(&Color::Black);
        let froms = if let Some(from) = only_from {
            vec![from]
        } else {
            board
                .get_color_fields(color)
                .iter()
                .map(|cf| cf.0)
                .collect()
        };
        for from in froms {
            // for (from, _) in board.get_color_fields(color) {
            let to = if from + dice as usize > 24 {
                0
            } else {
                from + dice as usize
            };
            if let Ok(cmove) = CheckerMove::new(from, to) {
                // print!(
                //     " <dice_move dice='{:?}' moves='{:?} -> {:?}'> ",
                //     dice, from, to
                // );
                // Check that the move is not forbidden by the rest-corner rules:
                // - we never land on the opponent's corner
                // - nor on our own rest corner with a single checker
                // - rule not applied when hitting checkers: we do not leave our rest corner if only two checkers remain there
                let (corner_count, _color) = board.get_field_checkers(corner_field).unwrap();
                if to != adv_corner_field && (to != corner_field || corner_count > 1) {
                    // if only_false_hit is true, we are already inside a "tout d'une" attempt
                    let mut can_try_toutdune = !only_false_hit;
                    let mut only_falsehit = false;
                    match board.move_checker(&color, cmove) {
                        Err(Error::FieldBlockedByOne) => {
                            let jan = match (Board::is_field_in_small_jan(to), only_false_hit) {
                                (true, false) => Jan::TrueHitSmallJan,
                                (true, true) => Jan::FalseHitSmallJan,
                                (false, false) => Jan::TrueHitBigJan,
                                (false, true) => Jan::FalseHitBigJan,
                            };
                            jans.push(jan, (cmove, EMPTY_MOVE));
                        }
                        Err(Error::FieldBlocked) => {
                            only_falsehit = true;
                        }
                        Err(_) => {
                            can_try_toutdune = false;
                            // let next_dice_jan = self.get_jans(&board, &dices);
                            // jans still possible "tout d'une" after a true hit:
                            // truehit
                        }
                        Ok(()) => {}
                    }
                    if can_try_toutdune {
                        // Try "tout d'une":
                        // - use the original board, before the first die was played
                        // - use a virtual die by adding the current die to each remaining die
                        // - limit the checkers to the current one
                        let next_dice_jan = get_jans_by_ordered_dice(
                            board_ini,
                            &dices.iter().map(|d| d + dice).collect::<Vec<u8>>(),
                            Some(from),
                            only_falsehit,
                        );
                        jans.merge(next_dice_jan);
                    }
                }
                // Second die
                let next_dice_jan = get_jans_by_ordered_dice(board_ini, &dices, None, false);
                jans.merge(next_dice_jan);
            }
        }
    }

    jans
}
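Note how the "tout d'une" branch collapses dice instead of simulating the intermediate landing: it recurses on the original board with a virtual die equal to the sum, restricted to the same checker, and separately recurses on the remaining dice unrestricted. A hypothetical trace of the call shape inside this module:

// Hypothetical trace for dice [2, 3] (the last element is played first):
let board = Board::new();
let jans = get_jans_by_ordered_dice(&board, &[2, 3], None, false);
// 1. pop() -> 3: probe from -> from + 3 for every occupied field
// 2. "tout d'une": recurse with [2 + 3] = [5], only_from = Some(from),
//    against the ORIGINAL board (board_ini)
// 3. then recurse with the remaining [2], with no origin restriction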
#[cfg(test)]
mod tests {
    use super::*;
    #[test]
    fn get_jans_by_dice_order() {
        let mut rules = PointsRules::default();
        rules.board.set_positions(
            &Color::White,
            [
                2, 0, -1, -1, 0, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            ],
        );

        let jans = get_jans_by_ordered_dice(&rules.board, &[2, 3], None, false);
        assert_eq!(1, jans.len());
        assert_eq!(3, jans.get(&Jan::TrueHitSmallJan).unwrap().len());

        let jans = get_jans_by_ordered_dice(&rules.board, &[2, 2], None, false);
        assert_eq!(1, jans.len());
        assert_eq!(1, jans.get(&Jan::TrueHitSmallJan).unwrap().len());

        // We may pass over a hit checker to hit another one,
        // but not over a field filled by the opponent
        rules.board.set_positions(
            &Color::White,
            [
                2, 0, -1, -2, 0, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            ],
        );

        let mut jans = get_jans_by_ordered_dice(&rules.board, &[2, 3], None, false);
        let jans_revert_dices = get_jans_by_ordered_dice(&rules.board, &[3, 2], None, false);
        assert_eq!(2, jans.len());
        assert_eq!(1, jans_revert_dices.len());
        jans.merge(jans_revert_dices);
        assert_eq!(2, jans.get(&Jan::TrueHitSmallJan).unwrap().len());

        rules.board.set_positions(
            &Color::White,
            [
                2, 0, -1, 0, 0, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            ],
        );

        let jans = get_jans_by_ordered_dice(&rules.board, &[2, 3], None, false);
        assert_eq!(1, jans.len());
        assert_eq!(2, jans.get(&Jan::TrueHitSmallJan).unwrap().len());

        rules.board.set_positions(
            &Color::White,
            [
                2, 0, 0, 0, 0, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            ],
        );

        let jans = get_jans_by_ordered_dice(&rules.board, &[2, 3], None, false);
        assert_eq!(1, jans.len());
        assert_eq!(1, jans.get(&Jan::TrueHitSmallJan).unwrap().len());

        rules.board.set_positions(
            &Color::White,
            [
                2, 0, 1, 1, 0, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            ],
        );

        let jans = get_jans_by_ordered_dice(&rules.board, &[2, 3], None, false);
        assert_eq!(1, jans.len());
        assert_eq!(3, jans.get(&Jan::TrueHitSmallJan).unwrap().len());

        // corners handling

        // both dice blocked (own rest corner and opponent's corner)
        rules.board.set_positions(
            &Color::White,
            [
                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            ],
        );
        // the first die processed is the last of the vector: 1
        let jans = get_jans_by_ordered_dice(&rules.board, &[2, 1], None, false);
        // println!("jans (dés bloqués) : {:?}", jans.get(&Jan::TrueHit));
        assert_eq!(0, jans.len());

        // a die landing on one's own rest corner can still make a true hit
        rules.board.set_positions(
            &Color::White,
            [
                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            ],
        );
        let jans = get_jans_by_ordered_dice(&rules.board, &[3, 3], None, false);
        assert_eq!(1, jans.len());

        // first die blocked, but "tout d'une" possible when starting with the second
        rules.board.set_positions(
            &Color::White,
            [
                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            ],
        );
        let mut jans = get_jans_by_ordered_dice(&rules.board, &[3, 1], None, false);
        let jans_revert_dices = get_jans_by_ordered_dice(&rules.board, &[1, 3], None, false);
        assert_eq!(1, jans_revert_dices.len());

        jans.merge(jans_revert_dices);
        assert_eq!(1, jans.len());
        // print!("jans (2) : {:?}", jans.get(&Jan::TrueHit));

        // false hit: do not count it if reversing the dice order yields a true hit
    }

    #[test]
    fn get_result_jans() {
        let mut board = Board::new();
        board.set_positions(
            &Color::White,
            [
                0, 0, 5, 2, 4, 3, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -3, -2, -2, -2, -2, -2, -2,
            ],
        );
        let points_rules = PointsRules::new(&Color::Black, &board, Dice { values: (2, 4) });
        let jans = points_rules.get_result_jans(8);
        assert!(!jans.0.is_empty());
    }

    #[test]
    fn get_points() {
        // ----- reward jans
        // true hit on a checker in the small-jan table: 4 + 4 + 4 = 12
        let mut rules = PointsRules::default();
        rules.update_positions(
            &Color::White,
            [
                2, 0, -1, -1, 0, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            ],
        );
        rules.set_dice(Dice { values: (2, 3) });
        assert_eq!(12, rules.get_points(5).0);

        // points computation for Black
        let mut board = Board::new();
        board.set_positions(
            &Color::White,
            [
                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, -2,
            ],
        );
        let rules = PointsRules::new(&Color::Black, &board, Dice { values: (2, 3) });
        assert_eq!(12, rules.get_points(5).0);

        // true hit on a checker in the big-jan table: 2 + 2 = 4
        let mut rules = PointsRules::default();
        rules.update_positions(
            &Color::White,
            [
                2, 0, 0, -1, 2, 0, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            ],
        );
        rules.set_dice(Dice { values: (2, 4) });
        assert_eq!(4, rules.get_points(5).0);
        // true hit on a checker in the big-jan table: 2
        let mut rules = PointsRules::default();
        rules.update_positions(
            &Color::White,
            [
                2, 0, -2, -1, 0, 0, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            ],
        );
        rules.set_dice(Dice { values: (2, 4) });
        assert_eq!((2, 2), rules.get_points(5));

        // true hit on the opponent's corner with a double: 6
        rules.update_positions(
            &Color::White,
            [
                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            ],
        );
        rules.set_dice(Dice { values: (2, 2) });
        assert_eq!(6, rules.get_points(5).0);

        // case where hitting the opponent's rest corner is impossible
        rules.update_positions(
            &Color::White,
            [
                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            ],
        );
        rules.set_dice(Dice { values: (1, 1) });
        assert_eq!(0, rules.get_points(5).0);

        // ---- filling jans
        // making a small jan: 4
        rules.update_positions(
            &Color::White,
            [
                3, 1, 2, 2, 3, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            ],
        );
        rules.set_dice(Dice { values: (2, 1) });
        assert_eq!(1, rules.get_jans(&rules.board, 5).len());
        assert_eq!(4, rules.get_points(5).0);

        // making a small jan with a double: 6
        rules.update_positions(
            &Color::White,
            [
                2, 3, 1, 2, 2, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            ],
        );
        rules.set_dice(Dice { values: (1, 1) });
        assert_eq!(6, rules.get_points(5).0);

        // making a small jan in 2 ways: 6 + 6 = 12
        rules.update_positions(
            &Color::White,
            [
                3, 3, 1, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            ],
        );
        rules.set_dice(Dice { values: (1, 1) });
        assert_eq!(12, rules.get_points(5).0);

        // keeping a jan with a double: 6
        rules.update_positions(
            &Color::White,
            [
                3, 3, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            ],
        );
        rules.set_dice(Dice { values: (1, 1) });
        assert_eq!(6, rules.get_points(5).0);

        // keeping a jan
        rules.update_positions(
            &Color::White,
            [
                2, 2, 2, 2, 2, 4, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -3, -1, -2, -3, -5, 0, -1,
            ],
        );
        rules.set_dice(Dice { values: (3, 1) });
        assert_eq!((4, 0), rules.get_points(8));

        // keeping a jan (Black)
        let mut board = Board::new();
        board.set_positions(
            &Color::White,
            [
                1, 0, 5, 3, 2, 1, 3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, -4, -2, -2, -2, -2, -2,
            ],
        );
        let rules = PointsRules::new(&Color::Black, &board, Dice { values: (3, 1) });
        assert_eq!((4, 0), rules.get_points(8));

        // ---- bear-offs
        // bearing off all one's checkers first (simple roll)
        let mut rules = PointsRules::default();
        rules.update_positions(
            &Color::White,
            [
                0, 0, -2, 0, 0, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1,
            ],
        );
        rules.set_dice(Dice { values: (3, 1) });
        assert_eq!(4, rules.get_points(5).0);

        // bearing off all one's checkers first (double)
        rules.update_positions(
            &Color::White,
            [
                0, 0, -2, 0, 0, -1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,
            ],
        );
        rules.set_dice(Dice { values: (2, 2) });
        assert_eq!(6, rules.get_points(5).0);

        // ---- rare jans
        // jan of six tables
        rules.update_positions(
            &Color::White,
            [
                10, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, 0,
            ],
        );
        rules.set_dice(Dice { values: (2, 3) });
        assert_eq!(0, rules.get_points(5).0);
        rules.update_positions(
            &Color::White,
            [
                10, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, 0,
            ],
        );
        rules.set_dice(Dice { values: (2, 3) });
        assert_eq!(4, rules.get_points(3).0);
        rules.update_positions(
            &Color::White,
            [
                10, 1, 0, 0, 1, 2, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, 0,
            ],
        );
        rules.set_dice(Dice { values: (2, 3) });
        assert_eq!(0, rules.get_points(3).0);
        rules.update_positions(
            &Color::White,
            [
                10, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, 0,
            ],
        );
        rules.set_dice(Dice { values: (2, 3) });
        assert_eq!(0, rules.get_points(3).0);

        // jan of two tables
        rules.update_positions(
            &Color::White,
            [
                13, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, 0,
            ],
        );
        rules.set_dice(Dice { values: (2, 2) });
        assert_eq!(6, rules.get_points(5).0);
        rules.update_positions(
            &Color::White,
            [
                12, 0, 0, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, 0,
            ],
        );
        rules.set_dice(Dice { values: (2, 2) });
        assert_eq!(0, rules.get_points(5).0);

        // contre jan of two tables
        rules.update_positions(
            &Color::White,
            [
                13, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, 0,
            ],
        );
        rules.set_dice(Dice { values: (2, 2) });
        assert_eq!((0, 6), rules.get_points(5));

        // jan de Mezeas
        rules.update_positions(
            &Color::White,
            [
                13, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, 0,
            ],
        );
        rules.set_dice(Dice { values: (1, 1) });
        assert_eq!(6, rules.get_points(5).0);
        rules.update_positions(
            &Color::White,
            [
                13, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, 0,
            ],
        );
        rules.set_dice(Dice { values: (1, 2) });
        assert_eq!(4, rules.get_points(5).0);

        // contre jan de Mezeas
        rules.update_positions(
            &Color::White,
            [
                13, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, -1, 0,
            ],
        );
        rules.set_dice(Dice { values: (1, 1) });
        assert_eq!((0, 6), rules.get_points(5));

        // ---- "jans qui ne peut"
        // false hit on a checker in the small-jan table
        let mut rules = PointsRules::default();
        rules.update_positions(
            &Color::White,
            [
                2, 0, -2, -2, 0, -1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            ],
        );
        rules.set_dice(Dice { values: (2, 3) });
        assert_eq!((0, 4), rules.get_points(5));

        // false hit on a checker in the big-jan table
        let mut rules = PointsRules::default();
        rules.update_positions(
            &Color::White,
            [
                2, 0, -2, -1, -2, 0, -1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            ],
        );
        rules.set_dice(Dice { values: (2, 4) });
        assert_eq!((0, 2), rules.get_points(5));

        // for each unplayable die (helpless checker)
        let mut rules = PointsRules::default();
        rules.update_positions(
            &Color::White,
            [
                2, 0, -2, -2, -2, 0, -2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
            ],
        );
        rules.set_dice(Dice { values: (2, 4) });
        assert_eq!((0, 4), rules.get_points(5));
    }
}

@ -1,5 +1,9 @@
mod game;
pub use game::{EndGameReason, GameEvent, GameState, Stage};
mod game_rules_moves;
pub use game_rules_moves::MoveRules;
mod game_rules_points;
pub use game::{EndGameReason, GameEvent, GameState, Stage, TurnStage};
pub use game_rules_points::PointsRules;

mod player;
pub use player::{Color, Player, PlayerId};

@ -11,3 +15,4 @@ mod board;
pub use board::CheckerMove;

mod dice;
pub use dice::{Dice, DiceRoller};

@ -4,14 +4,23 @@ use std::fmt;
// This just makes it easier to discern between a player id and any ol' u64
pub type PlayerId = u64;

#[derive(Copy, Debug, Clone, PartialEq, Serialize, Deserialize)]
#[derive(Copy, Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub enum Color {
    White,
    Black,
}

impl Color {
    pub fn opponent_color(&self) -> Self {
        match self {
            Self::White => Self::Black,
            Self::Black => Self::White,
        }
    }
}

/// Struct for storing player related data.
#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct Player {
    pub name: String,
    pub color: Color,

@ -19,6 +28,9 @@ pub struct Player {
    pub holes: u8,
    pub can_bredouille: bool,
    pub can_big_bredouille: bool,
    /// Number of dice rolls since the beginning of the current setting (all 15 dames in the talon)
    /// (used to check the jan de 3 coups)
    pub dice_roll_count: u8,
}

impl Player {

@ -30,6 +42,7 @@ impl Player {
            holes: 0,
            can_bredouille: true,
            can_big_bredouille: true,
            dice_roll_count: 0,
        }
    }

@ -39,6 +52,15 @@ impl Player {
            self.points, self.holes, self.can_bredouille as u8, self.can_big_bredouille as u8
        )
    }

    pub fn to_vec(&self) -> Vec<u8> {
        vec![
            self.points,
            self.holes,
            self.can_bredouille as u8,
            self.can_big_bredouille as u8,
        ]
    }
}
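to_vec mirrors to_bits_string: both expose the scoring fields in the same fixed order (points, holes, then the two bredouille flags), presumably feeding the compact state ids asserted in the tests above. A hypothetical check, assuming new() starts points and holes at zero as the surrounding hunks suggest:

// Hypothetical: a fresh player serializes to zeroed counters with both flags set.
let p = Player::new("p1".into(), Color::White);
assert_eq!(p.to_vec(), vec![0, 0, 1, 1]); // points, holes, can_bredouille, can_big_bredouille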

/// Represents a player in the game.

@ -63,6 +85,7 @@ pub enum CurrentPlayer {

impl CurrentPlayer {
    /// Returns the other player, i.e. the player who is not the current player.
    #[cfg(test)]
    pub fn other(&self) -> Self {
        match *self {
            CurrentPlayer::Nobody => CurrentPlayer::Nobody,

@ -104,6 +127,7 @@ mod tests {
            holes: 3,
            can_bredouille: true,
            can_big_bredouille: false,
            dice_roll_count: 0,
        };
        println!("{}", player.to_bits_string());
        assert!(player.to_bits_string() == "1011001110");