Skip to content

Commit ff5bdc0

Browse files
committed
Randomly play opening moves based on priors
1 parent 5389cd8 commit ff5bdc0

File tree

4 files changed

+54
-17
lines changed

4 files changed

+54
-17
lines changed

src/selfplay/game.cc

Lines changed: 44 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
#include <algorithm>
3030

3131
#include "neural/writer.h"
32+
#include "utils/random.h"
3233

3334
namespace lczero {
3435

@@ -69,11 +70,12 @@ SelfPlayGame::SelfPlayGame(PlayerOptions player1, PlayerOptions player2,
6970
}
7071

7172
void SelfPlayGame::Play(int white_threads, int black_threads, bool training,
72-
bool enable_resign) {
73-
bool blacks_move = false;
73+
int random_opening_plies, bool enable_resign) {
74+
SearchLimits random_opening_limits;
75+
random_opening_limits.visits = 1;
7476

7577
// Play moves until the game ends (or until abort_ is set).
76-
while (!abort_) {
78+
for (auto blacks_move = false; !abort_; blacks_move = !blacks_move) {
7779
game_result_ = tree_[0]->GetPositionHistory().ComputeGameResult();
7880

7981
// If endgame, stop.
@@ -94,7 +96,9 @@ void SelfPlayGame::Play(int white_threads, int black_threads, bool training,
9496
if (abort_) break;
9597
search_ = std::make_unique<Search>(
9698
*tree_[idx], options_[idx].network, options_[idx].best_move_callback,
97-
options_[idx].info_callback, options_[idx].search_limits,
99+
options_[idx].info_callback,
100+
random_opening_plies ? random_opening_limits
101+
: options_[idx].search_limits,
98102
*options_[idx].uci_options, options_[idx].cache, nullptr);
99103
// TODO: add Syzygy option for selfplay.
100104
}
@@ -103,8 +107,23 @@ void SelfPlayGame::Play(int white_threads, int black_threads, bool training,
103107
search_->RunBlocking(blacks_move ? black_threads : white_threads);
104108
if (abort_) break;
105109

110+
// Override the best move if we're still picking a random opening.
111+
auto move = search_->GetBestMove().first;
112+
if (random_opening_plies) {
113+
// Pick a move with probability proportional to its prior.
114+
auto toss = Random::Get().GetFloat(1.0f);
115+
for (const auto& edge : tree_[idx]->GetCurrentHead()->Edges()) {
116+
toss -= edge.GetP();
117+
if (toss < 0.0f) {
118+
move = edge.GetMove(blacks_move);
119+
break;
120+
}
121+
}
122+
}
123+
124+
// Get data from the head before cleaning up the children with the move.
106125
auto best_eval = search_->GetBestEval();
107-
if (training) {
126+
if (!random_opening_plies && training) {
108127
// Append training data. The GameResult is later overwritten.
109128
auto best_q = best_eval.first;
110129
auto best_d = best_eval.second;
@@ -113,10 +132,28 @@ void SelfPlayGame::Play(int white_threads, int black_threads, bool training,
113132
search_->GetParams().GetHistoryFill(), best_q, best_d));
114133
}
115134

135+
// Add move to the tree.
136+
tree_[0]->MakeMove(move);
137+
if (tree_[0] != tree_[1]) tree_[1]->MakeMove(move);
138+
139+
// Skip adjudication if random openings can still balance out.
140+
if (random_opening_plies) {
141+
random_opening_plies--;
142+
143+
// The random opening reached a decided position, so stop and treat the game as undecided.
144+
if (tree_[0]->GetPositionHistory().ComputeGameResult() !=
145+
GameResult::UNDECIDED) {
146+
break;
147+
}
148+
continue;
149+
}
150+
151+
// Adjudicate if evals exceed resignation thresholds.
116152
float eval = best_eval.first;
117153
eval = (eval + 1) / 2;
118154
if (eval < min_eval_[idx]) min_eval_[idx] = eval;
119-
const int move_number = tree_[0]->GetPositionHistory().GetLength() / 2 + 1;
155+
const int move_number =
156+
(tree_[0]->GetPositionHistory().GetLength() + 1) / 2;
120157
if (enable_resign && move_number >= options_[idx].uci_options->Get<int>(
121158
kResignEarliestMoveId.GetId())) {
122159
const float resignpct =
@@ -149,12 +186,6 @@ void SelfPlayGame::Play(int white_threads, int black_threads, bool training,
149186
}
150187
}
151188
}
152-
153-
// Add best move to the tree.
154-
const Move move = search_->GetBestMove().first;
155-
tree_[0]->MakeMove(move);
156-
if (tree_[0] != tree_[1]) tree_[1]->MakeMove(move);
157-
blacks_move = !blacks_move;
158189
}
159190
}
160191

@@ -184,9 +215,8 @@ void SelfPlayGame::Abort() {
184215

185216
void SelfPlayGame::WriteTrainingData(TrainingDataWriter* writer) const {
186217
assert(!training_data_.empty());
187-
bool black_to_move =
188-
tree_[0]->GetPositionHistory().Starting().IsBlackToMove();
189218
for (auto chunk : training_data_) {
219+
auto black_to_move = chunk.side_to_move;
190220
if (game_result_ == GameResult::WHITE_WON) {
191221
chunk.result = black_to_move ? -1 : 1;
192222
} else if (game_result_ == GameResult::BLACK_WON) {
@@ -195,7 +225,6 @@ void SelfPlayGame::WriteTrainingData(TrainingDataWriter* writer) const {
195225
chunk.result = 0;
196226
}
197227
writer->WriteChunk(chunk);
198-
black_to_move = !black_to_move;
199228
}
200229
}
201230

src/selfplay/game.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ class SelfPlayGame {
7070

7171
// Starts the game and blocks until the game is finished.
7272
void Play(int white_threads, int black_threads, bool training,
73-
bool enable_resign = true);
73+
int random_opening_plies, bool enable_resign = true);
7474
// Aborts the game currently played, doesn't matter if it's synchronous or
7575
// not.
7676
void Abort();

src/selfplay/tournament.cc

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,10 @@ const OptionId kTrainingId{
5959
"temporary subdirectory that the engine creates."};
6060
const OptionId kVerboseThinkingId{"verbose-thinking", "VerboseThinking",
6161
"Show verbose thinking messages."};
62+
const OptionId kMaxRandomOpeningPliesId{
63+
"max-random-opening-plies", "MaxRandomOpeningPlies",
64+
"Maximum number of opening plies to randomly play based on move priors. "
65+
"The actual number of opening plies will be uniformly randomly picked."};
6266
const OptionId kResignPlaythroughId{
6367
"resign-playthrough", "ResignPlaythrough",
6468
"The percentage of games which ignore resign."};
@@ -82,6 +86,7 @@ void SelfPlayTournament::PopulateOptions(OptionsParser* options) {
8286
options->Add<IntOption>(kTimeMsId, -1, 999999999) = -1;
8387
options->Add<BoolOption>(kTrainingId) = false;
8488
options->Add<BoolOption>(kVerboseThinkingId) = false;
89+
options->Add<IntOption>(kMaxRandomOpeningPliesId, 0, 999) = 0;
8590
options->Add<FloatOption>(kResignPlaythroughId, 0.0f, 100.0f) = 0.0f;
8691

8792
SelfPlayGame::PopulateUciParams(options);
@@ -124,6 +129,8 @@ SelfPlayTournament::SelfPlayTournament(const OptionsDict& options,
124129
kShareTree(options.Get<bool>(kShareTreesId.GetId())),
125130
kParallelism(options.Get<int>(kParallelGamesId.GetId())),
126131
kTraining(options.Get<bool>(kTrainingId.GetId())),
132+
kMaxRandomOpeningPlies(
133+
options.Get<int>(kMaxRandomOpeningPliesId.GetId())),
127134
kResignPlaythrough(options.Get<float>(kResignPlaythroughId.GetId())) {
128135
// If playing just one game, the player1 is white, otherwise randomize.
129136
if (kTotalGames != 1) {
@@ -243,7 +250,7 @@ void SelfPlayTournament::PlayOneGame(int game_number) {
243250

244251
// PLAY GAME!
245252
game.Play(kThreads[color_idx[0]], kThreads[color_idx[1]], kTraining,
246-
enable_resign);
253+
Random::Get().GetInt(0, kMaxRandomOpeningPlies), enable_resign);
247254

248255
// If game was aborted, it's still undecided.
249256
if (game.GetGameResult() != GameResult::UNDECIDED) {

src/selfplay/tournament.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,7 @@ class SelfPlayTournament {
101101
const bool kShareTree;
102102
const size_t kParallelism;
103103
const bool kTraining;
104+
const int kMaxRandomOpeningPlies;
104105
const float kResignPlaythrough;
105106
};
106107

0 commit comments

Comments
 (0)