Skip to content

Commit 05f4ef6

Browse files
committed
Randomly play opening moves based on priors
1 parent 5389cd8 commit 05f4ef6

File tree

4 files changed

+73
-18
lines changed

4 files changed

+73
-18
lines changed

src/selfplay/game.cc

Lines changed: 62 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -29,10 +29,14 @@
2929
#include <algorithm>
3030

3131
#include "neural/writer.h"
32+
#include "utils/random.h"
3233

3334
namespace lczero {
3435

3536
namespace {
37+
const OptionId kRandomOpeningTemperatureId{
38+
"random-opening-temperature", "RandomOpeningTemperature",
39+
"Tau value for softmax applied to move priors of random opening moves."};
3640
const OptionId kReuseTreeId{"reuse-tree", "ReuseTree",
3741
"Reuse the search tree between moves."};
3842
const OptionId kResignPercentageId{
@@ -48,6 +52,7 @@ const OptionId kResignEarliestMoveId{"resign-earliest-move",
4852
} // namespace
4953

5054
void SelfPlayGame::PopulateUciParams(OptionsParser* options) {
55+
options->Add<FloatOption>(kRandomOpeningTemperatureId, 0.0f, 100.0f) = 1.0f;
5156
options->Add<BoolOption>(kReuseTreeId) = false;
5257
options->Add<BoolOption>(kResignWDLStyleId) = false;
5358
options->Add<FloatOption>(kResignPercentageId, 0.0f, 100.0f) = 0.0f;
@@ -69,11 +74,12 @@ SelfPlayGame::SelfPlayGame(PlayerOptions player1, PlayerOptions player2,
6974
}
7075

7176
void SelfPlayGame::Play(int white_threads, int black_threads, bool training,
72-
bool enable_resign) {
73-
bool blacks_move = false;
77+
int random_opening_plies, bool enable_resign) {
78+
SearchLimits random_opening_limits;
79+
random_opening_limits.visits = 1;
7480

7581
// Do moves while not end of the game. (And while not abort_)
76-
while (!abort_) {
82+
for (auto blacks_move = false; !abort_; blacks_move = !blacks_move) {
7783
game_result_ = tree_[0]->GetPositionHistory().ComputeGameResult();
7884

7985
// If endgame, stop.
@@ -94,7 +100,9 @@ void SelfPlayGame::Play(int white_threads, int black_threads, bool training,
94100
if (abort_) break;
95101
search_ = std::make_unique<Search>(
96102
*tree_[idx], options_[idx].network, options_[idx].best_move_callback,
97-
options_[idx].info_callback, options_[idx].search_limits,
103+
options_[idx].info_callback,
104+
random_opening_plies ? random_opening_limits
105+
: options_[idx].search_limits,
98106
*options_[idx].uci_options, options_[idx].cache, nullptr);
99107
// TODO: add Syzygy option for selfplay.
100108
}
@@ -103,8 +111,37 @@ void SelfPlayGame::Play(int white_threads, int black_threads, bool training,
103111
search_->RunBlocking(blacks_move ? black_threads : white_threads);
104112
if (abort_) break;
105113

114+
// Maybe override the best move if we're still picking a random opening.
115+
auto move = search_->GetBestMove().first;
116+
if (random_opening_plies) {
117+
const auto temperature = options_[idx].uci_options->Get<float>(
118+
kRandomOpeningTemperatureId.GetId());
119+
auto edges = tree_[idx]->GetCurrentHead()->Edges();
120+
std::vector<float> cumulative_sums;
121+
auto sum = 0.0f;
122+
for (const auto& edge : edges) {
123+
sum += std::pow(edge.GetP(), 1 / temperature);
124+
cumulative_sums.push_back(sum);
125+
}
126+
127+
// Pick a move proportionally to its softmax prior.
128+
if (sum > 0.0f) {
129+
const auto toss = Random::Get().GetFloat(cumulative_sums.back());
130+
auto moveIdx = std::lower_bound(cumulative_sums.begin(),
131+
cumulative_sums.end(), toss) -
132+
cumulative_sums.begin();
133+
for (const auto& edge : edges) {
134+
if (moveIdx-- == 0) {
135+
move = edge.GetMove(blacks_move);
136+
break;
137+
}
138+
}
139+
}
140+
}
141+
142+
// Get data from the head before cleaning up the children with the move.
106143
auto best_eval = search_->GetBestEval();
107-
if (training) {
144+
if (!random_opening_plies && training) {
108145
// Append training data. The GameResult is later overwritten.
109146
auto best_q = best_eval.first;
110147
auto best_d = best_eval.second;
@@ -113,10 +150,28 @@ void SelfPlayGame::Play(int white_threads, int black_threads, bool training,
113150
search_->GetParams().GetHistoryFill(), best_q, best_d));
114151
}
115152

153+
// Add move to the tree.
154+
tree_[0]->MakeMove(move);
155+
if (tree_[0] != tree_[1]) tree_[1]->MakeMove(move);
156+
157+
// Skip adjudication if random openings can still balance out.
158+
if (random_opening_plies) {
159+
random_opening_plies--;
160+
161+
// Randomly played into a decided result, so stop and treat as undecided.
162+
if (tree_[0]->GetPositionHistory().ComputeGameResult() !=
163+
GameResult::UNDECIDED) {
164+
break;
165+
}
166+
continue;
167+
}
168+
169+
// Adjudicate if evals exceed resignation thresholds.
116170
float eval = best_eval.first;
117171
eval = (eval + 1) / 2;
118172
if (eval < min_eval_[idx]) min_eval_[idx] = eval;
119-
const int move_number = tree_[0]->GetPositionHistory().GetLength() / 2 + 1;
173+
const int move_number =
174+
(tree_[0]->GetPositionHistory().GetLength() + 1) / 2;
120175
if (enable_resign && move_number >= options_[idx].uci_options->Get<int>(
121176
kResignEarliestMoveId.GetId())) {
122177
const float resignpct =
@@ -149,12 +204,6 @@ void SelfPlayGame::Play(int white_threads, int black_threads, bool training,
149204
}
150205
}
151206
}
152-
153-
// Add best move to the tree.
154-
const Move move = search_->GetBestMove().first;
155-
tree_[0]->MakeMove(move);
156-
if (tree_[0] != tree_[1]) tree_[1]->MakeMove(move);
157-
blacks_move = !blacks_move;
158207
}
159208
}
160209

@@ -184,9 +233,8 @@ void SelfPlayGame::Abort() {
184233

185234
void SelfPlayGame::WriteTrainingData(TrainingDataWriter* writer) const {
186235
assert(!training_data_.empty());
187-
bool black_to_move =
188-
tree_[0]->GetPositionHistory().Starting().IsBlackToMove();
189236
for (auto chunk : training_data_) {
237+
auto black_to_move = chunk.side_to_move;
190238
if (game_result_ == GameResult::WHITE_WON) {
191239
chunk.result = black_to_move ? -1 : 1;
192240
} else if (game_result_ == GameResult::BLACK_WON) {
@@ -195,7 +243,6 @@ void SelfPlayGame::WriteTrainingData(TrainingDataWriter* writer) const {
195243
chunk.result = 0;
196244
}
197245
writer->WriteChunk(chunk);
198-
black_to_move = !black_to_move;
199246
}
200247
}
201248

src/selfplay/game.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ class SelfPlayGame {
7070

7171
// Starts the game and blocks until the game is finished.
7272
void Play(int white_threads, int black_threads, bool training,
73-
bool enable_resign = true);
73+
int random_opening_plies, bool enable_resign = true);
7474
// Aborts the game currently played, doesn't matter if it's synchronous or
7575
// not.
7676
void Abort();

src/selfplay/tournament.cc

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,10 @@ const OptionId kVerboseThinkingId{"verbose-thinking", "VerboseThinking",
6262
const OptionId kResignPlaythroughId{
6363
"resign-playthrough", "ResignPlaythrough",
6464
"The percentage of games which ignore resign."};
65+
const OptionId kRandomOpeningMaxPliesId{
66+
"random-opening-max-plies", "RandomOpeningMaxPlies",
67+
"Maximum number of opening plies to randomly play based on move priors. "
68+
"The actual number of opening plies will be uniformly randomly picked."};
6569

6670
} // namespace
6771

@@ -83,6 +87,7 @@ void SelfPlayTournament::PopulateOptions(OptionsParser* options) {
8387
options->Add<BoolOption>(kTrainingId) = false;
8488
options->Add<BoolOption>(kVerboseThinkingId) = false;
8589
options->Add<FloatOption>(kResignPlaythroughId, 0.0f, 100.0f) = 0.0f;
90+
options->Add<IntOption>(kRandomOpeningMaxPliesId, 0, 999) = 0;
8691

8792
SelfPlayGame::PopulateUciParams(options);
8893

@@ -124,7 +129,9 @@ SelfPlayTournament::SelfPlayTournament(const OptionsDict& options,
124129
kShareTree(options.Get<bool>(kShareTreesId.GetId())),
125130
kParallelism(options.Get<int>(kParallelGamesId.GetId())),
126131
kTraining(options.Get<bool>(kTrainingId.GetId())),
127-
kResignPlaythrough(options.Get<float>(kResignPlaythroughId.GetId())) {
132+
kResignPlaythrough(options.Get<float>(kResignPlaythroughId.GetId())),
133+
kRandomOpeningMaxPlies(
134+
options.Get<int>(kRandomOpeningMaxPliesId.GetId())) {
128135
// If playing just one game, the player1 is white, otherwise randomize.
129136
if (kTotalGames != 1) {
130137
next_game_black_ = Random::Get().GetBool();
@@ -243,7 +250,7 @@ void SelfPlayTournament::PlayOneGame(int game_number) {
243250

244251
// PLAY GAME!
245252
game.Play(kThreads[color_idx[0]], kThreads[color_idx[1]], kTraining,
246-
enable_resign);
253+
Random::Get().GetInt(0, kRandomOpeningMaxPlies), enable_resign);
247254

248255
// If game was aborted, it's still undecided.
249256
if (game.GetGameResult() != GameResult::UNDECIDED) {

src/selfplay/tournament.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,7 @@ class SelfPlayTournament {
102102
const size_t kParallelism;
103103
const bool kTraining;
104104
const float kResignPlaythrough;
105+
const int kRandomOpeningMaxPlies;
105106
};
106107

107108
} // namespace lczero

0 commit comments

Comments
 (0)