29
29
#include < algorithm>
30
30
31
31
#include " neural/writer.h"
32
+ #include " utils/random.h"
32
33
33
34
namespace lczero {
34
35
35
36
namespace {
37
+ const OptionId kRandomOpeningTemperatureId {
38
+ " random-opening-temperature" , " RandomOpeningTemperature" ,
39
+ " Tau value for softmax applied to move priors of random opening moves." };
36
40
const OptionId kReuseTreeId {" reuse-tree" , " ReuseTree" ,
37
41
" Reuse the search tree between moves." };
38
42
const OptionId kResignPercentageId {
@@ -48,6 +52,7 @@ const OptionId kResignEarliestMoveId{"resign-earliest-move",
48
52
} // namespace
49
53
50
54
void SelfPlayGame::PopulateUciParams (OptionsParser* options) {
55
+ options->Add <FloatOption>(kRandomOpeningTemperatureId , 0 .0f , 100 .0f ) = 1 .0f ;
51
56
options->Add <BoolOption>(kReuseTreeId ) = false ;
52
57
options->Add <BoolOption>(kResignWDLStyleId ) = false ;
53
58
options->Add <FloatOption>(kResignPercentageId , 0 .0f , 100 .0f ) = 0 .0f ;
@@ -69,11 +74,12 @@ SelfPlayGame::SelfPlayGame(PlayerOptions player1, PlayerOptions player2,
69
74
}
70
75
71
76
void SelfPlayGame::Play (int white_threads, int black_threads, bool training,
72
- bool enable_resign) {
73
- bool blacks_move = false ;
77
+ int random_opening_plies, bool enable_resign) {
78
+ SearchLimits random_opening_limits;
79
+ random_opening_limits.visits = 1 ;
74
80
75
81
// Do moves while not end of the game. (And while not abort_)
76
- while ( !abort_) {
82
+ for ( auto blacks_move = false ; !abort_; blacks_move = !blacks_move ) {
77
83
game_result_ = tree_[0 ]->GetPositionHistory ().ComputeGameResult ();
78
84
79
85
// If endgame, stop.
@@ -94,7 +100,9 @@ void SelfPlayGame::Play(int white_threads, int black_threads, bool training,
94
100
if (abort_) break ;
95
101
search_ = std::make_unique<Search>(
96
102
*tree_[idx], options_[idx].network , options_[idx].best_move_callback ,
97
- options_[idx].info_callback , options_[idx].search_limits ,
103
+ options_[idx].info_callback ,
104
+ random_opening_plies ? random_opening_limits
105
+ : options_[idx].search_limits ,
98
106
*options_[idx].uci_options , options_[idx].cache , nullptr );
99
107
// TODO: add Syzygy option for selfplay.
100
108
}
@@ -103,8 +111,37 @@ void SelfPlayGame::Play(int white_threads, int black_threads, bool training,
103
111
search_->RunBlocking (blacks_move ? black_threads : white_threads);
104
112
if (abort_) break ;
105
113
114
+ // Maybe override the best move if we're still picking a random opening.
115
+ auto move = search_->GetBestMove ().first ;
116
+ if (random_opening_plies) {
117
+ const auto temperature = options_[idx].uci_options ->Get <float >(
118
+ kRandomOpeningTemperatureId .GetId ());
119
+ auto edges = tree_[idx]->GetCurrentHead ()->Edges ();
120
+ std::vector<float > cumulative_sums;
121
+ auto sum = 0 .0f ;
122
+ for (const auto & edge : edges) {
123
+ sum += std::pow (edge.GetP (), 1 / temperature);
124
+ cumulative_sums.push_back (sum);
125
+ }
126
+
127
+ // Pick a move proportionally to its softmax prior.
128
+ if (sum > 0 .0f ) {
129
+ const auto toss = Random::Get ().GetFloat (cumulative_sums.back ());
130
+ auto moveIdx = std::lower_bound (cumulative_sums.begin (),
131
+ cumulative_sums.end (), toss) -
132
+ cumulative_sums.begin ();
133
+ for (const auto & edge : edges) {
134
+ if (moveIdx-- == 0 ) {
135
+ move = edge.GetMove (blacks_move);
136
+ break ;
137
+ }
138
+ }
139
+ }
140
+ }
141
+
142
+ // Get data from the head before cleaning up the children with the move.
106
143
auto best_eval = search_->GetBestEval ();
107
- if (training) {
144
+ if (!random_opening_plies && training) {
108
145
// Append training data. The GameResult is later overwritten.
109
146
auto best_q = best_eval.first ;
110
147
auto best_d = best_eval.second ;
@@ -113,10 +150,28 @@ void SelfPlayGame::Play(int white_threads, int black_threads, bool training,
113
150
search_->GetParams ().GetHistoryFill (), best_q, best_d));
114
151
}
115
152
153
+ // Add move to the tree.
154
+ tree_[0 ]->MakeMove (move);
155
+ if (tree_[0 ] != tree_[1 ]) tree_[1 ]->MakeMove (move);
156
+
157
+ // Skip adjudication if random openings can still balance out.
158
+ if (random_opening_plies) {
159
+ random_opening_plies--;
160
+
161
+ // Randomly played into a decided result, so stop and treat as undecided.
162
+ if (tree_[0 ]->GetPositionHistory ().ComputeGameResult () !=
163
+ GameResult::UNDECIDED) {
164
+ break ;
165
+ }
166
+ continue ;
167
+ }
168
+
169
+ // Adjudicate if evals exceed resignation thresholds.
116
170
float eval = best_eval.first ;
117
171
eval = (eval + 1 ) / 2 ;
118
172
if (eval < min_eval_[idx]) min_eval_[idx] = eval;
119
- const int move_number = tree_[0 ]->GetPositionHistory ().GetLength () / 2 + 1 ;
173
+ const int move_number =
174
+ (tree_[0 ]->GetPositionHistory ().GetLength () + 1 ) / 2 ;
120
175
if (enable_resign && move_number >= options_[idx].uci_options ->Get <int >(
121
176
kResignEarliestMoveId .GetId ())) {
122
177
const float resignpct =
@@ -149,12 +204,6 @@ void SelfPlayGame::Play(int white_threads, int black_threads, bool training,
149
204
}
150
205
}
151
206
}
152
-
153
- // Add best move to the tree.
154
- const Move move = search_->GetBestMove ().first ;
155
- tree_[0 ]->MakeMove (move);
156
- if (tree_[0 ] != tree_[1 ]) tree_[1 ]->MakeMove (move);
157
- blacks_move = !blacks_move;
158
207
}
159
208
}
160
209
@@ -184,9 +233,8 @@ void SelfPlayGame::Abort() {
184
233
185
234
void SelfPlayGame::WriteTrainingData (TrainingDataWriter* writer) const {
186
235
assert (!training_data_.empty ());
187
- bool black_to_move =
188
- tree_[0 ]->GetPositionHistory ().Starting ().IsBlackToMove ();
189
236
for (auto chunk : training_data_) {
237
+ auto black_to_move = chunk.side_to_move ;
190
238
if (game_result_ == GameResult::WHITE_WON) {
191
239
chunk.result = black_to_move ? -1 : 1 ;
192
240
} else if (game_result_ == GameResult::BLACK_WON) {
@@ -195,7 +243,6 @@ void SelfPlayGame::WriteTrainingData(TrainingDataWriter* writer) const {
195
243
chunk.result = 0 ;
196
244
}
197
245
writer->WriteChunk (chunk);
198
- black_to_move = !black_to_move;
199
246
}
200
247
}
201
248
0 commit comments