29
29
#include < algorithm>
30
30
31
31
#include " neural/writer.h"
32
+ #include " utils/random.h"
32
33
33
34
namespace lczero {
34
35
@@ -69,11 +70,12 @@ SelfPlayGame::SelfPlayGame(PlayerOptions player1, PlayerOptions player2,
69
70
}
70
71
71
72
void SelfPlayGame::Play (int white_threads, int black_threads, bool training,
72
- bool enable_resign) {
73
- bool blacks_move = false ;
73
+ int random_opening_plies, bool enable_resign) {
74
+ SearchLimits random_opening_limits;
75
+ random_opening_limits.visits = 1 ;
74
76
75
77
// Do moves while not end of the game. (And while not abort_)
76
- while ( !abort_) {
78
+ for ( auto blacks_move = false ; !abort_; blacks_move = !blacks_move ) {
77
79
game_result_ = tree_[0 ]->GetPositionHistory ().ComputeGameResult ();
78
80
79
81
// If endgame, stop.
@@ -94,7 +96,9 @@ void SelfPlayGame::Play(int white_threads, int black_threads, bool training,
94
96
if (abort_) break ;
95
97
search_ = std::make_unique<Search>(
96
98
*tree_[idx], options_[idx].network , options_[idx].best_move_callback ,
97
- options_[idx].info_callback , options_[idx].search_limits ,
99
+ options_[idx].info_callback ,
100
+ random_opening_plies ? random_opening_limits
101
+ : options_[idx].search_limits ,
98
102
*options_[idx].uci_options , options_[idx].cache , nullptr );
99
103
// TODO: add Syzygy option for selfplay.
100
104
}
@@ -103,8 +107,23 @@ void SelfPlayGame::Play(int white_threads, int black_threads, bool training,
103
107
search_->RunBlocking (blacks_move ? black_threads : white_threads);
104
108
if (abort_) break ;
105
109
110
+ // Override the best move if we're still picking a random opening.
111
+ auto move = search_->GetBestMove ().first ;
112
+ if (random_opening_plies) {
113
+ // Pick a move proportionally to its prior.
114
+ auto toss = Random::Get ().GetFloat (1 .0f );
115
+ for (const auto & edge : tree_[idx]->GetCurrentHead ()->Edges ()) {
116
+ toss -= edge.GetP ();
117
+ if (toss < 0 .0f ) {
118
+ move = edge.GetMove (blacks_move);
119
+ break ;
120
+ }
121
+ }
122
+ }
123
+
124
+ // Get data from the head before cleaning up the children with the move.
106
125
auto best_eval = search_->GetBestEval ();
107
- if (training) {
126
+ if (!random_opening_plies && training) {
108
127
// Append training data. The GameResult is later overwritten.
109
128
auto best_q = best_eval.first ;
110
129
auto best_d = best_eval.second ;
@@ -113,10 +132,28 @@ void SelfPlayGame::Play(int white_threads, int black_threads, bool training,
113
132
search_->GetParams ().GetHistoryFill (), best_q, best_d));
114
133
}
115
134
135
+ // Add move to the tree.
136
+ tree_[0 ]->MakeMove (move);
137
+ if (tree_[0 ] != tree_[1 ]) tree_[1 ]->MakeMove (move);
138
+
139
+ // Skip adjudication if random openings can still balance out.
140
+ if (random_opening_plies) {
141
+ random_opening_plies--;
142
+
143
+ // Randomly played into a decided result, so stop and treat as undecided.
144
+ if (tree_[0 ]->GetPositionHistory ().ComputeGameResult () !=
145
+ GameResult::UNDECIDED) {
146
+ break ;
147
+ }
148
+ continue ;
149
+ }
150
+
151
+ // Adjudicate if evals exceed resignation thresholds.
116
152
float eval = best_eval.first ;
117
153
eval = (eval + 1 ) / 2 ;
118
154
if (eval < min_eval_[idx]) min_eval_[idx] = eval;
119
- const int move_number = tree_[0 ]->GetPositionHistory ().GetLength () / 2 + 1 ;
155
+ const int move_number =
156
+ (tree_[0 ]->GetPositionHistory ().GetLength () + 1 ) / 2 ;
120
157
if (enable_resign && move_number >= options_[idx].uci_options ->Get <int >(
121
158
kResignEarliestMoveId .GetId ())) {
122
159
const float resignpct =
@@ -149,12 +186,6 @@ void SelfPlayGame::Play(int white_threads, int black_threads, bool training,
149
186
}
150
187
}
151
188
}
152
-
153
- // Add best move to the tree.
154
- const Move move = search_->GetBestMove ().first ;
155
- tree_[0 ]->MakeMove (move);
156
- if (tree_[0 ] != tree_[1 ]) tree_[1 ]->MakeMove (move);
157
- blacks_move = !blacks_move;
158
189
}
159
190
}
160
191
@@ -184,9 +215,8 @@ void SelfPlayGame::Abort() {
184
215
185
216
void SelfPlayGame::WriteTrainingData (TrainingDataWriter* writer) const {
186
217
assert (!training_data_.empty ());
187
- bool black_to_move =
188
- tree_[0 ]->GetPositionHistory ().Starting ().IsBlackToMove ();
189
218
for (auto chunk : training_data_) {
219
+ auto black_to_move = chunk.side_to_move ;
190
220
if (game_result_ == GameResult::WHITE_WON) {
191
221
chunk.result = black_to_move ? -1 : 1 ;
192
222
} else if (game_result_ == GameResult::BLACK_WON) {
@@ -195,7 +225,6 @@ void SelfPlayGame::WriteTrainingData(TrainingDataWriter* writer) const {
195
225
chunk.result = 0 ;
196
226
}
197
227
writer->WriteChunk (chunk);
198
- black_to_move = !black_to_move;
199
228
}
200
229
}
201
230
0 commit comments