Skip to content

Temperature and FPU related params. #568

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 5 commits into from
Dec 7, 2018
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 35 additions & 1 deletion src/mcts/params.cc
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,18 @@ const OptionId SearchParams::kTempDecayMovesId{
"Reduce temperature for every move from the game start to this number of "
"moves, decreasing linearly from initial temperature to 0. A value of 0 "
"disables tempdecay."};
// Identifier strings and help text for the temperature cutoff move option.
// Populate() registers it as an integer in [0, 1000] with default 0; the
// search switches to the endgame temperature once (moves played + 1) reaches
// this value, and a value of 0 leaves the cutoff disabled.
const OptionId SearchParams::kTemperatureCutoffMoveId{
"temp-cutoff-move", "TempCutoffMove",
"Move number, starting from which endgame temperature is used rather "
"than initial temperature. Setting it to 0 disables cutoff."};
// Identifier strings and help text for the endgame temperature option.
// Populate() registers it as a float in [0, 100] with default 0. It replaces
// the initial temperature once the cutoff move is reached, and the tempdecay
// branch is not applied to it.
const OptionId SearchParams::kTemperatureEndgameId{
"temp-endgame", "TempEndgame",
"Temperature used during endgame (starting from cutoff move). Endgame "
"temperature doesn't decay."};
// Identifier strings and help text for the temperature value cutoff option.
// Populate() registers it as a float in [0, 100] with default 100. When
// picking a move with temperature, the search divides this value by 50 and
// skips every edge whose eval is below (best eval - value / 50).
// The help text was reworded: the previous "less than X than the best move"
// was ungrammatical and left "X" undefined.
const OptionId SearchParams::kTemperatureWinpctCutoffId{
    "temp-value-cutoff", "TempValueCutoff",
    "When move is selected using temperature, bad moves (with win "
    "probability lower than that of the best move by more than this value) "
    "are not considered at all."};
const OptionId SearchParams::kTemperatureVisitOffsetId{
"temp-visit-offset", "TempVisitOffset",
"Reduces visits by this value when picking a move with a temperature. When "
Expand All @@ -92,13 +104,26 @@ const OptionId SearchParams::kSmartPruningFactorId{
"promising moves from being considered even earlier. Values less than 1 "
"causes hopeless moves to still have some attention. When set to 0, smart "
"pruning is deactivated."};
// Identifier strings and help text for the FPU strategy option. Populate()
// registers it as a choice between "reduction" and "absolute" with default
// "reduction"; the SearchParams constructor caches whether "absolute" was
// selected.
const OptionId SearchParams::kFpuStrategyId{
"fpu-strategy", "FpuStrategy",
"How is an eval of unvisited node determined. \"reduction\" subtracts "
"--fpu-reduction value from the parent eval. \"absolute\" sets eval of "
"unvisited nodes to the value specified in --fpu-value."};
// TODO(crem) Make FPU in "reduction" mode use fpu-value too. For now it's kept
// for backwards compatibility.
//
// Identifier strings and help text for the FPU reduction option. Populate()
// registers it as a float in [-100, 100] with default 0. Note that GetFpu() in
// search.cc multiplies the reduction by sqrt(visited policy) before
// subtracting it, and skips it entirely at a noisy root or when it is zero.
// The help text previously began with a stale copy of its first sentence
// (the pre-change line was left next to its replacement) and used "it's"
// for the possessive; both are corrected here.
const OptionId SearchParams::kFpuReductionId{
    "fpu-reduction", "FpuReduction",
    "\"First Play Urgency\" reduction (used when FPU strategy is "
    "\"reduction\"). Normally when a move has no visits, "
    "its eval is assumed to be equal to parent's eval. With non-zero FPU "
    "reduction, eval of unvisited move is decreased by that value, "
    "discouraging visits of unvisited moves, and saving those visits for "
    "(hopefully) more promising moves."};
// Identifier strings and help text for the absolute FPU value option.
// Populate() registers it as a float in [-1, 1] with default -1. GetFpu() in
// search.cc returns it unchanged when the FPU strategy is "absolute".
const OptionId SearchParams::kFpuValueId{
"fpu-value", "FpuValue",
"\"First Play Urgency\" value. When FPU strategy is \"absolute\", value of "
"unvisited node is assumed to be equal to this value, and does not depend "
"on parent eval."};
const OptionId SearchParams::kCacheHistoryLengthId{
"cache-history-length", "CacheHistoryLength",
"Length of history, in half-moves, to include into the cache key. When "
Expand Down Expand Up @@ -147,12 +172,18 @@ void SearchParams::Populate(OptionsParser* options) {
options->Add<FloatOption>(kCpuctFactorId, 0.0f, 1000.0f) = 0.0f;
options->Add<FloatOption>(kTemperatureId, 0.0f, 100.0f) = 0.0f;
options->Add<IntOption>(kTempDecayMovesId, 0, 100) = 0;
options->Add<IntOption>(kTemperatureCutoffMoveId, 0, 1000) = 0;
options->Add<FloatOption>(kTemperatureEndgameId, 0.0f, 100.0f) = 0.0f;
options->Add<FloatOption>(kTemperatureWinpctCutoffId, 0.0f, 100.0f) = 100.0f;
options->Add<FloatOption>(kTemperatureVisitOffsetId, -0.99999f, 1000.0f) =
0.0f;
options->Add<BoolOption>(kNoiseId) = false;
options->Add<BoolOption>(kVerboseStatsId) = false;
options->Add<FloatOption>(kSmartPruningFactorId, 0.0f, 10.0f) = 1.33f;
std::vector<std::string> fpu_strategy = {"reduction", "absolute"};
options->Add<ChoiceOption>(kFpuStrategyId, fpu_strategy) = "reduction";
options->Add<FloatOption>(kFpuReductionId, -100.0f, 100.0f) = 0.0f;
options->Add<FloatOption>(kFpuValueId, -1.0f, 1.0f) = -1.0f;
options->Add<IntOption>(kCacheHistoryLengthId, 0, 7) = 7;
options->Add<FloatOption>(kPolicySoftmaxTempId, 0.1f, 10.0f) = 1.0f;
options->Add<IntOption>(kMaxCollisionEventsId, 1, 1024) = 1;
Expand All @@ -172,7 +203,10 @@ SearchParams::SearchParams(const OptionsDict& options)
kCpuctFactor(options.Get<float>(kCpuctFactorId.GetId())),
kNoise(options.Get<bool>(kNoiseId.GetId())),
kSmartPruningFactor(options.Get<float>(kSmartPruningFactorId.GetId())),
kFpuAbsolute(options.Get<std::string>(kFpuStrategyId.GetId()) ==
"absolute"),
kFpuReduction(options.Get<float>(kFpuReductionId.GetId())),
kFpuValue(options.Get<float>(kFpuValueId.GetId())),
kCacheHistoryLength(options.Get<int>(kCacheHistoryLengthId.GetId())),
kPolicySoftmaxTemp(options.Get<float>(kPolicySoftmaxTempId.GetId())),
kMaxCollisionEvents(options.Get<int>(kMaxCollisionEventsId.GetId())),
Expand Down
19 changes: 19 additions & 0 deletions src/mcts/params.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,12 +60,24 @@ class SearchParams {
int GetTempDecayMoves() const {
return options_.Get<int>(kTempDecayMovesId.GetId());
}
// Move number from which the endgame temperature is used (0 means the cutoff
// is disabled). Read from the options dictionary on every call.
int GetTemperatureCutoffMove() const {
  const int cutoff_move = options_.Get<int>(kTemperatureCutoffMoveId.GetId());
  return cutoff_move;
}
// Temperature applied once the cutoff move has been reached. Read from the
// options dictionary on every call.
float GetTemperatureEndgame() const {
  const float endgame_temperature =
      options_.Get<float>(kTemperatureEndgameId.GetId());
  return endgame_temperature;
}
// Cutoff used to drop clearly worse moves from temperature-based selection.
// Read from the options dictionary on every call.
float GetTemperatureWinpctCutoff() const {
  const float winpct_cutoff =
      options_.Get<float>(kTemperatureWinpctCutoffId.GetId());
  return winpct_cutoff;
}

// Returns the noise flag cached from the options at construction.
bool GetNoise() const { return kNoise; }
// Whether verbose move statistics were requested. Read from the options
// dictionary on every call rather than cached.
bool GetVerboseStats() const {
  const bool verbose = options_.Get<bool>(kVerboseStatsId.GetId());
  return verbose;
}
// Returns the smart pruning factor cached from the options at construction.
float GetSmartPruningFactor() const { return kSmartPruningFactor; }
// True when the FPU strategy option equals "absolute"; the comparison is done
// once in the constructor and cached.
bool GetFpuAbsolute() const { return kFpuAbsolute; }
// Returns the FPU reduction value cached from the options at construction.
float GetFpuReduction() const { return kFpuReduction; }
// Returns the absolute FPU value cached from the options at construction.
float GetFpuValue() const { return kFpuValue; }
// Returns the cache history length cached from the options at construction.
int GetCacheHistoryLength() const { return kCacheHistoryLength; }
// Returns the policy softmax temperature cached from the options at
// construction.
float GetPolicySoftmaxTemp() const { return kPolicySoftmaxTemp; }
// Returns the maximum collision events value cached from the options at
// construction.
int GetMaxCollisionEvents() const { return kMaxCollisionEvents; }
Expand All @@ -85,11 +97,16 @@ class SearchParams {
static const OptionId kCpuctFactorId;
static const OptionId kTemperatureId;
static const OptionId kTempDecayMovesId;
static const OptionId kTemperatureCutoffMoveId;
static const OptionId kTemperatureEndgameId;
static const OptionId kTemperatureWinpctCutoffId;
static const OptionId kTemperatureVisitOffsetId;
static const OptionId kNoiseId;
static const OptionId kVerboseStatsId;
static const OptionId kSmartPruningFactorId;
static const OptionId kFpuStrategyId;
static const OptionId kFpuReductionId;
static const OptionId kFpuValueId;
static const OptionId kCacheHistoryLengthId;
static const OptionId kPolicySoftmaxTempId;
static const OptionId kMaxCollisionEventsId;
Expand All @@ -112,7 +129,9 @@ class SearchParams {
const float kCpuctFactor;
const bool kNoise;
const float kSmartPruningFactor;
const bool kFpuAbsolute;
const float kFpuReduction;
const float kFpuValue;
const int kCacheHistoryLength;
const float kPolicySoftmaxTemp;
const int kMaxCollisionEvents;
Expand Down
71 changes: 40 additions & 31 deletions src/mcts/search.cc
Original file line number Diff line number Diff line change
Expand Up @@ -184,35 +184,40 @@ int64_t Search::GetTimeToDeadline() const {
}

namespace {
// Returns the "first play urgency" eval to use for unvisited children of
// |node|.
//  * "absolute" strategy: the configured fpu-value, independent of the node.
//  * otherwise: the negated Q of |node|, lowered by
//    fpu-reduction * sqrt(visited policy) unless the reduction is zero or
//    |node| is the root and noise is enabled.
inline float GetFpu(const SearchParams& params, Node* node, bool is_root_node) {
  if (params.GetFpuAbsolute()) return params.GetFpuValue();

  const bool noisy_root = is_root_node && params.GetNoise();
  if (noisy_root || !params.GetFpuReduction()) return -node->GetQ();

  return -node->GetQ() -
         params.GetFpuReduction() * std::sqrt(node->GetVisitedPolicy());
}

// Computes the exploration constant for a node with N visits: the configured
// cpuct plus, when the cpuct factor is non-zero,
// factor * log((N + base) / base).
inline float ComputeCpuct(const SearchParams& params, uint32_t N) {
  const float cpuct_init = params.GetCpuct();
  const float factor = params.GetCpuctFactor();
  const float cpuct_base = params.GetCpuctBase();

  float growth = 0.0f;
  if (factor) growth = factor * std::log((N + cpuct_base) / cpuct_base);
  return cpuct_init + growth;
}

} // namespace

std::vector<std::string> Search::GetVerboseStats(Node* node,
bool is_black_to_move) const {
const float parent_q =
-node->GetQ() -
params_.GetFpuReduction() * std::sqrt(node->GetVisitedPolicy());
const float fpu = GetFpu(params_, node, node == root_node_);
const float cpuct = ComputeCpuct(params_, node->GetN());
const float U_coeff =
cpuct * std::sqrt(std::max(node->GetChildrenVisits(), 1u));

std::vector<EdgeAndNode> edges;
for (const auto& edge : node->Edges()) edges.push_back(edge);

std::sort(edges.begin(), edges.end(),
[&parent_q, &U_coeff](EdgeAndNode a, EdgeAndNode b) {
return std::forward_as_tuple(a.GetN(),
a.GetQ(parent_q) + a.GetU(U_coeff)) <
std::forward_as_tuple(b.GetN(),
b.GetQ(parent_q) + b.GetU(U_coeff));
});
std::sort(
edges.begin(), edges.end(),
[&fpu, &U_coeff](EdgeAndNode a, EdgeAndNode b) {
return std::forward_as_tuple(a.GetN(), a.GetQ(fpu) + a.GetU(U_coeff)) <
std::forward_as_tuple(b.GetN(), b.GetQ(fpu) + b.GetU(U_coeff));
});

std::vector<std::string> infos;
for (const auto& edge : edges) {
Expand All @@ -230,14 +235,14 @@ std::vector<std::string> Search::GetVerboseStats(Node* node,
oss << "(P: " << std::setw(5) << std::setprecision(2) << edge.GetP() * 100
<< "%) ";

oss << "(Q: " << std::setw(8) << std::setprecision(5) << edge.GetQ(parent_q)
oss << "(Q: " << std::setw(8) << std::setprecision(5) << edge.GetQ(fpu)
<< ") ";

oss << "(U: " << std::setw(6) << std::setprecision(5) << edge.GetU(U_coeff)
<< ") ";

oss << "(Q+U: " << std::setw(8) << std::setprecision(5)
<< edge.GetQ(parent_q) + edge.GetU(U_coeff) << ") ";
<< edge.GetQ(fpu) + edge.GetU(U_coeff) << ") ";

oss << "(V: ";
optional<float> v;
Expand Down Expand Up @@ -456,8 +461,11 @@ void Search::EnsureBestMoveKnown() REQUIRES(nodes_mutex_)
if (!root_node_->HasChildren()) return;

float temperature = params_.GetTemperature();
if (temperature && params_.GetTempDecayMoves()) {
int moves = played_history_.Last().GetGamePly() / 2;
const int cutoff_move = params_.GetTemperatureCutoffMove();
const int moves = played_history_.Last().GetGamePly() / 2;
if (cutoff_move && (moves + 1) >= cutoff_move) {
temperature = params_.GetTemperatureEndgame();
} else if (temperature && params_.GetTempDecayMoves()) {
if (moves >= params_.GetTempDecayMoves()) {
temperature = 0.0;
} else {
Expand Down Expand Up @@ -525,27 +533,32 @@ EdgeAndNode Search::GetBestChildWithTemperature(Node* parent,
float sum = 0.0;
float max_n = 0.0;
float offset = params_.GetTemperatureVisitOffset();
float max_eval = -1.0f;
const float fpu = GetFpu(params_, parent, parent == root_node_);

for (auto edge : parent->Edges()) {
if (parent == root_node_ && !root_limit.empty() &&
std::find(root_limit.begin(), root_limit.end(), edge.GetMove()) ==
root_limit.end()) {
continue;
}
if (edge.GetN() + offset > max_n) {
max_n = edge.GetN() + offset;
}
if (edge.GetN() + offset > max_n) max_n = edge.GetN() + offset;
if (edge.GetQ(fpu) > max_eval) max_eval = edge.GetQ(fpu);
}

// No move had enough visits for temperature, so use default child criteria
if (max_n <= 0.0f) return GetBestChildNoTemperature(parent);

// TODO(crem) Simplify this code when samplers.h is merged.
const float min_eval =
max_eval - params_.GetTemperatureWinpctCutoff() / 50.0f;
for (auto edge : parent->Edges()) {
if (parent == root_node_ && !root_limit.empty() &&
std::find(root_limit.begin(), root_limit.end(), edge.GetMove()) ==
root_limit.end()) {
continue;
}
if (edge.GetQ(fpu) < min_eval) continue;
sum += std::pow(
std::max(0.0f, (static_cast<float>(edge.GetN()) + offset) / max_n),
1 / temperature);
Expand All @@ -564,6 +577,7 @@ EdgeAndNode Search::GetBestChildWithTemperature(Node* parent,
root_limit.end()) {
continue;
}
if (edge.GetQ(fpu) < min_eval) continue;
if (idx-- == 0) return edge;
}
assert(false);
Expand Down Expand Up @@ -844,11 +858,7 @@ SearchWorker::NodeToProcess SearchWorker::PickNodeToExtend(
float best = std::numeric_limits<float>::lowest();
float second_best = std::numeric_limits<float>::lowest();
int possible_moves = 0;
float parent_q =
((is_root_node && params_.GetNoise()) || !params_.GetFpuReduction())
? -node->GetQ()
: -node->GetQ() - params_.GetFpuReduction() *
std::sqrt(node->GetVisitedPolicy());
const float fpu = GetFpu(params_, node, is_root_node);
for (auto child : node->Edges()) {
if (is_root_node) {
// If there's no chance to catch up to the current best node with
Expand All @@ -868,7 +878,7 @@ SearchWorker::NodeToProcess SearchWorker::PickNodeToExtend(
}
++possible_moves;
}
float Q = child.GetQ(parent_q);
float Q = child.GetQ(fpu);
const float score = child.GetU(puct_mult) + Q;
if (score > best) {
second_best = best;
Expand All @@ -882,9 +892,9 @@ SearchWorker::NodeToProcess SearchWorker::PickNodeToExtend(
}

if (second_best_edge) {
collision_limit = std::min(
collision_limit,
best_edge.GetVisitsToReachU(second_best, puct_mult, parent_q));
collision_limit =
std::min(collision_limit,
best_edge.GetVisitsToReachU(second_best, puct_mult, fpu));
assert(collision_limit >= 1);
second_best_edge.Reset();
}
Expand Down Expand Up @@ -1041,12 +1051,11 @@ int SearchWorker::PrefetchIntoCache(Node* node, int budget) {
std::vector<ScoredEdge> scores;
const float cpuct = ComputeCpuct(params_, node->GetN());
float puct_mult = cpuct * std::sqrt(std::max(node->GetChildrenVisits(), 1u));
// FPU reduction is not taken into account.
const float parent_q = -node->GetQ();
const float fpu = GetFpu(params_, node, node == search_->root_node_);
for (auto edge : node->Edges()) {
if (edge.GetP() == 0.0f) continue;
// Flip the sign of a score to be able to easily sort.
scores.emplace_back(-edge.GetU(puct_mult) - edge.GetQ(parent_q), edge);
scores.emplace_back(-edge.GetU(puct_mult) - edge.GetQ(fpu), edge);
}

size_t first_unsorted_index = 0;
Expand Down Expand Up @@ -1076,7 +1085,7 @@ int SearchWorker::PrefetchIntoCache(Node* node, int budget) {
if (i != scores.size() - 1) {
// Sign of the score was flipped for sorting, so flip it back.
const float next_score = -scores[i + 1].first;
const float q = edge.GetQ(-parent_q);
const float q = edge.GetQ(-fpu);
if (next_score > q) {
budget_to_spend =
std::min(budget, int(edge.GetP() * puct_mult / (next_score - q) -
Expand Down