Fix policy softmax accuracy if masking is enabled. (#912)

ddobbelaere · Tilps · commit 7bb95ca4b821 · 2019-08-03T21:26:54.000+10:00
* Do softmax outside backend on set of legal moves.
* Remove policy softmax from blas backend.
* Remove policy softmax from CUDA backend.
* Remove policy softmax from OpenCL backend.
* Remove policy softmax from TensorFlow backend.
* Use FastExp for policy softmax calculations.
* Fix for negative exponentials.
* Revert "Fix for negative exponentials." This reverts commit 9fb73d0.
* Fuse softmax with softmax temperature.
* Modify random backend policy value distribution.
* Comment improvements.
diff --git a/src/mcts/search.cc b/src/mcts/search.cc
@@ -1235,16 +1235,23 @@ void SearchWorker::FetchSingleNodeResult(NodeToProcess* node_to_process,
   node_to_process->v = -computation_->GetQVal(idx_in_computation);
   node_to_process->d = computation_->GetDVal(idx_in_computation);
   // ...and secondly, the policy data.
+  // Calculate maximum first.
+  float max_p = -std::numeric_limits<float>::infinity();
+  for (auto edge : node->Edges()) {
+    max_p =
+        std::max(max_p, computation_->GetPVal(idx_in_computation,
+                                              edge.GetMove().as_nn_index()));
+  }
   float total = 0.0;
   for (auto edge : node->Edges()) {
     float p =
         computation_->GetPVal(idx_in_computation, edge.GetMove().as_nn_index());
-    if (params_.GetPolicySoftmaxTemp() != 1.0f) {
-      // Flush denormals to zero.
-      p = p < 1.17549435E-38
-              ? 0.0
-              : FastPow2(FastLog2(p) / params_.GetPolicySoftmaxTemp());
-    }
+    // Perform softmax and take into account policy softmax temperature T.
+    // Note that we want to calculate (exp(p-max_p))^(1/T) = exp((p-max_p)/T).
+    p = FastExp((p - max_p) / params_.GetPolicySoftmaxTemp());
+
+    // Note that p now lies in [0, 1], so it is safe to store it in compressed
+    // format. Normalization happens later.
     edge.edge()->SetP(p);
     // Edge::SetP does some rounding, so only add to the total after rounding.
     total += edge.edge()->GetP();
diff --git a/src/neural/blas/network_blas.cc b/src/neural/blas/network_blas.cc
@@ -303,9 +303,8 @@ void BlasComputation::ComputeBlocking() {
       std::vector<float> policy(num_output_policy);
 
       // Get the moves
-      SoftmaxActivation(num_output_policy, &output_pol[j * num_output_policy],
-                        policy.data());
-
+      policy.assign(output_pol.begin() + j * num_output_policy,
+                    output_pol.begin() + (j + 1) * num_output_policy);
       policies_.emplace_back(std::move(policy));
     }
 
@@ -418,8 +417,8 @@ BlasNetwork::BlasNetwork(const WeightsFile& file, const OptionsDict& options)
   CERR << "MKL " << versionbuf << ".";
   MKLVersion version;
   mkl_get_version(&version);
-  CERR << "MKL platform: " << version.Platform << ", processor: "
-       << version.Processor << ".";
+  CERR << "MKL platform: " << version.Platform
+       << ", processor: " << version.Processor << ".";
   CERR << "MKL can use up to " << max_procs << " thread(s).";
   CERR << "MKL using " << blas_cores << " thread(s) for this backend.";
 #endif
diff --git a/src/neural/cuda/network_cudnn.cc b/src/neural/cuda/network_cudnn.cc
@@ -99,8 +99,8 @@ struct InputsOutputs {
     ReportCUDAErrors(cudaMalloc(
         &op_policy_mem_gpu_, maxBatchSize * kNumOutputPolicy * sizeof(float)));
 
-    ReportCUDAErrors(
-        cudaHostAlloc(&op_value_mem_, maxBatchSize * (wdl ? 3 : 1) * sizeof(float),
+    ReportCUDAErrors(cudaHostAlloc(&op_value_mem_,
+                                   maxBatchSize * (wdl ? 3 : 1) * sizeof(float),
                                    cudaHostAllocMapped));
     ReportCUDAErrors(
         cudaHostGetDevicePointer(&op_value_mem_gpu_, op_value_mem_, 0));
@@ -239,8 +239,7 @@ class CudnnNetwork : public Network {
       }
 
       // Override if forced from backend option
-      if (!options.IsDefault<bool>("nhwc")) 
-          nhwc_ = options.Get<bool>("nhwc");
+      if (!options.IsDefault<bool>("nhwc")) nhwc_ = options.Get<bool>("nhwc");
 
       if (nhwc_)
         ReportCUBLASErrors(cublasSetMathMode(cublas_, CUBLAS_TENSOR_OP_MATH));
@@ -377,10 +376,6 @@ class CudnnNetwork : public Network {
       policymap->LoadWeights(kConvPolicyMap, scratch_mem_);
 
       network_.emplace_back(std::move(policymap));
-
-      auto softmaxPol =
-          std::make_unique<SoftMaxLayer<DataType>>(getLastLayer());
-      network_.emplace_back(std::move(softmaxPol));
     } else {
       auto convPol = std::make_unique<ConvLayer<DataType>>(
           resi_last_, weights.policy.biases.size(), 8, 8, 1, kNumFilters, true,
@@ -394,10 +389,6 @@ class CudnnNetwork : public Network {
       FCPol->LoadWeights(&weights.ip_pol_w[0], &weights.ip_pol_b[0],
                          scratch_mem_);
       network_.emplace_back(std::move(FCPol));
-
-      auto softmaxPol =
-          std::make_unique<SoftMaxLayer<DataType>>(getLastLayer());
-      network_.emplace_back(std::move(softmaxPol));
     }
     policy_out_ = getLastLayer();
 
@@ -533,39 +524,32 @@ class CudnnNetwork : public Network {
                           scratch_mem_, scratch_size_, cudnn_,
                           cublas_);  // conv1
 
-      network_[l++]->Eval(batchSize, tensor_mem_[0], tensor_mem_[1], nullptr,
-                          scratch_mem_, scratch_size_, cudnn_,
-                          cublas_);  // pol FC
       if (fp16) {
-        // TODO: consider softmax layer that writes directly to fp32
-        network_[l++]->Eval(batchSize, tensor_mem_[1], tensor_mem_[0], nullptr,
+        network_[l++]->Eval(batchSize, tensor_mem_[0], tensor_mem_[1], nullptr,
                             scratch_mem_, scratch_size_, cudnn_,
-                            cublas_);  // pol softmax
-        copyTypeConverted(opPol, (half*)(tensor_mem_[1]),
+                            cublas_);  // pol FC
+        copyTypeConverted(opPol, (half*)(tensor_mem_[0]),
                           batchSize * kNumOutputPolicy);  // POLICY
       } else {
-        network_[l++]->Eval(batchSize, (DataType*)opPol, tensor_mem_[0],
+        network_[l++]->Eval(batchSize, (DataType*)opPol, tensor_mem_[1],
                             nullptr, scratch_mem_, scratch_size_, cudnn_,
-                            cublas_);  // pol softmax  // POLICY
+                            cublas_);  // pol FC  // POLICY
       }
     } else {
       network_[l++]->Eval(batchSize, tensor_mem_[0], tensor_mem_[2], nullptr,
                           scratch_mem_, scratch_size_, cudnn_,
                           cublas_);  // pol conv
-      network_[l++]->Eval(batchSize, tensor_mem_[1], tensor_mem_[0], nullptr,
-                          scratch_mem_, scratch_size_, cudnn_,
-                          cublas_);  // pol FC
+
       if (fp16) {
-        // TODO: consider softmax layer that writes directly to fp32.
-        network_[l++]->Eval(batchSize, tensor_mem_[0], tensor_mem_[1], nullptr,
+        network_[l++]->Eval(batchSize, tensor_mem_[1], tensor_mem_[0], nullptr,
                             scratch_mem_, scratch_size_, cudnn_,
-                            cublas_);  // pol softmax
-        copyTypeConverted(opPol, (half*)(tensor_mem_[0]),
+                            cublas_);  // pol FC
+        copyTypeConverted(opPol, (half*)(tensor_mem_[1]),
                           batchSize * kNumOutputPolicy);  // POLICY
       } else {
-        network_[l++]->Eval(batchSize, (DataType*)opPol, tensor_mem_[1],
+        network_[l++]->Eval(batchSize, (DataType*)opPol, tensor_mem_[0],
                             nullptr, scratch_mem_, scratch_size_, cudnn_,
-                            cublas_);  // pol softmax  // POLICY
+                            cublas_);  // pol FC  // POLICY
       }
     }
 
diff --git a/src/neural/network_random.cc b/src/neural/network_random.cc
@@ -78,9 +78,16 @@ class RandomNetworkComputation : public NetworkComputation {
 
   float GetPVal(int sample, int move_id) const override {
     if (uniform_mode_) return 1.0f;
+
+    // Note that this function returns the policy value *before* softmax.
+    // We choose a uniform distribution over [0, a], implying that the
+    // proportion between the smallest and largest policy value *after* softmax
+    // exponentiation (but before normalization) is equal to S = exp(-a).
+    // Choosing a = 3.0 leads to S = 0.05.
+    const float a = 3.0f;
     return (HashCat({inputs_[sample], static_cast<unsigned long>(move_id)}) %
-            10000) /
-           10000.0;
+            10000) *
+           (a / 10000.0f);
   }
 
  private:
@@ -97,7 +104,8 @@ class RandomNetwork : public Network {
         seed_(options.GetOrDefault<int>("seed", 0)),
         uniform_mode_(options.GetOrDefault<bool>("uniform", false)) {}
   std::unique_ptr<NetworkComputation> NewComputation() override {
-    return std::make_unique<RandomNetworkComputation>(delay_ms_, seed_, uniform_mode_);
+    return std::make_unique<RandomNetworkComputation>(delay_ms_, seed_,
+                                                      uniform_mode_);
   }
 
  private:
diff --git a/src/neural/network_tf.cc b/src/neural/network_tf.cc
@@ -144,7 +144,6 @@ std::pair<Output, Output> MakeNetwork(const Scope& scope, Input input,
   ip_pol_w = Reshape(scope, ip_pol_w, Const(scope, {32 * 8 * 8, 1858}));
   auto ip_pol_b = MakeConst(scope, {1858}, weights.ip_pol_b);
   auto policy_fc = Add(scope, MatMul(scope, conv_pol, ip_pol_w), ip_pol_b);
-  auto policy_head = Softmax(scope, policy_fc);
 
   // Value head
   auto conv_val =
@@ -163,7 +162,7 @@ std::pair<Output, Output> MakeNetwork(const Scope& scope, Input input,
   auto value_head =
       Tanh(scope, Add(scope, MatMul(scope, value_flow, ip2_val_w), ip2_val_b));
 
-  return {policy_head, value_head};
+  return {policy_fc, value_head};
 }
 
 template <bool CPU>
diff --git a/src/neural/opencl/network_opencl.cc b/src/neural/opencl/network_opencl.cc
@@ -102,12 +102,11 @@ class OpenCLComputation : public NetworkComputation {
       buffers_->forward(input_data, output_pol, output_val, batch_size);
 
       for (size_t j = 0; j < batch_size; j++) {
-        std::vector<float> policy(weights_.num_output_policies);
+        std::vector<float> policy(num_output_policies);
 
         // Get the moves.
-        SoftmaxActivation(num_output_policies,
-                          &output_pol[j * num_output_policies], policy.data());
-
+        policy.assign(output_pol.begin() + j * num_output_policies,
+                      output_pol.begin() + (j + 1) * num_output_policies);
         policies_.emplace_back(std::move(policy));
 
         // Now get the score.
diff --git a/src/utils/fastmath.h b/src/utils/fastmath.h
@@ -65,4 +65,7 @@ inline float FastLog(const float a) {
   return 0.6931471805599453f * FastLog2(a);
 }
 
+// Fast approximate exp(x). Does only limited range checking.
+inline float FastExp(const float a) { return FastPow2(1.442695040f * a); }
+
 }  // namespace lczero

Original file line number	Diff line number	Diff line change
`@@ -65,4 +65,7 @@ inline float FastLog(const float a) {`
`65`	`65`	`return 0.6931471805599453f * FastLog2(a);`
`66`	`66`	`}`
`67`	`67`
	`68`	`+// Fast approximate exp(x). Does only limited range checking.`
	`69`	`+inline float FastExp(const float a) { return FastPow2(1.442695040f * a); }`
	`70`	`+`
`68`	`71`	`} // namespace lczero`