Add cuQuantum test (but it fails the numeric diff test)

jaeyoo · jaeyoo · commit f97c496bfd93 · 2023-03-26T23:15:58.000-07:00
diff --git a/WORKSPACE b/WORKSPACE
@@ -34,9 +34,9 @@ cc_library(
 # TODO: After merging this patch later into qsim mainstream, remove this and uncomment the above.
 http_archive(
     name = "qsim",
-    sha256 = "97b26e1a8fe13cfa465611f2618ade00f539480c6a7849f020fbee3582686bbf",
-    strip_prefix = "qsim-0.15.0-dev20230317",
-    urls = ["https://github.com/jaeyoo/qsim/archive/refs/tags/v0.15.0+dev20230317.tar.gz"],
+    sha256 = "",  # NOTE(review): checksum left empty, so the tarball is presumably not integrity-checked; pin its SHA-256 before merging.
+    strip_prefix = "qsim-0.15.0-dev20230327_v3",
+    urls = ["https://github.com/jaeyoo/qsim/archive/refs/tags/v0.15.0+dev20230327_v3.tar.gz"],
 )
 
 http_archive(
@@ -81,3 +81,21 @@ bind(
     actual = "@six_archive//:six",
 )
 
+new_local_repository(  # Exposes a locally unpacked cuQuantum archive as @cuquantum_libs.
+    name = "cuquantum_libs",
+    path = "/usr/local/google/home/jaeyoo/workspace/cuquantum-linux-x86_64-22.11.0.13-archive",  # NOTE(review): absolute path under the author's home directory; will not resolve on other machines.
+    build_file_content = """
+cc_library(
+    name = "custatevec_headers",
+    srcs = ["include/custatevec.h"],
+    visibility = ["//visibility:public"],
+)
+
+cc_library(
+    name = "custatevec",
+    srcs = ["lib/libcustatevec.so"],
+    visibility = ["//visibility:public"],
+)
+""",  # Inline BUILD file: wraps include/custatevec.h and lib/libcustatevec.so as public cc_library targets.
+)
+
diff --git a/tensorflow_quantum/core/ops/BUILD b/tensorflow_quantum/core/ops/BUILD
@@ -652,11 +652,25 @@ py_library(
     ],
 )
 
+py_library(  # Python wrapper for the cuQuantum simulate op; the compiled .so ships as runtime data.
+    name = "tfq_simulate_ops_cuquantum_py",
+    srcs = ["tfq_simulate_ops_cuquantum.py"],
+    data = [
+        ":_tfq_simulate_ops_cuquantum.so",
+    ],
+    srcs_version = "PY3",
+    deps = [
+        # tensorflow framework for wrappers
+        ":load_module",
+    ],
+)
+
 py_test(
-    name = "tfq_simulate_ops_cuda_test",
-    srcs = ["tfq_simulate_ops_cuda_test.py"],
+    name = "tfq_simulate_ops_gpu_test",
+    srcs = ["tfq_simulate_ops_gpu_test.py"],
     deps = [
         ":tfq_simulate_ops_cuda_py",
+        ":tfq_simulate_ops_cuquantum_py",
         ":tfq_simulate_ops_py",
         "//tensorflow_quantum/python:util",
     ],
@@ -726,13 +740,94 @@ cc_binary(
         "//tensorflow_quantum/core/proto:program_cc_proto",
         "//tensorflow_quantum/core/src:circuit_parser_qsim",
         "//tensorflow_quantum/core/src:util_qsim",
+        "@eigen//:eigen3",
+        # "@local_cuda//:cuda_headers"
+        # tensorflow core framework
+        # tensorflow core lib
+        # tensorflow core protos
+    ] + if_cuda_is_configured([
+        ":cuda",
+        "@local_config_cuda//cuda:cuda_headers",
         "@qsim//lib:qsim_cuda_lib",
+    ]),
+    # alwayslink=1,
+)
+
+cc_binary(  # Shared object holding the cuQuantum expectation op; loaded by tfq_simulate_ops_cuquantum.py.
+    name = "_tfq_simulate_ops_cuquantum.so",
+    srcs = [
+        "tfq_simulate_expectation_op_cuquantum.cu.cc",
+    ],
+    linkshared = 1,
+    features = select({
+        ":windows": ["windows_export_all_symbols"],
+        "//conditions:default": [],
+    }),
+    copts = select({
+        ":windows": [
+            "/D__CLANG_SUPPORT_DYN_ANNOTATION__",
+            "/D_USE_MATH_DEFINES",
+            "/DEIGEN_MPL2_ONLY",
+            "/DEIGEN_MAX_ALIGN_BYTES=64",
+            "/DEIGEN_HAS_TYPE_TRAITS=0",
+            "/DTF_USE_SNAPPY",
+            "/showIncludes",
+            "/MD",
+            "/O2",
+            "/DNDEBUG",
+            "/w",
+            "-DWIN32_LEAN_AND_MEAN",
+            "-DNOGDI",
+            "/d2ReducedOptimizeHugeFunctions",
+            "/arch:AVX",
+            "/std:c++17",
+            "-DTENSORFLOW_MONOLITHIC_BUILD",
+            "/DPLATFORM_WINDOWS",
+            "/DEIGEN_HAS_C99_MATH",
+            "/DTENSORFLOW_USE_EIGEN_THREADPOOL",
+            "/DEIGEN_AVOID_STL_ARRAY",
+            "/Iexternal/gemmlowp",
+            "/wd4018",
+            "/wd4577",
+            "/DNOGDI",
+            "/UTF_COMPILE_LIBRARY",
+        ],
+        "//conditions:default": [
+            "-Iexternal/local_cuda/cuda/include",
+            # "--cuda-gpu-arch=sm_86",
+            # "-L/usr/local/cuda/lib64",
+            # "-lcudart_static",
+            # "-ldl",
+            # "-lrt",
+            "-pthread",
+            "-std=c++17",
+            "-D_GLIBCXX_USE_CXX11_ABI=1",
+            "-O3",
+            "-Iexternal/cuda_headers",
+            "-DNV_CUDNN_DISABLE_EXCEPTION",
+            # "-fpermissive",
+        ],
+    }) + if_cuda_is_configured(["-DTENSORFLOW_USE_NVCC=1", "-DGOOGLE_CUDA=1", "-x cuda", "-nvcc_options=relaxed-constexpr", "-nvcc_options=ftz=true"]),
+    deps = [
+        # cirq cc proto
+        "//tensorflow_quantum/core/ops:parse_context",
+        "//tensorflow_quantum/core/ops:tfq_simulate_utils",
+        "//tensorflow_quantum/core/proto:pauli_sum_cc_proto",
+        "//tensorflow_quantum/core/proto:program_cc_proto",
+        "//tensorflow_quantum/core/src:circuit_parser_qsim",
+        "//tensorflow_quantum/core/src:util_qsim",
         "@eigen//:eigen3",
         # "@local_cuda//:cuda_headers"
         # tensorflow core framework
         # tensorflow core lib
         # tensorflow core protos
-    ] + if_cuda_is_configured([":cuda",  "@local_config_cuda//cuda:cuda_headers"]),
+    ] + if_cuda_is_configured([  # GPU-only deps; presumably appended only when CUDA is configured for the build.
+        ":cuda",
+        "@cuquantum_libs//:custatevec",
+        "@cuquantum_libs//:custatevec_headers",
+        "@local_config_cuda//cuda:cuda_headers",
+        "@qsim//lib:qsim_cuquantum_lib",
+    ]),
     # alwayslink=1,
 )
 
diff --git a/tensorflow_quantum/core/ops/tfq_simulate_expectation_op_cuquantum.cu.cc b/tensorflow_quantum/core/ops/tfq_simulate_expectation_op_cuquantum.cu.cc
@@ -14,8 +14,8 @@ limitations under the License.
 #include <vector>
 
 #include <chrono>
-#include <custatevec.h>
 
+#include "../cuquantum_libs/include/custatevec.h"
 #include "../qsim/lib/circuit.h"
 #include "../qsim/lib/gate_appl.h"
 #include "../qsim/lib/gates_cirq.h"
@@ -48,7 +48,7 @@ typedef qsim::Circuit<QsimGate> QsimCircuit;
 class TfqSimulateExpectationOpCuQuantum : public tensorflow::OpKernel {
  public:
   explicit TfqSimulateExpectationOpCuQuantum(tensorflow::OpKernelConstruction* context)
-      : OpKernel(context) {  }
+      : OpKernel(context) {}
 
   void Compute(tensorflow::OpKernelContext* context) override {
     // TODO (mbbrough): add more dimension checks for other inputs here.
@@ -147,7 +147,7 @@ class TfqSimulateExpectationOpCuQuantum : public tensorflow::OpKernel {
     // Launch the cuda kernel.
     // Begin simulation.
     int largest_nq = 1;
-    Simulator sim = Simulator(custatevec_handle_);
+    Simulator sim = Simulator(cublas_handle_, custatevec_handle_);
     StateSpace ss = StateSpace(cublas_handle_, custatevec_handle_);
     auto sv = ss.Create(largest_nq);
     ss.SetStateZero(sv);
@@ -207,7 +207,7 @@ class TfqSimulateExpectationOpCuQuantum : public tensorflow::OpKernel {
       int cur_op_index;
 
       // Launch custatevec, begin simulation.
-      auto sim = Simulator(custatevec_handle_);
+      auto sim = Simulator(cublas_handle_, custatevec_handle_);
       auto ss = StateSpace(cublas_handle_, custatevec_handle_);
       auto sv = ss.Create(largest_nq);
       auto scratch = ss.Create(largest_nq);
@@ -259,10 +259,10 @@ class TfqSimulateExpectationOpCuQuantum : public tensorflow::OpKernel {
 };
 
 REGISTER_KERNEL_BUILDER(
-    Name("TfqSimulateExpectationOpCuQuantum").Device(tensorflow::DEVICE_CPU),
-    TfqSimulateExpectationOpCuQuantumOp);
+    Name("TfqSimulateExpectationCuquantum").Device(tensorflow::DEVICE_CPU),  // The Python wrapper calls tfq_simulate_expectation_cuquantum, presumably the snake_case accessor TensorFlow derives from this op name.
+    TfqSimulateExpectationOpCuQuantum);
 
-REGISTER_OP("TfqSimulateExpectationOpCuQuantum")
+REGISTER_OP("TfqSimulateExpectationCuquantum")
     .Input("programs: string")
     .Input("symbol_names: string")
     .Input("symbol_values: float")
diff --git a/tensorflow_quantum/core/ops/tfq_simulate_ops_cuquantum.py b/tensorflow_quantum/core/ops/tfq_simulate_ops_cuquantum.py
@@ -0,0 +1,45 @@
+# Copyright 2023 The TensorFlow Quantum Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# ==============================================================================
+"""Module to register cuQuantum simulation python op."""
+import os
+import tensorflow as tf
+from tensorflow_quantum.core.ops.load_module import load_module
+
+SIM_OP_MODULE = load_module("_tfq_simulate_ops_cuquantum.so")
+
+
+def tfq_simulate_expectation(programs, symbol_names, symbol_values, pauli_sums):
+    """Calculates the expectation value of circuits wrt some operator(s).
+    Args:
+        programs: `tf.Tensor` of strings with shape [batch_size] containing
+            the string representations of the circuits to be executed.
+        symbol_names: `tf.Tensor` of strings with shape [n_params], which
+            is used to specify the order in which the values in
+            `symbol_values` should be placed inside of the circuits in
+            `programs`.
+        symbol_values: `tf.Tensor` of real numbers with shape
+            [batch_size, n_params] specifying parameter values to resolve
+            into the circuits specified by programs, following the ordering
+            dictated by `symbol_names`.
+        pauli_sums: `tf.Tensor` of strings with shape [batch_size, n_ops]
+            containing the string representation of the operators that will
+            be used on all of the circuits in the expectation calculations.
+    Returns:
+        `tf.Tensor` with shape [batch_size, n_ops] that holds the
+            expectation value for each circuit with each op applied to it
+            (after resolving the corresponding parameters in).
+    """
+    return SIM_OP_MODULE.tfq_simulate_expectation_cuquantum(  # symbol_values is cast to float32 to match the op's float input.
+        programs, symbol_names, tf.cast(symbol_values, tf.float32), pauli_sums)
diff --git a/tensorflow_quantum/core/ops/tfq_simulate_ops_gpu_test.py b/tensorflow_quantum/core/ops/tfq_simulate_ops_gpu_test.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # ==============================================================================
-"""Tests that specifically target tfq_simulate_ops_cuda."""
+"""Tests that specifically target tfq_simulate_ops_cu*."""
 import os
 import time
 import numpy as np
@@ -22,6 +22,7 @@
 
 from tensorflow_quantum.core.ops import tfq_simulate_ops
 from tensorflow_quantum.core.ops import tfq_simulate_ops_cuda
+from tensorflow_quantum.core.ops import tfq_simulate_ops_cuquantum
 from tensorflow_quantum.python import util
 
 def measure_average_runtime(fn, tag, num_samples=10):
@@ -36,7 +37,7 @@ def measure_average_runtime(fn, tag, num_samples=10):
     return avg_time, result
 
 
-class SimulateExpectationTest(tf.test.TestCase):
+class SimulateExpectationGpuTest(tf.test.TestCase):
     """Tests tfq_simulate_expectation."""
 
     def test_simulate_expectation_cpu_vs_cuda(self):
@@ -81,6 +82,47 @@ def test_simulate_expectation_cpu_vs_cuda(self):
         # CUDA op should be faster than CPU op.
         self.assertGreater(cpu_avg_time, cuda_avg_time)
 
+    def test_simulate_expectation_cpu_vs_cuquantum(self):
+        """Make sure that the CPU and GPU (cuQuantum) ops give the same results."""
+        n_qubits = 20
+        batch_size = 5
+        symbol_names = ['alpha']
+        qubits = cirq.GridQubit.rect(1, n_qubits)
+        circuit_batch, resolver_batch = \
+            util.random_symbol_circuit_resolver_batch(
+                qubits, symbol_names, batch_size)
+
+        circuit_batch_tensor = util.convert_to_tensor(circuit_batch)
+
+        symbol_values_array = np.array(
+            [[resolver[symbol]
+              for symbol in symbol_names]
+             for resolver in resolver_batch])
+
+        pauli_sums = util.random_pauli_sums(qubits, 3, batch_size)
+        pauli_sums_tensor = util.convert_to_tensor([[x] for x in pauli_sums])
+
+        cpu_avg_time, res_cpu = measure_average_runtime(
+            lambda: tfq_simulate_ops.tfq_simulate_expectation(
+                circuit_batch_tensor,
+                symbol_names, symbol_values_array.astype(np.float64),
+                pauli_sums_tensor),
+            "CPU"
+        )
+
+        cuda_avg_time, res_cuda = measure_average_runtime(
+            lambda: tfq_simulate_ops_cuquantum.tfq_simulate_expectation(
+                circuit_batch_tensor,
+                symbol_names, symbol_values_array.astype(np.float64),
+                pauli_sums_tensor),
+            "cuQuantum"
+        )
+
+        # The results should be similar within a tolerance.
+        np.testing.assert_allclose(res_cpu, res_cuda, atol=1e-5)
+
+        # cuQuantum op should be faster than CPU op (cuda_avg_time holds the cuQuantum timing).
+        self.assertGreater(cpu_avg_time, cuda_avg_time)
 
 if __name__ == "__main__":
     tf.test.main()