feat: Enable sparsity support in TRTorch

peri044 · peri044 · commit decd0edca7e8 · 2021-07-23T00:45:42.000-07:00
Signed-off-by: Dheeraj Peri <peri.dheeraj@gmail.com>
diff --git a/core/conversion/conversionctx/ConversionCtx.cpp b/core/conversion/conversionctx/ConversionCtx.cpp
@@ -86,6 +86,10 @@ ConversionCtx::ConversionCtx(BuilderSettings build_settings)
     cfg->clearFlag(nvinfer1::BuilderFlag::kTF32);
   }
 
+  if (settings.sparse_weights) {
+    cfg->setFlag(nvinfer1::BuilderFlag::kSPARSE_WEIGHTS);
+  }
+
   if (settings.refit) {
     cfg->setFlag(nvinfer1::BuilderFlag::kREFIT);
   }
diff --git a/core/conversion/conversionctx/ConversionCtx.h b/core/conversion/conversionctx/ConversionCtx.h
@@ -24,6 +24,7 @@ struct Device {
 
 struct BuilderSettings {
   nvinfer1::DataType op_precision = nvinfer1::DataType::kFLOAT;
+  bool sparse_weights = false;
   bool disable_tf32 = false;
   bool refit = false;
   bool debug = false;
diff --git a/cpp/api/include/trtorch/trtorch.h b/cpp/api/include/trtorch/trtorch.h
@@ -248,6 +248,11 @@ struct TRTORCH_API CompileSpec {
    */
   bool disable_tf32 = false;
 
+  /**
+   * Enable sparsity for weights of conv and FC layers
+   */
+  bool sparse_weights = false;
+
   /**
    * Build a refitable engine
    */
diff --git a/cpp/api/src/compile_spec.cpp b/cpp/api/src/compile_spec.cpp
@@ -102,6 +102,7 @@ core::CompileSpec to_internal_compile_spec(CompileSpec external) {
       internal.convert_info.engine_settings.op_precision = nvinfer1::DataType::kFLOAT;
   }
 
+  internal.convert_info.engine_settings.sparse_weights = external.sparse_weights;
   internal.convert_info.engine_settings.disable_tf32 = external.disable_tf32;
   internal.convert_info.engine_settings.refit = external.refit;
   internal.convert_info.engine_settings.debug = external.debug;
diff --git a/cpp/trtorchexec/main.cpp b/cpp/trtorchexec/main.cpp
@@ -57,12 +57,13 @@ int main(int argc, const char* argv[]) {
 
   auto compile_spec = trtorch::CompileSpec(dims);
   compile_spec.workspace_size = 1 << 24;
+  compile_spec.sparse_weights = true;
 
-  std::cout << "Checking operator support" << std::endl;
-  if (!trtorch::CheckMethodOperatorSupport(mod, "forward")) {
-    std::cerr << "Method is not currently supported by TRTorch" << std::endl;
-    return -1;
-  }
+  // std::cout << "Checking operator support" << std::endl;
+  // if (!trtorch::CheckMethodOperatorSupport(mod, "forward")) {
+  //   std::cerr << "Method is not currently supported by TRTorch" << std::endl;
+  //   return -1;
+  // }
 
   std::cout << "Compiling graph to save as TRT engine (/tmp/engine_converted_from_jit.trt)" << std::endl;
   auto engine = trtorch::ConvertGraphToTRTEngine(mod, "forward", compile_spec);
diff --git a/py/trtorch/_compile_spec.py b/py/trtorch/_compile_spec.py
@@ -156,6 +156,10 @@ def _parse_compile_spec(compile_spec: Dict[str, Any]) -> trtorch._C.CompileSpec:
     if "calibrator" in compile_spec:
         info.ptq_calibrator = compile_spec["calibrator"]
 
+    if "sparse_weights" in compile_spec:
+        assert isinstance(compile_spec["sparse_weights"], bool)
+        info.sparse_weights = compile_spec["sparse_weights"]
+
     if "disable_tf32" in compile_spec:
         assert isinstance(compile_spec["disable_tf32"], bool)
         info.disable_tf32 = compile_spec["disable_tf32"]
@@ -237,6 +241,7 @@ def TensorRTCompileSpec(compile_spec: Dict[str, Any]) -> torch.classes.tensorrt.
                             "allow_gpu_fallback": false, # (DLA only) Allow layers unsupported on DLA to run on GPU
                         },
                         "op_precision": torch.half, # Operating precision set to FP16
+                        "sparse_weights": False, # Enable sparsity for convolution and fully connected layers
                         "disable_tf32": False, # Force FP32 layers to use traditional as FP32 format vs the default behavior of rounding the inputs to 10-bit mantissas before multiplying, but accumulates the sum using 23-bit mantissas
                         "refit": False, # enable refit
                         "debug": False, # enable debuggable engine
diff --git a/py/trtorch/_compiler.py b/py/trtorch/_compiler.py
@@ -41,6 +41,8 @@ def compile(module: torch.jit.ScriptModule, compile_spec: Any) -> torch.jit.Scri
                         "allow_gpu_fallback": false, # (DLA only) Allow layers unsupported on DLA to run on GPU
                     },
                     "op_precision": torch.half, # Operating precision set to FP16
+                    "disable_tf32": False, # Force FP32 layers to use traditional as FP32 format vs the default behavior of rounding the inputs to 10-bit mantissas before multiplying, but accumulates the sum using 23-bit mantissas
+                    "sparse_weights": False, # Enable sparsity for convolution and fully connected layers
                     "refit": false, # enable refit
                     "debug": false, # enable debuggable engine
                     "strict_types": false, # kernels should strictly run in operating precision
@@ -107,6 +109,7 @@ def convert_method_to_trt_engine(module: torch.jit.ScriptModule, method_name: st
                     },
                     "op_precision": torch.half, # Operating precision set to FP16
                     "disable_tf32": False, # Force FP32 layers to use traditional as FP32 format vs the default behavior of rounding the inputs to 10-bit mantissas before multiplying, but accumulates the sum using 23-bit mantissas
+                    "sparse_weights": False, # Enable sparsity for convolution and fully connected layers
                     "refit": false, # enable refit
                     "debug": false, # enable debuggable engine
                     "strict_types": false, # kernels should strictly run in operating precision
diff --git a/py/trtorch/csrc/register_tensorrt_classes.cpp b/py/trtorch/csrc/register_tensorrt_classes.cpp
@@ -46,6 +46,7 @@ void RegisterTRTCompileSpec() {
           .def("__str__", &trtorch::pyapi::CompileSpec::stringify);
 
   ADD_FIELD_GET_SET_REGISTRATION(TRTCompileSpecTSRegistration, trtorch::pyapi::CompileSpec, op_precision);
+  ADD_FIELD_GET_SET_REGISTRATION(TRTCompileSpecTSRegistration, trtorch::pyapi::CompileSpec, sparse_weights);
   ADD_FIELD_GET_SET_REGISTRATION(TRTCompileSpecTSRegistration, trtorch::pyapi::CompileSpec, disable_tf32);
   ADD_FIELD_GET_SET_REGISTRATION(TRTCompileSpecTSRegistration, trtorch::pyapi::CompileSpec, refit);
   ADD_FIELD_GET_SET_REGISTRATION(TRTCompileSpecTSRegistration, trtorch::pyapi::CompileSpec, debug);
diff --git a/py/trtorch/csrc/tensorrt_classes.cpp b/py/trtorch/csrc/tensorrt_classes.cpp
@@ -127,6 +127,7 @@ core::CompileSpec CompileSpec::toInternalCompileSpec() {
   auto info = core::CompileSpec(internal_input_ranges);
   info.convert_info.engine_settings.op_precision = toTRTDataType(op_precision);
   info.convert_info.engine_settings.calibrator = ptq_calibrator;
+  info.convert_info.engine_settings.sparse_weights = sparse_weights;
   info.convert_info.engine_settings.disable_tf32 = disable_tf32;
   info.convert_info.engine_settings.refit = refit;
   info.convert_info.engine_settings.debug = debug;
@@ -163,6 +164,7 @@ std::string CompileSpec::stringify() {
   ss << "    ]" << std::endl;
   ss << "    \"Op Precision\": " << to_str(op_precision) << std::endl;
   ss << "    \"TF32 Disabled\": " << disable_tf32 << std::endl;
+  ss << "    \"Sparsity\": " << sparse_weights << std::endl;
   ss << "    \"Refit\": " << refit << std::endl;
   ss << "    \"Debug\": " << debug << std::endl;
   ss << "    \"Strict Types\": " << strict_types << std::endl;
diff --git a/py/trtorch/csrc/tensorrt_classes.h b/py/trtorch/csrc/tensorrt_classes.h
@@ -126,6 +126,7 @@ struct CompileSpec : torch::CustomClassHolder {
 
   ADD_ENUM_GET_SET(op_precision, DataType, static_cast<int64_t>(DataType::kChar));
   ADD_FIELD_GET_SET(disable_tf32, bool);
+  ADD_FIELD_GET_SET(sparse_weights, bool);
   ADD_FIELD_GET_SET(refit, bool);
   ADD_FIELD_GET_SET(debug, bool);
   ADD_FIELD_GET_SET(strict_types, bool);
@@ -142,6 +143,7 @@ struct CompileSpec : torch::CustomClassHolder {
   std::vector<InputRange> input_ranges;
   nvinfer1::IInt8Calibrator* ptq_calibrator = nullptr;
   DataType op_precision = DataType::kFloat;
+  bool sparse_weights = false;
   bool disable_tf32 = false;
   bool refit = false;
   bool debug = false;
diff --git a/py/trtorch/csrc/trtorch_py.cpp b/py/trtorch/csrc/trtorch_py.cpp
@@ -244,6 +244,7 @@ PYBIND11_MODULE(_C, m) {
       .def_readwrite("op_precision", &CompileSpec::op_precision)
       .def_readwrite("ptq_calibrator", &CompileSpec::ptq_calibrator)
       .def_readwrite("refit", &CompileSpec::refit)
+      .def_readwrite("sparse_weights", &CompileSpec::sparse_weights)
       .def_readwrite("disable_tf32", &CompileSpec::disable_tf32)
       .def_readwrite("debug", &CompileSpec::debug)
       .def_readwrite("strict_types", &CompileSpec::strict_types)

Original file line number	Diff line number	Diff line change
`@@ -86,6 +86,10 @@ ConversionCtx::ConversionCtx(BuilderSettings build_settings)`
`86`	`86`	`cfg->clearFlag(nvinfer1::BuilderFlag::kTF32);`
`87`	`87`	`}`
`88`	`88`
	`89`	`+ if (settings.sparse_weights) {`
	`90`	`+ cfg->setFlag(nvinfer1::BuilderFlag::kSPARSE_WEIGHTS);`
	`91`	`+ }`
	`92`	`+`
`89`	`93`	`if (settings.refit) {`
`90`	`94`	`cfg->setFlag(nvinfer1::BuilderFlag::kREFIT);`
`91`	`95`	`}`
Original file line number	Diff line number	Diff line change
`@@ -102,6 +102,7 @@ core::CompileSpec to_internal_compile_spec(CompileSpec external) {`
`102`	`102`	`internal.convert_info.engine_settings.op_precision = nvinfer1::DataType::kFLOAT;`
`103`	`103`	`}`
`104`	`104`
	`105`	`+ internal.convert_info.engine_settings.sparse_weights = external.sparse_weights;`
`105`	`106`	`internal.convert_info.engine_settings.disable_tf32 = external.disable_tf32;`
`106`	`107`	`internal.convert_info.engine_settings.refit = external.refit;`
`107`	`108`	`internal.convert_info.engine_settings.debug = external.debug;`