fix: Add support for fake tensors #1955

Merged · 2 commits · Jun 28, 2023
5 changes: 0 additions & 5 deletions py/torch_tensorrt/dynamo/backend/backends.py
@@ -14,16 +14,13 @@
)
from torch_tensorrt.dynamo.backend.conversion import convert_module

-from torch._dynamo.backends.common import fake_tensor_unsupported
-
from torch._functorch.aot_autograd import aot_module_simplified, make_boxed_compiler


logger = logging.getLogger(__name__)


@td.register_backend(name="torch_tensorrt")
-@fake_tensor_unsupported
def torch_tensorrt_backend(
gm: torch.fx.GraphModule,
sample_inputs: Sequence[torch.Tensor],
@@ -35,7 +32,6 @@ def torch_tensorrt_backend(


@td.register_backend(name="aot_torch_tensorrt_aten")
-@fake_tensor_unsupported
def aot_torch_tensorrt_aten_backend(
gm: torch.fx.GraphModule,
sample_inputs: Sequence[torch.Tensor],
@@ -55,7 +51,6 @@ def aot_torch_tensorrt_aten_backend(
)


-@fake_tensor_unsupported
def _pretraced_backend(
gm: torch.fx.GraphModule,
sample_inputs: Sequence[torch.Tensor],
114 changes: 114 additions & 0 deletions py/torch_tensorrt/dynamo/backend/test/test_specialized_models.py
@@ -0,0 +1,114 @@
from utils import lower_graph_testing
from torch.testing._internal.common_utils import run_tests, TestCase
import torch
from torch_tensorrt.dynamo import compile


class TestFakeTensors(TestCase):
Contributor:

Are we enabling fake tensors in this test? I do not quite understand the purpose of this test file.

Collaborator (Author):

Based on the changes in backends.py, which remove all uses of the @fake_tensor_unsupported decorator, fake tensors will be enabled by default via Dynamo/AOT. The purpose of this test is to verify that utilities like create_constant do not instantiate Torch tensors when provided scalar inputs. For example, in the test test_lowering_mul_int below, the only op in the graph will be something like:

call_function[target=torch.ops.aten.mul.Tensor](args=(%x, 7)...)

Without the changes in this PR, the above fails at runtime: create_constant makes a torch.Tensor for the scalar 7, that tensor is fake (it holds no real values), and so when TRT goes to extract the value to build a constant tensor, the build fails.
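A minimal sketch of that idea is below, using a hypothetical helper rather than the repository's actual create_constant; the point is that a Python scalar is materialized as a concrete numpy array and never passes through torch.tensor(), so fake-tensor tracing cannot strip its value before TRT reads it.

```python
import numpy as np
import torch


def scalar_to_numpy_constant(value, dtype=None):
    """Hypothetical helper: return real (non-fake) data for a TRT constant."""
    if isinstance(value, (int, float, bool)):
        # Scalars never touch torch.tensor(), so they always carry real data,
        # even when the surrounding graph is traced with fake tensors.
        return np.array([value], dtype=dtype if dtype is not None else np.float32)
    if isinstance(value, torch.Tensor):
        # Concrete (non-fake) tensors can still be converted directly.
        return value.detach().cpu().numpy()
    raise TypeError(f"Unsupported constant value of type {type(value)}")
```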

def test_lowering_mul_int(self):
class MulInt(torch.nn.Module):
def forward(self, x):
return x * 7

# Operations expected to be included in the traced graph after decompositions
expected_ops = {
torch.ops.aten.mul.Tensor,
}

inputs = [
torch.rand(
3,
5,
7,
).cuda(),
]

fx_graph = torch.fx.symbolic_trace(MulInt())
_, expected_ops_unseen = lower_graph_testing(
fx_graph,
inputs,
expected_ops=expected_ops,
min_block_size=1,
)

self.assertEquals(
len(expected_ops_unseen),
0,
f"The following expected ops were not encountered: {expected_ops_unseen}",
)

torch._dynamo.reset()

# Validate that the results between Torch and Torch-TRT are similar
optimized_model = compile(
fx_graph, inputs, min_block_size=1, pass_through_build_failures=True
)
optimized_model_results = optimized_model(*inputs).detach().cpu()
torch_model_results = fx_graph(*inputs).detach().cpu()

max_diff = float(
torch.max(torch.abs(optimized_model_results - torch_model_results))
)
self.assertAlmostEqual(
max_diff,
0,
msg=f"MulInt TRT outputs don't match with the original model.",
)
torch._dynamo.reset()

def test_lowering_add_float(self):
class AddFloat(torch.nn.Module):
def forward(self, x):
return x + 84.0

# Operations expected to be included in the traced graph after decompositions
expected_ops = {
torch.ops.aten.add.Tensor,
}

inputs = [
torch.rand(
1,
5,
7,
9,
).cuda(),
]

fx_graph = torch.fx.symbolic_trace(AddFloat())
_, expected_ops_unseen = lower_graph_testing(
fx_graph,
inputs,
expected_ops=expected_ops,
min_block_size=1,
)

self.assertEquals(
len(expected_ops_unseen),
0,
f"The following expected ops were not encountered: {expected_ops_unseen}",
)

torch._dynamo.reset()

# Validate that the results between Torch and Torch-TRT are similar
optimized_model = compile(
fx_graph, inputs, min_block_size=1, pass_through_build_failures=True
)
optimized_model_results = optimized_model(*inputs).detach().cpu()
torch_model_results = fx_graph(*inputs).detach().cpu()

max_diff = float(
torch.max(torch.abs(optimized_model_results - torch_model_results))
)
self.assertAlmostEqual(
max_diff,
0,
msg=f"AddFloat TRT outputs don't match with the original model.",
)

torch._dynamo.reset()


if __name__ == "__main__":
run_tests()
11 changes: 9 additions & 2 deletions py/torch_tensorrt/dynamo/fx_ts_compat/fx2trt.py
@@ -16,7 +16,12 @@
from torch_tensorrt.dynamo.fx_ts_compat import CONVERTERS
from .input_tensor_spec import InputTensorSpec
from torch_tensorrt.fx.observer import Observer
-from torch_tensorrt.fx.utils import get_dynamic_dims, LowerPrecision, torch_dtype_to_trt
+from torch_tensorrt.fx.utils import (
+    get_dynamic_dims,
+    LowerPrecision,
+    unified_dtype_converter,
+    Frameworks,
+)

_LOGGER: logging.Logger = logging.getLogger(__name__)

@@ -321,7 +326,9 @@ def placeholder(self, target, args, kwargs):
self.optimization_profiles[i].set_shape(target, *shape_range)

return self.network.add_input(
-            name=target, shape=tuple(shape), dtype=torch_dtype_to_trt(dtype)
+            name=target,
+            shape=tuple(shape),
+            dtype=unified_dtype_converter(dtype, Frameworks.TRT),
)

def call_module(self, target, args, kwargs):
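For reference, a rough sketch of the conversion that unified_dtype_converter performs is shown below; the real implementation lives in torch_tensorrt.fx.utils and covers more dtypes and error handling, and the NUMPY member of Frameworks is an assumption here (only TORCH and TRT appear in this diff).

```python
from enum import Enum, auto

import numpy as np
import tensorrt as trt
import torch


class Frameworks(Enum):
    NUMPY = auto()
    TORCH = auto()
    TRT = auto()


# One row per equivalent dtype across the three frameworks.
_EQUIVALENT_DTYPES = [
    {Frameworks.NUMPY: np.float32, Frameworks.TORCH: torch.float32,
     Frameworks.TRT: trt.float32},
    {Frameworks.NUMPY: np.float16, Frameworks.TORCH: torch.float16,
     Frameworks.TRT: trt.float16},
    {Frameworks.NUMPY: np.int32, Frameworks.TORCH: torch.int32,
     Frameworks.TRT: trt.int32},
    {Frameworks.NUMPY: np.bool_, Frameworks.TORCH: torch.bool,
     Frameworks.TRT: trt.bool},
]


def unified_dtype_converter(dtype, to: Frameworks):
    """Convert a numpy / torch / TensorRT dtype into the `to` framework's dtype."""
    for row in _EQUIVALENT_DTYPES:
        if any(dtype == candidate for candidate in row.values()):
            return row[to]
    raise TypeError(f"Unsupported dtype: {dtype!r}")
```

With a single table like this, torch_dtype_to_trt(d) becomes unified_dtype_converter(d, Frameworks.TRT) and torch_dtype_from_trt(d) becomes unified_dtype_converter(d, Frameworks.TORCH), which is how the call sites in the following diffs are rewritten.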
29 changes: 18 additions & 11 deletions py/torch_tensorrt/fx/converters/acc_ops_converters.py
@@ -18,7 +18,7 @@
from torch.fx.immutable_collections import immutable_list
from torch.fx.node import Argument, Target

-from ..utils import get_dynamic_dims, torch_dtype_from_trt, torch_dtype_to_trt
+from ..utils import get_dynamic_dims, unified_dtype_converter, Frameworks

from .converter_utils import * # noqa: F403
from torch_tensorrt.fx.passes.lower_basic_pass import (
@@ -400,7 +400,7 @@ def acc_ops_pad_with_slice_layer(
)

# cast value to TRTensor
-    dt = torch_dtype_from_trt(input_val.dtype)
+    dt = unified_dtype_converter(input_val.dtype, Frameworks.TORCH)
value = 0 if value == None else value
value_const = get_trt_tensor(
network, torch.tensor([value], dtype=dt), f"{name}_value"
@@ -1550,7 +1550,7 @@ def acc_ops_to_dtype(
input_t = get_trt_tensor(network, input_val, f"{name}_input_t")
if input_dtype:
if isinstance(input_dtype, torch.dtype):
-            input_dtype = torch_dtype_to_trt(input_dtype)
+            input_dtype = unified_dtype_converter(input_dtype, Frameworks.TRT)
input_t = type_cast(network, target, f"{name}_input", input_t, input_dtype)
return input_t

@@ -1811,7 +1811,7 @@ def acc_ops_logical_xor(
# f"isinf received input {input_t} that is not part "
# "of the TensorRT region!"
# )
-    # tdtype = torch_dtype_from_trt(input_t.dtype)
+    # tdtype = unified_dtype_converter(input_t.dtype, Frameworks.TORCH)

# inf_t = torch.ones(tuple(input_t.shape))
# inf_t = inf_t * float("inf")
@@ -1849,7 +1849,7 @@ def acc_ops_any(

if input_t.dtype in (trt.float32, trt.float16, trt.int32):
comp_t = torch.zeros(tuple([*input_t.shape])).to(
-            torch_dtype_from_trt(input_t.dtype)
+            unified_dtype_converter(input_t.dtype, Frameworks.TORCH)
)
comp_t = get_trt_tensor(network, comp_t, f"{name}_comp_t")
kwargs_new = {"input": input_t, "other": comp_t}
@@ -2738,7 +2738,7 @@ def acc_ops_masked_fill_tensor(
if type(value_t) is torch.Tensor:
value_t = value_t.cpu().numpy()
# cast to input type
-    input_dtype = torch_dtype_from_trt(input_t.dtype)
+    input_dtype = unified_dtype_converter(input_t.dtype, Frameworks.TORCH)
value_t = (torch.ones(shape) * value_t).to(input_dtype)
input_val = get_trt_tensor(network, input_t, f"{name}_input")
value_val = get_trt_tensor(network, value_t, f"{name}_input")
@@ -2872,7 +2872,11 @@ def add_clamp(network, input, val, op, name):
# clamping scalar
acc_ops_clamp_trt = get_trt_tensor(
network,
-        squeeze_left(torch.tensor([val], dtype=torch_dtype_from_trt(input.dtype))),
+        squeeze_left(
+            torch.tensor(
+                [val], dtype=unified_dtype_converter(input.dtype, Frameworks.TORCH)
+            )
+        ),
f"{name}_clamp_{val}",
)
else:
@@ -2881,7 +2885,8 @@ def add_clamp(network, input, val, op, name):
(
val
* torch.ones(
-                    acc_ops_clamp_shape, dtype=torch_dtype_from_trt(input.dtype)
+                    acc_ops_clamp_shape,
+                    dtype=unified_dtype_converter(input.dtype, Frameworks.TORCH),
)
)
.cpu()
@@ -3527,7 +3532,9 @@ def acc_ops_cumsum(
iterator = loop.add_iterator(input_val, dim, False)
data = iterator.get_output(0)
new_dims = tuple(data.shape)
-    zero_tensor = torch.zeros(new_dims, dtype=trt_dtype_to_torch_dtype(input_val.dtype))
+    zero_tensor = torch.zeros(
+        new_dims, dtype=unified_dtype_converter(input_val.dtype, Frameworks.TORCH)
+    )
zero_tensor = network.add_constant(
zero_tensor.shape, to_numpy(zero_tensor)
).get_output(0)
@@ -3670,7 +3677,7 @@ def acc_ops_new_ones(
dtype_val = kwargs.get("dtype")
if dtype_val is None:
dtype_val = input_val.dtype
-    dtype_val = torch_dtype_from_trt(dtype_val)
+    dtype_val = unified_dtype_converter(dtype_val, Frameworks.TORCH)

device_val = kwargs.get("device")
assert (
@@ -3694,7 +3701,7 @@ def acc_ops_new_empty(
dtype_val = kwargs.get("dtype")
if dtype_val is None:
dtype_val = input_val.dtype
-    dtype_val = torch_dtype_from_trt(dtype_val)
+    dtype_val = unified_dtype_converter(dtype_val, Frameworks.TORCH)

device_val = kwargs.get("device")
assert (
2 changes: 0 additions & 2 deletions py/torch_tensorrt/fx/converters/aten_ops_converters.py
@@ -18,8 +18,6 @@
from torch.fx.immutable_collections import immutable_list
from torch.fx.node import Argument, Target

-from ..utils import get_dynamic_dims, torch_dtype_from_trt, torch_dtype_to_trt
-
from .converter_utils import * # noqa: F403
import torch_tensorrt.fx.tracer.acc_tracer.acc_utils as acc_utils
from torch_tensorrt.fx.converters.impl import activation