pytorch
diff --git a/‎core/runtime/TRTEngine.cpp
+1-1 b/‎core/runtime/TRTEngine.cpp
+1-1
diff --git a/‎docsrc/py_api/dynamo.rst
+2 b/‎docsrc/py_api/dynamo.rst
+2
diff --git a/‎docsrc/user_guide/saving_models.rst
+35-36 b/‎docsrc/user_guide/saving_models.rst
+35-36
diff --git a/‎py/torch_tensorrt/_compile.py
+118-8 b/‎py/torch_tensorrt/_compile.py
+118-8
@@ -266,7 +266,7 @@ std::string TRTEngine::to_str() const {
               exec_ctx->getEngine().getTensorDataType(out_binding_names[o].c_str()))
        << std::endl;
   }
-  ss << "  }" << std::endl;
+  ss << "  ]" << std::endl;
   ss << "  Device: " << device_info << std::endl;
   ss << "  Hardware Compatibility: " << (hardware_compatible ? "Enabled" : "Disabled") << std::endl;
   // clang-format on
 
@@ -22,6 +22,8 @@ Functions
 
 .. autofunction:: export
 
+.. autofunction:: convert_module_to_trt_engine
+
 
 
 Classes
 
@@ -9,23 +9,22 @@ Saving models compiled with Torch-TensorRT
    :undoc-members:
    :show-inheritance:
 
-Saving models compiled with Torch-TensorRT varies slightly with the `ir` that has been used for compilation.
+Saving models compiled with Torch-TensorRT can be done using the `torch_tensorrt.save` API.
 
 Dynamo IR
 -------------
 
-The output type of `ir=dynamo` compilation of Torch-TensorRT is `torch.export.ExportedProgram` object by default. 
-In addition, we provide a new parameter `output_format` in the `CompilationSetting` object provided before compilation.
-The `output_format` can take the following options 
+The output type of `ir=dynamo` compilation of Torch-TensorRT is a `torch.fx.GraphModule` object by default.
+We can save this object in either `TorchScript` (`torch.jit.ScriptModule`) or `ExportedProgram` (`torch.export.ExportedProgram`) formats by
+specifying the `output_format` flag. Here are the options `output_format` will accept:
 
-* `exported_program` (or) `ep` : This is the default. Returns an ExportedProgram 
-* `torchscript` (or) `ts` : This returns a TorchScript module
-* `graph_module` (or) `fx` : This returns a torch.fx.GraphModule which can be traced into Torchscript to save to disk.
+* `exported_program` : This is the default. We perform transformations on the graphmodule first and use `torch.export.save` to save the module.
+* `torchscript` : We trace the graphmodule via `torch.jit.trace` and save it via `torch.jit.save`.
 
-a) Torchscript
+a) ExportedProgram
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
-If you set the `output_format="torchscript"`, this will return a `ScriptModule` which can be serialized via torch.jit.save
+Here's an example usage
 
 .. code-block:: python
 
@@ -34,50 +33,32 @@ If you set the `output_format="torchscript"`, this will return a `ScriptModule`
 
     model = MyModel().eval().cuda()
     inputs = [torch.randn((1, 3, 224, 224)).cuda()]
-    # trt_ts is a torch.jit.ScriptModule object
-    trt_ts = torch_tensorrt.compile(model, ir="dynamo", inputs, output_format="torchscript")
-    torch.jit.save(trt_ts, "trt_model.ts")
+    # trt_gm is a torch.fx.GraphModule object
+    trt_gm = torch_tensorrt.compile(model, ir="dynamo", inputs=inputs)
+    torch_tensorrt.save(trt_gm, "trt.ep", inputs=inputs)
 
     # Later, you can load it and run inference
-    model = torch.jit.load("trt_model.ts").cuda()
+    model = torch.export.load("trt.ep").module()
     model(*inputs)
 
-b) ExportedProgram
+b) Torchscript
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
-`torch.export.ExportedProgram`, a new format introduced in Pytorch 2.X is the default return type of Torch-TensorRT compilation.
-
 .. code-block:: python
 
     import torch
     import torch_tensorrt
 
     model = MyModel().eval().cuda()
     inputs = [torch.randn((1, 3, 224, 224)).cuda()]
-    # trt_ep is a torch.export.ExportedProgram object
-    trt_ep = torch_tensorrt.compile(model, ir="dynamo", inputs) 
-    torch.export.save(trt_ep, "trt_model.ep")
+    # trt_gm is a torch.fx.GraphModule object
+    trt_gm = torch_tensorrt.compile(model, ir="dynamo", inputs=inputs)
+    torch_tensorrt.save(trt_gm, "trt.ts", output_format="torchscript", inputs=inputs)
 
     # Later, you can load it and run inference
-    model = torch.export.load("trt_model.ep")
+    model = torch.jit.load("trt.ts").cuda()
     model(*inputs)
 
-c) GraphModule
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-We can also return a `torch.fx.GraphModule` object as the output of Torch-TensorRT compilation by setting `output_format="graph_module"`.
-Internally, partitioning, lowering, conversion phases operate using GraphModule objects. These can be either traced into a Torchscript modules or 
-exported into `ExportedProgram` objects
-
-.. code-block:: python
-
-    import torch
-    import torch_tensorrt
-
-    model = MyModel().eval().cuda()
-    inputs = [torch.randn((1, 3, 224, 224)).cuda()]
-    # trt_gm is a torch.fx.GraphModule object
-    trt_gm = torch_tensorrt.compile(model, ir="dynamo", inputs, output_format="graph_module") 
 
 Torchscript IR
 -------------
@@ -99,3 +80,21 @@ For `ir=ts`, this behavior stays the same in 2.X versions as well.
   model = torch.jit.load("trt_model.ts").cuda()
   model(*inputs)
 
+
+Loading the models
+--------------------
+
+We can load torchscript or exported_program models using `torch.jit.load` and `torch.export.load` APIs from PyTorch directly.
+Alternatively, we provide a light wrapper `torch_tensorrt.load(file_path)` which can load either of the above model types.
+
+Here's an example usage
+
+.. code-block:: python
+
+    import torch
+    import torch_tensorrt
+
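+    # file_path can be a trt.ep or trt.ts file obtained by saving the model (refer to the above section)
+    inputs = [torch.randn((1, 3, 224, 224)).cuda()]
+    # .module() applies to an ExportedProgram (trt.ep); a TorchScript module (trt.ts) is callable as loaded
+    model = torch_tensorrt.load(<file_path>).module()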
+    model(*inputs)
@@ -1,5 +1,6 @@
 from __future__ import annotations
 
+import collections.abc
 import logging
 from enum import Enum
 from typing import Any, Callable, List, Optional, Sequence, Set
@@ -32,10 +33,7 @@
 
 logger = logging.getLogger(__name__)
 
-__all__ = [
-    "compile",
-    "convert_method_to_trt_engine",
-]
+__all__ = ["compile", "convert_method_to_trt_engine", "save", "load"]
 
 
 def _non_fx_input_interface(
@@ -240,8 +238,6 @@ def compile(
         return compiled_fx_module
     elif target_ir == _IRType.dynamo:
         # Prepare torch and torchtrt inputs
-        import collections.abc
-
         from torch_tensorrt.dynamo.utils import prepare_inputs
 
         if not isinstance(input_list, collections.abc.Sequence):
@@ -345,10 +341,19 @@ def convert_method_to_trt_engine(
             "convert_method_to_trt_engine call is not supported for ir=fx"
         )
     elif target_ir == _IRType.dynamo:
+        # Prepare torch and torchtrt inputs
+        from torch_tensorrt.dynamo.utils import prepare_inputs
+
+        if not isinstance(inputs, collections.abc.Sequence):
+            inputs = [inputs]
+
+        # Export the module
+        torchtrt_inputs = prepare_inputs(inputs)
+        exp_program = torch_tensorrt.dynamo.trace(module, torchtrt_inputs, **kwargs)
+
         return dynamo_convert_module_to_trt_engine(  # type: ignore[no-any-return]
-            module,
+            exp_program,
             inputs=inputs,
-            method_name=method_name,
             enabled_precisions=enabled_precisions_set,
             **kwargs,
         )
@@ -358,3 +363,108 @@ def convert_method_to_trt_engine(
         )
     else:
         raise RuntimeError("Module is an unknown format or the ir requested is unknown")
+
+
+def load(file_path: str = "") -> Any:
+    """
+    Load a saved TorchScript module or ExportedProgram from disk.
+
+    The file type is autodetected by trial: ``torch.jit.load`` is attempted first and,
+    if it raises, ``torch.export.load`` is attempted next.
+
+    Arguments:
+        file_path (str): Path of the file to load
+
+    Returns:
+        The object returned by ``torch.jit.load`` if that call succeeds, otherwise the
+        object returned by ``torch.export.load``
+
+    Raises:
+        ValueError: If both ``torch.jit.load`` and ``torch.export.load`` raise for ``file_path``
+    """
+    try:
+        logger.debug(f"Loading the provided file {file_path} using torch.jit.load()")
+        ts_module = torch.jit.load(file_path)
+        return ts_module
+    except Exception:
+        # Not fatal: the failure is logged (with traceback) and the ExportedProgram loader is tried next
+        logger.info(
+            f"Loading the provided file {file_path} via torch.jit.load() failed with the following error",
+            exc_info=True,
+        )
+        pass
+
+    try:
+        logger.debug(f"Loading the provided file {file_path} using torch.export.load()")
+        exp_program = torch.export.load(file_path)
+        return exp_program
+    except Exception:
+        # Both loaders failed: log this second failure, then report a single ValueError to the caller
+        logger.info(
+            f"Loading the provided file {file_path} via torch.export.load() failed with the following error",
+            exc_info=True,
+        )
+        raise ValueError(
+            f"The file {file_path} doesn't correspond to a valid Torchscript module or ExportedProgram. Please verify the file path."
+        )
+
+
+def save(
+    module: Any,
+    file_path: str = "",
+    *,
+    output_format: str = "exported_program",
+    inputs: Optional[Sequence[torch.Tensor]] = None,
+    retrace: bool = False,
+) -> None:
+    """
+    Save the model to disk in the specified output format.
+
+    Arguments:
+        module : Compiled Torch-TensorRT module (Options include torch.jit.ScriptModule | torch.export.ExportedProgram | torch.fx.GraphModule)
+        file_path (str): Destination path of the saved model. Must be non-empty.
+        output_format (str): Format to save the model. Options include exported_program | torchscript.
+        inputs (Optional[Sequence[torch.Tensor]]): Torch input tensors. Required when the module is a
+                torch.fx.GraphModule, where they are used to trace / export the graph before saving.
+        retrace (bool): When the module type is a fx.GraphModule, this option re-exports the graph using torch.export.export(strict=False) to save it.
+                This flag is experimental for now.
+
+    Raises:
+        ValueError: If any element of ``inputs`` is not a torch.Tensor, if ``output_format`` is not one of
+                the accepted formats, if ``file_path`` is empty, if the module is an nn.Module, if the module
+                is a ScriptModule / ExportedProgram and ``output_format`` names the other format, or if the
+                module is a GraphModule and ``inputs`` is None.
+    """
+    module_type = _parse_module_type(module)
+    accepted_formats = {"exported_program", "torchscript"}
+    # Validate the arguments up front so that nothing is written when a call is malformed
+    if inputs is not None and not all(
+        isinstance(input, torch.Tensor) for input in inputs
+    ):
+        raise ValueError(
+            "Not all inputs provided are torch.tensors. Please provide torch.tensors as inputs"
+        )
+    if output_format not in accepted_formats:
+        raise ValueError(
+            f"Provided output_format {output_format} is not supported. Supported options are exported_program | torchscript"
+        )
+    if not file_path:
+        raise ValueError("File path cannot be empty. Please provide a valid file path")
+
+    # Dispatch on the detected module type.
+    # NOTE(review): there is no final else branch, so a module_type other than nn / ts / ep / fx
+    # falls through without saving or raising - confirm _parse_module_type cannot return one.
+    if module_type == _ModuleType.nn:
+        raise ValueError(
+            "Input model is of type nn.Module. Saving nn.Module directly is not supported. Supported model types torch.jit.ScriptModule | torch.fx.GraphModule | torch.export.ExportedProgram."
+        )
+    elif module_type == _ModuleType.ts:
+        # A ScriptModule is only saved as TorchScript; no conversion to ExportedProgram is attempted
+        if output_format == "exported_program":
+            raise ValueError(
+                "Provided model is a torch.jit.ScriptModule but the output_format specified is exported_program. Please verify the output_format"
+            )
+        else:
+            torch.jit.save(module, file_path)
+    elif module_type == _ModuleType.ep:
+        # An ExportedProgram is only saved as-is; no conversion to TorchScript is attempted
+        if output_format == "torchscript":
+            raise ValueError(
+                "Provided model is a torch.export.ExportedProgram but the output_format specified is torchscript. Please verify the output_format"
+            )
+        else:
+            torch.export.save(module, file_path)
+    elif module_type == _ModuleType.fx:
+        # Every GraphModule path below traces or exports with example inputs, so they are mandatory here
+        if inputs is None:
+            raise ValueError(
+                "Provided model is a torch.fx.GraphModule however the inputs are empty. Please provide valid torch.tensors as inputs to trace and save the model"
+            )
+        # The module type is torch.fx.GraphModule
+        if output_format == "torchscript":
+            module_ts = torch.jit.trace(module, inputs)
+            torch.jit.save(module_ts, file_path)
+        else:
+            if not retrace:
+                # Default path: build the ExportedProgram with Torch-TensorRT's own exporter
+                from torch_tensorrt.dynamo._exporter import export
+
+                exp_program = export(module, inputs)
+                torch.export.save(exp_program, file_path)
+            else:
+                # Experimental path: re-export through torch.export.export(strict=False)
+                # while torchbind tracing is enabled
+                from torch._higher_order_ops.torchbind import enable_torchbind_tracing
+
+                with enable_torchbind_tracing():
+                    exp_program = torch.export.export(
+                        module, tuple(inputs), strict=False
+                    )
+                    torch.export.save(exp_program, file_path)
Original file line number	Diff line number	Diff line change
`@@ -266,7 +266,7 @@ std::string TRTEngine::to_str() const {`
`266`	`266`	`exec_ctx->getEngine().getTensorDataType(out_binding_names[o].c_str()))`
`267`	`267`	`<< std::endl;`
`268`	`268`	`}`
`269`		`- ss << " }" << std::endl;`
	`269`	`+ ss << " ]" << std::endl;`
`270`	`270`	`ss << " Device: " << device_info << std::endl;`
`271`	`271`	`ss << " Hardware Compatibility: " << (hardware_compatible ? "Enabled" : "Disabled") << std::endl;`
`272`	`272`	`// clang-format on`