Skip to content

Commit b30cbd9

Browse files
committed
refactor: removing the strict_types and max_batch_size apis
BREAKING CHANGE: This commit removes the strict types and max_batch_size apis. We are doing this because the functionality of these APIs in TRT is convoluted and likely to be ignored during building. A replacement for strict types with actual guarantees will be added at a later date. Signed-off-by: Dheeraj Peri <[email protected]>
1 parent 733a4b1 commit b30cbd9

21 files changed

+3
-91
lines changed

WORKSPACE

+3-3
Original file line numberDiff line numberDiff line change
@@ -86,10 +86,10 @@ http_archive(
8686
http_archive(
8787
name = "tensorrt",
8888
build_file = "@//third_party/tensorrt/archive:BUILD",
89-
sha256 = "3177435024ff4aa5a6dba8c1ed06ab11cc0e1bf3bb712dfa63a43422f41313f3",
90-
strip_prefix = "TensorRT-8.0.3.4",
89+
sha256 = "da130296ac6636437ff8465812eb55dbab0621747d82dc4fe9b9376f00d214af",
90+
strip_prefix = "TensorRT-8.2.2.1",
9191
urls = [
92-
"https://developer.nvidia.com/compute/machine-learning/tensorrt/secure/8.0.3/tars/tensorrt-8.0.3.4.linux.x86_64-gnu.cuda-11.3.cudnn8.2.tar.gz",
92+
"https://developer.nvidia.com/compute/machine-learning/tensorrt/secure/8.2.2.1/tars/tensorrt-8.2.2.1.linux.x86_64-gnu.cuda-11.4.cudnn8.2.tar.gz",
9393
],
9494
)
9595

core/conversion/conversionctx/ConversionCtx.cpp

-15
Original file line numberDiff line numberDiff line change
@@ -18,19 +18,12 @@ std::ostream& operator<<(std::ostream& os, const BuilderSettings& s) {
1818
<< "\n Truncate Long and Double: " << s.truncate_long_and_double \
1919
<< "\n Make Refittable Engine: " << s.refit \
2020
<< "\n Debuggable Engine: " << s.debug \
21-
<< "\n Strict Types: " << s.strict_types \
2221
<< "\n GPU ID: " << s.device.gpu_id \
2322
<< "\n Allow GPU Fallback (if running on DLA): " << s.device.allow_gpu_fallback \
2423
<< "\n Min Timing Iterations: " << s.num_min_timing_iters \
2524
<< "\n Avg Timing Iterations: " << s.num_avg_timing_iters \
2625
<< "\n Max Workspace Size: " << s.workspace_size;
2726

28-
if (s.max_batch_size != 0) {
29-
os << "\n Max Batch Size: " << s.max_batch_size;
30-
} else {
31-
os << "\n Max Batch Size: Not set";
32-
}
33-
3427
os << "\n Device Type: " << s.device.device_type \
3528
<< "\n GPU ID: " << s.device.gpu_id;
3629
if (s.device.device_type == nvinfer1::DeviceType::kDLA) {
@@ -107,18 +100,10 @@ ConversionCtx::ConversionCtx(BuilderSettings build_settings)
107100
cfg->setFlag(nvinfer1::BuilderFlag::kDEBUG);
108101
}
109102

110-
if (settings.strict_types) {
111-
cfg->setFlag(nvinfer1::BuilderFlag::kSTRICT_TYPES);
112-
}
113-
114103
if (settings.device.allow_gpu_fallback) {
115104
cfg->setFlag(nvinfer1::BuilderFlag::kGPU_FALLBACK);
116105
}
117106

118-
if (settings.max_batch_size != 0) {
119-
builder->setMaxBatchSize(settings.max_batch_size);
120-
}
121-
122107
cfg->setMinTimingIterations(settings.num_min_timing_iters);
123108
cfg->setAvgTimingIterations(settings.num_avg_timing_iters);
124109
cfg->setMaxWorkspaceSize(settings.workspace_size);

core/conversion/conversionctx/ConversionCtx.h

-2
Original file line numberDiff line numberDiff line change
@@ -29,15 +29,13 @@ struct BuilderSettings {
2929
bool disable_tf32 = false;
3030
bool refit = false;
3131
bool debug = false;
32-
bool strict_types = false;
3332
bool truncate_long_and_double = false;
3433
Device device;
3534
nvinfer1::EngineCapability capability = TRT_ENGINE_CAPABILITY_STANDARD;
3635
nvinfer1::IInt8Calibrator* calibrator = nullptr;
3736
uint64_t num_min_timing_iters = 2;
3837
uint64_t num_avg_timing_iters = 1;
3938
uint64_t workspace_size = 0;
40-
uint64_t max_batch_size = 0;
4139

4240
BuilderSettings() = default;
4341
BuilderSettings(const BuilderSettings& other) = default;

cpp/bin/torchtrtc/README.md

-4
Original file line numberDiff line numberDiff line change
@@ -31,8 +31,6 @@ OPTIONS:
3131
--i, --info Dumps info messages generated during
3232
compilation onto the console
3333
--build-debuggable-engine Creates a debuggable engine
34-
--use-strict-types Restrict operating type to only use set
35-
operation precision
3634
--allow-gpu-fallback (Only used when targeting DLA
3735
(device-type)) Lets engine run layers on
3836
GPU if they are not supported on DLA
@@ -90,8 +88,6 @@ OPTIONS:
9088
used to select kernels
9189
--workspace-size=[workspace_size] Maximum size of workspace given to
9290
TensorRT
93-
--max-batch-size=[max_batch_size] Maximum batch size (must be >= 1 to be
94-
set, 0 means not set)
9591
-t[threshold],
9692
--threshold=[threshold] Maximum acceptable numerical deviation
9793
from standard torchscript output

cpp/include/torch_tensorrt/torch_tensorrt.h

-11
Original file line numberDiff line numberDiff line change
@@ -626,12 +626,6 @@ struct TORCHTRT_API CompileSpec {
626626
*/
627627
bool truncate_long_and_double = false;
628628

629-
/**
630-
* Restrict operating type to only the lowest enabled operation precision
631-
* (enabled_precisions)
632-
*/
633-
bool strict_types = false;
634-
635629
/**
636630
* Target Device
637631
*/
@@ -656,11 +650,6 @@ struct TORCHTRT_API CompileSpec {
656650
*/
657651
uint64_t workspace_size = 0;
658652

659-
/**
660-
* Maximum batch size (must be >= 1 to be set, 0 means not set)
661-
*/
662-
uint64_t max_batch_size = 0;
663-
664653
/**
665654
* Calibration dataloaders for each input for post training quantization
666655
*/

cpp/src/compile_spec.cpp

-2
Original file line numberDiff line numberDiff line change
@@ -40,9 +40,7 @@ torchtrt::core::CompileSpec to_internal_compile_spec(CompileSpec external) {
4040
internal.convert_info.engine_settings.refit = external.refit;
4141
internal.convert_info.engine_settings.debug = external.debug;
4242
internal.convert_info.engine_settings.truncate_long_and_double = external.truncate_long_and_double;
43-
internal.convert_info.engine_settings.strict_types = external.strict_types;
4443
internal.convert_info.engine_settings.device.allow_gpu_fallback = external.device.allow_gpu_fallback;
45-
internal.convert_info.engine_settings.max_batch_size = external.max_batch_size;
4644

4745
TORCHTRT_CHECK(
4846
!(external.require_full_compilation && (external.torch_executed_ops.size() > 0)),

docsrc/tutorials/ptq.rst

-1
Original file line numberDiff line numberDiff line change
@@ -194,7 +194,6 @@ to use ``CacheCalibrator`` to use in INT8 mode.
194194
"inputs": [torch_tensorrt.Input([1, 3, 32, 32])],
195195
"enabled_precisions": {torch.float, torch.half, torch.int8},
196196
"calibrator": calibrator,
197-
"max_batch_size": 32,
198197
}
199198
200199
trt_mod = torch_tensorrt.compile(model, compile_settings)

docsrc/tutorials/torchtrtc.rst

-4
Original file line numberDiff line numberDiff line change
@@ -34,8 +34,6 @@ to standard TorchScript. Load with ``torch.jit.load()`` and run like you would r
3434
--i, --info Dumps info messages generated during
3535
compilation onto the console
3636
--build-debuggable-engine Creates a debuggable engine
37-
--use-strict-types Restrict operating type to only use set
38-
operation precision
3937
--allow-gpu-fallback (Only used when targeting DLA
4038
(device-type)) Lets engine run layers on
4139
GPU if they are not supported on DLA
@@ -93,8 +91,6 @@ to standard TorchScript. Load with ``torch.jit.load()`` and run like you would r
9391
used to select kernels
9492
--workspace-size=[workspace_size] Maximum size of workspace given to
9593
TensorRT
96-
--max-batch-size=[max_batch_size] Maximum batch size (must be >= 1 to be
97-
set, 0 means not set)
9894
-t[threshold],
9995
--threshold=[threshold] Maximum acceptable numerical deviation
10096
from standard torchscript output

docsrc/tutorials/use_from_pytorch.rst

-2
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,6 @@ at the documentation for the Torch-TensorRT ``TensorRTCompileSpec`` API.
3838
"enabled_precisions": {torch.float, torch.half},
3939
"refit": False,
4040
"debug": False,
41-
"strict_types": False,
4241
"device": {
4342
"device_type": torch_tensorrt.DeviceType.GPU,
4443
"gpu_id": 0,
@@ -48,7 +47,6 @@ at the documentation for the Torch-TensorRT ``TensorRTCompileSpec`` API.
4847
"capability": torch_tensorrt.EngineCapability.default,
4948
"num_min_timing_iters": 2,
5049
"num_avg_timing_iters": 1,
51-
"max_batch_size": 0,
5250
})
5351
}
5452

examples/int8/ptq/main.cpp

-2
Original file line numberDiff line numberDiff line change
@@ -49,8 +49,6 @@ torch::jit::Module compile_int8_model(const std::string& data_dir, torch::jit::M
4949
compile_spec.enabled_precisions.insert(torch::kI8);
5050
/// Use the TensorRT Entropy Calibrator
5151
compile_spec.ptq_calibrator = calibrator;
52-
/// Set max batch size for the engine
53-
compile_spec.max_batch_size = 32;
5452
/// Set a larger workspace
5553
compile_spec.workspace_size = 1 << 28;
5654

examples/int8/qat/main.cpp

-3
Original file line numberDiff line numberDiff line change
@@ -33,8 +33,6 @@ torch::jit::Module compile_int8_qat_model(const std::string& data_dir, torch::ji
3333
auto compile_spec = torch_tensorrt::ts::CompileSpec(inputs);
3434
/// Set operating precision to INT8
3535
compile_spec.enabled_precisions.insert(torch::kI8);
36-
/// Set max batch size for the engine
37-
compile_spec.max_batch_size = 32;
3836
/// Set a larger workspace
3937
compile_spec.workspace_size = 1 << 28;
4038

@@ -126,4 +124,3 @@ int main(int argc, const char* argv[]) {
126124
print_avg_std_dev("TRT quantized model", trt_runtimes, dims[0][0]);
127125
trt_mod.save("/tmp/qat_vgg16.trt.ts");
128126
}
129-

py/torch_tensorrt/csrc/register_tensorrt_classes.cpp

-2
Original file line numberDiff line numberDiff line change
@@ -59,14 +59,12 @@ void RegisterTRTCompileSpec() {
5959
ADD_FIELD_GET_SET_REGISTRATION(TRTCompileSpecTSRegistration, torch_tensorrt::pyapi::CompileSpec, disable_tf32);
6060
ADD_FIELD_GET_SET_REGISTRATION(TRTCompileSpecTSRegistration, torch_tensorrt::pyapi::CompileSpec, refit);
6161
ADD_FIELD_GET_SET_REGISTRATION(TRTCompileSpecTSRegistration, torch_tensorrt::pyapi::CompileSpec, debug);
62-
ADD_FIELD_GET_SET_REGISTRATION(TRTCompileSpecTSRegistration, torch_tensorrt::pyapi::CompileSpec, strict_types);
6362
ADD_FIELD_GET_SET_REGISTRATION(TRTCompileSpecTSRegistration, torch_tensorrt::pyapi::CompileSpec, capability);
6463
ADD_FIELD_GET_SET_REGISTRATION(
6564
TRTCompileSpecTSRegistration, torch_tensorrt::pyapi::CompileSpec, num_min_timing_iters);
6665
ADD_FIELD_GET_SET_REGISTRATION(
6766
TRTCompileSpecTSRegistration, torch_tensorrt::pyapi::CompileSpec, num_avg_timing_iters);
6867
ADD_FIELD_GET_SET_REGISTRATION(TRTCompileSpecTSRegistration, torch_tensorrt::pyapi::CompileSpec, workspace_size);
69-
ADD_FIELD_GET_SET_REGISTRATION(TRTCompileSpecTSRegistration, torch_tensorrt::pyapi::CompileSpec, max_batch_size);
7068
ADD_FIELD_GET_SET_REGISTRATION(
7169
TRTCompileSpecTSRegistration, torch_tensorrt::pyapi::CompileSpec, truncate_long_and_double);
7270
}

py/torch_tensorrt/csrc/tensorrt_classes.cpp

-5
Original file line numberDiff line numberDiff line change
@@ -209,7 +209,6 @@ core::CompileSpec CompileSpec::toInternalCompileSpec() {
209209
info.convert_info.engine_settings.disable_tf32 = disable_tf32;
210210
info.convert_info.engine_settings.refit = refit;
211211
info.convert_info.engine_settings.debug = debug;
212-
info.convert_info.engine_settings.strict_types = strict_types;
213212
info.convert_info.engine_settings.device.device_type = toTRTDeviceType(device.device_type);
214213
info.convert_info.engine_settings.device.gpu_id = device.gpu_id;
215214
info.convert_info.engine_settings.device.dla_core = device.dla_core;
@@ -227,8 +226,6 @@ core::CompileSpec CompileSpec::toInternalCompileSpec() {
227226
info.convert_info.engine_settings.num_avg_timing_iters = num_avg_timing_iters;
228227
TORCHTRT_CHECK(workspace_size >= 0, "workspace_size must be 0 or greater");
229228
info.convert_info.engine_settings.workspace_size = workspace_size;
230-
TORCHTRT_CHECK(max_batch_size >= 0, "max_batch_size must be 0 or greater");
231-
info.convert_info.engine_settings.max_batch_size = max_batch_size;
232229
return info;
233230
}
234231

@@ -249,13 +246,11 @@ std::string CompileSpec::stringify() {
249246
ss << " \"Sparsity\": " << sparse_weights << std::endl;
250247
ss << " \"Refit\": " << refit << std::endl;
251248
ss << " \"Debug\": " << debug << std::endl;
252-
ss << " \"Strict Types\": " << strict_types << std::endl;
253249
ss << " \"Device\": " << device.to_str() << std::endl;
254250
ss << " \"Engine Capability\": " << to_str(capability) << std::endl;
255251
ss << " \"Num Min Timing Iters\": " << num_min_timing_iters << std::endl;
256252
ss << " \"Num Avg Timing Iters\": " << num_avg_timing_iters << std::endl;
257253
ss << " \"Workspace Size\": " << workspace_size << std::endl;
258-
ss << " \"Max Batch Size\": " << max_batch_size << std::endl;
259254
ss << " \"Truncate long and double\": " << truncate_long_and_double << std::endl;
260255
ss << " \"Torch Fallback\": " << torch_fallback.to_str();
261256
ss << "}";

py/torch_tensorrt/csrc/tensorrt_classes.h

-4
Original file line numberDiff line numberDiff line change
@@ -146,13 +146,11 @@ struct CompileSpec : torch::CustomClassHolder {
146146
ADD_FIELD_GET_SET(sparse_weights, bool);
147147
ADD_FIELD_GET_SET(refit, bool);
148148
ADD_FIELD_GET_SET(debug, bool);
149-
ADD_FIELD_GET_SET(strict_types, bool);
150149
ADD_ENUM_GET_SET(capability, EngineCapability, static_cast<int64_t>(EngineCapability::kSAFE_DLA));
151150
ADD_FIELD_GET_SET(num_min_timing_iters, int64_t);
152151
ADD_FIELD_GET_SET(num_avg_timing_iters, int64_t);
153152
ADD_FIELD_GET_SET(workspace_size, int64_t);
154153
ADD_FIELD_GET_SET(truncate_long_and_double, bool);
155-
ADD_FIELD_GET_SET(max_batch_size, int64_t);
156154
ADD_FIELD_GET_SET(device, Device);
157155
ADD_FIELD_GET_SET(torch_fallback, TorchFallback);
158156
ADD_FIELD_GET_SET(ptq_calibrator, nvinfer1::IInt8Calibrator*);
@@ -164,15 +162,13 @@ struct CompileSpec : torch::CustomClassHolder {
164162
bool disable_tf32 = false;
165163
bool refit = false;
166164
bool debug = false;
167-
bool strict_types = false;
168165
bool truncate_long_and_double = false;
169166
Device device;
170167
TorchFallback torch_fallback;
171168
EngineCapability capability = EngineCapability::kDEFAULT;
172169
int64_t num_min_timing_iters = 2;
173170
int64_t num_avg_timing_iters = 1;
174171
int64_t workspace_size = 0;
175-
int64_t max_batch_size = 0;
176172
};
177173

178174
} // namespace pyapi

py/torch_tensorrt/csrc/torch_tensorrt_py.cpp

-2
Original file line numberDiff line numberDiff line change
@@ -298,13 +298,11 @@ PYBIND11_MODULE(_C, m) {
298298
.def_readwrite("sparse_weights", &CompileSpec::sparse_weights)
299299
.def_readwrite("disable_tf32", &CompileSpec::disable_tf32)
300300
.def_readwrite("debug", &CompileSpec::debug)
301-
.def_readwrite("strict_types", &CompileSpec::strict_types)
302301
.def_readwrite("device", &CompileSpec::device)
303302
.def_readwrite("capability", &CompileSpec::capability)
304303
.def_readwrite("num_min_timing_iters", &CompileSpec::num_min_timing_iters)
305304
.def_readwrite("num_avg_timing_iters", &CompileSpec::num_avg_timing_iters)
306305
.def_readwrite("workspace_size", &CompileSpec::workspace_size)
307-
.def_readwrite("max_batch_size", &CompileSpec::max_batch_size)
308306
.def_readwrite("torch_fallback", &CompileSpec::torch_fallback)
309307
.def_readwrite("truncate_long_and_double", &CompileSpec::truncate_long_and_double);
310308

py/torch_tensorrt/ts/_compile_spec.py

-16
Original file line numberDiff line numberDiff line change
@@ -196,10 +196,6 @@ def _parse_compile_spec(compile_spec: Dict[str, Any]) -> _ts_C.CompileSpec:
196196
assert isinstance(compile_spec["debug"], bool)
197197
info.debug = compile_spec["debug"]
198198

199-
if "strict_types" in compile_spec:
200-
assert isinstance(compile_spec["strict_types"], bool)
201-
info.strict_types = compile_spec["strict_types"]
202-
203199
if "device" in compile_spec:
204200
info.device = _parse_device(compile_spec["device"])
205201

@@ -219,10 +215,6 @@ def _parse_compile_spec(compile_spec: Dict[str, Any]) -> _ts_C.CompileSpec:
219215
assert type(compile_spec["workspace_size"]) is int
220216
info.workspace_size = compile_spec["workspace_size"]
221217

222-
if "max_batch_size" in compile_spec:
223-
assert type(compile_spec["max_batch_size"]) is int
224-
info.max_batch_size = compile_spec["max_batch_size"]
225-
226218
if "truncate_long_and_double" in compile_spec:
227219
assert type(compile_spec["truncate_long_and_double"]) is bool
228220
info.truncate_long_and_double = compile_spec["truncate_long_and_double"]
@@ -240,12 +232,10 @@ def TensorRTCompileSpec(inputs=[],
240232
enabled_precisions=set(),
241233
refit=False,
242234
debug=False,
243-
strict_types=False,
244235
capability=_enums.EngineCapability.default,
245236
num_min_timing_iters=2,
246237
num_avg_timing_iters=1,
247238
workspace_size=0,
248-
max_batch_size=0,
249239
truncate_long_and_double=False,
250240
calibrator=None) -> torch.classes.tensorrt.CompileSpec:
251241
"""Utility to create a formatted spec dictionary for using the PyTorch TensorRT backend
@@ -276,12 +266,10 @@ def TensorRTCompileSpec(inputs=[],
276266
enabled_precision (Set(Union(torch.dtype, torch_tensorrt.dtype))): The set of datatypes that TensorRT can use when selecting kernels
277267
refit (bool): Enable refitting
278268
debug (bool): Enable debuggable engine
279-
strict_types (bool): Kernels should strictly run in a particular operating precision. Enabled precision should only have one type in the set
280269
capability (torch_tensorrt.EngineCapability): Restrict kernel selection to safe gpu kernels or safe dla kernels
281270
num_min_timing_iters (int): Number of minimization timing iterations used to select kernels
282271
num_avg_timing_iters (int): Number of averaging timing iterations used to select kernels
283272
workspace_size (int): Maximum size of workspace given to TensorRT
284-
max_batch_size (int): Maximum batch size (must be >= 1 to be set, 0 means not set)
285273
truncate_long_and_double (bool): Truncate weights provided in int64 or double (float64) to int32 and float32
286274
calibrator (Union(torch_tensorrt._C.IInt8Calibrator, tensorrt.IInt8Calibrator)): Calibrator object which will provide data to the PTQ system for INT8 Calibration
287275
@@ -298,12 +286,10 @@ def TensorRTCompileSpec(inputs=[],
298286
"enabled_precisions": enabled_precisions, # Enabling FP16 kernels
299287
"refit": refit, # enable refit
300288
"debug": debug, # enable debuggable engine
301-
"strict_types": strict_types, # kernels should strictly run in operating precision
302289
"capability": capability, # Restrict kernel selection to safe gpu kernels or safe dla kernels
303290
"num_min_timing_iters": num_min_timing_iters, # Number of minimization timing iterations used to select kernels
304291
"num_avg_timing_iters": num_avg_timing_iters, # Number of averaging timing iterations used to select kernels
305292
"workspace_size": workspace_size, # Maximum size of workspace given to TensorRT
306-
"max_batch_size": max_batch_size, # Maximum batch size (must be >= 1 to be set, 0 means not set)
307293
"calibrator": calibrator,
308294
"truncate_long_and_double": truncate_long_and_double
309295
}
@@ -348,12 +334,10 @@ def TensorRTCompileSpec(inputs=[],
348334
backend_spec._set_refit(parsed_spec.refit)
349335
backend_spec._set_debug(parsed_spec.debug)
350336
backend_spec._set_refit(parsed_spec.refit)
351-
backend_spec._set_strict_types(parsed_spec.strict_types)
352337
backend_spec._set_capability(int(parsed_spec.capability))
353338
backend_spec._set_num_min_timing_iters(parsed_spec.num_min_timing_iters)
354339
backend_spec._set_num_avg_timing_iters(parsed_spec.num_avg_timing_iters)
355340
backend_spec._set_workspace_size(parsed_spec.workspace_size)
356-
backend_spec._set_max_batch_size(parsed_spec.max_batch_size)
357341
backend_spec._set_truncate_long_and_double(parsed_spec.truncate_long_and_double)
358342
backend_spec._set_ptq_calibrator(parsed_spec._get_calibrator_handle())
359343

0 commit comments

Comments
 (0)