Skip to content

Commit 5708634

Browse files
committed
chore: Add quantization files
Signed-off-by: Dheeraj Peri <[email protected]>
1 parent fc8eafb commit 5708634

File tree

1 file changed

+59
-0
lines changed

1 file changed

+59
-0
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
#include <torch/torch.h>
2+
#include "core/conversion/converters/converters.h"
3+
#include "core/util/prelude.h"
4+
5+
namespace trtorch {
6+
namespace core {
7+
namespace conversion {
8+
namespace converters {
9+
namespace impl {
10+
namespace {
11+
12+
// clang-format off
13+
auto quantization_registrations TRTORCH_UNUSED = RegisterNodeConversionPatterns()
14+
.pattern({"aten::fake_quantize_per_tensor_affine(Tensor self, float scale, int zero_point, int quant_min, int quant_max) -> (Tensor)",
15+
[](ConversionCtx* ctx, const torch::jit::Node* n, args& args) -> bool {
16+
auto input = args[0].ITensorOrFreeze(ctx);
17+
auto scale = args[1].unwrapToScalar().to<float>();
18+
auto scaleTensor = tensor_to_const(ctx, torch::tensor({scale}));
19+
20+
// Add and configure a QuantizeLayer.
21+
nvinfer1::IQuantizeLayer* quantize_layer = ctx->net->addQuantize(*input, *scaleTensor);
22+
// Set an invalid axis
23+
quantize_layer->setAxis(1);
24+
25+
// Add and configure DequantizeLayer
26+
nvinfer1::IDequantizeLayer* dequantize_layer = ctx->net->addDequantize(*quantize_layer->getOutput(0), *scaleTensor);
27+
dequantize_layer->setAxis(1);
28+
29+
auto qdq_out = ctx->AssociateValueAndTensor(n->outputs()[0], dequantize_layer->getOutput(0));
30+
LOG_DEBUG("[fake_quantize_per_tensor_affine] Output tensor shape: " << qdq_out->getDimensions());
31+
32+
return true;
33+
}})
34+
.pattern({"aten::fake_quantize_per_channel_affine(Tensor self, Tensor scale, Tensor zero_point, int axis, int quant_min, int quant_max) -> (Tensor)",
35+
[](ConversionCtx* ctx, const torch::jit::Node* n, args& args) -> bool {
36+
auto input = args[0].ITensorOrFreeze(ctx);
37+
auto scale = args[1].ITensorOrFreeze(ctx);
38+
39+
// Add and configure a QuantizeLayer.
40+
nvinfer1::IQuantizeLayer* quantize_layer = ctx->net->addQuantize(*input, *scale);
41+
// Set a channel axis=0 which represents output channels
42+
quantize_layer->setAxis(0);
43+
44+
// Add and configure a DequantizeLayer.
45+
nvinfer1::IDequantizeLayer* dequantize_layer = ctx->net->addDequantize(*quantize_layer->getOutput(0), *scale);
46+
dequantize_layer->setAxis(0);
47+
auto qdq_out = ctx->AssociateValueAndTensor(n->outputs()[0], dequantize_layer->getOutput(0));
48+
49+
LOG_DEBUG("[fake_quantize_per_channel_affine] Ouput tensor shape: " << qdq_out->getDimensions());
50+
51+
return true;
52+
}});
53+
// clang-format on
54+
} // namespace
55+
} // namespace impl
56+
} // namespace converters
57+
} // namespace conversion
58+
} // namespace core
59+
} // namespace trtorch

0 commit comments

Comments
 (0)