Commit f8b2627: [IE CLDNN] int8 batches optimization (#632)
1 parent b9d6792

31 files changed: +1157 -42 lines

inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_kernel_imad.cpp
Lines changed: 1 addition & 0 deletions

@@ -83,6 +83,7 @@ ParamsKey ConvolutionKernel_imad::GetSupportedKey() const {
     k.EnableOutputLayout(DataLayout::b_fs_yx_fsv4);
     k.EnableOutputLayout(DataLayout::byxf_af32);
     k.EnableOutputLayout(DataLayout::b_fs_yx_fsv16);
+    k.EnableOutputLayout(DataLayout::bs_fs_yx_bsv16_fsv16);

     k.EnableDifferentTypes();
     k.EnableDifferentInputWeightsTypes();
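
Note: the bs_fs_yx_bsv16_fsv16 layout enabled above (and used by the new kernels below) blocks both the batch and feature dimensions in groups of 16, which is what lets a 16-wide subgroup serve a whole 16-batch block at once. A minimal sketch of the element offset that layout name implies, assuming batch and feature counts are already padded to multiples of 16 (the helper below is illustrative, not code from this commit):

#include <cstddef>

// Offset of element (b, f, y, x) in a bs_fs_yx_bsv16_fsv16 tensor.
// Assumes B and F are multiples of 16; fsv is the innermost (fastest) block.
inline std::size_t offset_bs_fs_yx_bsv16_fsv16(std::size_t b, std::size_t f,
                                               std::size_t y, std::size_t x,
                                               std::size_t F, std::size_t Y,
                                               std::size_t X) {
    const std::size_t fs_count = F / 16;         // number of 16-wide feature slices
    const std::size_t bs = b / 16, bv = b % 16;  // batch slice / batch within slice
    const std::size_t fs = f / 16, fv = f % 16;  // feature slice / feature within slice
    return ((((bs * fs_count + fs) * Y + y) * X + x) * 16 + bv) * 16 + fv;
}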
New file (145 lines added):

@@ -0,0 +1,145 @@
// Copyright (c) 2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.


#include "convolution_kernel_imad_bs_fs_yx_bsv16_fsv16_1x1.h"
#include "kernel_selector_utils.h"
#include "common_tools.h"
#include <vector>
#include <iostream>

//
// Kernel specific constants
//
#define SIMD_SIZE 16

namespace kernel_selector {

ParamsKey Convolution_kernel_imad_bs_fs_yx_bsv16_fsv16_1x1::GetSupportedKey() const {
    ParamsKey k;
    k.EnableInputDataType(Datatype::INT8);
    k.EnableInputDataType(Datatype::UINT8);

    k.EnableOutputDataType(Datatype::INT8);
    k.EnableOutputDataType(Datatype::UINT8);
    k.EnableOutputDataType(Datatype::F32);

    k.EnableInputWeightsType(WeightsType::INT8);

    k.EnableInputLayout(DataLayout::bs_fs_yx_bsv16_fsv16);
    k.EnableOutputLayout(DataLayout::bs_fs_yx_bsv16_fsv16);

    k.EnableDifferentTypes();
    k.EnableDifferentInputWeightsTypes();
    k.EnableTensorOffset();
    k.EnableTensorPitches();
    k.EnableBiasPerFeature();
    k.EnableNonBiasTerm();
    k.EnableBatching();
    k.EnableQuantization(QuantizationType::SYMMETRIC);
    k.DisableTuning();
    return k;
}

KernelsData Convolution_kernel_imad_bs_fs_yx_bsv16_fsv16_1x1::GetKernelsData(const Params& params, const optional_params& options) const {
    return GetCommonKernelsData(params, options);
}

JitConstants Convolution_kernel_imad_bs_fs_yx_bsv16_fsv16_1x1::GetJitConstants(const convolution_params& params, const DispatchData& kd) const {
    auto mem_consts = Parent::GetJitConstants(params, kd);
    if (!params.fused_ops.empty()) {
        auto input_dt = GetActivationType(params);
        FusedOpsConfiguration conf_scalar = {"",
                                             {"out_b", "16 * j + out_f + get_sub_group_local_id()", "out_y", "out_x"},
                                             "dequantized",
                                             input_dt,
                                             1,
                                             LoadType::FEATURE_SHUFFLE};
        conf_scalar.SetLoopAxes({ Tensor::DataChannelName::BATCH }, true);
        conf_scalar.SetShuffleVarName("i");

        mem_consts.Merge(MakeFusedOpsJitConstants(params, {conf_scalar}));
    }

    return mem_consts;
}  // GetJitConstants

ConvolutionKernelBase::DispatchData Convolution_kernel_imad_bs_fs_yx_bsv16_fsv16_1x1::SetDefault(const convolution_params& params, int) const {
    DispatchData kd;
    const auto& output = params.output;

    std::vector<size_t> global = {output.X().v, output.Y().v, output.Feature().v / 32 * output.Batch().v};
    std::vector<size_t> local = {1, 1, SIMD_SIZE};

    kd.gws0 = global[0];
    kd.gws1 = global[1];
    kd.gws2 = global[2];

    kd.lws0 = local[0];
    kd.lws1 = local[1];
    kd.lws2 = local[2];

    kd.cldnnStyle = {0, 0, 0, 0, 0};
    kd.gemmStyle = {0, 0, 0, 0, 0, 0};

    kd.efficiency = FORCE_PRIORITY_2;

    return kd;
}  // SetDefault

bool Convolution_kernel_imad_bs_fs_yx_bsv16_fsv16_1x1::Validate(const Params& params, const optional_params& options) const {
    if (!Parent::Validate(params, options)) {
        return false;
    }

    KernelData kd = KernelData::Default<convolution_params>(params);
    convolution_params& newParams = *static_cast<convolution_params*>(kd.params.get());

    if ((newParams.filterSize.x != newParams.filterSize.y) || newParams.filterSize.x != 1) {
        // Filter size needs to be 1x1
        return false;
    }

    if (newParams.stride.x != newParams.stride.y) {
        // Strides must be equal
        return false;
    }
    if (newParams.output.X().v != newParams.output.Y().v) {
        // W and H must be equal
        return false;
    }

    if (newParams.output.Feature().v % 32 != 0) {
        // Output feature count must be divisible by 32
        return false;
    }

    if (newParams.output.Batch().v % 16 != 0) {
        // Batch size must be divisible by 16
        return false;
    }

    // Check that all fused ops except eltwise have only per-feature or scalar inputs
    for (auto& fo : newParams.fused_ops) {
        if (fo.GetType() == FusedOpType::ELTWISE)
            continue;
        for (auto& input : fo.tensors) {
            if (input.X().v != 1 || input.Y().v != 1 || input.Batch().v != 1)
                return false;
        }
    }

    return true;
}
}  // namespace kernel_selector
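
One way to read the dispatch in SetDefault above: gws2 = F/32 * B gives (F/32 feature slices) x (B/16 batch blocks) x 16 lanes, so each 16-lane subgroup covers one 32-feature slice (two features per lane, matching the "16 * j + out_f + get_sub_group_local_id()" fused-ops index) for one 16-batch block, with the batches handled by the loop axis set in GetJitConstants. A standalone sketch with sample numbers (shape chosen for illustration only):

#include <cstddef>
#include <cstdio>

int main() {
    // Sample output shape that satisfies the 1x1 kernel's Validate() checks.
    const std::size_t X = 56, Y = 56, F = 64, B = 16;
    const std::size_t SIMD_SIZE = 16;

    const std::size_t gws[3] = {X, Y, F / 32 * B};  // as computed in SetDefault
    const std::size_t lws[3] = {1, 1, SIMD_SIZE};

    // 64/32 * 16 = 32 work-items along z, i.e. 2 subgroups:
    // (F/32 = 2 feature slices) x (B/16 = 1 batch block).
    std::printf("gws = {%zu, %zu, %zu}, lws = {%zu, %zu, %zu}\n",
                gws[0], gws[1], gws[2], lws[0], lws[1], lws[2]);
    return 0;
}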
New file (49 lines added):

@@ -0,0 +1,49 @@
/*
// Copyright (c) 2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
*/

#pragma once

#include "convolution_kernel_base.h"
#include <vector>

namespace kernel_selector {

class Convolution_kernel_imad_bs_fs_yx_bsv16_fsv16_1x1 : public ConvolutionKernelBase {
public:
    using Parent = ConvolutionKernelBase;
    Convolution_kernel_imad_bs_fs_yx_bsv16_fsv16_1x1() : ConvolutionKernelBase("convolution_gpu_imad_bs_fs_yx_bsv16_fsv16_1x1") {}
    virtual ~Convolution_kernel_imad_bs_fs_yx_bsv16_fsv16_1x1() {}

    KernelsData GetKernelsData(const Params& params, const optional_params& options) const override;
    ParamsKey GetSupportedKey() const override;

protected:
    bool Validate(const Params& params, const optional_params& options) const override;
    JitConstants GetJitConstants(const convolution_params& params, const DispatchData& kd) const override;
    DispatchData SetDefault(const convolution_params& params, int autoTuneIndex = -1) const override;
    bool NeedPaddedInput() const override { return true; }
    WeightsLayout GetPreferredWeightsLayout(const convolution_params&) const override {
        return { WeightsLayout::os_is_yx_osv16_isv16 };
    }

    std::vector<FusedOpType> GetSupportedFusedOps() const override {
        return { FusedOpType::ELTWISE,
                 FusedOpType::QUANTIZE,
                 FusedOpType::SCALE,
                 FusedOpType::ACTIVATION };
    }
};
}  // namespace kernel_selector
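
For context: a kernel class like the one above only becomes selectable once it is registered with the convolution kernel selector (that wiring is among the other files in this commit, not shown here). A sketch of what that registration usually looks like, assuming the Attach<> pattern kernel_selector uses elsewhere; the exact file and placement are assumptions:

// In convolution_kernel_selector.cpp (hypothetical placement):
// convolution_kernel_selector::convolution_kernel_selector() {
//     ...
//     Attach<Convolution_kernel_imad_bs_fs_yx_bsv16_fsv16_1x1>();
//     Attach<Convolution_kernel_imad_bs_fs_yx_bsv16_fsv16_3x3>();
//     ...
// }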
New file (146 lines added):

@@ -0,0 +1,146 @@
// Copyright (c) 2020 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.


#include "convolution_kernel_imad_bs_fs_yx_bsv16_fsv16_3x3.h"
#include "kernel_selector_utils.h"
#include "common_tools.h"
#include <vector>
#include <iostream>

//
// Kernel specific constants
//
#define SIMD_SIZE 16

namespace kernel_selector {

ParamsKey Convolution_kernel_imad_bs_fs_yx_bsv16_fsv16_3x3::GetSupportedKey() const {
    ParamsKey k;
    k.EnableInputDataType(Datatype::INT8);
    k.EnableInputDataType(Datatype::UINT8);

    k.EnableOutputDataType(Datatype::INT8);
    k.EnableOutputDataType(Datatype::UINT8);
    k.EnableOutputDataType(Datatype::F32);

    k.EnableInputWeightsType(WeightsType::INT8);

    k.EnableInputLayout(DataLayout::bs_fs_yx_bsv16_fsv16);
    k.EnableOutputLayout(DataLayout::bs_fs_yx_bsv16_fsv16);

    k.EnableDifferentTypes();
    k.EnableDifferentInputWeightsTypes();
    k.EnableTensorOffset();
    k.EnableTensorPitches();
    k.EnableBiasPerFeature();
    k.EnableNonBiasTerm();
    k.EnableBatching();
    k.EnableQuantization(QuantizationType::SYMMETRIC);
    k.DisableTuning();
    return k;
}

KernelsData Convolution_kernel_imad_bs_fs_yx_bsv16_fsv16_3x3::GetKernelsData(const Params& params, const optional_params& options) const {
    return GetCommonKernelsData(params, options);
}

JitConstants Convolution_kernel_imad_bs_fs_yx_bsv16_fsv16_3x3::GetJitConstants(const convolution_params& params, const DispatchData& kd) const {
    auto mem_consts = Parent::GetJitConstants(params, kd);

    if (!params.fused_ops.empty()) {
        auto input_dt = GetActivationType(params);
        FusedOpsConfiguration conf_scalar = {"",
                                             {"out_b", "out_f + get_sub_group_local_id()", "out_y", "out_x"},
                                             "dequantized",
                                             input_dt,
                                             1,
                                             LoadType::FEATURE_SHUFFLE};
        conf_scalar.SetLoopAxes({ Tensor::DataChannelName::BATCH }, true);
        conf_scalar.SetShuffleVarName("i");
        mem_consts.Merge(MakeFusedOpsJitConstants(params, {conf_scalar}));
    }

    return mem_consts;
}  // GetJitConstants

ConvolutionKernelBase::DispatchData Convolution_kernel_imad_bs_fs_yx_bsv16_fsv16_3x3::SetDefault(const convolution_params& params, int) const {
    DispatchData kd;
    const auto& output = params.output;

    std::vector<size_t> global = {output.X().v, output.Y().v, output.Feature().v / 16 * output.Batch().v};
    std::vector<size_t> local = {1, 1, SIMD_SIZE};

    kd.gws0 = global[0];
    kd.gws1 = global[1];
    kd.gws2 = global[2];

    kd.lws0 = local[0];
    kd.lws1 = local[1];
    kd.lws2 = local[2];

    kd.cldnnStyle = {0, 0, 0, 0, 0};
    kd.gemmStyle = {0, 0, 0, 0, 0, 0};

    kd.efficiency = FORCE_PRIORITY_2;

    return kd;
}  // SetDefault

bool Convolution_kernel_imad_bs_fs_yx_bsv16_fsv16_3x3::Validate(const Params& params, const optional_params& options) const {
    if (!Parent::Validate(params, options)) {
        return false;
    }

    KernelData kd = KernelData::Default<convolution_params>(params);
    convolution_params& newParams = *static_cast<convolution_params*>(kd.params.get());

    if ((newParams.filterSize.x != newParams.filterSize.y) ||
        newParams.filterSize.x != 3) {
        // Filter size needs to be 3x3
        return false;
    }

    if (newParams.stride.x != newParams.stride.y) {
        // Strides must be equal
        return false;
    }
    if (newParams.output.X().v != newParams.output.Y().v) {
        // W and H must be equal
        return false;
    }

    if (newParams.output.Feature().v % 16 != 0) {
        // Output feature count must be divisible by 16
        return false;
    }

    if (newParams.output.Batch().v % 16 != 0) {
        // Batch size must be divisible by 16
        return false;
    }

    // Check that all fused ops except eltwise have only per-feature or scalar inputs
    for (auto& fo : newParams.fused_ops) {
        if (fo.GetType() == FusedOpType::ELTWISE)
            continue;
        for (auto& input : fo.tensors) {
            if (input.X().v != 1 || input.Y().v != 1 || input.Batch().v != 1)
                return false;
        }
    }

    return true;
}
}  // namespace kernel_selector
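
The 3x3 variant mirrors the 1x1 kernel but keeps one 16-feature slice per subgroup (gws2 = F/16 * B, and no "16 * j" term in the fused-ops index). A quick standalone sanity check of its Validate() constraints, re-stated outside the kernel class for illustration (the struct and helper here are hypothetical, not part of the commit):

#include <cstddef>
#include <cstdio>

struct Shape { std::size_t b, f, y, x, ky, kx, sy, sx; };

// Mirrors the 3x3 kernel's Validate() checks above.
static bool passes(const Shape& s) {
    if (s.kx != s.ky || s.kx != 3) return false;  // filter must be 3x3
    if (s.sx != s.sy)              return false;  // strides must be equal
    if (s.x != s.y)                return false;  // output W and H must be equal
    if (s.f % 16 != 0)             return false;  // features divisible by 16
    if (s.b % 16 != 0)             return false;  // batch divisible by 16
    return true;
}

int main() {
    std::printf("%d\n", passes({16, 64, 28, 28, 3, 3, 1, 1}));  // 1: accepted
    std::printf("%d\n", passes({8, 64, 28, 28, 3, 3, 1, 1}));   // 0: batch not a multiple of 16
    return 0;
}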
