Skip to content

Commit 0509c66

Browse files
author
Konrad Dobros
authored
[IE CLDNN] Add some auto-tuning improvements (#1154)
- add error reporting for failed kernel runs during auto-tune
- fix auto-tuning for asymmetric quantization
- add asymmetric quantization information to cache
- change auto-tuning metric from average to min
1 parent 054e1cf commit 0509c66

File tree

5 files changed

+42
-14
lines changed

5 files changed

+42
-14
lines changed

inference-engine/thirdparty/clDNN/kernel_selector/core/actual_kernels/convolution/convolution_params.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,7 @@ std::string convolution_params::to_string() const {
4141
std::string convolution_params::to_cache_string_v2() const {
4242
std::stringstream s;
4343

44-
s << weight_bias_params::to_cache_string_v2() << ";";
44+
s << parent::to_cache_string_v2() << ";";
4545
s << filterSize.x << "_" << filterSize.y << "_" << filterSize.z << ";";
4646
s << stride.x << "_" << stride.y << "_" << stride.z << ";";
4747
s << dilation.x << "_" << dilation.y << "_" << dilation.z << ";";

inference-engine/thirdparty/clDNN/kernel_selector/core/common/weight_bias_params.cpp

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
*/
1616

1717
#include "weight_bias_params.h"
18+
#include <sstream>
1819

1920
namespace kernel_selector {
2021
ParamsKey weight_bias_params::GetParamsKey() const {
@@ -37,4 +38,19 @@ ParamsKey weight_bias_params::GetParamsKey() const {
3738

3839
return k;
3940
}
41+
42+
// Builds the v2 cache string for parameters that may carry zero-point data.
// Starts from weight_bias_params::to_cache_string_v2() and then appends one
// ";"-prefixed tag for each kind of data that is present, in this order:
//   ";activation_zp"  - activations_zero_points is non-empty
//   ";weights_zp"     - weights_zero_points is non-empty
//   ";compensation"   - HasCompensation() is true
// When none of these are present the result equals the base-class string.
// Returns the assembled string by value.
std::string weight_bias_zero_point_params::to_cache_string_v2() const {
43+
std::stringstream s;
44+
45+
s << weight_bias_params::to_cache_string_v2();
46+
if (!activations_zero_points.empty())
47+
s << ";activation_zp";
48+
if (!weights_zero_points.empty())
49+
s << ";weights_zp";
50+
if (HasCompensation())
51+
s << ";compensation";
52+
53+
return s.str();
54+
}
55+
4056
} // namespace kernel_selector

inference-engine/thirdparty/clDNN/kernel_selector/core/common/weight_bias_params.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ struct weight_bias_zero_point_params : public weight_bias_params {
4343
MultiDataTensor compensation;
4444

4545
bool HasCompensation() const { return !compensation.empty(); }
46+
std::string to_cache_string_v2() const override;
4647
};
4748

4849
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

inference-engine/thirdparty/clDNN/src/gpu/kernel.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -238,7 +238,7 @@ void set_arguments(kernels_cache::kernel_type& kernel,
238238
}
239239

240240
if (status != CL_SUCCESS) {
241-
throw std::runtime_error("Error set args\n");
241+
throw std::runtime_error("Error set arg " + std::to_string(i) + ", error code: " + std::to_string(status) + "\n");
242242
}
243243
}
244244
}

inference-engine/thirdparty/clDNN/src/gpu/kernel_runner.cpp

Lines changed: 23 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -135,8 +135,9 @@ void kernel_runner::prepare_kernel_args(const kernel_selector::KernelsData& kern
135135
if (zero_points_exist) {
136136
const auto& zero_point_params =
137137
static_cast<const kernel_selector::weight_bias_zero_point_params&>(weights_bias_params);
138-
if (weight_zero_point_buffers.empty()) {
139-
for (auto& weight_zero_point : zero_point_params.weights_zero_points) {
138+
if (!zero_point_params.weights_zero_points.empty()) {
139+
if (weight_zero_point_buffers.empty()) {
140+
auto& weight_zero_point = zero_point_params.weights_zero_points[0];
140141
auto num_of_elements = static_cast<int>(weight_zero_point.PhysicalSize());
141142
weight_zero_point_buffers.push_back(
142143
engine->allocate_memory({
@@ -145,28 +146,33 @@ void kernel_runner::prepare_kernel_args(const kernel_selector::KernelsData& kern
145146
tensor(1, num_of_elements, 1, 1) },
146147
0));
147148
}
149+
args.weights_zero_points = weight_zero_point_buffers[0];
148150
}
149-
if (activation_zero_point_buffers.empty()) {
150-
for (auto& activation_zero_point : zero_point_params.activations_zero_points) {
151+
if (!zero_point_params.activations_zero_points.empty()) {
152+
if (activation_zero_point_buffers.empty()) {
153+
auto& activation_zero_point = zero_point_params.activations_zero_points[0];
151154
auto num_of_elements = static_cast<int>(activation_zero_point.PhysicalSize());
152-
weight_zero_point_buffers.push_back(
155+
activation_zero_point_buffers.push_back(
153156
engine->allocate_memory({
154157
from_data_type(activation_zero_point.GetDType()),
155158
format::bfyx,
156159
tensor(1, num_of_elements, 1, 1) },
157160
0));
158161
}
162+
args.activations_zero_points = activation_zero_point_buffers[0];
159163
}
160-
if (compensation_buffers.empty()) {
161-
for (auto& compensation : zero_point_params.compensation) {
164+
if (!zero_point_params.compensation.empty()) {
165+
if (compensation_buffers.empty()) {
166+
auto& compensation = zero_point_params.compensation[0];
162167
auto num_of_elements = static_cast<int>(compensation.PhysicalSize());
163-
weight_zero_point_buffers.push_back(
168+
compensation_buffers.push_back(
164169
engine->allocate_memory({
165170
from_data_type(compensation.GetDType()),
166171
format::bfyx,
167172
tensor(1, num_of_elements, 1, 1) },
168173
0));
169174
}
175+
args.compensation = compensation_buffers[0];
170176
}
171177
}
172178
}
@@ -202,27 +208,32 @@ std::vector<std::chrono::nanoseconds> kernel_runner::run_kernels(const kernel_se
202208
int i = 0;
203209
for (auto it = batch_start; it < batch_end; it++) {
204210
std::vector<event_impl::ptr> events;
205-
auto kernel_run_time = std::chrono::nanoseconds::zero();
211+
auto kernel_run_time = std::chrono::nanoseconds::max();
206212
int num_of_runs = 0;
207213

208214
for (int iteration = 0; iteration < runs_per_kernel; iteration++) {
209215
event_impl::ptr event;
210216
try {
211217
event = kernels[i].run(0, it->kernels[0], {}, args);
218+
} catch (std::exception& e) {
219+
std::cout << "[clDNN] Could not run kernel for auto-tune: " << it->kernelName
220+
<< " with auto-tune index " << it->autoTuneIndex << std::endl
221+
<< ", error message:" << e.what();
212222
} catch (...) {
213223
// Could not run this kernel. Push back NULL event (will be ignored later).
224+
std::cout << "[clDNN] Could not run kernel for auto-tune: " << it->kernelName
225+
<< " with auto-tune index " << it->autoTuneIndex << std::endl;
214226
}
215227
events.push_back(event);
216228
}
217-
218229
context->queue(0).finish();
219230

220231
for (auto& event : events) {
221232
if (event.get() != NULL) {
222233
auto profiling_intervals = event->get_profiling_info();
223234
for (auto const& profiling_interval : profiling_intervals) {
224235
if (profiling_interval.name == "executing") {
225-
kernel_run_time += profiling_interval.value->value();
236+
kernel_run_time = std::min(profiling_interval.value->value(), kernel_run_time);
226237
num_of_runs++;
227238
break;
228239
}
@@ -231,7 +242,7 @@ std::vector<std::chrono::nanoseconds> kernel_runner::run_kernels(const kernel_se
231242
}
232243

233244
if (num_of_runs > 0) {
234-
run_times.push_back(kernel_run_time / num_of_runs);
245+
run_times.push_back(kernel_run_time);
235246
num_of_kernels_run += 1;
236247
} else {
237248
run_times.push_back(std::chrono::nanoseconds::max());

0 commit comments

Comments
 (0)