Skip to content

Commit 9c60752

Browse files
[GNA] Support export model with multiple inputs/outputs and Permute layer (#1024)
1 parent ae9e051 commit 9c60752

File tree

10 files changed

+436
-92
lines changed

10 files changed

+436
-92
lines changed

inference-engine/src/gna_plugin/backend/dnn_types.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -80,7 +80,7 @@ static const char *intel_dnn_softmax_name[kSoftmaxNumType] = {
8080
};
8181

8282
typedef enum {
83-
kDnnUnknownOrientation,
83+
kDnnUnknownOrientation = 100,
8484
kDnnInterleavedOrientation,
8585
kDnnNonInterleavedOrientation,
8686
kDnnNumOrientation

inference-engine/src/gna_plugin/gna_device.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,7 @@
2525
#include "gna_plugin_log.hpp"
2626

2727
uint8_t* GNADeviceHelper::alloc(uint32_t size_requested, uint32_t *size_granted) {
28-
void * memPtr;
28+
void * memPtr = nullptr;
2929
#if GNA_LIB_VER == 1
3030
memPtr = GNAAlloc(nGNAHandle, size_requested, size_granted);
3131
#else

inference-engine/src/gna_plugin/gna_model_serial.cpp

Lines changed: 148 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,9 @@
88
#include <ios>
99
#include <iomanip>
1010
#include <map>
11+
#include <ie_algorithm.hpp>
12+
#include <ie_common.h>
13+
#include <ie_precision.hpp>
1114

1215
#if defined __INTEL_COMPILER || defined _MSC_VER
1316
#include <malloc.h>
@@ -119,11 +122,21 @@ const std::map<Gna2OperationType, std::vector<uint32_t>> GnaParamSize{
119122
sizeof(Gna2Shape),
120123
sizeof(Gna2Shape)}},
121124
{Gna2OperationTypeCopy, {sizeof(Gna2Shape)}},
125+
{Gna2OperationTypeTransposition, {sizeof(Gna2Shape)}},
122126
};
123127

124-
void GNAModelSerial::Import(void *basePointer, size_t gnaGraphSize, std::istream & is) {
128+
void GNAModelSerial::Import(void *basePointer,
129+
size_t gnaGraphSize,
130+
std::istream & is,
131+
std::shared_ptr<GNAPluginNS::InputDesc> inputsDesc,
132+
std::vector<GNAPluginNS::OutputDesc> &desc,
133+
InferenceEngine::InputsDataMap& inputsDataMap,
134+
InferenceEngine::OutputsDataMap& outputsDataMap) {
125135
is.exceptions(std::istream::failbit);
126136

137+
ImportInputs(is, basePointer, inputsDesc, inputsDataMap);
138+
ImportOutputs(is, basePointer, desc, outputsDataMap);
139+
127140
for (auto operation = gna2Model->Operations; operation != gna2Model->Operations + gna2Model->NumberOfOperations; ++operation) {
128141
readNBits<32>(operation->Type, is);
129142
readBits(operation->NumberOfOperands, is);
@@ -146,11 +159,10 @@ void GNAModelSerial::Import(void *basePointer, size_t gnaGraphSize, std::istream
146159
case Gna2OperationTypeFullyConnectedAffine:
147160
case Gna2OperationTypeConvolution:
148161
case Gna2OperationTypeCopy:
162+
case Gna2OperationTypeTransposition:
149163
break;
150164
case Gna2OperationTypeRecurrent:
151165
THROW_GNA_EXCEPTION << "Importing of recurrent operation not supported";
152-
case Gna2OperationTypeTransposition:
153-
THROW_GNA_EXCEPTION << "Importing of transposition operation not supported";
154166
default:
155167
THROW_GNA_EXCEPTION << "Importing of unknown GNA operation type(" << operation->Type << ") not supported";
156168
}
@@ -237,11 +249,12 @@ void GNAModelSerial::Export(void * basePointer, size_t gnaGraphSize, std::ostrea
237249
};
238250

239251
auto convert_to_serial = [getOffsetFromBase](const GNAModelSerial::RuntimeEndPoint& ep) {
240-
ModelHeader::EndPoint out;
252+
RuntimeEndPoint out;
241253
out.elements_count = ep.elements_count;
242254
out.descriptor_offset = offsetFromBase(ep.descriptor_ptr);
243255
out.scaleFactor = ep.scaleFactor;
244256
out.element_size = ep.element_size;
257+
out.orientation = ep.orientation;
245258
return out;
246259
};
247260
/**
@@ -258,15 +271,21 @@ void GNAModelSerial::Export(void * basePointer, size_t gnaGraphSize, std::ostrea
258271
header.gnaMemSize = gnaGraphSize;
259272
header.layersCount = layers.size();
260273
header.nGroup = guessGrouping(*gna2Model);
261-
header.input = convert_to_serial(input);
262-
header.output = convert_to_serial(output);
263-
274+
header.nInputs = inputs.size();
275+
header.nOutputs = outputs.size();
264276
header.nRotateRows = nRotateRows;
265277
header.nRotateColumns = nRotateColumns;
266278

267279

268280
writeBits(header, os);
269281

282+
for (const auto &input : inputs) {
283+
writeBits(convert_to_serial(input), os);
284+
}
285+
for (const auto &output : outputs) {
286+
writeBits(convert_to_serial(output), os);
287+
}
288+
270289
for (const auto & layer : layers) {
271290
writeBits(static_cast<uint32_t>(layer.Type), os);
272291
writeBits(layer.NumberOfOperands, os);
@@ -286,11 +305,10 @@ void GNAModelSerial::Export(void * basePointer, size_t gnaGraphSize, std::ostrea
286305
case Gna2OperationTypeFullyConnectedAffine:
287306
case Gna2OperationTypeConvolution:
288307
case Gna2OperationTypeCopy:
308+
case Gna2OperationTypeTransposition:
289309
break;
290310
case Gna2OperationTypeRecurrent:
291311
THROW_GNA_EXCEPTION << "Exporting of recurrent operation not supported";
292-
case Gna2OperationTypeTransposition:
293-
THROW_GNA_EXCEPTION << "Exporting of interleave operation not supported";
294312
default:
295313
THROW_GNA_EXCEPTION << "Exporting of unknown GNA operation type(" << layer.Type << ") not supported";
296314
}
@@ -316,9 +334,18 @@ void GNAModelSerial::Export(void * basePointer, size_t gnaGraphSize, std::ostrea
316334
}
317335
#else
318336

319-
void GNAModelSerial::Import(void *basePointer, size_t gnaGraphSize, std::istream & is) {
337+
void GNAModelSerial::Import(void *basePointer,
338+
size_t gnaGraphSize,
339+
std::istream & is,
340+
std::shared_ptr<GNAPluginNS::InputDesc> inputsDesc,
341+
std::vector<GNAPluginNS::OutputDesc> &desc,
342+
InferenceEngine::InputsDataMap& inputsDataMap,
343+
InferenceEngine::OutputsDataMap& outputsDataMap) {
320344
is.exceptions(std::istream::failbit);
321345

346+
ImportInputs(is, basePointer, inputsDesc, inputsDataMap);
347+
ImportOutputs(is, basePointer, desc, outputsDataMap);
348+
322349
auto readPwl = [&is, basePointer](intel_pwl_func_t & value) {
323350
readBits(value.nSegments, is);
324351
if (value.nSegments != 0) {
@@ -468,11 +495,12 @@ void GNAModelSerial::Export(void * basePointer, size_t gnaGraphSize, std::ostrea
468495
};
469496

470497
auto convert_to_serial = [getOffsetFromBase](const GNAModelSerial::RuntimeEndPoint& ep){
471-
ModelHeader::EndPoint out;
498+
RuntimeEndPoint out;
472499
out.elements_count = ep.elements_count;
473500
out.element_size = ep.element_size;
474501
out.descriptor_offset = offsetFromBase(ep.descriptor_ptr);
475502
out.scaleFactor = ep.scaleFactor;
503+
out.orientation = ep.orientation;
476504
return out;
477505
};
478506
/**
@@ -488,14 +516,16 @@ void GNAModelSerial::Export(void * basePointer, size_t gnaGraphSize, std::ostrea
488516
header.gnaMemSize = gnaGraphSize;
489517
header.layersCount = layers.size();
490518
header.nGroup = ptr_nnet->nGroup;
491-
header.input = convert_to_serial(input);
492-
header.output = convert_to_serial(output);
519+
header.nInputs = 1;
520+
header.nOutputs = 1;
493521
header.headerSize = sizeof(ModelHeader);
494522
header.nRotateRows = nRotateRows;
495523
header.nRotateColumns = nRotateColumns;
496524

497525

498526
writeBits(header, os);
527+
writeBits(convert_to_serial(inputs[0]), os);
528+
writeBits(convert_to_serial(outputs[0]), os);
499529

500530
for (auto & layer : layers) {
501531
writeBits(layer.nInputColumns, os);
@@ -574,3 +604,108 @@ void GNAModelSerial::Export(void * basePointer, size_t gnaGraphSize, std::ostrea
574604
}
575605

576606
#endif
607+
608+
/**
 * @brief Builds one serializable end point per entry of the outputs data map.
 *
 * The i-th entry of @p outputsDataMap (in the map's iteration order) is paired
 * with outputsDesc[i]. The element count of each end point is the product of
 * the dimensions reported by the output's tensor descriptor; scale factor,
 * first pointer, bytes per element and orientation are taken from the
 * matching output descriptor.
 *
 * @param outputsDataMap network outputs, keyed by name
 * @param outputsDesc    per-output descriptors, indexed by position
 * @return end points in the iteration order of @p outputsDataMap
 *
 * IE_ASSERT fires when @p outputsDesc has fewer entries than
 * @p outputsDataMap, or when a descriptor holds no pointers.
 */
std::vector<GNAModelSerial::RuntimeEndPoint> GNAModelSerial::serializeOutputs(const InferenceEngine::OutputsDataMap& outputsDataMap,
                                                                              const std::vector<GNAPluginNS::OutputDesc>& outputsDesc) {
    std::vector<GNAModelSerial::RuntimeEndPoint> endPoints;
    endPoints.reserve(outputsDataMap.size());

    std::size_t outputIndex = 0;
    for (auto const &output : outputsDataMap) {
        // Fail loudly instead of indexing past the end of the descriptor list
        // or of an empty pointer list (serializeInputs performs the same check).
        IE_ASSERT(outputIndex < outputsDesc.size());
        const auto &outputDesc = outputsDesc[outputIndex];
        IE_ASSERT(!outputDesc.ptrs.empty());

        const auto outputDims = output.second->getTensorDesc().getDims();
        const uint32_t elementsCount =
            static_cast<uint32_t>(InferenceEngine::details::product(outputDims.begin(), outputDims.end()));

        GNAModelSerial::RuntimeEndPoint endPoint(outputDesc.scale_factor,
                                                 outputDesc.ptrs[0],
                                                 outputDesc.num_bytes_per_element,
                                                 elementsCount,
                                                 outputDesc.orientation);
        endPoints.push_back(endPoint);
        ++outputIndex;
    }
    return endPoints;
}
627+
628+
/**
 * @brief Builds one serializable end point per entry of the inputs data map.
 *
 * For every input (in the map's iteration order) the scale factor is queried
 * by position, while the pointer list and the orientation are queried by
 * name. The first pointer of the list is the one that gets serialized, and
 * the element size is fixed at 2 bytes.
 *
 * @param inputsDataMap network inputs, keyed by name
 * @param inputDesc     descriptor object queried for scale factors, pointers
 *                      and orientation
 * @return end points in the iteration order of @p inputsDataMap
 */
std::vector<GNAModelSerial::RuntimeEndPoint> GNAModelSerial::serializeInputs(const InferenceEngine::InputsDataMap& inputsDataMap,
                                                                             std::shared_ptr<GNAPluginNS::InputDesc> inputDesc) {
    std::vector<GNAModelSerial::RuntimeEndPoint> serialized;

    std::size_t position = 0;
    for (auto const& entry : inputsDataMap) {
        auto name = entry.first;
        auto dims = entry.second->getTensorDesc().getDims();

        double scale = inputDesc->getScaleFactor(position);
        std::vector<void *> descriptor_ptr = inputDesc->getPtrInputsGlobal(name);
        IE_ASSERT(descriptor_ptr.size() > 0);

        // Every input element is serialized as a 2-byte value.
        uint32_t bytesPerElement = 2u;
        uint32_t totalElements = static_cast<uint32_t>(InferenceEngine::details::product(dims.begin(), dims.end()));
        intel_dnn_orientation_t layoutOrientation = inputDesc->getOrientation(name);

        serialized.push_back(GNAModelSerial::RuntimeEndPoint(scale,
                                                             descriptor_ptr[0],
                                                             bytesPerElement,
                                                             totalElements,
                                                             layoutOrientation));
        ++position;
    }
    return serialized;
}
654+
655+
void GNAModelSerial::ImportInputs(std::istream &is,
656+
void* basePtr,
657+
std::shared_ptr<GNAPluginNS::InputDesc> inputsDesc,
658+
InferenceEngine::InputsDataMap& dataMap) {
659+
dataMap.clear();
660+
661+
for (auto inputIndex = 0; inputIndex < modelHeader.nInputs; inputIndex++) {
662+
std::string name = "input" + std::to_string(inputIndex);
663+
RuntimeEndPoint input;
664+
is.read(reinterpret_cast<char *>(&input), sizeof(input));
665+
inputsDesc->getPtrInputsGlobal(name).push_back(reinterpret_cast<float*>(reinterpret_cast<uint8_t *> (basePtr) + input.descriptor_offset));
666+
inputsDesc->orientation_in[name] = input.orientation;
667+
668+
auto inputDims = InferenceEngine::SizeVector({modelHeader.nGroup, input.elements_count / modelHeader.nGroup});
669+
670+
dataMap[name] = std::make_shared<InferenceEngine::InputInfo>();
671+
dataMap[name]->setInputData(std::make_shared<InferenceEngine::Data>(name,
672+
InferenceEngine::TensorDesc(
673+
InferenceEngine::Precision::FP32,
674+
inputDims,
675+
InferenceEngine::Layout::NC)));
676+
inputsDesc->inputScaleFactors.push_back(input.scaleFactor);
677+
}
678+
}
679+
680+
void GNAModelSerial::ImportOutputs(std::istream &is,
681+
void* basePtr,
682+
std::vector<GNAPluginNS::OutputDesc> &desc,
683+
InferenceEngine::OutputsDataMap& dataMap) {
684+
desc.clear();
685+
dataMap.clear();
686+
desc.resize(modelHeader.nOutputs);
687+
688+
for (auto outputIndex = 0; outputIndex < modelHeader.nOutputs; outputIndex++) {
689+
std::string name = "output" + std::to_string(outputIndex);
690+
RuntimeEndPoint output;
691+
is.read(reinterpret_cast<char *>(&output), sizeof(output));
692+
GNAPluginNS::OutputDesc description;
693+
description.ptrs.push_back(reinterpret_cast<float*>(reinterpret_cast<uint8_t *> (basePtr) + output.descriptor_offset));
694+
description.orientation = kDnnInterleavedOrientation;
695+
description.orientation = output.orientation;
696+
description.num_bytes_per_element = output.element_size;
697+
description.scale_factor = output.scaleFactor;
698+
699+
auto outputDims = InferenceEngine::SizeVector({modelHeader.nGroup, output.elements_count / modelHeader.nGroup});
700+
dataMap[name] = std::make_shared<InferenceEngine::Data>(name,
701+
InferenceEngine::TensorDesc(
702+
InferenceEngine::Precision::FP32,
703+
outputDims,
704+
InferenceEngine::Layout::NC));
705+
desc.at(outputIndex) = description;
706+
}
707+
}
708+
709+
/**
 * @brief Replaces the model header kept by this serializer.
 *
 * ImportInputs and ImportOutputs read their record counts and the grouping
 * from this stored header.
 *
 * @param header header value to store (copied)
 */
void GNAModelSerial::setHeader(ModelHeader header) {
    this->modelHeader = header;
}

0 commit comments

Comments
 (0)