Skip to content

merge poplar sdk231 updates #721

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
47 changes: 28 additions & 19 deletions ODLA/platforms/odla_popart/odla_compute.cc
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

#include <ODLA/odla.h>
#include <dlfcn.h>
#include <stdlib.h>

#include <cstdlib>
#include <fstream>
Expand All @@ -28,6 +29,9 @@
#include <popart/session.hpp>
#include <popart/tensorinfo.hpp>
#include <popart/voiddata.hpp>
#include <poplar/exceptions.hpp>
#include <random>
#include <stdexcept>
#include <string>

#include "ODLA/odla_common.h"
Expand Down Expand Up @@ -59,9 +63,13 @@ odla_status odla_SetComputationItem(odla_computation comp, odla_item_type type,
comp->opts.cache_dir = (reinterpret_cast<char*>(value));
break;
case 1001: // load cache directly, need set path of cache file
PopartConfig::instance()->set_load_cache(true);
PopartConfig::instance()->set_cache_path(reinterpret_cast<char*>(value));
popart::logging::info("set load_or_save_cache");
PopartConfig::instance()->set_load_or_save_cache(true);
PopartConfig::instance()->set_cache_path(
(std::string) reinterpret_cast<char*>(value));
break;
case 1002:
setenv("POPART_LOG_LEVEL", "INFO", 1);
default:
std::cerr << "Unsupported property type: " << type << std::endl;
return ODLA_UNSUPPORTED_DATATYPE;
Expand All @@ -82,9 +90,14 @@ odla_status odla_CreateExecutable(odla_executable* executable,
return comp->compile_and_export();
} else {
popart::logging::info("Computation is not initialized. init it first");
_odla_computation::instance()->init(true); // set is_compile to true
// this comp init will create
// executable
odla_status ret =
_odla_computation::instance()->init(true); // set is_compile to true
// this comp init will
// create executable
if (ret != ODLA_SUCCESS) {
popart::logging::err("Failed to init computation when compiling.");
return ODLA_FAILURE;
}
_odla_computation::instance()->compile_and_export();
}
}
Expand All @@ -96,7 +109,7 @@ odla_status odla_StoreExecutable(const odla_char* file_name,
return ODLA_SUCCESS;
}

odla_status odla_LoadExecutable(const odla_char* file_name, odla_device device,
odla_status odla_LoadExecutable(const odla_char* file_name,
odla_executable* executable,
odla_context* context,
odla_computation* computation) {
Expand All @@ -115,20 +128,13 @@ odla_status odla_CreateComputation(odla_computation* comp) {
}
}
// Read the config file
popart::logging::info("loading config");
if (!PopartConfig::instance()->inited()) {
if (PopartConfig::instance()->load_cache()) {
odla_status ret = PopartConfig::instance()->extract_config_from_cache();
if (ret == ODLA_FAILURE) {
popart::logging::err("load config from cache failed");
return ret;
}
} else {
auto ret = PopartConfig::instance()->load_config(
std::getenv("ODLA_POPART_CONFIG"));
if (ret != ODLA_SUCCESS) {
popart::logging::err("error load config");
return ret;
}
auto ret = PopartConfig::instance()->load_config(
std::getenv("ODLA_POPART_CONFIG"));
if (ret != ODLA_SUCCESS) {
popart::logging::err("error load config");
return ret;
}
}
odla_status status = _odla_computation::instance()->set_executor();
Expand Down Expand Up @@ -165,6 +171,7 @@ odla_status odla_DestroyContext(odla_context ctx) {
}

odla_status odla_DestroyComputation(odla_computation comp) {
popart::logging::info("call odla_destroyComputation");
if (comp != nullptr) {
if (!comp->is_compile_only()) {
comp->mark_done();
Expand All @@ -173,6 +180,8 @@ odla_status odla_DestroyComputation(odla_computation comp) {
comp->release_session();
_odla_computation::destruct(); // release the real computation
}
popart::logging::info("reset config state");
PopartConfig::instance()->reset_init_state();

return ODLA_SUCCESS;
}
Expand Down
81 changes: 64 additions & 17 deletions ODLA/platforms/odla_popart/odla_popart.cc
Original file line number Diff line number Diff line change
Expand Up @@ -79,14 +79,14 @@ void compute_loop(odla_computation comp) {
popart::logging::err("Poplar unrecoverable_runtime_error exception caught");
QManager::instance()->set_status(ODLA_UNRECOVERABLE_ERR);
} catch (poplar::unknown_runtime_error& e) {
popart::logging::info("Poplar unknown runtime exception caught}");
popart::logging::err("Poplar unknown runtime exception caught");
QManager::instance()->set_status(ODLA_UNRECOVERABLE_ERR);
} catch (...) {
popart::logging::info("Poplar unknown exception caught");
popart::logging::err("Poplar unknown exception caught");
QManager::instance()->set_status(ODLA_UNRECOVERABLE_ERR);
}

popart::logging::warn("The pipeline loop finished");
popart::logging::info("The pipeline loop finished");
comp->thread_done();
}

Expand All @@ -99,16 +99,18 @@ odla_status _odla_computation::compile_and_export() {
int file_prefix = cache_file_name.rfind(file_suffix);
if (file_prefix == std::string::npos ||
file_prefix + file_suffix.size() < cache_file_name.size()) {
popart::logging::err("Bad cache file name");
popart::logging::err(
"Bad cache file name. File name should end with '.popart'");
return ODLA_FAILURE;
}
if (file_prefix == std::string::npos) {
file_prefix = cache_file_name.size() - 1;
}
std::string config_file_name(cache_file_name.substr(0, file_prefix) +
".json");
std::fstream cache_fs(cache_file_name,
std::ios_base::out | std::ifstream::binary);
std::fstream cache_fs(cache_file_name, std::ios_base::out |
std::ifstream::binary |
std::ios_base::trunc);
if (!cache_fs.is_open()) {
popart::logging::err("Open or create cache file falied");
return ODLA_FAILURE;
Expand All @@ -119,7 +121,7 @@ odla_status _odla_computation::compile_and_export() {
config_fs.open(config_file_name, std::ios_base::in | std::ifstream::binary);
if (!config_fs.is_open()) {
popart::logging::warn(
"invalid config file name:[ {} ] will use default config",
"Open config file failed:[ {} ] will use default config",
config_file_name);
PopartConfig::instance()->use_default();
config_string = PopartConfig::instance()->get_default_config_string();
Expand All @@ -132,10 +134,12 @@ odla_status _odla_computation::compile_and_export() {
config_string = PopartConfig::instance()->get_default_config_string();
}
// add sdk_version in the file content
std::string version_string(popart::core::versionString());
std::string version_string(popart::core::packageHash());
popart::logging::info("the popart version is: {}", version_string);
version_string = "\n\"sdk_version\":\"" + version_string + "\",";
config_string.insert(1, version_string);
if (config_string.find("sdk_version") == std::string::npos) {
std::string item_string = "\n\"sdk_version\":\"" + version_string + "\",";
config_string.insert(1, item_string);
}
popart::logging::info("the config_string with sdk_version is: {}",
config_string);
// added the sdk_version information to the file content
Expand All @@ -148,6 +152,9 @@ odla_status _odla_computation::compile_and_export() {
} catch (std::exception& e) {
popart::logging::err("compileAndExport Falied: {}", e.what());
ret_value = ODLA_FAILURE;
} catch (...) {
popart::logging::err("compileAndExport Falied");
ret_value = ODLA_FAILURE;
}
cache_fs.flush();
cache_fs.close();
Expand Down Expand Up @@ -191,6 +198,10 @@ odla_status _odla_computation::init(bool is_compile) {
try {
builder = popart::Builder::createFromOnnxModel(set_pipeline_stage());
} catch (std::exception& e) {
popart::logging::err("create builder from onnx model failed:{}",
e.what());
return ODLA_FAILURE;
} catch (...) {
popart::logging::err("create builder from onnx model failed.");
return ODLA_FAILURE;
}
Expand Down Expand Up @@ -219,12 +230,15 @@ odla_status _odla_computation::init(bool is_compile) {
popart::logging::err("Session::createFromOnnxModel failed:{}",
e.what());
return ODLA_FAILURE;
} catch (...) {
popart::logging::err("Session::createFromOnnxModel failed");
return ODLA_FAILURE;
}

if (!is_compile) {
if (PopartConfig::instance()->load_cache()) {
if (PopartConfig::instance()->load_or_save_cache()) {
popart::logging::info("Load cachefile from existing stream");
std::string version_string(popart::core::versionString());
std::string version_string(popart::core::packageHash());
if (!PopartConfig::instance()->sdk_version_match(version_string)) {
popart::logging::err("The sdk version of cache does not match {}",
version_string);
Expand All @@ -233,9 +247,18 @@ odla_status _odla_computation::init(bool is_compile) {
auto cache_fs = PopartConfig::instance()->get_cache_fs();
if (cache_fs->is_open()) {
try {
cache_fs->seekg(0, std::ios::beg);
int config_len = 0;
cache_fs->read((char*)&config_len, sizeof(config_len));
cache_fs->seekg(config_len + sizeof(config_len), std::ios::beg);
new_session->loadExecutableFromStream(*(cache_fs.get()));
} catch (std::exception& e) {
popart::logging::err("bad cache file, will compile the graph:{}",
e.what());
return ODLA_FAILURE;
} catch (...) {
popart::logging::err("bad cache file, will compile the graph");
return ODLA_FAILURE;
}
}
}
Expand All @@ -244,9 +267,32 @@ odla_status _odla_computation::init(bool is_compile) {
new_session->prepareDevice();
new_session->setRandomSeed(0); // Init seed
new_session->weightsFromHost(); // Copy weights from host to IPU
} catch (std::exception& e) {
popart::logging::err("session init failed: {}", e.what());
return ODLA_FAILURE;
} catch (poplar::application_runtime_error& e) {
popart::logging::err(
"Poplar exception application_runtime_error caught:{}", e.what());
return ODLA_INTERNAL_LOGIC_ERR;
} catch (poplar::recoverable_runtime_error& e) {
popart::logging::err(
"Poplar recoverable_runtime_error exception caught");
auto action = e.getRecoveryAction();
popart::logging::err("need to take action:{}", action);
if (action == poplar::RecoveryAction::IPU_RESET) {
return ODLA_RECOVERABLE_ERR;
} else if (action == poplar::RecoveryAction::PARTITION_RESET) {
return ODLA_PARTITION_RESET;
} else if (action == poplar::RecoveryAction::FULL_RESET) {
return ODLA_FULL_RESET;
}
} catch (poplar::unrecoverable_runtime_error& e) {
popart::logging::err(
"Poplar unrecoverable_runtime_error exception caught");
return ODLA_UNRECOVERABLE_ERR;
} catch (poplar::unknown_runtime_error& e) {
popart::logging::err("Poplar unknown runtime exception caught");
return ODLA_UNRECOVERABLE_ERR;
} catch (...) {
popart::logging::err("Poplar unknown exception caught");
return ODLA_UNRECOVERABLE_ERR;
}
// If in parallel mode, start the thread
ExecutionMode mode = PopartConfig::instance()->execution_mode();
Expand All @@ -264,6 +310,7 @@ odla_status _odla_computation::init(bool is_compile) {
std::move(new_session); // set session after all initialization done.
}
}
return ODLA_SUCCESS;
}

// Currently this is set via the config file; should it be set by the caller instead?
Expand Down Expand Up @@ -499,10 +546,10 @@ odla_status Sequence::compute(odla_computation comp, odla_context context,
popart::logging::err("Poplar unrecoverable_runtime_error exception caught");
return ODLA_UNRECOVERABLE_ERR;
} catch (poplar::unknown_runtime_error& e) {
popart::logging::info("Poplar unknown runtime exception caught}");
popart::logging::err("Poplar unknown runtime exception caught.");
return ODLA_UNRECOVERABLE_ERR;
} catch (...) {
popart::logging::info("Poplar unknown exception caught");
popart::logging::err("Poplar unknown exception caught");
return ODLA_UNRECOVERABLE_ERR;
}
return ODLA_SUCCESS;
Expand Down
Loading