Skip to content

ColumnString: improve performance (26%) by avoiding vector reallocation #249

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Nov 23, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 18 additions & 22 deletions clickhouse/columns/string.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,7 @@ void ColumnFixedString::Append(std::string_view str) {
+ std::to_string(str.size()) + " bytes.");
}

if (data_.capacity() - data_.size() < str.size())
{
if (data_.capacity() - data_.size() < str.size()) {
// round up to the next block size
const auto new_size = (((data_.size() + string_size_) / DEFAULT_BLOCK_SIZE) + 1) * DEFAULT_BLOCK_SIZE;
data_.reserve(new_size);
Expand Down Expand Up @@ -129,13 +128,11 @@ struct ColumnString::Block
data_(new CharT[capacity])
{}

inline auto GetAvailable() const
{
inline auto GetAvailable() const {
return capacity - size;
}

std::string_view AppendUnsafe(std::string_view str)
{
std::string_view AppendUnsafe(std::string_view str) {
const auto pos = &data_[size];

memcpy(pos, str.data(), str.size());
Expand All @@ -144,13 +141,11 @@ struct ColumnString::Block
return std::string_view(pos, str.size());
}

auto GetCurrentWritePos()
{
auto GetCurrentWritePos() {
return &data_[size];
}

std::string_view ConsumeTailAsStringViewUnsafe(size_t len)
{
std::string_view ConsumeTailAsStringViewUnsafe(size_t len) {
const auto start = &data_[size];
size += len;
return std::string_view(start, len);
Expand All @@ -166,14 +161,21 @@ ColumnString::ColumnString()
{
}

/// Constructs a string column and pre-reserves internal storage for
/// roughly `element_count` values; no values are appended.
///
/// @param element_count number of elements the caller expects to append.
ColumnString::ColumnString(size_t element_count)
    : Column(Type::CreateString())
{
    // The divisor 100 is an arbitrary heuristic; it assumes string values
    // average roughly 40 bytes. At least one block slot is always reserved.
    const size_t estimated_blocks = element_count / 100;
    blocks_.reserve(estimated_blocks < 1 ? size_t{1} : estimated_blocks);

    // One item entry is reserved per expected element.
    items_.reserve(element_count);
}

ColumnString::ColumnString(const std::vector<std::string>& data)
: ColumnString()
{
items_.reserve(data.size());
blocks_.emplace_back(ComputeTotalSize(data));

for (const auto & s : data)
{
for (const auto & s : data) {
AppendUnsafe(s);
}
};
Expand All @@ -194,21 +196,15 @@ ColumnString::~ColumnString()
{}

void ColumnString::Append(std::string_view str) {
if (blocks_.size() == 0 || blocks_.back().GetAvailable() < str.length())
{
if (blocks_.size() == 0 || blocks_.back().GetAvailable() < str.length()) {
blocks_.emplace_back(std::max(DEFAULT_BLOCK_SIZE, str.size()));
}

items_.emplace_back(blocks_.back().AppendUnsafe(str));
}

void ColumnString::Append(const char* str) {
auto len = strlen(str);
if (blocks_.size() == 0 || blocks_.back().GetAvailable() < len) {
blocks_.emplace_back(std::max(DEFAULT_BLOCK_SIZE, len));
}

items_.emplace_back(blocks_.back().AppendUnsafe(str));
Append(std::string_view(str, strlen(str)));
}

void ColumnString::Append(std::string&& steal_value) {
Expand Down Expand Up @@ -295,10 +291,10 @@ ColumnRef ColumnString::Slice(size_t begin, size_t len) const {

if (begin < items_.size()) {
len = std::min(len, items_.size() - begin);
result->items_.reserve(len);

result->blocks_.emplace_back(ComputeTotalSize(items_, begin, len));
for (size_t i = begin; i < begin + len; ++i)
{
for (size_t i = begin; i < begin + len; ++i) {
result->Append(items_[i]);
}
}
Expand Down
1 change: 1 addition & 0 deletions clickhouse/columns/string.h
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ class ColumnString : public Column {
ColumnString();
~ColumnString();

explicit ColumnString(size_t element_count);
explicit ColumnString(const std::vector<std::string> & data);
explicit ColumnString(std::vector<std::string>&& data);
ColumnString& operator=(const ColumnString&) = delete;
Expand Down