Skip to content

Commit 81a9350

Browse files
authored
Implement lazy BIND (#1543)
Allow the `BIND` operation to handle its input lazily. NOTE: Currently there is only a single local vocab for all the results of the `BIND`, so even when a `BIND` that creates strings is handled lazily, we still need the RAM for the complete local vocab. This will be handled in a follow-up PR.
1 parent 414f50c commit 81a9350

File tree

6 files changed

+252
-84
lines changed

6 files changed

+252
-84
lines changed

src/engine/Bind.cpp

+81-66
Original file line numberDiff line numberDiff line change
@@ -81,104 +81,119 @@ std::vector<QueryExecutionTree*> Bind::getChildren() {
8181
}
8282

8383
// _____________________________________________________________________________
84-
ProtoResult Bind::computeResult([[maybe_unused]] bool requestLaziness) {
85-
using std::endl;
86-
LOG(DEBUG) << "Get input to BIND operation..." << endl;
87-
std::shared_ptr<const Result> subRes = _subtree->getResult();
88-
LOG(DEBUG) << "Got input to Bind operation." << endl;
89-
IdTable idTable{getExecutionContext()->getAllocator()};
90-
91-
idTable.setNumColumns(getResultWidth());
92-
93-
// Make a deep copy of the local vocab from `subRes` and then add to it (in
94-
// case BIND adds a new word or words).
95-
//
96-
// TODO: In most BIND operations, nothing is added to the local vocabulary, so
97-
// it would be more efficient to first share the pointer here (like with
98-
// `shareLocalVocabFrom`) and only copy it when a new word is about to be
99-
// added. Same for GROUP BY.
100-
auto localVocab = subRes->getCopyOfLocalVocab();
101-
102-
size_t inwidth = subRes->idTable().numColumns();
103-
size_t outwidth = getResultWidth();
104-
105-
CALL_FIXED_SIZE((std::array{inwidth, outwidth}), &Bind::computeExpressionBind,
106-
this, &idTable, &localVocab, *subRes,
107-
_bind._expression.getPimpl());
108-
109-
LOG(DEBUG) << "BIND result computation done." << endl;
110-
return {std::move(idTable), resultSortedOn(), std::move(localVocab)};
84+
IdTable Bind::cloneSubView(const IdTable& idTable,
85+
const std::pair<size_t, size_t>& subrange) {
86+
IdTable result(idTable.numColumns(), idTable.getAllocator());
87+
result.resize(subrange.second - subrange.first);
88+
std::ranges::copy(idTable.begin() + subrange.first,
89+
idTable.begin() + subrange.second, result.begin());
90+
return result;
11191
}
11292

11393
// _____________________________________________________________________________
114-
template <size_t IN_WIDTH, size_t OUT_WIDTH>
115-
void Bind::computeExpressionBind(
116-
IdTable* outputIdTable, LocalVocab* outputLocalVocab,
117-
const Result& inputResultTable,
118-
sparqlExpression::SparqlExpression* expression) const {
94+
ProtoResult Bind::computeResult(bool requestLaziness) {
95+
LOG(DEBUG) << "Get input to BIND operation..." << std::endl;
96+
std::shared_ptr<const Result> subRes = _subtree->getResult(requestLaziness);
97+
LOG(DEBUG) << "Got input to Bind operation." << std::endl;
98+
99+
auto applyBind = [this, subRes](IdTable idTable, LocalVocab* localVocab) {
100+
return computeExpressionBind(localVocab, std::move(idTable),
101+
subRes->localVocab(),
102+
_bind._expression.getPimpl());
103+
};
104+
105+
if (subRes->isFullyMaterialized()) {
106+
if (requestLaziness && subRes->idTable().size() > CHUNK_SIZE) {
107+
auto localVocab =
108+
std::make_shared<LocalVocab>(subRes->getCopyOfLocalVocab());
109+
auto generator = [](std::shared_ptr<LocalVocab> vocab, auto applyBind,
110+
std::shared_ptr<const Result> result)
111+
-> cppcoro::generator<IdTable> {
112+
size_t size = result->idTable().size();
113+
for (size_t offset = 0; offset < size; offset += CHUNK_SIZE) {
114+
co_yield applyBind(
115+
cloneSubView(result->idTable(),
116+
{offset, std::min(size, offset + CHUNK_SIZE)}),
117+
vocab.get());
118+
}
119+
}(localVocab, std::move(applyBind), std::move(subRes));
120+
return {std::move(generator), resultSortedOn(), std::move(localVocab)};
121+
}
122+
// Make a copy of the local vocab from `subRes` and then add to it (in
123+
// case BIND adds a new word or words).
124+
//
125+
// Note: The copy of the local vocab is shallow: it is made via
126+
// `shared_ptr`s rather than as a deep copy. The following is
127+
// therefore also efficient if the BIND adds no new
128+
// words.
129+
LocalVocab localVocab = subRes->getCopyOfLocalVocab();
130+
IdTable result = applyBind(subRes->idTable().clone(), &localVocab);
131+
LOG(DEBUG) << "BIND result computation done." << std::endl;
132+
return {std::move(result), resultSortedOn(), std::move(localVocab)};
133+
}
134+
auto localVocab = std::make_shared<LocalVocab>();
135+
auto generator =
136+
[](std::shared_ptr<LocalVocab> vocab, auto applyBind,
137+
std::shared_ptr<const Result> result) -> cppcoro::generator<IdTable> {
138+
for (IdTable& idTable : result->idTables()) {
139+
co_yield applyBind(std::move(idTable), vocab.get());
140+
}
141+
std::array<const LocalVocab*, 2> vocabs{vocab.get(), &result->localVocab()};
142+
*vocab = LocalVocab::merge(std::span{vocabs});
143+
}(localVocab, std::move(applyBind), std::move(subRes));
144+
return {std::move(generator), resultSortedOn(), std::move(localVocab)};
145+
}
146+
147+
// _____________________________________________________________________________
148+
IdTable Bind::computeExpressionBind(
149+
LocalVocab* outputLocalVocab, IdTable idTable,
150+
const LocalVocab& inputLocalVocab,
151+
const sparqlExpression::SparqlExpression* expression) const {
119152
sparqlExpression::EvaluationContext evaluationContext(
120-
*getExecutionContext(), _subtree->getVariableColumns(),
121-
inputResultTable.idTable(), getExecutionContext()->getAllocator(),
122-
inputResultTable.localVocab(), cancellationHandle_, deadline_);
153+
*getExecutionContext(), _subtree->getVariableColumns(), idTable,
154+
getExecutionContext()->getAllocator(), inputLocalVocab,
155+
cancellationHandle_, deadline_);
123156

124157
sparqlExpression::ExpressionResult expressionResult =
125158
expression->evaluate(&evaluationContext);
126159

127-
const auto input = inputResultTable.idTable().asStaticView<IN_WIDTH>();
128-
auto output = std::move(*outputIdTable).toStatic<OUT_WIDTH>();
129-
130-
// first initialize the first columns (they remain identical)
131-
const auto inSize = input.size();
132-
output.reserve(inSize);
133-
const auto inCols = input.numColumns();
134-
// copy the input to the first numColumns;
135-
for (size_t i = 0; i < inSize; ++i) {
136-
output.emplace_back();
137-
for (size_t j = 0; j < inCols; ++j) {
138-
output(i, j) = input(i, j);
139-
}
140-
checkCancellation();
141-
}
160+
idTable.addEmptyColumn();
161+
auto outputColumn = idTable.getColumn(idTable.numColumns() - 1);
142162

143163
auto visitor = [&]<sparqlExpression::SingleExpressionResult T>(
144164
T&& singleResult) mutable {
145165
constexpr static bool isVariable = std::is_same_v<T, ::Variable>;
146166
constexpr static bool isStrongId = std::is_same_v<T, Id>;
147167

148168
if constexpr (isVariable) {
149-
auto column =
169+
auto columnIndex =
150170
getInternallyVisibleVariableColumns().at(singleResult).columnIndex_;
151-
for (size_t i = 0; i < inSize; ++i) {
152-
output(i, inCols) = output(i, column);
153-
checkCancellation();
154-
}
171+
auto inputColumn = idTable.getColumn(columnIndex);
172+
AD_CORRECTNESS_CHECK(inputColumn.size() == outputColumn.size());
173+
std::ranges::copy(inputColumn, outputColumn.begin());
155174
} else if constexpr (isStrongId) {
156-
for (size_t i = 0; i < inSize; ++i) {
157-
output(i, inCols) = singleResult;
158-
checkCancellation();
159-
}
175+
std::ranges::fill(outputColumn, singleResult);
160176
} else {
161177
constexpr bool isConstant = sparqlExpression::isConstantResult<T>;
162178

163179
auto resultGenerator = sparqlExpression::detail::makeGenerator(
164-
std::forward<T>(singleResult), inSize, &evaluationContext);
180+
std::forward<T>(singleResult), outputColumn.size(),
181+
&evaluationContext);
165182

166183
if constexpr (isConstant) {
167184
auto it = resultGenerator.begin();
168185
if (it != resultGenerator.end()) {
169186
Id constantId =
170187
sparqlExpression::detail::constantExpressionResultToId(
171188
std::move(*it), *outputLocalVocab);
172-
for (size_t i = 0; i < inSize; ++i) {
173-
output(i, inCols) = constantId;
174-
checkCancellation();
175-
}
189+
checkCancellation();
190+
std::ranges::fill(outputColumn, constantId);
176191
}
177192
} else {
178193
size_t i = 0;
179194
// We deliberately move the values from the generator.
180195
for (auto& resultValue : resultGenerator) {
181-
output(i, inCols) =
196+
outputColumn[i] =
182197
sparqlExpression::detail::constantExpressionResultToId(
183198
std::move(resultValue), *outputLocalVocab);
184199
i++;
@@ -190,5 +205,5 @@ void Bind::computeExpressionBind(
190205

191206
std::visit(visitor, std::move(expressionResult));
192207

193-
*outputIdTable = std::move(output).toDynamic();
208+
return idTable;
194209
}

src/engine/Bind.h

+14-18
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,8 @@
1-
//
2-
// Created by johannes on 19.04.20.
3-
//
1+
// Copyright 2020, University of Freiburg,
2+
// Chair of Algorithms and Data Structures.
3+
// Author: Johannes Kalmbach <[email protected]>
44

5-
#ifndef QLEVER_BIND_H
6-
#define QLEVER_BIND_H
5+
#pragma once
76

87
#include "engine/Operation.h"
98
#include "engine/sparqlExpressions/SparqlExpressionPimpl.h"
@@ -12,6 +11,8 @@
1211
/// BIND operation, currently only supports a very limited subset of expressions
1312
class Bind : public Operation {
1413
public:
14+
static constexpr size_t CHUNK_SIZE = 10'000;
15+
1516
Bind(QueryExecutionContext* qec, std::shared_ptr<QueryExecutionTree> subtree,
1617
parsedQuery::Bind b)
1718
: Operation(qec), _subtree(std::move(subtree)), _bind(std::move(b)) {}
@@ -37,25 +38,20 @@ class Bind : public Operation {
3738
float getMultiplicity(size_t col) override;
3839
bool knownEmptyResult() override;
3940

40-
// Returns the variable to which the expression will be bound
41-
[[nodiscard]] const string& targetVariable() const {
42-
return _bind._target.name();
43-
}
44-
4541
protected:
4642
[[nodiscard]] vector<ColumnIndex> resultSortedOn() const override;
4743

4844
private:
49-
ProtoResult computeResult([[maybe_unused]] bool requestLaziness) override;
45+
ProtoResult computeResult(bool requestLaziness) override;
46+
47+
static IdTable cloneSubView(const IdTable& idTable,
48+
const std::pair<size_t, size_t>& subrange);
5049

5150
// Implementation for the binding of arbitrary expressions.
52-
template <size_t IN_WIDTH, size_t OUT_WIDTH>
53-
void computeExpressionBind(
54-
IdTable* outputIdTable, LocalVocab* outputLocalVocab,
55-
const Result& inputResultTable,
56-
sparqlExpression::SparqlExpression* expression) const;
51+
IdTable computeExpressionBind(
52+
LocalVocab* outputLocalVocab, IdTable idTable,
53+
const LocalVocab& inputLocalVocab,
54+
const sparqlExpression::SparqlExpression* expression) const;
5755

5856
[[nodiscard]] VariableToColumnMap computeVariableToColumnMap() const override;
5957
};
60-
61-
#endif // QLEVER_BIND_H

src/engine/idTable/IdTable.h

+6
Original file line numberDiff line numberDiff line change
@@ -278,6 +278,12 @@ class IdTable {
278278
data().resize(numColumns, ColumnStorage{allocator_});
279279
}
280280

281+
// Add a new column with `size()` entries to the table; the entries are uninitialized.
282+
void addEmptyColumn() requires columnsAreAllocatable && isDynamic {
283+
data().emplace_back(size(), allocator_);
284+
++numColumns_;
285+
}
286+
281287
// The number of rows in the table. We deliberately have an explicitly named
282288
// function `numRows` as well as a generic `size` function because the latter
283289
// can be used to write generic code, for example when using STL algorithms on

test/IdTableTest.cpp

+17
Original file line numberDiff line numberDiff line change
@@ -1119,6 +1119,23 @@ TEST(IdTable, constructorsAreSfinaeFriendly) {
11191119
static_assert(std::is_constructible_v<IntTable, size_t>);
11201120
}
11211121

1122+
// _____________________________________________________________________________
1123+
TEST(IdTable, addEmptyColumn) {
1124+
using ::testing::ElementsAre;
1125+
using ::testing::Eq;
1126+
IdTable table{1, ad_utility::makeUnlimitedAllocator<Id>()};
1127+
table.push_back({V(1)});
1128+
table.push_back({V(2)});
1129+
1130+
table.addEmptyColumn();
1131+
1132+
EXPECT_EQ(table.numColumns(), 2);
1133+
EXPECT_THAT(table.getColumn(0), ElementsAre(V(1), V(2)));
1134+
// The new column is uninitialized, so we can't make any more specific
1135+
// assertions about its content here.
1136+
EXPECT_EQ(table.getColumn(1).size(), 2);
1137+
}
1138+
11221139
// Check that we can completely instantiate `IdTable`s with a different value
11231140
// type and a different underlying storage.
11241141

0 commit comments

Comments
 (0)