Skip to content

Commit 288a08f

Browse files
mattsimplyEvergreen Agent
authored andcommitted
SERVER-73253: Better path tracking when renaming nested/compound grouping fields -- updated
1 parent 04cce47 commit 288a08f

11 files changed

+326
-11
lines changed
Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
// Tests optimization rule for pushing match stages over group stages
2+
3+
(function() {
4+
"use strict";
5+
6+
load("jstests/libs/analyze_plan.js"); // For 'aggPlanHasStage' and other explain helpers.
7+
8+
const coll = db.grouped_match_push_down;
9+
coll.drop();
10+
assert.commandWorked(coll.insert({_id: 1, x: 10}));
11+
assert.commandWorked(coll.insert({_id: 2, x: 20}));
12+
assert.commandWorked(coll.insert({_id: 3, x: 30}));
13+
14+
// Asserts end-to-end the optimization of group-project-match stage sequence which includes a rename
15+
// over a dotted path. It evaluates the correctness of the result, as well as whether the
16+
// optimization pushed the predicate down before the aggregation.
17+
function assertOptimizeMatchRenameAggregationPipelineWithDottedRename(
18+
{pipeline, expectedStages, expectedResult} = {}) {
19+
const explainOutput = coll.explain().aggregate(pipeline);
20+
21+
if (expectedStages) {
22+
for (let expectedStage of expectedStages) {
23+
assert(aggPlanHasStage(explainOutput, expectedStage), explainOutput);
24+
}
25+
}
26+
27+
if (expectedResult) {
28+
const actualResult = coll.aggregate(pipeline).toArray();
29+
assert.sameMembers(expectedResult, actualResult);
30+
}
31+
}
32+
33+
assert.commandWorked(coll.insert({_id: 20, d: 2}));
34+
// Assert that a sequence of stages group, project, match over a rename on a dotted path (depth 3)
35+
// will push the predicate before the group stage.
36+
assertOptimizeMatchRenameAggregationPipelineWithDottedRename({
37+
pipeline: [
38+
{$group: {_id: {c: '$d'}, c: {$sum: {$const: 1}}}},
39+
{$project: {m: '$_id.c'}},
40+
{$match: {m: {$eq: 2}}}
41+
],
42+
expectedStages: ["GROUP"],
43+
expectedResult: [{"_id": {"c": 2}, "m": 2}]
44+
});
45+
46+
// Assert that the optimization over group, project, match over a renamed dotted path will not
47+
// push down the predicate over multiple projection stages.
48+
assertOptimizeMatchRenameAggregationPipelineWithDottedRename({
49+
pipeline: [
50+
{$group: {_id: {c: '$d'}, c: {$sum: {$const: 1}}}},
51+
{$project: {m: '$_id.c'}},
52+
{$project: {m2: '$m'}},
53+
{$match: {m2: {$eq: 2}}}
54+
],
55+
// the optimization will not push the filter, thus, the query will perform a COLLSCAN. The
56+
// system does not have an index to use.
57+
expectedStages: ["COLLSCAN"],
58+
expectedResult: [{"_id": {"c": 2}, "m2": 2}]
59+
});
60+
61+
// Assert that the optimization over group, project, match over a renamed dotted path will not
62+
// push down the predicate when the rename stage renames a dotted path with depth more than 3.
63+
assertOptimizeMatchRenameAggregationPipelineWithDottedRename({
64+
pipeline: [
65+
{$group: {_id: {c: {d: '$d'}}, c: {$sum: {$const: 1}}}},
66+
{$project: {m: '$_id.c.d'}},
67+
{$match: {m: {$eq: 2}}}
68+
],
69+
expectedStages: ["COLLSCAN"],
70+
expectedResult: [{"_id": {"c": {"d": 2}}, "m": 2}]
71+
});
72+
}());

src/mongo/db/exec/add_fields_projection_executor.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -140,10 +140,12 @@ class AddFieldsProjectionExecutor : public ProjectionExecutor {
140140
DocumentSource::GetModPathsReturn getModifiedPaths() const final {
141141
OrderedPathSet computedPaths;
142142
StringMap<std::string> renamedPaths;
143-
_root->reportComputedPaths(&computedPaths, &renamedPaths);
143+
StringMap<std::string> complexRenamedPaths;
144+
_root->reportComputedPaths(&computedPaths, &renamedPaths, &complexRenamedPaths);
144145
return {DocumentSource::GetModPathsReturn::Type::kFiniteSet,
145146
std::move(computedPaths),
146-
std::move(renamedPaths)};
147+
std::move(renamedPaths),
148+
std::move(complexRenamedPaths)};
147149
}
148150

149151
/**

src/mongo/db/exec/inclusion_projection_executor.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -304,11 +304,13 @@ class InclusionProjectionExecutor : public ProjectionExecutor {
304304

305305
OrderedPathSet computedPaths;
306306
StringMap<std::string> renamedPaths;
307-
_root->reportComputedPaths(&computedPaths, &renamedPaths);
307+
StringMap<std::string> complexRenamedPaths;
308+
_root->reportComputedPaths(&computedPaths, &renamedPaths, &complexRenamedPaths);
308309

309310
return {DocumentSource::GetModPathsReturn::Type::kAllExcept,
310311
std::move(preservedPaths),
311-
std::move(renamedPaths)};
312+
std::move(renamedPaths),
313+
std::move(complexRenamedPaths)};
312314
}
313315

314316
/**

src/mongo/db/exec/projection_node.cpp

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -259,7 +259,8 @@ void ProjectionNode::reportProjectedPaths(OrderedPathSet* projectedPaths) const
259259
}
260260

261261
void ProjectionNode::reportComputedPaths(OrderedPathSet* computedPaths,
262-
StringMap<std::string>* renamedPaths) const {
262+
StringMap<std::string>* renamedPaths,
263+
StringMap<std::string>* complexRenamedPaths) const {
263264
for (auto&& computedPair : _expressions) {
264265
// The expression's path is the concatenation of the path to this node, plus the field name
265266
// associated with the expression.
@@ -270,9 +271,15 @@ void ProjectionNode::reportComputedPaths(OrderedPathSet* computedPaths,
270271
for (auto&& rename : exprComputedPaths.renames) {
271272
(*renamedPaths)[rename.first] = rename.second;
272273
}
274+
275+
if (complexRenamedPaths) {
276+
for (auto&& complexRename : exprComputedPaths.complexRenames) {
277+
(*complexRenamedPaths)[complexRename.first] = complexRename.second;
278+
}
279+
}
273280
}
274281
for (auto&& childPair : _children) {
275-
childPair.second->reportComputedPaths(computedPaths, renamedPaths);
282+
childPair.second->reportComputedPaths(computedPaths, renamedPaths, complexRenamedPaths);
276283
}
277284
}
278285

src/mongo/db/exec/projection_node.h

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -139,10 +139,14 @@ class ProjectionNode {
139139
*
140140
* Computed paths that are identified as the result of a simple rename are instead filled out in
141141
* 'renamedPaths'. Each entry in 'renamedPaths' maps from the path's new name to its old name
142-
* prior to application of this projection.
142+
* prior to application of this projection. 'complexRenamedPaths' is an optional parameter that
143+
* acts exactly as the 'renamedPaths' map and includes renames whose old name includes dotted
144+
* paths (Note: the dotted path renames are constrained to length 3). The paths that are
145+
* included in 'complexRenamedPaths' are also included in 'computedPaths'.
143146
*/
144147
void reportComputedPaths(OrderedPathSet* computedPaths,
145-
StringMap<std::string>* renamedPaths) const;
148+
StringMap<std::string>* renamedPaths,
149+
StringMap<std::string>* complexRenamedPaths = nullptr) const;
146150

147151
const std::string& getPath() const {
148152
return _pathToNode;

src/mongo/db/pipeline/document_source.h

Lines changed: 13 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -643,8 +643,14 @@ class DocumentSource : public RefCountable {
643643
kAllExcept,
644644
};
645645

646-
GetModPathsReturn(Type type, OrderedPathSet&& paths, StringMap<std::string>&& renames)
647-
: type(type), paths(std::move(paths)), renames(std::move(renames)) {}
646+
GetModPathsReturn(Type type,
647+
OrderedPathSet&& paths,
648+
StringMap<std::string>&& renames,
649+
StringMap<std::string>&& complexRenames = {})
650+
: type(type),
651+
paths(std::move(paths)),
652+
renames(std::move(renames)),
653+
complexRenames(std::move(complexRenames)) {}
648654

649655
std::set<std::string> getNewNames() {
650656
std::set<std::string> newNames;
@@ -706,6 +712,11 @@ class DocumentSource : public RefCountable {
706712
// This stage should return kAllExcept, since it modifies all paths other than "a". It can
707713
// also fill out 'renames' with the mapping "b" => "c".
708714
StringMap<std::string> renames;
715+
716+
// Including space for returning renames which include dotted paths.
717+
// i.e., "a.b" => c
718+
//
719+
StringMap<std::string> complexRenames;
709720
};
710721

711722
/**

src/mongo/db/pipeline/document_source_group.cpp

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,10 @@
4545
#include "mongo/util/assert_util.h"
4646
#include "mongo/util/intrusive_counter.h"
4747

48+
#include "mongo/db/pipeline/document_source_match.h"
49+
#include "mongo/db/pipeline/document_source_single_document_transformation.h"
50+
#include "mongo/db/pipeline/semantic_analysis.h"
51+
4852
namespace mongo {
4953

5054
constexpr StringData DocumentSourceGroup::kStageName;
@@ -82,6 +86,88 @@ boost::intrusive_ptr<DocumentSource> DocumentSourceGroup::createFromBson(
8286
return createFromBsonWithMaxMemoryUsage(std::move(elem), expCtx, boost::none);
8387
}
8488

89+
Pipeline::SourceContainer::iterator DocumentSourceGroup::doOptimizeAt(
90+
Pipeline::SourceContainer::iterator itr, Pipeline::SourceContainer* container) {
91+
invariant(*itr == this);
92+
93+
if (pushDotRenamedMatch(itr, container)) {
94+
return itr;
95+
}
96+
97+
return std::next(itr);
98+
}
99+
100+
bool DocumentSourceGroup::pushDotRenamedMatch(Pipeline::SourceContainer::iterator itr,
101+
Pipeline::SourceContainer* container) {
102+
if (std::next(itr) == container->end() || std::next(std::next(itr)) == container->end()) {
103+
return false;
104+
}
105+
106+
// Keep separate iterators for each stage (projection, match).
107+
auto prospectiveProjectionItr = std::next(itr);
108+
auto prospectiveProjection =
109+
dynamic_cast<DocumentSourceSingleDocumentTransformation*>(prospectiveProjectionItr->get());
110+
111+
auto prospectiveMatchItr = std::next(std::next(itr));
112+
auto prospectiveMatch = dynamic_cast<DocumentSourceMatch*>(prospectiveMatchItr->get());
113+
114+
if (!prospectiveProjection || !prospectiveMatch) {
115+
return false;
116+
}
117+
118+
stdx::unordered_set<std::string> groupingFields;
119+
StringMap<std::string> relevantRenames;
120+
121+
auto itsGroup = dynamic_cast<DocumentSourceGroup*>(itr->get());
122+
123+
auto idFields = itsGroup->getIdFields();
124+
for (auto& idFieldsItr : idFields) {
125+
groupingFields.insert(idFieldsItr.first);
126+
}
127+
128+
GetModPathsReturn paths = prospectiveProjection->getModifiedPaths();
129+
130+
for (const auto& thisComplexRename : paths.complexRenames) {
131+
132+
// Check if the dotted renaming is done on a grouping field.
133+
// This ensures that the top level is flat i.e., no arrays.
134+
if (groupingFields.find(thisComplexRename.second) != groupingFields.end()) {
135+
relevantRenames.insert(std::pair<std::string, std::string>(thisComplexRename.first,
136+
thisComplexRename.second));
137+
}
138+
}
139+
140+
// Perform all changes on a copy of the match source.
141+
boost::intrusive_ptr<DocumentSource> currentMatchCopyDocument =
142+
prospectiveMatch->clone(prospectiveMatch->getContext());
143+
144+
auto currentMatchCopyDocumentMatch =
145+
dynamic_cast<DocumentSourceMatch*>(currentMatchCopyDocument.get());
146+
147+
paths.renames = std::move(relevantRenames);
148+
149+
// Translate predicate statements based on the projection renames.
150+
auto matchSplitForProject = currentMatchCopyDocumentMatch->splitMatchByModifiedFields(
151+
currentMatchCopyDocumentMatch, paths);
152+
153+
if (matchSplitForProject.first) {
154+
// Perform the swap of the projection and the match stages.
155+
container->erase(prospectiveMatchItr);
156+
container->insert(prospectiveProjectionItr, std::move(matchSplitForProject.first));
157+
158+
if (matchSplitForProject.second) {
159+
// If there is a portion of the match stage predicate that is conflicting with the
160+
// projection, re-insert it below the projection stage.
161+
container->insert(std::next(prospectiveProjectionItr),
162+
std::move(matchSplitForProject.second));
163+
}
164+
165+
return true;
166+
}
167+
168+
return false;
169+
}
170+
85171
boost::intrusive_ptr<DocumentSource> DocumentSourceGroup::createFromBsonWithMaxMemoryUsage(
86172
BSONElement elem,
87173
const boost::intrusive_ptr<ExpressionContext>& expCtx,

src/mongo/db/pipeline/document_source_group.h

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -78,13 +78,39 @@ class DocumentSourceGroup final : public DocumentSourceGroupBase {
7878
const boost::intrusive_ptr<ExpressionContext>& expCtx,
7979
boost::optional<size_t> maxMemoryUsageBytes);
8080

81+
Pipeline::SourceContainer::iterator doOptimizeAt(Pipeline::SourceContainer::iterator itr,
82+
Pipeline::SourceContainer* container) override;
83+
8184
protected:
8285
GetNextResult doGetNext() final;
8386

8487
bool isSpecFieldReserved(StringData) final {
8588
return false;
8689
}
8790

91+
/**
92+
* This optimization pushes a filter over a renamed grouping field
93+
* before the group to improve performance.
94+
*
95+
* Specifically:
96+
* $group { _id: {c: $x}, c: {aggregation}},
97+
* $project { newVar: $_id.c }
98+
* $match { newVar: "value"}
99+
* ->
100+
* $match { x: "value"}
101+
* $group { _id: {c: $x}, c: {aggregation}},
102+
* $project { newVar: $_id.c }
103+
*
104+
* Note: This optimization will not push over multiple grouping stages
105+
* or multiple rename stages. Only the last set of group, project, match
106+
* is taken into account. Furthermore, the optimization addresses specifically
107+
* the defined sequence of operations to ensure the semantics of filters over arrays. Renaming
108+
* dotted paths which include arrays change the evaluation of the filter statement and may lead
109+
* to erroneous results.
110+
*/
111+
bool pushDotRenamedMatch(Pipeline::SourceContainer::iterator itr,
112+
Pipeline::SourceContainer* container);
113+
88114
private:
89115
explicit DocumentSourceGroup(const boost::intrusive_ptr<ExpressionContext>& expCtx,
90116
boost::optional<size_t> maxMemoryUsageBytes = boost::none);

src/mongo/db/pipeline/expression.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2624,6 +2624,15 @@ Expression::ComputedPaths ExpressionFieldPath::getComputedPaths(const std::strin
26242624
if (_variable == renamingVar && _fieldPath.getPathLength() == 2u) {
26252625
outputPaths.renames[exprFieldPath] = _fieldPath.tail().fullPath();
26262626
} else {
2627+
2628+
// Add dotted renames also to complex renames, to be used prospectively in optimizations
2629+
// (e.g., pushDotRenamedMatch).
2630+
// We only include dotted rename paths of length 3, as current optimization are constrained
2631+
// to accepting only such paths to avoid semantic errors from array flattening.
2632+
if (_variable == renamingVar && _fieldPath.getPathLength() == 3u) {
2633+
outputPaths.complexRenames[exprFieldPath] = _fieldPath.tail().fullPath();
2634+
}
2635+
26272636
outputPaths.paths.insert(exprFieldPath);
26282637
}
26292638

src/mongo/db/pipeline/expression.h

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -212,8 +212,13 @@ class Expression : public RefCountable {
212212
OrderedPathSet paths;
213213

214214
// Mappings from the old name of a path before applying this expression, to the new one
215-
// after applying this expression.
215+
// after applying this expression. This map includes solely simple field renaming
216+
// expression.
216217
StringMap<std::string> renames;
218+
219+
// Mappings from the old name of a path before applying this expression. This map includes
220+
// expressions which have dotted notation on the right side.
221+
StringMap<std::string> complexRenames;
217222
};
218223

219224
virtual ~Expression(){};

0 commit comments

Comments
 (0)