Skip to content

Commit 1f31849

Browse files
jed326Jay Deng
authored andcommitted
Correctly calculate doc count error at the slice level for concurrent segment search
Signed-off-by: Jay Deng <[email protected]>
1 parent 6a01d2f commit 1f31849

File tree

22 files changed

+471
-68
lines changed

22 files changed

+471
-68
lines changed

benchmarks/src/main/java/org/opensearch/benchmark/search/aggregations/TermsReduceBenchmark.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -178,8 +178,8 @@ private StringTerms newTerms(Random rand, BytesRef[] dict, boolean withNested) {
178178
0,
179179
buckets,
180180
0,
181-
new TermsAggregator.BucketCountThresholds(1, 0, topNSize, numShards)
182-
);
181+
new TermsAggregator.BucketCountThresholds(1, 0, topNSize, numShards),
182+
false);
183183
}
184184

185185
@Override

benchmarks/src/main/java/org/opensearch/benchmark/search/aggregations/bucket/terms/StringTermsSerializationBenchmark.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -94,8 +94,8 @@ private StringTerms newTerms(boolean withNested) {
9494
100000,
9595
resultBuckets,
9696
0,
97-
new TermsAggregator.BucketCountThresholds(1, 0, buckets, buckets)
98-
);
97+
new TermsAggregator.BucketCountThresholds(1, 0, buckets, buckets),
98+
false);
9999
}
100100

101101
@Benchmark

server/src/internalClusterTest/java/org/opensearch/search/aggregations/bucket/TermsFixedDocCountErrorIT.java

Lines changed: 301 additions & 0 deletions
Large diffs are not rendered by default.

server/src/internalClusterTest/java/org/opensearch/search/aggregations/bucket/TermsShardMinDocCountIT.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -131,7 +131,7 @@ public void testShardMinDocCountSignificantTermsTest() throws Exception {
131131
(filter("inclass", QueryBuilders.termQuery("class", true))).subAggregation(
132132
significantTerms("mySignificantTerms").field("text")
133133
.minDocCount(2)
134-
.shardSize(2)
134+
.shardSize(10)
135135
.shardMinDocCount(2)
136136
.size(2)
137137
.executionHint(randomExecutionHint())
@@ -198,7 +198,7 @@ public void testShardMinDocCountTermsTest() throws Exception {
198198
.minDocCount(2)
199199
.shardMinDocCount(2)
200200
.size(2)
201-
.shardSize(2)
201+
.shardSize(10)
202202
.executionHint(randomExecutionHint())
203203
.order(BucketOrder.key(true))
204204
)

server/src/main/java/org/opensearch/search/aggregations/bucket/terms/AbstractStringTermsAggregator.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,8 @@ protected StringTerms buildEmptyTermsAggregation() {
8383
0,
8484
emptyList(),
8585
0,
86-
bucketCountThresholds
86+
bucketCountThresholds,
87+
false
8788
);
8889
}
8990

server/src/main/java/org/opensearch/search/aggregations/bucket/terms/DoubleTerms.java

Lines changed: 16 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -137,7 +137,8 @@ public DoubleTerms(
137137
long otherDocCount,
138138
List<Bucket> buckets,
139139
long docCountError,
140-
TermsAggregator.BucketCountThresholds bucketCountThresholds
140+
TermsAggregator.BucketCountThresholds bucketCountThresholds,
141+
boolean hasSliceLevelDocCountError
141142
) {
142143
super(
143144
name,
@@ -150,7 +151,8 @@ public DoubleTerms(
150151
otherDocCount,
151152
buckets,
152153
docCountError,
153-
bucketCountThresholds
154+
bucketCountThresholds,
155+
hasSliceLevelDocCountError
154156
);
155157
}
156158

@@ -179,7 +181,8 @@ public DoubleTerms create(List<Bucket> buckets) {
179181
otherDocCount,
180182
buckets,
181183
docCountError,
182-
bucketCountThresholds
184+
bucketCountThresholds,
185+
hasSliceLevelDocCountError
183186
);
184187
}
185188

@@ -196,7 +199,14 @@ public Bucket createBucket(InternalAggregations aggregations, Bucket prototype)
196199
}
197200

198201
@Override
199-
protected DoubleTerms create(String name, List<Bucket> buckets, BucketOrder reduceOrder, long docCountError, long otherDocCount) {
202+
protected DoubleTerms create(
203+
String name,
204+
List<Bucket> buckets,
205+
BucketOrder reduceOrder,
206+
long docCountError,
207+
long otherDocCount,
208+
boolean hasSliceLevelDocCountError
209+
) {
200210
return new DoubleTerms(
201211
name,
202212
reduceOrder,
@@ -208,7 +218,8 @@ protected DoubleTerms create(String name, List<Bucket> buckets, BucketOrder redu
208218
otherDocCount,
209219
buckets,
210220
docCountError,
211-
bucketCountThresholds
221+
bucketCountThresholds,
222+
hasSliceLevelDocCountError
212223
);
213224
}
214225

server/src/main/java/org/opensearch/search/aggregations/bucket/terms/GlobalOrdinalsStringTermsAggregator.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -808,7 +808,8 @@ StringTerms buildResult(long owningBucketOrd, long otherDocCount, StringTerms.Bu
808808
otherDocCount,
809809
Arrays.asList(topBuckets),
810810
0,
811-
bucketCountThresholds
811+
bucketCountThresholds,
812+
false
812813
);
813814
}
814815

server/src/main/java/org/opensearch/search/aggregations/bucket/terms/InternalMappedTerms.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -71,9 +71,10 @@ protected InternalMappedTerms(
7171
long otherDocCount,
7272
List<B> buckets,
7373
long docCountError,
74-
TermsAggregator.BucketCountThresholds bucketCountThresholds
74+
TermsAggregator.BucketCountThresholds bucketCountThresholds,
75+
boolean hasSliceLevelDocCountError
7576
) {
76-
super(name, reduceOrder, order, bucketCountThresholds, metadata);
77+
super(name, reduceOrder, order, bucketCountThresholds, metadata, hasSliceLevelDocCountError);
7778
this.format = format;
7879
this.shardSize = shardSize;
7980
this.showTermDocCountError = showTermDocCountError;

server/src/main/java/org/opensearch/search/aggregations/bucket/terms/InternalMultiTerms.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -242,7 +242,7 @@ public InternalMultiTerms(
242242
List<Bucket> buckets,
243243
TermsAggregator.BucketCountThresholds bucketCountThresholds
244244
) {
245-
super(name, reduceOrder, order, bucketCountThresholds, metadata);
245+
super(name, reduceOrder, order, bucketCountThresholds, metadata, false);
246246
this.shardSize = shardSize;
247247
this.showTermDocCountError = showTermDocCountError;
248248
this.otherDocCount = otherDocCount;
@@ -349,7 +349,8 @@ protected InternalMultiTerms create(
349349
List<Bucket> buckets,
350350
BucketOrder reduceOrder,
351351
long docCountError,
352-
long otherDocCount
352+
long otherDocCount,
353+
boolean hasSliceLevelDocCountError
353354
) {
354355
return new InternalMultiTerms(
355356
name,

server/src/main/java/org/opensearch/search/aggregations/bucket/terms/InternalTerms.java

Lines changed: 49 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -225,28 +225,33 @@ public int hashCode() {
225225
protected final int requiredSize;
226226
protected final long minDocCount;
227227
protected final TermsAggregator.BucketCountThresholds bucketCountThresholds;
228+
protected boolean hasSliceLevelDocCountError = false;
228229

229230
/**
230231
* Creates a new {@link InternalTerms}
231-
* @param name The name of the aggregation
232-
* @param reduceOrder The {@link BucketOrder} that should be used to merge shard results.
233-
* @param order The {@link BucketOrder} that should be used to sort the final reduce.
234-
* @param bucketCountThresholds Object containing values for minDocCount, shardMinDocCount, size, shardSize.
235-
* @param metadata The metadata associated with the aggregation.
232+
*
233+
* @param name The name of the aggregation
234+
* @param reduceOrder The {@link org.opensearch.search.aggregations.BucketOrder} that should be used to merge shard results.
235+
* @param order The {@link org.opensearch.search.aggregations.BucketOrder} that should be used to sort the final reduce.
236+
* @param bucketCountThresholds Object containing values for minDocCount, shardMinDocCount, size, shardSize.
237+
* @param metadata The metadata associated with the aggregation.
238+
* @param hasSliceLevelDocCountError
236239
*/
237240
protected InternalTerms(
238241
String name,
239242
BucketOrder reduceOrder,
240243
BucketOrder order,
241244
TermsAggregator.BucketCountThresholds bucketCountThresholds,
242-
Map<String, Object> metadata
245+
Map<String, Object> metadata,
246+
boolean hasSliceLevelDocCountError
243247
) {
244248
super(name, metadata);
245249
this.reduceOrder = reduceOrder;
246250
this.order = order;
247251
this.bucketCountThresholds = bucketCountThresholds;
248252
this.requiredSize = bucketCountThresholds.getRequiredSize();
249253
this.minDocCount = bucketCountThresholds.getMinDocCount();
254+
this.hasSliceLevelDocCountError = hasSliceLevelDocCountError;
250255
}
251256

252257
/**
@@ -299,16 +304,19 @@ private BucketOrder getReduceOrder(List<InternalAggregation> aggregations) {
299304

300305
private long getDocCountError(InternalTerms<?, ?> terms, ReduceContext reduceContext) {
301306
int size = terms.getBuckets().size();
302-
// doc_count_error is always computed at the coordinator based on the buckets returned by the shards. This should be 0 during the
303-
// shard level reduce as no buckets are being pruned at this stage.
304-
if (reduceContext.isSliceLevel() || size == 0 || size < terms.getShardSize() || isKeyOrder(terms.order)) {
307+
// TODO: I think this can be size <= terms.getShardSize() but need to validate
308+
if (size == 0 || size < terms.getShardSize() || isKeyOrder(terms.order)) {
305309
return 0;
306310
} else if (InternalOrder.isCountDesc(terms.order)) {
307311
if (terms.getDocCountError() > 0) {
308312
// If there is an existing docCountError for this agg then
309313
// use this as the error for this aggregation
310314
return terms.getDocCountError();
311315
} else {
316+
// We need a way to indicate to the coordinator that doc count error was gathered at the slice level, so do that here
317+
if (reduceContext.isSliceLevel()) {
318+
hasSliceLevelDocCountError = true;
319+
}
312320
// otherwise use the doc count of the last term in the
313321
// aggregation
314322
return terms.getBuckets().stream().mapToLong(MultiBucketsAggregation.Bucket::getDocCount).min().getAsLong();
@@ -500,14 +508,35 @@ For backward compatibility, we disable the merge sort and use ({@link InternalTe
500508
if (sumDocCountError == -1) {
501509
docCountError = -1;
502510
} else {
503-
docCountError = aggregations.size() == 1 ? 0 : sumDocCountError;
511+
// If there is doc count error originating from slice_size that needs to be handled differently:
512+
// If there is slice level doc count error then that needs to be propagated to the top level doc count error even if no
513+
// additional error is introduced by shard_size -- in other words the 1 shard case
514+
// However, if there is only 1 slice, then we can set the doc count error to 0 and disregard any slice level doc count error,
515+
// which is what the shards did before.
516+
if (reduceContext.isFinalReduce() && hasSliceLevelDocCountError) {
517+
docCountError = sumDocCountError;
518+
} else {
519+
if (aggregations.size() == 1) {
520+
docCountError = 0;
521+
hasSliceLevelDocCountError = false;
522+
} else {
523+
docCountError = sumDocCountError;
524+
}
525+
}
504526
}
505527

506528
// Shards must return buckets sorted by key, so we apply the sort here in shard level reduce
507529
if (reduceContext.isSliceLevel()) {
508530
Arrays.sort(list, thisReduceOrder.comparator());
509531
}
510-
return create(name, Arrays.asList(list), reduceContext.isFinalReduce() ? order : thisReduceOrder, docCountError, otherDocCount);
532+
return create(
533+
name,
534+
Arrays.asList(list),
535+
reduceContext.isFinalReduce() ? order : thisReduceOrder,
536+
docCountError,
537+
otherDocCount,
538+
hasSliceLevelDocCountError
539+
);
511540
}
512541

513542
@Override
@@ -523,7 +552,7 @@ protected B reduceBucket(List<B> buckets, ReduceContext context) {
523552
for (B bucket : buckets) {
524553
docCount += bucket.getDocCount();
525554
if (docCountError != -1) {
526-
if (bucket.showDocCountError() == false || bucket.getDocCountError() == -1) {
555+
if (bucket.showDocCountError() == false) {
527556
docCountError = -1;
528557
} else {
529558
docCountError += bucket.getDocCountError();
@@ -539,7 +568,14 @@ protected B reduceBucket(List<B> buckets, ReduceContext context) {
539568

540569
protected abstract int getShardSize();
541570

542-
protected abstract A create(String name, List<B> buckets, BucketOrder reduceOrder, long docCountError, long otherDocCount);
571+
protected abstract A create(
572+
String name,
573+
List<B> buckets,
574+
BucketOrder reduceOrder,
575+
long docCountError,
576+
long otherDocCount,
577+
boolean hasSliceLevelDocCountError
578+
);
543579

544580
/**
545581
* Create an array to hold some buckets. Used in collecting the results.

server/src/main/java/org/opensearch/search/aggregations/bucket/terms/LongTerms.java

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -149,7 +149,8 @@ public LongTerms(
149149
long otherDocCount,
150150
List<Bucket> buckets,
151151
long docCountError,
152-
TermsAggregator.BucketCountThresholds bucketCountThresholds
152+
TermsAggregator.BucketCountThresholds bucketCountThresholds,
153+
boolean hasSliceLevelDocCountError
153154
) {
154155
super(
155156
name,
@@ -162,7 +163,8 @@ public LongTerms(
162163
otherDocCount,
163164
buckets,
164165
docCountError,
165-
bucketCountThresholds
166+
bucketCountThresholds,
167+
hasSliceLevelDocCountError
166168
);
167169
}
168170

@@ -191,7 +193,8 @@ public LongTerms create(List<Bucket> buckets) {
191193
otherDocCount,
192194
buckets,
193195
docCountError,
194-
bucketCountThresholds
196+
bucketCountThresholds,
197+
hasSliceLevelDocCountError
195198
);
196199
}
197200

@@ -208,7 +211,14 @@ public Bucket createBucket(InternalAggregations aggregations, Bucket prototype)
208211
}
209212

210213
@Override
211-
protected LongTerms create(String name, List<Bucket> buckets, BucketOrder reduceOrder, long docCountError, long otherDocCount) {
214+
protected LongTerms create(
215+
String name,
216+
List<Bucket> buckets,
217+
BucketOrder reduceOrder,
218+
long docCountError,
219+
long otherDocCount,
220+
boolean hasSliceLevelDocCountError
221+
) {
212222
return new LongTerms(
213223
name,
214224
reduceOrder,
@@ -220,7 +230,8 @@ protected LongTerms create(String name, List<Bucket> buckets, BucketOrder reduce
220230
otherDocCount,
221231
buckets,
222232
docCountError,
223-
bucketCountThresholds
233+
bucketCountThresholds,
234+
hasSliceLevelDocCountError
224235
);
225236
}
226237

@@ -296,7 +307,8 @@ static DoubleTerms convertLongTermsToDouble(LongTerms longTerms, DocValueFormat
296307
longTerms.otherDocCount,
297308
newBuckets,
298309
longTerms.docCountError,
299-
longTerms.bucketCountThresholds
310+
longTerms.bucketCountThresholds,
311+
longTerms.hasSliceLevelDocCountError
300312
);
301313
}
302314
}

server/src/main/java/org/opensearch/search/aggregations/bucket/terms/MapStringTermsAggregator.java

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -463,7 +463,8 @@ StringTerms buildResult(long owningBucketOrd, long otherDocCount, StringTerms.Bu
463463
otherDocCount,
464464
Arrays.asList(topBuckets),
465465
0,
466-
bucketCountThresholds
466+
bucketCountThresholds,
467+
false
467468
);
468469
}
469470

server/src/main/java/org/opensearch/search/aggregations/bucket/terms/NumericTermsAggregator.java

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -404,7 +404,8 @@ LongTerms buildResult(long owningBucketOrd, long otherDocCount, LongTerms.Bucket
404404
otherDocCount,
405405
List.of(topBuckets),
406406
0,
407-
bucketCountThresholds
407+
bucketCountThresholds,
408+
false
408409
);
409410
}
410411

@@ -421,7 +422,8 @@ LongTerms buildEmptyResult() {
421422
0,
422423
emptyList(),
423424
0,
424-
bucketCountThresholds
425+
bucketCountThresholds,
426+
false
425427
);
426428
}
427429
}
@@ -484,7 +486,8 @@ DoubleTerms buildResult(long owningBucketOrd, long otherDocCount, DoubleTerms.Bu
484486
otherDocCount,
485487
List.of(topBuckets),
486488
0,
487-
bucketCountThresholds
489+
bucketCountThresholds,
490+
false
488491
);
489492
}
490493

@@ -501,7 +504,8 @@ DoubleTerms buildEmptyResult() {
501504
0,
502505
emptyList(),
503506
0,
504-
bucketCountThresholds
507+
bucketCountThresholds,
508+
false
505509
);
506510
}
507511
}

0 commit comments

Comments
 (0)