Skip to content

Apply boolean must_not rewrite to numeric match, term, and terms queries #18498

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 15 commits into
base: main
Choose a base branch
from
Open
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
### Added
- Add support for Warm Indices Write Block on Flood Watermark breach ([#18375](https://github.com/opensearch-project/OpenSearch/pull/18375))
- Ability to run Code Coverage with Gradle and produce the jacoco reports locally ([#18509](https://github.com/opensearch-project/OpenSearch/issues/18509))
- Extend BooleanQuery must_not rewrite to numeric must, term, and terms queries ([#18498](https://github.com/opensearch-project/OpenSearch/pull/18498))

### Changed

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,18 @@
import org.opensearch.common.settings.Settings;
import org.opensearch.test.ParameterizedStaticSettingsOpenSearchIntegTestCase;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.List;
import java.util.Map;

import static org.opensearch.index.query.QueryBuilders.boolQuery;
import static org.opensearch.index.query.QueryBuilders.matchAllQuery;
import static org.opensearch.index.query.QueryBuilders.matchQuery;
import static org.opensearch.index.query.QueryBuilders.rangeQuery;
import static org.opensearch.index.query.QueryBuilders.termQuery;
import static org.opensearch.index.query.QueryBuilders.termsQuery;
import static org.opensearch.search.SearchService.CLUSTER_CONCURRENT_SEGMENT_SEARCH_SETTING;
import static org.opensearch.test.hamcrest.OpenSearchAssertions.assertHitCount;

Expand Down Expand Up @@ -229,6 +234,53 @@ public void testMustNotRangeRewriteWithMoreThanOneValue() throws Exception {
assertHitCount(client().prepareSearch().setQuery(matchAllQuery()).get(), numDocs);
}

public void testMustNotNumericMatchOrTermQueryRewrite() throws Exception {
Map<Integer, Integer> statusToDocCountMap = Map.of(200, 1000, 404, 30, 500, 1, 400, 1293);
String statusField = "status";
createIndex("test");
int totalDocs = 0;
for (Map.Entry<Integer, Integer> entry : statusToDocCountMap.entrySet()) {
for (int i = 0; i < entry.getValue(); i++) {
client().prepareIndex("test").setSource(statusField, entry.getKey()).get();
}
totalDocs += entry.getValue();
}
ensureGreen();
waitForRelocation();
forceMerge();
refresh();

int excludedValue = randomFrom(statusToDocCountMap.keySet());
int expectedHitCount = totalDocs - statusToDocCountMap.get(excludedValue);

// Check the rewritten match query behaves as expected
assertHitCount(
client().prepareSearch("test").setQuery(boolQuery().mustNot(matchQuery(statusField, excludedValue))).get(),
expectedHitCount
);

// Check the rewritten term query behaves as expected
assertHitCount(
client().prepareSearch("test").setQuery(boolQuery().mustNot(termQuery(statusField, excludedValue))).get(),
expectedHitCount
);

// Check a rewritten terms query behaves as expected
List<Integer> excludedValues = new ArrayList<>();
excludedValues.add(excludedValue);
int secondExcludedValue = randomFrom(statusToDocCountMap.keySet());
expectedHitCount = totalDocs - statusToDocCountMap.get(excludedValue);
if (secondExcludedValue != excludedValue) {
excludedValues.add(secondExcludedValue);
expectedHitCount -= statusToDocCountMap.get(secondExcludedValue);
}

assertHitCount(
client().prepareSearch("test").setQuery(boolQuery().mustNot(termsQuery(statusField, excludedValues))).get(),
expectedHitCount
);
}

private String padZeros(int value, int length) {
// String.format() not allowed
String ret = Integer.toString(value);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1750,6 +1750,10 @@ public byte[] encodePoint(Number value) {
public double toDoubleValue(long value) {
return type.toDoubleValue(value);
}

public Number parse(Object value) {
return type.parse(value, coerce);
}
}

private final NumberType type;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -482,31 +482,33 @@ private boolean rewriteMustNotRangeClausesToShould(BoolQueryBuilder newBuilder,
return false;
}

QueryShardContext shardContext = getQueryShardContext(queryRewriteContext);

boolean changed = false;
// For now, only handle the case where there's exactly 1 range query for this field.
Map<String, Integer> fieldCounts = new HashMap<>();
Set<RangeQueryBuilder> rangeQueries = new HashSet<>();
Set<ComplementAwareQueryBuilder> complementAwareQueries = new HashSet<>();
for (QueryBuilder clause : mustNotClauses) {
if (clause instanceof RangeQueryBuilder rq) {
fieldCounts.merge(rq.fieldName(), 1, Integer::sum);
rangeQueries.add(rq);
if (clause instanceof ComplementAwareQueryBuilder caq) {
fieldCounts.merge(caq.fieldName(), 1, Integer::sum);
complementAwareQueries.add(caq);
}
}

for (RangeQueryBuilder rq : rangeQueries) {
String fieldName = rq.fieldName();
for (ComplementAwareQueryBuilder caq : complementAwareQueries) {
String fieldName = caq.fieldName();
if (fieldCounts.getOrDefault(fieldName, 0) == 1) {
// Check that all docs on this field have exactly 1 value, otherwise we can't perform this rewrite
if (checkAllDocsHaveOneValue(leafReaderContexts, fieldName)) {
List<RangeQueryBuilder> complement = rq.getComplement();
List<QueryBuilder> complement = caq.getComplement(shardContext);
if (complement != null) {
BoolQueryBuilder nestedBoolQuery = new BoolQueryBuilder();
nestedBoolQuery.minimumShouldMatch(1);
for (RangeQueryBuilder complementComponent : complement) {
for (QueryBuilder complementComponent : complement) {
nestedBoolQuery.should(complementComponent);
}
newBuilder.must(nestedBoolQuery);
newBuilder.mustNotClauses.remove(rq);
newBuilder.mustNotClauses.remove(caq);
changed = true;
}
}
Expand Down Expand Up @@ -534,6 +536,10 @@ private List<LeafReaderContext> getLeafReaderContexts(QueryRewriteContext queryR
return indexSearcher.getIndexReader().leaves();
}

private QueryShardContext getQueryShardContext(QueryRewriteContext queryRewriteContext) {
return queryRewriteContext == null ? null : queryRewriteContext.convertToShardContext(); // Note this can still be null
}

private boolean checkAllDocsHaveOneValue(List<LeafReaderContext> contexts, String fieldName) {
for (LeafReaderContext lrc : contexts) {
PointValues values;
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

package org.opensearch.index.query;

import java.util.List;

/**
* A QueryBuilder which can provide QueryBuilders that make up the complement of the original query.
*/
public interface ComplementAwareQueryBuilder extends WithFieldName {
/**
* Returns a list of RangeQueryBuilder whose elements, when combined, form the complement of this range query.
* May be null, in which case the complement couldn't be determined.
* @return the complement
*/
List<QueryBuilder> getComplement(QueryShardContext context);
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/

package org.opensearch.index.query;

import org.opensearch.index.mapper.MappedFieldType;
import org.opensearch.index.mapper.NumberFieldMapper;

import java.util.ArrayList;
import java.util.List;

/**
* A class with helper functions to construct complements for some queries.
*/
public class ComplementHelperUtils {

Check warning on line 20 in server/src/main/java/org/opensearch/index/query/ComplementHelperUtils.java

View check run for this annotation

Codecov / codecov/patch

server/src/main/java/org/opensearch/index/query/ComplementHelperUtils.java#L20

Added line #L20 was not covered by tests
/**
* Get the NumberFieldType for this fieldName from the context, or null if it isn't a NumberFieldType.
*/
public static NumberFieldMapper.NumberFieldType getNumberFieldType(QueryShardContext context, String fieldName) {
if (context == null) return null;
MappedFieldType fieldType = context.fieldMapper(fieldName);
if (!(fieldType instanceof NumberFieldMapper.NumberFieldType nft)) return null;
return nft;
}

/**
* Returns a list of 2 RangeQueryBuilders matching everything but value.
*/
public static List<QueryBuilder> numberValueToComplement(String fieldName, Number value) {
List<QueryBuilder> complement = new ArrayList<>();
RangeQueryBuilder belowRange = new RangeQueryBuilder(fieldName);
belowRange.to(value);
belowRange.includeUpper(false);
complement.add(belowRange);

RangeQueryBuilder aboveRange = new RangeQueryBuilder(fieldName);
aboveRange.from(value);
aboveRange.includeLower(false);
complement.add(aboveRange);
return complement;
}

/**
* Returns a list of RangeQueryBuilders matching everything except the provided sorted values.
* if isWholeNumber == true, and two sorted values are off by 1, the range between the two of them won't appear in
* the complement since no value could match it.
*/
public static List<QueryBuilder> numberValuesToComplement(String fieldName, List<Number> sortedValues, boolean isWholeNumber) {
if (sortedValues.isEmpty()) return null;
List<QueryBuilder> complement = new ArrayList<>();
Number lastValue = null;
for (Number value : sortedValues) {
RangeQueryBuilder range = new RangeQueryBuilder(fieldName);
range.includeUpper(false);
range.to(value);
if (lastValue != null) {
// If this is a whole number field and the last value is 1 less than the current value, we can skip this part of the
// complement
if (isWholeNumber && value.longValue() - lastValue.longValue() == 1) {
continue;
}
range.includeLower(false);
range.from(lastValue);
}
complement.add(range);
lastValue = value;
}
// Finally add the last range query
RangeQueryBuilder lastRange = new RangeQueryBuilder(fieldName);
lastRange.from(sortedValues.get(sortedValues.size() - 1));
lastRange.includeLower(false);
lastRange.includeUpper(true);
complement.add(lastRange);
return complement;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -43,11 +43,13 @@
import org.opensearch.core.common.io.stream.StreamOutput;
import org.opensearch.core.xcontent.XContentBuilder;
import org.opensearch.core.xcontent.XContentParser;
import org.opensearch.index.mapper.NumberFieldMapper;
import org.opensearch.index.query.support.QueryParsers;
import org.opensearch.index.search.MatchQuery;
import org.opensearch.index.search.MatchQuery.ZeroTermsQuery;

import java.io.IOException;
import java.util.List;
import java.util.Objects;

/**
Expand All @@ -56,7 +58,7 @@
*
* @opensearch.internal
*/
public class MatchQueryBuilder extends AbstractQueryBuilder<MatchQueryBuilder> implements WithFieldName {
public class MatchQueryBuilder extends AbstractQueryBuilder<MatchQueryBuilder> implements ComplementAwareQueryBuilder {

private static final String CUTOFF_FREQUENCY_DEPRECATION_MSG = "you can omit this option, "
+ "the [match] query can skip block of documents efficiently if the total number of hits is not tracked";
Expand Down Expand Up @@ -589,4 +591,12 @@ public static MatchQueryBuilder fromXContent(XContentParser parser) throws IOExc
return matchQuery;
}

@Override
public List<QueryBuilder> getComplement(QueryShardContext context) {
// If this is a match query on a numeric field, we can provide the complement using RangeQueryBuilder.
NumberFieldMapper.NumberFieldType nft = ComplementHelperUtils.getNumberFieldType(context, fieldName);
if (nft == null) return null;
Number numberValue = nft.parse(value);
return ComplementHelperUtils.numberValueToComplement(fieldName, numberValue);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,10 @@
*
* @opensearch.internal
*/
public class RangeQueryBuilder extends AbstractQueryBuilder<RangeQueryBuilder> implements MultiTermQueryBuilder {
public class RangeQueryBuilder extends AbstractQueryBuilder<RangeQueryBuilder>
implements
MultiTermQueryBuilder,
ComplementAwareQueryBuilder {
public static final String NAME = "range";

public static final boolean DEFAULT_INCLUDE_UPPER = true;
Expand Down Expand Up @@ -545,16 +548,13 @@
&& Objects.equals(format, other.format);
}

/**
* Returns a list of RangeQueryBuilder whose elements, when combined, form the complement of this range query.
* May be null.
* @return the complement
*/
public List<RangeQueryBuilder> getComplement() {
@Override
public List<QueryBuilder> getComplement(QueryShardContext context) {
// This implementation doesn't need info from QueryShardContext
if (relation != null && relation != ShapeRelation.INTERSECTS) {
return null;
}
List<RangeQueryBuilder> complement = new ArrayList<>();
List<QueryBuilder> complement = new ArrayList<>();
if (from != null) {
RangeQueryBuilder belowRange = new RangeQueryBuilder(fieldName);
belowRange.to(from);
Expand All @@ -570,13 +570,13 @@
}

if (format != null) {
for (RangeQueryBuilder rq : complement) {
rq.format(format);
for (QueryBuilder rq : complement) {
((RangeQueryBuilder) rq).format(format);

Check warning on line 574 in server/src/main/java/org/opensearch/index/query/RangeQueryBuilder.java

View check run for this annotation

Codecov / codecov/patch

server/src/main/java/org/opensearch/index/query/RangeQueryBuilder.java#L574

Added line #L574 was not covered by tests
}
}
if (timeZone != null) {
for (RangeQueryBuilder rq : complement) {
rq.timeZone = timeZone;
for (QueryBuilder rq : complement) {
((RangeQueryBuilder) rq).timeZone = timeZone;

Check warning on line 579 in server/src/main/java/org/opensearch/index/query/RangeQueryBuilder.java

View check run for this annotation

Codecov / codecov/patch

server/src/main/java/org/opensearch/index/query/RangeQueryBuilder.java#L579

Added line #L579 was not covered by tests
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -43,16 +43,18 @@
import org.opensearch.core.xcontent.XContentParser;
import org.opensearch.index.mapper.ConstantFieldType;
import org.opensearch.index.mapper.MappedFieldType;
import org.opensearch.index.mapper.NumberFieldMapper;

import java.io.IOException;
import java.util.List;
import java.util.Objects;

/**
* A Query that matches documents containing a term.
*
* @opensearch.internal
*/
public class TermQueryBuilder extends BaseTermQueryBuilder<TermQueryBuilder> {
public class TermQueryBuilder extends BaseTermQueryBuilder<TermQueryBuilder> implements ComplementAwareQueryBuilder {
public static final String NAME = "term";
public static final boolean DEFAULT_CASE_INSENSITIVITY = false;
private static final ParseField CASE_INSENSITIVE_FIELD = new ParseField("case_insensitive");
Expand Down Expand Up @@ -238,4 +240,12 @@ protected final boolean doEquals(TermQueryBuilder other) {
return super.doEquals(other) && Objects.equals(caseInsensitive, other.caseInsensitive);
}

@Override
public List<QueryBuilder> getComplement(QueryShardContext context) {
// If this is a term query on a numeric field, we can provide the complement using RangeQueryBuilder.
NumberFieldMapper.NumberFieldType nft = ComplementHelperUtils.getNumberFieldType(context, fieldName);
if (nft == null) return null;
Number numberValue = nft.parse(value);
return ComplementHelperUtils.numberValueToComplement(fieldName, numberValue);
}
}
Loading
Loading