Skip to content

Add MatchPhrasePrefix As Alternate Syntax for Match_Phrase_Prefix Function #164

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -220,7 +220,8 @@ public enum BuiltinFunctionName {
QUERY(FunctionName.of("query")),
MATCH_QUERY(FunctionName.of("match_query")),
MATCHQUERY(FunctionName.of("matchquery")),
MULTI_MATCH(FunctionName.of("multi_match"));
MULTI_MATCH(FunctionName.of("multi_match")),
MATCHPHRASEPREFIX(FunctionName.of("matchphraseprefix"));

private final FunctionName name;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,8 @@ public void register(BuiltinFunctionRepository repository) {
// compatibility.
repository.register(match_phrase(BuiltinFunctionName.MATCH_PHRASE));
repository.register(match_phrase(BuiltinFunctionName.MATCHPHRASE));
repository.register(match_phrase_prefix());
repository.register(match_phrase_prefix(BuiltinFunctionName.MATCH_PHRASE_PREFIX));
repository.register(match_phrase_prefix(BuiltinFunctionName.MATCHPHRASEPREFIX));
}

private static FunctionResolver match_bool_prefix() {
Expand All @@ -49,8 +50,8 @@ private static FunctionResolver match() {
return new RelevanceFunctionResolver(funcName, STRING);
}

private static FunctionResolver match_phrase_prefix() {
FunctionName funcName = BuiltinFunctionName.MATCH_PHRASE_PREFIX.getName();
private static FunctionResolver match_phrase_prefix(BuiltinFunctionName matchPhrasePrefix) {
FunctionName funcName = matchPhrasePrefix.getName();
return new RelevanceFunctionResolver(funcName, STRING);
}

Expand Down
43 changes: 43 additions & 0 deletions docs/user/dql/functions.rst
Original file line number Diff line number Diff line change
Expand Up @@ -2851,6 +2851,49 @@ Another example to show how to set custom values for the optional parameters::
+----------------------+--------------------------+


MATCHPHRASEPREFIX
-----------------

Description
>>>>>>>>>>>

``matchphraseprefix(field_expression, query_expression[, option=<option_value>]*)``

The matchphraseprefix function maps to the match_phrase_prefix query used in the search engine
and returns the documents in which a given field matches the provided text.
It is an alternate syntax for the `match_phrase_prefix`_ function, provided for backward compatibility.

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you mention that this was added for backwards compatibility

Available parameters include:

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

whitespace


- analyzer
- slop
- zero_terms_query
- max_expansions
- boost


Example with only ``field`` and ``query`` expressions, with all other parameters set to their default values::

os> SELECT author, title FROM books WHERE matchphraseprefix(author, 'Alexander Mil');
fetched rows / total rows = 2/2
+----------------------+--------------------------+
| author | title |
|----------------------+--------------------------|
| Alan Alexander Milne | The House at Pooh Corner |
| Alan Alexander Milne | Winnie-the-Pooh |
+----------------------+--------------------------+

Another example to show how to set custom values for the optional parameters::

os> SELECT author, title FROM books WHERE matchphraseprefix(author, 'Alan Mil', slop = 2);
fetched rows / total rows = 2/2
+----------------------+--------------------------+
| author | title |
|----------------------+--------------------------|
| Alan Alexander Milne | The House at Pooh Corner |
| Alan Alexander Milne | Winnie-the-Pooh |
+----------------------+--------------------------+


MULTI_MATCH
-----------

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -107,4 +107,90 @@ public void slop_is_3() throws IOException {
rows("taste draught gas"),
rows("taste gas"));
}

@Test
public void required_parameters_alternate_syntax() throws IOException {
  // Only the two mandatory arguments (field and query text) are supplied to the
  // alternate 'matchphraseprefix' spelling.
  final String sql = String.format(
      "SELECT Title FROM %s WHERE matchphraseprefix(Title, 'champagne be')",
      TEST_INDEX_BEER);
  final JSONObject response = executeJdbcRequest(sql);
  verifyDataRows(
      response,
      rows("Can old flat champagne be used for vinegar?"),
      rows("Elder flower champagne best to use natural yeast or add a wine yeast?"));
}

@Test
public void all_optional_parameters_alternate_syntax() throws IOException {
  // Every optional parameter is passed at once; the chosen values are valid
  // but otherwise arbitrary.
  final String template = "SELECT Title FROM %s "
      + "WHERE matchphraseprefix(Title, 'flat champ', boost = 1.0, zero_terms_query='ALL', "
      + "max_expansions = 2, analyzer=standard, slop=0)";
  final String sql = String.format(template, TEST_INDEX_BEER);
  final JSONObject response = executeJdbcRequest(sql);
  verifyDataRows(response, rows("Can old flat champagne be used for vinegar?"));
}

@Test
public void max_expansions_is_3_alternate_syntax() throws IOException {
  // max_expansions governs the final query term, here 'bottl': OpenSearch is told
  // to consider only the first 3 terms starting with that prefix. For this dataset
  // those terms are 'bottle-conditioning', 'bottling' and 'bottles'.
  final String template = "SELECT Tags FROM %s "
      + "WHERE matchphraseprefix(Tags, 'draught bottl', max_expansions=3)";
  final String sql = String.format(template, TEST_INDEX_BEER);
  final JSONObject response = executeJdbcRequest(sql);
  verifyDataRows(
      response,
      rows("brewing draught bottling"),
      rows("draught bottles"));
}

@Test
public void analyzer_english_alternate_syntax() throws IOException {
  // 'in' and 'to' are common words that the English analyzer strips out,
  // so the resulting query is empty and no rows are expected.
  final String template = "SELECT Title FROM %s "
      + "WHERE matchphraseprefix(Title, 'in to', analyzer=english)";
  final JSONObject response = executeJdbcRequest(String.format(template, TEST_INDEX_BEER));
  final int total = response.getInt("total");
  assertTrue("Expect English analyzer to filter out common words 'in' and 'to'",
      total == 0);
}

@Test
public void analyzer_standard_alternate_syntax() throws IOException {
  // The standard analyzer keeps 'in' and 'to' as ordinary terms, so 'to' is
  // used as the phrase prefix, giving us 'Tokyo'.
  final String template = "SELECT Title FROM %s "
      + "WHERE matchphraseprefix(Title, 'in to', analyzer=standard)";
  final String sql = String.format(template, TEST_INDEX_BEER);
  final JSONObject response = executeJdbcRequest(sql);
  verifyDataRows(response, rows("Local microbreweries and craft beer in Tokyo"));
}

@Test
public void zero_term_query_all_alternate_syntax() throws IOException {
  // The English analyzer drops the common words 'in' and 'to', leaving no terms.
  // With zero_terms_query set to 'ALL' every row is then returned, so
  // ORDER BY ... LIMIT is used to keep the expected result small and readable.
  final String template = "SELECT Title FROM %s"
      + " WHERE matchphraseprefix(Title, 'in to', analyzer=english, zero_terms_query='ALL')"
      + " ORDER BY Title DESC"
      + " LIMIT 1";
  final String sql = String.format(template, TEST_INDEX_BEER);
  final JSONObject response = executeJdbcRequest(sql);
  verifyDataRows(response, rows("was working great, now all foam"));
}


@Test
public void slop_is_2_alternate_syntax() throws IOException {
  // A slop of 2 is enough to match the two query terms in swapped order:
  // 'gas ta' matches 'taste gas', with the trailing term 'ta' acting as the
  // prefix of 'taste'. Compare slop_is_3_alternate_syntax, where the larger
  // slop additionally matches 'taste draught gas'.
  String query = "SELECT Tags from %s where matchphraseprefix(Tags, 'gas ta', slop=2)";
  JSONObject result = executeJdbcRequest(String.format(query, TEST_INDEX_BEER));
  verifyDataRows(result, rows("taste gas"));
}

@Test
public void slop_is_3_alternate_syntax() throws IOException {
  // With a slop of 3 the results also contain phrases in which the query
  // terms appear transposed.
  final String sql = String.format(
      "SELECT Tags from %s where matchphraseprefix(Tags, 'gas ta', slop=3)",
      TEST_INDEX_BEER);
  final JSONObject response = executeJdbcRequest(sql);
  verifyDataRows(
      response,
      rows("taste draught gas"),
      rows("taste gas"));
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ public class FilterQueryBuilder extends ExpressionNodeVisitor<QueryBuilder, Obje
.put(BuiltinFunctionName.QUERY_STRING.getName(), new QueryStringQuery())
.put(BuiltinFunctionName.MATCH_BOOL_PREFIX.getName(), new MatchBoolPrefixQuery())
.put(BuiltinFunctionName.MATCH_PHRASE_PREFIX.getName(), new MatchPhrasePrefixQuery())
.put(BuiltinFunctionName.MATCHPHRASEPREFIX.getName(), new MatchPhrasePrefixQuery())
.build();

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,20 +30,27 @@ public class MatchPhrasePrefixQueryTest {

private final DSL dsl = new ExpressionConfig().dsl(new ExpressionConfig().functionRepository());
private final MatchPhrasePrefixQuery matchPhrasePrefixQuery = new MatchPhrasePrefixQuery();
private final FunctionName matchPhrasePrefix = FunctionName.of("match_phrase_prefix");
private final FunctionName matchPhrasePrefixWithUnderscoresName =
FunctionName.of("match_phrase_prefix");
private final FunctionName matchPhrasePrefixName = FunctionName.of("matchphraseprefix");
private final FunctionName[] functionNames = {matchPhrasePrefixWithUnderscoresName, matchPhrasePrefixName};

@Test
public void test_SyntaxCheckException_when_no_arguments() {
List<Expression> arguments = List.of();
assertThrows(SyntaxCheckException.class,
() -> matchPhrasePrefixQuery.build(new MatchPhraseExpression(arguments)));
for (FunctionName funcName: functionNames) {
assertThrows(SyntaxCheckException.class,
() -> matchPhrasePrefixQuery.build(new MatchPhraseExpression(arguments, funcName)));
}
}

@Test
public void test_SyntaxCheckException_when_one_argument() {
List<Expression> arguments = List.of(dsl.namedArgument("field", "test"));
assertThrows(SyntaxCheckException.class,
() -> matchPhrasePrefixQuery.build(new MatchPhraseExpression(arguments)));
for (FunctionName funcName: functionNames) {
assertThrows(SyntaxCheckException.class,
() -> matchPhrasePrefixQuery.build(new MatchPhraseExpression(arguments, funcName)));
}
}

@Test
Expand All @@ -52,8 +59,10 @@ public void test_SyntaxCheckException_when_invalid_parameter() {
dsl.namedArgument("field", "test"),
dsl.namedArgument("query", "test2"),
dsl.namedArgument("unsupported", "3"));
Assertions.assertThrows(SemanticCheckException.class,
() -> matchPhrasePrefixQuery.build(new MatchPhraseExpression(arguments)));
for (FunctionName funcName: functionNames) {
Assertions.assertThrows(SemanticCheckException.class,
() -> matchPhrasePrefixQuery.build(new MatchPhraseExpression(arguments, funcName)));
}
}

@Test
Expand All @@ -63,15 +72,19 @@ public void test_analyzer_parameter() {
dsl.namedArgument("query", "t2"),
dsl.namedArgument("analyzer", "standard")
);
Assertions.assertNotNull(matchPhrasePrefixQuery.build(new MatchPhraseExpression(arguments)));
for (FunctionName funcName: functionNames) {
Assertions.assertNotNull(matchPhrasePrefixQuery.build(new MatchPhraseExpression(arguments, funcName)));
}
}

@Test
public void build_succeeds_with_two_arguments() {
List<Expression> arguments = List.of(
dsl.namedArgument("field", "test"),
dsl.namedArgument("query", "test2"));
Assertions.assertNotNull(matchPhrasePrefixQuery.build(new MatchPhraseExpression(arguments)));
for (FunctionName funcName: functionNames) {
Assertions.assertNotNull(matchPhrasePrefixQuery.build(new MatchPhraseExpression(arguments, funcName)));
}
}

@Test
Expand All @@ -81,7 +94,9 @@ public void test_slop_parameter() {
dsl.namedArgument("query", "t2"),
dsl.namedArgument("slop", "2")
);
Assertions.assertNotNull(matchPhrasePrefixQuery.build(new MatchPhraseExpression(arguments)));
for (FunctionName funcName: functionNames) {
Assertions.assertNotNull(matchPhrasePrefixQuery.build(new MatchPhraseExpression(arguments, funcName)));
}
}

@Test
Expand All @@ -91,7 +106,9 @@ public void test_zero_terms_query_parameter() {
dsl.namedArgument("query", "t2"),
dsl.namedArgument("zero_terms_query", "ALL")
);
Assertions.assertNotNull(matchPhrasePrefixQuery.build(new MatchPhraseExpression(arguments)));
for (FunctionName funcName: functionNames) {
Assertions.assertNotNull(matchPhrasePrefixQuery.build(new MatchPhraseExpression(arguments, funcName)));
}
}

@Test
Expand All @@ -101,7 +118,9 @@ public void test_zero_terms_query_parameter_lower_case() {
dsl.namedArgument("query", "t2"),
dsl.namedArgument("zero_terms_query", "all")
);
Assertions.assertNotNull(matchPhrasePrefixQuery.build(new MatchPhraseExpression(arguments)));
for (FunctionName funcName: functionNames) {
Assertions.assertNotNull(matchPhrasePrefixQuery.build(new MatchPhraseExpression(arguments, funcName)));
}
}

@Test
Expand All @@ -111,12 +130,14 @@ public void test_boost_parameter() {
dsl.namedArgument("query", "t2"),
dsl.namedArgument("boost", "0.1")
);
Assertions.assertNotNull(matchPhrasePrefixQuery.build(new MatchPhraseExpression(arguments)));
for (FunctionName funcName: functionNames) {
Assertions.assertNotNull(matchPhrasePrefixQuery.build(new MatchPhraseExpression(arguments, funcName)));
}
}

private class MatchPhraseExpression extends FunctionExpression {
public MatchPhraseExpression(List<Expression> arguments) {
super(MatchPhrasePrefixQueryTest.this.matchPhrasePrefix, arguments);
public MatchPhraseExpression(List<Expression> arguments, FunctionName funcName) {
super(funcName, arguments);
}

@Override
Expand Down
1 change: 1 addition & 0 deletions sql/src/main/antlr/OpenSearchSQLLexer.g4
Original file line number Diff line number Diff line change
Expand Up @@ -298,6 +298,7 @@ MATCH_PHRASE: 'MATCH_PHRASE';
SIMPLE_QUERY_STRING: 'SIMPLE_QUERY_STRING';
QUERY_STRING: 'QUERY_STRING';
MATCH_PHRASE_PREFIX: 'MATCH_PHRASE_PREFIX';
MATCHPHRASEPREFIX: 'MATCHPHRASEPREFIX';
MATCHQUERY: 'MATCHQUERY';
MATCH_QUERY: 'MATCH_QUERY';
MINUTE_OF_DAY: 'MINUTE_OF_DAY';
Expand Down
1 change: 1 addition & 0 deletions sql/src/main/antlr/OpenSearchSQLParser.g4
Original file line number Diff line number Diff line change
Expand Up @@ -427,6 +427,7 @@ systemFunctionName
singleFieldRelevanceFunctionName
: MATCH | MATCH_PHRASE | MATCHPHRASE
| MATCH_BOOL_PREFIX | MATCH_PHRASE_PREFIX
| MATCHPHRASEPREFIX

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: put MATCH_PHRASE_PREFIX and MATCHPHRASEPREFIX on the same line

;

multiFieldRelevanceFunctionName
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -398,7 +398,8 @@ public void canParseComplexMatchPhraseArgsTest(String query) {

@ParameterizedTest
@MethodSource({
"generateMatchPhrasePrefixQueries"
"generateMatchPhrasePrefixQueries",
"generateMatchPhrasePrefixQueries_alternateSyntax"
})
public void canParseComplexMatchPhrasePrefixQueries(String query) {
assertNotNull(parser.parse(query));
Expand Down Expand Up @@ -458,6 +459,16 @@ private static Stream<String> generateMatchPhrasePrefixQueries() {
.build());
}

// Supplies parser test queries for the alternate 'matchphraseprefix' spelling by
// handing the function name and a table of candidate option values to generateQueries.
private static Stream<String> generateMatchPhrasePrefixQueries_alternateSyntax() {
  ImmutableMap.Builder<String, Object[]> optionValues = ImmutableMap.builder();
  optionValues.put("analyzer", new String[] {"standard", "stop", "english"});
  optionValues.put("slop", new Integer[] {0, 1, 2});
  optionValues.put("max_expansions", new Integer[] {0, 3, 10});
  optionValues.put("zero_terms_query", new String[] {"NONE", "ALL", "NULL"});
  optionValues.put("boost", new Float[] {-0.5f, 1.0f, 1.2f});
  return generateQueries("matchphraseprefix", optionValues.build());
}

private static Stream<String> generateQueries(String function,
Map<String, Object[]> functionArgs) {
var rand = new Random(0);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -469,6 +469,24 @@ public void matchPhrasePrefixAllParameters() {
);
}

@Test
public void matchPhrasePrefixAllParameters_alternateSyntax() {
  // The alternate spelling must produce a function node named 'matchphraseprefix'
  // whose field, query and option arguments are all unresolved string-literal arguments.
  final String expression =
      "matchphraseprefix(test, 'search query', slop = 3, boost = 1.5, "
          + "analyzer = 'standard', max_expansions = 4, zero_terms_query='NONE')";
  assertEquals(
      AstDSL.function(
          "matchphraseprefix",
          unresolvedArg("field", stringLiteral("test")),
          unresolvedArg("query", stringLiteral("search query")),
          unresolvedArg("slop", stringLiteral("3")),
          unresolvedArg("boost", stringLiteral("1.5")),
          unresolvedArg("analyzer", stringLiteral("standard")),
          unresolvedArg("max_expansions", stringLiteral("4")),
          unresolvedArg("zero_terms_query", stringLiteral("NONE"))),
      buildExprAst(expression));
}

@Test
public void relevanceMatch() {
assertEquals(AstDSL.function("match",
Expand Down