Skip to content

Commit 0e8adb7

Browse files
authored
.Net: Hybrid Search (#10814)
### Motivation and Context

Adding hybrid search to the March release branch.

### Description

Adding hybrid search to the March release branch.

### Contribution Checklist

<!-- Before submitting this PR, please make sure: -->

- [ ] The code builds clean without any errors or warnings
- [ ] The PR follows the [SK Contribution Guidelines](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md) and the [pre-submission formatting script](https://github.com/microsoft/semantic-kernel/blob/main/CONTRIBUTING.md#development-scripts) raises no violations
- [ ] All unit tests pass, and I have added new tests where possible
- [ ] I didn't break anyone 😄
1 parent be79369 commit 0e8adb7

File tree

45 files changed

+2234
-1485
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

45 files changed

+2234
-1485
lines changed

docs/decisions/00NN-hybrid-search.md

+395
Large diffs are not rendered by default.

dotnet/Directory.Packages.props

+1-1
Original file line numberDiff line numberDiff line change
@@ -136,7 +136,7 @@
136136
<PackageVersion Include="YamlDotNet" Version="15.3.0" />
137137
<PackageVersion Include="Fluid.Core" Version="2.11.1" />
138138
<!-- Memory stores -->
139-
<PackageVersion Include="Microsoft.Azure.Cosmos" Version="3.46.1" />
139+
<PackageVersion Include="Microsoft.Azure.Cosmos" Version="3.48.0-preview.0" />
140140
<PackageVersion Include="Pgvector" Version="0.2.0" />
141141
<PackageVersion Include="NRedisStack" Version="0.12.0" />
142142
<PackageVersion Include="Milvus.Client" Version="2.3.0-preview.1" />

dotnet/src/Connectors/Connectors.AzureCosmosDBMongoDB.UnitTests/AzureCosmosDBMongoDBVectorStoreRecordCollectionTests.cs

-2
Original file line numberDiff line numberDiff line change
@@ -569,8 +569,6 @@ public async Task VectorizedSearchThrowsExceptionWithInvalidVectorTypeAsync(obje
569569
}
570570

571571
[Theory]
572-
[InlineData(null, "TestEmbedding1", 1, 1)]
573-
[InlineData("", "TestEmbedding1", 2, 2)]
574572
[InlineData("TestEmbedding1", "TestEmbedding1", 3, 3)]
575573
[InlineData("TestEmbedding2", "test_embedding_2", 4, 4)]
576574
public async Task VectorizedSearchUsesValidQueryAsync(

dotnet/src/Connectors/Connectors.AzureCosmosDBNoSQL.UnitTests/AzureCosmosDBNoSQLVectorStoreCollectionQueryBuilderTests.cs

+88-42
Original file line numberDiff line numberDiff line change
@@ -37,16 +37,19 @@ public void BuildSearchQueryByDefaultReturnsValidQueryDefinition()
3737
.EqualTo("TestProperty2", "test-value-2")
3838
.AnyTagEqualTo("TestProperty3", "test-value-3");
3939

40-
var searchOptions = new VectorSearchOptions<DummyType> { OldFilter = filter, Skip = 5, Top = 10 };
41-
4240
// Act
43-
var queryDefinition = AzureCosmosDBNoSQLVectorStoreCollectionQueryBuilder.BuildSearchQuery(
41+
var queryDefinition = AzureCosmosDBNoSQLVectorStoreCollectionQueryBuilder.BuildSearchQuery<ReadOnlyMemory<float>, DummyType>(
4442
vector,
43+
keywords: null,
4544
fields,
4645
this._storagePropertyNames,
4746
vectorPropertyName,
47+
textPropertyName: null,
4848
ScorePropertyName,
49-
searchOptions);
49+
oldFilter: filter,
50+
filter: null,
51+
10,
52+
5);
5053

5154
var queryText = queryDefinition.QueryText;
5255
var queryParameters = queryDefinition.GetQueryParameters();
@@ -56,22 +59,16 @@ public void BuildSearchQueryByDefaultReturnsValidQueryDefinition()
5659
Assert.Contains("FROM x", queryText);
5760
Assert.Contains("WHERE x.test_property_2 = @cv0 AND ARRAY_CONTAINS(x.test_property_3, @cv1)", queryText);
5861
Assert.Contains("ORDER BY VectorDistance(x.test_property_1, @vector)", queryText);
59-
Assert.Contains("OFFSET @offset LIMIT @limit", queryText);
62+
Assert.Contains("OFFSET 5 LIMIT 10", queryText);
6063

6164
Assert.Equal("@vector", queryParameters[0].Name);
6265
Assert.Equal(vector, queryParameters[0].Value);
6366

64-
Assert.Equal("@offset", queryParameters[1].Name);
65-
Assert.Equal(5, queryParameters[1].Value);
66-
67-
Assert.Equal("@limit", queryParameters[2].Name);
68-
Assert.Equal(10, queryParameters[2].Value);
69-
70-
Assert.Equal("@cv0", queryParameters[3].Name);
71-
Assert.Equal("test-value-2", queryParameters[3].Value);
67+
Assert.Equal("@cv0", queryParameters[1].Name);
68+
Assert.Equal("test-value-2", queryParameters[1].Value);
7269

73-
Assert.Equal("@cv1", queryParameters[4].Name);
74-
Assert.Equal("test-value-3", queryParameters[4].Value);
70+
Assert.Equal("@cv1", queryParameters[2].Name);
71+
Assert.Equal("test-value-3", queryParameters[2].Value);
7572
}
7673

7774
[Fact]
@@ -86,39 +83,39 @@ public void BuildSearchQueryWithoutOffsetReturnsQueryDefinitionWithTopParameter(
8683
.EqualTo("TestProperty2", "test-value-2")
8784
.AnyTagEqualTo("TestProperty3", "test-value-3");
8885

89-
var searchOptions = new VectorSearchOptions<DummyType> { OldFilter = filter, Top = 10 };
90-
9186
// Act
92-
var queryDefinition = AzureCosmosDBNoSQLVectorStoreCollectionQueryBuilder.BuildSearchQuery(
87+
var queryDefinition = AzureCosmosDBNoSQLVectorStoreCollectionQueryBuilder.BuildSearchQuery<ReadOnlyMemory<float>, DummyType>(
9388
vector,
89+
keywords: null,
9490
fields,
9591
this._storagePropertyNames,
9692
vectorPropertyName,
93+
textPropertyName: null,
9794
ScorePropertyName,
98-
searchOptions);
95+
oldFilter: filter,
96+
filter: null,
97+
10,
98+
0);
9999

100100
var queryText = queryDefinition.QueryText;
101101
var queryParameters = queryDefinition.GetQueryParameters();
102102

103103
// Assert
104-
Assert.Contains("SELECT TOP @top x.test_property_1,x.test_property_2,x.test_property_3,VectorDistance(x.test_property_1, @vector) AS TestScore", queryText);
104+
Assert.Contains("SELECT TOP 10 x.test_property_1,x.test_property_2,x.test_property_3,VectorDistance(x.test_property_1, @vector) AS TestScore", queryText);
105105
Assert.Contains("FROM x", queryText);
106106
Assert.Contains("WHERE x.test_property_2 = @cv0 AND ARRAY_CONTAINS(x.test_property_3, @cv1)", queryText);
107107
Assert.Contains("ORDER BY VectorDistance(x.test_property_1, @vector)", queryText);
108108

109-
Assert.DoesNotContain("OFFSET @offset LIMIT @limit", queryText);
109+
Assert.DoesNotContain("OFFSET 0 LIMIT 10", queryText);
110110

111111
Assert.Equal("@vector", queryParameters[0].Name);
112112
Assert.Equal(vector, queryParameters[0].Value);
113113

114-
Assert.Equal("@top", queryParameters[1].Name);
115-
Assert.Equal(10, queryParameters[1].Value);
116-
117-
Assert.Equal("@cv0", queryParameters[2].Name);
118-
Assert.Equal("test-value-2", queryParameters[2].Value);
114+
Assert.Equal("@cv0", queryParameters[1].Name);
115+
Assert.Equal("test-value-2", queryParameters[1].Value);
119116

120-
Assert.Equal("@cv1", queryParameters[3].Name);
121-
Assert.Equal("test-value-3", queryParameters[3].Value);
117+
Assert.Equal("@cv1", queryParameters[2].Name);
118+
Assert.Equal("test-value-3", queryParameters[2].Value);
122119
}
123120

124121
[Fact]
@@ -131,17 +128,20 @@ public void BuildSearchQueryWithInvalidFilterThrowsException()
131128

132129
var filter = new VectorSearchFilter().EqualTo("non-existent-property", "test-value-2");
133130

134-
var searchOptions = new VectorSearchOptions<DummyType> { OldFilter = filter, Skip = 5, Top = 10 };
135-
136131
// Act & Assert
137132
Assert.Throws<InvalidOperationException>(() =>
138-
AzureCosmosDBNoSQLVectorStoreCollectionQueryBuilder.BuildSearchQuery(
133+
AzureCosmosDBNoSQLVectorStoreCollectionQueryBuilder.BuildSearchQuery<ReadOnlyMemory<float>, DummyType>(
139134
vector,
135+
keywords: null,
140136
fields,
141137
this._storagePropertyNames,
142138
vectorPropertyName,
139+
textPropertyName: null,
143140
ScorePropertyName,
144-
searchOptions));
141+
oldFilter: filter,
142+
filter: null,
143+
10,
144+
5));
145145
}
146146

147147
[Fact]
@@ -152,31 +152,29 @@ public void BuildSearchQueryWithoutFilterDoesNotContainWhereClause()
152152
var vectorPropertyName = "test_property_1";
153153
var fields = this._storagePropertyNames.Values.ToList();
154154

155-
var searchOptions = new VectorSearchOptions<DummyType> { Skip = 5, Top = 10 };
156-
157155
// Act
158-
var queryDefinition = AzureCosmosDBNoSQLVectorStoreCollectionQueryBuilder.BuildSearchQuery(
156+
var queryDefinition = AzureCosmosDBNoSQLVectorStoreCollectionQueryBuilder.BuildSearchQuery<ReadOnlyMemory<float>, DummyType>(
159157
vector,
158+
keywords: null,
160159
fields,
161160
this._storagePropertyNames,
162161
vectorPropertyName,
162+
textPropertyName: null,
163163
ScorePropertyName,
164-
searchOptions);
164+
oldFilter: null,
165+
filter: null,
166+
10,
167+
5);
165168

166169
var queryText = queryDefinition.QueryText;
167170
var queryParameters = queryDefinition.GetQueryParameters();
168171

169172
// Assert
170173
Assert.DoesNotContain("WHERE", queryText);
174+
Assert.Contains("OFFSET 5 LIMIT 10", queryText);
171175

172176
Assert.Equal("@vector", queryParameters[0].Name);
173177
Assert.Equal(vector, queryParameters[0].Value);
174-
175-
Assert.Equal("@offset", queryParameters[1].Name);
176-
Assert.Equal(5, queryParameters[1].Value);
177-
178-
Assert.Equal("@limit", queryParameters[2].Name);
179-
Assert.Equal(10, queryParameters[2].Value);
180178
}
181179

182180
[Fact]
@@ -215,6 +213,54 @@ FROM x
215213
Assert.Equal("partition_key", queryParameters[1].Value);
216214
}
217215

216+
[Fact]
217+
public void BuildSearchQueryWithHybridFieldsReturnsValidHybridQueryDefinition()
218+
{
219+
// Arrange
220+
var vector = new ReadOnlyMemory<float>([1f, 2f, 3f]);
221+
var keywordText = "hybrid";
222+
var vectorPropertyName = "test_property_1";
223+
var textPropertyName = "test_property_2";
224+
var fields = this._storagePropertyNames.Values.ToList();
225+
226+
var filter = new VectorSearchFilter()
227+
.EqualTo("TestProperty2", "test-value-2")
228+
.AnyTagEqualTo("TestProperty3", "test-value-3");
229+
230+
// Act
231+
var queryDefinition = AzureCosmosDBNoSQLVectorStoreCollectionQueryBuilder.BuildSearchQuery<ReadOnlyMemory<float>, DummyType>(
232+
vector,
233+
[keywordText],
234+
fields,
235+
this._storagePropertyNames,
236+
vectorPropertyName,
237+
textPropertyName,
238+
ScorePropertyName,
239+
oldFilter: filter,
240+
filter: null,
241+
10,
242+
5);
243+
244+
var queryText = queryDefinition.QueryText;
245+
var queryParameters = queryDefinition.GetQueryParameters();
246+
247+
// Assert
248+
Assert.Contains("SELECT x.test_property_1,x.test_property_2,x.test_property_3,VectorDistance(x.test_property_1, @vector) AS TestScore", queryText);
249+
Assert.Contains("FROM x", queryText);
250+
Assert.Contains("WHERE x.test_property_2 = @cv0 AND ARRAY_CONTAINS(x.test_property_3, @cv1)", queryText);
251+
Assert.Contains("ORDER BY RANK RRF(VectorDistance(x.test_property_1, @vector), FullTextScore(x.test_property_2, [\"hybrid\"]))", queryText);
252+
Assert.Contains("OFFSET 5 LIMIT 10", queryText);
253+
254+
Assert.Equal("@vector", queryParameters[0].Name);
255+
Assert.Equal(vector, queryParameters[0].Value);
256+
257+
Assert.Equal("@cv0", queryParameters[1].Name);
258+
Assert.Equal("test-value-2", queryParameters[1].Value);
259+
260+
Assert.Equal("@cv1", queryParameters[2].Name);
261+
Assert.Equal("test-value-3", queryParameters[2].Value);
262+
}
263+
218264
#pragma warning disable CA1812 // An internal class that is apparently never instantiated. If so, remove the code from the assembly.
219265
private sealed class DummyType;
220266
#pragma warning restore CA1812

0 commit comments

Comments
 (0)