@@ -164,9 +164,7 @@ def doc_index_retrieval(
164
164
top_base_chunks_standard_ranking_thread : (
165
165
TimeoutThread [list [InferenceChunkUncleaned ]] | None
166
166
) = None
167
- top_base_chunks_keyword_ranking_thread : (
168
- TimeoutThread [list [InferenceChunkUncleaned ]] | None
169
- ) = None
167
+
170
168
top_semantic_chunks_thread : TimeoutThread [list [InferenceChunkUncleaned ]] | None = (
171
169
None
172
170
)
@@ -190,20 +188,6 @@ def doc_index_retrieval(
190
188
query .offset ,
191
189
)
192
190
193
- # same query but with 1st vespa phase as keyword retrieval
194
- top_base_chunks_keyword_ranking_thread = run_in_background (
195
- document_index .hybrid_retrieval ,
196
- query .query ,
197
- query_embedding ,
198
- query .processed_keywords ,
199
- query .filters ,
200
- query .hybrid_alpha ,
201
- query .recency_bias_multiplier ,
202
- query .num_hits ,
203
- QueryExpansionType .KEYWORD ,
204
- query .offset ,
205
- )
206
-
207
191
if (
208
192
query .expanded_queries
209
193
and query .expanded_queries .keywords_expansions
@@ -264,21 +248,14 @@ def doc_index_retrieval(
264
248
top_base_chunks_standard_ranking = wait_on_background (
265
249
top_base_chunks_standard_ranking_thread
266
250
)
267
- top_base_chunks_keyword_ranking = wait_on_background (
268
- top_base_chunks_keyword_ranking_thread
269
- )
270
251
271
252
top_keyword_chunks = wait_on_background (top_keyword_chunks_thread )
272
253
273
254
if query .search_type == SearchType .SEMANTIC :
274
255
assert top_semantic_chunks_thread is not None
275
256
top_semantic_chunks = wait_on_background (top_semantic_chunks_thread )
276
257
277
- all_top_chunks = (
278
- top_base_chunks_standard_ranking
279
- + top_base_chunks_keyword_ranking
280
- + top_keyword_chunks
281
- )
258
+ all_top_chunks = top_base_chunks_standard_ranking + top_keyword_chunks
282
259
283
260
# use all three retrieval methods to retrieve top chunks
284
261
@@ -293,12 +270,8 @@ def doc_index_retrieval(
293
270
top_base_chunks_standard_ranking = wait_on_background (
294
271
top_base_chunks_standard_ranking_thread
295
272
)
296
- top_base_chunks_keyword_ranking = wait_on_background (
297
- top_base_chunks_keyword_ranking_thread
298
- )
299
- top_chunks = _dedupe_chunks (
300
- top_base_chunks_standard_ranking + top_base_chunks_keyword_ranking
301
- )
273
+
274
+ top_chunks = _dedupe_chunks (top_base_chunks_standard_ranking )
302
275
303
276
logger .info (f"Overall number of top initial retrieval chunks: { len (top_chunks )} " )
304
277
0 commit comments