Skip to content

Commit dfb9d3d

Browse files
joachim-danswer authored and Weves committed
removal of keyword 1st phase
1 parent 7c0fc74 commit dfb9d3d

File tree

1 file changed

+4
-31
lines changed

1 file changed

+4
-31
lines changed

backend/onyx/context/search/retrieval/search_runner.py

+4 -31
Original file line number | Diff line number | Diff line change
@@ -164,9 +164,7 @@ def doc_index_retrieval(
164164
top_base_chunks_standard_ranking_thread: (
165165
TimeoutThread[list[InferenceChunkUncleaned]] | None
166166
) = None
167-
top_base_chunks_keyword_ranking_thread: (
168-
TimeoutThread[list[InferenceChunkUncleaned]] | None
169-
) = None
167+
170168
top_semantic_chunks_thread: TimeoutThread[list[InferenceChunkUncleaned]] | None = (
171169
None
172170
)
@@ -190,20 +188,6 @@ def doc_index_retrieval(
190188
query.offset,
191189
)
192190

193-
# same query but with 1st vespa phase as keyword retrieval
194-
top_base_chunks_keyword_ranking_thread = run_in_background(
195-
document_index.hybrid_retrieval,
196-
query.query,
197-
query_embedding,
198-
query.processed_keywords,
199-
query.filters,
200-
query.hybrid_alpha,
201-
query.recency_bias_multiplier,
202-
query.num_hits,
203-
QueryExpansionType.KEYWORD,
204-
query.offset,
205-
)
206-
207191
if (
208192
query.expanded_queries
209193
and query.expanded_queries.keywords_expansions
@@ -264,21 +248,14 @@ def doc_index_retrieval(
264248
top_base_chunks_standard_ranking = wait_on_background(
265249
top_base_chunks_standard_ranking_thread
266250
)
267-
top_base_chunks_keyword_ranking = wait_on_background(
268-
top_base_chunks_keyword_ranking_thread
269-
)
270251

271252
top_keyword_chunks = wait_on_background(top_keyword_chunks_thread)
272253

273254
if query.search_type == SearchType.SEMANTIC:
274255
assert top_semantic_chunks_thread is not None
275256
top_semantic_chunks = wait_on_background(top_semantic_chunks_thread)
276257

277-
all_top_chunks = (
278-
top_base_chunks_standard_ranking
279-
+ top_base_chunks_keyword_ranking
280-
+ top_keyword_chunks
281-
)
258+
all_top_chunks = top_base_chunks_standard_ranking + top_keyword_chunks
282259

283260
# use all three retrieval methods to retrieve top chunks
284261

@@ -293,12 +270,8 @@ def doc_index_retrieval(
293270
top_base_chunks_standard_ranking = wait_on_background(
294271
top_base_chunks_standard_ranking_thread
295272
)
296-
top_base_chunks_keyword_ranking = wait_on_background(
297-
top_base_chunks_keyword_ranking_thread
298-
)
299-
top_chunks = _dedupe_chunks(
300-
top_base_chunks_standard_ranking + top_base_chunks_keyword_ranking
301-
)
273+
274+
top_chunks = _dedupe_chunks(top_base_chunks_standard_ranking)
302275

303276
logger.info(f"Overall number of top initial retrieval chunks: {len(top_chunks)}")
304277

0 commit comments

Comments
 (0)