-
Notifications
You must be signed in to change notification settings - Fork 3.8k
/
Copy pathvector_search.py
117 lines (89 loc) · 4.86 KB
/
vector_search.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
# Copyright (c) Microsoft. All rights reserved.
import logging
from abc import abstractmethod
from collections.abc import AsyncIterable, Sequence
from typing import Any, Generic, TypeVar
from semantic_kernel.data.kernel_search_results import KernelSearchResults
from semantic_kernel.data.search_options import SearchOptions
from semantic_kernel.data.vector_search.vector_search_options import VectorSearchOptions
from semantic_kernel.data.vector_search.vector_search_result import VectorSearchResult
from semantic_kernel.data.vector_storage.vector_store_record_collection import VectorStoreRecordCollection
from semantic_kernel.utils.experimental_decorator import experimental_class
from semantic_kernel.utils.list_handler import desync_list
# Type parameters shared with VectorStoreRecordCollection[TKey, TModel]; TModel is also
# the type argument of the VectorSearchResult objects produced by searches.
TModel = TypeVar("TModel")
TKey = TypeVar("TKey")
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
@experimental_class
class VectorSearchBase(VectorStoreRecordCollection[TKey, TModel], Generic[TKey, TModel]):
    """Base class that adds vector search plumbing to a vector store record collection.

    Subclasses implement ``_inner_search`` and the two result-unpacking hooks
    (``_get_record_from_result`` and ``_get_score_from_result``). The helper
    ``_get_vector_search_results_from_results`` then converts raw store results
    into ``VectorSearchResult`` objects.
    """

    @property
    def options_class(self) -> type[SearchOptions]:
        """The options class for the search."""
        return VectorSearchOptions

    # region: New abstract methods to be implemented by vector stores

    @abstractmethod
    async def _inner_search(
        self,
        options: VectorSearchOptions,
        search_text: str | None = None,
        vectorizable_text: str | None = None,
        vector: list[float | int] | None = None,
        **kwargs: Any,
    ) -> KernelSearchResults[VectorSearchResult[TModel]]:
        """Inner search method.

        This is the main search method that should be implemented, and it will be
        called by the public search methods.

        Currently, the public interface mixin functions provide at least one of the
        three search contents (``search_text``, ``vectorizable_text``, ``vector``);
        in the future this may be expanded to allow several of them at once.
        Implementations must therefore deal with the possibility that multiple
        search contents are provided, and should handle them in a way that makes
        sense for that particular store.

        This method should return a KernelSearchResults object with the results of
        the search. The inner "results" object of the KernelSearchResults should be
        an async iterator that yields the search results; this allows things like
        paging to be implemented. There is a default helper method,
        ``_get_vector_search_results_from_results``, to convert the raw results to
        an async iterable of VectorSearchResult objects, but this can be overridden
        if necessary.

        Args:
            options: The search options; an object of type VectorSearchOptions or a
                subclass of it. NOTE(review): the annotation is non-optional;
                whether callers may pass None is not visible here - confirm.
            search_text: The text to search for, optional.
            vectorizable_text: The text to search for, will be vectorized downstream, optional.
            vector: The vector to search for, optional.
            **kwargs: Additional arguments that might be needed.

        Returns:
            The search results, wrapped in a KernelSearchResults object.
        """
        ...

    @abstractmethod
    def _get_record_from_result(self, result: Any) -> Any:
        """Get the record from the returned search result.

        Does any unpacking or processing of the result to get just the record.

        If the underlying SDK of the store returns a particular type that might
        include something like a score or other metadata, this method should be
        overridden to extract just the record.

        Likely returns a dict, but in some cases could return the record in the
        form of an SDK-specific object.

        This method is used as part of the ``_get_vector_search_results_from_results``
        method; its output is passed to the deserializer.

        Args:
            result: A single raw result as produced by the store.

        Returns:
            The record portion of the result, ready to be deserialized.
        """
        ...

    @abstractmethod
    def _get_score_from_result(self, result: Any) -> float | None:
        """Get the score from the result.

        Does any unpacking or processing of the result to get just the score.

        If the underlying SDK of the store returns a particular type with a score
        or other metadata, this method extracts it.

        Args:
            result: A single raw result as produced by the store.

        Returns:
            The score of the result, or None.
        """
        ...

    # endregion

    # region: New methods

    async def _get_vector_search_results_from_results(
        self, results: AsyncIterable[Any] | Sequence[Any], options: VectorSearchOptions | None = None
    ) -> AsyncIterable[VectorSearchResult[TModel]]:
        """Convert raw store results into an async stream of VectorSearchResult objects.

        Each raw result is unpacked with ``_get_record_from_result``, passed through
        ``self.deserialize`` and paired with the score returned by
        ``_get_score_from_result``.

        A result is skipped only when the deserializer returns None. Records that
        merely evaluate as falsy (for example an empty dict, or a model that defines
        ``__len__`` or ``__bool__``) are still yielded.

        Args:
            results: The raw results, either an async iterable or a sequence. A
                sequence is wrapped with ``desync_list`` so that both forms can be
                consumed with ``async for``.
            options: The search options; only ``include_vectors`` is read from it.
                When no options are given, vectors are included.

        Yields:
            One VectorSearchResult (record plus score) per deserialized record.
        """
        if isinstance(results, Sequence):
            results = desync_list(results)
        # The flag does not change between results, so resolve it once up front.
        include_vectors = options.include_vectors if options else True
        async for result in results:
            record = self.deserialize(self._get_record_from_result(result), include_vectors=include_vectors)
            score = self._get_score_from_result(result)
            # Compare against None explicitly: a truthiness test would silently
            # drop valid records that happen to be falsy.
            if record is not None:
                # single records are always returned as single records by the deserializer
                yield VectorSearchResult(record=record, score=score)  # type: ignore

    # endregion