forked from microsoft/semantic-kernel
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmemory_with_pandas.py
70 lines (58 loc) · 2.29 KB
/
memory_with_pandas.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
# Copyright (c) Microsoft. All rights reserved.
import asyncio
from uuid import uuid4
import pandas as pd
from semantic_kernel import Kernel
from semantic_kernel.connectors.ai.open_ai import (
OpenAIEmbeddingPromptExecutionSettings,
OpenAITextEmbedding,
)
from semantic_kernel.connectors.memory.azure_ai_search import AzureAISearchCollection
from semantic_kernel.data import (
VectorStoreRecordDataField,
VectorStoreRecordDefinition,
VectorStoreRecordKeyField,
VectorStoreRecordVectorField,
)
from semantic_kernel.data.vector_search import add_vector_to_records
model_fields = VectorStoreRecordDefinition(
container_mode=True,
fields={
"content": VectorStoreRecordDataField(has_embedding=True, embedding_property_name="vector"),
"id": VectorStoreRecordKeyField(),
"vector": VectorStoreRecordVectorField(
embedding_settings={"embedding": OpenAIEmbeddingPromptExecutionSettings(dimensions=1536)}
),
},
to_dict=lambda record, **_: record.to_dict(orient="records"),
from_dict=lambda records, **_: pd.DataFrame(records),
)
async def main():
# setup the kernel
kernel = Kernel()
kernel.add_service(OpenAITextEmbedding(service_id="embedding", ai_model_id="text-embedding-3-small"))
# create the record collection
async with AzureAISearchCollection[pd.DataFrame](
data_model_type=pd.DataFrame,
data_model_definition=model_fields,
) as record_collection:
# create some records
records = [
{"id": str(uuid4()), "content": "my dict text", "vector": None},
{"id": str(uuid4()), "content": "my second text", "vector": None},
]
# create the dataframe and add the embeddings
df = pd.DataFrame(records)
df = await add_vector_to_records(kernel, df, None, data_model_definition=model_fields)
print("Records with embeddings:")
print(df.shape)
print(df.head(5))
# upsert the records (for a container, upsert and upsert_batch are equivalent)
await record_collection.upsert_batch(df)
# retrieve a record
result = await record_collection.get(records[0]["id"])
print("Retrieved records:")
print(result.shape)
print(result.head(5))
if __name__ == "__main__":
asyncio.run(main())