From bf91743e258359d6805d5ab219b7ad44cd3f4d63 Mon Sep 17 00:00:00 2001 From: jonghoon park Date: Thu, 6 Feb 2025 03:02:43 +0900 Subject: [PATCH] Make the embedding field name configurable for the ElasticSearchVectorStore Signed-off-by: jonghoon park --- ...ticsearchVectorStoreAutoConfiguration.java | 6 +- .../ElasticsearchVectorStoreProperties.java | 16 +++++- .../ElasticsearchVectorStore.java | 26 ++++++--- .../ElasticsearchVectorStoreOptions.java | 16 +++++- .../ElasticsearchVectorStoreIT.java | 55 ++++++++++++------- 5 files changed, 88 insertions(+), 31 deletions(-) diff --git a/spring-ai-spring-boot-autoconfigure/src/main/java/org/springframework/ai/autoconfigure/vectorstore/elasticsearch/ElasticsearchVectorStoreAutoConfiguration.java b/spring-ai-spring-boot-autoconfigure/src/main/java/org/springframework/ai/autoconfigure/vectorstore/elasticsearch/ElasticsearchVectorStoreAutoConfiguration.java index 8541e8e833a..38227442c1a 100644 --- a/spring-ai-spring-boot-autoconfigure/src/main/java/org/springframework/ai/autoconfigure/vectorstore/elasticsearch/ElasticsearchVectorStoreAutoConfiguration.java +++ b/spring-ai-spring-boot-autoconfigure/src/main/java/org/springframework/ai/autoconfigure/vectorstore/elasticsearch/ElasticsearchVectorStoreAutoConfiguration.java @@ -1,5 +1,5 @@ /* - * Copyright 2023-2024 the original author or authors. + * Copyright 2023-2025 the original author or authors. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -42,6 +42,7 @@ * @author Josh Long * @author Christian Tzolov * @author Soby Chacko + * @author Jonghoon Park * @since 1.0.0 */ @AutoConfiguration(after = ElasticsearchRestClientAutoConfiguration.class) @@ -72,6 +73,9 @@ ElasticsearchVectorStore vectorStore(ElasticsearchVectorStoreProperties properti if (properties.getSimilarity() != null) { elasticsearchVectorStoreOptions.setSimilarity(properties.getSimilarity()); } + if (properties.getFieldName() != null) { + elasticsearchVectorStoreOptions.setFieldName(properties.getFieldName()); + } return ElasticsearchVectorStore.builder(restClient, embeddingModel) .options(elasticsearchVectorStoreOptions) diff --git a/spring-ai-spring-boot-autoconfigure/src/main/java/org/springframework/ai/autoconfigure/vectorstore/elasticsearch/ElasticsearchVectorStoreProperties.java b/spring-ai-spring-boot-autoconfigure/src/main/java/org/springframework/ai/autoconfigure/vectorstore/elasticsearch/ElasticsearchVectorStoreProperties.java index 876c13ab579..e5ccff4cb3c 100644 --- a/spring-ai-spring-boot-autoconfigure/src/main/java/org/springframework/ai/autoconfigure/vectorstore/elasticsearch/ElasticsearchVectorStoreProperties.java +++ b/spring-ai-spring-boot-autoconfigure/src/main/java/org/springframework/ai/autoconfigure/vectorstore/elasticsearch/ElasticsearchVectorStoreProperties.java @@ -1,5 +1,5 @@ /* - * Copyright 2023-2024 the original author or authors. + * Copyright 2023-2025 the original author or authors. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -26,6 +26,7 @@ * @author Eddú Meléndez * @author Wei Jiang * @author Josh Long + * @author Jonghoon Park * @since 1.0.0 */ @ConfigurationProperties(prefix = "spring.ai.vectorstore.elasticsearch") @@ -46,6 +47,11 @@ public class ElasticsearchVectorStoreProperties extends CommonVectorStorePropert */ private SimilarityFunction similarity; + /** + * The name of the vector field to search against + */ + private String fieldName = "embedding"; + public String getIndexName() { return this.indexName; } @@ -70,4 +76,12 @@ public void setSimilarity(SimilarityFunction similarity) { this.similarity = similarity; } + public String getFieldName() { + return fieldName; + } + + public void setFieldName(String fieldName) { + this.fieldName = fieldName; + } + } diff --git a/vector-stores/spring-ai-elasticsearch-store/src/main/java/org/springframework/ai/vectorstore/elasticsearch/ElasticsearchVectorStore.java b/vector-stores/spring-ai-elasticsearch-store/src/main/java/org/springframework/ai/vectorstore/elasticsearch/ElasticsearchVectorStore.java index 377ce78dcbf..d3ef28d4904 100644 --- a/vector-stores/spring-ai-elasticsearch-store/src/main/java/org/springframework/ai/vectorstore/elasticsearch/ElasticsearchVectorStore.java +++ b/vector-stores/spring-ai-elasticsearch-store/src/main/java/org/springframework/ai/vectorstore/elasticsearch/ElasticsearchVectorStore.java @@ -145,6 +145,7 @@ * @author Christian Tzolov * @author Thomas Vitale * @author Ilayaperumal Gopinathan + * @author Jonghoon Park * @since 1.0.0 */ public class ElasticsearchVectorStore extends AbstractObservationVectorStore implements InitializingBean { @@ -191,11 +192,12 @@ public void doAdd(List documents) { List embeddings = this.embeddingModel.embed(documents, EmbeddingOptionsBuilder.builder().build(), this.batchingStrategy); - for (Document document : documents) { - ElasticSearchDocument doc = new ElasticSearchDocument(document.getId(), document.getText(), - document.getMetadata(), 
embeddings.get(documents.indexOf(document))); - bulkRequestBuilder.operations( - op -> op.index(idx -> idx.index(this.options.getIndexName()).id(document.getId()).document(doc))); + for (int i = 0; i < embeddings.size(); i++) { + Document document = documents.get(i); + float[] embedding = embeddings.get(i); + bulkRequestBuilder.operations(op -> op.index(idx -> idx.index(this.options.getIndexName()) + .id(document.getId()) + .document(getDocument(document, embedding, this.options.getFieldName())))); } BulkResponse bulkRequest = bulkRequest(bulkRequestBuilder.build()); if (bulkRequest.errors()) { @@ -208,6 +210,16 @@ public void doAdd(List documents) { } } + private Object getDocument(Document document, float[] embedding, String fieldName) { + Assert.notNull(document.getText(), "document's text must not be null"); + if (fieldName.equals("embedding")) { + return new ElasticSearchDocument(document.getId(), document.getText(), document.getMetadata(), embedding); + } + + return Map.of("id", document.getId(), "content", document.getText(), "metadata", document.getMetadata(), + fieldName, embedding); + } + @Override public Optional doDelete(List idList) { BulkRequest.Builder bulkRequestBuilder = new BulkRequest.Builder(); @@ -264,7 +276,7 @@ public List doSimilaritySearch(SearchRequest searchRequest) { .knn(knn -> knn.queryVector(EmbeddingUtils.toList(vectors)) .similarity(finalThreshold) .k((long) searchRequest.getTopK()) - .field("embedding") + .field(this.options.getFieldName()) .numCandidates((long) (1.5 * searchRequest.getTopK())) .filter(fl -> fl .queryString(qs -> qs.query(getElasticsearchQueryString(searchRequest.getFilterExpression()))))) @@ -322,7 +334,7 @@ private void createIndexMapping() { try { this.elasticsearchClient.indices() .create(cr -> cr.index(this.options.getIndexName()) - .mappings(map -> map.properties("embedding", + .mappings(map -> map.properties(this.options.getFieldName(), p -> p.denseVector(dv -> 
dv.similarity(this.options.getSimilarity().toString()) .dims(this.options.getDimensions()))))); } diff --git a/vector-stores/spring-ai-elasticsearch-store/src/main/java/org/springframework/ai/vectorstore/elasticsearch/ElasticsearchVectorStoreOptions.java b/vector-stores/spring-ai-elasticsearch-store/src/main/java/org/springframework/ai/vectorstore/elasticsearch/ElasticsearchVectorStoreOptions.java index 2958b6e6607..47d965a64de 100644 --- a/vector-stores/spring-ai-elasticsearch-store/src/main/java/org/springframework/ai/vectorstore/elasticsearch/ElasticsearchVectorStoreOptions.java +++ b/vector-stores/spring-ai-elasticsearch-store/src/main/java/org/springframework/ai/vectorstore/elasticsearch/ElasticsearchVectorStoreOptions.java @@ -1,5 +1,5 @@ /* - * Copyright 2023-2024 the original author or authors. + * Copyright 2023-2025 the original author or authors. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -21,6 +21,7 @@ * https://www.elastic.co/guide/en/elasticsearch/reference/current/dense-vector.html * * @author Wei Jiang + * @author Jonghoon Park * @since 1.0.0 */ public class ElasticsearchVectorStoreOptions { @@ -40,6 +41,11 @@ public class ElasticsearchVectorStoreOptions { */ private SimilarityFunction similarity = SimilarityFunction.cosine; + /** + * The name of the vector field to search against + */ + private String fieldName = "embedding"; + public String getIndexName() { return this.indexName; } @@ -64,4 +70,12 @@ public void setSimilarity(SimilarityFunction similarity) { this.similarity = similarity; } + public String getFieldName() { + return fieldName; + } + + public void setFieldName(String fieldName) { + this.fieldName = fieldName; + } + } diff --git a/vector-stores/spring-ai-elasticsearch-store/src/test/java/org/springframework/ai/vectorstore/elasticsearch/ElasticsearchVectorStoreIT.java b/vector-stores/spring-ai-elasticsearch-store/src/test/java/org/springframework/ai/vectorstore/elasticsearch/ElasticsearchVectorStoreIT.java index 1d8a2521d1f..c5b33f6c7e2 100644 --- a/vector-stores/spring-ai-elasticsearch-store/src/test/java/org/springframework/ai/vectorstore/elasticsearch/ElasticsearchVectorStoreIT.java +++ b/vector-stores/spring-ai-elasticsearch-store/src/test/java/org/springframework/ai/vectorstore/elasticsearch/ElasticsearchVectorStoreIT.java @@ -117,10 +117,11 @@ void cleanDatabase() { }); } - @Test - public void addAndDeleteDocumentsTest() { + @ParameterizedTest(name = "{0} : {displayName} ") + @ValueSource(strings = { "cosine", "custom_field" }) + public void addAndDeleteDocumentsTest(String vectorStoreBeanName) { getContextRunner().run(context -> { - ElasticsearchVectorStore vectorStore = context.getBean("vectorStore_cosine", + ElasticsearchVectorStore vectorStore = context.getBean("vectorStore_" + vectorStoreBeanName, ElasticsearchVectorStore.class); ElasticsearchClient elasticsearchClient = context.getBean(ElasticsearchClient.class); @@ 
-149,10 +150,11 @@ public void addAndDeleteDocumentsTest() { }); } - @Test - public void deleteDocumentsByFilterExpressionTest() { + @ParameterizedTest(name = "{0} : {displayName} ") + @ValueSource(strings = { "cosine", "custom_field" }) + public void deleteDocumentsByFilterExpressionTest(String vectorStoreBeanName) { getContextRunner().run(context -> { - ElasticsearchVectorStore vectorStore = context.getBean("vectorStore_cosine", + ElasticsearchVectorStore vectorStore = context.getBean("vectorStore_" + vectorStoreBeanName, ElasticsearchVectorStore.class); ElasticsearchClient elasticsearchClient = context.getBean(ElasticsearchClient.class); @@ -202,10 +204,11 @@ public void deleteDocumentsByFilterExpressionTest() { }); } - @Test - public void deleteWithStringFilterExpressionTest() { + @ParameterizedTest(name = "{0} : {displayName} ") + @ValueSource(strings = { "cosine", "custom_field" }) + public void deleteWithStringFilterExpressionTest(String vectorStoreBeanName) { getContextRunner().run(context -> { - ElasticsearchVectorStore vectorStore = context.getBean("vectorStore_cosine", + ElasticsearchVectorStore vectorStore = context.getBean("vectorStore_" + vectorStoreBeanName, ElasticsearchVectorStore.class); ElasticsearchClient elasticsearchClient = context.getBean(ElasticsearchClient.class); @@ -234,12 +237,12 @@ public void deleteWithStringFilterExpressionTest() { } @ParameterizedTest(name = "{0} : {displayName} ") - @ValueSource(strings = { "cosine", "l2_norm", "dot_product" }) - public void addAndSearchTest(String similarityFunction) { + @ValueSource(strings = { "cosine", "l2_norm", "dot_product", "custom_field" }) + public void addAndSearchTest(String vectorStoreBeanName) { getContextRunner().run(context -> { - ElasticsearchVectorStore vectorStore = context.getBean("vectorStore_" + similarityFunction, + ElasticsearchVectorStore vectorStore = context.getBean("vectorStore_" + vectorStoreBeanName, ElasticsearchVectorStore.class); vectorStore.add(this.documents); @@ 
-271,11 +274,11 @@ public void addAndSearchTest(String similarityFunction) { } @ParameterizedTest(name = "{0} : {displayName} ") - @ValueSource(strings = { "cosine", "l2_norm", "dot_product" }) - public void searchWithFilters(String similarityFunction) { + @ValueSource(strings = { "cosine", "l2_norm", "dot_product", "custom_field" }) + public void searchWithFilters(String vectorStoreBeanName) { getContextRunner().run(context -> { - ElasticsearchVectorStore vectorStore = context.getBean("vectorStore_" + similarityFunction, + ElasticsearchVectorStore vectorStore = context.getBean("vectorStore_" + vectorStoreBeanName, ElasticsearchVectorStore.class); var bgDocument = new Document("1", "The World is Big and Salvation Lurks Around the Corner", @@ -385,11 +388,11 @@ public void searchWithFilters(String similarityFunction) { } @ParameterizedTest(name = "{0} : {displayName} ") - @ValueSource(strings = { "cosine", "l2_norm", "dot_product" }) - public void documentUpdateTest(String similarityFunction) { + @ValueSource(strings = { "cosine", "l2_norm", "dot_product", "custom_field" }) + public void documentUpdateTest(String vectorStoreBeanName) { getContextRunner().run(context -> { - ElasticsearchVectorStore vectorStore = context.getBean("vectorStore_" + similarityFunction, + ElasticsearchVectorStore vectorStore = context.getBean("vectorStore_" + vectorStoreBeanName, ElasticsearchVectorStore.class); Document document = new Document(UUID.randomUUID().toString(), "Spring AI rocks!!", @@ -443,10 +446,10 @@ public void documentUpdateTest(String similarityFunction) { } @ParameterizedTest(name = "{0} : {displayName} ") - @ValueSource(strings = { "cosine", "l2_norm", "dot_product" }) - public void searchThresholdTest(String similarityFunction) { + @ValueSource(strings = { "cosine", "l2_norm", "dot_product", "custom_field" }) + public void searchThresholdTest(String vectorStoreBeanName) { getContextRunner().run(context -> { - ElasticsearchVectorStore vectorStore = 
context.getBean("vectorStore_" + similarityFunction, + ElasticsearchVectorStore vectorStore = context.getBean("vectorStore_" + vectorStoreBeanName, ElasticsearchVectorStore.class); vectorStore.add(this.documents); @@ -581,6 +584,16 @@ public ElasticsearchVectorStore vectorStoreDotProduct(EmbeddingModel embeddingMo .build(); } + @Bean("vectorStore_custom_field") + public ElasticsearchVectorStore vectorStoreCustomField(EmbeddingModel embeddingModel, RestClient restClient) { + ElasticsearchVectorStoreOptions options = new ElasticsearchVectorStoreOptions(); + options.setFieldName("custom_field"); + return ElasticsearchVectorStore.builder(restClient, embeddingModel) + .initializeSchema(true) + .options(options) + .build(); + } + @Bean public EmbeddingModel embeddingModel() { return new OpenAiEmbeddingModel(new OpenAiApi(System.getenv("OPENAI_API_KEY")));