Skip to content

Commit

Permalink
Upgrade Milvus Java SDK to 2.5.4
Browse files Browse the repository at this point in the history
  - Upgrade Milvus Java SDK to 2.5.4
  - Remove use of fastjson
    - Use com.google.gson.JsonObject, as the Milvus SDK replaces fastjson with Gson
  - Rename the SearchResult RowRecord similarity metric field to "score"
    - Milvus SDK 2.5.4 uses "score" instead of "distance"

Signed-off-by: Ilayaperumal Gopinathan <[email protected]>
  • Loading branch information
ilayaperumalg authored and sobychacko committed Feb 26, 2025
1 parent 70fe412 commit 0065949
Show file tree
Hide file tree
Showing 7 changed files with 37 additions and 34 deletions.
4 changes: 2 additions & 2 deletions pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -215,10 +215,10 @@
<pgvector.version>0.1.6</pgvector.version>
<sap.hanadb.version>2.20.11</sap.hanadb.version>
<coherence.version>24.09</coherence.version>
<milvus.version>2.3.5</milvus.version>
<milvus.version>2.5.4</milvus.version>
<gemfire.testcontainers.version>2.3.0</gemfire.testcontainers.version>
<pinecone.version>0.8.0</pinecone.version>
<fastjson.version>2.0.46</fastjson.version>
<fastjson2.version>2.0.46</fastjson2.version>
<azure-core.version>1.53.0</azure-core.version>
<azure-json.version>1.3.0</azure-json.version>
<azure-identity.version>1.14.0</azure-identity.version>
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -110,7 +110,8 @@ protected void deleteById() {
assertThat(results.get(0).getId()).isEqualTo(documents.get(2).getId());
Map<String, Object> metadata = results.get(0).getMetadata();
assertThat(normalizeValue(metadata.get("country"))).isEqualTo("BG");
assertThat(normalizeValue(metadata.get("year"))).isEqualTo("2023");
// the values are converted into Double
assertThat(normalizeValue(metadata.get("year"))).isEqualTo("2023.0");

vectorStore.delete(List.of(documents.get(2).getId()));
});
Expand Down
2 changes: 1 addition & 1 deletion vector-stores/spring-ai-azure-store/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,7 @@
<dependency>
<groupId>com.alibaba.fastjson2</groupId>
<artifactId>fastjson2</artifactId>
<version>${fastjson.version}</version>
<version>${fastjson2.version}</version>
</dependency>

<!-- TESTING -->
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,16 @@

package org.springframework.ai.vectorstore.milvus;

import java.lang.reflect.Type;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.stream.Collectors;

import com.alibaba.fastjson.JSONObject;
import com.google.gson.Gson;
import com.google.gson.JsonObject;
import com.google.gson.reflect.TypeToken;
import io.milvus.client.MilvusServiceClient;
import io.milvus.common.clientenum.ConsistencyLevelEnum;
import io.milvus.exception.ParamException;
Expand Down Expand Up @@ -159,7 +162,7 @@ public class MilvusVectorStore extends AbstractObservationVectorStore implements
public static final String EMBEDDING_FIELD_NAME = "embedding";

// Metadata, automatically assigned by Milvus.
private static final String DISTANCE_FIELD_NAME = "distance";
public static final String SIMILARITY_FIELD_NAME = "score";

private static final Logger logger = LoggerFactory.getLogger(MilvusVectorStore.class);

Expand Down Expand Up @@ -234,7 +237,7 @@ public void doAdd(List<Document> documents) {

List<String> docIdArray = new ArrayList<>();
List<String> contentArray = new ArrayList<>();
List<JSONObject> metadataArray = new ArrayList<>();
List<JsonObject> metadataArray = new ArrayList<>();
List<List<Float>> embeddingArray = new ArrayList<>();

// TODO: Need to customize how we pass the embedding options
Expand All @@ -246,7 +249,9 @@ public void doAdd(List<Document> documents) {
// Use a (future) DocumentTextLayoutFormatter instance to extract
// the content used to compute the embeddings
contentArray.add(document.getText());
metadataArray.add(new JSONObject(document.getMetadata()));
Gson gson = new Gson();
String jsonString = gson.toJson(document.getMetadata());
metadataArray.add(gson.fromJson(jsonString, JsonObject.class));
embeddingArray.add(EmbeddingUtils.toList(embeddings.get(documents.indexOf(document))));
}

Expand Down Expand Up @@ -357,29 +362,32 @@ public List<Document> doSimilaritySearch(SearchRequest request) {
.map(rowRecord -> {
String docId = String.valueOf(rowRecord.get(this.idFieldName));
String content = (String) rowRecord.get(this.contentFieldName);
JSONObject metadata = null;
JsonObject metadata = new JsonObject();
try {
metadata = (JSONObject) rowRecord.get(this.metadataFieldName);
metadata = (JsonObject) rowRecord.get(this.metadataFieldName);
// inject the distance into the metadata.
metadata.put(DocumentMetadata.DISTANCE.value(), 1 - getResultSimilarity(rowRecord));
metadata.addProperty(DocumentMetadata.DISTANCE.value(), 1 - getResultSimilarity(rowRecord));
}
catch (ParamException e) {
// skip the ParamException if metadata doesn't exist for the custom
// collection
}
Gson gson = new Gson();
Type type = new TypeToken<Map<String, Object>>() {
}.getType();
return Document.builder()
.id(docId)
.text(content)
.metadata((metadata != null) ? metadata.getInnerMap() : Map.of())
.metadata((metadata != null) ? gson.fromJson(metadata, type) : Map.of())
.score((double) getResultSimilarity(rowRecord))
.build();
})
.toList();
}

/**
 * Derives the similarity value for a single search hit from the metric value stored
 * in the row record.
 * <p>
 * When the store's metric type is {@code IP} or {@code COSINE} the stored value is
 * returned unchanged; for every other metric type the result is {@code 1 - value}.
 * <p>
 * NOTE(review): this listing is an unmarked diff, so both versions of the body appear.
 * The pair reading {@code DISTANCE_FIELD_NAME} is presumably the removed code and the
 * pair reading {@code SIMILARITY_FIELD_NAME} the added code, matching the commit
 * message ("score" instead of "distance").
 * @param rowRecord the search result row to read the metric value from
 * @return the similarity value computed as described above
 */
private float getResultSimilarity(RowRecord rowRecord) {
// Pre-change version: metric value read from the "distance" field.
Float distance = (Float) rowRecord.get(DISTANCE_FIELD_NAME);
return (this.metricType == MetricType.IP || this.metricType == MetricType.COSINE) ? distance : (1 - distance);
// Post-change version: same computation, metric value read from the "score" field.
Float score = (Float) rowRecord.get(SIMILARITY_FIELD_NAME);
return (this.metricType == MetricType.IP || this.metricType == MetricType.COSINE) ? score : (1 - score);
}

// ---------------------------------------------------------------------------------
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
*/
public final class MilvusImage {

public static final DockerImageName DEFAULT_IMAGE = DockerImageName.parse("milvusdb/milvus:v2.4.9");
public static final DockerImageName DEFAULT_IMAGE = DockerImageName.parse("milvusdb/milvus:v2.5.4");

private MilvusImage() {

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -103,16 +103,14 @@ void searchWithCustomFieldNames(String metricType) {
List<Document> fullResult = vectorStore
.similaritySearch(SearchRequest.builder().query("Spring").build());

List<Float> distances = fullResult.stream()
.map(doc -> (Float) doc.getMetadata().get("distance"))
.toList();
List<Double> scores = fullResult.stream().map(doc -> doc.getScore()).toList();

assertThat(distances).hasSize(3);
assertThat(scores).hasSize(3);

float threshold = (distances.get(0) + distances.get(1)) / 2;
double threshold = (scores.get(0) + scores.get(1)) / 2;

List<Document> results = vectorStore.similaritySearch(
SearchRequest.builder().query("Spring").topK(5).similarityThreshold(1 - threshold).build());
SearchRequest.builder().query("Spring").topK(5).similarityThreshold(threshold).build());

assertThat(results).hasSize(1);
Document resultDoc = results.get(0);
Expand Down Expand Up @@ -144,16 +142,14 @@ void searchWithoutMetadataFieldOverride(String metricType) {
List<Document> fullResult = vectorStore
.similaritySearch(SearchRequest.builder().query("Spring").build());

List<Float> distances = fullResult.stream()
.map(doc -> (Float) doc.getMetadata().get("distance"))
.toList();
List<Double> scores = fullResult.stream().map(doc -> doc.getScore()).toList();

assertThat(distances).hasSize(3);
assertThat(scores).hasSize(3);

float threshold = (distances.get(0) + distances.get(1)) / 2;
double threshold = (scores.get(0) + scores.get(1)) / 2;

List<Document> results = vectorStore.similaritySearch(
SearchRequest.builder().query("Spring").topK(5).similarityThreshold(1 - threshold).build());
SearchRequest.builder().query("Spring").topK(5).similarityThreshold(threshold).build());

assertThat(results).hasSize(1);
Document resultDoc = results.get(0);
Expand Down Expand Up @@ -187,16 +183,14 @@ void searchWithAutoIdEnabled(String metricType) {
List<Document> fullResult = vectorStore
.similaritySearch(SearchRequest.builder().query("Spring").build());

List<Float> distances = fullResult.stream()
.map(doc -> (Float) doc.getMetadata().get("distance"))
.toList();
List<Double> scores = fullResult.stream().map(doc -> doc.getScore()).toList();

assertThat(distances).hasSize(3);
assertThat(scores).hasSize(3);

float threshold = (distances.get(0) + distances.get(1)) / 2;
double threshold = (scores.get(0) + scores.get(1)) / 2;

List<Document> results = vectorStore.similaritySearch(
SearchRequest.builder().query("Spring").topK(5).similarityThreshold(1 - threshold).build());
SearchRequest.builder().query("Spring").topK(5).similarityThreshold(threshold).build());

assertThat(results).hasSize(1);
Document resultDoc = results.get(0);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -319,7 +319,7 @@ public void deleteWithComplexFilterExpression() {
assertThat(results.stream().map(doc -> doc.getMetadata().get("type")).collect(Collectors.toList()))
.containsExactlyInAnyOrder("A", "B");
assertThat(results.stream().map(doc -> doc.getMetadata().get("priority")).collect(Collectors.toList()))
.containsExactlyInAnyOrder(1, 1);
.containsExactlyInAnyOrder(1.0, 1.0);
});
}

Expand Down

0 comments on commit 0065949

Please sign in to comment.