Skip to content

unite knowledge search model #596

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Aug 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@ namespace BotSharp.Abstraction.Knowledges;
/// <summary>
/// Service contract for working with knowledge stored in named collections:
/// listing collections, searching, feeding new knowledge, paging through stored data and deleting entries.
/// </summary>
// NOTE(review): this is an unmarked diff hunk. SearchKnowledge and GetKnowledgeCollectionData each appear
// twice; presumably the first line of each pair is the removed signature and the second
// (the one using KnowledgeSearchResult / KnowledgeSearchOptions) is its replacement — confirm against the PR.
public interface IKnowledgeService
{
// Returns the names of the available knowledge collections.
Task<IEnumerable<string>> GetKnowledgeCollections();
// Searches the given collection using the supplied options (old signature, then new signature).
Task<IEnumerable<KnowledgeRetrievalResult>> SearchKnowledge(string collectionName, KnowledgeRetrievalOptions options);
Task<IEnumerable<KnowledgeSearchResult>> SearchKnowledge(string collectionName, KnowledgeSearchOptions options);
// Adds knowledge described by `model` to the given collection (implementation not visible in this diff).
Task FeedKnowledge(string collectionName, KnowledgeCreationModel model);
// Returns one page of the collection's stored items, selected by `filter` (old signature, then new signature).
Task<StringIdPagedItems<KnowledgeCollectionData>> GetKnowledgeCollectionData(string collectionName, KnowledgeFilter filter);
Task<StringIdPagedItems<KnowledgeSearchResult>> GetKnowledgeCollectionData(string collectionName, KnowledgeFilter filter);
// Deletes the item with the given id from the collection; the bool presumably signals success — confirm with implementers.
Task<bool> DeleteKnowledgeCollectionData(string collectionName, string id);
}
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ namespace BotSharp.Abstraction.Knowledges.Models;
public class KnowledgeCollectionData
{
public string Id { get; set; }
public string Question { get; set; }
public string Answer { get; set; }
public Dictionary<string, string> Data { get; set; } = new();
public double? Score { get; set; }
public float[]? Vector { get; set; }
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

namespace BotSharp.Abstraction.Knowledges.Models;

public class KnowledgeRetrievalOptions
public class KnowledgeSearchOptions
{
public string Text { get; set; } = string.Empty;
public IEnumerable<string>? Fields { get; set; } = new List<string> { KnowledgePayloadName.Text, KnowledgePayloadName.Answer };
Expand Down
Original file line number Diff line number Diff line change
@@ -1,12 +1,20 @@
namespace BotSharp.Abstraction.Knowledges.Models;

public class KnowledgeSearchResult
public class KnowledgeSearchResult : KnowledgeCollectionData
{
public Dictionary<string, string> Data { get; set; } = new();
public double Score { get; set; }
public float[]? Vector { get; set; }
}
public KnowledgeSearchResult()
{
}

public class KnowledgeRetrievalResult : KnowledgeSearchResult
{
public static KnowledgeSearchResult CopyFrom(KnowledgeCollectionData data)
{
return new KnowledgeSearchResult
{
Id = data.Id,
Data = data.Data,
Score = data.Score,
Vector = data.Vector
};
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,11 @@ namespace BotSharp.Abstraction.VectorStorage;
/// <summary>
/// Abstraction over a vector store: collection management, upsert, similarity search and deletion.
/// </summary>
// NOTE(review): this is an unmarked diff hunk. Search appears twice; presumably the first line is the removed
// signature and the second (returning KnowledgeCollectionData, with nullable `fields`) is its replacement — confirm.
public interface IVectorDb
{
// Name of this vector store implementation.
string Name { get; }

// Returns the names of the collections in the store.
Task<IEnumerable<string>> GetCollections();
// Returns one page of a collection's stored items, selected by `filter`.
Task<StringIdPagedItems<KnowledgeCollectionData>> GetCollectionData(string collectionName, KnowledgeFilter filter);
// Creates a collection; `dim` is presumably the vector dimension — confirm with implementers.
Task CreateCollection(string collectionName, int dim);
// Inserts or updates the entry `id` with its vector, text and optional extra payload.
Task<bool> Upsert(string collectionName, string id, float[] vector, string text, Dictionary<string, string>? payload = null);
// Finds up to `limit` entries similar to `vector`.
// In the Qdrant implementation shown in this diff, `confidence` is passed as the score threshold, a null `fields`
// returns the whole payload, and otherwise only the listed fields are returned (missing ones as "").
Task<IEnumerable<KnowledgeSearchResult>> Search(string collectionName, float[] vector, IEnumerable<string> fields, int limit = 5, float confidence = 0.5f, bool withVector = false);
Task<IEnumerable<KnowledgeCollectionData>> Search(string collectionName, float[] vector, IEnumerable<string>? fields, int limit = 5, float confidence = 0.5f, bool withVector = false);
// Deletes the entry with the given id from the collection.
Task<bool> DeleteCollectionData(string collectionName, string id);
}
Original file line number Diff line number Diff line change
Expand Up @@ -23,29 +23,29 @@ public async Task<IEnumerable<string>> GetKnowledgeCollections()
}

[HttpPost("/knowledge/{collection}/search")]
public async Task<IEnumerable<KnowledgeRetrivalViewModel>> SearchKnowledge([FromRoute] string collection, [FromBody] SearchKnowledgeModel model)
public async Task<IEnumerable<KnowledgeSearchResultViewModel>> SearchKnowledge([FromRoute] string collection, [FromBody] SearchKnowledgeRequest request)
{
var options = new KnowledgeRetrievalOptions
var options = new KnowledgeSearchOptions
{
Text = model.Text,
Fields = model.Fields,
Limit = model.Limit ?? 5,
Confidence = model.Confidence ?? 0.5f,
WithVector = model.WithVector
Text = request.Text,
Fields = request.Fields,
Limit = request.Limit ?? 5,
Confidence = request.Confidence ?? 0.5f,
WithVector = request.WithVector
};

var results = await _knowledgeService.SearchKnowledge(collection, options);
return results.Select(x => KnowledgeRetrivalViewModel.From(x)).ToList();
return results.Select(x => KnowledgeSearchResultViewModel.From(x)).ToList();
}

[HttpPost("/knowledge/{collection}/data")]
public async Task<StringIdPagedItems<KnowledgeCollectionDataViewModel>> GetKnowledgeCollectionData([FromRoute] string collection, [FromBody] KnowledgeFilter filter)
public async Task<StringIdPagedItems<KnowledgeSearchResultViewModel>> GetKnowledgeCollectionData([FromRoute] string collection, [FromBody] KnowledgeFilter filter)
{
var data = await _knowledgeService.GetKnowledgeCollectionData(collection, filter);
var items = data.Items?.Select(x => KnowledgeCollectionDataViewModel.From(x))?
.ToList() ?? new List<KnowledgeCollectionDataViewModel>();
var items = data.Items?.Select(x => KnowledgeSearchResultViewModel.From(x))?
.ToList() ?? new List<KnowledgeSearchResultViewModel>();

return new StringIdPagedItems<KnowledgeCollectionDataViewModel>
return new StringIdPagedItems<KnowledgeSearchResultViewModel>
{
Count = data.Count,
NextId = data.NextId,
Expand Down

This file was deleted.

This file was deleted.

Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
using BotSharp.Abstraction.Knowledges.Models;
using System.Text.Json.Serialization;

namespace BotSharp.OpenAPI.ViewModels.Knowledges;

/// <summary>
/// JSON view model for a single knowledge item: its id, payload fields and,
/// when available, its score and vector.
/// </summary>
public class KnowledgeSearchResultViewModel
{
    /// <summary>Identifier of the knowledge item. Defaults to an empty string so it is never null on a fresh instance.</summary>
    [JsonPropertyName("id")]
    public string Id { get; set; } = string.Empty;

    /// <summary>Payload fields of the item as key/value pairs. Defaults to an empty dictionary.</summary>
    [JsonPropertyName("data")]
    public IDictionary<string, string> Data { get; set; } = new Dictionary<string, string>();

    /// <summary>Score of the item; left out of the serialized JSON when null.</summary>
    [JsonPropertyName("score")]
    [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
    public double? Score { get; set; }

    /// <summary>Vector of the item; left out of the serialized JSON when null.</summary>
    [JsonPropertyName("vector")]
    [JsonIgnore(Condition = JsonIgnoreCondition.WhenWritingNull)]
    public float[]? Vector { get; set; }

    /// <summary>
    /// Builds a view model by copying id, data, score and vector from a search result.
    /// </summary>
    /// <param name="result">The search result to copy from.</param>
    /// <returns>A new view model carrying the same values (the data dictionary is shared, not cloned).</returns>
    public static KnowledgeSearchResultViewModel From(KnowledgeSearchResult result)
    {
        return new KnowledgeSearchResultViewModel
        {
            Id = result.Id,
            Data = result.Data,
            Score = result.Score,
            Vector = result.Vector
        };
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

namespace BotSharp.OpenAPI.ViewModels.Knowledges;

public class SearchKnowledgeModel
public class SearchKnowledgeRequest
{
[JsonPropertyName("text")]
public string Text { get; set; } = string.Empty;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,12 +27,12 @@ public Task<StringIdPagedItems<KnowledgeCollectionData>> GetCollectionData(strin
throw new NotImplementedException();
}

public async Task<IEnumerable<KnowledgeSearchResult>> Search(string collectionName, float[] vector,
IEnumerable<string> fields, int limit = 5, float confidence = 0.5f, bool withVector = false)
public async Task<IEnumerable<KnowledgeCollectionData>> Search(string collectionName, float[] vector,
IEnumerable<string>? fields, int limit = 5, float confidence = 0.5f, bool withVector = false)
{
if (!_vectors.ContainsKey(collectionName))
{
return new List<KnowledgeSearchResult>();
return new List<KnowledgeCollectionData>();
}

var similarities = VectorUtility.CalCosineSimilarity(vector, _vectors[collectionName]);
Expand All @@ -41,7 +41,7 @@ public async Task<IEnumerable<KnowledgeSearchResult>> Search(string collectionNa
var results = np.argsort(similarities).ToArray<int>()
.Reverse()
.Take(limit)
.Select(i => new KnowledgeSearchResult
.Select(i => new KnowledgeCollectionData
{
Data = new Dictionary<string, string> { { "text", _vectors[collectionName][i].Text } },
Score = similarities[i],
Expand All @@ -64,8 +64,8 @@ public async Task<bool> Upsert(string collectionName, string id, float[] vector,
return true;
}

public Task<bool> DeleteCollectionData(string collectionName, string id)
public async Task<bool> DeleteCollectionData(string collectionName, string id)
{
throw new NotImplementedException();
return await Task.FromResult(false);
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -16,21 +16,27 @@ public async Task<IEnumerable<string>> GetKnowledgeCollections()
}
}

public async Task<StringIdPagedItems<KnowledgeCollectionData>> GetKnowledgeCollectionData(string collectionName, KnowledgeFilter filter)
public async Task<StringIdPagedItems<KnowledgeSearchResult>> GetKnowledgeCollectionData(string collectionName, KnowledgeFilter filter)
{
try
{
var db = GetVectorDb();
return await db.GetCollectionData(collectionName, filter);
var pagedResult = await db.GetCollectionData(collectionName, filter);
return new StringIdPagedItems<KnowledgeSearchResult>
{
Count = pagedResult.Count,
Items = pagedResult.Items.Select(x => KnowledgeSearchResult.CopyFrom(x)),
NextId = pagedResult.NextId,
};
}
catch (Exception ex)
{
_logger.LogWarning($"Error when getting knowledge collection data ({collectionName}). {ex.Message}\r\n{ex.InnerException}");
return new StringIdPagedItems<KnowledgeCollectionData>();
return new StringIdPagedItems<KnowledgeSearchResult>();
}
}

public async Task<IEnumerable<KnowledgeRetrievalResult>> SearchKnowledge(string collectionName, KnowledgeRetrievalOptions options)
public async Task<IEnumerable<KnowledgeSearchResult>> SearchKnowledge(string collectionName, KnowledgeSearchOptions options)
{
try
{
Expand All @@ -39,21 +45,15 @@ public async Task<IEnumerable<KnowledgeRetrievalResult>> SearchKnowledge(string

// Vector search
var db = GetVectorDb();
var fields = !options.Fields.IsNullOrEmpty() ? options.Fields : new List<string> { KnowledgePayloadName.Text, KnowledgePayloadName.Answer };
var found = await db.Search(collectionName, vector, fields, limit: options.Limit ?? 5, confidence: options.Confidence ?? 0.5f, withVector: options.WithVector);
var found = await db.Search(collectionName, vector, options.Fields, limit: options.Limit ?? 5, confidence: options.Confidence ?? 0.5f, withVector: options.WithVector);

var results = found.Select(x => new KnowledgeRetrievalResult
{
Data = x.Data,
Score = x.Score,
Vector = x.Vector
}).ToList();
var results = found.Select(x => KnowledgeSearchResult.CopyFrom(x)).ToList();
return results;
}
catch (Exception ex)
{
_logger.LogWarning($"Error when searching knowledge ({collectionName}). {ex.Message}\r\n{ex.InnerException}");
return new List<KnowledgeRetrievalResult>();
return new List<KnowledgeSearchResult>();
}
}
}
4 changes: 2 additions & 2 deletions src/Plugins/BotSharp.Plugin.MetaAI/Providers/FaissDb.cs
Original file line number Diff line number Diff line change
Expand Up @@ -26,8 +26,8 @@ public Task<IEnumerable<string>> GetCollections()
throw new NotImplementedException();
}

public Task<IEnumerable<KnowledgeSearchResult>> Search(string collectionName, float[] vector,
IEnumerable<string> fields, int limit = 10, float confidence = 0.5f, bool withVector = false)
public Task<IEnumerable<KnowledgeCollectionData>> Search(string collectionName, float[] vector,
IEnumerable<string>? fields, int limit = 10, float confidence = 0.5f, bool withVector = false)
{
throw new NotImplementedException();
}
Expand Down
36 changes: 22 additions & 14 deletions src/Plugins/BotSharp.Plugin.Qdrant/QdrantDb.cs
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,7 @@ public async Task<StringIdPagedItems<KnowledgeCollectionData>> GetCollectionData
var points = response?.Result?.Select(x => new KnowledgeCollectionData
{
Id = x.Id?.Uuid ?? string.Empty,
Question = x.Payload.ContainsKey(KnowledgePayloadName.Text) ? x.Payload[KnowledgePayloadName.Text].StringValue : string.Empty,
Answer = x.Payload.ContainsKey(KnowledgePayloadName.Answer) ? x.Payload[KnowledgePayloadName.Answer].StringValue : string.Empty,
Data = x.Payload.ToDictionary(x => x.Key, x => x.Value.StringValue),
Vector = filter.WithVector ? x.Vectors?.Vector?.Data?.ToArray() : null
})?.ToList() ?? new List<KnowledgeCollectionData>();

Expand Down Expand Up @@ -125,10 +124,10 @@ public async Task<bool> Upsert(string collectionName, string id, float[] vector,
return result.Status == UpdateStatus.Completed;
}

public async Task<IEnumerable<KnowledgeSearchResult>> Search(string collectionName, float[] vector,
IEnumerable<string> fields, int limit = 5, float confidence = 0.5f, bool withVector = false)
public async Task<IEnumerable<KnowledgeCollectionData>> Search(string collectionName, float[] vector,
IEnumerable<string>? fields, int limit = 5, float confidence = 0.5f, bool withVector = false)
{
var results = new List<KnowledgeSearchResult>();
var results = new List<KnowledgeCollectionData>();

var client = GetClient();
var exist = await DoesCollectionExist(client, collectionName);
Expand All @@ -138,24 +137,33 @@ public async Task<IEnumerable<KnowledgeSearchResult>> Search(string collectionNa
}

var points = await client.SearchAsync(collectionName, vector, limit: (ulong)limit, scoreThreshold: confidence);


var pickFields = fields != null;
foreach (var point in points)
{
var data = new Dictionary<string, string>();
foreach (var field in fields)
if (pickFields)
{
if (point.Payload.ContainsKey(field))
{
data[field] = point.Payload[field].StringValue;
}
else
foreach (var field in fields)
{
data[field] = "";
if (point.Payload.ContainsKey(field))
{
data[field] = point.Payload[field].StringValue;
}
else
{
data[field] = "";
}
}
}
else
{
data = point.Payload.ToDictionary(k => k.Key, v => v.Value.StringValue);
}

results.Add(new KnowledgeSearchResult
results.Add(new KnowledgeCollectionData
{
Id = point.Id.Uuid,
Data = data,
Score = point.Score,
Vector = withVector ? point.Vectors?.Vector?.Data?.ToArray() : null
Expand Down
Loading