Skip to content

Commit 6ec92e4

Browse files
committed
Adds a feature to increase the maximum Skip/Take data set size, or to pre-calculate it from the index document count
1 parent b7ca3aa commit 6ec92e4

File tree

5 files changed

+108
-18
lines changed

5 files changed

+108
-18
lines changed

src/Examine.Lucene/PublicAPI.Shipped.txt

-5
Original file line numberDiff line numberDiff line change
@@ -224,11 +224,6 @@ Examine.Lucene.Search.LuceneQuery.LuceneQuery(Examine.Lucene.Search.LuceneSearch
224224
Examine.Lucene.Search.LuceneQuery.ManagedQuery(string query, string[] fields = null) -> Examine.Search.IBooleanOperation
225225
Examine.Lucene.Search.LuceneQuery.NativeQuery(string query) -> Examine.Search.IBooleanOperation
226226
Examine.Lucene.Search.LuceneQuery.RangeQuery<T>(string[] fields, T? min, T? max, bool minInclusive = true, bool maxInclusive = true) -> Examine.Search.IBooleanOperation
227-
Examine.Lucene.Search.LuceneQueryOptions
228-
Examine.Lucene.Search.LuceneQueryOptions.LuceneQueryOptions(int skip, int? take = null, Examine.Lucene.Search.SearchAfterOptions searchAfter = null, bool trackDocumentScores = false, bool trackDocumentMaxScore = false) -> void
229-
Examine.Lucene.Search.LuceneQueryOptions.SearchAfter.get -> Examine.Lucene.Search.SearchAfterOptions
230-
Examine.Lucene.Search.LuceneQueryOptions.TrackDocumentMaxScore.get -> bool
231-
Examine.Lucene.Search.LuceneQueryOptions.TrackDocumentScores.get -> bool
232227
Examine.Lucene.Search.LuceneSearchExecutor
233228
Examine.Lucene.Search.LuceneSearchExecutor.Execute() -> Examine.ISearchResults
234229
Examine.Lucene.Search.LuceneSearchExtensions

src/Examine.Lucene/PublicAPI.Unshipped.txt

+7
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,13 @@ Examine.Lucene.LuceneIndexOptions.NrtTargetMaxStaleSec.set -> void
1515
Examine.Lucene.LuceneIndexOptions.NrtTargetMinStaleSec.get -> double
1616
Examine.Lucene.LuceneIndexOptions.NrtTargetMinStaleSec.set -> void
1717
Examine.Lucene.Providers.LuceneSearcher.LuceneSearcher(string name, Lucene.Net.Search.SearcherManager searcherManager, Lucene.Net.Analysis.Analyzer analyzer, Examine.Lucene.FieldValueTypeCollection fieldValueTypeCollection, bool isNrt) -> void
18+
Examine.Lucene.Search.LuceneQueryOptions
19+
Examine.Lucene.Search.LuceneQueryOptions.AutoCalculateSkipTakeMaxResults.get -> bool
20+
Examine.Lucene.Search.LuceneQueryOptions.LuceneQueryOptions(int skip, int? take = null, Examine.Lucene.Search.SearchAfterOptions searchAfter = null, bool trackDocumentScores = false, bool trackDocumentMaxScore = false, int skipTakeMaxResults = 10000, bool autoCalculateSkipTakeMaxResults = false) -> void
21+
Examine.Lucene.Search.LuceneQueryOptions.SearchAfter.get -> Examine.Lucene.Search.SearchAfterOptions
22+
Examine.Lucene.Search.LuceneQueryOptions.SkipTakeMaxResults.get -> int
23+
Examine.Lucene.Search.LuceneQueryOptions.TrackDocumentMaxScore.get -> bool
24+
Examine.Lucene.Search.LuceneQueryOptions.TrackDocumentScores.get -> bool
1825
Examine.Lucene.Search.LuceneSearchResults.LuceneSearchResults(System.Collections.Generic.IReadOnlyCollection<Examine.ISearchResult> results, int totalItemCount, float maxScore, Examine.Lucene.Search.SearchAfterOptions searchAfterOptions) -> void
1926
Examine.Lucene.Search.LuceneSearchResults.MaxScore.get -> float
2027
Examine.Lucene.Search.LuceneSearchResults.SearchAfter.get -> Examine.Lucene.Search.SearchAfterOptions

src/Examine.Lucene/Search/LuceneQueryOptions.cs

+35-6
Original file line numberDiff line numberDiff line change
@@ -12,15 +12,26 @@ public class LuceneQueryOptions : QueryOptions
1212
/// </summary>
1313
/// <param name="skip">Number of result documents to skip.</param>
1414
/// <param name="take">Optional number of result documents to take.</param>
15-
/// <param name="searchAfter">Optionally skip to results after the results from the previous search execution. Used for efficent deep paging.</param>
16-
/// <param name="trackDocumentMaxScore">Whether to track the maximum document score. For best performance, if not needed, leave false.</param>
15+
/// <param name="searchAfter">Optionally skip to results after the results from the previous search execution. Used for efficient deep paging.</param>
1716
/// <param name="trackDocumentScores">Whether to Track Document Scores. For best performance, if not needed, leave false.</param>
18-
public LuceneQueryOptions(int skip, int? take = null, SearchAfterOptions searchAfter = null, bool trackDocumentScores = false, bool trackDocumentMaxScore = false)
17+
/// <param name="trackDocumentMaxScore">Whether to track the maximum document score. For best performance, if not needed, leave false.</param>
18+
/// <param name="skipTakeMaxResults">When using Skip/Take (not SearchAfter) this will be the maximum data set size that can be paged.</param>
19+
/// <param name="autoCalculateSkipTakeMaxResults">If enabled, this will pre-calculate the document count in the index to use for <see cref="SkipTakeMaxResults"/>.</param>
20+
public LuceneQueryOptions(
21+
int skip,
22+
int? take = null,
23+
SearchAfterOptions searchAfter = null,
24+
bool trackDocumentScores = false,
25+
bool trackDocumentMaxScore = false,
26+
int skipTakeMaxResults = AbsoluteMaxResults,
27+
bool autoCalculateSkipTakeMaxResults = false)
1928
: base(skip, take)
2029
{
30+
SearchAfter = searchAfter;
2131
TrackDocumentScores = trackDocumentScores;
2232
TrackDocumentMaxScore = trackDocumentMaxScore;
23-
SearchAfter = searchAfter;
33+
SkipTakeMaxResults = skipTakeMaxResults;
34+
AutoCalculateSkipTakeMaxResults = autoCalculateSkipTakeMaxResults;
2435
}
2536

2637
/// <summary>
@@ -34,8 +45,26 @@ public LuceneQueryOptions(int skip, int? take = null, SearchAfterOptions searchA
3445
public bool TrackDocumentMaxScore { get; }
3546

3647
/// <summary>
37-
/// Options for Searching After. Used for efficent deep paging.
48+
/// Options for Searching After. Used for efficient deep paging.
49+
/// </summary>
50+
public SearchAfterOptions SearchAfter { get; } = null;
51+
52+
/// <summary>
53+
/// When using Skip/Take (not SearchAfter) this will be the maximum data set size that can be paged.
54+
/// </summary>
55+
/// <remarks>
56+
/// For performance reasons, this should be low.
57+
/// The default is 10k and if larger datasets are required to be paged,
58+
/// this value can be increased but it is recommended to use the SearchAfter feature instead.
59+
/// </remarks>
60+
public int SkipTakeMaxResults { get; }
61+
62+
/// <summary>
63+
/// If enabled, this will pre-calculate the document count in the index to use for <see cref="SkipTakeMaxResults"/>.
3864
/// </summary>
39-
public SearchAfterOptions SearchAfter { get; }
65+
/// <remarks>
66+
/// This will incur a performance hit on each search execution since there will be a query to get the total document count.
67+
/// </remarks>
68+
public bool AutoCalculateSkipTakeMaxResults { get; }
4069
}
4170
}

src/Examine.Lucene/Search/LuceneSearchExecutor.cs

+20-1
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@ public class LuceneSearchExecutor
2121
private readonly ISearchContext _searchContext;
2222
private readonly Query _luceneQuery;
2323
private readonly ISet<string> _fieldsToLoad;
24+
private int? _maxDoc;
2425

2526
internal LuceneSearchExecutor(QueryOptions options, Query query, IEnumerable<SortField> sortField, ISearchContext searchContext, ISet<string> fieldsToLoad)
2627
{
@@ -70,7 +71,11 @@ public ISearchResults Execute()
7071

7172
using (var searcher = _searchContext.GetSearcher())
7273
{
73-
var maxResults = Math.Min((_options.Skip + 1) * _options.Take, QueryOptions.AbsoluteMaxResults);
74+
var maxSkipTakeDataSetSize = _luceneQueryOptions?.AutoCalculateSkipTakeMaxResults ?? false
75+
? GetMaxDoc()
76+
: _luceneQueryOptions?.SkipTakeMaxResults ?? QueryOptions.AbsoluteMaxResults;
77+
78+
var maxResults = Math.Min((_options.Skip + 1) * _options.Take, maxSkipTakeDataSetSize);
7479
maxResults = maxResults >= 1 ? maxResults : QueryOptions.DefaultMaxResults;
7580
int numHits = maxResults;
7681

@@ -146,6 +151,20 @@ public ISearchResults Execute()
146151
}
147152
}
148153

154+
/// <summary>
155+
/// Returns the <c>MaxDoc</c> value of the index reader, used as the Skip/Take paging limit when auto-calculation is enabled.
156+
/// </summary>
/// <returns>The cached <c>MaxDoc</c> value; it is read from the index reader only on the first call for this executor instance.</returns>
157+
private int GetMaxDoc()
158+
{
159+
// Only query the reader once; later calls reuse the value cached in _maxDoc.
if (_maxDoc == null)
160+
{
161+
// Acquire a searcher for the lookup; the using declaration disposes it when this scope ends.
using var searcher = _searchContext.GetSearcher();
162+
// NOTE(review): in Lucene, MaxDoc presumably also counts deleted documents, so it may exceed the live document count — confirm.
_maxDoc = searcher.IndexSearcher.IndexReader.MaxDoc;
163+
}
164+
165+
return _maxDoc.Value;
166+
}
167+
149168
private static FieldDoc GetScoreDocAfter(LuceneQueryOptions luceneQueryOptions)
150169
{
151170
FieldDoc scoreDocAfter;

src/Examine.Test/Examine.Lucene/Search/FluentApiTests.cs

+46-6
Original file line numberDiff line numberDiff line change
@@ -2582,16 +2582,12 @@ public void Paging_With_Skip_Take()
25822582
indexer.IndexItems(valueSets);
25832583

25842584
var searcher = indexer.Searcher;
2585-
2586-
//Arrange
2587-
25882585
var sc = searcher.CreateQuery("content").Field("writerName", "administrator");
2586+
2587+
// Search with normal Skip/Take:
25892588
int pageIndex = 0;
25902589
int pageSize = 100;
25912590
int pagedCount = 0;
2592-
2593-
//Act
2594-
25952591
while (true)
25962592
{
25972593
var results = sc
@@ -2610,6 +2606,50 @@ public void Paging_With_Skip_Take()
26102606
// Paging stops after 100 pages (100 x 100 results) because the default limit for Skip/Take paged data sets is 10K.
26112607
Assert.AreEqual(100, pagedCount);
26122608

2609+
// Search with increased max skiptake data set size:
2610+
pageIndex = 0;
2611+
pageSize = 100;
2612+
pagedCount = 0;
2613+
while (true)
2614+
{
2615+
var results = sc
2616+
.Execute(new LuceneQueryOptions(pageIndex * pageSize, pageSize, skipTakeMaxResults: 15000))
2617+
.ToList();
2618+
2619+
if (results.Count == 0)
2620+
{
2621+
break;
2622+
}
2623+
Assert.AreEqual(pageSize, results.Count);
2624+
pageIndex++;
2625+
pagedCount++;
2626+
}
2627+
2628+
// All 150 pages are returned because the maximum Skip/Take data set size was raised to 15000.
2629+
Assert.AreEqual(150, pagedCount);
2630+
2631+
// Search with auto calculated maxdoc:
2632+
pageIndex = 0;
2633+
pageSize = 100;
2634+
pagedCount = 0;
2635+
while (true)
2636+
{
2637+
var results = sc
2638+
.Execute(new LuceneQueryOptions(pageIndex * pageSize, pageSize, autoCalculateSkipTakeMaxResults: true))
2639+
.ToList();
2640+
2641+
if (results.Count == 0)
2642+
{
2643+
break;
2644+
}
2645+
Assert.AreEqual(pageSize, results.Count);
2646+
pageIndex++;
2647+
pagedCount++;
2648+
}
2649+
2650+
// All 150 pages are returned because the maximum Skip/Take data set size was auto-calculated from the index document count.
2651+
Assert.AreEqual(150, pagedCount);
2652+
26132653
// Now, page with SearchAfter:
26142654
pageIndex = 0;
26152655
pageSize = 100;

0 commit comments

Comments
 (0)