Adds feature to increase the max Skip/Take dataset size or pre-calculate it from the index

Shazwazza · Shazwazza · commit 6ec92e42eb6a · 2025-01-10T11:38:52.000-07:00
diff --git a/src/Examine.Lucene/PublicAPI.Shipped.txt b/src/Examine.Lucene/PublicAPI.Shipped.txt
@@ -224,11 +224,6 @@ Examine.Lucene.Search.LuceneQuery.LuceneQuery(Examine.Lucene.Search.LuceneSearch
 Examine.Lucene.Search.LuceneQuery.ManagedQuery(string query, string[] fields = null) -> Examine.Search.IBooleanOperation
 Examine.Lucene.Search.LuceneQuery.NativeQuery(string query) -> Examine.Search.IBooleanOperation
 Examine.Lucene.Search.LuceneQuery.RangeQuery<T>(string[] fields, T? min, T? max, bool minInclusive = true, bool maxInclusive = true) -> Examine.Search.IBooleanOperation
-Examine.Lucene.Search.LuceneQueryOptions
-Examine.Lucene.Search.LuceneQueryOptions.LuceneQueryOptions(int skip, int? take = null, Examine.Lucene.Search.SearchAfterOptions searchAfter = null, bool trackDocumentScores = false, bool trackDocumentMaxScore = false) -> void
-Examine.Lucene.Search.LuceneQueryOptions.SearchAfter.get -> Examine.Lucene.Search.SearchAfterOptions
-Examine.Lucene.Search.LuceneQueryOptions.TrackDocumentMaxScore.get -> bool
-Examine.Lucene.Search.LuceneQueryOptions.TrackDocumentScores.get -> bool
 Examine.Lucene.Search.LuceneSearchExecutor
 Examine.Lucene.Search.LuceneSearchExecutor.Execute() -> Examine.ISearchResults
 Examine.Lucene.Search.LuceneSearchExtensions
diff --git a/src/Examine.Lucene/PublicAPI.Unshipped.txt b/src/Examine.Lucene/PublicAPI.Unshipped.txt
@@ -15,6 +15,13 @@ Examine.Lucene.LuceneIndexOptions.NrtTargetMaxStaleSec.set -> void
 Examine.Lucene.LuceneIndexOptions.NrtTargetMinStaleSec.get -> double
 Examine.Lucene.LuceneIndexOptions.NrtTargetMinStaleSec.set -> void
 Examine.Lucene.Providers.LuceneSearcher.LuceneSearcher(string name, Lucene.Net.Search.SearcherManager searcherManager, Lucene.Net.Analysis.Analyzer analyzer, Examine.Lucene.FieldValueTypeCollection fieldValueTypeCollection, bool isNrt) -> void
+Examine.Lucene.Search.LuceneQueryOptions
+Examine.Lucene.Search.LuceneQueryOptions.AutoCalculateSkipTakeMaxResults.get -> bool
+Examine.Lucene.Search.LuceneQueryOptions.LuceneQueryOptions(int skip, int? take = null, Examine.Lucene.Search.SearchAfterOptions searchAfter = null, bool trackDocumentScores = false, bool trackDocumentMaxScore = false, int skipTakeMaxResults = 10000, bool autoCalculateSkipTakeMaxResults = false) -> void
+Examine.Lucene.Search.LuceneQueryOptions.SearchAfter.get -> Examine.Lucene.Search.SearchAfterOptions
+Examine.Lucene.Search.LuceneQueryOptions.SkipTakeMaxResults.get -> int
+Examine.Lucene.Search.LuceneQueryOptions.TrackDocumentMaxScore.get -> bool
+Examine.Lucene.Search.LuceneQueryOptions.TrackDocumentScores.get -> bool
 Examine.Lucene.Search.LuceneSearchResults.LuceneSearchResults(System.Collections.Generic.IReadOnlyCollection<Examine.ISearchResult> results, int totalItemCount, float maxScore, Examine.Lucene.Search.SearchAfterOptions searchAfterOptions) -> void
 Examine.Lucene.Search.LuceneSearchResults.MaxScore.get -> float
 Examine.Lucene.Search.LuceneSearchResults.SearchAfter.get -> Examine.Lucene.Search.SearchAfterOptions
diff --git a/src/Examine.Lucene/Search/LuceneQueryOptions.cs b/src/Examine.Lucene/Search/LuceneQueryOptions.cs
@@ -12,15 +12,26 @@ public class LuceneQueryOptions : QueryOptions
         /// </summary>
         /// <param name="skip">Number of result documents to skip.</param>
         /// <param name="take">Optional number of result documents to take.</param>
-        /// <param name="searchAfter">Optionally skip to results after the results from the previous search execution. Used for efficent deep paging.</param>
-        /// <param name="trackDocumentMaxScore">Whether to track the maximum document score. For best performance, if not needed, leave false.</param>
+        /// <param name="searchAfter">Optionally skip to results after the results from the previous search execution. Used for efficient deep paging.</param>
         /// <param name="trackDocumentScores">Whether to Track Document Scores. For best performance, if not needed, leave false.</param>
-        public LuceneQueryOptions(int skip, int? take = null, SearchAfterOptions searchAfter = null, bool trackDocumentScores = false, bool trackDocumentMaxScore = false)
+        /// <param name="trackDocumentMaxScore">Whether to track the maximum document score. For best performance, if not needed, leave false.</param>
+        /// <param name="skipTakeMaxResults">When using Skip/Take (not SearchAfter) this will be the maximum data set size that can be paged.</param>
+        /// <param name="autoCalculateSkipTakeMaxResults">If enabled, the document count of the index is pre-calculated on search execution and used as the Skip/Take limit instead of <paramref name="skipTakeMaxResults"/>.</param>
+        public LuceneQueryOptions(
+            int skip,
+            int? take = null,
+            SearchAfterOptions searchAfter = null,
+            bool trackDocumentScores = false,
+            bool trackDocumentMaxScore = false,
+            int skipTakeMaxResults = AbsoluteMaxResults, // recorded as 10000 in PublicAPI.Unshipped.txt
+            bool autoCalculateSkipTakeMaxResults = false)
             : base(skip, take)
         {
+            SearchAfter = searchAfter;
             TrackDocumentScores = trackDocumentScores;
             TrackDocumentMaxScore = trackDocumentMaxScore;
-            SearchAfter = searchAfter;
+            SkipTakeMaxResults = skipTakeMaxResults; // stored as-is (no range check); LuceneSearchExecutor falls back to QueryOptions.DefaultMaxResults when the resulting limit is below 1
+            AutoCalculateSkipTakeMaxResults = autoCalculateSkipTakeMaxResults; // when true, LuceneSearchExecutor uses the index reader's MaxDoc instead of SkipTakeMaxResults
         }
 
         /// <summary>
@@ -34,8 +45,26 @@ public LuceneQueryOptions(int skip, int? take = null, SearchAfterOptions searchA
         public bool TrackDocumentMaxScore { get; }
 
         /// <summary>
-        /// Options for Searching After. Used for efficent deep paging.
+        /// Options for Searching After. Used for efficient deep paging.
+        /// </summary>
+        public SearchAfterOptions SearchAfter { get; } = null;
+
+        /// <summary>
+        /// When using Skip/Take (not SearchAfter) this will be the maximum data set size that can be paged.
+        /// </summary>
+        /// <remarks>
+        /// For performance reasons, this should be low.
+        /// The default is 10k and if larger datasets are required to be paged,
+        /// this value can be increased but it is recommended to use the SearchAfter feature instead.
+        /// </remarks>
+        public int SkipTakeMaxResults { get; }
+
+        /// <summary>
+        /// If enabled, the index reader's MaxDoc is read when the search executes and is used as the Skip/Take limit instead of <see cref="SkipTakeMaxResults"/>.
         /// </summary>
-        public SearchAfterOptions SearchAfter { get; }
+        /// <remarks>
+        /// This will incur a performance hit on each search execution since there will be a query to get the total document count.
+        /// </remarks>
+        public bool AutoCalculateSkipTakeMaxResults { get; }
     }
 }
diff --git a/src/Examine.Lucene/Search/LuceneSearchExecutor.cs b/src/Examine.Lucene/Search/LuceneSearchExecutor.cs
@@ -21,6 +21,7 @@ public class LuceneSearchExecutor
         private readonly ISearchContext _searchContext;
         private readonly Query _luceneQuery;
         private readonly ISet<string> _fieldsToLoad;
+        private int? _maxDoc;
 
         internal LuceneSearchExecutor(QueryOptions options, Query query, IEnumerable<SortField> sortField, ISearchContext searchContext, ISet<string> fieldsToLoad)
         {
@@ -70,7 +71,11 @@ public ISearchResults Execute()
 
             using (var searcher = _searchContext.GetSearcher())
             {
-                var maxResults = Math.Min((_options.Skip + 1) * _options.Take, QueryOptions.AbsoluteMaxResults);
+                var maxSkipTakeDataSetSize = _luceneQueryOptions?.AutoCalculateSkipTakeMaxResults ?? false
+                    ? GetMaxDoc()
+                    : _luceneQueryOptions?.SkipTakeMaxResults ?? QueryOptions.AbsoluteMaxResults;
+
+                var maxResults = Math.Min((_options.Skip + 1) * _options.Take, maxSkipTakeDataSetSize);
                 maxResults = maxResults >= 1 ? maxResults : QueryOptions.DefaultMaxResults;
                 int numHits = maxResults;
 
@@ -146,6 +151,20 @@ public ISearchResults Execute()
             }
         }
 
+        /// <summary>
+        /// Returns the index reader's MaxDoc, reading it on the first call and caching it in <see cref="_maxDoc"/>.
+        /// </summary>
+        private int GetMaxDoc()
+        {
+            if (_maxDoc is int cachedMaxDoc)
+            {
+                return cachedMaxDoc;
+            }
+            using var searcher = _searchContext.GetSearcher();
+            _maxDoc = searcher.IndexSearcher.IndexReader.MaxDoc;
+            return _maxDoc.Value;
+        }
+
         private static FieldDoc GetScoreDocAfter(LuceneQueryOptions luceneQueryOptions)
         {
             FieldDoc scoreDocAfter;
diff --git a/src/Examine.Test/Examine.Lucene/Search/FluentApiTests.cs b/src/Examine.Test/Examine.Lucene/Search/FluentApiTests.cs
@@ -2582,16 +2582,12 @@ public void Paging_With_Skip_Take()
                 indexer.IndexItems(valueSets);
 
                 var searcher = indexer.Searcher;
-
-                //Arrange
-
                 var sc = searcher.CreateQuery("content").Field("writerName", "administrator");
+
+                // Search with normal Skip/Take:
                 int pageIndex = 0;
                 int pageSize = 100;
                 int pagedCount = 0;
-
-                //Act
-
                 while (true)
                 {
                     var results = sc
@@ -2610,6 +2606,50 @@ public void Paging_With_Skip_Take()
                 // This will not proceed further than 100 paged count because the limit for paged data sets is 10K.
                 Assert.AreEqual(100, pagedCount);
 
+                // Search with increased max skiptake data set size:
+                pageIndex = 0;
+                pageSize = 100;
+                pagedCount = 0;
+                while (true)
+                {
+                    var results = sc
+                        .Execute(new LuceneQueryOptions(pageIndex * pageSize, pageSize, skipTakeMaxResults: 15000))
+                        .ToList();
+
+                    if (results.Count == 0)
+                    {
+                        break;
+                    }
+                    Assert.AreEqual(pageSize, results.Count);
+                    pageIndex++;
+                    pagedCount++;
+                }
+
+                // This will succeed because we've increased the limit of max skiptake dataset size.
+                Assert.AreEqual(150, pagedCount);
+
+                // Search with auto calculated maxdoc:
+                pageIndex = 0;
+                pageSize = 100;
+                pagedCount = 0;
+                while (true)
+                {
+                    var results = sc
+                        .Execute(new LuceneQueryOptions(pageIndex * pageSize, pageSize, autoCalculateSkipTakeMaxResults: true))
+                        .ToList();
+
+                    if (results.Count == 0)
+                    {
+                        break;
+                    }
+                    Assert.AreEqual(pageSize, results.Count);
+                    pageIndex++;
+                    pagedCount++;
+                }
+
+                // This will succeed because we've auto calculated the limit of max skiptake dataset size.
+                Assert.AreEqual(150, pagedCount);
+
                 // Now, page with SearchAfter:
                 pageIndex = 0;
                 pageSize = 100;