datastax
diff --git a/src/java/org/apache/cassandra/cql3/statements/SelectStatement.java b/src/java/org/apache/cassandra/cql3/statements/SelectStatement.java
Lines changed: 1 addition & 3 deletions
diff --git a/src/java/org/apache/cassandra/db/ConsistencyLevel.java b/src/java/org/apache/cassandra/db/ConsistencyLevel.java
Lines changed: 1 addition & 1 deletion
diff --git a/src/java/org/apache/cassandra/db/ReadCommand.java b/src/java/org/apache/cassandra/db/ReadCommand.java
Lines changed: 1 addition & 12 deletions
diff --git a/src/java/org/apache/cassandra/index/Index.java b/src/java/org/apache/cassandra/index/Index.java
Lines changed: 0 additions & 30 deletions
diff --git a/src/java/org/apache/cassandra/index/sai/plan/StorageAttachedIndexQueryPlan.java b/src/java/org/apache/cassandra/index/sai/plan/StorageAttachedIndexQueryPlan.java
Lines changed: 0 additions & 15 deletions
diff --git a/src/java/org/apache/cassandra/index/sai/plan/TopKProcessor.java b/src/java/org/apache/cassandra/index/sai/plan/TopKProcessor.java
Lines changed: 31 additions & 34 deletions
@@ -518,9 +518,7 @@ public ReadQuery getQuery(QueryOptions options,
                        String.format(TOPK_CONSISTENCY_LEVEL_ERROR, options.getConsistency()));
 
             // Consistency levels with more than one replica are downgraded to ONE/LOCAL_ONE.
-            if (options.getConsistency() != ConsistencyLevel.ONE &&
-                options.getConsistency() != ConsistencyLevel.LOCAL_ONE &&
-                options.getConsistency() != ConsistencyLevel.NODE_LOCAL)
+            if (options.getConsistency().needsReconciliation())
             {
                 ConsistencyLevel supplied = options.getConsistency();
                 ConsistencyLevel downgrade = supplied.isDatacenterLocal() ? ConsistencyLevel.LOCAL_ONE : ConsistencyLevel.ONE;
 
@@ -286,7 +286,7 @@ public void validateCounterForWrite(TableMetadata metadata, ClientState clientSt
     }
 
     /**
-     * With a replication factor greater than one, reads that contact more than one replica will require 
+     * With a replication factor greater than one, reads that contact more than one replica will require
      * reconciliation of the individual replica results at the coordinator.
      *
      * @return true if reads at this consistency level require merging at the coordinator
 
@@ -573,21 +573,10 @@ public ReadExecutionController executionController(boolean trackRepairedStatus)
         return ReadExecutionController.forCommand(this, trackRepairedStatus);
     }
 
-    /**
-     * Allow to post-process the result of the query after it has been reconciled on the coordinator
-     * but before it is passed to the CQL layer to return the ResultSet.
-     *
-     * See CASSANDRA-8717 for why this exists.
-     */
-    public PartitionIterator postReconciliationProcessing(PartitionIterator result)
-    {
-        return indexQueryPlan == null ? result : indexQueryPlan.postProcessor(this).apply(result);
-    }
-
     @Override
     public PartitionIterator executeInternal(ReadExecutionController controller)
     {
-        return postReconciliationProcessing(UnfilteredPartitionIterators.filter(executeLocally(controller), nowInSec()));
+        return UnfilteredPartitionIterators.filter(executeLocally(controller), nowInSec());
     }
 
     public ReadExecutionController executionController()
 
@@ -30,7 +30,6 @@
 import java.util.Optional;
 import java.util.Set;
 import java.util.concurrent.Callable;
-import java.util.function.Function;
 import java.util.function.Predicate;
 import java.util.function.Supplier;
 import javax.annotation.Nonnull;
@@ -51,7 +50,6 @@
 import org.apache.cassandra.db.lifecycle.LifecycleNewTracker;
 import org.apache.cassandra.db.marshal.AbstractType;
 import org.apache.cassandra.db.memtable.Memtable;
-import org.apache.cassandra.db.partitions.PartitionIterator;
 import org.apache.cassandra.db.partitions.PartitionUpdate;
 import org.apache.cassandra.db.partitions.UnfilteredPartitionIterator;
 import org.apache.cassandra.db.rows.Row;
@@ -134,13 +132,6 @@
  * cannot support a given Expression. After filtering, the set of candidate indexes are ranked according to the result
  * of getEstimatedResultRows and the most selective (i.e. the one expected to return the smallest number of results) is
  * chosen. A Searcher instance is then obtained from the searcherFor method and used to perform the actual Index lookup.
- * Finally, Indexes can define a post processing step to be performed on the coordinator, after results (partitions from
- * the primary table) have been received from replicas and reconciled. This post processing is defined as a
- * {@code java.util.functions.BiFunction<PartitionIterator, RowFilter, PartitionIterator>}, that is a function which takes as
- * arguments a PartitionIterator (containing the reconciled result rows) and a RowFilter (from the ReadCommand being
- * executed) and returns another iterator of partitions, possibly having transformed the initial results in some way.
- * The post processing function is obtained from the Index's postProcessorFor method; the built-in indexes which ship
- * with Cassandra return a no-op function here.
  *
  * An optional static method may be provided to validate custom index options (two variants are supported):
  *
@@ -1098,27 +1089,6 @@ default void validate(ReadCommand command) throws InvalidRequestException
          */
         Searcher searcherFor(ReadCommand command);
 
-        /**
-         * Return a function which performs post processing on the results of a partition range read command.
-         * In future, this may be used as a generalized mechanism for transforming results on the coordinator prior
-         * to returning them to the caller.
-         *
-         * This is used on the coordinator during execution of a range command to perform post
-         * processing of merged results obtained from the necessary replicas. This is the only way in which results are
-         * transformed in this way but this may change over time as usage is generalized.
-         * See CASSANDRA-8717 for further discussion.
-         *
-         * The function takes a PartitionIterator of the results from the replicas which has already been collated
-         * and reconciled, along with the command being executed. It returns another PartitionIterator containing the results
-         * of the transformation (which may be the same as the input if the transformation is a no-op).
-         *
-         * @param command the read command being executed
-         */
-        default Function<PartitionIterator, PartitionIterator> postProcessor(ReadCommand command)
-        {
-            return partitions -> partitions;
-        }
-
         /**
          * Transform an initial {@link RowFilter} into the filter that will still need to applied to a set of Rows after
          * the index has performed it's initial scan.
 
@@ -20,7 +20,6 @@
 import java.util.HashSet;
 import java.util.Set;
 import java.util.concurrent.TimeUnit;
-import java.util.function.Function;
 import javax.annotation.Nullable;
 
 import com.google.common.collect.ImmutableSet;
@@ -30,7 +29,6 @@
 import org.apache.cassandra.db.DecoratedKey;
 import org.apache.cassandra.db.ReadCommand;
 import org.apache.cassandra.db.filter.RowFilter;
-import org.apache.cassandra.db.partitions.PartitionIterator;
 import org.apache.cassandra.db.rows.Row;
 import org.apache.cassandra.db.rows.Unfiltered;
 import org.apache.cassandra.index.Index;
@@ -209,19 +207,6 @@ public Index.Searcher searcherFor(ReadCommand command)
                                                 DatabaseDescriptor.getRangeRpcTimeout(TimeUnit.MILLISECONDS));
     }
 
-    /**
-     * Called on coordinator after merging replica responses before returning to client
-     */
-    @Override
-    public Function<PartitionIterator, PartitionIterator> postProcessor(ReadCommand command)
-    {
-        if (!isTopK())
-            return partitions -> partitions;
-
-        // in case of top-k query, filter out rows that are not actually global top-K
-        return partitions -> (PartitionIterator) new TopKProcessor(command).filter(partitions);
-    }
-
     /**
      * @return a filter with all the expressions that are user-defined
      */
 
@@ -48,9 +48,7 @@
 import org.apache.cassandra.db.Keyspace;
 import org.apache.cassandra.db.ReadCommand;
 import org.apache.cassandra.db.filter.RowFilter;
-import org.apache.cassandra.db.partitions.BasePartitionIterator;
 import org.apache.cassandra.db.partitions.ParallelCommandProcessor;
-import org.apache.cassandra.db.partitions.PartitionIterator;
 import org.apache.cassandra.db.partitions.UnfilteredPartitionIterator;
 import org.apache.cassandra.db.rows.BaseRowIterator;
 import org.apache.cassandra.db.rows.Row;
@@ -60,7 +58,6 @@
 import org.apache.cassandra.index.sai.IndexContext;
 import org.apache.cassandra.index.sai.StorageAttachedIndex;
 import org.apache.cassandra.index.sai.utils.AbortedOperationException;
-import org.apache.cassandra.index.sai.utils.InMemoryPartitionIterator;
 import org.apache.cassandra.index.sai.utils.InMemoryUnfilteredPartitionIterator;
 import org.apache.cassandra.index.sai.utils.PartitionInfo;
 import org.apache.cassandra.index.sai.utils.TypeUtil;
@@ -74,18 +71,7 @@
 /**
  * Processor applied to SAI based ORDER BY queries. This class could likely be refactored into either two filter
  * methods depending on where the processing is happening or into two classes.
- *
- * This processor performs the following steps on a replica:
- * - collect LIMIT rows from partition iterator, making sure that all are valid.
- * - return rows in Primary Key order
- *
- * This processor performs the following steps on a coordinator:
- * - consume all rows from the provided partition iterator and sort them according to the specified order.
- *   For vectors, that is similarit score and for all others, that is the ordering defined by their
- *   {@link org.apache.cassandra.db.marshal.AbstractType}. If there are multiple vector indexes,
- *   the final score is the sum of all vector index scores.
- * - remove rows with the lowest scores from PQ if PQ size exceeds limit
- * - return rows from PQ in primary key order to caller
+ * Ordering on the coordinator is delegated to CQL.
  */
 public class TopKProcessor
 {
@@ -123,8 +109,8 @@ public TopKProcessor(ReadCommand command)
     /**
      * Executor to use for parallel index reads.
      * Defined by -Dcassandra.index_read.parallele=true/false, true by default.
-     *
-     * INDEX_READ uses 2 * cpus threads by default but can be overridden with -Dcassandra.index_read.parallel_thread_num=#value
+     * <p>
+     * INDEX_READ uses 2 * cpus threads by default but can be overridden with {@literal -Dcassandra.index_read.parallel_thread_num=<value>}
      *
      * @return stage to use, default INDEX_READ
      */
@@ -147,7 +133,7 @@ private static LocalAwareExecutorPlus getExecutor()
      * Filter given partitions and keep the rows with highest scores. In case of {@link UnfilteredPartitionIterator},
      * all tombstones will be kept. Caller must close the supplied iterator.
      */
-    public <U extends Unfiltered, R extends BaseRowIterator<U>, P extends BasePartitionIterator<R>> BasePartitionIterator<?> filter(P partitions)
+    public UnfilteredPartitionIterator filter(UnfilteredPartitionIterator partitions)
     {
         // filterInternal consumes the partitions iterator and creates a new one. Use a try-with-resources block
         // to ensure the original iterator is closed. We do not expect exceptions from filterInternal, but if they
@@ -159,12 +145,14 @@ public <U extends Unfiltered, R extends BaseRowIterator<U>, P extends BasePartit
         }
     }
 
-    private <U extends Unfiltered, R extends BaseRowIterator<U>, P extends BasePartitionIterator<R>> BasePartitionIterator<?> filterInternal(P partitions)
+    private UnfilteredPartitionIterator filterInternal(UnfilteredPartitionIterator partitions)
     {
         // priority queue ordered by score in descending order
         Comparator<Triple<PartitionInfo, Row, ?>> comparator;
         if (queryVector != null)
+        {
             comparator = Comparator.comparing((Triple<PartitionInfo, Row, ?> t) -> (Float) t.getRight()).reversed();
+        }
         else
         {
             comparator = Comparator.comparing(t -> (ByteBuffer) t.getRight(), indexContext.getValidator());
@@ -175,13 +163,15 @@ private <U extends Unfiltered, R extends BaseRowIterator<U>, P extends BaseParti
         // to store top-k results in primary key order
         TreeMap<PartitionInfo, TreeSet<Unfiltered>> unfilteredByPartition = new TreeMap<>(Comparator.comparing(p -> p.key));
 
-        if (PARALLEL_EXECUTOR != ImmediateExecutor.INSTANCE && partitions instanceof ParallelCommandProcessor) {
+        if (PARALLEL_EXECUTOR != ImmediateExecutor.INSTANCE && partitions instanceof ParallelCommandProcessor)
+        {
             ParallelCommandProcessor pIter = (ParallelCommandProcessor) partitions;
             var commands = pIter.getUninitializedCommands();
             List<CompletableFuture<PartitionResults>> results = new ArrayList<>(commands.size());
 
             int count = commands.size();
-            for (var command: commands) {
+            for (var command : commands)
+            {
                 CompletableFuture<PartitionResults> future = new CompletableFuture<>();
                 results.add(future);
 
@@ -201,7 +191,8 @@ private <U extends Unfiltered, R extends BaseRowIterator<U>, P extends BaseParti
                 });
             }
 
-            for (CompletableFuture<PartitionResults> triplesFuture: results) {
+            for (CompletableFuture<PartitionResults> triplesFuture : results)
+            {
                 PartitionResults pr;
                 try
                 {
@@ -216,10 +207,12 @@ private <U extends Unfiltered, R extends BaseRowIterator<U>, P extends BaseParti
                 if (pr == null)
                     continue;
                 topK.addAll(pr.rows);
-                for (var uf: pr.tombstones)
+                for (var uf : pr.tombstones)
                     addUnfiltered(unfilteredByPartition, pr.partitionInfo, uf);
             }
-        } else if (partitions instanceof StorageAttachedIndexSearcher.ScoreOrderedResultRetriever) {
+        }
+        else if (partitions instanceof StorageAttachedIndexSearcher.ScoreOrderedResultRetriever)
+        {
             // FilteredPartitions does not implement ParallelizablePartitionIterator.
             // Realistically, this won't benefit from parallelizm as these are coming from in-memory/memtable data.
             int rowsMatched = 0;
@@ -232,7 +225,9 @@ private <U extends Unfiltered, R extends BaseRowIterator<U>, P extends BaseParti
                     rowsMatched += processSingleRowPartition(unfilteredByPartition, partitionRowIterator);
                 }
             }
-        } else {
+        }
+        else
+        {
             // FilteredPartitions does not implement ParallelizablePartitionIterator.
             // Realistically, this won't benefit from parallelizm as these are coming from in-memory/memtable data.
             while (partitions.hasNext())
@@ -244,7 +239,7 @@ private <U extends Unfiltered, R extends BaseRowIterator<U>, P extends BaseParti
                     {
                         PartitionResults pr = processPartition(partitionRowIterator);
                         topK.addAll(pr.rows);
-                        for (var uf: pr.tombstones)
+                        for (var uf : pr.tombstones)
                             addUnfiltered(unfilteredByPartition, pr.partitionInfo, uf);
                     }
                     else
@@ -255,7 +250,6 @@ private <U extends Unfiltered, R extends BaseRowIterator<U>, P extends BaseParti
                             topK.add(Triple.of(PartitionInfo.create(partitionRowIterator), row, row.getCell(expression.column()).buffer()));
                         }
                     }
-
                 }
             }
         }
@@ -264,17 +258,17 @@ private <U extends Unfiltered, R extends BaseRowIterator<U>, P extends BaseParti
         for (var triple : topK.getUnsortedShared())
             addUnfiltered(unfilteredByPartition, triple.getLeft(), triple.getMiddle());
 
-        if (partitions instanceof PartitionIterator)
-            return new InMemoryPartitionIterator(command, unfilteredByPartition);
         return new InMemoryUnfilteredPartitionIterator(command, unfilteredByPartition);
     }
 
-    private class PartitionResults {
+    private class PartitionResults
+    {
         final PartitionInfo partitionInfo;
         final SortedSet<Unfiltered> tombstones = new TreeSet<>(command.metadata().comparator);
         final List<Triple<PartitionInfo, Row, Float>> rows = new ArrayList<>();
 
-        PartitionResults(PartitionInfo partitionInfo) {
+        PartitionResults(PartitionInfo partitionInfo)
+        {
             this.partitionInfo = partitionInfo;
         }
 
@@ -283,15 +277,17 @@ void addTombstone(Unfiltered uf)
             tombstones.add(uf);
         }
 
-        void addRow(Triple<PartitionInfo, Row, Float> triple) {
+        void addRow(Triple<PartitionInfo, Row, Float> triple)
+        {
             rows.add(triple);
         }
     }
 
     /**
      * Processes a single partition, calculating scores for rows and extracting tombstones.
      */
-    private PartitionResults processPartition(BaseRowIterator<?> partitionRowIterator) {
+    private PartitionResults processPartition(BaseRowIterator<?> partitionRowIterator)
+    {
         // Compute key and static row score once per partition
         DecoratedKey key = partitionRowIterator.partitionKey();
         Row staticRow = partitionRowIterator.staticRow();
@@ -322,7 +318,8 @@ private PartitionResults processPartition(BaseRowIterator<?> partitionRowIterato
      * Processes a single partition, without scoring it.
      */
     private int processSingleRowPartition(TreeMap<PartitionInfo, TreeSet<Unfiltered>> unfilteredByPartition,
-                                          BaseRowIterator<?> partitionRowIterator) {
+                                          BaseRowIterator<?> partitionRowIterator)
+    {
         if (!partitionRowIterator.hasNext())
             return 0;
Original file line number	Diff line number	Diff line change
`@@ -286,7 +286,7 @@ public void validateCounterForWrite(TableMetadata metadata, ClientState clientSt`
`286`	`286`	`}`
`287`	`287`
`288`	`288`	`/**`
`289`		`- * With a replication factor greater than one, reads that contact more than one replica will require`
	`289`	`+ * With a replication factor greater than one, reads that contact more than one replica will require`
`290`	`290`	`* reconciliation of the individual replica results at the coordinator.`
`291`	`291`	`*`
`292`	`292`	`* @return true if reads at this consistency level require merging at the coordinator`