49
49
import org .apache .cassandra .db .ReadCommand ;
50
50
import org .apache .cassandra .db .SinglePartitionReadCommand ;
51
51
import org .apache .cassandra .db .filter .RowFilter ;
52
- import org .apache .cassandra .db .partitions .BasePartitionIterator ;
53
52
import org .apache .cassandra .db .partitions .ParallelCommandProcessor ;
54
- import org .apache .cassandra .db .partitions .PartitionIterator ;
55
53
import org .apache .cassandra .db .partitions .UnfilteredPartitionIterator ;
56
54
import org .apache .cassandra .db .rows .BaseRowIterator ;
57
55
import org .apache .cassandra .db .rows .Row ;
62
60
import org .apache .cassandra .index .sai .IndexContext ;
63
61
import org .apache .cassandra .index .sai .StorageAttachedIndex ;
64
62
import org .apache .cassandra .index .sai .utils .AbortedOperationException ;
65
- import org .apache .cassandra .index .sai .utils .InMemoryPartitionIterator ;
66
63
import org .apache .cassandra .index .sai .utils .InMemoryUnfilteredPartitionIterator ;
67
64
import org .apache .cassandra .index .sai .utils .PartitionInfo ;
68
65
import org .apache .cassandra .index .sai .utils .PrimaryKey ;
77
74
/**
78
75
* Processor applied to SAI based ORDER BY queries. This class could likely be refactored into either two filter
79
76
* methods depending on where the processing is happening or into two classes.
80
- *
81
- * This processor performs the following steps on a replica:
82
- * - collect LIMIT rows from partition iterator, making sure that all are valid.
83
- * - return rows in Primary Key order
84
- *
85
- * This processor performs the following steps on a coordinator:
86
- * - consume all rows from the provided partition iterator and sort them according to the specified order.
87
- * For vectors, that is similarit score and for all others, that is the ordering defined by their
88
- * {@link org.apache.cassandra.db.marshal.AbstractType}. If there are multiple vector indexes,
89
- * the final score is the sum of all vector index scores.
90
- * - remove rows with the lowest scores from PQ if PQ size exceeds limit
91
- * - return rows from PQ in primary key order to caller
77
+ * Ordering on the coordinator is delegated to CQL.
92
78
*/
93
79
public class TopKProcessor
94
80
{
@@ -126,8 +112,8 @@ public TopKProcessor(ReadCommand command)
126
112
/**
127
113
* Executor to use for parallel index reads.
128
114
* Defined by -Dcassandra.index_read.parallele=true/false, true by default.
129
- *
130
- * INDEX_READ uses 2 * cpus threads by default but can be overridden with -Dcassandra.index_read.parallel_thread_num=# value
115
+ * </p>
116
+ * INDEX_READ uses 2 * cpus threads by default but can be overridden with {@literal -Dcassandra.index_read.parallel_thread_num=< value>}
131
117
*
132
118
* @return stage to use, default INDEX_READ
133
119
*/
@@ -150,7 +136,7 @@ private static LocalAwareExecutorPlus getExecutor()
150
136
* Filter given partitions and keep the rows with highest scores. In case of {@link UnfilteredPartitionIterator},
151
137
* all tombstones will be kept. Caller must close the supplied iterator.
152
138
*/
153
- public < U extends Unfiltered , R extends BaseRowIterator < U >, P extends BasePartitionIterator < R >> BasePartitionIterator <?> filter (P partitions )
139
+ public UnfilteredPartitionIterator filter (UnfilteredPartitionIterator partitions )
154
140
{
155
141
// filterInternal consumes the partitions iterator and creates a new one. Use a try-with-resources block
156
142
// to ensure the original iterator is closed. We do not expect exceptions from filterInternal, but if they
@@ -162,12 +148,14 @@ public <U extends Unfiltered, R extends BaseRowIterator<U>, P extends BasePartit
162
148
}
163
149
}
164
150
165
- private < U extends Unfiltered , R extends BaseRowIterator < U >, P extends BasePartitionIterator < R >> BasePartitionIterator <?> filterInternal (P partitions )
151
+ private UnfilteredPartitionIterator filterInternal (UnfilteredPartitionIterator partitions )
166
152
{
167
153
// priority queue ordered by score in descending order
168
154
Comparator <Triple <PartitionInfo , Row , ?>> comparator ;
169
155
if (queryVector != null )
156
+ {
170
157
comparator = Comparator .comparing ((Triple <PartitionInfo , Row , ?> t ) -> (Float ) t .getRight ()).reversed ();
158
+ }
171
159
else
172
160
{
173
161
comparator = Comparator .comparing (t -> (ByteBuffer ) t .getRight (), indexContext .getValidator ());
@@ -178,13 +166,15 @@ private <U extends Unfiltered, R extends BaseRowIterator<U>, P extends BaseParti
178
166
// to store top-k results in primary key order
179
167
TreeMap <PartitionInfo , TreeSet <Unfiltered >> unfilteredByPartition = new TreeMap <>(Comparator .comparing (p -> p .key ));
180
168
181
- if (PARALLEL_EXECUTOR != ImmediateExecutor .INSTANCE && partitions instanceof ParallelCommandProcessor ) {
169
+ if (PARALLEL_EXECUTOR != ImmediateExecutor .INSTANCE && partitions instanceof ParallelCommandProcessor )
170
+ {
182
171
ParallelCommandProcessor pIter = (ParallelCommandProcessor ) partitions ;
183
172
List <Pair <PrimaryKey , SinglePartitionReadCommand >> commands = pIter .getUninitializedCommands ();
184
173
List <CompletableFuture <PartitionResults >> results = new ArrayList <>(commands .size ());
185
174
186
175
int count = commands .size ();
187
- for (Pair <PrimaryKey , SinglePartitionReadCommand > command : commands ) {
176
+ for (var command : commands )
177
+ {
188
178
CompletableFuture <PartitionResults > future = new CompletableFuture <>();
189
179
results .add (future );
190
180
@@ -204,7 +194,8 @@ private <U extends Unfiltered, R extends BaseRowIterator<U>, P extends BaseParti
204
194
});
205
195
}
206
196
207
- for (CompletableFuture <PartitionResults > triplesFuture : results ) {
197
+ for (CompletableFuture <PartitionResults > triplesFuture : results )
198
+ {
208
199
PartitionResults pr ;
209
200
try
210
201
{
@@ -219,10 +210,12 @@ private <U extends Unfiltered, R extends BaseRowIterator<U>, P extends BaseParti
219
210
if (pr == null )
220
211
continue ;
221
212
topK .addAll (pr .rows );
222
- for (Unfiltered uf : pr .tombstones )
213
+ for (var uf : pr .tombstones )
223
214
addUnfiltered (unfilteredByPartition , pr .partitionInfo , uf );
224
215
}
225
- } else if (partitions instanceof StorageAttachedIndexSearcher .ScoreOrderedResultRetriever ) {
216
+ }
217
+ else if (partitions instanceof StorageAttachedIndexSearcher .ScoreOrderedResultRetriever )
218
+ {
226
219
// FilteredPartitions does not implement ParallelizablePartitionIterator.
227
220
// Realistically, this won't benefit from parallelizm as these are coming from in-memory/memtable data.
228
221
int rowsMatched = 0 ;
@@ -235,19 +228,21 @@ private <U extends Unfiltered, R extends BaseRowIterator<U>, P extends BaseParti
235
228
rowsMatched += processSingleRowPartition (unfilteredByPartition , partitionRowIterator );
236
229
}
237
230
}
238
- } else {
231
+ }
232
+ else
233
+ {
239
234
// FilteredPartitions does not implement ParallelizablePartitionIterator.
240
235
// Realistically, this won't benefit from parallelism as these are coming from in-memory/memtable data.
241
236
while (partitions .hasNext ())
242
237
{
243
238
// have to close to move to the next partition, otherwise hasNext() fails
244
- try (R partitionRowIterator = partitions .next ())
239
+ try (var partitionRowIterator = partitions .next ())
245
240
{
246
241
if (queryVector != null )
247
242
{
248
243
PartitionResults pr = processPartition (partitionRowIterator );
249
244
topK .addAll (pr .rows );
250
- for (var uf : pr .tombstones )
245
+ for (var uf : pr .tombstones )
251
246
addUnfiltered (unfilteredByPartition , pr .partitionInfo , uf );
252
247
}
253
248
else
@@ -258,7 +253,6 @@ private <U extends Unfiltered, R extends BaseRowIterator<U>, P extends BaseParti
258
253
topK .add (Triple .of (PartitionInfo .create (partitionRowIterator ), row , row .getCell (expression .column ()).buffer ()));
259
254
}
260
255
}
261
-
262
256
}
263
257
}
264
258
}
@@ -267,17 +261,17 @@ private <U extends Unfiltered, R extends BaseRowIterator<U>, P extends BaseParti
267
261
for (var triple : topK .getUnsortedShared ())
268
262
addUnfiltered (unfilteredByPartition , triple .getLeft (), triple .getMiddle ());
269
263
270
- if (partitions instanceof PartitionIterator )
271
- return new InMemoryPartitionIterator (command , unfilteredByPartition );
272
264
return new InMemoryUnfilteredPartitionIterator (command , unfilteredByPartition );
273
265
}
274
266
275
- private class PartitionResults {
267
+ private class PartitionResults
268
+ {
276
269
final PartitionInfo partitionInfo ;
277
270
final SortedSet <Unfiltered > tombstones = new TreeSet <>(command .metadata ().comparator );
278
271
final List <Triple <PartitionInfo , Row , Float >> rows = new ArrayList <>();
279
272
280
- PartitionResults (PartitionInfo partitionInfo ) {
273
+ PartitionResults (PartitionInfo partitionInfo )
274
+ {
281
275
this .partitionInfo = partitionInfo ;
282
276
}
283
277
@@ -286,15 +280,17 @@ void addTombstone(Unfiltered uf)
286
280
tombstones .add (uf );
287
281
}
288
282
289
- void addRow (Triple <PartitionInfo , Row , Float > triple ) {
283
+ void addRow (Triple <PartitionInfo , Row , Float > triple )
284
+ {
290
285
rows .add (triple );
291
286
}
292
287
}
293
288
294
289
/**
295
290
* Processes a single partition, calculating scores for rows and extracting tombstones.
296
291
*/
297
- private PartitionResults processPartition (BaseRowIterator <?> partitionRowIterator ) {
292
+ private PartitionResults processPartition (BaseRowIterator <?> partitionRowIterator )
293
+ {
298
294
// Compute key and static row score once per partition
299
295
DecoratedKey key = partitionRowIterator .partitionKey ();
300
296
Row staticRow = partitionRowIterator .staticRow ();
@@ -325,7 +321,8 @@ private PartitionResults processPartition(BaseRowIterator<?> partitionRowIterato
325
321
* Processes a single partition, without scoring it.
326
322
*/
327
323
private int processSingleRowPartition (TreeMap <PartitionInfo , TreeSet <Unfiltered >> unfilteredByPartition ,
328
- BaseRowIterator <?> partitionRowIterator ) {
324
+ BaseRowIterator <?> partitionRowIterator )
325
+ {
329
326
if (!partitionRowIterator .hasNext ())
330
327
return 0 ;
331
328
0 commit comments