@@ -33,6 +33,7 @@ final class DocIdsWriter {
33
33
private static final byte CONTINUOUS_IDS = (byte ) -2 ;
34
34
private static final byte BITSET_IDS = (byte ) -1 ;
35
35
private static final byte DELTA_BPV_16 = (byte ) 16 ;
36
+ private static final byte BPV_21 = (byte ) 21 ;
36
37
private static final byte BPV_24 = (byte ) 24 ;
37
38
private static final byte BPV_32 = (byte ) 32 ;
38
39
// These signs are legacy, should no longer be used in the writing side.
@@ -115,9 +116,33 @@ void writeDocIds(int[] docIds, int start, int count, DataOutput out) throws IOEx
115
116
out .writeShort ((short ) scratch [count - 1 ]);
116
117
}
117
118
} else {
118
- if (max <= 0xFFFFFF ) {
119
+ if (max <= 0x1FFFFF && version >= BKDWriter .VERSION_VECTORIZE_BPV24_AND_INTRODUCE_BPV21 ) {
120
+ out .writeByte (BPV_21 );
121
+ final int oneThird = floorToMultipleOf16 (count / 3 );
122
+ final int numInts = oneThird * 2 ;
123
+ for (int i = 0 ; i < numInts ; i ++) {
124
+ scratch [i ] = docIds [i + start ] << 11 ;
125
+ }
126
+ for (int i = 0 ; i < oneThird ; i ++) {
127
+ final int longIdx = i + numInts + start ;
128
+ scratch [i ] |= docIds [longIdx ] & 0x7FF ;
129
+ scratch [i + oneThird ] |= (docIds [longIdx ] >>> 11 ) & 0x7FF ;
130
+ }
131
+ for (int i = 0 ; i < numInts ; i ++) {
132
+ out .writeInt (scratch [i ]);
133
+ }
134
+ int i = oneThird * 3 ;
135
+ for (; i < count - 2 ; i += 3 ) {
136
+ out .writeLong (
137
+ ((long ) docIds [i ]) | (((long ) docIds [i + 1 ]) << 21 ) | (((long ) docIds [i + 2 ]) << 42 ));
138
+ }
139
+ for (; i < count ; ++i ) {
140
+ out .writeShort ((short ) docIds [start + i ]);
141
+ out .writeByte ((byte ) (docIds [start + i ] >>> 16 ));
142
+ }
143
+ } else if (max <= 0xFFFFFF ) {
119
144
out .writeByte (BPV_24 );
120
- if (version < BKDWriter .VERSION_VECTORIZED_DOCID ) {
145
+ if (version < BKDWriter .VERSION_VECTORIZE_BPV24_AND_INTRODUCE_BPV21 ) {
121
146
writeScalarInts24 (docIds , start , count , out );
122
147
} else {
123
148
// encode the docs in the format that can be vectorized decoded.
@@ -224,8 +249,11 @@ void readInts(IndexInput in, int count, int[] docIDs) throws IOException {
224
249
case DELTA_BPV_16 :
225
250
readDelta16 (in , count , docIDs );
226
251
break ;
252
+ case BPV_21 :
253
+ readInts21 (in , count , docIDs );
254
+ break ;
227
255
case BPV_24 :
228
- if (version < BKDWriter .VERSION_VECTORIZED_DOCID ) {
256
+ if (version < BKDWriter .VERSION_VECTORIZE_BPV24_AND_INTRODUCE_BPV21 ) {
229
257
readScalarInts24 (in , count , docIDs );
230
258
} else {
231
259
readInts24 (in , count , docIDs );
@@ -306,13 +334,58 @@ private static void decode16(int[] docIDs, int half, int min) {
306
334
}
307
335
}
308
336
337
+ private static int floorToMultipleOf16 (int n ) {
338
+ assert n >= 0 ;
339
+ return n & 0xFFFFFFF0 ;
340
+ }
341
+
342
+ private void readInts21 (IndexInput in , int count , int [] docIDs ) throws IOException {
343
+ int oneThird = floorToMultipleOf16 (count / 3 );
344
+ int numInts = oneThird << 1 ;
345
+ in .readInts (scratch , 0 , numInts );
346
+ if (count == BKDConfig .DEFAULT_MAX_POINTS_IN_LEAF_NODE ) {
347
+ // Same format, but enabling the JVM to specialize the decoding logic for the default number
348
+ // of points per node proved to help on benchmarks
349
+ decode21 (
350
+ docIDs ,
351
+ scratch ,
352
+ floorToMultipleOf16 (BKDConfig .DEFAULT_MAX_POINTS_IN_LEAF_NODE / 3 ),
353
+ floorToMultipleOf16 (BKDConfig .DEFAULT_MAX_POINTS_IN_LEAF_NODE / 3 ) * 2 );
354
+ } else {
355
+ decode21 (docIDs , scratch , oneThird , numInts );
356
+ }
357
+ int i = oneThird * 3 ;
358
+ for (; i < count - 2 ; i += 3 ) {
359
+ long l = in .readLong ();
360
+ docIDs [i ] = (int ) (l & 0x1FFFFFL );
361
+ docIDs [i + 1 ] = (int ) ((l >>> 21 ) & 0x1FFFFFL );
362
+ docIDs [i + 2 ] = (int ) (l >>> 42 );
363
+ }
364
+ for (; i < count ; ++i ) {
365
+ docIDs [i ] = (in .readShort () & 0xFFFF ) | (in .readByte () & 0xFF ) << 16 ;
366
+ }
367
+ }
368
+
369
+ private static void decode21 (int [] docIds , int [] scratch , int oneThird , int numInts ) {
370
+ for (int i = 0 ; i < numInts ; ++i ) {
371
+ docIds [i ] = scratch [i ] >>> 11 ;
372
+ }
373
+ for (int i = 0 ; i < oneThird ; i ++) {
374
+ docIds [i + numInts ] = (scratch [i ] & 0x7FF ) | ((scratch [i + oneThird ] & 0x7FF ) << 11 );
375
+ }
376
+ }
377
+
309
378
private void readInts24 (IndexInput in , int count , int [] docIDs ) throws IOException {
310
379
int quarter = count >> 2 ;
311
380
int numInts = quarter * 3 ;
312
381
in .readInts (scratch , 0 , numInts );
313
382
if (count == BKDConfig .DEFAULT_MAX_POINTS_IN_LEAF_NODE ) {
314
383
// Same format, but enabling the JVM to specialize the decoding logic for the default number
315
384
// of points per node proved to help on benchmarks
385
+ assert floorToMultipleOf16 (quarter ) == quarter
386
+ : "We are relying on the fact that quarter of BKDConfig.DEFAULT_MAX_POINTS_IN_LEAF_NODE"
387
+ + " is a multiple of 16 to vectorize the decoding loop,"
388
+ + " please check performance issue if you want to break this assumption." ;
316
389
decode24 (
317
390
docIDs ,
318
391
scratch ,
@@ -380,8 +453,11 @@ void readInts(IndexInput in, int count, IntersectVisitor visitor, int[] buffer)
380
453
case DELTA_BPV_16 :
381
454
readDelta16 (in , count , visitor );
382
455
break ;
456
+ case BPV_21 :
457
+ readInts21 (in , count , visitor , buffer );
458
+ break ;
383
459
case BPV_24 :
384
- if (version < BKDWriter .VERSION_VECTORIZED_DOCID ) {
460
+ if (version < BKDWriter .VERSION_VECTORIZE_BPV24_AND_INTRODUCE_BPV21 ) {
385
461
readScalarInts24 (in , count , visitor );
386
462
} else {
387
463
readInts24 (in , count , visitor , buffer );
@@ -425,6 +501,14 @@ private void readDelta16(IndexInput in, int count, IntersectVisitor visitor) thr
425
501
visitor .visit (scratchIntsRef );
426
502
}
427
503
504
+ private void readInts21 (IndexInput in , int count , IntersectVisitor visitor , int [] buffer )
505
+ throws IOException {
506
+ readInts21 (in , count , buffer );
507
+ scratchIntsRef .ints = buffer ;
508
+ scratchIntsRef .length = count ;
509
+ visitor .visit (scratchIntsRef );
510
+ }
511
+
428
512
private void readInts24 (IndexInput in , int count , IntersectVisitor visitor , int [] buffer )
429
513
throws IOException {
430
514
readInts24 (in , count , buffer );
0 commit comments