Skip to content

Commit a2c6d3a

Browse files
gf2121 and expani authored
Introduce new encoding of BPV 21 for DocIdsWriter used in BKD Tree (#14361)
Co-Authored-by: expani <[email protected]>
1 parent 9792d88 commit a2c6d3a

File tree

4 files changed

+93
-7
lines changed

4 files changed

+93
-7
lines changed

lucene/CHANGES.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -180,6 +180,8 @@ Optimizations
180180

181181
# GITHUB#14203: Decode doc ids in BKD leaves with auto-vectorized loops when using DEFAULT_MAX_POINTS_IN_LEAF_NODE. (Guo Feng)
182182

183+
# GITHUB#14361: Introduce new encoding of BPV 21 for DocIdsWriter used in BKD Tree. (Aniketh Jain, Guo Feng)
184+
183185
Bug Fixes
184186
---------------------
185187

lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90PointsFormat.java

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ public final class Lucene90PointsFormat extends PointsFormat {
6767
private static final Map<Integer, Integer> VERSION_TO_BKD_VERSION =
6868
Map.of(
6969
VERSION_START, BKDWriter.VERSION_META_FILE,
70-
VERSION_BKD_VECTORIZED_BPV24, BKDWriter.VERSION_VECTORIZED_DOCID);
70+
VERSION_BKD_VECTORIZED_BPV24, BKDWriter.VERSION_VECTORIZE_BPV24_AND_INTRODUCE_BPV21);
7171

7272
private final int version;
7373

lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -85,8 +85,8 @@ public class BKDWriter implements Closeable {
8585
public static final int VERSION_SELECTIVE_INDEXING = 6;
8686
public static final int VERSION_LOW_CARDINALITY_LEAVES = 7;
8787
public static final int VERSION_META_FILE = 9;
88-
public static final int VERSION_VECTORIZED_DOCID = 10;
89-
public static final int VERSION_CURRENT = VERSION_VECTORIZED_DOCID;
88+
public static final int VERSION_VECTORIZE_BPV24_AND_INTRODUCE_BPV21 = 10;
89+
public static final int VERSION_CURRENT = VERSION_VECTORIZE_BPV24_AND_INTRODUCE_BPV21;
9090

9191
/** Number of splits before we compute the exact bounding box of an inner node. */
9292
private static final int SPLITS_BEFORE_EXACT_BOUNDS = 4;

lucene/core/src/java/org/apache/lucene/util/bkd/DocIdsWriter.java

Lines changed: 88 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ final class DocIdsWriter {
3333
private static final byte CONTINUOUS_IDS = (byte) -2;
3434
private static final byte BITSET_IDS = (byte) -1;
3535
private static final byte DELTA_BPV_16 = (byte) 16;
36+
private static final byte BPV_21 = (byte) 21;
3637
private static final byte BPV_24 = (byte) 24;
3738
private static final byte BPV_32 = (byte) 32;
3839
// These signs are legacy, should no longer be used in the writing side.
@@ -115,9 +116,33 @@ void writeDocIds(int[] docIds, int start, int count, DataOutput out) throws IOEx
115116
out.writeShort((short) scratch[count - 1]);
116117
}
117118
} else {
118-
if (max <= 0xFFFFFF) {
119+
if (max <= 0x1FFFFF && version >= BKDWriter.VERSION_VECTORIZE_BPV24_AND_INTRODUCE_BPV21) {
120+
out.writeByte(BPV_21);
121+
final int oneThird = floorToMultipleOf16(count / 3);
122+
final int numInts = oneThird * 2;
123+
for (int i = 0; i < numInts; i++) {
124+
scratch[i] = docIds[i + start] << 11;
125+
}
126+
for (int i = 0; i < oneThird; i++) {
127+
final int longIdx = i + numInts + start;
128+
scratch[i] |= docIds[longIdx] & 0x7FF;
129+
scratch[i + oneThird] |= (docIds[longIdx] >>> 11) & 0x7FF;
130+
}
131+
for (int i = 0; i < numInts; i++) {
132+
out.writeInt(scratch[i]);
133+
}
134+
int i = oneThird * 3;
135+
for (; i < count - 2; i += 3) {
136+
out.writeLong(
137+
((long) docIds[i]) | (((long) docIds[i + 1]) << 21) | (((long) docIds[i + 2]) << 42));
138+
}
139+
for (; i < count; ++i) {
140+
out.writeShort((short) docIds[start + i]);
141+
out.writeByte((byte) (docIds[start + i] >>> 16));
142+
}
143+
} else if (max <= 0xFFFFFF) {
119144
out.writeByte(BPV_24);
120-
if (version < BKDWriter.VERSION_VECTORIZED_DOCID) {
145+
if (version < BKDWriter.VERSION_VECTORIZE_BPV24_AND_INTRODUCE_BPV21) {
121146
writeScalarInts24(docIds, start, count, out);
122147
} else {
123148
// encode the docs in the format that can be vectorized decoded.
@@ -224,8 +249,11 @@ void readInts(IndexInput in, int count, int[] docIDs) throws IOException {
224249
case DELTA_BPV_16:
225250
readDelta16(in, count, docIDs);
226251
break;
252+
case BPV_21:
253+
readInts21(in, count, docIDs);
254+
break;
227255
case BPV_24:
228-
if (version < BKDWriter.VERSION_VECTORIZED_DOCID) {
256+
if (version < BKDWriter.VERSION_VECTORIZE_BPV24_AND_INTRODUCE_BPV21) {
229257
readScalarInts24(in, count, docIDs);
230258
} else {
231259
readInts24(in, count, docIDs);
@@ -306,13 +334,58 @@ private static void decode16(int[] docIDs, int half, int min) {
306334
}
307335
}
308336

337+
private static int floorToMultipleOf16(int n) {
338+
assert n >= 0;
339+
return n & 0xFFFFFFF0;
340+
}
341+
342+
private void readInts21(IndexInput in, int count, int[] docIDs) throws IOException {
343+
int oneThird = floorToMultipleOf16(count / 3);
344+
int numInts = oneThird << 1;
345+
in.readInts(scratch, 0, numInts);
346+
if (count == BKDConfig.DEFAULT_MAX_POINTS_IN_LEAF_NODE) {
347+
// Same format, but enabling the JVM to specialize the decoding logic for the default number
348+
// of points per node proved to help on benchmarks
349+
decode21(
350+
docIDs,
351+
scratch,
352+
floorToMultipleOf16(BKDConfig.DEFAULT_MAX_POINTS_IN_LEAF_NODE / 3),
353+
floorToMultipleOf16(BKDConfig.DEFAULT_MAX_POINTS_IN_LEAF_NODE / 3) * 2);
354+
} else {
355+
decode21(docIDs, scratch, oneThird, numInts);
356+
}
357+
int i = oneThird * 3;
358+
for (; i < count - 2; i += 3) {
359+
long l = in.readLong();
360+
docIDs[i] = (int) (l & 0x1FFFFFL);
361+
docIDs[i + 1] = (int) ((l >>> 21) & 0x1FFFFFL);
362+
docIDs[i + 2] = (int) (l >>> 42);
363+
}
364+
for (; i < count; ++i) {
365+
docIDs[i] = (in.readShort() & 0xFFFF) | (in.readByte() & 0xFF) << 16;
366+
}
367+
}
368+
369+
private static void decode21(int[] docIds, int[] scratch, int oneThird, int numInts) {
370+
for (int i = 0; i < numInts; ++i) {
371+
docIds[i] = scratch[i] >>> 11;
372+
}
373+
for (int i = 0; i < oneThird; i++) {
374+
docIds[i + numInts] = (scratch[i] & 0x7FF) | ((scratch[i + oneThird] & 0x7FF) << 11);
375+
}
376+
}
377+
309378
private void readInts24(IndexInput in, int count, int[] docIDs) throws IOException {
310379
int quarter = count >> 2;
311380
int numInts = quarter * 3;
312381
in.readInts(scratch, 0, numInts);
313382
if (count == BKDConfig.DEFAULT_MAX_POINTS_IN_LEAF_NODE) {
314383
// Same format, but enabling the JVM to specialize the decoding logic for the default number
315384
// of points per node proved to help on benchmarks
385+
assert floorToMultipleOf16(quarter) == quarter
386+
: "We are relying on the fact that quarter of BKDConfig.DEFAULT_MAX_POINTS_IN_LEAF_NODE"
387+
+ " is a multiple of 16 to vectorize the decoding loop,"
388+
+ " please check performance issue if you want to break this assumption.";
316389
decode24(
317390
docIDs,
318391
scratch,
@@ -380,8 +453,11 @@ void readInts(IndexInput in, int count, IntersectVisitor visitor, int[] buffer)
380453
case DELTA_BPV_16:
381454
readDelta16(in, count, visitor);
382455
break;
456+
case BPV_21:
457+
readInts21(in, count, visitor, buffer);
458+
break;
383459
case BPV_24:
384-
if (version < BKDWriter.VERSION_VECTORIZED_DOCID) {
460+
if (version < BKDWriter.VERSION_VECTORIZE_BPV24_AND_INTRODUCE_BPV21) {
385461
readScalarInts24(in, count, visitor);
386462
} else {
387463
readInts24(in, count, visitor, buffer);
@@ -425,6 +501,14 @@ private void readDelta16(IndexInput in, int count, IntersectVisitor visitor) thr
425501
visitor.visit(scratchIntsRef);
426502
}
427503

504+
private void readInts21(IndexInput in, int count, IntersectVisitor visitor, int[] buffer)
505+
throws IOException {
506+
readInts21(in, count, buffer);
507+
scratchIntsRef.ints = buffer;
508+
scratchIntsRef.length = count;
509+
visitor.visit(scratchIntsRef);
510+
}
511+
428512
private void readInts24(IndexInput in, int count, IntersectVisitor visitor, int[] buffer)
429513
throws IOException {
430514
readInts24(in, count, buffer);

0 commit comments

Comments
 (0)