Skip to content

Commit d0ece89

Browse files
authored
LUCENE-10315: Speed up DocIdsWriter by ForUtil (#797)
1 parent b80658a commit d0ece89

File tree

10 files changed

+267
-78
lines changed

10 files changed

+267
-78
lines changed

lucene/CHANGES.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,8 @@ Optimizations
6262
* LUCENE-10503: Potential speedup for pure disjunctions whose clauses produce
6363
scores that are very close to each other. (Adrien Grand)
6464

65+
* LUCENE-10315: Use SIMD instructions to decode BKD doc IDs. (Guo Feng, Adrien Grand, Ignacio Vera)
66+
6567
Bug Fixes
6668
---------------------
6769
* LUCENE-10477: Highlighter: WeightedSpanTermExtractor.extractWeightedSpanTerms to Query#rewrite

lucene/backward-codecs/src/java/org/apache/lucene/backward_codecs/store/EndiannessReverserIndexInput.java

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,14 @@ public void readLongs(long[] dst, int offset, int length) throws IOException {
8080
}
8181
}
8282

83+
@Override
84+
public void readInts(int[] dst, int offset, int length) throws IOException {
85+
in.readInts(dst, offset, length); // bulk read through the delegate `in` first
86+
for (int i = 0; i < length; ++i) {
87+
dst[offset + i] = Integer.reverseBytes(dst[offset + i]); // then flip the byte order of each int in place
88+
}
89+
}
90+
8391
@Override
8492
public void readFloats(float[] dst, int offset, int length) throws IOException {
8593
in.readFloats(dst, offset, length);

lucene/core/src/java/org/apache/lucene/store/ByteBufferGuard.java

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
import java.io.IOException;
2020
import java.nio.ByteBuffer;
2121
import java.nio.FloatBuffer;
22+
import java.nio.IntBuffer;
2223
import java.nio.LongBuffer;
2324
import java.util.concurrent.atomic.AtomicInteger;
2425

@@ -138,6 +139,11 @@ public void getLongs(LongBuffer receiver, long[] dst, int offset, int length) {
138139
receiver.get(dst, offset, length);
139140
}
140141

142+
public void getInts(IntBuffer receiver, int[] dst, int offset, int length) {
143+
ensureValid(); // NOTE(review): same pre-check as the sibling getLongs/getFloats; presumably rejects access after invalidation — confirm
144+
receiver.get(dst, offset, length); // relative bulk get: copies `length` ints into dst[offset..] and advances receiver's position
145+
}
146+
141147
public void getFloats(FloatBuffer receiver, float[] dst, int offset, int length) {
142148
ensureValid();
143149
receiver.get(dst, offset, length);

lucene/core/src/java/org/apache/lucene/store/ByteBufferIndexInput.java

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,7 @@
2222
import java.nio.ByteBuffer;
2323
import java.nio.ByteOrder;
2424
import java.nio.FloatBuffer;
25+
import java.nio.IntBuffer;
2526
import java.nio.LongBuffer;
2627

2728
/**
@@ -36,6 +37,7 @@
3637
public abstract class ByteBufferIndexInput extends IndexInput implements RandomAccessInput {
3738
private static final FloatBuffer EMPTY_FLOATBUFFER = FloatBuffer.allocate(0);
3839
private static final LongBuffer EMPTY_LONGBUFFER = LongBuffer.allocate(0);
40+
private static final IntBuffer EMPTY_INTBUFFER = IntBuffer.allocate(0);
3941

4042
protected final long length;
4143
protected final long chunkSizeMask;
@@ -46,6 +48,7 @@ public abstract class ByteBufferIndexInput extends IndexInput implements RandomA
4648
protected int curBufIndex = -1;
4749
protected ByteBuffer curBuf; // redundant for speed: buffers[curBufIndex]
4850
private LongBuffer[] curLongBufferViews;
51+
private IntBuffer[] curIntBufferViews;
4952
private FloatBuffer[] curFloatBufferViews;
5053

5154
protected boolean isClone = false;
@@ -83,6 +86,7 @@ protected void setCurBuf(ByteBuffer curBuf) {
8386
this.curBuf = curBuf;
8487
curLongBufferViews = null;
8588
curFloatBufferViews = null;
89+
curIntBufferViews = null;
8690
}
8791

8892
@Override
@@ -176,6 +180,37 @@ public void readLongs(long[] dst, int offset, int length) throws IOException {
176180
}
177181
}
178182

183+
@Override
184+
public void readInts(int[] dst, int offset, int length) throws IOException {
185+
// See notes about readLongs above
186+
if (curIntBufferViews == null) {
187+
curIntBufferViews = new IntBuffer[Integer.BYTES]; // one view per possible byte offset (0..3) of an int in curBuf
188+
for (int i = 0; i < Integer.BYTES; ++i) {
189+
if (i < curBuf.limit()) { // only build a view if byte offset i lies inside the buffer
190+
curIntBufferViews[i] =
191+
curBuf.duplicate().position(i).order(ByteOrder.LITTLE_ENDIAN).asIntBuffer(); // little-endian int view starting at byte i; the duplicate leaves curBuf's own position untouched
192+
} else {
193+
curIntBufferViews[i] = EMPTY_INTBUFFER; // shared zero-length view
194+
}
195+
}
196+
}
197+
try {
198+
final int position = curBuf.position();
199+
guard.getInts(
200+
curIntBufferViews[position & 0x03].position(position >>> 2), dst, offset, length); // position & 0x03 picks the view with matching alignment; position >>> 2 is the int index inside that view
201+
// if the above call succeeded, then we know the below sum cannot overflow
202+
curBuf.position(position + (length << 2)); // advance curBuf by length * 4 bytes, past the ints just copied
203+
} catch (
204+
@SuppressWarnings("unused")
205+
BufferUnderflowException e) {
206+
super.readInts(dst, offset, length); // the selected view has fewer than `length` ints remaining: fall back to the superclass implementation
207+
} catch (
208+
@SuppressWarnings("unused")
209+
NullPointerException npe) {
210+
throw new AlreadyClosedException("Already closed: " + this); // a NullPointerException in the try block is reported as use-after-close
211+
}
212+
}
213+
179214
@Override
180215
public final void readFloats(float[] floats, int offset, int len) throws IOException {
181216
// See notes about readLongs above
@@ -503,6 +538,7 @@ private void unsetBuffers() {
503538
curBuf = null;
504539
curBufIndex = 0;
505540
curLongBufferViews = null;
541+
curIntBufferViews = null;
506542
}
507543

508544
/** Optimization of ByteBufferIndexInput for when there is only one buffer */

lucene/core/src/java/org/apache/lucene/store/DataInput.java

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -169,6 +169,20 @@ public void readLongs(long[] dst, int offset, int length) throws IOException {
169169
}
170170
}
171171

172+
/**
173+
* Reads a specified number of ints into an array at the specified offset.
174+
*
175+
* @param dst the array to read ints into
176+
* @param offset the offset in the array to start storing ints
177+
* @param length the number of ints to read
178+
*/
179+
public void readInts(int[] dst, int offset, int length) throws IOException {
180+
Objects.checkFromIndexSize(offset, length, dst.length); // reject an out-of-range [offset, offset + length) before reading anything
181+
for (int i = 0; i < length; ++i) {
182+
dst[offset + i] = readInt(); // default path: one readInt() call per element
183+
}
184+
}
185+
172186
/**
173187
* Reads a specified number of floats into an array at the specified offset.
174188
*

lucene/core/src/java/org/apache/lucene/util/bkd/BKDReader.java

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -217,6 +217,7 @@ private static class BKDPointTree implements PointTree {
217217
scratchMaxIndexPackedValue;
218218
private final int[] commonPrefixLengths;
219219
private final BKDReaderDocIDSetIterator scratchIterator;
220+
private final DocIdsWriter docIdsWriter;
220221
// if true the tree is balanced, otherwise unbalanced
221222
private final boolean isTreeBalanced;
222223

@@ -303,6 +304,7 @@ private BKDPointTree(
303304
this.scratchDataPackedValue = scratchDataPackedValue;
304305
this.scratchMinIndexPackedValue = scratchMinIndexPackedValue;
305306
this.scratchMaxIndexPackedValue = scratchMaxIndexPackedValue;
307+
this.docIdsWriter = scratchIterator.docIdsWriter;
306308
}
307309

308310
@Override
@@ -570,7 +572,7 @@ public void addAll(PointValues.IntersectVisitor visitor, boolean grown) throws I
570572
// How many points are stored in this leaf cell:
571573
int count = leafNodes.readVInt();
572574
// No need to call grow(), it has been called up-front
573-
DocIdsWriter.readInts(leafNodes, count, visitor);
575+
docIdsWriter.readInts(leafNodes, count, visitor);
574576
} else {
575577
pushLeft();
576578
addAll(visitor, grown);
@@ -633,7 +635,7 @@ private int readDocIDs(IndexInput in, long blockFP, BKDReaderDocIDSetIterator it
633635
// How many points are stored in this leaf cell:
634636
int count = in.readVInt();
635637

636-
DocIdsWriter.readInts(in, count, iterator.docIDs);
638+
docIdsWriter.readInts(in, count, iterator.docIDs);
637639

638640
return count;
639641
}
@@ -1002,9 +1004,11 @@ private static class BKDReaderDocIDSetIterator extends DocIdSetIterator {
10021004
private int offset;
10031005
private int docID;
10041006
final int[] docIDs;
1007+
private final DocIdsWriter docIdsWriter;
10051008

10061009
public BKDReaderDocIDSetIterator(int maxPointsInLeafNode) {
10071010
this.docIDs = new int[maxPointsInLeafNode]; // scratch array with one slot per point a leaf may hold
1011+
this.docIdsWriter = new DocIdsWriter(maxPointsInLeafNode); // despite the name, BKDReader calls readInts on this instance to decode doc IDs
10081012
}
10091013

10101014
@Override

lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -130,6 +130,7 @@ public class BKDWriter implements Closeable {
130130
private final long totalPointCount;
131131

132132
private final int maxDoc;
133+
private final DocIdsWriter docIdsWriter;
133134

134135
public BKDWriter(
135136
int maxDoc,
@@ -165,7 +166,7 @@ public BKDWriter(
165166

166167
// Maximum number of points we hold in memory at any time
167168
maxPointsSortInHeap = (int) ((maxMBSortInHeap * 1024 * 1024) / (config.bytesPerDoc));
168-
169+
docIdsWriter = new DocIdsWriter(config.maxPointsInLeafNode);
169170
// Finally, we must be able to hold at least the leaf node in heap during build:
170171
if (maxPointsSortInHeap < config.maxPointsInLeafNode) {
171172
throw new IllegalArgumentException(
@@ -1288,7 +1289,7 @@ private void writeLeafBlockDocs(DataOutput out, int[] docIDs, int start, int cou
12881289
throws IOException {
12891290
assert count > 0 : "config.maxPointsInLeafNode=" + config.maxPointsInLeafNode;
12901291
out.writeVInt(count);
1291-
DocIdsWriter.writeDocIds(docIDs, start, count, out);
1292+
docIdsWriter.writeDocIds(docIDs, start, count, out);
12921293
}
12931294

12941295
private void writeLeafBlockPackedValues(

0 commit comments

Comments
 (0)