Skip to content

Commit e0e3f08

Browse files
author
IBue
committed
[StringUtils::indexOfAnyBut] simplify input-sequence iteration
by transforming the ListIterator loop into an index-based loop that advances by Character.charCount(codePoint); this enables short-circuit processing and avoids materializing the entire input sequence in advance
1 parent 0e372a6 commit e0e3f08

File tree

1 file changed

+5
-9
lines changed

1 file changed

+5
-9
lines changed

src/main/java/org/apache/commons/lang3/StringUtils.java

+5-9
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,6 @@
2424
import java.util.Arrays;
2525
import java.util.Iterator;
2626
import java.util.List;
27-
import java.util.ListIterator;
2827
import java.util.Locale;
2928
import java.util.Objects;
3029
import java.util.Set;
@@ -2875,18 +2874,15 @@ public static int indexOfAnyBut(final CharSequence seq, final CharSequence searc
28752874
if (isEmpty(seq) || isEmpty(searchChars)) {
28762875
return INDEX_NOT_FOUND;
28772876
}
2878-
final Set<Integer> searchSetCodePoints = searchChars.codePoints().boxed()
2879-
.collect(Collectors.toSet()); // JDK >=10: Collectors::toUnmodifiableSet
2880-
for (final ListIterator<Integer> seqListIt = seq.chars().boxed().collect(Collectors.toList()) // JDK >=16: Stream::toList, JDK >=10: Collectors::toUnmodifiableList
2881-
.listIterator(); seqListIt.hasNext(); seqListIt.next()) {
2882-
final int curSeqCharIdx = seqListIt.nextIndex();
2877+
final Set<Integer> searchSetCodePoints = searchChars.codePoints()
2878+
.boxed().collect(Collectors.toSet()); // JDK >=10: Collectors::toUnmodifiableSet
2879+
// advance character index from one interpreted codepoint to the next
2880+
for (int curSeqCharIdx = 0; curSeqCharIdx < seq.length();) {
28832881
final int curSeqCodePoint = Character.codePointAt(seq, curSeqCharIdx);
28842882
if (!searchSetCodePoints.contains(curSeqCodePoint)) {
28852883
return curSeqCharIdx;
28862884
}
2887-
if (Character.isSupplementaryCodePoint(curSeqCodePoint)) {
2888-
seqListIt.next(); // skip subsequent low-surrogate in next loop, since it merged into curSeqCodePoint
2889-
}
2885+
curSeqCharIdx += Character.charCount(curSeqCodePoint); // skip indices to paired low-surrogates
28902886
}
28912887
return INDEX_NOT_FOUND;
28922888
}

0 commit comments

Comments
 (0)