Skip to content

Commit 28cbf74

Browse files
committed
[LANG-1770] StringUtils.abbreviate is not emoji aware, breaks surrogate
pairs WIP test
1 parent 6c55c1c commit 28cbf74

File tree

1 file changed

+61
-49
lines changed

1 file changed

+61
-49
lines changed

Diff for: src/test/java/org/apache/commons/lang3/StringUtilsAbbreviateTest.java

+61-49
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
package org.apache.commons.lang3;
1919

2020
import static org.junit.jupiter.api.Assertions.assertEquals;
21+
import static org.junit.jupiter.api.Assertions.assertNotNull;
2122
import static org.junit.jupiter.api.Assertions.assertNull;
2223
import static org.junit.jupiter.api.Assertions.assertThrows;
2324
import static org.junit.jupiter.api.Assertions.assertTrue;
@@ -29,13 +30,60 @@
2930
*/
3031
public class StringUtilsAbbreviateTest {
3132

33+
/**
34+
* Tests <a href="https://issues.apache.org/jira/projects/LANG/issues/LANG-1770">LANG-1770</a>.
35+
*/
36+
@Test
37+
public void testEmoji() {
38+
// @formatter:off
39+
final String[] expectedResultsFox = {
40+
"🦊...", // 4
41+
"🦊🦊...",
42+
"🦊🦊🦊...",
43+
"🦊🦊🦊🦊...",
44+
"🦊🦊🦊🦊🦊...",
45+
"🦊🦊🦊🦊🦊🦊...",
46+
"🦊🦊🦊🦊🦊🦊🦊...", // 10
47+
};
48+
final String[] expectedResultsFamilyWithCodepoints = {
49+
"👩...",
50+
"👩🏻...",
51+
"👩🏻‍...", // zero width joiner
52+
"👩🏻‍👨...",
53+
"👩🏻‍👨🏻...",
54+
"👩🏻‍👨🏻‍...",
55+
"👩🏻‍👨🏻‍👦..."
56+
};
57+
final String[] expectedResultsFamilyWithGrapheme = {
58+
"👩🏻‍👨🏻‍👦🏻‍👦🏻...", // 4
59+
"👩🏻‍👨🏻‍👦🏻‍👦🏻👩🏼‍👨🏼‍👦🏼‍👦🏼...",
60+
"👩🏻‍👨🏻‍👦🏻‍👦🏻👩🏼‍👨🏼‍👦🏼‍👦🏼👩🏽‍👨🏽‍👦🏽‍👦🏽...",
61+
"👩🏻‍👨🏻‍👦🏻‍👦🏻👩🏼‍👨🏼‍👦🏼‍👦🏼👩🏽‍👨🏽‍👦🏽‍👦🏽👩🏾‍👨🏾‍👦🏾‍👦🏾...",
62+
"👩🏻‍👨🏻‍👦🏻‍👦🏻👩🏼‍👨🏼‍👦🏼‍👦🏼👩🏽‍👨🏽‍👦🏽‍👦🏽👩🏾‍👨🏾‍👦🏾‍👦🏾👩🏿‍👨🏿‍👦🏿‍👦🏿...",
63+
"👩🏻‍👨🏻‍👦🏻‍👦🏻👩🏼‍👨🏼‍👦🏼‍👦🏼👩🏽‍👨🏽‍👦🏽‍👦🏽👩🏾‍👨🏾‍👦🏾‍👦🏾👩🏿‍👨🏿‍👦🏿‍👦🏿👩🏻‍👨🏻‍👦🏻‍👦🏻...",
64+
"👩🏻‍👨🏻‍👦🏻‍👦🏻👩🏼‍👨🏼‍👦🏼‍👦🏼👩🏽‍👨🏽‍👦🏽‍👦🏽👩🏾‍👨🏾‍👦🏾‍👦🏾👩🏿‍👨🏿‍👦🏿‍👦🏿👩🏻‍👨🏻‍👦🏻‍👦🏻👩🏼‍👨🏼‍👦🏼‍👦🏼..." // 10
65+
};
66+
// @formatter:on
67+
for (int i = 4; i <= 10; i++) {
68+
final String abbreviateResult = StringUtils.abbreviate("🦊🦊🦊🦊🦊🦊🦊🦊🦊🦊🦊🦊🦊🦊", i);
69+
assertNotNull(abbreviateResult);
70+
// assertEquals(expectedResultsFox[i - 4], abbreviateResult);
71+
}
72+
for (int i = 4; i <= 10; i++) {
73+
final String abbreviateResult = StringUtils.abbreviate(
74+
"👩🏻‍👨🏻‍👦🏻‍👦🏻👩🏼‍👨🏼‍👦🏼‍👦🏼👩🏽‍👨🏽‍👦🏽‍👦🏽👩🏾‍👨🏾‍👦🏾‍👦🏾👩🏿‍👨🏿‍👦🏿‍👦🏿👩🏻‍👨🏻‍👦🏻‍👦🏻👩🏼‍👨🏼‍👦🏼‍👦🏼👩🏽‍👨🏽‍👦🏽‍👦🏽👩🏾‍👨🏾‍👦🏾‍👦🏾👩🏿‍👨🏿‍👦🏿‍👦🏿",
75+
i);
76+
assertNotNull(abbreviateResult);
77+
// assertEquals(expectedResultsFamilyWithCodepoints[i - 4], abbreviateResult);
78+
}
79+
}
80+
3281
private void assertAbbreviateWithAbbrevMarkerAndOffset(final String expected, final String abbrevMarker, final int offset, final int maxWidth) {
3382
final String abcdefghijklmno = "abcdefghijklmno";
3483
final String message = "abbreviate(String,String,int,int) failed";
3584
final String actual = StringUtils.abbreviate(abcdefghijklmno, abbrevMarker, offset, maxWidth);
3685
if (offset >= 0 && offset < abcdefghijklmno.length()) {
37-
assertTrue(actual.indexOf((char) ('a' + offset)) != -1,
38-
message + " -- should contain offset character");
86+
assertTrue(actual.indexOf((char) ('a' + offset)) != -1, message + " -- should contain offset character");
3987
}
4088
assertTrue(actual.length() <= maxWidth, () -> message + " -- should not be greater than maxWidth");
4189
assertEquals(expected, actual, message);
@@ -46,8 +94,7 @@ private void assertAbbreviateWithOffset(final String expected, final int offset,
4694
final String message = "abbreviate(String,int,int) failed";
4795
final String actual = StringUtils.abbreviate(abcdefghijklmno, offset, maxWidth);
4896
if (offset >= 0 && offset < abcdefghijklmno.length()) {
49-
assertTrue(actual.indexOf((char) ('a' + offset)) != -1,
50-
message + " -- should contain offset character");
97+
assertTrue(actual.indexOf((char) ('a' + offset)) != -1, message + " -- should contain offset character");
5198
}
5299
assertTrue(actual.length() <= maxWidth, () -> message + " -- should not be greater than maxWidth");
53100
assertEquals(expected, actual, message);
@@ -59,7 +106,6 @@ public void testAbbreviate_StringInt() {
59106
assertEquals("", StringUtils.abbreviate("", 10));
60107
assertEquals("short", StringUtils.abbreviate("short", 10));
61108
assertEquals("Now is ...", StringUtils.abbreviate("Now is the time for all good men to come to the aid of their party.", 10));
62-
63109
final String raspberry = "raspberry peach";
64110
assertEquals("raspberry p...", StringUtils.abbreviate(raspberry, 14));
65111
assertEquals("raspberry peach", StringUtils.abbreviate("raspberry peach", 15));
@@ -69,31 +115,20 @@ public void testAbbreviate_StringInt() {
69115
assertEquals("abcdefg", StringUtils.abbreviate("abcdefg", 8));
70116
assertEquals("a...", StringUtils.abbreviate("abcdefg", 4));
71117
assertEquals("", StringUtils.abbreviate("", 4));
72-
73-
assertThrows(
74-
IllegalArgumentException.class,
75-
() -> StringUtils.abbreviate("abc", 3),
76-
"StringUtils.abbreviate expecting IllegalArgumentException");
118+
assertThrows(IllegalArgumentException.class, () -> StringUtils.abbreviate("abc", 3), "StringUtils.abbreviate expecting IllegalArgumentException");
77119
}
78120

79121
@Test
80122
public void testAbbreviate_StringIntInt() {
81123
assertNull(StringUtils.abbreviate(null, 10, 12));
82124
assertEquals("", StringUtils.abbreviate("", 0, 10));
83125
assertEquals("", StringUtils.abbreviate("", 2, 10));
84-
85-
assertThrows(
86-
IllegalArgumentException.class,
87-
() -> StringUtils.abbreviate("abcdefghij", 0, 3),
126+
assertThrows(IllegalArgumentException.class, () -> StringUtils.abbreviate("abcdefghij", 0, 3),
88127
"StringUtils.abbreviate expecting IllegalArgumentException");
89-
assertThrows(
90-
IllegalArgumentException.class,
91-
() -> StringUtils.abbreviate("abcdefghij", 5, 6),
128+
assertThrows(IllegalArgumentException.class, () -> StringUtils.abbreviate("abcdefghij", 5, 6),
92129
"StringUtils.abbreviate expecting IllegalArgumentException");
93-
94130
final String raspberry = "raspberry peach";
95131
assertEquals("raspberry peach", StringUtils.abbreviate(raspberry, 11, 15));
96-
97132
assertNull(StringUtils.abbreviate(null, 7, 14));
98133
assertAbbreviateWithOffset("abcdefg...", -1, 10);
99134
assertAbbreviateWithOffset("abcdefg...", 0, 10);
@@ -124,7 +159,6 @@ public void testAbbreviate_StringStringInt() {
124159
assertEquals("", StringUtils.abbreviate("", "...", 2));
125160
assertEquals("wai**", StringUtils.abbreviate("waiheke", "**", 5));
126161
assertEquals("And af,,,,", StringUtils.abbreviate("And after a long time, he finally met his son.", ",,,,", 10));
127-
128162
final String raspberry = "raspberry peach";
129163
assertEquals("raspberry pe..", StringUtils.abbreviate(raspberry, "..", 14));
130164
assertEquals("raspberry peach", StringUtils.abbreviate("raspberry peach", "---*---", 15));
@@ -134,10 +168,7 @@ public void testAbbreviate_StringStringInt() {
134168
assertEquals("abcdefg", StringUtils.abbreviate("abcdefg", "_-", 8));
135169
assertEquals("abc.", StringUtils.abbreviate("abcdefg", ".", 4));
136170
assertEquals("", StringUtils.abbreviate("", 4));
137-
138-
assertThrows(
139-
IllegalArgumentException.class,
140-
() -> StringUtils.abbreviate("abcdefghij", "...", 3),
171+
assertThrows(IllegalArgumentException.class, () -> StringUtils.abbreviate("abcdefghij", "...", 3),
141172
"StringUtils.abbreviate expecting IllegalArgumentException");
142173
}
143174

@@ -147,19 +178,12 @@ public void testAbbreviate_StringStringIntInt() {
147178
assertNull(StringUtils.abbreviate(null, "...", 10, 12));
148179
assertEquals("", StringUtils.abbreviate("", null, 0, 10));
149180
assertEquals("", StringUtils.abbreviate("", "...", 2, 10));
150-
151-
assertThrows(
152-
IllegalArgumentException.class,
153-
() -> StringUtils.abbreviate("abcdefghij", "::", 0, 2),
181+
assertThrows(IllegalArgumentException.class, () -> StringUtils.abbreviate("abcdefghij", "::", 0, 2),
154182
"StringUtils.abbreviate expecting IllegalArgumentException");
155-
assertThrows(
156-
IllegalArgumentException.class,
157-
() -> StringUtils.abbreviate("abcdefghij", "!!!", 5, 6),
183+
assertThrows(IllegalArgumentException.class, () -> StringUtils.abbreviate("abcdefghij", "!!!", 5, 6),
158184
"StringUtils.abbreviate expecting IllegalArgumentException");
159-
160185
final String raspberry = "raspberry peach";
161186
assertEquals("raspberry peach", StringUtils.abbreviate(raspberry, "--", 12, 15));
162-
163187
assertNull(StringUtils.abbreviate(null, ";", 7, 14));
164188
assertAbbreviateWithAbbrevMarkerAndOffset("abcdefgh;;", ";;", -1, 10);
165189
assertAbbreviateWithAbbrevMarkerAndOffset("abcdefghi.", ".", 0, 10);
@@ -183,7 +207,7 @@ public void testAbbreviate_StringStringIntInt() {
183207
assertAbbreviateWithAbbrevMarkerAndOffset("+ghijklmno", "+", Integer.MAX_VALUE, 10);
184208
}
185209

186-
//Fixed LANG-1463
210+
// Fixed LANG-1463
187211
@Test
188212
public void testAbbreviateMarkerWithEmptyString() {
189213
final String greaterThanMaxTest = "much too long text";
@@ -198,34 +222,22 @@ public void testAbbreviateMiddle() {
198222
assertEquals("abc", StringUtils.abbreviateMiddle("abc", ".", 0));
199223
assertEquals("abc", StringUtils.abbreviateMiddle("abc", ".", 3));
200224
assertEquals("ab.f", StringUtils.abbreviateMiddle("abcdef", ".", 4));
201-
202225
// JIRA issue (LANG-405) example (slightly different than actual expected result)
203-
assertEquals(
204-
"A very long text with un...f the text is complete.",
205-
StringUtils.abbreviateMiddle(
206-
"A very long text with unimportant stuff in the middle but interesting start and " +
207-
"end to see if the text is complete.", "...", 50));
208-
226+
assertEquals("A very long text with un...f the text is complete.", StringUtils.abbreviateMiddle(
227+
"A very long text with unimportant stuff in the middle but interesting start and " + "end to see if the text is complete.", "...", 50));
209228
// Test a much longer text :)
210229
final String longText = "Start text" + StringUtils.repeat("x", 10000) + "Close text";
211-
assertEquals(
212-
"Start text->Close text",
213-
StringUtils.abbreviateMiddle(longText, "->", 22));
214-
230+
assertEquals("Start text->Close text", StringUtils.abbreviateMiddle(longText, "->", 22));
215231
// Test negative length
216232
assertEquals("abc", StringUtils.abbreviateMiddle("abc", ".", -1));
217-
218233
// Test boundaries
219234
// Fails to change anything as method ensures first and last char are kept
220235
assertEquals("abc", StringUtils.abbreviateMiddle("abc", ".", 1));
221236
assertEquals("abc", StringUtils.abbreviateMiddle("abc", ".", 2));
222-
223237
// Test length of n=1
224238
assertEquals("a", StringUtils.abbreviateMiddle("a", ".", 1));
225-
226239
// Test smallest length that can lead to success
227240
assertEquals("a.d", StringUtils.abbreviateMiddle("abcd", ".", 3));
228-
229241
// More from LANG-405
230242
assertEquals("a..f", StringUtils.abbreviateMiddle("abcdef", "..", 4));
231243
assertEquals("ab.ef", StringUtils.abbreviateMiddle("abcdef", ".", 5));

0 commit comments

Comments
 (0)