|
41 | 41 | import java.util.Collections;
|
42 | 42 | import java.util.List;
|
43 | 43 | import java.util.function.BiFunction;
|
| 44 | +import java.util.function.Function; |
44 | 45 | import java.util.stream.Collectors;
|
45 | 46 | import java.util.stream.Stream;
|
46 | 47 |
|
@@ -206,6 +207,70 @@ public void bug162() throws CsvReaderException {
|
206 | 207 | invokeTest(defaultCsvBuilder().parsers(Parsers.DEFAULT).build(), input, expected);
|
207 | 208 | }
|
208 | 209 |
|
| 210 | + /** |
| 211 | + * Reported in <a href="https://github.com/deephaven/deephaven-csv/issues/190">Deephaven CSV Issue #190</a>. That |
| 212 | + * issue report misidentifies the root cause as having to do with reserved keywords. This is not correct because the |
| 213 | + * library doesn't care whether a column header is a reserved keyword. The actual root cause is an interaction |
| 214 | + * between the user-supplied "legalizer" and user-specified parsers or null literals that are specified by column |
| 215 | + * names. Specifically the question is whether column names mentioned in {@link CsvSpecs.Builder#putParserForName} |
| 216 | + * and {@link CsvSpecs.Builder#putNullValueLiteralsForName} should refer to the name that the column had *before* it |
| 217 | + * was transformed by the legalizer, or *after*. The expected behavior is "before", but prior to this fix the |
| 218 | + * library was doing the "after" behavior. This is a parameterized test that invokes the behavior for {delimited, |
| 219 | + * fixed columns} x {without and with a legalizer}. |
| 220 | + */ |
| 221 | + @ParameterizedTest |
| 222 | + @CsvSource({"false,false", "false,true", "true,false", "true,true"}) |
| 223 | + public void bug190(boolean hasFixedWidthColumns, boolean invokeLegalizer) throws CsvReaderException { |
| 224 | + // +++ is the null value literal for Col1 |
| 225 | + // *** is the null value literal for Col2 |
| 226 | + // ??? is the null value literal for Col3 |
| 227 | + |
| 228 | + final String input; |
| 229 | + |
| 230 | + if (!hasFixedWidthColumns) { |
| 231 | + input = "Col1,Col2,Col3\n" + |
| 232 | + "+++,20,30\n" + |
| 233 | + "100,***,300\n" + |
| 234 | + "1000,2000,???\n"; |
| 235 | + } else { |
| 236 | + input = "Col1 Col2 Col3\n" + |
| 237 | + "+++ 20 30\n" + |
| 238 | + "100 *** 300\n" + |
| 239 | + "1000 2000 ???\n"; |
| 240 | + } |
| 241 | + |
| 242 | + final String[] expectedColumnNames = !invokeLegalizer ? new String[] {"Col1", "Col2", "Col3"} |
| 243 | + : new String[] {"xyzCol1", "xyzCol2", "xyzCol3"}; |
| 244 | + |
| 245 | + final ColumnSet expected = |
| 246 | + ColumnSet.of( |
| 247 | + Column.ofValues(expectedColumnNames[0], Sentinels.NULL_LONG, (long) 100, (long) 1000), |
| 248 | + Column.ofValues(expectedColumnNames[1], (double) 20, Sentinels.NULL_DOUBLE, (double) 2000), |
| 249 | + Column.ofRefs(expectedColumnNames[2], "30", "300", null)); |
| 250 | + |
| 251 | + Function<String[], String[]> legalizer = in -> { |
| 252 | + for (int i = 0; i != in.length; ++i) { |
| 253 | + // e.g. transform Col1 to xyzCol1 |
| 254 | + in[i] = "xyz" + in[i]; |
| 255 | + } |
| 256 | + return in; |
| 257 | + }; |
| 258 | + |
| 259 | + CsvSpecs.Builder specsBase = |
| 260 | + defaultCsvBuilder().hasFixedWidthColumns(hasFixedWidthColumns).parsers(Parsers.DEFAULT) |
| 261 | + .putParserForName("Col1", Parsers.LONG).putParserForName("Col2", Parsers.DOUBLE) |
| 262 | + .putParserForName("Col3", Parsers.STRING) |
| 263 | + .putNullValueLiteralsForName("Col1", Collections.singletonList("+++")) |
| 264 | + .putNullValueLiteralsForName("Col2", Collections.singletonList("***")) |
| 265 | + .putNullValueLiteralsForName("Col3", Collections.singletonList("???")); |
| 266 | + |
| 267 | + if (invokeLegalizer) { |
| 268 | + specsBase = specsBase.headerLegalizer(legalizer); |
| 269 | + } |
| 270 | + |
| 271 | + invokeTest(specsBase.build(), input, expected); |
| 272 | + } |
| 273 | + |
209 | 274 | @Test
|
210 | 275 | public void validates() {
|
211 | 276 | final String lengthyMessage = "CsvSpecs failed validation for the following reasons: "
|
|
0 commit comments