24
24
public abstract class CsvSpecs {
25
25
public interface Builder {
26
26
/**
27
- * Copy all of the parameters from {@code specs} into {@code this} builder.
27
+ * Copy all the parameters from {@code specs} into {@code this} builder.
28
28
*/
29
29
Builder from (CsvSpecs specs );
30
30
@@ -117,6 +117,34 @@ public interface Builder {
117
117
*/
118
118
Builder headerValidator (Predicate <String > headerValidator );
119
119
120
+ /**
121
+ * True if the input is organized into fixed-width columns rather than separated by a delimiter character. Defaults to {@code false}.
122
+ */
123
+ Builder hasFixedWidthColumns (boolean hasFixedWidthColumns );
124
+
125
+ /**
126
+ * When {@link #hasFixedWidthColumns} is set, the library either determines the column widths from the header
127
+ * row (provided {@link #hasHeaderRow} is set), or the column widths can be specified explicitly by the caller.
128
+ * If the caller wants to specify them explicitly, they can use this method. It is an error to set this
129
+ * parameter if {@link #hasFixedWidthColumns} is false. Note that because the library is tolerant of the last
130
+ * cell being shorter or wider than expected, the value specified here for the width of the last column is
131
+ * simply a placeholder; its value is ignored.
132
+ */
133
+ Builder fixedColumnWidths (Iterable <Integer > fixedColumnWidths );
134
+
135
+ /**
136
+ * This setting controls what units fixed width columns are measured in. When true, fixed width columns are
137
+ * measured in Unicode code points. When false, fixed width columns are measured in UTF-16 units (aka Java
138
+ * chars). The difference arises when encountering characters outside the Unicode Basic Multilingual Plane. For
139
+ * example, the Unicode code point 💔 (U+1F494) is one Unicode code point, but takes two Java chars to
140
+ * represent. Along these lines, the string 💔💔💔 would fit in a column of width 3 when useUtf32CountingConvention is
141
+ * true, but would require a column width of at least 6 when useUtf32CountingConvention is false. The default setting of
142
+ * true is arguably more natural for users (the number of characters they see matches the visual width of the
143
+ * column). But some programs may want the value of false because they are counting Java chars. It is an error
144
+ * to set this parameter if {@link #hasFixedWidthColumns} is false.
145
+ */
146
+ Builder useUtf32CountingConvention (boolean useUtf32CountingConvention );
147
+
120
148
/**
121
149
* Number of data rows to skip before processing data. This is useful when you want to parse data in chunks.
122
150
* Typically used together with {@link Builder#numRows}. Defaults to 0.
@@ -160,7 +188,7 @@ public interface Builder {
160
188
161
189
/**
162
190
* The field delimiter character (the character that separates one column from the next). Must be 7-bit ASCII.
163
- * Defaults to {code ','}.
191
+ * Defaults to {@code ','}. It is an error to set this parameter if {@link #hasFixedWidthColumns} is true.
164
192
*/
165
193
Builder delimiter (char delimiter );
166
194
@@ -179,6 +207,8 @@ public interface Builder {
179
207
* <li>hello, there
180
208
* <li>456
181
209
* </ul>
210
+ *
211
+ * It is an error to set this parameter if {@link #hasFixedWidthColumns} is true.
182
212
*/
183
213
Builder quote (char quote );
184
214
@@ -188,7 +218,8 @@ public interface Builder {
188
218
Builder ignoreSurroundingSpaces (boolean ignoreSurroundingSpaces );
189
219
190
220
/**
191
- * Whether to trim leading and trailing blanks from inside quoted values. Defaults to {@code false}.
221
+ * Whether to trim leading and trailing blanks from inside quoted values. Defaults to {@code false}. It is an
222
+ * error to set this parameter if {@link #hasFixedWidthColumns} is true.
192
223
*/
193
224
Builder trim (boolean trim );
194
225
@@ -224,6 +255,38 @@ void check() {
224
255
if (!hasHeaderRow () && skipHeaderRows () > 0 ) {
225
256
problems .add ("skipHeaderRows != 0 but hasHeaderRow is not set" );
226
257
}
258
+
259
+ for (final Integer colWidth : fixedColumnWidths ()) {
260
+ if (colWidth < 1 ) {
261
+ problems .add (String .format ("Fixed column width %d is invalid" , colWidth ));
262
+ }
263
+ }
264
+
265
+ // Certain items must not be set in fixed-width column mode. Other items must not be set in delimited column
266
+ // mode.
267
+ if (hasFixedWidthColumns ()) {
268
+ final String format = "Incompatible parameters: can't set %s when hasFixedWidthColumns is true" ;
269
+ if (quote () != defaultQuote ) {
270
+ problems .add (String .format (format , "quote" ));
271
+ }
272
+
273
+ if (delimiter () != defaultDelimiter ) {
274
+ problems .add (String .format (format , "delimiter" ));
275
+ }
276
+
277
+ if (trim () != defaultTrim ) {
278
+ problems .add (String .format (format , "trim" ));
279
+ }
280
+ } else {
281
+ final String format = "Incompatible parameters: can't set %s when hasFixedWidthColumns is false" ;
282
+ if (fixedColumnWidths ().size () != 0 ) {
283
+ problems .add (String .format (format , "fixedColumnWidths" ));
284
+ }
285
+
286
+ if (useUtf32CountingConvention () != defaultUtf32CountingConvention ) {
287
+ problems .add (String .format (format , "useUtf32CountingConvention" ));
288
+ }
289
+ }
227
290
if (problems .isEmpty ()) {
228
291
return ;
229
292
}
@@ -340,6 +403,32 @@ public Predicate<String> headerValidator() {
340
403
return c -> true ;
341
404
}
342
405
406
+ /**
407
+ * See {@link Builder#hasFixedWidthColumns}.
408
+ */
409
+ @ Default
410
+ public boolean hasFixedWidthColumns () {
411
+ return false ;
412
+ }
413
+
414
+ /**
415
+ * See {@link Builder#fixedColumnWidths}.
416
+ */
417
+ @ Default
418
+ public List <Integer > fixedColumnWidths () {
419
+ return Collections .emptyList ();
420
+ }
421
+
422
+ private static final boolean defaultUtf32CountingConvention = true ;
423
+
424
+ /**
425
+ * See {@link Builder#useUtf32CountingConvention}.
426
+ */
427
+ @ Default
428
+ public boolean useUtf32CountingConvention () {
429
+ return defaultUtf32CountingConvention ;
430
+ }
431
+
343
432
/**
344
433
* See {@link Builder#skipRows}.
345
434
*/
@@ -396,20 +485,25 @@ public long skipHeaderRows() {
396
485
return 0 ;
397
486
}
398
487
488
+ private static final char defaultDelimiter = ',' ; // class-level constant, consistent with defaultQuote; used to detect an explicitly-set delimiter
489
+
399
490
/**
400
491
* See {@link Builder#delimiter}.
401
492
*/
402
493
@ Default
403
494
public char delimiter () {
404
- return ',' ;
495
+ return defaultDelimiter ;
405
496
}
406
497
498
+
499
+ private static final char defaultQuote = '"' ;
500
+
407
501
/**
408
502
* See {@link Builder#quote}.
409
503
*/
410
504
@ Default
411
505
public char quote () {
412
- return '"' ;
506
+ return defaultQuote ;
413
507
}
414
508
415
509
/**
@@ -420,12 +514,14 @@ public boolean ignoreSurroundingSpaces() {
420
514
return true ;
421
515
}
422
516
517
+ private static boolean defaultTrim = false ;
518
+
423
519
/**
424
520
* See {@link Builder#trim}.
425
521
*/
426
522
@ Default
427
523
public boolean trim () {
428
- return false ;
524
+ return defaultTrim ;
429
525
}
430
526
431
527
/**
0 commit comments