Skip to content

Commit

Permalink
Merge pull request #143 from spring-projects/more-flexible-configuration
Browse files Browse the repository at this point in the history
Customization of Apache POI helpers
  • Loading branch information
mdeinum authored Sep 18, 2024
2 parents 41cbb93 + 3955606 commit 6335a69
Show file tree
Hide file tree
Showing 16 changed files with 201 additions and 229 deletions.
13 changes: 8 additions & 5 deletions spring-batch-excel/README.adoc
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@ The `PoiItemReader` has the most features but is also the most memory intensive

To reduce the memory footprint the `StreamingXlsxItemReader` can be used; it only keeps the current row in memory and discards it afterward. Not everything is supported while streaming the XLSX file: formulas might not get evaluated, or might lead to an error.

NOTE: The `ItemReader` classess are **not threadsafe**. The API from https://poi.apache.org/help/faq.html#20[Apache POI] itself isn't threadsafe as well as the https://docs.spring.io/spring-batch/docs/current/api/org/springframework/batch/item/support/AbstractItemCountingItemStreamItemReader.html[`AbstractItemCountingItemStreamItemReader`] used as a base class for the `ItemReader` classes. Reading from multiple threads is therefore not supported. Using a multi-threaded processor/writer should work as long as you use a single thread for reading.
WARNING: The `ItemReader` classes are **not threadsafe**. Neither the API from https://poi.apache.org/help/faq.html#20[Apache POI] nor the https://docs.spring.io/spring-batch/docs/current/api/org/springframework/batch/item/support/AbstractItemCountingItemStreamItemReader.html[`AbstractItemCountingItemStreamItemReader`] used as a base class for the `ItemReader` classes is threadsafe. Reading from multiple threads is therefore not supported. Using a multi-threaded processor/writer should work as long as you use a single thread for reading.

*Compatibility:* Spring Batch Excel is compatible with Spring Batch 4.3 and 5.0/5.1.
*Compatibility:* Spring Batch Excel is compatible with Spring Batch 5.x.

== Configuration of `PoiItemReader`

Expand All @@ -34,10 +34,10 @@ Configuration of can be done in XML or Java Config.
----
@Bean
@StepScope
public PoiItemReader excelReader() {
public PoiItemReader excelReader(RowMapper rowMapper) {
PoiItemReader reader = new PoiItemReader();
reader.setResource(new FileSystemResource("/path/to/your/excel/file"));
reader.setRowMapper(rowMapper());
reader.setRowMapper(rowMapper);
return reader;
}
Expand Down Expand Up @@ -82,7 +82,6 @@ public RowMapper rowMapper() {
}
----


== Configuration properties
[cols="1,1,1,4"]
.Properties for item readers
Expand All @@ -99,8 +98,12 @@ public RowMapper rowMapper() {
| `strict` | no | `true` | This controls whether or not an exception is thrown if the file doesn't exist or isn't readable; by default an exception will be thrown.
| `datesAsIso` | no | `false` | Controls if dates need to be parsed as ISO or to use the format as specified in the excel sheet.
| `userLocale` | no | `null` | Set the `java.util.Locale` to use when formatting dates when there is no explicit format set in the Excel document.
| `dataFormatterCustomizer` | no | `DataFormatterCustomizer.DEFAULT` | To additionally configure the https://poi.apache.org/apidocs/dev/org/apache/poi/ss/usermodel/DataFormatter.html[`DataFormatter`] in use to format the data. The default will set the `useCachedValuesForFormulaCells` property to `true` to use cached values instead of evaluating the formulas.
| `formulaEvaluatorFactory` | no | `FormulaEvaluatorFactory.NOOP` | A factory approach to create a `FormulaEvaluator` used by Apache POI to evaluate the formulas in the workbook. The default implementation will return `null`, as the default is to use the cached values.
|===

== ColumnNameExtractors

- `StaticColumnNameExtractor` uses a preset list of column names.
- `RowNumberColumnNameExtractor` (**the default**) reads a given row (default 0) to determine the column names of the current sheet

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,10 @@ public abstract class AbstractExcelItemReader<T> extends AbstractItemCountingIte

private DataFormatter dataFormatter;

private DataFormatterCustomizer dataFormatterCustomizer = DataFormatterCustomizer.DEFAULT;

private FormulaEvaluatorFactory formulaEvaluatorFactory = FormulaEvaluatorFactory.NOOP;

public AbstractExcelItemReader() {
super();
this.setName(ClassUtils.getShortName(this.getClass()));
Expand Down Expand Up @@ -222,12 +226,15 @@ public void setResource(final Resource resource) {

public void afterPropertiesSet() {
	// Fail fast when a mandatory collaborator is missing.
	Assert.notNull(this.rowMapper, "RowMapper must be set");
	Assert.notNull(this.dataFormatterCustomizer, "DataFormatterCustomizer must be set");
	Assert.notNull(this.formulaEvaluatorFactory, "FormulaEvaluatorFactory must be set");

	// Pick the formatter flavour (ISO dates or plain) and pass the locale only when one was configured.
	final boolean localeConfigured = (this.userLocale != null);
	if (this.datesAsIso && localeConfigured) {
		this.dataFormatter = new IsoFormattingDateDataFormatter(this.userLocale);
	}
	else if (this.datesAsIso) {
		this.dataFormatter = new IsoFormattingDateDataFormatter();
	}
	else if (localeConfigured) {
		this.dataFormatter = new DataFormatter(this.userLocale);
	}
	else {
		this.dataFormatter = new DataFormatter();
	}

	// Let the configured customizer adjust the freshly created formatter.
	this.dataFormatterCustomizer.customize(this.dataFormatter);
}

protected DataFormatter getDataFormatter() {
Expand Down Expand Up @@ -330,4 +337,29 @@ public void setDatesAsIso(boolean datesAsIso) {
public void setUserLocale(Locale userLocale) {
this.userLocale = userLocale;
}

/**
* Set the {@code DataFormatterCustomizer} used to configure the {@code DataFormatter} that formats/reads data.
* The default is {@code DataFormatterCustomizer.DEFAULT}, which makes the formatter use the cached value of a
* formula cell instead of evaluating the formula.
* @param dataFormatterCustomizer the {@code DataFormatterCustomizer}, never {@code null}.
*/
public void setDataFormatterCustomizer(DataFormatterCustomizer dataFormatterCustomizer) {
this.dataFormatterCustomizer = dataFormatterCustomizer;
}

/**
* Set the {@code FormulaEvaluatorFactory} to use when a {@code FormulaEvaluator} is needed. The default factory
* returns {@code null} as the evaluator, because by default the {@code DataFormatter} is configured to use
* the cached value anyway.
* @param formulaEvaluatorFactory the {@code FormulaEvaluatorFactory} to use, never {@code null}
* @see FormulaEvaluatorFactory
*/
public void setFormulaEvaluatorFactory(FormulaEvaluatorFactory formulaEvaluatorFactory) {
this.formulaEvaluatorFactory = formulaEvaluatorFactory;
}

/**
* Return the {@code FormulaEvaluatorFactory} currently configured on this reader.
* @return the {@code FormulaEvaluatorFactory} held by this reader
*/
protected FormulaEvaluatorFactory getFormulaEvaluatorFactory() {
return this.formulaEvaluatorFactory;
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
/*
* Copyright 2011-2024 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.springframework.batch.extensions.excel;

import org.apache.poi.ss.usermodel.DataFormatter;

/**
 * Strategy hook that receives a {@code DataFormatter} and may adjust its settings.
 * It declares a single abstract method, so it can be supplied as a lambda expression
 * or a method reference.
 *
 * @author Marten Deinum
 * @since 0.2.0
 */
@FunctionalInterface
public interface DataFormatterCustomizer {

	/** Customizer that leaves the {@code DataFormatter} untouched. */
	DataFormatterCustomizer NOOP = (formatter) -> {
	};

	/** Default customizer: switches the formatter to cached values for formula cells. */
	DataFormatterCustomizer DEFAULT = (formatter) -> formatter.setUseCachedValuesForFormulaCells(true);

	/**
	 * Apply this customization to the given formatter.
	 * @param dataFormatter the {@code DataFormatter} to customize
	 */
	void customize(DataFormatter dataFormatter);

}


Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
/*
* Copyright 2011-2024 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package org.springframework.batch.extensions.excel;

import org.apache.poi.ss.usermodel.FormulaEvaluator;
import org.apache.poi.ss.usermodel.Workbook;

import org.springframework.lang.Nullable;

/**
 * Strategy for obtaining the {@code FormulaEvaluator} that goes with a {@code Workbook}.
 * The default variant yields {@code null} so that the cached formula results stored in
 * the workbook are re-used.
 *
 * @author Marten Deinum
 * @since 0.2.0
 */
@FunctionalInterface
public interface FormulaEvaluatorFactory {

	/** Factory that always yields {@code null} instead of a {@code FormulaEvaluator}; used by default. */
	FormulaEvaluatorFactory NOOP = (workbook) -> null;

	/** Factory that hands the creation of the {@code FormulaEvaluator} over to the workbook. */
	FormulaEvaluatorFactory SIMPLE = (workbook) -> workbook.getCreationHelper().createFormulaEvaluator();

	/**
	 * Produce the {@code FormulaEvaluator} for the supplied {@code Workbook}.
	 * @param workbook the workbook to create the evaluator for
	 * @return the {@code FormulaEvaluator}, possibly {@code null}
	 */
	@Nullable
	FormulaEvaluator create(Workbook workbook);

}
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ public class PoiItemReader<T> extends AbstractExcelItemReader<T> {

@Override
protected Sheet getSheet(final int sheet) {
return new PoiSheet(this.workbook.getSheetAt(sheet), getDataFormatter());
return new PoiSheet(this.workbook.getSheetAt(sheet), getDataFormatter(), getFormulaEvaluatorFactory());
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
import org.apache.poi.ss.usermodel.FormulaEvaluator;
import org.apache.poi.ss.usermodel.Row;

import org.springframework.batch.extensions.excel.FormulaEvaluatorFactory;
import org.springframework.batch.extensions.excel.Sheet;
import org.springframework.lang.Nullable;

Expand All @@ -41,20 +42,23 @@ class PoiSheet implements Sheet {
private final org.apache.poi.ss.usermodel.Sheet delegate;
private final int numberOfRows;
private final String name;
private final FormulaEvaluatorFactory formulaEvaluatorFactory;

private FormulaEvaluator evaluator;

/**
* Constructor which takes the delegate sheet.
* @param delegate the apache POI sheet
* @param dataFormatter the {@code DataFormatter} to use.
* @param formulaEvaluatorFactory the {@code FormulaEvaluatorFactory} to use.
*/
PoiSheet(final org.apache.poi.ss.usermodel.Sheet delegate, DataFormatter dataFormatter) {
PoiSheet(final org.apache.poi.ss.usermodel.Sheet delegate, DataFormatter dataFormatter, FormulaEvaluatorFactory formulaEvaluatorFactory) {
super();
this.delegate = delegate;
this.numberOfRows = this.delegate.getLastRowNum() + 1;
this.name = this.delegate.getSheetName();
this.dataFormatter = dataFormatter;
this.formulaEvaluatorFactory = formulaEvaluatorFactory;
}

/**
Expand Down Expand Up @@ -108,17 +112,18 @@ private String[] map(Row row) {
* Lazy getter for the {@code FormulaEvaluator}. Takes some time to create an
* instance, so if not necessary don't create it.
* @return the {@code FormulaEvaluator}
* @see FormulaEvaluatorFactory
*/
private FormulaEvaluator getFormulaEvaluator() {
if (this.evaluator == null) {
this.evaluator = this.delegate.getWorkbook().getCreationHelper().createFormulaEvaluator();
this.evaluator = this.formulaEvaluatorFactory.create(this.delegate.getWorkbook());
}
return this.evaluator;
}

@Override
public Iterator<String[]> iterator() {
return new Iterator<String[]>() {
return new Iterator<>() {
private final Iterator<Row> delegateIter = PoiSheet.this.delegate.iterator();

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@

import java.io.InputStream;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;

Expand All @@ -38,6 +37,7 @@
import org.xml.sax.Attributes;

import org.springframework.batch.extensions.excel.Sheet;
import org.springframework.util.CollectionUtils;
import org.springframework.util.StringUtils;
import org.springframework.util.xml.StaxUtils;

Expand Down Expand Up @@ -161,7 +161,7 @@ public void close() throws Exception {

@Override
public Iterator<String[]> iterator() {
return new Iterator<String[]>() {
return new Iterator<>() {

private String[] currentRow;

Expand Down Expand Up @@ -233,9 +233,10 @@ String[] getValues() {
*/
private static final class AttributesAdapter implements Attributes {

private final Map<String, String> attributes = new HashMap<>();
private final Map<String, String> attributes;

private AttributesAdapter(XMLStreamReader delegate) {
this.attributes = CollectionUtils.newHashMap(delegate.getAttributeCount());
for (int i = 0; i < delegate.getAttributeCount(); i++) {
String name = delegate.getAttributeLocalName(i);
String value = delegate.getAttributeValue(i);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,10 @@ private void initSheets(XSSFReader reader, OPCPackage pkg) throws IOException, I

@Override
protected void doClose() throws Exception {
this.pkg.revert();

if (this.pkg != null) {
this.pkg.revert();
}

for (StreamingSheet sheet : this.sheets) {
sheet.close();
Expand Down
Loading

0 comments on commit 6335a69

Please sign in to comment.