
org.jberet.support.io.ExcelStreamingItemReader Maven / Gradle / Ivy
The newest version!
/*
* Copyright (c) 2014 Red Hat, Inc. and/or its affiliates.
*
* This program and the accompanying materials are made
* available under the terms of the Eclipse Public License 2.0
* which is available at https://www.eclipse.org/legal/epl-2.0/
*
* SPDX-License-Identifier: EPL-2.0
*/
package org.jberet.support.io;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLStreamConstants;
import javax.xml.stream.XMLStreamReader;
import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.xssf.eventusermodel.XSSFReader;
import org.apache.poi.xssf.model.SharedStringsTable;
import org.apache.poi.xssf.usermodel.XSSFRichTextString;
import jakarta.batch.api.chunk.ItemReader;
import jakarta.enterprise.context.Dependent;
import jakarta.inject.Named;
/**
* An implementation of {@code jakarta.batch.api.chunk.ItemReader} for reading OOXML Excel files, based on Apache POI
* XSSF streaming reader API, and StAX.
*
* @see ExcelStreamingItemWriter
* @see ExcelUserModelItemReader
* @see ExcelItemReaderWriterBase
* @since 1.1.0
*/
@Named
@Dependent
public class ExcelStreamingItemReader extends ExcelUserModelItemReader implements ItemReader {
private static final String schemaRelationships = "http://schemas.openxmlformats.org/officeDocument/2006/relationships";
/**
* If set to true, the target sheet uses R1C1-style cell coordinates; if set to false, the target sheet uses the
* more common A1-style cell coordinates; defaults to null (not set).
*/
protected Boolean r1c1;
protected InputStream sheetInputStream;
protected XMLStreamReader sheetStreamReader;
protected SharedStringsTable sharedStringsTable;
@Override
public Object readItem() throws Exception {
if (currentRowNum == this.end) {
return null;
}
Map resultMap;
while (sheetStreamReader.hasNext()) {
final int event = sheetStreamReader.next();
if (event == XMLStreamConstants.START_ELEMENT && "row".equals(sheetStreamReader.getLocalName())) {
currentRowNum = Integer.parseInt(sheetStreamReader.getAttributeValue(null, "r")) - 1;
resultMap = new HashMap();
while (sheetStreamReader.hasNext()) {
final int event1 = sheetStreamReader.next();
if (event1 == XMLStreamConstants.START_ELEMENT && "c".equals(sheetStreamReader.getLocalName())) {
final String columnLabel = getColumnLabel(sheetStreamReader.getAttributeValue(null, "r"));
if (headerMapping == null) {
initHeaderMapping();
}
final String key = headerMapping.get(columnLabel);
resultMap.put(key, getCellStringValue());
} else if (event1 == XMLStreamConstants.END_ELEMENT && "row".equals(sheetStreamReader.getLocalName())) {
if (beanType == Map.class) {
return resultMap;
}
if (beanType == List.class) {
//blank cells have no trace in sheet xml file, so need to match any cell to its column letter
//and add null for blank cell to avoid accidental shift
final List resultList = new ArrayList();
for (final String h : header) {
resultList.add(resultMap.get(h));
}
return resultList;
}
initJsonFactoryAndObjectMapper();
final Object readValue = objectMapper.convertValue(resultMap, beanType);
if (!skipBeanValidation) {
ItemReaderWriterBase.validate(readValue);
}
return readValue;
}
}
}
}
return null;
}
@Override
public void close() throws Exception {
super.close();
if (sheetStreamReader != null) {
try {
sheetStreamReader.close();
} catch (final Exception e) {
//ignore
}
sheetStreamReader = null;
}
if (sheetInputStream != null) {
try {
sheetInputStream.close();
} catch (final Exception e) {
//ignore
}
sheetInputStream = null;
}
}
@Override
protected void initWorkbookAndSheet(final int startRowNumber) throws Exception {
InputStream workbookDataInputStream = null;
XMLStreamReader workbookStreamReader = null;
try {
final OPCPackage opcPackage = OPCPackage.open(inputStream);
final XSSFReader xssfReader = new XSSFReader(opcPackage);
workbookDataInputStream = xssfReader.getWorkbookData();
final XMLInputFactory xmlInputFactory = XMLInputFactory.newInstance();
workbookStreamReader = xmlInputFactory.createXMLStreamReader(workbookDataInputStream);
sharedStringsTable = xssfReader.getSharedStringsTable();
/*
sample sheet element:
*/
while (workbookStreamReader.hasNext()) {
if (workbookStreamReader.next() == XMLStreamConstants.START_ELEMENT && "sheet".equals(workbookStreamReader.getLocalName())) {
final String shn = workbookStreamReader.getAttributeValue(null, "name");
final String shId = workbookStreamReader.getAttributeValue(null, "sheetId");
if ((sheetName != null && sheetName.equals(shn)) ||
(sheetName == null && String.valueOf(this.sheetIndex + 1).equals(shId))) {
//this is the target sheet
final String relationshipId = workbookStreamReader.getAttributeValue(schemaRelationships, "id");
sheetInputStream = xssfReader.getSheet(relationshipId);
sheetStreamReader = xmlInputFactory.createXMLStreamReader(sheetInputStream);
break;
}
}
}
} finally {
if (workbookDataInputStream != null) {
try {
workbookDataInputStream.close();
} catch (final Exception e) {
//ignore
}
}
if (workbookStreamReader != null) {
try {
workbookStreamReader.close();
} catch (final Exception e) {
//ignore
}
}
}
/*
sample row element:
0
1
2
3
For inlineStr:
Date
Note: a blank cell does not show up in xml at all. So for list type beanType, need to detect blank cell and add
null; for map or custom beanType, need to link to the correct header column by r attribute.
*/
if (header == null) {
headerMapping = new HashMap();
outerLoop:
while (sheetStreamReader.hasNext()) {
if (sheetStreamReader.next() == XMLStreamConstants.START_ELEMENT && "row".equals(sheetStreamReader.getLocalName())) {
final int rowNum = Integer.parseInt(sheetStreamReader.getAttributeValue(null, "r"));
if (headerRow + 1 == rowNum) {
// got the header row, next loop through header row cells
final List headerVals = new ArrayList();
while (sheetStreamReader.hasNext()) {
final int event = sheetStreamReader.next();
if (event == XMLStreamConstants.START_ELEMENT && "c".equals(sheetStreamReader.getLocalName())) {
final String label = getColumnLabel(sheetStreamReader.getAttributeValue(null, "r"));
final String value = getCellStringValue();
headerVals.add(value);
headerMapping.put(label, value);
} else if (event == XMLStreamConstants.END_ELEMENT && "row".equals(sheetStreamReader.getLocalName())) {
header = headerVals.toArray(new String[headerVals.size()]);
currentRowNum = rowNum - 1;
break outerLoop;
}
}
}
}
}
}
//fast forward to the start row, which may not immediately follow header row
while (currentRowNum < startRowNumber - 1 && sheetStreamReader.hasNext()) {
if (sheetStreamReader.next() == XMLStreamConstants.START_ELEMENT && "row".equals(sheetStreamReader.getLocalName())) {
currentRowNum = Integer.parseInt(sheetStreamReader.getAttributeValue(null, "r")) - 1;
} else if (sheetStreamReader.next() == XMLStreamConstants.END_ELEMENT && "row".equals(sheetStreamReader.getLocalName())) {
if (currentRowNum >= startRowNumber - 1) {
break;
}
}
}
}
private String getCellStringValue() throws Exception {
String result = null;
final String cellType = sheetStreamReader.getAttributeValue(null, "t");
while (sheetStreamReader.hasNext()) {
final int event = sheetStreamReader.next();
if (event == XMLStreamConstants.START_ELEMENT && "v".equals(sheetStreamReader.getLocalName())) {
result = sheetStreamReader.getElementText();
if ("s".equals(cellType)) {
final int idx = Integer.parseInt(result);
result = new XSSFRichTextString(sharedStringsTable.getEntryAt(idx)).toString();
}
} else if (event == XMLStreamConstants.START_ELEMENT && "t".equals(sheetStreamReader.getLocalName())) {
result = sheetStreamReader.getElementText();
} else if (event == XMLStreamConstants.END_ELEMENT && "c".equals(sheetStreamReader.getLocalName())) {
break;
}
}
return result;
}
// A1 -> A, B1 -> B
//R1C1 -> 1, R2C3 -> 3
private String getColumnLabel(final String cellPoint) {
// a R1C1 notation: R1234C567
if (r1c1 == null) {
final char[] chars = cellPoint.toCharArray();
r1c1 = chars[0] == 'R' && Character.isDigit(chars[1]);
return getColumnLabel(cellPoint);
} else if (r1c1) {
return cellPoint.substring(cellPoint.indexOf('C') + 1);
} else {
for (int i = 0; i < cellPoint.length(); ++i) {
if (Character.isDigit(cellPoint.charAt(i))) {
return cellPoint.substring(0, i);
}
}
return cellPoint;
}
}
private void initHeaderMapping() {
if (r1c1) {
for (int index = 0; index < header.length; ++index) {
headerMapping.put(String.valueOf(index + 1), header[index]);
}
} else {
for (int index = 0; index < header.length; ++index) {
headerMapping.put(getColumnLabelByPosition(index), header[index]);
}
}
}
//utility borrowed from
//http://stackoverflow.com/questions/837155/fastest-function-to-generate-excel-column-letters-in-c-sharp
private static String getColumnLabelByPosition(final long index) {
final char[] ret = new char[64];
for (int i = 0; i < ret.length; ++i) {
final int digit = ret.length - i - 1;
final long test = index - powerDown(i + 1);
if (test < 0)
break;
ret[digit] = toChar(test / (long) (Math.pow(26, i)));
}
return new String(ret);
}
private static char toChar(final long num) {
return (char) ((num % 26) + 65);
}
private static long powerDown(int limit) {
long acc = 0;
while (limit > 1) {
acc += Math.pow(26, limit-- - 1);
}
return acc;
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy