ch.rabanti.nanoxlsx4j.lowLevel.WorksheetReader Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of nanoxlsx4j Show documentation
Show all versions of nanoxlsx4j Show documentation
NanoXLSX4j is a small Java library to create and read XLSX files (Microsoft Excel 2007 or newer) in an
easy and native way. The library is originated form PicoXLSX4j and has basic support of reading spreadsheets
/*
* NanoXLSX4j is a small Java library to write and read XLSX (Microsoft Excel 2007 or newer) files in an easy and native way
* Copyright Raphael Stoeckli © 2024
* This library is licensed under the MIT License.
* You find a copy of the license in project folder or on: http://opensource.org/licenses/MIT
*/
package ch.rabanti.nanoxlsx4j.lowLevel;
import ch.rabanti.nanoxlsx4j.Address;
import ch.rabanti.nanoxlsx4j.Cell;
import ch.rabanti.nanoxlsx4j.Column;
import ch.rabanti.nanoxlsx4j.Helper;
import ch.rabanti.nanoxlsx4j.ImportOptions;
import ch.rabanti.nanoxlsx4j.Range;
import ch.rabanti.nanoxlsx4j.Worksheet;
import ch.rabanti.nanoxlsx4j.styles.Style;
import java.io.IOException;
import java.io.InputStream;
import java.math.BigDecimal;
import java.text.DecimalFormat;
import java.text.SimpleDateFormat;
import java.time.Duration;
import java.time.LocalTime;
import java.time.format.DateTimeFormatter;
import java.time.temporal.ChronoField;
import java.time.temporal.ChronoUnit;
import java.time.temporal.TemporalAccessor;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import static ch.rabanti.nanoxlsx4j.Cell.CellType.DATE;
import static ch.rabanti.nanoxlsx4j.Cell.CellType.TIME;
/**
* Class representing a reader for worksheets of XLSX files
*
* @author Raphael Stoeckli
*/
public class WorksheetReader {
private static final double ZERO_THRESHOLD = 0.000001d;
private static final Calendar CALENDAR = Calendar.getInstance();
private static final DecimalFormat DECIMAL_FORMAT = new DecimalFormat("#.#########");
private final Map data;
private final SharedStringsReader sharedStrings;
private final Map styleAssignment = new HashMap<>();
private final ImportOptions importOptions;
private List dateStyles;
private List timeStyles;
private Map resolvedStyles;
private Range autoFilterRange = null;
private final List columns = new ArrayList<>();
private Float defaultColumnWidth;
private Float defaultRowHeight;
private final Map rows = new HashMap<>();
private final List mergedCells = new ArrayList<>();
private final List selectedCells = new ArrayList<>();
private final Map worksheetProtection = new HashMap<>();
private String worksheetProtectionHash;
private PaneDefinition paneSplitValue;
private boolean showGridLines = true;
private boolean showRowColHeaders = true;
private boolean showRuler = true;
private Worksheet.SheetViewType viewType = Worksheet.SheetViewType.normal;
private int currentZoomScale = 100;
private final Map zoomFactors = new HashMap<>();
/**
* Gets the data of the worksheet as Hashmap of cell address-cell object tuples
*
* @return Hashmap of cell address-cell object tuples
*/
public Map getData() {
return data;
}
/**
* Gets the assignment of resolved styles to cell addresses
*
* @return Map of cell address-style number tuples
*/
public Map getStyleAssignment() {
return styleAssignment;
}
/**
* gets the auto filter range
*
* @return Auto filter range if defined, otherwise null
*/
public Range getAutoFilterRange() {
return autoFilterRange;
}
/**
* Gets a list of defined Columns
*
* @return List of columns
*/
public List getColumns() {
return columns;
}
/**
* Gets the default column width
*
* @return Default column width if defined, otherwise null
*/
public Float getDefaultColumnWidth() {
return defaultColumnWidth;
}
/**
* Gets the default row height
*
* @return Default row height if defined, otherwise null
*/
public Float getDefaultRowHeight() {
return defaultRowHeight;
}
/**
* Gets a map of row definitions
*
* @return Map of row definitions, where the key is the row number and the value is an instance of
* {@link RowDefinition}
*/
public Map getRows() {
return rows;
}
/**
* Gets a list of merged cells
*
* @return List of Range definitions
*/
public List getMergedCells() {
return mergedCells;
}
/**
* Gets the selected cell ranges (panes are currently not considered)
*
* @return Selected cell ranges if defined, otherwise null
*/
public List getSelectedCells() {
return selectedCells;
}
/**
* Gets the applicable worksheet protection values
*
* @return Map of {@link Worksheet.SheetProtectionValue} objects
*/
public Map getWorksheetProtection() {
return worksheetProtection;
}
/**
* Gets the (legacy) password hash of a worksheet if protection values are applied with a password
*
* @return Hash value as string or null / empty if not defined
*/
public String getWorksheetProtectionHash() {
return worksheetProtectionHash;
}
/**
* Gets the definition of pane split-related information
*
* @return PaneDefinition object
*/
public PaneDefinition getPaneSplitValue() {
return paneSplitValue;
}
/**
* Gets whether grid lines are shown
*
* @return True if grid lines are visible
*/
public boolean isShowingGridLines() {
return showGridLines;
}
/**
* Gets whether column and row headers are shown
*
* @return True if column and row header are visible
*/
public boolean isShowingRowColHeaders() {
return showRowColHeaders;
}
/**
* Gets whether rulers are shown in view type: pageLayout
*
* @return True if rules are visible
*/
public boolean isShowingRuler() {
return showRuler;
}
/**
* Gets the sheet view type of the current worksheet
*
* @return Current view type
*/
public Worksheet.SheetViewType getViewType() {
return viewType;
}
/**
* Gets the zoom factor of the current view type
*
* @return Current zoom scale
*/
public int getCurrentZoomScale() {
return currentZoomScale;
}
/**
* Gets all preserved zoom factors of the worksheet
*
* @return Map of all zoom factors of the current worksheet
*/
public Map getZoomFactors() {
return zoomFactors;
}
/**
* Constructor with parameters and import options
*
* @param sharedStrings SharedStringsReader object
* @param styleReaderContainer Resolved styles, used to determine dates or times
*/
public WorksheetReader(SharedStringsReader sharedStrings, StyleReaderContainer styleReaderContainer, ImportOptions options) {
this.data = new HashMap<>();
this.sharedStrings = sharedStrings;
this.importOptions = options;
processStyles(styleReaderContainer);
}
/**
* Determine which of the resolved styles are either to define a time or a date. Stores also the styles into a map
*
* @param styleReaderContainer Resolved styles from the style reader
*/
private void processStyles(StyleReaderContainer styleReaderContainer) {
this.dateStyles = new ArrayList<>();
this.timeStyles = new ArrayList<>();
this.resolvedStyles = new HashMap<>();
for (int i = 0; i < styleReaderContainer.getStyleCount(); i++) {
String index = Integer.toString(i);
StyleReaderContainer.StyleResult result = styleReaderContainer.evaluateDateTimeStyle(i);
if (result.isDateStyle()) {
this.dateStyles.add(index);
}
if (result.isTimeStyle()) {
this.timeStyles.add(index);
}
resolvedStyles.put(index, result.getResult());
}
}
/**
* Reads the XML file form the passed stream and processes the worksheet data
*
* @param stream Stream of the XML file
* @throws IOException thrown if the document could not be read
*/
public void read(InputStream stream) throws IOException {
data.clear();
try {
XmlDocument xr = new XmlDocument();
xr.load(stream);
XmlDocument.XmlNodeList rows = xr.getDocumentElement().getElementsByTagName("row", true);
for (int i = 0; i < rows.size(); i++) {
XmlDocument.XmlNode row = rows.get(i);
String rowAttribute = row.getAttribute("r");
if (rowAttribute != null) {
String hiddenAttribute = row.getAttribute("hidden");
RowDefinition.addRowDefinition(this.rows, rowAttribute, null, hiddenAttribute);
String heightAttribute = row.getAttribute("ht");
RowDefinition.addRowDefinition(this.rows, rowAttribute, heightAttribute, null);
}
if (row.hasChildNodes()) {
for (XmlDocument.XmlNode rowChild : row.getChildNodes()) {
readCell(rowChild);
}
}
}
getSheetView(xr);
getMergedCells(xr);
getSheetFormats(xr);
getAutoFilters(xr);
getColumns(xr);
getSheetProtection(xr);
}
catch (Exception ex) {
throw new IOException("The XML entry could not be read from the input stream. Please see the inner exception:", ex);
}
finally {
if (stream != null) {
stream.close();
}
}
}
/**
* Gets the selected cells of the current worksheet
*
* @param xmlDocument XML document of the current worksheet
*/
private void getSheetView(XmlDocument xmlDocument) {
XmlDocument.XmlNodeList sheetViewsNodes = xmlDocument.getDocumentElement().getElementsByTagName("sheetViews", true);
if (sheetViewsNodes != null && sheetViewsNodes.size() > 0) {
XmlDocument.XmlNodeList sheetViewNodes = sheetViewsNodes.get(0).getChildNodes();
String attribute;
// Go through all possible views
for (XmlDocument.XmlNode sheetView : sheetViewNodes) {
attribute = sheetView.getAttribute("view");
if (attribute != null) {
viewType = Worksheet.SheetViewType.valueOf(attribute);
}
attribute = sheetView.getAttribute("zoomScale");
if (attribute != null) {
currentZoomScale = Integer.parseInt(attribute);
}
attribute = sheetView.getAttribute("zoomScaleNormal");
if (attribute != null) {
int scale = Integer.parseInt(attribute);
zoomFactors.put(Worksheet.SheetViewType.normal, scale);
}
attribute = sheetView.getAttribute("zoomScalePageLayoutView");
if (attribute != null) {
int scale = Integer.parseInt(attribute);
zoomFactors.put(Worksheet.SheetViewType.pageLayout, scale);
}
attribute = sheetView.getAttribute("zoomScaleSheetLayoutView");
if (attribute != null) {
int scale = Integer.parseInt(attribute);
zoomFactors.put(Worksheet.SheetViewType.pageBreakPreview, scale);
}
attribute = sheetView.getAttribute("showGridLines");
if (attribute != null) {
showGridLines = ReaderUtils.parseBinaryBoolean(attribute) == 1;
}
attribute = sheetView.getAttribute("showRowColHeaders");
if (attribute != null) {
showRowColHeaders = ReaderUtils.parseBinaryBoolean(attribute) == 1;
}
attribute = sheetView.getAttribute("showRuler");
if (attribute != null) {
showRuler = ReaderUtils.parseBinaryBoolean(attribute) == 1;
}
if (sheetView.getName().equalsIgnoreCase("sheetView")) {
XmlDocument.XmlNodeList selectionNodes = sheetView.getElementsByTagName("selection", true);
if (selectionNodes != null && selectionNodes.size() > 0) {
for (XmlDocument.XmlNode selectionNode : selectionNodes) {
attribute = selectionNode.getAttribute("sqref");
if (attribute != null) {
if (attribute.contains(" ")) {
// Multiple ranges
String[] ranges = attribute.split(" ");
for (String range : ranges) {
collectSelectedCells(range);
}
}
else {
collectSelectedCells(attribute);
}
}
}
}
XmlDocument.XmlNodeList paneNodes = sheetView.getElementsByTagName("pane", true);
if (paneNodes != null && paneNodes.size() > 0) {
attribute = paneNodes.get(0).getAttribute("state");
boolean useNumbers = false;
this.paneSplitValue = new PaneDefinition();
if (attribute != null) {
this.paneSplitValue.setFrozenState(attribute);
useNumbers = this.paneSplitValue.getFrozenState();
}
attribute = paneNodes.get(0).getAttribute("ySplit");
if (attribute != null) {
this.paneSplitValue.ySplitDefined = true;
if (useNumbers) {
ReaderUtils.IntParser intParser = ReaderUtils.IntParser.tryParseDecimal(attribute);
this.paneSplitValue.paneSplitRowIndex = intParser.value;
}
else {
this.paneSplitValue.paneSplitHeight = Helper.getPaneSplitHeight(Float.parseFloat(attribute));
}
}
attribute = paneNodes.get(0).getAttribute("xSplit");
if (attribute != null) {
this.paneSplitValue.xSplitDefined = true;
if (useNumbers) {
this.paneSplitValue.paneSplitColumnIndex = Integer.parseInt(attribute);
}
else {
this.paneSplitValue.paneSplitWidth = Helper.getPaneSplitWidth(Float.parseFloat(attribute));
}
}
attribute = paneNodes.get(0).getAttribute("topLeftCell");
if (attribute != null) {
this.paneSplitValue.topLeftCell = new Address(attribute);
}
attribute = paneNodes.get(0).getAttribute("activePane");
if (attribute != null) {
this.paneSplitValue.setActivePane(attribute);
}
}
}
}
}
}
/**
* Resolves the selected cells of a range or a single cell
*
* @param attribute Raw range/cell as string
*/
private void collectSelectedCells(String attribute) {
if (attribute.contains(":")) {
// One range
this.selectedCells.add(new Range(attribute));
}
else {
// One cell
this.selectedCells.add(new Range(attribute + ":" + attribute));
}
}
/**
* Gets the sheet protection values of the current worksheets
*
* @param xmlDocument XML document of the current worksheet
*/
private void getSheetProtection(XmlDocument xmlDocument) {
XmlDocument.XmlNodeList sheetProtectionNodes = xmlDocument.getDocumentElement().getElementsByTagName("sheetProtection", true);
if (sheetProtectionNodes != null && sheetProtectionNodes.size() > 0) {
XmlDocument.XmlNode sheetProtectionNode = sheetProtectionNodes.get(0);
manageSheetProtection(sheetProtectionNode, Worksheet.SheetProtectionValue.autoFilter);
manageSheetProtection(sheetProtectionNode, Worksheet.SheetProtectionValue.deleteColumns);
manageSheetProtection(sheetProtectionNode, Worksheet.SheetProtectionValue.deleteRows);
manageSheetProtection(sheetProtectionNode, Worksheet.SheetProtectionValue.formatCells);
manageSheetProtection(sheetProtectionNode, Worksheet.SheetProtectionValue.formatColumns);
manageSheetProtection(sheetProtectionNode, Worksheet.SheetProtectionValue.formatRows);
manageSheetProtection(sheetProtectionNode, Worksheet.SheetProtectionValue.insertColumns);
manageSheetProtection(sheetProtectionNode, Worksheet.SheetProtectionValue.insertHyperlinks);
manageSheetProtection(sheetProtectionNode, Worksheet.SheetProtectionValue.insertRows);
manageSheetProtection(sheetProtectionNode, Worksheet.SheetProtectionValue.objects);
manageSheetProtection(sheetProtectionNode, Worksheet.SheetProtectionValue.pivotTables);
manageSheetProtection(sheetProtectionNode, Worksheet.SheetProtectionValue.scenarios);
manageSheetProtection(sheetProtectionNode, Worksheet.SheetProtectionValue.selectLockedCells);
manageSheetProtection(sheetProtectionNode, Worksheet.SheetProtectionValue.selectUnlockedCells);
manageSheetProtection(sheetProtectionNode, Worksheet.SheetProtectionValue.sort);
String legacyPasswordHash = sheetProtectionNode.getAttribute("password");
if (legacyPasswordHash != null) {
this.worksheetProtectionHash = legacyPasswordHash;
}
}
}
/**
* Manages particular sheet protection values if defined
*
* @param node Sheet protection node
* @param sheetProtectionValue Value to check and maintain (if defined)
*/
private void manageSheetProtection(XmlDocument.XmlNode node, Worksheet.SheetProtectionValue sheetProtectionValue) {
String attributeName = sheetProtectionValue.name();
String attribute = node.getAttribute(attributeName);
if (attribute != null) {
int value = ReaderUtils.parseBinaryBoolean(attribute);
worksheetProtection.put(sheetProtectionValue, value);
}
}
/**
* Gets the merged cells of the current worksheet
*
* @param xmlDocument XML document of the current worksheet
*/
private void getMergedCells(XmlDocument xmlDocument) {
XmlDocument.XmlNodeList mergedCellsNodes = xmlDocument.getDocumentElement().getElementsByTagName("mergeCells", true);
if (mergedCellsNodes != null && mergedCellsNodes.size() > 0) {
XmlDocument.XmlNodeList mergedCellNodes = mergedCellsNodes.get(0).getChildNodes();
if (mergedCellNodes != null && mergedCellNodes.size() > 0) {
for (XmlDocument.XmlNode mergedCells : mergedCellNodes) {
String attribute = mergedCells.getAttribute("ref");
if (attribute != null) {
this.mergedCells.add(new Range(attribute));
}
}
}
}
}
/**
* Gets the sheet format information of the current worksheet
*
* @param xmlDocument XML document of the current worksheet
*/
private void getSheetFormats(XmlDocument xmlDocument) {
XmlDocument.XmlNodeList formatNodes = xmlDocument.getDocumentElement().getElementsByTagName("sheetFormatPr", true);
if (formatNodes != null && formatNodes.size() > 0) {
String attribute = formatNodes.get(0).getAttribute("defaultColWidth");
if (attribute != null) {
this.defaultColumnWidth = Float.parseFloat(attribute);
}
attribute = formatNodes.get(0).getAttribute("defaultRowHeight");
if (attribute != null) {
this.defaultRowHeight = Float.parseFloat(attribute);
}
}
}
/**
* Gets the auto filters of the current worksheet
*
* @param xmlDocument XML document of the current worksheet
*/
private void getAutoFilters(XmlDocument xmlDocument) {
XmlDocument.XmlNodeList autoFilterRanges = xmlDocument.getDocumentElement().getElementsByTagName("autoFilter", true);
if (autoFilterRanges != null && autoFilterRanges.size() > 0) {
String auoFilterRef = autoFilterRanges.get(0).getAttribute("ref");
if (auoFilterRef != null) {
this.autoFilterRange = new Range(auoFilterRef);
}
}
}
/**
* Gets the columns of the current worksheet
*
* @param xmlDocument XML document of the current worksheet
*/
private void getColumns(XmlDocument xmlDocument) {
XmlDocument.XmlNodeList columnsNodes = xmlDocument.getDocumentElement().getElementsByTagName("cols", true);
if (columnsNodes.size() == 0) {
return;
}
for (XmlDocument.XmlNode columnNode : columnsNodes.get(0).getChildNodes()) {
Integer min = null;
Integer max = null;
List indices = new ArrayList<>();
String attribute = columnNode.getAttribute("min");
if (attribute != null) {
min = Integer.parseInt(attribute);
max = min;
indices.add(min);
}
attribute = columnNode.getAttribute("max");
if (attribute != null) {
max = Integer.parseInt(attribute);
}
if (min != null && !max.equals(min)) {
for (int i = min; i <= max; i++) {
indices.add(i);
}
}
attribute = columnNode.getAttribute("width");
float width = Worksheet.DEFAULT_COLUMN_WIDTH;
if (attribute != null) {
width = Float.parseFloat(attribute);
}
attribute = columnNode.getAttribute("hidden");
boolean hidden = false;
if (attribute != null) {
int value = ReaderUtils.parseBinaryBoolean(attribute);
if (value == 1) {
hidden = true;
}
}
attribute = columnNode.getAttribute("style");
Style defaultStyle = null;
if (attribute != null) {
if (resolvedStyles.containsKey(attribute)) {
defaultStyle = resolvedStyles.get(attribute);
}
}
for (int index : indices) {
Column column = new Column(index - 1); // transform to zero-based
column.setWidth(width);
column.setHidden(hidden);
if (defaultStyle != null) {
column.setDefaultColumnStyle(defaultStyle);
}
this.columns.add(column);
}
}
}
/**
* Reads one cell in a worksheet
*
* @param rowChild Current child row as XmlNode
*/
private void readCell(XmlDocument.XmlNode rowChild) {
String type = "s";
String styleNumber = "";
String address = "A1";
String value = "";
if (rowChild.getName().equalsIgnoreCase("c")) {
address = rowChild.getAttribute("r"); // Mandatory
type = rowChild.getAttribute("t"); // can be null if not existing
styleNumber = rowChild.getAttribute("s"); // can be null
if (rowChild.hasChildNodes()) {
for (XmlDocument.XmlNode valueNode : rowChild.getChildNodes()) {
if (valueNode.getName().equalsIgnoreCase("v")) {
value = valueNode.getInnerText();
}
if (valueNode.getName().equalsIgnoreCase("f")) {
value = valueNode.getInnerText();
}
}
}
}
String key = address.toUpperCase();
styleAssignment.put(key, styleNumber);
data.put(key, resolveCellData(value, type, styleNumber, address));
}
private Cell resolveCellData(String raw, String type, String styleNumber, String address) {
Cell.CellType importedType = Cell.CellType.DEFAULT;
Object rawValue;
if (checkType(type, "b")) {
rawValue = tryParseBool(raw);
if (rawValue != null) {
importedType = Cell.CellType.BOOL;
}
else {
rawValue = getNumericValue(raw);
if (rawValue != null) {
importedType = Cell.CellType.NUMBER;
}
}
}
else if (checkType(type, "s")) {
importedType = Cell.CellType.STRING;
rawValue = resolveSharedString(raw);
}
else if (checkType(type, "str")) {
importedType = Cell.CellType.FORMULA;
rawValue = raw;
}
else if (dateStyles.contains(styleNumber) && (checkType(type, null) || checkType(type, "") || checkType(type, "n"))) {
Result