// org.ttzero.excel.reader.ExcelReader Maven / Gradle / Ivy
/*
* Copyright (c) 2019, [email protected] All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.ttzero.excel.reader;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.Element;
import org.dom4j.Namespace;
import org.dom4j.QName;
import org.dom4j.io.SAXReader;
import org.ttzero.excel.annotation.NS;
import org.ttzero.excel.annotation.TopNS;
import org.ttzero.excel.entity.Relationship;
import org.ttzero.excel.entity.style.Styles;
import org.ttzero.excel.manager.Const;
import org.ttzero.excel.manager.ExcelType;
import org.ttzero.excel.manager.RelManager;
import org.ttzero.excel.manager.docProps.App;
import org.ttzero.excel.manager.docProps.Core;
import org.ttzero.excel.util.FileUtil;
import org.ttzero.excel.util.StringUtil;
import org.ttzero.excel.util.ZipUtil;
import java.io.IOException;
import java.io.InputStream;
import java.io.UncheckedIOException;
import java.lang.reflect.Constructor;
import java.lang.reflect.Field;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.attribute.PosixFilePermissions;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.Date;
import java.util.Iterator;
import java.util.List;
import java.util.Spliterator;
import java.util.Spliterators;
import java.util.TimeZone;
import java.util.stream.Stream;
import java.util.stream.StreamSupport;
/**
 * Excel Reader tools
 *
 * <p>A streaming operation chain driven by a cursor. The cursor only
 * moves forward, so the same Sheet stream cannot be consumed more than
 * once. The internal Row object of a Sheet is shared in memory, so do
 * not collect a {@code Stream<Row>} directly into a collection class.
 * Prefer try-with-resources when using the reader, or close the
 * ExcelReader manually.
 *
 * <pre>
 * try (ExcelReader reader = ExcelReader.read(path)) {
 *     reader.sheets().flatMap(Sheet::rows).forEach(System.out::println);
 * } catch (IOException e) {}
 * </pre>
 *
 * Created by guanquan.wang on 2018-09-22
 */
public class ExcelReader implements AutoCloseable {
// Logger bound to the runtime class of this reader; used to report non-fatal problems
private Logger logger = LogManager.getLogger(getClass());
// Empty constructor that leaves every field unset.
// NOTE(review): presumably meant for subclasses that populate the fields themselves - confirm.
protected ExcelReader() { }
// Directory the workbook was unzipped into; close() removes it recursively
protected Path self;
// Worksheets of the workbook; may be null (getSize() checks for that)
protected Sheet[] sheets;
// Source file to delete on close(); only set when the reader was asked to remove its source
private Path temp;
// Detected file format, assigned by the static factory after construction
private ExcelType type;
// Document properties, loaded on the first call to getAppInfo()
private AppInfo appInfo;
/**
 * The Shared String Table
 */
private SharedStrings sst;
/**
 * The {@link Styles}
 */
private Styles styles;
/**
 * Opens the Excel file at {@code path}, passing {@code 0} for both the
 * shared-string buffer size and the cache size.
 *
 * @param path the excel path
 * @return the {@link ExcelReader}
 * @throws IOException if path not exists or I/O error occur
 */
public static ExcelReader read(Path path) throws IOException {
    final int unspecified = 0;
    return read(path, unspecified, unspecified);
}
/**
 * Opens an Excel document supplied as a stream, passing {@code 0} for both
 * the shared-string buffer size and the cache size.
 *
 * @param stream the {@link InputStream} of excel
 * @return the {@link ExcelReader}
 * @throws IOException if I/O error occur
 */
public static ExcelReader read(InputStream stream) throws IOException {
    final int unspecified = 0;
    return read(stream, unspecified, unspecified);
}
/**
 * Opens the Excel file at {@code path} with an explicit shared-string
 * buffer size and {@code 0} as the cache size.
 *
 * @param path       the excel path
 * @param bufferSize the {@link SharedStrings} buffer size. default is 512
 *                   This parameter affects the number of read times.
 * @return the {@link ExcelReader}
 * @throws IOException if path not exists or I/O error occur
 */
public static ExcelReader read(Path path, int bufferSize) throws IOException {
    final int unspecifiedCacheSize = 0;
    return read(path, bufferSize, unspecifiedCacheSize);
}
/**
 * Opens an Excel document supplied as a stream with an explicit
 * shared-string buffer size and {@code 0} as the cache size.
 *
 * @param stream     the {@link InputStream} of excel
 * @param bufferSize the {@link SharedStrings} buffer size. default is 512
 *                   This parameter affects the number of read times.
 * @return the {@link ExcelReader}
 * @throws IOException if I/O error occur
 */
public static ExcelReader read(InputStream stream, int bufferSize) throws IOException {
    final int unspecifiedCacheSize = 0;
    return read(stream, bufferSize, unspecifiedCacheSize);
}
/**
 * Opens the Excel file at {@code path} with explicit buffer and cache
 * sizes. The file at {@code path} is not registered for removal on close.
 *
 * @param path       the excel path
 * @param bufferSize the {@link SharedStrings} buffer size. default is 512
 *                   This parameter affects the number of read times.
 * @param cacheSize  the {@link Cache} size, default is 512
 * @return the {@link ExcelReader}
 * @throws IOException if path not exists or I/O error occur
 */
public static ExcelReader read(Path path, int bufferSize, int cacheSize) throws IOException {
    // The caller owns the file, so the reader must not delete it
    final boolean removeSource = false;
    return read(path, bufferSize, cacheSize, removeSource);
}
/**
 * Opens an Excel document supplied as a stream.
 *
 * <p>The stream is first copied into a temporary file, which the returned
 * reader deletes when it is closed. If copying or opening fails, the
 * temporary file is deleted here before the exception propagates.
 *
 * @param stream     the {@link InputStream} of excel
 * @param bufferSize the {@link SharedStrings} buffer size. default is 512
 *                   This parameter affects the number of read times.
 * @param cacheSize  the {@link Cache} size, default is 512
 * @return the {@link ExcelReader}
 * @throws IOException if I/O error occur
 */
public static ExcelReader read(InputStream stream, int bufferSize, int cacheSize) throws IOException {
    // Files.createTempFile either returns a path or throws; it never returns null
    Path temp;
    if (FileUtil.isWindows()) {
        temp = Files.createTempFile(Const.EEC_PREFIX, null);
    } else {
        temp = Files.createTempFile(Const.EEC_PREFIX, null
            , PosixFilePermissions.asFileAttribute(PosixFilePermissions.fromString("rwxr-x---")));
    }

    boolean opened = false;
    try {
        FileUtil.cp(stream, temp);
        ExcelReader reader = read(temp, bufferSize, cacheSize, true);
        opened = true;
        return reader;
    } finally {
        if (!opened) {
            // Nobody will ever close a reader for this copy, so remove it now
            try {
                Files.deleteIfExists(temp);
            } catch (IOException ignored) {
                // Best effort: the failure that brought us here is the one to report
            }
        }
    }
}
/**
 * Returns the file format recorded for this reader.
 *
 * @return enum type ExcelType
 */
public ExcelType getType() {
    return this.type;
}
/**
 * Streams the worksheets in array order.
 *
 * <p>Each sheet is loaded when the stream reaches it, not up front. An
 * {@link IOException} raised while loading is rethrown as an
 * {@link UncheckedIOException}.
 *
 * @return {@link Stream} of {@link Sheet}
 */
public Stream<Sheet> sheets() {
    return Stream.of(sheets).map(sheet -> {
        try {
            // test and load sheet data
            return sheet.load();
        } catch (IOException e) {
            throw new UncheckedIOException(e);
        }
    });
}
/**
 * Loads and returns the worksheet stored at the given array position.
 *
 * @param index sheet index of workbook
 * @return sheet
 */
public Sheet sheet(int index) {
    final Sheet target = sheets[index];
    try {
        // worksheet data is only loaded when the sheet is requested
        return target.load();
    } catch (IOException e) {
        throw new UncheckedIOException(e);
    }
}
/**
 * Loads and returns the first worksheet whose name equals {@code sheetName}.
 *
 * @param sheetName name
 * @return null if not found
 */
public Sheet sheet(String sheetName) {
    try {
        for (int i = 0; i < sheets.length; i++) {
            final Sheet candidate = sheets[i];
            if (!sheetName.equals(candidate.getName())) {
                continue;
            }
            // worksheet data is only loaded for the sheet that matched
            return candidate.load();
        }
        return null;
    } catch (IOException e) {
        throw new UncheckedIOException(e);
    }
}
/**
 * Returns the reader's own worksheet array (not a copy) without calling
 * {@code load()} on any sheet.
 *
 * @return Sheet Array
 */
public Sheet[] all() {
    return this.sheets;
}
/**
 * Returns the number of worksheets, or zero when the sheet array is unset.
 *
 * @return int
 */
public int getSize() {
    if (sheets == null) {
        return 0;
    }
    return sheets.length;
}
/**
 * Closes every worksheet and the shared string table, then deletes the
 * unzipped working directory and, when one was registered, the temporary
 * copy of the source.
 *
 * <p>The steps are chained with {@code finally} so the temporary files are
 * removed even when closing a sheet or the shared string table fails.
 * Fields that were never initialised are skipped.
 *
 * @throws IOException when fail close readers
 */
@Override
public void close() throws IOException {
    try {
        // Close all opened sheet
        if (sheets != null) {
            for (Sheet st : sheets) {
                if (st != null) {
                    st.close();
                }
            }
        }
    } finally {
        try {
            // Close Shared String Table
            if (sst != null) {
                sst.close();
            }
        } finally {
            // Delete temp files
            try {
                if (self != null) {
                    FileUtil.rm_rf(self.toFile(), true);
                }
            } finally {
                if (temp != null) {
                    FileUtil.rm(temp);
                }
            }
        }
    }
}
/**
 * Returns general document information such as title, subject and creator.
 * The value is read on the first call and reused afterwards.
 *
 * @return the information
 */
public AppInfo getAppInfo() {
    if (appInfo != null) {
        return appInfo;
    }
    appInfo = getGeneralInfo();
    return appInfo;
}
// --- PRIVATE FUNCTIONS
/**
 * Unzips the workbook at {@code path} into a fresh temp directory, reads
 * the workbook relationships and the sheet list, loads the shared strings
 * and styles, and builds the worksheet array sorted by sheet id.
 *
 * <p>If anything fails after the temp directory was created, the
 * directory is removed before the exception propagates.
 *
 * @param path       the excel path
 * @param bufferSize buffer size handed to {@link SharedStrings}
 * @param cacheSize  cache size handed to {@link SharedStrings}
 * @throws IOException        if unzipping or loading fails with an I/O error
 * @throws ExcelReadException if a workbook part cannot be read or parsed, or a
 *                            sheet refers to a relationship that does not exist
 */
private ExcelReader(Path path, int bufferSize, int cacheSize) throws IOException {
    // Unzip the source into a working directory
    Path workDir = FileUtil.mktmp(Const.EEC_PREFIX);
    boolean ready = false;
    try {
        try (InputStream source = Files.newInputStream(path)) {
            ZipUtil.unzip(source, workDir);
        }

        SAXReader reader = new SAXReader();

        // load workbook.xml.rels: maps a relationship id to the part it targets
        Document document = readDocument(reader, workDir.resolve("xl/_rels/workbook.xml.rels"));
        @SuppressWarnings("unchecked")
        List<Element> list = document.getRootElement().elements();
        Relationship[] rels = new Relationship[list.size()];
        int i = 0;
        for (Element e : list) {
            rels[i++] = new Relationship(e.attributeValue("Id"), e.attributeValue("Target"), e.attributeValue("Type"));
        }
        RelManager relManager = RelManager.of(rels);

        // load workbook.xml: lists the sheets
        document = readDocument(reader, workDir.resolve("xl/workbook.xml"));
        Element root = document.getRootElement();
        Namespace ns = root.getNamespaceForPrefix("r");

        // Load SharedString
        sst = new SharedStrings(workDir.resolve("xl/sharedStrings.xml"), bufferSize, cacheSize).load();
        // Load Styles
        styles = Styles.load(workDir.resolve("xl/styles.xml"));

        List<Sheet> found = new ArrayList<>();
        @SuppressWarnings("unchecked")
        Iterator<Element> sheetIter = root.element("sheets").elementIterator();
        while (sheetIter.hasNext()) {
            Element e = sheetIter.next();
            XMLSheet sheet = new XMLSheet();
            sheet.setName(e.attributeValue("name"));
            sheet.setIndex(Integer.parseInt(e.attributeValue("sheetId")));
            String state = e.attributeValue("state");
            sheet.setHidden("hidden".equals(state));
            Relationship r = relManager.getById(e.attributeValue(QName.get("id", ns)));
            if (r == null) {
                // The sheet points at a part that is not declared; the finally block cleans up
                sheet.close();
                throw new ExcelReadException("File has be destroyed");
            }
            sheet.setPath(workDir.resolve("xl").resolve(r.getTarget()));
            // put shared string
            sheet.setSst(sst);
            // Setting styles
            sheet.setStyles(styles);
            found.add(sheet);
        }

        // sort by sheet index
        found.sort(Comparator.comparingInt(Sheet::getIndex));
        this.sheets = found.toArray(new Sheet[0]);
        self = workDir;
        ready = true;
    } finally {
        if (!ready) {
            // Release the shared strings first so its file is no longer held open
            try {
                if (sst != null) {
                    sst.close();
                }
            } catch (Exception ignored) {
                // The failure that aborted construction is the one to report
            }
            FileUtil.rm_rf(workDir.toFile(), true);
        }
    }
}

/** Parses one XML part and closes its stream; any read or parse failure becomes an {@link ExcelReadException}. */
private static Document readDocument(SAXReader reader, Path file) {
    try (InputStream is = Files.newInputStream(file)) {
        return reader.read(is);
    } catch (DocumentException | IOException e) {
        throw new ExcelReadException(e);
    }
}
/**
 * Detects the file format and creates the matching reader.
 *
 * <p>XLSX files are handled by this class. For XLS files the class
 * {@code org.ttzero.excel.reader.BIFF8Reader} is looked up by name and
 * instantiated reflectively; any failure while doing so is reported as an
 * {@link ExcelReadException} carrying the original cause.
 *
 * @param path       the excel path
 * @param bufferSize the {@link SharedStrings} buffer size. default is 512
 *                   This parameter affects the number of read times.
 * @param cacheSize  the {@link Cache} size, default is 512
 * @param rmSource   remove the source files
 * @return the {@link ExcelReader}
 * @throws IOException        if path not exists or I/O error occur
 * @throws ExcelReadException if the format is unknown or the XLS reader cannot be created
 */
private static ExcelReader read(Path path, int bufferSize, int cacheSize, boolean rmSource) throws IOException {
    // Check document type
    ExcelType type = getType(path);
    ExcelReader er;
    switch (type) {
        case XLSX:
            er = new ExcelReader(path, bufferSize, cacheSize);
            break;
        case XLS:
            try {
                // Looked up by name, so this class has no compile-time reference to BIFF8Reader
                Class<?> clazz = Class.forName("org.ttzero.excel.reader.BIFF8Reader");
                Constructor<?> constructor = clazz.getDeclaredConstructor(Path.class, int.class, int.class);
                er = (ExcelReader) constructor.newInstance(path, bufferSize, cacheSize);
            } catch (Exception e) {
                throw new ExcelReadException("Only support read Office Open XML file.", e);
            }
            break;
        default:
            throw new ExcelReadException("Unknown file type.");
    }
    er.type = type;
    // storage source path
    if (rmSource) {
        er.temp = path;
    }
    return er;
}
/**
 * Determines the document type from the first eight bytes of the file.
 *
 * <p>Any I/O failure while opening or reading the file yields
 * {@link ExcelType#UNKNOWN} instead of an exception.
 *
 * @param path documents path
 * @return enum of ExcelType
 */
private static ExcelType getType(Path path) {
    ExcelType type;
    try (InputStream is = Files.newInputStream(path)) {
        byte[] bytes = new byte[8];
        // A single read() may return fewer bytes than requested; keep reading until full or EOF
        int len = 0;
        while (len < bytes.length) {
            int n = is.read(bytes, len, bytes.length - len);
            if (n < 0) {
                break;
            }
            len += n;
        }
        type = typeOfStream(bytes, len);
    } catch (IOException e) {
        type = ExcelType.UNKNOWN;
    }
    return type;
}
// --- check
/**
 * Classifies a file by its leading bytes.
 *
 * <p>The first four bytes, read as a little-endian int, are compared with
 * the zip signature (XLSX). Otherwise, when eight bytes are available, the
 * first and second four-byte words are compared with the two XLS
 * signature words.
 *
 * @param bytes the leading bytes of the file
 * @param size  how many of those bytes are valid
 * @return the detected type, or {@link ExcelType#UNKNOWN} when fewer than
 *         four bytes are available or no signature matches
 */
private static ExcelType typeOfStream(byte[] bytes, int size) {
    final int usable = Math.min(bytes.length, size);
    if (usable < 4) {
        return ExcelType.UNKNOWN;
    }

    final int zipSignature = 0x04034B50;
    final int xlsFirstWord = 0xE011CFD0;
    final int xlsSecondWord = 0xE11AB1A1;

    final int firstWord = littleEndianInt(bytes, 0);
    if (firstWord == zipSignature) {
        return ExcelType.XLSX;
    }
    if (firstWord == xlsFirstWord && usable >= 8
        && littleEndianInt(bytes, 4) == xlsSecondWord) {
        return ExcelType.XLS;
    }
    return ExcelType.UNKNOWN;
}

/** Assembles four bytes starting at {@code offset} into an int, lowest byte first. */
private static int littleEndianInt(byte[] bytes, int offset) {
    int value = bytes[offset] & 0xFF;
    value += (bytes[offset + 1] & 0xFF) << 8;
    value += (bytes[offset + 2] & 0xFF) << 16;
    value += (bytes[offset + 3] & 0xFF) << 24;
    return value;
}
/**
 * Reads the document properties from {@code docProps/app.xml} and
 * {@code docProps/core.xml} inside the unzipped workbook.
 *
 * <p>From {@code app.xml} the company, application and application version
 * are copied. For {@code core.xml}, every field of {@link Core} annotated
 * with {@link NS} whose prefix is declared by the class-level {@link TopNS}
 * is filled from the element of the same name. {@code String} fields take
 * the element text as is; {@code Date} fields are parsed as
 * {@code yyyy-MM-dd'T'HH:mm:ss'Z'} in UTC. A date element that is missing,
 * empty or unparsable leaves the field unset.
 *
 * @return the application and core properties
 * @throws ExcelReadException if either properties file cannot be read or parsed
 */
protected AppInfo getGeneralInfo() {
    SAXReader reader = new SAXReader();
    Document document;

    // load docProps/app.xml
    try (InputStream is = Files.newInputStream(self.resolve("docProps/app.xml"))) {
        document = reader.read(is);
    } catch (DocumentException | IOException e) {
        throw new ExcelReadException(e);
    }
    Element root = document.getRootElement();
    App app = new App();
    app.setCompany(root.elementText("Company"));
    app.setApplication(root.elementText("Application"));
    app.setAppVersion(root.elementText("AppVersion"));

    // load docProps/core.xml
    try (InputStream is = Files.newInputStream(self.resolve("docProps/core.xml"))) {
        document = reader.read(is);
    } catch (DocumentException | IOException e) {
        throw new ExcelReadException(e);
    }
    root = document.getRootElement();
    Core core = new Core();
    Class<Core> clazz = Core.class;
    TopNS topNS = clazz.getAnnotation(TopNS.class);
    String[] prefixs = topNS.prefix(), urls = topNS.uri();
    Field[] fields = clazz.getDeclaredFields();

    // 'HH' is the 24-hour clock ('hh' would read 12:30 as 00:30).
    // The trailing literal 'Z' denotes UTC, so parse in UTC rather than the local zone.
    SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'");
    format.setTimeZone(TimeZone.getTimeZone("UTC"));

    for (Field f : fields) {
        NS ns = f.getAnnotation(NS.class);
        if (ns == null) continue;
        f.setAccessible(true);
        int nsIndex = StringUtil.indexOf(prefixs, ns.value());
        if (nsIndex > -1) {
            Namespace namespace = new Namespace(ns.value(), urls[nsIndex]);
            Class<?> type = f.getType();
            String v = root.elementText(new QName(f.getName(), namespace));
            if (type == String.class) {
                try {
                    f.set(core, v);
                } catch (IllegalAccessException e) {
                    logger.warn("Set field (" + f + ") error.", e);
                }
            } else if (type == Date.class) {
                // The element is optional; parse(null) would throw an uncaught NullPointerException
                if (v == null || v.isEmpty()) continue;
                try {
                    f.set(core, format.parse(v));
                } catch (ParseException | IllegalAccessException e) {
                    logger.warn("Set field (" + f + ") error.", e);
                }
            }
        }
    }
    return new AppInfo(app, core);
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy