All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.metamodel.excel.ExcelUtils Maven / Gradle / Ivy

There is a newer version: 5.3.3
Show newest version
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.metamodel.excel;

import java.io.File;
import java.io.InputStream;
import java.io.OutputStream;
import java.text.NumberFormat;
import java.util.Date;
import java.util.Iterator;

import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;

import org.apache.poi.hssf.usermodel.HSSFDateUtil;
import org.apache.poi.hssf.usermodel.HSSFFont;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
import org.apache.poi.hssf.util.HSSFColor;
import org.apache.poi.ss.formula.FormulaParseException;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.CellStyle;
import org.apache.poi.ss.usermodel.Color;
import org.apache.poi.ss.usermodel.Font;
import org.apache.poi.ss.usermodel.FontUnderline;
import org.apache.poi.ss.usermodel.FormulaError;
import org.apache.poi.ss.usermodel.FormulaEvaluator;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.ss.usermodel.WorkbookFactory;
import org.apache.poi.xssf.streaming.SXSSFWorkbook;
import org.apache.poi.xssf.usermodel.XSSFCell;
import org.apache.poi.xssf.usermodel.XSSFColor;
import org.apache.poi.xssf.usermodel.XSSFFont;
import org.apache.metamodel.MetaModelException;
import org.apache.metamodel.MetaModelHelper;
import org.apache.metamodel.data.DataSet;
import org.apache.metamodel.data.DataSetHeader;
import org.apache.metamodel.data.DefaultRow;
import org.apache.metamodel.data.EmptyDataSet;
import org.apache.metamodel.data.Style;
import org.apache.metamodel.data.Style.SizeUnit;
import org.apache.metamodel.data.StyleBuilder;
import org.apache.metamodel.query.SelectItem;
import org.apache.metamodel.schema.Table;
import org.apache.metamodel.util.Action;
import org.apache.metamodel.util.DateUtils;
import org.apache.metamodel.util.FileHelper;
import org.apache.metamodel.util.FileResource;
import org.apache.metamodel.util.FormatHelper;
import org.apache.metamodel.util.Func;
import org.apache.metamodel.util.InMemoryResource;
import org.apache.metamodel.util.Resource;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.xml.sax.XMLReader;

/**
 * Convenience/reusable methods for Excel workbook handling.
 */
final class ExcelUtils {

    private static final Logger logger = LoggerFactory.getLogger(ExcelUtils.class);

    private static final NumberFormat _numberFormat = FormatHelper.getUiNumberFormat();

    private ExcelUtils() {
        // prevent instantiation
    }

    public static XMLReader createXmlReader() {
        try {
            SAXParserFactory saxFactory = SAXParserFactory.newInstance();
            SAXParser saxParser = saxFactory.newSAXParser();
            XMLReader sheetParser = saxParser.getXMLReader();
            return sheetParser;
        } catch (Exception e) {
            throw new MetaModelException(e);
        }
    }

    public static Workbook readWorkbook(Resource resource) {
        if (!resource.isExists()) {
            // resource does not exist- create a blank workbook
            if (isXlsxFile(resource)) {
                return new SXSSFWorkbook(1000);
            } else {
                return new HSSFWorkbook();
            }
        }

        if (resource instanceof FileResource) {
            final File file = ((FileResource) resource).getFile();
            try {
                return WorkbookFactory.create(file);
            } catch (Exception e) {
                logger.error("Could not open workbook", e);
                throw new IllegalStateException("Could not open workbook", e);
            }
        }

        return resource.read(new Func() {
            @Override
            public Workbook eval(InputStream inputStream) {
                try {
                    return WorkbookFactory.create(inputStream);
                } catch (Exception e) {
                    logger.error("Could not open workbook", e);
                    throw new IllegalStateException("Could not open workbook", e);
                }
            }
        });
    }

    public static boolean isXlsxFile(Resource resource) {
        if (resource == null) {
            return false;
        }
        return resource.getName().toLowerCase().endsWith(".xlsx");
    }

    /**
     * Initializes a workbook instance based on a {@link ExcelDataContext}.
     * 
     * @return a workbook instance based on the ExcelDataContext.
     */
    public static Workbook readWorkbook(ExcelDataContext dataContext) {
        Resource resource = dataContext.getResource();
        return readWorkbook(resource);
    }

    /**
     * Writes the {@link Workbook} to a {@link Resource}. The {@link Workbook}
     * will be closed as a result of this operation!
     * 
     * @param dataContext
     * @param wb
     */
    public static void writeAndCloseWorkbook(ExcelDataContext dataContext, final Workbook wb) {
        // first write to a temp file to avoid that workbook source is the same
        // as the target (will cause read+write cyclic overflow)

        final Resource realResource = dataContext.getResource();
        final Resource tempResource = new InMemoryResource(realResource.getQualifiedPath());

        tempResource.write(new Action() {
            @Override
            public void run(OutputStream outputStream) throws Exception {
                wb.write(outputStream);
            }
        });

        if (wb instanceof HSSFWorkbook && realResource instanceof FileResource && realResource.isExists()) {
            // TODO POI has a problem with closing a file-reference/channel
            // after wb.write() is invoked. See POI issue to be fixed:
            // https://bz.apache.org/bugzilla/show_bug.cgi?id=58480
            System.gc();
            System.runFinalization();
            try {
                Thread.sleep(800);
            } catch (InterruptedException e) {
            }
        }

        FileHelper.safeClose(wb);

        FileHelper.copy(tempResource, realResource);
    }

    public static String getCellValue(Workbook wb, Cell cell) {
        if (cell == null) {
            return null;
        }

        final String cellCoordinate = "(" + cell.getRowIndex() + "," + cell.getColumnIndex() + ")";

        final String result;

        switch (cell.getCellType()) {
        case Cell.CELL_TYPE_BLANK:
            result = null;
            break;
        case Cell.CELL_TYPE_BOOLEAN:
            result = Boolean.toString(cell.getBooleanCellValue());
            break;
        case Cell.CELL_TYPE_ERROR:
            String errorResult;
            try {
                byte errorCode = cell.getErrorCellValue();
                FormulaError formulaError = FormulaError.forInt(errorCode);
                errorResult = formulaError.getString();
            } catch (RuntimeException e) {
                logger.debug("Getting error code for {} failed!: {}", cellCoordinate, e.getMessage());
                if (cell instanceof XSSFCell) {
                    // hack to get error string, which is available
                    String value = ((XSSFCell) cell).getErrorCellString();
                    errorResult = value;
                } else {
                    logger.error("Couldn't handle unexpected error scenario in cell: " + cellCoordinate, e);
                    throw e;
                }
            }
            result = errorResult;
            break;
        case Cell.CELL_TYPE_FORMULA:
            // result = cell.getCellFormula();
            result = getFormulaCellValue(wb, cell);
            break;
        case Cell.CELL_TYPE_NUMERIC:
            if (HSSFDateUtil.isCellDateFormatted(cell)) {
                Date date = cell.getDateCellValue();
                if (date == null) {
                    result = null;
                } else {
                    result = DateUtils.createDateFormat().format(date);
                }
            } else {
                // TODO: Consider not formatting it, but simple using
                // Double.toString(...)
                result = _numberFormat.format(cell.getNumericCellValue());
            }
            break;
        case Cell.CELL_TYPE_STRING:
            result = cell.getRichStringCellValue().getString();
            break;
        default:
            throw new IllegalStateException("Unknown cell type: " + cell.getCellType());
        }

        logger.debug("cell {} resolved to value: {}", cellCoordinate, result);

        return result;
    }

    private static String getFormulaCellValue(Workbook wb, Cell cell) {
        // first try with a cached/precalculated value
        try {
            double numericCellValue = cell.getNumericCellValue();
            // TODO: Consider not formatting it, but simple using
            // Double.toString(...)
            return _numberFormat.format(numericCellValue);
        } catch (Exception e) {
            if (logger.isInfoEnabled()) {
                logger.info("Failed to fetch cached/precalculated formula value of cell: " + cell, e);
            }
        }

        // evaluate cell first, if possible
        try {
            if (logger.isInfoEnabled()) {
                logger.info("cell({},{}) is a formula. Attempting to evaluate: {}", new Object[] { cell.getRowIndex(),
                        cell.getColumnIndex(), cell.getCellFormula() });
            }

            final FormulaEvaluator evaluator = wb.getCreationHelper().createFormulaEvaluator();

            // calculates the formula and puts it's value back into the cell
            final Cell evaluatedCell = evaluator.evaluateInCell(cell);

            return getCellValue(wb, evaluatedCell);
        } catch (RuntimeException e) {
            logger.warn("Exception occurred while evaluating formula at position ({},{}): {}",
                    new Object[] { cell.getRowIndex(), cell.getColumnIndex(), e.getMessage() });
            // Some exceptions we simply log - result will be then be the
            // actual formula
            if (e instanceof FormulaParseException) {
                logger.error("Parse exception occurred while evaluating cell formula: " + cell, e);
            } else if (e instanceof IllegalArgumentException) {
                logger.error("Illegal formula argument occurred while evaluating cell formula: " + cell, e);
            } else {
                logger.error("Unexpected exception occurred while evaluating cell formula: " + cell, e);
            }
        }

        // last resort: return the string formula
        return cell.getCellFormula();
    }

    public static Style getCellStyle(Workbook workbook, Cell cell) {
        if (cell == null) {
            return Style.NO_STYLE;
        }
        final CellStyle cellStyle = cell.getCellStyle();

        final short fontIndex = cellStyle.getFontIndex();
        final Font font = workbook.getFontAt(fontIndex);
        final StyleBuilder styleBuilder = new StyleBuilder();

        // Font bold, italic, underline
        if (font.getBoldweight() >= Font.BOLDWEIGHT_BOLD) {
            styleBuilder.bold();
        }
        if (font.getItalic()) {
            styleBuilder.italic();
        }
        if (font.getUnderline() != FontUnderline.NONE.getByteValue()) {
            styleBuilder.underline();
        }

        // Font size
        final Font stdFont = workbook.getFontAt((short) 0);
        final short fontSize = font.getFontHeightInPoints();
        if (stdFont.getFontHeightInPoints() != fontSize) {
            styleBuilder.fontSize(fontSize, SizeUnit.PT);
        }

        // Font color
        final short colorIndex = font.getColor();
        if (font instanceof HSSFFont) {
            if (colorIndex != HSSFFont.COLOR_NORMAL) {
                final HSSFWorkbook wb = (HSSFWorkbook) workbook;
                HSSFColor color = wb.getCustomPalette().getColor(colorIndex);
                if (color != null) {
                    short[] triplet = color.getTriplet();
                    styleBuilder.foreground(triplet);
                }
            }
        } else if (font instanceof XSSFFont) {
            XSSFFont xssfFont = (XSSFFont) font;

            XSSFColor color = xssfFont.getXSSFColor();
            if (color != null) {
                String argbHex = color.getARGBHex();
                if (argbHex != null) {
                    styleBuilder.foreground(argbHex.substring(2));
                }
            }
        } else {
            throw new IllegalStateException("Unexpected font type: " + (font == null ? "null" : font.getClass()) + ")");
        }

        // Background color
        if (cellStyle.getFillPattern() == 1) {
            Color color = cellStyle.getFillForegroundColorColor();
            if (color instanceof HSSFColor) {
                short[] triplet = ((HSSFColor) color).getTriplet();
                if (triplet != null) {
                    styleBuilder.background(triplet);
                }
            } else if (color instanceof XSSFColor) {
                String argb = ((XSSFColor) color).getARGBHex();
                if (argb != null) {
                    styleBuilder.background(argb.substring(2));
                }
            } else {
                throw new IllegalStateException("Unexpected color type: " + (color == null ? "null" : color.getClass())
                        + ")");
            }
        }

        // alignment
        switch (cellStyle.getAlignment()) {
        case CellStyle.ALIGN_LEFT:
            styleBuilder.leftAligned();
            break;
        case CellStyle.ALIGN_RIGHT:
            styleBuilder.rightAligned();
            break;
        case CellStyle.ALIGN_CENTER:
            styleBuilder.centerAligned();
            break;
        case CellStyle.ALIGN_JUSTIFY:
            styleBuilder.justifyAligned();
            break;
        }

        return styleBuilder.create();
    }

    public static Iterator getRowIterator(Sheet sheet, ExcelConfiguration configuration, boolean jumpToDataRows) {
        final Iterator iterator;
        if (configuration.isSkipEmptyLines()) {
            iterator = sheet.rowIterator();
        } else {
            iterator = new ZeroBasedRowIterator(sheet);
        }

        if (jumpToDataRows) {
            final int columnNameLineNumber = configuration.getColumnNameLineNumber();
            if (columnNameLineNumber != ExcelConfiguration.NO_COLUMN_NAME_LINE) {
                // iterate past the column headers
                if (iterator.hasNext()) {
                    iterator.next();
                }
                for (int i = 1; i < columnNameLineNumber; i++) {
                    if (iterator.hasNext()) {
                        iterator.next();
                    } else {
                        // no more rows!
                        break;
                    }
                }
            }
        }

        return iterator;
    }

    /**
     * Creates a MetaModel row based on an Excel row
     * 
     * @param workbook
     * @param row
     * @param selectItems
     *            select items of the columns in the table
     * @return
     */
    public static DefaultRow createRow(Workbook workbook, Row row, DataSetHeader header) {
        final int size = header.size();
        final String[] values = new String[size];
        final Style[] styles = new Style[size];
        if (row != null) {
            for (int i = 0; i < size; i++) {
                final int columnNumber = header.getSelectItem(i).getColumn().getColumnNumber();
                final Cell cell = row.getCell(columnNumber);
                final String value = ExcelUtils.getCellValue(workbook, cell);
                final Style style = ExcelUtils.getCellStyle(workbook, cell);
                values[i] = value;
                styles[i] = style;
            }
        }

        return new DefaultRow(header, values, styles);
    }

    public static DataSet getDataSet(Workbook workbook, Sheet sheet, Table table, ExcelConfiguration configuration) {
        final SelectItem[] selectItems = MetaModelHelper.createSelectItems(table.getColumns());
        final Iterator rowIterator = getRowIterator(sheet, configuration, true);
        if (!rowIterator.hasNext()) {
            // no more rows!
            FileHelper.safeClose(workbook);
            return new EmptyDataSet(selectItems);
        }

        final DataSet dataSet = new XlsDataSet(selectItems, workbook, rowIterator);
        return dataSet;
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy