All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.adobe.acs.commons.data.Spreadsheet Maven / Gradle / Ivy

The newest version!
/*
 * ACS AEM Commons
 *
 * Copyright (C) 2013 - 2023 Adobe
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.adobe.acs.commons.data;

import java.io.IOException;
import java.io.InputStream;
import java.lang.reflect.Array;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Calendar;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Optional;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.stream.StreamSupport;

import org.apache.commons.collections4.CollectionUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.apache.sling.api.request.RequestParameter;
import org.osgi.annotation.versioning.ProviderType;

/**
 * Simple abstraction of reading a single spreadsheet of values. Expects a
 * header row of named columns (case-sensitive) If provided, will also filter
 * data rows missing required columns to prevent processing errors.
 */
@ProviderType
public class Spreadsheet {

    public static final String DEFAULT_DELIMITER = ",";
    public static final String ROW_NUMBER = "~~ROWNUM~~";
    private String fileName = "unknown";
    private int rowCount;
    private transient List> dataRows;
    private final List requiredColumns;
    private Map> headerTypes;
    private List headerRow;
    private final Map delimiters;
    private boolean enableHeaderNameConversion = true;
    private InputStream inputStream;
    private List caseInsensitiveHeaders = new ArrayList<>();

    /**
     * Simple constructor used for unit testing purposes
     *
     * @param convertHeaderNames If true, header names are converted
     * @param headerArray        List of strings for header columns
     */
    public Spreadsheet(boolean convertHeaderNames, String... headerArray) {
        this.enableHeaderNameConversion = convertHeaderNames;
        headerTypes = Arrays.stream(headerArray)
                .collect(Collectors.toMap(this::convertHeaderName, this::detectTypeFromName));
        headerRow = Arrays.asList(headerArray);
        requiredColumns = Collections.emptyList();
        dataRows = new ArrayList<>();
        delimiters = new HashMap<>();
    }

    /**
     * Simple constructor used for unit testing purposes
     *
     * @param convertHeaderNames     If true, header names are converted
     * @param caseInsensitiveHeaders Header names that will be ignored during conversion
     * @param headerArray            List of strings for header columns
     */
    public Spreadsheet(boolean convertHeaderNames, List caseInsensitiveHeaders, String... headerArray) {
        this(convertHeaderNames, headerArray);
        Optional.ofNullable(caseInsensitiveHeaders).ifPresent(this.caseInsensitiveHeaders::addAll);
    }

    public Spreadsheet(boolean convertHeaderNames, InputStream file, String... required) {
        dataRows = new ArrayList<>();
        delimiters = new HashMap<>();
        this.enableHeaderNameConversion = convertHeaderNames;
        if (required == null || required.length == 0) {
            requiredColumns = Collections.emptyList();
        } else {
            requiredColumns = Arrays.stream(required)
                    .map(this::convertHeaderName)
                    .collect(Collectors.toList());
        }
        this.headerRow = new ArrayList<>();
        this.inputStream = file;
    }

    public Spreadsheet(boolean convertHeaderNames, RequestParameter file, String... required) throws IOException {
        this(convertHeaderNames, file.getInputStream(), required);
        fileName = file.getFileName();
    }

    public Spreadsheet(InputStream file, String... required) {
        this(true, file, required);
    }

    public Spreadsheet(RequestParameter file, String... required) throws IOException {
        this(true, file, required);
    }

    public Spreadsheet(RequestParameter file, List caseInsensitiveHeaders, String... required) throws IOException {
        this(true, file, required);
        Optional.ofNullable(caseInsensitiveHeaders).ifPresent(this.caseInsensitiveHeaders::addAll);
    }
    
    /**
     * Parse out the input file synchronously for easier unit test validation.
     * This overload will implicitly use the default JVM locale for numeric and date/time conversions.
     * 
     * @return List of files that will be imported, including any renditions
     * @throws IOException if the file couldn't be read
     */
    public Spreadsheet buildSpreadsheet() throws IOException {
        return buildSpreadsheet(Locale.getDefault());
    }
    
    /**
     * Parse out the input file synchronously for easier unit test validation
     *
     * @param locale The locale to be used for numeric and date/time conversions.
     * @return List of files that will be imported, including any renditions
     * @throws IOException if the file couldn't be read
     */
    public Spreadsheet buildSpreadsheet(Locale locale) throws IOException {

        XSSFWorkbook workbook = new XSSFWorkbook(this.inputStream);

        final XSSFSheet sheet = workbook.getSheetAt(0);
        rowCount = sheet.getLastRowNum();
        final Iterator rows = sheet.rowIterator();

        Row firstRow = rows.next();
        headerRow = readRow(firstRow, locale).stream()
                .map(v -> v != null ? convertHeaderName(v.toString()) : null)
                .collect(Collectors.toList());
        headerTypes = readRow(firstRow, locale).stream()
                .map(Variant::toString)
                .collect(Collectors.toMap(
                        this::convertHeaderName,
                        this::detectTypeFromName,
                        this::upgradeToArray
                ));

        Iterable remainingRows = () -> rows;
        dataRows = StreamSupport.stream(remainingRows.spliterator(), false)
                .map(row -> buildRow(row, locale))
                .filter(Optional::isPresent)
                .map(Optional::get)
                .collect(Collectors.toList());

        return this;
    }

    private List readRow(Row row, Locale locale) {
        Iterator iterator = row.cellIterator();
        List rowOut = new ArrayList<>();
        while (iterator.hasNext()) {
            Cell c = iterator.next();
            while (c.getColumnIndex() > rowOut.size()) {
                rowOut.add(null);
            }
            Variant val = new Variant(c, locale);
            rowOut.add(val.isEmpty() ? null : val);
        }
        return rowOut;
    }

    @SuppressWarnings("squid:S3776")
    private Optional> buildRow(Row row, Locale locale) {
        Map out = new LinkedHashMap<>();
        out.put(ROW_NUMBER, new CompositeVariant(row.getRowNum()));
        List data = readRow(row, locale);
        boolean empty = true;
        for (int i = 0; i < data.size() && i < getHeaderRow().size(); i++) {
            String colName = getHeaderRow().get(i);
            if (colName != null && data.get(i) != null && !data.get(i).isEmpty()) {
                empty = false;
                if (!out.containsKey(colName)) {
                    Class type = headerTypes.get(colName).orElse(data.get(i).getBaseType());
                    if (type == Object.class) {
                        type = data.get(i).getBaseType();
                    } else if (type == Object[].class) {
                        type = getArrayType(Optional.of(data.get(i).getBaseType())).get();
                    }
                    out.put(colName, new CompositeVariant(type));
                }
                Optional type = headerTypes.get(colName);
                if (type.isPresent() && type.get().isArray()) {
                    String[] values = data.get(i).toString().split(Pattern.quote(delimiters.getOrDefault(colName, DEFAULT_DELIMITER)));
                    for (String value : values) {
                        if (value != null && !value.isEmpty()) {
                            out.get(colName).addValue(value.trim());
                        }
                    }
                } else {
                    out.get(colName).addValue(data.get(i));
                }
            }
        }
        if (empty || (!requiredColumns.isEmpty() && !out.keySet().containsAll(requiredColumns))) {
            return Optional.empty();
        } else {
            return Optional.of(out);
        }
    }

    /**
     * @return the fileName
     */
    public String getFileName() {
        return fileName;
    }

    /**
     * @return the rowCount
     */
    public int getRowCount() {
        return rowCount;
    }

    /**
     * @return the headerRow
     */
    public List getHeaderRow() {
        return Collections.unmodifiableList(headerRow);
    }

    /**
     * @return the dataRows
     */
    public List> getDataRowsAsCompositeVariants() {
        return Collections.unmodifiableList(dataRows);
    }

    /**
     * Sort the data rows by the provided column name
     */
    public void sortRows(String columnName) {
        Collections.sort(dataRows, (a, b) -> {
            String aStr = a.get(columnName) != null ? a.get(columnName).toString() : "";
            String bStr = b.get(columnName) != null ? b.get(columnName).toString() : "";

            return bStr.compareTo(aStr);
        });
    }

    /**
     * Append data to the sheet.
     *
     * @param dataRows the data to append
     */
    public void appendData(List> dataRows) {
        Optional.ofNullable(dataRows).ifPresent(newData -> this.dataRows.addAll(newData));
    }

    public Long getRowNum(Map row) {
        if (row.containsKey(ROW_NUMBER)) {
            return (Long) row.get(ROW_NUMBER).getValueAs(Long.class);
        } else {
            return (long) dataRows.indexOf(row);
        }
    }

    /**
     * @return the requiredColumns
     */
    public List getRequiredColumns() {
        return Collections.unmodifiableList(requiredColumns);
    }

    public String convertHeaderName(String str) {
        String name;
        if (str.contains("@")) {
            name = StringUtils.substringBefore(str, "@");
        } else {
            name = str;
        }
        if (name.contains("[")) {
            name = StringUtils.substringBefore(name, "[");
        }
        if (enableHeaderNameConversion && isHeaderCaseInsensitive(name)) {
            name = String.valueOf(name).toLowerCase().replaceAll("[^0-9a-zA-Z:\\-]+", "_");
        }

        return name;
    }

    private boolean isHeaderCaseInsensitive(String name) {
        return CollectionUtils.isEmpty(caseInsensitiveHeaders)
                || caseInsensitiveHeaders.stream().anyMatch(s -> s.equalsIgnoreCase(name));
    }

    /**
     * Look for type hints in the name of a column to extract a usable type.
     * Also look for array hints as well. 
* Possible formats: *
    *
  • column-name - A column named "column-name"
  • *
  • col@int - An integer column named "col"
  • *
  • col2@int[] - An integer array colum named "col2", assumes standard * delimiter (,)
  • *
  • col3@string[] or col3@[] - A String array named "col3", assumes * standard delimiter (,)
  • *
  • col4@string[||] - A string array where values are using a custom * delimiter (||)
  • *
* * @param name * @return */ private Optional detectTypeFromName(String name) { boolean isArray = false; Class detectedClass = Object.class; if (name.contains("@")) { String typeStr = StringUtils.substringAfter(name, "@"); if (typeStr.contains("[")) { typeStr = StringUtils.substringBefore(typeStr, "["); } detectedClass = getClassFromName(typeStr); } if (name.endsWith("]")) { isArray = true; String delimiter = StringUtils.substringBetween(name, "[", "]"); if (!StringUtils.isEmpty(delimiter)) { String colName = convertHeaderName(name); delimiters.put(colName, delimiter); } } if (isArray) { return getArrayType(Optional.of(detectedClass)); } else { return Optional.of(detectedClass); } } private Class getClassFromName(String typeStr) { switch (typeStr.toLowerCase()) { case "int": case "integer": return Integer.TYPE; case "long": return Long.TYPE; case "double": case "number": return Double.TYPE; case "date": case "calendar": case "cal": case "time": return Calendar.class; case "boolean": case "bool": return Boolean.TYPE; case "string": case "str": default: return String.class; } } /** * Consider if a column is seen twice then that column type should be * considered an array. Because String is a default assumption when no type * is specified, any redefinition of a column to a more specific type will * be then assumed for that property altogether. * * @param a * @param b * @return */ private Optional upgradeToArray(Optional a, Optional b) { if (!a.isPresent()) { return b; } if (!b.isPresent()) { return a; } if (a.get().equals(b.get()) || b.get() == Object.class) { return getArrayType(a); } else { return getArrayType(b); } } private static Optional getArrayType(Optional clazz) { if (!clazz.isPresent()) { return Optional.empty(); } else if (clazz.get().isArray()) { return clazz; } else { return Optional.of(Array.newInstance(clazz.get(), 0).getClass()); } } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy