All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.dstadler.poi.mailmerge.Data Maven / Gradle / Ivy

Go to download

A small application which allows to repeatedly replace markers in a Microsoft Word document with items taken from a CSV/Microsoft Excel file.

The newest version!
package org.dstadler.poi.mailmerge;

import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVRecord;
import org.apache.commons.io.FilenameUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.poi.EncryptedDocumentException;
import org.apache.poi.ss.format.CellFormat;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.ss.usermodel.WorkbookFactory;
import org.apache.poi.ss.util.CellReference;
import org.dstadler.commons.logging.jdk.LoggerFactory;

import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.Reader;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.logging.Logger;

/**
 * Helper class which handles reading the merge-file-data from
 * either a CSV or XLS/XLSX file.
 */
public class Data {
    private static final Logger log = LoggerFactory.make();

    private final List headers = new ArrayList<>();
    private final List> values = new ArrayList<>();

    /**
     * Read the given file either as .csv or .xls/.xlsx file, depending
     * on the file-extension.
     *
     * @param dataFile The merge-file to read. Can have extension .csv, .xls or .xlsx
     * @throws IOException If an error occurs while reading the file
     * @throws EncryptedDocumentException If the document is encrypted (passwords are not supported currently)
     */
    public void read(File dataFile) throws IOException, EncryptedDocumentException {
        // read the lines from the data-file
        if(FilenameUtils.getExtension(dataFile.getName()).equalsIgnoreCase("csv")) {
            readCSVFile(dataFile);
        } else {
            readExcelFile(dataFile);
        }

        removeEmptyLines();
    }

    private void removeEmptyLines() {
        Iterator> it = values.iterator();
        while(it.hasNext()) {
            List line = it.next();
            boolean empty = true;
            for(String item : line) {
                if(StringUtils.isNotBlank(item)) {
                    empty = false;
                    break;
                }
            }

            // remove empty line
            if(empty) {
                log.info("Removing an empty data line");
                it.remove();
            }
        }
    }

    private void readCSVFile(File csvFile) throws IOException {
        // open file
        try (Reader reader = new FileReader(csvFile)) {
            CSVFormat strategy = CSVFormat.DEFAULT.
                    withHeader().
                    withDelimiter(',').
                    withQuote('"').
                    withCommentMarker((char)0).
                    withIgnoreEmptyLines().
                    withIgnoreSurroundingSpaces();

            try (CSVParser parser = new CSVParser(reader, strategy)) {
                Map headerMap = parser.getHeaderMap();
                for(Map.Entry entry : headerMap.entrySet()) {
                    headers.add(entry.getKey());
                    log.info("Had header '" + entry.getKey() + "' for column " + entry.getValue());
                }

                List lines = parser.getRecords();
                log.info("Found " + lines.size() + " lines");
                for(CSVRecord line : lines) {
                    List data = new ArrayList<>();
                    for(int pos = 0;pos < headerMap.size();pos++) {
                        if(line.size() <= pos) {
                            data.add(null);
                        } else {
                            data.add(line.get(pos));
                        }
                    }

                    values.add(data);
                }
            }
        }
    }

    private void readExcelFile(File excelFile) throws EncryptedDocumentException, IOException {
        try (Workbook wb = WorkbookFactory.create(excelFile, null, true)) {
            Sheet sheet = wb.getSheetAt(0);

            final int start;
            final int end;
            { // read headers
                Row row = sheet.getRow(0);
                if(row == null) {
                    throw new IllegalArgumentException("Provided Microsoft Excel file " + excelFile + " does not have data in the first row in the first sheet, "
                            + "but we expect the header data to be located there");
                }

                start = row.getFirstCellNum();
                end = row.getLastCellNum();
                for(int cellNum = start;cellNum <= end;cellNum++) {
                    Cell cell = row.getCell(cellNum);
                    if(cell == null) {
                        // add null to the headers if there are columns without title in the sheet
                        headers.add(null);
                        log.info("Had empty header for column " + CellReference.convertNumToColString(cellNum));
                    } else {
                        String value = cell.toString();
                        headers.add(value);
                        log.info("Had header '" + value + "' for column " + CellReference.convertNumToColString(cellNum));
                    }
                }
            }

            for(int rowNum = 1; rowNum <= sheet.getLastRowNum();rowNum++) {
                Row row = sheet.getRow(rowNum);
                if(row == null) {
                    // ignore missing rows
                    continue;
                }

                List data = new ArrayList<>();
                for(int colNum = start;colNum <= end;colNum++) {
                    Cell cell = row.getCell(colNum);
                    if(cell == null) {
                        // store null-data for empty/missing cells
                        data.add(null);
                    } else {
                        final String value;
                        //noinspection SwitchStatementWithTooFewBranches
                        switch (cell.getCellType()) {
                            case NUMERIC:
                                // ensure that numeric are formatted the same way as in the Excel file.
                                value = CellFormat.getInstance(cell.getCellStyle().getDataFormatString()).apply(cell).text;
                                break;
                            default:
                                // all others can use the default value from toString() for now.
                                value = cell.toString();
                        }

                        data.add(value);
                    }
                }

                values.add(data);
            }
        }
    }

    /**
     * Return a list of rows containing the data-values.
     *
     * @return a list of rows, each containing a list of data-values as strings.
     */
    public List> getData() {
        return values;
    }

    /**
     * A list of header-names that are used to replace the templates.
     *
     * @return The header-names as found in the .csv/.xls/.xlsx file.
     */
    public List getHeaders() {
        return headers;
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy