
org.dstadler.poi.mailmerge.Data Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of poi-mail-merge Show documentation
Show all versions of poi-mail-merge Show documentation
A small application which allows to repeatedly replace markers in a Microsoft Word document with items taken from a CSV/Microsoft Excel file.
The newest version!
package org.dstadler.poi.mailmerge;
import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVRecord;
import org.apache.commons.io.FilenameUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.poi.EncryptedDocumentException;
import org.apache.poi.ss.format.CellFormat;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.ss.usermodel.WorkbookFactory;
import org.apache.poi.ss.util.CellReference;
import org.dstadler.commons.logging.jdk.LoggerFactory;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.io.Reader;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.logging.Logger;
/**
* Helper class which handles reading the merge-file-data from
* either a CSV or XLS/XLSX file.
*/
public class Data {
private static final Logger log = LoggerFactory.make();
private final List headers = new ArrayList<>();
private final List> values = new ArrayList<>();
/**
* Read the given file either as .csv or .xls/.xlsx file, depending
* on the file-extension.
*
* @param dataFile The merge-file to read. Can have extension .csv, .xls or .xlsx
* @throws IOException If an error occurs while reading the file
* @throws EncryptedDocumentException If the document is encrypted (passwords are not supported currently)
*/
public void read(File dataFile) throws IOException, EncryptedDocumentException {
// read the lines from the data-file
if(FilenameUtils.getExtension(dataFile.getName()).equalsIgnoreCase("csv")) {
readCSVFile(dataFile);
} else {
readExcelFile(dataFile);
}
removeEmptyLines();
}
private void removeEmptyLines() {
Iterator> it = values.iterator();
while(it.hasNext()) {
List line = it.next();
boolean empty = true;
for(String item : line) {
if(StringUtils.isNotBlank(item)) {
empty = false;
break;
}
}
// remove empty line
if(empty) {
log.info("Removing an empty data line");
it.remove();
}
}
}
private void readCSVFile(File csvFile) throws IOException {
// open file
try (Reader reader = new FileReader(csvFile)) {
CSVFormat strategy = CSVFormat.DEFAULT.
withHeader().
withDelimiter(',').
withQuote('"').
withCommentMarker((char)0).
withIgnoreEmptyLines().
withIgnoreSurroundingSpaces();
try (CSVParser parser = new CSVParser(reader, strategy)) {
Map headerMap = parser.getHeaderMap();
for(Map.Entry entry : headerMap.entrySet()) {
headers.add(entry.getKey());
log.info("Had header '" + entry.getKey() + "' for column " + entry.getValue());
}
List lines = parser.getRecords();
log.info("Found " + lines.size() + " lines");
for(CSVRecord line : lines) {
List data = new ArrayList<>();
for(int pos = 0;pos < headerMap.size();pos++) {
if(line.size() <= pos) {
data.add(null);
} else {
data.add(line.get(pos));
}
}
values.add(data);
}
}
}
}
private void readExcelFile(File excelFile) throws EncryptedDocumentException, IOException {
try (Workbook wb = WorkbookFactory.create(excelFile, null, true)) {
Sheet sheet = wb.getSheetAt(0);
final int start;
final int end;
{ // read headers
Row row = sheet.getRow(0);
if(row == null) {
throw new IllegalArgumentException("Provided Microsoft Excel file " + excelFile + " does not have data in the first row in the first sheet, "
+ "but we expect the header data to be located there");
}
start = row.getFirstCellNum();
end = row.getLastCellNum();
for(int cellNum = start;cellNum <= end;cellNum++) {
Cell cell = row.getCell(cellNum);
if(cell == null) {
// add null to the headers if there are columns without title in the sheet
headers.add(null);
log.info("Had empty header for column " + CellReference.convertNumToColString(cellNum));
} else {
String value = cell.toString();
headers.add(value);
log.info("Had header '" + value + "' for column " + CellReference.convertNumToColString(cellNum));
}
}
}
for(int rowNum = 1; rowNum <= sheet.getLastRowNum();rowNum++) {
Row row = sheet.getRow(rowNum);
if(row == null) {
// ignore missing rows
continue;
}
List data = new ArrayList<>();
for(int colNum = start;colNum <= end;colNum++) {
Cell cell = row.getCell(colNum);
if(cell == null) {
// store null-data for empty/missing cells
data.add(null);
} else {
final String value;
//noinspection SwitchStatementWithTooFewBranches
switch (cell.getCellType()) {
case NUMERIC:
// ensure that numeric are formatted the same way as in the Excel file.
value = CellFormat.getInstance(cell.getCellStyle().getDataFormatString()).apply(cell).text;
break;
default:
// all others can use the default value from toString() for now.
value = cell.toString();
}
data.add(value);
}
}
values.add(data);
}
}
}
/**
* Return a list of rows containing the data-values.
*
* @return a list of rows, each containing a list of data-values as strings.
*/
public List> getData() {
return values;
}
/**
* A list of header-names that are used to replace the templates.
*
* @return The header-names as found in the .csv/.xls/.xlsx file.
*/
public List getHeaders() {
return headers;
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy