All downloads are free. Search and download functionality uses the official Maven repository.

net.sf.flatpack.brparse.BuffReaderDelimParser Maven / Gradle / Ivy

Go to download

Simple Java delimited and fixed width file parser. Handles CSV, Excel CSV, Tab, Pipe delimiters, just to name a few. Maps column positions in the file to user friendly names via XML. See FlatPack Feature List under News for complete feature list.

The newest version!
/*
 * ObjectLab, http://www.objectlab.co.uk/open is supporting FlatPack.
 *
 * Based in London, we are world leaders in the design and development
 * of bespoke applications for the securities financing markets.
 *
 * Click here to learn more
 *           ___  _     _           _   _          _
 *          / _ \| |__ (_) ___  ___| |_| |    __ _| |__
 *         | | | | '_ \| |/ _ \/ __| __| |   / _` | '_ \
 *         | |_| | |_) | |  __/ (__| |_| |__| (_| | |_) |
 *          \___/|_.__// |\___|\___|\__|_____\__,_|_.__/
 *                   |__/
 *
 *                     www.ObjectLab.co.uk
 *
 * $Id: ColorProvider.java 74 2006-10-24 22:19:05Z benoitx $
 *
 * Copyright 2006 the original author or authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */
package net.sf.flatpack.brparse;

import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.Reader;
import java.util.List;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import net.sf.flatpack.DataSet;
import net.sf.flatpack.DefaultDataSet;
import net.sf.flatpack.DelimiterParser;
import net.sf.flatpack.structure.ColumnMetaData;
import net.sf.flatpack.structure.Row;
import net.sf.flatpack.util.FPConstants;
import net.sf.flatpack.util.FPException;
import net.sf.flatpack.util.ParserUtils;

/**
 * Delimited-file parser that hands out rows one at a time from a
 * {@link BufferedReader} instead of materialising the whole file up front.
 * <p>
 * {@link #doParse()} only opens the reader and returns a {@link BuffReaderDataSet}
 * bound to this parser; each call to {@link #buildRow(DefaultDataSet)} then reads
 * and validates the next record. Call {@link #close()} when finished to release
 * the underlying reader.
 */
public class BuffReaderDelimParser extends DelimiterParser implements InterfaceBuffReaderParse {
    private static final Logger LOGGER = LoggerFactory.getLogger(BuffReaderDelimParser.class);

    /** Reader over the data source; created in {@link #doParse()}, released in {@link #close()}. */
    private BufferedReader br;

    /** Set once the first physical record has been consumed as a header (skipped or used for metadata). */
    private boolean processedFirst = false;

    /**
     * @param pzmapXML          column mapping file
     * @param dataSource        delimited data file
     * @param delimiter         column delimiter
     * @param qualifier         text qualifier
     * @param ignoreFirstRecord when true the first record of the data is skipped
     */
    public BuffReaderDelimParser(final File pzmapXML, final File dataSource, final char delimiter, final char qualifier,
            final boolean ignoreFirstRecord) {
        super(pzmapXML, dataSource, delimiter, qualifier, ignoreFirstRecord);
    }

    /**
     * @param pzmapXMLStream    column mapping stream
     * @param dataSourceStream  delimited data stream
     * @param delimiter         column delimiter
     * @param qualifier         text qualifier
     * @param ignoreFirstRecord when true the first record of the data is skipped
     */
    public BuffReaderDelimParser(final InputStream pzmapXMLStream, final InputStream dataSourceStream, final char delimiter, final char qualifier,
            final boolean ignoreFirstRecord) {
        super(pzmapXMLStream, dataSourceStream, delimiter, qualifier, ignoreFirstRecord);
    }

    /**
     * @param dataSource        delimited data file
     * @param delimiter         column delimiter
     * @param qualifier         text qualifier
     * @param ignoreFirstRecord when true the first record of the data is skipped
     */
    public BuffReaderDelimParser(final File dataSource, final char delimiter, final char qualifier, final boolean ignoreFirstRecord) {
        super(dataSource, delimiter, qualifier, ignoreFirstRecord);
    }

    /**
     * @param dataSourceStream  delimited data stream
     * @param delimiter         column delimiter
     * @param qualifier         text qualifier
     * @param ignoreFirstRecord when true the first record of the data is skipped
     */
    public BuffReaderDelimParser(final InputStream dataSourceStream, final char delimiter, final char qualifier, final boolean ignoreFirstRecord) {
        super(dataSourceStream, delimiter, qualifier, ignoreFirstRecord);
    }

    /**
     * @param pzmapXML          column mapping reader
     * @param dataSource        delimited data reader
     * @param delimiter         column delimiter
     * @param qualifier         text qualifier
     * @param ignoreFirstRecord when true the first record of the data is skipped
     */
    public BuffReaderDelimParser(final Reader pzmapXML, final Reader dataSource, final char delimiter, final char qualifier,
            final boolean ignoreFirstRecord) {
        // NOTE(review): the superclass is called with (dataSource, pzmapXML), i.e. the reverse of this
        // constructor's own parameter order - confirm this matches DelimiterParser(Reader, Reader, ...).
        super(dataSource, pzmapXML, delimiter, qualifier, ignoreFirstRecord);
    }

    /**
     * @param dataSourceStream  delimited data reader
     * @param delimiter         column delimiter
     * @param qualifier         text qualifier
     * @param ignoreFirstRecord when true the first record of the data is skipped
     */
    public BuffReaderDelimParser(final Reader dataSourceStream, final char delimiter, final char qualifier, final boolean ignoreFirstRecord) {
        super(dataSourceStream, delimiter, qualifier, ignoreFirstRecord);
    }

    /**
     * Opens the buffered reader over the data source and returns a data set bound to this parser.
     * No records are read here.
     *
     * @return the data set, or {@code null} if an {@link IOException} occurred (the error is logged)
     */
    @Override
    protected DataSet doParse() {
        final DataSet ds = new BuffReaderDataSet(getPzMetaData(), this);
        try {
            // gather the conversion properties
            ds.setPZConvertProps(ParserUtils.loadConvertProperties());

            br = new BufferedReader(getDataSourceReader());

            return ds;

        } catch (final IOException ex) {
            LOGGER.error("error accessing/creating inputstream", ex);
        }

        return null;
    }

    /**
     * Reads records from the file until one passes validation and returns it as a row.
     * Header records and records rejected for a wrong column count are skipped.
     *
     * @param ds data set that receives any validation errors and, when the metadata is
     *           derived from the file, the generated metadata
     * @return the next valid row, or {@code null} when the end of the data is reached
     * @throws FPException if reading from the underlying reader fails
     */
    @Override
    public Row buildRow(final DefaultDataSet ds) {
        // loop through each line in the file
        while (true) {
            String line;
            try {
                line = fetchNextRecord(br, getQualifier(), getDelimiter());
            } catch (final IOException e) {
                throw new FPException("Error Fetching Record From File...", e);
            }

            if (line == null) {
                return null;
            }

            // check to see if the user has elected to skip the first record
            if (shouldSkipFirstRecord(line, ds)) {
                continue;
            }

            // TODO
            // seems like we may want to try doing something like this. I have my reservations because
            // it is possible that we don't get a "detail" id and this might generate NPE
            // is it going to create too much overhead to do a null check here as well???
            final List<String> columns = ParserUtils.splitLine(line, getDelimiter(), getQualifier(), FPConstants.SPLITLINE_SIZE_INIT,
                    isPreserveLeadingWhitespace(), isPreserveTrailingWhitespace());
            final String mdkey = ParserUtils.getCMDKeyForDelimitedFile(getPzMetaData(), columns);
            final List<ColumnMetaData> cmds = ParserUtils.getColumnMetaData(mdkey, getPzMetaData());

            // Incorrect record length on line log the error. Line
            // will not be included in the dataset
            if (!validateColumns(ds, columns, cmds, line)) {
                continue;
            }

            return createRow(line, columns, mdkey);
        }
    }

    /**
     * Decides whether the record just read is a header that must not be returned as data.
     * Only the first record can qualify; it is either ignored outright or used to build the
     * column metadata, depending on the parser configuration.
     *
     * @param line raw record text
     * @param ds   data set to update when metadata is generated from the record
     * @return {@code true} if the record was consumed as a header
     */
    private boolean shouldSkipFirstRecord(final String line, final DefaultDataSet ds) {
        if (processedFirst) {
            return false;
        }
        if (isIgnoreFirstRecord()) {
            processedFirst = true;
            return true;
        }
        if (shouldCreateMDFromFile()) {
            processedFirst = true;
            setPzMetaData(ParserUtils.getPZMetaDataFromFile(line, getDelimiter(), getQualifier(), this, isAddSuffixToDuplicateColumnNames()));
            ds.setMetaData(getPzMetaData());
            return true;
        }
        return false;
    }

    /**
     * Builds the row object for a validated record.
     *
     * @param line    raw record text, stored on the row only when raw-data storage is enabled
     * @param columns parsed column values
     * @param mdkey   metadata key of the record
     * @return the populated row
     */
    private Row createRow(final String line, final List<String> columns, final String mdkey) {
        final Row row = new Row();
        // store null instead of the default detail id to limit memory use
        row.setMdkey(mdkey.equals(FPConstants.DETAIL_ID) ? null : mdkey);
        row.setCols(columns);
        row.setRowNumber(getLineCount());

        if (isFlagEmptyRows()) {
            // user has elected to have the parser flag rows that are empty
            row.setEmpty(ParserUtils.isListElementsEmpty(columns));
        }
        if (isStoreRawDataToDataSet()) {
            // user told the parser to keep a copy of the raw data in the row
            // WARNING potential for high memory usage here
            row.setRawData(line);
        }
        return row;
    }

    /**
     * Checks the number of parsed columns against the mapping and, where configured,
     * repairs the record in place (padding or truncating {@code columns}).
     *
     * @param ds      data set that receives any error or warning
     * @param columns parsed column values; may be modified
     * @param cmds    column definitions the record is expected to match
     * @param line    raw record text, attached to errors when enabled
     * @return {@code true} if the record should be kept, {@code false} if it must be dropped
     */
    private boolean validateColumns(final DefaultDataSet ds, final List<String> columns, final List<ColumnMetaData> cmds, final String line) {
        final int columnCount = cmds.size();
        if (columns.size() > columnCount) {
            return handleTooManyColumns(ds, columns, line, columnCount);
        } else if (columns.size() < columnCount) {
            return handleTooFewColumns(ds, columns, line, columnCount, cmds);
        }
        return true;
    }

    /**
     * Handles a record with fewer columns than the mapping defines: pads it with empty
     * strings when short-line handling is enabled, otherwise records an error.
     *
     * @param ds          data set that receives the error or warning
     * @param columns     parsed column values; padded in place when short lines are handled
     * @param line        raw record text, attached to the error when enabled
     * @param columnCount number of columns the mapping expects
     * @param colTitles   column definitions, used to name the last column that was present
     * @return {@code true} if the record was padded and should be kept, {@code false} otherwise
     */
    private boolean handleTooFewColumns(final DefaultDataSet ds, final List<String> columns, final String line, final int columnCount,
            final List<ColumnMetaData> colTitles) {
        if (isHandlingShortLines()) {
            // We can pad this line out
            while (columns.size() < columnCount) {
                columns.add("");
            }

            // log a warning
            addError(ds, "Padded line to correct number of columns", getLineCount(), 1, isStoreRawDataToDataError() ? line : null);
            return true;
        }

        final StringBuilder sb = new StringBuilder();
        sb.append("Too few columns expected size: ").append(columnCount).append(" Actual size: ").append(columns.size());

        // Report the last column that was actually present to help locate where the record was cut short.
        // columns.size() < colTitles.size() on this path, so the index is within colTitles.
        final boolean hasColumns = !columns.isEmpty();
        final int lastIdx = columns.size() - 1;
        final String lastColumnName = hasColumns && colTitles != null && !colTitles.isEmpty() ? colTitles.get(lastIdx).getColName() : null;
        final String lastColumnValue = hasColumns ? columns.get(lastIdx) : null;

        addError(ds, sb.toString(), getLineCount(), 2, isStoreRawDataToDataError() ? line : null, lastColumnName, lastColumnValue);
        return false;
    }

    /**
     * Handles a record with more columns than the mapping defines: drops the surplus
     * trailing columns when extra columns are ignored, otherwise records an error.
     *
     * @param ds          data set that receives the error or warning
     * @param columns     parsed column values; truncated in place when extra columns are ignored
     * @param line        raw record text, attached to the error when enabled
     * @param columnCount number of columns the mapping expects
     * @return {@code true} if the record was truncated and should be kept, {@code false} otherwise
     */
    private boolean handleTooManyColumns(final DefaultDataSet ds, final List<String> columns, final String line, final int columnCount) {
        if (isIgnoreExtraColumns()) {
            // user has chosen to ignore the fact that we have too many columns in the data from
            // what the mapping has described. Remove the trailing columns by position; a value-based
            // retainAll would keep any extra column whose value also appears in the first columnCount.
            columns.subList(columnCount, columns.size()).clear();
            addError(ds, "TRUNCATED LINE TO CORRECT NUMBER OF COLUMNS", getLineCount(), 1, isStoreRawDataToDataError() ? line : null);
            return true;
        }

        // log the error
        addError(ds, "Too many columns expected size: " + columnCount + " Actual size: " + columns.size(), getLineCount(), 2,
                isStoreRawDataToDataError() ? line : null);
        return false;
    }

    /**
     * Closes the underlying reader, if one is open. Safe to call more than once.
     *
     * @throws IOException if closing the reader fails
     */
    @Override
    public void close() throws IOException {
        if (br != null) {
            br.close();
            br = null;
        }
    }

    // try to clean up the file handles automatically if
    // the close was not called
    @Override
    protected void finalize() throws Throwable {
        try {
            close();
        } catch (final IOException ex) {
            LOGGER.warn("Problem trying to auto close file handles...", ex);
        } finally {
            super.finalize();
        }
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy