com.codetaco.funnel.provider.CsvProvider Maven / Gradle / Ivy

Show more of this group Show more artifacts with this name
Show all versions of funnelsort Show documentation
A better sort for JAVA
There is a newer version: 3.0.5
package com.codetaco.funnel.provider;

import java.io.IOException;
import java.text.ParseException;
import java.util.Iterator;

import org.apache.commons.csv.CSVFormat;
import org.apache.commons.csv.CSVParser;
import org.apache.commons.csv.CSVRecord;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.codetaco.funnel.AppContext;
import com.codetaco.funnel.orderby.KeyContext;
import com.codetaco.funnel.orderby.KeyPart;
import com.codetaco.funnel.parameters.FunnelContext;

/**
 * 
 * CsvProvider class.
 * 
 *
 * @author Chris DeGreef [email protected]
 */
public class CsvProvider extends VariableLengthProvider
{
    static final Logger _logger = LoggerFactory.getLogger(CsvProvider.class);

    boolean             includeColumn[];

    /**
     * for testing only
     *
     * @param _includeColumn an array of boolean.
     * @throws IOException if any.
     * @throws java.lang.Exception if any.
     */
    public CsvProvider(final boolean _includeColumn[]) throws Exception
    {
        this(new FunnelContext(new AppContext()));
        includeColumn = _includeColumn;
    }

    /**
     * 
     * Constructor for CsvProvider.
     * 
     *
     * @param _context a {@link com.codetaco.funnel.parameters.FunnelContext}
     *            object.
     * @throws java.io.IOException if any.
     * @throws java.text.ParseException if any.
     */
    public CsvProvider(final FunnelContext _context) throws IOException, ParseException
    {
        super(_context);
        logger.debug("CSV file provider activated");
        if (_context == null || _context.getInputColumnDefs() == null)
            return;
        /*
         * Find out which fields we really care about. No sense in moving around
         * bytes or analyzing fields we ultimately won't care about.
         */
        int highestKeyedColumnNumber = -1;

        for (final KeyPart kdef : _context.getInputColumnDefs())
            if (kdef.csvFieldNumber > highestKeyedColumnNumber)
                highestKeyedColumnNumber = kdef.csvFieldNumber;
        includeColumn = new boolean[highestKeyedColumnNumber + 1];
        for (final KeyPart kdef : _context.getInputColumnDefs())
            includeColumn[kdef.csvFieldNumber] = true;
    }

    /** {@inheritDoc} */
    @Override
    public long actualNumberOfRows()
    {
        return super.actualNumberOfRows()
                - (context.getCsv().header
                        ? 1
                        : 0);
    }

    @Override
    void assignReaderInstance() throws IOException, ParseException
    {
        if (context.isSysin())
            reader = new CsvSysinReader(context);
        else if (context.isCacheInput())
            reader = new CsvCacheReader(context);
        else
            reader = new CsvFileReader(context);
    }

    /**
     * 
     * decodeCsv.
     * 
     *
     * @param input an array of byte.
     * @param inputLength a int.
     * @param csvFormat a {@link org.apache.commons.csv.CSVFormat} object.
     * @return an array of byte.
     * @throws java.io.IOException if any.
     */
    public byte[][] decodeCsv(final byte[] input, final int inputLength, final CSVFormat csvFormat)
            throws IOException
    {
        final byte[][] field = new byte[includeColumn.length][];

        try (final CSVParser csvparser = CSVParser.parse(new String(input, 0, inputLength), csvFormat))
        {
            final CSVRecord csvrecord = csvparser.getRecords().get(0);
            final Iterator fields = csvrecord.iterator();
            for (int fNum = 0; fields.hasNext(); fNum++)
            {
                final String fieldAsString = fields.next();

                if (fNum >= includeColumn.length)
                    return field;

                if (fNum < includeColumn.length)
                    if (includeColumn[fNum])
                        field[fNum] = fieldAsString.getBytes();
            }
            return field;
        }
    }

    /**
     * A special case of not selecting the row is when a header is expected on
     * the csv file. This is one line and is to be kept for writing but ignored
     * for sorting. The publisher needs to be sensitive to the csv header
     * possibility and force the header out at the front of the file.
     *
     */
    @Override
    boolean isRowSelected(final int byteCount)
    {
        if (context.getCsv().header && context.getCsv().headerContents == null)
        {
            context.getCsv().headerContents = new byte[byteCount];
            System.arraycopy(row, 0, context.getCsv().headerContents, 0, byteCount);
            return false;
        }
        return true;
    }

    @Override
    KeyContext postReadKeyProcessing(final int byteCount) throws IOException
    {
        KeyContext kContext = null;
        try
        {
            final byte[][] data = decodeCsv(row, byteCount, context.getCsv().format);
            kContext = context.keyHelper.extractKey(data, getContinuousRecordNumber());

        } catch (final Exception e)
        {
            throw new IOException(e);
        }
        return kContext;
    }

    @Override
    void preSelectionExtract(final int byteCount) throws Exception
    {
        final byte[][] data = decodeCsv(row, byteCount, context.getCsv().format);
        context.columnHelper.extract(context, data, getContinuousRecordNumber(), byteCount);
    }

    byte[] unquote(final byte[] input, final int _start, final int _end, final byte quoteByte)
    {
        int start = _start;
        int end = _end;

        for (; start <= end; start++)
        {
            if (input[start] == (byte) ' ' || input[start] == (byte) '\t' || input[start] == quoteByte)
                continue;
            break;
        }
        for (; start <= end; end--)
        {
            if (input[end] == 32 || input[end] == 8 || input[end] == quoteByte)
                continue;
            break;
        }
        final int len = (end - start) + 1;
        final byte[] bb = new byte[len];
        System.arraycopy(input, start, bb, 0, len);
        return bb;
    }
}