All Downloads are FREE. Search and download functionalities are using the official Maven repository.

gov.sandia.cognition.io.CSVUtility Maven / Gradle / Ivy

There is a newer version: 4.0.1
Show newest version
/*
 * File:                CSVUtility.java
 * Authors:             Kevin R. Dixon
 * Company:             Sandia National Laboratories
 * Project:             Cognitive Foundry
 *
 * Copyright March 2, 2006, Sandia Corporation.  Under the terms of Contract
 * DE-AC04-94AL85000, there is a non-exclusive license for use of this work by
 * or on behalf of the U.S. Government. Export of this program may require a
 * license from the United States Government. See CopyrightHistory.txt for
 * complete details.
 *
 */

package gov.sandia.cognition.io;

import gov.sandia.cognition.annotation.CodeReview;
import gov.sandia.cognition.annotation.CodeReviewResponse;
import gov.sandia.cognition.annotation.CodeReviews;
import java.io.BufferedReader;
import java.io.FileReader;
import java.io.IOException;
import java.util.LinkedList;

/**
 * The CSVUtility class implements some utility functions for
 * dealing with comma-separated value (CSV) file types.
 *
 * @author Justin Basilico
 * @author Kevin R. Dixon
 * @since  1.0
 */
@CodeReviews(
    reviews={
        @CodeReview(
            reviewer="Kevin R. Dixon",
            date="2006-07-18",
            changesNeeded=false,
            comments="Minor spacing changes to while statement. Otherwise, looks fine."
        ),
        
        @CodeReview(
            reviewer="Kevin R. Dixon",
            date="2008-02-08",
            changesNeeded=true,
            comments="This class should make the comma a parameter (in the splitCommas() method) so that it can be used on all character-delimited parsing schemes.",
            response=@CodeReviewResponse(
                respondent="Justin Basilico",
                date="2008-02-18",
                moreChangesNeeded=false,
                comments="Added the split() method"
            )
        )
    }
)
public class CSVUtility
    extends java.lang.Object
{

    /**
     * Returns the next non-empty line from the given BufferedReader as an
     * array of comma-separated entries. This is a convenience overload that
     * delegates to {@link #nextNonEmptyLine(BufferedReader, char)} with
     * {@code ','} as the delimiter.
     *
     * @param  r The BufferedReader to read from.
     * @return An array of the comma-separated values on the next line or null
     *         if the end of the reader has been reached.
     * @throws IOException If there is an exception in reading from the
     *         BufferedReader.
     */
    public static String[] nextNonEmptyLine(
        BufferedReader r)
        throws IOException
    {
        return nextNonEmptyLine(r, ',');
    }

    /**
     * Returns the next non-empty line from the given BufferedReader as an
     * array of delimiter-separated entries. If there is no more data, null
     * is returned.
     * <p>
     * Each line is trimmed of leading and trailing whitespace before it is
     * tested for emptiness and split. As a consequence, when the delimiter is
     * itself a whitespace character (for example a tab), leading and trailing
     * delimiters are removed by the trim and the empty fields they would have
     * produced are not reported.
     *
     * @param  r The BufferedReader to read from.
     * @param  delimiter The character that separates the values on a line.
     * @return An array of the delimiter-separated values on the next
     *         non-empty line, or null if the end of the reader has been
     *         reached.
     * @throws IOException If there is an exception in reading from the
     *         BufferedReader.
     */
    public static String[] nextNonEmptyLine(
        BufferedReader r,
        char delimiter )
        throws IOException
    {
        String line;

        // Loop over the lines until one with content is found.
        while ((line = r.readLine()) != null)
        {
            line = line.trim();

            if (line.length() <= 0)
            {
                // Blank (or whitespace-only) line: skip it.
                continue;
            }

            final String[] entries = CSVUtility.split(line, delimiter);

            if (entries != null && entries.length > 0)
            {
                // Return the line since it had data on it.
                return entries;
            }
            // else - No data on the line, keep reading.
        }

        // Nothing found before the end of the reader.
        return null;
    }

    /**
     * Splits the given line into the array of comma-separated values. If the
     * given line is null, then the value returned is null. If there is no
     * data on the line, then an empty array is returned. Otherwise, an array
     * containing at least one value is returned.
     *
     * @param  line The line to split on the commas.
     * @return The array of comma-separated values from the given line.
     */
    public static String[] splitCommas(
        final String line)
    {
        return split(line, ',');
    }

    /**
     * Splits the given line into the array of character-separated values. If
     * the given line is null, then the value returned is null. If the line is
     * the empty string, then an empty array is returned. Otherwise, an array
     * containing at least one value is returned; a line with {@code k}
     * occurrences of the delimiter yields {@code k + 1} values, so adjacent
     * delimiters and a trailing delimiter produce empty-string entries.
     * No quoting or escaping of the delimiter is recognized.
     *
     * @param  line The line to split on the given character.
     * @param  c The character to use to split the line.
     * @return The array of character-separated values from the given line.
     */
    public static String[] split(
        final String line,
        final char c)
    {
        if (line == null)
        {
            // Error: Bad line.
            return null;
        }

        final int length = line.length();
        if (length <= 0)
        {
            // The line has no data.
            return new String[0];
        }

        // Collect the values in order; the element type must be declared so
        // that toArray(String[]) is typed as String[] rather than Object[].
        final LinkedList<String> list = new LinkedList<String>();

        // Index of the first character of the value currently being read.
        int start = 0;

        // Consume one value per delimiter until no delimiter remains.
        while (start < length)
        {
            final int end = line.indexOf(c, start);
            if (end < 0)
            {
                break;
            }

            list.add(line.substring(start, end));
            start = end + 1;
        }

        if (start < length)
        {
            // Add the last value, which runs to the end of the line.
            list.add(line.substring(start));
        }
        else
        {
            // The last character was a delimiter, so the final value is empty.
            list.add("");
        }

        // Convert the list to an array.
        return list.toArray(new String[list.size()]);
    }

    /**
     * Reads a CSV file into a list of arrays of string values, using a comma
     * as the delimiter. Delegates to {@link #readFile(String, char)}.
     *
     * @param   fileName The file to read in.
     * @return  A list of arrays of strings that contain the comma-separated
     *      values in the given CSV file.
     * @throws java.io.IOException  If there is an error reading the file.
     */
    public static LinkedList<String[]> readFile(
        final String fileName )
        throws IOException
    {
        return readFile( fileName, ',' );
    }

    /**
     * Reads a character-delimited file into a list of arrays of string
     * values. Each element of the result corresponds to one non-empty line
     * of the file, in file order; blank lines are skipped. The file is opened
     * with a {@link FileReader} and is closed before this method returns,
     * whether or not reading succeeds.
     *
     * @param   fileName The file to read in.
     * @param   delimiter The character that separates the values on a line.
     * @return  A list of arrays of strings that contain the
     *      delimiter-separated values in the given file.
     * @throws java.io.IOException  If there is an error reading the file.
     */
    public static LinkedList<String[]> readFile(
        final String fileName,
        final char delimiter )
        throws IOException
    {
        final LinkedList<String[]> result = new LinkedList<String[]>();
        final BufferedReader reader =
            new BufferedReader(new FileReader(fileName));

        try
        {
            String[] line;
            while ((line = nextNonEmptyLine(reader, delimiter)) != null)
            {
                result.add(line);
            }
        }
        finally
        {
            // Always release the underlying file handle.
            reader.close();
        }

        return result;
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy