All downloads are free. The search and download functionality uses the official Maven repository.

edu.cmu.tetrad.util.DataUtility Maven / Gradle / Ivy

///////////////////////////////////////////////////////////////////////////////
// For information as to what this class does, see the Javadoc, below.       //
// Copyright (C) 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,       //
// 2007, 2008, 2009, 2010, 2014, 2015, 2022 by Peter Spirtes, Richard        //
// Scheines, Joseph Ramsey, and Clark Glymour.                               //
//                                                                           //
// This program is free software; you can redistribute it and/or modify      //
// it under the terms of the GNU General Public License as published by      //
// the Free Software Foundation; either version 2 of the License, or         //
// (at your option) any later version.                                       //
//                                                                           //
// This program is distributed in the hope that it will be useful,           //
// but WITHOUT ANY WARRANTY; without even the implied warranty of            //
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the             //
// GNU General Public License for more details.                              //
//                                                                           //
// You should have received a copy of the GNU General Public License         //
// along with this program; if not, write to the Free Software               //
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA //
///////////////////////////////////////////////////////////////////////////////

package edu.cmu.tetrad.util;

import java.io.File;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.nio.MappedByteBuffer;
import java.nio.channels.FileChannel;

/**
 * Fast data loader for continuous or discrete data.
 *
 * <p>The helpers in this class scan a file as raw bytes through read-only
 * memory maps. Both {@code '\n'} and {@code '\r'} are treated as line
 * terminators, and lines that contain no bytes at all are skipped.
 *
 * <p>Jul 13, 2015 10:44:10 AM
 *
 * @author Kevin V. Bui
 */
public final class DataUtility {

    private static final byte NEW_LINE = '\n';

    private static final byte CARRIAGE_RETURN = '\r';

    /**
     * Largest region that a single {@link FileChannel#map} call accepts.
     * Bigger files are scanned in consecutive windows of at most this size.
     */
    private static final long MAX_MAP_SIZE = Integer.MAX_VALUE;

    private DataUtility() {
    }

    /**
     * Counts the columns of the first line in the file that contains data.
     * Leading empty lines are skipped. An empty file, or a file made up only
     * of line terminators, has zero columns.
     *
     * @param file dataset
     * @param delimiter a single character used to separate the data; it is
     * narrowed to one byte, so only single-byte delimiters can match
     * @return the number of columns of the first non-empty line in the file
     * @throws IOException if the file cannot be opened or mapped for reading
     */
    public static int countColumn(File file, char delimiter) throws IOException {
        final byte delim = (byte) delimiter;

        int count = 0;
        // Start from NEW_LINE rather than a -1 sentinel: (byte) 0xFF is also -1,
        // so a sentinel would be confused with real data ending in that byte.
        byte currentChar = NEW_LINE;
        byte prevChar = NEW_LINE;

        try (RandomAccessFile raf = new RandomAccessFile(file, "r");
                FileChannel fc = raf.getChannel()) {
            final long size = fc.size();
            long position = 0;
            while (position < size) {
                // A single mapping may not exceed Integer.MAX_VALUE bytes.
                final long length = Math.min(size - position, MAX_MAP_SIZE);
                final MappedByteBuffer buffer = fc.map(FileChannel.MapMode.READ_ONLY, position, length);
                while (buffer.hasRemaining()) {
                    currentChar = buffer.get();
                    if (currentChar == CARRIAGE_RETURN) {
                        currentChar = NEW_LINE;
                    }

                    final boolean endOfLine = currentChar == NEW_LINE;
                    if (currentChar == delim || (endOfLine && prevChar != NEW_LINE)) {
                        count++;
                        if (endOfLine) {
                            // The terminator of the first data line closes its last column.
                            return count;
                        }
                    }

                    prevChar = currentChar;
                }
                position += length;
            }
        }

        // The file ended without a line terminator: the last column is still open.
        if (currentChar != NEW_LINE) {
            count++;
        }

        return count;
    }

    /**
     * Counts the lines in the file that contain data. Empty lines are not
     * counted, and a final line without a line terminator is counted.
     *
     * @param file dataset
     * @return the number of lines that contain data
     * @throws IOException if the file cannot be opened or mapped for reading
     */
    public static int countLine(File file) throws IOException {
        int count = 0;
        byte prevChar = NEW_LINE;

        try (RandomAccessFile raf = new RandomAccessFile(file, "r");
                FileChannel fc = raf.getChannel()) {
            final long size = fc.size();
            long position = 0;
            while (position < size) {
                // A single mapping may not exceed Integer.MAX_VALUE bytes.
                final long length = Math.min(size - position, MAX_MAP_SIZE);
                final MappedByteBuffer buffer = fc.map(FileChannel.MapMode.READ_ONLY, position, length);
                while (buffer.hasRemaining()) {
                    byte currentChar = buffer.get();
                    if (currentChar == CARRIAGE_RETURN) {
                        currentChar = NEW_LINE;
                    }

                    // Only the first terminator after data ends a line; this also
                    // collapses "\r\n" pairs and skips empty lines.
                    if (currentChar == NEW_LINE && prevChar != NEW_LINE) {
                        count++;
                    }

                    prevChar = currentChar;
                }
                position += length;
            }
        }

        // Count a last line that has data but no trailing terminator.
        if (prevChar != NEW_LINE) {
            count++;
        }

        return count;
    }

}





© 2015 - 2024 Weber Informatics LLC | Privacy Policy