All downloads are free. The search and download features use the official Maven repository.

edu.pitt.dbmi.data.reader.covariance.LowerCovarianceDataFileReader Maven / Gradle / Ivy

The newest version!
/*
 * Copyright (C) 2018 University of Pittsburgh.
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston,
 * MA 02110-1301  USA
 */
package edu.pitt.dbmi.data.reader.covariance;

import edu.pitt.dbmi.data.reader.DataFileReader;
import edu.pitt.dbmi.data.reader.DataReaderException;
import edu.pitt.dbmi.data.reader.Delimiter;

import java.io.IOException;
import java.io.InputStream;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;
import java.util.LinkedList;
import java.util.List;

/**
 * Nov 19, 2018 11:04:51 AM
 *
 * @author Kevin V. Bui ([email protected])
 * @version $Id: $Id
 */
public class LowerCovarianceDataFileReader extends DataFileReader implements CovarianceDataReader {

    /**
     * Constructs a lower triangular covariance data file reader.
     *
     * @param dataFile  the data file.
     * @param delimiter the delimiter.
     */
    public LowerCovarianceDataFileReader(Path dataFile, Delimiter delimiter) {
        super(dataFile, delimiter);
    }

    /**
     * {@inheritDoc}
     * 

* Reads in the covariance data. */ @Override public CovarianceData readInData() throws IOException { int numOfCases = getNumberOfCases(); List variables = getVariables(); double[][] data = getCovarianceData(variables.size()); return new LowerCovarianceData(numOfCases, variables, data); } private double[][] getCovarianceData(int matrixSize) throws IOException { double[][] data = new double[matrixSize][matrixSize]; try (InputStream in = Files.newInputStream(this.dataFile, StandardOpenOption.READ)) { boolean skip = false; boolean hasSeenNonblankChar = false; boolean hasQuoteChar = false; byte delimChar = this.delimiter.getByteValue(); // comment marker check byte[] comment = this.commentMarker.getBytes(); int cmntIndex = 0; boolean checkForComment = comment.length > 0; int lineDataNum = 1; int lineNum = 1; int colNum = 0; int col = 0; int row = 0; StringBuilder dataBuilder = new StringBuilder(); byte prevChar = -1; byte[] buffer = new byte[DataFileReader.BUFFER_SIZE]; int len; while ((len = in.read(buffer)) != -1 && !Thread.currentThread().isInterrupted()) { for (int i = 0; i < len && !Thread.currentThread().isInterrupted(); i++) { byte currChar = buffer[i]; if (currChar == DataFileReader.CARRIAGE_RETURN || currChar == DataFileReader.LINE_FEED) { if (currChar == DataFileReader.LINE_FEED && prevChar == DataFileReader.CARRIAGE_RETURN) { prevChar = DataFileReader.LINE_FEED; continue; } if (hasSeenNonblankChar && !skip) { if (lineDataNum >= 3) { if (col > row) { String errMsg = String.format("Excess data on line %d. Extracted %d value(s) but expected %d.", lineNum, col + 1, row + 1); // LowerCovarianceDataFileReader.LOGGER.error(errMsg); throw new DataReaderException(errMsg); } else if (col < row) { String errMsg = String.format("Insufficent data on line %d. 
Extracted %d value(s) but expected %d.", lineNum, col + 1, row + 1); // LowerCovarianceDataFileReader.LOGGER.error(errMsg); throw new DataReaderException(errMsg); } else { String value = dataBuilder.toString().trim(); dataBuilder.delete(0, dataBuilder.length()); colNum++; if (value.isEmpty()) { String errMsg = String.format("Missing value on line %d at column %d.", lineNum, colNum); // LowerCovarianceDataFileReader.LOGGER.error(errMsg); throw new DataReaderException(errMsg); } else { try { double covariance = Double.parseDouble(value); data[row][col] = covariance; data[col][row] = covariance; } catch (NumberFormatException exception) { String errMsg = String.format("Invalid number %s on line %d at column %d.", value, lineNum, colNum); // LowerCovarianceDataFileReader.LOGGER.error(errMsg, exception); throw new DataReaderException(errMsg); } } } row++; } lineDataNum++; } lineNum++; // clear data dataBuilder.delete(0, dataBuilder.length()); // reset states skip = false; hasSeenNonblankChar = false; cmntIndex = 0; col = 0; colNum = 0; checkForComment = comment.length > 0; } else if (!skip) { if (currChar > DataFileReader.SPACE_CHAR) { hasSeenNonblankChar = true; } // skip blank chars at the begining of the line if (currChar <= DataFileReader.SPACE_CHAR && !hasSeenNonblankChar) { continue; } // check for comment marker to skip line if (checkForComment) { if (currChar == comment[cmntIndex]) { cmntIndex++; if (cmntIndex == comment.length) { skip = true; prevChar = currChar; continue; } } else { checkForComment = false; } } if (lineDataNum >= 3) { if (currChar == this.quoteCharacter) { hasQuoteChar = !hasQuoteChar; } else { if (hasQuoteChar) { dataBuilder.append((char) currChar); } else { boolean isDelimiter; if (this.delimiter == Delimiter.WHITESPACE) { isDelimiter = (currChar <= DataFileReader.SPACE_CHAR) && (prevChar > DataFileReader.SPACE_CHAR); } else { isDelimiter = (currChar == delimChar); } if (isDelimiter) { if (col > row) { String errMsg = String.format("Excess 
data on line %d. Extracted %d value(s) but expected %d.", lineNum, col + 1, row + 1); // LowerCovarianceDataFileReader.LOGGER.error(errMsg); throw new DataReaderException(errMsg); } String value = dataBuilder.toString().trim(); dataBuilder.delete(0, dataBuilder.length()); colNum++; if (value.isEmpty()) { String errMsg = String.format("Missing value on line %d at column %d.", lineNum, colNum); // LowerCovarianceDataFileReader.LOGGER.error(errMsg); throw new DataReaderException(errMsg); } else { try { double covariance = Double.parseDouble(value); data[row][col] = covariance; data[col][row] = covariance; } catch (NumberFormatException exception) { String errMsg = String.format("Invalid number %s on line %d at column %d.", value, lineNum, colNum); // LowerCovarianceDataFileReader.LOGGER.error(errMsg, exception); throw new DataReaderException(errMsg); } } col++; } else { dataBuilder.append((char) currChar); } } } } } prevChar = currChar; } } if (hasSeenNonblankChar && !skip) { if (lineDataNum >= 3) { if (col > row) { String errMsg = String.format("Excess data on line %d. Extracted %d value(s) but expected %d.", lineNum, col + 1, row + 1); // LowerCovarianceDataFileReader.LOGGER.error(errMsg); throw new DataReaderException(errMsg); } else if (col < row) { String errMsg = String.format("Insufficent data on line %d. 
Extracted %d value(s) but expected %d.", lineNum, col + 1, row + 1); // LowerCovarianceDataFileReader.LOGGER.error(errMsg); throw new DataReaderException(errMsg); } else { String value = dataBuilder.toString().trim(); dataBuilder.delete(0, dataBuilder.length()); colNum++; if (value.isEmpty()) { String errMsg = String.format("Missing value on line %d at column %d.", lineNum, colNum); // LowerCovarianceDataFileReader.LOGGER.error(errMsg); throw new DataReaderException(errMsg); } else { try { double covariance = Double.parseDouble(value); data[row][col] = covariance; data[col][row] = covariance; } catch (NumberFormatException exception) { String errMsg = String.format("Invalid number %s on line %d at column %d.", value, lineNum, colNum); // LowerCovarianceDataFileReader.LOGGER.error(errMsg, exception); throw new DataReaderException(errMsg); } } } } } } return data; } private List getVariables() throws IOException { List variables = new LinkedList<>(); try (InputStream in = Files.newInputStream(this.dataFile, StandardOpenOption.READ)) { boolean skip = false; boolean hasSeenNonblankChar = false; boolean hasQuoteChar = false; boolean finished = false; byte delimChar = this.delimiter.getByteValue(); // comment marker check byte[] comment = this.commentMarker.getBytes(); int cmntIndex = 0; boolean checkForComment = comment.length > 0; int lineDataNum = 1; int colNum = 0; int lineNum = 1; StringBuilder dataBuilder = new StringBuilder(); byte prevChar = -1; byte[] buffer = new byte[DataFileReader.BUFFER_SIZE]; int len; while ((len = in.read(buffer)) != -1 && !finished && !Thread.currentThread().isInterrupted()) { for (int i = 0; i < len && !finished && !Thread.currentThread().isInterrupted(); i++) { byte currChar = buffer[i]; if (currChar == DataFileReader.CARRIAGE_RETURN || currChar == DataFileReader.LINE_FEED) { if (currChar == DataFileReader.LINE_FEED && prevChar == DataFileReader.CARRIAGE_RETURN) { prevChar = DataFileReader.LINE_FEED; continue; } if (hasSeenNonblankChar 
&& !skip) { if (lineDataNum == 2) { String value = dataBuilder.toString().trim(); dataBuilder.delete(0, dataBuilder.length()); colNum++; if (value.isEmpty()) { String errMsg = String.format("Missing variable name on line %d at column %d.", lineNum, colNum); // LowerCovarianceDataFileReader.LOGGER.error(errMsg); throw new DataReaderException(errMsg); } else { variables.add(value); } } lineDataNum++; finished = lineDataNum > 2; } lineNum++; // clear data dataBuilder.delete(0, dataBuilder.length()); // reset states skip = false; hasSeenNonblankChar = false; cmntIndex = 0; checkForComment = comment.length > 0; } else if (!skip) { if (currChar > DataFileReader.SPACE_CHAR) { hasSeenNonblankChar = true; } // skip blank chars at the begining of the line if (currChar <= DataFileReader.SPACE_CHAR && !hasSeenNonblankChar) { continue; } // check for comment marker to skip line if (checkForComment) { if (currChar == comment[cmntIndex]) { cmntIndex++; if (cmntIndex == comment.length) { skip = true; prevChar = currChar; continue; } } else { checkForComment = false; } } if (lineDataNum == 2) { if (currChar == this.quoteCharacter) { hasQuoteChar = !hasQuoteChar; } else { if (hasQuoteChar) { dataBuilder.append((char) currChar); } else { boolean isDelimiter; if (this.delimiter == Delimiter.WHITESPACE) { isDelimiter = (currChar <= DataFileReader.SPACE_CHAR) && (prevChar > DataFileReader.SPACE_CHAR); } else { isDelimiter = (currChar == delimChar); } if (isDelimiter) { String value = dataBuilder.toString().trim(); dataBuilder.delete(0, dataBuilder.length()); colNum++; if (value.isEmpty()) { String errMsg = String.format("Missing variable name on line %d at column %d.", lineNum, colNum); // LowerCovarianceDataFileReader.LOGGER.error(errMsg); throw new DataReaderException(errMsg); } else { variables.add(value); } } else { dataBuilder.append((char) currChar); } } } } } prevChar = currChar; } } if (hasSeenNonblankChar && !skip) { if (lineDataNum == 2) { String value = 
dataBuilder.toString().trim(); dataBuilder.delete(0, dataBuilder.length()); colNum++; if (value.isEmpty()) { String errMsg = String.format("Missing variable name on line %d at column %d.", lineNum, colNum); // LowerCovarianceDataFileReader.LOGGER.error(errMsg); throw new DataReaderException(errMsg); } else { variables.add(value); } } } } if (variables.isEmpty()) { final String errMsg = "Covariance file does not contain variable names."; // LowerCovarianceDataFileReader.LOGGER.error(errMsg); throw new DataReaderException(errMsg); } return variables; } private int getNumberOfCases() throws IOException { int numOfCases = 0; try (InputStream in = Files.newInputStream(this.dataFile, StandardOpenOption.READ)) { boolean skip = false; boolean hasSeenNonblankChar = false; boolean finished = false; // comment marker check byte[] comment = this.commentMarker.getBytes(); int cmntIndex = 0; boolean checkForComment = comment.length > 0; int lineNum = 1; StringBuilder dataBuilder = new StringBuilder(); byte prevChar = -1; byte[] buffer = new byte[DataFileReader.BUFFER_SIZE]; int len; while ((len = in.read(buffer)) != -1 && !finished && !Thread.currentThread().isInterrupted()) { for (int i = 0; i < len && !finished && !Thread.currentThread().isInterrupted(); i++) { byte currChar = buffer[i]; if (currChar == DataFileReader.CARRIAGE_RETURN || currChar == DataFileReader.LINE_FEED) { if (currChar == DataFileReader.LINE_FEED && prevChar == DataFileReader.CARRIAGE_RETURN) { prevChar = DataFileReader.LINE_FEED; continue; } finished = hasSeenNonblankChar && !skip; if (!finished) { lineNum++; // clear data dataBuilder.delete(0, dataBuilder.length()); // reset states skip = false; hasSeenNonblankChar = false; cmntIndex = 0; checkForComment = comment.length > 0; } } else { if (!skip) { if (currChar > DataFileReader.SPACE_CHAR) { hasSeenNonblankChar = true; } // skip blank chars at the begining of the line if (currChar <= DataFileReader.SPACE_CHAR && !hasSeenNonblankChar) { continue; } // 
check for comment marker to skip line if (checkForComment) { if (currChar == comment[cmntIndex]) { cmntIndex++; if (cmntIndex == comment.length) { skip = true; prevChar = currChar; continue; } } else { checkForComment = false; } } if (currChar != this.quoteCharacter) { dataBuilder.append((char) currChar); } } } prevChar = currChar; } } String value = dataBuilder.toString().trim(); if (value.isEmpty()) { String errMsg = String.format("Line %d: Missing number of cases.", lineNum); // LowerCovarianceDataFileReader.LOGGER.error(errMsg); throw new DataReaderException(errMsg); } else { try { numOfCases += Integer.parseInt(value); } catch (NumberFormatException exception) { String errMsg = String.format("Invalid number %s on line %d.", value, lineNum); // LowerCovarianceDataFileReader.LOGGER.error(errMsg); throw new DataReaderException(errMsg); } } } return numOfCases; } private static final class LowerCovarianceData implements CovarianceData { private final int numberOfCases; private final List variables; private final double[][] data; private LowerCovarianceData(int numberOfCases, List variables, double[][] data) { this.numberOfCases = numberOfCases; this.variables = variables; this.data = data; } /** * @return the number of cases in the data. */ @Override public int getNumberOfCases() { return this.numberOfCases; } /** * @return the number of variables in the data. */ @Override public List getVariables() { return this.variables; } /** * @return the data in a 2D array. */ @Override public double[][] getData() { return this.data; } } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy