gov.nasa.pds.objectAccess.TableReader Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of pds4-jparser Show documentation
This is the parser library for the PDS4 planetary data standard.
// Copyright 2019, California Institute of Technology ("Caltech").
// U.S. Government sponsorship acknowledged.
//
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistributions must reproduce the above copyright notice, this list of
// conditions and the following disclaimer in the documentation and/or other
// materials provided with the distribution.
// * Neither the name of Caltech nor its operating division, the Jet Propulsion
// Laboratory, nor the names of its contributors may be used to endorse or
// promote products derived from this software without specific prior written
// permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.
package gov.nasa.pds.objectAccess;
import java.io.BufferedReader;
import java.io.ByteArrayInputStream;
import java.io.Closeable;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.RandomAccessFile;
import java.net.URL;
import java.nio.Buffer;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
import java.util.HashMap;
import java.util.Map;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.opencsv.CSVParser;
import com.opencsv.CSVParserBuilder;
import com.opencsv.CSVReader;
import com.opencsv.CSVReaderBuilder;
import com.opencsv.exceptions.CsvValidationException;
import gov.nasa.pds.label.object.DataObjectLocation;
import gov.nasa.pds.label.object.FieldDescription;
import gov.nasa.pds.label.object.TableRecord;
import gov.nasa.pds.objectAccess.table.AdapterFactory;
import gov.nasa.pds.objectAccess.table.TableAdapter;
import gov.nasa.pds.objectAccess.table.TableBinaryAdapter;
import gov.nasa.pds.objectAccess.table.TableCharacterAdapter;
import gov.nasa.pds.objectAccess.table.TableDelimitedAdapter;
import gov.nasa.pds.objectAccess.utility.Utility;
/**
* The TableReader
class defines methods for reading table records.
*/
public class TableReader implements Closeable {
  private static final Logger LOGGER = LoggerFactory.getLogger(TableReader.class);

  /** Adapter that knows how to interpret this table type (character, binary or delimited). */
  private TableAdapter adapter;
  /** Byte offset of the table object within the data file. */
  private long offset;
  /** 1-relative index of the row most recently read; 0 means nothing has been read yet. */
  private long currentRow = 0;
  /** Record instance reused across reads to avoid a per-row allocation. */
  private TableRecord record = null;
  protected ByteWiseFileAccessor accessor = null;
  /** Field name -> 1-relative field index; the first occurrence wins for duplicate names. */
  private Map<String, Integer> map = new HashMap<>();
  /** CSV reader for delimited tables; null for fixed-width tables. */
  private CSVReader csvReader = null;
  private BufferedReader bufferedReader = null;
  /** Underlying stream for delimited tables; null for fixed-width tables. */
  private InputStream inputStream = null;
  private long recordSize = 0;
  private char delimitedChar = ',';
  protected DataObjectLocation dataObjectLocation = null;

  public TableReader(Object table, File dataFile) throws Exception {
    this(table, dataFile.toURI().toURL());
  }

  /**
   * Constructs a <code>TableReader</code> instance for reading records from a data file
   * associated with a table object.
   *
   * @param table a table object
   * @param dataFile an input data file
   *
   * @throws NullPointerException if table offset is null
   */
  public TableReader(Object table, URL dataFile) throws Exception {
    this(table, dataFile, true);
  }

  public TableReader(Object table, URL dataFile, boolean checkSize)
      throws InvalidTableException, Exception {
    this(table, dataFile, checkSize, false);
  }

  public TableReader(Object table, URL dataFile, boolean checkSize, boolean readEntireFile)
      throws InvalidTableException, Exception {
    this(table, dataFile, null, checkSize, readEntireFile, false);
  }

  public TableReader(Object table, URL dataFile, DataObjectLocation location, boolean checkSize,
      boolean readEntireFile, boolean keepQuotationsFlag) throws InvalidTableException, Exception {
    this(table, dataFile, location, checkSize, readEntireFile, keepQuotationsFlag, null, null);
  }

  /**
   * Constructs a <code>TableReader</code> instance for reading records from a data file
   * associated with a table object.
   *
   * @param table a table object
   * @param dataFile an input data file
   * @param location location of the data object within the label; may be null
   * @param checkSize check that the size of the data file is equal to the size of the table
   *        (length * records) + offset.
   * @param readEntireFile flag to read an entire file
   * @param keepQuotationsFlag flag to keep the starting/ending quotes
   * @param raf an already-open random access file to reuse; may be null
   * @param inputStream an already-open input stream to reuse; may be null
   *
   * @throws NullPointerException if table offset is null
   */
  public TableReader(Object table, URL dataFile, DataObjectLocation location, boolean checkSize,
      boolean readEntireFile, boolean keepQuotationsFlag, RandomAccessFile raf,
      InputStream inputStream) throws InvalidTableException, Exception {
    this.adapter = AdapterFactory.INSTANCE.getTableAdapter(table);
    this.dataObjectLocation = location;
    LOGGER.debug("TableReader:dataFile {}, raf {}", dataFile, raf);
    try {
      offset = adapter.getOffset();
    } catch (NullPointerException ex) {
      LOGGER.error("The table offset cannot be null.");
      throw ex;
    }
    if (adapter instanceof TableDelimitedAdapter) {
      LOGGER.debug("TableReader:instanceof TableDelimitedAdapter: {},{}", dataFile,
          adapter.getClass().getSimpleName());
      TableDelimitedAdapter tda = (TableDelimitedAdapter) adapter;
      if (inputStream == null) {
        this.inputStream = Utility.openConnection(dataFile.openConnection());
      } else {
        this.inputStream = inputStream;
      }
      // NOTE(review): skip() may skip fewer bytes than requested on some streams, and
      // mark(0) sets a zero read-limit, so reset() in getRecord() relies on the concrete
      // stream implementation being lenient — confirm with Utility.openConnection().
      this.inputStream.skip(offset);
      this.inputStream.mark(0);
      this.bufferedReader = new BufferedReader(new InputStreamReader(this.inputStream, "US-ASCII"));
      this.accessor =
          new ByteWiseFileAccessor(dataFile, offset, -1, adapter.getRecordCount(), false, raf);
      this.delimitedChar = tda.getFieldDelimiter();
      // Use the flag keepQuotationsFlag to tell the CSVParserBuilder that we wish to keep the
      // starting/ending quotes.
      LOGGER.debug("keepQuotationsFlag: {}", keepQuotationsFlag);
      CSVParser parser = new CSVParserBuilder().withSeparator(this.delimitedChar)
          .withKeepQuotations(keepQuotationsFlag).build();
      this.csvReader = new CSVReaderBuilder(this.bufferedReader).withCSVParser(parser).build();
    } else {
      LOGGER.debug("TableReader:NOT TableDelimitedAdapter: {},{}", dataFile,
          this.adapter.getClass().getSimpleName());
      // Fixed-width (character/binary) tables are addressed directly by record number.
      this.accessor = new ByteWiseFileAccessor(dataFile, this.offset,
          this.adapter.getRecordLength(), this.adapter.getRecordCount(), true, raf);
    }
    createFieldMap();
  }

  public TableAdapter getAdapter() {
    return this.adapter;
  }

  /**
   * Gets the field descriptions for fields in the table.
   *
   * @return an array of field descriptions
   */
  public FieldDescription[] getFields() {
    return adapter.getFields();
  }

  /**
   *
   * @return the field map (field name to 1-relative field index).
   */
  public Map getFieldMap() {
    return map;
  }

  /**
   * Reads the next record from the data file.
   *
   * @return the next record, or null if no further records.
   * @throws CsvValidationException
   */
  public TableRecord readNext() throws IOException, CsvValidationException {
    currentRow++;
    if (currentRow > adapter.getRecordCount()) {
      return null;
    }
    return getTableRecord();
  }

  /**
   * Gets access to the table record given the index. The current row is set to this index, thus,
   * subsequent call to readNext() gets the next record from this position.
   *
   * @param index the record index (1-relative)
   * @return an instance of TableRecord
   * @throws IllegalArgumentException if index is greater than the record number
   * @throws CsvValidationException
   */
  public TableRecord getRecord(int index)
      throws IllegalArgumentException, IOException, CsvValidationException {
    return (getRecord(index, false));
  }

  /**
   * Gets access to the table record given the index. The current row is set to this index, thus,
   * subsequent call to readNext() gets the next record from this position.
   *
   * @param index the record index (1-relative)
   * @param keepQuotationsFlag flag to keep the starting/ending quotes or not.
   * @return an instance of TableRecord
   * @throws IllegalArgumentException if index is greater than the record number
   * @throws CsvValidationException
   */
  public TableRecord getRecord(long index, boolean keepQuotationsFlag)
      throws IllegalArgumentException, IOException, CsvValidationException {
    long recordCount = adapter.getRecordCount();
    if (index < 1 || index > recordCount) {
      String msg = "The index is out of range 1 - " + recordCount;
      LOGGER.error(msg);
      throw new IllegalArgumentException(msg);
    }
    // Only delimited tables need stream repositioning; fixed-width tables are addressed
    // directly by row number through the byte-wise accessor. (Previously this reset logic
    // ran for any adapter type and threw a NullPointerException for fixed-width tables,
    // whose inputStream is null.)
    if (adapter instanceof TableDelimitedAdapter) {
      if (currentRow >= index) {
        // issue 189 - to handle large delimited file
        // instead of caching rows in an array list, re-position to the requested line
        // after resetting the input stream. (>= so that re-reading the current row also
        // rewinds; previously it silently returned the following row.)
        this.inputStream.reset();
        this.bufferedReader =
            new BufferedReader(new InputStreamReader(this.inputStream, "US-ASCII"));
        // Skip the 'index - 1' lines that precede the requested record. A long counter is
        // used because index itself is a long.
        for (long i = 0; i < (index - 1); i++) {
          this.bufferedReader.readLine();
        }
        CSVParser parser = new CSVParserBuilder().withSeparator(this.delimitedChar)
            .withKeepQuotations(keepQuotationsFlag).build();
        this.csvReader = new CSVReaderBuilder(bufferedReader).withCSVParser(parser).build();
      } else {
        // Seeking forward: discard the intervening records so the next read returns record
        // 'index'. (Previously a forward jump incorrectly returned record currentRow + 1.)
        // NOTE(review): this path keeps the parser built at construction time, so the
        // keepQuotationsFlag argument only takes effect on the rewind path — confirm intent.
        for (long i = currentRow; i < (index - 1); i++) {
          this.csvReader.readNext();
        }
      }
    }
    currentRow = index;
    return getTableRecord();
  }

  /**
   * Reads the record at {@code currentRow}, reusing the previously-created record object
   * when possible.
   *
   * @return the populated record
   * @throws IOException if a delimited record has the wrong number of fields
   */
  private TableRecord getTableRecord() throws IOException, CsvValidationException {
    // DEBUG statements can be time consuming. Should be uncommented by developer only.
    if (adapter instanceof TableDelimitedAdapter) {
      String[] recordValue = this.csvReader.readNext();
      if (recordValue != null && (recordValue.length != adapter.getFieldCount())) {
        throw new IOException("Record " + currentRow + " has wrong number of fields " + "(expected "
            + adapter.getFieldCount() + ", got " + recordValue.length + ")");
      }
      if (record != null) {
        ((DelimitedTableRecord) record).setRecordValue(recordValue);
      } else {
        record = new DelimitedTableRecord(map, adapter.getFieldCount(), recordValue);
      }
    } else {
      byte[] recordValue = this.accessor.readRecordBytes(currentRow, 0, adapter.getRecordLength());
      if (record != null) {
        ((FixedTableRecord) record).setRecordValue(recordValue);
      } else {
        record = new FixedTableRecord(recordValue, map, adapter.getFields());
      }
    }
    return record;
  }

  /**
   * Builds the field-name-to-index map. For duplicate field names only the first
   * occurrence is kept; the index still advances so later fields keep their positions.
   */
  private void createFieldMap() {
    map = new HashMap<>();
    int fieldIndex = 1;
    for (FieldDescription field : adapter.getFields()) {
      if (!map.containsKey(field.getName())) {
        map.put(field.getName(), fieldIndex);
      }
      ++fieldIndex;
    }
  }

  /**
   * Sets the current row.
   *
   * @param row The row to set.
   */
  public void setCurrentRow(int row) {
    this.setCurrentRow((long) row);
  }

  /**
   * Sets the current row.
   *
   * @param row The row to set.
   */
  public void setCurrentRow(long row) {
    this.currentRow = row;
  }

  /**
   *
   * @return the current row.
   */
  public long getCurrentRow() {
    return this.currentRow;
  }

  public ByteWiseFileAccessor getAccessor() {
    return this.accessor;
  }

  public InputStream getInputStream() {
    return this.inputStream;
  }

  /**
   * Counts the number of records (lines) in a text data file of any size, starting at the
   * table offset. Line-feed bytes are counted chunk by chunk, so a line spanning a chunk
   * boundary is counted exactly once (running a line reader over each chunk independently
   * double-counted such lines), and only the bytes actually read are examined (the raw
   * backing array previously leaked stale bytes from an earlier, larger read). A final
   * line without a terminator is also counted. Covers LF and CRLF terminated records.
   *
   * @param dataFile the data file URL (must resolve to a local file)
   * @return the number of lines after the table offset
   * @throws Exception if the file cannot be opened or read
   */
  private long countRecordsForTextTable(URL dataFile) throws Exception {
    long numRecordsForTextTable = 0;
    // Use RandomAccessFile/FileChannel so files larger than 2gb are handled.
    File aFile = new File(dataFile.toURI());
    try (RandomAccessFile raf = new RandomAccessFile(aFile, "r")) {
      raf.seek(offset); // Move the pointer to the offset first.
      FileChannel inChannel = raf.getChannel();
      ByteBuffer buff = ByteBuffer.allocate(1024 * 128);
      byte lastByte = '\n'; // An empty table yields zero records.
      int bytesRead;
      while ((bytesRead = inChannel.read(buff)) > 0) {
        byte[] bufferAsBytes = buff.array(); // Underlying array; valid only up to bytesRead.
        for (int i = 0; i < bytesRead; i++) {
          if (bufferAsBytes[i] == '\n') {
            numRecordsForTextTable++;
          }
        }
        lastByte = bufferAsBytes[bytesRead - 1];
        ((Buffer) buff).clear();
      }
      // A non-empty file whose last line lacks a terminator still ends with a record.
      if (lastByte != '\n') {
        numRecordsForTextTable++;
      }
    }
    LOGGER.debug("countRecordsForTextTable:numRecords {}", numRecordsForTextTable);
    return (numRecordsForTextTable);
  }

  /**
   * Computes the number of records from the record length and the file size, so the file
   * does not have to be read through. Falls back to counting lines when the label does not
   * provide a record length (e.g. comma separated files).
   *
   * @param dataFile the data file URL (must resolve to a local file)
   * @param offset the byte offset of the table within the file
   * @return the number of records after the offset
   */
  private long countRecordsForTableAdapterType(URL dataFile, long offset) throws Exception {
    LOGGER.debug("countRecordsForTableAdapterType:dataFile,offset {},{}", dataFile, offset);
    LOGGER.debug("countRecordsForTableAdapterType:dataFile,adapter.getRecordLength() {},{}",
        dataFile, adapter.getRecordLength());
    long numRecords = -1;
    // Do a sanity check if the record size is not known or zero. Not all labels provide the
    // record size; in that case we must read through the file and count the records.
    if (adapter.getRecordLength() <= 0) {
      numRecords = this.countRecordsForTextTable(dataFile);
      LOGGER.debug("countRecordsForTableAdapterType:numRecords {}", numRecords);
      return (numRecords);
    }
    LOGGER.debug("countRecordsForTableAdapterType:numRecords:initial {}", numRecords);
    File aFile = new File(dataFile.toURI());
    long fileSize;
    try (RandomAccessFile raf = new RandomAccessFile(aFile, "r")) {
      // FileChannel.size() reports the whole file size regardless of any seek position, so
      // the table offset must be subtracted explicitly. (Previously the full size was used,
      // over-counting records whenever the table did not start at byte 0.)
      fileSize = raf.getChannel().size() - offset;
    }
    // The number of records is the size of the table content divided by the record length.
    numRecords = fileSize / adapter.getRecordLength();
    LOGGER.debug("countRecordsForTableAdapterType:numRecords {}", numRecords);
    return (numRecords);
  }

  /**
   * Computes the number of records in the given data file for the given table object.
   * Note: as a side effect this re-binds this reader's adapter and offset to {@code table}.
   *
   * @param dataFile the data file URL
   * @param table the table object to size
   * @return the size of record (i.e. number of lines)
   */
  public long getRecordSize(URL dataFile, Object table) throws Exception {
    adapter = AdapterFactory.INSTANCE.getTableAdapter(table);
    LOGGER.debug("getRecordSize:adapter {}", adapter);
    try {
      offset = adapter.getOffset();
    } catch (NullPointerException ex) {
      LOGGER.error("The table offset cannot be null.");
      throw ex;
    }
    // No InputStream is opened here any more: the previous implementation opened one
    // unconditionally and leaked it on the delimited path, while on the binary path its
    // available() result was immediately overwritten by the channel size.
    if (adapter instanceof TableDelimitedAdapter) {
      LOGGER.debug("getRecordSize:adapter instanceof TableDelimitedAdapter");
      // countRecordsForTableAdapterType() counts the records without re-reading the whole
      // file into memory.
      this.recordSize = this.countRecordsForTableAdapterType(dataFile, offset);
    } else {
      LOGGER.debug("getRecordSize:adapter instanceof TableDelimitedAdapter else");
      if (adapter instanceof TableBinaryAdapter) {
        offset = 0;
      }
      if (adapter instanceof TableCharacterAdapter) {
        LOGGER.debug("getRecordSize:adapter instanceof TableCharacterAdapter");
        // Derive the count from the record length and file size rather than reading the file.
        this.recordSize = this.countRecordsForTableAdapterType(dataFile, offset);
      } else {
        LOGGER.debug("getRecordSize:adapter instanceof TableCharacterAdapter else");
        // Binary tables report the raw file size; use RandomAccessFile/FileChannel so files
        // larger than 2gb are handled.
        File aFile = new File(dataFile.toURI());
        try (RandomAccessFile raf = new RandomAccessFile(aFile, "r")) {
          raf.seek(offset);
          this.recordSize = raf.getChannel().size();
        }
      }
    }
    LOGGER.debug("getRecordSize:this.recordSize {}", this.recordSize);
    LOGGER.debug("getRecordSize:adapter.getRecordLength() {}", adapter.getRecordLength());
    return this.recordSize;
  }

  public long getOffset() {
    return this.offset;
  }

  /**
   * Closes the underlying accessor, stream and reader. Safe to call when some of them were
   * never opened (fixed-width readers have no stream/reader).
   */
  @Override
  public void close() throws IOException {
    LOGGER.debug("closing accessor/buffers");
    if (this.accessor != null) {
      this.accessor.close();
    }
    if (this.inputStream != null) {
      this.inputStream.close();
    }
    if (this.bufferedReader != null) {
      this.bufferedReader.close();
    }
  }

  public DataObjectLocation getDataObjectLocation() {
    return dataObjectLocation;
  }

  public void setDataObjectLocation(DataObjectLocation dataObjectLocation) {
    this.dataObjectLocation = dataObjectLocation;
  }
}