All downloads are free. Search and download functionality is backed by the official Maven repository.

gov.nasa.pds.objectAccess.ByteWiseFileAccessor Maven / Gradle / Ivy

There is a newer version: 2.8.4
Show newest version
// Copyright 2019, California Institute of Technology ("Caltech").
// U.S. Government sponsorship acknowledged.
//
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistributions must reproduce the above copyright notice, this list of
// conditions and the following disclaimer in the documentation and/or other
// materials provided with the distribution.
// * Neither the name of Caltech nor its operating division, the Jet Propulsion
// Laboratory, nor the names of its contributors may be used to endorse or
// promote products derived from this software without specific prior written
// permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.

package gov.nasa.pds.objectAccess;

import java.io.File;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.FileInputStream;
import java.io.RandomAccessFile;
import java.net.URL;
import java.net.URLConnection;
import java.nio.Buffer;
import java.nio.ByteBuffer;
import java.nio.MappedByteBuffer;
import java.nio.channels.Channels;
import java.nio.channels.FileChannel;
import java.nio.channels.ReadableByteChannel;
import java.util.Arrays;
import java.util.ArrayList;
import java.util.List;

import org.apache.commons.io.IOUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import gov.nasa.pds.objectAccess.utility.Utility;

/**
 * Class that provides common I/O functionality for PDS data objects.
 */
public class ByteWiseFileAccessor {
	private static final Logger LOGGER = LoggerFactory.getLogger(ByteWiseFileAccessor.class);
	// Length in bytes of one fixed-width record in the data file (set in the constructor).
	private int recordLength;
	private ByteBuffer buffer = null;
	// Total size in bytes of the underlying data file (FileChannel.size()).
	private long fileContentSize;
	
	// Size of each memory-mapped chunk. FileChannel.map() cannot map more than
	// Integer.MAX_VALUE bytes, so files larger than 1 GiB are split across
	// several MappedByteBuffers held in 'mappings'.
	//https://vanillajava.blogspot.com/2011/12/using-memory-mapped-file-for-huge.html
	private static final int MAPPING_SIZE = 1 << 30;  
	// The below setting is used by developer to split small files into multiple chunks. Do not remove.
    // Having smaller chunks will force the function handleTooSmallMapping() to be called.
    // If uncommented, do not run validate on large files as you will run out of memory.
	//private static final int MAPPING_SIZE = 317;  // TODO: Uncomment by developer only to split small files into multiple chunks.
	// FIX: restore the element type lost from the raw 'List' declaration (the
	// '<MappedByteBuffer>' text was evidently stripped as an HTML tag when this
	// source was extracted). The list holds the read-only mappings produced by
	// FileChannel.map(), and readByte()/readRecordBytes() call ByteBuffer methods
	// on its elements, which does not compile against a raw List.
	private final List<MappedByteBuffer> mappings = new ArrayList<>();
	// Cursor used by readByte(): absolute byte position across all mappings.
	private long curPosition = 0;
	private long curListIndex = 0;
	private long totalBytesRead = 0;

	 /**
   * Constructs a ByteWiseFileAccessor object
   * which maps a region of a data file into memory.
   *
   * <p>Convenience overload: converts the {@code File} to a {@code file:} URL
   * (with path characters escaped via {@code File.toURI()}) and delegates to
   * the URL-based constructor, which performs the actual mapping.</p>
   *
   * @param file the data file
   * @param offset the offset within the data file
   * @param length the record length in bytes
   * @param records the number of records
   * @throws FileNotFoundException If file does not exist, is a directory
   *       rather than a regular file, or for some other reason cannot be opened for reading
   * @throws IOException If an I/O error occurs
   */
  public ByteWiseFileAccessor(File file, long offset, int length, int records) throws FileNotFoundException, IOException {
    this(file.toURI().toURL(), offset, length, records);
  }
	
  /**
   * Constructs a ByteWiseFileAccessor object
   * which maps a region of a data file into memory.
   *
   * <p>Delegates to the five-argument constructor with {@code checkSize=true},
   * i.e. the file size is verified against {@code (length * records) + offset}.</p>
   *
   * @param url the data file
   * @param offset the offset within the data file
   * @param length the record length in bytes
   * @param records the number of records
   * @throws FileNotFoundException If file does not exist, is a directory
   *       rather than a regular file, or for some other reason cannot be opened for reading
   * @throws IOException If an I/O error occurs
   */
  public ByteWiseFileAccessor(URL url, long offset, int length, int records) 
      throws FileNotFoundException, IOException {
    this(url, offset, length, records, true);
  }
  
	/**
	 * Constructs a ByteWiseFileAccessor object
	 * which maps a region of a data file into memory.
	 *
	 * The region is mapped read-only in chunks of at most MAPPING_SIZE bytes
	 * (FileChannel.map() cannot map more than Integer.MAX_VALUE bytes), and the
	 * resulting MappedByteBuffers are collected in the 'mappings' list.
	 *
	 * @param url the data file
	 * @param offset the offset within the data file
	 * @param length the record length in bytes
	 * @param records the number of records
	 * @param checkSize check that the size of the data file is equal to the 
	 * size of the table (length * records) + offset.
	 * @throws FileNotFoundException If file does not exist, is a directory
	 * 		   rather than a regular file, or for some other reason cannot be opened for reading
	 * @throws IOException If an I/O error occurs
	 */
  public ByteWiseFileAccessor(URL url, long offset, int length, int records, boolean checkSize)
	    throws FileNotFoundException, IOException {
    this.recordLength = length;
    // Total number of bytes the table claims to occupy (may exceed what the file holds).
    long lsize = (long)length * (long)records;
    this.totalBytesRead = 0;
    int bytesRead = 0;
    try {
      //issue_189: handle the buffer size > 2GB to read a huge file
      File dataFile = new File(url.toURI());
      RandomAccessFile raf = new RandomAccessFile(dataFile, "r");
      FileChannel inChannel = raf.getChannel();    
      long fileSize = this.fileContentSize = inChannel.size();
      long sizeToRead = lsize;
      // Clamp the mapped region to what actually remains in the file past 'offset'.
      // check this again
      if (sizeToRead>(fileSize-offset)) 
      	  sizeToRead = (fileSize-offset);
      
      //https://stackoverflow.com/questions/55300976/memory-mapping-huge-files-in-java
      // Map the region in MAPPING_SIZE chunks; the final chunk may be smaller.
      long tmpSize = sizeToRead;
      for (long offset2 = 0; offset2 < sizeToRead; offset2 += MAPPING_SIZE) {
    	long size2 = Math.min(tmpSize, MAPPING_SIZE);
        mappings.add(inChannel.map(FileChannel.MapMode.READ_ONLY, (offset2+offset), size2));
        tmpSize -= size2;
        LOGGER.debug("ByteWiseFileAccessor: mappings.add: offset2,offset {},{}",offset2,offset);
        LOGGER.debug("ByteWiseFileAccessor: mappings.add: size2,mappings.size {},{}",size2,mappings.size());
      }   
      raf.close();
      // NOTE(review): from here to the end of this block the source text appears to
      // have been corrupted during extraction — text between '<' and '>' characters
      // was dropped (the same damage truncated 'List<...>' in the field declarations).
      // The next line fuses what looks like the constructor's trailing loop
      // (presumably "for (int i=0; i<...; i++)") with the middle of the private
      // method handleTooSmallMapping(); the constructor's closing code, the catch
      // block matching the 'try' above, and the start of handleTooSmallMapping()
      // are missing. Recover this region from the published artifact before editing.
      for (int i=0; i= this.mappings.size()) {
              LOGGER.error("Expecting another mapping of file content while reading record " + Integer.toString(recordNum));
              //System.exit(1);
	          return(buf);
          }

          // Get the first part of the record from aBuf.remaining() bytes and 
          // get the second part of the record in this.mappings.get(mapN+1).
	      byte[] bufPortion1 = new byte[aBuf.remaining()];
          aBuf.get(bufPortion1);
          aBuf = this.mappings.get(mapN+1);                                             // Get the next mapping.
          ((Buffer) aBuf).position(0);                                                        // Point the position to the beginning.
          byte[] bufPortion2 = new byte[this.recordLength-bufPortion1.length]; // The second portion size is the difference.
          aBuf.get(bufPortion2);                                                 // Get the 2nd portion of record from next mapping.

          LOGGER.debug("handleTooSmallBuffer:bufPortion1 {}",bufPortion1);
          LOGGER.debug("handleTooSmallBuffer:bufPortion2 {}",bufPortion2);
          LOGGER.debug("handleTooSmallBuffer:bufPortion1 + bufPortion2 {}{}",new String(bufPortion1),new String(bufPortion2));
          LOGGER.debug("handleTooSmallBuffer: bufPortion1.length,bufPortion2.length {},{}",bufPortion1.length,bufPortion2.length);
          LOGGER.debug("handleTooSmallBuffer: this.recordLength {}",this.recordLength);

          // Stitch the two portions back together into 'buf' for the caller.
          // Copy bufPortion1 and bufPortion2 to buf so it can be returned.
          System.arraycopy(bufPortion1, 0, buf, 0, bufPortion1.length);
          System.arraycopy(bufPortion2, 0, buf, bufPortion1.length, bufPortion2.length);

          LOGGER.debug("handleTooSmallBuffer: buf [{}]",new String(buf));
          LOGGER.debug("handleTooSmallBuffer: buf.length [{}]",buf.length);

          // Because the original input value of length does not know that the record span over two mappings,
          // reset it to the default length of the record.
          //length = this.recordLength;
          return(buf);  // Variable buf is both input and output.
    }

	/**
	 * Reads length bytes of data from a specified record at the given offset.
	 *
	 * @param recordNum the record number to read bytes from (1-relative)
	 * @param offset an offset within the record
	 * @param length the number of bytes to read from the record
	 * @return an array of bytes, or {@code null} if the computed file offset is negative
	 */
  public byte[] readRecordBytes(int recordNum, int offset, int length) {
	  assert recordNum > 0;
	  // Byte position of the start of the record, relative to the mapped region.
	  long fileOffset = (long)(recordNum-1)*this.recordLength;
	  byte[] buf = new byte[this.recordLength];
	  // Which mapping chunk holds the record start, and where inside that chunk.
	  int mapN = (int) (fileOffset / MAPPING_SIZE);
	  int offN = (int) (fileOffset % MAPPING_SIZE);

	  if (fileOffset<0 || mapN<0) {
		  LOGGER.error("Negative fileOffset or index of mappings list.");
		  return null;
	  }

	  ByteBuffer aBuf = mappings.get(mapN);
	  ((Buffer) aBuf).position(offN);   // need to check this

      // It is possible to read pass the buffer in variable 'buf'.  Perform a check before the get() function.
      // If not enough bytes left in the buffer, that means that the record we are reading is spanning the boundary of two
      // mappings.  So that means the first part of the record is in mappings.get(mapN) and 2nd part of the record is in
      // mappings.get(mapN+1).
      //
      // The value of MAPPING_SIZE on linux is 1073741824

      // FIX: guard debug statements with isDebugEnabled() so their argument
      // expressions are not evaluated on every record read when debug logging
      // is disabled (SLF4J defers formatting, but not argument construction).
      if (LOGGER.isDebugEnabled()) {
          LOGGER.debug("readRecordBytes:recordNum,offset {},{}",recordNum,offset);
          LOGGER.debug("readRecordBytes:recordNum,length {},{}",recordNum,length);
          LOGGER.debug("readRecordBytes:aBuf.remaining(),buf.length {},{}",aBuf.remaining(),buf.length);
      }

      if (aBuf.remaining() >= buf.length) {
          // Whole record lies within this mapping.
          aBuf.get(buf);
      } else {
          // Record straddles the boundary between mappings mapN and mapN+1;
          // stitch it together from both chunks.
          buf = this.handleTooSmallMapping(recordNum, offset, length, aBuf, mapN, offN, fileOffset, buf);
      }

	  // Slice out the requested [offset, offset+length) window of the record.
	  byte[] bytesToReturn = Arrays.copyOfRange(buf, offset, (offset + length));

      // FIX: same lazy-evaluation guard; new String(...) in particular allocated
      // and decoded the full record on every call even with debug logging off.
      if (LOGGER.isDebugEnabled()) {
          LOGGER.debug("readRecordBytes:recordNum,buf.length {},{}",recordNum,buf.length);
          LOGGER.debug("readRecordBytes: aBuf.remaining(),aBuf.hasRemaining() {},{}",aBuf.remaining(),aBuf.hasRemaining());
          LOGGER.debug("readRecordBytes: aBuf.isDirect() {}",aBuf.isDirect());
          LOGGER.debug("readRecordBytes: length,fileOffset {},{}",length,fileOffset);
          LOGGER.debug("readRecordBytes: mapN,offN {},{}",mapN,offN);
          LOGGER.debug("readRecordBytes: this.recordLength {}",this.recordLength);
          LOGGER.debug("readRecordBytes: bytesToReturn.length {}",bytesToReturn.length);
          LOGGER.debug("readRecordBytes: bytesToReturn {}",new String(bytesToReturn));
      }

	  return bytesToReturn;
  }
	
	/**
	 * Reads the byte at the current position and advances the position by one.
	 *
	 * @return the byte at the pre-increment position, taken from whichever
	 *         mapping chunk contains it.
	 */
  public byte readByte() {
	  final long pos = this.curPosition++;
	  final int mappingIndex = (int) (pos / MAPPING_SIZE);
	  final int offsetInMapping = (int) (pos % MAPPING_SIZE);
	  return this.mappings.get(mappingIndex).get(offsetInMapping);
	}
	
	/**
	 * Marks the mapping chunk that contains the current read position.
	 * Note: only that one buffer is marked, not every mapping in the list.
	 */
  public void mark() {
	  final int mappingIndex = (int) (this.curPosition / MAPPING_SIZE);
	  final Buffer target = (Buffer) this.mappings.get(mappingIndex);
	  target.mark();
	}
	
	/**
	 * Resets the buffer.
	 * 
	 */
  public void reset() {
	  // reset all buffer??
	  for (int i=0; i




© 2015 - 2024 Weber Informatics LLC | Privacy Policy