All Downloads are FREE. Search and download functionalities are using the official Maven repository.

uk.gov.nationalarchives.droid.binFileReader.FileByteReader Maven / Gradle / Ivy

/*
 * c The National Archives 2005-2006.  All rights reserved.
 * See Licence.txt for full licence details.
 *
 * Developed by:
 * Tessella Support Services plc
 * 3 Vineyard Chambers
 * Abingdon, OX14 3PX
 * United Kingdom
 * http://www.tessella.com
 *
 * Tessella/NPD/4826
 * PRONOM 5a
 *
 * $Id: FileByteReader.java,v 1.8 2006/03/13 15:15:28 linb Exp $
 *
 * $Logger: FileByteReader.java,v $
 * Revision 1.8  2006/03/13 15:15:28  linb
 * Changed copyright holder from Crown Copyright to The National Archives.
 * Added reference to licence.txt
 * Changed dates to 2005-2006
 *
 * Revision 1.7  2006/02/09 15:34:10  linb
 * Updates to javadoc and code following the code review
 *
 * Revision 1.5  2006/02/09 15:31:23  linb
 * Updates to javadoc and code following the code review
 *
 * Revision 1.5  2006/02/09 13:17:42  linb
 * Changed StreamByteReader to InputStreamByteReader
 * Refactored common code from UrlByteReader and InputStreamByteReader into new class StreamByteReader, from which they both inherit
 * Updated javadoc
 *
 * Revision 1.4  2006/02/09 12:14:16  linb
 * Changed some javadoc to allow it to be created cleanly
 *
 * Revision 1.3  2006/02/08 08:56:35  linb
 * - Added header comments
 *
 */

package uk.gov.nationalarchives.droid.binFileReader;

import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.RandomAccessFile;

import uk.gov.nationalarchives.droid.base.DroidConstants;

/**
 * The FileByteReader class is a ByteReader that reads
 * its data from a file.
 * 

* FIXME: the decision whether use randomAccess mode or buffered mode is done on * an OutOfMemory basis... while should be done on a fixed threshold. *

* This class can have two files associated with it: The file represented by it * (its IdentificationFile) and a (possibly different) backing * file. The purpose of this separation is so that this object can represent a * URL that has been downloaded or an InputStream that has been saved to disk. * * @author linb */ // TODO from UCDetector: Change visibility of Class "FileByteReader" to default - May cause compile errors! public class FileByteReader extends AbstractByteReader { // NO_UCD private int randomFileBufferSize = DroidConstants.FILE_BUFFER_SIZE; private boolean isRandomAccess = false; private byte[] fileBytes; private long myNumBytes; private long fileMarker; private RandomAccessFile myRandomAccessFile; private long myRAFoffset = 0L; private static final int MIN_RAF_BUFFER_SIZE = 65536; private static final int RAF_BUFFER_REDUCTION_FACTOR = 4; private final File file; /** * Creates a new instance of FileByteReader *

*

* This constructor uses the same file to contain the data as is specified * by theIDFile. * * @param theIDFile * the source file from which the bytes will be read. * @param readFile * true if the file is to be read */ protected FileByteReader(final IdentificationFile theIDFile, final boolean readFile) { this(theIDFile, readFile, theIDFile.getFilePath()); } /** * Creates a new instance of FileByteReader. *

*

* This constructor can set the IdentificationFile to a * different file than the actual file used. For example, if * theIDFile is a URL or stream, and is too big to be buffered * in memory, it could be written to a temporary file. This file would then * be used as a backing file to store the data. * * @param theIDFile * the file represented by this object * @param readFile * true if the file is to be read * @param filePath * the backing file (containing the data) */ FileByteReader(final IdentificationFile theIDFile, final boolean readFile, final String filePath) { super(theIDFile); this.file = new File(filePath); if (readFile) { readFile(); } } public byte[] getbuffer() { return this.fileBytes; } /** * Get a byte from file * * @param fileIndex * position of required byte in the file * @return the byte at position fileIndex in the file */ public byte getByte(final long fileIndex) { byte theByte = 0; if (this.isRandomAccess) { // If the file is being read via random acces, // then read byte from buffer, otherwise read in a new buffer. final long theArrayIndex = fileIndex - this.myRAFoffset; if ((fileIndex >= this.myRAFoffset) && (theArrayIndex < this.randomFileBufferSize)) { theByte = this.fileBytes[(int) (theArrayIndex)]; } else { try { // Create a new buffer: /* * //When a new buffer is created, the requesting file * position is //taken to be the middle of the buffer. This * is so that it will //perform equally well whether the * file is being examined from //start to end or from end to * start myRAFoffset = fileIndex - (myRAFbuffer/2); * if(myRAFoffset<0L) { myRAFoffset = 0L; } * System.out.println(" re-read file buffer"); * myRandomAccessFile.seek(myRAFoffset); * myRandomAccessFile.read(fileBytes); theByte = * fileBytes[(int)(fileIndex-myRAFoffset)]; */ if (fileIndex < this.randomFileBufferSize) { this.myRAFoffset = 0L; } else if (fileIndex < this.myRAFoffset) { this.myRAFoffset = fileIndex - this.randomFileBufferSize + 1; } else { this.myRAFoffset = fileIndex; } // System.out.println(" re-read file buffer from "+myRAFoffset+ // " for "+myRAFbuffer+" bytes"); // System.out.println(" seek start"); this.myRandomAccessFile.seek(this.myRAFoffset); // System.out.println(" read start"); this.myRandomAccessFile.read(this.fileBytes); // System.out.println(fileIndex); // System.out.println(" read end"); theByte = this.fileBytes[(int) (fileIndex - this.myRAFoffset)]; } catch (final Exception e) { throw new RuntimeException("Problem reading byte [" + fileIndex + "]", e); } } } else { // If the file is not being read by random access, then the byte // should be in the buffer array theByte = this.fileBytes[(int) fileIndex]; } return theByte; } /** * Gets the current position of the file marker. * * @return the current position of the file marker */ public long getFileMarker() { return this.fileMarker; } public RandomAccessFile getMyRandomAccessFile() { return this.myRandomAccessFile; } /** * Returns the number of bytes in the file */ public long getNumBytes() { return this.myNumBytes; } public int getRandomFileBufferSize() { return this.randomFileBufferSize; } public boolean isRandomAccess() { return this.isRandomAccess; } /** * Position the file marker at a given byte position. *

*

* The file marker is used to record how far through the file the byte * sequence matching algorithm has got. * * @param markerPosition * The byte number in the file at which to position the marker */ public void setFileMarker(final long markerPosition) { if ((markerPosition < -1L) || (markerPosition > getNumBytes())) { throw new IllegalArgumentException( " Unable to place a fileMarker at byte " + Long.toString(markerPosition) + " in file " + this.myIDFile.getFilePath() + " (size = " + Long.toString(getNumBytes()) + " bytes)"); } else { this.fileMarker = markerPosition; } } /** * Reads in the binary file specified. *

*

* If there are any problems reading in the file, it gets classified as * unidentified, with an explanatory warning message. */ private void readFile() { // If file is not readable or is empty, then it gets classified // as unidentified (with an explanatory warning) if (!this.file.exists()) { setErrorIdent(); setIdentificationWarning("File does not exist"); return; } if (!this.file.canRead()) { setErrorIdent(); setIdentificationWarning("File cannot be read"); return; } if (this.file.isDirectory()) { setErrorIdent(); setIdentificationWarning("This is a directory, not a file"); return; } FileInputStream binStream; try { binStream = new FileInputStream(this.file); } catch (final FileNotFoundException ex) { setErrorIdent(); setIdentificationWarning("File disappeared or cannot be read"); return; } try { final int numBytes = binStream.available(); if (numBytes > 0) { final BufferedInputStream buffStream = new BufferedInputStream( binStream); this.fileBytes = new byte[numBytes]; final int len = buffStream.read(this.fileBytes, 0, numBytes); if (len != numBytes) { // This means that all bytes were not successfully read setErrorIdent(); setIdentificationWarning("Error reading file: " + Integer.toString(len) + " bytes read from file when " + Integer.toString(numBytes) + " were expected"); } else if (buffStream.read() != -1) { // This means that the end of the file was not reached setErrorIdent(); setIdentificationWarning("Error reading file: Unable to read to the end"); } else { this.myNumBytes = numBytes; } buffStream.close(); } else { // If file is empty , status is error // this.setErrorIdent(); this.myNumBytes = 0L; setIdentificationWarning("Zero-length file"); } binStream.close(); this.isRandomAccess = false; } catch (final IOException e) { setErrorIdent(); setIdentificationWarning("Error reading file: " + e.toString()); } catch (final OutOfMemoryError e) { try { this.myRandomAccessFile = new RandomAccessFile(this.file, "r"); this.isRandomAccess = true; // record the file size this.myNumBytes = this.myRandomAccessFile.length(); // try reading in a buffer this.myRandomAccessFile.seek(0L); boolean tryAgain = true; while (tryAgain) { try { this.fileBytes = new byte[this.randomFileBufferSize]; this.myRandomAccessFile.read(this.fileBytes); tryAgain = false; } catch (final OutOfMemoryError e4) { this.randomFileBufferSize = this.randomFileBufferSize / RAF_BUFFER_REDUCTION_FACTOR; if (this.randomFileBufferSize < MIN_RAF_BUFFER_SIZE) { throw e4; } } } this.myRAFoffset = 0L; } catch (final FileNotFoundException e2) { setErrorIdent(); setIdentificationWarning("File disappeared or cannot be read"); } catch (final Exception e2) { try { this.myRandomAccessFile.close(); } catch (final IOException e3) { } setErrorIdent(); setIdentificationWarning("Error reading file: " + e2.toString()); } } } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy