All downloads are free. The search and download functionality uses the official Maven repository.

uk.gov.nationalarchives.droid.signatureFile.SubSequence Maven / Gradle / Ivy

The newest version!
/*
 * The National Archives 2005-2006. All rights reserved. See Licence.txt for
 * full licence details. Developed by: Tessella Support Services plc 3
 * Vineyard Chambers Abingdon, OX14 3PX United Kingdom http://www.tessella.com
 * Tessella/NPD/4305 PRONOM 4 $Id: SubSequence.java,v 1.8 2006/03/13 15:15:29
 * linb Exp $ $Logger: SubSequence.java,v $ Revision 1.8 2006/03/13 15:15:29
 * linb Changed copyright holder from Crown Copyright to The National
 * Archives. Added reference to licence.txt Changed dates to 2005-2006
 * Revision 1.7 2006/02/13 10:29:40 gaur Fixed bug in searching a short file
 * for a byte sequence at a large offset from BOF Revision 1.6 2006/02/13
 * 09:26:16 gaur Fixed bug in searching files from EOF, after first STS round
 * Revision 1.5 2006/02/09 15:04:37 gaur Corrected formatting Revision 1.4
 * 2006/02/07 17:16:23 linb - Change fileReader to ByteReader in formal
 * parameters of methods - use new static constructors - Add detection of if a
 * filePath is a URL or not Revision 1.3 2006/02/07 11:30:04 gaur Added
 * support for endianness of signature $History: SubSequence.java $
 * ***************** Version 6 ***************** User: Walm Date: 29/09/05
 * Time: 9:16 Updated in $/PRONOM4/FFIT_SOURCE/signatureFile Bug fix in
 * response to JIRA issue PRON-29. changed startPosInFile to an array + some
 * changes to the way start position options are dealt with. *****************
 * Version 5 ***************** User: Walm Date: 17/05/05 Time: 12:47 Updated
 * in $/PRONOM4/FFIT_SOURCE/signatureFile added more error trapping
 * ***************** Version 4 ***************** User: Walm Date: 5/04/05
 * Time: 18:08 Updated in $/PRONOM4/FFIT_SOURCE/signatureFile review headers
 */
package uk.gov.nationalarchives.droid.signatureFile;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import uk.gov.nationalarchives.droid.base.SimpleElement;
import uk.gov.nationalarchives.droid.binFileReader.ByteReader;

/**
 * Holds one subsequence of a byte sequence. It also contains most of the
 * logic for identifying files.
 * 
 * @author Martin Waller
 * @version 4.0.0
 */
public class SubSequence extends SimpleElement {
	private static final Logger LOG = LoggerFactory
			.getLogger(SubSequence.class);
	static boolean showProgress = false;
	private boolean bigEndian = true;
	private byte[] byteSequence;

	private List leftFragments = new ArrayList();

	private int maxSeqOffset = 0;
	private int minFragLength;
	private int minSeqOffset = 0;
	private final List> orderedLeftFragments = new ArrayList>();
	private final List> orderedRightFragments = new ArrayList>();
	private ByteSequence parentByteSequence;
	private int parentSignature;
	private int position;
	private String reference;
	private List rightFragments = new ArrayList();
	private String sequence;
	// shiftFunction shift;
	private final long[] shiftFunction = new long[256];

	/* setters */
	/**
	 * Appends a fragment to the list of left fragments of this subsequence.
	 * 
	 * @param lf
	 *            the left fragment to append
	 */
	public void addLeftFragment(final LeftFragment lf) {
		leftFragments.add(lf);
	}

	/**
	 * Appends a fragment to the list of right fragments of this subsequence.
	 * 
	 * @param lf
	 *            the right fragment to append
	 */
	public void addRightFragment(final RightFragment lf) {
		rightFragments.add(lf);
	}

	/**
	 * Matches the left fragments of this subsequence, one fragment position
	 * at a time, inside the given window of the file.
	 * <p>
	 * The search is seeded with {@code offsetRange + 1} consecutive candidate
	 * positions, starting at the edge of the window that the search sets out
	 * from. For every fragment position each alternative fragment is tried
	 * at each candidate, and the distinct match ends become the candidates
	 * for the next fragment position.
	 * 
	 * @param targetFile
	 *            the binary file to be identified
	 * @param leftBytePos
	 *            left-most byte position of allowed search window on file
	 * @param rightBytePos
	 *            right-most byte position of allowed search window on file
	 * @param searchDirection
	 *            1 for a left to right search, -1 for right to left
	 * @param offsetRange
	 *            range of possible start positions in the direction of
	 *            searchDirection
	 * @param bigEndian
	 *            True iff our parent signature is big-endian
	 * @return the distinct positions of the last byte consumed by a complete
	 *         match, ordered in the direction of the search (ascending for a
	 *         left to right search, descending otherwise); a zero-length
	 *         array if some fragment position could not be matched. When
	 *         there are no left fragments, the seed positions moved one byte
	 *         back against the search direction are returned.
	 */
	private long[] bytePosForLeftFragments(final ByteReader targetFile,
			final long leftBytePos, final long rightBytePos,
			final int searchDirection, final int offsetRange,
			final boolean bigEndian) {
		final boolean forward = (searchDirection == 1);
		final int positionCount = getNumFragmentPositions(true);

		// size the working array for the worst case: every seed position
		// combined with every alternative at every fragment position
		int capacity = offsetRange + 1;
		for (int pos = 1; pos <= positionCount; pos++) {
			capacity *= getNumAlternativeFragments(true, pos);
		}
		final long[] candidates = new long[capacity];

		// seed the candidates from the edge of the window the search starts at
		final long origin = forward ? leftBytePos : rightBytePos;
		for (int step = 0; step <= offsetRange; step++) {
			candidates[step] = origin + step * searchDirection;
		}
		int candidateCount = offsetRange + 1;

		// a left to right search works from the highest fragment position
		// down to 1; a right to left search works from 1 upwards
		int fragPos = forward ? positionCount : 1;
		boolean allMatched = true;
		while (allMatched && (fragPos <= positionCount) && (fragPos >= 1)) {
			final int alternatives = getNumAlternativeFragments(true, fragPos);
			// possible end positions once this fragment position is examined
			final long[] matchEnds = new long[alternatives * candidateCount];
			int matchCount = 0;

			for (int c = 0; c < candidateCount; c++) {
				// the candidate replaces the window edge the search starts from
				final long windowLeft = forward ? candidates[c] : leftBytePos;
				final long windowRight = forward ? rightBytePos
						: candidates[c];
				for (int alt = 0; alt < alternatives; alt++) {
					final long fragEnd = endBytePosForSeqFrag(targetFile,
							windowLeft, windowRight, true, searchDirection,
							fragPos, alt, bigEndian);
					if (fragEnd > -1L) {
						// a match: continue from the byte just past it
						matchEnds[matchCount] = fragEnd + searchDirection;
						matchCount++;
					}
				}
			}

			if (matchCount == 0) {
				allMatched = false;
			} else {
				// keep every distinct end position exactly once
				candidateCount = 0;
				for (int m = 0; m < matchCount; m++) {
					boolean alreadyKept = false;
					for (int k = 0; (k < candidateCount) && !alreadyKept; k++) {
						alreadyKept = (candidates[k] == matchEnds[m]);
					}
					if (!alreadyKept) {
						candidates[candidateCount] = matchEnds[m];
						candidateCount++;
					}
				}
			}
			fragPos -= searchDirection;
		}

		if (!allMatched) {
			// no possible positions found
			return new long[0];
		}

		final long[] result = new long[candidateCount];

		// sorting the negated values yields descending order for a right to
		// left search
		final boolean descending = (searchDirection < 0);
		if (descending) {
			for (int i = 0; i < candidateCount; i++) {
				candidates[i] = -candidates[i];
			}
		}
		Arrays.sort(candidates, 0, candidateCount);

		for (int i = 0; i < candidateCount; i++) {
			final long ordered = descending ? -candidates[i] : candidates[i];
			// step back from "one past the match" to the last matched byte
			result[i] = ordered - searchDirection;
		}
		return result;
	}

	/**
	 * Matches the right fragments of this subsequence, one fragment position
	 * at a time, inside the given window of the file.
	 * <p>
	 * The search is seeded with {@code offsetRange + 1} consecutive candidate
	 * positions, starting at the edge of the window that the search sets out
	 * from. For every fragment position each alternative fragment is tried
	 * at each candidate, and the distinct match ends become the candidates
	 * for the next fragment position.
	 * 
	 * @param targetFile
	 *            the binary file to be identified
	 * @param leftBytePos
	 *            left-most byte position of allowed search window on file
	 * @param rightBytePos
	 *            right-most byte position of allowed search window on file
	 * @param searchDirection
	 *            1 for a left to right search, -1 for right to left
	 * @param offsetRange
	 *            range of possible start positions in the direction of
	 *            searchDirection
	 * @param bigEndian
	 *            True iff our parent signature is big-endian
	 * @return the distinct positions of the last byte consumed by a complete
	 *         match, ordered in the direction of the search (ascending for a
	 *         left to right search, descending otherwise); a zero-length
	 *         array if some fragment position could not be matched. When
	 *         there are no right fragments, the seed positions moved one byte
	 *         back against the search direction are returned.
	 */
	private long[] bytePosForRightFragments(final ByteReader targetFile,
			final long leftBytePos, final long rightBytePos,
			final int searchDirection, final int offsetRange,
			final boolean bigEndian) {
		final boolean backward = (searchDirection == -1);
		final int positionCount = getNumFragmentPositions(false);

		// size the working array for the worst case: every seed position
		// combined with every alternative at every fragment position
		int capacity = offsetRange + 1;
		for (int pos = 1; pos <= positionCount; pos++) {
			capacity *= getNumAlternativeFragments(false, pos);
		}
		final long[] candidates = new long[capacity];

		// seed the candidates from the edge of the window the search starts at
		final long origin = backward ? rightBytePos : leftBytePos;
		for (int step = 0; step <= offsetRange; step++) {
			candidates[step] = origin + step * searchDirection;
		}
		int candidateCount = offsetRange + 1;

		// a right to left search works from the highest fragment position
		// down to 1; a left to right search works from 1 upwards
		int fragPos = backward ? positionCount : 1;
		boolean allMatched = true;
		while (allMatched && (fragPos <= positionCount) && (fragPos >= 1)) {
			final int alternatives = getNumAlternativeFragments(false,
					fragPos);
			// possible end positions once this fragment position is examined
			final long[] matchEnds = new long[alternatives * candidateCount];
			int matchCount = 0;

			for (int c = 0; c < candidateCount; c++) {
				// the candidate replaces the window edge the search starts from
				final long windowLeft = backward ? leftBytePos
						: candidates[c];
				final long windowRight = backward ? candidates[c]
						: rightBytePos;
				for (int alt = 0; alt < alternatives; alt++) {
					final long fragEnd = endBytePosForSeqFrag(targetFile,
							windowLeft, windowRight, false, searchDirection,
							fragPos, alt, bigEndian);
					if (fragEnd > -1L) {
						// a match: continue from the byte just past it
						matchEnds[matchCount] = fragEnd + searchDirection;
						matchCount++;
					}
				}
			}

			if (matchCount == 0) {
				allMatched = false;
			} else {
				// keep every distinct end position exactly once
				candidateCount = 0;
				for (int m = 0; m < matchCount; m++) {
					boolean alreadyKept = false;
					for (int k = 0; (k < candidateCount) && !alreadyKept; k++) {
						alreadyKept = (candidates[k] == matchEnds[m]);
					}
					if (!alreadyKept) {
						candidates[candidateCount] = matchEnds[m];
						candidateCount++;
					}
				}
			}
			fragPos += searchDirection;
		}

		if (!allMatched) {
			// no possible positions found
			return new long[0];
		}

		final long[] result = new long[candidateCount];

		// sorting the negated values yields descending order for a right to
		// left search
		final boolean descending = (searchDirection < 0);
		if (descending) {
			for (int i = 0; i < candidateCount; i++) {
				candidates[i] = -candidates[i];
			}
		}
		Arrays.sort(candidates, 0, candidateCount);

		for (int i = 0; i < candidateCount; i++) {
			final long ordered = descending ? -candidates[i] : candidates[i];
			// step back from "one past the match" to the last matched byte
			result[i] = ordered - searchDirection;
		}
		return result;
	}

	/**
	 * Searches for one fragment between the leftmost and rightmost byte
	 * positions that are given, trying successive start positions in the
	 * direction of the search until the fragment matches or the search area
	 * is exhausted.
	 * 
	 * @param targetFile
	 *            The file that is being reviewed for identification
	 * @param leftEndBytePos
	 *            leftmost position in file at which to search
	 * @param rightEndBytePos
	 *            rightmost position in file at which to search
	 * @param leftFrag
	 *            flag to indicate whether looking at left or right fragments
	 * @param searchDirection
	 *            direction in which search is carried out (1 for left to
	 *            right, -1 for right to left)
	 * @param fragPos
	 *            position of left/right sequence fragment to use
	 * @param fragIndex
	 *            index of fragment within the position (where alternatives
	 *            exist)
	 * @param bigEndian
	 *            True iff our parent signature is big-endian
	 * @return the position of the last byte (in the direction of the search)
	 *         taken up by the first match found, or -1 if the fragment was
	 *         not found
	 */
	private long endBytePosForSeqFrag(final ByteReader targetFile,
			final long leftEndBytePos, final long rightEndBytePos,
			final boolean leftFrag, final int searchDirection,
			final int fragPos, final int fragIndex, final boolean bigEndian) {
		// the fragment does not change during the search, so fetch it once
		final SideFragment fragment = getFragment(leftFrag, fragPos,
				fragIndex);
		final int numBytes = fragment.getNumBytes();

		// A left fragment searched right to left, or a right fragment
		// searched left to right, is positioned by its own offsets. In the
		// opposite direction the offsets of the first alternative at the
		// next fragment position are used, or 0 when there is no such
		// position.
		final boolean useOwnOffsets = leftFrag ? (searchDirection == -1)
				: (searchDirection == 1);
		final int minOffset;
		final int maxOffset;
		if (useOwnOffsets) {
			minOffset = fragment.getMinOffset();
			maxOffset = fragment.getMaxOffset();
		} else if (fragPos < getNumFragmentPositions(leftFrag)) {
			final SideFragment next = getFragment(leftFrag, fragPos + 1, 0);
			minOffset = next.getMinOffset();
			maxOffset = next.getMaxOffset();
		} else {
			minOffset = 0;
			maxOffset = 0;
		}

		// first and last start positions to try, taking the offsets and the
		// fragment length into account
		final long step = searchDirection;
		long startPosInFile;
		final long lastStartPosInFile;
		if (searchDirection == -1) {
			startPosInFile = rightEndBytePos - minOffset;
			lastStartPosInFile = Math.max(leftEndBytePos + numBytes - 1L,
					rightEndBytePos - maxOffset);
		} else {
			startPosInFile = leftEndBytePos + minOffset;
			lastStartPosInFile = Math.min(rightEndBytePos - numBytes + 1L,
					leftEndBytePos + maxOffset);
		}

		final int numSpecifiers = fragment.getNumByteSeqSpecifiers();
		// specifiers are consumed in the direction of the search
		final int firstSpecifier = (searchDirection == 1) ? 0
				: numSpecifiers - 1;

		while (step * (lastStartPosInFile - startPosInFile) >= 0L) {
			long tempFileMarker = startPosInFile;
			boolean missMatchFound = false;
			for (int i = firstSpecifier; !missMatchFound && (0 <= i)
					&& (i < numSpecifiers); i += searchDirection) {
				missMatchFound = !fragment.getByteSeqSpecifier(i)
						.matchesByteSequence(targetFile, tempFileMarker,
								searchDirection, bigEndian);
				if (!missMatchFound) {
					tempFileMarker += searchDirection
							* fragment.getByteSeqSpecifier(i).getNumBytes();
				}
			}
			if (!missMatchFound) {
				// the marker is one past the match; step back onto its last
				// byte
				return tempFileMarker - step;
			}
			startPosInFile += step;
		}
		return -1L;
	}

	/**
	 * Returns one byte of the main byte sequence of this subsequence.
	 * 
	 * @param theIndex
	 *            zero-based index into the byte sequence
	 * @return the byte stored at that index
	 */
	public byte getByte(final int theIndex) {
		final byte[] bytes = this.byteSequence;
		return bytes[theIndex];
	}

	/**
	 * Returns the byte sequence that was registered with
	 * {@code setByteSequence} as the parent of this subsequence.
	 * 
	 * @return the parent byte sequence
	 */
	public ByteSequence getByteSequence() {
		final ByteSequence parent = this.parentByteSequence;
		return parent;
	}

	// TODO from UCDetector: Change visibility of Method
	// "SubSequence.getFragment(boolean,int,int)" to private
	/**
	 * Looks up one fragment in the position-ordered fragment lists.
	 * 
	 * @param leftFrag
	 *            true to look among the left fragments, false for the right
	 *            fragments
	 * @param thePosition
	 *            fragment position, counted from 1
	 * @param theIndex
	 *            zero-based index among the alternatives at that position
	 * @return the requested fragment
	 */
	public SideFragment getFragment(final boolean leftFrag, // NO_UCD
			final int thePosition, final int theIndex) {
		final List orderedFragments = leftFrag ? this.orderedLeftFragments
				: this.orderedRightFragments;
		final ArrayList alternatives = (ArrayList) orderedFragments
				.get(thePosition - 1);
		return (SideFragment) alternatives.get(theIndex);
	}

	/**
	 * Interprets bytes in a file as an offset.
	 * <p>
	 * The {@code getIndirectOffsetLength()} bytes starting at
	 * {@code getIndirectOffsetLocation()} of the parent byte sequence are
	 * read as an unsigned number according to the endianness of this
	 * subsequence. When the parent's reference ends with "EOFoffset" the
	 * location is counted back from the last byte of the file.
	 * 
	 * @param targetFile
	 *            the file to read the offset from
	 * @return the offset stored in the file
	 * @throws ArithmeticException
	 *             if the stored value does not fit in an int. The only
	 *             caller treats any exception as "no match".
	 */
	private int getIndirectOffset(final ByteReader targetFile) {
		final ByteSequence parent = getByteSequence();
		long offsetLocation = parent.getIndirectOffsetLocation();
		if (parent.getReference().endsWith("EOFoffset")) {
			offsetLocation = targetFile.getNumBytes() - offsetLocation - 1;
		}
		final int offsetLength = parent.getIndirectOffsetLength();
		final boolean mostSignificantFirst = isBigEndian();

		// In the case of indirect BOF or indirect EOF byte sequences we need
		// to read the file to get the offset. Bytes are visited from least
		// to most significant.
		long offset = 0L;
		for (int significance = 0; significance < offsetLength; significance++) {
			final int index = mostSignificantFirst ? offsetLength - 1
					- significance : significance;
			final int byteValue = targetFile.getByte(offsetLocation + index) & 0xFF;
			if (byteValue != 0) {
				if (significance >= 4) {
					// anything beyond the fourth byte cannot fit in an int
					throw new ArithmeticException(
							"Indirect offset does not fit in an int");
				}
				offset += ((long) byteValue) << (8 * significance);
			}
		}
		if (offset > Integer.MAX_VALUE) {
			// the original narrowing assignment silently wrapped here
			throw new ArithmeticException(
					"Indirect offset does not fit in an int: " + offset);
		}
		return (int) offset;
	}

	/**
	 * @return the left fragments in the order they were added, or null once
	 *         prepareSeqFragments has run
	 */
	public List<LeftFragment> getLeftFragments() {
		return this.leftFragments;
	}

	public int getMaxSeqOffset() {
		return this.maxSeqOffset;
	}

	public int getMinFragLength() {
		return this.minFragLength;
	}

	public int getMinSeqOffset() {
		return this.minSeqOffset;
	}

	// TODO from UCDetector: Change visibility of Method
	// "SubSequence.getNumAlternativeFragments(boolean,int)" to private
	/**
	 * Counts the alternative fragments stored for one fragment position.
	 * 
	 * @param leftFrag
	 *            true for the left fragments, false for the right fragments
	 * @param thePosition
	 *            fragment position, counted from 1
	 * @return the number of alternatives at that position
	 */
	public int getNumAlternativeFragments(final boolean leftFrag, // NO_UCD
			final int thePosition) {
		final List<List<SideFragment>> ordered = leftFrag ? this.orderedLeftFragments
				: this.orderedRightFragments;
		return ordered.get(thePosition - 1).size();
	}

	/**
	 * @return the number of bytes in the main byte sequence
	 */
	public int getNumBytes() {
		return this.byteSequence.length;
	}

	/* getters */
	// TODO from UCDetector: Change visibility of Method
	// "SubSequence.getNumFragmentPositions(boolean)" to private
	/**
	 * @param leftFrag
	 *            true for the left fragments, false for the right fragments
	 * @return the number of fragment positions on that side
	 */
	public int getNumFragmentPositions(final boolean leftFrag) { // NO_UCD
		if (leftFrag) {
			return this.orderedLeftFragments.size();
		} else {
			return this.orderedRightFragments.size();
		}
	}

	/**
	 * Get the id of the internal signature that this sequence belongs to
	 * 
	 * @return the parent signature id
	 */
	public int getParentSignature() {
		return this.parentSignature;
	}

	public int getPosition() {
		return this.position;
	}

	// TODO from UCDetector: Change visibility of Method
	// "SubSequence.getRawLeftFragment(int)" to private
	public LeftFragment getRawLeftFragment(final int theIndex) { // NO_UCD
		return this.leftFragments.get(theIndex);
	}

	// TODO from UCDetector: Change visibility of Method
	// "SubSequence.getRawRightFragment(int)" to private
	public RightFragment getRawRightFragment(final int theIndex) { // NO_UCD
		return this.rightFragments.get(theIndex);
	}

	/**
	 * @return the right fragments in the order they were added, or null once
	 *         prepareSeqFragments has run
	 */
	public List<RightFragment> getRightFragments() {
		return this.rightFragments;
	}

	public String getSequence() {
		return this.sequence;
	}

	/**
	 * @param theByteValue
	 *            a (signed) byte value
	 * @return the shift distance stored for that byte value
	 */
	public long getShift(final byte theByteValue) {
		// shiftFunction is a long[256] array indexed by signed value + 128
		return this.shiftFunction[theByteValue + 128];
	}

	public boolean isBigEndian() {
		return this.bigEndian;
	}

	/**
	 * Is this a BOF subsequence? If this subsequence does not match then we
	 * can reject the entire signature.
	 * 
	 * @return true if the reference is "BOFoffset" (ignoring case) and this
	 *         is the subsequence at position 1; false if no reference is set
	 */
	public boolean isBOF() {
		return "BOFoffset".equalsIgnoreCase(this.reference)
				&& (this.position == 1);
	}

	/**
	 * Is this an EOF subsequence? If this subsequence does not match then we
	 * can reject the entire signature.
	 * 
	 * @return true if the reference is "EOFoffset" (ignoring case) and this
	 *         is the subsequence at position 1; false if no reference is set
	 */
	public boolean isEOF() {
		return "EOFoffset".equalsIgnoreCase(this.reference)
				&& (this.position == 1);
	}

	/**
	 * Searches for this subsequence after the current file marker position in
	 * the file. Moves the file marker to the end of this subsequence.
	 * 
	 * @param targetFile
	 *            the binary file to be identified
	 * @param reverseOrder
	 *            true if file is being searched from right to left
	 * @param bigEndian
	 *            True iff our parent signature is big-endian
	 * @return true if the main sequence and all of its fragments were found
	 */
	protected boolean isFoundAfterFileMarker(final ByteReader targetFile,
			final boolean reverseOrder, final boolean bigEndian) {
		boolean subSeqFound = false;
		try {
			final long fileSize = targetFile.getNumBytes() - 1;
			final int searchDirection = reverseOrder ? -1 : 1;

			// get the current file marker
			long startPosInFile = targetFile.getFileMarker();
			// Add the minimum offset before start of sequence and update the
			// file marker accordingly
			startPosInFile = startPosInFile
					+ (searchDirection * getMinSeqOffset());
			if (fileSize < startPosInFile - 1) {
				// We're looking for a sequence of bytes at an offset which is
				// longer than the file itself
				return false;
			}
			targetFile.setFileMarker(startPosInFile);

			// start searching for main sequence after the minimum length of
			// the relevant fragments
			startPosInFile = startPosInFile
					+ (searchDirection * getMinFragLength());
			final int numSeqBytes = getNumBytes();
			final int byteLoopStart = reverseOrder ? numSeqBytes - 1 : 0;
			final int byteLoopEnd = reverseOrder ? 0 : numSeqBytes - 1;

			while (!subSeqFound) {
				// compare sequence with file contents directly at fileMarker
				// position
				boolean missMatchFound = false;

				// Start by checking the last byte in the window on the file.
				// If this byte is different from the last byte in the
				// subsequence, then we may shift the window according to the
				// value of this byte. In practice, this saves us from
				// unnecessarily checking file bytes to calculate the shift.
				// NOTE(review): for a right to left search this reads the
				// file at startPosInFile + 0, whereas the full comparison
				// below aligns byteSequence[0] with startPosInFile -
				// (numSeqBytes - 1) - confirm this is intended.
				final byte lastByte = targetFile.getByte(startPosInFile
						+ byteLoopEnd);
				if (this.byteSequence[byteLoopEnd] != lastByte) {
					startPosInFile += (this.shiftFunction[128 + lastByte] - 1);
					if ((startPosInFile < 0L) || (startPosInFile > fileSize)) {
						break;
					}
				} else {
					// The last bytes match, so check the rest.
					for (int iByte = byteLoopStart; (!missMatchFound)
							&& (iByte <= numSeqBytes - 1) && (iByte >= 0); iByte += searchDirection) {
						missMatchFound = (this.byteSequence[iByte] != targetFile
								.getByte(startPosInFile + iByte
										- byteLoopStart));
					}
					if (!missMatchFound) {
						// subsequence was found at position fileMarker in
						// the file. Now search for fragments between
						// original fileMarker and startPosInFile
						if (reverseOrder) {
							final long[] rightFragEndArray = bytePosForRightFragments(
									targetFile, startPosInFile + 1,
									targetFile.getFileMarker(), 1, 0,
									bigEndian);
							if (rightFragEndArray.length == 0) {
								missMatchFound = true;
							} else {
								final long[] leftFragEndArray = bytePosForLeftFragments(
										targetFile, 0, startPosInFile
												- numSeqBytes, -1, 0,
										bigEndian);
								if (leftFragEndArray.length == 0) {
									missMatchFound = true;
								} else {
									final long leftFragEnd = leftFragEndArray[0];
									targetFile
											.setFileMarker(leftFragEnd - 1L);
									subSeqFound = true;
								}
							}
						} else {
							// search is in forward direction
							final long[] leftFragEndArray = bytePosForLeftFragments(
									targetFile, targetFile.getFileMarker(),
									startPosInFile - 1L, -1, 0, bigEndian);
							if (leftFragEndArray.length == 0) {
								missMatchFound = true;
							} else {
								final long[] rightFragEndArray = bytePosForRightFragments(
										targetFile, startPosInFile
												+ numSeqBytes,
										targetFile.getNumBytes() - 1L, 1, 0,
										bigEndian);
								if (rightFragEndArray.length == 0) {
									missMatchFound = true;
								} else {
									final long rightFragEnd = rightFragEndArray[0];
									targetFile
											.setFileMarker(rightFragEnd + 1L);
									subSeqFound = true;
								}
							}
						}
					}
					if (missMatchFound) {
						// If a mismatch is found, then shift the window by a
						// shift calculated from the value of the file byte
						// occurring one place after the window position.
						startPosInFile += this.shiftFunction[128 + targetFile
								.getByte(startPosInFile
										+ (searchDirection * numSeqBytes))];
						if ((startPosInFile < 0L)
								|| (startPosInFile > fileSize)) {
							break;
						}
					}
				}
			}
		} catch (final IndexOutOfBoundsException ignored) {
			// Deliberately ignored. This happens when the end of the file is
			// reached; the exception is allowed to be thrown to avoid
			// repeatedly checking if the index is valid and to hence improve
			// the performance of DROID. It is also thrown if targetFile is a
			// URLByteReader: the embedded HeapByteBuffer checks each access
			// and throws if we are on or past the limit.
		}
		return subSeqFound;
	}

	/**
	 * Searches for this subsequence at the start of the current file. Moves
	 * the file marker to the end of this subsequence.
	 * 
	 * @param targetFile
	 *            the binary file to be identified
	 * @param reverseOrder
	 *            true if file is being searched from right to left
	 * @param bigEndian
	 *            True iff our parent signature is big-endian
	 * @return true if the initial fragments, the main sequence and the
	 *         remaining fragments were all found
	 */
	protected boolean isFoundAtStartOfFile(final ByteReader targetFile,
			final boolean reverseOrder, final boolean bigEndian) {
		try {
			final int searchDirection = reverseOrder ? -1 : 1;
			int minStartOffset = getMinSeqOffset();
			int maxStartOffset = getMaxSeqOffset();

			// Get any indirect offset
			if (this.reference.startsWith("Indirect")) {
				try {
					final int indirectOffset = getIndirectOffset(targetFile);
					minStartOffset += indirectOffset;
					maxStartOffset += indirectOffset;
				} catch (final Exception e) {
					// If an exception is thrown, we can assume that the file
					// did not match the indirect offset, e.g. the indirect
					// offset found could be too large to be held in an int
					// type
					return false;
				}
			}

			final long firstStartPos = reverseOrder ? targetFile
					.getNumBytes() - minStartOffset - 1 : minStartOffset;
			boolean subseqFound = true;
			// the fragments matched first are the ones the search meets
			// before the main sequence
			final boolean leftFrag = !reverseOrder;

			// match initial fragment
			long[] startPosInFile;
			if (reverseOrder) {
				startPosInFile = bytePosForRightFragments(targetFile, 0,
						firstStartPos, -1,
						(maxStartOffset - minStartOffset), bigEndian);
			} else {
				startPosInFile = bytePosForLeftFragments(targetFile,
						firstStartPos, targetFile.getNumBytes() - 1, 1,
						(maxStartOffset - minStartOffset), bigEndian);
			}
			int numOptions = startPosInFile.length;
			if (numOptions == 0) {
				subseqFound = false;
			} else {
				for (int i = 0; i < numOptions; i++) {
					startPosInFile[i] += searchDirection;
				}
			}

			// match main sequence
			if (subseqFound) {
				// move startPosInFile according to min offset of last
				// fragment looked at
				int minOffset = 0;
				int maxOffset = 0;
				if (getNumFragmentPositions(leftFrag) > 0) {
					minOffset = getFragment(leftFrag, 1, 0).getMinOffset();
					maxOffset = getFragment(leftFrag, 1, 0).getMaxOffset();
					for (int i = 0; i < numOptions; i++) {
						startPosInFile[i] += (minOffset * searchDirection);
					}
				}

				// add new possible values for startPosInFile to allow for
				// difference between maxOffset and minOffset
				final int offsetRange = maxOffset - minOffset;
				if (offsetRange > 0) {
					final long[] widened = new long[numOptions
							* (offsetRange + 1)];
					for (int i = 0; i <= offsetRange; i++) {
						for (int j = 0; j < numOptions; j++) {
							widened[j + i * numOptions] = startPosInFile[j]
									+ (i * searchDirection);
						}
					}
					Arrays.sort(widened);
					int widenedCount = 1;
					for (int i = 1; i < widened.length; i++) {
						if (widened[i] > widened[widenedCount - 1]) {
							widened[widenedCount] = widened[i];
							widenedCount++;
						}
					}

					// now copy these back to the startPosInFile array
					// (sorted in searchDirection). The widened set is larger
					// than the original array, so a new array is needed.
					numOptions = widenedCount;
					startPosInFile = new long[numOptions];
					if (searchDirection > 0) {
						System.arraycopy(widened, 0, startPosInFile, 0,
								numOptions);
					} else {
						// reverse order copy
						for (int i = 0; i < numOptions; i++) {
							startPosInFile[i] = widened[numOptions - 1 - i];
						}
					}
				}

				// check that the end of the file is not going to be reached
				final int numSeqBytes = getNumBytes();
				final long numBytesInFile = targetFile.getNumBytes();
				if (reverseOrder) {
					// cutoff if startPosInFile is too close to start of file
					for (int i = 0; i < numOptions; i++) {
						if (startPosInFile[i] < (numSeqBytes - 1L)) {
							numOptions = i;
							break;
						}
					}
				} else {
					// cutoff if startPosInFile is too close to end of file
					for (int i = 0; i < numOptions; i++) {
						if (startPosInFile[i] > (numBytesInFile - numSeqBytes)) {
							numOptions = i;
							break;
						}
					}
				}

				final int byteLoopStart = reverseOrder ? numSeqBytes - 1 : 0;
				for (int iOption = 0; iOption < numOptions; iOption++) {
					// compare sequence with file contents directly at
					// fileMarker position
					long tempFileMarker = startPosInFile[iOption];
					boolean provSeqMatch = true;

					// check whether the file and signature sequences match
					for (int iByte = byteLoopStart; (provSeqMatch)
							&& (iByte <= numSeqBytes - 1) && (iByte >= 0); iByte += searchDirection) {
						provSeqMatch = (this.byteSequence[iByte] == targetFile
								.getByte(tempFileMarker));
						tempFileMarker += searchDirection;
					}
					// -2 marks "no match"; on success the position just past
					// the match is stored
					startPosInFile[iOption] = provSeqMatch ? tempFileMarker
							: -2L;
				}

				// check the startPosInFile array: remove -2 values, reorder
				// and remove duplicates
				Arrays.sort(startPosInFile, 0, numOptions);
				int keptCount = 0;
				final long[] kept = new long[numOptions];
				if ((numOptions > 0) && (startPosInFile[0] >= -1L)) {
					kept[0] = startPosInFile[0];
					keptCount = 1;
				}
				for (int i = 1; i < numOptions; i++) {
					if (startPosInFile[i] > startPosInFile[i - 1]) {
						kept[keptCount] = startPosInFile[i];
						keptCount++;
					}
				}

				if (keptCount == 0) {
					subseqFound = false;
				} else {
					numOptions = keptCount;
					if (searchDirection < 0) {
						// for right to left search direction, reorder in
						// reverse
						for (int iOption = 0; iOption < numOptions; iOption++) {
							startPosInFile[iOption] = kept[numOptions - 1
									- iOption];
						}
					} else {
						// for left to right search direction, copy over as is
						System.arraycopy(kept, 0, startPosInFile, 0,
								numOptions);
					}
				}
			}

			// match remaining sequence fragment
			long newValueStartPosInFile = 0L;
			if (subseqFound) {
				subseqFound = false;
				for (int i = 0; (i < numOptions) && !subseqFound; i++) {
					if (reverseOrder) {
						final long[] fragEnds = bytePosForLeftFragments(
								targetFile, 0L, startPosInFile[i], -1, 0,
								bigEndian);
						if (fragEnds.length != 0) {
							subseqFound = true;
							newValueStartPosInFile = fragEnds[0] - 1L;
						}
					} else {
						final long[] fragEnds = bytePosForRightFragments(
								targetFile, startPosInFile[i],
								targetFile.getNumBytes() - 1L, 1, 0,
								bigEndian);
						if (fragEnds.length != 0) {
							subseqFound = true;
							newValueStartPosInFile = fragEnds[0] + 1L;
						}
					}
				}
			}

			// update the file marker
			if (subseqFound) {
				targetFile.setFileMarker(newValueStartPosInFile);
			}
			return subseqFound;
		} catch (final IndexOutOfBoundsException e) {
			// If an indirect offset points to a place that is after the end
			// of the file, then this exception is thrown and it can be
			// assumed that the signature is not compliant
			return false;
		}
	}

	/**
	 * Re-orders the left and right sequence fragments in increasing position
	 * order. This method must be called after the signature file has been
	 * parsed and before running any file identifications. The raw fragment
	 * lists are set to null afterwards.
	 */
	protected void prepareSeqFragments() {
		/* Left fragments */
		// Determine the number of fragment positions there are
		int numFrags = 0;
		for (int i = 0; i < this.leftFragments.size(); i++) {
			final int currentPosition = getRawLeftFragment(i).getPosition();
			if (currentPosition > numFrags) {
				numFrags = currentPosition;
			}
		}
		// initialise all necessary fragment lists (one for each position)
		for (int i = 0; i < numFrags; i++) {
			final List<SideFragment> alternativeFragments = new ArrayList<SideFragment>();
			this.orderedLeftFragments.add(alternativeFragments);
		}
		// Add fragments to new structure
		for (int i = 0; i < this.leftFragments.size(); i++) {
			final int currentPosition = getRawLeftFragment(i).getPosition();
			this.orderedLeftFragments.get(currentPosition - 1).add(
					getRawLeftFragment(i));
		}
		// clear out unnecessary info
		this.leftFragments = null;

		/* Right fragments */
		// Determine the number of fragment positions there are
		numFrags = 0;
		for (int i = 0; i < this.rightFragments.size(); i++) {
			final int currentPosition = getRawRightFragment(i).getPosition();
			if (currentPosition > numFrags) {
				numFrags = currentPosition;
			}
		}
		// initialise all necessary fragment lists (one for each position)
		for (int i = 0; i < numFrags; i++) {
			final List<SideFragment> alternativeFragments = new ArrayList<SideFragment>();
			this.orderedRightFragments.add(alternativeFragments);
		}
		// Add fragments to new structure
		for (int i = 0; i < this.rightFragments.size(); i++) {
			final int currentPosition = getRawRightFragment(i).getPosition();
			this.orderedRightFragments.get(currentPosition - 1).add(
					getRawRightFragment(i));
		}
		// clear out unnecessary info
		this.rightFragments = null;
	}

	/**
	 * Sets one attribute parsed from the signature file. Recognised names
	 * are "Position", "SubSeqMinOffset", "SubSeqMaxOffset" and
	 * "MinFragLength"; any other name is logged as a warning and ignored.
	 * 
	 * @param name
	 *            the attribute name
	 * @param value
	 *            the attribute value, parsed as an integer
	 */
	@Override
	public void setAttributeValue(final String name, final String value) {
		if (name.equals("Position")) {
			setPosition(Integer.parseInt(value));
		} else if (name.equals("SubSeqMinOffset")) {
			setMinSeqOffset(Integer.parseInt(value));
		} else if (name.equals("SubSeqMaxOffset")) {
			setMaxSeqOffset(Integer.parseInt(value));
		} else if (name.equals("MinFragLength")) {
			setMinFragLength(Integer.parseInt(value));
		} else {
			final String theCMDMessage = "WARNING: Unknown XML attribute "
					+ name + " found for " + getElementName() + " ";
			LOG.warn(theCMDMessage);
		}
	}

	public void setBigEndian(final boolean bigEndian) {
		this.bigEndian = bigEndian;
	}

	public void setByteSequence(final ByteSequence byteSequence) {
		this.parentByteSequence = byteSequence;
	}

	/**
	 * Sets the same shift distance for every byte value.
	 * 
	 * @param theValue
	 *            the shift distance as decimal text
	 */
	public void setDefaultShift(final String theValue) {
		// parse once rather than once per table entry
		final long defaultShift = Long.parseLong(theValue);
		Arrays.fill(this.shiftFunction, defaultShift);
	}

	/**
	 * Sets the maximum sequence offset; it is raised to the minimum offset
	 * if it would otherwise be smaller.
	 */
	public void setMaxSeqOffset(final int theOffset) {
		this.maxSeqOffset = theOffset;
		if (this.maxSeqOffset < this.minSeqOffset) {
			this.maxSeqOffset = this.minSeqOffset;
		}
	}

	public void setMinFragLength(final int theLength) {
		this.minFragLength = theLength;
	}

	/**
	 * Sets the minimum sequence offset; the maximum offset is raised to
	 * match if it would otherwise be smaller.
	 */
	public void setMinSeqOffset(final int theOffset) {
		this.minSeqOffset = theOffset;
		if (this.maxSeqOffset < this.minSeqOffset) {
			this.maxSeqOffset = this.minSeqOffset;
		}
	}

	public void setParentSignature(final int parentSignature) {
		this.parentSignature = parentSignature;
	}

	public void setPosition(final int position) {
		this.position = position;
	}

	public void setReference(final String reference) {
		this.reference = reference;
	}

	/**
	 * Stores the main sequence and decodes it into bytes.
	 * 
	 * @param seq
	 *            the sequence as hex text, two characters per byte. A
	 *            trailing unpaired character is reported and ignored.
	 */
	public void setSequence(final String seq) {
		this.sequence = seq;
		final int seqLength = seq.length() / 2;
		if (2 * seqLength != seq.length()) {
			LOG.warn("A problem - sequence of odd length was found: {}", seq);
		}
		this.byteSequence = new byte[seqLength];
		for (int i = 0; i < seqLength; i++) {
			final int byteVal = Integer.parseInt(
					seq.substring(2 * i, 2 * (i + 1)), 16);
			// the narrowing cast maps 128..255 onto -128..-1
			this.byteSequence[i] = (byte) byteVal;
		}
	}

	/**
	 * Stores the shift distance for one byte value. Shift bytes outside
	 * 0..255 are ignored.
	 * 
	 * @param theShift
	 *            supplies the (unsigned) byte value and its shift distance
	 */
	public void setShift(final Shift theShift) {
		final int theShiftByte = theShift.getShiftByte();
		// the table is indexed by the signed byte value plus 128
		if ((theShiftByte >= 0) && (theShiftByte < 128)) {
			this.shiftFunction[theShiftByte + 128] = theShift.getShiftValue();
		} else if ((theShiftByte >= 128) && (theShiftByte < 256)) {
			this.shiftFunction[theShiftByte - 128] = theShift.getShiftValue();
		}
	}

	@Override
	public String toString() {
		return this.position + " seq=<" + this.sequence + ">" + "LLL"
				+ this.orderedLeftFragments + "LLL" + "RRR"
				+ this.orderedRightFragments + "RRR";
	}
}





© 2015 - 2025 Weber Informatics LLC | Privacy Policy