All Downloads are FREE. Search and download functionalities are using the official Maven repository.

uk.gov.nationalarchives.droid.signatureFile.ByteSeqSpecifier Maven / Gradle / Ivy

The newest version!
/*
 * * ByteSeqSpecifier.java The National Archives 2005-2006. All rights
 * reserved. See Licence.txt for full licence details. Developed by: Tessella
 * Support Services plc 3 Vineyard Chambers Abingdon, OX14 3PX United Kingdom
 * http://www.tessella.com Tessella/NPD/4826 PRONOM 4 $Id:
 * ByteSeqSpecifier.java,v 1.7 2006/03/13 15:15:28 linb Exp $ $Logger:
 * ByteSeqSpecifier.java,v $ Revision 1.7 2006/03/13 15:15:28 linb Changed
 * copyright holder from Crown Copyright to The National Archives. Added
 * reference to licence.txt Changed dates to 2005-2006 Revision 1.6 2006/02/13
 * 09:26:16 gaur Fixed bug in searching files from EOF, after first STS round
 * Revision 1.5 2006/02/09 15:04:37 gaur Corrected formatting Revision 1.4
 * 2006/02/07 17:16:22 linb - Change fileReader to ByteReader in formal
 * parameters of methods - use new static constructors - Add detection of if a
 * filePath is a URL or not Revision 1.3 2006/02/07 11:30:04 gaur Added
 * support for endianness of signature Revision 1.2 2006/02/03 16:54:41 gaur
 * We now allow general wildcards of arbitrary endianness: e.g., [!~A1B1:C1D1]
 * Revision 1.1 2006/02/02 17:17:04 gaur Initial version. Functionality not
 * yet complete, but should be sufficient to emulate the old behaviour.
 */

package uk.gov.nationalarchives.droid.signatureFile;

import uk.gov.nationalarchives.droid.binFileReader.ByteReader;

/**
 * Defines the permissible values to be taken by a specific sequence of bytes.
 * For example, it might specify that two bytes in succession must be between
 * 8080 and 808F (inclusive)
 * 
 * @author Richard Gault, Tessella
 */
public class ByteSeqSpecifier {

	// sequence can take: 80, 80 in the example
	// in the header (except that we take off
	// 128 before storing a value in the array,
	// since bytes are unsigned)
	private final byte[] maxSeq; // The maximum (inclusive) value which the
	// Private members
	private final byte[] minSeq; // The minimum (inclusive) value which the
	// sequence can take: 80, 8F in the example
	// in the header
	private boolean negate; // If true, negates the sense of the test (in the

	// example in the header, it would specify that the
	// two bytes must be outside the range 8080-808f)

	/**
	 * Creates a new instance of ByteSeqSpecifier
	 * 
	 * @param asciiRep
	 *            A StringBuffer whose initial portion will be an ASCII
	 *            representation of the bytes specifier. This will be altered
	 *            so that this initial portion is removed.
	 */
	protected ByteSeqSpecifier(final StringBuffer asciiRep) throws Exception {
		String specifier; // The string of characters defining the bytes
		// specifier (excluding any square brackets)

		// First off, handle the case of a simple specifier: A2, for example.
		if (asciiRep.charAt(0) != '[') {
			specifier = asciiRep.substring(0, 2);
			asciiRep.delete(0, 2);
		} else {
			// We have a non-trivial byte sequence Specifier. Extract it from
			// the front of asciiRep
			specifier = asciiRep.substring(1, asciiRep.indexOf("]"));
			asciiRep.delete(0, specifier.length() + 2);
		}

		this.negate = false;
		// Does the specifier begin with a ! (indicating negation)? Remove it
		// if
		// so.
		while ((specifier.charAt(0) == '!') || (specifier.charAt(0) == '~')) {
			if (specifier.charAt(0) == '!') {
				this.negate = !this.negate;
			}
			specifier = specifier.substring(1);
		}

		// Does the specifier contain a : (indicating a range)? If so, set
		// minRage and maxRange to be the strings on either side.
		// If not, set them both to be the same: the whole of specifier.
		String minRange;
		String maxRange;
		final int colonPos = specifier.indexOf(':');
		if (colonPos >= 0) {
			minRange = specifier.substring(0, colonPos);
			maxRange = specifier.substring(colonPos + 1);
		} else {
			minRange = specifier;
			maxRange = specifier;
		}

		// Sanity check that minRange and maxRange are the same length
		if (minRange.length() != maxRange.length()) {
			throw new Exception("Invalid internal signature supplied");
		}

		// We may now assume that both minRange and maxRange contain pairs of
		// characters representing concrete bytes. Extract and
		// store them in our two arrays
		final int seqLength = minRange.length() / 2;
		this.minSeq = new byte[seqLength];
		this.maxSeq = new byte[seqLength];
		for (int i = 0; i < seqLength; i++) {
			int byteVal = Integer.parseInt(
					minRange.substring(2 * i, 2 * (i + 1)), 16);
			this.minSeq[i] = (byte) (byteVal + Byte.MIN_VALUE);
			byteVal = Integer.parseInt(
					maxRange.substring(2 * i, 2 * (i + 1)), 16);
			this.maxSeq[i] = (byte) (byteVal + Byte.MIN_VALUE);
		}
	}

	/* Getter */
	public int getNumBytes() {
		return this.minSeq.length;
	} // Will always be the same as maxSeq.length

	/**
	 * Determines whether or not a given portion of a binary file matches the
	 * sequence of bytes we specify.
	 * 
	 * @param file
	 *            The file we're currently testing
	 * @param startPos
	 *            The position of the first byte in the file to examine
	 * @param direction
	 *            +1 (left to right) or -1 (right to left). The overall
	 *            direction which our caller is searching in
	 * @param bigEndian
	 *            True iff the signature we are matching is big-endian
	 * @return true iff the portion matches
	 *         

* Note: In an ideal world, we would hold bigEndian as a private * member, set up on construction. However, the framework used * during parsing of the XML file does not lend itself to easily * fetching information from a grandparent element. Consequently, * we parse the byte sequence specifier in ignorance of its * endianness, and wait until we try to match against a specific * byte sequence (here) to find out how minSeq and maxSeq should * be interpreted. */ protected boolean matchesByteSequence(final ByteReader file, long startPos, int direction, final boolean bigEndian) { try { // We have to perform the comparison from big-end to little-end. // Consequently, if we're reading // from right to left but using big-endian-ness, or if we're // reading // from left-to-right but using // little-endian-ness, we have to search through our sequence // backwards -- that is, left-to-right // in the former case, or right-to-left in the latter. if (!bigEndian && (direction == 1)) { direction = -1; startPos += getNumBytes() - 1; } else if (bigEndian && (direction == -1)) { direction = 1; startPos = startPos - getNumBytes() + 1; } int arrayPos = (direction == 1) ? 0 : getNumBytes() - 1; // Loop through the sequence, checking to ensure that the contents // of the binary file >= the minimum sequence for (int fileOffset = 0; (0 <= arrayPos) && (arrayPos < getNumBytes()); fileOffset += direction, arrayPos += direction) { // Read the corresponding byte from the file. Because this is // stored in 2s complement form, we need to // convert it to the same form that minSeq is stored in int fileByte = file.getByte(startPos + fileOffset); if (fileByte < 0) { fileByte += 256; } fileByte += Byte.MIN_VALUE; if (fileByte < this.minSeq[arrayPos]) { // We're outside the allowed range. return this.negate; } else if (fileByte > this.minSeq[arrayPos]) { // The whole of the sequence is definitely greater than // minSeq. Go on and see if it's less than maxSeq. break; } } // Repeat the previous loop, but this time checking to ensure that // the contents of the binary file <= the maximum sequence arrayPos = (direction == 1) ? 0 : getNumBytes() - 1; for (int fileOffset = 0; (arrayPos >= 0) && (arrayPos < getNumBytes()); fileOffset += direction, arrayPos += direction) { int fileByte = file.getByte(startPos + fileOffset); if (fileByte < 0) { fileByte += 256; } fileByte += Byte.MIN_VALUE; if (fileByte > this.maxSeq[arrayPos]) { return this.negate; } else if (fileByte < this.maxSeq[arrayPos]) { break; } } return !this.negate; } catch (final Exception e) { // This is most likely to occur if we run off the end of the file. // (In practice, this method shouldn't be called // unless we have enough bytes to read, but this is belt and // braces.) return false; } } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy