
uk.gov.nationalarchives.droid.signatureFile.ByteSeqSpecifier Maven / Gradle / Ivy
/*
** ByteSeqSpecifier.java
*
* © The National Archives 2005-2006. All rights reserved.
* See Licence.txt for full licence details.
*
* Developed by:
* Tessella Support Services plc
* 3 Vineyard Chambers
* Abingdon, OX14 3PX
* United Kingdom
* http://www.tessella.com
*
* Tessella/NPD/4826
* PRONOM 4
*
* $Id: ByteSeqSpecifier.java,v 1.7 2006/03/13 15:15:28 linb Exp $
*
* $Logger: ByteSeqSpecifier.java,v $
* Revision 1.7 2006/03/13 15:15:28 linb
* Changed copyright holder from Crown Copyright to The National Archives.
* Added reference to licence.txt
* Changed dates to 2005-2006
*
* Revision 1.6 2006/02/13 09:26:16 gaur
* Fixed bug in searching files from EOF, after first STS round
*
* Revision 1.5 2006/02/09 15:04:37 gaur
* Corrected formatting
*
* Revision 1.4 2006/02/07 17:16:22 linb
* - Change fileReader to ByteReader in formal parameters of methods
* - use new static constructors
* - Add detection of if a filePath is a URL or not
*
* Revision 1.3 2006/02/07 11:30:04 gaur
* Added support for endianness of signature
*
* Revision 1.2 2006/02/03 16:54:41 gaur
* We now allow general wildcards of arbitrary endianness: e.g., [!~A1B1:C1D1]
*
* Revision 1.1 2006/02/02 17:17:04 gaur
* Initial version. Functionality not yet complete, but should be sufficient to emulate the old behaviour.
*
*/
package uk.gov.nationalarchives.droid.signatureFile;
import uk.gov.nationalarchives.droid.binFileReader.ByteReader;
/**
* Defines the permissible values to be taken by a specific sequence of bytes.
* For example, it might specify that two bytes in succession must be between
* 8080 and 808F (inclusive)
*
* @author Richard Gault, Tessella
*/
public class ByteSeqSpecifier {

    /** Message of the exception thrown when a specifier cannot be parsed. */
    private static final String INVALID_SIGNATURE = "Invalid internal signature supplied";

    /**
     * The minimum (inclusive) value which the sequence can take: 80, 80 in the
     * example in the class header. Each value is stored offset by
     * Byte.MIN_VALUE (i.e. 128 is subtracted) so that the full 00-FF range
     * fits into Java's signed byte while keeping the unsigned ordering.
     */
    private final byte[] minSeq;

    /**
     * The maximum (inclusive) value which the sequence can take: 80, 8F in the
     * example in the class header. Stored with the same offset as minSeq and
     * always of the same length.
     */
    private final byte[] maxSeq;

    /**
     * If true, negates the sense of the test (in the example in the class
     * header, it would specify that the two bytes must be outside the range
     * 8080-808F).
     */
    private final boolean negate;

    /**
     * Creates a new instance of ByteSeqSpecifier.
     *
     * Accepted forms are a bare two-digit hexadecimal byte (e.g. A2) or a
     * bracketed specifier (e.g. [A1B1], [A1B1:C1D1], [!~A1B1:C1D1]). Inside
     * the brackets every leading '!' toggles negation and every leading '~'
     * is skipped; an optional ':' separates the minimum from the maximum
     * value, which must be of equal length.
     *
     * @param asciiRep
     *            A StringBuffer whose initial portion will be an ASCII
     *            representation of the bytes specifier. This will be altered
     *            so that this initial portion is removed.
     * @throws Exception
     *             if the specifier is missing, incomplete, has no closing
     *             bracket, contains no bytes, has a minimum and maximum of
     *             different lengths, has an odd number of hexadecimal digits,
     *             or contains a character that is not a hexadecimal digit
     */
    public ByteSeqSpecifier(final StringBuffer asciiRep) throws Exception {
        if (asciiRep == null || asciiRep.length() == 0) {
            throw invalid("no byte sequence specifier to parse");
        }

        // The string of characters defining the bytes specifier (excluding
        // any square brackets), removed from the front of asciiRep.
        final String specifier;
        if (asciiRep.charAt(0) != '[') {
            // Simple specifier: exactly one byte, written as two hex digits.
            if (asciiRep.length() < 2) {
                throw invalid("incomplete byte value '" + asciiRep + "'");
            }
            specifier = asciiRep.substring(0, 2);
            asciiRep.delete(0, 2);
        } else {
            // Non-trivial specifier: everything up to the closing bracket.
            final int closePos = asciiRep.indexOf("]");
            if (closePos < 0) {
                throw invalid("missing ']' in '" + asciiRep + "'");
            }
            specifier = asciiRep.substring(1, closePos);
            asciiRep.delete(0, closePos + 1);
        }

        // Consume any leading '!' (negation) and '~' markers. Only '!'
        // affects the result; '~' is skipped.
        boolean negated = false;
        int prefixLength = 0;
        while (prefixLength < specifier.length()
                && (specifier.charAt(prefixLength) == '!'
                        || specifier.charAt(prefixLength) == '~')) {
            if (specifier.charAt(prefixLength) == '!') {
                negated = !negated;
            }
            prefixLength++;
        }
        this.negate = negated;
        final String rangeText = specifier.substring(prefixLength);

        // Does the specifier contain a ':' (indicating a range)? If so,
        // minRange and maxRange are the strings on either side of it.
        // If not, both are the whole of the specifier.
        final int colonPos = rangeText.indexOf(':');
        final String minRange = (colonPos >= 0) ? rangeText.substring(0, colonPos) : rangeText;
        final String maxRange = (colonPos >= 0) ? rangeText.substring(colonPos + 1) : rangeText;

        // Sanity check that minRange and maxRange are the same length.
        // This message is kept verbatim from earlier versions.
        if (minRange.length() != maxRange.length()) {
            throw new Exception(INVALID_SIGNATURE);
        }
        if (minRange.length() == 0) {
            throw invalid("specifier '" + specifier + "' contains no bytes");
        }
        // Each byte needs two hex digits; a dangling digit must not be
        // dropped silently.
        if (minRange.length() % 2 != 0) {
            throw invalid("odd number of hexadecimal digits in '" + specifier + "'");
        }

        this.minSeq = parseHexBytes(minRange);
        this.maxSeq = parseHexBytes(maxRange);
    }

    /**
     * Returns the number of bytes covered by this specifier.
     *
     * @return the length of the minimum sequence, which is always the same as
     *         the length of the maximum sequence
     */
    public int getNumBytes() {
        return this.minSeq.length;
    }

    /**
     * Determines whether or not a given portion of a binary file matches the
     * sequence of bytes we specify.
     *
     * The file bytes are treated as one multi-byte value and must lie between
     * the minimum and maximum sequences (inclusive), compared from the most
     * significant byte to the least significant one. The result is inverted
     * if this specifier is negated.
     *
     * @param file
     *            The file we're currently testing
     * @param startPos
     *            The position of the first byte in the file to examine. When
     *            searching left to right this is the lowest position
     *            examined; when searching right to left it is the highest.
     * @param direction
     *            +1 (left to right) or -1 (right to left). The overall
     *            direction which our caller is searching in. Any value other
     *            than +1 is treated as -1.
     * @param bigEndian
     *            True iff the signature we are matching is big-endian
     * @return true iff the portion matches; false if reading the file fails
     *
     *         Note: In an ideal world, we would hold bigEndian as a private
     *         member, set up on construction. However, the framework used
     *         during parsing of the XML file does not lend itself to easily
     *         fetching information from a grandparent element. Consequently,
     *         we parse the byte sequence specifier in ignorance of its
     *         endianness, and wait until we try to match against a specific
     *         byte sequence (here) to find out how minSeq and maxSeq should
     *         be interpreted.
     */
    public boolean matchesByteSequence(final ByteReader file, long startPos,
            int direction, final boolean bigEndian) {
        try {
            // Whatever the search direction, element k of our arrays lines up
            // with the file byte at (lowest examined position + k).
            final long firstBytePos =
                    (direction == 1) ? startPos : startPos - getNumBytes() + 1;

            // Below the minimum: outside the allowed range.
            if (compareToBound(file, firstBytePos, this.minSeq, bigEndian) < 0) {
                return this.negate;
            }
            // Above the maximum: outside the allowed range.
            if (compareToBound(file, firstBytePos, this.maxSeq, bigEndian) > 0) {
                return this.negate;
            }
            return !this.negate;
        } catch (final Exception ignored) {
            // This is most likely to occur if we run off the end of the file.
            // (In practice, this method shouldn't be called unless we have
            // enough bytes to read, but this is belt and braces.)
            return false;
        }
    }

    /**
     * Compares the file bytes against one bound, most significant byte first,
     * stopping at the first byte that differs.
     *
     * Declared to throw Exception because the exceptions raised by
     * ByteReader.getByte are not visible from this class; the caller handles
     * them.
     *
     * @param file
     *            The file being tested
     * @param firstBytePos
     *            File position that corresponds to element 0 of bound
     * @param bound
     *            minSeq or maxSeq
     * @param bigEndian
     *            True if element 0 is the most significant byte, false if
     *            the last element is
     * @return a negative number, zero or a positive number if the file bytes
     *         are respectively below, equal to or above the bound
     */
    private static int compareToBound(final ByteReader file, final long firstBytePos,
            final byte[] bound, final boolean bigEndian) throws Exception {
        final int length = bound.length;
        for (int step = 0; step < length; step++) {
            final int index = bigEndian ? step : length - 1 - step;

            // Bring the byte read from the file into the same offset form
            // that the bound arrays are stored in.
            int fileByte = file.getByte(firstBytePos + index);
            if (fileByte < 0) {
                fileByte += 256;
            }
            fileByte += Byte.MIN_VALUE;

            if (fileByte != bound[index]) {
                return (fileByte < bound[index]) ? -1 : 1;
            }
        }
        return 0;
    }

    /**
     * Converts an even-length string of hexadecimal digits into bytes, each
     * stored offset by Byte.MIN_VALUE.
     *
     * @param hex
     *            The digits to convert; the caller guarantees an even length
     * @return one array element per pair of digits
     * @throws Exception
     *             if any character is not a hexadecimal digit (this includes
     *             sign characters)
     */
    private static byte[] parseHexBytes(final String hex) throws Exception {
        final byte[] result = new byte[hex.length() / 2];
        for (int i = 0; i < result.length; i++) {
            final int high = Character.digit(hex.charAt(2 * i), 16);
            final int low = Character.digit(hex.charAt(2 * i + 1), 16);
            if (high < 0 || low < 0) {
                throw invalid("'" + hex.substring(2 * i, 2 * i + 2)
                        + "' is not a hexadecimal byte");
            }
            result[i] = (byte) ((high * 16 + low) + Byte.MIN_VALUE);
        }
        return result;
    }

    /**
     * Builds the exception reported for a malformed specifier.
     *
     * @param detail
     *            What exactly is wrong with the specifier
     * @return the exception to throw
     */
    private static Exception invalid(final String detail) {
        return new Exception(INVALID_SIGNATURE + ": " + detail);
    }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy