
// uk.gov.nationalarchives.droid.signatureFile.ByteSeqSpecifier Maven / Gradle / Ivy
/*
* * ByteSeqSpecifier.java The National Archives 2005-2006. All rights
* reserved. See Licence.txt for full licence details. Developed by: Tessella
* Support Services plc 3 Vineyard Chambers Abingdon, OX14 3PX United Kingdom
* http://www.tessella.com Tessella/NPD/4826 PRONOM 4 $Id:
* ByteSeqSpecifier.java,v 1.7 2006/03/13 15:15:28 linb Exp $ $Logger:
* ByteSeqSpecifier.java,v $ Revision 1.7 2006/03/13 15:15:28 linb Changed
* copyright holder from Crown Copyright to The National Archives. Added
* reference to licence.txt Changed dates to 2005-2006 Revision 1.6 2006/02/13
* 09:26:16 gaur Fixed bug in searching files from EOF, after first STS round
* Revision 1.5 2006/02/09 15:04:37 gaur Corrected formatting Revision 1.4
* 2006/02/07 17:16:22 linb - Change fileReader to ByteReader in formal
* parameters of methods - use new static constructors - Add detection of if a
* filePath is a URL or not Revision 1.3 2006/02/07 11:30:04 gaur Added
* support for endianness of signature Revision 1.2 2006/02/03 16:54:41 gaur
* We now allow general wildcards of arbitrary endianness: e.g., [!~A1B1:C1D1]
* Revision 1.1 2006/02/02 17:17:04 gaur Initial version. Functionality not
* yet complete, but should be sufficient to emulate the old behaviour.
*/
package uk.gov.nationalarchives.droid.signatureFile;
import uk.gov.nationalarchives.droid.binFileReader.ByteReader;
/**
* Defines the permissible values to be taken by a specific sequence of bytes.
* For example, it might specify that two bytes in succession must be between
* 8080 and 808F (inclusive)
*
* @author Richard Gault, Tessella
*/
public class ByteSeqSpecifier {

    /** Message carried by the exception raised for any malformed specifier. */
    private static final String INVALID_SIGNATURE = "Invalid internal signature supplied";

    /**
     * The minimum (inclusive) value which the sequence can take: 80, 80 in the
     * example in the class header. Each value has 128 taken off before it is
     * stored, so that the range 00-FF maps onto the signed range of a Java
     * byte without changing the ordering.
     */
    private final byte[] minSeq;

    /**
     * The maximum (inclusive) value which the sequence can take: 80, 8F in the
     * example in the class header. Stored with the same offset as
     * {@link #minSeq} and always of the same length.
     */
    private final byte[] maxSeq;

    /**
     * If true, negates the sense of the test (in the example in the class
     * header, it would specify that the two bytes must be outside the range
     * 8080-808F).
     */
    private final boolean negate;

    /**
     * Creates a new instance of ByteSeqSpecifier.
     *
     * <p>Accepted forms are a bare pair of hex digits (<code>A2</code>) or a
     * bracketed expression (<code>[A1B1]</code>, <code>[A1B1:C1D1]</code>),
     * optionally prefixed inside the brackets by any number of <code>!</code>
     * (each one toggles negation) and <code>~</code> (skipped) characters.
     *
     * @param asciiRep
     *            A StringBuffer whose initial portion will be an ASCII
     *            representation of the bytes specifier. This will be altered
     *            so that this initial portion is removed. If the initial
     *            portion cannot even be delimited (empty buffer, fewer than
     *            two characters, no closing bracket) the buffer is left
     *            unmodified.
     * @throws Exception
     *             if the specifier is malformed: it cannot be delimited, it
     *             is empty once the prefixes are removed, the two ends of a
     *             range differ in length, or it does not consist of whole
     *             pairs of characters
     * @throws NumberFormatException
     *             if a pair of characters is not two hexadecimal digits
     */
    protected ByteSeqSpecifier(final StringBuffer asciiRep) throws Exception {
        if (asciiRep == null || asciiRep.length() == 0) {
            throw new Exception(INVALID_SIGNATURE);
        }

        // The characters defining the specifier, excluding any square brackets.
        final String specifier;
        if (asciiRep.charAt(0) != '[') {
            // Simple specifier: exactly two hex digits, e.g. A2.
            if (asciiRep.length() < 2) {
                throw new Exception(INVALID_SIGNATURE);
            }
            specifier = asciiRep.substring(0, 2);
            asciiRep.delete(0, 2);
        } else {
            // Non-trivial specifier: everything up to the first closing bracket.
            final int closePos = asciiRep.indexOf("]");
            if (closePos < 0) {
                throw new Exception(INVALID_SIGNATURE);
            }
            specifier = asciiRep.substring(1, closePos);
            asciiRep.delete(0, closePos + 1);
        }

        // Skip leading '!' and '~' characters. Each '!' toggles negation; '~'
        // is dropped here without further effect. The length guard stops us
        // running off the end of a specifier made only of prefixes.
        boolean negated = false;
        int prefixEnd = 0;
        while (prefixEnd < specifier.length()
                && (specifier.charAt(prefixEnd) == '!' || specifier.charAt(prefixEnd) == '~')) {
            if (specifier.charAt(prefixEnd) == '!') {
                negated = !negated;
            }
            prefixEnd++;
        }
        final String range = specifier.substring(prefixEnd);

        // A ':' separates the two ends of a range. Without one, the minimum
        // and maximum are both the whole of the remaining specifier.
        final String minRange;
        final String maxRange;
        final int colonPos = range.indexOf(':');
        if (colonPos >= 0) {
            minRange = range.substring(0, colonPos);
            maxRange = range.substring(colonPos + 1);
        } else {
            minRange = range;
            maxRange = range;
        }

        // Sanity checks: both ends must be the same length, must not be empty
        // (a zero-byte specifier would match anything) and must be made of
        // whole pairs of characters (otherwise a trailing digit would be
        // silently ignored).
        if (minRange.length() != maxRange.length()
                || minRange.length() == 0
                || minRange.length() % 2 != 0) {
            throw new Exception(INVALID_SIGNATURE);
        }

        final int seqLength = minRange.length() / 2;
        final byte[] lower = new byte[seqLength];
        final byte[] upper = new byte[seqLength];
        for (int i = 0; i < seqLength; i++) {
            lower[i] = toStoredByte(minRange, i);
            upper[i] = toStoredByte(maxRange, i);
        }

        this.minSeq = lower;
        this.maxSeq = upper;
        this.negate = negated;
    }

    /**
     * Converts one pair of hex digits into the offset form held in
     * {@link #minSeq} and {@link #maxSeq}.
     *
     * @param hex
     *            a string made of pairs of hex digits
     * @param index
     *            the zero-based index of the pair to convert
     * @return the value of the pair with 128 taken off
     * @throws NumberFormatException
     *             if either character of the pair is not a hex digit (this
     *             includes sign characters)
     */
    private static byte toStoredByte(final String hex, final int index) {
        final int start = 2 * index;
        final int high = Character.digit(hex.charAt(start), 16);
        final int low = Character.digit(hex.charAt(start + 1), 16);
        if (high < 0 || low < 0) {
            throw new NumberFormatException(
                    "For input string: \"" + hex.substring(start, start + 2) + "\"");
        }
        return (byte) (((high << 4) | low) + Byte.MIN_VALUE);
    }

    /**
     * Returns the number of bytes covered by this specifier.
     *
     * @return the length of the minimum sequence, which is always the same as
     *         the length of the maximum sequence
     */
    public int getNumBytes() {
        return this.minSeq.length;
    }

    /**
     * Determines whether or not a given portion of a binary file matches the
     * sequence of bytes we specify.
     *
     * @param file
     *            The file we're currently testing
     * @param startPos
     *            The position of the first byte in the file to examine
     * @param direction
     *            +1 (left to right) or -1 (right to left). The overall
     *            direction which our caller is searching in
     * @param bigEndian
     *            True iff the signature we are matching is big-endian
     * @return true iff the portion matches (taking negation into account);
     *         false, regardless of negation, if reading the file fails
     *
     *         Note: In an ideal world, we would hold bigEndian as a private
     *         member, set up on construction. However, the framework used
     *         during parsing of the XML file does not lend itself to easily
     *         fetching information from a grandparent element. Consequently,
     *         we parse the byte sequence specifier in ignorance of its
     *         endianness, and wait until we try to match against a specific
     *         byte sequence (here) to find out how minSeq and maxSeq should
     *         be interpreted.
     */
    protected boolean matchesByteSequence(final ByteReader file,
            long startPos, int direction, final boolean bigEndian) {
        try {
            // The comparison has to run from big-end to little-end. So if we
            // are reading right to left with a big-endian signature, or left
            // to right with a little-endian one, we walk the sequence in the
            // opposite direction to our caller, starting from its other end.
            if (!bigEndian && (direction == 1)) {
                direction = -1;
                startPos += getNumBytes() - 1;
            } else if (bigEndian && (direction == -1)) {
                direction = 1;
                startPos = startPos - getNumBytes() + 1;
            }

            // In range means: file bytes >= minSeq and file bytes <= maxSeq.
            // The second comparison is skipped when the first already fails.
            final boolean inRange =
                    compareWithBound(file, startPos, direction, this.minSeq) >= 0
                    && compareWithBound(file, startPos, direction, this.maxSeq) <= 0;
            return inRange != this.negate;
        } catch (final Exception e) {
            // This is most likely to occur if we run off the end of the file.
            // (In practice, this method shouldn't be called unless we have
            // enough bytes to read, but this is belt and braces.)
            return false;
        }
    }

    /**
     * Compares the bytes of the file with one bound, most significant byte
     * first, stopping at the first byte that differs.
     *
     * @param file
     *            the file being tested
     * @param origin
     *            the file position of the first byte to compare
     * @param step
     *            +1 to walk forwards from the start of the bound, otherwise
     *            the walk starts at the end of the bound; applied to both the
     *            file offset and the array position
     * @param bound
     *            the sequence to compare against, in offset form
     * @return a negative value if the file bytes are below the bound, a
     *         positive value if they are above it, zero if they are equal
     * @throws Exception
     *             if the byte cannot be read from the file
     */
    private int compareWithBound(final ByteReader file, final long origin,
            final int step, final byte[] bound) throws Exception {
        int arrayPos = (step == 1) ? 0 : getNumBytes() - 1;
        for (int fileOffset = 0; (arrayPos >= 0) && (arrayPos < getNumBytes());
                fileOffset += step, arrayPos += step) {
            // Bring the byte read from the file into the same offset form as
            // the stored bounds: make it non-negative, then take off 128.
            int fileByte = file.getByte(origin + fileOffset);
            if (fileByte < 0) {
                fileByte += 256;
            }
            fileByte += Byte.MIN_VALUE;

            if (fileByte != bound[arrayPos]) {
                // The first differing byte decides the whole comparison.
                return (fileByte < bound[arrayPos]) ? -1 : 1;
            }
        }
        return 0;
    }
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy