
uk.gov.nationalarchives.droid.signatureFile.ByteSeqSpecifier Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of wazformat Show documentation
Show all versions of wazformat Show documentation
Format identification utilities
The newest version!
/*
* * ByteSeqSpecifier.java The National Archives 2005-2006. All rights
* reserved. See Licence.txt for full licence details. Developed by: Tessella
* Support Services plc 3 Vineyard Chambers Abingdon, OX14 3PX United Kingdom
* http://www.tessella.com Tessella/NPD/4826 PRONOM 4 $Id:
* ByteSeqSpecifier.java,v 1.7 2006/03/13 15:15:28 linb Exp $ $Logger:
* ByteSeqSpecifier.java,v $ Revision 1.7 2006/03/13 15:15:28 linb Changed
* copyright holder from Crown Copyright to The National Archives. Added
* reference to licence.txt Changed dates to 2005-2006 Revision 1.6 2006/02/13
* 09:26:16 gaur Fixed bug in searching files from EOF, after first STS round
* Revision 1.5 2006/02/09 15:04:37 gaur Corrected formatting Revision 1.4
* 2006/02/07 17:16:22 linb - Change fileReader to ByteReader in formal
* parameters of methods - use new static constructors - Add detection of if a
* filePath is a URL or not Revision 1.3 2006/02/07 11:30:04 gaur Added
* support for endianness of signature Revision 1.2 2006/02/03 16:54:41 gaur
* We now allow general wildcards of arbitrary endianness: e.g., [!~A1B1:C1D1]
* Revision 1.1 2006/02/02 17:17:04 gaur Initial version. Functionality not
* yet complete, but should be sufficient to emulate the old behaviour.
*/
package uk.gov.nationalarchives.droid.signatureFile;
import uk.gov.nationalarchives.droid.binFileReader.ByteReader;
/**
* Defines the permissible values to be taken by a specific sequence of bytes.
* For example, it might specify that two bytes in succession must be between
* 8080 and 808F (inclusive)
*
* @author Richard Gault, Tessella
*/
public class ByteSeqSpecifier {

    /** Base text of every exception raised for a malformed specifier. */
    private static final String INVALID_MESSAGE = "Invalid internal signature supplied";

    /**
     * The minimum (inclusive) value which the sequence can take: 80, 80 in the
     * example in the class header. Each element is stored as the unsigned byte
     * value shifted by {@code Byte.MIN_VALUE} (i.e. minus 128), so that the
     * natural ordering of the signed {@code byte} matches the ordering of the
     * unsigned values.
     */
    private final byte[] minSeq;

    /**
     * The maximum (inclusive) value which the sequence can take: 80, 8F in the
     * example in the class header. Stored with the same shift as
     * {@link #minSeq}, and always of the same length.
     */
    private final byte[] maxSeq;

    /**
     * If true, negates the sense of the test (in the example in the class
     * header, it would specify that the two bytes must be outside the range
     * 8080-808F).
     */
    private final boolean negate;

    /**
     * Creates a new instance of ByteSeqSpecifier.
     *
     * <p>Two forms are understood: a simple specifier made of exactly two hex
     * digits (for example {@code A2}), or a bracketed specifier such as
     * {@code [!~A1B1:C1D1]}. Inside the brackets any leading run of {@code !}
     * and {@code ~} characters is consumed: each {@code !} toggles negation,
     * while {@code ~} is accepted and discarded here (endianness is supplied
     * later, when matching). What remains is either one hex string, or two hex
     * strings of equal length separated by {@code :} giving the inclusive
     * minimum and maximum.
     *
     * @param asciiRep
     *            A StringBuffer whose initial portion will be an ASCII
     *            representation of the bytes specifier. This will be altered
     *            so that this initial portion is removed.
     * @throws Exception
     *             if the specifier is malformed: empty, missing its closing
     *             bracket, containing no byte values, having minimum and
     *             maximum of different lengths, having an odd number of hex
     *             characters, or containing a non-hex character
     */
    protected ByteSeqSpecifier(final StringBuffer asciiRep) throws Exception {
        if (asciiRep == null || asciiRep.length() == 0) {
            throw new Exception(INVALID_MESSAGE + ": empty byte specifier");
        }

        // The characters defining the specifier (excluding any square brackets)
        final String specifier;
        if (asciiRep.charAt(0) != '[') {
            // Simple specifier: exactly two hex characters, e.g. A2.
            if (asciiRep.length() < 2) {
                throw new Exception(INVALID_MESSAGE
                        + ": incomplete byte value '" + asciiRep + "'");
            }
            specifier = asciiRep.substring(0, 2);
            asciiRep.delete(0, 2);
        } else {
            // Non-trivial specifier: take everything up to the closing bracket.
            final int closePos = asciiRep.indexOf("]");
            if (closePos < 0) {
                throw new Exception(INVALID_MESSAGE
                        + ": missing ']' in '" + asciiRep + "'");
            }
            specifier = asciiRep.substring(1, closePos);
            // Remove the specifier together with both brackets.
            asciiRep.delete(0, closePos + 1);
        }

        // Consume leading '!' (toggle negation) and '~' (ignored) characters,
        // taking care not to run off the end of the string.
        boolean negated = false;
        int prefixEnd = 0;
        while (prefixEnd < specifier.length()) {
            final char prefix = specifier.charAt(prefixEnd);
            if (prefix == '!') {
                negated = !negated;
            } else if (prefix != '~') {
                break;
            }
            prefixEnd++;
        }
        this.negate = negated;
        final String range = specifier.substring(prefixEnd);

        // A ':' separates minimum from maximum; without one, both bounds are
        // the whole of the remaining text.
        final String minRange;
        final String maxRange;
        final int colonPos = range.indexOf(':');
        if (colonPos >= 0) {
            minRange = range.substring(0, colonPos);
            maxRange = range.substring(colonPos + 1);
        } else {
            minRange = range;
            maxRange = range;
        }

        // Sanity check that minRange and maxRange are the same length
        if (minRange.length() != maxRange.length()) {
            throw new Exception(INVALID_MESSAGE);
        }
        if (minRange.isEmpty()) {
            throw new Exception(INVALID_MESSAGE
                    + ": no byte values in '" + specifier + "'");
        }
        if (minRange.length() % 2 != 0) {
            // Each byte needs two hex characters; a dangling nibble would
            // otherwise be dropped silently.
            throw new Exception(INVALID_MESSAGE
                    + ": odd number of hex characters in '" + specifier + "'");
        }

        this.minSeq = parseHexBytes(minRange);
        this.maxSeq = parseHexBytes(maxRange);
    }

    /**
     * Converts an even-length string of hex digit pairs into the shifted byte
     * form used by {@link #minSeq} and {@link #maxSeq}.
     *
     * @param hex
     *            the hex characters, two per byte; the length must be even
     * @return one element per pair, each equal to the pair's value plus
     *         {@code Byte.MIN_VALUE}
     * @throws Exception
     *             if any character is not a hexadecimal digit (this includes
     *             sign characters, which {@code Integer.parseInt} would accept)
     */
    private static byte[] parseHexBytes(final String hex) throws Exception {
        final byte[] shifted = new byte[hex.length() / 2];
        for (int i = 0; i < shifted.length; i++) {
            final int high = Character.digit(hex.charAt(2 * i), 16);
            final int low = Character.digit(hex.charAt(2 * i + 1), 16);
            if (high < 0 || low < 0) {
                throw new Exception(INVALID_MESSAGE
                        + ": '" + hex.substring(2 * i, 2 * i + 2)
                        + "' is not a hexadecimal byte");
            }
            shifted[i] = (byte) (((high << 4) | low) + Byte.MIN_VALUE);
        }
        return shifted;
    }

    /**
     * Returns the number of bytes this specifier covers.
     *
     * @return the length of the minimum sequence, which is always the same as
     *         the length of the maximum sequence
     */
    public int getNumBytes() {
        return this.minSeq.length;
    }

    /**
     * Determines whether or not a given portion of a binary file matches the
     * sequence of bytes we specify.
     *
     * @param file
     *            The file we're currently testing
     * @param startPos
     *            The position of the first byte in the file to examine
     * @param direction
     *            +1 (left to right) or -1 (right to left). The overall
     *            direction which our caller is searching in
     * @param bigEndian
     *            True iff the signature we are matching is big-endian
     * @return true iff the portion matches; false if reading the file throws
     *         (regardless of negation)
     *
     *         Note: In an ideal world, we would hold bigEndian as a private
     *         member, set up on construction. However, the framework used
     *         during parsing of the XML file does not lend itself to easily
     *         fetching information from a grandparent element. Consequently,
     *         we parse the byte sequence specifier in ignorance of its
     *         endianness, and wait until we try to match against a specific
     *         byte sequence (here) to find out how minSeq and maxSeq should
     *         be interpreted.
     */
    protected boolean matchesByteSequence(final ByteReader file,
            long startPos, int direction, final boolean bigEndian) {
        try {
            // The comparison has to run from the big end to the little end.
            // If the caller reads right-to-left but the signature is
            // big-endian, or reads left-to-right but the signature is
            // little-endian, we therefore walk our sequence the other way
            // round, starting from the opposite end of the same file span.
            long firstPos = startPos;
            int step = direction;
            if (!bigEndian && (direction == 1)) {
                step = -1;
                firstPos = startPos + getNumBytes() - 1;
            } else if (bigEndian && (direction == -1)) {
                step = 1;
                firstPos = startPos - getNumBytes() + 1;
            }

            // In range means: file >= minimum AND file <= maximum. The second
            // comparison is skipped when the first already fails.
            final boolean inRange =
                    compareWithBound(file, firstPos, step, this.minSeq) >= 0
                    && compareWithBound(file, firstPos, step, this.maxSeq) <= 0;
            return inRange != this.negate;
        } catch (final Exception ignored) {
            // This is most likely to occur if we run off the end of the file.
            // (In practice, this method shouldn't be called unless we have
            // enough bytes to read, but this is belt and braces.)
            return false;
        }
    }

    /**
     * Compares the file bytes against one bound, most significant byte first,
     * stopping at the first byte that differs.
     *
     * @param file
     *            the file being read
     * @param firstPos
     *            file position of the first byte to compare
     * @param step
     *            +1 to walk the bound and file forwards from the bound's first
     *            element, otherwise the walk starts at the bound's last element
     *            and advances by this amount
     * @param bound
     *            the shifted bound ({@link #minSeq} or {@link #maxSeq})
     * @return a negative value if the file sequence is below the bound, a
     *         positive value if above, and 0 if every compared byte is equal
     * @throws Exception
     *             whatever reading from {@code file} throws
     */
    private static int compareWithBound(final ByteReader file,
            final long firstPos, final int step, final byte[] bound)
            throws Exception {
        int arrayPos = (step == 1) ? 0 : bound.length - 1;
        for (int fileOffset = 0; (arrayPos >= 0) && (arrayPos < bound.length);
                fileOffset += step, arrayPos += step) {
            // The file byte comes back in 2s complement form; convert it to
            // an unsigned value and then apply the same shift as the bound.
            int fileByte = file.getByte(firstPos + fileOffset);
            if (fileByte < 0) {
                fileByte += 256;
            }
            fileByte += Byte.MIN_VALUE;
            if (fileByte != bound[arrayPos]) {
                return (fileByte < bound[arrayPos]) ? -1 : 1;
            }
        }
        return 0;
    }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy