nom.tam.util.ByteParser Maven / Gradle / Ivy

Go to download
package nom.tam.util;

import edu.umd.cs.findbugs.annotations.SuppressFBWarnings;

/*
 * #%L
 * nom.tam FITS library
 * %%
 * Copyright (C) 2004 - 2015 nom-tam-fits
 * %%
 * This is free and unencumbered software released into the public domain.
 * 
 * Anyone is free to copy, modify, publish, use, compile, sell, or
 * distribute this software, either in source code form or as a compiled
 * binary, for any purpose, commercial or non-commercial, and by any
 * means.
 * 
 * In jurisdictions that recognize copyright laws, the author or authors
 * of this software dedicate any and all copyright interest in the
 * software to the public domain. We make this dedication for the benefit
 * of the public at large and to the detriment of our heirs and
 * successors. We intend this dedication to be an overt act of
 * relinquishment in perpetuity of all present and future rights to this
 * software under copyright law.
 * 
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 * #L%
 */

/**
 * This class provides routines for efficient parsing of data stored in a byte
 * array. This routine is optimized (in theory at least!) for efficiency rather
 * than accuracy. The values read in for doubles or floats may differ in the
 * last bit or so from the standard input utilities, especially in the case
 * where a float is specified as a very long string of digits (substantially
 * longer than the precision of the type).
 * 
 * The get methods generally are available with or without a length parameter
 * specified. When a length parameter is specified only the bytes with the
 * specified range from the current offset will be search for the number. If no
 * length is specified, the entire buffer from the current offset will be
 * searched.
 * 
 * The getString method returns a string with leading and trailing white space
 * left intact. For all other get calls, leading white space is ignored. If
 * fillFields is set, then the get methods check that only white space follows
 * valid data and a FormatException is thrown if that is not the case. If
 * fillFields is not set and valid data is found, then the methods return having
 * read as much as possible. E.g., for the sequence "T123.258E13", a getBoolean,
 * getInteger and getFloat call would return true, 123, and 2.58e12 when called
 * in succession.
 */
public class ByteParser {

    private static final int EXPONENT_DENORMALISATION_CORR_LIMIT = -300;

    private static final double EXPONENT_DENORMALISATION_FACTOR = 1.e-300;

    private static final byte[] INFINITY_LOWER = AsciiFuncs.getBytes(ByteFormatter.INFINITY.toLowerCase());

    private static final byte[] INFINITY_UPPER = AsciiFuncs.getBytes(ByteFormatter.INFINITY.toUpperCase());

    private static final int INFINITY_LENGTH = ByteParser.INFINITY_UPPER.length;

    private static final int INFINITY_SHORTCUT_LENGTH = 3;

    private static final byte[] NOT_A_NUMBER_LOWER = AsciiFuncs.getBytes(ByteFormatter.NOT_A_NUMBER.toLowerCase());

    private static final byte[] NOT_A_NUMBER_UPPER = AsciiFuncs.getBytes(ByteFormatter.NOT_A_NUMBER.toUpperCase());

    private static final int NOT_A_NUMBER_LENGTH = ByteParser.NOT_A_NUMBER_UPPER.length;

    /**
     * The underlying number base used in this class.
     */
    private static final int NUMBER_BASE = 10;

    /**
     * The underlying number base used in this class as a double value.
     */
    private static final double NUMBER_BASE_DOUBLE = 10.;

    /**
     * Did we find a sign last time we checked?
     */
    private boolean foundSign;

    /**
     * Array being parsed
     */
    private byte[] input;

    /**
     * Length of last parsed value
     */
    private int numberLength;

    /**
     * Current offset into input.
     */
    private int offset;

    /**
     * Construct a parser.
     * 
     * @param input
     *            The byte array to be parsed. Note that the array can be
     *            re-used by refilling its contents and resetting the offset.
     */
    @SuppressFBWarnings(value = "EI_EXPOSE_REP", justification = "intended exposure of mutable data")
    public ByteParser(byte[] input) {
        this.input = input;
        this.offset = 0;
    }

    /**
     * Find the sign for a number . This routine looks for a sign (+/-) at the
     * current location and return +1/-1 if one is found, or +1 if not. The
     * foundSign boolean is set if a sign is found and offset is incremented.
     */
    private int checkSign() {

        this.foundSign = false;

        if (this.input[this.offset] == '+') {
            this.foundSign = true;
            this.offset++;
            return 1;
        } else if (this.input[this.offset] == '-') {
            this.foundSign = true;
            this.offset++;
            return -1;
        }

        return 1;
    }

    /**
     * Get the integer value starting at the current position. This routine
     * returns a double rather than an int/long to enable it to read very long
     * integers (with reduced precision) such as
     * 111111111111111111111111111111111111111111. Note that this routine does
     * set numberLength.
     * 
     * @param length
     *            The maximum number of characters to use.
     */
    private double getBareInteger(int length) {

        int startOffset = this.offset;
        double number = 0;

        while (length > 0 && this.input[this.offset] >= '0' && this.input[this.offset] <= '9') {

            number *= ByteParser.NUMBER_BASE;
            number += this.input[this.offset] - '0';
            this.offset++;
            length--;
        }
        this.numberLength = this.offset - startOffset;
        return number;
    }

    /**
     * @return a boolean value from the beginning of the buffer.
     * @throws FormatException
     *             if the double was in an unknown format
     */
    public boolean getBoolean() throws FormatException {
        return getBoolean(this.input.length - this.offset);
    }

    /**
     * @return a boolean value from a specified region of the buffer
     * @param length
     *            The maximum number of characters used to parse this boolean.
     * @throws FormatException
     *             if the double was in an unknown format
     */
    public boolean getBoolean(int length) throws FormatException {

        int startOffset = this.offset;
        length -= skipWhite(length);
        if (length == 0) {
            throw new FormatException("Blank boolean field");
        }

        boolean value = false;
        if (this.input[this.offset] == 'T' || this.input[this.offset] == 't') {
            value = true;
        } else if (this.input[this.offset] != 'F' && this.input[this.offset] != 'f') {
            this.numberLength = 0;
            this.offset = startOffset;
            throw new FormatException("Invalid boolean value");
        }
        this.offset++;
        this.numberLength = this.offset - startOffset;
        return value;
    }

    /**
     * @return the buffer being used by the parser
     */
    @SuppressFBWarnings(value = "EI_EXPOSE_REP", justification = "intended exposure of mutable data")
    public byte[] getBuffer() {
        return this.input;
    }

    /**
     * Read in the buffer until a double is read. This will read the entire
     * buffer if fillFields is set.
     * 
     * @return The value found.
     * @throws FormatException
     *             if the double was in an unknown format
     */
    public double getDouble() throws FormatException {
        return getDouble(this.input.length - this.offset);
    }

    /**
     * @return a parsed double from the buffer. Leading spaces are ignored.
     * @param length
     *            The maximum number of characters used to parse this number. If
     *            fillFields is specified then exactly only whitespace may
     *            follow a valid double value.
     * @throws FormatException
     *             if the double was in an unknown format
     */
    public double getDouble(int length) throws FormatException {
        int startOffset = this.offset;
        boolean error = true;
        double number;
        // Skip initial blanks.
        length -= skipWhite(length);
        if (length == 0) {
            this.numberLength = this.offset - startOffset;
            return 0;
        }
        double mantissaSign = checkSign();
        if (this.foundSign) {
            length--;
        }
        // Look for the special strings NaN, Inf,
        if (isCaseInsensitiv(length, ByteParser.NOT_A_NUMBER_LENGTH, ByteParser.NOT_A_NUMBER_LOWER, ByteParser.NOT_A_NUMBER_UPPER)) {
            number = Double.NaN;
            this.offset += ByteParser.NOT_A_NUMBER_LENGTH;
            // Look for the longer string first then try the shorter.
        } else if (isCaseInsensitiv(length, ByteParser.INFINITY_LENGTH, ByteParser.INFINITY_LOWER, ByteParser.INFINITY_UPPER)) {
            number = Double.POSITIVE_INFINITY;
            this.offset += ByteParser.INFINITY_LENGTH;
        } else if (isCaseInsensitiv(length, ByteParser.INFINITY_SHORTCUT_LENGTH, ByteParser.INFINITY_LOWER, ByteParser.INFINITY_UPPER)) {
            number = Double.POSITIVE_INFINITY;
            this.offset += ByteParser.INFINITY_SHORTCUT_LENGTH;
        } else {
            number = getBareInteger(length); // This will update offset
            length -= this.numberLength; // Set by getBareInteger
            if (this.numberLength > 0) {
                error = false;
            }
            // Check for fractional values after decimal
            if (length > 0 && this.input[this.offset] == '.') {
                this.offset++;
                length--;
                double numerator = getBareInteger(length);
                if (numerator > 0) {
                    number += numerator / Math.pow(ByteParser.NUMBER_BASE_DOUBLE, this.numberLength);
                }
                length -= this.numberLength;
                if (this.numberLength > 0) {
                    error = false;
                }
            }

            if (error) {
                this.offset = startOffset;
                this.numberLength = 0;
                throw new FormatException("Invalid real field");
            }

            // Look for an exponent ,Our Fortran heritage means that we allow
            // 'D' for the exponent
            // indicator.
            if (length > 0 && (this.input[this.offset] == 'e' || this.input[this.offset] == 'E' || this.input[this.offset] == 'd' || this.input[this.offset] == 'D')) {
                this.offset++;
                length--;
                if (length > 0) {
                    int sign = checkSign();
                    if (this.foundSign) {
                        length--;
                    }

                    int exponent = (int) getBareInteger(length);

                    // For very small numbers we try to miminize
                    // effects of denormalization.
                    if (exponent * sign > ByteParser.EXPONENT_DENORMALISATION_CORR_LIMIT) {
                        number *= Math.pow(ByteParser.NUMBER_BASE_DOUBLE, exponent * sign);
                    } else {
                        number =
                                ByteParser.EXPONENT_DENORMALISATION_FACTOR
                                        * (number * Math.pow(ByteParser.NUMBER_BASE_DOUBLE, exponent * sign + ByteParser.EXPONENT_DENORMALISATION_CORR_LIMIT * -1));
                    }
                }
            }
        }
        this.numberLength = this.offset - startOffset;
        return mantissaSign * number;
    }

    /**
     * @return a floating point value from the buffer. (see getDouble(int())
     * @throws FormatException
     *             if the float was in an unknown format
     */
    public float getFloat() throws FormatException {
        return (float) getDouble(this.input.length - this.offset);
    }

    /**
     * @return a floating point value in a region of the buffer
     * @param length
     *            The maximum number of characters used to parse this float.
     * @throws FormatException
     *             if the float was in an unknown format
     */
    public float getFloat(int length) throws FormatException {
        return (float) getDouble(length);
    }

    /**
     * @return an integer at the beginning of the buffer
     * @throws FormatException
     *             if the integer was in an unknown format
     */
    public int getInt() throws FormatException {
        return getInt(this.input.length - this.offset);
    }

    /**
     * @return a region of the buffer to an integer
     * @param length
     *            The maximum number of characters used to parse this integer. @throws
     *            FormatException if the integer was in an unknown format
     * @throws FormatException
     *             if the integer was in an unknown format
     */
    public int getInt(int length) throws FormatException {
        int startOffset = this.offset;

        length -= skipWhite(length);
        if (length == 0) {
            this.numberLength = this.offset - startOffset;
            return 0;
        }

        int number = 0;
        boolean error = true;

        int sign = checkSign();
        if (this.foundSign) {
            length--;
        }

        while (length > 0 && this.input[this.offset] >= '0' && this.input[this.offset] <= '9') {
            number = number * ByteParser.NUMBER_BASE + this.input[this.offset] - '0';
            this.offset++;
            length--;
            error = false;
        }

        if (error) {
            this.numberLength = 0;
            this.offset = startOffset;
            throw new FormatException("Invalid Integer");
        }
        this.numberLength = this.offset - startOffset;
        return sign * number;
    }

    /**
     * @return a long in a specified region of the buffer
     * @param length
     *            The maximum number of characters used to parse this long.
     * @throws FormatException
     *             if the long was in an unknown format
     */
    public long getLong(int length) throws FormatException {

        int startOffset = this.offset;

        // Skip white space.
        length -= skipWhite(length);
        if (length == 0) {
            this.numberLength = this.offset - startOffset;
            return 0;
        }

        long number = 0;
        boolean error = true;

        long sign = checkSign();
        if (this.foundSign) {
            length--;
        }

        while (length > 0 && this.input[this.offset] >= '0' && this.input[this.offset] <= '9') {
            number = number * ByteParser.NUMBER_BASE + this.input[this.offset] - '0';
            error = false;
            this.offset++;
            length--;
        }

        if (error) {
            this.numberLength = 0;
            this.offset = startOffset;
            throw new FormatException("Invalid long number");
        }
        this.numberLength = this.offset - startOffset;
        return sign * number;
    }

    /**
     * @return the number of characters used to parse the previous number (or
     *         the length of the previous String returned).
     */
    public int getNumberLength() {
        return this.numberLength;
    }

    /**
     * Get the current offset.
     * 
     * @return The current offset within the buffer.
     */
    public int getOffset() {
        return this.offset;
    }

    /**
     * @return a string.
     * @param length
     *            The length of the string.
     */
    public String getString(int length) {

        String s = AsciiFuncs.asciiString(this.input, this.offset, length);
        this.offset += length;
        this.numberLength = length;
        return s;
    }

    private boolean isCaseInsensitiv(int length, int constantLength, byte[] lowerConstant, byte[] upperConstant) {
        if (length < constantLength) {
            return false;
        }
        for (int i = 0; i < constantLength; i++) {
            if (this.input[this.offset + i] != lowerConstant[i] && this.input[this.offset + i] != upperConstant[i]) {
                return false;
            }
        }
        return true;
    }

    /**
     * Set the buffer for the parser.
     * 
     * @param buf
     *            buffer to set
     */
    @SuppressFBWarnings(value = "EI_EXPOSE_REP", justification = "intended exposure of mutable data")
    public void setBuffer(byte[] buf) {
        this.input = buf;
        this.offset = 0;
    }

    /**
     * Set the offset into the array.
     * 
     * @param offset
     *            The desired offset from the beginning of the array.
     */
    public void setOffset(int offset) {
        this.offset = offset;
    }

    /**
     * Skip bytes in the buffer.
     * 
     * @param nBytes
     *            number of bytes to skip
     */
    public void skip(int nBytes) {
        this.offset += nBytes;
    }

    /**
     * Skip white space. This routine skips with space in the input .
     * 
     * @return the number of character skipped. White space is defined as ' ',
     *         '\t', '\n' or '\r'
     * @param length
     *            The maximum number of characters to skip.
     */
    public int skipWhite(int length) {
        int i;
        for (i = 0; i < length; i++) {
            if (this.input[this.offset + i] != ' ' && this.input[this.offset + i] != '\t' && this.input[this.offset + i] != '\n' && this.input[this.offset + i] != '\r') {
                break;
            }
        }
        this.offset += i;
        return i;
    }
}