nom.tam.fits.HeaderCardParser Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of nom-tam-fits Show documentation
Java library for reading and writing FITS files. FITS, the Flexible Image Transport System, is the format commonly used in the archiving and transport of astronomical data.
There is a newer version: 1.21.0
Show newest version
/*
 * #%L
 * nom.tam FITS library
 * %%
 * Copyright (C) 2004 - 2024 nom-tam-fits
 * %%
 * This is free and unencumbered software released into the public domain.
 *
 * Anyone is free to copy, modify, publish, use, compile, sell, or
 * distribute this software, either in source code form or as a compiled
 * binary, for any purpose, commercial or non-commercial, and by any
 * means.
 *
 * In jurisdictions that recognize copyright laws, the author or authors
 * of this software dedicate any and all copyright interest in the
 * software to the public domain. We make this dedication for the benefit
 * of the public at large and to the detriment of our heirs and
 * successors. We intend this dedication to be an overt act of
 * relinquishment in perpetuity of all present and future rights to this
 * software under copyright law.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 * #L%
 */

package nom.tam.fits;

import java.math.BigDecimal;
import java.math.BigInteger;
import java.util.Locale;
import java.util.StringTokenizer;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Pattern;

import nom.tam.util.ComplexValue;
import nom.tam.util.FlexFormat;

import static nom.tam.fits.header.NonStandard.HIERARCH;
import static nom.tam.fits.header.Standard.CONTINUE;

/**
 * <p>
 * Converts a single 80-character wide FITS header record into a header card. See {@link HeaderCard#create(String)} for
 * a description of the rules that guide parsing.
 * </p>
 * <p>
 * When parsing header records that violate FITS standards, the violations can be logged or will throw appropriate
 * exceptions (depending on the severity of the standard violation and whether
 * {@link FitsFactory#setAllowHeaderRepairs(boolean)} is enabled or not). The logging of violations is disabled by
 * default, but may be controlled via {@link Header#setParserWarningsEnabled(boolean)}.
 * </p>
 *
 * @author Attila Kovacs
 *
 * @see    FitsFactory#setAllowHeaderRepairs(boolean)
 * @see    Header#setParserWarningsEnabled(boolean)
 */
class HeaderCardParser {

    /** The logger through which all parser warnings (standard violations, repairs) are reported. */
    private static final Logger LOG = Logger.getLogger(HeaderCardParser.class.getName());

    static {
        // Do not log warnings by default: every diagnostic in this class is emitted via LOG.warning(),
        // so raising the threshold to SEVERE silences them until the level is lowered again.
        LOG.setLevel(Level.SEVERE);
    }

    /**
     * Pattern for decimal (floating-point) values: an optional sign, one or more digits, an optional fractional
     * part, and an optional exponent introduced by any of {@code d}, {@code D}, {@code e} or {@code E}. Note that at
     * least one digit is required before the decimal point.
     */
    private static final Pattern DECIMAL_REGEX = Pattern.compile("[+-]?\\d+(\\.\\d*)?([dDeE][+-]?\\d+)?");

    /**
     * Pattern for complex values: two decimal values (see {@link #DECIMAL_REGEX}) separated by a comma and enclosed
     * in parentheses, with optional white space around each component.
     */
    private static final Pattern COMPLEX_REGEX = Pattern
            .compile("\\(\\s*" + DECIMAL_REGEX + "\\s*,\\s*" + DECIMAL_REGEX + "\\s*\\)");

    /** Pattern for decimal integers: an optional sign followed by one or more digits. */
    private static final Pattern INT_REGEX = Pattern.compile("[+-]?\\d+");

    /** The header line (usually 80-character width), which to parse. */
    private String line;

    /**
     * The keyword of the card (trimmed, upper-cased, and with HIERARCH components joined by dots).
     */
    private String key = null;

    /**
     * The value of the card, or <code>null</code> if no value was parsed. Non-string values are trimmed; quoted
     * string values only have their trailing spaces removed.
     */
    private String value = null;

    /**
     * The comment of the card, exactly as found in the record (untrimmed), or <code>null</code> if there is none.
     */
    private String comment = null;

    /**
     * The Java class inferred for the value ({@link String} for quoted values), or <code>null</code> if no value was
     * parsed or its type was not recognised.
     */
    private Class type = null;

    /**
     * The position in the line right after the last character processed by this parser.
     */
    private int parsePos = 0;

    /**
     * Instantiates a new parser for a FITS header line, and parses it right away. The keyword, the value and the
     * comment are extracted in that order, each stage continuing from the parse position at which the previous one
     * stopped.
     *
     * @param  line                     a line in the FITS header, normally exactly 80-characters wide (but need not
     *                                      be). It must not be <code>null</code>: there is no explicit check, so a
     *                                      <code>null</code> argument fails with a {@link NullPointerException} while
     *                                      parsing the keyword.
     *
     * @see                             #getKey()
     * @see                             #getValue()
     * @see                             #getUntrimmedComment()
     * @see                             #getTrimmedComment()
     * @see                             #isString()
     *
     * @throws UnclosedQuoteException   if there is a missing end-quote and header repairs aren't allowed.
     * @throws IllegalArgumentException declared for callers. NOTE(review): none of the parsing stages in this class
     *                                      throws it explicitly (key and character validation failures are caught
     *                                      and logged) -- confirm whether it can still occur.
     *
     * @see                             FitsFactory#setAllowHeaderRepairs(boolean)
     */
    HeaderCardParser(String line) throws UnclosedQuoteException, IllegalArgumentException {
        this.line = line;
        // TODO HeaderCard never calls this with a null argument, so the check below is dead code here...
        // if (line == null) {
        // throw new IllegalArgumentException("Cannot parse null string");
        // }

        // The order is essential: each stage resumes at the parse position left by the one before it.
        parseKey();
        parseValue();
        parseComment();
    }

    /**
     * Returns the keyword component of the parsed header line. If the processing of HIERARCH keywords is enabled, it
     * may be a {@code HIERARCH} style long key with the components separated by dots (e.g.
     * {@code HIERARCH.ORG.SYSTEM.SUBSYS.ELEMENT}). Otherwise, it is the trimmed and upper-cased content of (at most)
     * the first 8 characters of the record, which is simply {@code HIERARCH} for hierarchical records if
     * {@link FitsFactory#setUseHierarch(boolean)} was set <code>false</code>.
     *
     * @return the FITS header keyword for the line. It may be empty, if the record had no keyword.
     *
     * @see    FitsFactory#setUseHierarch(boolean)
     */
    String getKey() {
        return key;
    }

    /**
     * Returns the value component of the parsed header line. For quoted string values it is the content between the
     * quotes, with doubled quotes reduced to single ones and trailing spaces removed. For all other values it is the
     * trimmed text found between the assignment and the comment separator.
     *
     * @return the value part of the line or <code>null</code> if the line contained no value.
     *
     * @see    #isString()
     * @see    #getInferredType()
     */
    String getValue() {
        return value;
    }

    /**
     * Returns the comment component of the parsed header line, with all leading and trailing spaces preserved. When
     * the record has a value, the '/' that separates the comment from it is not part of the returned text.
     *
     * @return the comment part of the line or <code>null</code> if the line contained no comment.
     *
     * @see    #getTrimmedComment()
     */
    String getUntrimmedComment() {
        return comment;
    }

    /**
     * Returns the comment component of the parsed header line, stripped of its leading and trailing white space.
     *
     * @return the trimmed comment of the line, or <code>null</code> if the line contained no comment.
     *
     * @see    #getUntrimmedComment()
     */
    String getTrimmedComment() {
        if (comment == null) {
            return null;
        }
        return comment.trim();
    }

    /**
     * Checks whether the parsed line contained a quoted string value. A record whose string value has no closing
     * quote cannot be parsed at all (the constructor throws an {@link UnclosedQuoteException}), unless
     * {@link FitsFactory#setAllowHeaderRepairs(boolean)} was set to <code>true</code> beforehand, in which case the
     * rest of the record is taken as the string value.
     *
     * @return <code>true</code> if the value was quoted, or <code>false</code> if there was no value, or if it was
     *             not a string.
     *
     * @see    FitsFactory#setAllowHeaderRepairs(boolean)
     */
    boolean isString() {
        return type != null && String.class.isAssignableFrom(type);
    }

    /**
     * <p>
     * Returns the inferred Java class for the value stored in the header record, such as a {@link String} class, a
     * {@link Boolean} class, an integer type ({@link Integer}, {@link Long}, or {@link BigInteger}) class, a decimal
     * type ({@link Float}, {@link Double}, or {@link BigDecimal}) class, a {@link ComplexValue} class, or
     * <code>null</code>. For number types, it returns the 'smallest' type that can be used to represent the string
     * value. An empty non-string value is reported as {@link Boolean}.
     * </p>
     * <p>
     * It is an inferred type, as the true underlying type that was used to create the value is lost. For example,
     * the value <code>42</code> may have been written from any integer type, including <code>byte</code> or
     * <code>short</code>, but this routine will guess it to be an <code>int</code> ({@link Integer}) type. As such,
     * it may not be equal to {@link HeaderCard#valueType()} from which the record was created, and hence should not
     * be used for round-trip testing of type equality.
     * </p>
     *
     * @return the inferred type of the stored serialized (string) value, or <code>null</code> if the record had no
     *             value, or if the value does not seem to match any of the supported value types.
     *
     * @see    HeaderCard#valueType()
     */
    Class getInferredType() {
        return type;
    }

    /**
     * Parses the FITS keyword at the start of the line and standardizes it (trimmed, upper-case, and HIERARCH
     * components joined with dots). On return {@link #key} holds the keyword and {@link #parsePos} points right
     * after the last character that belongs to it.
     * <p>
     * The base keyword is always converted to upper-case using {@link Locale#US} rules, so that the result does not
     * depend on the default locale of the JVM (e.g. a lower-case 'i' must become 'I' under a Turkish locale also).
     * </p>
     * <p>
     * Non-standard keywords are not rejected. The deviations are reported as warnings through the logger only.
     * </p>
     */
    private void parseKey() {
        /*
         * AK: The parsing of headers should never be stricter that the writing, such that any header written by this
         * library can be parsed back without errors. (And, if anything, the parsing should be more permissive to allow
         * reading FITS produced by other libraries, which may be less stringent in their rules). The original
         * implementation strongly enforced the ESO HIERARCH convention when reading, but not at all for writing. Here
         * is a tolerant hierarch parser that will read back any hierarch key that was written by this library. The
         * input FITS can use any space or even '.' to separate the hierarchies, and the hierarchical elements may
         * contain any ASCII characters other than those used for separating. It is more in line with what we do with
         * standard keys too.
         */

        // Find the '=' in the line, if any...
        int iEq = line.indexOf('=');

        // The stem is in the first 8 characters or what precedes an '=' character
        // before that.
        int endStem = (iEq >= 0 && iEq <= HeaderCard.MAX_KEYWORD_LENGTH) ? iEq : HeaderCard.MAX_KEYWORD_LENGTH;
        endStem = Math.min(line.length(), endStem);

        String rawStem = line.substring(0, endStem).trim();

        // Check for space at the start of the keyword...
        if (endStem > 0 && !rawStem.isEmpty()) {
            if (Character.isSpaceChar(line.charAt(0))) {
                LOG.warning("[" + sanitize(rawStem) + "] Non-standard starting with a space (trimming).");
            }
        }

        // Locale-independent upper-casing: the default locale must not be able to alter an ASCII keyword
        // (and it keeps the base keyword consistent with the HIERARCH upper-casing further below).
        String stem = rawStem.toUpperCase(Locale.US);

        if (!stem.equals(rawStem)) {
            LOG.warning("[" + sanitize(rawStem) + "] Non-standard lower-case letter(s) in base keyword.");
        }

        key = stem;
        parsePos = endStem;

        // If not using HIERARCH, then be very resilient, and return whatever key the first 8 chars make...

        // If the line does not have an '=', can only be a simple key
        // If it's not a HIERARCH keyword, then return the simple key.
        if (!FitsFactory.getUseHierarch() || (iEq < 0) || !stem.equals(HIERARCH.key())) {
            return;
        }

        // Compose the hierarchical key from the components found between the base keyword and the '='.
        // Spaces, tabs, line breaks and dots are all accepted as separators.
        StringTokenizer tokens = new StringTokenizer(line.substring(stem.length(), iEq), " \t\r\n.");
        StringBuilder builder = new StringBuilder(stem);

        while (tokens.hasMoreTokens()) {
            String token = tokens.nextToken();

            // Move the parse position to just after this component in the original line.
            parsePos = line.indexOf(token, parsePos) + token.length();

            // Add a . to separate hierarchies
            builder.append('.');
            builder.append(token);
        }

        key = builder.toString();

        if (HIERARCH.key().equals(key)) {
            // The key is only HIERARCH, without a hierarchical keyword after it...
            LOG.warning("HIERARCH base keyword without HIERARCH-style long key after it.");
            return;
        }

        if (!FitsFactory.getHierarchFormater().isCaseSensitive()) {
            key = key.toUpperCase(Locale.US);
        }

        try {
            HeaderCard.validateKey(key);
        } catch (IllegalArgumentException e) {
            // An invalid long key is tolerated when reading; it is reported only.
            LOG.warning(e.getMessage());
        }
    }

    /**
     * Moves the parse position forward past any space characters found at the current parse position, and reports
     * whether anything is left to parse after them. If only spaces remain, the parse position ends up at the end of
     * the line.
     *
     * @return <code>true</code> if the parse position is now at a non-space character, or <code>false</code> if the
     *             end of the line was reached.
     */
    private boolean skipSpaces() {
        final int length = line.length();

        while (parsePos < length && Character.isSpaceChar(line.charAt(parsePos))) {
            parsePos++;
        }

        // Either we stopped at a non-space character, or we ran out of line.
        return parsePos < length;
    }

    /**
     * Parses the comment, which is whatever remains of the line from the current parse position, after skipping
     * leading spaces. If the record has a value, the comment is expected to be introduced by a '/', which is not
     * included in the comment. Anything else found after a value is reported as junk, and is included in the comment
     * in its entirety. After this call the parse position is at the end of the line.
     */
    private void parseComment() {
        boolean hasMore = skipSpaces();
        if (!hasMore) {
            // Only spaces (or nothing) left: no comment.
            return;
        }

        // Without a value, everything from here on is the comment, as is.
        if (value != null) {
            boolean atSeparator = line.charAt(parsePos) == '/';
            if (!atSeparator) {
                // Junk after a value -- it becomes the start of the comment...
                LOG.warning("[" + sanitize(getKey()) + "] Junk after value (included in the comment).");
            } else {
                // The comment starts right after the '/'.
                parsePos++;
            }
        }

        final int start = parsePos;
        parsePos = line.length();
        comment = line.substring(start);

        try {
            HeaderCard.validateChars(comment);
        } catch (IllegalArgumentException e) {
            // Illegal characters in the comment are tolerated, but reported.
            LOG.warning("[" + sanitize(getKey()) + "]: " + e.getMessage());
        }
    }

    /**
     * Parses the value component, if any, starting from the current parse position, and leaves the parse position
     * at the first character after the value. A value is looked for only if the record has a keyword, and either
     * the keyword is {@code CONTINUE} (value without assignment), or else the next non-space character is an '='.
     * Otherwise the value stays <code>null</code>.
     * <p>
     * Deviations from the standard placement of the '=' are logged as warnings. An '=' beyond the standard keyword
     * field is accepted as an assignment for HIERARCH-style keys only. For any other key the record is treated as
     * having no value.
     * </p>
     *
     * @throws UnclosedQuoteException if there is a missing end-quote and header repairs aren't allowed.
     *
     * @see                           FitsFactory#setAllowHeaderRepairs(boolean)
     */
    private void parseValue() throws UnclosedQuoteException {
        if (key.isEmpty()) {
            // No keyword, no value.
            return;
        }
        if (!skipSpaces()) {
            // Nothing but spaces after the keyword.
            return;
        }

        if (CONTINUE.key().equals(key)) {
            // CONTINUE records carry their value without an assignment.
            parseValueBody();
        } else if (line.charAt(parsePos) == '=') {
            final int afterEquals = parsePos + 1;

            if (parsePos < HeaderCard.MAX_KEYWORD_LENGTH) {
                LOG.warning("[" + sanitize(key) + "] assigmment before byte " + (HeaderCard.MAX_KEYWORD_LENGTH + 1)
                        + " for key '" + sanitize(key) + "'.");
            }

            if (afterEquals < line.length()) {
                if (line.charAt(afterEquals) != ' ') {
                    LOG.warning("[" + sanitize(key) + "] Missing required standard space after '='.");
                }
            } else {
                LOG.warning("[" + sanitize(key) + "] Record ends with '='.");
            }

            // An '=' after the 9th character is an assignment only for hierarch keys...
            final boolean lateEquals = parsePos > HeaderCard.MAX_KEYWORD_LENGTH;
            if (lateEquals && !key.startsWith(HIERARCH.key() + ".")) {
                LOG.warning("[" + sanitize(key) + "] Possibly misplaced '=' (after byte 9).");
                // Not a HIERARCH key: leave the rest of the record for the comment.
                return;
            }

            // Step over the '=' and parse what follows it.
            parsePos = afterEquals;
            parseValueBody();
        }

        try {
            HeaderCard.validateChars(value);
        } catch (IllegalArgumentException e) {
            // Illegal characters in the value are tolerated, but reported.
            LOG.warning("[" + sanitize(getKey()) + "] " + e.getMessage());
        }
    }

    /**
     * Parses the body of the value starting from the current parse position, after skipping leading spaces. A value
     * that begins with a single quote is parsed as a quoted string. Anything else is taken, trimmed, up to the next
     * '/' (or to the end of the line if there is none), and its type is inferred from its content. The parse
     * position is left at the first character after the value. If only spaces remain on the line, the value is left
     * unchanged.
     *
     * @throws UnclosedQuoteException if there is a missing end-quote and header repairs aren't allowed.
     *
     * @see                           FitsFactory#setAllowHeaderRepairs(boolean)
     */
    private void parseValueBody() throws UnclosedQuoteException {
        if (!skipSpaces()) {
            // Nothing after the assignment.
            return;
        }

        if (!isNextQuote()) {
            // Non-string value: it runs until the comment separator, or else to the end of the line.
            final int slash = line.indexOf('/', parsePos);
            final int end = (slash < 0) ? line.length() : slash;

            value = line.substring(parsePos, end).trim();
            parsePos = end;
            type = getInferredValueType(key, value);
            return;
        }

        // Quoted string value (or an exception if the quote is never closed).
        parseStringValue();
    }

    /**
     * Checks whether there is a character at the current parse position, and it is a single quote.
     *
     * @return <code>true</code> if the parse position is inside the line and points to a single quote, otherwise
     *             <code>false</code>.
     */
    private boolean isNextQuote() {
        return parsePos < line.length() && line.charAt(parsePos) == '\'';
    }

    /**
     * Converts the content of a parsed string value to a string without its trailing spaces. Leading spaces are
     * preserved. What counts as a space is decided by {@link Character#isSpaceChar(char)}, so e.g. trailing tabs are
     * not removed.
     *
     * @param  buf the parsed string value.
     *
     * @return     the string value with trailing spaces removed, which is the empty string if the buffer holds
     *                 nothing but spaces.
     */
    private static String getNoTrailingSpaceString(StringBuilder buf) {
        // Walk back from the end, for as long as the character before 'end' is a space.
        int end = buf.length();
        while (end > 0 && Character.isSpaceChar(buf.charAt(end - 1))) {
            end--;
        }

        if (end == 0) {
            return "";
        }
        return buf.substring(0, end);
    }

    /**
     * Parses a quoted string value, whose opening quote is at the current parse position. The value becomes the
     * text up to the closing quote, with every doubled single quote inside it reduced to one single quote, and with
     * the trailing spaces removed. On success the parse position is left right after the closing quote.
     * <p>
     * If the closing quote is missing, the rest of the record is taken as the value (and a warning is logged),
     * provided header repairs are allowed. Otherwise an exception is thrown.
     * </p>
     *
     * @throws UnclosedQuoteException if there is a missing end-quote and header repairs aren't allowed.
     *
     * @see                           FitsFactory#setAllowHeaderRepairs(boolean)
     */
    private void parseStringValue() throws UnclosedQuoteException {
        type = String.class;
        final StringBuilder text = new StringBuilder(HeaderCard.MAX_VALUE_LENGTH);

        // Step over the opening quote, then consume characters one at a time.
        parsePos++;
        while (parsePos < line.length()) {
            if (isNextQuote()) {
                // A quote is either the first of a doubled pair, or else the closing quote.
                parsePos++;

                if (!isNextQuote()) {
                    // No second quote: the string is complete, and we are just past its closing quote.
                    value = getNoTrailingSpaceString(text);
                    return;
                }
                // Doubled quote: the second one is appended below as a literal quote.
            }
            text.append(line.charAt(parsePos));
            parsePos++;
        }

        // We ran out of line before finding the closing quote.
        if (FitsFactory.isAllowHeaderRepairs()) {
            LOG.warning("[" + sanitize(key) + "] Ignored missing end quote (value parsed to end of record).");
            value = getNoTrailingSpaceString(text);
            return;
        }

        throw new UnclosedQuoteException(line);
    }

    /**
     * Returns the inferred Java class for the specified non-string value. See {@link #getInferredType()} for a more
     * detailed description. The value is matched case-insensitively (using {@link Locale#US} rules, so the outcome
     * does not depend on the default locale), in the following order: logical ({@code T} or {@code F}), integer,
     * decimal, and complex.
     *
     * @param  key   the keyword of the record, used only for identifying the record in warnings.
     * @param  value the serialized (string) representation of a FITS header value. It must not be
     *                   <code>null</code>.
     *
     * @return       the inferred type of the specified serialized (string) value, or <code>null</code> if the value
     *                   does not seem to match any of the supported value types. Empty values default to
     *                   <code>Boolean.class</code>.
     */
    private static Class getInferredValueType(String key, String value) {
        // TODO We never call this with null locally, so there is no null check here...
        if (value.isEmpty()) {
            LOG.warning("[" + sanitize(key) + "] Null non-string value (defaulted to Boolean.class).");
            return Boolean.class;
        }

        // Locale-independent upper-casing, consistent with getDecimalType().
        String trimmedValue = value.trim().toUpperCase(Locale.US);

        if ("T".equals(trimmedValue) || "F".equals(trimmedValue)) {
            return Boolean.class;
        }
        // Integers must be tested before decimals, since the decimal pattern matches integers too.
        if (INT_REGEX.matcher(trimmedValue).matches()) {
            return getIntegerType(trimmedValue);
        }
        if (DECIMAL_REGEX.matcher(trimmedValue).matches()) {
            return getDecimalType(trimmedValue);
        }
        if (COMPLEX_REGEX.matcher(trimmedValue).matches()) {
            return ComplexValue.class;
        }

        LOG.warning("[" + sanitize(key) + "] Unrecognised non-string value type '" + sanitize(trimmedValue) + "'.");

        return null;
    }

    /**
     * Guesses the decimal type for the string representation of a decimal value, based on the number of digits
     * provided, and on whether the value fits in the range of the type. A value with a 'D' exponent is never guessed
     * to be a {@link Float}: it is a {@link Double} at the least.
     *
     * @param  value the string representation of a decimal value, with either an 'E' or a 'D' (in any case) marking
     *                   the exponent, if there is one.
     *
     * @return       the Java class ({@link Float}, {@link Double}, or {@link BigDecimal}) that can be used to
     *                   represent the value with the precision provided.
     *
     * @see          #getInferredValueType(String, String)
     * @see          #getIntegerType(String)
     */
    private static Class getDecimalType(String value) {
        final String upper = value.toUpperCase(Locale.US);
        final boolean hasD = upper.indexOf('D') >= 0;

        // BigDecimal understands 'E' exponents only, so the FITS double-precision 'D' is converted for it.
        final BigDecimal big = new BigDecimal(hasD ? upper.replace('D', 'E') : upper);

        if (big.signum() == 0) {
            // Zero has no significant digits: go by the number of decimal places it was written with.
            final int scale = big.scale();
            if (scale <= FlexFormat.FLOAT_DECIMALS) {
                if (hasD) {
                    return Double.class;
                }
                return Float.class;
            }
            if (scale <= FlexFormat.DOUBLE_DECIMALS) {
                return Double.class;
            }
            return BigDecimal.class;
        }

        // Non-zero value: count the digits after the leading one, and check that the narrower type neither
        // underflows to zero nor overflows to infinity.
        final int decimals = big.precision() - 1;

        if (decimals <= FlexFormat.FLOAT_DECIMALS) {
            final float asFloat = big.floatValue();
            if (asFloat != 0.0F && Float.isFinite(asFloat)) {
                if (hasD) {
                    return Double.class;
                }
                return Float.class;
            }
        }

        if (decimals <= FlexFormat.DOUBLE_DECIMALS) {
            final double asDouble = big.doubleValue();
            if (asDouble != 0.0 && Double.isFinite(asDouble)) {
                return Double.class;
            }
        }

        return BigDecimal.class;
    }

    /**
     * Guesses the integer type for the string representation of an integer value, as the narrowest of the supported
     * types whose range contains the value.
     *
     * @param  value the string representation of an integer value.
     *
     * @return       the Java class ({@link Integer}, {@link Long}, or {@link BigInteger}) that can be used to
     *                   represent the value.
     *
     * @see          #getInferredValueType(String, String)
     * @see          #getDecimalType(String)
     */
    private static Class getIntegerType(String value) {
        // The bit length excludes the sign bit, hence a value fits a type if it needs fewer bits than the type has.
        final int magnitudeBits = new BigInteger(value).bitLength();

        if (magnitudeBits >= Long.SIZE) {
            return BigInteger.class;
        }
        if (magnitudeBits >= Integer.SIZE) {
            return Long.class;
        }
        return Integer.class;
    }

    /**
     * Shorthand for {@link HeaderCard#sanitize(String)}, which is applied to every piece of header content before it
     * is included in a log message.
     *
     * @param  text the text to sanitize.
     *
     * @return      whatever {@link HeaderCard#sanitize(String)} returns for the text. NOTE(review): presumably a
     *                  version of the text that is safe to print -- confirm in HeaderCard.
     */
    private static String sanitize(String text) {
        return HeaderCard.sanitize(text);
    }

    /**
     * Returns the logger through which this class reports its warnings. Its level is set to {@link Level#SEVERE}
     * when the class is initialised, so the warnings are not logged unless the level is changed.
     *
     * @return the logger used by the parser.
     */
    static Logger getLogger() {
        return LOG;
    }

}