All downloads are free. Search and download functionality uses the official Maven repository.

nom.tam.fits.HeaderCardParser Maven / Gradle / Ivy

/*
 * #%L
 * nom.tam FITS library
 * %%
 * Copyright (C) 2004 - 2024 nom-tam-fits
 * %%
 * This is free and unencumbered software released into the public domain.
 *
 * Anyone is free to copy, modify, publish, use, compile, sell, or
 * distribute this software, either in source code form or as a compiled
 * binary, for any purpose, commercial or non-commercial, and by any
 * means.
 *
 * In jurisdictions that recognize copyright laws, the author or authors
 * of this software dedicate any and all copyright interest in the
 * software to the public domain. We make this dedication for the benefit
 * of the public at large and to the detriment of our heirs and
 * successors. We intend this dedication to be an overt act of
 * relinquishment in perpetuity of all present and future rights to this
 * software under copyright law.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 * #L%
 */

package nom.tam.fits;

import java.math.BigDecimal;
import java.math.BigInteger;
import java.util.Locale;
import java.util.StringTokenizer;
import java.util.logging.Level;
import java.util.logging.Logger;
import java.util.regex.Pattern;

import nom.tam.util.ComplexValue;
import nom.tam.util.FlexFormat;

import static nom.tam.fits.header.NonStandard.HIERARCH;
import static nom.tam.fits.header.Standard.CONTINUE;

/**
 * 

* Converts a single 80-character wide FITS header record into a header card. See {@link HeaderCard#create(String)} for * a description of the rules that guide parsing. *

*

* When parsing header records that violate FITS standards, the violations can be logged or will throw appropriate * excpetions (depending on the severity of the standard violation and whether * {@link FitsFactory#setAllowHeaderRepairs(boolean)} is enabled or not. The logging of violations is disabled by * default, but may be controlled via {@link Header#setParserWarningsEnabled(boolean)}. *

* * @author Attila Kovacs * * @see FitsFactory#setAllowHeaderRepairs(boolean) * @see Header#setParserWarningsEnabled(boolean) */ class HeaderCardParser { private static final Logger LOG = Logger.getLogger(HeaderCardParser.class.getName()); static { // Do not log warnings by default. LOG.setLevel(Level.SEVERE); } /** regexp for IEEE floats */ private static final Pattern DECIMAL_REGEX = Pattern.compile("[+-]?\\d+(\\.\\d*)?([dDeE][+-]?\\d+)?"); /** regexp for complex numbers */ private static final Pattern COMPLEX_REGEX = Pattern .compile("\\(\\s*" + DECIMAL_REGEX + "\\s*,\\s*" + DECIMAL_REGEX + "\\s*\\)"); /** regexp for decimal integers. */ private static final Pattern INT_REGEX = Pattern.compile("[+-]?\\d+"); /** The header line (usually 80-character width), which to parse. */ private String line; /** * the value of the card. (trimmed and standardized with . in HIERARCH) */ private String key = null; /** * the value of the card. (trimmed) */ private String value = null; /** * the comment specified with the value. */ private String comment = null; /** * was the value quoted? */ private Class type = null; /** * The position in the string that right after the last character processed by this parser */ private int parsePos = 0; /** * Instantiates a new parser for a FITS header line. * * @param line a line in the FITS header, normally exactly 80-characters wide (but need not * be). * * @see #getKey() * @see #getValue() * @see #getComment() * @see #isString() * * @throws UnclosedQuoteException if there is a missing end-quote and header repairs aren't allowed. * @throws IllegalArgumentException if the record contained neither a key or a value. * * @see FitsFactory#setAllowHeaderRepairs(boolean) */ HeaderCardParser(String line) throws UnclosedQuoteException, IllegalArgumentException { this.line = line; // TODO HeaderCard never calls this with a null argument, so the check below is dead code here... 
// if (line == null) { // throw new IllegalArgumentException("Cannot parse null string"); // } parseKey(); parseValue(); parseComment(); } /** * Returns the keyword component of the parsed header line. If the processing of HIERARCH keywords is enabled, it * may be a `HIERARCH` style long key with the components separated by dots (e.g. * `HIERARCH.ORG.SYSTEM.SUBSYS.ELEMENT`). Otherwise, it will be a standard 0--8 character standard uppercase FITS * keyword (including simply `HIERARCH` if {@link FitsFactory#setUseHierarch(boolean)} was set false). * * @return the FITS header keyword for the line. * * @see FitsFactory#setUseHierarch(boolean) */ String getKey() { return key; } /** * Returns the value component of the parsed header line. * * @return the value part of the line or null if the line contained no value. * * @see FitsFactory#setUseHierarch(boolean) */ String getValue() { return value; } /** * Returns the comment component of the parsed header line, with all leading and trailing spaces preserved. * * @return the comment part of the line or null if the line contained no comment. * * @see #getTrimmedComment() */ String getUntrimmedComment() { return comment; } /** * Returns the comment component of the parsed header line, with both leading and trailing spaces removed * * @return the comment part of the line or null if the line contained no comment. * * @see #getUntrimmedComment() */ String getTrimmedComment() { return comment == null ? null : comment.trim(); } /** * Returns whether the line contained a quoted string value. By default, strings with missing end quotes are no * considered string values, but rather as comments. To allow processing lines with missing quotes as string values, * you must set {@link FitsFactory#setAllowHeaderRepairs(boolean)} to true prior to parsing a header * line with the missing end quote. * * @return true if the value was quoted. 
* * @see FitsFactory#setAllowHeaderRepairs(boolean) */ boolean isString() { if (type == null) { return false; } return String.class.isAssignableFrom(type); } /** *

* Returns the inferred Java class for the value stored in the header record, such as a {@link String} class, a * {@link Boolean} class, an integer type ({@link Integer}, {@link Long}, or {@link BigInteger}) class, a decimal * type ({@link Float}, {@link Double}, or {@link BigDecimal}) class, a {@link ComplexValue} class, or * null. For number types, it returns the 'smallest' type that can be used to represent the string * value. *

*

* Its an inferred type as the true underlying type that was used to create the value is lost. For example, the * value 42 may have been written from any integer type, including byte or * short, but this routine will guess it to be an int ({@link Integer} type. As such, it * may not be equal to {@link HeaderCard#valueType()} from which the record was created, and hence should not be * used for round-trip testing of type equality. *

* * @return the inferred type of the stored serialized (string) value, or null if the value does not * seem to match any of the supported value types. * * @see HeaderCard#valueType() */ Class getInferredType() { return type; } /** * Parses a fits keyword from a card and standardizes it (trim, uppercase, and hierarch with dots). */ private void parseKey() { /* * AK: The parsing of headers should never be stricter that the writing, such that any header written by this * library can be parsed back without errors. (And, if anything, the parsing should be more permissive to allow * reading FITS produced by other libraries, which may be less stringent in their rules). The original * implementation strongly enforced the ESO HIERARCH convention when reading, but not at all for writing. Here * is a tolerant hierarch parser that will read back any hierarch key that was written by this library. The * input FITS can use any space or even '.' to separate the hierarchies, and the hierarchical elements may * contain any ASCII characters other than those used for separating. It is more in line with what we do with * standard keys too. */ // Find the '=' in the line, if any... int iEq = line.indexOf('='); // The stem is in the first 8 characters or what precedes an '=' character // before that. int endStem = (iEq >= 0 && iEq <= HeaderCard.MAX_KEYWORD_LENGTH) ? iEq : HeaderCard.MAX_KEYWORD_LENGTH; endStem = Math.min(line.length(), endStem); String rawStem = line.substring(0, endStem).trim(); // Check for space at the start of the keyword... 
if (endStem > 0 && !rawStem.isEmpty()) { if (Character.isSpaceChar(line.charAt(0))) { LOG.warning("[" + sanitize(rawStem) + "] Non-standard starting with a space (trimming)."); } } String stem = rawStem.toUpperCase(); if (!stem.equals(rawStem)) { LOG.warning("[" + sanitize(rawStem) + "] Non-standard lower-case letter(s) in base keyword."); } key = stem; parsePos = endStem; // If not using HIERARCH, then be very resilient, and return whatever key the first 8 chars make... // If the line does not have an '=', can only be a simple key // If it's not a HIERARCH keyword, then return the simple key. if (!FitsFactory.getUseHierarch() || (iEq < 0) || !stem.equals(HIERARCH.key())) { return; } // Compose the hierarchical key... StringTokenizer tokens = new StringTokenizer(line.substring(stem.length(), iEq), " \t\r\n."); StringBuilder builder = new StringBuilder(stem); while (tokens.hasMoreTokens()) { String token = tokens.nextToken(); parsePos = line.indexOf(token, parsePos) + token.length(); // Add a . to separate hierarchies builder.append('.'); builder.append(token); } key = builder.toString(); if (HIERARCH.key().equals(key)) { // The key is only HIERARCH, without a hierarchical keyword after it... LOG.warning("HIERARCH base keyword without HIERARCH-style long key after it."); return; } if (!FitsFactory.getHierarchFormater().isCaseSensitive()) { key = key.toUpperCase(Locale.US); } try { HeaderCard.validateKey(key); } catch (IllegalArgumentException e) { LOG.warning(e.getMessage()); } } /** * Advances the parse position to skip any spaces at the current parse position, and returns whether there is * anything left in the line after the spaces... * * @return true if there is more non-space characters in the string, otherwise false */ private boolean skipSpaces() { for (; parsePos < line.length(); parsePos++) { if (!Character.isSpaceChar(line.charAt(parsePos))) { // Line has non-space characters left to parse... return true; } } // nothing left to parse. 
return false; } /** * Parses the comment components starting from the current parse position. After this call the parse position is set * to the end of the string. The leading '/' (if found) is not included in the comment. */ private void parseComment() { if (!skipSpaces()) { // nothing left to parse. return; } // if no value, then everything is comment from here on... if (value != null) { if (line.charAt(parsePos) == '/') { // Skip the '/' itself, the comment is whatever is after it. parsePos++; } else { // Junk after a string value -- interpret it as the start of the comment... LOG.warning("[" + sanitize(getKey()) + "] Junk after value (included in the comment)."); } } comment = line.substring(parsePos); parsePos = line.length(); try { HeaderCard.validateChars(comment); } catch (IllegalArgumentException e) { LOG.warning("[" + sanitize(getKey()) + "]: " + e.getMessage()); } } /** * Parses the value component from the current parse position. The parse position is advanced to the first character * after the value specification in the line. If the header line does not contain a value component, then the value * field of this object is set to null. * * @throws UnclosedQuoteException if there is a missing end-quote and header repairs aren't allowed. * * @see FitsFactory#setAllowHeaderRepairs(boolean) */ private void parseValue() throws UnclosedQuoteException { if (key.isEmpty() || !skipSpaces()) { // nothing left to parse. 
return; } if (CONTINUE.key().equals(key)) { parseValueBody(); } else if (line.charAt(parsePos) == '=') { if (parsePos < HeaderCard.MAX_KEYWORD_LENGTH) { LOG.warning("[" + sanitize(key) + "] assigmment before byte " + (HeaderCard.MAX_KEYWORD_LENGTH + 1) + " for key '" + sanitize(key) + "'."); } if (parsePos + 1 >= line.length()) { LOG.warning("[" + sanitize(key) + "] Record ends with '='."); } else if (line.charAt(parsePos + 1) != ' ') { LOG.warning("[" + sanitize(key) + "] Missing required standard space after '='."); } if (parsePos > HeaderCard.MAX_KEYWORD_LENGTH) { // equal sign = after the 9th char -- only supported with hierarch keys... if (!key.startsWith(HIERARCH.key() + ".")) { LOG.warning("[" + sanitize(key) + "] Possibly misplaced '=' (after byte 9)."); // It's not a HIERARCH key return; } } parsePos++; parseValueBody(); } try { HeaderCard.validateChars(value); } catch (IllegalArgumentException e) { LOG.warning("[" + sanitize(getKey()) + "] " + e.getMessage()); } } /** * Parses the value body from the current parse position. The parse position is advanced to the first character * after the value specification in the line. If the header line does not contain a value component, then the value * field of this object is set to null. * * @throws UnclosedQuoteException if there is a missing end-quote and header repairs aren't allowed. * * @see FitsFactory#setAllowHeaderRepairs(boolean) */ private void parseValueBody() throws UnclosedQuoteException { if (!skipSpaces()) { // nothing left to parse. return; } if (isNextQuote()) { // Parse as a string value, or else throw an exception. parseStringValue(); } else { int end = line.indexOf('/', parsePos); if (end < 0) { end = line.length(); } value = line.substring(parsePos, end).trim(); parsePos = end; type = getInferredValueType(key, value); } } /** * Checks if the next character, at the current parse position, is a single quote. 
* * @return true if the next character on the line exists and is a single quote, otherwise * false. */ private boolean isNextQuote() { if (parsePos >= line.length()) { // nothing left to parse. return false; } return line.charAt(parsePos) == '\''; } /** * Returns the string fom a parsed string value component, with trailing spaces removed. It preserves leading * spaces. * * @param buf the parsed string value. * * @return the string value with trailing spaces removed. */ private static String getNoTrailingSpaceString(StringBuilder buf) { int to = buf.length(); // Remove trailing spaces only! while (--to >= 0) { if (!Character.isSpaceChar(buf.charAt(to))) { break; } } return to < 0 ? "" : buf.substring(0, to + 1); } /** * Parses a quoted string value starting at the current parse position. If successful, the parse position is updated * to after the string. Otherwise, the parse position is advanced only to skip leading spaces starting from the * input position. * * @throws UnclosedQuoteException if there is a missing end-quote and header repairs aren't allowed. * * @see FitsFactory#setAllowHeaderRepairs(boolean) */ private void parseStringValue() throws UnclosedQuoteException { type = String.class; StringBuilder buf = new StringBuilder(HeaderCard.MAX_VALUE_LENGTH); // Build the string value, up to the end quote and paying attention to double // quotes inside the string, which are translated to single quotes within // the string value itself. 
for (++parsePos; parsePos < line.length(); parsePos++) { if (isNextQuote()) { parsePos++; if (!isNextQuote()) { // Closing single quote; value = getNoTrailingSpaceString(buf); return; } } buf.append(line.charAt(parsePos)); } // String with missing end quote if (!FitsFactory.isAllowHeaderRepairs()) { throw new UnclosedQuoteException(line); } LOG.warning("[" + sanitize(key) + "] Ignored missing end quote (value parsed to end of record)."); value = getNoTrailingSpaceString(buf); } /** * Returns the inferred Java class for the specified value. See {@link #getInferredType()} for a more detailed * description. * * @param value the serialized (string) representation of a FITS header value. * * @return the inferred type of the specified serialized (string) value, or null if the value * does not seem to match any of the supported value types. null values default to * Boolean.class. */ private static Class getInferredValueType(String key, String value) { // TODO We never call this with null locally, so the following check is dead code here... // if (value == null) { // return Boolean.class; // } if (value.isEmpty()) { LOG.warning("[" + sanitize(key) + "] Null non-string value (defaulted to Boolean.class)."); return Boolean.class; } String trimmedValue = value.trim().toUpperCase(); if ("T".equals(trimmedValue) || "F".equals(trimmedValue)) { return Boolean.class; } if (INT_REGEX.matcher(trimmedValue).matches()) { return getIntegerType(trimmedValue); } if (DECIMAL_REGEX.matcher(trimmedValue).matches()) { return getDecimalType(trimmedValue); } if (COMPLEX_REGEX.matcher(trimmedValue).matches()) { return ComplexValue.class; } LOG.warning("[" + sanitize(key) + "] Unrecognised non-string value type '" + sanitize(trimmedValue) + "'."); return null; } /** * Returns the guessed decimal type of a string representation of a decimal value. * * @param value the string representation of a decimal value. 
* * @return the The Java class ({@link Float}, {@link Double}, or {@link BigDecimal}) that can be used to * represent the value with the precision provided. * * @see #getInferredValueType() * @see #getIntegerType(String) */ private static Class getDecimalType(String value) { value = value.toUpperCase(Locale.US); boolean hasD = (value.indexOf('D') >= 0); if (hasD) { // Convert the Double Scientific Notation specified by FITS to pure IEEE. value = value.replace('D', 'E'); } BigDecimal big = new BigDecimal(value); // Check for zero, and deal with it separately... if (big.stripTrailingZeros().equals(BigDecimal.ZERO)) { int decimals = big.scale(); if (decimals <= FlexFormat.FLOAT_DECIMALS) { return hasD ? Double.class : Float.class; } if (decimals <= FlexFormat.DOUBLE_DECIMALS) { return Double.class; } return BigDecimal.class; } // Now non-zero values... int decimals = big.precision() - 1; float f = big.floatValue(); if (decimals <= FlexFormat.FLOAT_DECIMALS && (f != 0.0F) && Float.isFinite(f)) { return hasD ? Double.class : Float.class; } double d = big.doubleValue(); if (decimals <= FlexFormat.DOUBLE_DECIMALS && (d != 0.0) && Double.isFinite(d)) { return Double.class; } return BigDecimal.class; } /** * Returns the guessed integer type of a string representation of a integer value. * * @param value the string representation of an integer value. * * @return the The Java class ({@link Integer}, {@link Long}, or {@link BigInteger}) that can be used to * represent the value with the number of digits provided. * * @see #getInferredValueType() * @see #getDecimalType(String) */ private static Class getIntegerType(String value) { int bits = new BigInteger(value).bitLength(); if (bits < Integer.SIZE) { return Integer.class; } if (bits < Long.SIZE) { return Long.class; } return BigInteger.class; } private static String sanitize(String text) { return HeaderCard.sanitize(text); } static Logger getLogger() { return LOG; } }