com.google.common.flogger.parser.PrintfMessageParser Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of runtime-impl-jetty12 Show documentation
There is a newer version: 2.0.31
/*
 * Copyright (C) 2015 The Flogger Authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.google.common.flogger.parser;

/**
 * A specialized {@link MessageParser} for processing log messages in printf style, as used by
 * {@link String#format}. This is an abstract parser which knows how to
 * process and extract placeholder terms at a high level, but does not impose its own semantics
 * for place-holder types.
 * 
 * Typically you should not subclass this class, but instead subclass
 * {@link DefaultPrintfMessageParser}, which provides compatibility with {@link String#format}.
 */
public abstract class PrintfMessageParser extends MessageParser {
  // Assume that anything outside this set of chars is suspicious and not safe.
  private static final String ALLOWED_NEWLINE_PATTERN = "\\n|\\r(?:\\n)?";
  /** The system newline separator for replacing %n. */
  private static final String SYSTEM_NEWLINE = getSafeSystemNewline();
  /**
   * Returns the system newline separator avoiding any issues with security exceptions or
   * "suspicious" values. The only allowed return values are "\n" (default), "\r" or "\r\n".
   */
  static String getSafeSystemNewline() {
    try {
      String unsafeNewline = System.getProperty("line.separator");
      if (unsafeNewline.matches(ALLOWED_NEWLINE_PATTERN)) {
        return unsafeNewline;
      }
    } catch (SecurityException e) {
      // Fall through to default value;
    }
    return "\n";
  }

  /**
   * Parses a single printf-like term from a log message into a message template builder.
   * 

   * A simple example of an implicit parameter (the argument index is not specified):
   * 
   * message: "Hello %s World"
   * termStart: 6 ───┚╿╿
   * specStart: 7 ────┤│
   * formatStart: 7 ──╯│
   * return: 8 ────────╯
   * 
   * If this case there is no format specification, so {@code specStart == formatStart}.
   * 
   * A complex example with an explicit index:
   * 
   * message: "Hello %2$10d World"
   * termStart: 6 ───┚  ╿ ╿╿
   * specStart: 9 ──────╯ ││
   * formatStart: 11 ─────╯│
   * return: 12 ───────────╯
   * 
   * Note that in this example the given index will be 1 (rather than 2) because printf specifies
   * indices using a 1-based scheme, but internally they are 0-based.
   *
   * @param builder the message template builder.
   * @param index the zero-based argument index for the parameter.
   * @param message the complete log message string.
   * @param termStart the index of the initial '%' character that starts the term.
   * @param specStart the index of the first format specification character (after any optional
   *        index specification).
   * @param formatStart the index of the (first) format character in the term.
   * @return the index after the last character of the term.
   */
  abstract int parsePrintfTerm(
      MessageBuilder builder,
      int index,
      String message,
      int termStart,
      int specStart,
      int formatStart)
      throws ParseException;

  @Override
  public final void unescape(StringBuilder out, String message, int start, int end) {
    unescapePrintf(out, message, start, end);
  }

  @Override
  protected final  void parseImpl(MessageBuilder builder) throws ParseException {
    String message = builder.getMessage();
    // The last index we used (needed for $< syntax, initially invalid).
    int lastResolvedIndex = -1;
    // The next index to use for an implicit parameter (can become -1 if a parameter consumes all
    // the remaining arguments).
    int implicitIndex = 0;
    // Find the start of each term in sequence.
    for (int pos = nextPrintfTerm(message, 0); pos >= 0; pos = nextPrintfTerm(message, pos)) {
      // Capture the term start and move on (the character here is always '%').
      int termStart = pos++;
      // At this stage we don't know if any numeric value we parse is going to be part of a
      // parameter index ($nnn$x) or the format specification (%nnnx) but we assume the latter.
      int optionsStart = pos;

      // STEP 1: Parse any numeric value at the start of the term.
      char c;
      int index = 0;
      while (true) {
        if (pos < message.length()) {
          c = message.charAt(pos++);
          // Casting to char makes the result unsigned (so we don't need to test "digit < 0" later).
          int digit = (char) (c - '0');
          if (digit < 10) {
            index = (10 * index) + digit;
            if (index < MAX_ARG_COUNT) {
              continue;
            }
            throw ParseException.withBounds("index too large", message, termStart, pos);
          }
          // We found something other than [0-9] so we've finished parsing our value.
          break;
        }
        throw ParseException.withStartPosition("unterminated parameter", message, termStart);
      }

      // STEP 2: Process the value and determine the parameter's real index.
      if (c == '$') {
        // If was an index, but we could have got here without parsing any digits (ie, "%$")
        int indexLen = (pos - 1) - optionsStart;
        if (indexLen == 0) {
          throw ParseException.withBounds("missing index", message, termStart, pos);
        }
        // We also prohibit leading zeros in any index value (printf indices are 1-based, so the
        // first digit should never be zero).
        if (message.charAt(optionsStart) == '0') {
          throw ParseException.withBounds("index has leading zero", message, termStart, pos);
        }
        // Now correct the index to be 0-based.
        index -= 1;
        // Having got the parameter index, reset the specification start to just after the '$'.
        optionsStart = pos;
        // Read the next character from the message (needed for the next part of the parsing).
        if (pos == message.length()) {
          throw ParseException.withStartPosition("unterminated parameter", message, termStart);
        }
        c = message.charAt(pos++);
      } else if (c == '<') {
        // This is the rare 'relative' indexing mode where you just re-use the last parameter index.
        if (lastResolvedIndex == -1) {
          throw ParseException.withBounds("invalid relative parameter", message, termStart, pos);
        }
        index = lastResolvedIndex;
        // Having got the parameter index, reset the specification start to just after the '<'.
        optionsStart = pos;
        // Read the next character from the message (needed for the next part of the parsing).
        if (pos == message.length()) {
          throw ParseException.withStartPosition("unterminated parameter", message, termStart);
        }
        c = message.charAt(pos++);
      } else {
        // The parsed value was not an index, so we use the current implicit index. We do not need
        // to update the format start in this case and the current character is already correct for
        // the next part of the parsing.
        index = implicitIndex++;
      }

      // STEP 3: Find the index of the type character that terminates the format specification.
      // Remember to decrement pos to account for the fact we were one ahead of the current char.
      pos = findFormatChar(message, termStart, pos - 1);

      // STEP 4: Invoke the term parsing method and reset loop state.
      // Add a parameter to the builder and find where the term ends.
      pos = parsePrintfTerm(builder, index, message, termStart, optionsStart, pos);
      // Before going round again, record the index we just used and update the implicit index.
      lastResolvedIndex = index;
    }
  }

  /**
   * Returns the index of the first unescaped '%' character in message starting at pos (or -1 if not
   * found).
   */
  // VisibleForTesting
  static int nextPrintfTerm(String message, int pos) throws ParseException {
    while (pos < message.length()) {
      if (message.charAt(pos++) != '%') {
        continue;
      }
      if (pos < message.length()) {
        char c = message.charAt(pos);
        if (c == '%' || c == 'n') {
          // We encountered '%%' or '%n', so keep going (these will be unescaped later).
          pos += 1;
          continue;
        }
        // We were pointing at the character after the '%', so adjust back by one.
        return pos - 1;
      }
      // We ran off the end while looking for the character after the first '%'.
      throw ParseException.withStartPosition("trailing unquoted '%' character", message, pos - 1);
    }
    // We never found another unescaped '%'.
    return -1;
  }

  private static int findFormatChar(String message, int termStart, int pos) throws ParseException {
    for (; pos < message.length(); pos++) {
      char c = message.charAt(pos);
      // Get the relative offset of the ASCII letter (in the range 0-25) ignoring whether it's
      // upper or lower case. Using this unsigned value avoids multiple range checks in a tight
      // loop.
      int alpha = (char) ((c & ~0x20) - 'A');
      if (alpha < 26) {
        return pos;
      }
    }
    throw ParseException.withStartPosition("unterminated parameter", message, termStart);
  }

  /**
   * Unescapes the characters in the sub-string {@code s.substring(start, end)} according to
   * printf style formatting rules.
   */
  // VisibleForTesting
  static void unescapePrintf(StringBuilder out, String message, int start, int end) {
    int pos = start;
    while (pos < end) {
      if (message.charAt(pos++) != '%') {
        continue;
      }
      if (pos == end) {
        // Ignore unexpected trailing '%'.
        break;
      }
      char chr = message.charAt(pos);
      if (chr == '%') {
        // Append the section up to and including the first '%'.
        out.append(message, start, pos);
      } else if (chr == 'n') {
        // %n encountered, rewind one position to not emit leading '%' and emit newline.
        out.append(message, start, pos - 1);
        out.append(SYSTEM_NEWLINE);
      } else {
        // A single unescaped '%' is ignored and left in the output as-is.
        continue;
      }
      // Increment the position and reset the start point after the last processed character.
      start = ++pos;
    }
    // Append the last section (if it's non empty).
    if (start < end) {
      out.append(message, start, end);
    }
  }
}