All downloads are free. The search and download features use the official Maven repository.

org.apache.flume.source.SyslogParser Maven / Gradle / Ivy

There is a newer version: 4.15.0-HBase-1.5
Show newest version
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
/**
 * To change this template, choose Tools | Templates
 * and open the template in the editor.
 */

package org.apache.flume.source;

import com.google.common.base.Charsets;
import com.google.common.base.Preconditions;
import com.google.common.cache.Cache;
import com.google.common.cache.CacheBuilder;
import com.google.common.cache.CacheLoader;
import com.google.common.collect.Maps;
import java.nio.charset.Charset;
import java.util.Map;
import java.util.concurrent.ExecutionException;
import java.util.regex.Pattern;
import org.apache.flume.Event;
import org.apache.flume.event.EventBuilder;
import org.joda.time.DateTime;
import org.joda.time.format.DateTimeFormat;
import org.joda.time.format.DateTimeFormatter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class SyslogParser {

  private static final Logger logger =
      LoggerFactory.getLogger(SyslogParser.class);

  private static final int TS_CACHE_MAX = 1000;  // timestamp cache size limit
  private static final Pattern TWO_SPACES = Pattern.compile("  ");
  private static final DateTimeFormatter rfc3164Format =
      DateTimeFormat.forPattern("MMM d HH:mm:ss").withZoneUTC();

  private static final String timePat = "yyyy-MM-dd'T'HH:mm:ss";
  private static final int RFC3164_LEN = 15;
  private static final int RFC5424_PREFIX_LEN = 19;

  private final DateTimeFormatter timeParser;

  private Cache timestampCache;

  public SyslogParser() {
    timeParser = DateTimeFormat.forPattern(timePat).withZoneUTC();
    timestampCache = CacheBuilder.newBuilder().maximumSize(TS_CACHE_MAX).build(
        new CacheLoader() {

          @Override
          public Long load(String key) throws Exception {
            return timeParser.parseMillis(key);
          }
        });
  }

  /**
   * Parses a Flume Event out of a syslog message string.
   * @param msg Syslog message, not including the newline character
   * @return Parsed Flume Event
   * @throws IllegalArgumentException if unable to successfully parse message
   */
  public Event parseMessage(String msg, Charset charset) {
    Map headers = Maps.newHashMap();

    int msgLen = msg.length();

    int curPos = 0;

    Preconditions.checkArgument(msg.charAt(curPos) == '<',
        "Bad format: invalid priority: cannot find open bracket '<' (%s)", msg);

    int endBracketPos = msg.indexOf('>');
    Preconditions.checkArgument(endBracketPos > 0 && endBracketPos <= 6,
        "Bad format: invalid priority: cannot find end bracket '>' (%s)", msg);

    String priority = msg.substring(1, endBracketPos);
    int pri = Integer.parseInt(priority);
    int facility = pri / 8;
    int severity = pri % 8;

    // put fac / sev into header
    headers.put(SyslogUtils.SYSLOG_FACILITY, String.valueOf(facility));
    headers.put(SyslogUtils.SYSLOG_SEVERITY, String.valueOf(severity));

    Preconditions.checkArgument(msgLen > endBracketPos + 1,
        "Bad format: no data except priority (%s)", msg);

    // update parsing position
    curPos = endBracketPos + 1;

    // ignore version string
    if (msgLen > curPos + 2 && "1 ".equals(msg.substring(curPos, curPos + 2))) {
      curPos += 2;
    }

    // now parse timestamp (handle different varieties)

    long ts;
    char dateStartChar = msg.charAt(curPos);

    try {

      // no timestamp specified; use relay current time
      if (dateStartChar == '-') {
        ts = System.currentTimeMillis();
        if (msgLen <= curPos + 2) {
          throw new IllegalArgumentException(
              "bad syslog format (missing hostname)");
        }
        curPos += 2; // assume we skip past a space to get to the hostname

        // rfc3164 imestamp
      } else if (dateStartChar >= 'A' && dateStartChar <= 'Z') {
        if (msgLen <= curPos + RFC3164_LEN) {
          throw new IllegalArgumentException("bad timestamp format");
        }
        ts = parseRfc3164Time(
            msg.substring(curPos, curPos + RFC3164_LEN));
        curPos += RFC3164_LEN + 1;

        // rfc 5424 timestamp
      } else {
        int nextSpace = msg.indexOf(' ', curPos);
        if (nextSpace == -1) {
          throw new IllegalArgumentException("bad timestamp format");
        }
        ts = parseRfc5424Date(msg.substring(curPos, nextSpace));
        curPos = nextSpace + 1;
      }

    } catch (IllegalArgumentException ex) {
      throw new IllegalArgumentException("Unable to parse message: " + msg, ex);
    }

    headers.put("timestamp", String.valueOf(ts));

    // parse out hostname
    int nextSpace = msg.indexOf(' ', curPos);
    if (nextSpace == -1) {
      throw new IllegalArgumentException(
          "bad syslog format (missing hostname)");
    }
    // copy the host string to avoid holding the message string in memory
    // if using a memory-based queue
    String hostname = new String(msg.substring(curPos, nextSpace));
    headers.put("host", hostname);

    // EventBuilder will do a copy of its own, so no defensive copy of the body
    String data = "";
    if (msgLen > nextSpace + 1) {
      curPos = nextSpace + 1;
      data = msg.substring(curPos);
    }

    Event event = EventBuilder.withBody(data, charset, headers);

    return event;
  }

  /**
   * Parse date in RFC 5424 format. Uses an LRU cache to speed up parsing for
   * multiple messages that occur in the same second.
   * @param msg
   * @return Typical (for Java) milliseconds since UNIX epoch
   */
  protected long parseRfc5424Date(String msg) {

    Long ts = null;
    int curPos = 0;

    int msgLen = msg.length();
    Preconditions.checkArgument(msgLen > RFC5424_PREFIX_LEN,
        "Bad format: Not a valid RFC5424 timestamp: %s", msg);
    String timestampPrefix = msg.substring(curPos, RFC5424_PREFIX_LEN);

    try {
      ts = timestampCache.get(timestampPrefix);
    } catch (ExecutionException ex) {
      throw new IllegalArgumentException("bad timestamp format", ex);
    }

    curPos += RFC5424_PREFIX_LEN;

    Preconditions.checkArgument(ts != null, "Parsing error: timestamp is null");

    // look for the optional fractional seconds
    if (msg.charAt(curPos) == '.') {
      // figure out how many numeric digits
      boolean foundEnd = false;
      int endMillisPos = curPos + 1;

      if (msgLen <= endMillisPos) {
        throw new IllegalArgumentException("bad timestamp format (no TZ)");
      }

      // FIXME: TODO: ensure we handle all bad formatting cases
      while (!foundEnd) {
        char curDigit = msg.charAt(endMillisPos);
        if (curDigit >= '0' && curDigit <= '9') {
          endMillisPos++;
        } else {
          foundEnd = true;
        }
      }

      // if they had a valid fractional second, append it rounded to millis
      if (endMillisPos - (curPos + 1) > 0) {
        float frac = Float.parseFloat(msg.substring(curPos, endMillisPos));
        long milliseconds = (long) (frac * 1000f);
        ts += milliseconds;
      } else {
        throw new IllegalArgumentException(
            "Bad format: Invalid timestamp (fractional portion): " + msg);
      }

      curPos = endMillisPos;
    }

    // look for timezone
    char tzFirst = msg.charAt(curPos);

    // UTC
    if (tzFirst == 'Z') {
      // no-op
    } else if (tzFirst == '+' || tzFirst == '-') {

      Preconditions.checkArgument(msgLen > curPos + 5,
          "Bad format: Invalid timezone (%s)", msg);

      int polarity;
      if (tzFirst == '+') {
        polarity = +1;
      } else {
        polarity = -1;
      }

      char[] h = new char[5];
      for (int i = 0; i < 5; i++) {
        h[i] = msg.charAt(curPos + 1 + i);
      }

      if (h[0] >= '0' && h[0] <= '9'
          && h[1] >= '0' && h[1] <= '9'
          && h[2] == ':'
          && h[3] >= '0' && h[3] <= '9'
          && h[4] >= '0' && h[4] <= '9') {
        int hourOffset = Integer.parseInt(msg.substring(curPos + 1, curPos + 3));
        int minOffset = Integer.parseInt(msg.substring(curPos + 4, curPos + 6));
        ts -= polarity * ((hourOffset * 60) + minOffset) * 60000;
      } else {
        throw new IllegalArgumentException(
            "Bad format: Invalid timezone: " + msg);
      }

    }


    return ts;
  }

  /**
   * Parse the RFC3164 date format. This is trickier than it sounds because this
   * format does not specify a year so we get weird edge cases at year
   * boundaries. This implementation tries to "do what I mean".
   * @param ts RFC3164-compatible timestamp to be parsed
   * @return Typical (for Java) milliseconds since the UNIX epoch
   */
  protected long parseRfc3164Time(String ts) {
    DateTime now = DateTime.now();
    int year = now.getYear();

    ts = TWO_SPACES.matcher(ts).replaceFirst(" ");

    DateTime date;
    try {
      date = rfc3164Format.parseDateTime(ts);
    } catch (IllegalArgumentException e) {
      logger.debug("rfc3164 date parse failed on ("+ts+"): invalid format", e);
      return 0;
    }

    // try to deal with boundary cases, i.e. new year's eve.
    // rfc3164 dates are really dumb.
    // NB: cannot handle replaying of old logs or going back to the future
    if (date != null) {
      DateTime fixed = date.withYear(year);

      // flume clock is ahead or there is some latency, and the year rolled
      if (fixed.isAfter(now) && fixed.minusMonths(1).isAfter(now)) {
        fixed = date.withYear(year - 1);
      // flume clock is behind and the year rolled
      } else if (fixed.isBefore(now) && fixed.plusMonths(1).isBefore(now)) {
        fixed = date.withYear(year + 1);
      }
      date = fixed;
    }

    if (date == null) {
      return 0;
    }

    return date.getMillis();
  }


}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy