All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.flume.source.SyslogUtils Maven / Gradle / Ivy

There is a newer version: 4.15.0-HBase-1.5
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.flume.source;

import org.apache.flume.Event;
import org.apache.flume.event.EventBuilder;
import org.jboss.netty.buffer.ChannelBuffer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.HashMap;
import java.util.Map;
import java.util.regex.MatchResult;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Incremental syslog parser that turns bytes read from a Netty
 * {@link ChannelBuffer} into Flume {@link Event}s.
 *
 * <p>The parser is a small state machine ({@code START -> PRIO -> DATA}) that
 * keeps its state between calls to {@link #extractEvent(ChannelBuffer)}, so a
 * message split across several buffers is reassembled. For each event the
 * {@code <priority>} prefix is decoded into the {@link #SYSLOG_FACILITY} and
 * {@link #SYSLOG_SEVERITY} headers, and the remaining text is matched against
 * the registered header formats (RFC 5424, RFC 3164 and any added through
 * {@link #addFormats(Map)}) to fill the {@code timestamp} and {@code host}
 * headers.
 *
 * <p>Instances hold mutable parse state and {@link SimpleDateFormat} objects.
 * NOTE(review): presumably each instance is confined to a single
 * connection/handler thread - confirm against the callers.
 */
public class SyslogUtils {
  public static final String SYSLOG_TIMESTAMP_FORMAT_RFC5424_2 = "yyyy-MM-dd'T'HH:mm:ss.SZ";
  public static final String SYSLOG_TIMESTAMP_FORMAT_RFC5424_1 = "yyyy-MM-dd'T'HH:mm:ss.S";
  public static final String SYSLOG_TIMESTAMP_FORMAT_RFC5424_3 = "yyyy-MM-dd'T'HH:mm:ssZ";
  public static final String SYSLOG_TIMESTAMP_FORMAT_RFC5424_4 = "yyyy-MM-dd'T'HH:mm:ss";
  public static final String SYSLOG_TIMESTAMP_FORMAT_RFC3164_1 = "yyyyMMM d HH:mm:ss";

  public static final String SYSLOG_MSG_RFC5424_0 =
      "(?:\\d\\s)?" + // version
      // yyyy-MM-dd'T'HH:mm:ss.SZ or yyyy-MM-dd'T'HH:mm:ss.S+hh:mm or - (null stamp)
      "(?:(\\d{4}[-]\\d{2}[-]\\d{2}[T]\\d{2}[:]\\d{2}[:]\\d{2}(?:\\.\\d{1,6})?(?:[+-]\\d{2}[:]\\d{2}|Z)?)|-)" + // stamp
      "\\s" + // separator
      "(?:([\\w][\\w\\d\\.@-]*)|-)" + // host name or - (null)
      "\\s" + // separator
      "(.*)$"; // body

  public static final String SYSLOG_MSG_RFC3164_0 =
      // stamp MMM d HH:mm:ss, single digit date has two spaces
      "([A-Z][a-z][a-z]\\s{1,2}\\d{1,2}\\s\\d{2}[:]\\d{2}[:]\\d{2})" +
      "\\s" + // separator
      "([\\w][\\w\\d\\.@-]*)" + // host
      "\\s(.*)$";  // body

  /** Capture-group index of the timestamp in every header regex. */
  public static final int SYSLOG_TIMESTAMP_POS = 1;
  /** Capture-group index of the host name in every header regex. */
  public static final int SYSLOG_HOSTNAME_POS = 2;
  /** Capture-group index of the message body in every header regex. */
  public static final int SYSLOG_BODY_POS = 3;

  public static final String SYSLOG_FACILITY = "Facility";
  public static final String SYSLOG_SEVERITY = "Severity";
  public static final String EVENT_STATUS = "flume.syslog.status";
  /** Smallest accepted maximum event size, in bytes. */
  public static final Integer MIN_SIZE = 10;
  /** Maximum event size used when none is given, in bytes. */
  public static final Integer DEFAULT_SIZE = 2500;

  // RFC 5424 defines PRIVAL as 1 to 3 digits; anything longer is treated as invalid
  // so that a stream of digits cannot grow the priority buffer without bound.
  private static final int MAX_PRIO_DIGITS = 3;

  private static final Logger logger = LoggerFactory
      .getLogger(SyslogUtils.class);

  /** One header layout: a regex plus the timestamp clean-ups and date formats that go with it. */
  private static class SyslogFormatter {
    public Pattern regexPattern;
    // searchPattern.get(i) is replaced by replacePattern.get(i); the two lists are kept
    // the same length by every code path that fills them.
    public ArrayList<String> searchPattern = new ArrayList<String>();
    public ArrayList<String> replacePattern = new ArrayList<String>();
    public ArrayList<SimpleDateFormat> dateFormat = new ArrayList<SimpleDateFormat>();
    // true when the timestamp carries no year and the current year must be prepended
    public boolean addYear;
  }

  /** Parser states: before '<', inside the priority digits, inside the message data. */
  enum Mode {
    START, PRIO, DATA
  }

  /** Values written to the {@link #EVENT_STATUS} header. */
  public enum SyslogStatus {
    OTHER("Unknown"),
    INVALID("Invalid"),
    INCOMPLETE("Incomplete");

    private final String syslogStatus;

    private SyslogStatus(String status) {
      syslogStatus = status;
    }

    public String getSyslogStatus() {
      return this.syslogStatus;
    }
  }

  private final ArrayList<SyslogFormatter> formats = new ArrayList<SyslogFormatter>();
  private final StringBuilder prio = new StringBuilder();
  private final ByteArrayOutputStream baos;
  private final boolean isUdp;

  private Mode m = Mode.START;
  private boolean isBadEvent;
  private boolean isIncompleteEvent;
  private Integer maxSize;

  private String timeStamp = null;
  private String hostName = null;
  private String msgBody = null;

  /** Creates a TCP-mode parser with {@link #DEFAULT_SIZE}. */
  public SyslogUtils() {
    this(false);
  }

  /**
   * Creates a parser with {@link #DEFAULT_SIZE}.
   *
   * @param isUdp {@code true} if each buffer is a complete message that is not
   *              newline-terminated
   */
  public SyslogUtils(boolean isUdp) {
    this(DEFAULT_SIZE, isUdp);
  }

  /**
   * Creates a parser.
   *
   * @param eventSize maximum number of body bytes per event; values below
   *                  {@link #MIN_SIZE} are raised to {@link #MIN_SIZE}. Must not be null.
   * @param isUdp     {@code true} if each buffer is a complete message that is not
   *                  newline-terminated
   */
  public SyslogUtils(Integer eventSize, boolean isUdp) {
    this.isUdp = isUdp;
    isBadEvent = false;
    isIncompleteEvent = false;
    maxSize = (eventSize < MIN_SIZE) ? MIN_SIZE : eventSize;
    // Size the buffer from the clamped value: the raw argument may be negative,
    // which ByteArrayOutputStream rejects.
    baos = new ByteArrayOutputStream(maxSize);
    initHeaderFormats();
  }

  /**
   * Registers an additional header format that is tried before the built-in ones.
   *
   * <p>Nothing is registered when the map is null, empty, or has no
   * {@code CONFIG_REGEX} entry. A search/replace pair is only used when both
   * {@code CONFIG_SEARCH} and {@code CONFIG_REPLACE} are present.
   *
   * @param formatProp configuration keyed by the {@code SyslogSourceConfigurationConstants}
   *                   names
   * @throws java.util.regex.PatternSyntaxException if the configured regex is invalid
   * @throws IllegalArgumentException if the configured date format is invalid
   */
  public void addFormats(Map<String, String> formatProp) {
    if (formatProp == null || formatProp.isEmpty() || !formatProp.containsKey(
        SyslogSourceConfigurationConstants.CONFIG_REGEX)) {
      return;
    }
    SyslogFormatter fmt1 = new SyslogFormatter();
    fmt1.regexPattern = Pattern.compile(formatProp.get(
        SyslogSourceConfigurationConstants.CONFIG_REGEX));

    boolean hasSearch = formatProp.containsKey(
        SyslogSourceConfigurationConstants.CONFIG_SEARCH);
    boolean hasReplace = formatProp.containsKey(
        SyslogSourceConfigurationConstants.CONFIG_REPLACE);
    if (hasSearch && hasReplace) {
      fmt1.searchPattern.add(formatProp.get(
          SyslogSourceConfigurationConstants.CONFIG_SEARCH));
      fmt1.replacePattern.add(formatProp.get(
          SyslogSourceConfigurationConstants.CONFIG_REPLACE));
    } else if (hasSearch || hasReplace) {
      // The two lists are indexed in lock-step when applied; adding only one side
      // would make the lookup of its partner fail.
      logger.warn("Ignoring syslog timestamp search/replace setting: "
          + "both a search and a replace pattern are required.");
    }

    if (formatProp.containsKey(
        SyslogSourceConfigurationConstants.CONFIG_DATEFORMAT)) {
      fmt1.dateFormat.add(new SimpleDateFormat(formatProp.get(
          SyslogSourceConfigurationConstants.CONFIG_DATEFORMAT)));
    }
    formats.add(0, fmt1);
  }

  // setup built-in formats
  private void initHeaderFormats() {
    // setup RFC5424 formatter
    SyslogFormatter fmt1 = new SyslogFormatter();
    fmt1.regexPattern = Pattern.compile(SYSLOG_MSG_RFC5424_0);
    // 'Z' in timestamp indicates UTC zone, so replace it with '+0000' for date formatting
    fmt1.searchPattern.add("Z");
    fmt1.replacePattern.add("+0000");
    // timezone in RFC5424 is [+-]tt:tt, so remove the ':' for java date formatting
    fmt1.searchPattern.add("([+-])(\\d{2})[:](\\d{2})");
    fmt1.replacePattern.add("$1$2$3");
    // SimpleDateFormat reads the whole digit run after '.' as a millisecond count,
    // so keep at most three fractional digits
    fmt1.searchPattern.add("(\\.\\d{3})\\d+");
    fmt1.replacePattern.add("$1");
    // SimpleDateFormat.parse(String) accepts a matching prefix, so the zone-aware
    // variant must be tried before its zone-less counterpart or the offset is ignored.
    fmt1.dateFormat.add(new SimpleDateFormat(SYSLOG_TIMESTAMP_FORMAT_RFC5424_2));
    fmt1.dateFormat.add(new SimpleDateFormat(SYSLOG_TIMESTAMP_FORMAT_RFC5424_1));
    fmt1.dateFormat.add(new SimpleDateFormat(SYSLOG_TIMESTAMP_FORMAT_RFC5424_3));
    fmt1.dateFormat.add(new SimpleDateFormat(SYSLOG_TIMESTAMP_FORMAT_RFC5424_4));
    fmt1.addYear = false;

    // setup RFC3164 formatter
    SyslogFormatter fmt2 = new SyslogFormatter();
    fmt2.regexPattern = Pattern.compile(SYSLOG_MSG_RFC3164_0);
    // the single digit date has two spaces, so trim it
    fmt2.searchPattern.add("  ");
    fmt2.replacePattern.add(" ");
    fmt2.dateFormat.add(new SimpleDateFormat(SYSLOG_TIMESTAMP_FORMAT_RFC3164_1));
    fmt2.addYear = true;

    formats.add(fmt1);
    formats.add(fmt2);
  }

  /**
   * Builds an event from the bytes collected so far and resets the parser.
   *
   * <p>Facility and severity are always set (both 0 for an invalid event).
   * {@code timestamp} and {@code host} are set only when a header format matched.
   * {@link #EVENT_STATUS} is set for invalid or truncated events. The body is the
   * parsed message text when a format matched, otherwise the raw collected bytes.
   *
   * @return the new event
   */
  Event buildEvent() {
    int sev = 0;
    int facility = 0;

    if (!isBadEvent && prio.length() == 0) {
      // No priority digits were read (e.g. an empty UDP datagram); there is
      // nothing to parse, so flag the event instead of failing in parseInt.
      isBadEvent = true;
    }

    if (!isBadEvent) {
      int pri = Integer.parseInt(prio.toString());
      sev = pri % 8;
      facility = pri / 8;
      formatHeaders();
    }

    Map<String, String> headers = new HashMap<String, String>();
    headers.put(SYSLOG_FACILITY, String.valueOf(facility));
    headers.put(SYSLOG_SEVERITY, String.valueOf(sev));
    if ((timeStamp != null) && timeStamp.length() > 0) {
      headers.put("timestamp", timeStamp);
    }
    if ((hostName != null) && (hostName.length() > 0)) {
      headers.put("host", hostName);
    }
    if (isBadEvent) {
      logger.warn("Event created from Invalid Syslog data.");
      headers.put(EVENT_STATUS, SyslogStatus.INVALID.getSyslogStatus());
    } else if (isIncompleteEvent) {
      logger.warn("Event size larger than specified event size: {}. You should " +
          "consider increasing your event size.", maxSize);
      headers.put(EVENT_STATUS, SyslogStatus.INCOMPLETE.getSyslogStatus());
    }

    // NOTE(review): the bytes -> String -> bytes round trip here and in
    // formatHeaders() uses the platform default charset - confirm that is intended.
    byte[] body;
    if ((msgBody != null) && (msgBody.length() > 0)) {
      body = msgBody.getBytes();
    } else {
      body = baos.toByteArray();
    }
    reset();
    return EventBuilder.withBody(body, headers);
  }

  // Apply each known pattern to the message; the first one that matches wins
  private void formatHeaders() {
    String eventStr = baos.toString();
    for (SyslogFormatter fmt : formats) {
      Matcher matcher = fmt.regexPattern.matcher(eventStr);
      if (!matcher.matches()) {
        continue;
      }
      MatchResult res = matcher.toMatchResult();
      for (int grp = 1; grp <= res.groupCount(); grp++) {
        String value = res.group(grp);
        if (grp == SYSLOG_TIMESTAMP_POS) {
          // the group is null when the stamp was the '-' placeholder
          if (value != null) {
            // apply available format replacements to timestamp
            for (int sp = 0; sp < fmt.searchPattern.size(); sp++) {
              value = value.replaceAll(fmt.searchPattern.get(sp), fmt.replacePattern.get(sp));
            }
            // Add year to timestamp if needed
            if (fmt.addYear) {
              value = String.valueOf(Calendar.getInstance().get(Calendar.YEAR)) + value;
            }
            // try the available time formats; timeStamp stays null if none applies
            for (SimpleDateFormat dateFormat : fmt.dateFormat) {
              try {
                timeStamp = String.valueOf(dateFormat.parse(value).getTime());
                break; // done. formatted the time
              } catch (ParseException e) {
                // this format does not apply, try the next one
              }
            }
          }
        } else if (grp == SYSLOG_HOSTNAME_POS) {
          hostName = value;
        } else if (grp == SYSLOG_BODY_POS) {
          msgBody = value;
        }
      }
      break; // we successfully parsed the message using this pattern
    }
  }

  // Return the parser to its initial state, ready for the next event
  private void reset() {
    baos.reset();
    m = Mode.START;
    prio.delete(0, prio.length());
    isBadEvent = false;
    isIncompleteEvent = false;
    hostName = null;
    timeStamp = null;
    msgBody = null;
  }

  // Append text to the data buffer. The three-argument write of
  // ByteArrayOutputStream declares no checked exception.
  private void writeRaw(String text) {
    byte[] bytes = text.getBytes();
    baos.write(bytes, 0, bytes.length);
  }

  /**
   * Consumes bytes from {@code in} until one event is complete or the buffer is
   * exhausted.
   *
   * <p>An event is complete when a newline is read in the data section or when the
   * collected data reaches the maximum event size (the event is then marked
   * incomplete). Input that does not start with a well-formed {@code <digits>}
   * priority is collected verbatim and marked invalid. In UDP mode, whatever was
   * collected is emitted when the buffer runs out, since no newline is expected.
   * Bytes left in {@code in} after an event is returned are not consumed.
   *
   * @param in buffer to read from
   * @return the extracted event, or {@code null} if more data is needed (TCP mode only)
   */
  public Event extractEvent(ChannelBuffer in) {
    Event e = null;
    boolean doneReading = false;

    while (!doneReading && in.readable()) {
      byte b = in.readByte();
      switch (m) {
        case START:
          if (b == '<') {
            m = Mode.PRIO;
          } else if (b == '\n') {
            // If the character is \n, it was because the last event was exactly
            // as long as the maximum size allowed and the only remaining
            // character was the delimiter - '\n', or multiple delimiters were
            // sent in a row. Just ignore it and don't change the mode.
            logger.debug("Delimiter found while in START mode, ignoring..");
          } else {
            isBadEvent = true;
            baos.write(b);
            // Bad event, just dump everything as if it is data.
            m = Mode.DATA;
          }
          break;
        case PRIO:
          if (b == '>') {
            if (prio.length() == 0) {
              // "<>" carries no priority: keep the raw text and flag the event
              isBadEvent = true;
              writeRaw("<>");
            }
            m = Mode.DATA;
          } else {
            char ch = (char) b;
            prio.append(ch);
            if (!Character.isDigit(ch) || prio.length() > MAX_PRIO_DIGITS) {
              isBadEvent = true;
              // Append the priority read so far to the data
              writeRaw("<" + prio);
              // If we hit a bad priority, just write as if everything is data.
              m = Mode.DATA;
            }
          }
          break;
        case DATA:
          // TCP syslog entries are separated by '\n'
          if (b == '\n') {
            e = buildEvent();
            doneReading = true;
          } else {
            baos.write(b);
          }
          // '>=' rather than '==': a bad-priority dump or a lowered limit can
          // already have pushed the buffer past maxSize.
          if (baos.size() >= this.maxSize && !doneReading) {
            isIncompleteEvent = true;
            e = buildEvent();
            doneReading = true;
          }
          break;
      }
    }

    // UDP doesn't send a newline, so just use what we received
    if (e == null && isUdp) {
      e = buildEvent();
    }

    return e;
  }

  /**
   * @return the maximum number of body bytes collected per event
   */
  public Integer getEventSize() {
    return maxSize;
  }

  /**
   * Sets the maximum number of body bytes collected per event.
   *
   * @param eventSize new limit; values below {@link #MIN_SIZE} are raised to
   *                  {@link #MIN_SIZE}, as in the constructor. Must not be null.
   */
  public void setEventSize(Integer eventSize) {
    this.maxSize = (eventSize < MIN_SIZE) ? MIN_SIZE : eventSize;
  }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy