All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.twitter.elephantbird.util.W3CLogParser Maven / Gradle / Ivy

There is a newer version: 4.17
Show newest version
package com.twitter.elephantbird.util;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.util.Arrays;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import com.google.common.collect.Maps;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/*
 * A parser for W3C-style log lines.  See LzoW3CLogInputFormat for more details about
 * the format itself.  Create the parser with an InputStream open to a list of
 * hash/field-list lines that give all the field-sets ever used for logging.  That is,
 * if you logged a set of fields for two days, then added a parameter and logged that set of
 * fields for a day, then added another parameter and removed a different one and logged
 * that set of fields for a while, the field definition file should have three lines
 * with the hashes and corresponding set of fields for each.
 */
public class W3CLogParser {
  protected static final Logger LOG = LoggerFactory.getLogger(W3CLogParser.class);

  private Map> fieldDef_ = Maps.newHashMap();
  private final static String DELIMITER = "\\s+";

  // Initialize a W3CLogParser From a InputStream
  public W3CLogParser(InputStream is) throws IOException {
    BufferedReader buf = new BufferedReader(new InputStreamReader(is));
    String line;
    while ((line = buf.readLine()) != null) {
      List fields = Arrays.asList(line.split(DELIMITER));
      fieldDef_.put(fields.get(0), fields.subList(1, fields.size()));
    }
  }

  // Parse a line using the field definition, and put results into a Map of
  // 
  public Map parse(String line) throws IOException {
    Map map = new HashMap();

    // Get the version CRC and find the field definition
    List fields = Arrays.asList(line.split(DELIMITER));
    List fieldNames = fieldDef_.get(fields.get(0));
    if (fieldNames == null) {
      throw new IOException("cannot find matching field definition for CRC "
          + fields.get(0));
    }
    if (fieldNames.size() != fields.size()) {
      throw new IOException("W3C field definition and input line for CRC "
          + fields.get(0) + " does not match:\n" + line);
    }

    // Map values to field names
    for (int fieldNum = 1; fieldNum < fieldNames.size(); fieldNum++) {
      map.put(fieldNames.get(fieldNum), fields.get(fieldNum));
    }

    return map;
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy