All downloads are free. The search and download features use the official Maven repository.

org.apache.hadoop.tools.rumen.ParsedLine Maven / Gradle / Ivy

The newest version!
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.tools.rumen;

import java.util.Properties;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * A single log line split into its leading record-type token and the
 * {@code key="value"} pairs that follow it.
 *
 * The text before the first space is handed to {@link LogRecordType#intern}
 * to obtain the record type; every {@code key="value"} pair found in the
 * remainder of the line is stored in {@link #content}.
 */
class ParsedLine {
  /** Key/value pairs extracted from the line; values are stored still escaped. */
  Properties content;
  /** Record type of the line; stays {@code null} for junk lines. */
  LogRecordType type;

  /** REGEX fragment capturing a key: one or more word characters. */
  static final String KEY = "(\\w+)";
  /**
   * The value string is enclosed in double quotation marks ('"') and
   * occurrences of '"' and '\' are escaped with a '\'. So the escaped value
   * string is essentially a sequence of escape sequences ('\' followed by any
   * character) and characters other than '"' and '\'.
   * 
   * The straightforward REGEX to capture the above is "((?:[^\"\\\\]|\\\\.)*)".
   * Unfortunately Java's REGEX implementation does not perform the NFA-to-DFA
   * conversion, so such an expression leads to backtracking and stack
   * overflow when matched against long strings. The following is a manual
   * "unfolding" of that REGEX, written with possessive quantifiers, to get rid
   * of the backtracking.
   */
  static final String VALUE = "([^\"\\\\]*+(?:\\\\.[^\"\\\\]*+)*+)";
  /**
   * REGEX to match the Key-Value pairs in an input line. Capture group 1
   * matches the key and capture group 2 matches the value (without quotation
   * marks).
   */
  static final Pattern keyValPair = Pattern.compile(KEY + "=" + "\"" + VALUE + "\"");

  /**
   * Parses one log line.
   *
   * A line that starts with a space, or is empty, is treated as junk:
   * {@link #type} is left {@code null} and {@link #content} is left empty.
   * A line without any space consists of the record-type token only and
   * yields a record with no properties.
   *
   * @param fullLine the complete log line to parse
   * @param version log format version; currently not consulted
   */
  @SuppressWarnings("unused")
  ParsedLine(String fullLine, int version) {
    content = new Properties();

    int firstSpace = fullLine.indexOf(" ");

    if (firstSpace < 0) {
      // No separator: the whole line is the record-type token.
      firstSpace = fullLine.length();
    }

    if (firstSpace == 0) {
      return; // This is a junk line of some sort
    }

    type = LogRecordType.intern(fullLine.substring(0, firstSpace));

    // When the line has no space there is nothing after the type token.
    // substring(firstSpace + 1) would then start one past the end of the
    // string and throw, so fall back to an empty key/value section.
    String propValPairs =
        firstSpace < fullLine.length() ? fullLine.substring(firstSpace + 1) : "";

    Matcher matcher = keyValPair.matcher(propValPairs);

    while (matcher.find()) {
      String key = matcher.group(1);
      String value = matcher.group(2);
      // A key that occurs more than once keeps its last value.
      content.setProperty(key, value);
    }
  }

  /**
   * @return the record type of this line, or {@code null} for a junk line
   */
  protected LogRecordType getType() {
    return type;
  }

  /**
   * Looks up the raw (still escaped) value recorded for a key.
   *
   * @param key the property name
   * @return the value, or {@code null} if the line had no such key
   */
  protected String get(String key) {
    return content.getProperty(key);
  }

  /**
   * Looks up a value and parses it as a decimal {@code long}.
   *
   * @param key the property name
   * @return the parsed value
   * @throws NumberFormatException if the key is absent or its value is not a
   *         parsable {@code long}
   */
  protected long getLong(String key) {
    String val = get(key);

    return Long.parseLong(val);
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy