All downloads are free. Search and download functionality uses the official Maven repository.

oi.thekraken.grok.api.Discovery Maven / Gradle / Ivy

/*******************************************************************************
 * Copyright 2014 Anthony Corbacho and contributors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *******************************************************************************/
package oi.thekraken.grok.api;

import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.TreeMap;

import org.apache.commons.lang3.StringUtils;

import com.google.code.regexp.Matcher;
import com.google.code.regexp.Pattern;

import oi.thekraken.grok.api.exception.GrokException;

/**
 * {@code Discovery} tries to find the best grok pattern for a given string.
 *
 * <p>Every pattern definition known to the supplied {@link Grok} instance is compiled on its
 * own, the compiled patterns are ordered from the most complex regex to the least complex one,
 * and each pattern that matches the input has its matched substring replaced by
 * <code>%{NAME}</code>.
 *
 * @author anthonycorbacho
 * @since 0.0.2
 */
public class Discovery {

  /**
   * Minimum complexity score a compiled regex needs in order to be tried. Lower-scoring
   * patterns (word, small number, space...) are skipped.
   */
  private static final int MIN_COMPLEXITY = 20;

  /**
   * Finds a word boundary that has a character on each side of it. A matched part in which
   * this is not found is not considered worth replacing.
   */
  private static final Pattern WORD_BOUNDARY = Pattern.compile(".\\b.");

  /** Finds an existing grok reference of the form <code>%{Foo}</code>. */
  private static final Pattern GROK_REFERENCE = Pattern.compile("%\\{[^}]+\\}");

  private final Grok grok;

  /**
   * Create a new {@code Discovery} object.
   *
   * @param grok instance of grok whose pattern definitions are used for the discovery
   */
  public Discovery(Grok grok) {
    this.grok = grok;
  }

  /**
   * Sort by regex complexity, most complex first.
   *
   * @param groks Map of the pattern name and grok instance
   * @return a new insertion-ordered map, keyed by each grok's saved pattern name, sorted by
   *         decreasing grok pattern complexity
   */
  private Map<String, Grok> sort(Map<String, Grok> groks) {
    List<Grok> ordered = new ArrayList<Grok>(groks.values());

    Collections.sort(ordered, new Comparator<Grok>() {
      @Override
      public int compare(Grok g1, Grok g2) {
        int c1 = complexity(g1.getNamedRegex());
        int c2 = complexity(g2.getNamedRegex());
        // Full three-way result (descending). Returning only 1 or 0 would break the
        // Comparator contract and make the resulting order unreliable.
        if (c1 == c2) {
          return 0;
        }
        return (c1 < c2) ? 1 : -1;
      }
    });

    Map<String, Grok> sorted = new LinkedHashMap<String, Grok>();
    for (Grok g : ordered) {
      sorted.put(g.getSaved_pattern(), g);
    }
    return sorted;
  }

  /**
   * Compute the complexity score of a regex: its length plus the number of {@code |}
   * alternations it contains.
   *
   * @param expandedPattern regex string
   * @return the complexity of the regex
   */
  private static int complexity(String expandedPattern) {
    // Splitting on the literal '|' with limit -1 keeps trailing empty strings, so the number
    // of pieces minus one is exactly the number of '|' characters.
    int alternations = expandedPattern.split("\\Q|\\E", -1).length - 1;
    return alternations + expandedPattern.length();
  }

  /**
   * Compile one standalone {@link Grok} per pattern definition.
   *
   * @param definitions pattern definitions keyed by pattern name
   * @return the successfully compiled groks keyed (and ordered) by pattern name
   */
  private Map<String, Grok> compilePatterns(Map<String, String> definitions) {
    Map<String, Grok> compiled = new TreeMap<String, Grok>();
    for (String name : definitions.keySet()) {
      Grok g = new Grok();
      try {
        g.copyPatterns(definitions);
        g.setSaved_pattern(name);
        g.compile("%{" + name + "}");
        compiled.put(name, g);
      } catch (GrokException ignored) {
        // Best effort: a definition that cannot be compiled is simply left out of the
        // discovery. TODO: log the failure once a logger is available.
      }
    }
    return compiled;
  }

  /**
   * Find a pattern from a log.
   *
   * @param text which is the representation of your single log line
   * @return the text with the discovered parts replaced by grok patterns %{Foo}..., or an
   *         empty string when {@code text} is {@code null}
   */
  public String discover(String text) {
    if (text == null) {
      return "";
    }

    // Compile every known pattern, then try them from most to least complex
    Map<String, Grok> candidates = sort(compilePatterns(grok.getPatterns()));

    String result = text;
    for (Entry<String, Grok> candidate : candidates.entrySet()) {
      Grok value = candidate.getValue();

      // We want to search with more complex pattern
      // We avoid word, small number, space....
      if (complexity(value.getNamedRegex()) < MIN_COMPLEXITY) {
        continue;
      }

      // Matching is always done against the original text, not the partially rewritten one
      Match m = value.match(text);
      if (m.isNull()) {
        continue;
      }

      // get the part of the matched text
      String part = getPart(m, text);

      // we skip parts that contain no word boundary between two characters
      if (!WORD_BOUNDARY.matcher(part).find()) {
        continue;
      }

      // We skip the part that already include %{Foo}
      if (GROK_REFERENCE.matcher(part).find()) {
        continue;
      }

      result = StringUtils.replace(result, part, "%{" + candidate.getKey() + "}");
    }

    return result;
  }

  /**
   * Get the substring that match with the text.
   *
   * @param m Grok Match
   * @param text text
   * @return the substring of {@code text} between the match start and end, or an empty string
   *         when either argument is {@code null}
   */
  private String getPart(Match m, String text) {
    if (m == null || text == null) {
      return "";
    }

    return text.substring(m.getStart(), m.getEnd());
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy