
nl.basjes.parse.core.Parser Maven / Gradle / Ivy
/*
* Apache HTTPD logparsing made easy
* Copyright (C) 2011-2015 Niels Basjes
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package nl.basjes.parse.core;
import nl.basjes.parse.core.exceptions.*;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.lang.reflect.Constructor;
import java.lang.reflect.Method;
import java.util.*;
public class Parser {
/**
 * Value holder that ties one dissector instance to a single
 * (input type, output type, output name) combination.
 * All fields are final and are read directly by the enclosing class.
 */
private static class DissectorPhase {
    private final String inputType;
    private final String outputType;
    private final String name;
    private final Dissector instance;

    /**
     * @param inputType  the type of the value the dissector consumes
     * @param outputType the type of the value the dissector produces
     * @param name       the name of the produced value
     * @param instance   the dissector that does the work
     */
    public DissectorPhase(final String inputType, final String outputType, final String name, final Dissector instance) {
        this.instance = instance;
        this.name = name;
        this.outputType = outputType;
        this.inputType = inputType;
    }
}
// --------------------------------------------
private static final Logger LOG = LoggerFactory.getLogger(Parser.class);
private final Class recordClass;
private final Set availableDissectors = new HashSet<>();
private final Set allDissectors = new HashSet<>();
// Key = "request.time.hour"
// Value = the set of dissectors that must all be started once we have this value
private Map> compiledDissectors = null;
private Set usefulIntermediateFields = null;
private String rootType;
// The target methods in the record class that will want to receive the values
private final Map> targets = new TreeMap<>();
private final Map> castsOfTargets = new TreeMap<>();
private final Set locatedTargets = new HashSet<>();
private boolean usable = false;
// --------------------------------------------
/**
 * Returns the ids of all fields for which at least one target method was registered.
 *
 * @return the key set of the registered targets (a view on the internal map, not a copy).
 */
public Set<String> getNeeded() {
    return targets.keySet();
}
/**
 * Returns the casts possible for the specified path.
 * Before you call 'getCasts' the actual parser needs to be constructed.
 * Simply calling getPossiblePaths does not build the actual parser.
 * If you want to get the casts for all possible paths the code looks something like this:
 * <pre>{@code
 * List<String> possiblePaths = parser.getPossiblePaths();
 * // Use any method that has a supported setter signature
 * parser.addParseTarget(String.class.getMethod("indexOf", String.class), possiblePaths);
 * for (String path : possiblePaths) {
 *     LOG.info("{}     {}", path, parser.getCasts(path));
 * }
 * }</pre>
 * A failure to assemble the dissectors is logged and does not propagate to the caller.
 *
 * @param name The name of the path for which you want the casts
 * @return The set of casts that are valid for this name. Null if this name is unknown.
 */
public EnumSet<Casts> getCasts(String name) {
    try {
        assembleDissectors();
    } catch (MissingDissectorsException | InvalidDissectorException e) {
        // Best effort lookup: report through the logger instead of stderr and continue.
        LOG.error("Unable to assemble the dissectors while retrieving the casts for \"" + name + "\"", e);
    }
    return castsOfTargets.get(name);
}
public Map> getAllCasts() {
try {
assembleDissectors();
} catch (MissingDissectorsException
|InvalidDissectorException e) {
e.printStackTrace();
}
return castsOfTargets;
}
// --------------------------------------------
/**
 * @return the names of the fields from which at least one dissector is started;
 *         null as long as the dissectors have never been assembled.
 */
Set<String> getUsefulIntermediateFields() {
    return usefulIntermediateFields;
}
// --------------------------------------------
/**
 * Registers all provided dissectors.
 * A null list and null elements in the list are ignored.
 *
 * @param dissectors the dissectors to add.
 * @throws CannotChangeDissectorsAfterConstructionException if the dissectors have already been compiled.
 */
public final void addDissectors(final List<Dissector> dissectors) {
    if (compiledDissectors != null) {
        throw new CannotChangeDissectorsAfterConstructionException();
    }
    if (dissectors == null) {
        return;
    }
    for (final Dissector dissector : dissectors) {
        // Same rule as addDissector: a null entry is skipped, it would only
        // cause a NullPointerException once the dissectors are assembled.
        if (dissector != null) {
            allDissectors.add(dissector);
        }
    }
}
// --------------------------------------------
/**
 * Registers a single dissector; a null dissector is silently ignored.
 *
 * @param dissector the dissector to add.
 * @throws CannotChangeDissectorsAfterConstructionException if the dissectors have already been compiled.
 */
public final void addDissector(final Dissector dissector) {
    final boolean alreadyCompiled = (compiledDissectors != null);
    if (alreadyCompiled) {
        throw new CannotChangeDissectorsAfterConstructionException();
    }
    if (dissector == null) {
        return; // Nothing to register
    }
    allDissectors.add(dissector);
}
// --------------------------------------------
public final void dropDissector(Class extends Dissector> dissectorClassToDrop) {
if (compiledDissectors != null) {
throw new CannotChangeDissectorsAfterConstructionException();
}
Set removeDissector = new HashSet<>();
for (final Dissector dissector : allDissectors) {
if (dissector.getClass().equals(dissectorClassToDrop)) {
removeDissector.add(dissector);
}
}
allDissectors.removeAll(removeDissector);
}
// --------------------------------------------
/**
 * Sets the type of the root value and drops the compiled dissectors,
 * so they are assembled again the next time they are needed.
 *
 * @param newRootType the type of the value that is handed to the parser.
 */
protected void setRootType(final String newRootType) {
    this.rootType = newRootType;
    // Whatever was compiled belonged to the previous root type.
    this.compiledDissectors = null;
}
// --------------------------------------------
/**
 * Expands every registered dissector into one DissectorPhase per possible output
 * and stores those in availableDissectors. Does nothing when the dissectors have
 * already been compiled.
 *
 * @throws InvalidDissectorException if a dissector has no input type, has no outputs,
 *         or reports an output that is not of the form TYPE:name.
 */
private void assembleDissectorPhases() throws InvalidDissectorException {
    if (compiledDissectors != null) {
        return; // nothing to do.
    }
    for (final Dissector dissector : allDissectors) {
        final String dissectorName = dissector.getClass().getCanonicalName();
        final String inputType = dissector.getInputType();
        if (inputType == null) {
            throw new InvalidDissectorException("Dissector returns null on getInputType(): [" + dissectorName + "]");
        }
        final List<String> outputs = dissector.getPossibleOutput();
        if (outputs == null || outputs.isEmpty()) {
            throw new InvalidDissectorException("Dissector cannot create any outputs: [" + dissectorName + "]");
        }
        // Create all dissector phases
        for (final String output : outputs) {
            final int colonPos = (output == null) ? -1 : output.indexOf(':');
            if (colonPos < 0) {
                // Without this check substring(0, -1) fails with an exception
                // that does not tell which dissector is the culprit.
                throw new InvalidDissectorException("Dissector returns an output that is not of the form TYPE:name (\""
                        + output + "\"): [" + dissectorName + "]");
            }
            final String outputType = output.substring(0, colonPos);
            final String name = output.substring(colonPos + 1);
            availableDissectors.add(new DissectorPhase(inputType, outputType, name, dissector));
        }
    }
}
// --------------------------------------------
/**
 * Builds the tree of dissectors that is needed to produce all requested targets.
 * Does nothing when the dissectors have already been compiled.
 *
 * @throws MissingDissectorsException if a requested target cannot be reached from the root.
 * @throws InvalidDissectorException if one of the registered dissectors is invalid.
 */
private void assembleDissectors() throws MissingDissectorsException, InvalidDissectorException {
    if (compiledDissectors != null) {
        return; // nothing to do.
    }

    // A (re)assembly must start from scratch. Without this a second assembly
    // (after addParseTarget or setRootType invalidated the first one) finds the root
    // in locatedTargets, stops immediately and compiles nothing; the phases in
    // availableDissectors would also be added a second time.
    usable = false;
    availableDissectors.clear();
    locatedTargets.clear();

    // So
    // - we have a set of needed values (targets)
    // - we have a set of dissectors that can pick apart some input
    // - we know where to start from
    // - we need to know how to proceed
    assembleDissectorPhases();

    // Step 1: Acquire all potentially useful subtargets
    // We first build a set of all possible subtargets that may be useful
    // this way we can skip anything we know not to be useful
    Set<String> needed = new HashSet<>(getNeeded());
    needed.add(rootType + ':'); // The root name is an empty string
    LOG.debug("Root: >>>{}:<<<", rootType);

    Set<String> allPossibleSubtargets = new HashSet<>();
    for (String need : needed) {
        String neededName = need.substring(need.indexOf(':') + 1);
        LOG.debug("Needed  : >>>{}<<<", neededName);
        // "a.b.c" yields the subtargets "a", "a.b" and "a.b.c"
        StringBuilder sb = new StringBuilder(need.length());
        for (String part : neededName.split("\\.")) {
            if (sb.length() > 0) {
                sb.append('.');
            }
            sb.append(part);
            String subtarget = sb.toString();
            allPossibleSubtargets.add(subtarget);
            LOG.debug("Possible: >>>{}<<<", subtarget);
        }
    }

    // Step 2: From the root we explore all possibly useful trees (recursively)
    compiledDissectors = new HashMap<>();
    usefulIntermediateFields = new HashSet<>();
    findUsefulDissectorsFromField(allPossibleSubtargets, rootType, "", true); // The root name is an empty string

    // Step 3: Inform all dissectors to prepare for the run
    for (Set<DissectorPhase> dissectorPhases : compiledDissectors.values()) {
        for (DissectorPhase dissectorPhase : dissectorPhases) {
            dissectorPhase.instance.prepareForRun();
        }
    }

    // Step 4: As a final step we verify that every required input can be found
    Set<String> missingDissectors = getTheMissingFields();
    if (missingDissectors != null && !missingDissectors.isEmpty()) {
        StringBuilder allMissing = new StringBuilder(missingDissectors.size() * 64);
        for (String missing : missingDissectors) {
            allMissing.append(missing).append(' ');
        }
        throw new MissingDissectorsException(allMissing.toString());
    }
    usable = true;
}
// --------------------------------------------
private void findUsefulDissectorsFromField(
final Set possibleTargets,
final String subRootType, final String subRootName,
final boolean thisIsTheRoot) {
String subRootId = subRootType + ':' + subRootName;
// When we reach this point we have dissectors to get here.
// So we store this to later validate if we have everything.
if (locatedTargets.contains(subRootId)) {
// We already found this one.
return; // Avoid infinite recursion
}
locatedTargets.add(subRootId);
LOG.debug("findUsefulDissectors:\"" + subRootType + "\" \"" + subRootName + "\"");
for (DissectorPhase dissector: availableDissectors) {
if (!(dissector.inputType.equals(subRootType))) {
continue; // Wrong type
}
// If it starts with a . it extends.
// If it doesn't then it starts at the beginning
Set checkFields = new HashSet<>();
// If true then this dissector can output any name instead of just one
boolean isWildCardDissector = dissector.name.equals("*");
if (isWildCardDissector) {
// Ok, this is special
// We need to see if any of the wanted types start with the
// subRootName (it may have a '.' in the rest of the line !)
String subRootNameMatch = subRootName + '.';
for (String possibleTarget : possibleTargets) {
if (possibleTarget.startsWith(subRootNameMatch)) {
checkFields.add(possibleTarget);
}
}
} else if (thisIsTheRoot) {
checkFields.add(dissector.name);
} else {
checkFields.add(subRootName + '.' + dissector.name);
}
for (String checkField: checkFields) {
if (possibleTargets.contains(checkField)
&& !compiledDissectors.containsKey(dissector.outputType + ":" + checkField)) {
Set subRootPhases = compiledDissectors.get(subRootId);
if (subRootPhases == null) {
// New so we can simply add it.
subRootPhases = new HashSet<>();
compiledDissectors.put(subRootId, subRootPhases);
usefulIntermediateFields.add(subRootName);
}
Class extends Dissector> clazz = dissector.instance.getClass();
DissectorPhase dissectorPhaseInstance = findDissectorInstance(subRootPhases, clazz);
if (dissectorPhaseInstance == null) {
dissectorPhaseInstance =
new DissectorPhase(dissector.inputType, dissector.outputType,
checkField, dissector.instance.getNewInstance());
subRootPhases.add(dissectorPhaseInstance);
}
// Tell the dissector instance what to expect
if (LOG.isDebugEnabled()) {
LOG.debug("Informing : (" + dissector.inputType + ")" + subRootName
+ " --> " + dissector.instance.getClass().getName()
+ " --> (" + dissector.outputType + ")" + checkField);
}
castsOfTargets.put(dissector.outputType + ':' + checkField,
dissectorPhaseInstance.instance.prepareForDissect(subRootName, checkField));
// Recurse from this point down
findUsefulDissectorsFromField(possibleTargets, dissector.outputType, checkField, false);
}
}
}
Set mappings = typeRemappings.get(subRootName);
if (mappings != null) {
for (String mappedType : mappings) {
if (!compiledDissectors.containsKey(mappedType + ':' + subRootName)) {
// Retyped targets are ALWAYS String ONLY.
castsOfTargets.put(mappedType + ':' + subRootName, Casts.STRING_ONLY);
findUsefulDissectorsFromField(possibleTargets, mappedType, subRootName, false);
}
}
}
}
private DissectorPhase findDissectorInstance(Set dissectorPhases,
Class extends Dissector> clazz) {
for (DissectorPhase phase : dissectorPhases) {
if (phase.instance.getClass() == clazz) {
return phase;
}
}
return null;
}
// --------------------------------------------
/**
 * Determines which of the needed targets were not located while searching
 * for useful dissectors.
 *
 * @return the (possibly empty) set of needed targets that cannot be produced.
 */
private Set<String> getTheMissingFields() {
    Set<String> missing = new HashSet<>();
    for (String target : getNeeded()) {
        if (locatedTargets.contains(target)) {
            continue; // Found, so not missing
        }
        if (!target.endsWith("*")) {
            missing.add(target);
            continue;
        }
        // Handle wildcard targets differently
        if (target.endsWith(".*")) {
            // "TYPE:foo.*" is available if its parent "TYPE:foo" was located
            String parent = target.substring(0, target.length() - 2);
            if (!locatedTargets.contains(parent)) {
                missing.add(target);
            }
        }
        // Else: it ends with :* and it is always "present".
    }
    return missing;
}
// --------------------------------------------
/**
 * The constructor tries to retrieve the desired fields from the annotations in the specified class.
 * Every public method of the class that carries a Field annotation is registered
 * as a parse target for all values of that annotation.
 *
 * @param clazz the class of the records this parser fills.
 * @throws InvalidFieldMethodSignature if an annotated method does not have a supported signature.
 */
public Parser(final Class<RECORD> clazz) {
    recordClass = clazz;

    // Get all methods of the correct signature that have been annotated
    // with Field
    for (final Method method : recordClass.getMethods()) {
        final Field field = method.getAnnotation(Field.class);
        if (field != null) {
            addParseTarget(method, Arrays.asList(field.value()));
        }
    }
}
// --------------------------------------------
/**
 * Manually registers a target callback for exactly one field.
 * Delegates to the variant that accepts a list of fields.
 *
 * @param method     the method that must receive the value
 * @param fieldValue the field the method wants to receive
 */
public void addParseTarget(final Method method, final String fieldValue) {
    final List<String> singleField = Arrays.asList(fieldValue);
    addParseTarget(method, singleField);
}
/*
* When there is a need to add a target callback manually use this method. */
public void addParseTarget(final Method method, final List fieldValues) {
if (method == null || fieldValues == null) {
return; // Nothing to do here
}
final Class>[] parameters = method.getParameterTypes();
if (
// Setters that receive a String
((parameters.length == 1) && (parameters[0] == String.class)) ||
((parameters.length == 2) && (parameters[0] == String.class) && (parameters[1] == String.class)) ||
// Setters that receive a Long
((parameters.length == 1) && (parameters[0] == Long.class)) ||
((parameters.length == 2) && (parameters[0] == String.class) && (parameters[1] == Long.class)) ||
// Setters that receive a Double
((parameters.length == 1) && (parameters[0] == Double.class)) ||
((parameters.length == 2) && (parameters[0] == String.class) && (parameters[1] == Double.class))
) {
for (final String fieldValue : fieldValues) {
String cleanedFieldValue = cleanupFieldValue(fieldValue);
if (!fieldValue.equals(cleanedFieldValue)) {
LOG.warn("The requested \"" + fieldValue + "\" was converted into \"" + cleanedFieldValue + "\" ");
}
// We have 1 real target
Set fieldTargets = targets.get(cleanedFieldValue);
if (fieldTargets == null) {
fieldTargets = new HashSet<>();
}
fieldTargets.add(method);
targets.put(cleanedFieldValue, fieldTargets);
}
} else {
throw new InvalidFieldMethodSignature(method);
}
compiledDissectors = null;
}
// --------------------------------------------
private Map> typeRemappings = new HashMap<>(16);
public void setTypeRemappings(Map> pTypeRemappings) {
if (pTypeRemappings == null) {
this.typeRemappings.clear();
} else {
this.typeRemappings = pTypeRemappings;
}
}
public void addTypeRemappings(Map> additionalTypeRemappings) {
for (Map.Entry> entry: additionalTypeRemappings.entrySet()){
String input = entry.getKey();
for (String newType: entry.getValue()) {
addTypeRemapping(input, newType, Casts.STRING_ONLY);
}
}
}
/**
 * Adds one type remapping with the casts Casts.STRING_ONLY.
 *
 * @param input   the name of the field that gets an additional type
 * @param newType the additional type
 */
public void addTypeRemapping(String input, String newType) {
    // Delegate to the full variant with the default casts.
    this.addTypeRemapping(
        input,
        newType,
        Casts.STRING_ONLY);
}
/**
 * Adds one type remapping: the field with the specified name is also treated
 * as being of the new type. The name is trimmed and lowercased, the type is
 * trimmed and uppercased. Adding a remapping that already exists changes nothing.
 *
 * @param input    the name of the field that gets an additional type
 * @param newType  the additional type
 * @param newCasts the casts that are possible for the retyped field
 * @throws CannotChangeDissectorsAfterConstructionException if the dissectors have already been compiled.
 */
public void addTypeRemapping(String input, String newType, EnumSet<Casts> newCasts) {
    if (compiledDissectors != null) {
        throw new CannotChangeDissectorsAfterConstructionException();
    }

    String theInput = input.trim().toLowerCase(Locale.ENGLISH);
    String theType = newType.trim().toUpperCase(Locale.ENGLISH);

    Set<String> mappingsForInput = typeRemappings.get(theInput);
    if (mappingsForInput == null) {
        mappingsForInput = new HashSet<>();
        typeRemappings.put(theInput, mappingsForInput);
    }

    // add() returns true only if the type was not present yet
    if (mappingsForInput.add(theType)) {
        castsOfTargets.put(theType + ':' + theInput, newCasts);
    }
}
// --------------------------------------------
/**
 * Normalizes the id of a field: the part before the first ':' (the type) is
 * converted to uppercase, everything after it (the name) to lowercase.
 * A value without a ':' is converted to lowercase as a whole.
 *
 * @param fieldValue the id of the field, for example "string:Request.FirstLine"
 * @return the normalized id, for example "STRING:request.firstline"
 */
public static String cleanupFieldValue(String fieldValue) {
    final int separator = fieldValue.indexOf(':');
    if (separator >= 0) {
        final StringBuilder cleaned = new StringBuilder(fieldValue.length());
        cleaned.append(fieldValue.substring(0, separator).toUpperCase(Locale.ENGLISH));
        cleaned.append(':');
        cleaned.append(fieldValue.substring(separator + 1).toLowerCase(Locale.ENGLISH));
        return cleaned.toString();
    }
    // No type present: only a name
    return fieldValue.toLowerCase(Locale.ENGLISH);
}
// --------------------------------------------
/**
 * Parse the value and return a new instance of RECORD.
 * For this method to work the RECORD class may NOT be an inner class.
 *
 * @param value the value that must be parsed
 * @return the new record, or null if no record instance could be created
 *         or the parser is not usable.
 */
public RECORD parse(final String value)
    throws DissectionFailure, InvalidDissectorException, MissingDissectorsException {
    assembleDissectors();
    final Parsable<RECORD> parsable = createParsable();
    if (parsable == null) {
        return null;
    }
    parsable.setRootDissection(rootType, value);
    // parse(Parsable) returns null when the parser is not usable;
    // hand that on as null instead of failing with a NullPointerException.
    final Parsable<RECORD> parsed = parse(parsable);
    return (parsed == null) ? null : parsed.getRecord();
}
// --------------------------------------------
/**
 * Parse the value and call all configured setters in the provided instance of RECORD.
 *
 * @param record the instance in which the setters must be called
 * @param value  the value that must be parsed
 * @return the record as held by the parsable, or null if the parser is not usable.
 */
public RECORD parse(final RECORD record, final String value)
    throws DissectionFailure, InvalidDissectorException, MissingDissectorsException {
    assembleDissectors();
    final Parsable<RECORD> parsable = createParsable(record);
    parsable.setRootDissection(rootType, value);
    // parse(Parsable) returns null when the parser is not usable;
    // hand that on as null instead of failing with a NullPointerException.
    final Parsable<RECORD> parsed = parse(parsable);
    return (parsed == null) ? null : parsed.getRecord();
}
// --------------------------------------------
/**
 * Runs the compiled dissectors on the parsable until it reports no more
 * fields that need to be parsed.
 *
 * @param parsable the parsable that already holds the root value
 * @return the same parsable, or null if the parser is not usable.
 */
Parsable<RECORD> parse(final Parsable<RECORD> parsable)
    throws DissectionFailure, InvalidDissectorException, MissingDissectorsException {
    assembleDissectors();

    if (!usable) {
        return null;
    }

    // Values look like "TYPE:foo.bar"
    // We iterate over a copy because dissecting a field adds new fields to the parsable.
    Set<ParsedField> toBeParsed = new HashSet<>(parsable.getToBeParsed());

    while (!toBeParsed.isEmpty()) {
        for (ParsedField fieldThatNeedsToBeParsed : toBeParsed) {
            parsable.setAsParsed(fieldThatNeedsToBeParsed);
            Set<DissectorPhase> dissectorSet = compiledDissectors.get(fieldThatNeedsToBeParsed.getId());
            if (dissectorSet != null) {
                for (DissectorPhase dissector : dissectorSet) {
                    LOG.debug("Dissect {} with {}",
                            fieldThatNeedsToBeParsed, dissector.instance.getClass().getName());
                    dissector.instance.dissect(parsable, fieldThatNeedsToBeParsed.getName());
                }
            } else {
                LOG.trace("NO DISSECTORS FOR \"{}\"", fieldThatNeedsToBeParsed);
            }
        }
        toBeParsed.clear();
        toBeParsed.addAll(parsable.getToBeParsed());
    }
    return parsable;
}
// --------------------------------------------
void store(final RECORD record, final String key, final String name, final Value value) {
boolean calledASetter = false;
if (value == null) {
LOG.error("Got a null value to store for key={} name={}.", key, name);
return; // Nothing to do
}
final Set methods = targets.get(key);
if (methods == null) {
LOG.error("NO methods for key={} name={}.", key, name);
return;
}
EnumSet castsTo = castsOfTargets.get(key);
if (castsTo == null) {
castsTo = castsOfTargets.get(name);
if (castsTo == null) {
LOG.error("NO casts for \"" + name + "\"");
return;
}
}
for (Method method : methods) {
if (method != null) {
try {
Class>[] parameters = method.getParameterTypes();
Class> valueClass = parameters[parameters.length - 1]; // Always the last one
if (valueClass == String.class) {
if (castsTo.contains(Casts.STRING)) {
String stringValue = value.getString();
if (parameters.length == 2) {
method.invoke(record, name, stringValue);
} else {
method.invoke(record, stringValue);
}
calledASetter = true;
}
continue;
}
if (valueClass == Long.class) {
if (castsTo.contains(Casts.LONG)) {
Long longValue = value.getLong();
if (parameters.length == 2) {
method.invoke(record, name, longValue);
} else {
method.invoke(record, longValue);
}
calledASetter = true;
}
continue;
}
if (valueClass == Double.class) {
if (castsTo.contains(Casts.DOUBLE)) {
Double doubleValue = value.getDouble();
if (parameters.length == 2) {
method.invoke(record, name, doubleValue);
} else {
method.invoke(record, doubleValue);
}
calledASetter = true;
}
continue;
}
throw new FatalErrorDuringCallOfSetterMethod(
"Tried to call setter with unsupported class :" +
" key = \"" + key + "\" " +
" name = \"" + name + "\" " +
" value = \"" + value + "\"" +
" castsTo = \"" + castsTo + "\"");
} catch (final Exception e) {
throw new FatalErrorDuringCallOfSetterMethod(e.getMessage() + " caused by \"" +
e.getCause() + "\" when calling \"" +
method.toGenericString() + "\" for " +
" key = \"" + key + "\" " +
" name = \"" + name + "\" " +
" value = \"" + value + "\"" +
" castsTo = \"" + castsTo + "\"", e);
}
}
}
if (!calledASetter) {
throw new FatalErrorDuringCallOfSetterMethod("No setter called for " +
" key = \"" + key + "\" " +
" name = \"" + name + "\" " +
" value = \"" + value + "\"");
}
}
// --------------------------------------------
/**
 * Wraps the record in a new Parsable that is tied to this parser
 * and to the current type remappings.
 *
 * @param record the record in which the setters will be called
 * @return the new parsable.
 */
private Parsable<RECORD> createParsable(RECORD record) {
    return new Parsable<>(this, record, typeRemappings);
}
/**
 * Creates a new record through the public no-argument constructor of the
 * record class and wraps it in a new Parsable.
 *
 * @return the new parsable, or null if the record could not be created
 *         (the cause is logged).
 */
public Parsable<RECORD> createParsable() {
    RECORD record;

    try {
        Constructor<RECORD> co = recordClass.getConstructor();
        record = co.newInstance();
    } catch (Exception e) {
        // Pass the exception itself as well so the stack trace is not lost.
        LOG.error("Unable to create instance: " + e.toString(), e);
        return null;
    }
    return createParsable(record);
}
// --------------------------------------------
/**
 * This method is for use by the developer to query the parser about
 * the possible paths that may be extracted.
 * Uses a maximum recursion depth of 15.
 * @return A list of all possible paths that could be determined automatically.
 * @throws nl.basjes.parse.core.exceptions.InvalidDissectorException declared for callers; not thrown by the code visible here.
 * @throws nl.basjes.parse.core.exceptions.MissingDissectorsException declared for callers; not thrown by the code visible here.
 */
public List<String> getPossiblePaths() throws MissingDissectorsException, InvalidDissectorException {
    return getPossiblePaths(15);
}
/**
* This method is for use by the developer to query the parser about
* the possible paths that may be extracted.
* @param maxDepth The maximum recursion depth
* @return A list of all possible paths that could be determined automatically.
*/
public List getPossiblePaths(int maxDepth) {
if (allDissectors.isEmpty()) {
return null; // nothing to do.
}
List paths = new ArrayList<>();
Map> pathNodes = new HashMap<>();
for (Dissector dissector : allDissectors) {
final String inputType = dissector.getInputType();
if (inputType == null) {
LOG.error("Dissector returns null on getInputType(): ["+ dissector.getClass().getCanonicalName()+"]");
return null;
}
final List outputs = dissector.getPossibleOutput();
pathNodes.put(inputType, outputs);
}
findAdditionalPossiblePaths(pathNodes, paths, "", rootType, maxDepth);
for (Map.Entry> typeRemappingSet: typeRemappings.entrySet()) {
for (String typeRemapping: typeRemappingSet.getValue()) {
String remappedPath = typeRemapping + ':' + typeRemappingSet.getKey();
LOG.debug("Adding remapped path: {}", remappedPath);
paths.add(remappedPath);
findAdditionalPossiblePaths(pathNodes, paths, typeRemappingSet.getKey(), typeRemapping, maxDepth - 1);
}
}
return paths;
}
/**
* Add all child paths in respect to the base (which is already present in the result set)
*/
private void findAdditionalPossiblePaths(Map> pathNodes, List paths, String base, String baseType,
int maxDepth) {
if (maxDepth == 0) {
return;
}
if (pathNodes.containsKey(baseType)) {
List childPaths = pathNodes.get(baseType);
for (String childPath : childPaths) {
final int colonPos = childPath.indexOf(':');
final String childType = childPath.substring(0, colonPos);
final String childName = childPath.substring(colonPos + 1);
String childBase;
if (base.isEmpty()) {
childBase = childName;
} else {
childBase = base + '.' + childName;
}
paths.add(childType+':'+childBase);
findAdditionalPossiblePaths(pathNodes, paths, childBase, childType, maxDepth - 1);
}
}
}
// --------------------------------------------
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy