All Downloads are FREE. Search and download functionalities are using the official Maven repository.

io.nosqlbench.virtdata.api.VirtDataComposer Maven / Gradle / Ivy

There is a newer version: 5.17.0
Show newest version
package io.nosqlbench.virtdata.api;

import io.nosqlbench.virtdata.api.composers.FunctionAssembly;
import io.nosqlbench.virtdata.lang.ast.FunctionCall;
import io.nosqlbench.virtdata.lang.ast.VirtDataFlow;
import io.nosqlbench.virtdata.lang.parser.VirtDataDSL;
import org.apache.commons.lang3.ClassUtils;
import org.apache.logging.log4j.Logger;
import org.apache.logging.log4j.LogManager;

import java.lang.invoke.MethodHandles;
import java.util.*;
import java.util.stream.Collectors;

/**
 * 

Synopsis

*

This library implements the ability to compose a lambda function from a sequence of other functions. * The resulting lambda will use the specialized primitive function interfaces, such as LongUnaryOperator, LongFunction, etc. * Where there are two functions which do not have matching input and output types, the most obvious conversion is made. * This means that while you are able to compose a LongUnaryOperator with a LongUnaryOperator for maximum * efficiency, you can also compose LongUnaryOperator with an IntFunction, and a best effort attempt will be made to * do a reasonable conversion in between.

* *

Limitations

*

Due to type erasure, it is not possible to know the generic type parameters for non-primitive functional types. * These include IntFunction<?>, LongFunction<?>, and in the worst case, Function<?,?>. * For these types, annotations are provided to better inform the runtime lambda compositor.

* *

Multiple Paths

*

The library allows for there to be multiple functions which match the spec, possibly because multiple * functions have the same name, but exist in different libraries or in different packages within the same library. * This means that the composer library must find a connecting path between the functions that can match at each stage, * disregarding all but one.

* *

Path Finding

*

The rule for finding the best path among the available functions is as follows, at each pairing between * adjacent stages of functions:

*
    *
  1. The co-compatible output and input types between the functions are mapped. Functions sharing the co-compatible * types are kept in the list. Functions not sharing them are removed.
  2. *
  3. As long as functions can be removed in this way, the process iterates through the chain, starting again * at the front of the list.
  4. *
  5. When no functions can be removed due to lack of co-compatible types, each stage is selected according to * type preferences as represented in {@link ValueType}
  6. * *
  7. If the next (outer) function does not have a compatible input type, move it down on the list. * If, after this step, there are functions which do have matching signatures, all others are removed.
  8. *
*/ public class VirtDataComposer { private final static String PREAMBLE = "compose "; private final static Logger logger = LogManager.getLogger(DataMapperLibrary.class);private final static MethodHandles.Lookup lookup = MethodHandles.publicLookup(); private final VirtDataFunctionLibrary functionLibrary; private final Map customElements = new HashMap<>(); public VirtDataComposer(VirtDataFunctionLibrary functionLibrary) { this.functionLibrary = functionLibrary; } public VirtDataComposer() { this.functionLibrary = VirtDataLibraries.get(); } public Optional resolveFunctionFlow(String flowspec) { String strictSpec = flowspec.startsWith("compose ") ? flowspec.substring(8) : flowspec; VirtDataDSL.ParseResult parseResult = VirtDataDSL.parse(strictSpec); if (parseResult.throwable != null) { throw new RuntimeException(parseResult.throwable); } VirtDataFlow flow = parseResult.flow; return resolveFunctionFlow(flow); } public ResolverDiagnostics resolveDiagnosticFunctionFlow(String flowspec) { String strictSpec = flowspec.startsWith("compose ") ? flowspec.substring(8) : flowspec; VirtDataDSL.ParseResult parseResult = VirtDataDSL.parse(strictSpec); if (parseResult.throwable != null) { throw new RuntimeException(parseResult.throwable); } VirtDataFlow flow = parseResult.flow; return resolveDiagnosticFunctionFlow(flow); } public ResolverDiagnostics resolveDiagnosticFunctionFlow(VirtDataFlow flow) { ResolverDiagnostics diagnostics = new ResolverDiagnostics(); diagnostics.trace("processing flow " + flow.toString() + " from output to input"); LinkedList> funcs = new LinkedList<>(); LinkedList>> nextFunctionInputTypes = new LinkedList<>(); Optional> finalValueTypeOption = Optional.ofNullable(flow.getLastExpression().getCall().getOutputType()) .map(ValueType::valueOfClassName).map(ValueType::getValueClass); nextFunctionInputTypes.add(new HashSet<>()); finalValueTypeOption.ifPresent(t -> nextFunctionInputTypes.get(0).add(t)); diagnostics.trace("working backwards from " + (flow.getExpressions().size()-1)); for (int i = flow.getExpressions().size() - 1; i >= 0; i--) { FunctionCall call = flow.getExpressions().get(i).getCall(); diagnostics.trace("resolving args for " + call.toString()); List nodeFunctions = new LinkedList<>(); String funcName = call.getFunctionName(); Class inputType = ValueType.classOfType(call.getInputType()); Class outputType = ValueType.classOfType(call.getOutputType()); Object[] args = call.getArguments(); try { args = populateFunctions(diagnostics, args, this.customElements); } catch (Exception e) { return diagnostics.error(e); } diagnostics.trace("resolved args: "); for (Object arg : args) { diagnostics.trace(" " + arg.getClass().getSimpleName() + ": " + arg.toString()); } List resolved = functionLibrary.resolveFunctions(outputType, inputType, funcName, this.customElements,args); if (resolved.size() == 0) { return diagnostics.error(new RuntimeException("Unable to find even one function for " + call)); } diagnostics.trace(" resolved functions:"); diagnostics.trace(summarize(resolved)); nodeFunctions.addAll(resolved); funcs.addFirst(nodeFunctions); Set> inputTypes = nodeFunctions.stream().map(ResolvedFunction::getInputClass).collect(Collectors.toSet()); nextFunctionInputTypes.addFirst(inputTypes); } if (!nextFunctionInputTypes.peekFirst().contains(Long.TYPE)) { return diagnostics.error(new RuntimeException("There is no initial function which accepts a long input. Function chain, after type filtering: \n" + summarizeBulk(funcs))); } removeNonLongFunctions(funcs.getFirst()); List flattenedFuncs = optimizePath(funcs, ValueType.classOfType(flow.getLastExpression().getCall().getOutputType())); if (flattenedFuncs.size() == 1) { diagnostics.trace("FUNCTION resolution succeeded (single): '" + flow.toString() + "'"); return diagnostics.setResolvedFunction(flattenedFuncs.get(0)); } FunctionAssembly assembly = new FunctionAssembly(); diagnostics.trace("composed summary: " + summarize(flattenedFuncs)); boolean isThreadSafe = true; diagnostics.trace("FUNCTION chain selected: (multi) '" + this.summarize(flattenedFuncs) + "'"); for (ResolvedFunction resolvedFunction : flattenedFuncs) { try { Object functionObject = resolvedFunction.getFunctionObject(); assembly.andThen(functionObject); if (!resolvedFunction.isThreadSafe()) { isThreadSafe = false; } } catch (Exception e) { String flowdata = flow!=null? flow.toString() : "undefined"; return diagnostics.error(new RuntimeException("FUNCTION resolution failed: '" + flowdata + "': " + e.toString())); } } ResolvedFunction composedFunction = assembly.getResolvedFunction(isThreadSafe); diagnostics.trace("FUNCTION resolution succeeded (lambda): '" + flow.toString() + "'"); return diagnostics.setResolvedFunction(composedFunction); } public Optional resolveFunctionFlow(VirtDataFlow flow) { ResolverDiagnostics resolverDiagnostics = resolveDiagnosticFunctionFlow(flow); return resolverDiagnostics.getResolvedFunction(); } private Object[] populateFunctions(ResolverDiagnostics diagnostics, Object[] args, Map cconfig) { for (int i = 0; i < args.length; i++) { Object o = args[i]; if (o instanceof FunctionCall) { FunctionCall call = (FunctionCall) o; String funcName = call.getFunctionName(); Class inputType = ValueType.classOfType(call.getInputType()); Class outputType = ValueType.classOfType(call.getOutputType()); Object[] fargs = call.getArguments(); diagnostics.trace("resolving argument as function '" + call.toString() + "'"); fargs = populateFunctions(diagnostics, fargs, cconfig); List resolved = functionLibrary.resolveFunctions(outputType, inputType, funcName, cconfig, fargs); if (resolved.size() == 0) { throw new RuntimeException("Unable to resolve even one function for argument: " + call); } args[i] = resolved.get(0).getFunctionObject(); } } return args; } private void removeNonLongFunctions(List funcs) { List toRemove = new LinkedList<>(); for (ResolvedFunction func : funcs) { if (!func.getInputClass().isAssignableFrom(long.class)) { logger.trace("input type " + func.getInputClass().getCanonicalName() + " is not assignable from long"); toRemove.add(func); } } if (toRemove.size() > 0 && toRemove.size() == funcs.size()) { throw new RuntimeException("removeNonLongFunctions would remove all functions: " + funcs); } funcs.removeAll(toRemove); } private String summarize(List funcs) { return funcs.stream() .map(String::valueOf).collect(Collectors.joining("|")); } private String summarizeBulk(List> funcs) { List> spans = new LinkedList<>(); funcs.forEach(l -> spans.add(l.stream().map(String::valueOf).collect(Collectors.toList()))); List> widths = spans.stream().map( l -> l.stream().map(String::length).max(Integer::compare)).collect(Collectors.toList()); String funcsdata = spans.stream().map( l -> l.stream().map(String::valueOf).collect(Collectors.joining("|\n")) ).collect(Collectors.joining("\n\n")); StringBuilder sb = new StringBuilder(); sb.append("---\\\\\n").append(funcsdata).append("\n---////\n"); return sb.toString(); } /** *

* Attempt path optimizations on each phase junction, considering the set of * candidate inner functions with the candidate outer functions. * This is an iterative process, that will keep trying until no apparent * progress is made. Each higher-precedence optimization strategy is used * iteratively as long as it makes progress and then the lower precedence * strategies are allowed to have their turn. *

*

*

It is considered an error if the strategies are unable to reduce each * phase down to a single preferred function. Therefore, the lowest precedence * strategy is the most aggressive, simply sorting the functions by basic * type preference and then removing all but the highest selected function.

* * @param funcs the list of candidate functions offered at each phase, in List<List> form. * @return a List of resolved functions that has been fully optimized */ private List optimizePath(List> funcs, Class type) { List prevFuncs = null; List nextFuncs = null; int progress = -1; int pass = 0; while (progress != 0) { pass++; progress = 0; progress += reduceByRequiredResultsType(funcs.get(funcs.size() - 1), type); if (funcs.size() > 1) { int stage = 0; for (List funcList : funcs) { stage++; nextFuncs = funcList; if (prevFuncs != null && nextFuncs != null) { if (progress == 0) { progress += reduceByDirectTypes(prevFuncs, nextFuncs); if (progress == 0) { progress += reduceByAssignableTypes(prevFuncs, nextFuncs, false); if (progress == 0) { progress += reduceByAssignableTypes(prevFuncs, nextFuncs, true); if (progress == 0) { progress += reduceByPreferredTypes(prevFuncs, nextFuncs); } } } } } // else first pass, prime pointers prevFuncs = nextFuncs; } nextFuncs = null; prevFuncs = null; } else { progress += reduceByPreferredResultTypes(funcs.get(0)); } } List optimized = funcs.stream().map(l -> l.get(0)).collect(Collectors.toList()); return optimized; } private int reduceByRequiredResultsType(List endFuncs, Class resultType) { int progressed = 0; LinkedList tmpList = new LinkedList<>(endFuncs); for (ResolvedFunction endFunc : tmpList) { if (resultType != null && !ClassUtils.isAssignable(endFunc.getResultClass(), resultType, true)) { endFuncs.remove(endFunc); String logmsg = "BY-REQUIRED-RESULT-TYPE removed function '" + endFunc + "' because is not assignable to " + resultType; logger.trace(logmsg); progressed++; } } if (endFuncs.size() == 0) { throw new RuntimeException("BY-REQUIRED-RESULT-TYPE No end funcs were found which are assignable to " + resultType); } return progressed; } private int reduceByPreferredResultTypes(List funcs) { int progressed = 0; if (funcs.size() > 1) { progressed += funcs.size() - 1; funcs.sort(ResolvedFunction.PREFERRED_TYPE_COMPARATOR); while (funcs.size() > 1) { logger.trace("BY-SINGLE-PREFERRED-TYPE removing func " + funcs.get(funcs.size() - 1) + " because " + funcs.get(0) + " has more preferred types."); funcs.remove(funcs.size() - 1); } } return progressed; } private int reduceByPreferredTypes(List prevFuncs, List nextFuncs) { int progressed = 0; if (prevFuncs.size() > 1) { progressed += prevFuncs.size() - 1; prevFuncs.sort(ResolvedFunction.PREFERRED_TYPE_COMPARATOR); while (prevFuncs.size() > 1) { String logmsg = "BY-PREV-PREFERRED-TYPE removing func " + prevFuncs.get(prevFuncs.size() - 1) + " because " + prevFuncs.get(0) + " has more preferred types."; logger.trace(logmsg); prevFuncs.remove(prevFuncs.size() - 1); } } else if (nextFuncs.size() > 1) { progressed += nextFuncs.size() - 1; nextFuncs.sort(ResolvedFunction.PREFERRED_TYPE_COMPARATOR); while (nextFuncs.size() > 1) { String logmsg = "BY-NEXT-PREFERRED-TYPE removing func " + nextFuncs.get(nextFuncs.size() - 1) + " because " + nextFuncs.get(0) + " has more preferred types."; logger.trace(logmsg); nextFuncs.remove(nextFuncs.size() - 1); } } return progressed; } /** * If there are direct type matches between the inner func and the outer func, then remove all * other outer funcs except the ones with direct matches. * * @param prevFuncs The list of candidate inner functions * @param nextFuncs The list of candidate outer functions * @return count of items removed */ private int reduceByDirectTypes(List prevFuncs, List nextFuncs) { int progressed = 0; // Rule 1: If there are direct type matches, remove extraneous next funcs Set> outputs = getOutputs(prevFuncs); Set> inputs = getInputs(nextFuncs); Set> directMatches = inputs.stream().filter(outputs::contains).collect(Collectors.toCollection(HashSet::new)); if (directMatches.size() > 0) { List toremove = new ArrayList<>(); for (ResolvedFunction nextFunc : nextFuncs) { if (!directMatches.contains(nextFunc.getArgType())) { String logmsg = "BY-DIRECT-TYPE removing next func: " + nextFunc + " because its input types are not satisfied by any previous func"; logger.trace(logmsg); toremove.add(nextFunc); progressed++; } } nextFuncs.removeAll(toremove); } return progressed; } /** * Remove any functions in the second set which do not have an input type which is assignable * from any of the output types of the functions in the first set. * * @param prevFuncs the functions that come before the nextFuncs * @param nextFuncs the functions that come after prevFuncs * @return the number of next funcs that have been removed */ private int reduceByAssignableTypes(List prevFuncs, List nextFuncs, boolean autoboxing) { // Rule 1: If there are direct type matches, remove extraneous next funcs Set> outputs = getOutputs(prevFuncs); Set> inputs = getInputs(nextFuncs); Set> compatibleInputs = new HashSet<>(); for (Class input : inputs) { for (Class output : outputs) { if (ClassUtils.isAssignable(output, input, autoboxing)) { compatibleInputs.add(input); } } } List toremove = new ArrayList<>(); for (ResolvedFunction nextfunc : nextFuncs) { if (!compatibleInputs.contains(nextfunc.getInputClass())) { toremove.add(nextfunc); } } if (toremove.size() == nextFuncs.size()) { String logmsg = "BY-ASSIGNABLE-TYPE Not removing remaining " + nextFuncs.size() + " next funcs " + (autoboxing ? "with autoboxing " : "") + "because no functions would be left."; logger.trace(logmsg); return 0; } else { toremove.forEach(nextfunc -> { String logmsg = "BY-ASSIGNABLE-TYPE removing next func: " + nextfunc + " because its input types are not assignable from any of the previous funcs"; logger.trace(logmsg); } ); nextFuncs.removeAll(toremove); return toremove.size(); } } private Set> getOutputs(List prevFuncs) { Set> outputs = new HashSet<>(); for (ResolvedFunction func : prevFuncs) { outputs.add(func.getResultClass()); } return outputs; } private Set> getInputs(List nextFuncs) { Set> inputs = new HashSet<>(); for (ResolvedFunction nextFunc : nextFuncs) { inputs.add(nextFunc.getArgType()); } return inputs; } public Map getCustomElements() { return this.customElements; } public VirtDataComposer addCustomElement(String name, Object element) { this.customElements.put(name, element); return this; } public VirtDataComposer addCustomElements(Map config) { this.customElements.putAll(config); return this; } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy