
// io.virtdata.core.VirtDataComposer Maven / Gradle / Ivy
package io.virtdata.core;
import com.google.common.collect.Sets;
import io.virtdata.api.DataMapperLibrary;
import io.virtdata.api.ValueType;
import io.virtdata.api.VirtDataFunctionLibrary;
import io.virtdata.api.composers.FunctionAssembly;
import io.virtdata.ast.FunctionCall;
import io.virtdata.ast.VirtDataFlow;
import io.virtdata.parser.VirtDataDSL;
import org.apache.commons.lang3.ClassUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.*;
import java.util.stream.Collectors;
/**
* Synopsis
* This library implements the ability to compose a lambda function from a sequence of other functions.
* The resulting lambda will use the specialized primitive function interfaces, such as LongUnaryOperator, LongFunction, etc.
* Where there are two functions which do not have matching input and output types, the most obvious conversion is made.
* This means that while you are able to compose a LongUnaryOperator with a LongUnaryOperator for maximum
* efficiency, you can also compose LongUnaryOperator with an IntFunction, and a best effort attempt will be made to
* do a reasonable conversion in between.
*
* Limitations
* Due to type erasure, it is not possible to know the generic type parameters for non-primitive functional types.
* These include {@code IntFunction<?>}, {@code LongFunction<?>}, and in the worst case, {@code Function<?,?>}.
* For these types, annotations are provided to better inform the runtime lambda compositor.
*
* Multiple Paths
* The library allows for there to be multiple functions which match the spec, possibly because multiple
* functions have the same name, but exist in different libraries or in different packages within the same library.
* This means that the composer library must find a connecting path between the functions that can match at each stage,
* disregarding all but one.
*
* Path Finding
* The rule for finding the best path among the available functions is as follows, at each pairing between
* adjacent stages of functions:
*
* - The co-compatible output and input types between the functions are mapped. Functions sharing the co-compatible
* types are kept in the list. Functions not sharing them are removed.
* - As long as functions can be removed in this way, the process iterates through the chain, starting again
* at the front of the list.
* - When no functions can be removed due to lack of co-compatible types, each stage is selected according to
* type preferences as represented in {@link ValueType}
*
* - If the next (outer) function does not have a compatible input type, move it down on the list.
* If, after this step, there are functions which do have matching signatures, all others are removed.
*
*/
public class VirtDataComposer {
private final static String PREAMBLE = "compose ";
private final static Logger logger = LoggerFactory.getLogger(DataMapperLibrary.class);
private final VirtDataFunctionLibrary functionLibrary;
public VirtDataComposer(VirtDataFunctionLibrary functionLibrary) {
this.functionLibrary = functionLibrary;
}
public VirtDataComposer() {
this.functionLibrary = VirtDataLibraries.get();
}
public Optional resolveFunctionFlow(String flowspec) {
String strictSpec = flowspec.startsWith("compose ") ? flowspec.substring(8) : flowspec;
VirtDataDSL.ParseResult parseResult = VirtDataDSL.parse(strictSpec);
if (parseResult.throwable!=null) {
throw new RuntimeException(parseResult.throwable);
}
VirtDataFlow flow = parseResult.flow;
return resolveFunctionFlow(flow);
}
public Optional resolveFunctionFlow(VirtDataFlow flow) {
LinkedList> funcs = new LinkedList<>();
LinkedList>> nextFunctionInputTypes = new LinkedList<>();
Optional> finalValueTypeOption =
Optional.ofNullable(flow.getLastExpression().getCall().getOutputType())
.map(ValueType::valueOfClassName).map(ValueType::getValueClass);
nextFunctionInputTypes.add(new HashSet>() {{
finalValueTypeOption.ifPresent(this::add);
}});
for (int i = flow.getExpressions().size() - 1; i >= 0; i--) {
FunctionCall call = flow.getExpressions().get(i).getCall();
List nodeFunctions = new LinkedList<>();
String funcName = call.getFunctionName();
Class> inputType = classOf(call.getInputType());
Class> outputType = classOf(call.getOutputType());
Object[] args = call.getArguments();
args = populateFunctions(args);
List resolved = functionLibrary.resolveFunctions(outputType, inputType, funcName, args);
if (resolved.size() == 0) {
throw new RuntimeException("Unable to find a even one function for " + call);
}
nodeFunctions.addAll(resolved);
funcs.addFirst(nodeFunctions);
Set> inputTypes = nodeFunctions.stream().map(ResolvedFunction::getInputClass).collect(Collectors.toSet());
nextFunctionInputTypes.addFirst(inputTypes);
}
if (!nextFunctionInputTypes.peekFirst().contains(Long.TYPE)) {
throw new RuntimeException("There is no initial function which accepts a long input. Function chain, after type filtering: \n" +
summarize(funcs));
}
removeNonLongFunctions(funcs.getFirst());
List flattenedFuncs = optimizePath(funcs, classOf(flow.getLastExpression().getCall().getOutputType()));
if (flattenedFuncs.size() == 1) {
logger.trace("FUNCTION resolution succeeded (single): '" + flow.toString() + "'");
return Optional.of(flattenedFuncs.get(0));
}
FunctionAssembly assembly = new FunctionAssembly();
logger.trace("composed summary: " + summarize(flattenedFuncs));
boolean isThreadSafe = true;
logger.trace("FUNCTION chain selected: (multi) '" + this.summarize(flattenedFuncs) + "'");
for (ResolvedFunction resolvedFunction : flattenedFuncs) {
try {
assembly.andThen(resolvedFunction.getFunctionObject());
if (!resolvedFunction.isThreadSafe()) {
isThreadSafe = false;
}
} catch (Exception e) {
logger.error("FUNCTION resolution failed: '" + flow.toString() + "': " + e.toString());
throw e;
}
}
logger.trace("FUNCTION resolution succeeded: (multi) '" + flow.toString() + "'");
ResolvedFunction composedFunction = assembly.getResolvedFunction(isThreadSafe);
return Optional.of(composedFunction);
}
private Object[] populateFunctions(Object[] args) {
for (int i = 0; i < args.length; i++) {
Object o = args[i];
if (o instanceof FunctionCall) {
FunctionCall call = (FunctionCall) o;
String funcName = call.getFunctionName();
Class> inputType = classOf(call.getInputType());
Class> outputType = classOf(call.getOutputType());
Object[] fargs = call.getArguments();
fargs = populateFunctions(fargs);
List resolved = functionLibrary.resolveFunctions(outputType, inputType, funcName, fargs);
if (resolved.size() == 0) {
throw new RuntimeException("Unable to find a even one function for " + call);
}
args[i]=resolved.get(0).getFunctionObject();
}
}
return args;
}
private Class> classOf(String inputType) {
ValueType valueType = ValueType.valueOfClassName(inputType);
if (valueType == null) {
return null;
}
if (valueType == ValueType.OBJECT) {
try {
Class.forName(inputType);
} catch (ClassNotFoundException e) {
throw new RuntimeException("Unable to determine class for type " + inputType + ". Consider adding the full package to the name.");
}
}
return valueType.getValueClass();
}
private void removeNonLongFunctions(List funcs) {
List toRemove = new LinkedList<>();
for (ResolvedFunction func : funcs) {
if (func.getFunctionType().getInputValueType() != ValueType.LONG) {
toRemove.add(func);
}
}
if (toRemove.size() > 0 && toRemove.size() == funcs.size()) {
throw new RuntimeException("removeNonLongFunctions would remove all functions: " + funcs);
}
funcs.removeAll(toRemove);
}
private String summarize(List funcs) {
return funcs.stream()
.map(String::valueOf).collect(Collectors.joining("|"));
}
private String summarize(LinkedList> funcs) {
List> spans = new LinkedList<>();
funcs.forEach(l -> spans.add(l.stream().map(String::valueOf).collect(Collectors.toList())));
List> widths = spans.stream().map(
l -> l.stream().map(String::length).max(Integer::compare)).collect(Collectors.toList());
String summary = spans.stream().map(
l -> l.stream().map(String::valueOf).collect(Collectors.joining("|\n"))
).collect(Collectors.joining("\n\n"));
return summary;
}
/**
*
* Attempt path optimizations on each phase junction, considering the set of
* candidate inner functions with the candidate outer functions.
* This is an iterative process, that will keep trying until no apparent
* progress is made. Each higher-precedence optimization strategy is used
* iteratively as long as it makes progress and then the lower precedence
* strategies are allowed to have their turn.
*
*
*
It is considered an error if the strategies are unable to reduce each
* phase down to a single preferred function. Therefore, the lowest precedence
* strategy is the most aggressive, simply sorting the functions by basic
* type preference and then removing all but the highest selected function.
*
* @param funcs the list of candidate functions offered at each phase, in List<List> form.
* @return a List of resolved functions that has been fully optimized
*/
private List optimizePath(List> funcs, Class> type) {
List prevFuncs = null;
List nextFuncs = null;
int progress = -1;
while (progress != 0) {
progress = 0;
progress += reduceByResultType(funcs.get(funcs.size() - 1), type);
if (funcs.size() > 1) {
for (List funcList : funcs) {
nextFuncs = funcList;
if (prevFuncs != null) {
progress += reduceByDirectTypes(prevFuncs, nextFuncs);
// attempt secondary strategy IFF higher precedence strategy failed
if (progress == 0) {
progress += reduceByPreferredTypes(prevFuncs, nextFuncs);
}
} // else first pass, prime pointers
prevFuncs = nextFuncs;
}
}
}
List optimized = funcs.stream().map(l -> l.get(0)).collect(Collectors.toList());
return optimized;
}
private int reduceByResultType(List endFuncs, Class> resultType) {
int progressed = 0;
LinkedList tmpList = new LinkedList<>(endFuncs);
for (ResolvedFunction endFunc : tmpList) {
if (resultType != null && !ClassUtils.isAssignable(endFunc.getResultClass(), resultType, true)) {
endFuncs.remove(endFunc);
logger.trace("removed function '" + endFunc + "' because is not assignable to " + resultType);
progressed++;
}
}
if (endFuncs.size() == 0) {
throw new RuntimeException("No end funcs were found which are assignable to " + resultType);
}
return progressed;
}
private int reduceByPreferredTypes(List prevFuncs, List nextFuncs) {
int progressed = 0;
if (prevFuncs.size() > 1) {
progressed += prevFuncs.size() - 1;
Collections.sort(prevFuncs, ResolvedFunction.PREFERRED_TYPE_COMPARATOR);
while (prevFuncs.size() > 1) {
logger.trace("removing func " + prevFuncs.get(prevFuncs.size() - 1)
+ " because " + prevFuncs.get(0) + " has more preferred types.");
prevFuncs.remove(prevFuncs.size() - 1);
}
} else if (nextFuncs.size() > 1) {
progressed += nextFuncs.size() - 1;
Collections.sort(nextFuncs, ResolvedFunction.PREFERRED_TYPE_COMPARATOR);
while (nextFuncs.size() > 1) {
logger.trace("removing func " + nextFuncs.get(nextFuncs.size() - 1)
+ " because " + nextFuncs.get(0) + " has more preferred types.");
nextFuncs.remove(nextFuncs.size() - 1);
}
}
return progressed;
}
/**
* If there are direct type matches between the inner func and the outer func, then remove all
* other outer funcs except the ones with direct matches.
*
* @param prevFuncs The list of candidate inner functions
* @param nextFuncs The list of candidate outer functions
* @return count of items removed
*/
private int reduceByDirectTypes(List prevFuncs, List nextFuncs) {
int progressed = 0;
// Rule 1: If there are direct type matches, remove extraneous next funcs
Set> outputs = getOutputs(prevFuncs);
Set> inputs = getInputs(nextFuncs);
Sets.SetView> directMatches = Sets.intersection(inputs, outputs);
if (directMatches.size() > 0) {
List toremove = new ArrayList<>();
for (ResolvedFunction nextFunc : nextFuncs) {
if (!directMatches.contains(nextFunc.getArgType())) {
logger.debug("removing next func: " + nextFunc + " because its input types are not satisfied by an previous func");
toremove.add(nextFunc);
progressed++;
}
}
nextFuncs.removeAll(toremove);
}
return progressed;
}
private Set> getOutputs(List prevFuncs) {
Set> outputs = new HashSet<>();
for (ResolvedFunction func : prevFuncs) {
outputs.add(func.getResultClass());
}
return outputs;
}
private Set> getInputs(List nextFuncs) {
Set> inputs = new HashSet<>();
for (ResolvedFunction nextFunc : nextFuncs) {
inputs.add(nextFunc.getArgType());
}
return inputs;
}
}