// io.virtdata.core.VirtDataComposer
// (source listing taken from the virtdata-lib-realer artifact)
package io.virtdata.core;
import io.virtdata.api.DataMapperLibrary;
import io.virtdata.api.ValueType;
import io.virtdata.api.VirtDataFunctionLibrary;
import io.virtdata.api.composers.FunctionAssembly;
import io.virtdata.ast.FunctionCall;
import io.virtdata.ast.VirtDataFlow;
import io.virtdata.parser.VirtDataDSL;
import org.apache.commons.lang3.ClassUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.*;
import java.util.stream.Collectors;
/**
 * <h2>Synopsis</h2>
 * <p>This library implements the ability to compose a lambda function from a sequence of other functions.
 * The resulting lambda will use the specialized primitive function interfaces, such as LongUnaryOperator,
 * LongFunction, etc. Where two adjacent functions do not have matching input and output types, the most
 * obvious conversion is made. This means that while you are able to compose a LongUnaryOperator with a
 * LongUnaryOperator for maximum efficiency, you can also compose a LongUnaryOperator with an IntFunction,
 * and a best-effort attempt will be made to do a reasonable conversion in between.</p>
 *
 * <h2>Limitations</h2>
 * <p>Due to type erasure, it is not possible to know the generic type parameters for non-primitive
 * functional types. These include {@code IntFunction<?>}, {@code LongFunction<?>}, and in the worst case,
 * {@code Function<?,?>}. For these types, annotations are provided to better inform the runtime lambda
 * compositor.</p>
 *
 * <h2>Multiple Paths</h2>
 * <p>The library allows for there to be multiple functions which match the spec, possibly because multiple
 * functions have the same name but exist in different libraries, or in different packages within the same
 * library. This means that the composer library must find a connecting path between the functions that can
 * match at each stage, disregarding all but one.</p>
 *
 * <h2>Path Finding</h2>
 * <p>The rule for finding the best path among the available functions is as follows, at each pairing
 * between adjacent stages of functions:</p>
 * <ol>
 * <li>The co-compatible output and input types between the functions are mapped. Functions sharing the
 * co-compatible types are kept in the list. Functions not sharing them are removed.</li>
 * <li>As long as functions can be removed in this way, the process iterates through the chain, starting
 * again at the front of the list.</li>
 * <li>When no functions can be removed due to lack of co-compatible types, each stage is selected according
 * to type preferences as represented in {@link ValueType}.</li>
 * <li>If the next (outer) function does not have a compatible input type, move it down on the list.
 * If, after this step, there are functions which do have matching signatures, all others are removed.</li>
 * </ol>
 */
public class VirtDataComposer {
/** Keyword prefix that a flow spec may carry in front of the function chain ({@code "compose "}). */
private static final String PREAMBLE = "compose ";
/** Logger named after this class, so composer output is attributed to the composer. */
private static final Logger logger = LoggerFactory.getLogger(VirtDataComposer.class);
/** Library consulted to resolve each function call of a flow into candidate functions. */
private final VirtDataFunctionLibrary functionLibrary;
/**
 * Create a composer that resolves function calls against the given library.
 *
 * @param functionLibrary the library used to look up candidate functions
 */
public VirtDataComposer(VirtDataFunctionLibrary functionLibrary) {
    this.functionLibrary = functionLibrary;
}

/**
 * Create a composer backed by the library returned from {@code VirtDataLibraries.get()}.
 */
public VirtDataComposer() {
    this(VirtDataLibraries.get());
}
/**
 * Parse a textual flow spec and resolve it into a single function.
 * A leading {@code "compose "} keyword is accepted and stripped before parsing.
 *
 * @param flowspec the flow specification, with or without the leading keyword
 * @return the resolved function, as produced by {@link #resolveFunctionFlow(VirtDataFlow)}
 * @throws RuntimeException wrapping the parser's throwable if the spec can not be parsed
 */
public Optional<ResolvedFunction> resolveFunctionFlow(String flowspec) {
    // Strip the optional keyword using the shared constant rather than a magic literal and offset.
    String strictSpec = flowspec.startsWith(PREAMBLE)
        ? flowspec.substring(PREAMBLE.length())
        : flowspec;

    VirtDataDSL.ParseResult parseResult = VirtDataDSL.parse(strictSpec);
    if (parseResult.throwable != null) {
        throw new RuntimeException(parseResult.throwable);
    }
    return resolveFunctionFlow(parseResult.flow);
}
/**
 * Resolve a parsed flow into a single function.
 *
 * <p>Each expression of the flow is resolved, last to first, into a list of candidate functions.
 * The first stage must offer at least one candidate accepting a {@code long}; candidates of the
 * first stage with any other input type are discarded. The candidates are then narrowed to one
 * function per stage by {@code optimizePath}. A single-stage flow yields that function directly;
 * otherwise the stages are chained through a {@link FunctionAssembly}.</p>
 *
 * @param flow the parsed flow to resolve
 * @return an Optional holding the resolved (possibly composed) function
 * @throws RuntimeException if a stage has no candidate function, or no first-stage candidate
 *                          accepts a long input
 */
public Optional<ResolvedFunction> resolveFunctionFlow(VirtDataFlow flow) {
    LinkedList<List<ResolvedFunction>> funcs = new LinkedList<>();
    LinkedList<Set<Class<?>>> nextFunctionInputTypes = new LinkedList<>();

    // Seed the tail of the input-type chain with the declared output type of the last call, if any.
    Optional<Class<?>> finalValueTypeOption =
        Optional.ofNullable(flow.getLastExpression().getCall().getOutputType())
            .map(ValueType::valueOfClassName).map(ValueType::getValueClass);
    nextFunctionInputTypes.add(new HashSet<>());
    finalValueTypeOption.ifPresent(t -> nextFunctionInputTypes.get(0).add(t));

    // Walk the expressions back to front, prepending, so both lists end up in flow order.
    for (int i = flow.getExpressions().size() - 1; i >= 0; i--) {
        FunctionCall call = flow.getExpressions().get(i).getCall();

        String funcName = call.getFunctionName();
        Class<?> inputType = ValueType.classOfType(call.getInputType());
        Class<?> outputType = ValueType.classOfType(call.getOutputType());
        Object[] args = populateFunctions(call.getArguments());

        List<ResolvedFunction> resolved = functionLibrary.resolveFunctions(outputType, inputType, funcName, args);
        if (resolved.isEmpty()) {
            throw new RuntimeException("Unable to find even one function for " + call);
        }

        // Private mutable copy: later optimization passes remove candidates from it.
        List<ResolvedFunction> nodeFunctions = new LinkedList<>(resolved);
        funcs.addFirst(nodeFunctions);

        Set<Class<?>> inputTypes = nodeFunctions.stream()
            .map(ResolvedFunction::getInputClass)
            .collect(Collectors.toSet());
        nextFunctionInputTypes.addFirst(inputTypes);
    }

    if (!nextFunctionInputTypes.peekFirst().contains(Long.TYPE)) {
        throw new RuntimeException("There is no initial function which accepts a long input. Function chain, after type filtering: \n" +
            summarizeBulk(funcs));
    }
    removeNonLongFunctions(funcs.getFirst());

    List<ResolvedFunction> flattenedFuncs =
        optimizePath(funcs, ValueType.classOfType(flow.getLastExpression().getCall().getOutputType()));

    if (flattenedFuncs.size() == 1) {
        logger.trace("FUNCTION resolution succeeded (single): '{}'", flow);
        return Optional.of(flattenedFuncs.get(0));
    }

    FunctionAssembly assembly = new FunctionAssembly();
    if (logger.isTraceEnabled()) {
        // Build the summary once, and only when it will actually be logged.
        String summary = summarize(flattenedFuncs);
        logger.trace("composed summary: {}", summary);
        logger.trace("FUNCTION chain selected: (multi) '{}'", summary);
    }

    // The composed function is only thread safe if every stage is.
    boolean isThreadSafe = true;
    for (ResolvedFunction resolvedFunction : flattenedFuncs) {
        try {
            Object functionObject = resolvedFunction.getFunctionObject();
            assembly.andThen(functionObject);
            if (!resolvedFunction.isThreadSafe()) {
                isThreadSafe = false;
            }
        } catch (Exception e) {
            logger.error("FUNCTION resolution failed: '{}': {}", flow, e.toString());
            throw e;
        }
    }
    logger.trace("FUNCTION resolution succeeded: (multi) '{}'", flow);

    ResolvedFunction composedFunction = assembly.getResolvedFunction(isThreadSafe);
    return Optional.of(composedFunction);
}
/**
 * Replace every {@link FunctionCall} found among the arguments with a function object resolved
 * for it, recursing into the nested call's own arguments first. Other arguments are left as-is.
 * The array is modified in place.
 *
 * @param args the argument array to process
 * @return the same array instance that was passed in
 * @throws RuntimeException if a nested call can not be resolved to any function
 */
private Object[] populateFunctions(Object[] args) {
    for (int i = 0; i < args.length; i++) {
        if (!(args[i] instanceof FunctionCall)) {
            continue;
        }
        FunctionCall call = (FunctionCall) args[i];

        String funcName = call.getFunctionName();
        Class<?> inputType = ValueType.classOfType(call.getInputType());
        Class<?> outputType = ValueType.classOfType(call.getOutputType());
        Object[] fargs = populateFunctions(call.getArguments());

        List<ResolvedFunction> resolved = functionLibrary.resolveFunctions(outputType, inputType, funcName, fargs);
        if (resolved.isEmpty()) {
            throw new RuntimeException("Unable to find even one function for " + call);
        }
        // Nested calls take the first candidate the library offers.
        args[i] = resolved.get(0).getFunctionObject();
    }
    return args;
}
/**
 * Remove, in place, every function whose input value type is not {@link ValueType#LONG}.
 *
 * @param funcs the candidate functions to filter
 * @throws RuntimeException if the list is non-empty and every function in it would be removed
 */
private void removeNonLongFunctions(List<ResolvedFunction> funcs) {
    List<ResolvedFunction> toRemove = funcs.stream()
        .filter(func -> func.getFunctionType().getInputValueType() != ValueType.LONG)
        .collect(Collectors.toList());

    // Refuse to empty the list: at least one long-input candidate has to survive.
    if (!toRemove.isEmpty() && toRemove.size() == funcs.size()) {
        throw new RuntimeException("removeNonLongFunctions would remove all functions: " + funcs);
    }
    funcs.removeAll(toRemove);
}
/**
 * Render a list of functions on one line, separated by {@code |}.
 *
 * @param funcs the functions to render
 * @return the string forms of the functions joined with {@code |}
 */
private String summarize(List<ResolvedFunction> funcs) {
    return funcs.stream()
        .map(String::valueOf)
        .collect(Collectors.joining("|"));
}
private String summarizeBulk(List> funcs) {
List> spans = new LinkedList<>();
funcs.forEach(l -> spans.add(l.stream().map(String::valueOf).collect(Collectors.toList())));
List> widths = spans.stream().map(
l -> l.stream().map(String::length).max(Integer::compare)).collect(Collectors.toList());
String funcsdata = spans.stream().map(
l -> l.stream().map(String::valueOf).collect(Collectors.joining("|\n"))
).collect(Collectors.joining("\n\n"));
StringBuilder sb = new StringBuilder();
sb.append("---\\\\\n").append(funcsdata).append("\n---////\n");
return sb.toString();
}
/**
*
* Attempt path optimizations on each phase junction, considering the set of
* candidate inner functions with the candidate outer functions.
* This is an iterative process, that will keep trying until no apparent
* progress is made. Each higher-precedence optimization strategy is used
* iteratively as long as it makes progress and then the lower precedence
* strategies are allowed to have their turn.
*
*
*
It is considered an error if the strategies are unable to reduce each
* phase down to a single preferred function. Therefore, the lowest precedence
* strategy is the most aggressive, simply sorting the functions by basic
* type preference and then removing all but the highest selected function.
*
* @param funcs the list of candidate functions offered at each phase, in List<List> form.
* @return a List of resolved functions that has been fully optimized
*/
private List optimizePath(List> funcs, Class> type) {
List prevFuncs = null;
List nextFuncs = null;
int progress = -1;
int pass = 0;
while (progress != 0) {
pass++;
progress = 0;
progress += reduceByRequiredResultsType(funcs.get(funcs.size() - 1), type);
if (funcs.size() > 1) {
int stage = 0;
for (List funcList : funcs) {
stage++;
nextFuncs = funcList;
if (prevFuncs != null && nextFuncs != null) {
if (progress == 0) {
progress += reduceByDirectTypes(prevFuncs, nextFuncs);
if (progress == 0) {
progress += reduceByAssignableTypes(prevFuncs, nextFuncs, false);
if (progress == 0) {
progress += reduceByAssignableTypes(prevFuncs, nextFuncs, true);
if (progress == 0) {
progress += reduceByPreferredTypes(prevFuncs, nextFuncs);
}
}
}
}
} // else first pass, prime pointers
prevFuncs = nextFuncs;
}
nextFuncs = null;
prevFuncs = null;
} else {
progress += reduceByPreferredResultTypes(funcs.get(0));
}
}
List optimized = funcs.stream().map(l -> l.get(0)).collect(Collectors.toList());
return optimized;
}
private int reduceByRequiredResultsType(List endFuncs, Class> resultType) {
int progressed = 0;
LinkedList tmpList = new LinkedList<>(endFuncs);
for (ResolvedFunction endFunc : tmpList) {
if (resultType != null && !ClassUtils.isAssignable(endFunc.getResultClass(), resultType, true)) {
endFuncs.remove(endFunc);
String logmsg = "BY-REQUIRED-RESULT-TYPE removed function '" + endFunc + "' because is not assignable to " + resultType;
logger.trace(logmsg);
progressed++;
}
}
if (endFuncs.size() == 0) {
throw new RuntimeException("BY-REQUIRED-RESULT-TYPE No end funcs were found which are assignable to " + resultType);
}
return progressed;
}
/**
 * Reduce a list of candidates, in place, to its single most preferred function according to
 * {@code ResolvedFunction.PREFERRED_TYPE_COMPARATOR}.
 *
 * @param funcs the candidate functions of a phase
 * @return the number of functions removed
 */
private int reduceByPreferredResultTypes(List<ResolvedFunction> funcs) {
    if (funcs.size() <= 1) {
        return 0;
    }
    int progressed = funcs.size() - 1;
    // After sorting, the function to keep is at the head; trim everything else from the tail.
    funcs.sort(ResolvedFunction.PREFERRED_TYPE_COMPARATOR);
    while (funcs.size() > 1) {
        ResolvedFunction removed = funcs.remove(funcs.size() - 1);
        logger.trace("BY-SINGLE-PREFERRED-TYPE removing func {} because {} has more preferred types.",
            removed, funcs.get(0));
    }
    return progressed;
}
/**
 * Last-resort reduction for a junction: reduce one side to its single most preferred function
 * according to {@code ResolvedFunction.PREFERRED_TYPE_COMPARATOR}. The previous side is reduced
 * if it still has more than one candidate; only otherwise is the next side considered.
 *
 * @param prevFuncs the candidate inner functions
 * @param nextFuncs the candidate outer functions
 * @return the number of functions removed
 */
private int reduceByPreferredTypes(List<ResolvedFunction> prevFuncs, List<ResolvedFunction> nextFuncs) {
    if (prevFuncs.size() > 1) {
        return keepMostPreferred(prevFuncs, "BY-PREV-PREFERRED-TYPE");
    }
    if (nextFuncs.size() > 1) {
        return keepMostPreferred(nextFuncs, "BY-NEXT-PREFERRED-TYPE");
    }
    return 0;
}

/** Sort by type preference and drop all but the head, logging each removal under the given tag. */
private int keepMostPreferred(List<ResolvedFunction> candidates, String tag) {
    int removedCount = candidates.size() - 1;
    candidates.sort(ResolvedFunction.PREFERRED_TYPE_COMPARATOR);
    while (candidates.size() > 1) {
        ResolvedFunction removed = candidates.remove(candidates.size() - 1);
        logger.trace("{} removing func {} because {} has more preferred types.",
            tag, removed, candidates.get(0));
    }
    return removedCount;
}
/**
 * If there are direct type matches between the inner func and the outer func, then remove all
 * other outer funcs except the ones with direct matches. When there is no direct match at all,
 * nothing is removed.
 *
 * @param prevFuncs The list of candidate inner functions
 * @param nextFuncs The list of candidate outer functions
 * @return count of items removed
 */
private int reduceByDirectTypes(List<ResolvedFunction> prevFuncs, List<ResolvedFunction> nextFuncs) {
    // Direct matches are the input types of the next funcs that some previous func outputs exactly.
    Set<Class<?>> directMatches = new HashSet<>(getInputs(nextFuncs));
    directMatches.retainAll(getOutputs(prevFuncs));
    if (directMatches.isEmpty()) {
        return 0;
    }

    List<ResolvedFunction> toremove = new ArrayList<>();
    for (ResolvedFunction nextFunc : nextFuncs) {
        if (!directMatches.contains(nextFunc.getArgType())) {
            logger.trace("BY-DIRECT-TYPE removing next func: {} because its input types are not satisfied by any previous func",
                nextFunc);
            toremove.add(nextFunc);
        }
    }
    nextFuncs.removeAll(toremove);
    return toremove.size();
}
/**
* Remove any functions in the second set which do not have an input type which is assignable
* from any of the output types of the functions in the first set.
*
* @param prevFuncs the functions that come before the nextFuncs
* @param nextFuncs the functions that come after prevFuncs
* @return the number of next funcs that have been removed
*/
private int reduceByAssignableTypes(List prevFuncs, List nextFuncs, boolean autoboxing) {
// Rule 1: If there are direct type matches, remove extraneous next funcs
Set> outputs = getOutputs(prevFuncs);
Set> inputs = getInputs(nextFuncs);
Set> compatibleInputs = new HashSet<>();
for (Class> input : inputs) {
for (Class> output : outputs) {
if (ClassUtils.isAssignable(output, input, autoboxing)) {
compatibleInputs.add(input);
}
}
}
List toremove = new ArrayList<>();
for (ResolvedFunction nextfunc : nextFuncs) {
if (!compatibleInputs.contains(nextfunc.getInputClass())) {
toremove.add(nextfunc);
}
}
if (toremove.size() == nextFuncs.size()) {
String logmsg = "BY-ASSIGNABLE-TYPE Not removing remaining " + nextFuncs.size() + " next funcs " + (autoboxing ? "with autoboxing " : "") + "because no functions would be left.";
logger.trace(logmsg);
return 0;
} else {
toremove.forEach(nextfunc -> {
String logmsg = "BY-ASSIGNABLE-TYPE removing next func: " + nextfunc + " because its input types are not assignable from any of the previous funcs";
logger.trace(logmsg);
}
);
nextFuncs.removeAll(toremove);
return toremove.size();
}
}
private Set> getOutputs(List prevFuncs) {
Set> outputs = new HashSet<>();
for (ResolvedFunction func : prevFuncs) {
outputs.add(func.getResultClass());
}
return outputs;
}
private Set> getInputs(List nextFuncs) {
Set> inputs = new HashSet<>();
for (ResolvedFunction nextFunc : nextFuncs) {
inputs.add(nextFunc.getArgType());
}
return inputs;
}
}