// org.aksw.jenax.path.relgen.RelationGeneratorBase (listing from a Maven / Gradle / Ivy artifact page)
// The newest version!
package org.aksw.jenax.path.relgen;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import org.aksw.jenax.arq.util.syntax.ElementUtils;
import org.aksw.jenax.path.core.PathOpsPE;
import org.aksw.jenax.path.core.PathPE;
import org.aksw.jenax.sparql.fragment.api.Fragment;
import org.aksw.jenax.sparql.fragment.api.Fragment1;
import org.aksw.jenax.sparql.fragment.impl.Concept;
import org.aksw.jenax.sparql.fragment.impl.UnaryXExpr;
import org.apache.jena.graph.Node;
import org.apache.jena.sparql.core.Var;
import org.apache.jena.sparql.expr.Expr;
import org.apache.jena.sparql.syntax.Element;
import com.google.common.hash.HashCode;
import com.google.common.hash.Hashing;
/** */
public abstract class RelationGeneratorBase
// implements Trav1Provider
{
/** Relations that have been traversed by the path -
* does not include the current relation */
protected List pastRelations = new ArrayList<>();
/** The current relation */
protected Fragment relation;
// A hash updated upon requesting a new relation; hash is based on the seen path segments
// Note the hash is really per relation and not per column.
// The column names are appended to the hash
protected HashCode contextHash;
/** Cached String version of the hash */
protected String contextHashStr;
/** Conditions imposed the current relation based on the seen path segments */
protected List conditions = new ArrayList<>();
/** The absolute path at which the relation was requested
* Once it covers all columns of the relation it is used to compute the
* next context hash
*/
protected PathPE relationStartAbsPath;
/**
* The relative path of segments seen for the current relation
* Connects to relationStartPath
*/
protected PathPE relPath;
/** The path segments seen for the current relation. */
// protected List segments = new ArrayList<>();
int columnIdx = 0;
/**
* Yield the next relation to traverse
*
* @param path
* @param index
* @return
*/
protected abstract Fragment nextInstance();
public RelationGeneratorBase() {
super();
reset();
}
public Fragment process(PathPE path) {
if (path.isAbsolute()) {
reset();
}
ensureInit();
Fragment result = relation;
for (UnaryXExpr segment : path.getSegments()) {
result = process(segment);
}
return result;
}
protected void reset() {
setHashCode(null);
pastRelations.clear();
relation = null;
columnIdx = 0;
relationStartAbsPath = PathOpsPE.get().newAbsolutePath();
relPath = PathOpsPE.newRelativePath();
updateHash();
}
public void ensureInit() {
if (relation == null || columnIdx >= relation.getVars().size()) {
String oldHash = contextHashStr;
updateHash();
Var pastLastVar = null;
if (relation != null) {
Fragment pastItem = relation.filter(conditions);
pastLastVar = pastItem.getVars().get(pastItem.getVars().size() - 1);
pastRelations.add(pastItem);
}
relation = nextInstance();
relationStartAbsPath = relationStartAbsPath.resolve(relPath);
relPath = PathOpsPE.newRelativePath();
List vars = relation.getVars();
if (vars.size() <= 1) {
throw new RuntimeException("Relations must have at least 2 variables");
}
conditions.clear();
// Rename variables w.r.t the hashes:
// The first var receives the prior hash (in order to join with the prior relation)
// all other vars receive the new hash
Var firstVar = vars.get(0);
if (pastLastVar == null) {
pastLastVar = Var.alloc(contextHashStr + "_" + firstVar.getName());
}
Var plv = pastLastVar;
Map remap = relation.getVarsMentioned().stream()
.collect(Collectors.toMap(
v -> v,
node -> {
Node r;
if (node.isVariable()) {
if (node.equals(firstVar)) {
r = plv;
} else {
// String prefix = node.equals(firstVar)
// ? oldHash
// : contextHashStr;
r = Var.alloc(contextHashStr + "_" + node.getName());
}
} else {
r = node;
}
return r;
}));
relation = relation.applyNodeTransform(v -> remap.getOrDefault(v, v));
// If we joined the last column of the previous relation with the first
// column of the next one, then jump over that first column
columnIdx = pastRelations.isEmpty() ? 0 : 1;
}
}
public Fragment process(UnaryXExpr segment) {
ensureInit();
relPath = relPath.resolve(segment);
List vars = relation.getVars();
Var v = vars.get(columnIdx);
++columnIdx;
if (!segment.isAlwaysTrue()) {
// Substitute the only variable in the expression with that of the relation instance
Expr expr = segment.getExpr().applyNodeTransform(x -> x.isVariable() ? v : x);
conditions.add(expr);
}
Fragment r = relation.filter(conditions);
ensureInit();
return r;
}
public Var getCurrentVar() {
return relation.getVars().get(columnIdx);
}
public List getPastRelations() {
return pastRelations;
}
public Fragment1 getCurrentConcept() {
return new Concept(assemble(), getCurrentVar());
}
/** Assemble the complete element */
public Element assemble() {
List elts = pastRelations.stream()
.flatMap(r -> r.getElements().stream())
.collect(Collectors.toList());
if (relation != null) {
elts.add(relation.filter(conditions).getElement());
}
Element elt = ElementUtils.groupIfNeeded(elts);
return elt;
}
protected void updateHash() {
HashCode nextHashCode = computeNextHash(contextHash, relationStartAbsPath, relPath);
setHashCode(nextHashCode);
}
protected void setHashCode(HashCode hashCode) {
contextHash = hashCode;
contextHashStr = hashCode == null ? null : encodeHashCode(hashCode);
}
protected String encodeHashCode(HashCode hashCode) {
return hashCode.toString(); // BaseEncoding.base64Url().encode(contextHash.asBytes());
}
protected HashCode computeNextHash(HashCode currentHash, PathPE relationStartAbsPath, PathPE relPath) {
HashCode contrib = Hashing.murmur3_32().hashString(relPath.toString(), StandardCharsets.UTF_8);
HashCode result = currentHash == null ? contrib : Hashing.combineOrdered(Arrays.asList(currentHash, contrib));
return result;
}
}