Please wait. This can take a few minutes...
Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance.
Project price: only $1
You can buy this project and download/modify it as often as you want.
apoc.graph.document.builder.DocumentToGraph Maven / Gradle / Ivy
package apoc.graph.document.builder;
import apoc.graph.util.GraphsConfig;
import apoc.result.VirtualGraph;
import apoc.result.VirtualNode;
import apoc.util.FixedSizeStringWriter;
import apoc.util.JsonUtil;
import apoc.util.Util;
import org.apache.commons.lang3.StringUtils;
import org.neo4j.graphdb.Label;
import org.neo4j.graphdb.Node;
import org.neo4j.graphdb.Relationship;
import org.neo4j.graphdb.ResourceIterator;
import org.neo4j.graphdb.Transaction;
import java.io.IOException;
import java.lang.reflect.Array;
import java.util.*;
import java.util.concurrent.atomic.AtomicLong;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import java.util.stream.StreamSupport;
public class DocumentToGraph {
private static final String JSON_ROOT = "$";
private final DocumentToNodes documentToNodes;
private RelationshipBuilder documentRelationBuilder;
private LabelBuilder documentLabelBuilder;
private GraphsConfig config;
public DocumentToGraph(Transaction tx, GraphsConfig config) {
this(tx, config, new HashSet<>());
}
public DocumentToGraph(Transaction tx, GraphsConfig config, Set initialNodes) {
this.documentRelationBuilder = new RelationshipBuilder(config);
this.documentLabelBuilder = new LabelBuilder(config);
this.config = config;
this.documentToNodes = new DocumentToNodes(initialNodes, tx);
}
private boolean hasId(Map map, String path) {
List ids = config.idsForPath(path);
if (ids.isEmpty()) {
return map.containsKey(config.getIdField());
} else {
return map.keySet().containsAll(ids);
}
}
private boolean hasLabel(Map map, String path) {
return !config.labelsForPath(path).isEmpty() || map.containsKey(config.getLabelField());
}
public Map, List> validate(Map map, String path) {
return flatMapFieldsWithPath(map, path)
.entrySet()
.stream()
.flatMap(elem -> elem.getValue().stream().map(data -> new AbstractMap.SimpleEntry<>(elem.getKey(), data)))
.map(elem -> {
String subPath = elem.getKey();
List valueObjects = config.valueObjectForPath(subPath);
List msgs = new ArrayList<>();
Map value = elem.getValue();
if (valueObjects.isEmpty()) {
if (!hasId(value, subPath)) {
msgs.add("`" + config.getIdField() + "` as id-field name");
}
if (!hasLabel(value, subPath)) {
msgs.add("`" + config.getLabelField() + "` as label-field name");
}
}
return new AbstractMap.SimpleEntry<>(value, msgs);
})
.filter(elem -> !elem.getValue().isEmpty())
.collect(Collectors.toMap(e -> e.getKey(), e -> e.getValue()));
}
public String formatDocument(Map map) {
try (FixedSizeStringWriter writer = new FixedSizeStringWriter(100)) {
JsonUtil.OBJECT_MAPPER.writeValue(writer, map);
return writer.toString().concat(writer.isExceeded() ? "...}" : "");
} catch (IOException e) {
throw new RuntimeException(e);
}
}
private void fromDocument(Map document, Node source, String type,
Map, Set> nodes, Set relationships,
String propertyName) {
String path = propertyName == null ? JSON_ROOT : propertyName;
// clean the object form unwanted properties
if (!config.allPropertiesForPath(path)) {
document.keySet().retainAll(config.propertiesForPath(path));
}
boolean isRootNode = source == null;
prepareData(document, path);
// validate
if (!config.isSkipValidation()) {
Map, List> errors = validate(document, path);
if (!errors.isEmpty()) {
throwError(errors);
}
}
Label[] labels = this.documentLabelBuilder.buildLabel(document, path);
Map idValues = filterNodeIdProperties(document, path);
// retrieve the current node
final Node node;
if (this.config.isWrite()) {
node = documentToNodes.getOrCreateRealNode(labels, idValues);
} else {
node = documentToNodes.getOrCreateVirtualNode(nodes, labels, idValues);
}
// write node properties
document.entrySet().stream()
.filter(e -> isSimpleType(e, path))
.flatMap(e -> {
if (e.getValue() instanceof Map) {
return Util.flattenMap((Map) e.getValue(), e.getKey()).entrySet().stream();
} else {
return Stream.of(e);
}
})
.forEach(e -> {
Object value = e.getValue();
if (value instanceof List) {
List list = (List) value;
if (!list.isEmpty()) {
value = Array.newInstance(list.get(0).getClass(), list.size());
for (int i = 0; i < list.size(); i++) {
Array.set(value, i, list.get(i));
}
}
}
node.setProperty(e.getKey(), value);
});
// get child nodes
document.entrySet().stream()
.filter(e -> !isSimpleType(e, path))
.forEach(e -> {
String newPath = path + "." + e.getKey();
if (e.getValue() instanceof Map) { // if value is a complex object (map)
Map inner = (Map) e.getValue();
fromDocument(inner, node, e.getKey(), nodes, relationships, newPath);
} else {
List list = (List) e.getValue(); // if value is and array
list.forEach(map -> fromDocument(map, node, e.getKey(), nodes, relationships, newPath));
}
});
Set nodesWithSameIds = getNodesWithSameLabels(nodes, labels);
nodesWithSameIds.add(node);
if (!isRootNode) {
relationships.addAll(documentRelationBuilder.buildRelation(source, node, type));
}
}
private void throwError(Map, List> errors) {
String error = formatError(errors);
throw new RuntimeException(error);
}
private String formatError(Map, List> errors) {
return errors.entrySet().stream()
.map(e -> "The object `" + formatDocument(e.getKey()) + "` must have " + String.join(" and ", e.getValue()))
.collect(Collectors.joining(StringUtils.LF));
}
public void prepareData(Map document, String path) {
if (config.isGenerateId()) {
List ids = config.idsForPath(path);
String idField;
if (ids.isEmpty()) {
idField = config.getIdField();
} else {
idField = ids.get(0);
}
document.computeIfAbsent(idField, key -> UUID.randomUUID().toString());
}
}
private Map filterNodeIdProperties(Map document, String path) {
List ids = config.idsForPath(path);
Map idMap = new HashMap<>(document);
if(ids.isEmpty()) {
idMap.keySet().retainAll(Collections.singleton(config.getIdField()));
} else {
idMap.keySet().retainAll(ids);
}
return idMap;
}
public static Set getNodesWithSameLabels(Map, Set> nodes, Label[] labels) {
Set set = Stream.of(labels).map(Label::name).collect(Collectors.toSet());
return nodes.computeIfAbsent(set, (k) -> new LinkedHashSet<>());
}
private boolean isSimpleType(Map.Entry e, String path) {
List valueObjects = config.valueObjectForPath(path);
if (e.getValue() instanceof Map) {
return valueObjects.contains(e.getKey());
}
if (e.getValue() instanceof List) {
List list = (List) e.getValue();
if (!list.isEmpty()) {
Object object = list.get(0); // assumption: homogeneous array
if (object instanceof Map) { // if is an array of complex type
return false; // TODO add support for array of value objects
}
}
}
return true;
}
private List> getDocumentCollection(Object document) {
List> coll;
if (document instanceof String) {
document = JsonUtil.parse((String) document, null, Object.class);
}
if (document instanceof List) {
coll = (List) document;
} else {
coll = Arrays.asList((Map) document);
}
return coll;
}
public VirtualGraph create(Object documentObj) {
List> coll = getDocumentCollection(documentObj);
return getVirtualGraph(coll);
}
private VirtualGraph getVirtualGraph(List> coll) {
Map, Set> nodes = new LinkedHashMap<>();
Set relationships = new LinkedHashSet<>();
coll.forEach(map -> fromDocument(map, null, null, nodes, relationships, JSON_ROOT));
return new VirtualGraph("Graph", nodes.values().stream().flatMap(Set::stream).collect(Collectors.toCollection(LinkedHashSet::new)), relationships, Collections.emptyMap());
}
public Map> findDuplicates(Object doc) {
// duplicate validation
// the check on duplicates must be provided on raw data without apply the default label or auto generate the id
AtomicLong index = new AtomicLong(-1);
return getDocumentCollection(doc).stream()
.flatMap(e -> {
long lineDup = index.incrementAndGet();
return flatMapFields(e)
.map(ee -> new AbstractMap.SimpleEntry(ee, lineDup));
})
.collect(Collectors.groupingBy(Map.Entry::getKey,
Collectors.mapping(Map.Entry::getValue, Collectors.toList())))
.entrySet()
.stream()
.filter(e -> e.getValue().size() > 1)
.map(e -> {
long line = e.getValue().get(0);
String elem = formatDocument(e.getKey());
String dupLines = e.getValue().subList(1, e.getValue().size())
.stream()
.map(ee -> String.valueOf(ee))
.collect(Collectors.joining(","));
return new AbstractMap.SimpleEntry<>(line,
String.format("The object `%s` has duplicate at lines [%s]", elem, dupLines));
})
.collect(Collectors.groupingBy(Map.Entry::getKey,
Collectors.mapping(Map.Entry::getValue, Collectors.toList())));
}
private Stream> flatMapFields(Map map) {
Stream> stream = Stream.of(map);
return Stream.concat(stream, map.values()
.stream()
.filter(e -> e instanceof Map)
.flatMap(e -> flatMapFields((Map) e)));
}
private Map>> flatMapFieldsWithPath(Map map, String path) {
Map>> flatWithPath = new HashMap<>();
String newPath = path == null ? JSON_ROOT : path;
flatWithPath.computeIfAbsent(newPath, e -> new ArrayList<>()).add(map);
Map>> collect = map.entrySet()
.stream()
.filter(e -> !isSimpleType(e, path))
.flatMap(e -> {
String subPath = newPath + "." + e.getKey();
if (e.getValue() instanceof Map) {
return flatMapFieldsWithPath((Map) e.getValue(), subPath).entrySet().stream();
} else {
List> list = (List>) e.getValue();
return list.stream().flatMap(le -> flatMapFieldsWithPath(le, subPath).entrySet().stream());
}
})
.flatMap(e -> e.getValue().stream().map(ee -> new AbstractMap.SimpleEntry<>(e.getKey(), ee)))
.collect(Collectors.groupingBy(e -> e.getKey(),
Collectors.mapping(e -> e.getValue(), Collectors.toList())));
flatWithPath.putAll(collect);
return flatWithPath;
}
public Map validate(Object doc) {
AtomicLong index = new AtomicLong(-1);
return getDocumentCollection(doc).stream()
.map(elem -> {
long line = index.incrementAndGet();
prepareData(elem, JSON_ROOT);
// id, label validation
Map, List> errors = validate(elem, JSON_ROOT);
if (errors.isEmpty()) {
return null;
} else {
return new AbstractMap.SimpleEntry<>(line, formatError(errors));
}
})
.filter(e -> e != null)
.collect(Collectors.toMap(e -> e.getKey(), e -> e.getValue()));
}
public Map> validateDocument(Object document) {
Map> dups = findDuplicates(document);
Map invalids = validate(document);
for (Map.Entry invalid : invalids.entrySet()) {
dups.computeIfAbsent(invalid.getKey(), (key) -> new ArrayList<>()).add(invalid.getValue());
}
return dups;
}
public static class DocumentToNodes {
private final Map, Set> initialNodes;
private final Transaction tx;
public DocumentToNodes(Set initialNodes, Transaction tx) {
this.tx = tx;
this.initialNodes = new HashMap<>();
for (Node initialNode : initialNodes) {
Set labels = StreamSupport.stream(initialNode.getLabels().spliterator(), false).map(Label::name).collect(Collectors.toSet());
if(this.initialNodes.containsKey(labels)) {
this.initialNodes.get(labels).add(initialNode);
} else {
this.initialNodes.put(labels, new HashSet<>(Arrays.asList(initialNode)));
}
}
}
public Node getOrCreateRealNode(Label[] labels, Map idValues) {
return Stream.of(labels)
.map(label -> tx.findNodes(label, idValues))
.filter(it -> it.hasNext())
.map(it -> it.next())
.findFirst()
.orElseGet(() -> tx.createNode(labels));
}
public Node getOrCreateVirtualNode(Map, Set> nodes, Label[] labels, Map idValues) {
Set nodesWithSameIds = getNodesWithSameLabels(nodes, labels);
Set initialNodesWithSameIds = getNodesWithSameLabels(this.initialNodes, labels);
HashSet searchableNodes = new HashSet<>(nodesWithSameIds);
searchableNodes.addAll(initialNodesWithSameIds);
return searchableNodes
.stream()
.filter(n -> {
if (Stream.of(labels).anyMatch(label -> n.hasLabel(label))) {
Map ids = filterNodeIdProperties(n, idValues);
return idValues.equals(ids);
}
return StreamSupport.stream(n.getRelationships().spliterator(), false)
.anyMatch(r -> {
Node otherNode = r.getOtherNode(n);
Map ids = filterNodeIdProperties(otherNode, idValues);
return Stream.of(labels).anyMatch(label -> otherNode.hasLabel(label)) && idValues.equals(ids);
});
})
.findFirst()
.orElseGet(() -> new VirtualNode(labels, Collections.emptyMap()));
}
private Map filterNodeIdProperties(Node n, Map idMap) {
return n.getProperties(idMap.keySet().toArray(new String[idMap.keySet().size()]));
}
private Set getNodesWithSameLabels(Map, Set> nodes, Label[] labels) {
Set set = Stream.of(labels).map(Label::name).collect(Collectors.toSet());
return nodes.computeIfAbsent(set, (k) -> new LinkedHashSet<>());
}
}
}