All downloads are free. The search and download functionality uses the official Maven repository.

apoc.graph.document.builder.DocumentToGraph Maven / Gradle / Ivy

There is a newer version: 5.25.1
Show newest version
package apoc.graph.document.builder;

import apoc.graph.util.GraphsConfig;
import apoc.result.VirtualGraph;
import apoc.result.VirtualNode;
import apoc.util.FixedSizeStringWriter;
import apoc.util.JsonUtil;
import apoc.util.Util;
import org.apache.commons.lang3.StringUtils;
import org.neo4j.graphdb.Label;
import org.neo4j.graphdb.Node;
import org.neo4j.graphdb.Relationship;
import org.neo4j.graphdb.Transaction;

import java.io.IOException;
import java.lang.reflect.Array;
import java.util.*;
import java.util.concurrent.atomic.AtomicLong;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import java.util.stream.StreamSupport;


public class DocumentToGraph {

    /** Path assigned to a top-level document when no explicit path is given. */
    private static final String JSON_ROOT = "$";
    /** Finds or creates the node that stands for each (sub-)document. */
    private final DocumentToNodes documentToNodes;

    /** Builds the relationships that link a parent node to a child node. */
    private final RelationshipBuilder documentRelationBuilder;
    /** Computes the labels of a (sub-)document located at a given path. */
    private final LabelBuilder documentLabelBuilder;
    /** Conversion settings; assigned once in the constructor and never replaced. */
    private final GraphsConfig config;

    /**
     * Creates a converter that starts with an empty set of already known nodes.
     *
     * @param tx     transaction passed on to the node lookup/creation helper
     * @param config conversion settings
     */
    public DocumentToGraph(Transaction tx, GraphsConfig config) {
        this(tx, config, new HashSet<Node>());
    }

    /**
     * Creates a converter that may reuse the given nodes instead of creating new ones.
     *
     * @param tx           transaction passed on to the node lookup/creation helper
     * @param config       conversion settings, also handed to the label and relationship builders
     * @param initialNodes nodes that are already known before any document is processed
     */
    public DocumentToGraph(Transaction tx, GraphsConfig config, Set<Node> initialNodes) {
        this.config = config;
        this.documentLabelBuilder = new LabelBuilder(config);
        this.documentRelationBuilder = new RelationshipBuilder(config);
        this.documentToNodes = new DocumentToNodes(initialNodes, tx);
    }

    /**
     * Tells whether the document carries every id property required at {@code path}.
     *
     * @param map  the document to inspect
     * @param path the path of the document inside the root document
     * @return {@code true} if all ids configured for the path are present as keys, or,
     *         when no ids are configured for the path, if the default id field is present
     */
    private boolean hasId(Map<String, Object> map, String path) {
        List<String> ids = config.idsForPath(path);
        return ids.isEmpty()
                ? map.containsKey(config.getIdField())
                : map.keySet().containsAll(ids);
    }

    /**
     * Tells whether a label can be determined for the document at {@code path}.
     *
     * @param map  the document to inspect
     * @param path the path of the document inside the root document
     * @return {@code true} if labels are configured for the path or the document
     *         contains the configured label field
     */
    private boolean hasLabel(Map<String, Object> map, String path) {
        if (!config.labelsForPath(path).isEmpty()) {
            return true;
        }
        return map.containsKey(config.getLabelField());
    }

    public Map, List> validate(Map map, String path) {
        return flatMapFieldsWithPath(map, path)
                .entrySet()
                .stream()
                .flatMap(elem -> elem.getValue().stream().map(data -> new AbstractMap.SimpleEntry<>(elem.getKey(), data)))
                .map(elem -> {
                    String subPath = elem.getKey();
                    List valueObjects = config.valueObjectForPath(subPath);
                    List msgs = new ArrayList<>();
                    Map value = elem.getValue();
                    if (valueObjects.isEmpty()) {
                        if (!hasId(value, subPath)) {
                            msgs.add("`" + config.getIdField() + "` as id-field name");
                        }
                        if (!hasLabel(value, subPath)) {
                            msgs.add("`" + config.getLabelField() + "` as label-field name");
                        }
                    }
                    return new AbstractMap.SimpleEntry<>(value, msgs);
                })
                .filter(elem -> !elem.getValue().isEmpty())
                .collect(Collectors.toMap(e -> e.getKey(), e -> e.getValue()));
    }

    /**
     * Renders the document as JSON through a size-limited writer, for use in messages.
     *
     * @param map the document to render
     * @return the JSON text, followed by {@code "...}"} when the writer reports that its
     *         limit was exceeded
     * @throws RuntimeException wrapping the {@link IOException} if serialization fails
     */
    public String formatDocument(Map<String, Object> map) {
        try (FixedSizeStringWriter writer = new FixedSizeStringWriter(100)) {
            JsonUtil.OBJECT_MAPPER.writeValue(writer, map);
            String json = writer.toString();
            return writer.isExceeded() ? json + "...}" : json;
        } catch (IOException e) {
            throw new RuntimeException(e);
        }
    }

    private void fromDocument(Map document, Node source, String type,
                              Map, Set> nodes, Set relationships,
                              String propertyName) {
        String path = propertyName == null ? JSON_ROOT : propertyName;

        // clean the object form unwanted properties
        if (!config.allPropertiesForPath(path)) {
            document.keySet().retainAll(config.propertiesForPath(path));
        }

        boolean isRootNode = source == null;
        prepareData(document, path);

        // validate
        if (!config.isSkipValidation()) {
            Map, List> errors = validate(document, path);
            if (!errors.isEmpty()) {
                throwError(errors);
            }
        }

        Label[] labels = this.documentLabelBuilder.buildLabel(document, path);
        Map idValues = filterNodeIdProperties(document, path);

        // retrieve the current node
        final Node node;
        if (this.config.isWrite()) {
            node = documentToNodes.getOrCreateRealNode(labels, idValues);
        } else {
            node = documentToNodes.getOrCreateVirtualNode(nodes, labels, idValues);
        }

        // write node properties
        document.entrySet().stream()
                .filter(e -> isSimpleType(e, path))
                .flatMap(e -> {
                    if (e.getValue() instanceof Map) {
                        return Util.flattenMap((Map) e.getValue(), e.getKey()).entrySet().stream();
                    } else {
                        return Stream.of(e);
                    }
                })
                .forEach(e -> {
                    Object value = e.getValue();
                    if (value instanceof List) {
                        List list = (List) value;
                        if (!list.isEmpty()) {
                            value = Array.newInstance(list.get(0).getClass(), list.size());
                            for (int i = 0; i < list.size(); i++) {
                                Array.set(value, i, list.get(i));
                            }
                        }
                    }
                    node.setProperty(e.getKey(), value);
                });

        // get child nodes
        document.entrySet().stream()
                .filter(e -> !isSimpleType(e, path))
                .forEach(e -> {
                    String newPath = path + "."  + e.getKey();
                    if (e.getValue() instanceof Map) { // if value is a complex object (map)
                        Map inner = (Map) e.getValue();
                        fromDocument(inner, node, e.getKey(), nodes, relationships, newPath);
                    } else {
                        List list = (List) e.getValue(); // if value is and array
                        list.forEach(map -> fromDocument(map, node, e.getKey(), nodes, relationships, newPath));
                    }
                });

        Set nodesWithSameIds = getNodesWithSameLabels(nodes, labels);
        nodesWithSameIds.add(node);

        if (!isRootNode) {
            relationships.addAll(documentRelationBuilder.buildRelation(source, node, type));
        }

    }

    private void throwError(Map, List> errors) {
        String error = formatError(errors);
        throw new RuntimeException(error);
    }

    private String formatError(Map, List> errors) {
        return errors.entrySet().stream()
                .map(e -> "The object `" + formatDocument(e.getKey()) + "` must have " + String.join(" and ", e.getValue()))
                .collect(Collectors.joining(StringUtils.LF));
    }

    /**
     * Adds a random UUID as id to the document when id generation is enabled and the
     * id property is absent.
     *
     * <p>The id property is the first id configured for {@code path}, or the default id
     * field when none is configured.
     *
     * @param document the document to complete; it is modified in place
     * @param path     the path of the document inside the root document
     */
    public void prepareData(Map<String, Object> document, String path) {
        if (!config.isGenerateId()) {
            return;
        }
        List<String> ids = config.idsForPath(path);
        String idField = ids.isEmpty() ? config.getIdField() : ids.get(0);
        document.computeIfAbsent(idField, key -> UUID.randomUUID().toString());
    }

    /**
     * Extracts the id properties of a document into a new map.
     *
     * @param document the document to read; it is not modified
     * @param path     the path of the document inside the root document
     * @return a copy of the document restricted to the ids configured for the path, or
     *         to the default id field when none is configured
     */
    private Map<String, Object> filterNodeIdProperties(Map<String, Object> document, String path) {
        List<String> ids = config.idsForPath(path);
        Collection<String> idKeys = ids.isEmpty() ? Collections.singleton(config.getIdField()) : ids;
        Map<String, Object> idMap = new HashMap<>(document);
        idMap.keySet().retainAll(idKeys);
        return idMap;
    }

    public static Set getNodesWithSameLabels(Map, Set> nodes, Label[] labels) {
        Set set = Stream.of(labels).map(Label::name).collect(Collectors.toSet());
        return nodes.computeIfAbsent(set, (k) -> new LinkedHashSet<>());
    }


    /**
     * Decides whether an entry is written as node properties rather than as child nodes.
     *
     * @param e    the document entry to classify
     * @param path the path of the document that owns the entry
     * @return for a map value, whether its key is a configured value object of the path;
     *         {@code false} for a non-empty list whose first element is a map;
     *         {@code true} for everything else
     */
    private boolean isSimpleType(Map.Entry<String, Object> e, String path) {
        List<String> valueObjects = config.valueObjectForPath(path);
        Object value = e.getValue();
        if (value instanceof Map) {
            return valueObjects.contains(e.getKey());
        }
        if (value instanceof List) {
            List<?> list = (List<?>) value;
            // assumption: homogeneous array, so only the first element is inspected
            // TODO add support for array of value objects
            return list.isEmpty() || !(list.get(0) instanceof Map);
        }
        return true;
    }

    private List> getDocumentCollection(Object document) {
        List> coll;
        if (document instanceof String) {
            document  = JsonUtil.parse((String) document, null, Object.class);
        }
        if (document instanceof List) {
            coll = (List) document;
        } else {
            coll = Arrays.asList((Map) document);
        }
        return coll;
    }

    /**
     * Converts the given document(s) into a graph.
     *
     * @param documentObj a JSON string, a single document or a list of documents
     * @return the graph holding every node and relationship produced by the conversion
     */
    public VirtualGraph create(Object documentObj) {
        return getVirtualGraph(getDocumentCollection(documentObj));
    }

    private VirtualGraph getVirtualGraph(List> coll) {
        Map, Set> nodes = new LinkedHashMap<>();
        Set relationships = new LinkedHashSet<>();
        coll.forEach(map -> fromDocument(map, null, null, nodes, relationships, JSON_ROOT));
        return new VirtualGraph("Graph", nodes.values().stream().flatMap(Set::stream).collect(Collectors.toCollection(LinkedHashSet::new)), relationships, Collections.emptyMap());
    }

    public Map> findDuplicates(Object doc) {
        // duplicate validation
        // the check on duplicates must be provided on raw data without apply the default label or auto generate the id
        AtomicLong index = new AtomicLong(-1);
        return getDocumentCollection(doc).stream()
                .flatMap(e -> {
                    long lineDup = index.incrementAndGet();
                    return flatMapFields(e)
                            .map(ee -> new AbstractMap.SimpleEntry(ee, lineDup));
                })
                .collect(Collectors.groupingBy(Map.Entry::getKey,
                        Collectors.mapping(Map.Entry::getValue, Collectors.toList())))
                .entrySet()
                .stream()
                .filter(e -> e.getValue().size() > 1)
                .map(e -> {
                    long line = e.getValue().get(0);
                    String elem = formatDocument(e.getKey());
                    String dupLines = e.getValue().subList(1, e.getValue().size())
                            .stream()
                            .map(ee -> String.valueOf(ee))
                            .collect(Collectors.joining(","));
                    return new AbstractMap.SimpleEntry<>(line,
                            String.format("The object `%s` has duplicate at lines [%s]", elem, dupLines));
                })
                .collect(Collectors.groupingBy(Map.Entry::getKey,
                        Collectors.mapping(Map.Entry::getValue, Collectors.toList())));
    }

    private Stream> flatMapFields(Map map) {
        Stream> stream = Stream.of(map);
        return Stream.concat(stream, map.values()
                .stream()
                .filter(e -> e instanceof Map)
                .flatMap(e -> flatMapFields((Map) e)));
    }

    private Map>> flatMapFieldsWithPath(Map map, String path) {
        Map>> flatWithPath = new HashMap<>();
        String newPath = path == null ? JSON_ROOT : path;
        flatWithPath.computeIfAbsent(newPath, e -> new ArrayList<>()).add(map);
        Map>> collect = map.entrySet()
                .stream()
                .filter(e -> !isSimpleType(e, path))
                .flatMap(e -> {
                    String subPath = newPath + "." + e.getKey();
                    if (e.getValue() instanceof Map) {
                        return flatMapFieldsWithPath((Map) e.getValue(), subPath).entrySet().stream();
                    } else {
                        List> list = (List>) e.getValue();
                        return list.stream().flatMap(le -> flatMapFieldsWithPath(le, subPath).entrySet().stream());
                    }
                })
                .flatMap(e -> e.getValue().stream().map(ee -> new AbstractMap.SimpleEntry<>(e.getKey(), ee)))
                .collect(Collectors.groupingBy(e -> e.getKey(),
                        Collectors.mapping(e -> e.getValue(), Collectors.toList())));
        flatWithPath.putAll(collect);

        return flatWithPath;
    }

    public Map validate(Object doc) {
        AtomicLong index = new AtomicLong(-1);
        return getDocumentCollection(doc).stream()
                .map(elem -> {
                    long line = index.incrementAndGet();
                    prepareData(elem, JSON_ROOT);
                    // id, label validation
                    Map, List> errors = validate(elem, JSON_ROOT);
                    if (errors.isEmpty()) {
                        return null;
                    } else {
                        return new AbstractMap.SimpleEntry<>(line, formatError(errors));
                    }
                })
                .filter(e -> e != null)
                .collect(Collectors.toMap(e -> e.getKey(), e -> e.getValue()));
    }

    public Map> validateDocument(Object document) {
        Map> dups = findDuplicates(document);
        Map invalids = validate(document);

        for (Map.Entry invalid : invalids.entrySet()) {
            dups.computeIfAbsent(invalid.getKey(), (key) -> new ArrayList<>()).add(invalid.getValue());
        }
        return dups;
    }

    public static class DocumentToNodes {
        private final Map, Set> initialNodes;
        private final Transaction tx;

        public DocumentToNodes(Set initialNodes, Transaction tx) {
            this.tx = tx;
            this.initialNodes = new HashMap<>();
            for (Node initialNode : initialNodes) {
                Set labels = StreamSupport.stream(initialNode.getLabels().spliterator(), false).map(Label::name).collect(Collectors.toSet());
                if(this.initialNodes.containsKey(labels)) {
                    this.initialNodes.get(labels).add(initialNode);
                } else {
                    this.initialNodes.put(labels, new HashSet<>(Arrays.asList(initialNode)));
                }
            }
        }

        public Node getOrCreateRealNode(Label[] labels, Map idValues) {
            return Stream.of(labels)
                    .map(label -> tx.findNodes(label, idValues))
                    .filter(it -> it.hasNext())
                    .map(it -> it.next())
                    .findFirst()
                    .orElseGet(() -> tx.createNode(labels));
        }

        public Node getOrCreateVirtualNode(Map, Set> nodes, Label[] labels, Map idValues) {
            Set nodesWithSameIds = getNodesWithSameLabels(nodes, labels);
            Set initialNodesWithSameIds = getNodesWithSameLabels(this.initialNodes, labels);

            HashSet searchableNodes = new HashSet<>(nodesWithSameIds);
            searchableNodes.addAll(initialNodesWithSameIds);

            return searchableNodes
                    .stream()
                    .filter(n -> {
                        if (Stream.of(labels).anyMatch(label -> n.hasLabel(label))) {
                            Map ids = filterNodeIdProperties(n, idValues);
                            return idValues.equals(ids);
                        }
                        return StreamSupport.stream(n.getRelationships().spliterator(), false)
                                .anyMatch(r -> {
                                    Node otherNode = r.getOtherNode(n);
                                    Map ids = filterNodeIdProperties(otherNode, idValues);
                                    return Stream.of(labels).anyMatch(label -> otherNode.hasLabel(label)) && idValues.equals(ids);
                                });
                    })
                    .findFirst()
                    .orElseGet(() -> new VirtualNode(labels, Collections.emptyMap()));
        }

        private Map filterNodeIdProperties(Node n, Map idMap) {
            return n.getProperties(idMap.keySet().toArray(new String[idMap.keySet().size()]));
        }

        private Set getNodesWithSameLabels(Map, Set> nodes, Label[] labels) {
            Set set = Stream.of(labels).map(Label::name).collect(Collectors.toSet());
            return nodes.computeIfAbsent(set, (k) -> new LinkedHashSet<>());
        }

    }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy