All Downloads are FREE. Search and download functionalities are using the official Maven repository.

apoc.export.graphml.XmlGraphMLReader Maven / Gradle / Ivy

/*
 * Copyright (c) "Neo4j"
 * Neo4j Sweden AB [http://neo4j.com]
 *
 * This file is part of Neo4j.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package apoc.export.graphml;

import apoc.export.util.BatchTransaction;
import apoc.export.util.ExportConfig;
import apoc.export.util.Reporter;
import apoc.util.JsonUtil;
import java.io.Reader;
import java.lang.reflect.Array;
import java.util.AbstractMap;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.function.Function;
import javax.xml.namespace.QName;
import javax.xml.stream.XMLEventReader;
import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLStreamConstants;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.events.Attribute;
import javax.xml.stream.events.StartElement;
import javax.xml.stream.events.XMLEvent;
import org.apache.commons.lang3.StringUtils;
import org.neo4j.graphdb.*;
import org.neo4j.procedure.TerminationGuard;

/**
 * Created by mh on 10.07.13.
 */
public class XmlGraphMLReader {

    public static final String LABEL_SPLIT = " *: *";
    private final GraphDatabaseService db;
    private boolean storeNodeIds;
    private RelationshipType defaultRelType = RelationshipType.withName("UNKNOWN");
    private ExportConfig.NodeConfig source;
    private ExportConfig.NodeConfig target;
    private int batchSize = 40000;
    private Reporter reporter;
    private boolean labels;

    public XmlGraphMLReader storeNodeIds() {
        this.storeNodeIds = true;
        return this;
    }

    public XmlGraphMLReader relType(String name) {
        this.defaultRelType = RelationshipType.withName(name);
        return this;
    }

    public XmlGraphMLReader batchSize(int batchSize) {
        this.batchSize = batchSize;
        return this;
    }

    public XmlGraphMLReader nodeLabels(boolean readLabels) {
        this.labels = readLabels;
        return this;
    }

    public XmlGraphMLReader source(ExportConfig.NodeConfig sourceConfig) {
        this.source = sourceConfig;
        return this;
    }

    public XmlGraphMLReader target(ExportConfig.NodeConfig targetConfig) {
        this.target = targetConfig;
        return this;
    }

    public XmlGraphMLReader reporter(Reporter reporter) {
        this.reporter = reporter;
        return this;
    }

    public ExportConfig.NodeConfig getSource() {
        return source;
    }

    public ExportConfig.NodeConfig getTarget() {
        return target;
    }

    enum Type {
        BOOLEAN() {
            Object parse(String value) {
                return Boolean.valueOf(value);
            }

            Object parseList(String value) {
                return Type.parseList(value, Boolean.class, (i) -> (Boolean) i);
            }
        },
        INT() {
            Object parse(String value) {
                return Integer.parseInt(value);
            }

            Object parseList(String value) {
                return Type.parseList(value, Integer.class, (n) -> ((Number) n).intValue());
            }
        },
        LONG() {
            Object parse(String value) {
                return Long.parseLong(value);
            }

            Object parseList(String value) {
                return Type.parseList(value, Long.class, (i) -> ((Number) i).longValue());
            }
        },
        FLOAT() {
            Object parse(String value) {
                return Float.parseFloat(value);
            }

            Object parseList(String value) {
                return Type.parseList(value, Float.class, (i) -> ((Number) i).floatValue());
            }
        },
        DOUBLE() {
            Object parse(String value) {
                return Double.parseDouble(value);
            }

            Object parseList(String value) {
                return Type.parseList(value, Double.class, (i) -> ((Number) i).doubleValue());
            }
        },
        STRING() {
            Object parse(String value) {
                return value;
            }

            Object parseList(String value) {
                return Type.parseList(value, String.class, (i) -> (String) i);
            }
        };

        abstract Object parse(String value);

        abstract Object parseList(String value);

        public static  T[] parseList(String value, Class asClass, Function convert) {
            List parsed = JsonUtil.parse(value, null, List.class);
            T[] converted = (T[]) Array.newInstance(asClass, parsed.size());

            for (int i = 0; i < parsed.size(); i++) converted[i] = convert.apply(parsed.get(i));
            return converted;
        }

        public static Type forType(String type) {
            if (type == null) return STRING;
            return valueOf(type.trim().toUpperCase());
        }
    }

    static class Key {
        String nameOrId;
        boolean forNode;
        Type listType;
        Type type;
        Object defaultValue;

        public Key(String nameOrId, String type, String listType, String forNode) {
            this.nameOrId = nameOrId;
            this.type = Type.forType(type);
            if (listType != null) {
                this.listType = Type.forType(listType);
            }
            this.forNode = forNode == null || forNode.equalsIgnoreCase("node");
        }

        private static Key defaultKey(String id, boolean forNode) {
            return new Key(id, "string", null, forNode ? "node" : "edge");
        }

        public void setDefault(String data) {
            this.defaultValue = type.parse(data);
        }

        public Object parseValue(String input) {
            if (input == null || input.trim().isEmpty()) return defaultValue;
            if (listType != null) return listType.parseList(input);
            return type.parse(input);
        }
    }

    public static final QName ID = QName.valueOf("id");
    public static final QName LABELS = QName.valueOf("labels");
    public static final QName LABEL = QName.valueOf("label");
    public static final QName FOR = QName.valueOf("for");
    public static final QName NAME = QName.valueOf("attr.name");
    public static final QName TYPE = QName.valueOf("attr.type");
    public static final QName LIST = QName.valueOf("attr.list");
    public static final QName KEY = QName.valueOf("key");

    public XmlGraphMLReader(GraphDatabaseService db) {
        this.db = db;
    }

    public long parseXML(Reader input, TerminationGuard terminationGuard) throws XMLStreamException {
        Map cache = new HashMap<>(1024 * 32);
        XMLInputFactory inputFactory = XMLInputFactory.newInstance();
        inputFactory.setProperty("javax.xml.stream.isCoalescing", true);
        inputFactory.setProperty(XMLInputFactory.IS_NAMESPACE_AWARE, true);
        inputFactory.setProperty(XMLInputFactory.SUPPORT_DTD, false);
        inputFactory.setProperty(XMLInputFactory.IS_SUPPORTING_EXTERNAL_ENTITIES, false);
        XMLEventReader reader = inputFactory.createXMLEventReader(input);
        Entity last = null;
        Map nodeKeys = new HashMap<>();
        Map relKeys = new HashMap<>();
        int count = 0;
        BatchTransaction tx = new BatchTransaction(db, batchSize * 10, reporter);
        try {

            while (reader.hasNext()) {
                terminationGuard.check();
                XMLEvent event;
                try {
                    event = (XMLEvent) reader.next();
                    if (event.getEventType() == XMLStreamConstants.DTD) {
                        generateXmlDoctypeException();
                    }
                } catch (Exception e) {
                    // in case of unicode invalid chars we skip the event, or we exit in case of EOF
                    if (e.getMessage().contains("Unexpected EOF")) {
                        break;
                    } else if (e.getMessage().contains("DOCTYPE")) {
                        throw e;
                    }
                    continue;
                }
                if (event.isStartElement()) {

                    StartElement element = event.asStartElement();
                    String name = element.getName().getLocalPart();

                    if (name.equals("graphml") || name.equals("graph")) continue;
                    if (name.equals("key")) {
                        String id = getAttribute(element, ID);
                        Key key = new Key(
                                getAttribute(element, NAME),
                                getAttribute(element, TYPE),
                                getAttribute(element, LIST),
                                getAttribute(element, FOR));

                        XMLEvent next = peek(reader);
                        if (next.isStartElement()
                                && next.asStartElement()
                                        .getName()
                                        .getLocalPart()
                                        .equals("default")) {
                            reader.nextEvent().asStartElement();
                            key.setDefault(reader.nextEvent().asCharacters().getData());
                        }
                        if (key.forNode) nodeKeys.put(id, key);
                        else relKeys.put(id, key);
                        continue;
                    }
                    if (name.equals("data")) {
                        if (last == null) continue;
                        String id = getAttribute(element, KEY);
                        boolean isNode = last instanceof Node;
                        Key key = isNode ? nodeKeys.get(id) : relKeys.get(id);
                        if (key == null) key = Key.defaultKey(id, isNode);
                        final Map.Entry eventEntry = getDataEventEntry(reader, key);
                        final XMLEvent next = eventEntry.getKey();
                        final Object value = eventEntry.getValue();
                        if (value != null) {
                            if (this.labels && isNode && id.equals("labels")) {
                                addLabels((Node) last, value.toString());
                            } else if (!this.labels || isNode || !id.equals("label")) {
                                last.setProperty(key.nameOrId, value);
                                if (reporter != null) reporter.update(0, 0, 1);
                            }
                        } else if (next.getEventType() == XMLStreamConstants.END_ELEMENT) {
                            last.setProperty(key.nameOrId, StringUtils.EMPTY);
                            reporter.update(0, 0, 1);
                        }
                        continue;
                    }
                    if (name.equals("node")) {
                        tx.increment();
                        String id = getAttribute(element, ID);
                        Node node = tx.getTransaction().createNode();
                        if (this.labels) {
                            String labels = getAttribute(element, LABELS);
                            addLabels(node, labels);
                        }
                        if (storeNodeIds) node.setProperty("id", id);
                        setDefaults(nodeKeys, node);
                        last = node;
                        cache.put(id, node.getElementId());
                        if (reporter != null) reporter.update(1, 0, 0);
                        count++;
                        continue;
                    }
                    if (name.equals("edge")) {
                        tx.increment();
                        String label = getAttribute(element, LABEL);
                        Node from = getByNodeId(cache, tx.getTransaction(), element, XmlNodeExport.NodeType.SOURCE);
                        Node to = getByNodeId(cache, tx.getTransaction(), element, XmlNodeExport.NodeType.TARGET);

                        RelationshipType relationshipType =
                                label == null ? getRelationshipType(reader) : RelationshipType.withName(label);
                        Relationship relationship = from.createRelationshipTo(to, relationshipType);
                        setDefaults(relKeys, relationship);
                        last = relationship;
                        if (reporter != null) reporter.update(0, 1, 0);
                        count++;
                    }
                }
            }
            tx.doCommit();
        } catch (Exception e) {
            tx.rollback();
            throw e;
        } finally {
            tx.close();
            reader.close();
        }
        return count;
    }

    private Map.Entry getDataEventEntry(XMLEventReader reader, Key key) {
        Object value = key.defaultValue;

        final Map.Entry peekEntry = peekRecursively(reader, null);
        if (peekEntry.getValue() != null) {
            value = key.parseValue(peekEntry.getValue());
        }
        return new AbstractMap.SimpleEntry<>(peekEntry.getKey(), value);
    }

    private Map.Entry peekRecursively(XMLEventReader reader, String data) {
        try {
            final XMLEvent peek = peek(reader);
            // in case of char, we concat the result to the current value and redo the peek
            //  in order to obtain e.g. from a string "abcdef" --> "abcdef"
            if (peek.isCharacters()) {
                data = StringUtils.join(data, reader.nextEvent().asCharacters().getData());
                return peekRecursively(reader, data);
            }
            // in case the event is not a char we continue setting labels/properties
            return new AbstractMap.SimpleEntry<>(peek, data);
        } catch (Exception e) {
            // in case of unicode invalid chars we continue until we get a valid event
            return peekRecursively(reader, data);
        }
    }

    private Node getByNodeId(
            Map cache, Transaction tx, StartElement element, XmlNodeExport.NodeType nodeType) {
        final XmlNodeExport.ExportNode xmlNodeInterface = nodeType.get();
        final ExportConfig.NodeConfig nodeConfig = xmlNodeInterface.getNodeConfigReader(this);

        final String sourceTargetValue = getAttribute(element, QName.valueOf(nodeType.getName()));

        final String id = cache.get(sourceTargetValue);
        // without source/target config, we look for the internal id
        if (StringUtils.isBlank(nodeConfig.label)) {
            if (id == null) {
                throw new RuntimeException("The node with the given id: " + sourceTargetValue
                        + " was not found. Check that it is defined before the referencing relationship.");
            }
            return tx.getNodeByElementId(id);
        }
        // with source/target configured, we search a node with a specified label
        // and with a type specified in sourceType, if present, or string by default
        final String attribute = getAttribute(element, QName.valueOf(nodeType.getNameType()));
        final Object value =
                attribute == null ? sourceTargetValue : Type.forType(attribute).parse(sourceTargetValue);

        return tx.findNode(
                Label.label(nodeConfig.label),
                Optional.ofNullable(nodeConfig.id).orElse("id"),
                value);
    }

    private RelationshipType getRelationshipType(XMLEventReader reader) throws XMLStreamException {
        if (this.labels) {
            XMLEvent peek = reader.peek();
            boolean isChar = peek.isCharacters();
            if (isChar && !(peek.asCharacters().isWhiteSpace())) {
                String value = peek.asCharacters().getData();
                String el = ":";
                String typeRel = value.contains(el) ? value.replace(el, StringUtils.EMPTY) : value;
                return RelationshipType.withName(typeRel.trim());
            }

            boolean notStartElementOrContainsKeyLabel = isChar || !peek.isStartElement() || containsLabelKey(peek);

            if (!peek.isEndDocument() && notStartElementOrContainsKeyLabel) {
                reader.nextEvent();
                return getRelationshipType(reader);
            }
        }
        reader.nextEvent(); // to prevent eventual wrong reader (f.e. self-closing tag)
        return defaultRelType;
    }

    private boolean containsLabelKey(XMLEvent peek) {
        final Attribute keyAttribute = peek.asStartElement().getAttributeByName(new QName("key"));
        return keyAttribute != null && keyAttribute.getValue().equals("label");
    }

    private void addLabels(Node node, String labels) {
        if (labels == null) return;
        labels = labels.trim();
        if (labels.isEmpty()) return;
        String[] parts = labels.split(LABEL_SPLIT);
        for (String part : parts) {
            if (part.trim().isEmpty()) continue;
            node.addLabel(Label.label(part.trim()));
        }
    }

    private XMLEvent peek(XMLEventReader reader) throws XMLStreamException {
        XMLEvent peek = reader.peek();
        if (peek.isCharacters() && (peek.asCharacters().isWhiteSpace())) {
            reader.nextEvent();
            return peek(reader);
        }
        return peek;
    }

    private void setDefaults(Map keys, Entity pc) {
        if (keys.isEmpty()) return;
        for (Key key : keys.values()) {
            if (key.defaultValue != null) pc.setProperty(key.nameOrId, key.defaultValue);
        }
    }

    private String getAttribute(StartElement element, QName qname) {
        Attribute attribute = element.getAttributeByName(qname);
        return attribute != null ? attribute.getValue() : null;
    }

    private RuntimeException generateXmlDoctypeException() {
        throw new RuntimeException("XML documents with a DOCTYPE are not allowed.");
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy