All Downloads are FREE. Search and download functionalities are using the official Maven repository.

apoc.export.graphml.XmlGraphMLReader Maven / Gradle / Ivy

There is a newer version: 4.4.0.34
Show newest version
package apoc.export.graphml;

import apoc.export.util.BatchTransaction;
import apoc.export.util.Reporter;
import apoc.util.JsonUtil;
import org.apache.commons.lang.StringUtils;
import org.neo4j.graphdb.*;

import javax.xml.namespace.QName;
import javax.xml.stream.XMLEventReader;
import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLStreamConstants;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.events.Attribute;
import javax.xml.stream.events.StartElement;
import javax.xml.stream.events.XMLEvent;
import java.io.Reader;
import java.lang.reflect.Array;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.function.Function;

/**
 * Created by mh on 10.07.13.
 */
public class XmlGraphMLReader {

    public static final String LABEL_SPLIT = " *: *";
    private final GraphDatabaseService gdb;
    private boolean storeNodeIds;
    private RelationshipType defaultRelType = RelationshipType.withName("UNKNOWN");
    private int batchSize = 40000;
    private Reporter reporter;
    private boolean labels;

    public XmlGraphMLReader storeNodeIds() {
        this.storeNodeIds = true;
        return this;
    }

    public XmlGraphMLReader relType(String name) {
        this.defaultRelType = RelationshipType.withName(name);
        return this;
    }

    public XmlGraphMLReader batchSize(int batchSize) {
        this.batchSize = batchSize;
        return this;
    }

    public XmlGraphMLReader nodeLabels(boolean readLabels) {
        this.labels = readLabels;
        return this;
    }

    public XmlGraphMLReader reporter(Reporter reporter) {
        this.reporter = reporter;
        return this;
    }



    enum Type {
        BOOLEAN() {
            Object parse(String value) {
                return Boolean.valueOf(value);
            }

            Object parseList(String value) {
                return Type.parseList(value, Boolean.class, (i) -> (Boolean)i);
            }
        }, INT() {
            Object parse(String value) {
                return Integer.parseInt(value);
            }

            Object parseList(String value) {
                return Type.parseList(value, Integer.class, (n) -> ((Number)n).intValue());
            }
        }, LONG() {
            Object parse(String value) {
                return Long.parseLong(value);
            }

            Object parseList(String value) {
                return Type.parseList(value, Long.class, (i) -> ((Number)i).longValue());
            }
        }, FLOAT() {
            Object parse(String value) {
                return Float.parseFloat(value);
            }

            Object parseList(String value) {
                return Type.parseList(value, Float.class, (i) -> ((Number)i).floatValue());
            }
        }, DOUBLE() {
            Object parse(String value) {
                return Double.parseDouble(value);
            }

            Object parseList(String value) {
                return Type.parseList(value, Double.class, (i) -> ((Number)i).doubleValue());
            }
        }, STRING() {
            Object parse(String value) {
                return value;
            }

            Object parseList(String value) {
                return Type.parseList(value, String.class, (i) -> (String)i);
            }
        };

        abstract Object parse(String value);
        abstract Object parseList(String value);

        public static  T[] parseList(String value, Class asClass, Function convert) {
            List parsed = JsonUtil.parse(value, null, List.class);
            T[] converted = (T[])Array.newInstance(asClass, parsed.size());

            for (int i = 0; i < parsed.size(); i++)
                converted[i] = convert.apply(parsed.get(i));
            return converted;
        }

        public static Type forType(String type) {
            if (type==null) return STRING;
            return valueOf(type.trim().toUpperCase());
        }
    }

    static class Key {
        String id;
        String name;
        boolean forNode;
        Type listType;
        Type type;
        Object defaultValue;

        public Key(String id, String name, String type, String listType, String forNode) {
            this.id = id;
            this.name = name;
            this.type = Type.forType(type);
            if (listType != null) {
                this.listType = Type.forType(listType);
            }
            this.forNode = forNode == null || forNode.equalsIgnoreCase("node");
        }

        private static Key defaultKey(String id, boolean forNode) {
            return new Key(id,id,"string", null, forNode ? "node" : "edge");
        }

        public void setDefault(String data) {
            this.defaultValue = type.parse(data);
        }

        public Object parseValue(String input) {
            if (input == null || input.trim().isEmpty()) return defaultValue;
            if (listType != null) return listType.parseList(input);
            return type.parse(input);
        }
    }

    public static final QName ID = QName.valueOf("id");
    public static final QName LABELS = QName.valueOf("labels");
    public static final QName SOURCE = QName.valueOf("source");
    public static final QName TARGET = QName.valueOf("target");
    public static final QName LABEL = QName.valueOf("label");
    public static final QName FOR = QName.valueOf("for");
    public static final QName NAME = QName.valueOf("attr.name");
    public static final QName TYPE = QName.valueOf("attr.type");
    public static final QName LIST = QName.valueOf("attr.list");
    public static final QName KEY = QName.valueOf("key");

    public XmlGraphMLReader(GraphDatabaseService gdb) {
        this.gdb = gdb;
    }

    public long parseXML(Reader input) throws XMLStreamException {
        Map cache = new HashMap<>(1024*32);
        XMLInputFactory inputFactory = XMLInputFactory.newInstance();
        inputFactory.setProperty("javax.xml.stream.isCoalescing", true);
        inputFactory.setProperty(XMLInputFactory.IS_NAMESPACE_AWARE, true);
        XMLEventReader reader = inputFactory.createXMLEventReader(input);
        PropertyContainer last = null;
        Map nodeKeys = new HashMap<>();
        Map relKeys = new HashMap<>();
        int count = 0;
        try (BatchTransaction tx = new BatchTransaction(gdb, batchSize * 10, reporter)) {

            while (reader.hasNext()) {
                XMLEvent event = (XMLEvent) reader.next();
                if (event.isStartElement()) {

                    StartElement element = event.asStartElement();
                    String name = element.getName().getLocalPart();

                    if (name.equals("graphml") || name.equals("graph")) continue;
                    if (name.equals("key")) {
                        String id = getAttribute(element, ID);
                        Key key = new Key(id, getAttribute(element, NAME), getAttribute(element, TYPE), getAttribute(element, LIST), getAttribute(element, FOR));

                        XMLEvent next = peek(reader);
                        if (next.isStartElement() && next.asStartElement().getName().getLocalPart().equals("default")) {
                            reader.nextEvent().asStartElement();
                            key.setDefault(reader.nextEvent().asCharacters().getData());
                        }
                        if (key.forNode) nodeKeys.put(id, key);
                        else relKeys.put(id, key);
                        continue;
                    }
                    if (name.equals("data")) {
                        if (last == null) continue;
                        String id = getAttribute(element, KEY);
                        boolean isNode = last instanceof Node;
                        Key key = isNode ? nodeKeys.get(id) : relKeys.get(id);
                        if (key == null) key = Key.defaultKey(id, isNode);
                        Object value = key.defaultValue;
                        XMLEvent next = peek(reader);
                        if (next.isCharacters()) {
                            value = key.parseValue(reader.nextEvent().asCharacters().getData());
                        }
                        if (value != null) {
                            if (this.labels && isNode && id.equals("labels")) {
                                addLabels((Node)last,value.toString());
                            } else if (!this.labels || isNode || !id.equals("label")) {
                                last.setProperty(key.name, value);
                                if (reporter != null) reporter.update(0, 0, 1);
                            }
                        } else if (next.getEventType() == XMLStreamConstants.END_ELEMENT) {
                            last.setProperty(key.name, StringUtils.EMPTY);
                            reporter.update(0, 0, 1);
                        }
                        continue;
                    }
                    if (name.equals("node")) {
                        tx.increment();
                        String id = getAttribute(element, ID);
                        Node node = gdb.createNode();
                        if (this.labels) {
                            String labels = getAttribute(element, LABELS);
                            addLabels(node, labels);
                        }
                        if (storeNodeIds) node.setProperty("id", id);
                        setDefaults(nodeKeys, node);
                        last = node;
                        cache.put(id, node.getId());
                        if (reporter != null) reporter.update(1, 0, 0);
                        count++;
                        continue;
                    }
                    if (name.equals("edge")) {
                        tx.increment();
                        String source = getAttribute(element, SOURCE);
                        String target = getAttribute(element, TARGET);
                        String label = getAttribute(element, LABEL);
                        Node from = gdb.getNodeById(cache.get(source));
                        Node to = gdb.getNodeById(cache.get(target));

                        RelationshipType relationshipType = label == null ? getRelationshipType(reader) : RelationshipType.withName(label);
                        Relationship relationship = from.createRelationshipTo(to, relationshipType);
                        setDefaults(relKeys, relationship);
                        last = relationship;
                        if (reporter != null) reporter.update(0, 1, 0);
                        count++;
                    }
                }
            }
        }
        return count;
    }

    private RelationshipType getRelationshipType(XMLEventReader reader) throws XMLStreamException {
        if (this.labels) {
            XMLEvent peek = reader.peek();
            if (peek.isCharacters() && !(peek.asCharacters().isWhiteSpace())) {
                String value = peek.asCharacters().getData();
                String el = ":";
                String typeRel = value.contains(el) ? value.replace(el, StringUtils.EMPTY) : value;
                return RelationshipType.withName(typeRel.trim());
            } else {
                reader.nextEvent();
                return getRelationshipType(reader);
            }
        }
        return defaultRelType;
    }

    private void addLabels(Node node, String labels) {
        if (labels==null) return;
        labels = labels.trim();
        if (labels.isEmpty()) return;
        String[] parts = labels.split(LABEL_SPLIT);
        for (String part : parts) {
            if (part.trim().isEmpty()) continue;
            node.addLabel(Label.label(part.trim()));
        }
    }

    private XMLEvent peek(XMLEventReader reader) throws XMLStreamException {
        XMLEvent peek = reader.peek();
        if (peek.isCharacters() && (peek.asCharacters().isWhiteSpace())) {
            reader.nextEvent();
            return peek(reader);
        }
        return peek;
    }

    private void setDefaults(Map keys, PropertyContainer pc) {
        if (keys.isEmpty()) return;
        for (Key key : keys.values()) {
            if (key.defaultValue!=null) pc.setProperty(key.name,key.defaultValue);
        }
    }

    private String getAttribute(StartElement element, QName qname) {
        Attribute attribute = element.getAttributeByName(qname);
        return attribute != null ? attribute.getValue() : null;
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy