All Downloads are FREE. Search and download functionalities are using the official Maven repository.

eu.interedition.text.xml.XMLSerializer Maven / Gradle / Ivy

/*
 * #%L
 * Text: A text model with range-based markup via standoff annotations.
 * %%
 * Copyright (C) 2010 - 2011 The Interedition Development Group
 * %%
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *      http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * #L%
 */
package eu.interedition.text.xml;

import com.google.common.base.Objects;
import com.google.common.base.Throwables;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Ordering;
import eu.interedition.text.Layer;
import eu.interedition.text.Name;
import eu.interedition.text.QueryResultTextStream;
import eu.interedition.text.Text;
import eu.interedition.text.TextConstants;
import eu.interedition.text.TextRange;
import eu.interedition.text.TextRepository;
import eu.interedition.text.TextStream;
import java.io.IOException;
import java.net.URI;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.Stack;
import javax.xml.XMLConstants;
import javax.xml.stream.XMLStreamException;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;

/**
 * @author Gregor Middell
 */
public class XMLSerializer extends TextStream.ExceptionPropagatingListenerAdapter {
    private final ContentHandler xml;
    private final XMLSerializerConfiguration config;

    private final Map namespaceMappings = Maps.newHashMap();
    private final Stack> namespaceMappingStack = new Stack>();
    private final Map clixIdIncrements = Maps.newHashMap();
    private final Map clixIds = Maps.newHashMap();
    private final Ordering> layerOrdering;

    private boolean rootWritten = false;

    private XMLSerializer(ContentHandler xml, XMLSerializerConfiguration config) {
        this.xml = xml;
        this.config = config;
        this.namespaceMappings.put(URI.create(XMLConstants.XML_NS_URI), XMLConstants.XML_NS_PREFIX);
        this.namespaceMappings.put(URI.create(XMLConstants.XMLNS_ATTRIBUTE_NS_URI), XMLConstants.XMLNS_ATTRIBUTE);
        this.layerOrdering = Ordering.from(new HierarchyAwareAnnotationComparator())
                .compound(Ordering.from(new XMLNodePathComparator(config)))
                .compound(Ordering.arbitrary());
    }

    public static  void serialize(final ContentHandler xml, TextRepository repository, Layer text, final XMLSerializerConfiguration config) throws XMLStreamException, IOException {
        try {
            new QueryResultTextStream(repository, text, config.getQuery()).stream(new XMLSerializer(xml, config));
        } catch (Throwable t) {
            Throwables.propagateIfInstanceOf(t, IOException.class);
            Throwables.propagateIfInstanceOf(Throwables.getRootCause(t), XMLStreamException.class);
            throw Throwables.propagate(t);
        }
    }

    @Override
    protected void doStart(long contentLength) throws Exception {
        xml.startDocument();

        final Name rootName = config.getRootName();
        if (rootName != null) {
            startElement(rootName, Collections.emptyMap());
        }
    }

    @Override
    protected void doStart(long offset, Iterable> annotations) throws Exception {
        for (Layer a : layerOrdering.immutableSortedCopy(annotations)) {
            final Name name = a.getName();
            Map attributes = config.extractAttributes(a);
            if (!rootWritten || config.getHierarchy().contains(name)) {
                startElement(name, attributes);
            } else {
                final String localName = name.getLocalName();

                Integer id = clixIdIncrements.get(localName);
                id = (id == null ? 0 : id + 1);
                final String clixId = "clix:" + localName + "-" + id;

                attributes = Maps.newHashMap(attributes);
                attributes.put(TextConstants.CLIX_START_ATTR_NAME, clixId);
                emptyElement(name, attributes);

                clixIdIncrements.put(localName, id);
                clixIds.put(a, clixId);
            }
        }
    }

    @Override
    protected void doEnd(long offset, Iterable> annotations) throws Exception {
        for (Layer a : layerOrdering.reverse().immutableSortedCopy(annotations)) {
            final String clixId = clixIds.get(a);
            if (clixId == null) {
                endElement(a.getName());
            } else {
                final Map attributes = Maps.newHashMap();
                attributes.put(TextConstants.CLIX_END_ATTR_NAME, clixId);
                emptyElement(a.getName(), attributes);

                clixIds.remove(a);
            }

        }
    }

    @Override
    protected void doText(TextRange r, String text) throws Exception {
        final char[] chars = text.toCharArray();
        xml.characters(chars, 0, chars.length);
    }

    @Override
    protected void doEnd() throws Exception {
        final Name rootName = config.getRootName();
        if (rootName != null) {
            endElement(rootName);
        }
        xml.endDocument();
    }

    private void emptyElement(Name name, Map attributes) throws SAXException {
        startElement(name, attributes);
        endElement(name);
    }

    private void startElement(Name name, Map attributes) throws SAXException {
        namespaceMappingStack.push(new HashSet());

        final Map nsAttributes = Maps.newHashMap();
        if (!rootWritten) {
            for (Map.Entry mapping : config.getNamespaceMappings().entrySet()) {
                mapNamespace(mapping.getValue(), mapping.getKey(), nsAttributes);
            }
            mapNamespace(TextConstants.CLIX_NS, TextConstants.CLIX_NS_PREFIX, nsAttributes);
            rootWritten = true;
        }
        for (Name n : Iterables.concat(attributes.keySet(), Collections.singleton(name))) {
            final URI ns = n.getNamespace();
            if (ns == null || namespaceMappings.containsKey(ns)) {
                continue;
            }
            int count = 0;
            String newPrefix = "ns" + count;
            while (true) {
                if (!namespaceMappings.containsKey(newPrefix)) {
                    break;
                }
                newPrefix = "ns" + (++count);
            }
            mapNamespace(ns, newPrefix, nsAttributes);
        }

        final Map mergedAttributes = Maps.newLinkedHashMap();
        mergedAttributes.putAll(nsAttributes);
        mergedAttributes.putAll(attributes);
        xml.startElement(toNamespace(name.getNamespace()), name.getLocalName(), toQNameStr(name), toAttributes(mergedAttributes));
    }

    private void mapNamespace(URI namespace, String prefix, Map nsAttributes) throws SAXException {
        final String uri = namespace.toString();
        namespaceMappings.put(namespace, prefix);
        namespaceMappingStack.peek().add(namespace);
        if (prefix.length() == 0) {
            nsAttributes.put(new Name((URI) null, XMLConstants.XMLNS_ATTRIBUTE), uri);
        } else {
            nsAttributes.put(new Name(TextConstants.XMLNS_ATTRIBUTE_NS_URI, prefix), uri);
            xml.startPrefixMapping(prefix, uri);
        }
    }

    private void endElement(Name name) throws SAXException {
        xml.endElement(toNamespace(name.getNamespace()), name.getLocalName(), toQNameStr(name));

        for (URI namespace : namespaceMappingStack.pop()) {
            xml.endPrefixMapping(namespaceMappings.remove(namespace));
        }
    }

    private String toNamespace(URI uri) {
        return (uri == null ? "" : uri.toString());
    }

    private String toQNameStr(Name name) {
        final URI ns = name.getNamespace();
        final String localName = name.getLocalName();

        if (ns == null) {
            return localName;
        } else {
            final String prefix = namespaceMappings.get(ns);
            return (prefix.length() == 0 ? localName : prefix + ":" + localName);
        }
    }

    private Name toQName(String str) {
        final int colon = str.indexOf(':');
        return (colon >= 0 ? toQName(str.substring(0, colon), str.substring(colon + 1)) : toQName(null, str));
    }

    private Name toQName(String uri, String localName) {
        return new Name(URI.create(uri), localName);
    }

    private Attributes toAttributes(final Map attributes) {
        return new Attributes() {
            final List names = Lists.newArrayList(attributes.keySet());

            public int getLength() {
                return names.size();
            }

            public String getURI(int index) {
                return toNamespace(names.get(index).getNamespace());
            }

            public String getLocalName(int index) {
                return names.get(index).getLocalName();
            }

            public String getQName(int index) {
                return toQNameStr(names.get(index));
            }

            public String getType(int index) {
                return (index >= 0 && index < names.size() ? "CDATA" : null);
            }

            public String getValue(int index) {
                return attributes.get(names.get(index));
            }

            public int getIndex(String uri, String localName) {
                return names.indexOf(toQName(uri, localName));
            }

            public int getIndex(String qName) {
                return names.indexOf(toQName(qName));
            }

            public String getType(String uri, String localName) {
                return names.indexOf(toQName(uri, localName)) >= 0 ? "CDATA" : null;
            }

            public String getType(String qName) {
                return names.indexOf(toQName(qName)) >= 0 ? "CDATA" : null;
            }

            public String getValue(String uri, String localName) {
                return attributes.get(toQName(uri, localName));
            }

            public String getValue(String qName) {
                return attributes.get(toQName(qName));
            }
        };
    }

    private class HierarchyAwareAnnotationComparator implements Comparator> {
        private final Ordering hierarchyOrdering;

        private HierarchyAwareAnnotationComparator() {
            this.hierarchyOrdering = Ordering.explicit(Objects.>firstNonNull(config.getHierarchy(), Collections.emptyList()));
        }

        public int compare(Layer o1, Layer o2) {
            final Name o1Name = o1.getName();
            final Name o2Name = o2.getName();
            if (config.getHierarchy().contains(o1Name) && config.getHierarchy().contains(o2Name)) {
                return hierarchyOrdering.compare(o1Name, o2Name);
            }
            return 0;
        }
    }

    private class XMLNodePathComparator implements Comparator> {

        private final XMLSerializerConfiguration config;

        private XMLNodePathComparator(XMLSerializerConfiguration config) {
            this.config = config;
        }

        public int compare(Layer o1, Layer o2) {
            final XMLNodePath o1Path = config.extractXMLNodePath(o1);
            final XMLNodePath o2Path = config.extractXMLNodePath(o2);
            if (o1Path != null && o2Path != null) {
                return o1Path.compareTo(o2Path);
            }
            return 0;
        }
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy