All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.openstreetmap.osmosis.xml.common.SaxParserFactory Maven / Gradle / Ivy

The newest version!
// This software is released into the Public Domain.  See copying.txt for details.
package org.openstreetmap.osmosis.xml.common;

import org.openstreetmap.osmosis.core.OsmosisRuntimeException;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;

import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import java.io.IOException;
import java.io.InputStream;

/**
 * SAX parser factory that additionally verifies that the underlying parser is providing correct unicode support for
 * characters requiring more than 1 UTF-16 character.
 */
public final class SaxParserFactory {

    private SaxParserFactory() {
    }


    /**
     * Creates a new SAX parser.
     *
     * @return The newly created SAX parser.
     */
    public static SAXParser createParser() {
        try {
            return SAXParserFactory.newInstance().newSAXParser();

        } catch (ParserConfigurationException e) {
            throw new OsmosisRuntimeException("Unable to create SAX Parser.", e);
        } catch (SAXException e) {
            throw new OsmosisRuntimeException("Unable to create SAX Parser.", e);
        }
    }


    /**
     * Validate SAX parser unicode support.
     */
    private static void validate() {
        try {
            UnicodeTestHandler unicodeTestHandler = new UnicodeTestHandler();

            SAXParser parser = createParser();
            InputStream is = SaxParserFactory.class.getResourceAsStream("test-unicode-node.osm");
            parser.parse(is, unicodeTestHandler);
            if (!unicodeTestHandler.isCorrect()) {
                throw new OsmosisRuntimeException(
                        "SAX Parser doesn't correctly support multi-byte characters,"
                                + " try including a modern version of Xerces on the classpath.");
            }
        } catch (SAXException e) {
            throw new OsmosisRuntimeException("Unable to create SAX Parser.", e);
        } catch (IOException e) {
            throw new OsmosisRuntimeException("Unable to read unicode test file.", e);
        }
    }


    static {
        // Trigger validation during class initialisation.
        validate();
    }


    /**
     * Looks at the SAX document and validates that the "name" and "name:en" attributes both contain the
     * correct value.
     */
    private static class UnicodeTestHandler extends DefaultHandler {
        // The expected value of test tags.  These escape sequences represent a single treble-clef which requires
        // two 16-bit characters.  Represented using escape sequences to prevent accidental munging by dev tools.
        private static final String NAME_VALUE = "H\uD834\uDD1EM Events";
        private boolean nameCorrect;
        private boolean enNameCorrect;


        private boolean validateNameValue(Attributes attributes) {
            return NAME_VALUE.equals(attributes.getValue("v"));
        }


        @Override
        public void startElement(
                String uri, String localName, String qName, Attributes attributes) throws SAXException {
            if ("tag".equals(qName)) {
                if ("name".equals(attributes.getValue("k"))) {
                    if (validateNameValue(attributes)) {
                        nameCorrect = true;
                    }
                } else if ("name:en".equals(attributes.getValue("k"))) {
                    if (validateNameValue(attributes)) {
                        enNameCorrect = true;
                    }
                }
            }
        }


        /**
         * Are all fields correct.
         *
         * @return True if all correct.
         */
        public boolean isCorrect() {
            return nameCorrect && enNameCorrect;
        }
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy