All Downloads are FREE. Search and download functionalities are using the official Maven repository.

uk.ac.starlink.util.DOMUtils Maven / Gradle / Ivy

There is a newer version: 4.3
Show newest version
package uk.ac.starlink.util;

import java.util.ArrayList;
import java.util.List;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import org.w3c.dom.Attr;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.Text;

import java.net.URI;

/**
 * Provides convenience methods for handling DOMs.
 */
public class DOMUtils {

    /**
     * Private dummy sole constructor.
     * It is declared private and does nothing, so code outside this
     * class cannot create an instance.
     */
    private DOMUtils() {}

    /**
     * Maps node type codes to names; the array index is one of the
     * type constants declared on {@link Node}.  Slots which are not
     * assigned in the initializer below stay null.
     * Used by {@link #mapNodeType}
     */
    private static final String[] nodeTypeMap;
    static {
        // Indexing an array directly by the Node type constants looks
        // nasty, error-prone and vulnerable to updates in the DOM spec,
        // but it is not as bad as it looks.  The DOM spec includes the
        // numerical values of the Node constants in the definition of
        // the Node interface, so they can't change.  The outside
        // possibility of change is held open within the spec, by the
        // statement reserving the first 200 such codes for use by W3C,
        // but if such a change were to come about, and affect us, this
        // table is the least of the things which would have to be
        // modified.  A hash map would make this completely general,
        // but that gains us very little beyond paranoid generality.
        //
        // See also nodeToMaskMap in NodeDescendants.java
        final String[] names = new String[16];
        final int capacity = names.length;

        // Every store is preceded by an assertion that the constant
        // actually fits inside the table.
        assert Node.ELEMENT_NODE < capacity;
        names[Node.ELEMENT_NODE] = "Element";

        assert Node.ATTRIBUTE_NODE < capacity;
        names[Node.ATTRIBUTE_NODE] = "Attribute";

        assert Node.TEXT_NODE < capacity;
        names[Node.TEXT_NODE] = "Text";

        assert Node.CDATA_SECTION_NODE < capacity;
        names[Node.CDATA_SECTION_NODE] = "CDATASection";

        assert Node.ENTITY_REFERENCE_NODE < capacity;
        names[Node.ENTITY_REFERENCE_NODE] = "EntityReference";

        assert Node.ENTITY_NODE < capacity;
        names[Node.ENTITY_NODE] = "Entity";

        assert Node.PROCESSING_INSTRUCTION_NODE < capacity;
        names[Node.PROCESSING_INSTRUCTION_NODE] = "ProcessingInstruction";

        assert Node.COMMENT_NODE < capacity;
        names[Node.COMMENT_NODE] = "Comment";

        assert Node.DOCUMENT_NODE < capacity;
        names[Node.DOCUMENT_NODE] = "Document";

        assert Node.DOCUMENT_TYPE_NODE < capacity;
        names[Node.DOCUMENT_TYPE_NODE] = "DocumentType";

        assert Node.DOCUMENT_FRAGMENT_NODE < capacity;
        names[Node.DOCUMENT_FRAGMENT_NODE] = "DocumentFragment";

        assert Node.NOTATION_NODE < capacity;
        names[Node.NOTATION_NODE] = "Notation";

        nodeTypeMap = names;
    }
    
    /**
     * Finds the first direct child element of a node carrying a given
     * tag name.  Only the immediate children of {@code parent} are
     * examined; deeper descendants are not searched.
     *
     * @param  parent  the node whose children are to be searched
     * @param  name    the tag name of the element being searched for
     * @return the first child of {@code parent} which is an
     *         {@link Element} and whose tag name equals {@code name},
     *         or {@code null} if no child matches
     */
    public static Element getChildElementByName( Node parent, String name ) {
        Node candidate = parent.getFirstChild();
        while ( candidate != null ) {
            // Non-element children (text, comments, ...) never match.
            if ( candidate instanceof Element
                 && ( (Element) candidate ).getTagName().equals( name ) ) {
                return (Element) candidate;
            }
            candidate = candidate.getNextSibling();
        }
        return null;
    }

    /**
     * Returns all child elements of a node with a given name.
     * Only the immediate children of {@code parent} are examined, and
     * the result preserves their document order.
     *
     * @param  parent  the node whose children are to be searched
     * @param  name    the name of the element being searched for
     * @return  array of child elements of {@code parent} with tagname
     *          {@code name}; if {@code name} is {@code null}, all child
     *          elements are returned.  The array is never
     *          {@code null}; it has length zero when nothing matches.
     */
    public static Element[] getChildElementsByName( Node parent, String name ) {
        // The list must be typed: on a raw List, toArray(T[]) is erased
        // to return Object[], which cannot be returned as Element[].
        List<Element> els = new ArrayList<Element>();
        for ( Node child = parent.getFirstChild(); child != null;
              child = child.getNextSibling() ) {
            if ( child instanceof Element ) {
                Element childEl = (Element) child;
                // A null name acts as a wildcard matching every element.
                if ( name == null || name.equals( childEl.getTagName() ) ) {
                    els.add( childEl );
                }
            }
        }
        return els.toArray( new Element[ 0 ] );
    }

    /**
     * Returns a string representing the plain text content of an element.
     * Only the direct children of the element are examined.
     * Any comments, attributes, elements or other non-text children
     * are ignored, and all CDATA and Text nodes are merged, in document
     * order, to give a single string.
     *
     * @param   el  the element whose text content is wanted
     * @return  the pure text content.  If there is none, an empty
     *          string is returned.
     */
    public static String getTextContent( Element el ) {
        // The accumulator never leaves this method, so the
        // unsynchronized StringBuilder is the appropriate choice.
        StringBuilder content = new StringBuilder();
        for ( Node child = el.getFirstChild(); child != null;
              child = child.getNextSibling() ) {
            if ( child instanceof Text ) {
                content.append( ( (Text) child ).getData() );
            }
        }
        return content.toString();
    }

    /**
     * Returns the first subsequent sibling of a given node which is an Element.
     * This is useful for navigating a DOM as a tree of elements when
     * the presence of text or attribute children is a distraction.
     *
     * <p>The siblings are scanned with a loop rather than by recursion,
     * so a long run of non-element siblings cannot exhaust the stack.
     *
     * @param  node  the node whose siblings (including itself) you are
     *         interested in.  May be {@code null}
     * @return the first sibling of {@code node} which is an
     *         {@link Element}.
     *         If {@code node} itself is an element, that is returned.
     *         If {@code node} has no subsequent siblings which are
     *         elements, or if it is {@code null},
     *         then {@code null} is returned.
     */
    public static Element getFirstElementSibling( Node node ) {
        Node current = node;
        // Step forward until an element is found or the siblings run out.
        while ( current != null && ! ( current instanceof Element ) ) {
            current = current.getNextSibling();
        }
        // current is now either null or an Element, so the cast is safe.
        return (Element) current;
    }

    /**
     * Traverses the given DOM, relativising all the URIs in the
     * uri attributes of each Element.
     *
     * 

The (uri-attribute) nodes in the input DOM are modified by this * method; if this is a problem, use {@link * org.w3c.dom.Node#cloneNode} first. * * @param n a node containing the DOM whose URIs are to be * relativized. If this is null, the method immediately returns null * @param baseURI the URI relative to which the DOM is to be * relativised. If this is null, then the input node is * immediately returned unchanged. * @param attname the attribute name to be used. If null, this * defaults to uri * @return the input node * @see java.net.URI#relativize */ public static Node relativizeDOM(Node n, URI baseURI, String attname) { if (n == null || baseURI == null) return n; if (attname == null) attname = "uri"; NamedNodeMap nm = n.getAttributes(); if (nm != null) for (int i=0; iNode * @return a string name for the type */ static public String mapNodeType(short nodeType) { // Mostly for debugging -- the numeric node types are pretty // useless in any sort of log message. Yes, this _is_ // more elaborate than you'd guess it'd have to be, and no, // there's no other way to debug node types other than by grubbing // through org.w3c.dom.Node.java assert nodeType < nodeTypeMap.length; String val = nodeTypeMap[nodeType]; if (val == null) val = "UNKNOWN!!!"; return val; } /** * Returns a new Document instance. * This method just does all the tedious business of mucking about * with factories for you. * * @return an empty Document */ public static Document newDocument() { try { return DocumentBuilderFactory .newInstance() .newDocumentBuilder() .newDocument(); } catch ( ParserConfigurationException e ) { throw new RuntimeException( "Unexpected error constructing " + "default document factory", e ); } } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy