All downloads are free. Search and download functionality uses the official Maven repository.

edu.isi.nlp.xml.XMLUtils Maven / Gradle / Ivy

The newest version!
package edu.isi.nlp.xml;

import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkNotNull;

import com.google.common.annotations.Beta;
import com.google.common.base.CharMatcher;
import com.google.common.base.Joiner;
import com.google.common.base.Optional;
import com.google.common.base.Predicate;
import com.google.common.base.Predicates;
import com.google.common.base.Splitter;
import com.google.common.collect.AbstractIterator;
import com.google.common.collect.FluentIterable;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Lists;
import edu.isi.nlp.symbols.Symbol;
import edu.isi.nlp.symbols.SymbolUtils;
import java.io.StringWriter;
import java.util.Iterator;
import java.util.List;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerConfigurationException;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.TransformerFactoryConfigurationError;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.stream.StreamResult;
import org.w3c.dom.Element;
import org.w3c.dom.NamedNodeMap;
import org.w3c.dom.Node;
import org.w3c.dom.ls.DOMImplementationLS;

/**
 * Static helpers for reading attributes and child elements out of DOM {@link Element}s and for
 * rendering elements as strings.
 *
 * <p>Unless a method says otherwise, tag names are compared case-insensitively. The DOM {@code
 * getAttribute} call reports an absent attribute as the empty string, so the {@code optional*}
 * attribute readers treat "absent" and "present but empty" alike.
 */
@Beta
public final class XMLUtils {

  /** Splits on whitespace, trimming pieces and dropping empty ones; used by deprecated overloads. */
  private static final Splitter WHITESPACE_SPLITTER =
      Splitter.on(CharMatcher.WHITESPACE).omitEmptyStrings().trimResults();

  /** Lazily created by {@link #dumpXMLElement(Element)}; only touched while holding its lock. */
  private static Transformer dumpTransformer = null;

  private XMLUtils() {
    throw new UnsupportedOperationException();
  }

  /**
   * Tests whether {@code e} has a direct child element whose tag name equals {@code name}.
   *
   * <p>Unlike most lookups in this class the comparison is case-sensitive.
   *
   * @param e the parent element
   * @param name the exact tag name to look for
   * @return {@code true} if such a child exists
   */
  public static boolean hasChildOfType(final Element e, final String name) {
    for (Node child = e.getFirstChild(); child != null; child = child.getNextSibling()) {
      if (child instanceof Element && ((Element) child).getTagName().equals(name)) {
        return true;
      }
    }
    return false;
  }

  /**
   * Returns the first direct child element of {@code parent} whose tag matches {@code name},
   * ignoring case.
   *
   * @param parent the element whose children are searched
   * @param name the tag of the desired child
   * @return the first matching child, or {@link Optional#absent()} if there is none
   */
  public static Optional<Element> directChild(final Element parent, final String name) {
    for (Node child = parent.getFirstChild(); child != null; child = child.getNextSibling()) {
      if (child instanceof Element && ((Element) child).getTagName().equalsIgnoreCase(name)) {
        return Optional.of((Element) child);
      }
    }
    return Optional.absent();
  }

  /**
   * Returns the element's next sibling with a tag matching the given name, ignoring case.
   *
   * @param element the element
   * @param name the tag of the desired sibling
   * @return a sibling matching the tag, or {@link Optional#absent()} if there is none
   */
  public static Optional<Element> nextSibling(final Element element, final String name) {
    for (Node sibling = element.getNextSibling();
        sibling != null;
        sibling = sibling.getNextSibling()) {
      if (sibling instanceof Element
          && ((Element) sibling).getTagName().equalsIgnoreCase(name)) {
        return Optional.of((Element) sibling);
      }
    }
    return Optional.absent();
  }

  /**
   * Like {@link #directChild(Element, String)} but fails if no matching child exists.
   *
   * @param parent the element whose children are searched
   * @param name the tag of the desired child (compared ignoring case)
   * @return the first matching child
   * @throws XMLUnexpectedInputException if {@code parent} has no such child
   */
  public static Element requiredDirectChild(final Element parent, final String name) {
    final Optional<Element> child = directChild(parent, name);
    if (child.isPresent()) {
      return child.get();
    }
    throw new XMLUnexpectedInputException(
        String.format(
            "Parent with tag %s lacks required child element %s", parent.getTagName(), name));
  }

  /**
   * Reads a required attribute and converts it to a {@link Symbol}.
   *
   * @param e the node to read from; must be an {@link Element}
   * @param attribute the attribute name
   * @return the attribute value as a symbol
   * @throws XMLUnexpectedInputException if the attribute is missing
   */
  public static Symbol requiredSymbolAttribute(final Node e, final String attribute) {
    return Symbol.from(requiredAttribute(e, attribute));
  }

  /**
   * Reads a required attribute whose value must be {@code true} or {@code false} (any case).
   *
   * @param e the node to read from; must be an {@link Element}
   * @param attribute the attribute name
   * @return the parsed boolean
   * @throws XMLUnexpectedInputException if the attribute is missing or is neither value
   */
  public static boolean requiredBooleanAttribute(final Node e, final String attribute) {
    final String val = requiredAttribute(e, attribute);

    if (val.equalsIgnoreCase("true")) {
      return true;
    }
    if (val.equalsIgnoreCase("false")) {
      return false;
    }
    // requiredAttribute has already verified that e is an Element
    final Element element = (Element) e;
    throw new XMLUnexpectedInputException(
        String.format(
            "%s has required boolean attribute %s, but it doesn't parse as a boolean: %s; true or false required",
            element.getTagName(), attribute, prettyPrintElementLocally(element)));
  }

  /**
   * Reads a required attribute and parses it with {@link Float#parseFloat(String)}.
   *
   * @param e the node to read from; must be an {@link Element}
   * @param attribute the attribute name
   * @return the parsed value
   * @throws XMLUnexpectedInputException if the attribute is missing or does not parse
   */
  public static float requiredFloatAttribute(final Node e, final String attribute) {
    final String val = requiredAttribute(e, attribute);

    try {
      return Float.parseFloat(val);
    } catch (final NumberFormatException ex) {
      final Element element = (Element) e;
      throw new XMLUnexpectedInputException(
          String.format(
              "%s has required float attribute %s, but it doesn't parse as a float: %s",
              element.getTagName(), attribute, prettyPrintElementLocally(element)));
    }
  }

  /**
   * Reads a required attribute and parses it with {@link Double#parseDouble(String)}.
   *
   * @param e the node to read from; must be an {@link Element}
   * @param attribute the attribute name
   * @return the parsed value
   * @throws XMLUnexpectedInputException if the attribute is missing or does not parse
   */
  public static double requiredDoubleAttribute(final Node e, final String attribute) {
    final String val = requiredAttribute(e, attribute);

    try {
      return Double.parseDouble(val);
    } catch (final NumberFormatException ex) {
      final Element element = (Element) e;
      throw new XMLUnexpectedInputException(
          String.format(
              "%s has required double attribute %s, but it doesn't parse as a double: %s",
              element.getTagName(), attribute, prettyPrintElementLocally(element)));
    }
  }

  /**
   * Reads a required attribute and parses it with {@link Integer#parseInt(String)}.
   *
   * @param e the node to read from; must be an {@link Element}
   * @param attribute the attribute name
   * @return the parsed value
   * @throws XMLUnexpectedInputException if the attribute is missing or does not parse
   */
  public static int requiredIntegerAttribute(final Node e, final String attribute) {
    final String val = requiredAttribute(e, attribute);

    try {
      return Integer.parseInt(val);
    } catch (final NumberFormatException ex) {
      final Element element = (Element) e;
      throw new XMLUnexpectedInputException(
          String.format(
              "%s has required int attribute %s, but it doesn't parse as an int : %s",
              element.getTagName(), attribute, prettyPrintElementLocally(element)));
    }
  }

  /**
   * Reads a required attribute and parses it with {@link Long#parseLong(String)}.
   *
   * @param e the node to read from; must be an {@link Element}
   * @param attribute the attribute name
   * @return the parsed value
   * @throws XMLUnexpectedInputException if the attribute is missing or does not parse
   */
  public static long requiredLongAttribute(final Node e, final String attribute) {
    final String val = requiredAttribute(e, attribute);

    try {
      return Long.parseLong(val);
    } catch (final NumberFormatException ex) {
      final Element element = (Element) e;
      throw new XMLUnexpectedInputException(
          String.format(
              "%s has required long attribute %s, but it doesn't parse as a long: %s",
              element.getTagName(), attribute, prettyPrintElementLocally(element)));
    }
  }

  /**
   * Returns the value of an attribute that must be present. An attribute that is present with an
   * empty value is returned as the empty string.
   *
   * @param node the node to read from; must be an {@link Element}
   * @param attribute the attribute name
   * @return the attribute's value, possibly empty
   * @throws IllegalArgumentException if {@code node} is not an {@link Element}
   * @throws XMLUnexpectedInputException if the element has no such attribute
   */
  public static String requiredAttribute(final Node node, final String attribute) {
    checkArgument(node instanceof Element, "Expected an Element but got %s", node);
    final Element e = (Element) node;
    final String val = e.getAttribute(attribute);

    // An empty value is ambiguous (absent vs. really empty), so consult the attribute node.
    if (!val.isEmpty() || e.getAttributeNode(attribute) != null) {
      return val;
    }
    throw new XMLUnexpectedInputException(
        String.format(
            "%s missing required attribute %s: %s.",
            e.getTagName(), attribute, prettyPrintElementLocally(e)));
  }

  /**
   * Throws if {@code o} is {@code null}, naming the element it should have been loaded from.
   *
   * @param e the element being processed; must be an {@link Element}
   * @param o the value that is expected to be non-null
   * @param type a description of the missing thing, used in the error message
   * @throws IllegalArgumentException if {@code e} is not an {@link Element}
   * @throws XMLUnexpectedInputException if {@code o} is {@code null}
   */
  public static void checkMissing(final Node e, final Object o, final String type) {
    checkArgument(e instanceof Element, "Expected an Element but got %s", e);
    if (o == null) {
      final Element element = (Element) e;
      throw new XMLUnexpectedInputException(
          String.format(
              "%s missing %s: %s",
              element.getTagName(), type, prettyPrintElementLocally(element)));
    }
  }

  /**
   * Tests whether {@code e} has at least one direct child that is an {@link Element}.
   *
   * @param e the parent element
   * @return {@code true} if any direct child is an element
   */
  public static boolean hasAnyChildElement(final Element e) {
    for (Node child = e.getFirstChild(); child != null; child = child.getNextSibling()) {
      if (child instanceof Element) {
        return true;
      }
    }
    return false;
  }

  /**
   * Returns the named attribute as a {@link Symbol}, or {@code null} if the element does not have
   * the attribute. An attribute that is present but empty yields the empty symbol.
   *
   * @param e the element to read from
   * @param attribute the attribute name
   * @return the attribute as a symbol, or {@code null} if absent
   */
  public static Symbol symbolOrNull(final Element e, final String attribute) {
    // getAttribute reports an absent attribute as "", never null, so presence has to be
    // checked explicitly for the null result to be reachable.
    if (!e.hasAttribute(attribute)) {
      return null;
    }
    final String val = e.getAttribute(attribute);
    return val != null ? Symbol.from(val) : null;
  }

  /**
   * Returns the named attribute as a {@link Symbol}, or {@code null} if its value is empty (which
   * is also how an absent attribute is reported).
   *
   * @param e the element to read from
   * @param attribute the attribute name
   * @return the attribute as a symbol, or {@code null} if it is empty or absent
   */
  public static Symbol nonEmptySymbolOrNull(final Element e, final String attribute) {
    final String val = e.getAttribute(attribute);
    if (val != null && !val.isEmpty()) {
      return Symbol.from(val);
    } else {
      return null;
    }
  }

  /**
   * Tests whether the tag name of {@code e} equals {@code tag}, ignoring case.
   *
   * @param e the element to test
   * @param tag the expected tag name
   * @return {@code true} on a case-insensitive match
   */
  public static boolean is(final Element e, final String tag) {
    return e.getTagName().equalsIgnoreCase(tag);
  }

  /**
   * Returns the value of the named attribute if it is non-empty.
   *
   * @param e the element to read from
   * @param attribute the attribute name
   * @return the value, or {@link Optional#absent()} if the attribute is empty or absent
   */
  public static Optional<String> optionalStringAttribute(final Element e, final String attribute) {
    final String val = e.getAttribute(attribute);

    if (!val.isEmpty()) {
      return Optional.of(val);
    } else {
      return Optional.absent();
    }
  }

  /**
   * Returns the value of the named attribute, or {@code defaultValue} if it is empty or absent.
   *
   * @param e the element to read from
   * @param attribute the attribute name
   * @param defaultValue the value to fall back on
   * @return the attribute value or the default
   */
  public static String defaultStringAttribute(
      final Element e, final String attribute, final String defaultValue) {
    final String val = e.getAttribute(attribute);

    if (!val.isEmpty()) {
      return val;
    } else {
      return defaultValue;
    }
  }

  /**
   * Parses the named attribute with {@link Integer#parseInt(String)} if it is non-empty.
   *
   * @param e the element to read from
   * @param attribute the attribute name
   * @return the parsed value, or {@link Optional#absent()} if the attribute is empty or absent
   * @throws NumberFormatException if the attribute is non-empty but does not parse
   */
  public static Optional<Integer> optionalIntegerAttribute(
      final Element e, final String attribute) {
    final String val = e.getAttribute(attribute);

    if (!val.isEmpty()) {
      return Optional.of(Integer.parseInt(val));
    } else {
      return Optional.absent();
    }
  }

  /**
   * Parses the named attribute with {@link Long#parseLong(String)} if it is non-empty.
   *
   * @param e the element to read from
   * @param attribute the attribute name
   * @return the parsed value, or {@link Optional#absent()} if the attribute is empty or absent
   * @throws NumberFormatException if the attribute is non-empty but does not parse
   */
  public static Optional<Long> optionalLongAttribute(final Element e, final String attribute) {
    final String val = e.getAttribute(attribute);

    if (!val.isEmpty()) {
      return Optional.of(Long.parseLong(val));
    } else {
      return Optional.absent();
    }
  }

  /**
   * Converts the named attribute to a {@link Symbol} if it is non-empty.
   *
   * @param e the element to read from
   * @param attribute the attribute name
   * @return the symbol, or {@link Optional#absent()} if the attribute is empty or absent
   */
  public static Optional<Symbol> optionalSymbolAttribute(final Element e, final String attribute) {
    final String val = e.getAttribute(attribute);

    if (!val.isEmpty()) {
      return Optional.of(Symbol.from(val));
    } else {
      return Optional.absent();
    }
  }

  /**
   * Parses the named attribute as {@code true} or {@code false} (any case) if it is non-empty.
   *
   * @param e the element to read from
   * @param attribute the attribute name
   * @return the parsed value, or {@link Optional#absent()} if the attribute is empty or absent
   * @throws XMLUnexpectedInputException if the attribute is non-empty but is neither value
   */
  public static Optional<Boolean> optionalBooleanAttribute(
      final Element e, final String attribute) {
    final String val = e.getAttribute(attribute);

    if (val.isEmpty()) {
      return Optional.absent();
    }
    if (val.equalsIgnoreCase("true")) {
      return Optional.of(true);
    }
    if (val.equalsIgnoreCase("false")) {
      return Optional.of(false);
    }
    throw new XMLUnexpectedInputException(
        String.format(
            "%s has optional boolean attribute %s, but it doesn't parse as a boolean: %s; specify true or false",
            e.getTagName(), attribute, prettyPrintElementLocally(e)));
  }

  /**
   * Parses the named attribute with {@link Double#parseDouble(String)} if it is non-empty.
   *
   * @param e the element to read from
   * @param attribute the attribute name
   * @return the parsed value, or {@link Optional#absent()} if the attribute is empty or absent
   * @throws NumberFormatException if the attribute is non-empty but does not parse
   */
  public static Optional<Double> optionalDoubleAttribute(final Element e, final String attribute) {
    final String val = e.getAttribute(attribute);

    if (!val.isEmpty()) {
      return Optional.of(Double.parseDouble(val));
    } else {
      return Optional.absent();
    }
  }

  /**
   * Splits the named attribute with {@code splitter} and converts each piece to a {@link Symbol}.
   * An absent attribute is read as the empty string and handed to the splitter as such.
   *
   * @param e the element to read from
   * @param attribute the attribute name
   * @param splitter how to break the attribute value into pieces
   * @return the symbols, in order
   */
  public static List<Symbol> optionalSymbolList(
      final Element e, final String attribute, Splitter splitter) {
    final String val = e.getAttribute(attribute);

    return FluentIterable.from(splitter.split(val))
        .transform(SymbolUtils.symbolizeFunction())
        .toList();
  }

  /**
   * Whitespace-splitting variant of {@link #optionalSymbolList(Element, String, Splitter)}.
   *
   * @deprecated use {@link #optionalSymbolList(Element, String, Splitter)} with an explicit
   *     splitter
   */
  @Deprecated
  public static List<Symbol> optionalSymbolList(final Element e, final String attribute) {
    return optionalSymbolList(e, attribute, WHITESPACE_SPLITTER);
  }

  /**
   * Whitespace-splitting variant of {@link #requiredSymbolList(Element, String, Splitter)}.
   *
   * @deprecated use {@link #requiredSymbolList(Element, String, Splitter)} with an explicit
   *     splitter
   */
  @Deprecated
  public static List<Symbol> requiredSymbolList(final Element e, final String attribute) {
    return requiredSymbolList(e, attribute, WHITESPACE_SPLITTER);
  }

  /**
   * Splits a required attribute with {@code splitter} and converts each piece to a {@link
   * Symbol}.
   *
   * @param e the element to read from
   * @param attribute the attribute name
   * @param splitter how to break the attribute value into pieces
   * @return the symbols, in order
   * @throws XMLUnexpectedInputException if the attribute is missing
   */
  public static List<Symbol> requiredSymbolList(
      final Element e, final String attribute, Splitter splitter) {
    final String val = requiredAttribute(e, attribute);

    return FluentIterable.from(splitter.split(val))
        .transform(SymbolUtils.symbolizeFunction())
        .toList();
  }

  /**
   * Whitespace-splitting variant of {@link #requiredStringList(Element, String, Splitter)}.
   *
   * @deprecated use {@link #requiredStringList(Element, String, Splitter)} with an explicit
   *     splitter
   */
  @Deprecated
  public static List<String> requiredStringList(final Element e, final String attribute) {
    return requiredStringList(e, attribute, WHITESPACE_SPLITTER);
  }

  /**
   * Splits a required attribute with {@code splitter}.
   *
   * @param e the element to read from
   * @param attribute the attribute name
   * @param splitter how to break the attribute value into pieces
   * @return the pieces, in order
   * @throws XMLUnexpectedInputException if the attribute is missing
   */
  public static List<String> requiredStringList(
      final Element e, final String attribute, Splitter splitter) {
    final String val = requiredAttribute(e, attribute);

    return FluentIterable.from(splitter.split(val)).toList();
  }

  /**
   * Serializes {@code e} using the load/save serializer of its owner document's DOM
   * implementation.
   *
   * @param e the element to serialize
   * @return the serialized form
   */
  public static String dumpElement(final Element e) {
    return ((DOMImplementationLS) e.getOwnerDocument().getImplementation())
        .createLSSerializer()
        .writeToString(e);
  }

  /**
   * Returns the text content of the first direct child of {@code e} tagged {@code tagName}, as a
   * {@link Symbol}.
   *
   * @param e the parent element
   * @param tagName the child tag to look for (compared ignoring case)
   * @return the child's text as a symbol, or {@link Optional#absent()} if there is no such child
   */
  public static Optional<Symbol> optionalSymbolFromTextContent(Element e, String tagName) {
    final Optional<Element> child = directChild(e, tagName);
    if (child.isPresent()) {
      return Optional.of(Symbol.from(child.get().getTextContent()));
    } else {
      return Optional.absent();
    }
  }

  /**
   * Returns the text content of the first direct child of {@code e} tagged {@code tagName}.
   *
   * @param e the parent element
   * @param tagName the child tag to look for (compared ignoring case)
   * @return the child's text, or {@link Optional#absent()} if there is no such child
   */
  public static Optional<String> optionalStringFromTextContent(Element e, String tagName) {
    final Optional<Element> child = directChild(e, tagName);
    if (child.isPresent()) {
      return Optional.of(child.get().getTextContent());
    } else {
      return Optional.absent();
    }
  }

  /**
   * Converts an {@link Element} into a value of type {@code T}.
   *
   * @param <T> the type produced from an element
   */
  public abstract static class FromXMLLoader<T> {

    /** Builds a {@code T} from the given element. */
    public abstract T from(Element e);
  }

  /** A loader which returns an element's text content. */
  public static final FromXMLLoader<String> ToContentString =
      new FromXMLLoader<String>() {
        @Override
        public String from(final Element e) {
          return e.getTextContent();
        }
      };

  /**
   * Applies {@code childToXML} to every direct child element of {@code e}, whatever its tag.
   *
   * @param e the parent element
   * @param childToXML the conversion applied to each child element
   * @return the converted children, in document order
   */
  public static <T> List<T> childrenToList(final Element e, final FromXMLLoader<T> childToXML) {
    return childrenToList(e, null, childToXML);
  }

  /**
   * Applies {@code childToXML} to every direct child element of {@code e}, requiring each one to
   * be tagged {@code kidName}.
   *
   * @param e the parent element
   * @param kidName the tag every child element must have (compared ignoring case), or {@code
   *     null} to accept any tag
   * @param childToXML the conversion applied to each child element
   * @return the converted children, in document order
   * @throws XMLUnexpectedInputException if a child element has a different tag
   */
  public static <T> List<T> childrenToList(
      final Element e, final String kidName, final FromXMLLoader<T> childToXML) {
    return childrenToListInternal(e, kidName, childToXML, true);
  }

  /**
   * Shared implementation of {@link #childrenToList(Element, String, FromXMLLoader)} and {@link
   * #matchingChildrenToList(Element, String, FromXMLLoader)}; {@code throwOnMismatch} selects
   * whether a child element with the wrong tag is an error or is skipped.
   */
  private static <T> List<T> childrenToListInternal(
      final Element e,
      final String kidName,
      final FromXMLLoader<T> childToXML,
      final boolean throwOnMismatch) {
    final List<T> list = Lists.newArrayList();

    for (Node kid = e.getFirstChild(); kid != null; kid = kid.getNextSibling()) {
      // non-element nodes (text, comments, ...) are never converted and never an error
      if (!(kid instanceof Element)) {
        continue;
      }
      final Element kidElement = (Element) kid;
      if (kidName == null || is(kidElement, kidName)) {
        list.add(childToXML.from(kidElement));
      } else if (throwOnMismatch) {
        throw new XMLUnexpectedInputException(
            String.format(
                "Expected children of type %s but encountered %s",
                kidName, kidElement.getTagName()));
      }
    }

    return list;
  }

  /**
   * Applies {@code childToXML} to the direct child elements of {@code e} tagged {@code kidName},
   * silently skipping child elements with other tags.
   *
   * @param e the parent element
   * @param kidName the tag to select (compared ignoring case), or {@code null} to accept any tag
   * @param childToXML the conversion applied to each selected child
   * @return the converted children, in document order
   */
  public static <T> List<T> matchingChildrenToList(
      final Element e, final String kidName, final FromXMLLoader<T> childToXML) {
    return childrenToListInternal(e, kidName, childToXML, false);
  }

  /**
   * Tests whether {@code e} has no child nodes of any kind (elements, text, or otherwise).
   *
   * @param e the element to test
   * @return {@code true} if the element has no child nodes
   */
  public static boolean emptyElement(final Element e) {
    return !e.hasChildNodes();
  }

  /**
   * Returns the one direct child element of {@code e} tagged {@code childName}.
   *
   * @param e the parent element
   * @param childName the tag of the desired child (compared ignoring case)
   * @return the unique matching child
   * @throws XMLUnexpectedInputException if there is no matching child or more than one
   */
  public static Element requiredSingleChild(final Element e, final String childName) {
    Element singleChild = null;

    for (Node kid = e.getFirstChild(); kid != null; kid = kid.getNextSibling()) {
      if (kid instanceof Element && is((Element) kid, childName)) {
        if (singleChild != null) {
          throw new XMLUnexpectedInputException(
              String.format("Expected a single child of type %s but found multiple.", childName));
        }
        singleChild = (Element) kid;
      }
    }

    if (singleChild == null) {
      throw new XMLUnexpectedInputException(
          String.format("Expected child of type %s but didn't find one", childName));
    }

    return singleChild;
  }

  /**
   * Finds the one direct child element of {@code e} tagged {@code childName} and converts it with
   * {@code loader}.
   *
   * @param e the parent element
   * @param childName the tag of the desired child (compared ignoring case)
   * @param loader the conversion applied to the child
   * @return the converted child
   * @throws XMLUnexpectedInputException if there is no matching child or more than one
   */
  public static <T> T requiredSingleChild(
      final Element e, final String childName, final FromXMLLoader<T> loader) {
    return loader.from(requiredSingleChild(e, childName));
  }

  /**
   * Serializes {@code e} (without an XML declaration) using a JAXP identity {@link Transformer}.
   *
   * <p>The transformer is created on first use and then reused. A {@link Transformer} must not be
   * used by several threads at once, so this method is synchronized.
   *
   * @param e the element to serialize
   * @return the serialized form
   * @throws XMLException if the transformer cannot be created or the transformation fails
   */
  public static synchronized String dumpXMLElement(final Element e) {
    if (dumpTransformer == null) {
      final Transformer transformer;
      try {
        transformer = TransformerFactory.newInstance().newTransformer();
      } catch (final TransformerConfigurationException e1) {
        throw new XMLException("XML configuration problem", e1);
      } catch (final TransformerFactoryConfigurationError e1) {
        throw new XMLException("XML configuration problem", e1);
      }
      transformer.setOutputProperty(OutputKeys.OMIT_XML_DECLARATION, "yes");
      // publish only once fully configured, so a failure above cannot leave a half-set-up
      // transformer behind for later calls
      dumpTransformer = transformer;
    }
    final StringWriter out = new StringWriter();
    try {
      dumpTransformer.transform(new DOMSource(e), new StreamResult(out));
    } catch (final TransformerException e1) {
      throw new XMLException("Problem serializing XML element", e1);
    }
    return out.toString();
  }

  /** A predicate accepting elements whose tag name equals {@code tagName} exactly. */
  private static Predicate<Element> tagNameIsPredicate(final String tagName) {
    return new Predicate<Element>() {
      @Override
      public boolean apply(final Element input) {
        return tagName.equals(input.getTagName());
      }
    };
  }

  /**
   * Returns an {@link Iterable} over all direct child elements of {@code e} with tag {@code tag}.
   * The tag comparison is case-sensitive.
   */
  public static Iterable<Element> childrenWithTag(Element e, String tag) {
    return new ElementChildrenIterable(e, tagNameIsPredicate(tag));
  }

  /** Returns an {@link Iterable} over all direct child elements of {@code e}. */
  public static Iterable<Element> elementChildren(Element e) {
    return new ElementChildrenIterable(e, Predicates.<Element>alwaysTrue());
  }

  /** Lazily walks the direct children of an element, yielding those accepted by a predicate. */
  private static final class ElementChildrenIterable implements Iterable<Element> {

    private final Element e;
    private final Predicate<? super Element> predicate;

    public ElementChildrenIterable(final Element e, final Predicate<? super Element> predicate) {
      this.e = checkNotNull(e);
      this.predicate = checkNotNull(predicate);
    }

    @Override
    public Iterator<Element> iterator() {
      return new ElementChildrenIterator();
    }

    private final class ElementChildrenIterator extends AbstractIterator<Element> {

      /** The next node to examine, or {@code null} once the siblings are exhausted. */
      private Node curNode = e.getFirstChild();

      @Override
      protected Element computeNext() {
        while (curNode != null) {
          final Node candidate = curNode;
          // advance before testing so the cursor is already past a returned element
          curNode = curNode.getNextSibling();
          if (candidate instanceof Element && predicate.apply((Element) candidate)) {
            return (Element) candidate;
          }
        }
        return endOfData();
      }
    }
  }

  /**
   * A human-consumable string representation of an element with its attributes but without its
   * children. Do not depend on the particular form of this.
   */
  public static String prettyPrintElementLocally(Element e) {
    final ImmutableMap.Builder<String, String> ret = ImmutableMap.builder();
    final NamedNodeMap attributes = e.getAttributes();
    for (int i = 0; i < attributes.getLength(); ++i) {
      final Node attr = attributes.item(i);
      ret.put(attr.getNodeName(), "\"" + attr.getNodeValue() + "\"");
    }
    return "<"
        + e.getNodeName()
        + " "
        + Joiner.on(" ").withKeyValueSeparator("=").join(ret.build())
        + "/>";
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy