All downloads are free. The search and download functionality uses the official Maven repository.

eu.cdevreeze.yaidom2.node.saxon.SaxonNodes.scala Maven / Gradle / Ivy

/*
 * Copyright 2019-2019 Chris de Vreeze
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package eu.cdevreeze.yaidom2.node.saxon

import java.net.URI

import scala.collection.immutable.ArraySeq
import scala.compat.java8.OptionConverters._
import scala.compat.java8.StreamConverters._

import eu.cdevreeze.yaidom2.core.EName
import eu.cdevreeze.yaidom2.core.QName
import eu.cdevreeze.yaidom2.core.Scope
import eu.cdevreeze.yaidom2.queryapi.ElemStep
import eu.cdevreeze.yaidom2.queryapi.fun.BackingElemFunctionsApi
import eu.cdevreeze.yaidom2.queryapi.oo.BackingNodes
import net.sf.saxon.s9api.XdmNode
import net.sf.saxon.s9api.XdmNodeKind
import net.sf.saxon.s9api.streams.Predicates._
import net.sf.saxon.s9api.streams.Step
import net.sf.saxon.s9api.streams.Steps._

/**
 * Saxon-backed nodes.
 *
 * @author Chris de Vreeze
 */
object SaxonNodes {

  /**
   * Arbitrary Saxon node.
   *
   * Common supertype of the Saxon-backed node wrappers in this object: `Elem`, `Text`, `Comment` and
   * `ProcessingInstruction`. The trait is sealed, so it can only be extended within this source file.
   */
  sealed trait Node extends BackingNodes.Node

  /**
   * Potential Saxon document child, so an element, processing instruction or comment.
   *
   * In this object it is extended by `Elem`, `Comment` and `ProcessingInstruction`; `Text` extends `Node` directly
   * instead. The trait is sealed, so it can only be extended within this source file.
   */
  sealed trait CanBeDocumentChild extends Node with BackingNodes.CanBeDocumentChild

  /**
   * Saxon element node, offering the `BackingNodes.Elem` element query API.
   *
   * This is an ordinary final case class around an `XdmNode` (it is not a value class), so every wrapper element
   * is a real object, and construction fails with an `IllegalArgumentException` if the wrapped node is not an
   * element node. Nearly all methods forward to the functions of the companion object, which work directly on
   * `XdmNode`s; nodes passed to predicates and nodes returned as results are wrapped in `Elem` again.
   */
  final case class Elem(xdmNode: XdmNode) extends CanBeDocumentChild with BackingNodes.Elem {
    require(xdmNode.getNodeKind == XdmNodeKind.ELEMENT, s"Not an element node: $xdmNode")

    type ThisElem = Elem

    type ThisNode = Node

    // ElemApi

    def filterChildElems(p: ThisElem => Boolean): Seq[ThisElem] =
      Elem.filterChildElems(xdmNode, node => p(Elem(node))).map(Elem(_))

    def findChildElem(p: ThisElem => Boolean): Option[ThisElem] =
      Elem.findChildElem(xdmNode, node => p(Elem(node))).map(Elem(_))

    def filterDescendantElems(p: ThisElem => Boolean): Seq[ThisElem] =
      Elem.filterDescendantElems(xdmNode, node => p(Elem(node))).map(Elem(_))

    def findDescendantElem(p: ThisElem => Boolean): Option[ThisElem] =
      Elem.findDescendantElem(xdmNode, node => p(Elem(node))).map(Elem(_))

    def filterDescendantElemsOrSelf(p: ThisElem => Boolean): Seq[ThisElem] =
      Elem.filterDescendantElemsOrSelf(xdmNode, node => p(Elem(node))).map(Elem(_))

    def findDescendantElemOrSelf(p: ThisElem => Boolean): Option[ThisElem] =
      Elem.findDescendantElemOrSelf(xdmNode, node => p(Elem(node))).map(Elem(_))

    def findTopmostElems(p: ThisElem => Boolean): Seq[ThisElem] =
      Elem.findTopmostElems(xdmNode, node => p(Elem(node))).map(Elem(_))

    def findTopmostElemsOrSelf(p: ThisElem => Boolean): Seq[ThisElem] =
      Elem.findTopmostElemsOrSelf(xdmNode, node => p(Elem(node))).map(Elem(_))

    // ClarkElemApi

    def name: EName = Elem.name(xdmNode)

    def attributes: Iterable[(EName, String)] = Elem.attributes(xdmNode)

    def localName: String = Elem.localName(xdmNode)

    def namespaceOption: Option[String] = Elem.namespaceOption(xdmNode)

    def namespaceAsString: String = Elem.namespaceAsString(xdmNode)

    def attrOption(attributeName: EName): Option[String] =
      Elem.attrOption(xdmNode, attributeName)

    def attrOption(attributeNamespaceOption: Option[String], attributeLocalName: String): Option[String] =
      Elem.attrOption(xdmNode, attributeNamespaceOption, attributeLocalName)

    def attrOption(attributeNamespace: String, attributeLocalName: String): Option[String] =
      Elem.attrOption(xdmNode, attributeNamespace, attributeLocalName)

    def attrOption(attributeLocalName: String): Option[String] =
      Elem.attrOption(xdmNode, attributeLocalName)

    def attr(attributeName: EName): String =
      Elem.attr(xdmNode, attributeName)

    def attr(attributeNamespaceOption: Option[String], attributeLocalName: String): String =
      Elem.attr(xdmNode, attributeNamespaceOption, attributeLocalName)

    def attr(attributeNamespace: String, attributeLocalName: String): String =
      Elem.attr(xdmNode, attributeNamespace, attributeLocalName)

    def attr(attributeLocalName: String): String =
      Elem.attr(xdmNode, attributeLocalName)

    def text: String = Elem.text(xdmNode)

    def normalizedText: String = Elem.normalizedText(xdmNode)

    def trimmedText: String = Elem.trimmedText(xdmNode)

    // ScopedElemApi

    def scope: Scope = Elem.scope(xdmNode)

    def qname: QName = Elem.qname(xdmNode)

    def attributesByQName: Iterable[(QName, String)] = Elem.attributesByQName(xdmNode)

    def textAsQName: QName = Elem.textAsQName(xdmNode)

    def textAsResolvedQName: EName = Elem.textAsResolvedQName(xdmNode)

    def attrAsQNameOption(attributeName: EName): Option[QName] =
      Elem.attrAsQNameOption(xdmNode, attributeName)

    def attrAsQNameOption(attributeNamespaceOption: Option[String], attributeLocalName: String): Option[QName] =
      Elem.attrAsQNameOption(xdmNode, attributeNamespaceOption, attributeLocalName)

    def attrAsQNameOption(attributeNamespace: String, attributeLocalName: String): Option[QName] =
      Elem.attrAsQNameOption(xdmNode, attributeNamespace, attributeLocalName)

    def attrAsQName(attributeName: EName): QName =
      Elem.attrAsQName(xdmNode, attributeName)

    def attrAsQName(attributeNamespaceOption: Option[String], attributeLocalName: String): QName =
      Elem.attrAsQName(xdmNode, attributeNamespaceOption, attributeLocalName)

    def attrAsQName(attributeNamespace: String, attributeLocalName: String): QName =
      Elem.attrAsQName(xdmNode, attributeNamespace, attributeLocalName)

    def attrAsResolvedQNameOption(attributeName: EName): Option[EName] =
      Elem.attrAsResolvedQNameOption(xdmNode, attributeName)

    def attrAsResolvedQNameOption(attributeNamespaceOption: Option[String], attributeLocalName: String): Option[EName] =
      Elem.attrAsResolvedQNameOption(xdmNode, attributeNamespaceOption, attributeLocalName)

    def attrAsResolvedQNameOption(attributeNamespace: String, attributeLocalName: String): Option[EName] =
      Elem.attrAsResolvedQNameOption(xdmNode, attributeNamespace, attributeLocalName)

    def attrAsResolvedQName(attributeName: EName): EName =
      Elem.attrAsResolvedQName(xdmNode, attributeName)

    def attrAsResolvedQName(attributeNamespaceOption: Option[String], attributeLocalName: String): EName =
      Elem.attrAsResolvedQName(xdmNode, attributeNamespaceOption, attributeLocalName)

    def attrAsResolvedQName(attributeNamespace: String, attributeLocalName: String): EName =
      Elem.attrAsResolvedQName(xdmNode, attributeNamespace, attributeLocalName)

    // BackingElemApi

    def findParentElem(p: ThisElem => Boolean): Option[ThisElem] =
      Elem.findParentElem(xdmNode, node => p(Elem(node))).map(Elem(_))

    def filterAncestorElems(p: ThisElem => Boolean): Seq[ThisElem] =
      Elem.filterAncestorElems(xdmNode, node => p(Elem(node))).map(Elem(_))

    def findAncestorElem(p: ThisElem => Boolean): Option[ThisElem] =
      Elem.findAncestorElem(xdmNode, node => p(Elem(node))).map(Elem(_))

    def filterAncestorElemsOrSelf(p: ThisElem => Boolean): Seq[ThisElem] =
      Elem.filterAncestorElemsOrSelf(xdmNode, node => p(Elem(node))).map(Elem(_))

    def findAncestorElemOrSelf(p: ThisElem => Boolean): Option[ThisElem] =
      Elem.findAncestorElemOrSelf(xdmNode, node => p(Elem(node))).map(Elem(_))

    def baseUriOption: Option[URI] = Elem.baseUriOption(xdmNode)

    def baseUri: URI = Elem.baseUri(xdmNode)

    def docUriOption: Option[URI] = Elem.docUriOption(xdmNode)

    def docUri: URI = Elem.docUri(xdmNode)

    def rootElem: ThisElem = {
      val rootNode: XdmNode = Elem.rootElem(xdmNode)
      Elem(rootNode)
    }

    // ClarkNodes.Elem

    /**
     * Returns the child nodes, keeping only those node kinds that `Node.opt` can wrap.
     */
    def children: Seq[ThisNode] =
      Elem.children(xdmNode).flatMap(childNode => Node.opt(childNode))

    /**
     * Applies the given step to this element. The step is invoked on this wrapper directly; the call does not
     * go through `Elem.select` on the underlying `XdmNode`.
     */
    def select(step: ElemStep[Elem]): Seq[Elem] =
      step(this)
  }

  /**
   * Saxon text node. The wrapped `XdmNode` must be of kind `XdmNodeKind.TEXT`, or else construction fails.
   */
  final case class Text(xdmNode: XdmNode) extends Node with BackingNodes.Text {
    require(XdmNodeKind.TEXT == xdmNode.getNodeKind, s"Not a text node: $xdmNode")

    /**
     * The string value of the underlying Saxon node.
     */
    def text: String = xdmNode.getUnderlyingNode.getStringValue
  }

  /**
   * Saxon comment node. The wrapped `XdmNode` must be of kind `XdmNodeKind.COMMENT`, or else construction fails.
   */
  final case class Comment(xdmNode: XdmNode) extends CanBeDocumentChild with BackingNodes.Comment {
    require(XdmNodeKind.COMMENT == xdmNode.getNodeKind, s"Not a comment node: $xdmNode")

    /**
     * The string value of the underlying Saxon node.
     */
    def text: String = xdmNode.getUnderlyingNode.getStringValue
  }

  /**
   * Saxon processing instruction node. The wrapped `XdmNode` must be of kind
   * `XdmNodeKind.PROCESSING_INSTRUCTION`, or else construction fails.
   */
  final case class ProcessingInstruction(xdmNode: XdmNode) extends CanBeDocumentChild with BackingNodes.ProcessingInstruction {
    require(
      XdmNodeKind.PROCESSING_INSTRUCTION == xdmNode.getNodeKind,
      s"Not a processing instruction node: $xdmNode")

    /**
     * The target, taken from the display name of the underlying Saxon node.
     */
    def target: String = xdmNode.getUnderlyingNode.getDisplayName

    /**
     * The data, taken from the string value of the underlying Saxon node.
     */
    def data: String = xdmNode.getUnderlyingNode.getStringValue
  }

  /**
   * Factory and name-extraction helpers for Saxon nodes.
   */
  object Node {

    /**
     * Wraps the given Saxon node in the matching `Node` subtype, if it is an element, text, comment or
     * processing instruction node. For any other node kind, None is returned.
     */
    def opt(xdmNode: XdmNode): Option[Node] = {
      val kind = xdmNode.getNodeKind

      if (kind == XdmNodeKind.ELEMENT) {
        Some(Elem(xdmNode))
      } else if (kind == XdmNodeKind.TEXT) {
        Some(Text(xdmNode))
      } else if (kind == XdmNodeKind.COMMENT) {
        Some(Comment(xdmNode))
      } else if (kind == XdmNodeKind.PROCESSING_INSTRUCTION) {
        Some(ProcessingInstruction(xdmNode))
      } else {
        None
      }
    }

    /**
     * Builds the expanded name from the URI and local part of the underlying node. An empty URI string is
     * interpreted as "no namespace".
     */
    def extractEName(xdmNode: XdmNode): EName = {
      val underlying = xdmNode.getUnderlyingNode
      val uri: String = underlying.getURI
      val uriOption: Option[String] = Some(uri).filterNot(_ == "")
      EName(uriOption, underlying.getLocalPart)
    }

    /**
     * Builds the qualified name from the prefix and local part of the underlying node. An empty prefix string
     * is interpreted as "no prefix".
     */
    def extractQName(xdmNode: XdmNode): QName = {
      val underlying = xdmNode.getUnderlyingNode
      val prefix: String = underlying.getPrefix
      val prefixOption: Option[String] = Some(prefix).filterNot(_ == "")
      QName(prefixOption, underlying.getLocalPart)
    }

    // Fallback URI, used where a base or document URI is absent
    private[saxon] val EmptyUri: URI = URI.create("")
  }

  /**
   * Functional element query API on raw Saxon `XdmNode`s, implementing `BackingElemFunctionsApi` with
   * both the element type and the node type being `XdmNode`.
   *
   * The `Elem` case class forwards its query methods to these functions. The functions themselves do not check
   * that the passed `XdmNode` is an element node.
   */
  object Elem extends BackingElemFunctionsApi {

    type ElemType = XdmNode

    type NodeType = XdmNode

    // Element navigation along the child and descendant axes. Only element nodes are passed to the predicate.

    def filterChildElems(elem: ElemType, p: ElemType => Boolean): Seq[ElemType] = {
      filterElems(elem, child(), p)
    }

    def findChildElem(elem: ElemType, p: ElemType => Boolean): Option[ElemType] = {
      findElem(elem, child(), p)
    }

    def filterDescendantElems(elem: ElemType, p: ElemType => Boolean): Seq[ElemType] = {
      filterElems(elem, descendant(), p)
    }

    def findDescendantElem(elem: ElemType, p: ElemType => Boolean): Option[ElemType] = {
      findElem(elem, descendant(), p)
    }

    def filterDescendantElemsOrSelf(elem: ElemType, p: ElemType => Boolean): Seq[ElemType] = {
      filterElems(elem, descendantOrSelf(), p)
    }

    def findDescendantElemOrSelf(elem: ElemType, p: ElemType => Boolean): Option[ElemType] = {
      findElem(elem, descendantOrSelf(), p)
    }

    /**
     * Returns the descendant elements obeying the predicate that have no ancestor (below the given element)
     * obeying the predicate. All results are accumulated into one builder, so no intermediate collections are
     * copied per recursion level.
     */
    def findTopmostElems(elem: ElemType, p: ElemType => Boolean): Seq[ElemType] = {
      val builder = ArraySeq.newBuilder[ElemType]
      filterChildElems(elem, _ => true).foreach(che => collectTopmostElemsOrSelf(che, p, builder))
      builder.result()
    }

    /**
     * Like `findTopmostElems`, but the given element itself is a candidate as well. If it obeys the predicate,
     * it is the only result.
     */
    def findTopmostElemsOrSelf(elem: ElemType, p: ElemType => Boolean): Seq[ElemType] = {
      val builder = ArraySeq.newBuilder[ElemType]
      collectTopmostElemsOrSelf(elem, p, builder)
      builder.result()
    }

    def name(elem: ElemType): EName = {
      Node.extractEName(elem)
    }

    /**
     * Returns the attributes (selected via the attribute axis) as pairs of expanded name and string value.
     */
    def attributes(elem: ElemType): Iterable[(EName, String)] = {
      val stream = elem.select(attribute())
      stream.toScala(ArraySeq).map(n => Node.extractEName(n) -> n.getStringValue)
    }

    def localName(elem: ElemType): String = {
      elem.getUnderlyingNode.getLocalPart
    }

    /**
     * Returns the namespace URI, or None if the URI of the underlying node is the empty string.
     */
    def namespaceOption(elem: ElemType): Option[String] = {
      val nsAsString = namespaceAsString(elem)

      if (nsAsString.isEmpty) None else Some(nsAsString)
    }

    def namespaceAsString(elem: ElemType): String = {
      elem.getUnderlyingNode.getURI
    }

    // Attribute lookup. An absent namespace is passed to Saxon as the empty string.

    def attrOption(elem: ElemType, attributeName: EName): Option[String] = {
      selectAttributeValue(elem, attributeName.namespaceUriOption.getOrElse(""), attributeName.localPart)
    }

    def attrOption(elem: ElemType, attributeNamespaceOption: Option[String], attributeLocalName: String): Option[String] = {
      selectAttributeValue(elem, attributeNamespaceOption.getOrElse(""), attributeLocalName)
    }

    /**
     * Finds the value of the attribute with the given non-empty namespace and local name.
     * Throws an `IllegalArgumentException` if the namespace is the empty string.
     */
    def attrOption(elem: ElemType, attributeNamespace: String, attributeLocalName: String): Option[String] = {
      require(attributeNamespace.nonEmpty, s"Empty namespace URI not allowed")

      selectAttributeValue(elem, attributeNamespace, attributeLocalName)
    }

    def attrOption(elem: ElemType, attributeLocalName: String): Option[String] = {
      selectAttributeValue(elem, "", attributeLocalName)
    }

    // The attr overloads below throw a NoSuchElementException naming the attribute, if it is missing.

    def attr(elem: ElemType, attributeName: EName): String = {
      attrOption(elem, attributeName).getOrElse(missingAttribute(attributeName))
    }

    def attr(elem: ElemType, attributeNamespaceOption: Option[String], attributeLocalName: String): String = {
      attrOption(elem, attributeNamespaceOption, attributeLocalName).getOrElse(
        missingAttribute(EName(attributeNamespaceOption, attributeLocalName)))
    }

    def attr(elem: ElemType, attributeNamespace: String, attributeLocalName: String): String = {
      attrOption(elem, attributeNamespace, attributeLocalName).getOrElse(
        missingAttribute(EName(Some(attributeNamespace), attributeLocalName)))
    }

    def attr(elem: ElemType, attributeLocalName: String): String = {
      attrOption(elem, attributeLocalName).getOrElse(
        missingAttribute(EName(None, attributeLocalName)))
    }

    /**
     * Returns the concatenated string values of the text children of the element.
     */
    def text(elem: ElemType): String = {
      val stream = elem.select(child(isText))
      stream.toScala(ArraySeq).map(_.getUnderlyingNode.getStringValue).mkString
    }

    def normalizedText(elem: ElemType): String = {
      normalizeString(text(elem))
    }

    def trimmedText(elem: ElemType): String = {
      text(elem).trim
    }

    /**
     * Returns all nodes on the child axis, without filtering on node kind.
     */
    def children(elem: ElemType): Seq[NodeType] = {
      val stream = elem.select(child())
      stream.toScala(ArraySeq)
    }

    def select(elem: ElemType, step: ElemStep[ElemType]): Seq[ElemType] = {
      step(elem)
    }

    /**
     * Builds the scope from the nodes on the namespace axis of the element.
     */
    def scope(elem: ElemType): Scope = {
      val stream = elem.select(namespace())

      val result = stream.toScala(ArraySeq).map { n =>
        // Not very transparent: prefix is "display name" and namespace URI is "string value"
        val prefix = n.getUnderlyingNode.getDisplayName
        val nsUri = n.getUnderlyingNode.getStringValue
        (prefix -> nsUri)
      }
      Scope.from(result.to(Map))
    }

    def qname(elem: ElemType): QName = {
      Node.extractQName(elem)
    }

    /**
     * Returns the attributes (selected via the attribute axis) as pairs of qualified name and string value.
     */
    def attributesByQName(elem: ElemType): Iterable[(QName, String)] = {
      val stream = elem.select(attribute())
      stream.toScala(ArraySeq).map(n => Node.extractQName(n) -> n.getStringValue)
    }

    def textAsQName(elem: ElemType): QName = {
      QName.parse(text(elem).trim)
    }

    /**
     * Parses the trimmed element text as a QName and resolves it against the scope of the element.
     * Fails with a `RuntimeException` (via `sys.error`) if the QName cannot be resolved.
     */
    def textAsResolvedQName(elem: ElemType): EName = {
      // Computed once, and reused in the error message
      val elemScope = scope(elem)
      val qn = textAsQName(elem)

      elemScope.resolveQNameOption(qn).getOrElse(
        sys.error(s"Could not resolve QName-valued element text $qn, given scope [$elemScope]"))
    }

    // QName-valued attributes. The attribute value is trimmed before being parsed as a QName.

    def attrAsQNameOption(elem: ElemType, attributeName: EName): Option[QName] = {
      attrOption(elem, attributeName).map(v => QName.parse(v.trim))
    }

    def attrAsQNameOption(elem: ElemType, attributeNamespaceOption: Option[String], attributeLocalName: String): Option[QName] = {
      attrOption(elem, attributeNamespaceOption, attributeLocalName).map(v => QName.parse(v.trim))
    }

    def attrAsQNameOption(elem: ElemType, attributeNamespace: String, attributeLocalName: String): Option[QName] = {
      attrOption(elem, attributeNamespace, attributeLocalName).map(v => QName.parse(v.trim))
    }

    def attrAsQName(elem: ElemType, attributeName: EName): QName = {
      attrAsQNameOption(elem, attributeName).getOrElse(
        sys.error(s"Missing QName-valued attribute $attributeName"))
    }

    def attrAsQName(elem: ElemType, attributeNamespaceOption: Option[String], attributeLocalName: String): QName = {
      attrAsQNameOption(elem, attributeNamespaceOption, attributeLocalName).getOrElse(
        sys.error(s"Missing QName-valued attribute ${EName(attributeNamespaceOption, attributeLocalName)}"))
    }

    def attrAsQName(elem: ElemType, attributeNamespace: String, attributeLocalName: String): QName = {
      attrAsQNameOption(elem, attributeNamespace, attributeLocalName).getOrElse(
        sys.error(s"Missing QName-valued attribute ${EName(Some(attributeNamespace), attributeLocalName)}"))
    }

    // Resolved QName-valued attributes. A present but unresolvable QName is an error, not None.

    def attrAsResolvedQNameOption(elem: ElemType, attributeName: EName): Option[EName] = {
      attrAsQNameOption(elem, attributeName).map(qn => resolveAttributeQName(elem, qn))
    }

    def attrAsResolvedQNameOption(elem: ElemType, attributeNamespaceOption: Option[String], attributeLocalName: String): Option[EName] = {
      attrAsQNameOption(elem, attributeNamespaceOption, attributeLocalName).map(qn => resolveAttributeQName(elem, qn))
    }

    def attrAsResolvedQNameOption(elem: ElemType, attributeNamespace: String, attributeLocalName: String): Option[EName] = {
      attrAsQNameOption(elem, attributeNamespace, attributeLocalName).map(qn => resolveAttributeQName(elem, qn))
    }

    def attrAsResolvedQName(elem: ElemType, attributeName: EName): EName = {
      attrAsResolvedQNameOption(elem, attributeName).getOrElse(
        sys.error(s"Missing QName-valued attribute $attributeName"))
    }

    def attrAsResolvedQName(elem: ElemType, attributeNamespaceOption: Option[String], attributeLocalName: String): EName = {
      attrAsResolvedQNameOption(elem, attributeNamespaceOption, attributeLocalName).getOrElse(
        sys.error(s"Missing QName-valued attribute ${EName(attributeNamespaceOption, attributeLocalName)}"))
    }

    def attrAsResolvedQName(elem: ElemType, attributeNamespace: String, attributeLocalName: String): EName = {
      attrAsResolvedQNameOption(elem, attributeNamespace, attributeLocalName).getOrElse(
        sys.error(s"Missing QName-valued attribute ${EName(Some(attributeNamespace), attributeLocalName)}"))
    }

    // Element navigation along the parent and ancestor axes. Only element nodes are passed to the predicate.

    def findParentElem(elem: ElemType, p: ElemType => Boolean): Option[ElemType] = {
      findElem(elem, parent(), p)
    }

    def filterAncestorElems(elem: ElemType, p: ElemType => Boolean): Seq[ElemType] = {
      filterElems(elem, ancestor(), p)
    }

    def findAncestorElem(elem: ElemType, p: ElemType => Boolean): Option[ElemType] = {
      findElem(elem, ancestor(), p)
    }

    def filterAncestorElemsOrSelf(elem: ElemType, p: ElemType => Boolean): Seq[ElemType] = {
      filterElems(elem, ancestorOrSelf(), p)
    }

    def findAncestorElemOrSelf(elem: ElemType, p: ElemType => Boolean): Option[ElemType] = {
      findElem(elem, ancestorOrSelf(), p)
    }

    /**
     * Returns the base URI of the underlying node as URI, or None if Saxon returns null for it.
     */
    def baseUriOption(elem: ElemType): Option[URI] = {
      Option(elem.getUnderlyingNode.getBaseURI).map(u => URI.create(u))
    }

    /**
     * Returns the base URI, falling back to the empty URI if there is none.
     */
    def baseUri(elem: ElemType): URI = {
      baseUriOption(elem).getOrElse(Node.EmptyUri)
    }

    /**
     * Returns the system ID of the underlying node as URI, or None if Saxon returns null for it.
     */
    def docUriOption(elem: ElemType): Option[URI] = {
      Option(elem.getUnderlyingNode.getSystemId).map(u => URI.create(u))
    }

    /**
     * Returns the document URI, falling back to the empty URI if there is none.
     */
    def docUri(elem: ElemType): URI = {
      docUriOption(elem).getOrElse(Node.EmptyUri)
    }

    /**
     * Returns the last element of the ancestor-or-self elements of the given element. This relies on the
     * ancestor-or-self step yielding the outermost element last, and on the given node being an element
     * (otherwise the collection may be empty and `last` fails).
     */
    def rootElem(elem: ElemType): ElemType = {
      filterAncestorElemsOrSelf(elem, _ => true).last
    }

    // Private methods

    /**
     * Selects along the given step, keeping the element nodes that obey the predicate.
     */
    private def filterElems(elem: ElemType, step: Step[XdmNode], p: ElemType => Boolean): Seq[ElemType] = {
      val stream = elem.select(step.where(n => isElement.test(n) && p(n)))
      stream.toScala(ArraySeq)
    }

    /**
     * Selects along the given step, returning the first element node that obeys the predicate, if any.
     */
    private def findElem(elem: ElemType, step: Step[XdmNode], p: ElemType => Boolean): Option[ElemType] = {
      val stream = elem.select(step.where(n => isElement.test(n) && p(n)))
      stream.findFirst.asScala
    }

    /**
     * Adds the element to the accumulator if it obeys the predicate, and otherwise recurses into its child
     * elements, in order. This is the shared traversal behind `findTopmostElems` and `findTopmostElemsOrSelf`.
     */
    private def collectTopmostElemsOrSelf(
      elem: ElemType,
      p: ElemType => Boolean,
      acc: scala.collection.mutable.Builder[ElemType, ArraySeq[ElemType]]): Unit = {

      if (p(elem)) {
        acc += elem
      } else {
        // Recursive calls

        filterChildElems(elem, _ => true).foreach(che => collectTopmostElemsOrSelf(che, p, acc))
      }
    }

    /**
     * Looks up the string value of the attribute with the given namespace URI (the empty string meaning
     * no namespace) and local name.
     */
    private def selectAttributeValue(elem: ElemType, namespaceUri: String, localName: String): Option[String] = {
      val stream = elem.select(attribute(namespaceUri, localName))
      stream.asOptionalNode.asScala.map(_.getStringValue)
    }

    /**
     * Resolves the QName-valued attribute value against the scope of the element, failing with a
     * `RuntimeException` (via `sys.error`) if it cannot be resolved.
     */
    private def resolveAttributeQName(elem: ElemType, qn: QName): EName = {
      val elemScope = scope(elem)

      elemScope.resolveQNameOption(qn).getOrElse(
        sys.error(s"Could not resolve QName-valued attribute value $qn, given scope [$elemScope]"))
    }

    /**
     * Throws the exception for a missing attribute. It is a `NoSuchElementException`, which is the exception
     * type that calling `get` on an empty `Option` throws, but with a message naming the attribute.
     */
    private def missingAttribute(attributeName: EName): Nothing = {
      throw new NoSuchElementException(s"Missing attribute $attributeName")
    }

    /**
     * Normalizes the string, removing surrounding whitespace and normalizing internal whitespace to a single space.
     * Whitespace includes #x20 (space), #x9 (tab), #xD (carriage return), #xA (line feed). If there is only whitespace,
     * the empty string is returned. Inspired by the JDOM library.
     */
    private def normalizeString(s: String): String = {
      require(s ne null) // scalastyle:off null

      val separators = Array(' ', '\t', '\r', '\n')
      val words: Seq[String] = s.split(separators).toSeq.filterNot(_.isEmpty)

      words.mkString(" ") // Returns empty string if words.isEmpty
    }
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy