All downloads are free. Search and download functionality uses the official Maven repository.

scala.xml.parsing.FactoryAdapter.scala Maven / Gradle / Ivy

The newest version!
/*
 * Scala (https://www.scala-lang.org)
 *
 * Copyright EPFL and Lightbend, Inc.
 *
 * Licensed under Apache License 2.0
 * (http://www.apache.org/licenses/LICENSE-2.0).
 *
 * See the NOTICE file distributed with this work for
 * additional information regarding copyright ownership.
 */

package scala
package xml
package parsing

import scala.collection.Seq
import org.xml.sax.{Attributes, Locator, SAXNotRecognizedException, SAXNotSupportedException}
import org.xml.sax.ext.{DefaultHandler2, Locator2}

// can be mixed into FactoryAdapter if desired
trait ConsoleErrorHandler extends DefaultHandler2 {
  /** Warnings are dropped on purpose: crimson warns even for entity resolution. */
  override def warning(ex: SAXParseException): Unit = ()

  /** Reports a recoverable parse error on the standard error stream. */
  override def error(ex: SAXParseException): Unit = printError("Error", ex)

  /** Reports a fatal parse error on the standard error stream. */
  override def fatalError(ex: SAXParseException): Unit = printError("Fatal Error", ex)

  /**
   * Writes one line describing `ex` to `Console.err` and flushes it.
   *
   * @param errtype label placed in square brackets at the start of the line
   * @param ex      the exception supplying line number, column number and message
   */
  protected def printError(errtype: String, ex: SAXParseException): Unit = {
    val report: String = s"[$errtype]:${ex.getLineNumber}:${ex.getColumnNumber}: ${ex.getMessage}"
    val target = Console.err
    target.println(report)
    target.flush()
  }
}

/**
 * SAX adapter class, for use with Java SAX parser. Keeps track of
 *  namespace bindings, without relying on namespace handling of the
 *  underlying SAX parser (but processing the parser's namespace-related events if it is namespace-aware).
 */
abstract class FactoryAdapter extends DefaultHandler2 with factory.XMLLoader[Node] {
  // When true, `characters()` collapses every run of whitespace into a single space.
  val normalizeWhitespace: Boolean = false

  // reference to the XMLReader that parses the document; this is used to query
  // features (e.g., 'is-standalone') and properties (e.g., document-xml-version) -
  // see http://www.saxproject.org/apidoc/org/xml/sax/package-summary.html
  private var xmlReader: Option[XMLReader] = None

  // Created by `startDTD()`; receives the DTD-related events while it exists.
  private var dtdBuilder: Option[DtdBuilder] = None
  // True while a DTD builder exists and has not yet reported itself done.
  private def inDtd: Boolean = dtdBuilder.exists(builder => !builder.isDone)

  // Result of the parse; assembled in `endDocument()` and returned by `loadDocument()`.
  private var document: Option[Document] = None
  // System id reported by the document locator, if any.
  private var baseURI: Option[String] = None
  // Encoding reported by the document locator when it is a `Locator2`.
  private var xmlEncoding: Option[String] = None

  // (prefix, uri) pairs announced by `startPrefixMapping()` and consumed by the next `startElement()`.
  private var prefixMappings: List[(String, String)] = List.empty

  // TODO all the variables should be private, but - binary compatibility...
  // Nodes collected before the root element was opened.
  var prolog: List[Node] = List.empty
  // The element most recently completed by `endElement()`.
  var rootElem: Node = _
  // Nodes collected after the root element, captured in `endDocument()`.
  var epilogue: List[Node] = List.empty

  // Accumulates character data until `captureText()` turns it into a node.
  val buffer: StringBuilder = new StringBuilder()
  // Set by `startCDATA()`; makes `captureText()` emit PCData instead of Text.
  private var inCDATA: Boolean = false

  /** Stack of attribute lists, one entry per element that is currently open.
    *
    * `startElement` pushes the attributes of the element it opens and `endElement` pops them.
    *
    * Previously was a mutable [[scala.collection.mutable.Stack Stack]], but is now a mutable reference to an immutable [[scala.collection.immutable.List List]].
    *
    * @since 2.0.0
    */
  var attribStack: List[MetaData] = List.empty
  /** Stack of the nodes built so far, most recent first.
    *
    * `startElement` pushes a `null` marker; `endElement` takes everything above the nearest
    * marker as the children of the element being closed and pushes the finished element back.
    *
    * Previously was a mutable [[scala.collection.mutable.Stack Stack]], but is now a mutable reference to an immutable [[scala.collection.immutable.List List]].
    *
    * @since 2.0.0
    */
  var hStack: List[Node] = List.empty // [ element ] contains siblings
  /** Stack of the qualified names of the enclosing elements.
    *
    * `startElement` pushes the previous value of `curTag` (which is `null` for the root element)
    * and `endElement` pops it back into `curTag`.
    *
    * Previously was a mutable [[scala.collection.mutable.Stack Stack]], but is now a mutable reference to an immutable [[scala.collection.immutable.List List]].
    *
    * @since 2.0.0
    */
  var tagStack: List[String] = List.empty
  /** Stack of namespace scopes, one entry per open element plus the one pushed by `startDocument`.
    *
    * Previously was a mutable [[scala.collection.mutable.Stack Stack]], but is now a mutable reference to an immutable [[scala.collection.immutable.List List]].
    *
    * @since 2.0.0
    */
  var scopeStack: List[NamespaceBinding] = List.empty

  /** Qualified name of the element currently open; `null` outside of the root element. */
  var curTag: String = _
  /** Whether character data is currently being buffered; derived from `nodeContainsText`. */
  var capture: Boolean = false

  /**
   * Flushes the character buffer.
   *
   * If capturing is enabled and the buffer holds anything, its contents are pushed onto
   * `hStack` as a PCData node (inside a CDATA section) or a Text node (otherwise).
   * The buffer is emptied and the CDATA flag is cleared in every case.
   */
  def captureText(): Unit = {
    val hasPendingText: Boolean = capture && buffer.nonEmpty
    if (hasPendingText) {
      val content: String = buffer.toString
      val node: Node =
        if (inCDATA) createPCData(content)
        else createText(content)
      hStack = node :: hStack
    }

    buffer.clear()
    inCDATA = false
  }

  /**
   * Parses `inputSource` with `xmlReader`, using this adapter as the handler, and
   * returns the document assembled by `endDocument()`.
   *
   * @param inputSource the source to parse; must not be null
   * @param xmlReader   the reader that performs the parse
   * @return the document built while parsing
   * @throws IllegalArgumentException if `inputSource` is null
   */
  def loadDocument(inputSource: InputSource, xmlReader: XMLReader): Document = {
    if (inputSource == null) throw new IllegalArgumentException("InputSource cannot be null")

    // Registers this adapter under an optional SAX property; readers that do not
    // recognize or support the property are silently left without it.
    def installOptionalHandler(property: String): Unit =
      try xmlReader.setProperty(property, this)
      catch {
        case _: SAXNotRecognizedException | _: SAXNotSupportedException => ()
      }

    xmlReader.setContentHandler(this)
    xmlReader.setDTDHandler(this)
    // A pre-configured EntityResolver or ErrorHandler is left in place.
    if (xmlReader.getEntityResolver == null) xmlReader.setEntityResolver(this)
    if (xmlReader.getErrorHandler == null) xmlReader.setErrorHandler(this)

    // LexicalHandler first, then DeclHandler, each only if the reader supports it.
    installOptionalHandler("http://xml.org/sax/properties/lexical-handler")
    installOptionalHandler("http://xml.org/sax/properties/declaration-handler")

    // Remembered so that endDocument() can query the reader's features and properties.
    this.xmlReader = Some(xmlReader)
    xmlReader.parse(inputSource)

    document.get
  }

  // abstract methods

  /**
   * Tests if an XML element contains text.
   *
   * The answer decides whether character data is buffered while the element is open.
   *
   * @param localName name of the element being asked about
   * @return true if element named `localName` contains text.
   */
  def nodeContainsText(localName: String): Boolean // abstract

  /**
   * Creates a new non-text (tree) node.
   *
   * Called by `endElement` once all children of the element have been collected.
   *
   * @param pre      namespace prefix of the element, or `null` if its name has none
   * @param elemName name of the element without its prefix
   * @param attribs  attributes of the element, excluding namespace declarations
   * @param scope    namespace bindings in effect for the element
   * @param chIter   children of the element, in document order
   * @return a new XML element.
   */
  def createNode(pre: String, elemName: String, attribs: MetaData,
                 scope: NamespaceBinding, chIter: List[Node]): Node // abstract

  /**
   * Creates a Text node.
   *
   * @param text the captured character data
   * @return a new Text node.
   */
  def createText(text: String): Text // abstract

  /**
   * Creates a PCData node.
   *
   * @param text the character data captured inside a CDATA section
   * @return a new PCData node.
   */
  def createPCData(text: String): PCData // abstract

  /**
   * Creates the node(s) for a processing instruction; every returned node is added to the tree.
   *
   * @param target the processing instruction target
   * @param data   the processing instruction data
   */
  def createProcInstr(target: String, data: String): Seq[ProcInstr]

  /**
   * Creates the node(s) for a comment; every returned node is added to the tree.
   *
   * @param characters the text of the comment
   */
  def createComment(characters: String): Seq[Comment]

  /* ContentHandler methods */

  // Since Java 14, ContentHandler has a method that delivers the values from the XML declaration:
  //   def declaration(version: String, encoding: String, standalone: String): Unit = ()
  // but it'll be years until we are all on Java 14 *and* Xerces starts calling this method...

  /**
   * Records the base URI of the document and, when the locator is a `Locator2`,
   * the encoding it reports.
   */
  override def setDocumentLocator(locator: Locator): Unit = {
    baseURI = Option(locator.getSystemId)

    // Note: Xerces calls setDocumentLocator() (and startDocument()) *before* it even reads the XML declaration;
    // the version an extended locator would deliver here - getXMLVersion - is always "1.0";
    // the real version is retrieved as a property of the XML reader in endDocument().
    locator match {
      case extended: Locator2 => xmlEncoding = Option(extended.getEncoding)
      case _ => ()
    }
  }

  /** Seeds the namespace scope stack with the top-level scope. */
  override def startDocument(): Unit = {
    // TODO turn into a parameter
    scopeStack = TopScope :: scopeStack
  }

  /**
   * Assembles the [[Document]] from the collected prolog, root element and epilogue,
   * fills in its DTD, base URI, encoding, XML version and standalone flag, and then
   * resets this adapter's parsing state.
   *
   * The XML version and the standalone flag are queried from the XML reader stored by
   * `loadDocument`. Both stay `None` when no reader was stored (the adapter was handed to a
   * parser directly), when the reader does not recognize or support the query, or - for the
   * version - when the reader answers with something that is not a string.
   */
  override def endDocument(): Unit = {
    // capture the epilogue at the end of the document:
    // everything on the stack except its bottom entry, the root element
    epilogue = hStack.init.reverse

    val doc = new Document
    this.document = Some(doc)
    doc.children = prolog ++ rootElem ++ epilogue
    doc.docElem = rootElem
    doc.dtd = dtdBuilder.map(_.dtd).orNull
    doc.baseURI = baseURI.orNull
    doc.encoding = xmlEncoding

    // Runs a query against the stored XML reader; yields None instead of failing when
    // there is no reader or the reader rejects the feature/property name.
    def askReader[A](query: XMLReader => Option[A]): Option[A] =
      xmlReader.flatMap { reader =>
        try query(reader)
        catch {
          case _: SAXNotRecognizedException | _: SAXNotSupportedException => None
        }
      }

    doc.version = askReader[String] { reader =>
      reader.getProperty("http://xml.org/sax/properties/document-xml-version") match {
        case version: String => Some(version)
        case _ => None // null or an unexpected type
      }
    }

    doc.standAlone = askReader[Boolean] { reader =>
      Some(reader.getFeature("http://xml.org/sax/features/is-standalone"))
    }

    // Note: resetting to the freshly-created state; needed only if this instance is reused, which we do not do...
    dtdBuilder = None
    xmlReader = None

    baseURI = None
    xmlEncoding = None

    hStack = hStack.last :: Nil // TODO List.empty
    scopeStack = scopeStack.tail // TODO List.empty

    rootElem = null
    prolog = List.empty
    epilogue = List.empty

    buffer.clear()
    inCDATA = false
    capture = false
    curTag = null

    attribStack = List.empty
    tagStack = List.empty
  }

  /** Remembers a prefix mapping until the next `startElement()` turns it into a namespace binding. */
  override def startPrefixMapping(prefix: String, uri: String): Unit = {
    val mapping: (String, String) = (prefix, uri)
    prefixMappings = mapping :: prefixMappings
  }

  /** Nothing to undo here: scopes are popped per element in `endElement()`. */
  override def endPrefixMapping(prefix: String): Unit = ()

  /**
   * Start element: flushes pending text, pushes the bookkeeping for the new element
   * (tag name, child marker, attributes, namespace scope) and decides whether its
   * character data will be captured.
   */
  override def startElement(
    uri: String,
    _localName: String,
    qname: String,
    attributes: Attributes
  ): Unit = {
    captureText()

    // The first element to open is the root: everything collected before it is the prolog.
    if (tagStack.isEmpty) {
      prolog = hStack.reverse
      hStack = List.empty
    }

    tagStack = curTag :: tagStack
    curTag = qname

    val (_, localName) = Utility.splitName(qname)
    capture = nodeContainsText(localName)

    // null marks where the children of this element begin on hStack
    hStack = null :: hStack

    def nullIfEmpty(s: String): String = if (s == "") null else s

    var m: MetaData = Null
    var scpe: NamespaceBinding = scopeStack.headOption.getOrElse(TopScope)

    // Walk the attributes last-to-first so that the linked lists built by prepending
    // end up in document order.
    var index: Int = attributes.getLength - 1
    while (index >= 0) {
      val attrName: String = attributes.getQName(index)
      val value: String = attributes.getValue(index)

      Utility.splitName(attrName) match {
        // xmlns:prefix="..." declares a prefixed namespace
        case (Some("xmlns"), declaredPrefix) =>
          scpe = NamespaceBinding(declaredPrefix, nullIfEmpty(value), scpe)
        // xmlns="..." declares the default namespace
        case (None, _) if attrName == "xmlns" =>
          scpe = NamespaceBinding(null, nullIfEmpty(value), scpe)
        // anything else is an ordinary attribute
        case (pre, key) =>
          m = Attribute(pre, key, Text(value), m)
      }

      index -= 1
    }

    // Add namespace bindings for the prefix mappings declared by this element
    // (if there are any, the parser is namespace-aware, and no namespace bindings were delivered as attributes).
    // All `startPrefixMapping()` events will occur immediately before the corresponding `startElement()` event.
    for ((prefix: String, uri: String) <- prefixMappings)
      scpe = NamespaceBinding(if (prefix.isEmpty) null else prefix, uri, scpe)

    // Once the `prefixMappings` are processed into `scpe`, the list is emptied out
    // so that already-declared namespaces are not re-declared on the nested elements.
    prefixMappings = List.empty

    scopeStack = scpe :: scopeStack
    attribStack = m :: attribStack
  }

  /**
   * End element: flushes pending text, builds the element from the children collected
   * since the matching `startElement`, and restores the state of the enclosing element.
   *
   * @param uri        namespace URI reported by the parser (not used)
   * @param _localName local name reported by the parser (not used; the name is taken from `qname`)
   * @param qname      qualified name of the element being closed
   */
  override def endElement(uri: String, _localName: String, qname: String): Unit = {
    captureText()
    val metaData: MetaData = attribStack.head
    attribStack = attribStack.tail

    // Everything above the null marker belongs to this element; the stack holds the
    // most recent node first, so reverse to get document order.
    val (collected, remainder) = hStack.span(_ != null)
    val children: List[Node] = collected.reverse
    hStack = remainder match {
      case null :: below => below // drop the marker itself
      case other => other
    }

    val (pre: Option[String], localName: String) = Utility.splitName(qname)
    val scp: NamespaceBinding = scopeStack.head
    scopeStack = scopeStack.tail

    // create element
    rootElem = createNode(pre.orNull, localName, metaData, scp, children)
    hStack ::= rootElem

    // Back in the enclosing element (curTag is null once the root has been closed).
    curTag = tagStack.head
    tagStack = tagStack.tail
    // `curTag` is a qualified name; ask about its local part, exactly as `startElement`
    // did when the enclosing element was opened, so that a prefixed parent gets the
    // same answer before and after its children.
    capture = curTag != null && nodeContainsText(Utility.splitName(curTag)._2) // root level
  }

  /**
   * Capture characters, possibly normalizing whitespace.
   *
   * Nothing is recorded unless capturing is enabled. Without normalization the characters
   * are appended to the buffer unchanged; with it, every run of whitespace within this
   * chunk is replaced by a single space.
   *
   * @param ch     array holding the characters
   * @param offset index of the first character to use
   * @param length number of characters to use
   */
  override def characters(ch: Array[Char], offset: Int, length: Int): Unit =
    if (capture) {
      if (normalizeWhitespace) {
        // normalizing whitespace is not compliant, but useful
        val end: Int = math.min(offset + length, ch.length)
        var i: Int = math.max(offset, 0)
        while (i < end) {
          val c: Char = ch(i)
          i += 1
          if (c.isWhitespace) {
            buffer.append(' ')
            // skip the rest of this whitespace run
            while (i < end && ch(i).isWhitespace) i += 1
          } else buffer.append(c)
        }
      } else {
        // compliant: report every character
        buffer.appendAll(ch, offset, length)
      }
    }

  // Whitespace that the parser reports as ignorable is dropped: it never reaches the buffer.
  override def ignorableWhitespace(ch: Array[Char], offset: Int, length: Int): Unit = ()

  /**
   * Processing instruction.
   *
   * Inside a DTD the instruction is handed to the DTD builder; elsewhere pending text is
   * flushed and the node(s) made by `createProcInstr` are pushed onto `hStack`.
   */
  override def processingInstruction(target: String, data: String): Unit =
    if (!inDtd) {
      captureText()
      val instructions: List[Node] = createProcInstr(target, data).toList
      hStack = instructions.reverse ::: hStack
    } else {
      dtdBuilder.foreach(_.processingInstruction(target, data))
    }

  // Skipped entities leave no trace in the resulting tree.
  override def skippedEntity(name: String): Unit = ()

  /* LexicalHandler methods (see https://docs.oracle.com/javase/8/docs/api/org/xml/sax/ext/LexicalHandler.html) */

  /** Starts collecting the DTD: from here on DTD-related events go to a fresh builder. */
  override def startDTD(name: String, publicId: String, systemId: String): Unit = {
    val builder: DtdBuilder = DtdBuilder(name, publicId, systemId)
    dtdBuilder = Some(builder)
  }

  // Tells the DTD builder, if one was created by startDTD(), that the DTD has ended
  // (presumably this is what makes the builder report itself as done - see `inDtd`).
  override def endDTD(): Unit = dtdBuilder.foreach(_.endDTD())

  // Entity boundaries are forwarded to the DTD builder when one exists and ignored otherwise.
  override def startEntity(name: String): Unit = dtdBuilder.foreach(_.startEntity(name))
  override def endEntity(name: String): Unit = dtdBuilder.foreach(_.endEntity(name))

  /**
   * Start of a CDATA section.
   *
   * Text buffered so far is flushed first so it becomes its own node; the flag is set
   * afterwards because `captureText()` clears it.
   */
  override def startCDATA(): Unit = {
    captureText()
    inCDATA = true
  }

  /**
   * End of a CDATA section.
   *
   * Flushing the buffer here turns the section's content into a PCData node
   * and clears the CDATA flag.
   */
  override def endCDATA(): Unit = captureText()

  /**
   * Comment.
   *
   * Inside a DTD the comment is handed to the DTD builder; elsewhere pending text is
   * flushed and the node(s) made by `createComment` are pushed onto `hStack`.
   *
   * @param ch     array holding the comment characters
   * @param start  index of the first character of the comment
   * @param length number of characters in the comment
   */
  override def comment(ch: Array[Char], start: Int, length: Int): Unit = {
    val commentText: String = ch.slice(start, start + length).mkString
    if (!inDtd) {
      captureText()
      val comments: List[Node] = createComment(commentText).toList
      hStack = comments.reverse ::: hStack
    } else {
      dtdBuilder.foreach(_.comment(commentText))
    }
  }

  /* DTDHandler methods (see https://docs.oracle.com/javase/8/docs/api/org/xml/sax/DTDHandler.html) */

  // Notation declarations are forwarded to the DTD builder when one exists and ignored otherwise.
  override def notationDecl(name: String, publicId: String, systemId: String): Unit =
    dtdBuilder.foreach(_.notationDecl(name, publicId, systemId))

  // Unparsed entity declarations are forwarded to the DTD builder when one exists and ignored otherwise.
  override def unparsedEntityDecl(name: String, publicId: String, systemId: String, notationName: String): Unit =
    dtdBuilder.foreach(_.unparsedEntityDecl(name, publicId, systemId, notationName))

  /* DeclHandler methods (see https://docs.oracle.com/javase/8/docs/api/org/xml/sax/ext/DeclHandler.html) */

  // Each declaration event below is passed through unchanged to the DTD builder
  // when one exists, and is ignored otherwise.

  // Element type declaration.
  override def elementDecl(name: String, model: String): Unit =
    dtdBuilder.foreach(_.elementDecl(name, model))

  // Attribute declaration for element `eName`.
  override def attributeDecl(eName: String, aName: String, `type`: String, mode: String, value: String): Unit =
    dtdBuilder.foreach(_.attributeDecl(eName, aName, `type`, mode, value))

  // Internal entity declaration.
  override def internalEntityDecl(name: String, value: String): Unit =
    dtdBuilder.foreach(_.internalEntityDecl(name, value))

  // External entity declaration.
  override def externalEntityDecl(name: String, publicId: String, systemId: String): Unit =
    dtdBuilder.foreach(_.externalEntityDecl(name, publicId, systemId))
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy