All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.daffodil.xml.QNameBase.scala Maven / Gradle / Ivy

There is a newer version: 3.9.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.daffodil.xml

import java.net.URI
import java.net.URISyntaxException

import scala.language.reflectiveCalls
import scala.util.Try
import org.apache.daffodil.api.UnqualifiedPathStepPolicy
import org.apache.daffodil.equality.TypeEqual
import org.apache.daffodil.exceptions.Assert


/**
 * Please centralize QName handling here.
 *
 * This can be XSD-specific, but should be generic capabilities.
 *
 * There are two concrete classes of QNames for named things: Global and Local
 *
 * There are two concrete classes of referencing QNames, for references to
 * global objects (as in ref="tns:bar" and type="xs:int") and for references
 * which can be to local objects as well as global, such as in DPath step expressions like
 * ../foo:bar/baz.
 *
 * There is a precedent for how xpath expressions work in XSD which is the xpath
 * expressions that are used in the xs:selector and xs:field sub-elements of the
 * xs:key and xs:unique constraints.
 *
 * Based on this, there are two kinds of named things: globally named things, and
 * locally. The only locally named things are local element declarations. Global things
 * include elements, types, groups, etc.
 *
 * These distinctions are necessary because of XSD's elementFormDefault concept.
 * This makes matching against a local name either equivalent to matching against
 * a global name (elementFormDefault="qualified"), or local name matching is
 * namespace independent (elementFormDefault="unqualified"), which is the default
 * behavior.
 *
 * The QName objects are always constructed with a prefix (if present), local
 * name part, and namespace.
 *
 * Consider this case:
 *
 * 
 *
 * In that case, the name "bar" is resolved in the default namespace to be
 * {someURN}bar.
 *
 * However, in the absence of a default namespace binding, the name "bar" will
 * resolve to {No_Namespace}bar, and that will only have a counterpart if
 * there is a schema with no target namespace which defines bar as a global def
 * of some sort.
 *
 * Now consider this case:
 *
 * 
 *    ...
 * 
 * 
 * 
 *      
 *
 * Now, in some other place we have a DPath such as
 *
 * <.... xmlns="someURN"
 *  
 *
 * This does not match because despite the xmlns defining a default namespace,
 * that is not used when considering path steps. Both foo and bar are qualified
 * which means path steps referencing them MUST have prefixes on them.
 * (This is the behavior of Xerces as of 2014-09-29)
 *
 * If there is no default namespace at the point of reference, then
 * this will only match if
 * (1) There is no targetNamespace surrounding the decl of element bar. (Which ought
 *  to draw a warning given that there is no namespace but there is an explicit request
 *  for qualified names.)
 * (2) The schema's element form default is 'unqualified' (which is the
 * default). In this case the local name is the only thing that has to match.
 *
 * This latter case is represented by a StepRef with NoNamespace for the namespace,
 * matching against a LocalDeclQName, which will have NoNamespace by way of
 * the elementFormDefault being unqualified.
 *
 */

/*
 * Use this factory to create the right kinds of QNames.
 */
object QName {

  /**
   * Construct a RefQName from a QName string, and a scope.
   */
  def resolveRef(qnameString: String, scope: scala.xml.NamespaceBinding,
      unqualifiedPathStepPolicy: UnqualifiedPathStepPolicy): Try[RefQName] =
    RefQNameFactory.resolveRef(qnameString, scope, unqualifiedPathStepPolicy)

  def parseExtSyntax(extSyntax: String): (Option[String], NS, String) = {
    val res = try {
      extSyntax match {
      case QNameRegex.ExtQName(prefix, uriString, local) => {
        val pre = if (prefix eq null) None else Some(prefix)
        val ns = (pre, uriString) match {
          case (Some(pre), "") => throw new ExtendedQNameSyntaxException(Some(extSyntax), None)
          case (Some(pre), null) => UnspecifiedNamespace
          case (Some(pre), s) => NS(s)
          case (None, "") => NoNamespace
          case (None, null) => UnspecifiedNamespace
          case (None, s) => NS(s)
        }
        (pre, ns, local)
      }
        case _ => throw new ExtendedQNameSyntaxException(Some(extSyntax), None)
      }
  } catch {
      case ex: URISyntaxException => throw new ExtendedQNameSyntaxException(Some(extSyntax), Some(ex))
      case ia: IllegalArgumentException => {
        val ex = ia.getCause()
        if (ex ne null)
          throw new ExtendedQNameSyntaxException(Some(extSyntax), Some(ex))
        else
          throw ia
      }
    }
    res
  }

  /**
   * Specialized getQName function for handling
   * manually specified variables via the CLI.
   *
   * Variables will be of the format:
   *
   * 1. {nsURI}varName=value
   * 2. {}varName=value       // explicitly means NoNamespace
   * 3. varName=value         // unspecified namespace, i.e., might default
   */
  def refQNameFromExtendedSyntax(extSyntax: String): Try[RefQName] = Try {
    val (pre, ns, local) = parseExtSyntax(extSyntax)
    val res = RefQName(pre, local, ns)
    res
  }

  def resolveStep(qnameString: String, scope: scala.xml.NamespaceBinding,
      unqualifiedPathStepPolicy: UnqualifiedPathStepPolicy): Try[StepQName] =
    StepQNameFactory.resolveRef(qnameString, scope, unqualifiedPathStepPolicy)

  def createLocal(name: String, targetNamespace: NS, isQualified: Boolean,
    scope: scala.xml.NamespaceBinding) = {
    val ns = if (isQualified) targetNamespace else NoNamespace
    // TODO: where we parse xmlSchemaDocument, a check for
    // xs:schema with no target namespace, but elementFormDefault 'qualified'
    // should emit a warning. It is not ALWAYS incorrect, as a schema
    // designed for inclusion into another schema (via xs:include)
    // can take a position on how its local element names are to
    // be qualified without having a target namespace itself.
    //
    // But,... it is very likely to be an error.
    //
    LocalDeclQName(optPrefix(scope, ns), name, ns)
  }

  private def optPrefix(scope: scala.xml.NamespaceBinding, targetNamespace:NS) = {
   val res = if (scope eq null) None
    else if (targetNamespace == NoNamespace) None
    else if (targetNamespace == UnspecifiedNamespace) None
    else {
      val prefix = scope.getPrefix(targetNamespace)
      if (prefix eq null) None
      else Some(prefix)
    }
    res
  }

  def createGlobal(name: String, targetNamespace: NS, scope: scala.xml.NamespaceBinding) = {
    GlobalQName(optPrefix(scope, targetNamespace), name, targetNamespace)
  }
}

protected sealed abstract class QNameSyntaxExceptionBase(kind: String, offendingSyntax: Option[String], cause: Option[Throwable])
  extends Exception(offendingSyntax.getOrElse(null), cause.getOrElse(null)) {

  override def getMessage = {
    val intro = "Invalid syntax for " + kind + " "
    val details = (offendingSyntax, cause) match {
      case (Some(syntax), Some(cause)) => "'%s'. Caused by: '%s'".format(syntax, cause)
      case (None, Some(cause)) => "'%s'".format(cause.getMessage())
      case (Some(syntax), None) => "'%s'.".format(syntax)
      case _ => Assert.usageError("supply either offendingSyntax, or cause or both")
    }
    intro + details
  }
}

class ExtendedQNameSyntaxException(offendingSyntax: Option[String], cause: Option[Throwable])
  extends QNameSyntaxExceptionBase("extended QName", offendingSyntax, cause)

class QNameSyntaxException(offendingSyntax: Option[String], cause: Option[Throwable])
  extends QNameSyntaxExceptionBase("QName", offendingSyntax, cause)

class QNameUndefinedPrefixException(pre: String)
  extends Exception("Undefined QName prefix '%s'".format(pre))

trait QNameBase extends Serializable {

  /**
   * The prefix is not generally involved in matching, but they
   * must show up in diagnostic messages. Mistakes by having the wrong prefix
   * or by omitting one, are very common.
   */
  def prefix: Option[String]
  def local: String
  def namespace: NS // No namespace is represented by the NoNamespace object.

  override def toString = toPrettyString

  /**
   * For purposes of hashCode and equals, we disregard the prefix
   */
  override lazy val hashCode = namespace.hashCode + local.hashCode

  override def equals(other: Any) = {
    val res = other match {
      case qn: QNameBase => (local =:= qn.local && namespace =:= qn.namespace)
      case _ => false
    }
    res
  }

  /**
   * Provides name with namespace information. Uses prefix if that is
   * appropriate, otherwise puts out the namespace in braces. Empty braces are
   * the no-namespace indicator.
   *
   * Incorrectly defined names are not tolerated.
   */
  def toPrettyString: String = {
    (prefix, local, namespace) match {
      case (Some(pre), local, NoNamespace) => Assert.invariantFailed("QName has prefix, but NoNamespace")
      case (Some(pre), local, UnspecifiedNamespace) => pre + ":" + local
      case (None, local, NoNamespace) => "{}" + local
      case (None, local, UnspecifiedNamespace) => local
      case (None, local, ns) => "{" + ns + "}" + local
      case (Some(pre), local, ns) => pre + ":" + local
    }
  }

  /**
   * displays all components that are available.
   */
  def toExtendedSyntax: String = {
    (prefix, local, namespace) match {
      case (Some(pre), local, NoNamespace) => Assert.invariantFailed("QName has prefix, but NoNamespace")
      case (Some(pre), local, UnspecifiedNamespace) => Assert.invariantFailed("QName has prefix, but unspecified namespace")

      case (None, local, NoNamespace) => "{}" + local
      case (None, local, UnspecifiedNamespace) => local
      case (None, local, ns) => "{" + ns + "}" + local
      //
      // This is a hack to avoid printing out tns prefixes just becasue a
      // schema author has chosen to use the tns prefix for the same uri as
      // the target namespace. Really we only want to print out a prefix
      // if it's a meaningful prefix that will distinguish something.
      //
      case (Some("tns"), local, ns) => "{" + ns + "}" + local // ignore the "tns" prefix.
      case (Some(pre), local, ns) => pre + ":{" + ns + "}" + local
    }
  }

  /**
   * Never displays the namespace. Never complains about inconsistencies.
   * Provides back what the schema author ought to think of as the name
   * of the thing.
   */
  def diagnosticDebugName: String = {
    (prefix, local, namespace) match {
      case (Some(pre), local, NoNamespace) => pre + ":" + local // generally this is an error. Shouldn't have a prefix.
      case (Some(pre), local, UnspecifiedNamespace) => pre + ":" + local
      case (None, local, NoNamespace) => local
      case (None, local, UnspecifiedNamespace) => local
      case (None, local, ns) => local
      //
      // See comment above about tns hack
      //
      case (Some("tns"), local, ns) => local
      case (Some(pre), local, ns) => pre + ":" + local
    }
  }

  /**
   * Creates a string suitable for use in an XML attribute as in 'dfdl:terminator="..."' or
   * 'xsi:nil="true"'
   *
   * This does not know about the scope, so it cannot decide that a namespace is the default namespace
   * so that just a local name is ok. That distinction actually requires you to know what kind of
   * name it is. RefQName or StepQName, so you can know whether it needs unqualifiedPathStepPolicy or not.
   */
  def toAttributeNameString: String = {
    (prefix, local, namespace) match {
      case (None, local, NoNamespace) => local
      case (None, local, ns) => Assert.invariantFailed("QName has namespace, but no prefix defined.")
      case _ => toPrettyString
    }
  }

  /**
   * Just turns into a prefix (optionally) then the local name e.g, foo:bar
   * or if there is no prefix, just bar.
   */
  def toQNameString: String = {
    if (prefix.isDefined) prefix.get + ":" + local else local
  }

  def matches[Q <: QNameBase](other: Q): Boolean
}

/**
 * common base trait for named things, both global and local
 */
sealed abstract class NamedQName(
    prefix: Option[String],
    local: String,
    namespace: NS)
  extends QNameBase {

  if (prefix.isDefined) {
    Assert.usage(!namespace.isNoNamespace)
    Assert.usage(!namespace.isUnspecified)
  }

  def toRefQName = RefQName(prefix, local, namespace)
}

/**
 * QName for a local declaration.
 */
final case class LocalDeclQName(prefix: Option[String], local: String, namespace: NS)
  extends NamedQName(prefix, local, namespace) {

  override def matches[Q <: QNameBase](other: Q): Boolean = {
    other match {
      case StepQName(_, `local`, `namespace`) => {
        // exact match
        //
        // This means either this was unqualified so the namespace
        // arg is NoNamespace, and the local matched that, or...
        // it means this was qualified but the other matched that
        // (which can happen even without a prefix if there is a default
        // namespace i.e., xmlns="...")
        true
      }
      case StepQName(_, `local`, NoNamespace) => {
        //
        // This case matches if the other has no NS qualifier
        // but in this case, this local decl name DOES have
        // some other URI as its namespace, so there is
        // explicitly no match here.
        //
        false
      }
      case StepQName(_, `local`, _) => false // NS didn't match, even if local did we don't care.
      case StepQName(_, notLocal, _) => false
      case _ => Assert.usageError("other must be a StepQName")
    }
  }
}

/**
 * QName for a global declaration or definition (of element, type, group, format, etc.)
 */
final case class GlobalQName(prefix: Option[String], local: String, namespace: NS)
  extends NamedQName(prefix, local, namespace) {

  override def matches[Q <: QNameBase](other: Q): Boolean = {
    other match {
      // StepQNames match against global names in the case of a path
      // step that refers to an element that is defined in its
      // group, via an element reference.
      case StepQName(_, `local`, `namespace`) => true // exact match
      case StepQName(_, _, _) => false
      // RefQNames match against global names in element references,
      // group references, type references (i.e., type="..."), etc.
      case RefQName(_, `local`, `namespace`) => true // exact match
      case RefQName(_, _, _) => false
      case _ => Assert.usageError("other must be a StepQName or RefQName")
    }
  }
}

/**
 * base trait for Qnames that reference other things
 */
protected sealed trait RefQNameBase extends QNameBase

/**
 * A qname as found in a ref="foo:bar" attribute, or a type="foo:barType" attribute.
 * Or a variable reference e.g., \$foo:barVar in an expression.
 *
 * These are references to globally defined things.
 */
final case class RefQName(prefix: Option[String], local: String, namespace: NS)
  extends RefQNameBase {

  override def matches[Q <: QNameBase](other: Q): Boolean = {
    other match {
      case named: GlobalQName => named.matches(this)
      case _ => Assert.usageError("other must be a GlobalQName")
    }
  }

  def toStepQName = StepQName(prefix, local, namespace)
  def toGlobalQName = GlobalQName(prefix, local, namespace)
}

/**
 * A qname as found in a path step as in ../foo:bar/baz/quux
 *
 * Differs from RefQName in that it has to match up to LocalDeclQNames
 * properly.
 */
final case class StepQName(prefix: Option[String], local: String, namespace: NS)
  extends RefQNameBase {

  override def matches[Q <: QNameBase](other: Q): Boolean = {
    other match {
      case named: NamedQName => named.matches(this) // let the named things decide how other things match them.
      case _ => Assert.usageError("other must be a NamedQName")
    }
  }

  /**
   * Finds the matches in a list of things that have QNames.
   * Used for finding if a named path step has corresponding element declaration.
   *
   * Handles local or global matches
   *
   */
  def findMatches[T <: { def namedQName: NamedQName }](candidates: Seq[T]): Seq[T] = {
    val matched = candidates.filter { x =>
      val other = x.namedQName
      val res = matches(other)
      res
    }
    matched
  }

}

protected trait RefQNameFactoryBase[T] {

  protected def resolveDefaultNamespace(scope: scala.xml.NamespaceBinding,
      unqualifiedPathStepPolicy: UnqualifiedPathStepPolicy): Option[String]

  protected def constructor(prefix: Option[String], local: String, namespace: NS): T

  /**
   * This variant to resolve normal QNames such as in a ref="pre:local" syntax, or
   * a path step like these steps .../foo/bar:baz/quux.
   */
  def resolveRef(qnameString: String, scope: scala.xml.NamespaceBinding,
      unqualifiedPathStepPolicy: UnqualifiedPathStepPolicy): Try[T] = Try {
    qnameString match {
      case QNameRegex.QName(pre, local) => {
        val prefix = Option(pre)
        // note that the prefix, if defined, can never be ""
        val optURI = prefix match {
          case None => resolveDefaultNamespace(scope, unqualifiedPathStepPolicy)
          case Some(pre) => Option(scope.getURI(pre))
        }
        val ns = (prefix, optURI) match {
          case (None, None) => NoNamespace
          case (Some(pre), None) => throw new QNameUndefinedPrefixException(pre)
          case (_, Some(ns)) => NS(ns)
        }
        val res = constructor(prefix, local, ns)
        res
      }
      case _ => throw new QNameSyntaxException(Some(qnameString), None)
    }
  }
}

object RefQNameFactory extends RefQNameFactoryBase[RefQName] {

  override def constructor(prefix: Option[String], local: String, namespace: NS) =
    RefQName(prefix, local, namespace)

  override def resolveDefaultNamespace(
      scope: scala.xml.NamespaceBinding,
      unqualifiedPathStepPolicy: UnqualifiedPathStepPolicy) =
    Option(scope.getURI(null)) // could be a default namespace

  /**
   * Variant used to deal with extended syntax, e.g., from specifying names at the CLI
   * or for variable binding mechanisms like from the command line options of Daffodil CLI which
   * accept the extended syntax.
   */
  def resolveExtendedSyntaxRef(extSyntax: String, scope: scala.xml.NamespaceBinding, unqualifiedPathStepPolicy: UnqualifiedPathStepPolicy): Try[RefQName] = Try {
    val (pre, ns, local) = QName.parseExtSyntax(extSyntax)
    // note that the prefix, if defined, can never be ""
    val optURI = pre match {
      case None => resolveDefaultNamespace(scope, unqualifiedPathStepPolicy)
      case Some(prefix) => Option(scope.getURI(prefix))
    }
    val optNS = (pre, optURI, ns) match {
      case (None, None, UnspecifiedNamespace) =>
        resolveDefaultNamespace(scope, unqualifiedPathStepPolicy).map { NS(_) }
      case (_, Some(uriString), UnspecifiedNamespace) => Some(NS(uriString))
      case (Some(pre), None, _) => throw new QNameUndefinedPrefixException(pre)
      case (Some(pre), Some(uriString), n) if (n.toString != uriString) =>
        Assert.invariantFailed("namespace from prefix and scope, and ns argument are inconsitent.")
      case (_, _, n) => Some(n)
    }
    val resolvedNS = optNS.getOrElse(UnspecifiedNamespace)
    val res = constructor(pre, local, resolvedNS)
    res
  }
}

object StepQNameFactory extends RefQNameFactoryBase[StepQName] {

  override def constructor(prefix: Option[String], local: String, namespace: NS) =
    StepQName(prefix, local, namespace)

  /* This is what needs Tunables and propagates into Expression */
  override def resolveDefaultNamespace(scope: scala.xml.NamespaceBinding,
      unqualifiedPathStepPolicy: UnqualifiedPathStepPolicy) = {
    unqualifiedPathStepPolicy match {
      case UnqualifiedPathStepPolicy.NoNamespace => None // don't consider default namespace
      case UnqualifiedPathStepPolicy.DefaultNamespace => Option(scope.getURI(null)) // could be a default namespace
      case UnqualifiedPathStepPolicy.PreferDefaultNamespace => Option(scope.getURI(null)) // could be a default namespace
    }
  }
}

object QNameRegex {
  private val xC0_D6 = ("""\x{C0}-\x{D6}""")
  private val xD8_F6 = """\x{D8}-\x{F6}"""
  private val xF8_2FF = """\x{F8}-\x{2FF}"""
  // private val x370_37D = """\x{370}-\x{37D}"""
  private val x37F_1FFF = """\x{37F}-\x{1FFF}"""
  private val x200C_200D = """\x{200c}|\x{200d}"""
  private val x2070_218F = """\x{2070}-\x{218F}"""
  private val x2C00_2FEF = """\x{2C00}-\x{2FEF}"""
  private val x3001_D7FF = """\x{3001}-\x{D7FF}"""
  private val xF900_FDCF = """\x{F900}-\x{FDCF}"""
  private val xFDF0_FFFD = """\x{FDF0}-\x{FFFD}"""
  private val x10000_EFFFF = """\x{10000}-\x{EFFFF}"""
  private val range0_9 = """0-9"""
  //  private val xB7 = """\xB7"""
  //  private val x0300_036F = """\x{0300}-\x{036F}"""
  //  private val x203F_2040 = """\x{203F}-\x{2040}"""

  private val ncNameStartChar = "A-Z_a-z" + xC0_D6 + xD8_F6 + xF8_2FF +
    // x370_37D + // TODO: why is this one is left out? Add comments please.
    x37F_1FFF + x200C_200D +
    x2070_218F + x2C00_2FEF + x3001_D7FF + xF900_FDCF + xFDF0_FFFD + x10000_EFFFF
  private val ncNameChar = ncNameStartChar + "\\-" + "\\." + range0_9 // + "|" + xB7 + "|" + x0300_036F + "|" + x203F_2040
  // TODO: why are the above left out? Add comments please.
  private val NCNameRegexString = "([" + ncNameStartChar + "](?:[" + ncNameChar + "])*)"
  private val QNameRegexString = "(?:" + NCNameRegexString + "\\:)?" + NCNameRegexString
  lazy val NCName = NCNameRegexString.r
  lazy val QName = QNameRegexString.r

  //
  // extended syntax now supports a prefix and a namespace
  // E.g., ex:{http://example.com}foo
  //
  private val URIPartRegexString = """(?:\{(.*)\})"""
  private val PrefixPartRegexString = """(?:""" + NCNameRegexString + """\:)"""
  private val ExtQNameRegexString = PrefixPartRegexString + "?" + URIPartRegexString + "?" + NCNameRegexString
  lazy val ExtQName = ExtQNameRegexString.r

  def isURISyntax(s: String): Boolean = {
    try {
      new URI(s)
      true
    } catch {
      case _: URISyntaxException => false
    }
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy