All Downloads are FREE. Search and download functionalities are using the official Maven repository.

scala.xml.parsing.ElementContentModel.scala Maven / Gradle / Ivy

The newest version!
/*
 * Scala (https://www.scala-lang.org)
 *
 * Copyright EPFL and Lightbend, Inc.
 *
 * Licensed under Apache License 2.0
 * (http://www.apache.org/licenses/LICENSE-2.0).
 *
 * See the NOTICE file distributed with this work for
 * additional information regarding copyright ownership.
 */

package scala.xml.parsing

import scala.annotation.tailrec
import scala.xml.dtd

// Note: this is private to avoid it becoming a part of binary compatibility checks.

// The content model will consist of the string "EMPTY", the string "ANY", or a parenthesised group,
// optionally followed by an occurrence indicator.
// The model will be normalized so that all parameter entities are fully resolved and all whitespace is removed,
// and will include the enclosing parentheses.
// Other normalization (such as removing redundant parentheses or simplifying occurrence indicators)
// is at the discretion of the parser.

// elementdecl         ::=       '<!ELEMENT' S Name S contentspec S? '>'
// contentspec         ::=       'EMPTY' | 'ANY' | Mixed | children
// children            ::=       (choice | seq) ('?' | '*' | '+')?
// cp                  ::=       (Name | choice | seq) ('?' | '*' | '+')?
// choice              ::=       '(' S? cp ( S? '|' S? cp )+ S? ')'
// seq                 ::=       '(' S? cp ( S? ',' S? cp )* S? ')'
// Mixed               ::=       '(' S? '#PCDATA' (S? '|' S? Name)* S? ')*'
//                             | '(' S? '#PCDATA' S? ')'
/** Parses a normalized element content model string (`EMPTY`, `ANY`, or a parenthesised group
  * optionally followed by an occurrence indicator) into a small syntax tree (`ContentSpec`)
  * and converts that tree into a `scala.xml.dtd.ContentModel`.
  */
private[parsing] object ElementContentModel {
  /** Parses `model` and converts the result into the corresponding `dtd.ContentModel`.
    *
    * @param model the content model string, without whitespace
    * @return `dtd.EMPTY`, `dtd.ANY`, `dtd.PCDATA`, `dtd.ELEMENTS` or `dtd.MIXED`
    * @throws java.lang.IllegalArgumentException if `model` is neither `EMPTY`, `ANY`
    *         nor a parenthesised group (see `ContentSpec.parse`)
    */
  def parseContentModel(model: String): dtd.ContentModel = ContentSpec.parse(model) match {
    case ContentSpec.Empty => dtd.EMPTY
    case ContentSpec.Any => dtd.ANY
    case ContentSpec.PCData => dtd.PCDATA
    case ContentSpec.Children(elements, occurrence) => dtd.ELEMENTS(convertOccurrence(elements, occurrence))
    case ContentSpec.Mixed(elements) =>
      // The first alternative is always #PCDATA; the named elements follow.
      val result: List[dtd.ContentModel.RegExp] =
        dtd.ContentModel.Letter(dtd.ContentModel.ElemName(ContentSpec.PCData.value)) +:
        elements.map(convertElements)
      // TODO scala.xml.dtd.impl.Alt.apply() insists on there being at least two alternatives,
      // which causes an exception in MIXED.toString() when there is only one alternative besides #PCDATA.
      // I think this is a bug.
      dtd.MIXED(dtd.ContentModel.Alt(result: _*))
  }

  /** Converts parsed elements into a regular expression: a name becomes a `Letter`,
    * a choice becomes an `Alt` and a sequence becomes a `Sequ`.
    */
  private def convertElements(elements: Elements): dtd.ContentModel.RegExp = {
    def convertCp(cp: Cp): dtd.ContentModel.RegExp = convertOccurrence(cp.elements, cp.occurrence)
    elements match {
      case Elements.Element(name) => dtd.ContentModel.Letter(dtd.ContentModel.ElemName(name))
      case Elements.Choice(children) => dtd.ContentModel.Alt(children.map(convertCp): _*)
      case Elements.Sequence(children) => dtd.ContentModel.Sequ(children.map(convertCp): _*)
    }
  }

  /** Converts `elements` and applies `occurrence` to the result.
    * `Once` leaves the expression as is; every other occurrence wraps it in a `Star`.
    */
  private def convertOccurrence(elements: Elements, occurrence: Occurrence): dtd.ContentModel.RegExp = {
    val result: dtd.ContentModel.RegExp = convertElements(elements)
    occurrence match {
      case Occurrence.Once => result
      case Occurrence.RepeatOptional => dtd.ContentModel.Star(result)
      case Occurrence.OnceOptional => dtd.ContentModel.Star(result) // TODO fidelity lost!
      case Occurrence.Repeat => dtd.ContentModel.Star(result) // TODO fidelity lost!
    }
  }

  /** Parsed form of a content model; `toString` renders it back as a content model string. */
  sealed trait ContentSpec
  object ContentSpec {
    /** A content specification that is a bare keyword. */
    sealed trait Simple extends ContentSpec {
      final override def toString: String = value
      val value: String
    }
    case object Empty extends Simple {
      override val value: String = "EMPTY"
    }
    case object Any extends Simple {
      override val value: String = "ANY"
    }
    /** Text-only content: `(#PCDATA)`. */
    case object PCData extends ContentSpec {
      override def toString: String = s"($value)"
      val value: String = "#PCDATA"
    }
    /** Mixed content: `(#PCDATA|name|...)*`; `elements` are the names after `#PCDATA`. */
    final case class Mixed(elements: List[Elements.Element]) extends ContentSpec {
      override def toString: String = {
        val names: String = elements.mkString("|")
        s"(${PCData.value}|$names)*"
      }
    }
    /** Element content: a choice or sequence followed by an occurrence indicator. */
    final case class Children(elements: Elements.Many, occurrence: Occurrence) extends ContentSpec {
      override def toString: String = s"$elements$occurrence"
    }
    object Children {
      /** Parses `string` (a group with its enclosing parentheses already removed). */
      def parse(string: String, occurrence: Occurrence): Children =
        Children(Elements.Many.parse(string), occurrence)
    }

    /** Parses a content model string.
      *
      * @throws java.lang.IllegalArgumentException if `model` is neither `EMPTY`, `ANY`
      *         nor a parenthesised group optionally followed by `?`, `*` or `+`
      */
    def parse(model: String): ContentSpec = model match {
      case Empty.value => Empty
      case Any.value => Any
      case model =>
        val (parenthesized: String, occurrence: Occurrence) = Occurrence.parse(model)
        require(
          isParenthesized(parenthesized),
          s"content model must be EMPTY, ANY or a parenthesised group, but was: '$model'"
        )
        val string: String = removeParentheses(parenthesized)
        occurrence match {
          case Occurrence.Once if string == PCData.value => PCData
          case Occurrence.RepeatOptional =>
            // Only a starred choice of two or more alternatives that starts with #PCDATA is mixed content.
            Elements.Choice.split(string) match {
              case PCData.value :: names if names.nonEmpty => Mixed(names.map(Elements.Element.apply))
              case _ => Children.parse(string, occurrence)
            }
          case _ => Children.parse(string, occurrence)
        }
    }
  }

  /** A content particle body: a single element name, a choice or a sequence. */
  sealed trait Elements
  object Elements {
    final case class Element(name: String) extends Elements {
      override def toString: String = name
    }
    /** Companion of a group kind; knows the character that separates the group's children. */
    sealed abstract class ManyCompanion(val separator: Char) {
      /** Splits `string` at the occurrences of `separator` that are outside of any parentheses. */
      final def split(string: String): List[String] = ElementContentModel.split(string, separator)
    }
    /** A parenthesised group of content particles. */
    sealed abstract class Many(children: List[Cp]) extends Elements {
      final override def toString: String = children.map(_.toString).mkString("(", companion.separator.toString, ")")
      def companion: ManyCompanion
    }
    object Choice extends ManyCompanion(separator = '|')
    final case class Choice(children: List[Cp]) extends Many(children) {
      override def companion: ManyCompanion = Choice
    }
    object Sequence extends ManyCompanion(separator = ',')
    final case class Sequence(children: List[Cp]) extends Many(children) {
      override def companion: ManyCompanion = Sequence
    }
    object Many {
      /** Parses the inside of a group: a `Choice` if there are two or more top-level
        * '|'-separated parts, a `Sequence` of the top-level ','-separated parts otherwise.
        */
      def parse(string: String): Many = {
        val choice: List[String] = Choice.split(string)
        if (choice.length > 1) Choice(choice.map(Cp.parse))
        else Sequence(Sequence.split(string).map(Cp.parse))
      }
    }
    /** Parses a parenthesised string as a group and anything else as an element name. */
    def parse(string: String): Elements =
      if (!isParenthesized(string)) Element(string)
      else Many.parse(removeParentheses(string))
  }

  /** A content particle: elements followed by an occurrence indicator. */
  final case class Cp(elements: Elements, occurrence: Occurrence) {
    override def toString: String = s"$elements$occurrence"
  }
  object Cp {
    /** Strips a trailing occurrence indicator, if any, and parses what remains. */
    def parse(string: String): Cp = {
      val (maybeParenthesized: String, occurrence: Occurrence) = Occurrence.parse(string)
      Cp(Elements.parse(maybeParenthesized), occurrence)
    }
  }

  /** Occurrence indicator; `toString` renders its sign (the empty string for `Once`).
    * Abstract, so that the case objects below are its only possible values.
    */
  sealed abstract class Occurrence
  object Occurrence {
    case object Once extends Occurrence {
      override def toString: String = ""
    }
    /** An occurrence that is written with an explicit sign. */
    sealed trait Signed extends Occurrence {
      final override def toString: String = sign
      def sign: String
    }
    case object OnceOptional extends Signed {
      override def sign: String = "?"
    }
    case object Repeat extends Signed {
      override def sign: String = "+"
    }
    case object RepeatOptional extends Signed {
      override def sign: String = "*"
    }
    /** Splits `string` into its body and the occurrence denoted by its last character;
      * a string that does not end with a sign is returned unchanged, paired with `Once`.
      */
    def parse(string: String): (String, Occurrence) =
      if (string.endsWith(OnceOptional.sign)) (string.init, OnceOptional) else
      if (string.endsWith(RepeatOptional.sign)) (string.init, RepeatOptional) else
      if (string.endsWith(Repeat.sign)) (string.init, Repeat) else
        (string, Once)
  }

  /** Tells whether `string` starts with '(' and the nesting level opened by that parenthesis
    * returns to zero for the first time exactly at the last character.
    * Scans by index, so the check is a single pass without copying the string.
    */
  private def isParenthesized(string: String): Boolean = {
    val last: Int = string.length - 1

    @tailrec
    def closesAtEnd(index: Int, level: Int): Boolean = {
      val nextLevel: Int = string.charAt(index) match {
        case '(' => level + 1
        case ')' => level - 1
        case _ => level
      }
      if (index == last) nextLevel == 0
      else if (nextLevel == 0) false // the group closed before the end of the string
      else closesAtEnd(index + 1, nextLevel)
    }

    // startsWith() guards the scan against the empty string.
    string.startsWith("(") && closesAtEnd(0, 0)
  }

  /** Removes enclosing parentheses repeatedly, for as long as the string is parenthesized. */
  @tailrec
  private def removeParentheses(string: String): String =
    if (!isParenthesized(string)) string
    else removeParentheses(string.tail.init)

  /** Splits `string` at the occurrences of `separator` that are at the top level of parentheses.
    * Empty parts before or between separators are retained; an empty part after the last
    * separator is dropped, so the empty string yields an empty list.
    */
  private def split(string: String, separator: Char): List[String] = {
    // Parts are accumulated in reverse order (cheap prepend) and reversed once at the end;
    // `start` is the index at which the part currently being scanned begins.
    @tailrec
    def scan(
      reversed: List[String],
      level: Int,
      start: Int,
      index: Int
    ): List[String] =
      if (index == string.length) {
        val all: List[String] = if (start == index) reversed else string.substring(start) :: reversed
        all.reverse
      } else {
        val current: Char = string.charAt(index)
        if (level == 0 && current == separator) scan(
          string.substring(start, index) :: reversed,
          level,
          index + 1,
          index + 1
        ) else scan(
          reversed,
          if (current == '(') level + 1 else if (current == ')') level - 1 else level,
          start,
          index + 1
        )
      }

    scan(
      Nil,
      0,
      0,
      0
    )
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy