All Downloads are FREE. Search and download functionalities are using the official Maven repository.

de.bottlecaps.railroad.xq.ast-to-ebnf.xq Maven / Gradle / Ivy

(:~
 : The AST to EBNF conversion module.
 :)
module namespace b="de/bottlecaps/railroad/xq/ast-to-ebnf.xq";

declare namespace g="http://www.w3.org/2001/03/XPath/grammar";

(:~
 : The indentation in front of continuation line content.
 :)
declare variable $b:t1 := 12;
declare variable $b:t2 := 40;

(:~
 : Render a g:string, selecting an appropriate quote character.
 :
 : @param $string the g:string element to be rendered.
 : @return the rendered result.
 :)
declare function b:render-string($string as element(g:string)) as item()+
{
  let $context := ("^", data($string/@context))[$string/@context]
  let $quote := if (contains($string, "'")) then """" else "'"
  return
    if ($string/@underline) then
      element fragment
      {
        $quote,
        element u {substring($string, 1, $string/@underline)},
        string-join
        (
          (
            substring($string, $string/@underline + 1),
            $quote,
            $context
          ),
          ""
        )
      }
    else
      string-join
      (
        (
          $quote,
          string($string),
          $quote,
          $context
        ),
        ""
      )
};

(:~
 : Render end-of-file.
 :
 : @param $item the end-of-file item.
 : @return the rendered result.
 :)
declare function b:render-endOfFile($item as element()) as xs:string
{
  "$"
};

(:~
 : Replace indentation of a block of text by a fixed number of spaces.
 :
 : @param $text
 : @param $indent
 :)
declare function b:re-indent($text as xs:string, $indent as xs:integer) as xs:string?
{
  let $trimmed := replace($text, "(^\s+)|(\s+$)", "")
  let $lines := tokenize($trimmed, "
")
  where exists($lines)
  return
    let $min-indent :=
      min
      (
        for $line in $lines[position() > 1]
        where normalize-space($line) != ""
        return string-length($line) - string-length(replace($line, "^ +", ""))
      )
    let $old-indent := string-join(("^", for $x in 1 to $min-indent return " "), "")
    let $new-indent := string-join(for $x in 1 to $indent return " ", "")
    return
      string-join
      (
        (
          $lines[1],
          for $line in $lines[position() > 1]
          return
          (
            if (normalize-space($line) = "") then
              ""
            else if ($old-indent = "^") then
              concat($new-indent, $line)
            else
              replace($line, $old-indent, $new-indent)
          )
        ),
        "
"
      )
};

(:~
 : Render the name of a given item (g:ref or g:production).
 :
 : @param $item the item whose name should be rendered.
 : @return the rendered result.
 :)
declare function b:render-name($item as element()) as element(name)
{
  element name
  {
    replace(string-join((data($item/@name), data($item/@context)), "^"), " ", "_")
  }
};

(:~
 : Render a g:choice or g:orderedChoice element. Decide whether
 : parentheses are required. Render alternatives and separate
 : with operator.
 :
 : @param $choice the g:choice to be rendered.
 : @param $operator the actual choice operator.
 : @return the rendered result.
 :)
declare function b:render-choice($choice as element(), $operator as xs:string)
{
  let $embedded :=
    exists($choice/parent::*[self::g:optional or
                             self::g:oneOrMore or
                             self::g:zeroOrMore or
                             self::g:subtract or
                             count(b:subitems(.)) != 1])
  return
  (
    "("[$embedded],
    for $e at $i in b:subitems($choice)
    return
    (
      $operator[$i != 1],
      b:render-items($e)
    ),
    ")"[$embedded]
  )
};

(:~
 : Calculate subitems of an item, i.e. child nodes and processing instructions.
 :
 : @param $node the parent item.
 : @return the sequence of subitems.
 :)
declare function b:subitems($node as element()) as node()*
{
  for $child in $node/node()
  where $child/self::element() or $child/self::processing-instruction()
  return $child
};

(:~
 : Render a compound construct, along with a given occurrence
 : indicator. Decide whether parentheses are required and render
 : subsequence.
 :
 : @param $compound the compound item to be rendered.
 : @param $occurrence-indicator the occurrence indicator.
 : @param $original the original node, necessary for parenthesis decision.
 : @return the rendered result.
 :)
declare function b:render-compound($compound as element(),
                                   $occurrence-indicator as xs:string?,
                                   $original as element())
{
  let $items := b:subitems($compound)
  let $need-parenthesis := count($items) != 1
                       and ($occurrence-indicator
                         or $original/parent::g:subtract
                         or $original is $original/parent::g:context/*[2])
  let $rendered :=
  (
    text{"("}[$need-parenthesis],
    b:render-items($items),
    text{")"}[$need-parenthesis]
  )
  return
    if (empty($occurrence-indicator)) then
      $rendered
    else
      let $last :=
      (
        for $x at $i in $rendered
        where not($x instance of element(tab))
        return $i
      )[last()]
      return
      (
        $rendered[position() < $last],
        element fragment {$rendered[$last], text{$occurrence-indicator}},
        $rendered[position() > $last]
      )
};

(:~
 : Render a oneOrMoreWithSeparator operator. This operator is used only
 : in a normalized expression, so we only ever need this function when
 : rendering intermediate results.
 :
 : @param $oneOrMoreWithSeparator the oneOrMoreWithSeparator operator node.
 : @return the rendered result.
 :)
declare function b:render-oneOrMoreWithSeparator($oneOrMoreWithSeparator as element(g:oneOrMoreWithSeparator))
{
  "[",
  let $s := element g:sequence {b:subitems($oneOrMoreWithSeparator)[1]}
  return b:render-compound($s, (), $s),
  ";",
  let $s := element g:sequence {b:subitems($oneOrMoreWithSeparator)[2]}
  return b:render-compound($s, (), $s),
  "]+"
(:
  let $body := $oneOrMoreWithSeparator/*[1]
  let $body :=
    if ($body/self::g:sequence) then
      b:subitems($body)
    else
      $body
  let $delimiter := $oneOrMoreWithSeparator/*[2]
  let $delimiter :=
    if ($delimiter/self::g:sequence) then
      b:subitems($delimiter)
    else
      $delimiter
  return
    if (empty($delimiter)) then
      b:render-compound
      (
        element g:oneOrMore {$body},
        "+",
        $oneOrMoreWithSeparator
      )
    else
      b:render-compound
      (
        element g:sequence
        {
          (
            $body,
            element g:zeroOrMore
            {
              $delimiter,
              $body
            }
          )
        },
        (),
        $oneOrMoreWithSeparator
      )
:)
};

(:~
 : Render a subtract operator. Decide whether parentheses are required and render
 : subsequence, separated by "-" characters.
 :
 : @param $subtract the subtract operator to be rendered.
 : @return the rendered result.
 :)
declare function b:render-subtract($subtract as element(g:subtract))
{
  let $items := $subtract/*
  let $parent := $subtract/parent::element()

  let $embedded := $parent/self::g:optional
                or $parent/self::g:oneOrMore
                or $parent/self::g:zeroOrMore
                or $parent/self::g:subtract
                or $parent[not(self::g:choice) and count((element(), processing-instruction())) != 1]
  return
  (
    "("[$embedded],
    for $item at $i in $items
    return
    (
      "-"[$i != 1],
      b:render-items($item)
    ),
    ")"[$embedded]
  )
};

(:~
 : Render a context operator. Render operands, then wrap rhs operand
 : in a predicate item, i.e. precede it by "&". b:render-compound
 : will make sure that we have parentheses if needed.
 :
 : @param $context the context operator to be rendered.
 : @return the rendered result.
 :)
declare function b:render-context($context as element(g:context))
{
  let $lhs := b:render-items($context/*[1])
  let $rhs := b:render-items($context/*[2])
  let $first :=
    (for $x at $i in $rhs where not($x instance of element(tab)) return $i)[1]
  return
  (
    $lhs,
    $rhs[position() < $first],
    element fragment {text{"&"}, $rhs[$first]},
    $rhs[position() > $first]
  )
};

(:~
 : Render a charCode. Add a space if we are not in a charclass. Also
 : add the "#x" prefix.
 :
 : @param $charCode the charCode to be rendered.
 : @return the rendered result.
 :)
declare function b:render-charCode($charCode as node()) as xs:string
{
  concat("#x", $charCode)
};

(:~
 : Render a charCodeRange. Add "#x" prefixes and "-" character.
 :
 : @param $charCodeRange the charCodeRange to be rendered.
 : @return the rendered result.
 :)
declare function b:render-charCodeRange($charCodeRange as element(g:charCodeRange)) as xs:string
{
  concat
  (
    b:render-charCode($charCodeRange/@minValue),
    "-",
    b:render-charCode($charCodeRange/@maxValue)
  )
};

(:~
 : Render a char. Return char values, surrounded by quotes.
 :
 : @param $char the char to be rendered.
 : @return the rendered result.
 :)
declare function b:render-char($char as element(g:char)) as xs:string
{
  let $quote := if ($char = "'") then '"' else "'"
  return concat($quote, $char, $quote)
};

(:~
 : Render a charRange. Return char values, separated by "-" character.
 :
 : @param $charRange the charRange to be rendered.
 : @return the rendered result.
 :)
declare function b:render-charRange($charRange as element(g:charRange)) as xs:string
{
  concat
  (
    $charRange/@minChar,
    "-",
    $charRange/@maxChar
  )
};

(:~
 : Render a charClass. Render items, surrounded by brackets.
 :
 : @param $charClass the charClass to be rendered.
 : @return the rendered result.
 :)
declare function b:render-charClass($charClass as element(g:charClass)) as xs:string
{
  if (exists($charClass/g:char)
  and empty($charClass/*[empty(self::g:char)])
  and contains("0123456789", string-join($charClass/g:char, ""))) then
    concat("[", $charClass/g:char[1], "-", $charClass/g:char[last()], "]")
  else
    let $chars :=
      string-join
      (
        let $rendered :=
          for $c in $charClass/*
          return if ($c/self::g:char) then string($c) else b:render-items($c)
        for $item at $i in $rendered
        let $item :=
          if ($i != 1 and $item = "-") then
            "#x2D"
          else if ($i != 1 and string-length($item) = 3 and starts-with($item, "--")) then
            concat("#x2D-#x", b:to-string(16, string-to-codepoints(substring($item, 3))))
          else if ($item = "]") then
            "#x5D"
          else if (string-length($item) = 3 and starts-with($item, "]-")) then
            concat("#x5D-#x", b:to-string(16, string-to-codepoints(substring($item, 3))))
          else if ($item = "#") then
            "#x23"
          else if (string-length($item) = 3 and starts-with($item, "#-")) then
            concat("#x23-#x", b:to-string(16, string-to-codepoints(substring($item, 3))))
          else
            $item
        order by starts-with($item, "#")
        return $item,
        ""
      )
    return concat("[", replace($chars, "^\^", "#x5E"), "]")
};

(:~
 : Convert an integer value to string representation. This is a
 : tail-recursive helper for the 2-argument function of the same
 : name.
 :
 : @param $base the numeral system base: 2, 8, 10, or 16.
 : @param $todo the uncoverted integer.
 : @param $done the converted string as accumulated in previous
 : recursion levels.
 : @return the string representation.
 :)
declare function b:to-string($base as xs:integer, $todo as xs:integer, $done as xs:string?) as xs:string
{
  if ($todo eq 0) then
    ($done, "0")[1]
  else
    let $done := concat(substring("0123456789ABCDEF", 1 + $todo mod $base, 1), $done)
    return b:to-string($base, $todo idiv $base, $done)
};

(:~
 : Convert an integer value to string representation.
 :
 : @param $base the numeral system base: 2, 8, 10, or 16.
 : @param $todo the uncoverted integer.
 : @return the string representation.
 :)
declare function b:to-string($base as xs:integer, $todo as xs:integer) as xs:string
{
  b:to-string($base, $todo, ())
};

(:~
 : Render a complement charClass. Render items, surrounded by brackets,
 : and preceded by negation indicator.
 :
 : @param $complement the complement to be rendered.
 : @return the rendered result.
 :)
declare function b:render-complement($complement as element(g:complement)) as xs:string
{
  if (empty($complement/*)) then
    "."
  else if ($complement/g:charClass and count($complement/*) eq 1) then
    if (empty($complement/g:charClass/*)) then
      "."
    else
      concat("[^", substring(b:render-charClass($complement/g:charClass), 2))
  else if (($complement/g:string or $complement/g:char) and count($complement/*) eq 1) then
    concat("[^", $complement/*, "]")
  else
    error(xs:QName("b:render-complement"), concat("invalid node type: ", $complement/*[1]/local-name()))
};

(:~
 : Render a preference. Render child items, separated by "<<".
 :
 : @param $preference the preference to be rendered.
 : @return the rendered result.
 :)
declare function b:render-preference($preference as element(g:preference))
{
  let $lhs := $preference/*[1]
  let $all-with-lhs := $preference/../g:preference[deep-equal(*[1], $lhs)]
  where $preference is $all-with-lhs[1]
  return
  (
    ,
    b:render-items($lhs),
    ,
    "<<",
    b:render-items($all-with-lhs/*[2])
  )
};

(:~
 : Render a delimiter. Render child items, separated by "\\".
 :
 : @param $delimiter the delimiter to be rendered.
 : @return the rendered result.
 :)
declare function b:render-delimiter($delimiter as element(g:delimiter))
{
  let $lhs := $delimiter/*[1]
  let $all-with-lhs := $delimiter/../g:delimiter[deep-equal(*[1], $lhs)]
  where $delimiter is $all-with-lhs[1]
  return
  (
    ,
    b:render-items($lhs),
    ,
    "\\",
    b:render-items($all-with-lhs/*[2])
  )
};

(:~
 : Render a range equivalence declaration. Render child items, enclosed in "[" "]",
 : separated by "==".
 :
 : @param $equivalence the equivalence declaration to be rendered.
 : @return the rendered result.
 :)
declare function b:render-equivalence($equivalence as element(g:equivalence))
{
  let $lhs := $equivalence/*[1]
  let $rhs := $equivalence/*[2]
  return
  (
    ,
    concat("["[not($lhs/self::g:string)], b:render-items($lhs), "]"[not($lhs/self::g:string)]),
    ,
    "==",
    concat("["[not($rhs/self::g:string)], b:render-items($rhs), "]"[not($rhs/self::g:string)])
  )
};

(:~
 : Render a production. Handle name, add "::=", and render right hand
 : side items.
 :
 : @param $production the production to be rendered.
 : @return the rendered result.
 :)
declare function b:render-production($production as element(g:production))
{
  ,
  element fragment
  {
    b:render-name($production),
    "?"[$production/@nongreedy="true"]
  },
  ,
  "::=",
  if ($production/@xhref) then
    b:render-items($production/@xhref)
  else
    let $items := b:subitems($production)
    return
      if (count($items) != 1 or empty(($items/self::g:choice, $items/self::g:orderedChoice))) then
        b:render-items($items)
      else
        for $item at $i in b:subitems($items)
        return
        (
          (
            ,
            if ($items/self::g:choice) then "|" else "/"
          )
          [$i > 1],
          b:render-items($item)
        ),
  (
    ,
    concat("/* ws: ", data($production/@whitespace-spec), " */")
  )
  [$production/@whitespace-spec]
};

(:~
 : Render a sequence of items. For each item, dispatch to
 : appropriate handler based in item type.
 :
 : @param $items the sequence of items to be rendered.
 : @return the rendered result.
 :)
declare function b:render-items($nodes as node()*)
{
  for $node in $nodes
  return
    typeswitch ($node)
    case document-node() return b:render-items($node/*)
    case element(g:string) return b:render-string($node)
    case element(g:ref) return b:render-name($node)
    case element(g:choice) return b:render-choice($node, "|")
    case element(g:orderedChoice) return b:render-choice($node, "/")
    case element(g:sequence) return b:render-compound($node, (), $node)
    case element(g:optional) return b:render-compound($node, "?", $node)
    case element(g:oneOrMore) return b:render-compound($node, "+", $node)
    case element(g:oneOrMoreWithSeparator) return b:render-oneOrMoreWithSeparator($node)
    case element(g:zeroOrMore) return b:render-compound($node, "*", $node)
    case element(g:subtract) return b:render-subtract($node)
    case element(g:context) return b:render-context($node)
    case element(g:charClass) return b:render-charClass($node)
    case element(g:complement) return b:render-complement($node)
    case element(g:charRange) return b:render-charRange($node)
    case element(g:char) return b:render-char($node)
    case element(g:charCode) return b:render-charCode($node/@value)
    case element(g:charCodeRange) return b:render-charCodeRange($node)
    case element(g:production) return b:render-production($node)
    case element(g:grammar) return b:render-items(b:subitems($node))
    case element(g:endOfFile) return b:render-endOfFile($node)
    case element(g:preference) return b:render-preference($node)
    case element(g:delimiter) return b:render-delimiter($node)
    case element(g:equivalence) return b:render-equivalence($node)
    case processing-instruction() return $node
    case attribute(xhref) return [{data($node)}]
    default return
      error(xs:QName("b:render-items"), concat("invalid node type: ", local-name($node), "(", string($node), ")"))
};

(:~
 : Render a sequence of nodes, returning a single string. Recursively pass
 : grammar fragments from the todo list to the done list.
 :
 : @param $done the sequence of lines (strings) already rendered.
 : @param $todo the sequence of nodes to be rendered.
 : @return the rendered result.
 :)
declare function b:break-lines($done, $todo) as element(ebnf)
{
  if (empty($todo)) then
    element ebnf
    {
      for $d at $i in $done
      return (
[$i > 1], $d)
    }
  else
    let $item := $todo[1]
    let $todo := $todo[position() > 1]
    return
      typeswitch ($item)
      case element(tab) return
        let $n := xs:integer($item/@col - string-length($done[last()]) - 1)
        return
          if ($n = 0) then
            b:break-lines($done, $todo)
          else if ($n > 0) then
            let $spaces := string-join(for $i in (1 to $n) return " ", "")
            return b:break-lines(($done[position() < last()], element line {$done[last()]/node(), text{$spaces}}), $todo)
          else if (normalize-space($done[last()]) != "") then
            let $spaces := string-join(for $i in (1 to xs:integer($item/@col) - 1) return " ", "")
            return b:break-lines(($done, element line {$spaces}), $todo)
          else
            let $spaces := string-join(for $i in (1 to xs:integer($item/@col) - 1) return " ", "")
            return b:break-lines(($done[position() < last()], element line {$spaces}), $todo)
      case element(fragment) return
        b:break-lines
        (
          (
            $done[position() < last()],
            element line{$done[last()]/node(), text{" "}[$done[last()] != ""], $item/node()}
          ),
          $todo
        )
      case element(name) return
        b:break-lines
        (
          (
            $done[position() < last()],
            element line{$done[last()]/node(), text{" "}[$done[last()] != ""], $item}
          ),
          $todo
        )
      case text() return
        b:break-lines
        (
          (
            $done[position() < last()],
            element line{$done[last()]/node(), text{" "}[$done[last()] != ""], $item}
          ),
          $todo
        )
      case xs:string return
        b:break-lines
        (
          (
            $done[position() < last()],
            element line{$done[last()]/node(), text{" "}[$done[last()] != ""], text{$item}}
          ),
          $todo
        )
      case processing-instruction() return
        if (local-name($item) = "TOKENS" and $item = "") then
          b:break-lines($done, (, "

", , $todo))
        else if (local-name($item) = "ENCORE" and $item = "") then
          b:break-lines($done, (, "
", , $todo))
        else
          b:break-lines
          (
            $done,
            (
              ,
              let $data := replace(data($item), "^#line [0-9]+ ""[^""]*""\s+", "")
              return
                if (not(contains($data, "
"))) then
                  concat("")
                else
                (
                  concat(",
                  b:re-indent($data, $b:t2 + 2),
                  ,
                  "?>"
                ),
              ,
              $todo
            )
          )
      default return
        error(xs:QName("b:break-lines"), concat("invalid input type: ", string($item)))
};

declare function b:to-html($nodes as node()*, $namespace as xs:string?) as node()*
{
  for $node at $i in $nodes
  return
    typeswitch ($node)
    case element(u) return
      element {QName($namespace, "u")} {$node/node()}
    case element(name) return
      let $qname := QName($namespace, "a")
      return
        element {$qname}
        {
          attribute href {concat("#", $node)},
          attribute title {$node},
          data($node)
        }
    case element(link) return
      let $qname := QName($namespace, "a")
      return
        element {$qname}
        {
          attribute href {$node},
          attribute title {$node},
          attribute target {"_blank"},
          data($node)
        }
    case element(lf) return
      ()
    case element(line) return
      let $qname := QName($namespace, "div")
      return element {$qname} {b:to-html($node/node(), $namespace)}
    case element() return
      b:to-html($node/node(), $namespace)
    case text() return
      if (matches($node, "^ +::= .*$") or $node[matches(., "^ +.*$")]/parent::line and $i eq 1) then
        let $spaces := string-length(replace($node, "^( +).*$", "$1"))
        return text {string-join(((1 to $spaces)!" ", substring($node, $spaces + 1)))}
      else
        $node
    default return
      error(xs:QName("b:to-html"), concat("invalid input type: ", string($node)))
};

(:~
 : Render a grammar fragment, returning a single string.
 :
 : @param $nodes the grammar fragment to be rendered.
 : @return the rendered result.
 :)
declare function b:render($nodes as node()*) as xs:string
{
  b:break-lines((), b:render-items($nodes))
};

(:~
 : Render a grammar fragment as HTML, returning a sequence of nodes.
 :
 : @param $nodes the grammar fragment to be rendered.
 : @param $namespace the namespace for HTML tags.
 : @return the rendered result.
 :)
declare function b:render-as-html($nodes as node()*, $namespace as xs:string?) as node()*
{
  b:to-html(b:break-lines((), b:render-items($nodes)), $namespace)
};




© 2015 - 2025 Weber Informatics LLC | Privacy Policy