All Downloads are FREE. Search and download functionalities are using the official Maven repository.

de.bottlecaps.railroad.xq.cst-to-ast.xq Maven / Gradle / Ivy

(:~
 : The parse tree rewriter. Contains functions for converting
 : the concrete syntax tree into the abstact syntax tree (AST).
 :
 : We are using the XPath grammar namespace here, i.e.
 : "http://www.w3.org/2001/03/XPath/grammar", though actually
 : using a few minor modifications of the original structure.
 :)
module namespace a="de/bottlecaps/railroad/xq/cst-to-ast.xq";

declare namespace g="http://www.w3.org/2001/03/XPath/grammar";

(:~
 : Convert a char code from the EBNF input into the representation
 : for AST use, i.e. strip the leading "#x".
 :
 : @param $code the char code as it occurred in the input, with a "#x"
 : prefix.
 : @return the hex char code string, without the "#x" prefix.
 :)
declare function a:charCode($code as xs:string) as xs:string
{
  substring($code, 3)
};

(:~
 : Recursively transform a fragment of the concrete parse tree of an
 : EBNF grammar in to an AST node.
 :
 : @param $nodes sequence of nodes of the concrete parse tree,
 : representing the grammar fragment to be transformed.
 : @return the AST fragment for the parse tree fragment, which is
 : a sequence of elements in the XPath grammar namespace, or an
 : xhref attribute node.
 :)
declare function a:ast($nodes as node()*) as node()*
{
  for $node in $nodes
  return
    typeswitch ($node)
    case element(NCName) return
      element g:ref {attribute name {$node}}
    case element(StringLiteral) return
      let $string := $node
      return
        element g:string {substring($string, 2, string-length($string) - 2)}
    case element(NameOrString) return
      let $a := a:ast($node/*[1])
      return
        if (count($node/*) = 2) then
          element {node-name($a)}
          {
            $a/@*,
            attribute context {substring($node/*[2], 2)},
            $a/node()
          }
        else
          $a
    case element(Char) return
      element g:char {string($node)}
    case element(CharRange) return
      element g:charRange
      {
        attribute minChar {substring($node, 1, 1)},
        attribute maxChar {substring($node, 3, 1)}
      }
    case element(CharCode) return
      element g:charCode {attribute value {a:charCode($node)}}
    case element(CharCodeRange) return
      element g:charCodeRange
      {
        attribute minValue {a:charCode(substring-before($node, '-'))},
        attribute maxValue {a:charCode(substring-after($node, '-'))}
      }
    case element(CharClass) return
      let $ast := a:ast($node/*[not(self::TOKEN)])
      return
        if ($node/TOKEN = "[^") then
            element g:complement {element g:charClass{$ast}}
          else
            element g:charClass {$ast}
    case element(ProcessingInstruction) return
      processing-instruction {$node/NCName} {data($node/ProcessingInstructionContents)}
    case element(Link) return
      attribute xhref {$node/URL}
    case element(Primary) return
      if ($node/TOKEN = ".") then
        element g:ref {attribute name {"."}}
      else if ($node/TOKEN = "$") then
        element g:endOfFile {}
      else
        a:ast($node/*[not(self::TOKEN)])
    case element(Item) return
      a:closure(a:ast($node/Primary), $node/TOKEN)
    case element(SequenceOrDifference) return
      if ($node/TOKEN = "-") then
        element g:subtract
        {
          for $i in $node/Item
          let $ast := a:ast($i)
          return if (count($ast) = 1) then $ast else element g:sequence {$ast}
        }
      else
        a:ast($node/Item)
    case element(Choice) return
      if (count($node/SequenceOrDifference) = 1) then
        a:ast($node/SequenceOrDifference)
      else if ($node/TOKEN = "|") then
          element g:choice
          {
            for $s in $node/SequenceOrDifference
            let $ast := a:ast($s)
            return if (count($ast) = 1) then $ast else element g:sequence {$ast}
          }
      else
          element g:orderedChoice
          {
            for $s in $node/SequenceOrDifference
            let $ast := a:ast($s)
            return if (count($ast) = 1) then $ast else element g:sequence {$ast}
          }
    case element(Alternative) return
      if ($node/TOKEN = "&") then
        element g:context
        {
          for $i in $node/*[not(self::TOKEN)]
          let $ast := a:ast($i)
          return if (count($ast) = 1) then $ast else element g:sequence {$ast}
        }
      else
        a:ast($node/SequenceOrDifference)
    case element(Alternatives) return
      if (count($node/Alternative) = 1) then
        a:ast($node/Alternative)
      else if ($node/TOKEN = "|") then
          element g:choice
          {
            for $s in $node/Alternative
            let $ast := a:ast($s)
            return if (count($ast) = 1) then $ast else element g:sequence {$ast}
          }
      else
          element g:orderedChoice
          {
            for $s in $node/Alternative
            let $ast := a:ast($s)
            return if (count($ast) = 1) then $ast else element g:sequence {$ast}
          }
    case element(Preference) return
      let $lhs := $node/NameOrString[1]
      let $lhs-ast := a:ast($lhs)
      for $rhs in $node/NameOrString[. >> $lhs]
      return
        if ($node/TOKEN = "<<") then
          element g:preference {$lhs-ast, a:ast($rhs)}
        else
          element g:preference {a:ast($rhs), $lhs-ast}
    case element(Delimiter) return
      let $lhs-ast := a:ast($node/NCName)
      for $rhs in $node/NameOrString
      return element g:delimiter {$lhs-ast, a:ast($rhs)}
    case element(Equivalence) return
      let $lhs := a:ast($node/EquivalenceCharRange[1]/*[not(self::TOKEN)])
      let $rhs := a:ast($node/EquivalenceCharRange[2]/*[not(self::TOKEN)])
      return element g:equivalence {$lhs, $rhs}
    case element(Production) return
      element g:production
      {
        attribute name {$node/NCName},
        attribute nongreedy {true()}[$node/TOKEN = "?"],
        attribute whitespace-spec {"explicit"  }[contains($node/Option, "explicit"  )],
        attribute whitespace-spec {"definition"}[contains($node/Option, "definition")],
        a:ast($node/(Alternatives | Link))
      }
    case element(EOF) return
      ()
    case element(Grammar) return
      element g:grammar
      {
        let $terminals := $node/TOKEN[. = ""]
        let $encore := $node/TOKEN[. = ""]
        let $syntax-end := ($terminals, $encore)[1]
        return
        (
          a:ast($node/*[not(. is $syntax-end or . >> $syntax-end)]),
          if ($terminals) then
          (
            ,
            a:ast($node/*[. >> $terminals and not(. is $encore or . >> $encore)])
          )
          else
            (),
          if ($encore) then
          (
            ,
            a:ast($node/ProcessingInstruction[. >> $encore])
          )
          else
            ()
        )
      }
    default return
      error(xs:QName("a:ast"), concat("invalid parse tree node type: ", local-name($node)))
};

(:~
 : Apply closure operators to a given ASR fragment.
 :
 : @param $ast the AST fragment.
 : @param $operators the closure operators.
 : @return the AST fragment for the parse tree fragment, which is
 : a sequence of elements in the XPath grammar namespace, an
 : xhref attribute node, or a processing instruction.
 :)
declare function a:closure($ast as node()*, $operators as element(TOKEN)*) as node()*
{
  if (empty($operators)) then
    $ast
  else
    let $operator := $operators[1]
    return
      a:closure
      (
        if ($operator = "?") then
          if (count($ast) != 1) then
            element g:optional {$ast}
          else
            typeswitch ($ast)
            case element(g:optional) return $ast
            case element(g:zeroOrMore) return $ast
            case element(g:oneOrMore) return element g:zeroOrMore{$ast/*}
            default return
              element g:optional {$ast}
        else if ($operator = "+") then
          if (count($ast) != 1) then
            element g:oneOrMore {$ast}
          else
            typeswitch ($ast)
            case element(g:optional) return element g:zeroOrMore{$ast/*}
            case element(g:zeroOrMore) return $ast
            case element(g:oneOrMore) return $ast
            default return
              element g:oneOrMore {$ast}
        else if ($operator = "*") then
          if (count($ast) != 1) then
            element g:zeroOrMore {$ast}
          else
            typeswitch ($ast)
            case element(g:optional) return element g:zeroOrMore{$ast/*}
            case element(g:zeroOrMore) return $ast
            case element(g:oneOrMore) return element g:zeroOrMore{$ast/*}
            default return
              element g:zeroOrMore {$ast}
        else
          error(xs:QName("a:closure"), concat("invalid operator: ", $operator)),
        subsequence($operators, 2)
      )
};




© 2015 - 2025 Weber Informatics LLC | Privacy Policy