// All downloads are free. The search and download features use the official Maven repository.
//
// scalaParser.Xml.scala (Maven / Gradle / Ivy)

package scalaParser
import acyclic.file
import org.parboiled2._

import scala.language.implicitConversions

/**
 * Grammar rules for XML literals and XML patterns embedded in Scala source.
 *
 * Only `XmlExpr` and `XmlPattern` are exposed; every supporting rule lives in
 * the private `Xml` object below so that its names (`Char`, `Name`, `Comment`,
 * ...) do not clash with rules declared elsewhere in the parser.
 *
 * Rules typed `R1` push the text they matched onto the value stack; the
 * `Concat*` actions then glue those pieces back together. Rules without a
 * type annotation push nothing and are wrapped in `capture(...)` at their
 * use sites.
 */
trait Xml extends Core {

  /** Scala pattern list used inside `{ ... }` within an XML pattern; supplied by the concrete parser. */
  def Patterns: R1

  /** An XML expression: one content item followed by any number of further whitespace-separated elements. */
  def XmlExpr: R1 = rule( WL ~ Xml.XmlContent ~ ((WL ~ Xml.Element ~> Concat).* ~> ConcatSeqNoDelim) ~> Concat3 )

  /** An XML pattern: leading whitespace followed by a single element pattern. */
  def XmlPattern: R1 = rule( WL ~ Xml.ElemPattern ~> Concat )

  private[this] object Xml{
    /** Alphabetic characters allowed in names; the ranges mirror the `BaseChar` production of XML 1.0. */
    def BaseChar = rule(
      ("\u0041"-"\u005A") | ("\u0061"-"\u007A") | ("\u00C0"-"\u00D6") | ("\u00D8"-"\u00F6") |
      ("\u00F8"-"\u00FF") | ("\u0100"-"\u0131") | ("\u0134"-"\u013E") | ("\u0141"-"\u0148") |
      ("\u014A"-"\u017E") | ("\u0180"-"\u01C3") | ("\u01CD"-"\u01F0") | ("\u01F4"-"\u01F5") |
      ("\u01FA"-"\u0217") | ("\u0250"-"\u02A8") | ("\u02BB"-"\u02C1") | "\u0386" |
      ("\u0388"-"\u038A") | "\u038C" | ("\u038E"-"\u03A1") | ("\u03A3"-"\u03CE") |
      ("\u03D0"-"\u03D6") | "\u03DA" | "\u03DC" | "\u03DE" | "\u03E0" | ("\u03E2"-"\u03F3") |
      ("\u0401"-"\u040C") | ("\u040E"-"\u044F") | ("\u0451"-"\u045C") | ("\u045E"-"\u0481") |
      ("\u0490"-"\u04C4") | ("\u04C7"-"\u04C8") | ("\u04CB"-"\u04CC") | ("\u04D0"-"\u04EB") |
      ("\u04EE"-"\u04F5") | ("\u04F8"-"\u04F9") | ("\u0531"-"\u0556") | "\u0559" |
      ("\u0561"-"\u0586") | ("\u05D0"-"\u05EA") | ("\u05F0"-"\u05F2") | ("\u0621"-"\u063A") |
      ("\u0641"-"\u064A") | ("\u0671"-"\u06B7") | ("\u06BA"-"\u06BE") | ("\u06C0"-"\u06CE") |
      ("\u06D0"-"\u06D3") | "\u06D5" | ("\u06E5"-"\u06E6") | ("\u0905"-"\u0939") | "\u093D" |
      ("\u0958"-"\u0961") | ("\u0985"-"\u098C") | ("\u098F"-"\u0990") | ("\u0993"-"\u09A8") |
      ("\u09AA"-"\u09B0") | "\u09B2" | ("\u09B6"-"\u09B9") | ("\u09DC"-"\u09DD") |
      ("\u09DF"-"\u09E1") | ("\u09F0"-"\u09F1") | ("\u0A05"-"\u0A0A") | ("\u0A0F"-"\u0A10") |
      ("\u0A13"-"\u0A28") | ("\u0A2A"-"\u0A30") | ("\u0A32"-"\u0A33") | ("\u0A35"-"\u0A36") |
      ("\u0A38"-"\u0A39") | ("\u0A59"-"\u0A5C") | "\u0A5E" | ("\u0A72"-"\u0A74") |
      ("\u0A85"-"\u0A8B") | "\u0A8D" | ("\u0A8F"-"\u0A91") | ("\u0A93"-"\u0AA8") |
      ("\u0AAA"-"\u0AB0") | ("\u0AB2"-"\u0AB3") | ("\u0AB5"-"\u0AB9") | "\u0ABD" | "\u0AE0" |
      ("\u0B05"-"\u0B0C") | ("\u0B0F"-"\u0B10") | ("\u0B13"-"\u0B28") | ("\u0B2A"-"\u0B30") |
      ("\u0B32"-"\u0B33") | ("\u0B36"-"\u0B39") | "\u0B3D" | ("\u0B5C"-"\u0B5D") |
      ("\u0B5F"-"\u0B61") | ("\u0B85"-"\u0B8A") | ("\u0B8E"-"\u0B90") | ("\u0B92"-"\u0B95") |
      ("\u0B99"-"\u0B9A") | "\u0B9C" | ("\u0B9E"-"\u0B9F") | ("\u0BA3"-"\u0BA4") |
      ("\u0BA8"-"\u0BAA") | ("\u0BAE"-"\u0BB5") | ("\u0BB7"-"\u0BB9") | ("\u0C05"-"\u0C0C") |
      ("\u0C0E"-"\u0C10") | ("\u0C12"-"\u0C28") | ("\u0C2A"-"\u0C33") | ("\u0C35"-"\u0C39") |
      ("\u0C60"-"\u0C61") | ("\u0C85"-"\u0C8C") | ("\u0C8E"-"\u0C90") | ("\u0C92"-"\u0CA8") |
      ("\u0CAA"-"\u0CB3") | ("\u0CB5"-"\u0CB9") | "\u0CDE" | ("\u0CE0"-"\u0CE1") |
      ("\u0D05"-"\u0D0C") | ("\u0D0E"-"\u0D10") | ("\u0D12"-"\u0D28") | ("\u0D2A"-"\u0D39") |
      ("\u0D60"-"\u0D61") | ("\u0E01"-"\u0E2E") | "\u0E30" | ("\u0E32"-"\u0E33") |
      ("\u0E40"-"\u0E45") | ("\u0E81"-"\u0E82") | "\u0E84" | ("\u0E87"-"\u0E88") | "\u0E8A" |
      "\u0E8D" | ("\u0E94"-"\u0E97") | ("\u0E99"-"\u0E9F") | ("\u0EA1"-"\u0EA3") | "\u0EA5" |
      "\u0EA7" | ("\u0EAA"-"\u0EAB") | ("\u0EAD"-"\u0EAE") | "\u0EB0" | ("\u0EB2"-"\u0EB3") |
      "\u0EBD" | ("\u0EC0"-"\u0EC4") | ("\u0F40"-"\u0F47") | ("\u0F49"-"\u0F69") |
      ("\u10A0"-"\u10C5") | ("\u10D0"-"\u10F6") | "\u1100" | ("\u1102"-"\u1103") |
      ("\u1105"-"\u1107") | "\u1109" | ("\u110B"-"\u110C") | ("\u110E"-"\u1112") |
      "\u113C" | "\u113E" | "\u1140" | "\u114C" | "\u114E" | "\u1150" | ("\u1154"-"\u1155") |
      "\u1159" | ("\u115F"-"\u1161") | "\u1163" | "\u1165" | "\u1167" | "\u1169" |
      ("\u116D"-"\u116E") | ("\u1172"-"\u1173") | "\u1175" | "\u119E" | "\u11A8" | "\u11AB" |
      ("\u11AE"-"\u11AF") | ("\u11B7"-"\u11B8") | "\u11BA" | ("\u11BC"-"\u11C2") | "\u11EB" |
      "\u11F0" | "\u11F9" | ("\u1E00"-"\u1E9B") | ("\u1EA0"-"\u1EF9") | ("\u1F00"-"\u1F15") |
      ("\u1F18"-"\u1F1D") | ("\u1F20"-"\u1F45") | ("\u1F48"-"\u1F4D") | ("\u1F50"-"\u1F57") |
      "\u1F59" | "\u1F5B" | "\u1F5D" | ("\u1F5F"-"\u1F7D") | ("\u1F80"-"\u1FB4") |
      ("\u1FB6"-"\u1FBC") | "\u1FBE" | ("\u1FC2"-"\u1FC4") | ("\u1FC6"-"\u1FCC") |
      ("\u1FD0"-"\u1FD3") | ("\u1FD6"-"\u1FDB") | ("\u1FE0"-"\u1FEC") | ("\u1FF2"-"\u1FF4") |
      ("\u1FF6"-"\u1FFC") | "\u2126" | ("\u212A"-"\u212B") | "\u212E" | ("\u2180"-"\u2182") |
      ("\u3041"-"\u3094") | ("\u30A1"-"\u30FA") | ("\u3105"-"\u312C") | ("\uAC00"-"\uD7A3")
    )

    /** Ideographic characters allowed in names. Parenthesised like `BaseChar`; `-` already binds tighter than `|`. */
    def Ideographic = rule( ("\u4E00"-"\u9FA5") | "\u3007" | ("\u3021"-"\u3029") )

    /** The `=` of an attribute, with optional whitespace on either side. */
    def Eq = rule (WLR0.? ~ '=' ~ WLR0.?)

    /** A complete element: either self-closing, or start tag + content + end tag. */
    def Element: R1 = rule( EmptyElemTag | STag ~ Content ~ capture(ETag) ~> Concat3 )

    /** A self-closing tag: `<`, a name, any attributes, optional whitespace, then `/>`. */
    def EmptyElemTag: R1 = rule( capture('<' ~ Name) ~ ((WL ~ Attribute ~> Concat).* ~> ConcatSeqNoDelim) ~ capture(WLR0.? ~ "/>") ~> Concat3 )

    /** A start tag: `<`, a name, any attributes, optional whitespace, then `>`. */
    def STag: R1 = rule( capture('<' ~ Name) ~ ((WL ~ Attribute ~> Concat).* ~> ConcatSeqNoDelim) ~ capture(WLR0.? ~ '>') ~> Concat3 )

    /** An end tag: `</`, a name, optional whitespace, then `>` (same tail as `STag`). */
    def ETag = rule( "</" ~ Name ~ WLR0.? ~ '>' )

    /** Element content: any mix of raw character data and nested content items. */
    def Content: R1 = rule( (capture(CharData) | Content1).* ~> ConcatSeqNoDelim )

    /** A single non-text content item: XML content, an entity/char reference, or a `{ ... }` Scala block. */
    def Content1: R1  = rule( XmlContent | capture(Reference) | ScalaExpr )

    /** Markup that may start an XML expression: element, CDATA section, processing instruction or comment. */
    def XmlContent: R1 = rule( Element | capture(CDSect) | capture(PI) | capture(Comment) )

    /** A CDATA section: opener, raw data, closer. */
    def CDSect = rule( CDStart ~ CData ~ CDEnd )
    /** The literal CDATA opener. */
    def CDStart = rule( "<![CDATA[" )
    /** CDATA payload: any characters up to, but not including, the first `]]>`. */
    def CData = rule( (!"]]>" ~ Char).* )
    /** The literal CDATA closer. */
    def CDEnd = rule( "]]>" )

    /** An attribute: `name =` followed by its value. */
    def Attribute: R1 = rule( capture(Name ~ Eq) ~ AttValue ~> Concat )

    /** An attribute value: double-quoted text, single-quoted text, or a `{ ... }` Scala block. */
    def AttValue: R1 = rule(
      capture('"' ~ (CharQ | Reference).* ~ '"') |
      capture("'" ~ (CharA | Reference).* ~ "'") |
      ScalaExpr
    )

    /**
     * An XML comment. Follows the XML 1.0 Comment production: between the
     * delimiters a `-` must be followed by a non-`-` character, so a double
     * hyphen can only appear as part of the closing delimiter.
     */
    def Comment = rule( "<!--" ~ ((!'-' ~ Char) | ('-' ~ (!'-' ~ Char))).* ~ "-->" )

    /**
     * A processing instruction: `<?`, a target, then optionally whitespace and
     * arbitrary text, terminated by the first `?>`. Uses `WLR0` (not `WL`) so
     * the rule pushes no value and can be wrapped in `capture` by `XmlContent`.
     */
    def PI = rule( "<?" ~ PITarget ~ (WLR0 ~ (!"?>" ~ Char).*).? ~ "?>" )

    /** A processing-instruction target: any name that does not begin with "xml" in any letter case. */
    def PITarget = rule( !(("X" | "x") ~ ("M" | "m") ~ ("L" | "l")) ~ Name )

    /** A numeric character reference, decimal (`&#...;`) or hexadecimal (`&#x...;`). */
    def CharRef = rule( "&#" ~ ("0"-"9").+ ~ ';' | "&#x" ~ Basic.HexNum ~ ";" )

    /** Either kind of reference; entity references are tried first. */
    def Reference = rule( EntityRef | CharRef )

    /** A named entity reference such as `&amp;`. */
    def EntityRef = rule( "&" ~ Name ~ ";" )

    /** An embedded Scala block: `{`, whitespace, a block, whitespace, `}`. */
    def ScalaExpr: R1 = rule(capture("{") ~ WS ~ Block ~ WS ~ capture("}") ~> Concat5)

    /** Any single input character. */
    def Char = rule( ANY )

    /**
     * A non-empty run of literal text. A lone `{` ends the run (it opens a
     * Scala block), whereas the doubled form `{{` is accepted as text.
     */
    def CharData = rule( (!("{" | "]]>" | CharRef) ~ Char1 | "{{").+ )

    /** Any character except `<` and `&`. */
    def Char1  = rule( &(noneOf("<&")) ~ Char )
    /** `Char1` minus the double quote; used inside double-quoted attribute values. */
    def CharQ = rule( !'"' ~ Char1 )
    /** `Char1` minus the single quote; used inside single-quoted attribute values. */
    def CharA = rule( !"'" ~ Char1 )
    /** `Char1` minus `{`. Not referenced by any other rule in this object. */
    def CharB = rule( !'{' ~ Char1 )

    /** A tag or attribute name: a restricted start character followed by name characters. */
    def Name = rule( XNameStart ~ NameChar.* )

    /** First character of a `Name`: underscore, `BaseChar` or `Ideographic` (note: no `:`). */
    def XNameStart  = rule( '_' | BaseChar | Ideographic )

    /** Characters that `NameChar` accepts in addition to digits and punctuation. */
    def NameStartChar = rule(
      ":" | ("A"-"Z") | "_" | ("a"-"z") | ("\u00C0"-"\u00D6") | ("\u00D8"-"\u00F6") |
      ("\u00F8"-"\u02FF") | ("\u0370"-"\u037D") | ("\u037F"-"\u1FFF") | ("\u200C"-"\u200D") |
      ("\u2070"-"\u218F") | ("\u2C00"-"\u2FEF") | ("\u3001"-"\uD7FF") | ("\uF900"-"\uFDCF") |
      ("\uFDF0"-"\uFFFD") )// | [#x10000-#xEFFFF] ???? don't chars max out at \uffff ????

    /** Any character permitted after the first character of a `Name`. */
    def NameChar = rule( NameStartChar | "-" | "." | ("0"-"9") | "\u00B7" | ("\u0300"-"\u036F") | ("\u203F"-"\u2040") )

    /** An element in pattern position: self-closing, or start tag + pattern content + end tag. */
    def ElemPattern: R1 = rule( capture(EmptyElemTagP) | capture(STagP) ~ ContentP ~ capture(ETagP) ~> Concat3 )

    /** Self-closing tag in a pattern; unlike `EmptyElemTag` it accepts no attributes. */
    def EmptyElemTagP = rule( "<" ~ Name ~ WLR0.? ~ "/>" )
    /** Start tag in a pattern; unlike `STag` it accepts no attributes. */
    def STagP = rule( "<" ~ Name ~ WLR0.? ~ ">")
    /** End tag in a pattern: `</`, a name, optional whitespace, then `>`. */
    def ETagP = rule( "</" ~ Name ~ WLR0.? ~ ">" )

    /** Pattern content: optional text, then any number of (nested pattern or Scala patterns, optional text). */
    def ContentP: R1 = rule( capture(CharData.?) ~ (((ElemPattern | ScalaPatterns) ~ capture(CharData.?) ~> Concat).* ~> ConcatSeqNoDelim) ~> Concat )

    /** A single pattern content item. Not referenced by `ContentP`, which inlines a narrower choice. */
    def ContentP1: R1 = rule( ElemPattern | capture(Reference) | capture(CDSect) | capture(PI) | capture(Comment) | ScalaPatterns )

    /** Scala patterns embedded in an XML pattern: `{`, patterns, whitespace, `}`. */
    def ScalaPatterns: R1 = rule( capture("{") ~ Patterns ~ WL ~ capture("}") ~> Concat4 )
  }
}




// © 2015 - 2025 Weber Informatics LLC | Privacy Policy