org.apache.spark.sql.catalyst.AbstractSparkSQLParser.scala Maven / Gradle / Ivy
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.spark.sql.catalyst
import scala.language.implicitConversions
import scala.util.parsing.combinator.lexical.StdLexical
import scala.util.parsing.combinator.syntactical.StandardTokenParsers
import scala.util.parsing.combinator.PackratParsers
import scala.util.parsing.input.CharArrayReader.EofCh
import org.apache.spark.sql.catalyst.plans.logical._
/**
 * Base class for SQL parsers built on Scala's token-level parser combinators.
 *
 * Sub classes declare their keywords as members of type [[Keyword]] and provide the
 * `start` production. The keyword members are discovered by reflection the first time
 * `parse` runs and are registered with the lexer as reserved words.
 */
private[sql] abstract class AbstractSparkSQLParser
  extends StandardTokenParsers with PackratParsers {

  /**
   * Parses `input` with the `start` production and returns the resulting plan.
   *
   * The call is synchronized on this parser instance. Any failure or error result of
   * the underlying parser is reported through `sys.error`, using the parse result's
   * string form as the message.
   *
   * @param input the SQL text to parse
   * @return the logical plan produced by `start`
   */
  def parse(input: String): LogicalPlan = synchronized {
    // Force the lazy val so the reserved words are registered before scanning begins.
    initLexical
    phrase(start)(new lexical.Scanner(input)) match {
      case Success(plan, _) => plan
      case failureOrError => sys.error(failureOrError.toString)
    }
  }

  /*
   * One-time initialization of the lexical. Being a lazy val, this avoids
   * re-initializing the lexical on every call to the parse method.
   */
  protected lazy val initLexical: Unit = lexical.initialize(reservedWords)

  /**
   * A keyword of the grammar.
   *
   * @param str the keyword as written in the sub class; it is normalized by the lexer
   *            before being used
   */
  protected case class Keyword(str: String) {
    /** The keyword in the normalized form used by the lexer. */
    def normalize: String = lexical.normalizeKeyword(str)

    /** A parser for the normalized keyword (via the implicit String-to-parser conversion). */
    def parser: Parser[String] = normalize
  }

  /** Lets a [[Keyword]] be used directly wherever a `Parser[String]` is expected. */
  protected implicit def asParser(k: Keyword): Parser[String] = k.parser

  // By default, use Reflection to find the reserved words defined in the sub class.
  // NOTICE, Since the Keyword properties defined by sub class, we couldn't call this
  // method during the parent class instantiation, because the sub class instance
  // isn't created yet.
  //
  // Only zero-argument methods are considered: a method that returns a Keyword but
  // takes parameters cannot be invoked without arguments and is not a keyword
  // declaration, so invoking it here would throw IllegalArgumentException.
  protected lazy val reservedWords: Seq[String] =
    this
      .getClass
      .getMethods
      .filter(m => m.getReturnType == classOf[Keyword] && m.getParameterTypes.isEmpty)
      .map(_.invoke(this).asInstanceOf[Keyword].normalize)

  // The lexer starts without the sub class keywords; they are installed lazily through
  // initLexical on the first call to parse.
  override val lexical: SqlLexical = new SqlLexical

  /** The top-level production of the grammar, supplied by the sub class. */
  protected def start: Parser[LogicalPlan]

  // Returns the whole input string
  protected lazy val wholeInput: Parser[String] = new Parser[String] {
    def apply(in: Input): ParseResult[String] =
      // Succeed with the full source text and advance the reader by the source length.
      Success(in.source.toString, in.drop(in.source.length()))
  }

  // Returns the rest of the input string that are not parsed yet
  protected lazy val restInput: Parser[String] = new Parser[String] {
    def apply(in: Input): ParseResult[String] =
      // Succeed with the source text from the current offset to the end, and advance
      // the reader by the source length.
      Success(
        in.source.subSequence(in.offset, in.source.length()).toString,
        in.drop(in.source.length()))
  }
}
/**
 * Lexer for the SQL parsers. It extends the standard lexer with decimal and
 * scientific-notation literals, back-quoted identifiers, case-insensitive keywords,
 * additional delimiters and SQL-style comments.
 */
class SqlLexical extends StdLexical {

  /** Token for a numeric literal that has a fractional part and/or an exponent. */
  case class DecimalLit(chars: String) extends Token {
    override def toString: String = chars
  }

  /**
   * Replaces the set of reserved words with `keywords`.
   * This is a workaround to support setting the keywords lazily, after construction.
   *
   * @param keywords the reserved words, expected in normalized form
   */
  def initialize(keywords: Seq[String]): Unit = {
    reserved.clear()
    reserved ++= keywords
  }

  /**
   * Normalizes a keyword string to lower case.
   *
   * The conversion uses the locale-independent ROOT locale. With the JVM default
   * locale the result depends on the environment: under a Turkish locale an upper
   * case 'I' lower-cases to a dotless 'ı', so keywords such as INSERT or LIMIT would
   * no longer match the reserved words.
   *
   * @param str the keyword text in any letter case
   * @return the lower-cased keyword
   */
  def normalizeKeyword(str: String): String = str.toLowerCase(java.util.Locale.ROOT)

  delimiters += (
    "@", "*", "+", "-", "<", "=", "<>", "!=", "<=", ">=", ">", "/", "(", ")",
    ",", ";", "%", "{", "}", ":", "[", "]", ".", "&", "|", "^", "~", "<=>"
  )

  /**
   * Classifies a scanned name: a `Keyword` token (carrying the normalized text) when
   * the normalized name is reserved, otherwise an `Identifier` with the original text.
   */
  protected override def processIdent(name: String) = {
    val token = normalizeKeyword(name)
    if (reserved contains token) Keyword(token) else Identifier(name)
  }

  // The order of the alternatives matters: the first one that succeeds wins.
  override lazy val token: Parser[Token] =
    ( // digits followed by an exponent, e.g. 1e10
      rep1(digit) ~ scientificNotation ^^ { case i ~ s => DecimalLit(i.mkString + s) }
      // leading-dot fraction with an exponent, e.g. .5e3 (rendered with a leading "0.")
    | '.' ~> (rep1(digit) ~ scientificNotation) ^^
      { case i ~ s => DecimalLit("0." + i.mkString + s) }
      // digits, a dot, optional digits and an exponent, e.g. 1.5e3
    | rep1(digit) ~ ('.' ~> digit.*) ~ scientificNotation ^^
      { case i1 ~ i2 ~ s => DecimalLit(i1.mkString + "." + i2.mkString + s) }
      // identifier or keyword; leading digits are allowed as long as an identChar follows
    | digit.* ~ identChar ~ (identChar | digit).* ^^
      { case first ~ middle ~ rest => processIdent((first ++ (middle :: rest)).mkString) }
      // plain integer, or decimal without an exponent
    | rep1(digit) ~ ('.' ~> digit.*).? ^^ {
        case i ~ None => NumericLit(i.mkString)
        case i ~ Some(d) => DecimalLit(i.mkString + "." + d.mkString)
      }
      // single-quoted string literal
    | '\'' ~> chrExcept('\'', '\n', EofCh).* <~ '\'' ^^
      { case chars => StringLit(chars mkString "") }
      // double-quoted string literal
    | '"' ~> chrExcept('"', '\n', EofCh).* <~ '"' ^^
      { case chars => StringLit(chars mkString "") }
      // back-quoted text is always an identifier; it is not checked against keywords
    | '`' ~> chrExcept('`', '\n', EofCh).* <~ '`' ^^
      { case chars => Identifier(chars mkString "") }
    | EofCh ^^^ EOF
      // an opening quote that did not match a complete literal above
    | '\'' ~> failure("unclosed string literal")
    | '"' ~> failure("unclosed string literal")
    | delim
    | failure("illegal character")
    )

  /** Identifier characters are letters and the underscore. */
  override def identChar: Parser[Elem] = letter | elem('_')

  // Exponent part of a number: 'e' or 'E', an optional sign and at least one digit.
  // The result always uses a lower case "e".
  private lazy val scientificNotation: Parser[String] =
    (elem('e') | elem('E')) ~> (elem('+') | elem('-')).? ~ rep1(digit) ^^ {
      case s ~ rest => "e" + s.mkString + rest.mkString
    }

  // Skipped input: whitespace characters, /* ... */ block comments, and line comments
  // introduced by "//", "#" or "--". A "/*" that is never closed is reported as a failure.
  override def whitespace: Parser[Any] =
    ( whitespaceChar
    | '/' ~ '*' ~ comment
    | '/' ~ '/' ~ chrExcept(EofCh, '\n').*
    | '#' ~ chrExcept(EofCh, '\n').*
    | '-' ~ '-' ~ chrExcept(EofCh, '\n').*
    | '/' ~ '*' ~ failure("unclosed comment")
    ).*
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy