// com.twitter.scrooge.frontend.ThriftParser.scala (Maven / Gradle / Ivy)
// The newest version!
/*
* Copyright 2011 Twitter, Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License. You may obtain
* a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.twitter.scrooge.frontend
import com.twitter.scrooge.ast._
import java.io.FileNotFoundException
import scala.collection.concurrent.{Map, TrieMap}
import scala.collection.mutable
import scala.util.parsing.combinator._
/**
 * Signals that parsing failed while processing a particular thrift file.
 *
 * @param filename name of the file that was being parsed; embedded in the message
 * @param cause    the underlying failure, kept as the exception cause
 */
case class FileParseException(filename: String, cause: Throwable)
  extends Exception(String.format("Exception parsing: %s", filename), cause)
class ThriftParser(
importer: Importer,
strict: Boolean,
defaultOptional: Boolean = false,
skipIncludes: Boolean = false,
documentCache: Map[String, Document] = new TrieMap[String, Document]
) extends RegexParsers {
// Text skipped between tokens: blanks, line comments and plain block comments.
// The pattern is one or more repetitions of the alternatives 1-4 listed below.
override val whiteSpace = """(\s+|(//.*\r?\n)|(#([^@\r\n].*)?\r?\n)|(/\*[^\*]([^\*]+|\r?\n|\*(?!/))*\*/))+""".r
// 1: whitespace, 1 or more
// 2: leading // followed by anything 0 or more, until newline
// 3: leading # then NOT a @ followed by anything 0 or more, until newline
//    (so text starting with #@ is not skipped as a comment)
// 4: leading /* then NOT a * (so a /** opener is not skipped here), then
//    0 or more of 4a/4b/4c:
// 4a: not a *, 1 or more times
// 4b: OR a newline
// 4c: OR a * that is NOT followed by / (0-width negative lookahead); this lets a
//     lone star appear inside the comment body without being taken for the
//     start of the closing */
// 4d: ending */
// transformations
/**
 * Validates the field ids of a field list and fills in the missing ones.
 *
 * An index of 0 is treated as "no id given". Such fields receive descending
 * negative ids (-1, -2, ...) in declaration order; all other fields are
 * returned unchanged.
 *
 * @param fields fields in declaration order
 * @return the same fields, in the same order, with every 0 index replaced
 * @throws NegativeFieldIdException  if any field carries an index below zero
 * @throws DuplicateFieldIdException if two fields share the same non-zero index
 */
def fixFieldIds(fields: List[Field]): List[Field] = {
  // Negative ids are reserved for the ids generated below.
  for (negative <- fields.find(_.index < 0))
    throw new NegativeFieldIdException(negative.sid.name)

  // Non-zero ids must be unique; the first repeated one is reported.
  val claimedIds = mutable.Set.empty[Int]
  for (explicit <- fields if explicit.index != 0) {
    if (!claimedIds.add(explicit.index))
      throw new DuplicateFieldIdException(explicit.sid.name)
  }

  // Hand out -1, -2, ... to the fields that have no id.
  val generatedIds = Iterator.from(-1, -1)
  fields.map { current =>
    if (current.index == 0) current.copy(index = generatedIds.next())
    else current
  }
}
// identifier
/**
 * Parses an identifier that may be simple (`foo`) or qualified (`foo.bar`).
 *
 * `identifier` is used where a qualified name may appear, for example:
 *   - right hand side of an assignment
 *   - namespace declaration
 * Names that are being declared use `simpleID` instead, specifically
 *   - left hand side of an assignment.
 *
 * Note that Scala parser does not support left recursion well. We cannot do
 * something like this which is more intuitive:
 def qualifiedID = (simpleID <~ "\\.") ~ repsep(simpleID, "\\.".r) ^^ {
   case id ~ ids => QualifiedID((id +: ids) map { _.name })
 }
 def identifier: Parser[Identifier] = qualifiedID | simpleID
 * so a single regex accepts both forms and `Identifier(...)` builds the node
 * from the matched text.
 */
val identifierRegex = "[A-Za-z_][A-Za-z0-9\\._]*".r
lazy val identifier = identifierRegex ^^ (Identifier(_))
// Words that `simpleID` reports through `failOrWarn` when used as a name.
private[this] val thriftKeywords: Set[String] = {
  val reservedWords = Set(
    "async", "const", "enum", "exception", "extends",
    "include", "namespace", "optional", "required", "service",
    "struct", "throws", "typedef", "union", "void"
  )
  // Built-in types are also keywords.
  val builtInTypeNames = Set(
    "binary", "bool", "byte", "double",
    "i16", "i32", "i64",
    "list", "map", "set", "string"
  )
  reservedWords ++ builtInTypeNames
}
// An unqualified name: a letter or underscore followed by letters, digits or underscores.
lazy val simpleIDRegex = "[A-Za-z_][A-Za-z0-9_]*".r

/**
 * Parses an unqualified name into a `SimpleID`.
 *
 * A name that is a thrift keyword is reported through `failOrWarn`; when that
 * call returns (non-strict mode) the keyword is still accepted as an id.
 */
lazy val simpleID = simpleIDRegex ^^ { name =>
  if (thriftKeywords(name)) failOrWarn(new KeywordException(name))
  SimpleID(name)
}
// right hand side (RHS)
/**
 * Parses a constant value. The alternatives are tried in this order: number,
 * boolean, string, bracketed list (`listOrMapRHS`), braced map, identifier.
 * When none applies the failure message is "constant expected".
 */
lazy val rhs: Parser[RHS] =
  numberLiteral |
    boolLiteral |
    stringLiteral |
    listOrMapRHS |
    mapRHS |
    idRHS |
    failure("constant expected")
/**
 * Parses `true`, `True`, `false` or `False` into a `BoolLiteral`.
 *
 * The keyword must not be immediately followed by a character that could
 * continue an identifier (letter, digit, `_` or `.`). Without that boundary,
 * a constant reference such as `falsePositive` would be consumed as the
 * literal `false`, leaving `Positive` behind and failing the parse; with it,
 * such input falls through to the identifier alternative of `rhs`.
 */
lazy val boolLiteral: Parser[BoolLiteral] =
  """(?:true|True|false|False)(?![A-Za-z0-9_.])""".r ^^ { text =>
    BoolLiteral(text.toLowerCase != "false")
  }
// A signed run of digits whose match does not end directly before a '.'.
lazy val intConstant = "[-+]?\\d+(?!\\.)".r ^^ (digits => IntLiteral(digits.toLong))

/**
 * Parses a signed number with optional fraction and exponent. Text containing
 * '.', 'e' or 'E' becomes a `DoubleLiteral`; anything else an `IntLiteral`.
 */
lazy val numberLiteral = "[-+]?\\d+(\\.\\d+)?([eE][-+]?\\d+)?".r ^^ { text =>
  val hasFractionOrExponent = text.exists(ch => "eE.".contains(ch))
  if (hasFractionOrExponent) DoubleLiteral(text.toDouble)
  else IntLiteral(text.toLong)
}
// Each quoted string is matched quote-to-quote by one regex so that the
// whitespace parser never runs inside the quotes. A backslash escapes the
// character that follows it.
lazy val doubleQuotedString = """(")(\\.|[^\\"])*(")""".r
lazy val singleQuotedString = """'(\\.|[^\\'])*'""".r

// The surrounding quotes are removed; escape sequences are kept as written.
lazy val stringLiteral = (doubleQuotedString | singleQuotedString) ^^ { quoted =>
  StringLiteral(quoted.slice(1, quoted.length - 1))
}
// An optional ',' or ';' between elements; the pattern also matches nothing.
lazy val listSeparator = "[,;]?".r

// A bracketed list of constants: `[a, b; c]`, with an optional trailing separator.
lazy val listOrMapRHS =
  ("[" ~> repsep(rhs, listSeparator) <~ opt(listSeparator) <~ "]") ^^ (ListRHS(_))

// One `key : value` entry of a map constant.
lazy val keyval = (rhs <~ ":") ~ rhs ^^ {
  case key ~ value => key -> value
}

// A braced map of constants: `{k1: v1, k2: v2}`, with an optional trailing separator.
lazy val mapRHS =
  ("{" ~> repsep(keyval, listSeparator) <~ opt(listSeparator) <~ "}") ^^ (MapRHS(_))

// A reference to another constant by (possibly qualified) name.
lazy val idRHS = identifier ^^ (IdRHS(_))
// types
// Any type usable for a field: built-in types first, then containers, and
// finally a reference to a user-defined type by name.
lazy val fieldType: Parser[FieldType] =
  baseType |
    containerType |
    referenceType

// A (possibly qualified) name standing for a type defined elsewhere.
lazy val referenceType = identifier ^^ (ReferenceType(_))
// The built-in scalar types. Keywords are tried in the order listed.
lazy val baseType: Parser[BaseType] = {
  val keywordToType: List[(String, BaseType)] = List(
    "bool" -> TBool,
    "byte" -> TByte,
    "i16" -> TI16,
    "i32" -> TI32,
    "i64" -> TI64,
    "double" -> TDouble,
    "string" -> TString,
    "binary" -> TBinary
  )
  keywordToType
    .map { case (keyword, tpe) => literal(keyword) ^^^ tpe }
    .reduceLeft[Parser[BaseType]](_ | _)
}
// map<K, V>, set<T> or list<T>.
lazy val containerType: Parser[ContainerType] =
  mapType |
    setType |
    listType

// `map [cpp_type "..."] < key , value >`
lazy val mapType =
  "map" ~> opt(cppType) ~ ("<" ~> fieldType) ~ ("," ~> fieldType <~ ">") ^^ {
    case cppName ~ keyType ~ valueType => MapType(keyType, valueType, cppName)
  }

// `set [cpp_type "..."] < element >`
lazy val setType =
  ("set" ~> opt(cppType) <~ "<") ~ (fieldType <~ ">") ^^ {
    case cppName ~ elementType => SetType(elementType, cppName)
  }

// `list < element > [cpp_type "..."]`; here the cpp_type follows the type argument.
lazy val listType =
  ("list" ~> "<" ~> fieldType <~ ">") ~ opt(cppType) ^^ {
    case elementType ~ cppName => ListType(elementType, cppName)
  }
// Legacy `cpp_type "name"` marker found in old thrift files; yields the quoted
// name. Kept only so that those files keep parsing.
lazy val cppType = "cpp_type" ~> stringLiteral ^^ (_.value)
/**
 * Adapts a constant to the declared type of its target.
 *
 * Only `bool` targets are affected: the integers 0 and 1 are turned into the
 * matching boolean literal. Every other combination is returned untouched.
 *
 * @throws TypeMismatchException if a `bool` target is given anything other
 *                               than a boolean literal, 0 or 1
 */
private[this] def convertRhs(fieldType: FieldType, rhs: RHS): RHS =
  (fieldType, rhs) match {
    case (TBool, bool: BoolLiteral) => bool
    case (TBool, IntLiteral(0)) => BoolLiteral(false)
    case (TBool, IntLiteral(1)) => BoolLiteral(true)
    case (TBool, _) => throw new TypeMismatchException(s"Can't assign $rhs to a bool")
    case _ => rhs
  }
// fields
/**
 * Parses one field declaration:
 * `[doc comments] [id:] [required|optional] type [annotations] name [= default] [annotations] [,|;]`
 *
 * A missing id is recorded as index 0. The default value, if present, is
 * passed through `convertRhs` for the declared type.
 */
lazy val field = (opt(comments) ~ opt(fieldId) ~ fieldReq) ~
  (fieldType ~ defaultedAnnotations ~ simpleID) ~
  opt("=" ~> rhs) ~ defaultedAnnotations <~ opt(listSeparator) ^^ {
  case (docs ~ explicitId ~ declaredReq) ~ (tpe ~ typeAnns ~ name) ~ rawDefault ~ fieldAnns =>
    val defaultValue = rawDefault.map(convertRhs(tpe, _))
    // if field is marked optional and a default is defined, ignore the optional part
    // (unless the parser was created with defaultOptional = true).
    val dropOptional = !defaultOptional && defaultValue.isDefined && declaredReq.isOptional
    val requiredness = if (dropOptional) Requiredness.Default else declaredReq
    Field(
      explicitId.getOrElse(0),
      name,
      name.name,
      tpe,
      defaultValue,
      requiredness,
      typeAnns,
      fieldAnns,
      docs
    )
}
// The numeric id written in front of a field, e.g. the `1` in `1: i32 x`.
lazy val fieldId = (intConstant <~ ":") ^^ (_.value.toInt)

// The optional `required` / `optional` marker of a field.
lazy val fieldReq = opt("required" | "optional") ^^ {
  case None => Requiredness.Default
  case Some("required") => Requiredness.Required
  case Some("optional") => Requiredness.Optional
}
// functions
/**
 * Parses one service function:
 * `[doc comments] [oneway] returnType name ( params ) [throws ( exceptions )] [,|;]`
 *
 * A function marked `oneway` gets `OnewayVoid` as its type regardless of the
 * return type written. Parameter and exception ids go through `fixFieldIds`.
 */
lazy val function =
  (opt(comments) ~ (opt("oneway") ~ functionType)) ~ (simpleID <~ "(") ~ (rep(field) <~ ")") ~
    (opt(throws) <~ opt(listSeparator)) ^^ {
    case docs ~ (onewayMarker ~ declaredType) ~ name ~ params ~ thrown =>
      // Parameters are validated before exceptions.
      val fixedParams = fixFieldIds(params)
      val fixedThrows = thrown.map(fixFieldIds(_)).getOrElse(Nil)
      Function(
        name,
        name.name,
        if (onewayMarker.isDefined) OnewayVoid else declaredType,
        fixedParams,
        fixedThrows,
        docs)
  }

// A function's return type: `void` or any field type.
lazy val functionType: Parser[FunctionType] =
  ("void" ^^^ Void) |
    fieldType

// The `throws ( ... )` clause; yields the declared exception fields.
lazy val throws = "throws" ~> ("(" ~> rep(field) <~ ")")
// definitions
// Any top-level definition; alternatives are tried in the order listed.
lazy val definition =
  const |
    typedef |
    enum |
    senum |
    struct |
    union |
    exception |
    service

// `[doc comments] const type name = value [,|;]`
// The value is passed through `convertRhs` for the declared type.
lazy val const =
  opt(comments) ~ ("const" ~> fieldType) ~ simpleID ~ ("=" ~> rhs) <~ opt(listSeparator) ^^ {
    case docs ~ tpe ~ name ~ value =>
      ConstDefinition(name, tpe, convertRhs(tpe, value), docs)
  }

// `[doc comments] typedef type [annotations] name`
// Doc comments in front of a typedef are consumed and dropped.
lazy val typedef =
  opt(comments) ~> "typedef" ~> (fieldType ~ defaultedAnnotations ~ simpleID) ^^ {
    case aliased ~ typeAnns ~ name => Typedef(name, aliased, typeAnns)
  }
/**
 * Parses `[doc comments] enum Name { [doc comments] KEY [= value] [,|;] ... }`.
 *
 * An entry without an explicit value takes the previous entry's value plus
 * one, starting at 0.
 *
 * @throws RepeatingEnumValueException if a value is used more than once; the
 *                                     last repeated value found is reported
 */
lazy val enum = (opt(comments) ~ (("enum" ~> simpleID) <~ "{")) ~
  rep(opt(comments) ~ simpleID ~ opt("=" ~> intConstant) <~ opt(listSeparator)) <~ "}" ^^ {
  case docs ~ name ~ entries =>
    // Accumulator: (next implicit value, values seen, last repeated value, fields in reverse)
    val start = (0, Set.empty[Int], Option.empty[Int], List.empty[EnumField])
    val (_, _, repeated, reversedFields) = entries.foldLeft(start) {
      case ((nextValue, seen, lastRepeated, acc), entryDocs ~ key ~ explicit) =>
        val value = explicit.map(_.value.toInt).getOrElse(nextValue)
        (
          value + 1,
          seen + value,
          if (seen(value)) Some(value) else lastRepeated,
          EnumField(key, value, entryDocs) :: acc
        )
    }
    repeated match {
      case Some(value) => throw new RepeatingEnumValueException(name.name, value)
      case None => Enum(name, reversedFields.reverse, docs)
    }
}
// `senum Name { "a" [,|;] "b" ... }`: an enum whose members are string literals.
lazy val senum =
  ("senum" ~> simpleID <~ "{") ~ rep(stringLiteral <~ opt(listSeparator)) <~ "}" ^^ {
    case name ~ members => Senum(name, members.map(_.value))
  }
/**
 * Shared grammar of struct-shaped definitions:
 * `[doc comments] <keyword> Name { fields } [annotations]`
 *
 * @param keyword the introducing keyword, e.g. "struct" or "union"
 */
def structLike(keyword: String) =
  (opt(comments) ~ (literal(keyword) ~> simpleID <~ "{")) ~ rep(field) ~ ("}" ~> defaultedAnnotations)

// A struct definition; field ids are validated and completed by `fixFieldIds`.
lazy val struct = structLike("struct") ^^ {
  case docs ~ name ~ members ~ structAnns =>
    Struct(name, name.name, fixFieldIds(members), docs, structAnns)
}
// Field names (compared case-insensitively) that a union may not declare.
private[this] val disallowedUnionFieldNames = Set("unknown_union_field", "unknownunionfield") map { _.toLowerCase }

/**
 * Parses a union definition.
 *
 * Union fields may not carry a `required`/`optional` marker: such a field is
 * reported through `failOrWarn` and, when that call returns (non-strict
 * mode), its requiredness is reset to `Default`.
 *
 * The reserved-name check is then applied to every field. It used to run only
 * for fields without a marker, which let a marked field slip through with a
 * disallowed name in non-strict mode. The requiredness report still comes
 * first, so strict mode raises the same exception as before.
 *
 * @throws UnionFieldInvalidNameException if a field uses a disallowed name
 */
lazy val union = structLike("union") ^^ {
  case comment ~ sid ~ fields ~ annotations =>
    val fields0 = fields.map { f =>
      val normalized =
        if (f.requiredness == Requiredness.Default) f
        else {
          failOrWarn(UnionFieldRequirednessException(sid.name, f.sid.name, f.requiredness.toString))
          f.copy(requiredness = Requiredness.Default)
        }
      if (disallowedUnionFieldNames.contains(normalized.sid.name.toLowerCase)) {
        throw new UnionFieldInvalidNameException(sid.name, normalized.sid.name)
      }
      normalized
    }
    Union(sid, sid.name, fixFieldIds(fields0), comment, annotations)
}
// `[doc comments] exception Name { fields }`
// Field ids are validated and completed by `fixFieldIds`.
lazy val exception =
  (opt(comments) ~ ("exception" ~> simpleID <~ "{")) ~ opt(rep(field)) <~ "}" ^^ {
    case docs ~ name ~ members =>
      val declaredFields = members.getOrElse(Nil)
      Exception_(name, name.name, fixFieldIds(declaredFields), docs)
  }
// `[doc comments] service Name [extends Parent] { functions }`
lazy val service =
  (opt(comments) ~ ("service" ~> simpleID)) ~ opt("extends" ~> serviceParentID) ~
    ("{" ~> rep(function) <~ "}") ^^ {
    case docs ~ name ~ parent ~ methods =>
      Service(name, parent, methods, docs)
  }

// The parent of a service: `[file.]Name`. The optional file prefix is matched
// with `simpleIDRegex` directly, i.e. without the keyword check done by
// `simpleID`, so filenames that are thrift keywords are allowed.
lazy val serviceParentID = opt(regex(simpleIDRegex) <~ ".") ~ simpleID ^^ {
  case filePrefix ~ parentName =>
    ServiceParent(parentName, filePrefix.map(SimpleID(_)))
}
// document
// A whole thrift file: headers first, then definitions. Doc comments left
// over after the last definition are consumed and dropped.
lazy val document: Parser[Document] =
  (rep(header) ~ rep(definition) <~ opt(comments)) ^^ {
    case headers ~ definitions => Document(headers, definitions)
  }

// One header line; alternatives are tried in the order listed.
lazy val header: Parser[Header] =
  include |
    cppInclude |
    namespace
// `[doc comments] include "path"`
// The included file is parsed right away through `parseFile`, unless the
// parser was created with skipIncludes = true, in which case an empty
// document stands in for it.
lazy val include = opt(comments) ~> "include" ~> stringLiteral ^^ { path =>
  val included =
    if (skipIncludes) Document(Seq(), Seq())
    else parseFile(path.value)
  Include(path.value, included)
}

// `cpp_include "path"`; only the path is recorded, nothing is parsed.
lazy val cppInclude = "cpp_include" ~> stringLiteral ^^ (path => CppInclude(path.value))
// `[doc comments] [#@]namespace scope name`
// The optional `#@` prefix is accepted in front of the keyword; the whitespace
// pattern does not treat `#@` as the start of a comment.
lazy val namespace =
  (opt(comments) ~> opt("#@") ~> "namespace") ~> (namespaceScope ~ identifier) ^^ {
    case scope ~ name => Namespace(scope, name)
  }

// The namespace scope: `*` or an identifier, returned as its full name.
lazy val namespaceScope =
  ("*" ^^^ "*") |
    (identifier ^^ (_.fullName))
/**
 * Matches one or more consecutive scaladoc/javadoc style comments and joins
 * their text with newlines.
 */
lazy val comments: Parser[String] = rep1(docComment) ^^ (_.mkString("\n"))

// A single doc comment. (?s) lets '.' span line breaks and the reluctant
// '.+?' stops at the first closing delimiter.
val docComment: Parser[String] = """(?s)/\*\*.+?\*/""".r
// annotations
// One `key = "value"` annotation, as a (full key name, value) pair.
lazy val annotation = (identifier <~ "=") ~ stringLiteral ^^ {
  case key ~ text => key.fullName -> text.value
}

// `( a = "x", b = "y" [,] )` collected into a map.
lazy val annotationGroup =
  "(" ~> repsep(annotation, ",") <~ (opt(",") ~ ")") ^^ (_.toMap)

// An optional annotation group; a missing group yields an empty map.
lazy val defaultedAnnotations = opt(annotationGroup) ^^ (_.getOrElse(Map.empty))
/**
 * Runs `parser` over the whole of `in` and returns its result.
 *
 * @param in     the text to parse
 * @param parser the parser to apply; it must consume the entire input
 * @param file   name of the file `in` came from, if any; used for error context
 * @return the value produced by `parser`
 * @throws ParseException     if parsing does not succeed and `file` is empty
 * @throws FileParseException wrapping any non-fatal failure when `file` is given
 */
def parse[T](in: String, parser: Parser[T], file: Option[String] = None): T =
  try {
    parseAll(parser, in) match {
      case Success(result, _) => result
      // Failure and Error are both NoSuccess and were handled identically.
      case failure: NoSuccess => throw new ParseException(failure.toString)
    }
  } catch {
    // Only non-fatal problems get the file name attached. Fatal JVM errors and
    // interrupts propagate unchanged instead of being wrapped in an ordinary
    // exception.
    case scala.util.control.NonFatal(e) =>
      throw file.map(FileParseException(_, e)).getOrElse(e)
  }
/**
 * Parses the thrift file `filename`, resolved through the importer.
 *
 * When the importer reports a resolved path for the file, that path is the
 * key under which the parsed document is stored in and served from
 * `documentCache`. Otherwise the file is parsed without caching.
 */
def parseFile(filename: String): Document =
  importer
    .getResolvedPath(filename)
    .map(cacheKey => documentCache.getOrElseUpdate(cacheKey, parseFileUncached(filename)))
    .getOrElse(parseFileUncached(filename))
/**
 * Loads `filename` through the importer and parses it with a fresh parser
 * that shares this parser's settings and document cache.
 *
 * @throws java.io.FileNotFoundException if the importer cannot provide the file
 */
private[this] def parseFileUncached(filename: String): Document = {
  val contents = importer(filename).getOrElse(throw new FileNotFoundException(filename))

  // one thrift file can be included in another and referenced like this:
  //   list<includedthriftfilenamehere.Request> requests
  //
  // so the included file's name has to be a valid identifier as a whole;
  // otherwise the first person to include a file with, for example, a dash in
  // its name will run into problems.
  contents.thriftFilename.foreach { name =>
    val wholeNameMatches = identifierRegex.findFirstIn(name).exists(_ == name)
    if (!wholeNameMatches)
      failOrWarn(new InvalidThriftFilenameException(name, identifierRegex.toString()))
  }

  // Includes inside the file are resolved relative to the importer that came
  // back with its contents.
  val nestedParser = new ThriftParser(
    contents.importer,
    strict,
    defaultOptional,
    skipIncludes,
    documentCache)
  nestedParser.parse(contents.data, nestedParser.document, contents.thriftFilename)
}
// helper functions
/**
 * Reports a recoverable problem found while parsing.
 *
 * In strict mode the warning is thrown; otherwise its message is printed to
 * standard output, prefixed with "Warning: ", and parsing continues.
 */
def failOrWarn(ex: ParseWarning): Unit =
  if (strict) throw ex
  else println("Warning: " + ex.getMessage)
}