
io.shiftleft.codepropertygraph.schema.Ast.scala Maven / Gradle / Ivy
package io.shiftleft.codepropertygraph.schema
import overflowdb.schema.{Cardinality, Constant, NodeType, SchemaBuilder, SchemaInfo}
import overflowdb.storage.ValueTypes
object Ast extends SchemaBase {
def index: Int = 7
override def providedByFrontend: Boolean = true
override def description: String =
"""
|The Abstract Syntax Tree (AST) Layer provides syntax trees for all compilation units.
|All nodes of the tree inherit from the same base class (`AST_NODE`) and are connected
|to their child nodes via outgoing `AST` edges.
|
|Syntax trees are typed, that is, when possible, types for all expressions are stored
|in the tree. Moreover, common control structure types are defined in the specification,
|making it possible to translate trees into corresponding control flow graphs if only
|these common control structure types are used, possibly by desugaring on the side of
|the language frontend. For cases where this is not an option,
|the AST specification provides means of storing language-dependent information in the
|AST that can be interpreted by language-dependent control flow construction passes.
|
|This layer MUST be created by the frontend.
|
|""".stripMargin
def apply(builder: SchemaBuilder,
base: Base.Schema,
namespaces: Namespace.Schema,
methodSchema: Method.Schema,
typeSchema: Type.Schema,
fs: FileSystem.Schema) =
new Schema(builder, base, namespaces, methodSchema, typeSchema, fs)
class Schema(builder: SchemaBuilder,
base: Base.Schema,
namespaces: Namespace.Schema,
methodSchema: Method.Schema,
typeSchema: Type.Schema,
fs: FileSystem.Schema) {
implicit private val schemaInfo = SchemaInfo.forClass(getClass)
import methodSchema._
import base._
import namespaces._
import typeSchema._
import fs._
// Base types
val order = builder
.addProperty(
name = "ORDER",
valueType = ValueTypes.INTEGER,
cardinality = Cardinality.One,
comment = """This integer indicates the position of the node among
|its siblings in the AST. The left-most child has an
|order of 0.
|""".stripMargin
)
.protoId(4)
val astNode = builder
.addNodeBaseType(
name = "AST_NODE",
comment = """This is the base type for all nodes of the abstract syntax tree (AST). An AST
|node has a `CODE` and an `ORDER` field. The `CODE` field contains the
|code (verbatim) represented by the AST node. The `ORDER` field contains the
|nodes position among its siblings, encoded as an integer where the left most
|sibling has the position `0`.
|
|AST nodes contain optional `LINE_NUMBER` and `COLUMN_NUMBER` fields. For
|source-based frontends, these fields contain the start line number and
|start column number of the code represented by the node.
|For machine-code-based and bytecode-based frontends, `LINE_NUMBER` contains
|the address at which the code starts while `COLUMN_NUMBER` is undefined.
|""".stripMargin
)
.addProperties(order, code)
.addProperties(lineNumber, columnNumber)
// The following nodes from other schemas are AST nodes
method.extendz(astNode)
methodParameterIn.extendz(astNode)
methodParameterOut.extendz(astNode)
typeDecl.extendz(astNode)
member.extendz(astNode)
typeParameter.extendz(astNode)
typeArgument.extendz(astNode)
file.extendz(astNode)
namespaceBlock.extendz(astNode)
// Type-related nodes that are part of the AST
val block: NodeType = builder
.addNodeType(
name = "BLOCK",
comment = """This node represents a compound statement. Compound statements are used in many languages to allow
|grouping a sequence of statements. For example, in C and Java, compound statements
|are statements enclosed by curly braces. Function/Method bodies are compound
|statements. We do not use the term "compound statement" because "statement" would
|imply that the block does not yield a value upon evaluation, that is, that it is
|not an expression. This is true in languages such as C and Java, but not for languages
|such as Scala where the value of the block is given by that of the last expression it
|contains. In fact, the Scala grammar uses the term "BlockExpr" (short for
|"block expression") to describe what in the CPG we call "Block".
|""".stripMargin
)
.protoId(31)
.addProperties(typeFullName)
val literal: NodeType = builder
.addNodeType(
name = "LITERAL",
comment = """This node represents a literal such as an integer or string constant. Literals
|are symbols included in the code in verbatim form and which are immutable.
|The `TYPE_FULL_NAME` field stores the literal's fully-qualified type name,
|e.g., `java.lang.Integer`.
|""".stripMargin
)
.protoId(8)
.addProperties(typeFullName)
val local: NodeType = builder
.addNodeType(
name = "LOCAL",
comment = """This node represents a local variable. Its fully qualified type name is stored
|in the `TYPE_FULL_NAME` field and its name in the `NAME` field. The `CODE` field
|contains the entire local variable declaration without initialization, e.g., for
|`int x = 10;`, it contains `int x`.
|""".stripMargin
)
.protoId(23)
.addProperties(typeFullName)
.extendz(declaration, astNode)
val identifier: NodeType = builder
.addNodeType(
name = "IDENTIFIER",
comment = """This node represents an identifier as used when referring to a variable by name.
|It holds the identifier's name in the `NAME` field and its fully-qualified type
|name in `TYPE_FULL_NAME`.
|""".stripMargin
)
.protoId(27)
.addProperties(typeFullName, name)
val canonicalName = builder
.addProperty(
name = "CANONICAL_NAME",
valueType = ValueTypes.STRING,
cardinality = Cardinality.One,
comment = """This field holds the canonical name of a `FIELD_IDENTIFIER`. It is typically
|identical to the CODE field, but canonicalized according to source language
|semantics. Human readable names are preferable. `FIELD_IDENTIFIER` nodes must
|share identical `CANONICAL_NAME` if and
|only if they alias, e.g., in C-style unions (if the aliasing relationship is
|unknown or there are partial overlaps, then one must make a reasonable guess,
|and trade off between false negatives and false positives).
|""".stripMargin
)
.protoId(2001092)
val fieldIdentifier: NodeType = builder
.addNodeType(
name = "FIELD_IDENTIFIER",
comment = """This node represents the field accessed in a field access, e.g., in
|`a.b`, it represents `b`. The field name as it occurs in the code is
|stored in the `CODE` field. This may mean that the `CODE` field holds
|an expression. The `CANONICAL_NAME` field MAY contain the same value is
|the `CODE` field but SHOULD contain the normalized name that results
|from evaluating `CODE` as an expression if such an evaluation is
|possible for the language frontend. The objective is to store an identifier
|in `CANONICAL_NAME` that is the same for two nodes iff they refer to the
|same field, regardless of whether they use the same expression to reference
|it.
|""".stripMargin
)
.protoId(2001081)
.addProperties(canonicalName)
val modifierType = builder
.addProperty(
name = "MODIFIER_TYPE",
valueType = ValueTypes.STRING,
cardinality = Cardinality.One,
comment = """The modifier type is a free-form string. The following are known modifier types:
|`STATIC`, `PUBLIC`, `PROTECTED`, `PRIVATE`, `ABSTRACT`, `NATIVE`, `CONSTRUCTOR`, `VIRTUAL`.
|""".stripMargin
)
.protoId(26)
val modifierTypes = builder.addConstants(
category = "ModifierTypes",
Constant(name = "STATIC", value = "STATIC", valueType = ValueTypes.STRING, comment = "The static modifier")
.protoId(1),
Constant(name = "PUBLIC", value = "PUBLIC", valueType = ValueTypes.STRING, comment = "The public modifier")
.protoId(2),
Constant(name = "PROTECTED",
value = "PROTECTED",
valueType = ValueTypes.STRING,
comment = "The protected modifier").protoId(3),
Constant(name = "PRIVATE", value = "PRIVATE", valueType = ValueTypes.STRING, comment = "The private modifier")
.protoId(4),
Constant(name = "ABSTRACT", value = "ABSTRACT", valueType = ValueTypes.STRING, comment = "The abstract modifier")
.protoId(5),
Constant(name = "NATIVE", value = "NATIVE", valueType = ValueTypes.STRING, comment = "The native modifier")
.protoId(6),
Constant(name = "CONSTRUCTOR",
value = "CONSTRUCTOR",
valueType = ValueTypes.STRING,
comment = "The constructor modifier").protoId(7),
Constant(name = "VIRTUAL", value = "VIRTUAL", valueType = ValueTypes.STRING, comment = "The virtual modifier")
.protoId(8),
)
val modifier: NodeType = builder
.addNodeType(
name = "MODIFIER",
comment = """This field represents a (language-dependent) modifier such as `static`, `private`
|or `public`. Unlike most other AST nodes, it is NOT an expression, that is, it
|cannot be evaluated and cannot be passed as an argument in function calls.
|""".stripMargin
)
.protoId(300)
.addProperties(modifierType)
.extendz(astNode)
val jumpTarget: NodeType = builder
.addNodeType(
name = "JUMP_TARGET",
comment = """A jump target is any location in the code that has been specifically marked
|as the target of a jump, e.g., via a label. The `NAME` field holds the name of
|the label while the `PARSER_TYPE_NAME` field holds the name of language construct
|that this jump target is created from, e.g., "Label".
|""".stripMargin
)
.protoId(340)
.addProperties(name, parserTypeName)
.extendz(astNode)
val methodRef: NodeType = builder
.addNodeType(
name = "METHOD_REF",
comment = """This node represents a reference to a method/function/procedure as it
|appears when a method is passed as an argument in a call. The `METHOD_FULL_NAME`
|field holds the fully-qualified name of the referenced method and the
|`TYPE_FULL_NAME` holds its fully-qualified type name.
|""".stripMargin
)
.protoId(333)
.addProperties(typeFullName)
val typeRef: NodeType = builder
.addNodeType(
name = "TYPE_REF",
comment = "Reference to a type/class"
)
.protoId(335)
.addProperties(typeFullName)
val ret: NodeType = builder
.addNodeType(
name = "RETURN",
comment = """This node represents a return instruction, e.g., `return x`. Note that it does
|NOT represent a formal return parameter as formal return parameters are
|represented via `METHOD_RETURN` nodes.
|""".stripMargin
)
.protoId(30)
val controlStructureType = builder
.addProperty(
name = "CONTROL_STRUCTURE_TYPE",
valueType = ValueTypes.STRING,
cardinality = Cardinality.One,
comment = """The `CONTROL_STRUCTURE_TYPE` field indicates which kind of control structure
|a `CONTROL_STRUCTURE` node represents. The available types are the following:
| BREAK, CONTINUE, DO, WHILE, FOR, GOTO, IF, ELSE, TRY, and SWITCH.
|""".stripMargin
)
.protoId(27)
val controlStructureTypes = builder.addConstants(
category = "ControlStructureTypes",
Constant(name = "BREAK", value = "BREAK", valueType = ValueTypes.STRING, comment = "Represents a break statement")
.protoId(1),
Constant(name = "CONTINUE",
value = "CONTINUE",
valueType = ValueTypes.STRING,
comment = "Represents a continue statement").protoId(2),
Constant(name = "WHILE", value = "WHILE", valueType = ValueTypes.STRING, comment = "Represents a while statement")
.protoId(3),
Constant(name = "DO", value = "DO", valueType = ValueTypes.STRING, comment = "Represents a do statement")
.protoId(4),
Constant(name = "FOR", value = "FOR", valueType = ValueTypes.STRING, comment = "Represents a for statement")
.protoId(5),
Constant(name = "GOTO", value = "GOTO", valueType = ValueTypes.STRING, comment = "Represents a goto statement")
.protoId(6),
Constant(name = "IF", value = "IF", valueType = ValueTypes.STRING, comment = "Represents an if statement")
.protoId(7),
Constant(name = "ELSE", value = "ELSE", valueType = ValueTypes.STRING, comment = "Represents an else statement")
.protoId(8),
Constant(name = "SWITCH",
value = "SWITCH",
valueType = ValueTypes.STRING,
comment = "Represents a switch statement").protoId(9),
Constant(name = "TRY", value = "TRY", valueType = ValueTypes.STRING, comment = "Represents a try statement")
.protoId(10),
)
val controlStructure: NodeType = builder
.addNodeType(
name = "CONTROL_STRUCTURE",
comment = """This node represents a control structure as introduced by control structure
|statements as well as conditional and unconditional jumps. Its type is stored in the
|`CONTROL_STRUCTURE_TYPE` field to be one of several pre-defined types. These types
| are used in the construction of the control flow layer, making it possible to
| generate the control flow layer from the abstract syntax tree layer automatically.
|
|In addition to the `CONTROL_STRUCTURE_TYPE` field, the `PARSER_TYPE_NAME` field
|MAY be used by frontends to store the name of the control structure as emitted by
|the parser or disassembler, however, the value of this field is not relevant
|for construction of the control flow layer.
|""".stripMargin
)
.protoId(339)
.addProperties(parserTypeName, controlStructureType)
val unknown: NodeType = builder
.addNodeType(
name = "UNKNOWN",
comment = """Any AST node that the frontend would like to include in the AST but for
|which no suitable AST node is specified in the CPG specification may be
|included using a node of type `UNKNOWN`.
|""".stripMargin
)
.protoId(44)
.addProperties(parserTypeName, typeFullName)
// Edge types
val ast = builder
.addEdgeType(
name = "AST",
comment = "This edge connects a parent node to its child in the syntax tree."
)
.protoId(3)
val condition = builder
.addEdgeType(
name = "CONDITION",
comment = "The edge connects control structure nodes to the expressions that holds their conditions."
)
.protoId(56)
file
.addOutEdge(edge = ast, inNode = namespaceBlock, cardinalityIn = Cardinality.ZeroOrOne)
member.addOutEdge(edge = ast, inNode = modifier)
tpe.addOutEdge(edge = ast, inNode = typeArgument)
typeDecl
.addOutEdge(edge = ast, inNode = typeParameter)
.addOutEdge(edge = ast, inNode = member, cardinalityIn = Cardinality.One)
.addOutEdge(edge = ast, inNode = modifier, cardinalityIn = Cardinality.One)
method
.addOutEdge(edge = ast, inNode = methodReturn, cardinalityOut = Cardinality.One, cardinalityIn = Cardinality.One)
.addOutEdge(edge = ast, inNode = methodParameterIn, cardinalityIn = Cardinality.One)
.addOutEdge(edge = ast, inNode = modifier, cardinalityIn = Cardinality.One)
.addOutEdge(edge = ast, inNode = block, cardinalityOut = Cardinality.One, cardinalityIn = Cardinality.One)
.addOutEdge(edge = ast, inNode = typeParameter, cardinalityIn = Cardinality.One)
ret
.addOutEdge(edge = ast, inNode = identifier)
.addOutEdge(edge = ast, inNode = literal)
.addOutEdge(edge = ast, inNode = methodRef)
.addOutEdge(edge = ast, inNode = typeRef)
.addOutEdge(edge = ast, inNode = ret)
.addOutEdge(edge = ast, inNode = block)
.addOutEdge(edge = ast, inNode = unknown)
.addOutEdge(edge = ast, inNode = jumpTarget)
.addOutEdge(edge = ast, inNode = controlStructure)
block
.addOutEdge(edge = ast, inNode = identifier)
.addOutEdge(edge = ast, inNode = literal)
.addOutEdge(edge = ast, inNode = methodRef)
.addOutEdge(edge = ast, inNode = typeRef)
.addOutEdge(edge = ast, inNode = ret)
.addOutEdge(edge = ast, inNode = block, cardinalityIn = Cardinality.One)
.addOutEdge(edge = ast, inNode = local)
.addOutEdge(edge = ast, inNode = unknown)
.addOutEdge(edge = ast, inNode = jumpTarget)
.addOutEdge(edge = ast, inNode = controlStructure)
controlStructure
.addOutEdge(edge = ast, inNode = literal, cardinalityIn = Cardinality.One)
.addOutEdge(edge = ast, inNode = modifier)
.addOutEdge(edge = ast, inNode = local)
.addOutEdge(edge = ast, inNode = identifier, cardinalityIn = Cardinality.ZeroOrOne)
.addOutEdge(edge = ast, inNode = ret, cardinalityIn = Cardinality.ZeroOrOne)
.addOutEdge(edge = ast, inNode = block, cardinalityIn = Cardinality.ZeroOrOne)
.addOutEdge(edge = ast, inNode = jumpTarget)
.addOutEdge(edge = ast, inNode = unknown)
.addOutEdge(edge = ast, inNode = controlStructure)
.addOutEdge(edge = ast, inNode = methodRef, cardinalityIn = Cardinality.One)
.addOutEdge(edge = ast, inNode = typeRef)
unknown
.addOutEdge(edge = ast, inNode = literal)
.addOutEdge(edge = ast, inNode = modifier)
.addOutEdge(edge = ast, inNode = local)
.addOutEdge(edge = ast, inNode = identifier)
.addOutEdge(edge = ast, inNode = fieldIdentifier)
.addOutEdge(edge = ast, inNode = ret)
.addOutEdge(edge = ast, inNode = block)
.addOutEdge(edge = ast, inNode = jumpTarget)
.addOutEdge(edge = ast, inNode = unknown)
.addOutEdge(edge = ast, inNode = controlStructure)
unknown
.addOutEdge(edge = ast, inNode = member)
methodParameterIn.addOutEdge(edge = ast, inNode = unknown)
namespaceBlock
.addOutEdge(edge = ast, inNode = typeDecl, cardinalityIn = Cardinality.ZeroOrOne)
.addOutEdge(edge = ast, inNode = method, cardinalityIn = Cardinality.ZeroOrOne)
namespace.extendz(astNode)
controlStructure
.addOutEdge(edge = condition, inNode = literal)
.addOutEdge(edge = condition, inNode = identifier)
.addOutEdge(edge = condition, inNode = ret)
.addOutEdge(edge = condition, inNode = block)
.addOutEdge(edge = condition, inNode = methodRef)
.addOutEdge(edge = condition, inNode = typeRef)
.addOutEdge(edge = condition, inNode = controlStructure)
.addOutEdge(edge = condition, inNode = jumpTarget)
.addOutEdge(edge = condition, inNode = unknown)
.addOutEdge(edge = condition, inNode = controlStructure)
typeDecl
.addOutEdge(edge = ast, inNode = typeDecl, cardinalityIn = Cardinality.ZeroOrOne)
.addOutEdge(edge = ast, inNode = method, cardinalityIn = Cardinality.ZeroOrOne)
method
.addOutEdge(edge = ast, inNode = methodParameterOut)
.addOutEdge(edge = ast, inNode = typeDecl, cardinalityIn = Cardinality.ZeroOrOne)
.addOutEdge(edge = ast, inNode = method, cardinalityIn = Cardinality.ZeroOrOne)
val callRepr = builder
.addNodeBaseType(
name = "CALL_REPR",
comment = "This is the base class of `CALL` that language implementers may safely ignore."
)
.addProperties(name, signature)
val callNode: NodeType = builder
.addNodeType(
name = "CALL",
comment = """A (function/method/procedure) call. The `METHOD_FULL_NAME` property is the name of the
|invoked method (the callee) while the `TYPE_FULL_NAME` is its return type, and
|therefore, the return type of the call when viewing it as an expression. For
|languages like Javascript, it is common that we may know the (short-) name
|of the invoked method, but we do not know at compile time which method
|will actually be invoked, e.g., because it depends on a dynamic import.
|In this case, we leave `METHOD_FULL_NAME` blank but at least fill out `NAME`,
|which contains the method's (short-) name and `SIGNATURE`, which contains
|any information we may have about the types of arguments and return value.
|""".stripMargin
)
.protoId(15)
.extendz(callRepr)
.addProperties(typeFullName)
val expression = builder
.addNodeBaseType(
name = "EXPRESSION",
comment = """`EXPRESSION` is the base class for all nodes that represent code pieces
|that can be evaluated.
|
| Expression may be arguments in method calls. For method calls that do
| not involved named parameters, the `ARGUMENT_INDEX` field indicates at
| which position in the argument list the expression occurs, e.g., an
| `ARGUMENT_INDEX` of 1 indicates that the expression is the first argument
| in a method call. For calls that employ named parameters, `ARGUMENT_INDEX`
| is set to -1 and the `ARGUMENT_NAME` fields holds the name of the parameter.
|""".stripMargin
)
.extendz(astNode)
fieldIdentifier.extendz(expression)
identifier.extendz(expression)
literal.extendz(expression)
block.extendz(expression)
controlStructure.extendz(expression)
methodRef.extendz(expression)
typeRef.extendz(expression)
ret.extendz(expression)
unknown.extendz(expression)
callNode
.addOutEdge(edge = ast, inNode = callNode)
.addOutEdge(edge = ast, inNode = identifier)
.addOutEdge(edge = ast, inNode = fieldIdentifier, cardinalityIn = Cardinality.One)
.addOutEdge(edge = ast, inNode = literal)
.addOutEdge(edge = ast, inNode = methodRef)
.addOutEdge(edge = ast, inNode = typeRef)
.addOutEdge(edge = ast, inNode = ret)
.addOutEdge(edge = ast, inNode = block)
.addOutEdge(edge = ast, inNode = controlStructure)
ret
.addOutEdge(edge = ast, inNode = callNode)
block
.addOutEdge(edge = ast, inNode = callNode)
controlStructure
.addOutEdge(edge = ast, inNode = callNode, cardinalityIn = Cardinality.One)
unknown
.addOutEdge(edge = ast, inNode = callNode)
controlStructure
.addOutEdge(edge = condition, inNode = callNode)
// To refactor
identifier
.addOutEdge(edge = ref, inNode = local, cardinalityOut = Cardinality.ZeroOrOne)
.addOutEdge(edge = ref, inNode = methodParameterIn, cardinalityOut = Cardinality.ZeroOrOne)
namespaceBlock
.addOutEdge(edge = ref, inNode = namespace)
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy