All downloads are free. Search and download functionality uses the official Maven repository.

io.joern.dataflowengineoss.semanticsloader.Parser.scala Maven / Gradle / Ivy

package io.joern.dataflowengineoss.semanticsloader

import io.joern.dataflowengineoss.SemanticsParser.MappingContext
import io.joern.dataflowengineoss.{SemanticsBaseListener, SemanticsLexer, SemanticsParser}
import io.shiftleft.codepropertygraph.generated.Cpg
import org.antlr.v4.runtime.tree.ParseTreeWalker
import org.antlr.v4.runtime.{CharStream, CharStreams, CommonTokenStream}

import scala.collection.mutable
import scala.jdk.CollectionConverters._

object Semantics {

  /** Builds a [[Semantics]] store keyed by each element's `methodFullName`.
    *
    * If several elements share the same full name, the one occurring last in `elements` is the one that is kept.
    *
    * @param elements
    *   the flow semantics to index.
    * @return
    *   a store backed by a freshly created mutable map.
    */
  def fromList(elements: List[FlowSemantic]): Semantics = {
    val keyed      = elements.map(semantic => (semantic.methodFullName, semantic))
    val byFullName = mutable.Map.from[String, FlowSemantic](keyed)
    new Semantics(byFullName)
  }

  /** A store without any entries. */
  def empty: Semantics = fromList(Nil)

}

/** Lookup table from method full names (or regex patterns, for entries flagged as `regex`) to their flow semantics.
  *
  * Instances are created through the companion object.
  */
class Semantics private (methodToSemantic: mutable.Map[String, FlowSemantic]) {

  /** Maps a concrete method full name found in the graph to the regex key it was matched by, e.g.
    *
    * `path/to/file.py:.Foo.sink` -> `^path.*Foo\\.sink$`
    *
    * Populated by [[loadRegexSemantics]] and consulted by [[forMethod]].
    */
  private val regexMatchedFullNames = mutable.HashMap.empty[String, String]

  /** Resolves every regex-flagged semantic against the methods of `cpg` ahead of query time and records each
    * matching method full name together with the regex that produced it. If a method is matched by more than one
    * regex, the regex processed last overwrites the earlier entry.
    *
    * @param cpg
    *   the code property graph whose methods are matched.
    */
  def loadRegexSemantics(cpg: Cpg): Unit = {
    import io.shiftleft.semanticcpg.language._

    val regexSemantics = methodToSemantic.filter { case (_, semantic) => semantic.regex }
    for {
      pattern     <- regexSemantics.keysIterator
      matchedName <- cpg.method.fullName(pattern).fullName
    } regexMatchedFullNames.update(matchedName, pattern)
  }

  /** All registered semantics, in the iteration order of the backing map. */
  def elements: List[FlowSemantic] = methodToSemantic.valuesIterator.toList

  /** Looks up the semantic for a method.
    *
    * A full name recorded by [[loadRegexSemantics]] is resolved through the regex it matched; any other name is looked
    * up verbatim.
    *
    * @param fullName
    *   the full name of the method.
    * @return
    *   the semantic, if one is registered.
    */
  def forMethod(fullName: String): Option[FlowSemantic] = {
    val lookupKey = regexMatchedFullNames.getOrElse(fullName, fullName)
    methodToSemantic.get(lookupKey)
  }

  /** Renders the store as text: one line per semantic, ordered by method full name, consisting of the quoted full name
    * followed by the space-separated `src -> dst` pairs of its [[FlowMapping]] entries.
    *
    * Flow paths other than [[FlowMapping]] are left out, and `src`/`dst` are rendered through their `toString`.
    */
  def serialize: String = {
    val lines = for (semantic <- elements.sortBy(_.methodFullName)) yield {
      val flows    = semantic.mappings.collect { case FlowMapping(src, dst) => s"$src -> $dst" }
      val flowText = flows.mkString(" ")
      s"\"${semantic.methodFullName}\" " + flowText
    }
    lines.mkString("\n")
  }

}
/** Describes how data flows through a method.
  *
  * @param methodFullName
  *   the full name of the method; [[Semantics]] uses it as the lookup key and, when `regex` is set, as the pattern
  *   that is matched against method full names.
  * @param mappings
  *   the flow paths that apply to the method; empty by default.
  * @param regex
  *   whether `methodFullName` is to be treated as a regular expression; `false` by default.
  */
case class FlowSemantic(methodFullName: String, mappings: List[FlowPath] = List.empty, regex: Boolean = false)

object FlowSemantic {

  /** Builds a [[FlowSemantic]] from a loosely typed list of mapping descriptions.
    *
    * Each element of `mappings` must have one of the following shapes:
    *   - `(src: Int, dst: Int)`
    *   - `(srcIdx: Int, src: String, dst: Int)`
    *   - `(src: Int, dstIdx: Int, dst: String)`
    *   - `(srcIdx: Int, src: String, dstIdx: Int, dst: String)`
    *   - any [[FlowPath]] (a [[FlowMapping]] or [[PassThroughMapping]]), which is used as-is.
    *
    * @param methodFullName
    *   the full name (or regex, if `regex` is set) of the method.
    * @param mappings
    *   the mapping descriptions, in any mix of the shapes listed above.
    * @param regex
    *   whether `methodFullName` is a regular expression.
    * @return
    *   the semantic holding the converted flow paths in their original order.
    * @throws scala.MatchError
    *   if an element of `mappings` has none of the supported shapes.
    */
  def from(methodFullName: String, mappings: List[?], regex: Boolean = false): FlowSemantic = {
    val flowPaths: List[FlowPath] = mappings.map {
      case (src: Int, dst: Int)                                 => FlowMapping(src, dst)
      case (srcIdx: Int, src: String, dst: Int)                 => FlowMapping(srcIdx, src, dst)
      case (src: Int, dstIdx: Int, dst: String)                 => FlowMapping(src, dstIdx, dst)
      case (srcIdx: Int, src: String, dstIdx: Int, dst: String) => FlowMapping(srcIdx, src, dstIdx, dst)
      // Accept every FlowPath, not only FlowMapping, so that PassThroughMapping no longer triggers a MatchError.
      case path: FlowPath => path
    }
    FlowSemantic(methodFullName, flowPaths, regex)
  }

}

/** Base type of the endpoints (`src` and `dst`) of a [[FlowMapping]].
  */
abstract class FlowNode

/** Groups parameter and return nodes under a common trait. The trait exposes their argument index, which matters when
  * a caller wants to coordinate tainted flows through specific arguments and the return flow.
  */
trait ParamOrRetNode extends FlowNode {

  /** Temporary, backward-compatible index field.
    *
    * @return
    *   the argument index.
    */
  def index: Int
}

/** A parameter where the index of the argument matches the position of the parameter at the callee. The name is used to
  * match named arguments if these are used instead of positional arguments.
  *
  * @param index
  *   the position or argument index.
  * @param name
  *   the name of the parameter, if known; `None` by default.
  */
case class ParameterNode(index: Int, name: Option[String] = None) extends ParamOrRetNode

object ParameterNode {

  /** Convenience constructor taking the parameter name as a plain string.
    *
    * @param index
    *   the position or argument index.
    * @param name
    *   the name of the parameter; a `null` name results in a node without a name.
    */
  def apply(index: Int, name: String): ParameterNode = {
    val maybeName = Option(name)
    new ParameterNode(index, maybeName)
  }
}

/** A single flow rule of a [[FlowSemantic]]: either an explicit mapping ([[FlowMapping]]) or a special case such as
  * [[PassThroughMapping]]. The trait is sealed, so all variants are declared in this file.
  */
sealed trait FlowPath

/** Maps a flow between arguments based on how they interact as parameters at the callee.
  *
  * @param src
  *   the source of the flow.
  * @param dst
  *   the destination of the flow.
  */
case class FlowMapping(src: FlowNode, dst: FlowNode) extends FlowPath

object FlowMapping {

  /** Mapping between two purely positional parameters. */
  def apply(from: Int, to: Int): FlowMapping = {
    val source      = new ParameterNode(from, None)
    val destination = new ParameterNode(to, None)
    new FlowMapping(source, destination)
  }

  /** Mapping where both the source and the destination carry a parameter name in addition to their index. */
  def apply(fromIdx: Int, from: String, toIdx: Int, to: String): FlowMapping = {
    val source      = new ParameterNode(fromIdx, Option(from))
    val destination = new ParameterNode(toIdx, Option(to))
    new FlowMapping(source, destination)
  }

  /** Mapping from a named source parameter to a purely positional destination. */
  def apply(fromIdx: Int, from: String, toIdx: Int): FlowMapping = {
    val source      = new ParameterNode(fromIdx, Option(from))
    val destination = new ParameterNode(toIdx, None)
    new FlowMapping(source, destination)
  }

  /** Mapping from a purely positional source to a named destination parameter. */
  def apply(from: Int, toIdx: Int, to: String): FlowMapping = {
    val source      = new ParameterNode(from, None)
    val destination = new ParameterNode(toIdx, Option(to))
    new FlowMapping(source, destination)
  }

}

/** Represents the case where parameters are not sanitized, may affect the return value, and do not cross-taint, e.g.
  * foo(1, 2) = 1 -> 1, 2 -> 2, 1 -> -1, 2 -> -1
  *
  * The main benefit is that this works for an unbounded number of parameters, e.g. VARARGS. Note that this does not
  * taint 0 -> 0.
  */
object PassThroughMapping extends FlowPath

/** Parses textual taint semantics into [[FlowSemantic]] instances using the ANTLR-generated `SemanticsLexer` and
  * `SemanticsParser`.
  */
class Parser() {

  /** Parses semantics given as an in-memory string.
    *
    * @param input
    *   the semantics text.
    * @return
    *   one [[FlowSemantic]] per parsed entry, in the order they occur.
    */
  def parse(input: String): List[FlowSemantic] =
    parseCharStream(CharStreams.fromString(input))

  /** Parses semantics read from a file.
    *
    * @param fileName
    *   the path of the file holding the semantics text.
    * @return
    *   one [[FlowSemantic]] per parsed entry, in the order they occur.
    */
  def parseFile(fileName: String): List[FlowSemantic] =
    parseCharStream(CharStreams.fromFileName(fileName))

  /** Runs lexer and parser over the stream and collects the semantics by walking the resulting parse tree. */
  private def parseCharStream(charStream: CharStream): List[FlowSemantic] = {
    val tokens    = new CommonTokenStream(new SemanticsLexer(charStream))
    val walker    = new ParseTreeWalker()
    val root      = new SemanticsParser(tokens).taintSemantics()
    val collector = new Listener()
    walker.walk(collector, root)
    collector.result.toList
  }

  /** Accessors that pull the relevant pieces out of a `mapping` parse-tree node. */
  implicit class AntlrFlowExtensions(val ctx: MappingContext) {

    /** Whether the mapping carries the pass-through token rather than an explicit `src -> dst` pair. */
    def isPassThrough: Boolean = Option(ctx.PASSTHROUGH()).nonEmpty

    /** The argument index of the source side. */
    def srcIdx: Int = {
      val numberToken = ctx.src().argIdx().NUMBER()
      numberToken.getText.toInt
    }

    /** The argument name of the source side, if one was given. */
    def srcArgName: Option[String] =
      for (argName <- Option(ctx.src().argName())) yield argName.name().getText

    /** The argument index of the destination side. */
    def dstIdx: Int = {
      val numberToken = ctx.dst().argIdx().NUMBER()
      numberToken.getText.toInt
    }

    /** The argument name of the destination side, if one was given. */
    def dstArgName: Option[String] =
      for (argName <- Option(ctx.dst().argName())) yield argName.name().getText

  }

  /** Tree listener that converts every `singleSemantic` node below the root into a [[FlowSemantic]]. */
  private class Listener extends SemanticsBaseListener {

    /** The semantics gathered so far, in the order they were encountered. */
    val result: mutable.ListBuffer[FlowSemantic] = mutable.ListBuffer.empty[FlowSemantic]

    override def enterTaintSemantics(ctx: SemanticsParser.TaintSemanticsContext): Unit =
      for (semantic <- ctx.singleSemantic().asScala) {
        val fullName  = semantic.methodName().name().getText
        val flowPaths = semantic.mapping().asScala.toList.map(mappingCtx => ctxToParamMapping(mappingCtx))
        result += FlowSemantic(fullName, flowPaths)
      }

    /** Translates one `mapping` node into either [[PassThroughMapping]] or an explicit [[FlowMapping]]. */
    private def ctxToParamMapping(ctx: MappingContext): FlowPath =
      if (!ctx.isPassThrough) {
        val source      = ParameterNode(ctx.srcIdx, ctx.srcArgName)
        val destination = ParameterNode(ctx.dstIdx, ctx.dstArgName)
        FlowMapping(source, destination)
      } else {
        PassThroughMapping
      }

  }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy