All Downloads are FREE. Search and download functionalities are using the official Maven repository.

io.joern.ghidra2cpg.utils.PCodeMapper.scala Maven / Gradle / Ivy

The newest version!
package io.joern.ghidra2cpg.utils

import ghidra.app.util.template.TemplateSimplifier
import ghidra.program.model.listing.{CodeUnitFormat, CodeUnitFormatOptions, Function, Instruction}
import ghidra.program.model.pcode.PcodeOp.*
import ghidra.program.model.pcode.{HighFunction, PcodeOp, PcodeOpAST, Varnode}
import io.joern.ghidra2cpg.Types
//import io.joern.ghidra2cpg.utils.Utils.{createCallNode, createIdentifier, createLiteral}
import io.joern.ghidra2cpg.utils.Utils.*
import io.shiftleft.codepropertygraph.generated.EdgeTypes
import io.shiftleft.codepropertygraph.generated.nodes.CfgNodeNew
import org.slf4j.LoggerFactory
import io.shiftleft.codepropertygraph.generated.DiffGraphBuilder

import scala.jdk.CollectionConverters.*
import scala.language.implicitConversions
/** Mutable holder for a single argument index.
  *
  * @param argumentIndex
  *   the value `argument` holds until it is reassigned
  */
class State(argumentIndex: Int) {
  // Backing storage for the `argument` accessor pair below.
  private var current: Int = argumentIndex

  /** The currently stored argument index. */
  def argument: Int = current

  /** Replaces the stored argument index. */
  def argument_=(value: Int): Unit = current = value
}

/** Maps the p-code of one native instruction onto CPG nodes (calls, identifiers, literals).
  *
  * The raw p-code of `nativeInstruction` drives the mapping (see [[getNode]]); for call instructions the decompiler's
  * high p-code from `highFunction` is used to discover the call arguments. Argument nodes are registered on
  * `diffGraphBuilder` together with ARGUMENT and AST edges (see [[connectCallToArgument]]); the node returned by
  * [[getNode]] itself is not added here.
  *
  * @param diffGraphBuilder
  *   receives every argument node and edge created during mapping
  * @param nativeInstruction
  *   the instruction being translated
  * @param functions
  *   not used by this class
  * @param highFunction
  *   source of the high p-code ops used to resolve call arguments
  * @param address2Literal
  *   known literal values keyed by address offset
  */
class PCodeMapper(
  diffGraphBuilder: DiffGraphBuilder,
  nativeInstruction: Instruction,
  functions: List[Function],
  highFunction: HighFunction,
  address2Literal: Map[Long, String]
) {
  private val logger = LoggerFactory.getLogger(getClass)
  private val pcodeOps: List[PcodeOp] =
    nativeInstruction.getPcode().toList

  val state: State = new State(argumentIndex = -1)

  val codeUnitFormat: CodeUnitFormat = new CodeUnitFormat(
    new CodeUnitFormatOptions(
      CodeUnitFormatOptions.ShowBlockName.NEVER,
      CodeUnitFormatOptions.ShowNamespace.NEVER,
      "",
      true,
      true,
      true,
      true,
      true,
      true,
      true,
      new TemplateSimplifier()
    )
  )

  /** Minimum address of `instruction` truncated to an `Int`. It is handed to the node factories as their position
    * argument (presumably the line number -- confirm against `Utils`).
    */
  private def offsetOf(instruction: Instruction): Int =
    instruction.getMinAddress.getOffsetAsBigInteger.intValue

  /** Entry point: maps the last p-code op of the instruction to a node.
    *
    * @return
    *   the node for the last p-code op, or a plain call node named after the instruction when it has no p-code
    */
  def getNode: CfgNodeNew =
    pcodeOps.lastOption match {
      case Some(lastOp) => mapCallNode(lastOp)
      case None         =>
        // It looks like that for some instructions,
        // like "bti c" getPcode() returns nothing
        logger.info(s"NO pcodes for $nativeInstruction")
        createCallNode(nativeInstruction.toString, nativeInstruction.toString, offsetOf(nativeInstruction))
    }

  /** Creates a call node positioned at the native instruction.
    *
    * @param name
    *   name of the call
    * @param code
    *   code string of the call
    * @param index
    *   forwarded as the last argument of `createCallNode`
    */
  def createCall(name: String, code: String, index: Int = 1): CfgNodeNew =
    createCallNode(code, name, offsetOf(nativeInstruction), index)

  /** Creates a call named `cpgOperationName` whose only argument is the op's first input.
    *
    * @param operation
    *   currently unused; the call's code is always the native instruction text
    */
  def handleSingleArgument(pcodeOp: PcodeOp, cpgOperationName: String, operation: String): CfgNodeNew = {
    val operand  = resolveVarNode(pcodeOp.getInput(0), 1)
    val callNode = createCall(cpgOperationName, nativeInstruction.toString)
    connectCallToArgument(callNode, operand)
    callNode
  }

  /** Creates a call named `cpgOperationName` for an op with two inputs.
    *
    * When the op writes to a register, the output becomes argument 1 and the two inputs arguments 2 and 3; otherwise
    * the inputs are arguments 1 and 2.
    *
    * @param code
    *   currently unused; the call's code is always the native instruction text
    * @param callIndex
    *   index given to the created call node
    */
  def handleTwoArguments(pcodeOp: PcodeOp, cpgOperationName: String, code: String, callIndex: Int = -1): CfgNodeNew = {
    val callNode = createCall(cpgOperationName, nativeInstruction.toString, callIndex)
    val inputs   = List(pcodeOp.getInput(0), pcodeOp.getInput(1))
    // we need this for MIPS
    val operands =
      if (Option(pcodeOp.getOutput).exists(_.isRegister)) pcodeOp.getOutput :: inputs
      else inputs
    // Resolve every operand first, then attach them in order.
    operands.zipWithIndex
      .map { case (varnode, position) => resolveVarNode(varnode, position + 1) }
      .foreach(connectCallToArgument(callNode, _))
    callNode
  }

  /** Maps a STORE op to an ".assignment" call whose arguments are the op's inputs 1 and 2 (input 0 is skipped). */
  def handleStore(pcodeOp: PcodeOp): CfgNodeNew = {
    val firstOp  = resolveVarNode(pcodeOp.getInput(1), 1)
    val secondOp = resolveVarNode(pcodeOp.getInput(2), 2)
    val callNode = createCall(".assignment", nativeInstruction.toString)
    connectCallToArgument(callNode, firstOp)
    connectCallToArgument(callNode, secondOp)
    callNode
  }

  /** Picks a name for a register varnode of the high p-code without dereferencing null.
    *
    * Order of preference: for highs named "UNNAMED", the symbol of the last input of the defining op; then the high
    * variable's own name; then the high variable's symbol name; finally the varnode's string form.
    */
  private def highRegisterName(input: Varnode): String = {
    val high     = Option(input.getHigh)
    val highName = high.flatMap(h => Option(h.getName))
    val fromDefinition =
      if (highName.contains("UNNAMED"))
        for {
          definition <- Option(input.getDef)
          inputs     <- Option(definition.getInputs)
          last       <- inputs.lastOption.flatMap(v => Option(v))
          lastHigh   <- Option(last.getHigh)
          symbol     <- Option(lastHigh.getSymbol)
          name       <- Option(symbol.getName)
        } yield name
      else None
    fromDefinition
      .orElse(highName)
      .orElse(high.flatMap(h => Option(h.getSymbol)).flatMap(s => Option(s.getName)))
      .getOrElse(input.toString)
  }

  /** Address offset of the first input of the op defining `input`, if there is one. */
  private def definitionAddress(input: Varnode): Option[Long] =
    for {
      definition <- Option(input.getDef)
      inputs     <- Option(definition.getInputs)
      first      <- inputs.headOption.flatMap(v => Option(v))
      address    <- Option(first.getAddress)
    } yield address.getOffset

  /** Resolves a varnode of the high p-code into an identifier (registers) or a literal (everything else).
    *
    * @param instruction
    *   instruction whose address positions the created node
    * @param input
    *   the varnode to resolve
    * @param index
    *   1-based argument position given to the created node
    */
  def resolveVarNode(instruction: Instruction, input: Varnode, index: Int): CfgNodeNew = {
    val address = offsetOf(instruction)
    if (input.isRegister) {
      val name = highRegisterName(input)
      createIdentifier(name, name, index, Types.registerType(name), address)
    } else if (input.isConstant) {
      val hex = "0x" + input.getWordOffset.toHexString
      createLiteral(hex, index, index, hex, address)
    } else if (input.isUnique) {
      val sourceAddress = definitionAddress(input)
      // Name of the variable written by the first op that consumes this unique, if any.
      val consumerName = for {
        descendants <- Option(input.getDescendants)
        consumer    <- descendants.asScala.toList.headOption
        output      <- Option(consumer.getOutput)
        high        <- Option(output.getHigh)
        name        <- Option(high.getName)
      } yield name
      val fallback = consumerName
        .orElse(sourceAddress.map(_.toString))
        .getOrElse(input.toString)
      // A known literal at the defining address wins over the fallback text.
      val value = sourceAddress.flatMap(address2Literal.get).getOrElse(fallback)
      createLiteral(value, index, index, input.getWordOffset.toHexString, address)
    } else {
      // we default to literal
      // identifier could be useful too
      createLiteral(input.toString(), index, index, input.toString(), address)
    }
  }

  /** Maps an op with one input and one output to an ".assignment" call.
    *
    * The first input becomes argument 2. The output becomes argument 1: resolved as a register when it is one,
    * otherwise wrapped in an identifier of type "TODO" carrying the output's string form.
    */
  def handleAssignment(pcodeOp: PcodeOp, code: String): CfgNodeNew = {
    val source   = resolveVarNode(pcodeOp.getInputs.head, 2)
    val callNode = createCall(".assignment", code = code)
    connectCallToArgument(callNode, source)
    val output = pcodeOp.getOutput
    val target =
      if (output.isRegister) resolveVarNode(output, 1)
      else
        createIdentifier(
          output.toString(),
          output.toString,
          1,
          "TODO",
          pcodeOp.getSeqnum.getTarget.getOffsetAsBigInteger.intValue()
        )
    connectCallToArgument(callNode, target)
    callNode
  }

  /** Builds a nested binary operation call from the two inputs of `pcodeOp` and attaches it as an argument of
    * `callNode`.
    *
    * @param operand
    *   operator symbol placed between the two operands in the generated code string
    * @param name
    *   name of the nested operation call
    */
  def handleTwoArguments(
    instruction: Instruction,
    callNode: CfgNodeNew,
    pcodeOp: PcodeOp,
    operand: String,
    name: String
  ): Unit = {
    val firstOp  = resolveVarNode(instruction, pcodeOp.getInput(0), 1)
    val secondOp = resolveVarNode(instruction, pcodeOp.getInput(1), 2)
    val code     = s"${firstOp.code} $operand ${secondOp.code}"
    val opNode   = createCallNode(code, name, offsetOf(instruction))

    connectCallToArgument(opNode, firstOp)
    connectCallToArgument(opNode, secondOp)
    connectCallToArgument(callNode, opNode)
  }

  /** Resolves `varNode` and attaches it to `callNode` at position `index`; a null varnode is skipped. */
  def handlePtrSub(instruction: Instruction, callNode: CfgNodeNew, varNode: Varnode, index: Int): Unit =
    handleAssignment(instruction, callNode, varNode, index)

  /** Resolves `to` and attaches it to `callNode` at position `index`.
    *
    * Ops without an output (e.g. STORE) yield a null varnode; it is skipped instead of being dereferenced.
    */
  def handleAssignment(instruction: Instruction, callNode: CfgNodeNew, to: Varnode, index: Int): Unit =
    Option(to).foreach { varnode =>
      connectCallToArgument(callNode, resolveVarNode(instruction, varnode, index))
    }

  /** Attaches the value produced by `pcodeAst` (the defining op of a call argument) to `callNode`.
    *
    * @param index
    *   1-based argument position; note that the nested operations built for arithmetic ops do not use it
    */
  def resolveArgument(instruction: Instruction, callNode: CfgNodeNew, pcodeAst: PcodeOp, index: Int): Unit = {
    pcodeAst.getOpcode match {
      case INT_EQUAL | INT_NOTEQUAL | INT_SLESS | INT_SLESSEQUAL | INT_LESS | INT_LESSEQUAL =>
        logger.warn("INT_EQUAL | INT_NOTEQUAL | INT_SLESS | INT_SLESSEQUAL | INT_LESS | INT_LESSEQUAL ")
      case CALL | CALLIND =>
        handleAssignment(instruction, callNode, pcodeAst.getOutput, index)
      case INT_ADD | FLOAT_ADD =>
        handleTwoArguments(instruction, callNode, pcodeAst, "+", ".addition")
      case INT_DIV | FLOAT_DIV | INT_SDIV =>
        handleTwoArguments(instruction, callNode, pcodeAst, "/", ".division")
      case INT_SUB | FLOAT_SUB =>
        handleTwoArguments(instruction, callNode, pcodeAst, "-", ".subtraction")
      case INT_MULT | FLOAT_MULT =>
        handleTwoArguments(instruction, callNode, pcodeAst, "*", ".multiplication")
      case MULTIEQUAL | INDIRECT | PIECE => // not handled
      case INT_XOR =>
        handleTwoArguments(instruction, callNode, pcodeAst, "^", ".xor")
      case INT_OR =>
        // Previously emitted "^" / ".xor", a copy of the INT_XOR case above.
        handleTwoArguments(instruction, callNode, pcodeAst, "|", ".or")
      case COPY | LOAD | STORE | SUBPIECE =>
        handleAssignment(instruction, callNode, pcodeAst.getOutput, index)
      case CAST =>
        // we need to "unpack" the def of the first input of the cast
        // eg. "(param_1 + 5)" in "(void *)(param_1 + 5)"
        Option(pcodeAst.getInput(0))
          .flatMap(castInput => Option(castInput.getDef))
          .foreach(definition => resolveArgument(instruction, callNode, definition, index))
      case PTRSUB | PTRADD => handlePtrSub(instruction, callNode, pcodeAst.getOutput, index)
      case _               => // handleDefault(pcodeAst)
    }
  }

  /** Builds the call node for a CALL-like instruction and attaches its arguments.
    *
    * The callee name is taken from the instruction's first operand; the arguments come from the first high p-code op
    * at the instruction's address. When no such op exists the bare call node is returned.
    */
  private def mapCall(): CfgNodeNew = {
    val calledFunction = codeUnitFormat
      .getOperandRepresentationString(nativeInstruction, 0)
      .split(">")
      .last
      .replace("[", "")
      .replace("]", "")
    val callNode = createCallNode(calledFunction, calledFunction, offsetOf(nativeInstruction))
    val highOps: List[PcodeOpAST] = highFunction
      .getPcodeOps(nativeInstruction.getAddress())
      .asScala
      .toList
    highOps.headOption.foreach { highCall =>
      // first input is the address to the called function
      // we know it already
      highCall.getInputs.toList
        .drop(1)
        .zipWithIndex
        .foreach { case (argument, position) =>
          // Argument positions are 1-based in both branches.
          val argumentIndex = position + 1
          Option(argument.getDef) match {
            case Some(definition) =>
              resolveArgument(nativeInstruction, callNode, definition, argumentIndex)
            case None =>
              // could/should be a constant
              val hex = "0x" + argument.getWordOffset.toHexString
              val literalNode =
                createLiteral(hex, argumentIndex, argumentIndex, hex, offsetOf(nativeInstruction))
              connectCallToArgument(callNode, literalNode)
          }
        }
    }
    callNode
  }

  /** Maps a single raw p-code op to a call node.
    *
    * @param index
    *   only used for additions, where it becomes the index of the created call node
    */
  private def mapCallNode(pcodeOp: PcodeOp, index: Int = -1): CfgNodeNew =
    pcodeOp.getOpcode match {
      // TODO add more pcode ops like CALL.*
      case BRANCH | BRANCHIND | CBRANCH =>
        val destination = resolveVarNode(pcodeOp.getInputs.head, 1)
        val gotoNode    = createCallNode(nativeInstruction.toString, ".goto", offsetOf(nativeInstruction))
        connectCallToArgument(gotoNode, destination)
        gotoNode
      case RETURN =>
        createCall("ret", "ret")
      case CALL | CALLOTHER | CALLIND =>
        mapCall()
      case BOOL_AND =>
        handleTwoArguments(pcodeOp, ".and", "&&")
      case BOOL_NEGATE =>
        handleSingleArgument(pcodeOp, ".negate", pcodeOp.getMnemonic)
      case BOOL_OR =>
        handleTwoArguments(pcodeOp, ".or", "||")
      case BOOL_XOR =>
        handleTwoArguments(pcodeOp, ".xor", "^^")

      case CAST =>
        handleSingleArgument(pcodeOp, ".cast", pcodeOp.getMnemonic)
      case CPOOLREF =>
        handleSingleArgument(pcodeOp, ".cpoolref", pcodeOp.getMnemonic)
      case EXTRACT =>
        handleSingleArgument(pcodeOp, ".extract", pcodeOp.getMnemonic)
      case FLOAT_ABS =>
        handleSingleArgument(pcodeOp, ".abs", pcodeOp.getMnemonic)
      case FLOAT_CEIL =>
        handleSingleArgument(pcodeOp, ".ceil", pcodeOp.getMnemonic)
      case FLOAT_FLOAT2FLOAT =>
        handleSingleArgument(pcodeOp, ".float2float", pcodeOp.getMnemonic)
      case FLOAT_FLOOR =>
        handleSingleArgument(pcodeOp, ".floor", pcodeOp.getMnemonic)
      case FLOAT_INT2FLOAT =>
        handleSingleArgument(pcodeOp, ".int2float", pcodeOp.getMnemonic)
      case FLOAT_LESS | INT_SLESS | INT_LESS =>
        // Previously named ".goto", which mislabelled the comparison as a jump.
        handleTwoArguments(pcodeOp, ".lessThan", "<")
      case FLOAT_LESSEQUAL | INT_SLESSEQUAL | INT_LESSEQUAL =>
        handleTwoArguments(pcodeOp, ".lessThanEqual", "<=")
      case FLOAT_NAN =>
        handleSingleArgument(pcodeOp, ".nan", pcodeOp.getMnemonic)
      case FLOAT_ROUND =>
        handleSingleArgument(pcodeOp, ".round", pcodeOp.getMnemonic)
      case FLOAT_SQRT =>
        handleSingleArgument(pcodeOp, ".sqrt", pcodeOp.getMnemonic)
      case FLOAT_TRUNC =>
        handleSingleArgument(pcodeOp, ".trunc", pcodeOp.getMnemonic)
      case INSERT =>
        handleSingleArgument(pcodeOp, ".insert", pcodeOp.getMnemonic)
      case INT_2COMP =>
        handleSingleArgument(pcodeOp, ".int2comp", pcodeOp.getMnemonic)
      case INT_ADD | FLOAT_ADD | PTRADD =>
        handleTwoArguments(pcodeOp, ".addition", "+", index)
      case INT_AND =>
        handleTwoArguments(pcodeOp, ".and", "TODO: AND")
      case INT_CARRY =>
        handleTwoArguments(pcodeOp, ".TODO", "TODO: INT_CARRY")
      case INT_DIV | FLOAT_DIV | INT_SDIV =>
        handleTwoArguments(pcodeOp, ".division", "/")
      case FLOAT_EQUAL | INT_EQUAL =>
        handleTwoArguments(pcodeOp, ".equal", "==")
      case INT_NOTEQUAL | FLOAT_NOTEQUAL =>
        handleTwoArguments(pcodeOp, ".notEqual", "!=")
      case INT_LEFT =>
        handleTwoArguments(pcodeOp, ".shiftLeft", "<<")
      case INT_MULT | FLOAT_MULT =>
        handleTwoArguments(pcodeOp, ".multiplication", "*")
      case FLOAT_NEG | INT_NEGATE =>
        handleSingleArgument(pcodeOp, ".negation", pcodeOp.getMnemonic)
      case INT_OR =>
        handleTwoArguments(pcodeOp, ".or", "||")
      case INT_REM | INT_SREM =>
        handleTwoArguments(pcodeOp, ".modulo", "%")
      case INT_RIGHT | INT_SRIGHT =>
        handleTwoArguments(pcodeOp, ".shiftRight", ">>")
      // NOTE(review): only the first input of SBORROW/SCARRY is attached -- confirm this is intended.
      case INT_SBORROW =>
        handleSingleArgument(pcodeOp, ".sborrow", pcodeOp.getMnemonic)
      case INT_SCARRY =>
        handleSingleArgument(pcodeOp, ".scarry", pcodeOp.getMnemonic)
      case INT_SEXT | INT_ZEXT =>
        handleSingleArgument(pcodeOp, ".extend", pcodeOp.getMnemonic)
      case INT_SUB | FLOAT_SUB | PTRSUB =>
        handleTwoArguments(pcodeOp, ".subtraction", "-")
      case INT_XOR =>
        handleTwoArguments(pcodeOp, ".xor", "^")
      case COPY | LOAD | SUBPIECE =>
        handleAssignment(pcodeOp, nativeInstruction.toString)
      case STORE =>
        handleStore(pcodeOp)
      case NEW =>
        handleSingleArgument(pcodeOp, ".new", pcodeOp.getMnemonic)
      case UNIMPLEMENTED | SEGMENTOP | MULTIEQUAL | INDIRECT | PIECE | PCODE_MAX =>
        createCall(
          "TODO: UNIMPLEMENTED | SEGMENTOP | MULTIEQUAL | INDIRECT | PIECE | PCODE_MAX | POPCOUNT ",
          "TODO: UNIMPLEMENTED | SEGMENTOP | MULTIEQUAL | INDIRECT | PIECE | PCODE_MAX | POPCOUNT "
        )
      case POPCOUNT =>
        handleAssignment(pcodeOp, nativeInstruction.toString)
      case _ =>
        createCall("NOT HANDLED", "NOT HANDLED")
    }

  /** Literal recorded for a unique varnode, if any.
    *
    * A value is returned only when `address2Literal` has an entry for the varnode's own offset and also one for the
    * address of the first input of its defining op; the latter entry is the one returned.
    */
  private def knownLiteral(input: Varnode): Option[String] =
    if (!address2Literal.contains(input.getOffset)) None
    else definitionAddress(input).flatMap(address2Literal.get)

  /** Hex literal built from the varnode's word offset; used when nothing more specific is known. */
  private def defaultLiteral(input: Varnode, index: Int): CfgNodeNew = {
    val offsetHex = input.getWordOffset.toHexString
    createLiteral("0x" + offsetHex, index, index, Types.registerType(offsetHex), -1)
  }

  /** Resolves a varnode of the raw p-code into an argument node.
    *
    * Registers become identifiers named after the program's register. Uniques become a known literal when one is
    * recorded, otherwise the node of the last op of this instruction that writes them; everything else becomes a hex
    * literal.
    *
    * @param index
    *   1-based argument position given to the created node
    */
  def resolveVarNode(input: Varnode, index: Int): CfgNodeNew = {
    if (input.isRegister) {
      // we only care about the name
      val name = Option(nativeInstruction.getProgram.getRegister(input))
        .map(_.getName)
        .getOrElse(input.toString)
      // NOTE(review): the position argument here is the register's own address, not the instruction's -- confirm.
      createIdentifier(name, name, index, Types.registerType(name), input.getAddress.getOffsetAsBigInteger.intValue)
    } else if (input.isUnique) {
      // unique could point to a string
      knownLiteral(input) match {
        case Some(value) =>
          createLiteral(value, index, index, input.getWordOffset.toHexString, offsetOf(nativeInstruction))
        case None =>
          val producers = pcodeOps
            // If the argument is a unique,
            // we try to resolve it
            .filter(op => op.getOutput == input)
            // Sometimes the first parameter is equal to the return value
            // filtering those out for now
            .filterNot(op => op.getInput(0) == input)
          producers.lastOption match {
            case Some(producer) => mapCallNode(producer, index)
            // No op of this instruction writes the unique: fall back to a plain literal.
            case None => defaultLiteral(input, index)
          }
      }
    } else {
      // input.isConstant || input.isAddress || input.isUnique
      defaultLiteral(input, index)
    }
  }

  /** Registers `argument` on the diff graph and links it to `call` with an ARGUMENT and an AST edge. */
  def connectCallToArgument(call: CfgNodeNew, argument: CfgNodeNew): Unit = {
    diffGraphBuilder.addNode(argument)
    diffGraphBuilder.addEdge(call, argument, EdgeTypes.ARGUMENT)
    diffGraphBuilder.addEdge(call, argument, EdgeTypes.AST)
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy