All Downloads are FREE. Search and download functionalities are using the official Maven repository.

io.shiftleft.js2cpg.parser.JsSource.scala Maven / Gradle / Ivy

There is a newer version: 0.3.3
Show newest version
package io.shiftleft.js2cpg.parser

import java.nio.file.{Path, Paths}
import better.files.File
import com.atlassian.sourcemap.{ReadableSourceMap, ReadableSourceMapImpl}
import com.oracle.js.parser.Source
import com.oracle.js.parser.ir.Node
import io.shiftleft.js2cpg.io.FileDefaults._
import io.shiftleft.js2cpg.io.FileUtils
import io.shiftleft.js2cpg.preprocessing.NuxtTranspiler
import io.shiftleft.utils.IOUtils
import org.apache.commons.lang3.StringUtils
import org.slf4j.LoggerFactory

import scala.jdk.CollectionConverters._
import scala.util.{Failure, Success, Try}

/** Helpers and limits shared by all [[JsSource]] instances. */
object JsSource {

  private val logger = LoggerFactory.getLogger(getClass)

  // Upper bound (in characters) for code fields that are re-mapped after transpilation.
  private val MAX_CODE_LENGTH: Int = 1000
  // Smallest width that is ever handed to the abbreviation routine.
  private val MIN_CODE_LENGTH: Int = 50

  /** Abbreviates a piece of code via `StringUtils.abbreviate`.
    *
    * @param code
    *   the code to shorten
    * @param length
    *   the requested maximum width; values below `MIN_CODE_LENGTH` are raised to `MIN_CODE_LENGTH`
    * @return
    *   the result of `StringUtils.abbreviate` for the effective width
    */
  def shortenCode(code: String, length: Int = MAX_CODE_LENGTH): String = {
    val effectiveWidth = if (length < MIN_CODE_LENGTH) MIN_CODE_LENGTH else length
    StringUtils.abbreviate(code, effectiveWidth)
  }

}

/** A parsed JavaScript source file together with its (optional) source map.
  *
  * On construction the class looks for a source map at `<absolute file path>.map`. If that map can be loaded and the
  * origin file it refers to exists, the public accessors report file path, line, column and code of the origin file.
  * If no source map is available they report the values of the parsed file itself.
  *
  * @param srcDir
  *   the directory that origin files referenced by a source map are looked up in
  * @param projectDir
  *   the directory used to build the absolute path of the parsed file
  * @param source
  *   the parsed source
  */
class JsSource(val srcDir: File, val projectDir: Path, val source: Source) {

  import JsSource._

  // Note: the order of these vals matters, sourceMapOrigin() reads absoluteFilePath and mapFilePath.
  private val absoluteFilePath = (File(projectDir.toAbsolutePath) / originalFilePath).pathAsString
  private val mapFilePath      = absoluteFilePath + ".map"
  private val sourceMap        = sourceMapOrigin()

  private val (positionToLineNumberMapping, positionToFirstPositionInLineMapping) =
    FileUtils.positionLookupTables(source.getContent)

  /** Everything that is known about the origin of a transpiled file.
    *
    * @param sourceFilePath
    *   path of the origin file the source map refers to
    * @param sourceMap
    *   the parsed source map
    * @param sourceWithLineNumbers
    *   content of the origin file, keyed by line number (as produced by FileUtils.contentMapFromFile)
    */
  private case class SourceMapOrigin(
    sourceFilePath: Path,
    sourceMap: Option[ReadableSourceMap],
    sourceWithLineNumbers: Map[Int, String]
  )

  /** @return
    *   the file path of the parsed file. If this file is the result of transpilation the original source file path is
    *   calculated from the corresponding sourcemap.
    */
  def filePath: String = filePathFromSourceMap

  /** @return
    *   the line number of a node in the parsed file. If this file is the result of transpilation the original line
    *   number is calculated from the corresponding sourcemap. None if a sourcemap is present but has no mapping for
    *   the node.
    */
  def getLine(node: Node): Option[Int] = lineFromSourceMap(node)

  /** @return
    *   the column number of a node in the parsed file. If this file is the result of transpilation the original column
    *   number is calculated from the corresponding sourcemap. None if a sourcemap is present but has no mapping for
    *   the node.
    */
  def getColumn(node: Node): Option[Int] = columnFromSourceMap(node)

  /** @return
    *   the code of a node in the parsed file. If this file is the result of transpilation the original code is
    *   calculated from the corresponding sourcemap. Note: in this case, only the re-mapped starting line/column number
    *   are available. Hence, we extract only a fixed number of characters (max. until the end of the file).
    */
  def getCode(node: Node): String = codeFromSourceMap(node)

  /** @return
    *   the string the parsed source reports for the token of the given node.
    */
  def getString(node: Node): String = source.getString(node.getToken)

  /** @return
    *   always the original file that was parsed. Might be a file that is the result of transpilation
    */
  def originalFilePath: String = {
    Option(source.getURL) match {
      case Some(url) => url.getPath
      case None      => source.getName
    }
  }

  /** Calculates where the origin file named in a source map should be located on disk.
    *
    * @param sourceFileName
    *   a file name taken from the `sources` entries of the source map
    * @return
    *   the location of the origin file; the file is not guaranteed to exist
    */
  private def constructSourceFilePath(sourceFileName: String): File = {
    if (sourceFileName.isEmpty) {
      srcDir / source.getName
    } else if (absoluteFilePath.contains(NuxtTranspiler.NUXT_FOLDER) && srcDir.path.compareTo(projectDir) == 0) {
      // For nuxt-js transpilation we have the same src and project dir and we need some special handling here
      if (sourceFileName.startsWith(WEBPACK_PREFIX)) {
        val replacedName = FileUtils.cleanPath(sourceFileName.replace(WEBPACK_PREFIX, ""))
        // drop everything up to and including the first "/"
        srcDir / replacedName.substring(replacedName.indexOf("/") + 1)
      } else {
        File(absoluteFilePath).parent / sourceFileName
      }
    } else if (sourceFileName.startsWith(WEBPACK_PREFIX)) {
      // Additionally, source map files coming from webpack (e.g., from Vue transpilation) are somewhat hidden
      val replacedName = sourceFileName.replace(WEBPACK_PREFIX, "")
      // drop everything up to and including the first "/"
      srcDir / replacedName.substring(replacedName.indexOf("/") + 1)
    } else {
      val cleanedPath = FileUtils.cleanPath(sourceFileName)
      // having "/" here is fine as JS source maps always have platform independent path separators
      val lookupPath = if (cleanedPath.contains("/" + srcDir.name + "/")) {
        // keep only the part behind "/<srcDir name>/"
        cleanedPath.substring(cleanedPath.indexOf("/" + srcDir.name + "/") + srcDir.name.length + 2)
      } else {
        cleanedPath
      }
      val srcFilePath: File = if (cleanedPath.contains("AppData/Local/Temp")) {
        // special handling for Windows CI
        srcDir.root / "Users" / cleanedPath.replace(srcDir.toString(), "")
      } else {
        val lookupFile = File(lookupPath)
        if (lookupFile.parent != lookupFile.root) {
          srcDir / lookupPath
        } else {
          srcDir / lookupFile.name
        }
      }
      srcFilePath
    }
  }

  /** Tries to load the source map that belongs to the parsed file and the origin file it refers to.
    *
    * @return
    *   None if the source map file is empty, cannot be read or parsed, names no source file, or refers to an origin
    *   file that does not exist. Otherwise the parsed source map together with the path and the content of the origin
    *   file.
    */
  private def sourceMapOrigin(): Option[SourceMapOrigin] = {
    if (File(mapFilePath).isEmpty) {
      logger.debug(s"No source map file available for '$originalFilePath'")
      None
    } else {
      // We apply a Try here as some source maps are indeed un-parsable by ReadableSourceMap.
      // Reading the file happens inside the Try as well, so that an unreadable source map file
      // is handled exactly like an un-parsable one instead of failing the construction of this class.
      val parsedSourceMap = Try {
        val sourceMapContent = IOUtils.readLinesInFile(Paths.get(mapFilePath)).mkString("\n")
        ReadableSourceMapImpl.fromSource(sourceMapContent)
      }
      parsedSourceMap match {
        case Failure(exception) =>
          logger.debug(s"Invalid source map file for '$originalFilePath'", exception)
          None
        case Success(sourceMap) =>
          val sourceFileNames = sourceMap.getSources.asScala.filter(_ != null)
          // Both expected names only depend on the parsed file; we calculate them once up front.
          val baseName = File(absoluteFilePath).nameWithoutExtension
          // The .vue name is compared case-insensitively, hence both sides have to be lower-cased.
          val expectedVueName = (baseName + VUE_SUFFIX).toLowerCase
          val expectedJsName  = "/" + baseName + JS_SUFFIX
          // The source file might not exist, e.g., if it was the result of transpilation
          // but is not delivered and still referenced in the source map
          // (fix for: https://github.com/ShiftLeftSecurity/product/issues/4994)
          val sourceFile = sourceFileNames
            .find(f => f.toLowerCase.endsWith(expectedVueName) || f.endsWith(expectedJsName))
            .orElse(sourceFileNames.headOption)

          sourceFile.flatMap { sourceFileName =>
            val sourceFilePath = constructSourceFilePath(sourceFileName)
            if (!sourceFilePath.exists) {
              logger.debug(
                s"Could not load source map file for '$originalFilePath'. The source map file refers to '$sourceFilePath' but this does not exist"
              )
              None
            } else {
              val sourceFileMapping = FileUtils.contentMapFromFile(sourceFilePath.path)
              logger.debug(
                s"Successfully loaded source map file '$mapFilePath':" +
                  s"\n\t* Transpiled file: '$absoluteFilePath'" +
                  s"\n\t* Origin: '$sourceFilePath'"
              )
              Some(SourceMapOrigin(sourceFilePath.path, Some(sourceMap), sourceFileMapping))
            }
          }
      }
    }
  }

  /** @return
    *   the code of the node exactly as it appears in the parsed (possibly transpiled) file.
    */
  private def transpiledCode(node: Node): String =
    source.getString(node.getStart, node.getFinish - node.getStart)

  /** Calculates the code of a node, preferring the code from the origin file if a source map is available.
    *
    * Falls back to the code from the parsed file if there is no source map, no mapping for the node, or no line in
    * the origin file for the mapped line number.
    */
  private def codeFromSourceMap(node: Node): String = {
    sourceMap match {
      case Some(SourceMapOrigin(_, Some(sourceMap), sourceWithLineNumbers)) =>
        // The same 'line - 1' adjustment is used for all source map lookups in this class.
        val line   = getLineOfSource(node.getStart) - 1
        val column = getColumnOfSource(node.getStart)
        sourceMap.getMapping(line, column) match {
          case null =>
            transpiledCode(node)
          case mapping =>
            val originLine   = mapping.getSourceLine
            val originColumn = mapping.getSourceColumn
            val transpiledCodeLength = node.getFinish - node.getStart match {
              // for some transpiled nodes the start and finish indices are wrong:
              case 0     => node.toString.length
              case other => other
            }
            sourceWithLineNumbers.get(originLine) match {
              case Some(startingCodeLine) =>
                // Code from the origin source file was found.
                val maxCodeLength = math.min(transpiledCodeLength, MAX_CODE_LENGTH)
                // We are extra careful: we do not want to generate empty lines.
                // That can happen e.g., for synthetic return statements.
                // Hence, we back up 1 char.
                val startingCode =
                  startingCodeLine.substring(math.min(math.max(startingCodeLine.length - 1, 0), originColumn))
                calculateCode(sourceWithLineNumbers, startingCode, originLine, maxCodeLength)
              case None =>
                // It has an actual mapping, but it is synthetic code not found in the source file.
                // We return the synthetic code.
                transpiledCode(node)
            }
        }
      case _ =>
        // No mapping at all. We return the node code.
        transpiledCode(node)
    }
  }

  /** Code field calculation:
    *   - We start with the re-mapped line/column number.
    *   - We append following lines of the origin file (joined with a newline) until the length of the transpiled node
    *     is reached or the origin file ends.
    *   - If the length is reached the collected code is abbreviated via shortenCode. Otherwise, the collected code is
    *     returned without a trailing line end.
    *
    * @param sourceWithLineNumbers
    *   content of the origin file, keyed by line number
    * @param currentLine
    *   the code collected so far
    * @param currentLineNumber
    *   line number of the last line that was added to currentLine
    * @param transpiledCodeLength
    *   the number of characters to collect (the caller caps this at MAX_CODE_LENGTH)
    */
  @scala.annotation.tailrec
  private def calculateCode(
    sourceWithLineNumbers: Map[Int, String],
    currentLine: String,
    currentLineNumber: Int,
    transpiledCodeLength: Int
  ): String =
    currentLine match {
      case line if line.length >= transpiledCodeLength =>
        shortenCode(line, transpiledCodeLength - 1)
      case line if line.length < transpiledCodeLength && sourceWithLineNumbers.contains(currentLineNumber + 1) =>
        calculateCode(
          sourceWithLineNumbers,
          line + "\n" + sourceWithLineNumbers(currentLineNumber + 1),
          currentLineNumber + 1,
          transpiledCodeLength
        )
      case line =>
        // The origin file ended before enough code was collected.
        line.stripLineEnd
    }

  /** @return
    *   the line number from the source map mapping (plus 1) if a source map is available, None if the source map has
    *   no mapping for the node, and the line number in the parsed file if there is no source map at all.
    */
  private def lineFromSourceMap(node: Node): Option[Int] = {
    sourceMap match {
      case Some(SourceMapOrigin(_, Some(sourceMap), _)) =>
        val line   = getLineOfSource(node.getStart) - 1
        val column = getColumnOfSource(node.getStart)
        Option(sourceMap.getMapping(line, column)).map(_.getSourceLine + 1)
      case _ =>
        Some(getLineOfSource(node.getStart))
    }
  }

  /** @return
    *   the column number from the source map mapping if a source map is available, None if the source map has no
    *   mapping for the node, and the column number in the parsed file if there is no source map at all.
    */
  private def columnFromSourceMap(node: Node): Option[Int] = {
    sourceMap match {
      case Some(SourceMapOrigin(_, Some(sourceMap), _)) =>
        val line   = getLineOfSource(node.getStart) - 1
        val column = getColumnOfSource(node.getStart)
        Option(sourceMap.getMapping(line, column)).map(_.getSourceColumn)
      case _ =>
        Some(getColumnOfSource(node.getStart))
    }
  }

  /** @return
    *   the path of the origin file relative to srcDir if a source map was loaded, otherwise originalFilePath. Paths of
    *   files within the nuxt folder are additionally passed through NuxtTranspiler.remapPath.
    */
  private def filePathFromSourceMap: String = {
    sourceMap match {
      case Some(SourceMapOrigin(sourceFilePath, _, _)) if absoluteFilePath.contains(NuxtTranspiler.NUXT_FOLDER) =>
        srcDir.relativize(File(NuxtTranspiler.remapPath(sourceFilePath.toString))).toString
      case Some(SourceMapOrigin(sourceFilePath, _, _)) =>
        srcDir.relativize(File(sourceFilePath)).toString
      case None if absoluteFilePath.contains(NuxtTranspiler.NUXT_FOLDER) =>
        NuxtTranspiler.remapPath(originalFilePath)
      case None =>
        originalFilePath
    }
  }

  // Returns the line number for a given position in the source.
  // We use this method instead of source.getLine for performance reasons.
  // NOTE(review): fails with a NoSuchElementException if the lookup table has no entry at or after
  // the given position; presumably the table covers every position of the source - confirm.
  private def getLineOfSource(position: Int): Int = {
    val (_, lineNumber) = positionToLineNumberMapping.minAfter(position).get
    lineNumber
  }

  // Returns the column number for a given position in the source.
  // We use this method instead of source.getColumn for performance reasons.
  // NOTE(review): fails with a NoSuchElementException if the lookup table has no entry at or after
  // the given position; presumably the table covers every position of the source - confirm.
  private def getColumnOfSource(position: Int): Int = {
    val (_, firstPositionInLine) = positionToFirstPositionInLineMapping.minAfter(position).get
    position - firstPositionInLine
  }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy