All Downloads are FREE. Search and download functionalities are using the official Maven repository.

io.joern.jssrc2cpg.utils.AstGenRunner.scala Maven / Gradle / Ivy

The newest version!
package io.joern.jssrc2cpg.utils

import better.files.File
import io.joern.jssrc2cpg.Config
import io.joern.jssrc2cpg.preprocessing.EjsPreprocessor
import io.joern.x2cpg.SourceFiles
import io.joern.x2cpg.utils.{Environment, ExternalCommand}
import io.shiftleft.utils.IOUtils
import com.typesafe.config.ConfigFactory
import org.slf4j.LoggerFactory
import versionsort.VersionHelper

import java.nio.file.Paths
import java.util.regex.Pattern
import scala.util.Failure
import scala.util.Success
import scala.util.matching.Regex
import scala.util.Try

object AstGenRunner {

  private val logger = LoggerFactory.getLogger(getClass)

  private val LineLengthThreshold: Int = 10000

  private val NODE_OPTIONS: Map[String, String] = Map("NODE_OPTIONS" -> "--max-old-space-size=8192")

  private val TypeDefinitionFileExtensions = List(".t.ts", ".d.ts")

  private val MinifiedPathRegex: Regex = ".*([.-]min\\..*js|bundle\\.js)".r

  private val AstGenDefaultIgnoreRegex: Seq[Regex] =
    List(
      "(conf|test|spec|[.-]min|\\.d)\\.(js|ts|jsx|tsx)$".r,
      s"node_modules${Pattern.quote(java.io.File.separator)}.*".r,
      s"venv${Pattern.quote(java.io.File.separator)}.*".r,
      s"docs${Pattern.quote(java.io.File.separator)}.*".r,
      s"test${Pattern.quote(java.io.File.separator)}.*".r,
      s"tests${Pattern.quote(java.io.File.separator)}.*".r,
      s"e2e${Pattern.quote(java.io.File.separator)}.*".r,
      s"e2e-beta${Pattern.quote(java.io.File.separator)}.*".r,
      s"examples${Pattern.quote(java.io.File.separator)}.*".r,
      s"cypress${Pattern.quote(java.io.File.separator)}.*".r,
      s"jest-cache${Pattern.quote(java.io.File.separator)}.*".r,
      s"eslint-rules${Pattern.quote(java.io.File.separator)}.*".r,
      s"codemods${Pattern.quote(java.io.File.separator)}.*".r,
      s"flow-typed${Pattern.quote(java.io.File.separator)}.*".r,
      s"i18n${Pattern.quote(java.io.File.separator)}.*".r,
      s"vendor${Pattern.quote(java.io.File.separator)}.*".r,
      s"www${Pattern.quote(java.io.File.separator)}.*".r,
      s"dist${Pattern.quote(java.io.File.separator)}.*".r,
      s"build${Pattern.quote(java.io.File.separator)}.*".r
    )

  private val IgnoredTestsRegex: Seq[Regex] =
    List(
      ".*[.-]spec\\.js".r,
      ".*[.-]mock\\.js".r,
      ".*[.-]e2e\\.js".r,
      ".*[.-]test\\.js".r,
      ".*cypress\\.json".r,
      ".*test.*\\.json".r
    )

  private val IgnoredFilesRegex: Seq[Regex] = List(
    ".*jest\\.config.*".r,
    ".*webpack\\..*\\.js".r,
    ".*vue\\.config\\.js".r,
    ".*babel\\.config\\.js".r,
    ".*chunk-vendors.*\\.js".r, // commonly found in webpack / vue.js projects
    ".*app~.*\\.js".r,          // commonly found in webpack / vue.js projects
    ".*\\.chunk\\.js".r,
    ".*\\.babelrc.*".r,
    ".*\\.eslint.*".r,
    ".*\\.tslint.*".r,
    ".*\\.stylelintrc\\.js".r,
    ".*rollup\\.config.*".r,
    ".*\\.types\\.js".r,
    ".*\\.cjs\\.js".r,
    ".*eslint-local-rules\\.js".r,
    ".*\\.devcontainer\\.json".r,
    ".*Gruntfile\\.js".r,
    ".*i18n.*\\.json".r
  )

  case class AstGenRunnerResult(
    parsedFiles: List[(String, String)] = List.empty,
    skippedFiles: List[(String, String)] = List.empty
  )

  // full path to the astgen binary from the env var ASTGEN_BIN
  private val AstGenBin: Option[String] = scala.util.Properties.envOrNone("ASTGEN_BIN").flatMap {
    case path if File(path).isDirectory => Some((File(path) / "astgen").pathAsString)
    case path if File(path).exists      => Some(File(path).pathAsString)
    case _                              => None
  }

  lazy private val executableName = (Environment.operatingSystem, Environment.architecture) match {
    case (Environment.OperatingSystemType.Windows, _)                                => "astgen-win.exe"
    case (Environment.OperatingSystemType.Linux, Environment.ArchitectureType.X86)   => "astgen-linux"
    case (Environment.OperatingSystemType.Linux, Environment.ArchitectureType.ARMv8) => "astgen-linux-arm"
    case (Environment.OperatingSystemType.Mac, Environment.ArchitectureType.X86)     => "astgen-macos"
    case (Environment.OperatingSystemType.Mac, Environment.ArchitectureType.ARMv8)   => "astgen-macos-arm"
    case _ =>
      logger.warn("Could not detect OS version! Defaulting to Linux/x86_64.")
      "astgen-linux"
  }

  lazy private val executableDir: String = {
    val dir        = getClass.getProtectionDomain.getCodeSource.getLocation.toString
    val indexOfLib = dir.lastIndexOf("lib")
    val fixedDir = if (indexOfLib != -1) {
      new java.io.File(dir.substring("file:".length, indexOfLib)).toString
    } else {
      val indexOfTarget = dir.lastIndexOf("target")
      if (indexOfTarget != -1) {
        new java.io.File(dir.substring("file:".length, indexOfTarget)).toString
      } else {
        "."
      }
    }
    Paths.get(fixedDir, "/bin/astgen").toAbsolutePath.toString
  }

  private def hasCompatibleAstGenVersionAtPath(astGenVersion: String, path: Option[String]): Boolean = {
    val astGenCommand = path.getOrElse("astgen")
    val localPath     = path.flatMap(File(_).parentOption.map(_.pathAsString)).getOrElse(".")
    val debugMsgPath  = path.getOrElse("PATH")
    ExternalCommand.run(s"$astGenCommand --version", localPath).toOption.map(_.mkString.strip()) match {
      case Some(installedVersion)
          if installedVersion != "unknown" &&
            Try(VersionHelper.compare(installedVersion, astGenVersion)).toOption.getOrElse(-1) >= 0 =>
        logger.debug(s"Found astgen v$installedVersion in '$debugMsgPath'")
        true
      case Some(installedVersion) =>
        logger.debug(
          s"Found astgen v$installedVersion in '$debugMsgPath' but jssrc2cpg requires at least v$astGenVersion"
        )
        false
      case _ =>
        false
    }
  }

  /** @return
    *   the full path to the astgen binary found on the system
    */
  private def compatibleAstGenPath(astGenVersion: String): String = {
    AstGenBin match
      // 1. case: we try it at env var ASTGEN_BIN
      case Some(path) if hasCompatibleAstGenVersionAtPath(astGenVersion, Some(path)) =>
        path
      // 2. case: we try it with the systems PATH
      case _ if hasCompatibleAstGenVersionAtPath(astGenVersion, None) =>
        "astgen"
      // otherwise: we use the default local astgen executable path
      case _ =>
        logger.debug(
          s"Did not find any astgen binary on this system (environment variable ASTGEN_BIN not set and no entry in the systems PATH)"
        )
        val localPath = s"$executableDir${java.io.File.separator}$executableName"
        localPath
  }

  private lazy val astGenCommand = {
    val conf          = ConfigFactory.load
    val astGenVersion = conf.getString("jssrc2cpg.astgen_version")
    val astGenPath    = compatibleAstGenPath(astGenVersion)
    logger.info(s"Using astgen from '$astGenPath'")
    astGenPath
  }
}

class AstGenRunner(config: Config) {

  import io.joern.jssrc2cpg.utils.AstGenRunner._

  private val executableArgs = if (!config.tsTypes) " --no-tsTypes" else ""

  private def skippedFiles(astGenOut: List[String]): List[String] = {
    val skipped = astGenOut.collect {
      case out if out.startsWith("Parsing") =>
        val filename = out.substring(out.indexOf(" ") + 1, out.indexOf(":") - 1)
        val reason   = out.substring(out.indexOf(":") + 2)
        logger.warn(s"\t- failed to parse '$filename': '$reason'")
        Option(filename)
      case out if !out.startsWith("Converted") && !out.startsWith("Retrieving") =>
        val filename = out.substring(0, out.indexOf(" "))
        val reason   = out.substring(out.indexOf(" ") + 1)
        logger.warn(s"\t- failed to parse '$filename': '$reason'")
        Option(filename)
      case out =>
        logger.debug(s"\t+ $out")
        None
    }
    skipped.flatten
  }

  private def isIgnoredByUserConfig(filePath: String): Boolean = {
    lazy val isInIgnoredFiles = config.ignoredFiles.exists {
      case ignorePath if File(ignorePath).isDirectory => filePath.startsWith(ignorePath)
      case ignorePath                                 => filePath == ignorePath
    }
    lazy val isInIgnoredFileRegex = config.ignoredFilesRegex.matches(filePath)
    if (isInIgnoredFiles || isInIgnoredFileRegex) {
      logger.debug(s"'$filePath' ignored by user configuration")
      true
    } else {
      false
    }
  }

  private def isMinifiedFile(filePath: String): Boolean = filePath match {
    case p if MinifiedPathRegex.matches(p) => true
    case p if File(p).exists && p.endsWith(".js") =>
      val lines             = IOUtils.readLinesInFile(File(filePath).path)
      val linesOfCode       = lines.size
      val longestLineLength = if (lines.isEmpty) 0 else lines.map(_.length).max
      if (longestLineLength >= LineLengthThreshold && linesOfCode <= 50) {
        logger.debug(s"'$filePath' seems to be a minified file (contains a line with length $longestLineLength)")
        true
      } else false
    case _ => false
  }

  private def isIgnoredByDefault(filePath: String): Boolean = {
    lazy val isIgnored     = IgnoredFilesRegex.exists(_.matches(filePath))
    lazy val isIgnoredTest = IgnoredTestsRegex.exists(_.matches(filePath))
    lazy val isMinified    = isMinifiedFile(filePath)
    if (isIgnored || isIgnoredTest || isMinified) {
      logger.debug(s"'$filePath' ignored by default")
      true
    } else {
      false
    }
  }

  private def isTranspiledFile(filePath: String): Boolean = {
    val file = File(filePath)
    // We ignore files iff:
    // - they are *.js files and
    // - they contain a //sourceMappingURL comment or have an associated source map file and
    // - a file with the same name is located directly next to them
    lazy val isJsFile            = file.exists && file.extension.contains(".js")
    lazy val hasSourceMapComment = IOUtils.readLinesInFile(file.path).exists(_.contains("//sourceMappingURL"))
    lazy val hasSourceMapFile    = File(s"$filePath.map").exists
    lazy val hasSourceMap        = hasSourceMapComment || hasSourceMapFile
    lazy val hasFileWithSameName =
      file.siblings.exists(_.nameWithoutExtension(includeAll = false) == file.nameWithoutExtension)
    if (isJsFile && hasSourceMap && hasFileWithSameName) {
      logger.debug(s"'$filePath' ignored by default (seems to be the result of transpilation)")
      true
    } else {
      false
    }

  }

  private def filterFiles(files: List[String], out: File): List[String] = {
    files.filter { file =>
      file.stripSuffix(".json").replace(out.pathAsString, config.inputPath) match {
        // We are not interested in JS / TS type definition files at this stage.
        // TODO: maybe we can enable that later on and use the type definitions there
        //  for enhancing the CPG with additional type information for functions
        case filePath if TypeDefinitionFileExtensions.exists(filePath.endsWith) => false
        case filePath if isIgnoredByUserConfig(filePath)                        => false
        case filePath if isIgnoredByDefault(filePath)                           => false
        case filePath if isTranspiledFile(filePath)                             => false
        case _                                                                  => true
      }
    }
  }

  /** Changes the file-extension by renaming this file; if file does not have an extension, it adds the extension. If
    * file does not exist (or is a directory) no change is done and the current file is returned.
    */
  private def changeExtensionTo(file: File, extension: String): File = {
    val newName = s"${file.nameWithoutExtension(includeAll = false)}.${extension.stripPrefix(".")}"
    if (file.isRegularFile) file.renameTo(newName) else if (file.notExists) File(newName) else file
  }

  private def processEjsFiles(in: File, out: File, ejsFiles: List[String]): Try[Seq[String]] = {
    val tmpJsFiles = ejsFiles.map { ejsFilePath =>
      val ejsFile             = File(ejsFilePath)
      val maybeTranspiledFile = File(s"${ejsFilePath.stripSuffix(".ejs")}.js")
      if (isTranspiledFile(maybeTranspiledFile.pathAsString)) {
        maybeTranspiledFile
      } else {
        val sourceFileContent = IOUtils.readEntireFile(ejsFile.path)
        val preprocessContent = new EjsPreprocessor().preprocess(sourceFileContent)
        (out / in.relativize(ejsFile).toString).parent.createDirectoryIfNotExists(createParents = true)
        val newEjsFile = ejsFile.copyTo(out / in.relativize(ejsFile).toString)
        val jsFile     = changeExtensionTo(newEjsFile, ".js").writeText(preprocessContent)
        newEjsFile.createFile().writeText(sourceFileContent)
        jsFile
      }
    }

    val result =
      ExternalCommand.run(s"$astGenCommand$executableArgs -t ts -o $out", out.toString(), extraEnv = NODE_OPTIONS)

    val jsons = SourceFiles.determine(out.toString(), Set(".json"))
    jsons.foreach { jsonPath =>
      val jsonFile    = File(jsonPath)
      val jsonContent = IOUtils.readEntireFile(jsonFile.path)
      val json        = ujson.read(jsonContent)
      val fileName    = json("fullName").str
      val newFileName = fileName.patch(fileName.lastIndexOf(".js"), ".ejs", 3)
      json("relativeName") = newFileName
      json("fullName") = newFileName
      jsonFile.writeText(json.toString())
    }

    tmpJsFiles.foreach(_.delete())
    result
  }

  private def ejsFiles(in: File, out: File): Try[Seq[String]] = {
    val files = SourceFiles.determine(in.pathAsString, Set(".ejs"))
    if (files.nonEmpty) processEjsFiles(in, out, files)
    else Success(Seq.empty)
  }

  private def vueFiles(in: File, out: File): Try[Seq[String]] = {
    val files = SourceFiles.determine(in.pathAsString, Set(".vue"))
    if (files.nonEmpty)
      ExternalCommand.run(s"$astGenCommand$executableArgs -t vue -o $out", in.toString(), extraEnv = NODE_OPTIONS)
    else Success(Seq.empty)
  }

  private def jsFiles(in: File, out: File): Try[Seq[String]] =
    ExternalCommand.run(s"$astGenCommand$executableArgs -t ts -o $out", in.toString(), extraEnv = NODE_OPTIONS)

  private def runAstGenNative(in: File, out: File): Try[Seq[String]] = for {
    ejsResult <- ejsFiles(in, out)
    vueResult <- vueFiles(in, out)
    jsResult  <- jsFiles(in, out)
  } yield jsResult ++ vueResult ++ ejsResult

  private def checkParsedFiles(files: List[String], in: File): List[String] = {
    val numOfParsedFiles = files.size
    logger.info(s"Parsed $numOfParsedFiles files.")
    if (numOfParsedFiles == 0) {
      logger.warn("You may want to check the DEBUG logs for a list of files that are ignored by default.")
      SourceFiles.determine(
        in.pathAsString,
        Set(".js", ".ts", ".vue", ".ejs", ".jsx", ".cjs", ".mjs", ".tsx"),
        ignoredDefaultRegex = Option(AstGenDefaultIgnoreRegex)
      )
    }
    files
  }

  def execute(out: File): AstGenRunnerResult = {
    val in = File(config.inputPath)
    runAstGenNative(in, out) match {
      case Success(result) =>
        val parsed  = checkParsedFiles(filterFiles(SourceFiles.determine(out.toString(), Set(".json")), out), in)
        val skipped = skippedFiles(result.toList)
        AstGenRunnerResult(parsed.map((in.toString(), _)), skipped.map((in.toString(), _)))
      case Failure(f) =>
        logger.error("\t- running astgen failed!", f)
        AstGenRunnerResult()
    }
  }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy