All Downloads are FREE. Search and download functionalities are using the official Maven repository.

io.shiftleft.codepropertygraph.cpgloading.CpgLoader.scala Maven / Gradle / Ivy

There is a newer version: 1.7.23
Show newest version
package io.shiftleft.codepropertygraph.cpgloading

import io.shiftleft.codepropertygraph.generated.Cpg
import org.slf4j.{Logger, LoggerFactory}

import java.io.FileNotFoundException
import java.nio.charset.StandardCharsets
import java.nio.file.{Files, Path, Paths}
import scala.util.Using

/** Entry points for opening a Code Property Graph from disk.
  *
  * The on-disk format is detected by probing the first bytes of the file: proto (zip), overflowdb or flatgraph.
  * Flatgraph files are opened directly; the other two formats are converted into a flatgraph storage first.
  */
object CpgLoader {
  private val logger: Logger = LoggerFactory.getLogger(getClass)

  /** Load a Code Property Graph from the given file.
    *
    * @param filename
    *   path of the input file
    * @throws java.io.FileNotFoundException
    *   if the input file does not exist
    */
  def load(filename: String): Cpg =
    load(Paths.get(filename))

  /** Load a Code Property Graph from the given file - persist in given second file. I.e. the given input file will not
    * be modified, all changes will be written to the given 'persistTo' file.
    *
    * @param from
    *   path of the input file
    * @param persistTo
    *   path of the flatgraph storage that is (re)created and opened
    * @throws java.io.FileNotFoundException
    *   if the input file does not exist
    */
  def load(from: String, persistTo: String): Cpg =
    load(Paths.get(from), Paths.get(persistTo))

  /** Load a Code Property Graph from the given file
    *
    * Notes:
    *   - detects the format as either flatgraph, overflowdb or proto
    *   - a flatgraph storage is opened straight away
    *   - OverflowDb and proto formats are first converted to flatgraph, and therefore we create a new flatgraph
    *     storage next to the input file (`<input file name>.fg`), which can be obtained via
    *     `cpg.graph.storagePathMaybe`
    *
    * @throws java.io.FileNotFoundException
    *   if the input file does not exist
    */
  def load(path: Path): Cpg = {
    val absolutePath = path.toAbsolutePath
    if (!Files.exists(absolutePath)) {
      throw new FileNotFoundException(s"given input file $absolutePath does not exist")
    } else if (isProtoFormat(absolutePath) || isOverflowDbFormat(absolutePath)) {
      // both legacy formats need a conversion target: always derive it from the absolute input path so that the
      // resulting storage location does not depend on which of the two formats was detected
      load(absolutePath, persistTo = absolutePath.resolveSibling(s"${absolutePath.getFileName}.fg"))
    } else {
      // assuming it's flatgraph format
      Cpg.withStorage(absolutePath)
    }
  }

  /** Load a Code Property Graph from the given file, but persist it in the given second file. I.e. the given input file
    * will not be modified, all changes will be written to the given 'persistTo' file.
    *
    * Notes:
    *   - if the given 'persistTo' file already exists, it will be overridden - unless it denotes the input file itself
    *   - an existing 'persistTo' file is left untouched if the input file does not exist
    *   - detects the format as either flatgraph, overflowdb or proto
    *   - a flatgraph storage is copied to the `persistTo` file and then opened straight away
    *   - OverflowDb and proto formats are first converted to flatgraph, which is written to the `persistTo` file
    *
    * @throws java.io.FileNotFoundException
    *   if the input file does not exist
    * @throws java.lang.AssertionError
    *   if the input file is in none of the known formats
    */
  def load(from: Path, persistTo: Path): Cpg = {
    val absolutePath = from.toAbsolutePath
    // check the input first: a missing input must not cost the caller an existing output file
    if (!Files.exists(absolutePath))
      throw new FileNotFoundException(s"given input file $absolutePath does not exist")

    // compare absolute, normalised paths: `from` and `persistTo` may spell the same file differently
    // (relative vs. absolute, `a/../b` vs. `b`), and deleting it would destroy the input
    val targetIsInput = persistTo.toAbsolutePath.normalize == absolutePath.normalize
    if (!targetIsInput)
      Files.deleteIfExists(persistTo)

    if (isProtoFormat(absolutePath)) {
      logger.debug(s"Converting $from from proto cpg into new flatgraph storage: $persistTo")
      ProtoCpgLoader.loadFromProtoZip(absolutePath.toString, Option(persistTo))
    } else if (isOverflowDbFormat(absolutePath)) {
      loadFromOverflowDb(absolutePath, persistTo)
    } else if (isFlatgraphFormat(absolutePath)) {
      // per the JDK docs, `Files.copy` completes without copying when source and target are the same file
      Files.copy(absolutePath, persistTo)
      Cpg.withStorage(persistTo)
    } else {
      throw new AssertionError(
        "unknown file format - we probed the first bytes but it didn't look like one of our known formats (proto.zip, flatgraph, overflowdb)"
      )
    }
  }

  /** Determine whether the CPG is a legacy (proto) CPG, i.e. whether the file starts with the zip magic bytes `PK` */
  def isProtoFormat(path: Path): Boolean =
    probeFirstBytes(path, "PK")

  /** Determine whether the CPG is a proto CPG */
  def isProtoFormat(filename: String): Boolean =
    isProtoFormat(Paths.get(filename))

  /** Determine whether the CPG is in overflowdb format, i.e. whether the file starts with `H:2` */
  def isOverflowDbFormat(path: Path): Boolean =
    probeFirstBytes(path, "H:2")

  /** Determine whether the CPG is in flatgraph format, i.e. whether the file starts with the flatgraph magic bytes */
  def isFlatgraphFormat(path: Path): Boolean =
    probeFirstBytes(path, "FLT GRPH") // flatgraph.storage.MagicBytesString

  /** Load Code Property Graph from an overflow DB file, by first converting it into a flatgraph binary
    *
    * @param path
    *   the overflowdb input file
    * @param persistTo
    *   the flatgraph file that the conversion writes to and that is opened afterwards
    */
  def loadFromOverflowDb(path: Path, persistTo: Path): Cpg = {
    logger.info(s"Converting $path from overflowdb to new flatgraph storage: $persistTo")
    flatgraph.convert.Convert.convertOdbToFlatgraph(overflowDbFile = path, outputFile = persistTo)
    Cpg.withStorage(persistTo)
  }

  /** Determine whether the CPG is a legacy (proto) CPG */
  @deprecated("use `isProtoFormat` instead")
  def isLegacyCpg(filename: String): Boolean =
    isProtoFormat(Paths.get(filename))

  /** Determine whether the CPG is a legacy (proto) CPG */
  @deprecated("use `isProtoFormat` instead")
  def isLegacyCpg(path: Path): Boolean =
    isProtoFormat(path)

  /** Check whether the file at `path` starts with the bytes of `probeFor` (UTF-8 encoded).
    *
    * Returns `false` if the file is shorter than the probe or cannot be opened/read at all (e.g. it does not exist or
    * is a directory): an unreadable file simply does not match the format.
    */
  private def probeFirstBytes(path: Path, probeFor: String): Boolean = {
    val expected = probeFor.getBytes(StandardCharsets.UTF_8)
    Using(Files.newInputStream(path)) { is =>
      val buffer = new Array[Byte](expected.length)
      // `read` may legally return fewer bytes than requested; `readNBytes` blocks until the buffer is full or EOF
      val bytesRead = is.readNBytes(buffer, 0, buffer.length)
      bytesRead == expected.length && buffer.sameElements(expected)
    }.getOrElse(false)
  }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy