package com.rapid7.container.analyzer.docker.service;

import com.fasterxml.jackson.core.JsonParseException;
import com.fasterxml.jackson.databind.DeserializationFeature;
import com.fasterxml.jackson.databind.JsonMappingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.rapid7.container.analyzer.docker.analyzer.ImageHandler;
import com.rapid7.container.analyzer.docker.analyzer.LayerExtractor;
import com.rapid7.container.analyzer.docker.analyzer.LayerFileHandler;
import com.rapid7.container.analyzer.docker.fingerprinter.ApkgFingerprinter;
import com.rapid7.container.analyzer.docker.fingerprinter.DotNetFingerprinter;
import com.rapid7.container.analyzer.docker.fingerprinter.DpkgFingerprinter;
import com.rapid7.container.analyzer.docker.fingerprinter.FileFingerprinter;
import com.rapid7.container.analyzer.docker.fingerprinter.OsReleaseFingerprinter;
import com.rapid7.container.analyzer.docker.fingerprinter.OwaspDependencyFingerprinter;
import com.rapid7.container.analyzer.docker.fingerprinter.PacmanFingerprinter;
import com.rapid7.container.analyzer.docker.fingerprinter.RpmFingerprinter;
import com.rapid7.container.analyzer.docker.fingerprinter.WhiteoutImageHandler;
import com.rapid7.container.analyzer.docker.model.Digest;
import com.rapid7.container.analyzer.docker.model.LayerPath;
import com.rapid7.container.analyzer.docker.model.image.Image;
import com.rapid7.container.analyzer.docker.model.image.ImageId;
import com.rapid7.container.analyzer.docker.model.image.ImageType;
import com.rapid7.container.analyzer.docker.model.image.Layer;
import com.rapid7.container.analyzer.docker.model.image.LayerId;
import com.rapid7.container.analyzer.docker.model.image.json.ImageModelObjectMapper;
import com.rapid7.container.analyzer.docker.model.json.Configuration;
import com.rapid7.container.analyzer.docker.model.json.ConfigurationJsonV2;
import com.rapid7.container.analyzer.docker.model.json.HistoryJson;
import com.rapid7.container.analyzer.docker.model.json.LayerJson;
import com.rapid7.container.analyzer.docker.model.json.Manifest;
import com.rapid7.container.analyzer.docker.model.json.TarManifestJson;
import com.rapid7.container.analyzer.docker.os.Fingerprinter;
import com.rapid7.container.analyzer.docker.packages.ApkgParser;
import com.rapid7.container.analyzer.docker.packages.DotNetParser;
import com.rapid7.container.analyzer.docker.packages.DpkgParser;
import com.rapid7.container.analyzer.docker.packages.OwaspDependencyParser;
import com.rapid7.container.analyzer.docker.packages.PacmanPackageParser;
import com.rapid7.container.analyzer.docker.packages.RpmPackageParser;
import com.rapid7.container.analyzer.docker.packages.settings.CustomParserSettingsBuilder;
import com.rapid7.container.analyzer.docker.packages.settings.OwaspDependencyParserSettingsBuilder;
import com.rapid7.container.analyzer.docker.util.InstantParser;
import com.rapid7.container.analyzer.docker.util.InstantParserModule;
import java.io.BufferedInputStream;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.FilterInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.nio.file.Files;
import java.nio.file.InvalidPathException;
import java.nio.file.Paths;
import java.time.Instant;
import java.util.ArrayList;
import java.util.List;
import java.util.Objects;
import java.util.zip.GZIPInputStream;
import java.util.zip.ZipException;
import java.util.zip.ZipInputStream;
import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
import org.apache.commons.compress.compressors.gzip.GzipCompressorInputStream;
import org.apache.commons.compress.utils.IOUtils;
import org.apache.commons.io.FilenameUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import static org.apache.commons.io.FileUtils.deleteQuietly;
import static org.slf4j.helpers.MessageFormatter.arrayFormat;
import static org.slf4j.helpers.MessageFormatter.format;

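/**
 * Analyzes a Docker image tarball and fingerprints its layers.
 *
 * <p>The service extracts a saved image, walks each layer in manifest order, and runs a chain of
 * {@link LayerFileHandler} fingerprinters over the layer contents (OS release, rpm, dpkg, apk,
 * pacman, and OWASP dependency data, plus any custom fingerprinters supplied at construction).
 *
 * <p>A minimal usage sketch; the image path, temp directory, and helper-image name below are
 * illustrative values, not part of this class:
 * <pre>{@code
 * DockerImageAnalyzerService service = new DockerImageAnalyzerService("rpm-helper:latest");
 * Image image = service.analyze(new File("/tmp/myimage.tar"), "/tmp");
 * service.writeJson(image, new File("/tmp/myimage.json"));
 * }</pre>
 */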
public class DockerImageAnalyzerService {
  private static final Logger LOGGER = LoggerFactory.getLogger(DockerImageAnalyzerService.class);
  private static final String WHITEOUT_AUFS_PREFIX = ".wh.";
  private static final long MAX_READ_FILE_SIZE = 1_048_576 * 25; // 25 MB
  private static final long MAX_EXTRACT_FILE_SIZE = 1_048_576 * 256; // 256 MB
  private ObjectMapper objectMapper;
  private List<LayerFileHandler> layerHandlers;
  private List<ImageHandler> imageHandlers;

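  /**
   * Creates an analyzer with the default (experimental) OWASP dependency parser settings and no
   * custom fingerprinters.
   *
   * @param rpmDockerImage Name of the Docker image passed through to the {@link RpmFingerprinter}.
   */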
  public DockerImageAnalyzerService(String rpmDockerImage) {
    this(rpmDockerImage, OwaspDependencyParserSettingsBuilder.EXPERIMENTAL);
  }

  public DockerImageAnalyzerService(String rpmDockerImage, CustomParserSettingsBuilder customBuilder) {
    this(rpmDockerImage, OwaspDependencyParserSettingsBuilder.EXPERIMENTAL, customBuilder);
  }

  public DockerImageAnalyzerService(String rpmDockerImage, OwaspDependencyParserSettingsBuilder builder) {
    this(rpmDockerImage, builder, CustomParserSettingsBuilder.builder());
  }

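  /**
   * Creates an analyzer with explicit OWASP and custom-parser settings. Registers the built-in
   * whiteout image handler and the default layer fingerprinters, then appends any fingerprinters
   * supplied by the custom builder.
   */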
  public DockerImageAnalyzerService(String rpmDockerImage, OwaspDependencyParserSettingsBuilder owaspBuilder, CustomParserSettingsBuilder customBuilder) {
    objectMapper = new ObjectMapper();
    objectMapper.disable(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES);
    objectMapper.registerModule(new InstantParserModule());
    imageHandlers = new ArrayList<>(1);
    imageHandlers.add(new WhiteoutImageHandler());
    layerHandlers = new ArrayList<>(6);
    layerHandlers.add(new OsReleaseFingerprinter(new Fingerprinter()));
    layerHandlers.add(new RpmFingerprinter(new RpmPackageParser(), rpmDockerImage));
    layerHandlers.add(new DpkgFingerprinter(new DpkgParser()));
    layerHandlers.add(new ApkgFingerprinter(new ApkgParser()));
    layerHandlers.add(new PacmanFingerprinter(new PacmanPackageParser()));
    layerHandlers.add(new OwaspDependencyFingerprinter(new OwaspDependencyParser(owaspBuilder)));
    layerHandlers.addAll(customBuilder.getFingerprinters());
  }

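  /**
   * Registers an additional handler that is invoked for every file in every non-empty layer.
   */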
  public void addFileHandler(LayerFileHandler handler) {
    layerHandlers.add(Objects.requireNonNull(handler));
  }

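  /**
   * Registers a {@link FileFingerprinter}, unless one is already registered.
   */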
  public void addFileHashFingeprinter() {
    if (layerHandlers.stream().noneMatch(h -> h instanceof FileFingerprinter))
      layerHandlers.add(new FileFingerprinter());
  }

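  /**
   * Reads the image identifier from an image tarball by locating its manifest.json entry.
   *
   * @return the image id derived from the manifest's config file name, or {@code null} if the tar
   *         contains no manifest.json.
   */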
  public ImageId getId(File imageTar) throws IOException {

    try (TarArchiveInputStream tarIn = new TarArchiveInputStream(new BufferedInputStream(new FileInputStream(imageTar)))) {
      TarArchiveEntry entry = null;
      while ((entry = tarIn.getNextTarEntry()) != null) {
        String name = entry.getName();

        if (name.equals("manifest.json")) {

          List<TarManifestJson> manifests = objectMapper.readValue(new NonClosingInputStream(tarIn), objectMapper.getTypeFactory().constructCollectionType(List.class, TarManifestJson.class));
          TarManifestJson manifest = manifests.get(0);

          // strip the ".json" extension; replace() matches literally, whereas replaceAll() would treat "." as a regex wildcard
          return new ImageId(manifest.getConfig().replace(".json", ""));
        }
      }
    }

    return null;
  }

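  /**
   * Serializes an analyzed image to JSON at the given destination.
   */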
  public void writeJson(Image image, File destination) throws IOException {
    ObjectMapper imageMapper = new ImageModelObjectMapper();
    imageMapper.writeValue(destination, image);
  }

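  /**
   * Extracts an image tarball (as produced by {@code docker save}) into a temporary directory
   * under {@code tempFilePath}, analyzes it, and deletes the extracted files when done.
   */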
  public Image analyze(File imageTar, String tempFilePath) throws IOException {

    File outputDirectory = Files.createTempDirectory(Paths.get(tempFilePath), FilenameUtils.removeExtension(imageTar.getName())).toFile();
    try {
      LOGGER.info("Extracting image to {}.", outputDirectory.getAbsolutePath());
      // extract the tar
      untar(imageTar, outputDirectory);
      LOGGER.info("Extracted image {}.", outputDirectory.getAbsolutePath());
      TarManifestJson manifest = parseTarManifest(imageTar.length(), new File(outputDirectory, "manifest.json"));
      Image image = analyze(
          outputDirectory,
          manifest.getImageId(),
          null,
          manifest,
          parseConfiguration(new File(outputDirectory, manifest.getConfig())),
          layerId -> new File(outputDirectory, layerId.getId() + "/layer.tar"));

      LOGGER.info("Completed analyzing image {}", image.getId());
      return image;
    } finally {
      deleteQuietly(outputDirectory);
    }
  }

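  /**
   * Analyzes an extracted image: walks the configuration history, pairs each non-empty history
   * entry with its layer blob, runs the registered layer and image handlers, and fills in the
   * image size and created date when the manifest does not supply them.
   */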
  public <M extends Manifest> Image analyze(File outputDirectory, ImageId id, Digest digest, M manifest, Configuration configuration, LayerExtractor layerSupplier) throws IOException {

    try {
      // parse the image configuration
      List<HistoryJson> layerHistories = configuration.getHistory();

      // the ordered identifiers of non-empty layers (which are presumed to have file-system changes requiring a blob)
      List<LayerId> layers = manifest.getLayers();

      // the ordered identifiers of the blobs to retrieve for each non-empty layer
      List<LayerId> layerBlobIds = manifest.getLayerBlobIds();

      // TODO: not pulling out the repository in the digest reference
      Image image = new Image(id, ImageType.DOCKER, digest, manifest.getSize(), configuration.getCreated());

      int layerIndex = 0;
      int emptyLayerIdSuffix = 0;
      Layer previousLayer = null;
      for (HistoryJson layerHistory : layerHistories) {
        boolean empty = layerHistory.isEmpty();
        LayerId layerId = null;
        if (!empty)
          layerId = layers.get(layerIndex);
        else
          layerId = new LayerId(id.getString() + "_empty_" + emptyLayerIdSuffix++);

        LOGGER.debug("Processing layer {}.", layerId);

        Layer layer = new Layer(layerId);
        layer.setCommand(layerHistory.getCommand());
        layer.setComment(layerHistory.getComment());
        layer.setAuthor(layerHistory.getAuthor());
        layer.setEmpty(empty);

        // Images built with BuildKit can have history entries with an empty created date; fall back to the previous layer's date, or to the epoch
        if (layerHistory.getCreated() != null)
          layer.setCreated(InstantParser.parse(layerHistory.getCreated()));
        else if (previousLayer != null && previousLayer.getCreated() != null)
          layer.setCreated(previousLayer.getCreated());
        else
          layer.setCreated(Instant.EPOCH);

        // if the layer is non-empty, process it
        if (!empty) {

          // locate the layer to process
          LayerId layerRetrievalId = layerBlobIds.get(layerIndex);
          File layerTar = layerSupplier.getLayer(layerRetrievalId);
          layer.setSize(layerTar.length());
          LOGGER.debug("Processing layer tar.");

          // extract layer
          processLayer(image, configuration, layer, layerTar);

          // attach additional layer information
          File layerInfoFile = new File(layerTar.getParentFile(), "json");
          if (layerInfoFile.exists()) {
            try {
              LayerJson layerInfo = parseLayerConfiguration(layerInfoFile);
              layer.setCreated(layerInfo.getCreated());
              layer.setParentId(layerInfo.getParentId());
            } catch (Exception exception) {
              LOGGER.warn("Failed to parse layer configuration json. Skipping setting of created data and parent identifier.", exception);
            }
          } else if (previousLayer != null) {
            layer.setParentId(previousLayer.getId());
          }

          layerIndex++;
          previousLayer = layer;
        } else if (previousLayer != null) {
          layer.setParentId(previousLayer.getId());
        }

        image.addLayer(layer);
      }

      // post-process image handlers
      for (ImageHandler handler : imageHandlers)
        handler.handle(image);

      // if the size on the image isn't known (e.g. from a manifest that does not support it), sum up layers as a last resort
      if (image.getSize() == null || image.getSize() <= 0L)
        image.setSize(image.getLayers().stream().mapToLong(Layer::getSize).sum());

      // if the created date on an image isn't known, use the created date of the last layer
      if (image.getCreated() == null && !image.getLayers().isEmpty())
        image.setCreated(image.getLayers().get(image.getLayers().size() - 1).getCreated());

      return image;
    } catch (IOException exception) {
      LOGGER.error("Failed to analyze image.", exception);
      throw exception;
    }
  }

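  /**
   * Parses an image manifest, transparently handling both gzipped and plain JSON files.
   */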
  public Manifest parseManifest(File file) throws JsonParseException, JsonMappingException, IOException {
    // TODO: polymorphic
    try (GZIPInputStream stream = new GZIPInputStream(new FileInputStream(file))) {
      return objectMapper.readValue(stream, Manifest.class);
    } catch (ZipException exception) {
      return objectMapper.readValue(file, Manifest.class);
    }
  }

  public Configuration parseConfiguration(File file) throws JsonParseException, JsonMappingException, IOException {
    return objectMapper.readValue(file, ConfigurationJsonV2.class);
  }

  public TarManifestJson parseTarManifest(long tarSize, File file) throws JsonParseException, JsonMappingException, IOException {
    List<TarManifestJson> manifests = objectMapper.readValue(file, objectMapper.getTypeFactory().constructCollectionType(List.class, TarManifestJson.class));
    return manifests.get(0).setSize(tarSize);
  }

  public LayerJson parseLayerConfiguration(File file) throws JsonParseException, JsonMappingException, IOException {
    return objectMapper.readValue(file, LayerJson.class);
  }

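  /**
   * Extracts a tar archive into the destination directory, skipping entries that would escape the
   * destination (directory traversal) and AUFS whiteout files, and failing fast when free disk
   * space is less than three times the entry size.
   */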
  public void untar(File tar, File destination) throws FileNotFoundException, IOException {

    try (TarArchiveInputStream tarIn = new TarArchiveInputStream(new BufferedInputStream(new FileInputStream(tar)))) {
      TarArchiveEntry entry = null;
      while ((entry = tarIn.getNextTarEntry()) != null) {
        String name = entry.getName();
        File file = new File(destination, name);
        try {
          if (!file.toPath().toAbsolutePath().normalize().startsWith(destination.toPath().toAbsolutePath().normalize())) {
            LOGGER.debug(format("[Image: {}] Skipping extraction of {} due to directory traversal.", tar.getName(), name).getMessage());
            continue;
          }
        } catch (InvalidPathException exception) {
          LOGGER.debug(format("[Image: {}] Skipping extraction of {} due to invalid filename.", tar.getName(), name).getMessage());
          continue;
        }

        LOGGER.debug(arrayFormat("[Image: {}] Extracting {} {}.", new Object[] {tar.getName(), entry.isDirectory() ? "directory" : "file", file.getAbsolutePath()}).getMessage());

        long usableSpace = file.getParentFile().getFreeSpace();
        long objectSize = entry.getRealSize();
        if (objectSize * 3 > usableSpace) {
          throw new IOException("Insufficient disk space to extract resource. " + usableSpace + " available, " + objectSize * 3 + " required.");
        }

        try {
          if (entry.isDirectory()) {
            //noinspection ResultOfMethodCallIgnored
            file.mkdirs();
          } else if (!name.contains(WHITEOUT_AUFS_PREFIX)) {
            try (FileOutputStream outputStream = new FileOutputStream(file)) {
              IOUtils.copy(tarIn, outputStream);
            }
          } else {
            LOGGER.debug(format("[Image: {}] Skipping whiteout file {}.", tar.getName(), file.getAbsolutePath()).getMessage());
          }
        } catch (FileNotFoundException exception) {
          // can occur if there is a case-sensitivity collision, etc.
          LOGGER.warn(format("[Image: {}] Failed to extract file {}.", tar.getName(), file.getAbsolutePath()).getMessage());
        }
      }
    }
  }

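  /**
   * Processes a single layer tar, first as a gzipped tar and, if the blob is not gzipped, as a
   * plain tar. Layer blobs smaller than 100 bytes are skipped as they cannot be valid tars.
   */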
  private void processLayer(Image image, Configuration configuration, Layer layer, File tar) throws FileNotFoundException, IOException {
    // skip invalid files: anything too small to be a tar or a gzipped tar (symlinked layers appear as tiny duplicates)
    if (tar.length() < 100)
      return;

    try (TarArchiveInputStream tarIn = new TarArchiveInputStream(new GzipCompressorInputStream(new BufferedInputStream(new FileInputStream(tar), 65536)))) {
      processLayerTar(image, configuration, layer, tar, tarIn);
    } catch (IOException exception) {
      // fall back to a plain (uncompressed) tar stream when the layer blob is not gzipped; rethrow anything else
      if ("Input is not in the .gz format".equals(exception.getMessage()) || exception.getCause() instanceof ZipException) {
        try (TarArchiveInputStream tarIn = new TarArchiveInputStream(new BufferedInputStream(new FileInputStream(tar), 65536))) {
          processLayerTar(image, configuration, layer, tar, tarIn);
        }
      } else {
        throw exception;
      }
    }
  }

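  /**
   * Iterates the entries of a layer tar, extracting each regular file beside the layer tar
   * (skipping oversized entries, directories, and traversal attempts), runs every registered
   * handler against it, and deletes the temporary copy afterwards.
   */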
  private void processLayerTar(Image image, Configuration configuration, Layer layer, File tar, TarArchiveInputStream tarIn) throws IOException {
    TarArchiveEntry entry = null;
    while ((entry = tarIn.getNextTarEntry()) != null) {
      String name = entry.getName();
      if (LOGGER.isTraceEnabled())
        LOGGER.trace(arrayFormat("[Image: {}] Processing {} {}.", new Object[] {tar.getName(), entry.isDirectory() ? "directory" : "file", name}).getMessage());

      if (entry.getSize() > MAX_EXTRACT_FILE_SIZE) {
        LOGGER.debug("Skipping file {} with size {} bytes because exceeds max of {} bytes.", name, entry.getSize(), MAX_EXTRACT_FILE_SIZE);
        continue;
      }

      LOGGER.debug(format("Extracting large file {} ({} bytes)", entry.getName(), entry.getSize()).getMessage());
      File bigFile = new File(tar.getParentFile(), entry.getName());
      try {
        if (!bigFile.toPath().toAbsolutePath().normalize().startsWith(bigFile.getParentFile().toPath().toAbsolutePath().normalize())) {
          LOGGER.debug(format("[Image: {}] Skipping extraction of {} due to directory traversal.", tar.getName(), name).getMessage());
          continue;
        }
        if (bigFile.isDirectory()) {
          LOGGER.debug(format("[Image: {}] Skipping extraction of {} because it is a directory.", tar.getName(), name).getMessage());
          continue;
        }

        bigFile.getParentFile().mkdirs();
        try (FileOutputStream outputStream = new FileOutputStream(bigFile)) {
          IOUtils.copy(tarIn, outputStream);
        }

        try (BufferedInputStream inputStream = new BufferedInputStream(new FileInputStream(bigFile), 65536)) {
          // mark the full file so each handler can reset the stream to the start; the length fits in an int given MAX_EXTRACT_FILE_SIZE
          inputStream.mark((int) (bigFile.length() + 1));
          for (LayerFileHandler handler : layerHandlers) {
            handler.handle(name, entry, inputStream, image, configuration, new LayerPath(tar.getParent(), layer));
            inputStream.reset();
          }
        }
      } catch (IOException ioe) {
        LOGGER.info("Failed to handle file {}", entry.getName(), ioe);
      } finally {
        bigFile.delete();
      }
    }
  }

  private static class ConvertibleOutputStream extends ByteArrayOutputStream {
    public ConvertibleOutputStream(int size) {
      super(size);
    }

    // Creates an InputStream over the internal buffer without copying it, avoiding a duplicate allocation.
    public InputStream toInputStream() {
      return new ByteArrayInputStream(buf, 0, count);
    }
  }

  private static class NonClosingInputStream extends FilterInputStream {
    NonClosingInputStream(InputStream in) {
      super(in);
    }

    @Override
    public void close() throws IOException {
    }
  }
}