All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.google.gerrit.server.patch.gitfilediff.GitFileDiffCacheImpl Maven / Gradle / Ivy

The newest version!
// Copyright (C) 2020 The Android Open Source Project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package com.google.gerrit.server.patch.gitfilediff;

import static java.util.function.Function.identity;

import com.google.auto.value.AutoValue;
import com.google.common.base.Throwables;
import com.google.common.cache.CacheLoader;
import com.google.common.cache.LoadingCache;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Iterables;
import com.google.common.collect.ListMultimap;
import com.google.common.collect.MultimapBuilder;
import com.google.common.collect.Multimaps;
import com.google.common.collect.Streams;
import com.google.common.flogger.FluentLogger;
import com.google.gerrit.entities.Patch;
import com.google.gerrit.entities.Project;
import com.google.gerrit.extensions.client.DiffPreferencesInfo.Whitespace;
import com.google.gerrit.metrics.Counter0;
import com.google.gerrit.metrics.Description;
import com.google.gerrit.metrics.MetricMaker;
import com.google.gerrit.server.cache.CacheModule;
import com.google.gerrit.server.config.ConfigUtil;
import com.google.gerrit.server.config.GerritServerConfig;
import com.google.gerrit.server.git.GitRepositoryManager;
import com.google.gerrit.server.logging.Metadata;
import com.google.gerrit.server.logging.TraceContext;
import com.google.gerrit.server.logging.TraceContext.TraceTimer;
import com.google.gerrit.server.patch.DiffExecutor;
import com.google.gerrit.server.patch.DiffNotAvailableException;
import com.google.gerrit.server.util.git.CloseablePool;
import com.google.inject.Inject;
import com.google.inject.Module;
import com.google.inject.Singleton;
import com.google.inject.name.Named;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import java.util.stream.Collectors;
import org.eclipse.jgit.diff.DiffEntry;
import org.eclipse.jgit.diff.DiffEntry.ChangeType;
import org.eclipse.jgit.diff.DiffFormatter;
import org.eclipse.jgit.diff.HistogramDiff;
import org.eclipse.jgit.diff.RawTextComparator;
import org.eclipse.jgit.errors.MissingObjectException;
import org.eclipse.jgit.lib.AbbreviatedObjectId;
import org.eclipse.jgit.lib.Config;
import org.eclipse.jgit.lib.ObjectId;
import org.eclipse.jgit.lib.Repository;
import org.eclipse.jgit.patch.FileHeader;
import org.eclipse.jgit.util.io.DisabledOutputStream;

/** Implementation of the {@link GitFileDiffCache} */
@Singleton
public class GitFileDiffCacheImpl implements GitFileDiffCache {
  private static final FluentLogger logger = FluentLogger.forEnclosingClass();
  private static final String GIT_DIFF = "git_file_diff";

  public static Module module() {
    return new CacheModule() {
      @Override
      protected void configure() {
        bind(GitFileDiffCache.class).to(GitFileDiffCacheImpl.class);
        persist(GIT_DIFF, GitFileDiffCacheKey.class, GitFileDiff.class)
            .maximumWeight(10 << 20)
            .weigher(GitFileDiffWeigher.class)
            .keySerializer(GitFileDiffCacheKey.Serializer.INSTANCE)
            .valueSerializer(GitFileDiff.Serializer.INSTANCE)
            .version(3)
            .loader(GitFileDiffCacheImpl.Loader.class);
      }
    };
  }

  @Singleton
  static class Metrics {
    final Counter0 timeouts;

    @Inject
    Metrics(MetricMaker metricMaker) {
      timeouts =
          metricMaker.newCounter(
              "caches/diff/timeouts",
              new Description(
                      "Total number of git file diff computations that resulted in timeouts.")
                  .setRate()
                  .setUnit("count"));
    }
  }

  /** Enum for the supported diff algorithms for the file diff computation. */
  public enum DiffAlgorithm {
    HISTOGRAM_WITH_FALLBACK_MYERS,
    HISTOGRAM_NO_FALLBACK
  }

  /** Creates a new JGit diff algorithm instance using the Gerrit's {@link DiffAlgorithm} enum. */
  public static class DiffAlgorithmFactory {
    public static org.eclipse.jgit.diff.DiffAlgorithm create(DiffAlgorithm diffAlgorithm) {
      HistogramDiff result = new HistogramDiff();
      if (diffAlgorithm.equals(DiffAlgorithm.HISTOGRAM_NO_FALLBACK)) {
        result.setFallbackAlgorithm(null);
      }
      return result;
    }
  }

  private final LoadingCache cache;

  @Inject
  public GitFileDiffCacheImpl(
      @Named(GIT_DIFF) LoadingCache cache) {
    this.cache = cache;
  }

  @Override
  public GitFileDiff get(GitFileDiffCacheKey key) throws DiffNotAvailableException {
    try {
      return cache.get(key);
    } catch (ExecutionException e) {
      throw new DiffNotAvailableException(e);
    }
  }

  @Override
  public ImmutableMap getAll(Iterable keys)
      throws DiffNotAvailableException {
    try {
      return cache.getAll(keys);
    } catch (ExecutionException e) {
      throw new DiffNotAvailableException(e);
    }
  }

  static class Loader extends CacheLoader {
    private final GitRepositoryManager repoManager;
    private final ExecutorService diffExecutor;
    private final long timeoutMillis;
    private final Metrics metrics;

    @Inject
    public Loader(
        @GerritServerConfig Config cfg,
        GitRepositoryManager repoManager,
        @DiffExecutor ExecutorService de,
        Metrics metrics) {
      this.repoManager = repoManager;
      this.diffExecutor = de;
      this.timeoutMillis =
          ConfigUtil.getTimeUnit(
              cfg,
              "cache",
              GIT_DIFF,
              "timeout",
              TimeUnit.MILLISECONDS.convert(5, TimeUnit.SECONDS),
              TimeUnit.MILLISECONDS);
      this.metrics = metrics;
    }

    @Override
    public GitFileDiff load(GitFileDiffCacheKey key) throws IOException, DiffNotAvailableException {
      try (TraceTimer timer =
          TraceContext.newTimer(
              "Loading a single key from git file diff cache",
              Metadata.builder()
                  .diffAlgorithm(key.diffAlgorithm().name())
                  .filePath(key.newFilePath())
                  .build())) {
        return loadAll(ImmutableList.of(key)).get(key);
      }
    }

    @Override
    public Map loadAll(
        Iterable keys)
        throws IOException, DiffNotAvailableException {
      try (TraceTimer timer =
          TraceContext.newTimer("Loading multiple keys from git file diff cache")) {
        ImmutableMap.Builder result =
            ImmutableMap.builderWithExpectedSize(Iterables.size(keys));

        Map> byProject =
            Streams.stream(keys)
                .distinct()
                .collect(Collectors.groupingBy(GitFileDiffCacheKey::project));

        for (Map.Entry> entry : byProject.entrySet()) {
          try (Repository repo = repoManager.openRepository(entry.getKey())) {

            // Grouping keys by diff options because each group of keys will be processed with a
            // separate call to JGit using the DiffFormatter object.
            Map> optionsGroups =
                entry.getValue().stream().collect(Collectors.groupingBy(DiffOptions::fromKey));

            for (Map.Entry> group :
                optionsGroups.entrySet()) {
              result.putAll(loadAllImpl(repo, group.getKey(), group.getValue()));
            }
          }
        }
        return result.build();
      }
    }

    /**
     * Loads the git file diffs for all keys of the same repository, and having the same diff {@code
     * options}.
     *
     * @return The git file diffs for all input keys.
     */
    private ImmutableMap loadAllImpl(
        Repository repo, DiffOptions options, List keys)
        throws IOException, DiffNotAvailableException {
      ImmutableMap.Builder result =
          ImmutableMap.builderWithExpectedSize(keys.size());
      Map filePaths =
          keys.stream().collect(Collectors.toMap(identity(), GitFileDiffCacheKey::newFilePath));
      try (CloseablePool diffPool =
          new CloseablePool<>(() -> createDiffFormatter(options, repo))) {
        ListMultimap diffEntries;
        try (CloseablePool.Handle formatter = diffPool.get()) {
          diffEntries = loadDiffEntries(formatter.get(), options, filePaths.values());
        }
        for (GitFileDiffCacheKey key : filePaths.keySet()) {
          String newFilePath = filePaths.get(key);
          if (!diffEntries.containsKey(newFilePath)) {
            result.put(
                key,
                GitFileDiff.empty(
                    AbbreviatedObjectId.fromObjectId(key.oldTree()),
                    AbbreviatedObjectId.fromObjectId(key.newTree()),
                    newFilePath));
            continue;
          }
          List entries = diffEntries.get(newFilePath);
          if (entries.size() == 1) {
            result.put(key, createGitFileDiff(entries.get(0), key, diffPool));
          } else {
            // Handle when JGit returns two {Added, Deleted} entries for the same file. This
            // happens, for example, when a file's mode is changed between patchsets (e.g.
            // converting a symlink to a regular file). We combine both diff entries into a single
            // entry with {changeType = Rewrite}.
            List gitDiffs = new ArrayList<>();
            for (DiffEntry entry : diffEntries.get(newFilePath)) {
              gitDiffs.add(createGitFileDiff(entry, key, diffPool));
            }
            result.put(key, createRewriteEntry(gitDiffs));
          }
        }
        return result.build();
      }
    }

    private static ListMultimap loadDiffEntries(
        DiffFormatter diffFormatter, DiffOptions diffOptions, Collection filePaths)
        throws IOException {
      ImmutableSet filePathsSet = ImmutableSet.copyOf(filePaths);
      List diffEntries =
          diffFormatter.scan(
              diffOptions.oldTree().equals(ObjectId.zeroId()) ? null : diffOptions.oldTree(),
              diffOptions.newTree());

      return diffEntries.stream()
          .filter(d -> filePathsSet.contains(extractPath(d)))
          .collect(
              Multimaps.toMultimap(
                  Loader::extractPath,
                  identity(),
                  MultimapBuilder.treeKeys().arrayListValues()::build));
    }

    private static DiffFormatter createDiffFormatter(DiffOptions diffOptions, Repository repo) {
      try (DiffFormatter diffFormatter = new DiffFormatter(DisabledOutputStream.INSTANCE)) {
        diffFormatter.setRepository(repo);
        RawTextComparator cmp = comparatorFor(diffOptions.whitespace());
        diffFormatter.setDiffComparator(cmp);
        if (diffOptions.renameScore() != -1) {
          diffFormatter.setDetectRenames(true);
          diffFormatter.getRenameDetector().setRenameScore(diffOptions.renameScore());
        }
        diffFormatter.setDiffAlgorithm(DiffAlgorithmFactory.create(diffOptions.diffAlgorithm()));
        diffFormatter.getRenameDetector().setSkipContentRenamesForBinaryFiles(true);
        return diffFormatter;
      }
    }

    private static RawTextComparator comparatorFor(Whitespace ws) {
      switch (ws) {
        case IGNORE_ALL:
          return RawTextComparator.WS_IGNORE_ALL;

        case IGNORE_TRAILING:
          return RawTextComparator.WS_IGNORE_TRAILING;

        case IGNORE_LEADING_AND_TRAILING:
          return RawTextComparator.WS_IGNORE_CHANGE;

        case IGNORE_NONE:
        default:
          return RawTextComparator.DEFAULT;
      }
    }

    /**
     * Create a {@link GitFileDiff}. The result depends on the value of the {@code useTimeout} field
     * of the {@code key} parameter.
     *
     * 
    *
  • If {@code useTimeout} is true, the computation is performed with timeout enforcement * (identified by {@link #timeoutMillis}). If the timeout is exceeded, this method returns * a negative result using {@link GitFileDiff#createNegative(AbbreviatedObjectId, * AbbreviatedObjectId, String)}. *
  • If {@code useTimeouts} is false, the computation is performed synchronously without * timeout enforcement. */ private GitFileDiff createGitFileDiff( DiffEntry diffEntry, GitFileDiffCacheKey key, CloseablePool diffPool) throws IOException { if (!key.useTimeout()) { try (CloseablePool.Handle formatter = diffPool.get()) { return GitFileDiff.create(diffEntry, getFileHeader(formatter, diffEntry)); } } // This submits the DiffFormatter to a different thread. The CloseablePool and our usage of it // ensures that any DiffFormatter instance and the ObjectReader it references internally is // only used by a single thread concurrently. However, ObjectReaders have a reference to // Repository which might not be thread safe (FileRepository is, DfsRepository might not). // This could lead to a race condition. Future fileDiffFuture = diffExecutor.submit( () -> { try (CloseablePool.Handle formatter = diffPool.get()) { return GitFileDiff.create(diffEntry, getFileHeader(formatter, diffEntry)); } }); try { // We employ the timeout because of a bug in Myers diff in JGit. See // https://issues.gerritcodereview.com/issues/40000618 for more details. The bug may happen // if the algorithm used in diffs is HISTOGRAM_WITH_FALLBACK_MYERS. return fileDiffFuture.get(timeoutMillis, TimeUnit.MILLISECONDS); } catch (InterruptedException | TimeoutException e) { // If timeout happens, create a negative result metrics.timeouts.increment(); return GitFileDiff.createNegative( AbbreviatedObjectId.fromObjectId(key.oldTree()), AbbreviatedObjectId.fromObjectId(key.newTree()), key.newFilePath()); } catch (ExecutionException e) { // If there was an error computing the result, carry it // up to the caller so the cache knows this key is invalid. Throwables.throwIfInstanceOf(e.getCause(), IOException.class); throw new IOException(e.getMessage(), e.getCause()); } } /** * Extract the file path from a {@link DiffEntry}. Returns the old file path if the entry * corresponds to a deleted file, otherwise it returns the new file path. */ private static String extractPath(DiffEntry diffEntry) { return diffEntry.getChangeType().equals(ChangeType.DELETE) ? diffEntry.getOldPath() : diffEntry.getNewPath(); } private FileHeader getFileHeader( CloseablePool.Handle formatter, DiffEntry diffEntry) throws IOException { logger.atFine().log("getting file header for %s", formatDiffEntryForLogging(diffEntry)); try { return formatter.get().toFileHeader(diffEntry); } catch (MissingObjectException e) { throw new IOException( String.format("Failed to get file header for %s", formatDiffEntryForLogging(diffEntry)), e); } } private String formatDiffEntryForLogging(DiffEntry diffEntry) { StringBuilder buf = new StringBuilder(); buf.append("DiffEntry["); buf.append(diffEntry.getChangeType()); buf.append(" "); switch (diffEntry.getChangeType()) { case ADD: buf.append(String.format("%s (%s)", diffEntry.getNewPath(), diffEntry.getNewId().name())); break; case COPY: case RENAME: buf.append( String.format( "%s (%s) -> %s (%s)", diffEntry.getOldPath(), diffEntry.getOldId().name(), diffEntry.getNewPath(), diffEntry.getNewId().name())); break; case DELETE: case MODIFY: buf.append(String.format("%s (%s)", diffEntry.getOldPath(), diffEntry.getOldId().name())); break; } buf.append("]"); return buf.toString(); } } /** * Create a single {@link GitFileDiff} with {@link com.google.gerrit.entities.Patch.ChangeType} * equals {@link com.google.gerrit.entities.Patch.ChangeType#REWRITE}, assuming the input list * contains two entries. * * @param gitDiffs input list of exactly two {@link GitFileDiff} for same file path. * @return a single {@link GitFileDiff} with change type equals {@link * com.google.gerrit.entities.Patch.ChangeType#REWRITE}. * @throws DiffNotAvailableException if input list contains git diffs with change types other than * {ADDED, DELETED}. This is a JGit error. */ private static GitFileDiff createRewriteEntry(List gitDiffs) throws DiffNotAvailableException { if (gitDiffs.size() != 2) { throw new DiffNotAvailableException( String.format( "JGit error: found %d dff entries for same file path %s", gitDiffs.size(), gitDiffs.get(0).getDefaultPath())); } // Convert the first entry (prioritized according to change type enum order) to REWRITE gitDiffs.sort(Comparator.comparingInt(o -> o.changeType().ordinal())); return gitDiffs.get(0).toBuilder().changeType(Patch.ChangeType.REWRITE).build(); } /** An entity representing the options affecting the diff computation. */ @AutoValue abstract static class DiffOptions { /** Convert a {@link GitFileDiffCacheKey} input to a {@link DiffOptions}. */ static DiffOptions fromKey(GitFileDiffCacheKey key) { return create( key.oldTree(), key.newTree(), key.renameScore(), key.whitespace(), key.diffAlgorithm()); } private static DiffOptions create( ObjectId oldTree, ObjectId newTree, int renameScore, Whitespace whitespace, DiffAlgorithm diffAlgorithm) { return new AutoValue_GitFileDiffCacheImpl_DiffOptions( oldTree, newTree, renameScore, whitespace, diffAlgorithm); } abstract ObjectId oldTree(); abstract ObjectId newTree(); abstract int renameScore(); abstract Whitespace whitespace(); abstract DiffAlgorithm diffAlgorithm(); } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy