com.google.gerrit.server.patch.GitPositionTransformer Maven / Gradle / Ivy
// Copyright (C) 2020 The Android Open Source Project
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package com.google.gerrit.server.patch;
import static com.google.common.collect.Comparators.emptiesFirst;
import static com.google.common.collect.ImmutableList.toImmutableList;
import static com.google.common.collect.ImmutableSet.toImmutableSet;
import static java.util.Comparator.comparing;
import static java.util.stream.Collectors.collectingAndThen;
import static java.util.stream.Collectors.groupingBy;
import com.google.auto.value.AutoValue;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Sets;
import com.google.common.collect.Streams;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.Set;
import java.util.function.BiFunction;
import java.util.function.Function;
import java.util.stream.Collectors;
import java.util.stream.Stream;
/**
* Transformer of {@link Position}s in one Git tree to {@link Position}s in another Git tree given
* the {@link Mapping}s between the trees.
*
* The base idea is that a {@link Position} in the source tree can be translated/mapped to a
* corresponding {@link Position} in the target tree when we know how the target tree changed
* compared to the source tree. As long as {@link Position}s are only defined via file path and line
* range, we only need to know which file path in the source tree corresponds to which file path in
* the target tree and how the lines within that file changed from the source to the target tree.
*
*
The algorithm is roughly:
*
*
* - Go over all positions and replace the file path for each of them with the corresponding one
* in the target tree. If a file path maps to two file paths in the target tree (copied file),
* duplicate the position entry and use each of the new file paths with it. If a file path
* maps to no file in the target tree (deleted file), apply the specified conflict strategy
* (e.g. drop position completely or map to next best guess).
*
- Per file path, go through the file from top to bottom and keep track of how the range
* mappings for that file shift the lines. Derive the shifted amount by comparing the number
* of lines between source and target in the range mapping. While going through the file,
* shift each encountered position by the currently tracked amount. If a position overlaps
* with the lines of a range mapping, apply the specified conflict strategy (e.g. drop
* position completely or map to next best guess).
*
*/
public class GitPositionTransformer {
private final PositionConflictStrategy positionConflictStrategy;
/**
* Creates a new {@code GitPositionTransformer} which uses the specified strategy for conflicts.
*/
public GitPositionTransformer(PositionConflictStrategy positionConflictStrategy) {
this.positionConflictStrategy = positionConflictStrategy;
}
/**
* Transforms the {@link Position}s of the specified entities as indicated via the {@link
* Mapping}s.
*
* This is typically used to transform the {@link Position}s in one Git tree (source) to the
* corresponding {@link Position}s in another Git tree (target). The {@link Mapping}s need to
* indicate all relevant changes between the source and target tree. {@link Mapping}s for files
* not referenced by the given {@link Position}s need not be specified. They can be included,
* though, as they aren't harmful.
*
* @param entities the entities whose {@link Position} should be mapped to the target tree
* @param mappings the mappings describing all relevant changes between the source and the target
* tree
* @param an entity which has a {@link Position}
* @return a list of entities with transformed positions. There are no guarantees about the order
* of the returned elements.
*/
public ImmutableList> transform(
Collection> entities, Set mappings) {
// Update the file paths first as copied files might exist. For copied files, this operation
// will duplicate the PositionedEntity instances of the original file.
List> filePathUpdatedEntities = updateFilePaths(entities, mappings);
return shiftRanges(filePathUpdatedEntities, mappings);
}
private ImmutableList> updateFilePaths(
Collection> entities, Set mappings) {
Map> newFilesPerOldFile = groupNewFilesByOldFiles(mappings);
return entities.stream()
.flatMap(entity -> mapToNewFileIfChanged(newFilesPerOldFile, entity))
.collect(toImmutableList());
}
private static Map> groupNewFilesByOldFiles(Set mappings) {
return mappings.stream()
.map(Mapping::file)
// Ignore file additions (irrelevant for mappings).
.filter(mapping -> mapping.oldPath().isPresent())
.collect(
groupingBy(
mapping -> mapping.oldPath().orElse(""),
collectingAndThen(
Collectors.mapping(FileMapping::newPath, toImmutableSet()),
// File deletion (empty Optional) -> empty set.
GitPositionTransformer::unwrapOptionals)));
}
private static ImmutableSet unwrapOptionals(ImmutableSet> optionals) {
return optionals.stream().flatMap(Streams::stream).collect(toImmutableSet());
}
private Stream> mapToNewFileIfChanged(
Map> newFilesPerOldFile, PositionedEntity entity) {
if (!entity.position().filePath().isPresent()) {
// No mapping of file paths necessary if no file path is set. -> Keep existing entry.
return Stream.of(entity);
}
String oldFilePath = entity.position().filePath().get();
if (!newFilesPerOldFile.containsKey(oldFilePath)) {
// Unchanged files don't have a mapping. -> Keep existing entries.
return Stream.of(entity);
}
Set newFiles = newFilesPerOldFile.get(oldFilePath);
if (newFiles.isEmpty()) {
// File was deleted.
return positionConflictStrategy.getOnFileConflict(entity.position()).map(entity::withPosition)
.stream();
}
return newFiles.stream().map(entity::withFilePath);
}
private ImmutableList> shiftRanges(
List> filePathUpdatedEntities, Set mappings) {
Map> mappingsPerNewFilePath =
groupRangeMappingsByNewFilePath(mappings);
return Stream.concat(
// Keep positions without a file.
filePathUpdatedEntities.stream()
.filter(entity -> !entity.position().filePath().isPresent()),
// Shift ranges per file.
groupByFilePath(filePathUpdatedEntities).entrySet().stream()
.flatMap(
newFilePathAndEntities ->
shiftRangesInOneFileIfChanged(
mappingsPerNewFilePath,
newFilePathAndEntities.getKey(),
newFilePathAndEntities.getValue())
.stream()))
.collect(toImmutableList());
}
private static Map> groupRangeMappingsByNewFilePath(
Set mappings) {
return mappings.stream()
// Ignore range mappings of deleted files.
.filter(mapping -> mapping.file().newPath().isPresent())
.collect(
groupingBy(
mapping -> mapping.file().newPath().orElse(""),
collectingAndThen(
Collectors.>reducing(
new HashSet<>(), Mapping::ranges, Sets::union),
ImmutableSet::copyOf)));
}
private static Map>> groupByFilePath(
List> fileUpdatedEntities) {
return fileUpdatedEntities.stream()
.filter(entity -> entity.position().filePath().isPresent())
.collect(groupingBy(entity -> entity.position().filePath().orElse(""), toImmutableList()));
}
private ImmutableList> shiftRangesInOneFileIfChanged(
Map> mappingsPerNewFilePath,
String newFilePath,
ImmutableList> sameFileEntities) {
ImmutableSet sameFileRangeMappings =
mappingsPerNewFilePath.getOrDefault(newFilePath, ImmutableSet.of());
if (sameFileRangeMappings.isEmpty()) {
// Unchanged files and pure renames/copies don't have range mappings. -> Keep existing
// entries.
return sameFileEntities;
}
return shiftRangesInOneFile(sameFileEntities, sameFileRangeMappings);
}
private ImmutableList> shiftRangesInOneFile(
List> sameFileEntities, Set sameFileRangeMappings) {
ImmutableList> sortedEntities = sortByStartEnd(sameFileEntities);
ImmutableList sortedMappings = sortByOldStartEnd(sameFileRangeMappings);
int shiftedAmount = 0;
int mappingIndex = 0;
int entityIndex = 0;
ImmutableList.Builder> resultingEntities =
ImmutableList.builderWithExpectedSize(sortedEntities.size());
while (entityIndex < sortedEntities.size() && mappingIndex < sortedMappings.size()) {
PositionedEntity entity = sortedEntities.get(entityIndex);
if (entity.position().lineRange().isPresent()) {
Range range = entity.position().lineRange().get();
RangeMapping mapping = sortedMappings.get(mappingIndex);
if (mapping.oldLineRange().end() <= range.start()) {
shiftedAmount = mapping.newLineRange().end() - mapping.oldLineRange().end();
mappingIndex++;
} else if (range.end() <= mapping.oldLineRange().start()) {
resultingEntities.add(entity.shiftPositionBy(shiftedAmount));
entityIndex++;
} else {
positionConflictStrategy
.getOnRangeConflict(entity.position())
.map(entity::withPosition)
.ifPresent(resultingEntities::add);
entityIndex++;
}
} else {
// No range -> no need to shift.
resultingEntities.add(entity);
entityIndex++;
}
}
for (int i = entityIndex; i < sortedEntities.size(); i++) {
resultingEntities.add(sortedEntities.get(i).shiftPositionBy(shiftedAmount));
}
return resultingEntities.build();
}
private static ImmutableList> sortByStartEnd(
List> entities) {
return entities.stream()
.sorted(
comparing(
entity -> entity.position().lineRange(),
emptiesFirst(comparing(Range::start).thenComparing(Range::end))))
.collect(toImmutableList());
}
private static ImmutableList sortByOldStartEnd(Set mappings) {
return mappings.stream()
.sorted(
comparing(
RangeMapping::oldLineRange, comparing(Range::start).thenComparing(Range::end)))
.collect(toImmutableList());
}
/**
* A mapping from a {@link Position} in one Git commit/tree (source) to a {@link Position} in
* another Git commit/tree (target).
*/
@AutoValue
public abstract static class Mapping {
/** A mapping describing how the attributes of one file are mapped from source to target. */
public abstract FileMapping file();
/**
* Mappings describing how line ranges within the file indicated by {@link #file()} are mapped
* from source to target.
*/
public abstract ImmutableSet ranges();
public static Mapping create(FileMapping fileMapping, Iterable rangeMappings) {
return new AutoValue_GitPositionTransformer_Mapping(
fileMapping, ImmutableSet.copyOf(rangeMappings));
}
}
/**
* A mapping of attributes from a file in one Git tree (source) to a file in another Git tree
* (target).
*
* At the moment, only the file path is considered. Other attributes like file mode would be
* imaginable too but are currently not supported.
*/
@AutoValue
public abstract static class FileMapping {
/** File path in the source tree. For file additions, this is an empty {@link Optional}. */
public abstract Optional oldPath();
/**
* File path in the target tree. Can be the same as {@link #oldPath()} if unchanged. For file
* deletions, this is an empty {@link Optional}.
*/
public abstract Optional newPath();
/**
* Creates a {@link FileMapping} for a file addition.
*
* In the context of {@link GitPositionTransformer}, file additions are irrelevant as no
* given position in the source tree can refer to such a new file in the target tree. We still
* provide this factory method so that code outside of {@link GitPositionTransformer} doesn't
* have to care about such details and can simply create {@link FileMapping}s for any
* modifications between the trees.
*/
public static FileMapping forAddedFile(String filePath) {
return new AutoValue_GitPositionTransformer_FileMapping(
Optional.empty(), Optional.of(filePath));
}
/** Creates a {@link FileMapping} for a file deletion. */
public static FileMapping forDeletedFile(String filePath) {
return new AutoValue_GitPositionTransformer_FileMapping(
Optional.of(filePath), Optional.empty());
}
/** Creates a {@link FileMapping} for a file modification. */
public static FileMapping forModifiedFile(String filePath) {
return new AutoValue_GitPositionTransformer_FileMapping(
Optional.of(filePath), Optional.of(filePath));
}
/** Creates a {@link FileMapping} for a file renaming. */
public static FileMapping forRenamedFile(String oldPath, String newPath) {
return new AutoValue_GitPositionTransformer_FileMapping(
Optional.of(oldPath), Optional.of(newPath));
}
/** Creates a {@link FileMapping} using the old and new paths. */
public static FileMapping forFile(Optional oldPath, Optional newPath) {
return new AutoValue_GitPositionTransformer_FileMapping(oldPath, newPath);
}
}
/**
* A mapping of a line range in one Git tree (source) to the corresponding line range in another
* Git tree (target).
*/
@AutoValue
public abstract static class RangeMapping {
/** Range in the source tree. */
public abstract Range oldLineRange();
/** Range in the target tree. */
public abstract Range newLineRange();
/**
* Creates a new {@code RangeMapping}.
*
* @param oldRange see {@link #oldLineRange()}
* @param newRange see {@link #newLineRange()}
*/
public static RangeMapping create(Range oldRange, Range newRange) {
return new AutoValue_GitPositionTransformer_RangeMapping(oldRange, newRange);
}
}
/**
* A position within the tree of a Git commit.
*
* The term 'position' is our own invention. The underlying idea is that a Gerrit comment is at
* a specific position within the commit of a patchset. That position is defined by the attributes
* defined in this class.
*
*
The same thinking can be applied to diff hunks (= JGit edits). Each diff hunk maps a
* position in one commit (e.g. in the parent of the patchset) to a position in another commit
* (e.g. in the commit of the patchset).
*
*
We only refer to lines and not character offsets within the lines here as Git only works
* with line precision. In theory, we could do better in Gerrit as we also have intraline diffs.
* Incorporating those requires careful considerations, though.
*/
@AutoValue
public abstract static class Position {
/** Absolute file path. */
public abstract Optional filePath();
/**
* Affected lines. An empty {@link Optional} indicates that this position does not refer to any
* specific lines (e.g. used for a file comment).
*/
public abstract Optional lineRange();
/**
* Creates a copy of this {@code Position} whose range is shifted by the indicated amount.
*
* Note: There's no guarantee that this method returns a new instance.
*
* @param amount number of lines to shift. Negative values mean moving the range up, positive
* values mean moving the range down.
* @return a {@code Position} instance with the updated range
*/
public Position shiftBy(int amount) {
return lineRange()
.map(range -> toBuilder().lineRange(range.shiftBy(amount)).build())
.orElse(this);
}
/**
* Creates a copy of this {@code Position} which doesn't refer to any specific lines.
*
*
Note: There's no guarantee that this method returns a new instance.
*
* @return a {@code Position} instance without a line range
*/
public Position withoutLineRange() {
return toBuilder().lineRange(Optional.empty()).build();
}
/**
* Creates a copy of this {@code Position} whose file path is adjusted to the indicated value.
*
*
Note: There's no guarantee that this method returns a new instance.
*
* @param filePath the new file path to use
* @return a {@code Position} instance with the indicated file path
*/
public Position withFilePath(String filePath) {
return toBuilder().filePath(filePath).build();
}
abstract Builder toBuilder();
public static Builder builder() {
return new AutoValue_GitPositionTransformer_Position.Builder();
}
/** Builder of a {@link Position}. */
@AutoValue.Builder
public abstract static class Builder {
/** See {@link #filePath()}. */
public abstract Builder filePath(String filePath);
/** See {@link #lineRange()}. */
public abstract Builder lineRange(Range lineRange);
/** See {@link #lineRange()}. */
public abstract Builder lineRange(Optional lineRange);
public abstract Position build();
}
}
/** A range. In the context of {@link GitPositionTransformer}, this is a line range. */
@AutoValue
public abstract static class Range {
/** Start of the range. (inclusive) */
public abstract int start();
/** End of the range. (exclusive) */
public abstract int end();
/**
* Creates a copy of this {@code Range} which is shifted by the indicated amount. A shift
* equally applies to both {@link #start()} end {@link #end()}.
*
* Note: There's no guarantee that this method returns a new instance.
*
* @param amount amount to shift. Negative values mean moving the range up, positive values mean
* moving the range down.
* @return a {@code Range} instance with updated start/end
*/
public Range shiftBy(int amount) {
return create(start() + amount, end() + amount);
}
public static Range create(int start, int end) {
return new AutoValue_GitPositionTransformer_Range(start, end);
}
}
/**
* Wrapper around an instance of {@code T} which annotates it with a {@link Position}. Methods
* such as {@link #shiftPositionBy(int)} and {@link #withFilePath(String)} allow to update the
* associated {@link Position}. Afterwards, use {@link #getEntityAtUpdatedPosition()} to get an
* updated version of the {@code T} instance.
*
* @param an object/entity type which has a {@link Position}
*/
public static class PositionedEntity {
private final T entity;
private final Position position;
private final BiFunction updatedEntityCreator;
/**
* Creates a new {@code PositionedEntity}.
*
* @param entity an instance which should be annotated with a {@link Position}
* @param positionExtractor a function describing how a {@link Position} can be derived from the
* given entity
* @param updatedEntityCreator a function to create a new entity of type {@code T} from an
* existing entity and a given {@link Position}. This must return a new instance of type
* {@code T}! The existing instance must not be modified!
* @param an object/entity type which has a {@link Position}
*/
public static PositionedEntity create(
T entity,
Function positionExtractor,
BiFunction updatedEntityCreator) {
Position position = positionExtractor.apply(entity);
return new PositionedEntity<>(entity, position, updatedEntityCreator);
}
private PositionedEntity(
T entity, Position position, BiFunction updatedEntityCreator) {
this.entity = entity;
this.position = position;
this.updatedEntityCreator = updatedEntityCreator;
}
/**
* Returns the original underlying entity.
*
* @return the original instance of {@code T}
*/
public T getEntity() {
return entity;
}
/**
* Returns an updated version of the entity to which the internally stored {@link Position} was
* written back to.
*
* @return an updated instance of {@code T}
*/
public T getEntityAtUpdatedPosition() {
return updatedEntityCreator.apply(entity, position);
}
Position position() {
return position;
}
/**
* Shifts the tracked {@link Position} by the specified amount.
*
* @param amount number of lines to shift. Negative values mean moving the range up, positive
* values mean moving the range down.
* @return a {@code PositionedEntity} with updated {@link Position}
*/
public PositionedEntity shiftPositionBy(int amount) {
return new PositionedEntity<>(entity, position.shiftBy(amount), updatedEntityCreator);
}
/**
* Updates the file path of the tracked {@link Position}.
*
* @param filePath the new file path to use
* @return a {@code PositionedEntity} with updated {@link Position}
*/
public PositionedEntity withFilePath(String filePath) {
return new PositionedEntity<>(entity, position.withFilePath(filePath), updatedEntityCreator);
}
/**
* Updates the tracked {@link Position}.
*
* @return a {@code PositionedEntity} with updated {@link Position}
*/
public PositionedEntity withPosition(Position newPosition) {
return new PositionedEntity<>(entity, newPosition, updatedEntityCreator);
}
}
/**
* Strategy indicating how to handle {@link Position}s for which mapping conflicts exist. A
* mapping conflict means that a {@link Position} can't be transformed such that it still refers
* to exactly the same commit content afterwards.
*
* Example: A {@link Position} refers to file foo.txt and lines 5-6 which contain the text
* "Line 5\nLine 6". One of the {@link Mapping}s given to {@link #transform(Collection, Set)}
* indicates that line 5 of foo.txt was modified to "Line five\nLine 5.1\n". We could derive a
* transformed {@link Position} (foo.txt, lines 5-7) but that {@link Position} would then refer to
* the content "Line five\nLine 5.1\nLine 6". If the modification started already in line 4, we
* could even only guess what the transformed {@link Position} would be.
*/
public interface PositionConflictStrategy {
/**
* Determines an alternate {@link Position} when the range of the position can't be mapped
* without a conflict.
*
* @param oldPosition position in the source tree
* @return the new {@link Position} or an empty {@link Optional} if the position should be
* dropped
*/
Optional getOnRangeConflict(Position oldPosition);
/**
* Determines an alternate {@link Position} when there is no file for the position (= file
* deletion) in the target tree.
*
* @param oldPosition position in the source tree
* @return the new {@link Position} or an empty {@link Optional} if the position should be *
* dropped
*/
Optional getOnFileConflict(Position oldPosition);
}
/**
* A strategy which drops any {@link Position}s on a conflicting mapping. Such a strategy is
* useful if it's important that any mapped {@link Position} still refers to exactly the same
* commit content as before. See more details at {@link PositionConflictStrategy}.
*
* We need this strategy for computing edits due to rebase.
*/
public enum OmitPositionOnConflict implements PositionConflictStrategy {
INSTANCE;
@Override
public Optional getOnRangeConflict(Position oldPosition) {
return Optional.empty();
}
@Override
public Optional getOnFileConflict(Position oldPosition) {
return Optional.empty();
}
}
/**
* A strategy which tries to select the next suitable {@link Position} on a conflicting mapping.
* At the moment, this strategy is very basic and only defers to the next higher level (e.g. range
* unclear -> drop range but keep file reference). This could be improved in the future.
*
* We need this strategy for ported comments.
*
*
Warning: With this strategy, mapped {@link Position}s are not guaranteed to
* refer to exactly the same commit content as before. See more details at {@link
* PositionConflictStrategy}.
*
*
Contract: This strategy will never drop any {@link Position}.
*/
public enum BestPositionOnConflict implements PositionConflictStrategy {
INSTANCE;
@Override
public Optional getOnRangeConflict(Position oldPosition) {
return Optional.of(oldPosition.withoutLineRange());
}
@Override
public Optional getOnFileConflict(Position oldPosition) {
// If there isn't a target file, we can also drop any ranges.
return Optional.of(Position.builder().build());
}
}
}