![JAR search and dependency download from the Maven repository](/logo.png)
io.github.oliviercailloux.git.GitHubHistory Maven / Gradle / Ivy
package io.github.oliviercailloux.git;
import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Verify.verify;
import com.google.common.collect.HashMultiset;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.ImmutableSet;
import com.google.common.collect.Maps;
import com.google.common.collect.Multiset;
import com.google.common.collect.Sets;
import com.google.common.graph.Graph;
import com.google.common.graph.GraphBuilder;
import com.google.common.graph.Graphs;
import com.google.common.graph.ImmutableGraph;
import io.github.oliviercailloux.git.filter.GitHistory;
import java.time.Instant;
import java.util.ArrayDeque;
import java.util.Comparator;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Queue;
import java.util.Set;
import java.util.function.Function;
import org.eclipse.jgit.lib.ObjectId;
/**
* Interface GitHistory.
*
* + create(Map, Graph) (using either author or commit date from RevCommits, for example)
*
* + getGraph() // unmodifiable
*
* + getRoot()
*
* Class GitHistorySimple.
*
* Class GitHistoryIncreasable.
*
* + addCommits(Stream<>).
*
* Class MultiDatesHistory.
*
* + create(Map authorDates, Map commitDates, Map pushsDates, Graph commits)
*
* - graph
*
* + getCommitHistory
*
* + getAuthorHistory
*
* + getPushHistory // all three the same graph
*
* + getPushedBeforeCommitted
*
* GitHubUtils::toCoherent(MultiDatesHistory): MultiDatesHistory.
*
* I could have chosen to make this a more general MultiDatesHistory, with authorDates, commitDates,
* pushDates, but then I also need one with the GH specific interface where the provided push
* information is incomplete.
*
* @param
*/
public class GitHubHistory {
/**
* @param authorDates must be complete
* @param commitDates must be complete
* @param pushDates may be incomplete
*/
public static GitHubHistory create(Graph commits, Map authorDates,
Map commitDates, Map pushDates) {
return new GitHubHistory(commits, authorDates, commitDates, pushDates);
}
private final ImmutableGraph graph;
private final ImmutableMap authorDates;
private final ImmutableMap commitDates;
private final ImmutableMap pushDates;
private ImmutableMap finalPushDates;
private ImmutableGraph patchedKnowns;
public GitHubHistory(Graph commits, Map authorDates,
Map commitDates, Map pushDates) {
this.graph = ImmutableGraph.copyOf(commits);
this.authorDates =
ImmutableMap.copyOf(Maps.filterKeys(authorDates, k -> graph.nodes().contains(k)));
this.commitDates =
ImmutableMap.copyOf(Maps.filterKeys(commitDates, k -> graph.nodes().contains(k)));
this.pushDates =
ImmutableMap.copyOf(Maps.filterKeys(pushDates, k -> graph.nodes().contains(k)));
checkArgument(this.graph.nodes().equals(this.authorDates.keySet()));
checkArgument(this.graph.nodes().equals(this.commitDates.keySet()));
checkAndCompletePushDates();
}
public ImmutableGraph getGraph() {
return graph;
}
public GitHistory getAuthorHistory() {
return GitHistory.create(graph, authorDates);
}
public GitHistory getCommitterHistory() {
return GitHistory.create(graph, commitDates);
}
public ImmutableMap getPushDates() {
return pushDates;
}
public ImmutableMap getConsistentPushDates() {
return finalPushDates;
}
/**
* Many null values among the pushedDate information sent by GitHub. Also, there’s probably
* occasional bugs, where a commit is reportedly pushed before its parent (which I don’t think is
* possible). This class attempts to correct.
*
* Corrected by taking the most favorable hypothesis for the student (the one that yields the
* earliest push date) among those that do not put everything in question. Perhaps one precise
* definition of this is as follows (probably incorrect, better refer to the algorithm!).
* Conflicts in a set of commits: the pairs of commits taken in that set such that the earlier
* commit has a later date. Conflicting set: a set of commits that has at least one conflict.
* Related conflicting set: a conflicting set such that each conflict pair have a common child or
* a common parent that is in conflict with each of the elements of the pair. Minimal conflicting
* set: a related conflicting set such that any superset that is a related conflicting set has the
* same conflicts. [Perhaps unnecessary because a related conflicting set would be minimal?]
* Reconciliation of a related conflicting set: assignment of dates to each commit in the set such
* that it is no more a conflicting set when considering the assignment. Min reconciliation: the
* reconciliation that chooses pushed dates as early as possible among the reconciliations that
* leave at least one value unchanged among all the conflicts and does not touch the commits that
* are in no conflicting pairs.
*
* The resulting pushed dates, when they have been patched, are coherent but should be taken with
* caution. Only when {@link #getPatchedKnowns()} is empty should they be used, ideally. Even in
* that case, the completion (about the pushed dates that were missing in the reported data) is to
* be taken only as lower bounds.
*
*/
public GitHistory getConsistentPushHistory() {
return GitHistory.create(graph, finalPushDates);
}
/**
* Starting with nodes with no successor, computes the change to be brought to the given initial
* map so that predecessors have weakly “smaller” values (meaning, not “greater” values) that
* their successors, proposing only changes that “lower” the initial values.
*
* Words such as “greater” or “smallest” are understood as defined by the given comparator.
*
* @return for a given key oid, indicates as value which is the oid that should give its date, if
* the date of the key oid is to be changed (is itself iff not to be changed). For each
* key oid, the value is the descendant (including itself) whose date value is “smallest”.
*/
private static ImmutableMap getLoweringPatchForNonIncreasing(
Graph graph, Map initial, Comparator comparator) {
final Map originatorOfDate = new LinkedHashMap<>();
final Set nodes = graph.nodes();
final Map modifiedPushedDates = new LinkedHashMap<>(initial);
final Queue visitNext = new ArrayDeque<>();
final Multiset remainingVisits = HashMultiset.create();
for (ObjectId node : nodes) {
final int nbIncoming = graph.successors(node).size();
remainingVisits.add(node, nbIncoming);
if (nbIncoming == 0) {
visitNext.add(node);
originatorOfDate.put(node, node);
}
}
verify(nodes.isEmpty() || !visitNext.isEmpty());
while (!visitNext.isEmpty()) {
final ObjectId successor = visitNext.remove();
verify(originatorOfDate.containsKey(successor));
final Instant successorDate = modifiedPushedDates.get(successor);
for (ObjectId predecessor : graph.predecessors(successor)) {
final Instant predecessorDate = modifiedPushedDates.get(predecessor);
/*
* Ensures the value associated to this predecessor is the “smallest” one among all
* descendants of this predecessor seen so far and the original value of this predecessor.
*/
final boolean change = comparator.compare(successorDate, predecessorDate) < 0;
if (change) {
modifiedPushedDates.put(predecessor, successorDate);
originatorOfDate.put(predecessor, originatorOfDate.get(successor));
}
final int before = remainingVisits.remove(predecessor, 1);
if (before == 1) {
visitNext.add(predecessor);
if (!originatorOfDate.containsKey(predecessor)) {
originatorOfDate.put(predecessor, predecessor);
}
}
}
}
verify(originatorOfDate.keySet().equals(nodes));
return ImmutableMap.copyOf(originatorOfDate);
}
/**
* From tips downwards, propagate a “ceiling” information: a parent of a child can have at most
* the push date of the child. This step is sufficient to patch possible bugs in the dates
* reported by GitHub. But it is not sufficient to obtain lower bounds for the missing values. As
* a second step, from roots upwards, propagate a “floor” information, in order to fill-in the
* missing pushedDate values.
*/
private void checkAndCompletePushDates() {
final ImmutableMap.Builder initialBuilder = ImmutableMap.builder();
initialBuilder.putAll(pushDates);
final Set nodes = graph.nodes();
final ImmutableSet unobservedPushedDates =
Sets.difference(nodes, pushDates.keySet()).immutableCopy();
for (ObjectId unobserved : unobservedPushedDates) {
initialBuilder.put(unobserved, Instant.MAX);
}
final ImmutableMap initial = initialBuilder.build();
verify(initial.keySet().equals(nodes));
final Comparator comparator = Comparator.naturalOrder();
final ImmutableMap patch =
getLoweringPatchForNonIncreasing(graph, initial, comparator);
final ImmutableMap modifiedPushedDates =
nodes.stream().collect(ImmutableMap.toImmutableMap(Function.identity(),
n -> pushDates.containsKey(n) ? pushDates.get(patch.get(n)) : Instant.MIN));
{
final ImmutableMap patchedKnownsMap = ImmutableMap
.copyOf(Maps.filterKeys(patch, o -> pushDates.containsKey(o) && !patch.get(o).equals(o)));
final ImmutableSet> entrySet = patchedKnownsMap.entrySet();
final ImmutableGraph.Builder graphBuilder = GraphBuilder.directed().immutable();
for (Entry patchEntry : entrySet) {
graphBuilder.putEdge(patchEntry.getKey(), patchEntry.getValue());
}
patchedKnowns = graphBuilder.build();
}
final ImmutableMap unknownsPatch = getLoweringPatchForNonIncreasing(
Graphs.transpose(graph), modifiedPushedDates, comparator.reversed());
verify(unknownsPatch.keySet().stream().filter((o) -> !unknownsPatch.get(o).equals(o))
.allMatch((o) -> !pushDates.containsKey(o)));
finalPushDates = nodes.stream().collect(ImmutableMap.toImmutableMap(Function.identity(),
n -> modifiedPushedDates.get(unknownsPatch.get(n))));
}
/**
* @return the object ids that have been patched (changed compared to the reported values) due to
* a suspected bug in GitHub.
*/
public ImmutableGraph getPatchedPushCommits() {
return patchedKnowns;
}
}