// eu.interedition.collatex.matching.Matches (artifact: collatex-core)
// A Java library for collating textual sources, for example, to produce an apparatus.
/*
* Copyright (c) 2015 The Interedition Development Group.
*
* This file is part of CollateX.
*
* CollateX is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* CollateX is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with CollateX. If not, see <http://www.gnu.org/licenses/>.
*/
package eu.interedition.collatex.matching;
import eu.interedition.collatex.Token;
import eu.interedition.collatex.VariantGraph;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.function.Function;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import java.util.stream.StreamSupport;
public class Matches {
public final Map> allMatches;
public final Set unmatchedInWitness;
public final Set ambiguousInWitness;
public final Set uniqueInWitness;
public static Matches between(final Iterable vertices, final Iterable witnessTokens, Comparator comparator) {
final Map> allMatches = new HashMap<>();
StreamSupport.stream(vertices.spliterator(), false).forEach(vertex ->
vertex.tokens().stream().findFirst().ifPresent(baseToken ->
StreamSupport.stream(witnessTokens.spliterator(), false)
.filter(witnessToken -> comparator.compare(baseToken, witnessToken) == 0)
.forEach(matchingToken -> allMatches.computeIfAbsent(matchingToken, t -> new ArrayList<>()).add(vertex))));
final Set unmatchedInWitness = StreamSupport.stream(witnessTokens.spliterator(), false)
.filter(t -> !allMatches.containsKey(t))
.collect(Collectors.toCollection(LinkedHashSet::new));
final Set ambiguousInBase = allMatches.values().stream()
.flatMap(List::stream)
.collect(Collectors.toMap(Function.identity(), v -> 1, (a, b) -> a + b))
.entrySet()
.stream()
.filter(v -> v.getValue() > 1)
.map(Map.Entry::getKey)
.collect(Collectors.toCollection(LinkedHashSet::new));
// (have to check: base -> witness, and witness -> base)
final Set ambiguousInWitness = Stream.concat(
StreamSupport.stream(witnessTokens.spliterator(), false)
.filter(t -> allMatches.getOrDefault(t, Collections.emptyList()).size() > 1),
allMatches.entrySet().stream()
.filter(match -> match.getValue().stream().anyMatch(ambiguousInBase::contains))
.map(Map.Entry::getKey)
).collect(Collectors.toCollection(LinkedHashSet::new));
// sure tokens
// have to check unsure tokens because of (base -> witness && witness -> base)
final Set uniqueInWitness = StreamSupport.stream(witnessTokens.spliterator(), false)
.filter(t -> allMatches.getOrDefault(t, Collections.emptyList()).size() == 1 && !ambiguousInWitness.contains(t))
.collect(Collectors.toCollection(LinkedHashSet::new));
return new Matches(allMatches, unmatchedInWitness, ambiguousInWitness, uniqueInWitness);
}
private Matches(Map> allMatches, Set unmatchedInWitness, Set ambiguousInWitness, Set uniqueInWitness) {
this.allMatches = Collections.unmodifiableMap(allMatches);
this.unmatchedInWitness = Collections.unmodifiableSet(unmatchedInWitness);
this.ambiguousInWitness = Collections.unmodifiableSet(ambiguousInWitness);
this.uniqueInWitness = Collections.unmodifiableSet(uniqueInWitness);
}
}