eu.interedition.collatex.matching.Matches Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of collatex-core Show documentation
Show all versions of collatex-core Show documentation
A Java library for collating textual sources, for example, to produce an apparatus.
/*
* Copyright (c) 2013 The Interedition Development Group.
*
* This file is part of CollateX.
*
* CollateX is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* CollateX is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with CollateX. If not, see .
*/
package eu.interedition.collatex.matching;
import java.util.Collection;
import java.util.Comparator;
import java.util.Map;
import java.util.Set;
import com.google.common.collect.ArrayListMultimap;
import com.google.common.collect.ImmutableMultiset;
import com.google.common.collect.Iterables;
import com.google.common.collect.ListMultimap;
import com.google.common.collect.Multiset;
import com.google.common.collect.Sets;
import eu.interedition.collatex.Token;
import eu.interedition.collatex.VariantGraph;
public class Matches {
private final ListMultimap all;
private final Set unmatched;
private final Set ambiguous;
private final Set unique;
public static Matches between(final Iterable vertices, final Iterable witnessTokens, Comparator comparator) {
final ListMultimap all = ArrayListMultimap.create();
for (VariantGraph.Vertex vertex : vertices) {
final Set tokens = vertex.tokens();
if (tokens.isEmpty()) {
continue;
}
for (Token witnessToken : witnessTokens) {
if (comparator.compare(Iterables.getFirst(tokens, null), witnessToken) == 0) {
all.put(witnessToken, vertex);
}
}
}
// unmatched tokens
Set unmatched = Sets.newLinkedHashSet();
for (Token witnessToken : witnessTokens) {
if (!all.containsKey(witnessToken)) {
unmatched.add(witnessToken);
}
}
// unsure tokens (have to check: base -> witness, and witness -> base)
Set ambiguous = Sets.newLinkedHashSet();
for (Token witnessToken : witnessTokens) {
int count = all.keys().count(witnessToken);
if (count > 1) {
ambiguous.add(witnessToken);
}
}
Multiset bag = ImmutableMultiset.copyOf(all.values());
Set unsureBaseTokens = Sets.newLinkedHashSet();
for (VariantGraph.Vertex baseToken : vertices) {
int count = bag.count(baseToken);
if (count > 1) {
unsureBaseTokens.add(baseToken);
}
}
Collection> entries = all.entries();
for (Map.Entry entry : entries) {
if (unsureBaseTokens.contains(entry.getValue())) {
ambiguous.add(entry.getKey());
}
}
// sure tokens
// have to check unsure tokens because of (base -> witness && witness -> base)
Set unique = Sets.newLinkedHashSet();
for (Token witnessToken : witnessTokens) {
if (all.keys().count(witnessToken) == 1 && !ambiguous.contains(witnessToken)) {
unique.add(witnessToken);
}
}
return new Matches(all, unmatched, ambiguous, unique);
}
private Matches(ListMultimap all, Set unmatched, Set ambiguous, Set unique) {
this.all = all;
this.unmatched = unmatched;
this.ambiguous = ambiguous;
this.unique = unique;
}
public ListMultimap getAll() {
return all;
}
public Set getUnmatched() {
return unmatched;
}
public Set getAmbiguous() {
return ambiguous;
}
public Set getUnique() {
return unique;
}
}