
// eu.interedition.collatex.medite.Matches Maven / Gradle / Ivy
/*
* Copyright (c) 2013 The Interedition Development Group.
*
* This file is part of CollateX.
*
* CollateX is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* CollateX is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with CollateX. If not, see <http://www.gnu.org/licenses/>.
*/
package eu.interedition.collatex.medite;
import com.google.common.base.Function;
import com.google.common.base.Joiner;
import com.google.common.base.Preconditions;
import com.google.common.collect.HashMultimap;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import com.google.common.collect.Multimap;
import com.google.common.collect.Range;
import com.google.common.collect.Sets;
import com.google.common.collect.SortedSetMultimap;
import eu.interedition.collatex.Token;
import eu.interedition.collatex.VariantGraph;
import eu.interedition.collatex.util.VariantGraphRanking;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.LinkedList;
import java.util.List;
import java.util.Set;
import java.util.SortedSet;
/**
* @author Gregor Middell
*/
public class Matches extends ArrayList> {
public Matches(int initialCapacity) {
super(initialCapacity);
}
public static Matches between(VariantGraphRanking ranking, SuffixTree suffixTree, Function, Integer> matchEvaluator) {
final SortedSetMultimap rankMap = ranking.getByRank();
final Multimap matchThreads = HashMultimap.create();
for (Integer rank : rankMap.keySet()) {
final SortedSet vertices = rankMap.get(rank);
for (VariantGraph.Vertex vertex : vertices) {
final MatchThreadElement matchThreadElement = new MatchThreadElement(suffixTree).advance(vertex, rank);
if (matchThreadElement != null) {
matchThreads.put(rank, matchThreadElement);
}
}
for (MatchThreadElement matchThreadElement : matchThreads.get(rank - 1)) {
for (VariantGraph.Vertex vertex : vertices) {
final MatchThreadElement advanced = matchThreadElement.advance(vertex, rank);
if (advanced != null) {
matchThreads.put(rank, advanced);
}
}
}
}
final Matches matches = new Matches(matchThreads.size());
for (MatchThreadElement matchThreadElement : matchThreads.values()) {
final List> threadPhrases = Lists.newArrayList();
boolean firstElement = true;
for (MatchThreadElement threadElement : matchThreadElement.thread()) {
final SuffixTree.EquivalenceClass equivalenceClass = threadElement.cursor.matchedClass();
for (int mc = 0; mc < equivalenceClass.length; mc++) {
final int tokenCandidate = equivalenceClass.members[mc];
if (firstElement) {
final Phrase phrase = new Phrase();
phrase.add(new Match.WithTokenIndex(threadElement.vertex, threadElement.vertexRank, tokenCandidate));
threadPhrases.add(phrase);
} else {
for (Phrase phrase : threadPhrases) {
if ((phrase.last().token + 1) == tokenCandidate) {
phrase.add(new Match.WithTokenIndex(threadElement.vertex, threadElement.vertexRank, tokenCandidate));
}
}
}
}
firstElement = false;
}
matches.addAll(threadPhrases);
}
Collections.sort(matches, maximalUniqueMatchOrdering(matchEvaluator));
return matches;
}
private static Comparator> maximalUniqueMatchOrdering(final Function, Integer> matchEvaluator) {
return new Comparator>() {
@Override
public int compare(Phrase o1, Phrase o2) {
// 1. reverse ordering by match value
int result = matchEvaluator.apply(o2) - matchEvaluator.apply(o1);
if (result != 0) {
return result;
}
final Match.WithTokenIndex firstMatch1 = o1.first();
final Match.WithTokenIndex firstMatch2 = o2.first();
// 2. ordering by match distance
result = (Math.abs(firstMatch1.token - firstMatch1.vertexRank) - Math.abs(firstMatch2.token - firstMatch2.vertexRank));
if (result != 0) {
return result;
}
// 3. ordering by first vertex ranking
result = firstMatch1.vertexRank - firstMatch2.vertexRank;
if (result != 0) {
return result;
}
// 3. ordering by first token index
return firstMatch1.token - firstMatch2.token;
}
};
}
public SortedSet> findMaximalUniqueMatches() {
final List> allMatches = Lists.newArrayList(this);
final SortedSet> maximalUniqueMatches = Sets.newTreeSet();
while (true) {
Phrase nextMum = null;
Phrase candidate = null;
for (Phrase successor : allMatches) {
if (candidate == null) {
continue;
}
if (candidate.size() > successor.size() || candidate.first().token == successor.first().token) {
nextMum = candidate;
break;
}
candidate = successor;
}
if (nextMum == null) {
nextMum = Iterables.getFirst(allMatches, null);
}
if (nextMum == null) {
break;
}
Preconditions.checkState(maximalUniqueMatches.add(nextMum), "Duplicate MUM");
Iterables.removeIf(allMatches, Match.filter(
new IndexRangeSet(Range.closed(nextMum.first().vertexRank, nextMum.last().vertexRank)),
new IndexRangeSet(Range.closed(nextMum.first().token, nextMum.last().token))
));
}
return maximalUniqueMatches;
}
/**
* @author Gregor Middell
*/
static class MatchThreadElement {
final MatchThreadElement previous;
final VariantGraph.Vertex vertex;
final int vertexRank;
final SuffixTree.Cursor cursor;
MatchThreadElement(SuffixTree suffixTree) {
this(null, null, -1, suffixTree.cursor());
}
MatchThreadElement(MatchThreadElement previous, VariantGraph.Vertex vertex, int vertexRank, SuffixTree.Cursor cursor) {
this.previous = previous;
this.vertex = vertex;
this.vertexRank = vertexRank;
this.cursor = cursor;
}
MatchThreadElement advance(VariantGraph.Vertex vertex, int vertexRank) {
final Set tokens = vertex.tokens();
if (!tokens.isEmpty()) {
final SuffixTree.Cursor next = cursor.move(Iterables.get(tokens, 0));
if (next != null) {
return new MatchThreadElement(this, vertex, vertexRank, next);
}
}
return null;
}
List thread() {
final LinkedList thread = Lists.newLinkedList();
MatchThreadElement current = this;
while (current.vertex != null) {
thread.addFirst(current);
current = current.previous;
}
return thread;
}
@Override
public String toString() {
return "[" + Joiner.on(", ").join(vertexRank, vertex, cursor.matchedClass()) + "]";
}
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy