eu.interedition.collatex.medite.MediteAlgorithm Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of collatex-core Show documentation
Show all versions of collatex-core Show documentation
A Java library for collating textual sources, for example, to produce an apparatus.
/*
* Copyright (c) 2015 The Interedition Development Group.
*
* This file is part of CollateX.
*
* CollateX is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* CollateX is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with CollateX. If not, see <http://www.gnu.org/licenses/>.
*/
package eu.interedition.collatex.medite;
import eu.interedition.collatex.CollationAlgorithm;
import eu.interedition.collatex.Token;
import eu.interedition.collatex.VariantGraph;
import eu.interedition.collatex.util.VariantGraphRanking;
import eu.interedition.collatex.util.VertexMatch;
import java.util.BitSet;
import java.util.Comparator;
import java.util.SortedSet;
import java.util.TreeSet;
import java.util.function.Function;
import java.util.stream.IntStream;
import java.util.stream.StreamSupport;
/**
* @author Gregor Middell
*/
public class MediteAlgorithm extends CollationAlgorithm.Base {
private final Comparator comparator;
private final Function, Integer> matchEvaluator;
public MediteAlgorithm(Comparator comparator, Function, Integer> matchEvaluator) {
this.comparator = comparator;
this.matchEvaluator = matchEvaluator;
}
@Override
public void collate(VariantGraph graph, Iterable witness) {
final VariantGraph.Vertex[][] vertices = VariantGraphRanking.of(graph).asArray();
final Token[] tokens = StreamSupport.stream(witness.spliterator(), false).toArray(Token[]::new);
final SuffixTree suffixTree = SuffixTree.build(comparator, tokens);
final MatchEvaluatorWrapper matchEvaluator = new MatchEvaluatorWrapper(this.matchEvaluator, tokens);
final Matches matchCandidates = Matches.between(vertices, suffixTree, matchEvaluator);
final SortedSet> matches = new TreeSet<>(VertexMatch.setComparator());
while (true) {
final SortedSet> maximalUniqueMatches = matchCandidates.findMaximalUniqueMatches();
if (maximalUniqueMatches.isEmpty()) {
break;
}
final BitSet rankFilter = new BitSet();
final BitSet tokenFilter = new BitSet();
for (SortedSet phrase : AlignmentDecisionGraph.filter(maximalUniqueMatches, matchEvaluator)) {
final VertexMatch.WithTokenIndex firstMatch = phrase.first();
final VertexMatch.WithTokenIndex lastMatch = phrase.last();
matches.add(phrase);
IntStream.range(firstMatch.vertexRank, lastMatch.vertexRank + 1).forEach(rankFilter::set);
IntStream.range(firstMatch.token, lastMatch.token + 1).forEach(tokenFilter::set);
}
matchCandidates.removeIf(VertexMatch.filter(rankFilter, tokenFilter));
}
merge(graph, vertices, tokens, matches);
}
static class MatchEvaluatorWrapper implements Function, Integer> {
private final Function, Integer> wrapped;
private final Function tokenResolver;
MatchEvaluatorWrapper(final Function, Integer> wrapped, final Token[] tokens) {
this.wrapped = wrapped;
this.tokenResolver = VertexMatch.tokenResolver(tokens);
}
@Override
public Integer apply(SortedSet input) {
final SortedSet tokenPhrase = new TreeSet<>();
for (VertexMatch.WithTokenIndex match : input) {
tokenPhrase.add(tokenResolver.apply(match));
}
return wrapped.apply(tokenPhrase);
}
}
}