All Downloads are FREE. Search and download functionalities are using the official Maven repository.

eu.interedition.collatex.medite.MediteAlgorithm Maven / Gradle / Ivy

Go to download

A Java library for collating textual sources, for example, to produce an apparatus.

There is a newer version: 1.7.1
Show newest version
/*
 * Copyright (c) 2015 The Interedition Development Group.
 *
 * This file is part of CollateX.
 *
 * CollateX is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * CollateX is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with CollateX.  If not, see <http://www.gnu.org/licenses/>.
 */

package eu.interedition.collatex.medite;

import eu.interedition.collatex.CollationAlgorithm;
import eu.interedition.collatex.Token;
import eu.interedition.collatex.VariantGraph;
import eu.interedition.collatex.util.VariantGraphRanking;
import eu.interedition.collatex.util.VertexMatch;

import java.util.BitSet;
import java.util.Comparator;
import java.util.SortedSet;
import java.util.TreeSet;
import java.util.function.Function;
import java.util.stream.IntStream;
import java.util.stream.StreamSupport;

/**
 * @author Gregor Middell
 */
public class MediteAlgorithm extends CollationAlgorithm.Base {

    private final Comparator comparator;
    private final Function, Integer> matchEvaluator;

    public MediteAlgorithm(Comparator comparator, Function, Integer> matchEvaluator) {
        this.comparator = comparator;
        this.matchEvaluator = matchEvaluator;
    }

    @Override
    public void collate(VariantGraph graph, Iterable witness) {
        final VariantGraph.Vertex[][] vertices = VariantGraphRanking.of(graph).asArray();
        final Token[] tokens = StreamSupport.stream(witness.spliterator(), false).toArray(Token[]::new);

        final SuffixTree suffixTree = SuffixTree.build(comparator, tokens);
        final MatchEvaluatorWrapper matchEvaluator = new MatchEvaluatorWrapper(this.matchEvaluator, tokens);

        final Matches matchCandidates = Matches.between(vertices, suffixTree, matchEvaluator);
        final SortedSet> matches = new TreeSet<>(VertexMatch.setComparator());

        while (true) {
            final SortedSet> maximalUniqueMatches = matchCandidates.findMaximalUniqueMatches();
            if (maximalUniqueMatches.isEmpty()) {
                break;
            }

            final BitSet rankFilter = new BitSet();
            final BitSet tokenFilter = new BitSet();

            for (SortedSet phrase : AlignmentDecisionGraph.filter(maximalUniqueMatches, matchEvaluator)) {
                final VertexMatch.WithTokenIndex firstMatch = phrase.first();
                final VertexMatch.WithTokenIndex lastMatch = phrase.last();

                matches.add(phrase);
                IntStream.range(firstMatch.vertexRank, lastMatch.vertexRank + 1).forEach(rankFilter::set);
                IntStream.range(firstMatch.token, lastMatch.token + 1).forEach(tokenFilter::set);
            }

            matchCandidates.removeIf(VertexMatch.filter(rankFilter, tokenFilter));
        }

        merge(graph, vertices, tokens, matches);
    }

    static class MatchEvaluatorWrapper implements Function, Integer> {

        private final Function, Integer> wrapped;
        private final Function tokenResolver;

        MatchEvaluatorWrapper(final Function, Integer> wrapped, final Token[] tokens) {
            this.wrapped = wrapped;
            this.tokenResolver = VertexMatch.tokenResolver(tokens);
        }

        @Override
        public Integer apply(SortedSet input) {
            final SortedSet tokenPhrase = new TreeSet<>();
            for (VertexMatch.WithTokenIndex match : input) {
                tokenPhrase.add(tokenResolver.apply(match));
            }
            return wrapped.apply(tokenPhrase);
        }
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy