All downloads are free. The search and download functionality uses the official Maven repository.

eu.interedition.collatex.matching.Matches Maven / Gradle / Ivy

Go to download

A Java library for collating textual sources, for example, to produce an apparatus.

There is a newer version: 1.7.1
Show newest version
/*
 * Copyright (c) 2015 The Interedition Development Group.
 *
 * This file is part of CollateX.
 *
 * CollateX is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * CollateX is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with CollateX.  If not, see <http://www.gnu.org/licenses/>.
 */

package eu.interedition.collatex.matching;

import eu.interedition.collatex.Token;
import eu.interedition.collatex.VariantGraph;

import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.function.Function;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import java.util.stream.StreamSupport;

public class Matches {

    public final Map> allMatches;
    public final Set unmatchedInWitness;
    public final Set ambiguousInWitness;
    public final Set uniqueInWitness;

    public static Matches between(final Iterable vertices, final Iterable witnessTokens, Comparator comparator) {

        final Map> allMatches = new HashMap<>();

        StreamSupport.stream(vertices.spliterator(), false).forEach(vertex ->
            vertex.tokens().stream().findFirst().ifPresent(baseToken ->
                StreamSupport.stream(witnessTokens.spliterator(), false)
                    .filter(witnessToken -> comparator.compare(baseToken, witnessToken) == 0)
                    .forEach(matchingToken -> allMatches.computeIfAbsent(matchingToken, t -> new ArrayList<>()).add(vertex))));

        final Set unmatchedInWitness = StreamSupport.stream(witnessTokens.spliterator(), false)
            .filter(t -> !allMatches.containsKey(t))
            .collect(Collectors.toCollection(LinkedHashSet::new));

        final Set ambiguousInBase = allMatches.values().stream()
            .flatMap(List::stream)
            .collect(Collectors.toMap(Function.identity(), v -> 1, (a, b) -> a + b))
            .entrySet()
            .stream()
            .filter(v -> v.getValue() > 1)
            .map(Map.Entry::getKey)
            .collect(Collectors.toCollection(LinkedHashSet::new));

        // (have to check: base -> witness, and witness -> base)
        final Set ambiguousInWitness = Stream.concat(
            StreamSupport.stream(witnessTokens.spliterator(), false)
                .filter(t -> allMatches.getOrDefault(t, Collections.emptyList()).size() > 1),

            allMatches.entrySet().stream()
                .filter(match -> match.getValue().stream().anyMatch(ambiguousInBase::contains))
                .map(Map.Entry::getKey)
        ).collect(Collectors.toCollection(LinkedHashSet::new));

        // sure tokens
        // have to check unsure tokens because of (base -> witness && witness -> base)
        final Set uniqueInWitness = StreamSupport.stream(witnessTokens.spliterator(), false)
            .filter(t -> allMatches.getOrDefault(t, Collections.emptyList()).size() == 1 && !ambiguousInWitness.contains(t))
            .collect(Collectors.toCollection(LinkedHashSet::new));

        return new Matches(allMatches, unmatchedInWitness, ambiguousInWitness, uniqueInWitness);
    }

    private Matches(Map> allMatches, Set unmatchedInWitness, Set ambiguousInWitness, Set uniqueInWitness) {
        this.allMatches = Collections.unmodifiableMap(allMatches);
        this.unmatchedInWitness = Collections.unmodifiableSet(unmatchedInWitness);
        this.ambiguousInWitness = Collections.unmodifiableSet(ambiguousInWitness);
        this.uniqueInWitness = Collections.unmodifiableSet(uniqueInWitness);
    }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy