All Downloads are FREE. Search and download functionalities are using the official Maven repository.

eu.interedition.collatex.needlemanwunsch.NeedlemanWunschAlgorithm Maven / Gradle / Ivy

Go to download

A Java library for collating textual sources, for example, to produce an apparatus.

There is a newer version: 1.7.1
Show newest version
/*
 * Copyright (c) 2015 The Interedition Development Group.
 *
 * This file is part of CollateX.
 *
 * CollateX is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * CollateX is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with CollateX.  If not, see .
 */

package eu.interedition.collatex.needlemanwunsch;

import eu.interedition.collatex.CollationAlgorithm;
import eu.interedition.collatex.Token;
import eu.interedition.collatex.VariantGraph;
import eu.interedition.collatex.util.VariantGraphRanking;

import java.util.Arrays;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;
import java.util.stream.StreamSupport;

/**
 * @author Gregor Middell
 */
public class NeedlemanWunschAlgorithm extends CollationAlgorithm.Base {

    private final Comparator comparator;
    private final NeedlemanWunschScorer scorer = new NeedlemanWunschScorer() {

        @Override
        public float score(VariantGraph.Vertex[] a, Token b) {
            return Arrays.stream(a).map(VariantGraph.Vertex::tokens).flatMap(Set::stream).anyMatch(t -> comparator.compare(t, b) == 0) ? 1 : -1;
        }

        @Override
        public float gap() {
            return -1;
        }
    };

    public NeedlemanWunschAlgorithm(Comparator comparator) {
        this.comparator = comparator;
    }

    @Override
    public void collate(VariantGraph against, Iterable witness) {
        final VariantGraph.Vertex[][] ranks = VariantGraphRanking.of(against).asArray();
        final Token[] tokens = StreamSupport.stream(witness.spliterator(), false).toArray(Token[]::new);

        final Map alignments = new HashMap<>();
        for (Map.Entry alignment : align(ranks, tokens, scorer).entrySet()) {
            boolean aligned = false;
            final Token token = alignment.getValue();
            for (VariantGraph.Vertex vertex : alignment.getKey()) {
                for (Token vertexToken : vertex.tokens()) {
                    if (comparator.compare(vertexToken, token) == 0) {
                        alignments.put(token, vertex);
                        aligned = true;
                        break;
                    }
                }
                if (aligned) {
                    break;
                }
            }
        }

        merge(against, witness, alignments);
    }

    public static  Map align(A[] a, B[] b, NeedlemanWunschScorer scorer) {

        final Map alignments = new HashMap<>();
        final float[][] matrix = new float[a.length + 1][b.length + 1];

        int ac = 0;
        int bc = 0;
        while (ac < a.length) {
            matrix[ac++][0] = scorer.gap() * ac;
        }
        while (bc < b.length) {
            matrix[0][bc++] = scorer.gap() * bc;
        }

        ac = 1;
        for (A aElement : a) {
            bc = 1;
            for (B bElement : b) {
                final float k = matrix[ac - 1][bc - 1] + scorer.score(aElement, bElement);
                final float l = matrix[ac - 1][bc] + scorer.gap();
                final float m = matrix[ac][bc - 1] + scorer.gap();
                matrix[ac][bc++] = Math.max(Math.max(k, l), m);
            }
            ac++;
        }

        ac = a.length;
        bc = b.length;
        while (ac > 0 && bc > 0) {
            final float score = matrix[ac][bc];
            final float scoreDiag = matrix[ac - 1][bc - 1];
            final float scoreUp = matrix[ac][bc - 1];
            final float scoreLeft = matrix[ac - 1][bc];

            if (score == scoreDiag + scorer.score(a[ac - 1], b[bc - 1])) {
                // match
                alignments.put(a[ac - 1], b[bc - 1]);
                ac--;
                bc--;
            } else if (score == scoreLeft + scorer.gap()) {
                ac--;
            } else if (score == scoreUp + scorer.gap()) {
                bc--;
            }
        }

        return alignments;
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy