All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.github.gumtreediff.utils.SequenceAlgorithms Maven / Gradle / Ivy

The newest version!
/*
 * This file is part of GumTree.
 *
 * GumTree is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Lesser General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * GumTree is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public License
 * along with GumTree.  If not, see .
 *
 * Copyright 2011-2015 Jean-Rémy Falleri 
 * Copyright 2011-2015 Floréal Morandat 
 */

package com.github.gumtreediff.utils;

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

import com.github.gumtreediff.tree.Tree;

public final class SequenceAlgorithms {
    private SequenceAlgorithms() {}

    /**
     * Returns the longest common subsequence between two strings.
     *
     * @return a list of size 2 int arrays that corresponds
     *     to match of index in sequence 1 to index in sequence 2.
     */
    public static List longestCommonSubsequence(String s0, String s1) {
        int[][] lengths = new int[s0.length() + 1][s1.length() + 1];
        for (int i = 0; i < s0.length(); i++)
            for (int j = 0; j < s1.length(); j++)
                if (s0.charAt(i) == (s1.charAt(j)))
                    lengths[i + 1][j + 1] = lengths[i][j] + 1;
                else
                    lengths[i + 1][j + 1] = Math.max(lengths[i + 1][j], lengths[i][j + 1]);

        return extractIndexes(lengths, s0.length(), s1.length());
    }

    /**
     * Returns the hunks of the longest common subsequence between s1 and s2.
     * @return the hunks as a list of int arrays of size 4 with start index and end index of sequence 1
     *     and corresponding start index and end index in sequence 2.
     */
    public static List hunks(String s0, String s1) {
        List lcs = longestCommonSubsequence(s0 ,s1);
        List hunks = new ArrayList();
        int inf0 = -1;
        int inf1 = -1;
        int last0 = -1;
        int last1 = -1;
        for (int i = 0; i < lcs.size(); i++) {
            int[] match = lcs.get(i);
            if (inf0 == -1 || inf1 == -1) {
                inf0 = match[0];
                inf1 = match[1];
            } else if (last0 + 1 != match[0] || last1 + 1 != match[1]) {
                hunks.add(new int[] {inf0, last0 + 1, inf1, last1 + 1});
                inf0 = match[0];
                inf1 = match[1];
            } else if (i == lcs.size() - 1) {
                hunks.add(new int[] {inf0, match[0] + 1, inf1, match[1] + 1});
                break;
            }
            last0 = match[0];
            last1 = match[1];
        }
        return hunks;
    }

    /**
     * Returns the longest common sequence between two strings as a string.
     */
    public static String longestCommonSequence(String s1, String s2) {
        int start = 0;
        int max = 0;
        for (int i = 0; i < s1.length(); i++) {
            for (int j = 0; j < s2.length(); j++) {
                int x = 0;
                while (s1.charAt(i + x) == s2.charAt(j + x)) {
                    x++;
                    if (((i + x) >= s1.length()) || ((j + x) >= s2.length())) break;
                }
                if (x > max) {
                    max = x;
                    start = i;
                }
            }
        }
        return s1.substring(start, (start + max));
    }

    /**
     * Returns the longest common subsequence between the two list of nodes. This version use
     *     type and label to ensure equality.
     *
     * @see Tree#hasSameTypeAndLabel(Tree)
     * @return a list of size 2 int arrays that corresponds
     *     to match of index in sequence 1 to index in sequence 2.
     */
    public static List longestCommonSubsequenceWithTypeAndLabel(List s0, List s1) {
        int[][] lengths = new int[s0.size() + 1][s1.size() + 1];
        for (int i = 0; i < s0.size(); i++)
            for (int j = 0; j < s1.size(); j++)
                if (s0.get(i).hasSameTypeAndLabel(s1.get(j)))
                    lengths[i + 1][j + 1] = lengths[i][j] + 1;
                else
                    lengths[i + 1][j + 1] = Math.max(lengths[i + 1][j], lengths[i][j + 1]);

        return extractIndexes(lengths, s0.size(), s1.size());
    }

    /**
     * Returns the longest common subsequence between the two list of nodes. This version use
     *     type to ensure equality.
     *
     * @see Tree#hasSameType(Tree)
     * @return a list of size 2 int arrays that corresponds
     *     to match of index in sequence 1 to index in sequence 2.
     */
    public static List longestCommonSubsequenceWithType(List s0, List s1) {
        int[][] lengths = new int[s0.size() + 1][s1.size() + 1];
        for (int i = 0; i < s0.size(); i++)
            for (int j = 0; j < s1.size(); j++)
                if (s0.get(i).hasSameType(s1.get(j)))
                    lengths[i + 1][j + 1] = lengths[i][j] + 1;
                else
                    lengths[i + 1][j + 1] = Math.max(lengths[i + 1][j], lengths[i][j + 1]);

        return extractIndexes(lengths, s0.size(), s1.size());
    }

    /**
     * Returns the longest common subsequence between the two list of nodes. This version use
     *     isomorphism to ensure equality.
     *
     * @see Tree#isIsomorphicTo(Tree)
     * @return a list of size 2 int arrays that corresponds
     *     to match of index in sequence 1 to index in sequence 2.
     */
    public static List longestCommonSubsequenceWithIsomorphism(List s0, List s1) {
        int[][] lengths = new int[s0.size() + 1][s1.size() + 1];
        for (int i = 0; i < s0.size(); i++)
            for (int j = 0; j < s1.size(); j++)
                if (s0.get(i).isIsomorphicTo(s1.get(j)))
                    lengths[i + 1][j + 1] = lengths[i][j] + 1;
                else
                    lengths[i + 1][j + 1] = Math.max(lengths[i + 1][j], lengths[i][j + 1]);

        return extractIndexes(lengths, s0.size(), s1.size());
    }

    /**
     * Returns the longest common subsequence between the two list of nodes. This version use
     *     isomorphism to ensure equality.
     *
     * @see Tree#isIsoStructuralTo(Tree)
     * @return a list of size 2 int arrays that corresponds
     *     to match of index in sequence 1 to index in sequence 2.
     */
    public static List longestCommonSubsequenceWithIsostructure(List s0, List s1) {
        int[][] lengths = new int[s0.size() + 1][s1.size() + 1];
        for (int i = 0; i < s0.size(); i++)
            for (int j = 0; j < s1.size(); j++)
                if (s0.get(i).isIsoStructuralTo(s1.get(j)))
                    lengths[i + 1][j + 1] = lengths[i][j] + 1;
                else
                    lengths[i + 1][j + 1] = Math.max(lengths[i + 1][j], lengths[i][j + 1]);

        return extractIndexes(lengths, s0.size(), s1.size());
    }

    private static List extractIndexes(int[][] lengths, int length1, int length2) {
        List indexes = new ArrayList<>();

        for (int x = length1, y = length2; x != 0 && y != 0; ) {
            if (lengths[x][y] == lengths[x - 1][y]) x--;
            else if (lengths[x][y] == lengths[x][y - 1]) y--;
            else {
                indexes.add(new int[] {x - 1, y - 1});
                x--;
                y--;
            }
        }
        Collections.reverse(indexes);
        return indexes;
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy