All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.commons.collections4.sequence.SequencesComparator Maven / Gradle / Ivy

There is a newer version: 3.3.8
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.commons.collections4.sequence;

import java.util.List;

import org.apache.commons.collections4.Equator;
import org.apache.commons.collections4.functors.DefaultEquator;

/**
 * This class allows to compare two objects sequences.
 * 

* The two sequences can hold any object type, as only the equals * method is used to compare the elements of the sequences. It is guaranteed * that the comparisons will always be done as o1.equals(o2) where * o1 belongs to the first sequence and o2 belongs to * the second sequence. This can be important if subclassing is used for some * elements in the first sequence and the equals method is * specialized. *

*

* Comparison can be seen from two points of view: either as giving the smallest * modification allowing to transform the first sequence into the second one, or * as giving the longest sequence which is a subsequence of both initial * sequences. The equals method is used to compare objects, so any * object can be put into sequences. Modifications include deleting, inserting * or keeping one object, starting from the beginning of the first sequence. *

*

* This class implements the comparison algorithm, which is the very efficient * algorithm from Eugene W. Myers * * An O(ND) Difference Algorithm and Its Variations. This algorithm produces * the shortest possible * {@link EditScript edit script} * containing all the * {@link EditCommand commands} * needed to transform the first sequence into the second one. *

* * @see EditScript * @see EditCommand * @see CommandVisitor * * @since 4.0 */ public class SequencesComparator { /** First sequence. */ private final List sequence1; /** Second sequence. */ private final List sequence2; /** The equator used for testing object equality. */ private final Equator equator; /** Temporary variables. */ private final int[] vDown; private final int[] vUp; /** * Simple constructor. *

* Creates a new instance of SequencesComparator using a {@link DefaultEquator}. *

* It is guaranteed that the comparisons will always be done as * o1.equals(o2) where o1 belongs to the first * sequence and o2 belongs to the second sequence. This can be * important if subclassing is used for some elements in the first sequence * and the equals method is specialized. * * @param sequence1 first sequence to be compared * @param sequence2 second sequence to be compared */ public SequencesComparator(final List sequence1, final List sequence2) { this(sequence1, sequence2, DefaultEquator.defaultEquator()); } /** * Simple constructor. *

* Creates a new instance of SequencesComparator with a custom {@link Equator}. *

* It is guaranteed that the comparisons will always be done as * Equator.equate(o1, o2) where o1 belongs to the first * sequence and o2 belongs to the second sequence. * * @param sequence1 first sequence to be compared * @param sequence2 second sequence to be compared * @param equator the equator to use for testing object equality */ public SequencesComparator(final List sequence1, final List sequence2, final Equator equator) { this.sequence1 = sequence1; this.sequence2 = sequence2; this.equator = equator; final int size = sequence1.size() + sequence2.size() + 2; vDown = new int[size]; vUp = new int[size]; } /** * Get the {@link EditScript} object. *

* It is guaranteed that the objects embedded in the {@link InsertCommand * insert commands} come from the second sequence and that the objects * embedded in either the {@link DeleteCommand delete commands} or * {@link KeepCommand keep commands} come from the first sequence. This can * be important if subclassing is used for some elements in the first * sequence and the equals method is specialized. * * @return the edit script resulting from the comparison of the two * sequences */ public EditScript getScript() { final EditScript script = new EditScript<>(); buildScript(0, sequence1.size(), 0, sequence2.size(), script); return script; } /** * Build a snake. * * @param start the value of the start of the snake * @param diag the value of the diagonal of the snake * @param end1 the value of the end of the first sequence to be compared * @param end2 the value of the end of the second sequence to be compared * @return the snake built */ private Snake buildSnake(final int start, final int diag, final int end1, final int end2) { int end = start; while (end - diag < end2 && end < end1 && equator.equate(sequence1.get(end), sequence2.get(end - diag))) { ++end; } return new Snake(start, end, diag); } /** * Get the middle snake corresponding to two subsequences of the * main sequences. *

* The snake is found using the MYERS Algorithm (this algorithms has * also been implemented in the GNU diff program). This algorithm is * explained in Eugene Myers article: * * An O(ND) Difference Algorithm and Its Variations. * * @param start1 the begin of the first sequence to be compared * @param end1 the end of the first sequence to be compared * @param start2 the begin of the second sequence to be compared * @param end2 the end of the second sequence to be compared * @return the middle snake */ private Snake getMiddleSnake(final int start1, final int end1, final int start2, final int end2) { // Myers Algorithm // Initialisations final int m = end1 - start1; final int n = end2 - start2; if (m == 0 || n == 0) { return null; } final int delta = m - n; final int sum = n + m; final int offset = (sum % 2 == 0 ? sum : sum + 1) / 2; vDown[1+offset] = start1; vUp[1+offset] = end1 + 1; for (int d = 0; d <= offset ; ++d) { // Down for (int k = -d; k <= d; k += 2) { // First step final int i = k + offset; if (k == -d || k != d && vDown[i-1] < vDown[i+1]) { vDown[i] = vDown[i+1]; } else { vDown[i] = vDown[i-1] + 1; } int x = vDown[i]; int y = x - start1 + start2 - k; while (x < end1 && y < end2 && equator.equate(sequence1.get(x), sequence2.get(y))) { vDown[i] = ++x; ++y; } // Second step if (delta % 2 != 0 && delta - d <= k && k <= delta + d) { if (vUp[i-delta] <= vDown[i]) { // NOPMD return buildSnake(vUp[i-delta], k + start1 - start2, end1, end2); } } } // Up for (int k = delta - d; k <= delta + d; k += 2) { // First step final int i = k + offset - delta; if (k == delta - d || k != delta + d && vUp[i+1] <= vUp[i-1]) { vUp[i] = vUp[i+1] - 1; } else { vUp[i] = vUp[i-1]; } int x = vUp[i] - 1; int y = x - start1 + start2 - k; while (x >= start1 && y >= start2 && equator.equate(sequence1.get(x), sequence2.get(y))) { vUp[i] = x--; y--; } // Second step if (delta % 2 == 0 && -d <= k && k <= d ) { if (vUp[i] <= vDown[i + delta]) { // NOPMD return buildSnake(vUp[i], k + start1 - start2, end1, end2); } } } } // this should not happen throw new RuntimeException("Internal Error"); } /** * Build an edit script. * * @param start1 the begin of the first sequence to be compared * @param end1 the end of the first sequence to be compared * @param start2 the begin of the second sequence to be compared * @param end2 the end of the second sequence to be compared * @param script the edited script */ private void buildScript(final int start1, final int end1, final int start2, final int end2, final EditScript script) { final Snake middle = getMiddleSnake(start1, end1, start2, end2); if (middle == null || middle.getStart() == end1 && middle.getDiag() == end1 - end2 || middle.getEnd() == start1 && middle.getDiag() == start1 - start2) { int i = start1; int j = start2; while (i < end1 || j < end2) { if (i < end1 && j < end2 && equator.equate(sequence1.get(i), sequence2.get(j))) { script.append(new KeepCommand<>(sequence1.get(i))); ++i; ++j; } else { if (end1 - start1 > end2 - start2) { script.append(new DeleteCommand<>(sequence1.get(i))); ++i; } else { script.append(new InsertCommand<>(sequence2.get(j))); ++j; } } } } else { buildScript(start1, middle.getStart(), start2, middle.getStart() - middle.getDiag(), script); for (int i = middle.getStart(); i < middle.getEnd(); ++i) { script.append(new KeepCommand<>(sequence1.get(i))); } buildScript(middle.getEnd(), end1, middle.getEnd() - middle.getDiag(), end2, script); } } /** * This class is a simple placeholder to hold the end part of a path * under construction in a {@link SequencesComparator SequencesComparator}. */ private static class Snake { /** Start index. */ private final int start; /** End index. */ private final int end; /** Diagonal number. */ private final int diag; /** * Simple constructor. Creates a new instance of Snake with specified indices. * * @param start start index of the snake * @param end end index of the snake * @param diag diagonal number */ public Snake(final int start, final int end, final int diag) { this.start = start; this.end = end; this.diag = diag; } /** * Get the start index of the snake. * * @return start index of the snake */ public int getStart() { return start; } /** * Get the end index of the snake. * * @return end index of the snake */ public int getEnd() { return end; } /** * Get the diagonal number of the snake. * * @return diagonal number of the snake */ public int getDiag() { return diag; } } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy