All downloads are free. The search and download features use the official Maven repository.

org.pageseeder.diffx.sequence.TokenListSlicer Maven / Gradle / Ivy

/*
 * Copyright 2010-2015 Allette Systems (Australia)
 * http://www.allette.com.au
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.pageseeder.diffx.sequence;

import org.pageseeder.diffx.api.DiffHandler;
import org.pageseeder.diffx.api.Operator;
import org.pageseeder.diffx.token.EndElementToken;
import org.pageseeder.diffx.token.StartElementToken;
import org.pageseeder.diffx.token.XMLToken;

import java.util.Collections;
import java.util.Iterator;
import java.util.List;

/**
 * The slicer takes two sequences and removes the common elements from the beginning
 * and the end of the chain so that only the smallest sequences are passed to the
 * DiffXAlgorithmBase.
 *
 * 

The slice does NOT modify the original sequences. * *

Note: Using this class may lead to problems in the execution of the Diff-X * algorithm and incorrect results, because it could potentially take off some parts * that helps the Diff-X algorithm ensuring that the XML is well-formed. * * @author Christophe Lauret * @version 0.9.0 * @since 0.7.0 */ public final class TokenListSlicer { /** * The first sequence of tokens to test. */ final List sequence1; /** * The second sequence of tokens to test. */ final List sequence2; /** * The common start between the two sequences. */ int startCount = -1; /** * The common end between the two sequences. */ int endCount = -1; /** * Creates a new sequence slicer. * * @param seq0 The first sequence to slice. * @param seq1 The second sequence to slice. */ public TokenListSlicer(List seq0, List seq1) { this.sequence1 = seq0; this.sequence2 = seq1; } /** * Analyse the sequences to know whether they can be sliced. * * @return the number of common tokens */ public int analyze() throws IllegalStateException { this.startCount = computeStart(); this.endCount = sliceEnd(this.startCount); return this.startCount + this.endCount; } /** * Slices the start of both sequences. * *

The common start sequence will be stored in the class until the next * {@link #handleStart(DiffHandler)} is called. * * @return The number of common elements at the start of the sequences. * @throws IllegalStateException If the start buffer is not empty. */ public int computeStart() throws IllegalStateException { int toBeRemoved = 0; // the number of tokens to be removed int depth = 0; // the depth of the XML or number of open elements Iterator i = this.sequence1.iterator(); Iterator j = this.sequence2.iterator(); int counter = 0; // calculate the max possible index for slicing. while (i.hasNext() && j.hasNext()) { XMLToken token = i.next(); if (j.next().equals(token)) { counter++; // increase the depth if (token instanceof StartElementToken) { depth++; // decrease the depth } else if (token instanceof EndElementToken) { depth--; } // if depth = 1, it is a direct child of the document element, // so we can cut off the whole branch if (depth == 1 || depth == 0) { toBeRemoved = counter; } } else { break; } } return toBeRemoved; } /** * Slices the end of both sequences. * *

The common end sequence will be stored in the class until the next * {@link #handleEnd(DiffHandler)} is called. * * @return The number of common elements at the end of the sequences. * @throws IllegalStateException If the end buffer is not empty. */ public int sliceEnd(int start) throws IllegalStateException { int depth = 0; // the depth of the XML or number of open elements int toBeRemoved = 0; // number of tokens to be removed from the end int counter = 0; // number of tokens evaluated int pos1 = this.sequence1.size() - 1; // current position of the first sequence int pos2 = this.sequence2.size() - 1; // current position of the second sequence while (pos1 >= start && pos2 >= start) { XMLToken token = this.sequence1.get(pos1); if (token.equals(this.sequence2.get(pos2))) { counter++; // increase the depth for close, decrease for open if (token instanceof EndElementToken) { depth++; } else if (token instanceof StartElementToken) { depth--; } // if depth = 1, it is a direct child of the document element, // so we can cut off the whole branch if (depth == 1 || depth == 0) { toBeRemoved = counter; } pos1--; pos2--; } else { break; } } return toBeRemoved; } /** * Formats the start subsequence that has been buffered by this class. * *

This method will clear the buffer, but will do nothing if the start buffer is * null. * * @param handler The handler for the output. * * @throws NullPointerException If the specified formatter is null. */ public void handleStart(DiffHandler handler) { for (int i = 0; i < this.startCount; i++) { handler.handle(Operator.MATCH, this.sequence1.get(i)); } } /** * Formats the end subsequence that has been buffered by this class. * *

This method will clear the buffer, but will do nothing if the end buffer is * null. * * @param handler The handler for the output. * * @throws NullPointerException If the specified formatter is null. */ public void handleEnd(DiffHandler handler) { int from = this.sequence1.size() - this.endCount; int to = this.sequence1.size(); for (int i = from; i < to; i++) { handler.handle(Operator.MATCH, this.sequence1.get(i)); } } /** * @return The number of common tokens at the start of the sequence. */ public int getStartCount() { return this.startCount; } /** * @return The number of common tokens at the end of the sequence. */ public int getEndCount() { return this.endCount; } /** * @return The common sublist at the start of the sequence. */ public List getStart() { if (this.startCount <= 0) return Collections.emptyList(); return this.sequence1.subList(0, this.startCount); } /** * @return The common sublist at the end of the sequence. */ public List getEnd() { if (this.endCount <= 0) return Collections.emptyList(); int size = this.sequence1.size(); return this.sequence1.subList(size - this.endCount, size); } public List getSubSequence1() { if (this.startCount <= 0 && this.endCount <= 0) return this.sequence1; return this.sequence1.subList(this.startCount, this.sequence1.size() - this.endCount); } public List getSubSequence2() { if (this.startCount <= 0 && this.endCount <= 0) return this.sequence2; return this.sequence2.subList(this.startCount, this.sequence2.size() - this.endCount); } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy