All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.google.archivepatcher.generator.bsdiff.BsDiffMatcher Maven / Gradle / Ivy

The newest version!
// Copyright 2016 Google Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package com.google.archivepatcher.generator.bsdiff;

import java.io.IOException;

/**
 * Implementation of matcher used by BsDiff. Exact matches between newData[a ... a + len - 1]
 * and oldData[b ... b + len - 1] are valid if:
 * 
    *
  • |len| > mMinimumMatchLength
  • *
  • The number of matches between newData[a ... a + len - 1] and * oldData[previous_b ... previous_b + len - 1] < |len| - mMinimumMatchLength where * |previous_b| is the |b| value of the previous match if there was one and zero otherwise.
  • *
*/ class BsDiffMatcher implements Matcher { private final RandomAccessObject mOldData; private final RandomAccessObject mNewData; /** * Contains order of the sorted suffixes of |oldData|. The element at mGroupArray[i] contains the * position of oldData[i ... oldData.length - 1] in the sorted list of suffixes of |oldData|. */ private final RandomAccessObject mGroupArray; /** * The index in |oldData| of the first byte of the match. Zero if no matches have been found yet. */ private int mOldPos; /** * The index in |newData| of the first byte of the match. Zero if no matches have been found yet. * The next match will be searched starting at |mNewPos| + |mMatchLen|. */ private int mNewPos; /** * Minimum match length in bytes. */ private final int mMinimumMatchLength; /** * A limit on how many total match lengths encountered, to exit the match extension loop in next() * and prevent O(n^2) behavior. */ private final long mTotalMatchLenBudget = 1L << 26; // ~64 million. /** * The number of bytes, |n|, which match between newData[mNewPos ... mNewPos + n] and * oldData[mOldPos ... mOldPos + n]. */ private int mMatchLen; /** * Create a standard BsDiffMatcher. * @param minimumMatchLength the minimum "match" (in bytes) for BsDiff to consider between the * oldData and newData. This can have a significant effect on both the generated patch size and * the amount of time and memory required to apply the patch. */ BsDiffMatcher( RandomAccessObject oldData, RandomAccessObject newData, RandomAccessObject groupArray, int minimumMatchLength) { mOldData = oldData; mNewData = newData; mGroupArray = groupArray; mOldPos = 0; mMinimumMatchLength = minimumMatchLength; } @Override public Matcher.NextMatch next() throws IOException, InterruptedException { RandomAccessObject oldData = mOldData; RandomAccessObject newData = mNewData; // The offset between between the indices in |oldData| and |newData| // of the previous match. int previousOldOffset = mOldPos - mNewPos; // Look for a new match starting from the end of the previous match. mNewPos += mMatchLen; // The number of matching bytes in the forward extension of the previous match: // oldData[mNewPos + previousOldOffset ... mNewPos + previousOldOffset + mMatchLen - 1] // and newData[mNewPos ... mNewPos + mMatchLen - 1]. int numMatches = 0; // The size of the range for which |numMatches| has been computed. int matchesCacheSize = 0; // Sum over all match lengths encountered, to exit loop if we take too long to compute. long totalMatchLen = 0; while (mNewPos < newData.length()) { if (Thread.interrupted()) { throw new InterruptedException(); } BsDiff.Match match = BsDiff.searchForMatch(mGroupArray, oldData, newData, mNewPos, 0, (int) oldData.length()); mOldPos = match.start; mMatchLen = match.length; totalMatchLen += mMatchLen; // Update |numMatches| for the new value of |matchLen|. for (; matchesCacheSize < mMatchLen; ++matchesCacheSize) { int oldIndex = mNewPos + previousOldOffset + matchesCacheSize; int newIndex = mNewPos + matchesCacheSize; if (oldIndex < oldData.length()) { oldData.seek(oldIndex); newData.seek(newIndex); if (oldData.readByte() == newData.readByte()) { ++numMatches; } } } // Also return if we've been trying to extend a large match for a long time. if (mMatchLen > numMatches + mMinimumMatchLength || totalMatchLen >= mTotalMatchLenBudget) { return Matcher.NextMatch.of(true, mOldPos, mNewPos); } if (mMatchLen == 0) { ++mNewPos; } else if (mMatchLen == numMatches) { // This seems to be an optimization because it is unlikely to find a valid match in the // range mNewPos = [ mNewPos ... mNewPos + mMatchLen - 1 ] especially for large values of // |mMatchLen|. mNewPos += numMatches; numMatches = 0; matchesCacheSize = 0; } else { // Update |numMatches| for the value of |mNewPos| in the next iteration of the loop. In the // next iteration of the loop, the new value of |numMatches| will be at least // |numMatches - 1| because // oldData[mNewPos + previousOldOffset + 1 ... mNewPos + previousOldOffset + mMatchLen - 1] // matches newData[mNewPos + 1 ... mNewPos + mMatchLen - 1]. if (mNewPos + previousOldOffset < oldData.length()) { oldData.seek(mNewPos + previousOldOffset); newData.seek(mNewPos); if (oldData.readByte() == newData.readByte()) { --numMatches; } } ++mNewPos; --matchesCacheSize; } } return Matcher.NextMatch.of(false, 0, 0); } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy