All downloads are free. Search and download functionality uses the official Maven repository.

com.google.archivepatcher.generator.bsdiff.BsDiffPatchWriter Maven / Gradle / Ivy

The newest version!
// Copyright 2016 Google Inc. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package com.google.archivepatcher.generator.bsdiff;

import java.io.File;
import java.io.IOException;
import java.io.OutputStream;
import java.io.RandomAccessFile;
import java.nio.charset.StandardCharsets;

// TODO(andrewhayden): Clean up the various generatePatch(...) overloads; there are too many.

/**
 * A helper class that handles the main BsDiff I/O and patch generation by calling into the main
 * algorithm implementation in {@link BsDiff}.
 */
public class BsDiffPatchWriter {

  static final int DEFAULT_MINIMUM_MATCH_LENGTH = 16;

  /**
   * Emits one patch entry: three control values, then the diff bytes, then the extra bytes.
   *
   * <p>The control values are written in the order |diffLength|, |extraLength|,
   * |oldPositionOffsetForNextEntry|. Each diff byte is the unsigned byte read from |newData| minus
   * the unsigned byte read from |oldData| ({@link OutputStream#write(int)} keeps only the low-order
   * eight bits). Each extra byte is copied from |newData| as-is.
   *
   * @param newData the new blob, read for both the diff and the extra section
   * @param oldData the old blob, read for the diff section only
   * @param newPosition the first byte in |newData| to write either raw or as a diff against
   *     |oldData|.
   * @param oldPosition the first byte in |oldData| to diff against |newData|. |oldData| is
   *     positioned here even when |diffLength| is zero, but nothing is read from it in that case.
   * @param diffLength the number of bytes to diff between |newData| and |oldData| starting at
   *     |newPosition| and |oldPosition| respectively.
   * @param extraLength the number of bytes from |newData| to write starting at |newPosition +
   *     diffLength|.
   * @param oldPositionOffsetForNextEntry the offset between |oldPosition| for the next entry and
   *     |oldPosition| + |diffLength| for this entry.
   * @param outputStream the output stream to write the patch entry to.
   * @throws IOException if unable to read or write data
   */
  private static void writeEntry(
      RandomAccessObject newData,
      RandomAccessObject oldData,
      int newPosition,
      int oldPosition,
      int diffLength,
      int extraLength,
      int oldPositionOffsetForNextEntry,
      OutputStream outputStream)
      throws IOException {
    // Control block first: diff length, extra length, then the old-position adjustment.
    BsUtil.writeFormattedLong(diffLength, outputStream);
    BsUtil.writeFormattedLong(extraLength, outputStream);
    BsUtil.writeFormattedLong(oldPositionOffsetForNextEntry, outputStream);

    // Diff block: byte-wise (new - old) over |diffLength| bytes.
    newData.seek(newPosition);
    oldData.seek(oldPosition);
    int diffBytesLeft = diffLength;
    while (diffBytesLeft > 0) {
      // TODO(hartmanng): test using a small buffer to insulate read() calls (and write() for that
      // matter).
      final int newByte = newData.readUnsignedByte();
      final int oldByte = oldData.readUnsignedByte();
      outputStream.write(newByte - oldByte);
      diffBytesLeft--;
    }

    // Nothing more to emit when there is no extra data. Skipping the seek below also matters:
    // it can throw an IOException if it targets the byte just past the end of |newData|.
    if (extraLength <= 0) {
      return;
    }

    // Extra block: raw bytes of |newData| that follow the diffed region.
    newData.seek(newPosition + diffLength);
    for (int extraBytesLeft = extraLength; extraBytesLeft > 0; extraBytesLeft--) {
      // TODO(hartmanng): same as above - test buffering readByte().
      outputStream.write(newData.readByte());
    }
  }

  /**
   * Generate the entries of a BsDiff patch given a Matcher.
   *
   * <p>Repeatedly asks |matcher| for the next match and, on every iteration, writes exactly one
   * patch entry via {@code writeEntry}. The entry describes the region of |newData| that starts at
   * the (backward-extended) previous match and ends just before the (backward-extended) current
   * match. No header is written by this method.
   *
   * <p>All positions are tracked as {@code int}; when the matcher reports no further match,
   * |newData|'s length is narrowed to {@code int}.
   *
   * @param oldData the old blob
   * @param newData the new blob
   * @param matcher a Matcher to find binary matches between oldData and newData
   * @param outputStream the outputStream for the new generated patch
   * @throws IOException if unable to read or write data
   * @throws InterruptedException if the current thread is found to be interrupted at the start of
   *     a loop iteration
   */
  // Visible for testing only
  static void generatePatchWithMatcher(
      RandomAccessObject oldData,
      RandomAccessObject newData,
      Matcher matcher,
      OutputStream outputStream)
      throws IOException, InterruptedException {
    // Compute the differences, writing ctrl as we go
    // Start (in |newData| / |oldData|) of the previous match after its backward extension. Data
    // before |lastNewPosition| has already been written to the patch.
    int lastNewPosition = 0;
    int lastOldPosition = 0;

    // Start of the match most recently returned by |matcher|.
    int newPosition = 0;
    int oldPosition = 0;
    while (newPosition < newData.length()) {
      // Thread.interrupted() clears the interrupt status; the exception carries it to the caller.
      if (Thread.interrupted()) {
        throw new InterruptedException();
      }
      Matcher.NextMatch nextMatch = matcher.next();
      if (nextMatch.didFindMatch) {
        newPosition = nextMatch.newPosition;
        oldPosition = nextMatch.oldPosition;
      } else {
        // No more matches: jump to the end of |newData| so that this iteration writes the final
        // entry and the loop terminates. |oldPosition| keeps its previous value.
        newPosition = (int) newData.length();
      }

      // Extend the current match (|newPosition|, |oldPosition|) backward such that 50% of the bytes
      // match. We have written diff / extra data up till |lastNewPosition| so we cannot extend
      // further back than |lastNewPosition|.
      int backwardExtension = 0;
      if (newPosition < newData.length()) {
        // +1 for each matching byte, -1 for each mismatch; keep the longest extension that
        // achieves the best running score.
        int score = 0;
        int bestScore = 0;
        for (int i = 1; newPosition - i >= lastNewPosition && oldPosition >= i; ++i) {
          oldData.seek(oldPosition - i);
          newData.seek(newPosition - i);
          if (oldData.readByte() == newData.readByte()) {
            ++score;
          } else {
            --score;
          }

          if (score > bestScore) {
            bestScore = score;
            backwardExtension = i;
          }
        }
      }

      // Extend the previous match (|lastNewPosition|, |lastOldPosition|) forward such that 50% of
      // the bytes match. (|lastNewPosition|, |lastOldPosition|) were extended backward in the
      // previous iteration of the loop.
      int forwardExtension = 0;
      {
        int score = 0;
        int bestScore = 0;
        // Seek once, then read sequentially; the loop bounds keep both reads in range.
        oldData.seek(lastOldPosition);
        newData.seek(lastNewPosition);
        for (int i = 0;
            lastNewPosition + i < newPosition && lastOldPosition + i < oldData.length();
            ++i) {
          if (oldData.readByte() == newData.readByte()) {
            ++score;
          } else {
            --score;
          }
          if (score > bestScore) {
            bestScore = score;
            forwardExtension = i + 1;
          }
        }
      }

      // Adjust |backwardExtension| and |forwardExtension| such that the extended matches do
      // not intersect in |newData|. They can intersect in |oldData|.
      int overlap = (lastNewPosition + forwardExtension) - (newPosition - backwardExtension);
      if (overlap > 0) {
        // Walk the overlapping bytes and find the split point that maximizes
        // (matches under the previous alignment) - (matches under the new alignment). Bytes
        // before the split stay with the forward extension; the rest stay with the backward one.
        int score = 0;
        int bestScore = 0;
        int backwardExtensionDecrement = 0;
        for (int i = 0; i < overlap; ++i) {
          // Byte i of the overlap, compared using the previous match's alignment.
          newData.seek(lastNewPosition + forwardExtension - overlap + i);
          oldData.seek(lastOldPosition + forwardExtension - overlap + i);
          if (newData.readByte() == oldData.readByte()) {
            ++score;
          }

          // The same |newData| byte, compared using the new match's alignment.
          newData.seek(newPosition - backwardExtension + i);
          oldData.seek(oldPosition - backwardExtension + i);
          if (newData.readByte() == oldData.readByte()) {
            --score;
          }
          if (score > bestScore) {
            bestScore = score;
            backwardExtensionDecrement = i + 1;
          }
        }
        forwardExtension -= overlap - backwardExtensionDecrement;
        backwardExtension -= backwardExtensionDecrement;
      }

      // Write an entry with:
      // - The diff between |newData| and |oldData| for the previous extended match:
      //   oldData[lastOldPosition ... lastOldPosition + forwardExtension - 1] and
      //   newData[lastNewPosition ... lastNewPosition + forwardExtension - 1].
      // - The bytes in |newData| between |lastNewPosition| and |newPosition| which are part of
      //   neither the previous extended match or the new extended match:
      //   newData[lastNewPosition + forwardExtension ... newPosition - backwardExtension - 1]

      // Stays 0 for the final entry (no following match to reposition for).
      int oldPositionOffset = 0;
      if (newPosition < newData.length()) {
        // The offset in |oldData| from the byte after the last byte of the previous extended match
        // to the first byte of the new extended match.
        oldPositionOffset =
            (oldPosition - backwardExtension) - (lastOldPosition + forwardExtension);
      }

      // The number of bytes in |newData| between |lastNewPosition| and |newPosition| which are part
      // of neither the previous extended match or the new extended match.
      int newNoMatchLength =
          (newPosition - backwardExtension) - (lastNewPosition + forwardExtension);

      writeEntry(
          newData,
          oldData,
          lastNewPosition,
          lastOldPosition,
          forwardExtension,
          newNoMatchLength,
          oldPositionOffset,
          outputStream);

      // The backward-extended start of the current match becomes the "previous match" for the
      // next iteration.
      lastNewPosition = newPosition - backwardExtension;
      lastOldPosition = oldPosition - backwardExtension;
    }
  }

  /**
   * Generate a diff between the old data and the new, writing to the specified stream. Uses {@link
   * #DEFAULT_MINIMUM_MATCH_LENGTH} as the match length.
   *
   * <p>This is a convenience overload: it forwards all arguments unchanged to the five-argument
   * {@code generatePatch} that additionally takes a minimum match length.
   *
   * @param oldData the old data
   * @param newData the new data
   * @param outputStream where output should be written
   * @param randomAccessObjectFactory factory to create auxiliary storage during BsDiff
   * @throws IOException if unable to read or write data
   * @throws InterruptedException if any thread interrupts this thread
   */
  public static void generatePatch(
      final RandomAccessObject oldData,
      final RandomAccessObject newData,
      final OutputStream outputStream,
      final RandomAccessObjectFactory randomAccessObjectFactory)
      throws IOException, InterruptedException {
    // Delegate with the default minimum match length.
    generatePatch(
        oldData, newData, outputStream, randomAccessObjectFactory, DEFAULT_MINIMUM_MATCH_LENGTH);
  }

  /**
   * Generate a diff between the old data and the new, writing to the specified stream. Uses
   * in-memory byte array storage for ancillary allocations and {@link
   * #DEFAULT_MINIMUM_MATCH_LENGTH} as the match length.
   *
   * <p>This is a convenience overload: it forwards to the byte-array {@code generatePatch} that
   * additionally takes a minimum match length.
   *
   * @param oldData the old data
   * @param newData the new data
   * @param outputStream where output should be written
   * @throws IOException if unable to read or write data
   * @throws InterruptedException if any thread interrupts this thread
   */
  public static void generatePatch(
      final byte[] oldData, final byte[] newData, final OutputStream outputStream)
      throws IOException, InterruptedException {
    // Delegate with the default minimum match length.
    generatePatch(oldData, newData, outputStream, DEFAULT_MINIMUM_MATCH_LENGTH);
  }

  /**
   * Generate a diff between two in-memory blobs, writing the patch to the specified stream.
   *
   * <p>Both arrays are wrapped in byte-array-backed {@link RandomAccessObject}s, and a byte-array
   * factory is supplied for ancillary allocations. The wrappers are closed before this method
   * returns, whether or not generation succeeds.
   *
   * @param oldData the old data
   * @param newData the new data
   * @param outputStream where output should be written
   * @param minimumMatchLength the minimum "match" (in bytes) for BsDiff to consider between the
   *     oldData and newData. This can have a significant effect on both the generated patch size
   *     and the amount of time and memory required to apply the patch.
   * @throws IOException if unable to read or write data
   * @throws InterruptedException if any thread interrupts this thread
   */
  public static void generatePatch(
      final byte[] oldData,
      final byte[] newData,
      final OutputStream outputStream,
      final int minimumMatchLength)
      throws IOException, InterruptedException {
    try (RandomAccessObject oldBlob = new RandomAccessObject.RandomAccessByteArrayObject(oldData);
        RandomAccessObject newBlob = new RandomAccessObject.RandomAccessByteArrayObject(newData)) {
      // Ancillary storage is kept in memory as well.
      final RandomAccessObjectFactory inMemoryFactory =
          new RandomAccessObjectFactory.RandomAccessByteArrayObjectFactory();
      generatePatch(oldBlob, newBlob, outputStream, inMemoryFactory, minimumMatchLength);
    }
  }

  /**
   * Generate a diff between the old data and the new, writing to the specified stream. Uses
   * file-based storage for ancillary operations and {@link #DEFAULT_MINIMUM_MATCH_LENGTH} as the
   * match length.
   *
   * <p>This is a convenience overload: it forwards to the file-based {@code generatePatch} that
   * additionally takes a minimum match length.
   *
   * @param oldData a file containing the old data
   * @param newData a file containing the new data
   * @param outputStream where output should be written
   * @throws IOException if unable to read or write data
   * @throws InterruptedException if any thread interrupts this thread
   */
  public static void generatePatch(
      final File oldData, final File newData, final OutputStream outputStream)
      throws IOException, InterruptedException {
    // Delegate with the default minimum match length.
    generatePatch(oldData, newData, outputStream, DEFAULT_MINIMUM_MATCH_LENGTH);
  }

  /**
   * Generate a diff between the old data and the new, writing to the specified stream. Uses
   * file-based storage for ancillary allocations.
   *
   * <p>Both files are opened read-only and wrapped in memory-mapped {@link RandomAccessObject}s;
   * ancillary storage is created through a memory-mapped factory opened in "rw" mode. All four
   * resources are closed before this method returns. Afterwards, whether generation succeeded or
   * threw, a garbage collection and finalization pass is requested so that mapped buffers used
   * during generation can be released.
   *
   * @param oldData a file containing the old data
   * @param newData a file containing the new data
   * @param outputStream where output should be written
   * @param minimumMatchLength the minimum "match" (in bytes) for BsDiff to consider between the
   *     oldData and newData. This can have a significant effect on both the generated patch size
   *     and the amount of time and memory required to apply the patch.
   * @throws IOException if unable to read or write data
   * @throws InterruptedException if any thread interrupts this thread
   */
  public static void generatePatch(
      final File oldData,
      final File newData,
      final OutputStream outputStream,
      final int minimumMatchLength)
      throws IOException, InterruptedException {
    try (RandomAccessFile oldDataRAF = new RandomAccessFile(oldData, "r");
        RandomAccessFile newDataRAF = new RandomAccessFile(newData, "r");
        RandomAccessObject oldDataRAO =
            new RandomAccessObject.RandomAccessMmapObject(oldDataRAF, "r");
        RandomAccessObject newDataRAO =
            new RandomAccessObject.RandomAccessMmapObject(newDataRAF, "r"); ) {
      generatePatch(
          oldDataRAO,
          newDataRAO,
          outputStream,
          new RandomAccessObjectFactory.RandomAccessMmapObjectFactory("rw"),
          minimumMatchLength);
    } finally {
      // Due to a bug in the JVM (http://bugs.java.com/view_bug.do?bug_id=6417205), we need to call
      // gc() and runFinalization() explicitly to get rid of any MappedByteBuffers we may have used
      // during patch generation. This lives in a finally block so the cleanup also happens when
      // generation fails or is interrupted; the resources above are already closed by the time
      // this block runs.
      System.gc();
      System.runFinalization();
    }
  }

  /**
   * Generate a complete BsDiff patch (header plus entries) from the old data to the new, writing
   * it to the specified stream.
   *
   * <p>The header is the 16 ASCII bytes {@code ENDSLEY/BSDIFF43} followed by the length of
   * |newData| as written by {@code BsUtil.writeFormattedLong}. The old data is then suffix-sorted,
   * and the result drives a {@link BsDiffMatcher} whose matches are turned into patch entries. The
   * suffix-sort result is closed before this method returns.
   *
   * @param oldData the old data
   * @param newData the new data
   * @param outputStream where output should be written
   * @param randomAccessObjectFactory factory to create auxiliary storage during BsDiff
   * @param minimumMatchLength the minimum "match" (in bytes) for BsDiff to consider between the
   *     oldData and newData. This can have a significant effect on both the generated patch size
   *     and the amount of time and memory required to apply the patch.
   * @throws IOException if unable to read or write data
   * @throws InterruptedException if any thread interrupts this thread
   */
  public static void generatePatch(
      final RandomAccessObject oldData,
      final RandomAccessObject newData,
      final OutputStream outputStream,
      final RandomAccessObjectFactory randomAccessObjectFactory,
      final int minimumMatchLength)
      throws IOException, InterruptedException {
    // Header: format signature, then the size of the new file.
    final byte[] signature = "ENDSLEY/BSDIFF43".getBytes(StandardCharsets.US_ASCII);
    outputStream.write(signature);
    BsUtil.writeFormattedLong(newData.length(), outputStream);

    // Suffix-sort the old data; the sorted result backs the matcher and is released afterwards.
    final DivSuffixSorter sorter = new DivSuffixSorter(randomAccessObjectFactory);
    try (final RandomAccessObject sortedSuffixes = sorter.suffixSort(oldData)) {
      generatePatchWithMatcher(
          oldData,
          newData,
          new BsDiffMatcher(oldData, newData, sortedSuffixes, minimumMatchLength),
          outputStream);
    }
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy