// All downloads are free. Search and download functionality uses the official Maven repository.
//
// japgolly.microlibs.testutil.LineDiff.scala (Maven / Gradle / Ivy)
//
// There is a newer version: 4.2.1
// Show newest version
package japgolly.microlibs.testutil

import japgolly.microlibs.testutil.TestUtilInternals._
import scala.collection.immutable.HashMap
import scala.Console._

/* Copyright (c) 2011, Owen Stephens
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in the
 *       documentation and/or other materials provided with the distribution.
 *     * Neither the name of Owen Stephens nor the
 *       names of its contributors may be used to endorse or promote products
 *       derived from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL Owen Stephens BE LIABLE FOR ANY
 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
// https://raw.githubusercontent.com/owst/Scala-Patience-Diff
object LineDiff {

  /** Diffs two blocks of text line by line.
    *
    * Each string is split with `linesIterator` and the resulting line sequences are handed to the
    * `Seq`-based overload.
    *
    * @param str1 the first ("old") text
    * @param str2 the second ("new") text
    */
  def apply(str1: String, str2: String): Result = {
    val before = str1.linesIterator.toSeq
    val after  = str2.linesIterator.toSeq
    apply(before, after)
  }

  /** Diffs two sequences of lines.
    *
    * The inputs are copied into `Vector`s first: the diff implementation repeatedly indexes into,
    * slices and measures both sequences, which is linear-time per operation on linked sequences such
    * as `List` (or a lazy `Stream`), but effectively constant-time on a `Vector`.
    *
    * @param lines1 lines of the first ("old") input
    * @param lines2 lines of the second ("new") input
    */
  def apply(lines1: Seq[String], lines2: Seq[String]): Result =
    new Result(Diff.diff(lines1.toVector, lines2.toVector).toVector)

  /** The outcome of a line diff, with helpers for rendering it as text.
    *
    * @param results the diff hunks, in output order
    */
  final class Result(results: Vector[DiffResult]) {

    /** Renders every line of every hunk as `prefix + modLine(line) + reset`, joined by newlines.
      *
      * Lines of a `Modify` hunk are rendered as all old lines (deletion prefix) followed by all new
      * lines (addition prefix).
      *
      * @param equalPrefix prefix for lines present in both inputs
      * @param delPrefix   prefix for lines only in the first input
      * @param addPrefix   prefix for lines only in the second input
      * @param reset       suffix appended to every rendered line
      * @param modLine     transformation applied to each line's text before it is wrapped
      */
    def diffCustom(equalPrefix: String,
                   delPrefix  : String,
                   addPrefix  : String,
                   reset      : String,
                   modLine    : String => String = identity): String = {

      // Lazily decorate a run of lines with one prefix.
      def render(prefix: String, lines: Seq[String]): Iterator[String] =
        lines.iterator.map(line => prefix + modLine(line) + reset)

      val rendered = results.iterator.flatMap {
        case Equal(_, _, _, lines)           => render(equalPrefix, lines)
        case Insert(_, _, _, lines)          => render(addPrefix, lines)
        case Delete(_, _, _, lines)          => render(delPrefix, lines)
        case Modify(_, _, _, removed, added) => render(delPrefix, removed) ++ render(addPrefix, added)
      }

      rendered.mkString("\n")
    }

    /** Colourised diff with single-character ` `, `-` and `+` markers. */
    lazy val diffColoured: String =
      diffCustom(
        equalPrefix = BRIGHT_BLACK + " ",
        delPrefix   = BOLD_BRIGHT_GREEN + "-",
        addPrefix   = BOLD_BRIGHT_RED + "+",
        reset       = RESET,
      )

    /** Colourised diff with `-e|` / `+a|` gutters; trailing whitespace on each line is highlighted
      * via `highlightTrailingWhitespace`.
      */
    lazy val expectActualColoured: String =
      diffCustom(
        equalPrefix = BRIGHT_BLACK      + "  | ",
        delPrefix   = BOLD_BRIGHT_GREEN + "-e| ",
        addPrefix   = BOLD_BRIGHT_RED   + "+a| ",
        reset       = RESET,
        modLine     = highlightTrailingWhitespace,
      )
  }

  /** Inserts `RED_B` in front of any run of whitespace at the end of `s`.
    *
    * Strings without trailing whitespace are returned unchanged.
    */
  def highlightTrailingWhitespace(s: String): String = {
    // Index just past the last non-whitespace character (0 when there is none).
    val bodyEnd = s.lastIndexWhere(c => !c.isWhitespace) + 1
    if (bodyEnd == s.length)
      s
    else
      s.substring(0, bodyEnd) + RED_B + s.substring(bodyEnd)
  }

  // ███████████████████████████████████████████████████████████████████████████████████████████████████████████████████

  /** One hunk of a line diff.
    *
    * As populated by `Diff.diff`: for `Insert`, `Delete` and `Modify` the two indices are the
    * zero-based positions in the first and second input at which the hunk starts, whereas for
    * `Equal` they are the positions immediately after the matched block.
    */
  sealed abstract class DiffResult {
    /** Line position in the first input (start or end of the hunk, depending on the subtype). */
    val file1Index: Int
    /** Line position in the second input (start or end of the hunk, depending on the subtype). */
    val file2Index: Int
    /** Number of lines the hunk covers in the (first, second) input. */
    val lengths: (Int, Int)
  }

  /** Lines present only in the second input; `Diff.diff` sets `lengths` to `(0, n)`. */
  final case class Insert(file1Index: Int, file2Index: Int, lengths: (Int, Int), lines: Seq[String])
    extends DiffResult

  /** A run of lines from the first input (`oldLines`) replaced by a run from the second (`newLines`). */
  final case class Modify(file1Index: Int, file2Index: Int, lengths: (Int, Int), oldLines: Seq[String], newLines: Seq[String])
    extends DiffResult

  /** Lines present only in the first input; `Diff.diff` sets `lengths` to `(n, 0)`. */
  final case class Delete(file1Index: Int, file2Index: Int, lengths: (Int, Int), lines: Seq[String])
    extends DiffResult

  /** Lines common to both inputs; `Diff.diff` sets `lengths` to `(n, n)` and takes `lines` from the first input. */
  final case class Equal(file1Index: Int, file2Index: Int, lengths: (Int, Int), lines: Seq[String])
    extends DiffResult

  // ███████████████████████████████████████████████████████████████████████████████████████████████████████████████████

  private object Diff {
    type IndexPair = (Int, Int)

    /** Finds the longest common subsequence built only from lines that occur exactly once in
      * `lines1` and exactly once in `lines2`.
      *
      * @return pairs of `(index in lines1, index in lines2)`, increasing in both components
      */
    def uniqueLCS(lines1: Seq[String], lines2: Seq[String]): Iterable[IndexPair] = {
      type Positions = HashMap[String, Int]

      // Position of every distinct line of lines1; a line seen more than once is tagged -1.
      val taggedPositions1: Positions =
        lines1.view.zipWithIndex.foldLeft(HashMap.empty: Positions) { case (seen, (text, at)) =>
          val marker = if (seen.contains(text)) -1 else at
          seen + ((text, marker))
        }

      // Keep only the lines that appeared once.
      val onceIn1 = taggedPositions1.filter { case (_, at) => at >= 0 }

      // Fold state: (candidate lines from lines1, lines2 index -> lines1 index, lines seen once in lines2).
      val seed = (onceIn1, HashMap.empty[Int, Int], HashMap.empty[String, Int])

      val (_, pos2ToPos1, _) =
        lines2.view.zipWithIndex.foldLeft(seed) {
          case (state @ (candidates, linked, seenIn2), (text, at)) =>
            if (!candidates.contains(text))
              state // not a (still valid) common line: ignore
            else if (seenIn2.contains(text))
              // Second sighting in lines2: retire the candidate and undo its earlier link.
              (candidates - text, linked - seenIn2(text), seenIn2)
            else
              (candidates, linked + ((at, candidates(text))), seenIn2 + ((text, at)))
        }

      // (lines2 index, lines1 index) pairs in lines2 order.
      val byPos2 = pos2ToPos1.toList.sortBy(_._1)

      // Compare pairs by their second component, i.e. the index in lines1.
      val byLine1Index: IndexPair => Ordered[IndexPair] =
        self => new Ordered[IndexPair] {
          def compare(that: IndexPair) = self._2 compare that._2
        }

      // With the pairs in lines2 order, a longest increasing run of lines1 indices is the LCS.
      val increasing = PatienceSort.LIS(byPos2)(byLine1Index)

      // Flip into (lines1 index, lines2 index).
      increasing.map { case (pos2, pos1) => (pos1, pos2) }
    }

    /** Collects the index pairs of matching lines inside the given windows of the two inputs.
      *
      * Unique common lines (see `uniqueLCS`) act as anchors; the gaps before, between and after the
      * anchors are processed recursively. When a window has no anchors, runs of equal lines at its
      * start, or failing that at its end, are matched directly so that equal but non-unique lines
      * are still paired up.
      *
      * @param lines1  the complete first input
      * @param lines2  the complete second input
      * @param bounds1 half-open `(from, until)` window into `lines1`
      * @param bounds2 half-open `(from, until)` window into `lines2`
      * @return `(index in lines1, index in lines2)` pairs, strictly increasing in both components
      */
    def recursiveMatch(lines1: Seq[String], lines2: Seq[String],
                       bounds1: (Int, Int), bounds2: (Int, Int)): List[IndexPair] = {
      val (lo1, hi1) = bounds1
      val (lo2, hi2) = bounds2

      if (lo1 == hi1 || lo2 == hi2)
        Nil // an empty window cannot contain matches
      else {
        // Anchor positions are relative to the start of each window.
        val anchors = uniqueLCS(lines1.slice(lo1, hi1), lines2.slice(lo2, hi2))

        // "Last matched position" starts one line before each window, so that the gap in front of
        // the first anchor begins at the first line of BOTH windows.
        val seed = ((lo1 - 1, lo2 - 1), Vector.empty[IndexPair])

        val ((last1, last2), anchored) =
          anchors.foldLeft(seed) { case (((prev1, prev2), acc), (rel1, rel2)) =>
            val pos1 = rel1 + lo1
            val pos2 = rel2 + lo2

            // Only recurse when both sides have lines between the previous match and this one.
            val between =
              if (prev1 + 1 < pos1 && prev2 + 1 < pos2)
                recursiveMatch(lines1, lines2, (prev1 + 1, pos1), (prev2 + 1, pos2))
              else
                Nil

            ((pos1, pos2), (acc ++ between) :+ ((pos1, pos2)))
          }

        // Longest run of equal lines that could fit in both windows.
        val maxRun = math.min(hi1 - lo1, hi2 - lo2)

        val rest: List[IndexPair] =
          if (anchored.nonEmpty)
            // Look for further matches between the last anchor and the end of the windows.
            recursiveMatch(lines1, lines2, (last1 + 1, hi1), (last2 + 1, hi2))
          else if (lines1(lo1) == lines2(lo2)) {
            // Equal (non-unique) lines at the start of both windows.
            val run = (0 until maxRun).takeWhile(i => lines1(lo1 + i) == lines2(lo2 + i)).length
            val prefix = List.tabulate(run)(i => (lo1 + i, lo2 + i))
            prefix ++ recursiveMatch(lines1, lines2, (lo1 + run, hi1), (lo2 + run, hi2))
          } else if (lines1(hi1 - 1) == lines2(hi2 - 1)) {
            // Equal (non-unique) lines at the end of both windows.
            val run = (1 to maxRun).takeWhile(i => lines1(hi1 - i) == lines2(hi2 - i)).length
            val start1 = hi1 - run
            val start2 = hi2 - run
            // Every line of the trailing run is a match; emit them in increasing order, after
            // whatever matches exist in front of the run.
            val suffix = List.tabulate(run)(i => (start1 + i, start2 + i))
            recursiveMatch(lines1, lines2, (lo1, start1), (lo2, start2)) ++ suffix
          } else
            Nil

        anchored.toList ++ rest
      }
    }

    /** Merges runs of consecutive matched line pairs into single `MatchResult` blocks.
      *
      * A pair extends the current run when both of its indices are exactly one past the run's end;
      * otherwise the current run is emitted and a new run of length 1 begins.
      *
      * @param results matched `(index in file 1, index in file 2)` pairs, in order
      * @return the runs, in the order they were encountered
      */
    def coalesceResults(results: Seq[IndexPair]): List[MatchResult] = {
      // State: (run start in file 1, run start in file 2, run length), plus finished runs in
      // reverse order. A start of -1 means that no run has been opened yet.
      val seed = ((-1, -1, 0), List.empty[MatchResult])

      val ((tail1, tail2, tailLen), finished) =
        results.foldLeft(seed) { case (((start1, start2, runLen), done), (idx1, idx2)) =>
          val runOpen   = start1 != -1
          val continues = runOpen && idx1 == start1 + runLen && idx2 == start2 + runLen

          if (continues)
            ((start1, start2, runLen + 1), done)
          else {
            val flushed =
              if (runOpen) MatchResult(start1, start2, runLen) :: done
              else done
            ((idx1, idx2, 1), flushed)
          }
        }

      // Emit the run that was still open when the input ended.
      val all =
        if (tailLen > 0) MatchResult(tail1, tail2, tailLen) :: finished
        else finished

      all.reverse
    }

    /** Computes the hunks that describe how to get from `lines1` to `lines2`.
      *
      * Matching blocks are found with `recursiveMatch` and `coalesceResults`; whatever lies between
      * consecutive blocks becomes a `Modify`, `Delete` or `Insert`, and each block itself an `Equal`.
      * Note that an `Equal` is created with the positions just past the block, while the other
      * hunks are created with the positions at which they start.
      *
      * @return the hunks in input order
      */
    def diff(lines1: Seq[String], lines2: Seq[String]): Iterable[DiffResult] = {
      val matched = recursiveMatch(lines1, lines2, (0, lines1.length), (0, lines2.length))

      // A zero-length block at the end of both inputs flushes any trailing unmatched lines.
      val blocks = coalesceResults(matched) :+ MatchResult(lines1.length, lines2.length, 0)

      // State: current position in each input, plus the hunks so far in reverse order.
      val seed: (IndexPair, List[DiffResult]) = ((0, 0), Nil)

      val (_, reversed) =
        blocks.foldLeft(seed) { case (((pos1, pos2), acc), MatchResult(index1, index2, blockLen)) =>
          // Classify the unmatched region in front of this block.
          val withChange: List[DiffResult] =
            (pos1 < index1, pos2 < index2) match {
              case (true, true) =>
                Modify(pos1, pos2, (index1 - pos1, index2 - pos2), lines1.slice(pos1, index1), lines2.slice(pos2, index2)) :: acc
              case (true, false) =>
                Delete(pos1, pos2, (index1 - pos1, 0), lines1.slice(pos1, index1)) :: acc
              case (false, true) =>
                Insert(pos1, pos2, (0, index2 - pos2), lines2.slice(pos2, index2)) :: acc
              case (false, false) =>
                acc
            }

          val end1 = index1 + blockLen
          val end2 = index2 + blockLen

          val withEqual =
            if (blockLen > 0)
              Equal(end1, end2, (blockLen, blockLen), lines1.slice(index1, end1)) :: withChange
            else
              withChange

          ((end1, end2), withEqual)
        }

      // Hunks were prepended, so flip them into input order.
      reversed.reverse
    }
  }

  /** Represents a block of matching lines between two files:
    * `file1[file1Index : file1Index + length] == file2[file2Index : file2Index + length]`.
    *
    * @param file1Index start of the block in the first file
    * @param file2Index start of the block in the second file
    * @param length     number of matching lines in the block
    */
  final case class MatchResult(file1Index: Int, file2Index: Int, length: Int)

  // ███████████████████████████████████████████████████████████████████████████████████████████████████████████████████

  private object PatienceSort {

    /** Finds a longest strictly increasing subsequence of `source` using patience sorting.
      *
      * Each element is placed on the leftmost pile whose top is not smaller than it (a new pile is
      * started when there is none), and remembers the top of the pile to its left at that moment.
      * Following those back-pointers from the top of the last pile yields the subsequence.
      *
      * Back-pointers are keyed by element, so elements are expected to be distinct.
      *
      * @param source the elements, in sequence order
      * @param ev     view of an element as an `Ordered`, defining the ordering to use
      * @return the subsequence in increasing order; empty when `source` is empty
      */
    def LIS[A](source: Iterable[A])(implicit ev: A => Ordered[A]): Iterable[A] = {
      // Pile tops live in a Vector because they are indexed, measured and appended to for every
      // element; each of those is linear-time on a List.
      val seed = (Vector.empty[A], HashMap.empty[A, Option[A]])

      val (pileTops, backPointers) =
        source.foldLeft(seed) { case ((tops, links), elem) =>
          // Index of the pile that receives elem.
          val index = bisect(tops, elem)

          val nextTops =
            if (index == tops.length) tops :+ elem
            else tops.updated(index, elem)

          // Unless elem went onto the first pile, it points back at the top of the pile to its left.
          val previous = if (index > 0) Some(tops(index - 1)) else None

          (nextTops, links + ((elem, previous)))
        }

      // Walk the chain of back-pointers, building the result in increasing order.
      @scala.annotation.tailrec
      def followPointers(current: Option[A], acc: List[A]): List[A] =
        current match {
          case Some(v) => followPointers(backPointers(v), v :: acc)
          case None    => acc
        }

      followPointers(pileTops.lastOption, Nil)
    }

    /** Binary search for the leftmost position in `elems(lo until hi)` at which `elem` could be
      * inserted, i.e. the first index whose element is not smaller than `elem`.
      *
      * @param elems elements sorted in increasing order
      * @param elem  the element to locate
      * @param lo    inclusive lower bound of the search range
      * @param hi    exclusive upper bound of the search range; defaults to `elems.length`
      * @throws IllegalArgumentException if `lo` is negative
      */
    @scala.annotation.tailrec
    def bisect[A](elems: Seq[A], elem: A, lo: Int = 0, hi: Option[Int] = None)(implicit ev: A => Ordered[A]): Int = {
      if (lo < 0)
        throw new IllegalArgumentException("Lower threshold out of range")

      val high = hi.getOrElse(elems.length)

      if (lo >= high)
        lo
      else {
        // Unsigned shift keeps the midpoint correct even if lo + high overflows Int.
        val mid = (lo + high) >>> 1

        if (ev(elems(mid)) < elem)
          bisect(elems, elem, mid + 1, Some(high))
        else
          bisect(elems, elem, lo, Some(mid))
      }
    }
  }

}




// © 2015 - 2025 Weber Informatics LLC | Privacy Policy