All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.sageserpent.kineticmerge.core.MappedContentSources.scala Maven / Gradle / Ivy

package com.sageserpent.kineticmerge.core

import com.sageserpent.kineticmerge.core.MappedContentSourcesOfTokens.{TextPosition, linebreakExtraction}
import com.typesafe.scalalogging.StrictLogging
import pprint.Tree

import java.util.Arrays as JavaArrays
import scala.collection.immutable.SortedSet
import scala.collection.mutable.Map as MutableMap

trait MappedContentSources[Path, Element]
    extends Sources[Path, Element]
    with StrictLogging:
  val contentsByPath: Map[Path, IndexedSeq[Element]]
  val label: String

  override def filesByPathUtilising(
      mandatorySections: Set[Section[Element]],
      candidateGapChunksByPath: Map[Path, Set[IndexedSeq[Element]]]
  ): Map[Path, File[Element]] =
    val sectionsByPath = mandatorySections.groupBy(pathFor)

    contentsByPath.map { case (path, content) =>
      val pertinentSections = sectionsByPath.getOrElse(path, Set.empty)

      val candidateGapChunksInDescendingSizeOrder = SortedSet.from(
        candidateGapChunksByPath.getOrElse(path, Set.empty)
      )(Ordering.by((_: IndexedSeq[Element]).size).reverse)

      path -> File(
        if content.nonEmpty then
          if pertinentSections.nonEmpty then
            val sectionsInStartOffsetOrder =
              // Sort by the lowest start offset and then use the largest one
              // past end offset as a tiebreaker.
              pertinentSections.toSeq.sortBy(section =>
                section.startOffset -> -section.onePastEndOffset
              )

            sectionsInStartOffsetOrder
              .zip(sectionsInStartOffsetOrder.tail)
              .foreach((first, second) =>
                if first.onePastEndOffset > second.startOffset then
                  require(
                    first.onePastEndOffset < second.onePastEndOffset,
                    s"Subsumed section ${pprintCustomised(second)} is subsumed by section: ${pprintCustomised(first)}."
                  )
                  throw new OverlappingSections(path, first, second)
              )

            def gapFilling(gapStart: Int, onePastGapEnd: Int): IndexedSeq[Section[Element]] =
              val gapSize = onePastGapEnd - gapStart

              val gapContent = contentsByPath(path)
                .slice(gapStart, onePastGapEnd)

              val biggestChunkContainedInGapContent =
                candidateGapChunksInDescendingSizeOrder
                  .dropWhile(chunk =>
                    chunk.size > gapSize || !gapContent
                      .containsSlice(chunk)
                  )
                  .headOption

              biggestChunkContainedInGapContent.fold(
                ifEmpty =
                  val fillerSection = this.section(path)(
                    gapStart,
                    onePastGapEnd - gapStart
                  )
                  
                  IndexedSeq(fillerSection)
              ) { chunk =>
                val chunkStartInGapContent =
                  gapContent.indexOfSlice(chunk)

                assume(-1 != chunkStartInGapContent)

                val onePastChunkEnd =
                  chunkStartInGapContent + chunk.size

                val prefixSection =
                  Option.when(0 < chunkStartInGapContent)(
                    this.section(path)(
                      gapStart,
                      chunkStartInGapContent
                    )
                  )

                val chunkSection = this.section(path)(
                  gapStart + chunkStartInGapContent,
                  chunk.size
                )

                val suffixSection =
                  Option.when(gapSize > onePastChunkEnd)(
                    this.section(path)(
                      gapStart + onePastChunkEnd,
                      gapSize - onePastChunkEnd
                    )
                  )

                val fillerSections = (prefixSection ++ Iterator(
                  chunkSection
                ) ++ suffixSection).toIndexedSeq
                
                fillerSections
              }
            end gapFilling


            val (onePastLastEndOffset, contiguousSections) =
              sectionsInStartOffsetOrder.foldLeft(
                0 -> Vector.empty[Section[Element]]
              ) { case ((onePastLastEndOffset, partialResult), section) =>
                section.onePastEndOffset ->
                  ((if onePastLastEndOffset < section.startOffset then
                      // Fill the gap - this may be a leading gap before the
                      // first section or between two sections.

                      val fillerSections = gapFilling(gapStart = onePastLastEndOffset, onePastGapEnd = section.startOffset)
  
                      logger.debug(
                        s"Filling gap on side: $label at path: $path prior to following section with: ${pprintCustomised(fillerSections)}."
                      )
                      
                      partialResult ++ fillerSections
                    else partialResult)
                  :+ section)
              }

            if content.size > onePastLastEndOffset then
              // Fill out the final gap with new sections to cover the entire
              // content.
              val fillerSections = gapFilling(gapStart = onePastLastEndOffset, onePastGapEnd = content.size)

              logger.debug(
                s"Filling final gap on side: $label at path: $path with ${pprintCustomised(fillerSections)}."
              )
              contiguousSections ++ fillerSections
            else contiguousSections
            end if
          else
            Vector(
              section(path = path)(
                startOffset = 0,
                size = content.length
              )
            )
        else
          // Section content is *never* empty: if there is no content at this
          // path, how could there be mandatory sections using it?
          require(pertinentSections.isEmpty)
          Vector.empty
      )
    }
  end filesByPathUtilising

  override def pathFor(section: Section[Element]): Path =
    section match
      case section: SectionImplementation
          if contentsByPath.contains(section.path) =>
        section.path

  override def section(path: Path)(
      startOffset: Int,
      size: Int
  ): Section[Element] = new SectionImplementation(path, startOffset, size)

  override def paths: Set[Path] = contentsByPath.keySet

  class SectionImplementation(
      val path: Path,
      override val startOffset: Int,
      override val size: Int
  ) extends Section[Element]:

    override def toString: String = pprintCustomised(this).plainText

    def render: Tree =
      Tree.Apply(
        "Section",
        Iterator(
          Tree.KeyValue("label", pprintCustomised.treeFrom(label)),
          Tree.KeyValue("path", pprintCustomised.treeFrom(path)),
          Tree.KeyValue("startOffset", pprintCustomised.treeFrom(startOffset)),
          Tree.KeyValue("size", pprintCustomised.treeFrom(size)),
          Tree.KeyValue("content", pprintCustomised.treeFrom(content))
        )
      )
    end render

    override def content: IndexedSeq[Element] =
      contentsByPath(path).slice(startOffset, onePastEndOffset)
  end SectionImplementation

  class OverlappingSections(
      path: Path,
      first: Section[Element],
      second: Section[Element]
  ) extends RuntimeException({
        val overlap = section(path)(
          startOffset = second.startOffset,
          size = first.onePastEndOffset - second.startOffset
        )

        s"Overlapping section detected on side: $label at path: $path, $first (content: ${first.content}) overlaps with start of section: $second (content: ${second.content}), overlap content: ${overlap.content}."
      }):

  end OverlappingSections
end MappedContentSources

object MappedContentSourcesOfTokens:
  private val linebreakExtraction =
    """(?m:.*$(\s+?)^)""".r // Matches a single, possibly empty line; the nested parentheses capture the terminating linebreak sequence. A final line without a terminating linebreak is not matched.

      /** @param line
        *   One-relative line number within the entire content at some path.
        * @param characterOffset
        *   Zero-relative character offset from the start of the line.
        */
  case class TextPosition(
      line: Int,
      characterOffset: Int
  )
end MappedContentSourcesOfTokens

case class MappedContentSourcesOfTokens[Path](
    override val contentsByPath: Map[Path, IndexedSeq[Token]],
    override val label: String
) extends MappedContentSources[Path, Token]:
  override def section(path: Path)(
      startOffset: Int,
      size: Int
  ): Section[Token] = new SectionImplementation(path, startOffset, size):
    override def render: Tree =
      val contentLimit = 10

      // The content is the text covered by a prefix of the section's tokens.
      val (contentLabel, revealedContent) =
        if content.size > contentLimit then
          "content summary" -> (content
            .take((1 + contentLimit) / 2)
            .map(_.text)
            .mkString ++ " ... " ++ content
            .drop(content.size - contentLimit / 2)
            .map(_.text)
            .mkString)
        else "content" -> content.map(_.text).mkString

      Tree.Apply(
        "Section",
        Iterator(
          Tree.KeyValue("label", pprintCustomised.treeFrom(label)),
          Tree.KeyValue("path", pprintCustomised.treeFrom(this.path)),
          Tree.KeyValue(
            "start",
            pprintCustomised.treeFrom(
              LineInformation(this.path)
                .textPositionFor(
                  this.startOffset
                )
            )
          ),
          Tree.KeyValue(
            "onePastEnd",
            pprintCustomised.treeFrom(
              LineInformation(this.path)
                .textPositionFor(
                  this.startOffset + this.size
                )
            )
          ),
          Tree.KeyValue(
            "startTokenIndex",
            pprintCustomised.treeFrom(this.startOffset)
          ),
          Tree.KeyValue("sizeInTokens", pprintCustomised.treeFrom(this.size)),
          Tree.KeyValue(
            contentLabel,
            pprintCustomised.treeFrom(revealedContent)
          )
        )
      )
    end render

  private class LineInformation(contents: IndexedSeq[Token]):
    private val cumulativeCharacterOffsetsOfTokensIncludingThePhantomOffTheEnd
        : IndexedSeq[Int] =
      (0 until contents.size)
        .scanLeft(0)((cumulativeCharacterOffset, tokenIndex) =>
          cumulativeCharacterOffset + contents(tokenIndex).text.size
        )

    private val cumulativeCharacterOffsetsOfLines: Array[Int] =
      val textCoveredBySection = contents.map(_.text).mkString

      val offsetsFollowingLinebreaks =
        linebreakExtraction.findAllMatchIn(textCoveredBySection).map(_.end(1))

      (Iterator(0) ++ offsetsFollowingLinebreaks).toArray
    end cumulativeCharacterOffsetsOfLines

    def textPositionFor(
        tokenIndex: Int
    ): TextPosition =
      val characterOffsetOfToken =
        cumulativeCharacterOffsetsOfTokensIncludingThePhantomOffTheEnd(
          tokenIndex
        )

      val locationOrInsertion = JavaArrays.binarySearch(
        cumulativeCharacterOffsetsOfLines,
        characterOffsetOfToken
      )

      val lineIndex =
        if 0 > locationOrInsertion then -(locationOrInsertion + 2)
        else locationOrInsertion

      TextPosition(
        line = 1 + lineIndex,
        characterOffset =
          characterOffsetOfToken - cumulativeCharacterOffsetsOfLines(lineIndex)
      )
    end textPositionFor
  end LineInformation

  private object LineInformation:
    private val lineInformationByPath: MutableMap[Path, LineInformation] =
      MutableMap.empty

    def apply(path: Path): LineInformation =
      lineInformationByPath.getOrElseUpdate(
        path,
        new LineInformation(contentsByPath(path))
      )
  end LineInformation

end MappedContentSourcesOfTokens




© 2015 - 2025 Weber Informatics LLC | Privacy Policy