laika.parse.ParserContext.scala Maven / Gradle / Ivy

Go to download
/*
 * Copyright 2013-2017 the original author or authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package laika.parse

import scala.collection.mutable.ArrayBuffer

/** The immutable state of a parsing operation: the input being parsed,
  * the current offset into that input and a nesting level.
  *
  * @param source the source wrapping the full input string
  * @param offset the index of the current character within the input string
  * @param nestLevel the nesting level carried along with this context;
  *                  `consume` and `reverse` pass it on unchanged
  *
  * @author Jens Halm
  */
case class ParserContext (source: Source, offset: Int, nestLevel: Int) {

  /** The complete input string, i.e. the characters before
    * as well as after the current offset.
    */
  val input: String = source.value

  /** `true` if the offset of this context lies behind
    * the last character of the input string.
    */
  def atEnd: Boolean = !(offset < input.length)

  /** The number of characters between the current offset
    * and the end of the input string.
    */
  def remaining: Int = input.length - offset

  /** The character the current offset points to.
    */
  def char: Char = charAt(0)

  /** The character found `relativeOffset` characters away from the current offset.
    *
    * Throws an `IndexOutOfBoundsException` if the resulting index
    * lies outside of the input string.
    */
  def charAt (relativeOffset: Int): Char = {
    val index = offset + relativeOffset
    // indices behind the end are rejected here, negative ones by `String.charAt`
    if (index >= input.length) throw new IndexOutOfBoundsException(index.toString)
    input.charAt(index)
  }

  /** Returns the next `numChars` characters, starting at the current offset,
    * without moving the offset.
    *
    * Throws an `IndexOutOfBoundsException` if `numChars` is negative or
    * larger than the number of characters left in the input.
    */
  def capture (numChars: Int): String = numChars match {
    case 0 => ""
    case n if n < 0 || offset + n > input.length => throw new IndexOutOfBoundsException(n.toString)
    case n => input.substring(offset, offset + n)
  }

  /** Returns a `ParserContext` whose offset is moved by `numChars` characters.
    * The new offset is not validated against the bounds of the input.
    * For `numChars == 0` this instance itself is returned.
    */
  def consume (numChars: Int): ParserContext =
    if (numChars == 0) this
    else copy(offset = offset + numChars)

  /** The position in the input string the current offset refers to.
    */
  def position: Position = Position(source, offset)

  /** Returns a new `ParserContext` operating on the reversed input string.
    *
    * The offset of the new context is the number of `remaining` characters
    * of this context. Both contexts therefore sit at the same boundary between
    * two characters: the characters preceding the offset of this context are
    * the ones the new context reads next, in reverse order.
    *
    * This is a low-level optimization for parsers that look for strings like
    * email addresses where the first character is not significant, so that
    * parsing backwards from any `@` encountered in the input provides better
    * performance.
    *
    * @return a context for the reversed input with the same nesting level
    */
  def reverse: ParserContext = copy(source = source.reverse, offset = remaining)

}

/** Companion for creating new `ParserContext` instances.
  */
object ParserContext {

  /** The size of the read buffer used when no usable size hint is available.
    */
  private val DefaultBufferSize: Int = 8 * 1024

  /** Builds a new instance for the specified input string,
    * with the offset and the nesting level both set to 0.
    */
  def apply (input: String): ParserContext = ParserContext(Source(input), 0, 0)

  /** Builds a new instance for the specified input string and nesting level,
    * with the offset set to 0.
    *
    * Keeping track of the nesting level allows to protect against malicious
    * input that would otherwise cause endless recursion triggering stack
    * overflows or ultra-slow performance.
    */
  def apply (input: String, nestLevel: Int): ParserContext = ParserContext(Source(input), 0, nestLevel)

  /** Builds a new instance for the specified input reader,
    * reading it with the default buffer size.
    *
    * The reader is consumed until its end, but not closed.
    */
  def apply (input: java.io.Reader): ParserContext = apply(input, DefaultBufferSize)

  /** Builds a new instance for the specified input reader, providing a hint
    * for the expected size of the input string.
    *
    * The reader is consumed until its end, but not closed; closing it remains
    * the responsibility of the caller.
    *
    * @param input the reader to obtain the entire input string from
    * @param sizeHint the size of the buffer used for reading; values that are
    *                 not positive are replaced by the default buffer size
    */
  def apply (input: java.io.Reader, sizeHint: Int): ParserContext = {

    // A zero-length buffer would make `read` return 0 forever (endless loop)
    // and a negative length cannot be allocated at all.
    val bufferSize = if (sizeHint > 0) sizeHint else DefaultBufferSize
    val chunk = new Array[Char](bufferSize)
    val content = new StringBuilder

    var numCharsRead = input.read(chunk, 0, chunk.length)
    while (numCharsRead != -1) { // -1 signals the end of the stream
      content.appendAll(chunk, 0, numCharsRead)
      numCharsRead = input.read(chunk, 0, chunk.length)
    }

    apply(content.toString)
  }

}

/** Wraps the input string of a parsing operation.
  *
  * @param value the full input string
  */
case class Source (value: String) {

  /** The offsets of all line starts in ascending order: 0 for the first line,
    * the offset following each `'\n'` character, and finally the length
    * of the input (eof) as the last element.
    */
  lazy val lineStarts: Array[Int] = {
    val starts = ArrayBuffer(0)
    // jump from one newline to the next; each line starts right behind a newline
    var newline = value.indexOf('\n')
    while (newline >= 0) {
      starts += newline + 1
      newline = value.indexOf('\n', newline + 1)
    }
    starts += value.length
    starts.toArray
  }

  /** A `Source` instance holding the characters of this source in reverse order.
    */
  lazy val reverse: Source = Source(value.reverse)

}

/** An offset into a source string. Its main purpose is error reporting,
  * e.g. printing a visual representation of the line containing the error.
  *
  * @param s the source for this position
  * @param offset the offset into the source string
  *
  * @author Jens Halm
  */
case class Position(s: Source, offset: Int) {

  /** The full string of the source this position refers to.
    */
  val source: String = s.value

  /** The number of the line containing this position, starting at 1.
    */
  lazy val line: Int = {
    val starts = s.lineStarts
    java.util.Arrays.binarySearch(starts, offset) match {
      // a hit on the last entry is the EOF position, which is not on a new line
      case eof if eof == starts.length - 1 => eof
      // no exact hit: binarySearch returns `-(insertion point) - 1`
      case miss if miss < 0 => -miss - 1
      // exact hit on a line start; line numbers are 1-based
      case hit => hit + 1
    }
  }

  /** The number of the column this position refers to, starting at 1.
    */
  lazy val column: Int = {
    val lineStart = s.lineStarts(line - 1)
    offset - lineStart + 1
  }

  /** The text of the line containing this position, without a trailing newline.
    */
  lazy val lineContent: String = {
    val starts = s.lineStarts
    val withNewline = source.substring(starts(line - 1), starts(line))
    withNewline.stripSuffix("\n")
  }

  /** The text of the line containing this position, followed by a second line
    * with a caret marking the column. Example:
    * {{{
    *   The content of the current line with a caret under the c.
    *       ^
    * }}}
    */
  def lineContentWithCaret: String = {
    val content = lineContent
    val padding = " " * (column - 1)
    s"$content\n$padding^"
  }

  /** Renders this position in the form `line.column`.
    */
  override lazy val toString: String = Seq(line, column).mkString(".")

}