All Downloads are FREE. Search and download functionalities are using the official Maven repository.

dotty.tools.scaladoc.tasty.comments.wiki.Parser.scala Maven / Gradle / Ivy

There is a newer version: 3.6.0-RC1-bin-20240903-21a3d39-NIGHTLY
Show newest version
package dotty.tools.scaladoc
package tasty.comments.wiki

import scala.collection.mutable
import scala.annotation.tailrec

import dotty.tools.scaladoc.tasty.comments.Regexes._

/** Original wikiparser from NSC
  * @author Ingo Maier
  * @author Manohar Jonnalagedda
  * @author Gilles Dubochet
  */
final class Parser(
  val buffer: String,
  linkResolver: (String, Option[Inline]) => Inline
) extends CharReader(buffer) { wiki =>
  var summaryParsed = false

  def document(): Body = {
    val blocks = new mutable.ListBuffer[Block]
    while (char != endOfText)
      blocks += block()
    Body(blocks.toList)
  }

  /* BLOCKS */

  /** {{{ block ::= code | title | hrule | listBlock | para }}} */
  def block(): Block = {
    if (checkSkipInitWhitespace("{{{"))
      code()
    else if (checkSkipInitWhitespace('='))
      title()
    else if (checkSkipInitWhitespace("----"))
      hrule()
    else if (checkList)
      listBlock()
    else if (checkTableRow)
      table()
    else {
      para()
    }
  }

  /** listStyle ::= '-' spc | '1.' spc | 'I.' spc | 'i.' spc | 'A.' spc | 'a.' spc
    * Characters used to build lists and their constructors */
  protected val listStyles = Map[String, (Seq[Block] => Block)](
    "- "  -> ( UnorderedList(_) ),
    "1. " -> ( OrderedList(_,"decimal") ),
    "I. " -> ( OrderedList(_,"upperRoman") ),
    "i. " -> ( OrderedList(_,"lowerRoman") ),
    "A. " -> ( OrderedList(_,"upperAlpha") ),
    "a. " -> ( OrderedList(_,"lowerAlpha") )
  )

  /** Checks if the current line is formed with more than one space and one the listStyles */
  def checkList =
    (countWhitespace > 0) && (listStyles.keys exists { checkSkipInitWhitespace(_) })

  /** {{{
    * nListBlock ::= nLine { mListBlock }
    *      nLine ::= nSpc listStyle para '\n'
    * }}}
    * Where n and m stand for the number of spaces. When `m > n`, a new list is nested. */
  def listBlock(): Block = {

    /** Consumes one list item block and returns it, or None if the block is
      * not a list or a different list. */
    def listLine(indent: Int, style: String): Option[Block] =
      if (countWhitespace > indent && checkList)
        Some(listBlock())
      else if (countWhitespace != indent || !checkSkipInitWhitespace(style))
        None
      else {
        jumpWhitespace()
        jump(style)
        val p = Paragraph(getInline(isInlineEnd = false))
        blockEnded("end of list line ")
        Some(p)
      }

    /** Consumes all list item blocks (possibly with nested lists) of the
      * same list and returns the list block. */
    def listLevel(indent: Int, style: String): Block = {
      val lines = mutable.ListBuffer.empty[Block]
      var line: Option[Block] = listLine(indent, style)
      while (line.isDefined) {
        lines += line.get
        line = listLine(indent, style)
      }
      val constructor = listStyles(style)
      constructor(lines.toList)
    }

    val indent = countWhitespace
    val style = (listStyles.keys find { checkSkipInitWhitespace(_) }).getOrElse(listStyles.keys.head)
    listLevel(indent, style)
  }

  def code(): Block = {
    jumpWhitespace()
    jump("{{{")
    val str = readUntil("}}}")
    if (char == endOfText)
      reportError("unclosed code block")
    else
      jump("}}}")
    blockEnded("code block")
    Code(normalizeIndentation(str))
  }

  /** {{{ title ::= ('=' inline '=' | "==" inline "==" | ...) '\n' }}} */
  def title(): Block = {
    jumpWhitespace()
    val inLevel = repeatJump('=')
    val text = getInline(check("=" * inLevel))
    val outLevel = repeatJump('=', inLevel)
    if (inLevel != outLevel)
      reportError("unbalanced or unclosed heading")
    blockEnded("heading")
    Title(text, inLevel)
  }

  /** {{{ hrule ::= "----" { '-' } '\n' }}} */
  def hrule(): Block = {
    jumpWhitespace()
    repeatJump('-')
    blockEnded("horizontal rule")
    HorizontalRule
  }


    /** Starts and end with a cell separator matching the minimal row || and all other possible rows */
    private val TableRow = """^\|.*\|$""".r
    private val TableCellStart = "|"

    /* Checks for a well-formed table row */
    private def checkTableRow = {
      check(TableCellStart) && {
        val newlineIdx = buffer.indexOf('\n', offset)
        newlineIdx != -1 &&
          TableRow.findFirstIn(buffer.substring(offset, newlineIdx)).isDefined
      }
    }

  /** {{{
    * table         ::= headerRow '\n' delimiterRow '\n' dataRows '\n'
    * content       ::= inline-content
    * row           ::= '|' { content '|' }+
    * headerRow     ::= row
    * dataRows      ::= row*
    * align         ::= ':' '-'+ | '-'+ | '-'+ ':' | ':' '-'+ ':'
    * delimiterRow :: = '|' { align '|' }+
    * }}}
    */
  def table(): Block = {
    /* Helpers */

    def peek(tag: String): Unit = {
      val peek: String = buffer.substring(offset)
      val limit = 60
      val limitedPeek = peek.substring(0, limit min peek.length)
      println(s"peek: $tag: '$limitedPeek'")
    }

    /* Accumulated state */

    var header: Option[Row] = None

    val rows = mutable.ListBuffer.empty[Row]

    val cells = mutable.ListBuffer.empty[Cell]

    def finalizeCells(): Unit = {
      if (cells.nonEmpty) {
        rows += Row(cells.toList)
      }
      cells.clear()
    }

    def finalizeHeaderCells(): Unit = {
      if (cells.nonEmpty) {
        if (header.isDefined) {
          reportError("more than one table header")
        } else {
          header = Some(Row(cells.toList))
        }
      }
      cells.clear()
    }

    val escapeChar = "\\"

    /* Poor man's negative lookbehind */
    def checkInlineEnd =
      (check(TableCellStart) && !check(escapeChar, -1)) || check("\n")

    def decodeEscapedCellMark(text: String) = text.replace(escapeChar + TableCellStart, TableCellStart)

    def isEndOfText = char == endOfText

    //def isNewline = char == endOfLine

    //def skipNewline() = jump(endOfLine)

    def isStartMarkNewline = check(TableCellStart + endOfLine)

    def skipStartMarkNewline() = jump(TableCellStart + endOfLine)

    def isStartMark = check(TableCellStart)

    def skipStartMark() = jump(TableCellStart)

    def contentNonEmpty(content: Inline) = content != Text("")

      /* Parse cells of a table.
      * @param cellStartMark The char indicating the start or end of a cell
      * @param finalizeRow   Function to invoke when the row has been fully parsed
      */
    def parseCells(cellStartMark: String, finalizeRow: () => Unit): Unit = {
      def jumpCellStartMark() = {
        if (!jump(cellStartMark)) {
          peek(s"Expected $cellStartMark")
          sys.error(s"Precondition violated: Expected $cellStartMark.")
        }
      }

      val startPos = offset

      jumpCellStartMark()

      val content = Paragraph(getInline(isInlineEnd = checkInlineEnd, textTransform = decodeEscapedCellMark))

      parseCells0(content :: Nil, finalizeRow, startPos, offset)
    }

    // Continue parsing a table row.
    //
    // After reading inline content the following conditions will be encountered,
    //
    //    Case : Next Chars
    //    ..................
    //      1  : end-of-text
    //      2  : '|' '\n'
    //      3  : '|'
    //      4  : '\n'
    //
    // Case 1.
    // State : End of text
    // Action: Store the current contents, close the row, report warning, stop parsing.
    //
    // Case 2.
    // State : The cell separator followed by a newline
    // Action: Store the current contents, skip the cell separator and newline, close the row, stop parsing.
    //
    // Case 3.
    // State : The cell separator not followed by a newline
    // Action: Store the current contents, skip the cell separator, continue parsing the row.
    //
    @tailrec def parseCells0(
                              contents: List[Block],
                              finalizeRow: () => Unit,
                              progressPreParse: Int,
                              progressPostParse: Int
                            ): Unit = {

      def storeContents() = cells += Cell(contents.reverse)

      val startPos = offset

      // The ordering of the checks ensures the state checks are correct.
      if (progressPreParse == progressPostParse) {
        peek("no-progress-table-row-parsing")
        sys.error("No progress while parsing table row")
      } else if (isEndOfText) {
        // peek("1: end-of-text")
        // Case 1
        storeContents()
        finalizeRow()
        reportError("unclosed table row")
      } else if (isStartMarkNewline) {
        // peek("2: start-mark-new-line/before")
        // Case 2
        storeContents()
        finalizeRow()
        skipStartMarkNewline()
        // peek("2: start-mark-new-line/after")
      } else if (isStartMark) {
        // peek("3: start-mark")
        // Case 3
        storeContents()
        skipStartMark()
        val content = getInline(isInlineEnd = checkInlineEnd, textTransform = decodeEscapedCellMark)
        // TrailingCellsEmpty produces empty content
        val accContents = if (contentNonEmpty(content)) Paragraph(content) :: Nil else Nil
        parseCells0(accContents, finalizeRow, startPos, offset)
      } else {
        // Case π√ⅈ
        // When the impossible happens leave some clues.
        reportError("unexpected table row markdown")
        peek("parseCell0")
        storeContents()
        finalizeRow()
      }
    }

    /* Parsing */

    jumpWhitespace()

    parseCells(TableCellStart, () => finalizeHeaderCells())

    while (checkTableRow) {
      val initialOffset = offset

      parseCells(TableCellStart, () => finalizeCells())

      /* Progress should always be made */
      if (offset == initialOffset) {
        peek("no-progress-table-parsing")
        sys.error("No progress while parsing table")
      }
    }

    /* Finalize */

    /* Structural consistency checks and coercion */

    // https://github.github.com/gfm/#tables-extension-
    // TODO: The header row must match the delimiter row in the number of cells. If not, a table will not be recognized:
    // TODO: Break at following block level element: The table is broken at the first empty line, or beginning of another block-level structure:
    // TODO: Do not return a table when: The header row must match the delimiter row in the number of cells. If not, a table will not be recognized

    if (cells.nonEmpty) {
      reportError(s"Parsed and unused content: $cells")
    }
    assert(header.isDefined, "table header was not parsed")
    val enforcedCellCount = header.get.cells.size

    def applyColumnCountConstraint(row: Row, defaultCell: Cell, rowType: String): Row = {
      if (row.cells.size == enforcedCellCount)
        row
      else if (row.cells.size > enforcedCellCount) {
        val excess = row.cells.size - enforcedCellCount
        reportError(s"Dropping $excess excess table $rowType cells from row.")
        Row(row.cells.take(enforcedCellCount))
      } else {
        val missing = enforcedCellCount - row.cells.size
        Row(row.cells ++ List.fill(missing)(defaultCell))
      }
    }

    // TODO: Abandon table parsing when the delimiter is missing instead of fixing and continuing.
    val delimiterRow :: dataRows = {
      if (rows.nonEmpty) rows.toList
      else {
        reportError("Fixing missing delimiter row")
        Row(Cell(Paragraph(Text("-")) :: Nil) :: Nil) :: Nil
      }
    }: @unchecked

    if (delimiterRow.cells.isEmpty) sys.error("TODO: Handle table with empty delimiter row")

    val constrainedDelimiterRow = applyColumnCountConstraint(delimiterRow, delimiterRow.cells(0), "delimiter")

    val constrainedDataRows = dataRows.map(applyColumnCountConstraint(_, Cell(Nil), "data"))

    /* Convert the row following the header row to column options */

    val leftAlignmentPattern = "^:?-++$".r
    val centerAlignmentPattern = "^:-++:$".r
    val rightAlignmentPattern = "^-++:$".r

    import ColumnOption._
    /* Encourage user to fix by defaulting to least ignorable fix. */
    val defaultColumnOption = ColumnOptionRight
    val columnOptions = constrainedDelimiterRow.cells.map {
      alignmentSpecifier =>
        alignmentSpecifier.blocks match {
          // TODO: Parse the second row without parsing inline markdown
          // TODO: Save pos when delimiter row is parsed and use here in reported errors
          case Paragraph(Text(as)) :: Nil =>
            as.trim match {
              case leftAlignmentPattern(_*) => ColumnOptionLeft
              case centerAlignmentPattern(_*) => ColumnOptionCenter
              case rightAlignmentPattern(_*) => ColumnOptionRight
              case x =>
                reportError(s"Fixing invalid column alignment: $x")
                defaultColumnOption
            }
          case x =>
            reportError(s"Fixing invalid column alignment: $x")
            defaultColumnOption
        }
    }

    if (check("\n", -1)) {
      prevChar()
    } else {
      peek("expected-newline-missing")
      sys.error("table parsing left buffer in unexpected state")
    }

    blockEnded("table")
    Table(header.get, columnOptions, constrainedDataRows)
  }

  /** {{{ para ::= inline '\n' }}} */
  def para(): Block = {
    val p =
      if (summaryParsed)
        Paragraph(getInline(isInlineEnd = false))
      else {
        val s = summary()
        val r =
          if (checkParaEnded()) List(s) else List(s, getInline(isInlineEnd = false))
        summaryParsed = true
        Paragraph(Chain(r))
      }
    while (char == endOfLine && char != endOfText)
      nextChar()
    p
  }

  /* INLINES */

  val OPEN_TAG = "^<([A-Za-z]+)( [^>]*)?(/?)>$".r
  val CLOSE_TAG = "^$".r
  private def readHTMLFrom(begin: HtmlTag): String = {
    val list = mutable.ListBuffer.empty[String]
    val stack = mutable.ListBuffer.empty[String]

    begin.close match {
      case Some(HtmlTag(CLOSE_TAG(s))) =>
        stack += s
      case _ =>
        return ""
    }

    while ({
      val str = readUntil { char == safeTagMarker || char == endOfText }
      nextChar()

      list += str

      str match {
        case OPEN_TAG(s, _, standalone) => {
          if (standalone != "/") {
            stack += s
          }
        }
        case CLOSE_TAG(s) => {
          if (s == stack.last) {
            stack.remove(stack.length-1)
          }
        }
        case _ => ;
      }

      stack.length > 0 && char != endOfText
    }) do {}

    list.mkString
  }

  def getInline(isInlineEnd: => Boolean, textTransform: String => String = identity): Inline = {

      def inline0(): Inline = {
        if (char == safeTagMarker) {
          val tag = htmlTag()
          HtmlTag(tag.data + readHTMLFrom(tag))
        }
        else if (check("'''")) bold()
        else if (check("''")) italic()
        else if (check("`"))  monospace()
        else if (check("__")) underline()
        else if (check("^"))  superscript()
        else if (check(",,")) subscript()
        else if (check("[[")) link()
        else {
          val str = readUntil {
            char == safeTagMarker ||
              check("''") ||
              char == '`' ||
              check("__") ||
              char == '^' ||
              check(",,") ||
              check("[[") ||
              isInlineEnd ||
              checkParaEnded() ||
              char == endOfLine
          }
          Text(textTransform(str))
        }
      }

      val inlines: List[Inline] = {
        val iss = mutable.ListBuffer.empty[Inline]
        iss += inline0()
        while (!isInlineEnd && !checkParaEnded()) {
          val skipEndOfLine = if (char == endOfLine) {
            nextChar()
            true
          } else {
            false
          }

          val current = inline0()
          (iss.last, current) match {
            case (Text(t1), Text(t2)) if skipEndOfLine =>
              iss.update(iss.length - 1, Text(t1 + endOfLine + t2))
            case (i1, i2) if skipEndOfLine =>
              iss ++= List(Text(endOfLine.toString), i2)
            case _ => iss += current
          }
        }
        iss.toList
      }

      inlines match {
        case Nil => Text("")
        case i :: Nil => i
        case is => Chain(is)
      }

    }

  def htmlTag(): HtmlTag = {
    jump(safeTagMarker)
    val read = readUntil(safeTagMarker)
    if (char != endOfText) jump(safeTagMarker)
    HtmlTag(read)
  }

  def bold(): Inline = {
    jump("'''")
    val i = getInline(check("'''"))
    jump("'''")
    Bold(i)
  }

  def italic(): Inline = {
    jump("''")
    val i = getInline(check("''"))
    jump("''")
    Italic(i)
  }

  def monospace(): Inline = {
    jump("`")
    val i = getInline(check("`"))
    jump("`")
    Monospace(i)
  }

  def underline(): Inline = {
    jump("__")
    val i = getInline(check("__"))
    jump("__")
    Underline(i)
  }

  def superscript(): Inline = {
    jump("^")
    val i = getInline(check("^"))
    if (jump("^")) {
      Superscript(i)
    } else {
      Chain(Seq(Text("^"), i))
    }
  }

  def subscript(): Inline = {
    jump(",,")
    val i = getInline(check(",,"))
    jump(",,")
    Subscript(i)
  }

  def summary(): Inline = {
    val i = getInline(checkSentenceEnded())
    Summary(
      if (jump("."))
        Chain(List(i, Text(".")))
      else
        i
    )
  }

  def link(): Inline = {
    jump("[[")
    val parens = 2 + repeatJump('[')
    val stop  = "]" * parens
    val target = readUntil { check(stop) || isWhitespaceOrNewLine(char) }
    val title =
      if (!check(stop)) Some({
        jumpWhitespaceOrNewLine()
        getInline(check(stop))
      })
      else None
    jump(stop)

    linkResolver(target, title)
  }

  /* UTILITY */

  /** {{{ eol ::= { whitespace } '\n' }}} */
  def blockEnded(blockType: String): Unit = {
    if (char != endOfLine && char != endOfText) {
      reportError("no additional content on same line after " + blockType)
      jumpUntil(endOfLine)
    }
    while (char == endOfLine)
      nextChar()
  }

  /**
   *  Eliminates the (common) leading spaces in all lines, based on the first line
   *  For indented pieces of code, it reduces the indent to the least whitespace prefix:
   *    {{{
   *       indented example
   *       another indented line
   *       if (condition)
   *         then do something;
   *       ^ this is the least whitespace prefix
   *    }}}
   */
  def normalizeIndentation(_code: String): String = {

    val code = _code.replaceAll("\\s+$", "").dropWhile(_ == '\n') // right-trim + remove all leading '\n'
    val lines = code.split("\n")

    // maxSkip - size of the longest common whitespace prefix of non-empty lines
    val nonEmptyLines = lines.filter(_.trim.nonEmpty)
    val maxSkip = if (nonEmptyLines.isEmpty) 0 else nonEmptyLines.map(line => line.iterator.takeWhile(_ == ' ').size).min

    // remove common whitespace prefix
    lines.map(line => if (line.trim.nonEmpty) line.substring(maxSkip) else line).mkString("\n")
  }

  def checkParaEnded(): Boolean = {
    (char == endOfText) ||
    ((char == endOfLine) && {
      val poff = offset
      nextChar() // read EOL
      val ok = {
        checkSkipInitWhitespace(endOfLine) ||
        checkSkipInitWhitespace('=') ||
        checkSkipInitWhitespace("{{{") ||
        checkList ||
        check(TableCellStart) ||
        checkSkipInitWhitespace('\u003D')
      }
      offset = poff
      ok
    })
  }

  def checkSentenceEnded(): Boolean = {
    (char == '.') && {
      val poff = offset
      nextChar() // read '.'
      val ok = char == endOfText || char == endOfLine || isWhitespace(char)
      offset = poff
      ok
    }
  }

  def reportError(message: String) = println(s"$message")
}

sealed class CharReader(buffer: String) { reader =>

  var offset: Int = 0
  def char: Char =
    if (offset >= buffer.length) endOfText else buffer.charAt(offset)

  final def nextChar() =
    offset += 1

  final def prevChar(): Unit =  {
    offset -= 1
  }

  final def check(chars: String): Boolean = {
    val poff = offset
    val ok = jump(chars)
    offset = poff
    ok
  }

  final def check(chars: String, checkOffset: Int): Boolean = {
    val poff = offset
    offset += checkOffset
    val ok = jump(chars)
    offset = poff
    ok
  }

  def checkSkipInitWhitespace(c: Char): Boolean = {
    val poff = offset
    jumpWhitespace()
    val ok = jump(c)
    offset = poff
    ok
  }

  def checkSkipInitWhitespace(chars: String): Boolean = {
    val poff = offset
    jumpWhitespace()
    val (ok0, chars0) =
      if (chars.charAt(0) == ' ')
        (offset > poff, chars.substring(1))
      else
        (true, chars)
    val ok = ok0 && jump(chars0)
    offset = poff
    ok
  }

  def countWhitespace: Int = {
    var count = 0
    val poff = offset
    while (isWhitespace(char) && char != endOfText) {
      nextChar()
      count += 1
    }
    offset = poff
    count
  }

  /* Jumpers */

  /** Jumps a character and consumes it
    * @return true only if the correct character has been jumped */
  final def jump(ch: Char): Boolean = {
    if (char == ch) {
      nextChar()
      true
    }
    else false
  }

  /** Jumps all the characters in chars, consuming them in the process.
    * @return true only if the correct characters have been jumped
    */
  final def jump(chars: String): Boolean = {
    var index = 0
    while (index < chars.length && char == chars.charAt(index) && char != endOfText) {
      nextChar()
      index += 1
    }
    index == chars.length
  }

  final def repeatJump(c: Char, max: Int = Int.MaxValue): Int = {
    var count = 0
    while (jump(c) && count < max)
      count += 1
    count
  }

  final def jumpUntil(ch: Char): Int = {
    var count = 0
    while (char != ch && char != endOfText) {
      nextChar()
      count += 1
    }
    count
  }

  final def jumpUntil(pred: => Boolean): Int = {
    var count = 0
    while (!pred && char != endOfText) {
      nextChar()
      count += 1
    }
    count
  }

  def jumpWhitespace() = jumpUntil(!isWhitespace(char))

  def jumpWhitespaceOrNewLine() = jumpUntil(!isWhitespaceOrNewLine(char))

  /* Readers */
  final def readUntil(c: Char): String = {
    withRead {
      while (char != c && char != endOfText) {
        nextChar()
      }
    }
  }

  final def readUntil(chars: String): String = {
    assert(chars.length > 0)
    withRead {
      val c = chars.charAt(0)
      while (!check(chars) && char != endOfText) {
        nextChar()
        while (char != c && char != endOfText)
          nextChar()
      }
    }
  }

  final def readUntil(pred: => Boolean): String = {
    withRead {
      while (char != endOfText && !pred) {
        nextChar()
      }
    }
  }

  private def withRead(read: => Unit): String = {
    val start = offset
    read
    buffer.substring(start, offset)
  }

  /* Chars classes */
  def isWhitespace(c: Char) = c == ' ' || c == '\t'

  def isWhitespaceOrNewLine(c: Char) = isWhitespace(c) || c == '\n'
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy