All Downloads are FREE. Search and download functionalities are using the official Maven repository.

laika.rst.TableParsers.scala Maven / Gradle / Ivy

/*
 * Copyright 2013-2016 the original author or authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package laika.rst

import laika.ast._
import laika.bundle.{BlockParser, BlockParserBuilder}
import laika.collection.TransitionalCollectionOps.Zip3Iterator
import laika.collection.Stack
import laika.parse.{Parser, Success}
import laika.parse.text.TextParsers._

import scala.collection.mutable.ListBuffer

/** Provides parsers for the two table types supported by reStructuredText.
 * 
 * @author Jens Halm
 */
object TableParsers {


  /** Base type for all elements of the interim model the table parsers
   *  produce for each line of table input.
   *
   *  The hierarchy is sealed as all subtypes are declared right here
   *  and consumers process them via pattern matching.
   */
  private sealed abstract class TableElement

  /** Base type for all structural elements of a table line,
   *  i.e. everything that is not plain cell text.
   */
  private sealed abstract class TableDecoration extends TableElement

  /** A position where row and column separators meet.
   *  Rendered as `+` when it ends up inside the text of a merged cell.
   */
  private case object Intersection extends TableDecoration {
    override def toString: String = "+"
  }

  /** A horizontal separator (a run of `-`) spanning the width of one column. */
  private case object RowSeparator extends TableDecoration

  /** A boundary (a run of `=`) spanning the width of one column. */
  private case object TableBoundary extends TableDecoration

  /** The separator between two cells on a text line.
   *
   *  @param decoration the literal text of the separator, which is also its string representation
   */
  private final case class CellSeparator (decoration: String) extends TableDecoration {
    override def toString: String = decoration
  }

  /** A fragment of cell text on a single line.
   *
   *  @param text the literal text, which is also the string representation of this element
   */
  private final case class CellElement (text: String) extends TableElement {
    override def toString: String = text
  }
      
  /** Mutable builder that accumulates the raw text lines of a single table cell
   *  and finally turns them into a `Cell` node.
   *
   *  @param recParser the parser to apply to the trimmed cell text to obtain the nested blocks
   */
  class CellBuilder (recParser: String => Seq[Block]) {
    
    /* The separator preceding each line in `lines` (same order, same length). */
    private val seps = new ListBuffer[TableElement]
    private val lines = new ListBuffer[StringBuilder]
    /* The line that `currentLine` appends to; replaced by each call to `nextLine`. */
    private var last: StringBuilder = new StringBuilder
    
    /** The number of rows this cell spans, incremented by `nextLine` when called with `nextRow = true`. */
    var rowSpan = 1
    /** The number of columns this cell spans, incremented by each call to `merge`. */
    var colSpan = 1
    
    /** Indicates that this cell should be left out when the row is built (see `RowBuilder.toRow`). */
    var removed: Boolean = false
    
    /** Starts a new line of text in this cell.
     *
     *  @param sep     the separator found to the left of the line
     *  @param line    the text of the new line
     *  @param nextRow whether the line crosses into the next row, increasing the row span
     */
    def nextLine (sep: TableElement, line: String, nextRow: Boolean): Unit = { 
      seps += sep
      last = new StringBuilder(line)
      lines += last
      if (nextRow) rowSpan += 1
    }

    /** Appends the string representation of the separator and the specified text
     *  to the line most recently started with `nextLine`.
     */
    def currentLine (sep: TableElement, line: String): Unit = {
      last ++= sep.toString
      last ++= line
    }

    /** Merges the content of the specified cell into this cell, line by line,
     *  re-inserting the separators of the other cell as text, and increases the column span.
     */
    def merge (right: CellBuilder): Unit = {
      Zip3Iterator(lines, right.seps, right.lines).foreach {
        case (left, sep, right) => left ++= sep.toString ++= right
      }
      colSpan += 1
    }
    
    /** The raw text of this cell with all lines joined by a newline character. */
    def cellContent: String = lines.map(_.toString).mkString("\n")
    
    /** The text of this cell with each line trimmed and then re-indented relative
     *  to the smallest indentation found amongst the non-blank lines.
     *  Blank lines become empty lines.
     *
     *  Returns an empty string for a cell without any lines.
     */
    def trimmedCellContent: String = {
      abstract class CellLine (val indent: Int) { def padTo (indent: Int): String }
      // blank lines use the maximum indent so that they never determine the minimum
      object BlankLine extends CellLine(Int.MaxValue) { def padTo (indent: Int) = "" }
      class TextLine (i: Int, text: String) extends CellLine(i) { def padTo (minIndent: Int) = " " * (indent - minIndent) + text }
      
      val cellLine = not(eof) ~> ((blankLine ^^^ BlankLine) |
        (ws ~ restOfLine) ^^ { case indent ~ text => new TextLine(indent.length, text.trim) }) 
      
      consumeAll(cellLine*).parse(cellContent) match {
        // the guard is essential: `min` throws an UnsupportedOperationException for an empty list,
        // which is what the parser produces for a cell that never received a line of text
        case Success(parsed, _) if parsed.nonEmpty => 
          val minIndent = parsed.map(_.indent).min
          parsed.map(_.padTo(minIndent)).mkString("\n")
        case _ => "" // TODO - error handling for edge cases
      }
    }
    
    /** The nested blocks obtained by applying the recursive parser to the trimmed cell content. */
    def parsedCellContent: Seq[Block] = recParser(trimmedCellContent)

    /** Creates the final cell node of the specified type with the parsed content and the current spans. */
    def toCell (ct: CellType): Cell = Cell(ct, parsedCellContent, colSpan, rowSpan)
  }
  
  /** An entry in the cell stack of a column.
   *
   *  @param cell       the builder for the referenced cell
   *  @param mergedLeft true if the referenced cell is the cell of a column to the left
   *                    that the cell of this column has been merged into
   */
  class CellBuilderRef (
    val cell: CellBuilder,
    val mergedLeft: Boolean = false
  )
  
  /** Collects the cell builders that belong to one table row. */
  class RowBuilder {

    private val cells = new ListBuffer[CellBuilder]

    /** Appends the specified cell to the end of this row. */
    def addCell (cell: CellBuilder): Unit = {
      cells += cell
    }

    /** Builds the row node from all cells which have not been flagged as removed,
     *  in the order they were added.
     *
     *  @param ct the type to use for all cells of the row
     */
    def toRow (ct: CellType): Row = {
      val retained = cells.toList.filterNot(_.removed)
      Row(retained.map(_.toCell(ct)))
    }
  }
  
  /** Tracks the cells of a single table column while the table is processed line by line.
   *
   *  @param left      the builder for the column to the left of this one, empty for the leftmost column
   *  @param recParser the parser for nested blocks, passed on to each cell created by this column
   */
  class ColumnBuilder (left: Option[ColumnBuilder], recParser: String => Seq[Block]) {

    private var rowSpan = 1 // only used for sanity checks

    private val cells = new Stack[CellBuilderRef]

    /** The cell referenced by the top of the stack. */
    def currentCell: CellBuilder = cells.top.cell

    /** The cell referenced by the stack element at index 1. */
    def previousCell: CellBuilder = cells.elements(1).cell

    /** Creates a new cell for this column and makes it the current cell.
     *
     *  Throws a `MalformedTableException` if the current cell has been merged
     *  to the left while the row spans of the two columns differ.
     */
    def nextCell: CellBuilder = {
      val spansDiverge = cells.nonEmpty && cells.top.mergedLeft && rowspanDif != 0
      if (spansDiverge)
        throw new MalformedTableException("Illegal merging of rows with different cellspans")
      val created = new CellBuilder(recParser)
      cells.push(new CellBuilderRef(created))
      created
    }

    /* Takes the current cell off the stack and flags it as removed. */
    private def removeCell: CellBuilder = {
      val detached = cells.pop.cell
      detached.removed = true
      detached
    }

    /** Merges the current cell of this column into a cell of the column to the left
     *  and lets this column reference that cell from then on.
     *
     *  Throws a `MalformedTableException` if the row spans of the two columns differ.
     *
     *  @param previous if true the merge target is the previous cell of the left column,
     *                  otherwise its current cell
     */
    def mergeLeft (previous: Boolean = false): Unit = {
      if (rowspanDif != 0)
        throw new MalformedTableException("Illegal merging of cells with different rowspans")
      val neighbour = left.get
      // the target must be determined before the cell of this column gets removed
      val target = if (previous) neighbour.previousCell else neighbour.currentCell
      target.merge(removeCell)
      cells.push(new CellBuilderRef(target, mergedLeft = true))
    }

    /** The row span counter of the column to the left minus the counter of this column. */
    def rowspanDif: Int = {
      val neighbour = left.get
      neighbour.rowSpan - rowSpan
    }

    /** Adds a line of text to the current cell of this column.
     *
     *  If the current cell has been merged to the left, the separator and the text are
     *  appended to the current line of the merged cell. Otherwise a new line is started,
     *  and a separator which is itself cell text triggers a merge to the left.
     *
     *  @param sep     the element found to the left of the text
     *  @param line    the text of the line
     *  @param nextRow whether the line crosses into the next row
     */
    def addLine (sep: TableElement, line: String, nextRow: Boolean): Unit = {
      val ref = cells.top
      if (!ref.mergedLeft) {
        ref.cell.nextLine(sep, line, nextRow)
        sep match {
          case _: CellElement => mergeLeft()
          case _              => ()
        }
      }
      else {
        // the left column has already processed this line, hence the expected difference of one
        if (nextRow && rowspanDif != 1)
          throw new MalformedTableException("Illegal merging of rows with different cellspans")
        ref.cell.currentLine(sep, line)
      }
      if (nextRow) rowSpan += 1
    }
  }
  
  /** Holds the column and row builders for one section (head or body) of a table.
   *  A first row containing one fresh cell per column is created on construction.
   *
   *  @param columnWidths one entry per column; only the number of entries is used by this builder
   *  @param recParser    the parser for nested blocks, passed on to each column builder
   */
  class TableBuilder (columnWidths: List[Int], recParser: String => Seq[Block]) {

    /** The builders for all columns, from left to right,
     *  each one holding a reference to its neighbour on the left.
     */
    val columns: List[ColumnBuilder] = {
      val rightToLeft = columnWidths.foldLeft(List.empty[ColumnBuilder]) { (built, _) =>
        new ColumnBuilder(built.headOption, recParser) :: built
      }
      rightToLeft.reverse
    }

    private val rows = new ListBuffer[RowBuilder]

    /* Sets up the initial row, must run after `columns` and `rows` have been initialized. */
    private def openFirstRow (): Unit = {
      val first = nextRow
      columns.foreach(column => first.addCell(column.nextCell))
    }
    openFirstRow()

    /** Creates a new, empty row builder and appends it to this table. */
    def nextRow: RowBuilder = {
      val created = new RowBuilder
      rows += created
      created
    }

    /** Builds the row nodes for all rows of this table.
     *
     *  @param ct the type to use for all cells
     */
    def toRowList (ct: CellType): List[Row] = rows.map(_.toRow(ct)).toList
  }
  
      
  /** Flattens the nested result of a chain of sequential parsers into a list
   *  of table elements, preserving their order from left to right.
   *
   *  Any value that is neither a table element nor a `~` instance leads to a `MatchError`.
   */
  private def flattenElements (result: Any): List[TableElement] = result match {
    case left ~ right          => flattenElements(left) ::: flattenElements(right)
    case element: TableElement => element :: Nil
  }
   
  /** Parses a grid table.
   * 
   *  The top border determines the number and widths of the columns. All following lines
   *  are first parsed into an interim model of table elements, which is then turned
   *  into rows and cells with the help of a `TableBuilder`. If a boundary line (using `=`)
   *  is present, the rows above it become the table head.
   *
   *  The parser fails (instead of throwing) when the interim model turns out to be
   *  malformed, so that other parsers can process the input.
   * 
   *  See [[http://docutils.sourceforge.net/docs/ref/rst/restructuredtext.html#grid-tables]].
   */
  lazy val gridTable: BlockParserBuilder = BlockParser.forStartChar('+').recursive { recParsers =>
    
    // a single '+'
    val intersect = (anyOf('+') take 1) ^^^ Intersection
    
    // the width of a run of one or more '-'
    val rowSep = anyOf('-').min(1).count
    // one or more runs of '-', each followed by a '+', yields the list of column widths
    // NOTE(review): starts with dashes, so the leading '+' is presumably consumed as the start char - confirm
    val topBorder = ((rowSep <~ intersect)+) <~ wsEol

    // a real column separator: '|' or '+'
    val colSep = ((anyOf('|') take 1) ^^^ CellSeparator("|")) | intersect
    // a column separator or any single character, the latter being treated as cell text
    val colSepOrText = colSep | ((any take 1) ^^ CellElement)

    recParsers.withRecursiveBlockParser(topBorder) >> { case (recParser, cols) =>
      
      // only the leftmost position requires a real separator, all other positions may hold text
      val separators = colSep :: List.fill(cols.length - 1)(colSepOrText)
      // (left separator, column width, right separator) for each column
      val colsWithSep = Zip3Iterator(separators, cols, separators.reverse)
      
      // '+' followed by dashes across the full column width, shadows the outer `rowSep`
      def rowSep (width: Int): Parser[Any] = 
        intersect ~ ((anyOf('-') take width) ^^^ RowSeparator) <~ lookAhead(intersect)
        
      // '+' followed by '=' across the full column width
      def boundaryPart (width: Int): Parser[Any] = 
        intersect ~ ((anyOf('=') take width) ^^^ TableBoundary) <~ lookAhead(intersect)
        
      // the left separator followed by exactly `width` characters of cell text,
      // the right separator is only checked, not consumed
      def cell (sepL: Parser[Any], width: Int, sepR: Parser[Any]): Parser[Any] = 
        sepL ~ ((any take width) ^^ CellElement) <~ lookAhead(sepR)
      
      // a full line: for each column either a row separator or a line of cell text,
      // flattened into alternating separator and content elements
      val row = (colsWithSep map { case (separatorL, colWidth, separatorR) => 
        rowSep(colWidth) | cell(separatorL, colWidth, separatorR)
      } reduceRight (_ ~ _)) ^^ flattenElements
      
      // a line where all columns consist of '='
      val tableBoundary: Parser[TableDecoration] = (cols map { col => boundaryPart(col) } reduceRight (_ ~ _)) ^^^ TableBoundary
      
      // true if the line consists of nothing but intersections and row separators
      def isSeparatorRow (row: List[TableElement]): Boolean = {
        row.forall {
          case RowSeparator => true
          case Intersection => true
          case _ => false
        }
      }
      
      // feeds the interim model into a new table builder, line by line
      def buildRowList (rows: List[List[TableElement]], ct: CellType): List[Row] = {
        
        val tableBuilder = new TableBuilder(cols map (_ + 1), recParser) // column width includes separator
            
        rows foreach { row =>
          // a line with at least one row separator starts a new row
          val hasSeparator = row exists { case RowSeparator => true; case _ => false }
          val newRowBuilder = if (hasSeparator) Some(tableBuilder.nextRow) else None
          
          // process the line as pairs of (separator, content), one pair per column
          row.sliding(2,2).zip(tableBuilder.columns.iterator).foreach { 
            case (_ :: RowSeparator :: Nil, column) => newRowBuilder.get.addCell(column.nextCell)
            case (sep :: CellElement(text) :: Nil, column) => column.addLine(sep, text, hasSeparator)
            case _ => () // cannot happen, just to avoid the warning
          }
        }
        tableBuilder.toRowList(ct)
      }
      
      // the last line of the table must be a pure separator line
      def validateLastRow (rows: List[List[TableElement]]): Unit = {
        if (rows.isEmpty || !isSeparatorRow(rows.last)) throw new MalformedTableException("Table not terminated correctly")
      }
      
      // `any take 1` consumes the final character of the line which the row parsers only look ahead to
      val boundaryRow = tableBoundary <~ (any take 1) ~ wsEol
      val tablePart = ((not(tableBoundary) ~> row <~ (any take 1) ~ wsEol)*)
      (tablePart ~ opt(boundaryRow ~> tablePart)) ^^? { result =>
        
      /* Need to fail for certain illegal constructs in the interim model, 
       * so that the next parser can pick up the (broken) table input */
        try {
          val table = result match {
            // the terminating separator line of the body is validated and then dropped
            case head ~ Some(body) => validateLastRow(body); Table(TableHead(buildRowList(head, HeadCell)), TableBody(buildRowList(body.init, BodyCell)))
            case body ~ None       => validateLastRow(body); Table(TableHead(Nil), TableBody(buildRowList(body.init, BodyCell)))
          }
          Right(table)
        }
        catch {
          case ex: MalformedTableException => Left(ex.getMessage)
        }
      }      
    }
    
  }
  
  /** Parses a simple table.
   * 
   *  The top border (runs of `=` separated by whitespace) determines the columns. 
   *  The table consists of one or two parts, each terminated by a boundary line.
   *  If there are two parts, the first one becomes the table head.
   * 
   *  See [[http://docutils.sourceforge.net/docs/ref/rst/restructuredtext.html#simple-tables]].
   */
  lazy val simpleTable: BlockParserBuilder = BlockParser.withoutStartChar.recursive { recParsers =>
    
    // the width of a run of one or more spaces
    val intersect = anyOf(' ').min(1).count
    // the width of a run of one or more '='
    val tableBorder = anyOf('=').min(1).count
    // (width of the column, width of the whitespace following it or 0 if there is none)
    val columnSpec = tableBorder ~ opt(intersect) ^^ {
      case col ~ Some(sep) => (col, sep)
      case col ~ None      => (col, 0)
    }
    // a simple table needs at least two columns
    val topBorder = columnSpec.rep.min(2) <~ wsEol

    recParsers.withRecursiveBlockParser(topBorder) >> { case (recParser, cols) =>
      
      // for each column one parser for a regular line and one parser for a boundary line
      val (rowColumns, boundaryColumns): (Seq[Parser[Any]],Seq[Parser[Any]]) = (cols map { case (col, sep) =>
        // a column without trailing whitespace in the border takes the rest of the line,
        // all other columns take exactly as many characters as the column is wide
        val cellText = if (sep == 0) anyBut('\n', '\r') ^^ CellElement
                       else (any take col) ^^ CellElement 
        // the space between two columns, either blank or containing text of a cell spanning columns
        val separator = (anyOf(' ') take sep) ^^ CellSeparator
        val textInSep = (any take sep) ^^ CellSeparator
        val textColumn = cellText ~ (separator | textInSep)
        
        // an underline of '-', the space between columns is either blank (split) or filled with '-' (merged)
        val rowSep = (anyOf('-') take col) ^^^ RowSeparator
        val merged = (anyOf('-') take sep) ^^^ RowSeparator
        val split =  (anyOf(' ') take sep) ^^^ Intersection
        val underline = rowSep ~ (split | merged)
        
        // the same for a boundary of '='
        val bCell = (anyOf('=') take col) ^^^ TableBoundary
        val bMerged = (anyOf('=') take sep) ^^^ TableBoundary
        val bSplit =  (anyOf(' ') take sep) ^^^ Intersection
        val boundary = bCell ~ (bSplit | bMerged)
        
        (underline | not(boundary) ~> textColumn, boundary)
      }).unzip
      
      val row: Parser[Any] = (rowColumns reduceRight (_ ~ _)) <~ wsEol
      val boundary: Parser[Any] = (boundaryColumns reduceRight (_ ~ _)) <~ wsEol
      val blank: Parser[Any] = not(eof) ~> blankLine
      
      // any number of blank lines and rows, terminated by a boundary which becomes the last element
      val tablePart: Parser[List[Any]] = (((blank | row)*) ~ boundary) ^^ { case rows ~ boundary => rows :+ boundary }
      
      
      // turns the raw parser results of one table part into rows
      def buildRowList (rows: List[Any], ct: CellType): List[Row] = {
        
        // the column width passed to the builder includes the whitespace following the column
        val tableBuilder = new TableBuilder(cols map { col => col._1 + col._2 }, recParser)
        
        // adds a line where all cells and separators consist of spaces only
        def addBlankLines (acc: ListBuffer[List[TableElement]]) = 
            acc += (cols flatMap { case (cell, sep) => List(CellElement(" " * cell), CellSeparator(" " * sep)) })
        
        // adds a synthetic underline without any merged columns
        def addRowSeparators (acc: ListBuffer[List[TableElement]]) = 
          acc += (cols flatMap { _ => List(RowSeparator, Intersection) })
      
        /* in contrast to the grid table, some rows need to be processed in context,
         * as their exact behaviour depends on preceding or following lines. */
        // the accumulator holds the processed lines, the number of pending blank lines
        // and a flag indicating whether the preceding line was a line of text
        val rowBuffer = rows.foldLeft((ListBuffer[List[TableElement]](), 0, false)) { case ((acc, blanks, rowOpen), row) =>
          row match {
            case result: ~[_,_] => 
              val row = flattenElements(result)
              row.head match {
                case RowSeparator => (acc += row, 0, false)
                case TableBoundary => (acc += row, 0, false)
                case CellElement(text) => 
                  // blank text in the first column: pending blank lines are inserted as blank lines
                  if (text.trim.isEmpty) for (_ <- 1 to blanks) addBlankLines(acc)
                  // text in the first column right after another line of text: a row separator is inserted
                  else if (rowOpen) addRowSeparators(acc)
                  (acc += row, 0, true)
                case _ => (acc, blanks, rowOpen) // cannot happen, just to avoid the warning 
              }
            case _ => (acc, blanks + 1, rowOpen) // blank line 
          }
        }._1
        
        rowBuffer foreach { row =>
          
          // applies the function to all columns but the first, passing the element
          // between this column and the previous one together with the element for the column itself
          def foreachColumn (row: List[TableElement])(f: ((List[TableElement], ColumnBuilder)) => Any) = {
            row.tail.dropRight(1).sliding(2,2).zip(tableBuilder.columns.tail.iterator).foreach(f)
          }
          row.head match {
            // an underline: starts a new row with a new cell in each column,
            // a merged underline merges the cell above it into the previous cell of the left column
            case RowSeparator => 
              val newRowBuilder = tableBuilder.nextRow
              newRowBuilder.addCell(tableBuilder.columns.head.nextCell)
              foreachColumn(row) {
                case (Intersection :: RowSeparator :: Nil, column) => newRowBuilder.addCell(column.nextCell)
                case (RowSeparator :: RowSeparator :: Nil, column) => column.mergeLeft(true); newRowBuilder.addCell(column.nextCell)
                case _ => ()
              }
            // a boundary: a merged boundary merges the current cell into the current cell of the left column
            case TableBoundary =>
              foreachColumn(row) {
                case (Intersection :: TableBoundary :: Nil, _) => ()
                case (TableBoundary :: TableBoundary :: Nil, column) => column.mergeLeft()
                case _ => ()
              }
            // a line of text: adds one line to the current cell of each column,
            // using an empty separator for the first column
            case CellElement(text) =>
              tableBuilder.columns.head.addLine(CellSeparator(""), text, nextRow = false)
              foreachColumn(row) {
                case (sep :: CellElement(text) :: Nil, column) => column.addLine(sep, text, nextRow = false)
                case _ => ()
              }
            case _ => ()
          }
        }
        
        tableBuilder.toRowList(ct)
      }
      
      // with two parts the first one is the head, a single part is a body without head
      tablePart ~ opt(tablePart) ^^ { 
        case head ~ Some(body) => Table(TableHead(buildRowList(head, HeadCell)), TableBody(buildRowList(body, BodyCell)))
        case body ~ None       => Table(TableHead(Nil), TableBody(buildRowList(body, BodyCell)))
      }
      
    }
    
  }
  
  /** Signals an illegal construct in the interim model of a table.
   *
   *  @param msg the description of the problem, available through `getMessage`
   */
  class MalformedTableException (msg: String)
    extends RuntimeException(msg)
  
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy