All Downloads are FREE. Search and download functionalities are using the official Maven repository.

parsley.internal.deepembedding.frontend.LazyParsley.scala Maven / Gradle / Ivy

The newest version!
/*
 * Copyright 2020 Parsley Contributors 
 *
 * SPDX-License-Identifier: BSD-3-Clause
 */
package parsley.internal.deepembedding.frontend

import scala.annotation.nowarn
import scala.collection.mutable

import parsley.XAssert._
import parsley.state.Ref

import parsley.internal.deepembedding.{Cont, ContOps, Id}, ContOps.{perform, result, ContAdapter}
import parsley.internal.deepembedding.backend, backend.StrictParsley
import parsley.internal.diagnostics.NullParserException
import parsley.internal.machine.instructions, instructions.Instr

/** This is the root type of the parsley "frontend": it represents a combinator tree
  * where the join-points in the tree (recursive or otherwise) have not been identified
  * or factored. As such, it is a potentially cyclic graph (though finite), and must be handled with
  * caution.
  *
  * Subclasses supply the traversal (`findLetsAux`) and the conversion (`preprocess`); this
  * class supplies the driver (`pipeline`) that turns the tree into instructions.
  *
  * @note objects of this type may be shared across parsers or threads and, as such,
  *       must remain entirely immutable.
  */
private [parsley] abstract class LazyParsley[+A] private [deepembedding] {
    // Public API
    // $COVERAGE-OFF$
    /** Forces the parser, eagerly computing its instructions by touching the lazy `instrs`. */
    private [parsley] final def force(): Unit = instrs: @nowarn
    /** Denote that this parser is large enough that it might stack-overflow during
      * compilation: this allows for the slow path using `Cont` to be used immediately
      * instead of going through the (likely failing) `Id` path.
      */
    private [parsley] final def overflows(): Unit = cps = true
    // $COVERAGE-ON$

    // The instructions used to execute this parser along with the number of registers it uses;
    // both are computed together, once, on first access via `computeInstrs`.
    final private [parsley] lazy val (instrs: Array[Instr], numRegs: Int) = computeInstrs

    /** This parser is the result of a `flatMap` operation, and as such must perform
      * callee-save on `numRegs` registers (which belong to its parent).
      *
      * The value is recorded in `numRegsUsedByParent` and later handed to instruction generation.
      *
      * @param numRegs the number of registers the parent uses (these must be saved)
      */
    private [deepembedding] def demandCalleeSave(numRegs: Int): Unit = numRegsUsedByParent = numRegs

    // Internals
    // To ensure that stack-overflow cannot occur during the processing of particularly
    // large parsers, the entire internals of the "frontend" and "backend" of parsley is
    // performed via the Monad of Continuations, or `Cont`. This allows for the execution
    // of the methods below to be trampolined, which evaluates them in a loop, trading
    // stack-space for heap-space. Each method is parameterised, however, by an ''abstract''
    // `Cont`, because in the event that a parser doesn't stack overflow under normal
    // execution, it is preferable to evaluate it under the much lighter-weight Identity
    // Monad, or `Id`. The choice of monad is delegated to `computeInstrs`.
    //
    // The frontend is split into two passes: the first identifies all the shared parsers
    // within the combinator tree; and the second factors these parsers out and converts
    // the combinator tree into its strict, finite, form: `StrictParsley`.
    //
    // Pass 1: `findLets` (using `findLetsAux`) populates a `LetFinderState`
    // Pass 2: extracts the `lets` and `recs` from the state, and feeds through `optimised`
    //         (using `preprocess`)

    /** Describes how to recursively traverse this combinator's sub-trees using `findLets`.
      *
      * @param seen the set of all nodes that have previously been seen by the let-finding
      * @param state stores all the information of the let-finding process
      */
    protected def findLetsAux[M[_, +_]: ContOps, R](seen: Set[LazyParsley[_]])(implicit state: LetFinderState): M[R, Unit]

    /** Describes how to recursively convert this combinator into a `StrictParsley` by `optimise`ing its sub-trees.
      *
      * @param lets the known shared parsers mapped to their corresponding join-point nodes
      * @return the strict, finite, version of this tree, with all shared parsers factored out into join-points
      */
    protected def preprocess[M[_, +_]: ContOps, R, A_ >: A](implicit lets: LetMap): M[R, StrictParsley[A_]]

    /** Should the `Id` instance be skipped? Set by `overflows()`, read by `computeInstrs`. */
    final private var cps = false
    /** Exposes the `cps` flag within `deepembedding`. */
    final private [deepembedding] def isCps: Boolean = cps
    /** How many registers are used by the ''parent'' of this combinator (this combinator is part of a `flatMap` when this is not -1). */
    final private var numRegsUsedByParent = -1

    /** Computes the instructions associated with this parser as well as the number of
      * registers it requires in a (possibly) stack-safe way: the `Cont` instance is used
      * when `cps` is set, and the `Id` instance otherwise.
      */
    final private def computeInstrs: (Array[Instr], Int) = {
        if (cps) computeInstrs(Cont.ops) else computeInstrs(Id.ops)
    }
    /** Computes the instructions associated with this parser as well as the number of
      * registers it requires within the context of a specific (unknown) monad.
      *
      * @param ops the instance for the monad to evaluate with
      */
    final private def computeInstrs[M[_, +_]](ops: ContOps[M]): (Array[Instr], Int) = pipeline(ops)

    /** Performs the full end-to-end pipeline through both the frontend and the backend.
      *
      * First performs let-finding to identify the shared parsers (recursive or otherwise)
      * within the combinator tree. Then performs let-factoring and (optimising) conversion
      * to `StrictParsley`. Then executes the backend pipeline on this strict combinator tree,
      * yielding the final results.
      *
      * @return the instructions associated with this parser as well as the number of
      *         registers it requires
      */
    final private def pipeline[M[_, +_]: ContOps]: (Array[Instr], Int) = {
        // a fresh state per compilation: Pass 1 fills it, Pass 2 and the backend read from it
        implicit val letFinderState: LetFinderState = new LetFinderState
        (perform[M, Array[Instr]] {
            // NOTE(review): the block after `>>` reads `letFinderState`, so it presumably relies on
            // `>>` deferring its right-hand side until let-finding has run -- confirm against `ContOps`
            findLets(Set.empty) >> {
                val usedRefs: Set[Ref[_]] = letFinderState.usedRefs
                implicit val letMap: LetMap = LetMap(letFinderState.lets, letFinderState.recs)
                for { sp <- this.optimised } yield {
                    implicit val state: backend.CodeGenState = new backend.CodeGenState(letFinderState.numRegs)
                    sp.generateInstructions(numRegsUsedByParent, usedRefs, letMap.bodies)
                }
            }
        // the second component is only read once `perform` has returned the instructions
        }, letFinderState.numRegs)
    }

    // Pass 1
    /** A recursive process that identifies shared parsers within the combinator tree.
      *
      * - first increments the "predecessor" count of this parser: this is the number of
      *   other parsers (or the top-level) that reference it. When this count is more than 1
      *   this parser will be considered as shared
      * - checks whether this parser has been encountered before during the analysis: if
      *   so it is recursive, so add it to the recs set: this node '''must''' not be processed,
      *   or it will cause an infinite divergence.
      * - otherwise, if this is the first visit, add any used register to the register set and recurse
      * - a repeat visit that is not recursive does nothing further
      *
      * @param seen the set of all nodes that have previously been seen by the let-finding
      * @param state stores all the information of the let-finding process
      */
    final protected [frontend] def findLets[M[_, +_]: ContOps, R](seen: Set[LazyParsley[_]])(implicit state: LetFinderState): M[R, Unit] = {
        state.addPred(this)
        if (seen.contains(this)) result(state.addRec(this))
        // the count was just bumped above, so "not processed before" means this is the first visit
        else if (state.notProcessedBefore(this)) {
            this match {
                case self: UsesRef => state.addRef(self.ref)
                case _             =>
            }

            try findLetsAux(seen + this)
            catch {
                // rethrow the `err` extracted from a `NullParserException` in its place
                // $COVERAGE-OFF$
                case NullParserException(err) => throw err // scalastyle:ignore throw
                // $COVERAGE-ON$
            }
        }
        else result(())
    }

    // Pass 2
    /** Performs the factoring out of shared parsers and then converts this parser into its strict form performing
      * optimisations on that translated form.
      *
      * If this parser is itself let-bound, its strict form is taken from `lets` instead of being rebuilt.
      *
      * @param lets the known shared parsers mapped to their corresponding join-point nodes
      * @return the strict, finite, version of this tree, with all shared parsers factored out into join-points
      */
    final protected [frontend] def optimised[M[_, +_]: ContOps, R, A_ >: A](implicit lets: LetMap): M[R, StrictParsley[A_]] = {
        if (lets.contains(this)) result(lets(this))
        else this.unsafeOptimised
    }
    /** Similar to `optimised` but should be '''only''' used on things known to be let-bindings (to avoid infinite expansion!). */
    final private [frontend] def knownLetTopOptimised[M[_, +_]: ContOps, R, A_ >: A](implicit lets: LetMap): M[R, StrictParsley[A_]] = {
        assume(lets.contains(this), "the let check can only be skipped for known let-bindings")
        this.unsafeOptimised
    }
    /** Similar to `optimised` but does not check for inclusion in the `lets` set: runs `preprocess` and then `optimise`s the result. */
    private def unsafeOptimised[M[_, +_]: ContOps, R, A_ >: A](implicit lets: LetMap): M[R, StrictParsley[A_]] = {
        for { p <- this.preprocess } yield p.optimise
    }

    // $COVERAGE-OFF$
    // Processing with visitors.
    /** Use a visitor implementation to process this internal lazy parser.
      *
      * @param visitor the visitor to process this parser with
      * @param context the context value handed to the visitor
      */
    def visit[T, U[+_]](visitor: LazyParsleyIVisitor[T, U], context: T): U[A]

    /** Pretty names for parsers, for internal debugging purposes only. */
    private [parsley] def prettyName: String

    /** Pretty-prints a combinator tree, for internal debugging purposes only.
      *
      * Runs let-finding and conversion with the `Id` ops and a fresh `LetFinderState`, then
      * renders the main body followed by one line per let-bound body.
      */
    final private [internal] def prettyAST: String = {
        implicit val ops = Id.ops
        implicit val letFinderState: LetFinderState = new LetFinderState
        findLets(Set.empty)
        implicit val letMap: LetMap = LetMap(letFinderState.lets, letFinderState.recs)
        implicit val state: backend.CodeGenState = new backend.CodeGenState(0)
        val mrecs = for {
            (let, p) <- letMap.bodies
        } yield s"${state.getLabel(let, producesResults = true)}: ${p.pretty}"
        s"main body: ${this.optimised.pretty}\n${mrecs.mkString("\n")}"
    }
    // $COVERAGE-ON$
}

/** A mix-in trait that denotes that this parser uses a specific register, which must be allocated.
  *
  * During let-finding, `LazyParsley.findLets` matches on this trait and records `ref`
  * in the `LetFinderState`.
  */
private [deepembedding] trait UsesRef {
    /** The register used by this combinator. */
    val ref: Ref[_]
}

/** Mutable bookkeeping filled in during Pass 1: it tracks how often each parser is referenced,
  * which parsers are recursive, and which registers are in use.
  */
private [deepembedding] class LetFinderState {
    private val _recs = mutable.Set.empty[LazyParsley[_]]
    private val _preds = mutable.Map.empty[LazyParsley[_], Int]
    private val _usedRefs = mutable.Set.empty[Ref[_]]

    /** Records one more "predecessor" for a parser, i.e. one more place that references it.
      *
      * @note assumes that a parser isn't given a predecessor by the same root twice.
      *
      * @param p the parser gaining a predecessor
      */
    private [frontend] def addPred(p: LazyParsley[_]): Unit = {
        val countSoFar = _preds.getOrElse(p, 0)
        _preds.update(p, countSoFar + 1)
    }
    /** Remembers that a parser has been identified as recursive.
      *
      * @param p a recursive parser
      */
    private [frontend] def addRec(p: LazyParsley[_]): Unit = _recs += p
    /** Remembers that a register is used by some parser.
      *
      * @param ref the register used by the parser.
      */
    private [frontend] def addRef(ref: Ref[_]): Unit = _usedRefs += ref
    /** Is this the first time the given parser is analysed (exactly one predecessor so far)?
      *
      * @note the parser must already have been given a predecessor via `addPred`.
      */
    private [frontend] def notProcessedBefore(p: LazyParsley[_]): Boolean = {
        val predecessors = _preds(p)
        predecessors == 1
    }

    // TODO: I think this can just drop the !_recs(p) constraint and it'll just work? (check whether recs get a pred bump)
    /** Returns the non-recursive parsers referenced two or more times across the tree, followed by the recursive ones. */
    private [frontend] def lets: Iterable[LazyParsley[_]] = {
        val sharedNonRecursive = _preds.toSeq.view.collect {
            case (parser, count) if count > 1 && !_recs.contains(parser) => parser
        }
        sharedNonRecursive ++ recs
    }
    /** Returns all the recursive parsers in the tree (snapshotted on first access). */
    private [frontend] lazy val recs: Set[LazyParsley[_]] = _recs.toSet
    /** Returns all the registers used by the parser */
    private [frontend] def usedRefs: Set[Ref[_]] = _usedRefs.toSet
    /** Returns the number of registers used by the parser */
    private [frontend] def numRegs: Int = _usedRefs.size
}

/** Maps let-bound lazy parsers to their strict equivalents, producing each strict form on first request. */
private [deepembedding] final class LetMap private (letGen: Map[LazyParsley[_], LetMap => StrictParsley[_]], recs: Set[LazyParsley[_]]) {
    // This might not necessarily contain Let nodes: if they were inlined then they will not be present here
    private val letMap = mutable.Map.empty[LazyParsley[_], StrictParsley[_]]
    private val bodyMap = mutable.Map.empty[backend.Let[_], StrictParsley[_]]

    /** Is the given parser a let-binding? */
    def contains(p: LazyParsley[_]): Boolean = letGen.contains(p)

    /** Returns the strict parser that represents a given let-bound parser, generating it if it has not been requested before.
      *
      * @note this does not necessarily return a `Let` node, as the underlying parser may be inlined.
      */
    def apply[A](p: LazyParsley[A]): StrictParsley[A] = letMap.get(p) match {
        case None         => materialise(p)
        case Some(cached) => cached.asInstanceOf[StrictParsley[A]]
    }

    /** Generates the strict form of a let-bound parser that has no entry in `letMap` yet. */
    private def materialise[A](p: LazyParsley[A]): StrictParsley[A] = {
        assume(contains(p), "only let-bound parsers can be mapped to a strict version in the let-map")
        val placeholder = new backend.Let[A]
        // MUST be registered before the body is generated, as generation is handed this map
        letMap(p) = placeholder
        val body = letGen(p)(this)
        val keepAsLet = !body.inlinable || recs.contains(p)
        if (keepAsLet) {
            bodyMap(placeholder) = body
            placeholder
        }
        else {
            // inlined: the body itself replaces the placeholder for later lookups
            letMap(p) = body
            body.asInstanceOf[StrictParsley[A]]
        }
    }

    /** An immutable snapshot of the `Let` nodes that were kept, with their bodies. */
    def bodies: Map[backend.Let[_], StrictParsley[_]] = bodyMap.toMap

    // $COVERAGE-OFF$
    override def toString: String = letMap.toString
    // $COVERAGE-ON$
}
private [frontend] object LetMap {
    /** Builds a `LetMap` in which every given let-bound parser is paired with a generator for its strict form.
      *
      * Each generator, when handed the finished `LetMap`, runs `knownLetTopOptimised` on its parser via `perform`.
      *
      * @param lets the parsers to treat as let-bound
      * @param recs the identified recursive parsers that may be required in the translation
      */
    def apply[M[_, +_]: ContOps](lets: Iterable[LazyParsley[_]], recs: Set[LazyParsley[_]]): LetMap = {
        val generators: Map[LazyParsley[_], LetMap => StrictParsley[_]] = lets.map { p =>
            val generate: LetMap => StrictParsley[_] = { (owner: LetMap) =>
                // the map under construction becomes the implicit `lets` for the body's conversion
                implicit val self: LetMap = owner
                perform[M, StrictParsley[_]](p.knownLetTopOptimised)
            }
            p -> generate
        }.toMap
        new LetMap(generators, recs)
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy