All Downloads are FREE. Search and download functionalities are using the official Maven repository.

parsley.internal.deepembedding.frontend.LazyParsley.scala Maven / Gradle / Ivy

There is a newer version: 5.0.0-M6
Show newest version
/* SPDX-FileCopyrightText: © 2022 Parsley Contributors 
 * SPDX-License-Identifier: BSD-3-Clause
 */
package parsley.internal.deepembedding.frontend

import scala.annotation.nowarn
import scala.collection.mutable

import parsley.XAssert._
import parsley.exceptions.BadLazinessException
import parsley.registers.Reg

import parsley.internal.deepembedding.{Cont, ContOps, Id}, ContOps.{perform, result, ContAdapter}
import parsley.internal.deepembedding.backend, backend.StrictParsley
import parsley.internal.machine.instructions, instructions.Instr

/** This is the root type of the parsley "frontend": it represents a combinator tree
  * where the join-points in the tree (recursive or otherwise) have not been identified
  * or factored. As such, it is a potentially cyclic graph (though finite), and must be handled with
  * caution.
  *
  * Accessing `instrs` or `numRegs` runs the whole compilation pipeline (see `pipeline`):
  * let-finding, conversion to `StrictParsley`, and instruction generation.
  *
  * @note objects of this type may be shared across parsers or threads and, as such,
  *       must remain entirely immutable.
  * @note NOTE(review): despite the above, this class holds three private `var`s (`sSafe`, `cps`
  *       and `numRegsUsedByParent`) that are written by `unsafe()`, `overflows()` and
  *       `demandCalleeSave`; presumably these are only called before `instrs` is first
  *       forced -- confirm against the callers.
  */
private [parsley] abstract class LazyParsley[+A] private [deepembedding] {
    // Public API
    // $COVERAGE-OFF$
    /** Denotes this parser is unsafe, which will disable certain law-based optimisations that assume purity.
      *
      * Clears the `sSafe` flag, which is later copied onto the strict tree by `unsafeOptimised`.
      */
    private [parsley] final def unsafe(): Unit = sSafe = false
    /** Forces the parser: referencing the lazy `instrs` here triggers its computation, and the value is discarded. */
    private [parsley] final def force(): Unit = instrs: @nowarn
    /** Denote that this parser is large enough that it might stack-overflow during
      * compilation: this allows for the slow path using `Cont` to be used immediately
      * instead of going through the (likely failing) `Id` path.
      *
      * Sets the `cps` flag, which `computeInstrs` consults when choosing the monad.
      */
    private [parsley] final def overflows(): Unit = cps = true
    // $COVERAGE-ON$

    // The instructions used to execute this parser along with the number of registers it uses.
    // Both values come from a single call to `computeInstrs`, which runs lazily on first access.
    final private [parsley] lazy val (instrs: Array[Instr], numRegs: Int) = computeInstrs

    /** This parser is the result of a `flatMap` operation, and as such must perform
      * callee-save on `numRegs` registers (which belong to its parent)
      *
      * @param numRegs the number of registers the parent uses (these must be saved)
      * @return this parser, after recording `numRegs` in `numRegsUsedByParent`
      */
    private [deepembedding] def demandCalleeSave(numRegs: Int): this.type = {
        numRegsUsedByParent = numRegs
        this
    }

    // Internals
    // To ensure that stack-overflow cannot occur during the processing of particularly
    // large parsers, the entire internals of the "frontend" and "backend" of parsley are
    // performed via the Monad of Continuations, or `Cont`. This allows for the execution
    // of the methods below to be trampolined, which evaluates them in a loop, trading
    // stack-space for heap-space. Each method is parameterised, however, by an ''abstract''
    // `Cont`, because in the event that a parser doesn't stack overflow under normal
    // execution, it is preferable to evaluate it under the much lighter-weight Identity
    // Monad, or `Id`. The choice of monad is delegated to `computeInstrs`.
    //
    // The frontend is split into two passes: the first identifies all the shared parsers
    // within the combinator tree; and the second factors these parsers out and converts
    // the combinator tree into its strict, finite, form: `StrictParsley`.
    //
    // Pass 1: `findLets` (using `findLetsAux`) populates a `LetFinderState`
    // Pass 2: extracts the `lets` and `recs` from the state, and feeds through `optimised`
    //         (using `preprocess`)

    /** Describes how to recursively traverse this combinator's sub-trees using `findLets`.
      *
      * @param seen the set of all nodes that have previously been seen by the let-finding
      * @param state stores all the information of the let-finding process
      */
    protected def findLetsAux[Cont[_, +_]: ContOps, R](seen: Set[LazyParsley[_]])(implicit state: LetFinderState): Cont[R, Unit]

    /** Describes how to recursively convert this combinator into a `StrictParsley` by
      * `optimise`ing its sub-trees.
      *
      * @param lets the known non-recursive shared parsers mapped to their corresponding join-point nodes
      * @param recs the known recursive parsers mapped to their corresponding join-point nodes
      * @return the strict, finite, version of this tree, with all shared parsers factored out into join-points
      */
    protected def preprocess[Cont[_, +_]: ContOps, R, A_ >: A](implicit lets: LetMap, recs: RecMap): Cont[R, StrictParsley[A_]]

    /** should the underlying strict tree be considered safe? (cleared by `unsafe()`) */
    final private var sSafe = true
    /** should the `Id` instance be skipped? (set by `overflows()`) */
    final private var cps = false
    /** how many registers are used by the ''parent'' of this combinator (this combinator is part of a `flatMap` when this is not -1) */
    final private var numRegsUsedByParent = -1

    /** Computes the instructions associated with this parser as well as the number of
      * registers it requires in a (possibly) stack-safe way.
      *
      * Uses `Cont.ops` when `cps` has been set and `Id.ops` otherwise.
      */
    final private def computeInstrs: (Array[Instr], Int) = {
        if (cps) computeInstrs(Cont.ops) else computeInstrs(Id.ops)
    }
    /** Computes the instructions associated with this parser as well as the number of
      * registers it requires within the context of a specific (unknown) monad.
      *
      * @param ops the instance for the monad to evaluate with
      */
    final private def computeInstrs[Cont[_, +_]](ops: ContOps[Cont]): (Array[Instr], Int) = pipeline(ops)

    /** Performs the full end-to-end pipeline through both the frontend and the backend.
      *
      * First performs let-finding to identify the shared parsers (recursive or otherwise)
      * within the combinator tree. Then performs let-factoring and (optimising) conversion
      * to `StrictParsley`. Then executes the backend pipeline on this strict combinator tree,
      * yielding the final results.
      *
      * @return the instructions associated with this parser as well as the number of
      *         registers it requires
      */
    final private def pipeline[Cont[_, +_]: ContOps]: (Array[Instr], Int) = {
        implicit val letFinderState: LetFinderState = new LetFinderState
        // The second tuple component is read after `perform` has returned.
        // NOTE(review): the block after `>>` reads `letFinderState`, so `>>` presumably takes its
        // right operand by-name (i.e. it runs once `findLets` has completed) -- confirm in `ContOps`.
        (perform[Cont, Array[Instr]] {
            findLets(Set.empty) >> {
                val usedRegs: Set[Reg[_]] = letFinderState.usedRegs
                implicit val state: backend.CodeGenState = new backend.CodeGenState(letFinderState.numRegs)
                implicit val recMap: RecMap = RecMap(letFinderState.recs)
                implicit val letMap: LetMap = LetMap(letFinderState.lets)
                // Pair each `Rec` node with the strict form of its parser's body. `unsafeOptimised` is
                // used because `optimised` on a recursive parser just returns the `Rec` node itself.
                val recs_ = recMap.map { case (p, rec) => (rec, p.unsafeOptimised[Cont, Unit, Any]) }
                for { sp <- this.optimised } yield sp.generateInstructions(numRegsUsedByParent, usedRegs, recs_)
            }
        }, letFinderState.numRegs)
    }

    // Pass 1
    /** A recursive process that identifies shared parsers within the combinator tree.
      *
      * - first increments the "predecessor" count of this parser: this is the number of
      *   other parsers (or the top-level) that reference it. When this count is more than 1
      *   this parser will be considered as shared
      * - checks whether this parser has been encountered before during the analysis: if
      *   so it is recursive, so add it to the recs set: this node '''must''' not be processed,
      *   or it will cause an infinite divergence.
      * - otherwise, if this is the first time the parser has been reached, add any used
      *   register to the register set and recurse; a parser that was already processed via
      *   another path is not traversed again
      *
      * @param seen the set of all nodes that have previously been seen by the let-finding
      * @param state stores all the information of the let-finding process
      */
    @throws[BadLazinessException]("if this parser references another parser before it has been initialised")
    final protected [frontend] def findLets[Cont[_, +_]: ContOps, R](seen: Set[LazyParsley[_]])(implicit state: LetFinderState): Cont[R, Unit] = {
        state.addPred(this)
        if (seen.contains(this)) result(state.addRec(this))
        // true only when the predecessor count is exactly 1, i.e. this is the first visit
        else if (state.notProcessedBefore(this)) {
            this match {
                case self: UsesRegister => state.addReg(self.reg)
                case _                  =>
            }

            // A `NullPointerException` raised here is reported as a `BadLazinessException`
            // (presumably it stems from a sub-parser reference that is still `null`).
            try findLetsAux(seen + this)
            catch {
                // $COVERAGE-OFF$
                case _: NullPointerException => throw new BadLazinessException // scalastyle:ignore throw
                // $COVERAGE-ON$
            }
        }
        else result(())
    }

    // Pass 2
    /** Performs the factoring out of shared parsers and then converts this parser into its strict form performing
      * optimisations on that translated form.
      *
      * Membership of `recs` is checked before membership of `lets`; only when this parser is in
      * neither map is it actually translated (via `unsafeOptimised`).
      *
      * @param lets the known non-recursive shared parsers mapped to their corresponding join-point nodes
      * @param recs the known recursive parsers mapped to their corresponding join-point nodes
      * @return the strict, finite, version of this tree, with all shared parsers factored out into join-points
      */
    final protected [frontend] def optimised[Cont[_, +_]: ContOps, R, A_ >: A](implicit lets: LetMap, recs: RecMap): Cont[R, StrictParsley[A_]] = {
        if (recs.contains(this)) result(recs(this))
        else if (lets.contains(this)) result(lets(this))
        else this.unsafeOptimised
    }
    /** Similar to `optimised` but should be '''only''' used on things known to be let-bindings (to avoid infinite expansion!). */
    final private [frontend] def knownLetTopOptimised[Cont[_, +_]: ContOps, R, A_ >: A](implicit lets: LetMap, recs: RecMap): Cont[R, StrictParsley[A_]] = {
        assume(lets.contains(this), "the let check can only be skipped for known let-bindings")
        assume(!recs.contains(this), "rec membership can be skipped for known let-binding bodies")
        this.unsafeOptimised
    }
    /** Similar to `optimised` but does not check for inclusion in the `lets` or `recs` sets.
      *
      * Runs `preprocess`, copies this parser's `sSafe` flag onto the resulting strict node and
      * then returns the result of its `optimise`.
      */
    private def unsafeOptimised[Cont[_, +_]: ContOps, R, A_ >: A](implicit lets: LetMap, recs: RecMap): Cont[R, StrictParsley[A_]] = {
        for {p <- this.preprocess} yield {
            p.safe = this.sSafe
            p.optimise
        }
    }

    // $COVERAGE-OFF$
    /** Pretty-prints a combinator tree, for internal debugging purposes only.
      *
      * Always evaluates with `Cont.ops`. The output is the "main body" line followed by one
      * `label: body` line for each recursive parser.
      */
    final private [internal] def prettyAST: String = {
        import Cont.ops // scalastyle:ignore import.grouping
        implicit val letFinderState: LetFinderState = new LetFinderState
        perform[Cont, String] {
            findLets(Set.empty) >> {
                implicit val state: backend.CodeGenState = new backend.CodeGenState(0)
                implicit val recMap: RecMap = RecMap(letFinderState.recs)
                implicit val letMap: LetMap = LetMap(letFinderState.lets)
                // one deferred rendering per recursive parser, sequenced below
                val mrecs = for {
                    (p, rec) <- recMap
                } yield for {
                    sp <- p.unsafeOptimised[Cont, String, Any]
                    str <- sp.pretty
                } yield s"${rec.label}: $str"

                for {
                    sp <- this.optimised
                    str <- sp.pretty
                    strs <- ContOps.sequence(mrecs.toList)
                } yield {
                    s"main body: $str\n${strs.mkString("\n")}"
                }
            }
        }
    }
    // $COVERAGE-ON$
}

/** A mix-in trait that denotes that this parser uses a specific register, which must be allocated.
  *
  * During let-finding, any parser that matches this trait has its `reg` recorded in the
  * `LetFinderState`.
  */
private [deepembedding] trait UsesRegister {
    /** The register used by this combinator. */
    val reg: Reg[_]
}

/** Mutable accumulator used during Pass 1 (let-finding): it counts how many times each parser
  * is referenced, remembers which parsers were reported as recursive, and collects the
  * registers that were reported as used.
  */
private [deepembedding] class LetFinderState {
    // reference ("predecessor") count recorded so far for each parser
    private val _preds = mutable.Map.empty[LazyParsley[_], Int]
    // parsers registered through `addRec`
    private val _recs = mutable.Set.empty[LazyParsley[_]]
    // registers registered through `addReg`
    private val _usedRegs = mutable.Set.empty[Reg[_]]

    /** Records one more reference to `p` from another parser (or from the top-level).
      *
      * @note assumes that a parser isn't given a predecessor by the same root twice.
      *
      * @param p the parser that has just been referenced
      */
    private [frontend] def addPred(p: LazyParsley[_]): Unit = {
        val refsSoFar = _preds.getOrElse(p, 0)
        _preds.update(p, refsSoFar + 1)
    }
    /** Remembers that `p` has been identified as recursive.
      *
      * @param p a recursive parser
      */
    private [frontend] def addRec(p: LazyParsley[_]): Unit = {
        _recs += p
        ()
    }
    /** Remembers that `reg` is used by some parser in the tree.
      *
      * @param reg the register used by the parser.
      */
    private [frontend] def addReg(reg: Reg[_]): Unit = {
        _usedRegs += reg
        ()
    }
    /** Is this the first time `p` has been reached, i.e. is its reference count exactly one?
      *
      * @note `p` must already have been passed to `addPred`: the count is looked up directly.
      */
    private [frontend] def notProcessedBefore(p: LazyParsley[_]): Boolean = {
        val refs = _preds(p)
        refs == 1
    }

    /** Returns all the non-recursive parsers which are referenced two or more times across the tree. */
    private [frontend] def lets: Iterable[LazyParsley[_]] = {
        val counted = _preds.toSeq.view
        counted.filter { case (p, refs) => refs >= 2 && !_recs.contains(p) }.map(_._1)
    }
    /** Returns all the recursive parsers in the tree.
      *
      * @note this is a `lazy val`: the set is copied on first access, so parsers added with
      *       `addRec` afterwards will not appear in it.
      */
    private [frontend] lazy val recs: Set[LazyParsley[_]] = _recs.toSet
    /** Returns an immutable copy of the registers recorded so far. */
    private [frontend] def usedRegs: Set[Reg[_]] = _usedRegs.toSet
    /** Returns how many distinct registers have been recorded so far. */
    private [frontend] def numRegs: Int = _usedRegs.size
}

/** Represents a map of let-bound lazy parsers to their strict equivalents.
  *
  * Strict versions are produced on demand by the generator functions in `letGen` and are
  * cached, so each let-bound parser is translated at most once per `LetMap`.
  */
private [deepembedding] final class LetMap private (letGen: Map[LazyParsley[_], LetMap => StrictParsley[_]]) {
    // Cache of already-translated bindings. An entry is not necessarily a `Let` node:
    // an inlinable strict parser is stored directly.
    private val mutMap = mutable.Map.empty[LazyParsley[_], StrictParsley[_]]

    /** Is the given parser a let-binding? */
    def contains(p: LazyParsley[_]): Boolean = letGen.contains(p)

    /** Returns the strict parser that represents a given let-bound parser.
      *
      * @note this does not necessarily return a `Let` node, as the underlying parser may be inlined.
      */
    def apply[A](p: LazyParsley[A]): StrictParsley[A] = {
        val strict = mutMap.getOrElseUpdate(p, translate(p))
        strict.asInstanceOf[StrictParsley[A]]
    }

    /** Runs the generator for `p`, wrapping the result in a `Let` unless it is inlinable. */
    private def translate(p: LazyParsley[_]): StrictParsley[_] = {
        assume(contains(p), "only let-bound parsers can be mapped to a strict version in the let-map")
        val sp = letGen(p)(this)
        if (sp.inlinable) sp else new backend.Let(sp)
    }

    // $COVERAGE-OFF$
    override def toString: String = mutMap.toString
    // $COVERAGE-ON$
}
/** Factory for the let-map used during Pass 2. */
private [frontend] object LetMap {
    /** Builds a `LetMap` given the sequence of let-bound parsers as well as the map of recursive parsers.
      *
      * No translation happens here: each parser is paired with a function that, when handed the
      * finished `LetMap`, translates that parser with `knownLetTopOptimised`.
      *
      * @param lets the identified shared non-recursive parsers to include
      * @param recs the identified recursive parsers that may be required in the translation
      */
    def apply[Cont[_, +_]: ContOps](lets: Iterable[LazyParsley[_]])(implicit recs: RecMap): LetMap = {
        val generators = lets.map { p =>
            val generate: LetMap => StrictParsley[_] = { owner =>
                // expose the enclosing let-map implicitly so nested let-bindings resolve against it
                implicit val self: LetMap = owner
                perform[Cont, StrictParsley[_]](p.knownLetTopOptimised)
            }
            p -> generate
        }
        new LetMap(generators.toMap)
    }
}

/** Represents the map of lazy recursive parsers to their strict `Rec` node join-points.
  *
  * It is also an `Iterable` over its (parser, `Rec` node) pairs.
  */
private [deepembedding] final class RecMap private (map: Map[LazyParsley[_], backend.Rec[_]]) extends Iterable[(LazyParsley[_], backend.Rec[_])] {
    /** Is the given parser recursive? */
    def contains(p: LazyParsley[_]): Boolean = {
        map.contains(p)
    }

    /** Returns the `Rec` node that corresponds to a given recursive parser.
      *
      * @note `p` must be present in the map: the node is looked up directly.
      */
    def apply[A](p: LazyParsley[A]): backend.Rec[A] = {
        val rec: backend.Rec[_] = map(p)
        rec.asInstanceOf[backend.Rec[A]]
    }

    /** An iterator over all the key-value pairs in this map */
    override def iterator: Iterator[(LazyParsley[_], backend.Rec[_])] = {
        map.iterator
    }

    // $COVERAGE-OFF$
    override def toString: String = map.toString
    // $COVERAGE-ON$
}
/** Factory for the rec-map used during Pass 2. */
private [frontend] object RecMap {
    /** Creates a `RecMap` given all the recursive parsers identified by let-finding.
      * Each parser is mapped to a new `Rec` node in the strict combinator tree, wrapping a
      * `Call` instruction that targets a fresh label.
      *
      * @param recs all of the recursive parsers to fill the map with
      * @param state code-generation state, required to generate labels for the `Call` instructions.
      */
    def apply(recs: Iterable[LazyParsley[_]])(implicit state: backend.CodeGenState): RecMap = {
        val bindings = recs.map { p =>
            // every recursive parser gets its own label
            val call = new instructions.Call(state.freshLabel())
            p -> new backend.Rec(call)
        }
        new RecMap(bindings.toMap)
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy