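// NOTE: the lines down to the licence header are residue of the web page this file was downloaded from; they are not part of the source.
// All Downloads are FREE. Search and download functionalities are using the official Maven repository.

// parsley.internal.deepembedding.backend.StrictParsley.scala Maven / Gradle / Ivy

// There is a newer version: 5.0.0-M6
// Show newest version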
/*
 * Copyright 2020 Parsley Contributors 
 *
 * SPDX-License-Identifier: BSD-3-Clause
 */
package parsley.internal.deepembedding.backend

import scala.annotation.tailrec
import scala.collection.mutable

import parsley.XAssert._
import parsley.state.Ref

import parsley.internal.collection.mutable.ResizableArray
import parsley.internal.deepembedding.ContOps, ContOps.{perform, ContAdapter}
import parsley.internal.machine.instructions, instructions.{Instr, Label}

import StrictParsley.*

/** This is the root type of the parsley "backend": it represents a combinator tree
  * where the join-points in the tree (recursive or otherwise) have been factored into
  * `Let` and `Rec` nodes. This means the tree is finite and acyclic.
  *
  * @note objects of this type are freshly generated by the frontend, so can be mutable
  *       and are allowed to pull all sorts of nasty tricks, as they are guaranteed to
  *       be thread- and parser-local
  */
private [deepembedding] trait StrictParsley[+A] {
    /** This function forms the entry-point of the "backend" of parsley's compiler.
      *
      * The process is as follows:
      *   1. if this parser was generated by a `flatMap`, it may need to generate a `CalleeSave` instruction
      *   1. code generation is performed for the main (non-shared) body of the parser
      *   1. a terminating instruction is generated to signify the end of the parser: `Return` when
      *      `numRegsUsedByParent >= 0` (there is a parent), `Halt` otherwise (top level)
      *   1. all the shared parsers are then generated terminated by `Return` instructions
      *   1. any sharable, fail-only, handler instructions are then generated at the end of the instruction stream
      *   1. jump-labels in the code are removed, and tail-call optimisation is applied
      *
      * @tparam M the computation type, equipped with a `ContOps` instance, in which code generation is expressed
      * @param numRegsUsedByParent the number of registers in use by the parent (should one exist), required by callee-save;
      *                            a negative value (conventionally -1) means there is no parent
      * @param usedRefs the registers used by this parser (these may require allocation)
      * @param bodyMap the mapping from each `Let` node to the strict parser forming its body, which is
      *                consulted when the demanded shared parsers are generated
      * @param state the code generator state
      * @return the final array of instructions for this parser
      */
    final private [deepembedding] def generateInstructions[M[_, +_]: ContOps](numRegsUsedByParent: Int, usedRefs: Set[Ref[_]],
                                                                              bodyMap: Map[Let[_], StrictParsley[_]])
                                                                            (implicit state: CodeGenState): Array[Instr] = {
        // One fresh buffer per invocation: every helper below picks it up implicitly
        implicit val instrs: InstrBuffer = newInstrBuffer
        perform {
            // NOTE(review): `|>` presumably runs the block below only after the main body has been generated -- confirm against `ContOps`
            generateCalleeSave[M, Array[Instr]](numRegsUsedByParent, this.codeGen(producesResults = true), usedRefs) |> {
                // When `numRegsUsedByParent` is -1 this is top level, otherwise it is a flatMap
                instrs += (if (numRegsUsedByParent >= 0) instructions.Return else instructions.Halt)
                // The demanded shared parsers come next: their return labels are kept for tail-call optimisation
                val letRets = finaliseLets(bodyMap)
                // Handlers are read from the state only after the main body and the lets have been generated
                generateHandlers(state.handlers)
                // Finally, labels are resolved to offsets and stripped, yielding the final array
                finaliseInstrs(instrs, state.nlabels, letRets)
            }
        }
    }

    /** This method performs the code generation for this combinator and the recursive sub-parsers.
      *
      * It is fine for this method to perform peephole optimisation on the combinators and generate
      * more optimal sequences of instructions in specific circumstances.
      *
      * @tparam M the computation type, equipped with a `ContOps` instance, in which code generation is expressed
      * @tparam R the first type argument of the resulting `M[R, Unit]`, chosen by the caller
      * @param producesResults is this parser expected to push its result onto the stack?
      * @param instrs the current buffer of instructions to generate into
      * @param state code generator state, for the generation of labels
      * @return a computation in `M` that yields `Unit`; the instructions are emitted into `instrs`
      */
    protected [backend] def codeGen[M[_, +_]: ContOps, R](producesResults: Boolean)(implicit instrs: InstrBuffer, state: CodeGenState): M[R, Unit]

    /** This method is directly called by the "frontend" and is used to perform domain-specific
      * optimisations on this parser (usually following the laws of the parser combinators).
      *
      * By default, this method just returns this parser unchanged.
      *
      * @return the optimised parser (by default, `this`)
      */
    protected [deepembedding] def optimise: StrictParsley[A] = this

    /** Should this parser be inlined if it is shared?
      *
      * @note the heuristic here is that single instruction combinators should always be inlined
      */
    private [deepembedding] def inlinable: Boolean

    // $COVERAGE-OFF$
    /** Pretty-prints a combinator tree, for internal debugging purposes only. */
    private [deepembedding] def pretty: String
    // $COVERAGE-ON$
}

private [deepembedding] object StrictParsley {
    /** The kind of buffer that should be used to generate instructions into:
      * currently a `ResizableArray` of `Instr`.
      */
    private [deepembedding] type InstrBuffer = ResizableArray[Instr]
    /** The type of a return label: the jump-label identifier of a label that is placed
      * directly in front of a `Return` instruction when a shared parser is generated.
      */
    private type RetLoc = Int

    /** Make a fresh instruction buffer.
      *
      * @return a newly constructed, default-initialised `ResizableArray`
      */
    private def newInstrBuffer: InstrBuffer = new ResizableArray()

    /** Allocates every register in `unallocatedRegs` to a slot that is not already taken by
      * one of the allocated registers in `regs`.
      *
      * Candidate slots are drawn from `0 until regs.size`, skipping the addresses of the
      * registers in `regs` that are already allocated.
      *
      * @param unallocatedRegs the set of registers that need allocating; this must be exactly
      *                        the unallocated subset of `regs` (checked by assertion)
      * @param regs the set of all registers used by a specific parser
      * @return the list of slots that have been freshly allocated to (empty if nothing needed allocating)
      */
    private def allocateRegisters(unallocatedRegs: Set[Ref[_]], regs: Set[Ref[_]]): List[Int] = {
        // Global registers cannot occupy the same slot as another global register.
        // In a flatMap, that means a newly discovered global register must be allocated to a new slot:
        // this may resize the register pool
        assert(unallocatedRegs == regs.filterNot(_.allocated))
        if (unallocatedRegs.isEmpty) Nil
        else {
            // the addresses already claimed by the allocated registers
            val occupied = regs.filter(_.allocated).map(_.addr)
            val vacant = (0 until regs.size).filterNot(occupied.contains)
            applyAllocation(unallocatedRegs, vacant)
        }
    }

    /** Pairs each unallocated register with one of the supplied free slots and allocates it there.
      *
      * Registers and slots are paired positionally by `zip`, so at most
      * `min(refs.size, freeSlots.size)` registers are allocated.
      *
      * @param refs the set of registers that require allocation
      * @param freeSlots the supply of slots that are currently not in-use
      * @return the slots that were used for allocation, in the order the allocations were performed
      */
    private def applyAllocation(refs: Set[Ref[_]], freeSlots: Iterable[Int]): List[Int] = {
        // TODO: For scala 2.12, use lazyZip and foreach!
        // NOTE(review): `lazyZip` only exists from 2.13, so this presumably means "once 2.12 support is dropped" -- confirm
        // The zip is computed once: the resulting (immutable) set of pairs is then traversed twice,
        // in the same order, first to allocate and then to collect the slots.
        val assignments = refs.zip(freeSlots)
        assignments.foreach { case (ref, slot) => ref.allocate(slot) }
        assignments.toList.map(_._2)
    }

    /** Wraps the main body of instructions in callee-save, should it be required.
      *
      * This is needed when using `flatMap`, as it is unaware of the register
      * context of its parent, other than the number used. The expectation is
      * that such a parser will save all the registers used by its parents that
      * it itself does not explicitly use and restore them when it is completed
      * (if the parent has not allocated a register it may be assumed that
      * it is local to the `flatMap`: this is a documented limitation of the
      * system).
      *
      * Any unallocated register in `usedRefs` is always allocated first. The callee-save
      * instructions themselves are only emitted when there is a parent
      * (`numRegsUsedByParent >= 0`) and at least one register was unallocated on entry;
      * otherwise just the body is generated.
      *
      * @param numRegsUsedByParent the size of the current register array as dictated by the parent,
      *                            or a negative value (conventionally -1) at the top level
      * @param bodyGen a computation that generates instructions into the instruction buffer
      * @param usedRefs the registers used by the parser in question
      * @param instrs the instruction buffer
      * @param state the code generation state, for label generation
      * @return the computation that generates the (possibly wrapped) body
      */
    private def generateCalleeSave[M[_, +_]: ContOps, R](numRegsUsedByParent: Int, bodyGen: =>M[R, Unit], usedRefs: Set[Ref[_]])
                                                       (implicit instrs: InstrBuffer, state: CodeGenState): M[R, Unit] = {
        // This snapshot must be taken *before* allocation, which marks these registers as allocated
        val unallocated = usedRefs.filterNot(_.allocated)
        val freshSlots = allocateRegisters(unallocated, usedRefs)
        // if this is -1, then we are the top level and have no parent, otherwise it needs to be done
        val hasParent = numRegsUsedByParent >= 0
        if (!hasParent || unallocated.isEmpty) bodyGen
        else {
            val exit = state.freshLabel()
            val entry = state.freshLabel()
            instrs += new instructions.Label(entry)
            instrs += new instructions.CalleeSave(exit, unallocated, usedRefs.size, freshSlots, numRegsUsedByParent)
            bodyGen |> {
                // after the body, control goes back to the `CalleeSave` instruction, which was handed the exit label
                instrs += new instructions.Jump(entry)
                instrs += new instructions.Label(exit)
            }
        }
    }

    /** Generates the code for every shared parser that has been ''demanded'' during code generation.
      *
      * The pending shared parsers are stored within the code generation state. Only those that
      * were actually referenced appear there (some identified shared parsers may have been
      * inlined and so do not need to be generated), which means dead-code is automatically
      * eliminated. Generating one shared parser may demand further ones: these are picked up
      * as well, since the state is re-queried until nothing is pending.
      *
      * Each parser is emitted as: its entry label, its body, a fresh return label, then `Return`.
      *
      * @param bodyMap the map of lets to bodies
      * @param instrs the instruction buffer to generate into
      * @param state the code generation state, which contains the shared parsers
      * @return the list of return labels for each of the parsers (for TCO), in generation order
      */
    private def finaliseLets[M[_, +_]: ContOps](bodyMap: Map[Let[_], StrictParsley[_]])(implicit instrs: InstrBuffer, state: CodeGenState): List[RetLoc] = {
        // `generated` accumulates the return labels in reverse order of generation
        @tailrec def drain(generated: List[RetLoc]): List[RetLoc] = {
            if (!state.more) generated.reverse
            else {
                val (let, producesResults, entry) = state.nextLet()
                instrs += new instructions.Label(entry)
                perform[M, Unit](bodyMap(let).codeGen(producesResults))
                val exit = state.freshLabel()
                instrs += new instructions.Label(exit)
                instrs += instructions.Return
                drain(exit :: generated)
            }
        }
        drain(Nil)
    }

    /** Emits every shared handler that has been demanded by the parser, each preceded by its label.
      *
      * These are those handlers that always fail, and have no combinator specific
      * (or rather, non-sharable) configuration. This means that they can be generated
      * once at the end of the instruction buffer and jumped to from anywhere:
      * they will always fail and jump-elsewhere anyway.
      *
      * @param handlers the source of handlers, paired with their raw labels
      * @param instrs the instruction buffer to generate into
      */
    private def generateHandlers(handlers: Iterator[(Instr, Int)])(implicit instrs: InstrBuffer): Unit = {
        handlers.foreach { case (handler, label) =>
            instrs += new instructions.Label(label)
            instrs += handler
        }
    }

    /** Converts the fully generated instruction buffer (complete with labels) into the
      * final, label-free array of instructions.
      *
      * The processing happens in stages:
      *   1. walk the buffer, recording for each label the offset it will have once all labels
      *      are removed, and blanking the label out
      *   1. allocate a new instruction array sized to the number of non-label instructions
      *   1. copy the surviving instructions across, asking each to `relabel` itself against the offsets
      *   1. perform tail-call optimisation
      *
      * @param instrs the final instruction buffer
      * @param numLabels the number of labels within the instruction buffer
      * @param retLocs the labels that point to return instructions within the instruction buffer (for TCO)
      * @return the final array of instructions
      */
    private def finaliseInstrs(instrs: InstrBuffer, numLabels: Int, retLocs: List[RetLoc]): Array[Instr] = {
        val raw = instrs.toArray
        // only the first `total` entries of `raw` are meaningful
        val total = instrs.length
        val labelOffsets = new Array[Int](numLabels)

        // `kept` counts the non-label instructions seen so far, which is exactly the
        // offset a label at `pos` will resolve to in the compacted array
        @tailrec def indexLabels(pos: Int, kept: Int): Int = {
            if (pos < total) raw(pos) match {
                case label: Label =>
                    raw(pos) = null
                    labelOffsets(label.i) = kept
                    indexLabels(pos + 1, kept)
                case _ => indexLabels(pos + 1, kept + 1)
            }
            else kept
        }
        val size = indexLabels(0, 0)
        val compacted = new Array[Instr](size)

        // copies the surviving instructions across, skipping the holes left by the labels
        @tailrec def compact(src: Int, dest: Int): Unit = {
            if (dest < size) raw(src) match {
                case null => compact(src + 1, dest)
                case instr =>
                    compacted(dest) = instr.relabel(labelOffsets)
                    compact(src + 1, dest + 1)
            }
        }
        compact(0, 0)

        tco(compacted, labelOffsets, retLocs)
        compacted
    }

    /** Applies Tail-Call Optimisation (TCO) in-place to the final array of instructions.
      *
      * For each recorded return location, the instruction immediately before the `Return`
      * is inspected. If this is a `Call` instruction, then this is a tail-call
      * (where the last thing the subroutine does is call another subroutine). In this
      * case, the callstack frame can be reused and the `Call` is replaced by a `Jump`
      * to the same label. Any other instruction is left untouched.
      *
      * @param instrs the instruction array, which is mutated
      * @param labelMapping the mapping from labels to their corresponding offset in the array
      * @param retLocs the list of labels that should be checked for tail-call
      */
    private def tco(instrs: Array[Instr], labelMapping: Array[Int], retLocs: List[RetLoc]): Unit = {
        retLocs.foreach { label =>
            val returnAt = labelMapping(label)
            assert(instrs(returnAt) eq instructions.Return, "return locations are actually `Return`s")
            // the candidate tail-call sits directly in front of the `Return`
            val callAt = returnAt - 1
            instrs(callAt) match {
                case call: instructions.Call => instrs(callAt) = new instructions.Jump(call.label)
                case _ =>
            }
        }
    }
}

/** Denotes that a parser unconditionally fails.
  *
  * This is a marker trait with no members of its own. It extends `StrictParsley[Nothing]`,
  * so such a parser has result type `Nothing`.
  */
private [deepembedding] trait MZero extends StrictParsley[Nothing]

/** This is the encapsulated state required for code generation,
  * which is threaded through the entire backend.
  *
  * It issues jump-labels, tracks which shared parsers have been demanded but not yet
  * generated, and caches the jump-labels assigned to shareable handler instructions.
  *
  * @param numRegs the number of registers required by the parser being generated
  */
private [deepembedding] class CodeGenState(val numRegs: Int) {
    /** The next jump-label identifier. */
    private var current = 0
    /** The shared-parsers that have been referenced at some point in the generation so far,
      * but have not yet been handed out by `nextLet`. New entries are prepended and `nextLet`
      * removes from the front, so the most recently demanded parser is handed out first.
      */
    private val queue = mutable.ListBuffer.empty[(Let[_], Boolean, Int)]
    /** The mapping between a shared-parser (paired with whether it produces results) and its generated jump-label. */
    private val map = mutable.Map.empty[(Let[_], Boolean), Int]

    /** Generates a unique jump-label.
      *
      * @return the next unused label; labels are issued consecutively starting from 0
      */
    def freshLabel(): Int = {
        val next = current
        current += 1
        next
    }
    /** Returns the number of labels generated by the code generation process */
    def nlabels: Int = current

    /** Given a shared-parser, will return the assigned jump-label for it,
      * generating one should it not exist yet. When a label is generated, the
      * parser is also placed on the processing queue so that its body gets generated.
      *
      * @param sub the shared parser to collect a label for
      * @param producesResults whether this use of the parser is expected to produce results;
      *                        each variant of the same parser gets its own label
      * @return the label assigned the given parser
      */
    def getLabel(sub: Let[_], producesResults: Boolean): Int = map.getOrElseUpdate((sub, producesResults), {
        val label = freshLabel()
        (sub, producesResults, label) +=: queue
        label
    })

    /** Removes and returns the next shared-parser that has been referred to during code generation,
      * along with whether it should produce results and its assigned jump-label.
      *
      * @note this must only be called when `more` is `true`: the underlying buffer removal fails otherwise
      */
    def nextLet(): (Let[_], Boolean, Int) = queue.remove(0)
    /** Are there any more shared-parsers left on the processing queue? */
    def more: Boolean = queue.nonEmpty

    // Handler caching
    // To reduce redundant instructions appearing in the instruction array, many of the
    // handler instructions in parsley can be cached and generated in a single place.
    // This is because handler instructions are always jumped to (never executed by
    // fall-through) and these handlers specifically always continue by failing (which
    // means their location in the array is irrelevant). However, there are several
    // such handlers, and some of them have some configuration, like `RelabelError`
    // but this may also be shared across many parts of the parser. This gives rise
    // to several handler maps within the state

    /** A map of generic handler instructions to some assigned jump-label. */
    private val handlerMap = mutable.Map.empty[Instr, Int]
    /** A map of error labels to the assigned jump-label to its instruction. */
    private val relabelErrorMap = mutable.Map.empty[scala.Seq[String], Int]
    /** A map of reasons to the assigned jump-label to its instruction. */
    private val applyReasonMap = mutable.Map.empty[String, Int]
    /** A map of registers to the assigned jump-label to its instruction. */
    private val putAndFailMap = mutable.Map.empty[Ref[_], Int]
    /** A map of dislodge amounts to the assigned jump-label to its instruction. */
    private val dislodgeAndFailMap = mutable.Map.empty[Int, Int]

    /** Given a generic handler instruction, fetch the corresponding jump-label
      * that will represent it in the final instruction array.
      */
    def getLabel(handler: Instr): Int  = handlerMap.getOrElseUpdate(handler, freshLabel())
    /** Given a error label, fetch the corresponding jump-label that will represent
      * the `RelabelErrorAndFail(labels)` instruction in the final instruction array.
      */
    def getLabelForRelabelError(labels: scala.Seq[String]): Int = relabelErrorMap.getOrElseUpdate(labels, freshLabel())
    /** Given an error reason, fetch the corresponding jump-label that will represent
      * the `ApplyReasonAndFail(reason)` instruction in the final instruction array.
      */
    def getLabelForApplyReason(reason: String): Int = applyReasonMap.getOrElseUpdate(reason, freshLabel())
    /** Given a register, fetch the corresponding jump-label that will represent
      * the `PutAndFail(reg)` instruction in the final instruction array.
      */
    def getLabelForPutAndFail(reg: Ref[_]): Int = putAndFailMap.getOrElseUpdate(reg, freshLabel())
    /** Given an amount to dislodge, fetch the corresponding jump-label that will represent
      * the `DislodgeAndFail(n)` instruction in the final instruction array.
      */
    def getLabelForDislodgeAndFail(n: Int): Int = dislodgeAndFailMap.getOrElseUpdate(n, freshLabel())

    /** An iterator over all the handler instructions and their corresponding jump-labels that have
      * been demanded during the process of code-generation.
      *
      * The generic handlers come first, followed by the relabel-error, apply-reason, put-and-fail
      * and dislodge-and-fail handlers. The instructions for the latter four kinds are only
      * constructed as the iterator is advanced.
      *
      * @return a single-use iterator; call this method again to obtain a fresh one
      */
    def handlers: Iterator[(Instr, Int)] = {
        val relabelErrors: Iterator[(Instr, Int)] = relabelErrorMap.iterator.map {
            case (labels, i) => new instructions.RelabelErrorAndFail(labels) -> i
        }
        val applyReasons: Iterator[(Instr, Int)] = applyReasonMap.iterator.map {
            case (reason, i) => new instructions.ApplyReasonAndFail(reason) -> i
        }
        val putAndFail: Iterator[(Instr, Int)] = putAndFailMap.iterator.map {
            case (reg, i) => new instructions.PutAndFail(reg.addr) -> i
        }
        val dislodgeAndFail: Iterator[(Instr, Int)] = dislodgeAndFailMap.iterator.map {
            case (n, i) => new instructions.DislodgeAndFail(n) -> i
        }
        // The standard concatenation honours the `Iterator` contract, so `next()` moves on to
        // the following source by itself, even when `hasNext` was not called beforehand.
        handlerMap.iterator ++ relabelErrors ++ applyReasons ++ putAndFail ++ dislodgeAndFail
    }
}




// © 2015 - 2025 Weber Informatics LLC | Privacy Policy (web page footer residue, not part of the source)