org.devzendo.tma.codegen.CodeGenerator.scala Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of tma-assembler Show documentation
Show all versions of tma-assembler Show documentation
The Transputer Macro Assembler Code
(Apache License v2) 2018-2019 Matt Gumbley, DevZendo.org
The newest version!
/*
* Copyright (C) 2008-2018 Matt Gumbley, DevZendo.org http://devzendo.org
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.devzendo.tma.codegen
import org.devzendo.commoncode.string.HexDump
import org.devzendo.tma.ast.AST.{Label, SymbolName}
import org.devzendo.tma.ast._
import org.log4s.Logger
import scala.collection.mutable
import scala.collection.mutable.ArrayBuffer
class CodeGenerator(debugCodegen: Boolean, model: AssemblyModel) {
// Logger for pass progress messages and, when debugCodegen is set, per-line diagnostics.
val logger: Logger = org.log4s.getLogger
// Conditional-assembly state of the generator: Assembly outside any If1 block; If1Seen after an If1 statement
// (lines are still assembled in pass 1); ElseSeen after an Else (lines are collected for pass 2 instead).
object GenerationMode extends Enumeration {
val Assembly, If1Seen, ElseSeen = Value
}
private var generationMode = GenerationMode.Assembly
// Completed If1/Else/Endif blocks; appended to when an Endif is processed and replayed by pass2().
private[codegen] val p2Structures = mutable.ArrayBuffer[Pass2Structure]()
// The block currently being built: receives the pass 1 start/end addresses and the lines held back for pass 2.
private[codegen] var currentP2Structure = new Pass2Structure()
// Highest line number seen so far by processLine; used by endCheck when reporting a missing End statement.
private var lastLineNumber = 0
// 1 until pass2() sets it to 2; processStatement only enforces "no statements after End" while this is 1.
private var passNumber = 1
def getLastLineNumber: Int = lastLineNumber
// Mutable, index-addressable copy of the input. processLine overwrites entries with their transformed lines so
// that convergence can replay them.
private[codegen] var inputLines = mutable.ArrayBuffer[Line]()
/* State maintained during converge mode - when building optimal encodings for direct instruction offsets of
* forward-referenced symbols. Also see the AssemblyModel's converge mode flag that relaxes whether symbols can be
* redefined.
*
* Consider a DirectInstruction containing one or more undefined symbols, and all subsequent Statements until
* these symbols are defined. Note the line index of the start and end of this sequence.
* Compute the minimal encodings of all DirectInstructions in the list, converting them into
* DirectEncodedInstructions. The resulting list is then available to the CodeGenerator.
* The list of subsequent Statements may contain further undefined symbols - the CodeGenerator will keep giving
* Statements to this encoder until all undefined symbols have potential resolutions.
*
* The algorithm used here is from The Transputer Handbook, Graham & King, p48,49:
*
* "The solution is reasonably simple but time consuming. A data structure representing the whole program is built in
* memory. Fixed length sections of code can be held as binary, but any label must be kept as a pointer to the label and
* an associated size. Initially all offsets are assumed to fit in one nibble, with no prefixes needed. A pass over the
* program is made, altering all those that require a larger offset to a suitable value. A further pass is then made,
* expanding those instructions that do not now fit because the previous pass expanded instructions. This process
* continues until no more changes need to be made.
* This algorithm is the only one which is guaranteed to converge."
*
* This needs a DirectInstructionTrial that stores the length of its current encoding, and that can be asked if, when
* evaluated, it will fit in that length. If not, it can have its encoding length increased by one byte (then the loop
* iterates). When all DITs return true, we're good, and can convert them to DirectEncodedInstructions.
* Each time round the loop, the StackedAssemblyModel's cached state is reset.
*
* DirectInstruction (from the Parser) -> (replaced in converge mode) DirectInstructionTrial -> (replaced when done) DirectEncodedInstruction
*
* DirectEncodedInstructions are then trivially generated into binary.
*/
// True from the first DirectInstruction with undefined symbols until symbolsToConverge becomes empty, at which
// point processLine runs converge() and resets this flag.
private var convergeMode: Boolean = false
// Inclusive range of inputLines indices that converge() replays.
private var startConvergeLineIndex = 0
private var endConvergeLineIndex = 0
// Undefined symbols found in DirectInstructions; entries are removed as labels, constants or variables with
// those names are defined (see resolveConvergeSetSymbol).
private var symbolsToConverge = mutable.HashSet[CasedSymbolName]()
// A DirectInstruction awaiting convergence, paired with the number of bytes currently reserved for its encoding.
case class DirectInstructionState(directInstruction: DirectInstruction, currentSize: Int) {
}
// Unresolved DirectInstructions keyed by their index in inputLines; cleared each time converge mode starts.
private val directInstructionByLineIndex = mutable.HashMap[Int, DirectInstructionState]()
// Value of $ at the first line of the convergable range; restored at the start of every convergence iteration.
private var startConvergeDollar = 0
// End of converge mode state
// Errors collected by createModel and endCheck; exposed through getCodeGenerationExceptions.
private val codeGenerationErrors = mutable.ArrayBuffer[CodeGenerationException]()
// Chain of statement transformers
// Transformers are applied to each statement in the order they were registered, before the statement is processed.
type StatementTransformer = Statement => Statement
private val statementTransformers = ArrayBuffer[StatementTransformer]()
/**
 * Registers a transformer at the end of the chain that is applied to every statement before processing.
 *
 * @param st the transformer to append
 */
def addStatementTransformer(st: StatementTransformer): Unit = {
  statementTransformers.append(st)
}
/**
 * Builds the assembly model from the macro-expanded input: runs pass 1 over every line, checks for unresolved
 * forward references, then replays any collected If1/Else blocks in pass 2. Errors raised along the way are
 * collected (see getCodeGenerationExceptions) rather than propagated to the caller.
 *
 * @param lines the macro-expanded lines to assemble
 * @return the model this generator was constructed with, updated in place
 */
def createModel(lines: List[Line]): AssemblyModel = {
  // Convergence replays lines by index, so hold them in the mutable, random-access buffer.
  inputLines.clear()
  inputLines ++= lines

  logger.info("Pass 1: Creating model from " + lines.size + " macro-expanded line(s)")
  inputLines.zipWithIndex.foreach { case (line, lineIndex) =>
    try processLine(line, lineIndex)
    catch {
      case cge: CodeGenerationException => codeGenerationErrors += cge
    }
  }

  logger.info("End of Pass 1: Checking for unresolved forward references")
  try {
    model.checkUnresolvedForwardReferences() // will throw if there are any
  } catch {
    case cge: CodeGenerationException => codeGenerationErrors += cge // doesn't throw these
    case ame: AssemblyModelException =>
      // No single line is responsible, so the error is reported against line 0.
      codeGenerationErrors += new CodeGenerationException(0, ame.getMessage)
  }
  // TODO in converge mode still? there must be unresolveds then...

  logger.info("Pass 2: Updating model with " + p2Structures.size + " pass 2 section(s)")
  try pass2()
  catch {
    case cge: CodeGenerationException => codeGenerationErrors += cge
  }
  logger.info("End of Pass 2")

  model
}
/**
 * @return an immutable snapshot of the errors collected so far, in the order they were recorded
 */
def getCodeGenerationExceptions: List[CodeGenerationException] = {
  val collected = codeGenerationErrors.toList
  collected
}
/**
 * Records an error against the last line number seen if the model never saw an End statement.
 */
def endCheck(): Unit = {
  val endMissing = !model.hasEndBeenSeen
  if (endMissing) {
    val error = new CodeGenerationException(lastLineNumber, "End of input reached with no End statement")
    codeGenerationErrors += error
  }
}
/**
 * Finds the undefined symbols referenced by the line's statement, if that statement is a DirectInstruction.
 *
 * @param line the line to inspect
 * @return the undefined symbols in the direct instruction's expression; empty when the line has no statement,
 *         or its statement is not a DirectInstruction
 */
private[codegen] def lineContainsDirectInstructionWithUndefinedSymbols(line: Line): Set[CasedSymbolName] = {
  val undefinedsOfDirectInstruction: Option[Set[CasedSymbolName]] = line.stmt.collect {
    case DirectInstruction(_, _, operand) => model.findUndefineds(operand)
  }
  undefinedsOfDirectInstruction.getOrElse(Set.empty)
}
/**
 * Processes one input line. The line is either held back for pass 2 (when inside the Else part of an If1 block
 * and the line is not the Endif), or assembled now: its label is created, its statement is transformed and
 * processed, and the start/end of a convergable range is tracked.
 *
 * @param line the line to process
 * @param lineIndex the position of the line in inputLines (several entries may share a line number)
 * @throws CodeGenerationException wrapping any AssemblyModelException raised while processing, tagged with
 *         this line's number
 */
private def processLine(line: Line, lineIndex: Int): Unit = {
if (debugCodegen) {
logger.info("Line " + line.number + ": " + line.toString)
}
// Track the highest line number seen, for end-of-input diagnostics.
if (line.number > lastLineNumber) {
lastLineNumber = line.number
}
try {
if (generationMode == GenerationMode.ElseSeen && notEndif(line)) {
if (debugCodegen) {
logger.info("Adding line to Pass 2 Collection: " + line)
}
// Defer the line, together with its index, until pass2() replays it.
currentP2Structure.addPass2Line((line, lineIndex))
} else {
createLabel(line)
// A direct instruction whose operand cannot be evaluated yet starts (or extends) a convergable range.
val directUndefineds = lineContainsDirectInstructionWithUndefinedSymbols(line)
if (directUndefineds.nonEmpty) {
if (!convergeMode) {
// First such instruction: remember where the range starts, both as a line index and as an address.
convergeMode = true
startConvergeDollar = model.getDollar
directInstructionByLineIndex.clear()
startConvergeLineIndex = lineIndex
logger.debug("Start of convergable lines at line index " + lineIndex + " line number " + line.number + " $=" + HexDump.int2hex(startConvergeDollar))
}
logger.debug("Adding " + directUndefineds + " to converge symbol set")
symbolsToConverge ++= directUndefineds
}
val (modifiedLine, maybeStatement) = applyStatementTransformers(line)
model.addLine(modifiedLine)
// Convergence replays Lines - so if the Statement has been transformed, it must be replaced in its
// Line in the input..
// TODO this is an appalling code smell. too much mutable state...
inputLines(lineIndex) = modifiedLine
maybeStatement.foreach {
stmt: Statement => processStatement(modifiedLine, lineIndex, stmt)
}
// Has convergence ended? Resolve any label on this line.
modifiedLine.label.foreach((label: Label) => {
resolveConvergeSetSymbol(CasedSymbolName(label))
})
// Every awaited symbol now has a definition: this line closes the range, so converge it immediately.
if (convergeMode && symbolsToConverge.isEmpty) {
endConvergeLineIndex = lineIndex
logger.debug("End of convergable lines on line index " + lineIndex + " line number " + modifiedLine.number)
converge()
convergeMode = false
}
}
if (debugCodegen) {
// Blank debug line, emitted after each processed line.
logger.debug("")
}
} catch {
case ame: AssemblyModelException => throw new CodeGenerationException(line.number, ame.getMessage)
}
}
/**
 * Marks a symbol as defined for the purposes of convergence, by removing it from the set of symbols that the
 * current convergable range is still waiting for. Does nothing if the symbol is not being waited for.
 *
 * The result type is declared as Unit: it was previously inferred as AnyVal from an if-without-else whose
 * branch ended in a Boolean.
 *
 * @param casedSymbolName the symbol that has just been defined
 */
private def resolveConvergeSetSymbol(casedSymbolName: CasedSymbolName): Unit = {
  // remove reports whether the element was present, so a separate contains lookup is unnecessary.
  if (symbolsToConverge.remove(casedSymbolName)) {
    logger.debug("Removing " + casedSymbolName + " from converge symbol set")
  }
}
/**
 * Collects the distinct line numbers of the lines in the current convergable range.
 *
 * @return each line number occurring between startConvergeLineIndex and endConvergeLineIndex (inclusive),
 *         once, in the iteration order of the underlying hash set
 */
def setOfLineNumbersInConvergence(): List[Int] = {
  val distinctLineNumbers = mutable.HashSet[Int]()
  (startConvergeLineIndex to endConvergeLineIndex).foreach { index =>
    distinctLineNumbers += inputLines(index).number
  }
  distinctLineNumbers.toList
}
/**
 * Repeatedly replays the lines in [startConvergeLineIndex, endConvergeLineIndex] until every unresolved direct
 * instruction in that range fits in the number of bytes reserved for it. Each iteration restarts $ at
 * startConvergeDollar, clears the model's sourced values for the affected line numbers, re-creates labels at
 * the current $, and either allocates the instruction's encoding or reserves one more byte and asks for
 * another iteration. The model is in converge mode for the duration.
 */
private def converge(): Unit = {
if (debugCodegen) {
logger.info("Converging line indices [" + startConvergeLineIndex + " .. " + endConvergeLineIndex + "] Start $ " + HexDump.int2hex(startConvergeDollar))
}
model.setConvergeMode(true)
var iteration = 0
val lineNumbersInConvergence = setOfLineNumbersInConvergence()
if (debugCodegen) {
logger.info("Line numbers in convergence: " + lineNumbersInConvergence)
}
// Set whenever an instruction had to grow during an iteration; the loop ends on the first iteration where
// nothing grew.
var again = false
do {
iteration += 1
again = false
// Every iteration lays the range out afresh from its starting address.
model.setDollarSilently(startConvergeDollar)
if (debugCodegen) {
logger.info("Convergence iteration " + iteration)
}
// At top of loop, clear down model storage for all lines - macro expansions mean that multiple entries
// in inputLines could have the same line number. So only clear each line once, before reprocessing them
// all, below.
lineNumbersInConvergence.foreach(lineNumber => model.clearSourcedValuesForLineNumber(lineNumber))
// Convergence should only occur in pass 1. Pass 2 could add Storages that this would clear.
for (lineIndex <- startConvergeLineIndex to endConvergeLineIndex) {
val line = inputLines(lineIndex)
if (debugCodegen) {
logger.info("Converging line index " + lineIndex + ": " + line)
}
createLabel(line) // update any label with current $
// Only lines whose direct instruction was unresolved in pass 1 have an entry here.
val maybeElement = directInstructionByLineIndex.get(lineIndex)
maybeElement match {
case Some(DirectInstructionState(di: DirectInstruction, currentSize: Int)) =>
if (debugCodegen) {
logger.debug("Current size for direct instruction: " + currentSize)
}
model.evaluateExpression(di.expr) match {
case Right(value) =>
if (debugCodegen) {
logger.debug("Defined: Encoding value " + value)
}
// This evaluation needs to take the encoded instruction length into account, when a
// Unary(OffsetFrom(x)) is in the expression. Here and in non-convergent evaluation.
val valueToEncode = encodeOffsetValue(di, value)
val encoded = DirectInstructionEncoder.apply(di.opbyte, valueToEncode)
if (debugCodegen) {
logger.debug(s"Defined: Encoding direct instruction (convergence); original value to encode $value; after length adjustment $valueToEncode")
logger.debug("Defined: New encoded size for direct instruction: " + encoded.size)
}
if (encoded.size > currentSize) {
if (debugCodegen) {
logger.debug("Defined: Another byte of storage required")
}
// requires more size
// Grow the reservation by exactly one byte, skip over it without storing anything, and
// request another iteration.
directInstructionByLineIndex.put(lineIndex, DirectInstructionState(di, currentSize + 1))
model.incrementDollar(currentSize + 1)
again = true
} else {
if (debugCodegen) {
logger.debug("Defined: Storage size ok; allocating")
}
// The encoded bytes are wrapped as Number expressions and stored as byte-sized cells.
model.allocateStorageForLine(line, 1, encoded map Number) // silently increments $
}
case Left(undefineds) =>
// NOTE(review): the message below says "allocating 1 byte", but the amount actually skipped is
// currentSize, which may be larger than 1 after earlier iterations - confirm the intended wording.
if (debugCodegen) {
logger.debug("Undefined: Storage size static: Symbol(s) (" + undefineds + ") are not yet defined; allocating 1 byte")
}
model.incrementDollar(currentSize)
}
case None =>
if (debugCodegen) {
logger.debug("Processing non-direct-instruction")
}
// NB Not processLineStatement as that adds the Line to the model, and it's already been added once.
line.stmt.foreach((stmt: Statement) =>
processStatement(line, lineIndex, stmt)
)
}
}
} while (again)
if (debugCodegen) {
logger.info("Convergence complete after " + iteration + " iteration(s)")
}
model.setConvergeMode(false)
}
/**
 * Adjusts an evaluated operand for instructions whose expression is a top-level Unary OffsetFrom: the length
 * reported by the encoder for that value is subtracted from it. Any other expression is returned unchanged.
 *
 * @param di the direct instruction whose expression shape decides whether to adjust
 * @param value the evaluated value of the instruction's expression
 * @return the value to hand to the DirectInstructionEncoder
 */
private def encodeOffsetValue(di: DirectInstruction, value: Int) = {
  val isOffsetFrom = di.expr match {
    case Unary(OffsetFrom(_), _) => true
    case _ => false
  }
  if (isOffsetFrom) {
    value - DirectInstructionEncoder.lengthOfEncodedOffsetFromOpcodeInstruction(value)
  } else {
    value
  }
}
/**
 * @param line the line to inspect
 * @return false only when the line's statement is an Endif; true for any other statement or for no statement
 */
private def notEndif(line: Line): Boolean = {
  val isEndif = line.stmt.exists {
    case Endif() => true
    case _ => false
  }
  !isEndif
}
/**
 * Defines the line's label, if it has one, at the model's current $.
 *
 * @param line the line whose optional label is to be set
 */
private def createLabel(line: Line): Unit = {
  for (label <- line.label) {
    val symbol = CasedSymbolName(label)
    model.setLabel(symbol, model.getDollar, line)
  }
}
/**
 * Runs the line's statement, if any, through every registered statement transformer in registration order.
 *
 * @param line the line whose statement is to be transformed
 * @return the line (a copy carrying the new statement if the transformers changed it, otherwise the original)
 *         together with the resulting statement; (line, None) when the line has no statement
 * @throws CodeGenerationException if a transformer throws a StatementTransformationException; the error is
 *         tagged with this line's number
 */
private def applyStatementTransformers(line: Line): (Line, Option[Statement]) = {
  line.stmt match {
    case None =>
      (line, None)
    case Some(original) =>
      try {
        // Thread the statement through the chain, each transformer seeing the previous one's output.
        val transformed = statementTransformers.foldLeft(original) { (current, transform) =>
          transform(current)
        }
        if (transformed == original) {
          (line, Some(transformed))
        } else {
          if (debugCodegen) {
            logger.debug("Line " + line.number + " (Transformed): " + transformed)
          }
          (line.copy(stmt = Some(transformed)), Some(transformed))
        }
      } catch {
        case ste: StatementTransformationException =>
          logger.debug(s"Rethowing ${ste.getMessage}")
          throw new CodeGenerationException(line.number, ste.getMessage)
      }
  }
}
/**
 * Dispatches a single statement to its handler, updating the model and the generator's state accordingly.
 *
 * @param line the line the statement belongs to; passed on to the model and used for diagnostics
 * @param lineIndex index of the line in inputLines; needed to register unresolved direct instructions
 * @param stmt the (possibly transformed) statement to process
 * @throws CodeGenerationException if a statement follows End during pass 1, or if a Processor statement
 *         names a processor other than "386" or "TRANSPUTER"
 */
private def processStatement(line: Line, lineIndex: Int, stmt: Statement): Unit = {
  val lineNumber = line.number
  // Pass 2 fixups run after pass 1 (duh!), and require processing of statements after this check would have
  // triggered in pass 1.
  if (model.hasEndBeenSeen && passNumber == 1) {
    throw new CodeGenerationException(lineNumber, "No statements allowed after End statement")
  }
  stmt match {
    case Title(text) =>
      model.title = text
      logger.debug("Title is '" + text + "'")
    case Page(rows, columns) =>
      model.rows = rows
      model.columns = columns
      logger.debug("Rows: " + rows + " Columns: " + columns)
    case Processor(name) =>
      // Resolve the endianness before touching the model, so that an unsupported name is reported as an
      // assembly error for this line instead of escaping as a MatchError with the model half-updated.
      val endianness = name match {
        case "386" | "TRANSPUTER" => Endianness.Little
        case unsupported =>
          throw new CodeGenerationException(lineNumber, "Unsupported processor '" + unsupported + "'")
      }
      model.processor = Some(name)
      logger.debug("Processor is '" + name + "'")
      model.endianness = endianness
    case Align(n) => processAlign(line, n)
    case Org(expr) => processOrg(line, expr)
    case End(expr) => processEnd(line, expr)
    case ConstantAssignment(name, expr) => processConstantAssignment(line, name, expr)
    case VariableAssignment(name, expr) => processVariableAssignment(line, name, expr)
    case Ignored() => // Do nothing
    case MacroStart(_, _) => // All macro AST statements are handled by the parser; the expansions are handled
    case MacroBody(_) => // by the rest of the AST statement handlers, here..
    case MacroEnd() => // So, do nothing...
    case MacroInvocation(_, _) => // Non-macro statements would follow after an invocation, unless it's an empty macro.
    case DB(exprs) => model.allocateStorageForLine(line, 1, exprs)
    case DW(exprs) => model.allocateStorageForLine(line, 2, exprs)
    case DD(exprs) => model.allocateStorageForLine(line, 4, exprs)
    case DBDup(_, _) => // Will have been transformed into DB by OffsetTransformer
    case DWDup(_, _) => // Will have been transformed into DW by OffsetTransformer
    case DDDup(_, _) => // Will have been transformed into DD by OffsetTransformer
    case If1() => processIf1()
    case Else() => processElse(line)
    case Endif() => processEndif(line)
    // Bind the matched instruction directly rather than casting stmt back to DirectInstruction.
    case di @ DirectInstruction(_, opbyte, expr) => processDirectInstruction(line, lineIndex, di, opbyte, expr)
    case DirectEncodedInstruction(_, opbytes) => model.allocateInstructionStorageForLine(line, opbytes)
    case IndirectInstruction(_, opbytes) => model.allocateInstructionStorageForLine(line, opbytes)
  }
}
/**
 * Advances $ to the next multiple of the given alignment; does nothing if $ is already aligned.
 *
 * @param line the line holding the Align statement; used for diagnostics
 * @param alignment the boundary to align to; must be positive
 * @throws CodeGenerationException if the alignment is zero or negative (zero would otherwise fail with an
 *         ArithmeticException, and a negative value would move $ backwards)
 */
private def processAlign(line: Line, alignment: Int): Unit = {
  if (alignment <= 0) {
    throw new CodeGenerationException(line.number, "Alignment must be positive, but was " + alignment)
  }
  val dollar = model.getDollar
  val remainder = dollar % alignment
  if (remainder > 0) {
    // Number of bytes to skip to reach the boundary - an increment, not an address.
    val padding = alignment - remainder
    if (debugCodegen) {
      // Log the address being aligned to, not the padding amount.
      logger.info("Align: from " + HexDump.int2hex(dollar) + " to " + HexDump.int2hex(dollar + padding))
    }
    model.incrementDollar(padding)
  }
}
/**
 * Sets $ to the value of the Org expression.
 *
 * @param line the line holding the Org statement
 * @param expr the origin expression; must not contain Characters and must be fully defined
 * @throws CodeGenerationException if the expression contains Characters or references undefined symbols
 */
private def processOrg(line: Line, expr: Expression): Unit = {
  if (expressionContainsCharacters(expr)) {
    throw new CodeGenerationException(line.number, "Origin cannot be set to a Character expression '" + expr + "'")
  }
  model.evaluateExpression(expr) match {
    case Right(origin) =>
      if (debugCodegen) {
        logger.info("Org: " + HexDump.int2hex(origin))
      }
      model.setDollar(origin, line)
    case Left(missingSymbols) =>
      throw new CodeGenerationException(line.number, "Undefined symbol(s) '" + missingSymbols.mkString(",") + "'")
  }
}
/**
 * Tells the model that the End statement has been seen. Neither the line nor the optional expression of the
 * End statement is used here.
 */
private def processEnd(line: Line, expr: Option[Expression]): Unit =
  model.endHasBeenSeen()
/**
 * Defines a constant from an expression. If the expression cannot be evaluated yet, it is recorded in the
 * model as a forward reference instead.
 *
 * @param line the line holding the assignment
 * @param name the constant's name as written in the source
 * @param expr the value expression; must not contain Characters
 * @throws CodeGenerationException if the expression contains Characters
 */
private def processConstantAssignment(line: Line, name: SymbolName, expr: Expression): Unit = {
  val constantName = CasedSymbolName(name)
  if (expressionContainsCharacters(expr)) {
    throw new CodeGenerationException(line.number, "Constant cannot be set to a Character expression '" + expr + "'")
  }
  model.evaluateExpression(expr) match {
    case Right(resolved) =>
      model.setConstant(constantName, resolved, line)
      // A convergable range may have been waiting for this symbol.
      resolveConvergeSetSymbol(constantName)
    case Left(missingSymbols) =>
      if (debugCodegen) {
        logger.info("Cannot set constant " + constantName + " to expression " + expr + " due to undefined symbols " + missingSymbols + " on line number " + line.number)
      }
      model.recordSymbolForwardReferences(missingSymbols, constantName, expr, line, SymbolType.Constant)
  }
}
/**
 * Sets a variable from an expression. If the expression cannot be evaluated yet, it is recorded in the model
 * as a forward reference instead.
 *
 * @param line the line holding the assignment
 * @param name the variable's name as written in the source
 * @param expr the value expression; must not contain Characters
 * @throws CodeGenerationException if the expression contains Characters
 */
private def processVariableAssignment(line: Line, name: SymbolName, expr: Expression): Unit = {
  val variableName = CasedSymbolName(name)
  if (expressionContainsCharacters(expr)) {
    throw new CodeGenerationException(line.number, "Variable cannot be set to a Character expression '" + expr + "'")
  }
  model.evaluateExpression(expr) match {
    case Right(resolved) =>
      model.setVariable(variableName, resolved, line)
      // A convergable range may have been waiting for this symbol.
      resolveConvergeSetSymbol(variableName)
    case Left(missingSymbols) =>
      if (debugCodegen) {
        logger.info("Cannot set variable " + variableName + " to expression " + expr + " due to undefined symbols " + missingSymbols + " on line number " + line.number)
      }
      model.recordSymbolForwardReferences(missingSymbols, variableName, expr, line, SymbolType.Variable)
  }
}
/**
 * @param expr the expression tree to search
 * @return true if a Characters node occurs anywhere in the expression
 */
private def expressionContainsCharacters(expr: Expression): Boolean = {
  expr match {
    case Characters(_) => true
    case SymbolArg(_) | Number(_) => false
    case Unary(_, operand) => expressionContainsCharacters(operand)
    case Binary(_, left, right) =>
      expressionContainsCharacters(left) || expressionContainsCharacters(right)
  }
}
/**
 * Handles If1: records the current $ as the start address of the block being built, then switches to
 * If1Seen mode. Statements that follow continue to be assembled normally in pass 1.
 */
private def processIf1(): Unit = {
  val pass1StartAddress = model.getDollar
  if (debugCodegen) {
    logger.info("Setting Pass 2 start address of " + pass1StartAddress + " in If1")
  }
  // Pass 2 replays its lines from this address.
  currentP2Structure.setStartAddress(pass1StartAddress)
  generationMode = GenerationMode.If1Seen
}
/**
 * Handles Else: records the current $ as the end address of the pass 1 part of the block, then switches to
 * ElseSeen mode so that subsequent lines are collected for pass 2 rather than assembled.
 *
 * @param line the line holding the Else statement
 * @throws CodeGenerationException if no If1 precedes this Else
 */
private def processElse(line: Line): Unit = {
  val afterIf1 = generationMode == GenerationMode.If1Seen
  if (!afterIf1) {
    throw new CodeGenerationException(line.number, "Else seen without prior If1")
  }
  val pass1EndAddress = model.getDollar
  if (debugCodegen) {
    logger.info("Setting Pass 2 end address of " + pass1EndAddress + " in Else; switching to Pass 2 Line Collection")
  }
  // The end address fixes the size of the pass 1 block; pass 2 must finish at the same address.
  currentP2Structure.setEndAddress(pass1EndAddress)
  generationMode = GenerationMode.ElseSeen
}
/**
 * Handles Endif: stores the block built so far for pass 2, starts a fresh one, and returns to normal
 * pass 1 assembly.
 *
 * @param line the line holding the Endif statement
 * @throws CodeGenerationException if neither If1 nor Else precedes this Endif
 */
private def processEndif(line: Line): Unit = {
  val insideIf1Block =
    generationMode == GenerationMode.If1Seen || generationMode == GenerationMode.ElseSeen
  if (!insideIf1Block) {
    throw new CodeGenerationException(line.number, "Endif seen without prior If1")
  }
  if (debugCodegen) {
    logger.info("Storing collected Pass 2 Lines in Endif; switching to Pass 1 Line Assembly")
  }
  // Queue the finished block for pass 2 and begin an empty one for the next If1.
  p2Structures += currentP2Structure
  currentP2Structure = new Pass2Structure
  generationMode = GenerationMode.Assembly
}
/**
 * Replays the lines collected from the Else part of each If1 block, starting at the address the block started
 * at in pass 1, and verifies that the replay ends at the address where the pass 1 part ended.
 *
 * @throws CodeGenerationException if a block's pass 2 lines end at a different address than its pass 1 lines
 */
private def pass2(): Unit = {
  passNumber = 2
  p2Structures.foreach { block =>
    val collectedLines = block.getPass2Lines
    // Only bother processing lines, and setting $ if there are any lines - can't set $ without a line number
    // for diagnostics..
    if (collectedLines.nonEmpty) {
      model.setDollarSilently(block.getStartAddress)
      // This will possibly append Storages at existing addresses - these will be resolved sequentially
      // overwriting earlier memory, in the writers.
      collectedLines.foreach { entry =>
        processLine(entry._1, entry._2)
      }
      // Current address must match end address of pass 1. If not, the blocks are different sizes.
      val pass2EndAddress = model.getDollar
      if (pass2EndAddress != block.getEndAddress) {
        val firstLineNumber = collectedLines.head._1.number
        throw new CodeGenerationException(firstLineNumber, "Differently-sized blocks in Passes 1 and 2: Pass 1=" +
          (block.getEndAddress - block.getStartAddress) + " byte(s); Pass 2=" +
          (pass2EndAddress - block.getStartAddress) + " byte(s)")
      }
    }
  }
}
/**
 * Assembles a direct instruction. When its operand can be evaluated, the instruction is encoded and stored
 * immediately; otherwise it is registered for convergence with an initial size of one byte, and $ is advanced
 * by one.
 *
 * @param line the line holding the instruction
 * @param lineIndex index of the line in inputLines, used as the key when registering for convergence
 * @param di the instruction itself
 * @param opbyte the instruction's opcode byte
 * @param expr the instruction's operand expression
 */
private def processDirectInstruction(line: Line, lineIndex: Int, di: DirectInstruction, opbyte: Int, expr: Expression): Unit = {
  model.evaluateExpression(expr) match {
    case Left(missingSymbols) =>
      if (debugCodegen) {
        logger.info("Symbol(s) (" + missingSymbols + ") are not yet defined on line " + line.number)
        logger.info("Storing undefined direct instruction " + di + " on line index " + lineIndex)
      }
      // Assume the smallest encoding for now; convergence grows it if needed.
      directInstructionByLineIndex.put(lineIndex, DirectInstructionState(di, 1))
      model.incrementDollar(1)
    case Right(value) =>
      // This evaluation needs to take the encoded instruction length into account, when a
      // Unary(OffsetFrom(x)) is in the expression. Here and in convergent evaluation.
      val valueToEncode = encodeOffsetValue(di, value)
      logger.debug(s"Encoding direct instruction (non-convergence); original value to encode $value; after length adjustment $valueToEncode")
      val encodedBytes = DirectInstructionEncoder.apply(opbyte, valueToEncode)
      model.allocateInstructionStorageForLine(line, encodedBytes)
  }
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy