All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.devzendo.tma.parser.AssemblyParser.scala Maven / Gradle / Ivy

Go to download

The Transputer Macro Asesmbler Code (Apache License v2) 2018-2019 Matt Gumbley, DevZendo.org

The newest version!
/*
 * Copyright (C) 2008-2018 Matt Gumbley, DevZendo.org http://devzendo.org
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.devzendo.tma.parser

import java.io

import org.devzendo.tma.ast.AST.{Label, MacroArgument, MacroName, MacroParameterName}
import org.devzendo.tma.ast._
import org.log4s.Logger

import scala.util.matching.Regex
import scala.util.parsing.combinator._

class AssemblyParser(val debugParser: Boolean, val showParserOutput: Boolean, val macroManager: MacroManager) {

    val logger: Logger = org.log4s.getLogger

    var processorToParse: Option[Processor] = None

    @throws(classOf[AssemblyParserException])
    def parse(line: String, lineNumber: Int, inMacroExpansion: Boolean = false): List[Line] = {

        def sanitizedInput = nullToEmpty(line).trim()

        def logParserOutput(rLines: List[Line]): Unit = {
            logger.info(s"${rLines.size} lines output...")
            for (rL <- rLines) {
                val sb = new StringBuilder()
                sb.append("AST ")
                sb.append(rL.number)
                sb.append("|")
                for (label <- rL.label) {
                    sb.append("LBL ")
                    sb.append(label)
                    sb.append(":")
                }
                for (stmt <- rL.stmt) {
                    sb.append(stmt)
                }
                sb.append("|")
                logger.info(sb.toString())
                logger.info("")
            }
        }


        if (debugParser) {
            logger.debug(s"parsing IME $inMacroExpansion $lineNumber|$sanitizedInput|")
        }
        if (showParserOutput) {
            val lineType = if (inMacroExpansion) "IME" else "TXT"
            logger.info(s"$lineType $lineNumber|$sanitizedInput|")
        }
        if (lineNumber < 1) {
            throw new AssemblyParserException(lineNumber, "Line numbers must be positive")
        }

        if (sanitizedInput.length > 0) {
            val parser = if (macroManager.isInMacroBody) new MacroBodyCombinatorParser else new StatementCombinatorParser(inMacroExpansion)
            parser.setLineNumber(lineNumber)
            parser.setDebugParser(debugParser)

            val parserOutput = parser.parseProgram(sanitizedInput)
            parserOutput match {
                case parser.Success(r, _) =>
                    val rLines = r.asInstanceOf[List[Line]] // unsure why r is not right type
                    if (debugParser) {
                        logger.debug("returning " + rLines)
                    }

                    if (showParserOutput) {
                        logParserOutput(rLines)
                    }
                    rLines

                case parser.NoSuccess(x, _) =>
                    logger.debug(s"$lineNumber: $sanitizedInput") // mostly a useless, hard to understand error...
                    throw new AssemblyParserException(lineNumber, "Unknown statement '" + sanitizedInput + "'")
            }
        } else {
            val line = Line(lineNumber, sanitizedInput, None, None)
            List(line)
        }
    }

    private def nullToEmpty(input: String): String = {
        if (input == null) "" else input
    }

    private class MacroBodyCombinatorParser extends DiagnosableParser with LineParser {
        def line: Parser[List[Line]] = macroEnd | macroStart | macroBody

        def macroEnd: Parser[List[Line]] =
            """(?i)ENDM""".r ^^ {
                _ =>
                    if (debugParser) logger.debug("in endm")
                    macroManager.endMacro()
                    List(Line(lineNumber, text, None, Some(MacroEnd())))
            }

        def macroStart: Parser[List[Line]] = (
            ident ~ macroWord ~ repsep(ident, ",")
            ) ^^ {
                _ => throw new AssemblyParserException(lineNumber, "Macro definitions cannot be nested")
        }

        def macroWord: Parser[String] =
            """(?i)MACRO""".r ^^ ( _ => "MACRO" )

        def macroBody: Parser[List[Line]] =
        """.*""".r ^^ {
            x: String =>
                if (debugParser) logger.debug("in macroBody")
                macroManager.addMacroLine(x)
                List(Line(lineNumber, text, None, Some(MacroBody(x))))
        }
    }

    private trait NoInstructionParser extends Parsers {
        def noInstructionsAllowed: Parser[Statement] = failure("No instruction parser available")
    }

    private class StatementCombinatorParser(inMacroExpansion: Boolean) extends ExpressionParser with DiagnosableParser with LineParser
        with TransputerInstructionParser with NoInstructionParser {

        def line: Parser[List[Line]] =  macroInvocationLine | statementLine

        def statementLine: Parser[List[Line]] = (
            opt(label) ~ opt(statement) ~ opt(comment)
          ) ^^ {
            case optLabel ~ optStatement ~ optComment =>
                if (debugParser) logger.debug("in statementLine, inMacroExpansion=" + inMacroExpansion + ", optComment=" + optComment)
                val returnedText = removeDoubleSemicolonCommentsInMacroExpansion(optComment)
                if (debugParser) logger.debug("in statementLine after ;;-removal, text=|" + returnedText + "|")
                List(Line(lineNumber, returnedText, optLabel, optStatement))
        }

        private def removeDoubleSemicolonCommentsInMacroExpansion(optComment: Option[Comment]): String = {
            val returnedText = (inMacroExpansion, optComment) match {
                case (true, Some(DoubleComment(comment: String))) => text.substring(0, text.length - comment.length).trim
                case _ => text // already trimmed
            }
            returnedText
        }

        def replaceFirstLabel(maybeLabel: Option[Label], lines: List[Line]): List[Line] = {
            if (maybeLabel.isEmpty || lines.isEmpty) {
                lines
            } else {
                lines.head.copy(label = maybeLabel) :: lines.tail
            }
        }

        def macroInvocationLine: Parser[List[Line]] = (
          opt(label) ~ macroInvocation <~ opt(comment)
          ) ^^ {
            case optLabel ~ macroInvocation =>
                if (debugParser) {
                    logger.debug("in macroInvocationLine, macroInvocation is " + macroInvocation)
                    macroInvocation.args.foreach( (ma: MacroArgument) => logger.debug(s"macro argument |$ma|"))
                }

                // Ensure that any label in the original macro invocation is not passed through to the code generator.
                // That would cause it to set the label twice (which it can't). Any label will be passed through to
                // the first expanded parsed line.
                val macroInvocationLine = Line(lineNumber, text, None, Some(macroInvocation))

                val expansion = macroManager.expandMacro(macroInvocation.name, macroInvocation.args)
                if (debugParser) expansion.foreach( (f: String) => logger.debug("expanded macro: |" + f + "|"))
                val parsedExpansions = expansion.flatMap((str: String) => AssemblyParser.this.parse(str, lineNumber, inMacroExpansion = true))
                // Ensure that any label in the macro invocation is set in the first expanded parsed line
                val parsedExpansionsWithInvocationLabel = replaceFirstLabel(optLabel, parsedExpansions)
                if (debugParser) parsedExpansionsWithInvocationLabel.foreach( (l: Line) => logger.debug("expanded parsed macro: |" + l + "|"))
                macroInvocationLine :: parsedExpansionsWithInvocationLabel
        }

        def label: Parser[Label] = (
          ident <~ ":"
        ) ^^ {
            label =>
                if (debugParser) logger.debug("in label, ident: " + label)
                new Label(label)
        }

        def statement: Parser[Statement] = constantAssignment | variableAssignment | macroStart | origin | data |
            title | page | processor | align | condif1 | condelse | condendif | end | opcode | ignored

        // Not sure why I can't use ~> and <~ here to avoid the equ?
        def constantAssignment: Parser[ConstantAssignment] = (
            ident ~ equ ~ expression
            ) ^^ {
            case ident ~ _ ~ expression =>
                if (debugParser) logger.debug("in constantAssignment, ident: " + ident + " expr:" + expression)
                ConstantAssignment(ident.asInstanceOf[String], expression)
        }

        def variableAssignment: Parser[VariableAssignment] = (
          ident ~ "=" ~ expression
          ) ^^ {
            case ident ~ "=" ~ expression =>
                if (debugParser) logger.debug("in variableAssignment, ident: " + ident + " expr:" + expression)
                VariableAssignment(ident.asInstanceOf[String], expression)
        }

        def opcode: Parser[Statement] = {
            // if (debugParser) logger.debug("in opcode, processorToParse: " + processorToParse)

            processorToParse match {
                case None => noInstructionsAllowed
                case Some(Processor("TRANSPUTER")) => transputerInstruction
                case Some(Processor(_)) => noInstructionsAllowed
            }
        }

        def macroStart: Parser[MacroStart] = (
          ident ~ macroWord ~ repsep(ident, ",")
          ) ^^ {
            case ident ~ _ ~ parameters =>
                if (debugParser) logger.debug("in macroStart, ident: " + ident + " parameters:" + parameters)
                try {
                    val macroName = new MacroName(ident)
                    val macroParameterNames = parameters.map(new MacroParameterName(_))
                    macroManager.startMacro(macroName, macroParameterNames)
                    MacroStart(macroName, macroParameterNames)
                } catch {
                    case i: IllegalStateException => throw new AssemblyParserException(lineNumber, i.getMessage)
                }
        }

        def macroInvocation: Parser[MacroInvocation] = (
          existingMacro ~ repsep(macroArgument, macroArgumentSeparator)
        ) ^^ {
            case macroName ~ arguments =>
                if (debugParser) logger.debug("in macroInvocation, macro name: " + macroName + " arguments:" + arguments)
                MacroInvocation(macroName, arguments)
        }

        def macroArgumentSeparator: Parser[io.Serializable] = "," | rep(whiteSpace)

        // Special case to prevent re-detection of an existing macro (if the word following the existing macro name is MACRO)
        def existingMacro: Parser[MacroName] = ident ~ opt(macroWord) ^? {
            case possibleMacro ~ optMacroWord
                if macroManager.exists(possibleMacro) && optMacroWord.isEmpty =>
                if (debugParser) logger.debug("in existingMacro, ident: " + possibleMacro)
                possibleMacro
        }

        def macroArgument: Parser[MacroArgument] = (macroStringArgument | """[^\s,;]+""".r) ^^ {
            argument =>
                if (debugParser) logger.debug("in macroArgument, argument is [" + argument + "]")
                new MacroArgument(argument)
        }

        // The difference between these two and singleQuotedString is that these include the terminating quotes in the
        // parsed output. Macros must preserve this into their expansion.
        def singleQuotedMacroStringArgument: Parser[String] =
            """'([^'\x00-\x1F\x7F\\]|\\[\\'"bfnrt]|\\u[a-fA-F0-9]{4})*'""".r ^^ {
                contents => {
                    if (debugParser) logger.debug("in singleQuotedMacroStringArgument, contents is: |" + contents + "|")
                    contents
                }
            }
        def doubleQuotedMacroStringArgument: Parser[String] =
            """"([^"\x00-\x1F\x7F\\]|\\[\\'"bfnrt]|\\u[a-fA-F0-9]{4})*"""".r ^^ {
                contents => {
                    if (debugParser) logger.debug("in doubleQuotedMacroStringArgument, contents is: |" + contents + "|")
                    contents
                }
            }

        def macroStringArgument: Parser[String] = singleQuotedMacroStringArgument | doubleQuotedMacroStringArgument

        def origin: Parser[Org] = (
          org ~> expression
        ) ^^ {
            expr =>
                if (debugParser) logger.debug("in org, addr:" + expr)
                Org(expr)
        }

        def data: Parser[Statement] = dbDup | dwDup | ddDup | db | dw | dd

        def db: Parser[DB] = (
          """(?i)DB""".r  ~> repsep(expression | characterExpression, ",")
          ) ^^ {
            exprs =>
                if (debugParser) logger.debug("in db, exprs:" + exprs)
                if (exprs.isEmpty) {
                    throw new AssemblyParserException(lineNumber, "DB directive without data")
                }
                DB(exprs)
        }

        def dup: Parser[Expression] = (
          """(?i)DUP\s*\(""".r ~> (expression | characterExpression) <~ """\)""".r
        ) ^^ {
            repeatedExpr =>
                if (debugParser) logger.debug("in dup, repeatedExpr:" + repeatedExpr)
                repeatedExpr
        }

        def dbDup: Parser[DBDup] = (
          """(?i)DB""".r ~> expression ~ dup
          ) ^^ {
            case countExpr ~ repeatedExpr =>
                if (debugParser) logger.debug("in dbDup, countExpr:" + countExpr + " repeatedExpr:" + repeatedExpr)
                DBDup(countExpr, repeatedExpr)
        }

        def dw: Parser[DW] = (
          """(?i)DW""".r  ~> repsep(expression | characterExpression, ",")
          ) ^^ {
            exprs =>
                if (debugParser) logger.debug("in dw, exprs:" + exprs)
                if (exprs.isEmpty) {
                    throw new AssemblyParserException(lineNumber, "DW directive without data")
                }
                DW(exprs)
        }

        def dwDup: Parser[DWDup] = (
          """(?i)DW""".r ~> expression ~ dup
          ) ^^ {
            case countExpr ~ repeatedExpr =>
                if (debugParser) logger.debug("in dwDup, countExpr:" + countExpr + " repeatedExpr:" + repeatedExpr)
                DWDup(countExpr, repeatedExpr)
        }

        def dd: Parser[DD] = (
          """(?i)DD""".r  ~> repsep(expression | characterExpression, ",")
          ) ^^ {
            exprs =>
                if (debugParser) logger.debug("in dd, exprs:" + exprs)
                if (exprs.isEmpty) {
                    throw new AssemblyParserException(lineNumber, "DD directive without data")
                }
                DD(exprs)
        }

        def ddDup: Parser[DDDup] = (
          """(?i)DD""".r ~> expression ~ dup
          ) ^^ {
            case countExpr ~ repeatedExpr =>
                if (debugParser) logger.debug("in ddDup, countExpr:" + countExpr + " repeatedExpr:" + repeatedExpr)
                DDDup(countExpr, repeatedExpr)
        }

        def title: Parser[Title] = (
          """(?i)TITLE""".r  ~> """.*""".r
        ) ^^ {
            text =>
                if (debugParser) logger.debug("in title, text:" + text)
                Title(text)
        }

        def page: Parser[Page] = (
          """(?i)PAGE""".r  ~> wholeNumber ~ "," ~ wholeNumber
          ) ^^ {
            case rows ~ _ ~ columns =>
                if (debugParser) logger.debug("in page, rows:" + rows + ", columns:" + columns)
                Page(rows.toInt, columns.toInt)
        }

        def processor: Parser[Processor] = """(?i)\.(386|TRANSPUTER)""".r ^^ {
            cpuString =>
                val cpu = cpuString.substring(1)
                if (debugParser) {
                    logger.debug("in processor, cpu string is '" + cpu + "'")
                }
                val out = Processor(cpu)
                cpu match {
                    case "386" =>
                        logger.warn("386 Processor; only its little-endianness is understood, none of its instructions")
                    case "TRANSPUTER" =>
                        processorToParse = Some(out) // Enables extra instruction set parser combinator.
                        logger.debug("Enabling Transputer opcodes, little-endianness")
                }
                out
        }

        def align: Parser[Align] = (
          """(?i)ALIGN""".r  ~> wholeNumber
          ) ^^ {
            alignment =>
                if (debugParser) logger.debug("in align, alignment:" + alignment)
                Align(alignment.toInt)
        }

        def end: Parser[End] = (
          """(?i)END""".r  ~> opt(expression)
          ) ^^ {
            expr =>
                if (debugParser) logger.debug("in end, expression:" + expr)
                End(expr)
        }

        def condif1: Parser[If1] = """(?i)IF1""".r ^^ ( _ => If1() )
        def condelse: Parser[Else] = """(?i)ELSE""".r ^^ ( _ => Else() )
        def condendif: Parser[Endif] = """(?i)ENDIF""".r ^^ ( _ => Endif() )

        def ignored: Parser[Ignored] = ( ignoredKeyword ~ """.*""".r ) ^^ { _ => Ignored() }
        def ignoredKeyword: Regex = """(?i)(MAIN|ASSUME|\.LIST|\.NOLIST)""".r

        def characterExpression: Parser[Expression] = singleQuotedString | doubleQuotedString
        def singleQuotedString: Parser[Characters] =
            """'([^'\x00-\x1F\x7F\\]|\\[\\'"bfnrt]|\\u[a-fA-F0-9]{4})*'""".r  ^^ {
                contents => {
                    val stringBody = contents.substring(1, contents.length - 1)
                    if (debugParser) logger.debug("in singleQuotedString, contents is: |" + stringBody + "|")
                    Characters(stringBody)
                }
            }
        def doubleQuotedString: Parser[Characters] =
            """"([^"\x00-\x1F\x7F\\]|\\[\\'"bfnrt]|\\u[a-fA-F0-9]{4})*"""".r ^^ {
                contents => {
                    val stringBody = contents.substring(1, contents.length - 1)
                    if (debugParser) logger.debug("in doubleQuotedString, contents is: |" + stringBody + "|")
                    Characters(stringBody)
                }
            }

        def equ: Parser[String] =
            """(?i)EQU""".r ^^ ( _ => "EQU" )

        def org: Parser[String] =
            """(?i)ORG""".r ^^ ( _ => "ORG" )

        def macroWord: Parser[String] =
            """(?i)MACRO""".r ^^ ( _ => "MACRO" )

        def comment: Parser[Comment] = doubleComment | singleComment

        def doubleComment: Parser[DoubleComment] =
            """;;.*""".r ^^ ( x => DoubleComment(x))

        def singleComment: Parser[SingleComment] =
            """;.*""".r ^^ ( x => SingleComment(x))
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy