All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.textmapper.tool.templates.go_parser.ltp Maven / Gradle / Ivy

There is a newer version: 0.10.0
Show newest version
${template main-}
${file 'parser.go'-}
${call go.header-}
${call parser-}
${end-}
${file 'parser_tables.go'-}
${call go.header-}
${call parserTables-}
${end-}
${call go_listener.main-}
${end}

${template errorHandler-}
// ErrorHandler is called with a SyntaxError every time the parser is unable to process
// some part of the input and is not already recovering from an earlier error.
// Returning false aborts the parser, which then returns that error to its caller;
// returning true lets the parser attempt to recover and continue.
type ErrorHandler func(err SyntaxError) bool
${end}

${template SyntaxError-}
// SyntaxError is the error value produced by the parser when the input cannot be parsed.
// Its position fields are filled from the lexer at the moment the error is detected.
type SyntaxError struct {
${if opts.tokenLine-}
	// Line is the value of lexer.Line() when the error was detected.
	Line      int
${end-}
	// Offset and Endoffset are the two values returned by lexer.Pos() when the
	// error was detected.
	Offset    int
	Endoffset int
}

// Error implements the error interface.
func (e SyntaxError) Error() string {
${if opts.tokenLine-}
	return "fmt".Sprintf("syntax error at line %v", e.Line)
${else-}
	return "syntax error"
${end-}
}
${end}

${template parser-}
package ${self->go.package()}

${if self->hasRecovering()-}
${call errorHandler}
${end-}
// Parser is a table-driven LALR parser for ${opts.lang}.
type Parser struct {
${if self->hasRecovering()-}
	// eh is consulted on every reported syntax error.
	eh ErrorHandler
${end-}
${if self->eventBased()-}
	// listener receives the reported nodes and tokens.
	listener Listener
${end-}

	// next is the current lookahead token; next.symbol is noToken when no token
	// has been fetched yet.
	next      symbol
${if self->hasRecovering()-}
	// endState is the final state of the parse in progress; willShift reads it.
	endState  ${self->stateType()}
${end-}
${call stateVars-}
}

${call SyntaxError}
// symbol is a terminal or nonterminal occurrence together with the start and end
// offsets of the input it covers.
type symbol struct {
	symbol    int32
	offset    int
	endoffset int
}

// stackEntry is one element of the parser stack: a symbol and the state entered
// after it was shifted or reduced.
type stackEntry struct {
	sym   symbol
	state ${self->stateType()}
${if self->hasAssocValues()-}
	value ${self->valueType()}
${end-}
}

// Init stores the arguments it is given in the parser; it must be called before parsing.
func (p *Parser) Init(${if self->hasRecovering()}eh ErrorHandler${end}${if self->eventBased()}${if self->hasRecovering()}, ${end}l Listener${end}) {
${if self->hasRecovering()-}
	p.eh = eh
${end-}
${if self->eventBased()-}
	p.listener = l
${end-}
${call initStateVars-}
}

const (
	// startStackSize is the number of stack entries preallocated by parse.
	startStackSize = 256
${if self->ignoredReportTokens()-}
	// startTokenBufferSize is the initial capacity of the ignored-token buffer.
	startTokenBufferSize = 16
${end-}
	// noToken marks the lookahead as not fetched yet.
	noToken        = int32(UNAVAILABLE)
	eoiToken       = int32(EOI)
	// debugSyntax, when true, makes parse print every shift and reduction.
	debugSyntax    = false
)

${foreach inp in syntax.input.select(it|it.requested)-}
// Parse${self->util.onlyOneUserInput() ? '' : util.toFirstUpper(inp.target.id)} parses the tokens produced by lexer for this input and returns a
// non-nil error when parsing stops before its final state is reached.
func (p *Parser) Parse${self->util.onlyOneUserInput() ? '' : util.toFirstUpper(inp.target.id)}(${call contextParam}lexer *Lexer) ${if !self->hasAssocValueFor(inp)}error${else}(error, ${inp.target->go.type()})${end} {
${if self->hasAssocValuesForInput()-}
	err, ${if self->hasAssocValueFor(inp)}v${else}_${end} := p.parse(${call contextArg}${inp.index}, ${parser.finalStates[inp.index]}, lexer)
${if self->hasAssocValueFor(inp)-}
	// A failed assertion (including a nil value) yields the zero value of the result type.
	val, _ := v.(${inp.target->go.type()})
	return err, val
${else-}
	return err
${end-}
${else-}
	return p.parse(${call contextArg}${inp.index}, ${parser.finalStates[inp.index]}, lexer)
${end-}
}

${end-}
${call parseFunc}
${if self->hasRecovering()-}
const errSymbol = ${syntax.error.index}

${call willShift}
${call skipBrokenCode}
${call recoverFromError}
${end-}
${if self->needActionsTable()-}
${call lalr}
${end-}
${call gotoState}
${call fetchNext}
${if self->needExplicitLookahead()-}
${call lookahead}
${end-}
${call applyRule-}
${if self->ignoredReportTokens()}
${call reportIgnoredToken-}
${end-}
${end}

${template contextParam-}
${if opts.cancellable}ctx "context".Context, ${end-}
${end}

${template contextArg-}
${if opts.cancellable}ctx, ${end-}
${end}

${template parseFunc-}
func (p *Parser) parse(${call contextParam}start, end ${self->stateType()}, lexer *Lexer) ${if !self->hasAssocValuesForInput()}error${else}(error, ${self->valueType()})${end} {
${if self->ignoredReportTokens()-}
	ignoredTokens := make([]symbol, 0, startTokenBufferSize) // to be reported with the next shift
${end-}
	state := start
${if opts.cancellable-}
  var shiftCounter int
${end-}
${if self->hasRecovering()-}
  var lastErr SyntaxError
	recovering := 0
${end-}

	var alloc [startStackSize]stackEntry
	stack := append(alloc[:0], stackEntry{state: state})
${if self->hasRecovering()-}
	p.endState = end
${end-}
	ignoredTokens = p.fetchNext(lexer, stack, ignoredTokens)

	for state != end {
		action := tmAction[state]
${if self->needActionsTable()-}
		if action < -2 {
			// Lookahead is needed.
			if p.next.symbol == noToken {
				ignoredTokens = p.fetchNext(lexer, stack, ignoredTokens)
			}
			action = lalr(action, p.next.symbol)
		}
${end-}

		if action >= 0 {
			// Reduce.
			rule := action
			ln := int(tmRuleLen[rule])

			var entry stackEntry
			entry.sym.symbol = tmRuleSymbol[rule]
			rhs := stack[len(stack)-ln:]
			stack = stack[:len(stack)-ln]
			if ln == 0 {
				entry.sym.offset, _ = lexer.Pos()
				entry.sym.endoffset = entry.sym.offset
			} else {
				entry.sym.offset = rhs[0].sym.offset
				entry.sym.endoffset = rhs[ln-1].sym.endoffset
			}
			p.applyRule(rule, &entry, rhs, lexer)
			if debugSyntax {
				"fmt".Printf("reduced to: %v\n", Symbol(entry.sym.symbol))
			}
			state = gotoState(stack[len(stack)-1].state, entry.sym.symbol)
			entry.state = state
			stack = append(stack, entry)

		} else if action == -1 {
${if opts.cancellable-}
${call checkForCancellation}
${end-}
			// Shift.
			if p.next.symbol == noToken {
				p.fetchNext(lexer, stack, nil)
			}
			state = gotoState(state, p.next.symbol)
			stack = append(stack, stackEntry{
				sym:   p.next,
				state: state,
${if self->hasAssocValues()-}
				value: lexer.Value(),
${end-}
			})
			if debugSyntax {
				"fmt".Printf("shift: %v (%s)\n", Symbol(p.next.symbol), lexer.Text())
			}
${if self->ignoredReportTokens()-}
			if len(ignoredTokens) > 0 {
				for _, tok := range ignoredTokens {
					p.reportIgnoredToken(tok)
				}
				ignoredTokens = ignoredTokens[:0]
			}
${end-}
${call onAfterShift-}
			if state != -1 && p.next.symbol != eoiToken {
				p.next.symbol = noToken
			}
${if self->hasRecovering()-}
			if recovering > 0 {
				recovering--
			}
${end-}
		}

		if action == -2 || state == -1 {
${if self->hasRecovering()-}
			if recovering == 0 {
				offset, endoffset := lexer.Pos()
				lastErr = SyntaxError{
${if opts.tokenLine-}
					Line: lexer.Line(),
${end-}
					Offset:    offset,
					Endoffset: endoffset,
				}
				if !p.eh(lastErr) {
					return lastErr${if self->hasAssocValuesForInput()}, ${self->valueTypeDefaultVal()}${end}
				}
			}
			if stack = p.recoverFromError(lexer, stack); stack == nil {
${if self->ignoredReportTokens()-}
				if len(ignoredTokens) > 0 {
					for _, tok := range ignoredTokens {
						p.reportIgnoredToken(tok)
					}
					ignoredTokens = ignoredTokens[:0]
				}
${end-}
				return lastErr${if self->hasAssocValuesForInput()}, ${self->valueTypeDefaultVal()}${end}
			}
			state = stack[len(stack)-1].state
			recovering = 4
${else-}
			break
${end-}
		}
	}

${if !self->hasRecovering()-}
	if state != end {
		offset, endoffset := lexer.Pos()
		err := SyntaxError{
${if opts.tokenLine-}
			Line: lexer.Line(),
${end-}
			Offset: offset,
			Endoffset: endoffset,
		}
		return err${if self->hasAssocValuesForInput()}, ${self->valueTypeDefaultVal()}${end}
	}

${end-}
	return nil${if self->hasAssocValuesForInput()}, stack[len(stack)-2].value${end}
}
${end}

${template checkForCancellation-}
			if shiftCounter++; shiftCounter & 0x1ff == 0 {
				// Note: checking for context cancellation is expensive so we do it from time to time.
				select {
				case <-ctx.Done():
					return ctx.Err()
				default:
				}
			}
${end}

${template willShift-}
// willShift checks if "symbol" is going to be shifted in the given state.
// This function does not support empty productions and returns false if they occur before "symbol".
func (p *Parser) willShift(stackPos int, state ${self->stateType()}, symbol int32, stack []stackEntry) bool {
	if state == -1 {
		return false
	}

	for state != p.endState {
		action := tmAction[state]
		if action < -2 {
			action = lalr(action, symbol)
		}

		if action >= 0 {
			// Reduce.
			rule := action
			ln := int(tmRuleLen[rule])
			if ln == 0 {
				// we do not support empty productions
				return false
			}
			stackPos -= ln - 1
			state = gotoState(stack[stackPos-1].state, tmRuleSymbol[rule])
		} else {
			return action == -1 && gotoState(state, symbol) >= 0
		}
	}
	return symbol == eoiToken
}
${end}

${template skipBrokenCode-}
func (p *Parser) skipBrokenCode(lexer *Lexer, stack []stackEntry, canRecover func (symbol int32) bool) int {
	var e int
	for p.next.symbol != eoiToken && !canRecover(p.next.symbol) {
		e = p.next.endoffset
		p.fetchNext(lexer, stack, nil)
	}
	return e
}
${end}

${template recoverFromError-}
func (p *Parser) recoverFromError(lexer *Lexer, stack []stackEntry) []stackEntry {
	var recoverSyms [1 + NumTokens/8]uint8
	var recoverPos []int

	for size := len(stack); size > 0; size-- {
		if gotoState(stack[size-1].state, errSymbol) == -1 {
			continue
		}
		recoverPos = append(recoverPos, size)
${foreach marker in parser.markers.select(it|it.name.toLowerCase() == 'recoveryscope')-}
${if marker.states.length == 1-}
		if ${marker.name}State == stack[size-1].state {
			break
		}
${else-}
		if ${marker.name}States[int(stack[size-1].state)] {
			break
		}
${end-}
${end-}
	}
	if len(recoverPos) == 0 {
		return nil
	}

	for _, v := range afterErr {
		recoverSyms[v/8] |= 1 << uint32(v%8)
	}
	canRecover := func (symbol int32) bool {
		return recoverSyms[symbol/8]&(1< e {
			e = endoffset
		}

		var matchingPos int
		for _, pos := range recoverPos {
			if p.willShift(pos, gotoState(stack[pos-1].state, errSymbol), p.next.symbol, stack) {
				matchingPos = pos
				break
			}
		}
		if matchingPos == 0 {
			if p.next.symbol == eoiToken {
				return nil
			}
			recoverSyms[p.next.symbol/8] &^= 1 << uint32(p.next.symbol%8)
			continue
		}

		if matchingPos < len(stack) {
			s = stack[matchingPos].sym.offset
		}
		stack = append(stack[:matchingPos], stackEntry{
			sym:   symbol{errSymbol, s, e},
			state: gotoState(stack[matchingPos-1].state, errSymbol),
		})
		return stack
	}
	return nil
}
${end}

${template lalr-}
func lalr(action, next int32) int32 {
	a := -action - 3
	for ; tmLalr[a] >= 0; a += 2 {
		if tmLalr[a] == next {
			break
		}
	}
	return tmLalr[a+1]
}
${end}

${template gotoState-}
func gotoState(state ${self->stateType()}, symbol int32) ${self->stateType()} {
	min := tmGoto[symbol]
	max := tmGoto[symbol+1]

	if max-min < 32 {
		for i := min; i < max; i += 2 {
			if tmFromTo[i] == state {
				return tmFromTo[i+1]
			}
		}
	} else {
		for min < max {
			e := (min + max) >> 1 &^ int32(1)
			i := tmFromTo[e]
			if i == state {
				return tmFromTo[e+1]
			} else if i < state {
				min = e + 2
			} else {
				max = e
			}
		}
	}
	return -1
}
${end}

${template fetchNext-}
// fetchNext reads tokens from lexer until it finds one that is not skipped, and stores
// that token and its offsets in p.next. Skipped tokens that are to be reported are
// appended to ignoredTokens when it is non-nil, or reported immediately when it is nil.
// The (possibly extended) ignoredTokens slice is returned. The stack argument is not
// used by this implementation.
func (p *Parser) fetchNext(lexer *Lexer, stack []stackEntry, ignoredTokens []symbol) []symbol {
restart:
	token := lexer.Next()
	switch token {
${if self->ignoredReportTokens()-}
	case ${self->ignoredReportTokens().collect(it| it->go_token.tokenName())->util.join(', ')}:
		s, e := lexer.Pos()
		tok := symbol{int32(token), s, e}
		if ignoredTokens == nil {
			p.reportIgnoredToken(tok)
		} else {
			ignoredTokens = append(ignoredTokens, tok)
		}
		goto restart
${end-}
${if !self->ignoredReportTokens().exists(t|t.nameText == 'invalid_token')-}
	case ${self->go_token.invalidTokenName()}:
		// Invalid tokens that are not reported are dropped silently.
		goto restart
${end-}
	}
	p.next.symbol = int32(token)
	p.next.offset, p.next.endoffset = lexer.Pos()
	return ignoredTokens
}
${end}

${template reportIgnoredToken-}
// reportIgnoredToken passes an ignored token to the listener as a node of the type
// associated with that token, together with the token's offsets. Tokens without an
// associated node type are not reported.
func (p *Parser) reportIgnoredToken(tok symbol) {
	var t ${self->go_listener.nodeTypeRef()}
	switch Token(tok.symbol) {
${foreach tok in self->ignoredReportTokens()-}
	case ${tok->go_token.tokenName()}:
		t = ${tok->go_token.tokenNodeName()->go_listener.nodeTypeValueRef()}
${end-}
	default:
		return
	}
	p.listener(t, tok.offset, tok.endoffset)
}
${end}

${template lookaheadNext-}
// lookaheadNext returns the next token from lexer that is not skipped. Ignored tokens
// are skipped without being reported or buffered.
func lookaheadNext(lexer *Lexer) int32 {
restart:
	tok := lexer.Next()
	switch tok {
${if self->ignoredReportTokens()-}
	case ${self->ignoredReportTokens().collect(it| it->go_token.tokenName())->util.join(', ')}:
		goto restart
${end-}
${if !self->ignoredReportTokens().exists(t|t.nameText == 'invalid_token')-}
	case ${self->go_token.invalidTokenName()}:
		goto restart
${end-}
	}
	return int32(tok)
}

${end}

${template lookaheadRule-}
// lookaheadRule evaluates the lookahead cases of the given rule and returns the index of
// the symbol selected for it. It returns 0 when rule has no lookahead cases.
func lookaheadRule(lexer *Lexer, next, rule int32) int32 {
	switch rule {
${call caseLookaheadRules(true)-}
	}
	return 0
}

${end}

${cached query laMethodName() = 'At' + self.target.name}

${template lookaheadMethods-}
${foreach inp in syntax.input.select(it|!it.hasEoi())-}
// ${inp->laMethodName()} reports whether a lookahead parse of this input, beginning with the
// token next, reaches the input's final state.
func ${inp->laMethodName()}(lexer *Lexer, next int32) bool {
	return lookahead(lexer, next, ${inp.index}, ${parser.finalStates[inp.index]});
}

${end-}
${end}

${template setupLookaheadLexer-}
	var lexer Lexer = *l
${end}
${template callLookaheadNext}lookaheadNext(&lexer)${end}

${template lookahead-}
${call lookaheadNext-}
${if opts.recursiveLookaheads-}
${call lookaheadRule-}
${end-}
${call lookaheadMethods-}
// lookahead runs a trial parse from state "start" and reports whether state "end" is
// reached. next is the first token to consume (noToken means one has to be fetched).
// No semantic actions are applied and nothing is reported; the trial stops at the first
// syntax error.
func lookahead(l *Lexer, next int32, start, end ${self->stateType()}) bool {
${call setupLookaheadLexer-}

	var allocated [64]stackEntry
	state := start
	stack := append(allocated[:0], stackEntry{state: state})

	for state != end {
		action := tmAction[state]
${if self->needActionsTable()-}
		if action < -2 {
			// Lookahead is needed.
			if next == noToken {
				next = ${call callLookaheadNext}
			}
			action = lalr(action, next)
		}
${end-}

		if action >= 0 {
			// Reduce.
			rule := action
			ln := int(tmRuleLen[rule])

			var entry stackEntry
			entry.sym.symbol = tmRuleSymbol[rule]
			stack = stack[:len(stack)-ln]
${if opts.recursiveLookaheads-}
			// A lookahead rule may replace the symbol the rule reduces to.
			if sym := lookaheadRule(&lexer, next, rule); sym != 0 {
			  entry.sym.symbol = sym
			}
${end-}
			state = gotoState(stack[len(stack)-1].state, entry.sym.symbol)
			entry.state = state
			stack = append(stack, entry)

		} else if action == -1 {
			// Shift.
			if next == noToken {
				next = ${call callLookaheadNext}
			}
			state = gotoState(state, next)
			stack = append(stack, stackEntry{
				sym:   symbol{symbol: next},
				state: state,
			})
			// The token is consumed only after a successful shift; end-of-input stays.
			if state != -1 && next != eoiToken {
				next = noToken
			}
		}

		if action == -2 || state == -1 {
			break
		}
	}

	return state == end
}
${end}

${query callLaMethod(recurse) =
  recurse
    ? 'lookahead(lexer, next, ' + self.index + ', ' + context.parser.finalStates[self.index] + ')'
    : self->laMethodName() + '(lexer, p.next.symbol)' }

${template caseLookaheadRules(recurse)-}
${foreach rule in parser.lookaheadRules-}
	case ${rule.getIndex()}:
		${foreach c in rule.cases}if ${c.isNegated() ? '!' : ''}${c.input->callLaMethod(recurse)} {
			${if recurse}return${else}lhs.sym.symbol =${end} ${c.target.index} /* ${c.target.name} */;
		} else ${end}{
			${if recurse}return${else}lhs.sym.symbol =${end} ${rule.defaultTarget.index} /* ${rule.defaultTarget.name} */;
		}
${if !recurse-}
		return
${end-}
${end-}
${end}

${template applyRule-}
// applyRule is called by parse on every reduction. It runs the code attached to the
// rule, if any, and for event-based parsers reports the reduced range to the listener.
// lhs is the stack entry being built for the rule's left-hand side and rhs holds the
// stack entries being reduced.
func (p *Parser) applyRule(rule int32, lhs *stackEntry, rhs []stackEntry, lexer *Lexer) {
${if syntax.rules.exists(r|r.code()) || self->needExplicitLookahead()-}
	switch rule {
${foreach rule in syntax.rules.select(r|r.code())-}
	case ${rule.getIndex()}: // ${rule}
		${rule.code()-}
${end-}
${call caseLookaheadRules(false)-}
	}
${end-}
${if self->eventBased()-}
	nt := rule${self->go_listener.nodeType()}[rule]
	// Rules whose node type is 0 are not reported.
	if nt == 0 {
		return
	}
	p.listener(nt, lhs.sym.offset, lhs.sym.endoffset)
${end-}
}
${end}


${template parserTables-}
package ${self->go.package()}

${foreach marker in parser.markers}
${if marker.states.length == 1-}
// ${marker.name}State is the only parser state carrying the ${marker.name} marker.
const ${marker.name}State = ${marker.states[0]}
${else-}
// ${marker.name}States is the set of parser states carrying the ${marker.name} marker.
var ${marker.name}States = map[int]bool{
${for i in [0, marker.states.length-1]-}
	${marker.states[i]}: true,
${end-}
}
${end-}
${end-}

// Symbol represents a set of all terminal and non-terminal symbols of the ${opts.lang} language.
type Symbol int

// symbolStr holds the names of the non-terminal symbols, indexed by the symbol's
// number minus NumTokens.
var symbolStr = [...]string{
${for i in [parser.nterms, parser.nsyms-1]-}
	"${parser.symbols[i].name}",
${end-}
}

// String returns the token name for terminals, the symbol name for known non-terminals,
// and a "nonterminal(N)" placeholder for anything else.
func (n Symbol) String() string {
	if n < Symbol(NumTokens) {
		return Token(n).String()
	}
	i := int(n) - int(NumTokens)
	if i < len(symbolStr) {
		return symbolStr[i]
	}
	return "fmt".Sprintf("nonterminal(%d)", n)
}

// tmAction maps a parser state to its action: a value >= 0 is the rule to reduce by,
// -1 requests a shift, -2 signals a syntax error, and a value below -2 refers to a
// lookahead-dependent entry.
var tmAction = []int32{
	${util.format(parser.action, 16, 1)},
}

${if self->needActionsTable()-}
// tmLalr stores lookahead-dependent actions as lists of (symbol, action) pairs. An
// action value a below -2 refers to the list starting at index -a-3; each list ends
// with a negative symbol whose action applies when no other entry matches.
var tmLalr = []int32{
	${util.format(parser.lalr, 16, 1)},
}

${end-}
// tmGoto is indexed by symbol: tmGoto[s] and tmGoto[s+1] delimit the transitions on
// symbol s in tmFromTo.
var tmGoto = []int32{
	${util.format(parser.symGoto, 16, 1)},
}

// tmFromTo lists transitions as (source state, target state) pairs. gotoState
// binary-searches the longer ranges by source state.
var tmFromTo = []${self->stateType()}{
	${util.format(parser.symFromTo, 16, 1)},
}

// tmRuleLen holds the number of right-hand-side symbols of each rule.
var tmRuleLen = []int${util.bitsForElement(parser.ruleLength)}{
	${util.format(parser.ruleLength, 16, 1)},
}

// tmRuleSymbol holds the left-hand-side symbol of each rule.
var tmRuleSymbol = []int32{
	${util.format(parser.left, 16, 1)},
}
${foreach set in syntax.sets-}

// ${set.set} = ${set.elements.collect(i | syntax.symbols[i]->go_token.tokenName())->util.join(', ')}
var ${set.name} = []int32{
${if set.elements.size() > 0-}
	${util.format(set.elements, 16, 1)},
${end-}
}
${end-}
${end}

${query needActionsTable() = parser.lalr.size() > 0}

${query needExplicitLookahead() = parser.lookaheadRules.size() > 0}

${cached query hasRecovering() = syntax.error && syntax.sets.exists(it|it.name.camelCase == 'afterErr' && it.elements.length > 0)}

${template parserAction($)-}
${if customRanges()-}
${foreach range in customRanges()-}
		p.listener(${range.rangeType()}, ${range.first().offset}, ${range.last().endoffset})
${end-}
${end-}
${if codeTemplate()-}
${eval codeTemplate()}
${else if context.opts.genast && context->go_ast.astcode(self)-}
${eval context->go_ast.astcode(self), '#[generated action for ' + self + ']'}
${end-}
${end}


${template onAfterShift-}
${if self->nonignoredReportTokens()-}
			// Report the token that was just shifted if it is one of the reported tokens.
			switch Token(p.next.symbol) {
${foreach tok in self->nonignoredReportTokens()-}
			case ${tok->go_token.tokenName()}:
				p.listener(${tok->go_token.tokenNodeName()->go_listener.nodeTypeValueRef()}, p.next.offset, p.next.endoffset)
${end-}
			}
${end-}
${end}

${template stateVars}${end}
${template initStateVars}${end}

${cached query eventBased() = opts.eventBased && !opts.genast}

${cached query hasAssocValues() = syntax.symbols.exists(it|it->go.type())}

${cached query hasAssocValuesForInput() = self->hasAssocValues() && syntax.input.select(it|it.requested).exists(it|it.target->go.type())}

${cached query hasAssocValueFor(input) = self->hasAssocValues() && input.target->go.type()}

${query valueType() = 'interface{}' }

${query valueTypeDefaultVal() = 'nil' }

${cached query stateType() = parser.statesCount < 127 ? 'int8' : parser.statesCount < 65535 ? 'int16' : 'int32' }

${cached query ignoredReportTokens() = opts.reportTokens.select(tok|self->go_token.isSpace(tok))}

${cached query nonignoredReportTokens() = opts.reportTokens.select(tok|!self->go_token.isSpace(tok))}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy