Lib.xlwt.ExcelFormulaLexer.py Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of sikulixapi Show documentation
Show all versions of sikulixapi Show documentation
... for visual testing and automation
# -*- coding: windows-1252 -*-
from antlr import EOF, CommonToken as Tok, TokenStream, TokenStreamException
import ExcelFormulaParser
from re import compile as recompile, LOCALE, IGNORECASE, VERBOSE
# Regular-expression fragments for the multi-character tokens.  Each fragment
# becomes one alternative of the combined pattern compiled further down with
# the VERBOSE and IGNORECASE flags, so whitespace and '#' comments inside a
# fragment are ignored and letters match in either case.

# Integer constant: a run of digits that ends at a word boundary.
int_const_pattern = r"\d+\b"
# Floating-point constant: digits with a decimal point, optionally followed
# by an exponent.  (The comments inside the string are part of the verbose
# regex.)
flt_const_pattern = r"""
(?:
(?: \d* \. \d+ ) # .1 .12 .123 etc 9.1 etc 98.1 etc
|
(?: \d+ \. ) # 1. 12. 123. etc
)
# followed by optional exponent part
(?: [Ee] [+-]? \d+ ) ?
"""
# String constant in double quotes; a literal quote inside it is written as
# two consecutive double quotes.
str_const_pattern = r'"(?:[^"]|"")*"'
#range2d_pattern = recompile(r"\$?[A-I]?[A-Z]\$?\d+:\$?[A-I]?[A-Z]\$?\d+"
# R1C1-style cell reference: R<row>C<col>, each number non-zero with optional
# leading zeros.
ref2d_r1c1_pattern = r"[Rr]0*[1-9][0-9]*[Cc]0*[1-9][0-9]*"
# A1-style cell reference: optional '$', one or two column letters (the first
# of two limited to A-I), optional '$', then a non-zero row number.
ref2d_pattern = r"\$?[A-I]?[A-Z]\$?0*[1-9][0-9]*"
# Keywords; the trailing \b stops them matching a prefix of a longer name.
true_pattern = r"TRUE\b"
false_pattern = r"FALSE\b"
if_pattern = r"IF\b"
choose_pattern = r"CHOOSE\b"
# Bare name: a word character followed by any mix of word characters and dots.
name_pattern = r"\w[\.\w]*"
# Name in single quotes; a literal quote inside it is written as two
# consecutive single quotes.
# The inner bracket must stay non-capturing "(?:...)": the combined pattern
# wraps each fragment in exactly one capturing group and the lexer maps
# Match.lastindex to a token type, so an extra capturing group in any fragment
# would shift that mapping.
quotename_pattern = r"'(?:[^']|'')*'"
# Two-character comparison operators.
ne_pattern = r"<>"
ge_pattern = r">="
le_pattern = r"<="
# (pattern, token type) pairs.  The order is significant: the patterns are
# tried left to right as alternatives of one regular expression, so earlier
# entries take priority (floats before integers, keywords before plain names).
pattern_type_tuples = (
    (flt_const_pattern, ExcelFormulaParser.NUM_CONST),
    (int_const_pattern, ExcelFormulaParser.INT_CONST),
    (str_const_pattern, ExcelFormulaParser.STR_CONST),
    (ref2d_r1c1_pattern, ExcelFormulaParser.REF2D_R1C1),
    (ref2d_pattern, ExcelFormulaParser.REF2D),
    (true_pattern, ExcelFormulaParser.TRUE_CONST),
    (false_pattern, ExcelFormulaParser.FALSE_CONST),
    (if_pattern, ExcelFormulaParser.FUNC_IF),
    (choose_pattern, ExcelFormulaParser.FUNC_CHOOSE),
    (name_pattern, ExcelFormulaParser.NAME),
    (quotename_pattern, ExcelFormulaParser.QUOTENAME),
    (ne_pattern, ExcelFormulaParser.NE),
    (ge_pattern, ExcelFormulaParser.GE),
    (le_pattern, ExcelFormulaParser.LE),
)

# Split the table into its two columns.
_patterns, _types = zip(*pattern_type_tuples)

# Each pattern is wrapped in its own capturing group, giving
# "(p1)|(p2)|...", so Match.lastindex identifies which entry matched.
_re = recompile(
    "|".join(["(%s)" % _p for _p in _patterns]),
    VERBOSE | LOCALE | IGNORECASE)

# Token type for each group number.  Group numbers start at 1, hence the
# placeholder in slot 0.
_toktype = [None]
_toktype.extend(_types)
# Token types for the operators and punctuation marks that are exactly one
# character long.  The lexer consults this table only when the regular
# expression for multi-character tokens has not matched.
single_char_lookup = {
    # comparison
    '=': ExcelFormulaParser.EQ,
    '<': ExcelFormulaParser.LT,
    '>': ExcelFormulaParser.GT,
    # arithmetic
    '+': ExcelFormulaParser.ADD,
    '-': ExcelFormulaParser.SUB,
    '*': ExcelFormulaParser.MUL,
    '/': ExcelFormulaParser.DIV,
    '^': ExcelFormulaParser.POWER,
    '%': ExcelFormulaParser.PERCENT,
    # text concatenation
    '&': ExcelFormulaParser.CONCAT,
    # grouping and separators
    '(': ExcelFormulaParser.LP,
    ')': ExcelFormulaParser.RP,
    ',': ExcelFormulaParser.COMMA,
    ';': ExcelFormulaParser.SEMICOLON,
    ':': ExcelFormulaParser.COLON,
    '!': ExcelFormulaParser.BANG,
}
class Lexer(TokenStream):
    """Tokenizer for formula text, usable wherever a TokenStream is expected.

    Multi-character tokens are recognised with the module-level ``_re``
    pattern; any other character is looked up in ``single_char_lookup``.
    Columns attached to tokens and reported in error messages are 1-based.
    """

    def __init__(self, text):
        """Start tokenizing *text* from its first character."""
        self._text = text[:]
        self._pos = 0   # 0-based index of the next unread character
        self._line = 0  # set here but not read anywhere in this class

    def isEOF(self):
        """Return True once the position has reached the end of the text."""
        return len(self._text) <= self._pos

    def curr_ch(self):
        """Return the character at the current position.

        Indexes the text directly, so callers must check isEOF() first.
        """
        return self._text[self._pos]

    def next_ch(self, n = 1):
        """Advance the current position by *n* characters (default 1)."""
        self._pos += n

    def is_whitespace(self):
        """Return True if the current character is whitespace."""
        return self.curr_ch() in " \t\n\r\f\v"

    def match_pattern(self):
        """Try to read one multi-character token at the current position.

        Returns the token and moves the position past it, or returns None
        and leaves the position unchanged when no pattern matches.
        """
        m = _re.match(self._text, self._pos)
        if not m:
            return None
        self._pos = m.end(0)
        # lastindex is the number of the capturing group that matched, which
        # selects the token type; start() is 0-based, columns are 1-based.
        return Tok(type = _toktype[m.lastindex], text = m.group(0), col = m.start(0) + 1)

    def nextToken(self):
        """Return the next token, or a token of type EOF at end of input.

        Raises TokenStreamException when the next character is neither the
        start of a pattern token nor a known single-character token.
        """
        # skip whitespace
        while not self.isEOF() and self.is_whitespace():
            self.next_ch()
        if self.isEOF():
            return Tok(type = EOF)
        # first, try to match token with 2 or more chars
        t = self.match_pattern()
        if t:
            return t
        # second, we want 1-char tokens
        te = self.curr_ch()
        try:
            ty = single_char_lookup[te]
        except KeyError:
            # Report the column 1-based, the same convention used for the
            # ``col`` of every token this lexer produces.
            raise TokenStreamException(
                "Unexpected char %r in column %u." % (te, self._pos + 1))
        self.next_ch()
        # After advancing, _pos equals the 1-based column of the character
        # that was just consumed.
        return Tok(type=ty, text=te, col=self._pos)
if __name__ == '__main__':
try:
for t in Lexer(""" 1.23 456 "abcd" R2C2 a1 iv65536 true false if choose a_name 'qname' <> >= <= """):
print t
except TokenStreamException, e:
print "error:", e