// z3 4.13.0: src/parsers/util/scanner.cpp
/*++
Copyright (c) 2006 Microsoft Corporation
Module Name:
scanner.cpp
Abstract:
Author:
Leonardo de Moura (leonardo) 2008-03-31.
Revision History:
--*/
#include "parsers/util/scanner.h"
#include <climits>
// Fetch the next input character and advance the column counter m_pos.
//
// Interactive streams are read one character at a time straight from
// m_stream. Otherwise input is pulled through m_buffer in chunks. Slot 0 of
// the buffer always carries the last character of the previous chunk, so a
// single unread_char() issued right after a refill still lands on valid data.
//
// Returns the character read, or -1 when the buffered input is exhausted.
inline int scanner::read_char() {
    if (m_is_interactive) {
        ++m_pos;
        return m_stream.get();
    }
    if (m_bpos >= m_bend) {
        // Refill: keep the previous chunk's last character in slot 0 and read
        // the new chunk into the remaining slots.
        m_buffer[0] = m_last_char;
        m_stream.read(m_buffer.data()+1, m_buffer.size()-1);
        // gcount() yields a std::streamsize; narrow it explicitly before
        // storing the new end position.
        // NOTE(review): assumes m_bend is declared unsigned in scanner.h - confirm.
        m_bend = 1 + static_cast<unsigned>(m_stream.gcount());
        m_bpos = 1;
        m_last_char = m_buffer[m_bend-1];
    }
    ++m_pos;
    if (m_bpos < m_bend) {
        return m_buffer[m_bpos++];
    } else {
        // increment m_bpos, so unread_char() will work properly
        ++m_bpos;
        return -1;
    }
}
// Step back over the character most recently handed out by read_char(),
// rewinding the column counter as well.
inline void scanner::unread_char() {
    --m_pos;
    if (!m_is_interactive) {
        // Buffered mode: at most one character can be pushed back.
        SASSERT(m_bpos > 0);
        --m_bpos;
        return;
    }
    m_stream.unget();
}
// True while scanning may continue, i.e. the current state is neither the
// error state nor end-of-file.
inline bool scanner::state_ok() {
    const bool halted = (m_state == ERROR_TOKEN) || (m_state == EOF_TOKEN);
    return !halted;
}
// Skip input up to and including the first occurrence of `delimiter`, or
// until read_char() reports end of input (-1). Every newline consumed on the
// way bumps the line counter.
void scanner::comment(char delimiter) {
    for (;;) {
        if (!state_ok()) {
            return;
        }
        const int c = read_char();
        if (c == '\n') {
            ++m_line;
        }
        const bool out_of_input = (c == -1);
        if (out_of_input || c == delimiter) {
            return;
        }
    }
}
// Read the remainder of a '|'-quoted symbol. `ch` is the first character
// after the opening bar; the opening bar itself is expected to be the last
// character already stored in m_string.
//
// In SMT2 mode both bars are stripped from the stored name; otherwise both
// are kept. A '|' directly preceded by a backslash does not end the symbol.
// Returns ID_TOKEN with m_id set, or EOF_TOKEN if input ends first.
scanner::token scanner::read_symbol(int ch) {
    if (m_smt2) {
        // drop the opening '|'
        m_string.pop_back();
    }
    for (bool after_backslash = false; after_backslash || ch != '|'; ch = read_char()) {
        if (EOF == ch) {
            // TODO: use error reporting
            m_err << "ERROR: unexpected end of file.\n";
            return EOF_TOKEN;
        }
        if ('\n' == ch) {
            ++m_line;
        }
        after_backslash = ('\\' == ch);
        m_string.push_back(ch);
    }
    if (!m_smt2) {
        // keep the closing '|' outside SMT2 mode
        m_string.push_back(ch);
    }
    m_string.push_back(0);
    m_id = m_string.begin();
    return ID_TOKEN;
}
// Scan an identifier whose first character (first_char) was already consumed.
//
// m_string and m_params are reset, then characters are appended to m_string
// until one is seen that cannot continue the identifier; that character is
// handed back through unread_char(). Returns:
//   - the result of read_number() for '-' followed by a digit outside SMT2 mode,
//   - the result of read_symbol() when first_char is '|',
//   - ID_TOKEN (or m_state if read_params() fails) when a '[' parameter list follows,
//   - ID_TOKEN with m_id set on a normal end of identifier,
//   - m_state if the loop ends because the scanner state is no longer ok.
scanner::token scanner::read_id(char first_char) {
int ch;
m_string.reset();
m_params.reset();
m_string.push_back(first_char);
// Classify the identifier by its first character: '+' marks the operator
// class and 'a' the alphabetic class in the m_normalized table.
bool is_arith = (m_normalized[(unsigned char) first_char] == '+');
bool is_alpha = (m_normalized[(unsigned char) first_char] == 'a');
ch = read_char();
// In SMT2 "-20" is an identifier.
if (!m_smt2 && state_ok() && first_char == '-' && m_normalized[(unsigned char) ch] == '0') {
return read_number(ch, false);
}
if (state_ok() && first_char == '|') {
return read_symbol(ch);
}
while (state_ok()) {
switch(m_normalized[(unsigned char) ch]) {
case '+':
// Operator characters extend an operator identifier.
if (is_arith) {
m_string.push_back(ch);
break;
}
// strings can have hyphens.
if (!is_alpha || ch != '-') {
goto bail_out;
}
// Intentional fall through: a '-' inside an alphabetic identifier is
// appended by the shared code below.
case 'a':
case ':':
case '.':
case '0':
// These classes never continue an operator identifier.
if (is_arith) {
goto bail_out;
}
m_string.push_back(ch);
break;
case '[':
// A parameter list follows: terminate the name, publish it in m_id and
// let read_params() consume everything up to the closing ']'.
m_string.push_back(0);
m_id = m_string.begin();
if (read_params()) {
return ID_TOKEN;
}
else {
return m_state;
}
default:
goto bail_out;
}
ch = read_char();
}
return m_state;
bail_out:
// End of identifier: terminate the name and give back the character that
// stopped the scan.
m_string.push_back(0);
m_id = m_string.begin();
unread_char();
return ID_TOKEN;
}
// Read a bracketed parameter list whose opening '[' was already consumed,
// appending each parameter to m_params.
//
// Runs of digits form numeric parameters; any other text up to the next ':'
// or ']' becomes a symbol parameter. Returns true once the closing ']' has
// been read. Returns false if the scanner state stops being ok; reaching end
// of input inside a symbol parameter sets m_state to ERROR_TOKEN.
bool scanner::read_params() {
unsigned param_num = 0;
while (state_ok()) {
int ch = read_char();
switch (m_normalized[(unsigned char) ch]) {
case '0':
// Accumulate one more decimal digit of the current numeric parameter.
param_num = 10*param_num + (ch - '0');
break;
case ']':
m_params.push_back(parameter(param_num));
return true;
case ':':
// Separator: emit the number gathered so far and start a new one.
m_params.push_back(parameter(param_num));
param_num = 0;
break;
default:
// Symbol parameter: m_string is reused as scratch space for its text.
m_string.reset();
m_string.push_back(ch);
while (true) {
ch = read_char();
if (ch == ':' || ch == ']') {
m_string.push_back(0);
m_params.push_back(parameter(symbol(m_string.data())));
param_num = 0;
if (ch == ':') {
// NOTE(review): the ':' pushed back here is re-read by the outer loop,
// whose ':' case pushes parameter(param_num) with param_num == 0 right
// after the symbol parameter - confirm that extra entry is intended.
unread_char();
}
else {
return true;
}
break;
}
if (ch == EOF) {
// TODO: use error reporting
m_err << "ERROR: unexpected character: '" << ((int)ch) << " " << ch << "'.\n";
m_state = ERROR_TOKEN;
break;
}
m_string.push_back(ch);
}
break;
}
}
return false;
}
// Scan a decimal literal whose first digit (first_char) was already consumed
// and store its value in m_number.
//
// Digits are accumulated as one integer; every digit read after a '.' is
// counted so the value can be scaled down by the matching power of ten at the
// end. The value is negated when is_pos is false. The first character that is
// neither a digit nor '.' is pushed back. Returns INT_TOKEN, or FLOAT_TOKEN
// if a '.' was seen; m_state is set to the same value.
scanner::token scanner::read_number(char first_char, bool is_pos) {
    unsigned fraction_digits = 0;
    m_number = rational(first_char - '0');
    m_state = INT_TOKEN;
    for (;;) {
        const int c = read_char();
        const bool is_digit = (m_normalized[(unsigned char) c] == '0');
        if (!is_digit && c != '.') {
            // not part of the literal: hand it back and stop
            unread_char();
            break;
        }
        if (!is_digit) {
            // a '.' switches the literal to a fractional one
            m_state = FLOAT_TOKEN;
            continue;
        }
        m_number = rational(10)*m_number + rational(c - '0');
        if (m_state == FLOAT_TOKEN) {
            ++fraction_digits;
        }
    }
    if (!is_pos) {
        m_number.neg();
    }
    if (m_state == FLOAT_TOKEN) {
        m_number /= power(rational(10), fraction_digits);
    }
    return m_state;
}
// Collect characters into m_string until `delimiter` or end of input is
// reached, then publish the text through m_id and return `result`.
//
// A backslash is stored together with the character that follows it, so an
// escaped delimiter does not end the string. Newlines read at the top of the
// loop bump the line counter. If the scanner state is not ok, m_state is
// returned instead.
scanner::token scanner::read_string(char delimiter, token result) {
    m_string.reset();
    m_params.reset();
    for (;;) {
        int c = read_char();
        if (!state_ok()) {
            return m_state;
        }
        if ('\n' == c) {
            ++m_line;
        }
        const bool finished = (c == delimiter) || (c == EOF);
        if (finished) {
            m_string.push_back(0);
            m_id = m_string.begin();
            return result;
        }
        if ('\\' == c) {
            // keep the backslash and take the next character verbatim
            m_string.push_back('\\');
            c = read_char();
        }
        m_string.push_back(c);
    }
    return m_state;
}
// Scan a bit-vector literal; the leading '#' was already consumed.
//
// With m_bv_token set, "#x<hex digits>" and "#b<binary digits>" are evaluated
// into m_number, with the width in bits stored in m_bv_size. The result is
// BV_TOKEN, or ERROR_TOKEN when no digit follows the prefix or the prefix is
// neither 'x' nor 'b'.
//
// Without m_bv_token (the old-parser path) the literal is rewritten into an
// identifier of the form "bvhex<digits>" / "bvbin<digits>" and returned as
// ID_TOKEN; an unknown prefix is reported on m_err and yields ERROR_TOKEN.
//
// In every case the first character that is not part of the literal is
// pushed back.
scanner::token scanner::read_bv_literal() {
    TRACE("scanner", tout << "read_bv_literal\n";);
    if (m_bv_token) {
        const int prefix = read_char();
        if (prefix == 'x') {
            int c = read_char();
            m_number = rational(0);
            m_bv_size = 0;
            for (;;) {
                int digit;
                if ('0' <= c && c <= '9') {
                    digit = c - '0';
                }
                else if ('a' <= c && c <= 'f') {
                    digit = 10 + (c - 'a');
                }
                else if ('A' <= c && c <= 'F') {
                    digit = 10 + (c - 'A');
                }
                else {
                    break;
                }
                m_number *= rational(16);
                m_number += rational(digit);
                // each hex digit contributes four bits
                m_bv_size += 4;
                c = read_char();
            }
            unread_char();
            m_state = m_bv_size == 0 ? ERROR_TOKEN : BV_TOKEN;
            TRACE("scanner", tout << m_state << ", bv-size: " << m_bv_size << ", INT_TOKEN: " << INT_TOKEN
                  << ", BV_TOKEN: " << BV_TOKEN << "\n";);
            return m_state;
        }
        if (prefix == 'b') {
            m_number = rational(0);
            m_bv_size = 0;
            for (int c = read_char(); c == '0' || c == '1'; c = read_char()) {
                m_number *= rational(2);
                m_number += rational(c - '0');
                m_bv_size++;
            }
            unread_char();
            m_state = m_bv_size == 0 ? ERROR_TOKEN : BV_TOKEN;
            return m_state;
        }
        m_state = ERROR_TOKEN;
        return m_state;
    }
    else {
        // hack for the old parser
        const int ch = read_char();
        m_state = ID_TOKEN;
        m_string.reset();
        m_params.reset();
        // convert to SMT1 format
        m_string.push_back('b');
        m_string.push_back('v');
        const char * radix_tag;
        switch (ch) {
        case 'x':
            radix_tag = "hex";
            break;
        case 'b':
            radix_tag = "bin";
            break;
        default:
            // TODO: use error reporting
            m_err << "ERROR: unexpected character after '#': '" << ((int)ch) << " " << ch << "'.\n";
            m_state = ERROR_TOKEN;
            return m_state;
        }
        const bool is_hex = (ch == 'x');
        for (const char * p = radix_tag; *p != 0; ++p) {
            m_string.push_back(*p);
        }
        for (;;) {
            const int d = read_char();
            const bool is_bit = (d == '0' || d == '1');
            const bool is_hex_digit = is_hex &&
                (('0' <= d && d <= '9') ||
                 ('a' <= d && d <= 'f') ||
                 ('A' <= d && d <= 'F'));
            if (!is_bit && !is_hex_digit) {
                unread_char();
                break;
            }
            m_string.push_back(d);
        }
        m_string.push_back(0);
        m_id = m_string.begin();
        return m_state;
    }
}
// Build a scanner that reads from `stream` and reports errors on `err`.
//
//   smt2     - use the SMT2 character classes instead of the older ones.
//   bv_token - stored in m_bv_token; read_bv_literal() uses it to choose
//              between evaluating "#x.."/"#b.." literals and rewriting them
//              as identifiers.
//
// The constructor fills m_normalized, a 256-entry table that maps every input
// byte to a class representative: ' ' for blanks, 'a' for identifier
// characters, '0' for digits and '+' for operator characters. All other
// bytes map to themselves. Table indices are taken through unsigned char,
// matching how the table is indexed by the scanning functions.
scanner::scanner(std::istream& stream, std::ostream& err, bool smt2, bool bv_token):
    m_line(1),
    m_pos(0),
    m_id(""),
    m_bv_size(UINT_MAX),
    m_state(ID_TOKEN),
    m_stream(stream),
    m_err(err),
    // m_bpos == m_bend marks the buffer as exhausted, so the first buffered
    // read_char() call triggers a refill.
    m_bpos(1 << 10),
    m_bend(1 << 10),
    m_last_char(0),
    m_smt2(smt2),
    m_bv_token(bv_token) {
    char ch;
    m_is_interactive = &stream == &std::cin;
    m_buffer.resize(m_bpos);
    // Start from the identity mapping.
    for (int i = 0; i < 256; ++i) {
        m_normalized[i] = (char) i;
    }
    m_normalized[static_cast<unsigned char>('\t')] = ' ';
    m_normalized[static_cast<unsigned char>('\r')] = ' ';
    // assert ('a' < 'z');
    // 'a' already maps to itself, so the lower-case loop starts at 'b'.
    for (ch = 'b'; ch <= 'z'; ++ch) {
        m_normalized[static_cast<unsigned char>(ch)] = 'a';
    }
    for (ch = 'A'; ch <= 'Z'; ++ch) {
        m_normalized[static_cast<unsigned char>(ch)] = 'a';
    }
    // assert ('0' < '9', '9' - '0' == 9);
    // '0' already maps to itself, so the digit loop starts at '1'.
    for (ch = '1'; ch <= '9'; ++ch) {
        m_normalized[static_cast<unsigned char>(ch)] = '0';
    }
    if (m_smt2) {
        // SMT2 3.1, "Symbols": ~ ! @ $ % ^ & * _ - + = < > . ? /
        static const char smt2_symbol_chars[] = "~!@$%^&*_-+=<>.?/";
        for (const char * p = smt2_symbol_chars; *p != 0; ++p) {
            m_normalized[static_cast<unsigned char>(*p)] = 'a';
        }
        // SMT2 3.1, "Hexadecimals", "Binaries"
        m_normalized[static_cast<unsigned char>('#')] = '#';
        m_normalized[static_cast<unsigned char>('|')] = '+';
    } else {
        // Operator characters of the older format.
        static const char operator_chars[] = "=<>+-*/%~&@#|\\";
        for (const char * p = operator_chars; *p != 0; ++p) {
            m_normalized[static_cast<unsigned char>(*p)] = '+';
        }
        m_normalized[static_cast<unsigned char>('.')] = '.';
        // Extra characters accepted inside identifiers.
        static const char extra_id_chars[] = "_'!?";
        for (const char * p = extra_id_chars; *p != 0; ++p) {
            m_normalized[static_cast<unsigned char>(*p)] = 'a';
        }
    }
}
// Return the next token from the input.
//
// Blanks, newlines and ';' line comments are skipped. Each remaining
// character is dispatched on its class in m_normalized to the matching
// reader. Once the state becomes EOF_TOKEN or ERROR_TOKEN the loop stops and
// that state is returned, on this call and on every later one.
scanner::token scanner::scan() {
    while (state_ok()) {
        int ch = read_char();
        switch (m_normalized[(unsigned char) ch]) {
        case ' ':
            break;
        case '\n':
            // a new line restarts the column counter
            m_pos = 0;
            ++m_line;
            break;
        case ';':
            comment('\n');
            break;
        case ':':
            return COLON;
        case '(':
            return LEFT_PAREN;
        case ')':
            return RIGHT_PAREN;
        case '?':
        case '$':
        case 'a':
        case '+':
        case '.':
            return read_id(ch);
        case '{':
            return read_string('}',COMMENT_TOKEN);
        case '"':
            return read_string('"',STRING_TOKEN);
        case '0':
            return read_number(ch, true);
        case '#':
            return read_bv_literal();
        // read_char() reports end of input as -1, which indexes slot 255 of
        // m_normalized; that slot holds (char)255, i.e. static_cast<char>(-1).
        case static_cast<char>(-1):
            m_state = EOF_TOKEN;
            break;
        default:
            // TODO: use error reporting
            // NOTE(review): ch is an int, so both insertions print its numeric
            // value; the second one was presumably meant to print the character.
            m_err << "ERROR: unexpected character: '" << ((int)ch) << " " << ch << "'.\n";
            m_state = ERROR_TOKEN;
            break;
        }
    }
    return m_state;
}