toxgene.core.genes.trees.ExpressionParser Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of ToxGene Show documentation
Show all versions of ToxGene Show documentation
Modified ToXGene for the iBench project.
The newest version!
/**
* Implements the ToXgene core parser for expressions.
*
* @author Denilson Barbosa
* @version 0.1
*/
package toxgene.core.genes.trees;
import toxgene.core.ToXgeneErrorException;
import toxgene.core.genes.lists.ToxList;
import toxgene.interfaces.ToXgeneReporter;
import toxgene.util.Dictionary;
public abstract class ExpressionParser{
// Values stored in 'mode': they select whether queries found in an
// expression are built against the current ToxScan or the current ToxList.
private static final int SCAN = 1;
private static final int LIST = 2;
// Parsing context. Every public parse(...) overload overwrites these static
// fields before parsing starts, so the state is shared by all callers.
private static int mode;
// List handed to Query when mode == LIST.
private static ToxList list;
// Scan handed to Query when mode is not LIST.
private static ToxScan scan;
// Prefix forwarded unchanged to every Query that is created (may be null).
private static String prefix;
// Location forwarded to every ToXgeneErrorException and Query created here.
private static int templateNodeLocation;
/**
 * Parses an expression whose queries are built against a scan, with no
 * prefix.
 *
 * The arguments are stored in the class's static fields, which the
 * preprocessing step and the recursive parser read afterwards.
 *
 * @param expr        text of the expression
 * @param s           scan handed to every query found in the expression
 * @param n           location reported in errors and handed to queries
 * @param tgReporter  reporter handed to the expression nodes that are built
 * @param simpleTypes dictionary handed to gene instances ('~' references)
 * @return the root of the parsed expression tree
 */
public static Expression parse(String expr, ToxScan s, int n,
                               ToXgeneReporter tgReporter,
                               Dictionary simpleTypes){
  // the context must be in place before preprocess() runs, because the
  // helpers it calls read templateNodeLocation when reporting errors
  templateNodeLocation = n;
  prefix = null;
  scan = s;
  mode = SCAN;

  final String normalized = preprocess(expr);
  return parse(normalized, tgReporter, simpleTypes);
}
/**
 * Parses an expression whose queries are built against a scan, using the
 * given prefix.
 *
 * The arguments are stored in the class's static fields, which the
 * preprocessing step and the recursive parser read afterwards.
 *
 * @param expr        text of the expression
 * @param s           scan handed to every query found in the expression
 * @param p           prefix handed to every query found in the expression
 * @param n           location reported in errors and handed to queries
 * @param tgReporter  reporter handed to the expression nodes that are built
 * @param simpleTypes dictionary handed to gene instances ('~' references)
 * @return the root of the parsed expression tree
 */
public static Expression parse(String expr, ToxScan s, String p, int n,
                               ToXgeneReporter tgReporter,
                               Dictionary simpleTypes){
  // the context must be in place before preprocess() runs, because the
  // helpers it calls read templateNodeLocation when reporting errors
  templateNodeLocation = n;
  prefix = p;
  scan = s;
  mode = SCAN;

  final String normalized = preprocess(expr);
  return parse(normalized, tgReporter, simpleTypes);
}
/**
 * Parses an expression whose queries are built against a list, using the
 * given prefix.
 *
 * The arguments are stored in the class's static fields, which the
 * preprocessing step and the recursive parser read afterwards.
 *
 * @param expr        text of the expression
 * @param l           list handed to every query found in the expression
 * @param p           prefix handed to every query found in the expression
 * @param n           location reported in errors and handed to queries
 * @param tgReporter  reporter handed to the expression nodes that are built
 * @param simpleTypes dictionary handed to gene instances ('~' references)
 * @return the root of the parsed expression tree
 */
public static Expression parse(String expr, ToxList l, String p, int n,
                               ToXgeneReporter tgReporter,
                               Dictionary simpleTypes){
  // the context must be in place before preprocess() runs, because the
  // helpers it calls read templateNodeLocation when reporting errors
  templateNodeLocation = n;
  prefix = p;
  list = l;
  mode = LIST;

  final String normalized = preprocess(expr);
  return parse(normalized, tgReporter, simpleTypes);
}
/**
 * Recursively turns an expression string into an {@link Expression} tree.
 * The public overloads call this with the output of preprocess(); the
 * recursive calls pass sub-strings of that output.
 *
 * The string is scanned from its last character towards its first. The
 * first character that is ')', a quote, ']', '(', '[' or an operator decides
 * how the string is split:
 * - ')' closes a parenthesised block: the block is the right operand of
 *   the operator in front of it, or the whole expression if nothing
 *   precedes it;
 * - a quote closes a quoted constant, handled the same way;
 * - ']' closes a query (optionally preceded by a built-in function name),
 *   handled the same way;
 * - an operator splits the string into a left and a right operand;
 * - '(' or '[' reached this way has no closing partner and is an error.
 * If none of these is found, the string is a gene instance when it starts
 * with '~' and a plain constant otherwise.
 *
 * @param expr        expression text; must not be empty
 * @param tgReporter  reporter handed to binary expressions and queries
 * @param simpleTypes dictionary handed to gene instances
 * @return the expression tree for expr
 * @throws ToXgeneErrorException if expr is empty or malformed
 */
private static Expression parse(String expr, ToXgeneReporter tgReporter,
                                Dictionary simpleTypes){
  int op;
  int size = expr.length();
  if (size == 0){
    throw new ToXgeneErrorException("invalid tox-query! no expression provided", templateNodeLocation);
  }
  for (int i=size-1; i>=0; i--){
    char c = expr.charAt(i);
    switch (c){
      case ')':{
        if (i < size-1){
          //there was something after the ')' that was not recognized
          throw new ToXgeneErrorException("cannot parse expression \""+expr+"\".\n"+
                                          "Invalid token \""+expr.substring(i)+
                                          "\" after "+expr.substring(0,i+1), templateNodeLocation);
        }
        int start = getBlockStart(expr.substring(0, i));
        if (start == -1){
          throw new ToXgeneErrorException("cannot parse expression \""+expr+"\".\n"+
                                          "Unmatching ')' after "+expr.substring(0,i+1), templateNodeLocation);
        }
        if (start>0){
          //there is something before the '(', it has got to be an operator
          op = getOperator(expr.charAt(start-1));
          if (op != -1){
            //left operand: everything before the operator;
            //right operand: the block without its parentheses
            Expression exp1 = parse(expr.substring(0, start-1),tgReporter, simpleTypes);
            Expression exp2 = parse(expr.substring(start+1, i),tgReporter, simpleTypes);
            return new BinaryExpression(exp1, exp2, op, tgReporter);
          }
          else{
            //operator missing
            throw new ToXgeneErrorException("cannot parse expression \""+expr+"\".\n"+
                                            "Operator required after "+
                                            expr.substring(0,start-1), templateNodeLocation);
          }
        }
        else{
          //these are redundant '(' ')'
          return parse(expr.substring(1, i),tgReporter, simpleTypes);
        }
      }
      case '\'':{
        //here we know we have either a DATE or a STRING constant
        if (i < size-1){
          //there was something after the constant
          throw new ToXgeneErrorException("cannot parse expression \""+expr+"\".\n"+
                                          "Invalid token \""+expr.substring(i)+
                                          "\" after "+expr.substring(0, i+1), templateNodeLocation);
        }
        int start = getConstantStart(expr.substring(0,i));
        if (start == -1){
          throw new ToXgeneErrorException("cannot parse expression \""+expr+"\".\n"+
                                          "Invalid constant "+expr, templateNodeLocation);
        }
        if (start > 0){
          op = getOperator(expr.charAt(start-1));
          if (op != -1){
            //the constant keeps its surrounding quotes
            Expression exp1 = parse(expr.substring(0, start-1),tgReporter, simpleTypes);
            Constant exp2 = new Constant(expr.substring(start, i+1));
            return new BinaryExpression(exp1, exp2, op, tgReporter);
          }
          else{
            //operator missing
            throw new ToXgeneErrorException("cannot parse expression \""+expr+"\".\n"+
                                            "Operator required after "+expr.substring(0,start),
                                            templateNodeLocation);
          }
        }
        else{
          return new Constant(expr.substring(start, i+1));
        }
      }
      case ']':{
        //here we know we have a query
        if (i < size-1){
          //there was something after the ']' that was not recognized
          throw new ToXgeneErrorException("cannot parse expression \""+expr+"\".\n"+
                                          "Invalid token \""+expr.substring(i)+
                                          "\" after ']'", templateNodeLocation);
        }
        int start = getQueryStart(expr.substring(0,i));
        if (start == -1){
          throw new ToXgeneErrorException("cannot parse expression \""+expr+"\"\n"+
                                          "Unmatching ']' after "+expr.substring(0,i),
                                          templateNodeLocation);
        }
        if (start > 0){
          op = getOperator(expr.charAt(start-1));
          if (op != -1){
            //the left operand stops right before the operator that
            //precedes the query
            Expression expr1 = parse(expr.substring(0, start-1),tgReporter, simpleTypes);
            Query expr2 = (mode == LIST) ?
              new Query(expr.substring(start, i+1), list, prefix,
                        templateNodeLocation, tgReporter):
              new Query(expr.substring(start, i+1), scan, prefix,
                        templateNodeLocation, tgReporter);
            return new BinaryExpression(expr1, expr2, op, tgReporter);
          }
          else{
            //operator missing
            throw new ToXgeneErrorException("cannot parse expression \""+expr+"\".\n"+
                                            "Operator required after "+expr.substring(0,start),
                                            templateNodeLocation);
          }
        }
        else{
          //there can be built-in functions before the '['path']'
          if (mode == LIST){
            return new Query(expr, list, prefix, templateNodeLocation,
                             tgReporter);
          }
          return new Query(expr, scan, prefix, templateNodeLocation,
                           tgReporter);
        }
      }
      case '(':{
        //this is a syntactical error
        throw new ToXgeneErrorException("cannot parse expression \""+expr+"\".\n"+
                                        "Unmatching '(' after "+expr.substring(0,i),templateNodeLocation);
      }
      case '[':{
        //this is a syntactical error
        throw new ToXgeneErrorException("cannot parse expression \""+expr+"\".\n"+
                                        "Unmatching '[' after "+expr.substring(0,i),templateNodeLocation);
      }
      default:{
        op = getOperator(c);
        if (op != -1){
          //The guard used to be (i == size), which can never hold because
          //i never exceeds size-1. An operator in the last position has no
          //right operand, so report that here instead of letting the
          //recursive call fail with "no expression provided".
          if (i == size-1){
            throw new ToXgeneErrorException("cannot parse expression \""+expr+"\".\n"+
                                            "Operand required after "+expr,templateNodeLocation);
          }
          Expression expr1 = parse(expr.substring(0,i),tgReporter, simpleTypes);
          Expression expr2 = parse(expr.substring(i+1),tgReporter, simpleTypes);
          return new BinaryExpression(expr1, expr2, op, tgReporter);
        }
        //any other character: keep scanning towards the start
      }
    }
  }
  if (expr.charAt(0) == '~'){
    //the name after '~' is handed to GeneInstance
    return new GeneInstance(expr.substring(1), simpleTypes);
  }
  //if we reached this point it is because expr is either a REAL or a
  //INTEGER constant
  return new Constant(expr);
}
/**
 * Finds the '(' that opens the block whose closing ')' would directly
 * follow expr.
 *
 * The text is walked from its end to its start; each ')' met on the way
 * opens a nested block that must be closed by a '(' before the answer is
 * reached.
 *
 * @param expr the text that precedes the closing ')'
 * @return index of the matching '(' in expr, or -1 if there is none
 */
private static int getBlockStart(String expr){
  int depth = 0;
  int pos = expr.length();
  while (--pos >= 0){
    switch (expr.charAt(pos)){
      case ')':
        //a nested block starts (seen from the right)
        depth++;
        break;
      case '(':
        if (depth == 0){
          return pos;
        }
        //this '(' only closes a nested block
        depth--;
        break;
      default:
        break;
    }
  }
  return -1;
}
/**
 * Finds the ')' that closes a block whose opening '(' directly precedes
 * expr. Mirror image of getBlockStart: the text is walked from start to
 * end and nested '(' ... ')' pairs are skipped.
 *
 * NOTE(review): the loop of this method and the header of
 * getConstantStart were lost in the source this file was recovered from
 * (everything between a '<' and the next '>' was dropped). Both were
 * rebuilt from their call sites and from getBlockStart; confirm against
 * the upstream sources.
 *
 * @param expr the text that follows the opening '('
 * @return index of the matching ')' in expr, or -1 if there is none
 */
private static int getBlockEnd(String expr){
  int level = 0;
  int size = expr.length();
  for (int i=0; i<size; i++){
    char c = expr.charAt(i);
    if (c == '('){
      level++;
    }
    if (c == ')'){
      if (level == 0){
        return i;
      }
      else{
        level--;
      }
    }
  }
  return -1;
}
/**
 * Finds the quote that opens a constant whose closing quote would
 * directly follow expr.
 *
 * @param expr the text that precedes the closing quote
 * @return index of the last quote in expr, or -1 if there is none
 */
private static int getConstantStart(String expr){
  int size = expr.length();
  for (int i=size-1; i>=0; i--){
    char c = expr.charAt(i);
    if (c == '\''){
      return i;
    }
  }
  return -1;
}
/**
 * Finds where the query whose closing ']' would directly follow expr
 * begins, including a built-in function name written in front of the '['.
 *
 * Only the last '[' of expr is considered. The length of the function
 * name is guessed from how far to the left the nearest operator (or the
 * start of the text) is: no name, a 3-character name (AVG, MIN, MAX, LEN,
 * SUM), a 5-character name (COUNT), a 6-character name (CONCAT) or an
 * 8-character name (DISTINCT). The name itself is not checked here.
 *
 * @param expr the text that precedes the closing ']'
 * @return index in expr at which the query starts, or -1 if expr holds
 *         no '['
 */
private static int getQueryStart(String expr){
  final int open = expr.lastIndexOf('[');
  if (open <= 0){
    //-1: there is no '[' at all; 0: the query opens the text
    return open;
  }
  //too short for any function name, or an operator sits right before '['
  if (open < 3 || getOperator(expr.charAt(open-1)) != -1){
    return open;
  }
  //3-character name: it reaches the start of the text or follows an operator
  if (open < 5 || getOperator(expr.charAt(open-4)) != -1){
    return open-3;
  }
  //5-character name
  if (open < 6 || getOperator(expr.charAt(open-6)) != -1){
    return open-5;
  }
  //6-character name
  if (open < 8 || getOperator(expr.charAt(open-7)) != -1){
    return open-6;
  }
  //anything longer is taken to be an 8-character name
  return open-8;
}
/**
 * Maps an operator character to the matching Expression operator code.
 *
 * @param op the character to classify
 * @return the Expression constant for '+', '-', '*', '/', '%' or '#',
 *         or -1 if op is none of these
 */
private static int getOperator(char op){
  if (op == '+'){
    return Expression.ADD;
  }
  if (op == '-'){
    return Expression.SUB;
  }
  if (op == '*'){
    return Expression.MUL;
  }
  if (op == '/'){
    return Expression.DIV;
  }
  if (op == '%'){
    return Expression.MOD;
  }
  if (op == '#'){
    return Expression.CONCAT;
  }
  return -1;
}
/**
 * This method adds parentheses around divisions and multiplications so
 * that these operations get higher precedence over other operations.
 *
 * The expression is walked from right to left. Quoted constants and
 * queries are skipped, parenthesised blocks are preprocessed recursively,
 * and for every '/' or '*' the operands on both sides are located and the
 * operation is wrapped in parentheses, unless its left operand starts the
 * expression, in which case no parentheses are added.
 *
 * NOTE(review): for a chain such as "a/b/c" the left operand found for the
 * last '/' is only "b", so the result appears to be "a/(b/c)" — confirm
 * whether that grouping is intended.
 *
 * @param expression the raw expression text
 * @return the expression with the extra parentheses inserted
 * @throws ToXgeneErrorException if a ')' has no matching '(' or an operand
 *         of '/' or '*' is malformed
 */
private static String preprocess(String expression){
  int size = expression.length();
  String expr = expression;

  for (int i=size-1; i>=0; i--){
    char c = expr.charAt(i);

    if (c == '\''){
      //we simply skip constants
      i = getConstantStart(expression.substring(0,i));
    }

    if (c == ']'){
      //we simply skip path expressions inside queries
      i = getQueryStart(expression.substring(0,i));
    }

    if (c == ')'){
      int start = getBlockStart(expr.substring(0, i));
      if (start == -1){
        //Without this check substring(0, -1) below fails with a
        //StringIndexOutOfBoundsException; report the syntax error instead.
        throw new ToXgeneErrorException("cannot parse expression \""+expr+"\".\n"+
                                        "Unmatching ')' after "+expr.substring(0,i+1), templateNodeLocation);
      }
      String pre = expr.substring(0, start);
      String pos = expr.substring(i+1);
      String block = expr.substring(start+1, i);
      expr = pre+"("+preprocess(block)+")"+pos;
      size = expr.length();
      //continue to the left of the block
      i = start;
      continue;
    }

    if ((c == '/') || (c == '*')){
      int startExp1 = getExpressionStart(expr.substring(0,i));
      //getExpressionEnd returns the length of the right operand, so this
      //is the index of the operand's last character
      int endExp2 = i + getExpressionEnd(expr.substring(i+1));

      String exp1 = expr.substring(startExp1, i);
      String exp2 = expr.substring(i+1, endExp2+1);

      if (startExp1 == 0){
        if (endExp2 == size -1){
          expr = preprocess(exp1)+c+exp2;
        }
        else{
          //no need to put parantheses here
          String pos = expr.substring(endExp2+1);
          expr = preprocess(exp1)+c+exp2+pos;
        }
      }
      else{
        String pre = expr.substring(0, startExp1);
        if (endExp2 == size -1){
          expr = pre+"("+preprocess(exp1)+c+exp2+")";
        }
        else{
          String pos = expr.substring(endExp2+1);
          expr = pre+"("+preprocess(exp1)+c+exp2+")"+pos;
        }
      }
      //we have to update i, so that we continue before the start of
      //operand1
      size = expr.length();
      i = startExp1;
    }
  }
  return expr;
}
/**
 * Finds where the operand that ends at the end of expr begins.
 *
 * The text is walked from right to left. An operator ends the search: the
 * operand starts right after it. A ')', ']' or quote is only accepted as
 * the very last character; the operand is then the whole block, query or
 * constant it closes. If nothing is found the operand is all of expr.
 *
 * @param expr the text to the left of a '/' or '*'
 * @return index in expr at which the operand starts
 * @throws ToXgeneErrorException if a closing character is followed by
 *         other text or has no matching opening character
 */
private static int getExpressionStart(String expr){
  final int last = expr.length() - 1;
  for (int pos = last; pos >= 0; pos--){
    final char ch = expr.charAt(pos);
    final boolean closer = (ch == ')') || (ch == ']') || (ch == '\'');

    if (!closer){
      if (getOperator(ch) != -1){
        return pos + 1;
      }
      continue;
    }

    if (pos < last){
      //there was something after the closing character
      throw new ToXgeneErrorException("cannot parse expression \""+expr+"\".\n"+
                                      "Invalid token \""+expr.substring(pos)+
                                      "\" after "+expr.substring(0,pos+1),templateNodeLocation);
    }

    final String head = expr.substring(0,pos);
    final int start;
    if (ch == ')'){
      start = getBlockStart(head);
      if (start == -1){
        throw new ToXgeneErrorException("cannot parse expression \""+expr+"\".\n"+
                                        "Unmatching ')' after "+head,templateNodeLocation);
      }
    }
    else if (ch == ']'){
      start = getQueryStart(head);
      if (start == -1){
        throw new ToXgeneErrorException("cannot parse expression \""+expr+"\"\n"+
                                        "Unmatching ']' after "+head,templateNodeLocation);
      }
    }
    else{
      start = getConstantStart(head);
      if (start == -1){
        throw new ToXgeneErrorException("cannot parse expression \""+expr+"\".\n"+
                                        "Invalid constant "+expr,templateNodeLocation);
      }
    }
    return start;
  }
  return 0;
}
private static int getExpressionEnd(String expr){
int size = expr.length();
for (int i=0; i 0){
//there was something before the '('
throw new ToXgeneErrorException("cannot parse expression \""+expr+"\".\n"+
"Invalid token \""+expr.substring(i)+
"\" after "+expr.substring(0,i),templateNodeLocation);
}
int end = getBlockEnd(expr.substring(i+1));
if (end == -1){
throw new ToXgeneErrorException("cannot parse expression \""+expr+"\".\n"+
"Unmatching ')' after "+expr.substring(0,i),templateNodeLocation);
}
return end + 2;//1 for the '(', 1 to compensate getBlockEnd
}
case '[':{
int end = expr.indexOf(']',i+1);;
if (end == -1){
throw new ToXgeneErrorException("cannot parse expression \""+expr+"\"\n"+
"Unmatching ']' after "+expr.substring(0,i),templateNodeLocation);
}
return end+1;
}
case '\'':{
if (i > 0){
//there was something before the '''
throw new ToXgeneErrorException("cannot parse expression \""+expr+"\".\n"+
"Invalid token \""+expr.substring(i)+
"\" after "+expr.substring(0,i),templateNodeLocation);
}
int end = expr.indexOf('\'',i+1);
if (end == -1){
throw new ToXgeneErrorException("cannot parse expression \""+expr+"\".\n"+
"Invalid constant "+expr,templateNodeLocation);
}
return end+1;
}
default:{
if (getOperator(c) != -1){
return i;
}
}
}
}
return size;
}
}