All downloads are free. Search and download functionality uses the official Maven repository.

toxgene.core.genes.trees.ExpressionParser Maven / Gradle / Ivy

The newest version!
/**
 * Implements a parser for expressions.
 *
 * @author Denilson Barbosa
 * @version 0.1
 */

package toxgene.core.genes.trees;

import toxgene.core.ToXgeneErrorException;
import toxgene.core.genes.lists.ToxList;
import toxgene.interfaces.ToXgeneReporter;
import toxgene.util.Dictionary;

public abstract class ExpressionParser{
  // Values for 'mode': which kind of data source the current parse uses.
  private static final int SCAN = 1;
  private static final int LIST = 2;

  // Parsing context. Every public parse(...) overload overwrites these
  // static fields before delegating to the private recursive parser.
  // NOTE(review): because this state is static, overlapping parse calls
  // would presumably interfere with each other - confirm callers are
  // single-threaded.
  private static int mode;
  // Source handed to Query objects when mode == LIST.
  private static ToxList list;
  // Source handed to Query objects when mode == SCAN.
  private static ToxScan scan;

  // Prefix handed to every Query that is created; null when the caller
  // used the overload without a prefix.
  private static String prefix;

  // Location value attached to every ToXgeneErrorException thrown while
  // parsing and passed on to every Query that is created.
  private static int templateNodeLocation;

  /**
   * Parses an expression whose queries read from a scan, without a prefix.
   *
   * @param expr        the expression text; it is preprocessed before parsing
   * @param s           the scan handed to every query found in the expression
   * @param n           location value attached to parse errors and queries
   * @param tgReporter  reporter handed to the expression nodes that are built
   * @param simpleTypes dictionary handed to gene instances (operands
   *                    starting with '~')
   * @return the root of the parsed expression tree
   * @throws ToXgeneErrorException if the expression cannot be parsed
   */
  public static Expression parse(String expr, ToxScan s, int n,
			ToXgeneReporter tgReporter,
			Dictionary simpleTypes){
		// Set up the shared parsing context first; both the preprocessor and
		// the recursive parser read it.
		templateNodeLocation = n;
		prefix = null;
		scan = s;
		mode = SCAN;

		final String prepared = preprocess(expr);
		return parse(prepared, tgReporter, simpleTypes);
  }

  /**
   * Parses an expression whose queries read from a scan and use a prefix.
   *
   * @param expr        the expression text; it is preprocessed before parsing
   * @param s           the scan handed to every query found in the expression
   * @param p           the prefix handed to every query
   * @param n           location value attached to parse errors and queries
   * @param tgReporter  reporter handed to the expression nodes that are built
   * @param simpleTypes dictionary handed to gene instances (operands
   *                    starting with '~')
   * @return the root of the parsed expression tree
   * @throws ToXgeneErrorException if the expression cannot be parsed
   */
  public static Expression parse(String expr, ToxScan s, String p, int n,
			ToXgeneReporter tgReporter,
			Dictionary simpleTypes){
		// Set up the shared parsing context first; both the preprocessor and
		// the recursive parser read it.
		templateNodeLocation = n;
		prefix = p;
		scan = s;
		mode = SCAN;

		final String prepared = preprocess(expr);
		return parse(prepared, tgReporter, simpleTypes);
  }

  /**
   * Parses an expression whose queries read from a list and use a prefix.
   *
   * @param expr        the expression text; it is preprocessed before parsing
   * @param l           the list handed to every query found in the expression
   * @param p           the prefix handed to every query
   * @param n           location value attached to parse errors and queries
   * @param tgReporter  reporter handed to the expression nodes that are built
   * @param simpleTypes dictionary handed to gene instances (operands
   *                    starting with '~')
   * @return the root of the parsed expression tree
   * @throws ToXgeneErrorException if the expression cannot be parsed
   */
  public static Expression parse(String expr, ToxList l, String p, int n,
			ToXgeneReporter tgReporter,
			Dictionary simpleTypes){
		// Set up the shared parsing context first; both the preprocessor and
		// the recursive parser read it.
		templateNodeLocation = n;
		prefix = p;
		list = l;
		mode = LIST;

		final String prepared = preprocess(expr);
		return parse(prepared, tgReporter, simpleTypes);
  }
  
  //This is the only method really needed here
  /**
   * Recursively turns an expression string into an expression tree.
   *
   * <p>The text is scanned from right to left and the first significant
   * character decides how it is split:
   * <ul>
   *   <li>')' - the text ends with a parenthesised block; the block is the
   *       right operand of the operator in front of it, or the whole
   *       expression when the block starts at index 0;</li>
   *   <li>'\'' - the text ends with a quoted constant;</li>
   *   <li>']' - the text ends with a query (optionally preceded by a
   *       built-in function name);</li>
   *   <li>an operator character - the text is split around it.</li>
   * </ul>
   * If none of these is found the text is a gene instance when it starts
   * with '~', and a plain constant otherwise.
   *
   * @param expr        the expression text to parse
   * @param tgReporter  reporter handed to binary expressions and queries
   * @param simpleTypes dictionary handed to gene instances
   * @return the root of the parsed expression tree
   * @throws ToXgeneErrorException if the text is empty or malformed
   */
  private static Expression parse(String expr, ToXgeneReporter tgReporter,
			Dictionary simpleTypes){
		int size = expr.length();

		if (size == 0){
			throw new ToXgeneErrorException("invalid tox-query! no expression provided", templateNodeLocation);
		}

		for (int i=size-1; i>=0; i--){
			char c = expr.charAt(i);

			switch (c){
			case ')':{
				if (i < size-1){
					//there was something after the ')' that was not recognized
					throw new ToXgeneErrorException("cannot parse expression \""+expr+"\".\n"+
							"Invalid token \""+expr.substring(i)+
							"\" after "+expr.substring(0,i+1), templateNodeLocation);
				}

				int start = getBlockStart(expr.substring(0, i));
				if (start == -1){
					throw new ToXgeneErrorException("cannot parse expression \""+expr+"\".\n"+
							"Unmatching ')' after "+expr.substring(0,i+1), templateNodeLocation);
				}
				if (start == 0){
					//the parentheses wrap the whole expression: they are redundant
					return parse(expr.substring(1, i), tgReporter, simpleTypes);
				}

				//there is something before the '(', it has got to be an operator
				int op = getOperator(expr.charAt(start-1));
				if (op == -1){
					//operator missing; report everything in front of the '('
					throw new ToXgeneErrorException("cannot parse expression \""+expr+"\".\n"+
							"Operator required after "+
							expr.substring(0,start), templateNodeLocation);
				}
				Expression exp1 = parse(expr.substring(0, start-1), tgReporter, simpleTypes);
				Expression exp2 = parse(expr.substring(start+1, i), tgReporter, simpleTypes);
				return new BinaryExpression(exp1, exp2, op, tgReporter);
			}

			case '\'':{
				//here we know we have either a DATE or a STRING constant
				if (i < size-1){
					//there was something after the constant
					throw new ToXgeneErrorException("cannot parse expression \""+expr+"\".\n"+
							"Invalid token \""+expr.substring(i)+
							"\" after "+expr.substring(0, i+1), templateNodeLocation);
				}

				int start = getConstantStart(expr.substring(0,i));
				if (start == -1){
					throw new ToXgeneErrorException("cannot parse expression \""+expr+"\".\n"+
							"Invalid constant "+expr, templateNodeLocation);
				}
				if (start == 0){
					//the constant is the whole expression
					return new Constant(expr.substring(start, i+1));
				}

				int op = getOperator(expr.charAt(start-1));
				if (op == -1){
					//operator missing
					throw new ToXgeneErrorException("cannot parse expression \""+expr+"\".\n"+
							"Operator required after "+expr.substring(0,start),
							templateNodeLocation);
				}
				Expression exp1 = parse(expr.substring(0, start-1), tgReporter, simpleTypes);
				Constant exp2 = new Constant(expr.substring(start, i+1));
				return new BinaryExpression(exp1, exp2, op, tgReporter);
			}

			case ']':{
				//here we know we have a query
				if (i < size-1){
					//there was something after the ']' that was not recognized
					throw new ToXgeneErrorException("cannot parse expression \""+expr+"\".\n"+
							"Invalid token \""+expr.substring(i)+
							"\" after ']'", templateNodeLocation);
				}

				int start = getQueryStart(expr.substring(0,i));
				if (start == -1){
					throw new ToXgeneErrorException("cannot parse expression \""+expr+"\"\n"+
							"Unmatching ']' after "+expr.substring(0,i),
							templateNodeLocation);
				}
				if (start == 0){
					//the query (including any built-in function in front of
					//the '[') is the whole expression
					return newQuery(expr, tgReporter);
				}

				int op = getOperator(expr.charAt(start-1));
				if (op == -1){
					//operator missing
					throw new ToXgeneErrorException("cannot parse expression \""+expr+"\".\n"+
							"Operator required after "+expr.substring(0,start),
							templateNodeLocation);
				}
				//the left operand is everything before the operator
				Expression expr1 = parse(expr.substring(0, start-1), tgReporter, simpleTypes);
				Query expr2 = newQuery(expr.substring(start, i+1), tgReporter);
				return new BinaryExpression(expr1, expr2, op, tgReporter);
			}

			case '(':{
				//this is a syntactical error
				throw new ToXgeneErrorException("cannot parse expression \""+expr+"\".\n"+
						"Unmatching '(' after "+expr.substring(0,i),templateNodeLocation);
			}

			case '[':{
				//this is a syntactical error
				throw new ToXgeneErrorException("cannot parse expression \""+expr+"\".\n"+
						"Unmatching '[' after "+expr.substring(0,i),templateNodeLocation);
			}

			default:{
				int op = getOperator(c);
				if (op != -1){
					if (i == size-1){
						//the operator is the last character, so its right
						//operand is missing
						throw new ToXgeneErrorException("cannot parse expression \""+expr+"\".\n"+
								"Operand required after "+expr,templateNodeLocation);
					}

					Expression expr1 = parse(expr.substring(0,i), tgReporter, simpleTypes);
					Expression expr2 = parse(expr.substring(i+1), tgReporter, simpleTypes);
					return new BinaryExpression(expr1, expr2, op, tgReporter);
				}
			}
			}
		}

		if (expr.charAt(0) == '~'){
			return new GeneInstance(expr.substring(1), simpleTypes);
		}

		//if we reached this point it is because expr is either a REAL or a
		//INTEGER constant
		return new Constant(expr);
  }

  /**
   * Builds a query over the data source selected by the current mode: the
   * list when the mode is LIST, the scan otherwise.
   */
  private static Query newQuery(String text, ToXgeneReporter tgReporter){
		if (mode == LIST){
			return new Query(text, list, prefix, templateNodeLocation, tgReporter);
		}
		return new Query(text, scan, prefix, templateNodeLocation, tgReporter);
  }
	
  /**
   * Finds the '(' that opens the block whose closing ')' comes right after
   * the given text.
   *
   * <p>The text is walked from its last character backwards. Every ')'
   * met on the way opens a nested block that must be closed by its own
   * '(' before the wanted one can be found.
   *
   * @param expr the text in front of the closing ')'
   * @return the index of the matching '(', or -1 if there is none
   */
  private static int getBlockStart(String expr){
		int depth = 0;
		int pos = expr.length() - 1;

		while (pos >= 0){
			switch (expr.charAt(pos)){
			case ')':
				depth++;
				break;
			case '(':
				if (depth == 0){
					return pos;
				}
				depth--;
				break;
			default:
				break;
			}
			pos--;
		}

		return -1;
  }
  
  /**
   * Finds the ')' that closes the block whose opening '(' comes right
   * before the given text.
   *
   * <p>The text is walked from left to right. Every '(' met on the way
   * opens a nested block that must be closed by its own ')' before the
   * wanted one can be found.
   *
   * @param expr the text that follows the opening '('
   * @return the index of the matching ')', or -1 if there is none
   */
  private static int getBlockEnd(String expr){
		int level = 0;
		int size = expr.length();

		for (int i=0; i<size; i++){
			char c = expr.charAt(i);
			if (c == '('){
				level++;
			}
			if (c == ')'){
				if (level == 0){
					return i;
				}
				else{
					level--;
				}
			}
		}

		return -1;
  }

  /**
   * Finds the quote that opens the constant whose closing quote comes
   * right after the given text.
   *
   * @param expr the text in front of the closing quote
   * @return the index of the last '\'' in the text, or -1 if there is none
   */
  private static int getConstantStart(String expr){
		int size = expr.length();

		for (int i=size-1; i>=0; i--){
			char c = expr.charAt(i);
			if (c == '\''){
				return i;
			}
		}

		return -1;
  }

  /**
   * Finds where the query that ends right after the given text begins.
   *
   * <p>The query normally begins at its last '['. When that bracket is
   * directly preceded by a built-in function name, the name belongs to the
   * query as well. The name is not compared against a list of functions;
   * instead its length is guessed by probing for an operator at the
   * position just before a name of 3 letters (AVG, MIN, MAX, LEN, SUM),
   * then 5 (COUNT), then 6 (CONCAT); if none of the probes succeeds an
   * 8-letter name (DISTINCT) is assumed.
   *
   * @param expr the text in front of the closing ']'
   * @return the index at which the query starts, or -1 if the text
   *         contains no '['
   */
  private static int getQueryStart(String expr){
		final int open = expr.lastIndexOf('[');

		if (open <= 0){
			//either there is no '[' at all (-1) or the query starts the text (0)
			return open;
		}

		//too short for any function name, or an operator sits right before
		//the '[': the query starts at the bracket itself
		if ((open < 3) || (getOperator(expr.charAt(open-1)) != -1)){
			return open;
		}

		//each rule is {smallest index at which the probe is made,
		//              distance from '[' to the probed character,
		//              length of the function name assumed on success}
		final int[][] rules = {
			{5, 4, 3},
			{6, 6, 5},
			{8, 7, 6}
		};

		for (int r=0; r<rules.length; r++){
			int[] rule = rules[r];
			if ((open < rule[0]) || (getOperator(expr.charAt(open-rule[1])) != -1)){
				return open - rule[2];
			}
		}

		return open - 8;
  }
  
  /**
   * Maps an operator character to its Expression operator code.
   *
   * @param op the character to look up
   * @return the matching Expression constant for '+', '-', '*', '/', '%'
   *         and '#', or -1 when the character is not an operator
   */
  private static int getOperator(char op){
		int code;

		switch (op){
		case '+':
			code = Expression.ADD;
			break;
		case '-':
			code = Expression.SUB;
			break;
		case '*':
			code = Expression.MUL;
			break;
		case '/':
			code = Expression.DIV;
			break;
		case '%':
			code = Expression.MOD;
			break;
		case '#':
			code = Expression.CONCAT;
			break;
		default:
			code = -1;
			break;
		}

		return code;
  }
  
  /**
   * Adds parentheses around divisions and multiplications so that these
   * operations get higher precedence over other operations.
   *
   * <p>The text is scanned from right to left. Quoted constants and
   * queries are skipped, parenthesised blocks are preprocessed
   * recursively, and every '*' or '/' is wrapped together with its two
   * operands in a new pair of parentheses, unless the left operand starts
   * the text, in which case no parentheses are added.
   *
   * <p>NOTE(review): the left operand of '*' or '/' is cut at the nearest
   * operator, so a chain such as a/b/c appears to be rewritten as a/(b/c).
   * Confirm whether that grouping is intended.
   *
   * @param expression the raw expression text
   * @return the expression text with the extra parentheses added
   * @throws ToXgeneErrorException if a ')' has no matching '(', or if an
   *         operand of '*' or '/' is malformed
   */
  private static String preprocess(String expression){
		String expr = expression;

		for (int i=expr.length()-1; i>=0; i--){
			char c = expr.charAt(i);

			if (c == '\''){
				//we simply skip constants; the text in front of i has not
				//been rewritten yet, so it is still the original text
				i = getConstantStart(expr.substring(0,i));
				continue;
			}

			if (c == ']'){
				//we simply skip path expressions inside queries
				i = getQueryStart(expr.substring(0,i));
				continue;
			}

			if (c == ')'){
				int start = getBlockStart(expr.substring(0, i));
				if (start == -1){
					//report the error the same way the parser does instead
					//of failing inside substring()
					throw new ToXgeneErrorException("cannot parse expression \""+expr+"\".\n"+
							"Unmatching ')' after "+expr.substring(0,i+1), templateNodeLocation);
				}

				String pre = expr.substring(0, start);
				String pos = expr.substring(i+1);
				String block = expr.substring(start+1, i);

				expr = pre+"("+preprocess(block)+")"+pos;
				//continue before the start of the block
				i = start;
				continue;
			}

			if ((c == '/') || (c == '*')){
				int startExp1 = getExpressionStart(expr.substring(0,i));
				//index of the last character of the right operand
				int endExp2 = i + getExpressionEnd(expr.substring(i+1));

				String exp1 = expr.substring(startExp1, i);
				String exp2 = expr.substring(i+1, endExp2+1);
				//whatever follows the right operand; empty when the operand
				//ends the text
				String pos = expr.substring(endExp2+1);
				String operation = preprocess(exp1)+c+exp2;

				if (startExp1 == 0){
					//no need to put parentheses here
					expr = operation+pos;
				}
				else{
					String pre = expr.substring(0, startExp1);
					expr = pre+"("+operation+")"+pos;
				}

				//we have to update i, so that we continue before the start of
				//operand1
				i = startExp1;
			}
		}

		return expr;
  }

  /**
   * Finds where the operand that ends the given text begins.
   *
   * <p>The text is walked backwards. An operator character ends the
   * search: the operand starts right after it. A ')', ']' or '\'' must be
   * the very last character of the text; the operand then starts where the
   * matching block, query or constant starts. If nothing of the kind is
   * found, the operand is the whole text.
   *
   * @param expr the text in front of an operator
   * @return the index of the first character of the operand
   * @throws ToXgeneErrorException if a closing character is followed by
   *         other text, or has no matching opening character
   */
  private static int getExpressionStart(String expr){
		final int last = expr.length() - 1;

		for (int pos=last; pos>=0; pos--){
			final char ch = expr.charAt(pos);

			if ((ch != ')') && (ch != ']') && (ch != '\'')){
				if (getOperator(ch) != -1){
					return pos + 1;
				}
				continue;
			}

			if (pos < last){
				//there was something after the closing character
				throw new ToXgeneErrorException("cannot parse expression \""+expr+"\".\n"+
						"Invalid token \""+expr.substring(pos)+
						"\" after "+expr.substring(0,pos+1),templateNodeLocation);
			}

			//everything in front of the closing character
			final String head = expr.substring(0,pos);
			int start;

			if (ch == ')'){
				start = getBlockStart(head);
				if (start == -1){
					throw new ToXgeneErrorException("cannot parse expression \""+expr+"\".\n"+
							"Unmatching ')' after "+head,templateNodeLocation);
				}
			}
			else if (ch == ']'){
				start = getQueryStart(head);
				if (start == -1){
					throw new ToXgeneErrorException("cannot parse expression \""+expr+"\"\n"+
							"Unmatching ']' after "+head,templateNodeLocation);
				}
			}
			else{
				start = getConstantStart(head);
				if (start == -1){
					throw new ToXgeneErrorException("cannot parse expression \""+expr+"\".\n"+
							"Invalid constant "+expr,templateNodeLocation);
				}
			}

			return start;
		}

		return 0;
  }

  private static int getExpressionEnd(String expr){
		int size = expr.length();
	
		for (int i=0; i 0){
					//there was something before the '('
					throw new ToXgeneErrorException("cannot parse expression \""+expr+"\".\n"+
												"Invalid token \""+expr.substring(i)+
												"\" after "+expr.substring(0,i),templateNodeLocation);
				}

				int end = getBlockEnd(expr.substring(i+1));
				if (end == -1){
					throw new ToXgeneErrorException("cannot parse expression \""+expr+"\".\n"+
												"Unmatching ')' after "+expr.substring(0,i),templateNodeLocation);
				}
				return end + 2;//1 for the '(', 1 to compensate getBlockEnd
			}

			case '[':{
				int end = expr.indexOf(']',i+1);;
				if (end == -1){
					throw new ToXgeneErrorException("cannot parse expression \""+expr+"\"\n"+
												"Unmatching ']' after "+expr.substring(0,i),templateNodeLocation);
				}
				return end+1;
			}

			case '\'':{
				if (i > 0){
					//there was something before the '''
					throw new ToXgeneErrorException("cannot parse expression \""+expr+"\".\n"+
												"Invalid token \""+expr.substring(i)+
												"\" after "+expr.substring(0,i),templateNodeLocation);
				}

				int end = expr.indexOf('\'',i+1);
				if (end == -1){
					throw new ToXgeneErrorException("cannot parse expression \""+expr+"\".\n"+
												"Invalid constant "+expr,templateNodeLocation);
				}
				return end+1;
			}

			default:{
				if (getOperator(c) != -1){
					return i;
				}
			}
			}
		}

		return size;
  }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy