All downloads are free. Search and download functionality uses the official Maven repository.

gr.iti.mklab.framework.utils.QueryUtils Maven / Gradle / Ivy

package gr.iti.mklab.framework.utils;

import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

import gr.iti.mklab.framework.utils.booleanparse.BooleanQueryLexer;
import gr.iti.mklab.framework.utils.booleanparse.BooleanQueryParser;
import gr.iti.mklab.framework.utils.booleanparse.BooleanQueryParser.ExpressionContext;
import gr.iti.mklab.framework.utils.booleanparse.ast.BooleanExpression;
import gr.iti.mklab.framework.utils.booleanparse.ast.Node;
import gr.iti.mklab.framework.utils.booleanparse.ast.visitor.ASTBuilderVisitor;

import org.antlr.v4.runtime.ANTLRInputStream;
import org.antlr.v4.runtime.CommonTokenStream;
import org.antlr.v4.runtime.TokenStream;
import org.apache.commons.lang.StringUtils;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.BooleanQuery.Builder;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;

import com.bpodgursky.jbool_expressions.And;
import com.bpodgursky.jbool_expressions.Expression;
import com.bpodgursky.jbool_expressions.Not;
import com.bpodgursky.jbool_expressions.Or;
import com.bpodgursky.jbool_expressions.Variable;
import com.bpodgursky.jbool_expressions.rules.RuleSet;

public class QueryUtils {

	public static boolean isBooleanExpression(String q) {
		if(q.matches(".* OR .*") || q.matches(".* AND .*")) {
			return true;
		}
		return false;
	}
	
	public static Set> parse(String q) {

		Node ast = toAST(q);

        Expression expr = toBooleanExpression(ast);    
        expr = RuleSet.simplify(expr);
        expr = RuleSet.toSop(expr);

        q = expr.toString();
        q = q.replaceAll(" & ", " AND ");
        q = q.replaceAll(" \\| ", " OR ");
        
        Node sopAst = toAST(q); 
        return astToSoP(sopAst);

	}
	
	public static Set> astToSoP(Node ast) {
		Set> queries = new HashSet>(); 
		BooleanExpression tree = (BooleanExpression) ast;
		if("or".equals(tree.toString())) {
			if(tree.getOperands() != null && !tree.getOperands().isEmpty()) {
				for(Node andNode : tree.getOperands()) {
					BooleanExpression andBranch = (BooleanExpression) andNode;
					if("and".toString().equals(andBranch.toString())) {
						Set tags = new HashSet(); 
						for(Node terminalNode : andBranch.getOperands()) {
							tags.add(terminalNode.toString());
						}
						queries.add(tags);
					}
					else {
						Set tags = new HashSet(); 
						tags.add(andBranch.toString());
						queries.add(tags);
					}
				}
			}
		}
		
		return queries;
	}
	
	public static Node toAST(String q) {
		BooleanQueryLexer lexer = new BooleanQueryLexer(new ANTLRInputStream(q));
        TokenStream tokens = new CommonTokenStream(lexer);
        BooleanQueryParser parser = new BooleanQueryParser(tokens);
        
        ExpressionContext expession = parser.expression();
        ASTBuilderVisitor builder = new ASTBuilderVisitor();
        Node ast = builder.visit(expession);
        
        return ast;
	}
	
	public static Expression toBooleanExpression(Node ast) {
		BooleanExpression tree = (BooleanExpression) ast;
		if(tree.operands == null) {
			return Variable.of(tree.toString());
		}
		else {
			List> expressions = new ArrayList>();
			for(Node child : tree.operands) {
				Expression expression = toBooleanExpression(child);
				expressions.add(expression);
			}
			
			String type = ast.toString();
			if(type.equals("and") || type.equals("atom")) {
				return And.of(expressions);
			}
			else if(type.equals("or")) {
				return Or.of(expressions);
			}
			else if(type.equals("not")) {
				if(expressions.size() == 1) {
					return Not.of(expressions.get(0));
				}
				else {
					Expression expression = expressions.remove(0);
					return And.of(Not.of(And.of(expressions)), expression);
				}
			}
		}
		return null;
	}
	
	public static Query toBNF(Node ast) {
		Builder bq = new BooleanQuery.Builder();
		BooleanExpression tree = (BooleanExpression) ast;
		if(tree.operands == null) {
			Query query = new TermQuery(new Term("", tree.toString()));
			return query;
		}
		else {
			List expressions = new ArrayList();
			for(Node child : tree.operands) {
				Query expression = toBNF(child);
				expressions.add(expression);
			}
			
			String type = ast.toString();
			if(type.equals("and")) {
				for(Query exp : expressions) {
					bq.add(exp, Occur.MUST);
				}
			}
			else if(type.equals("atom")) {
				for(Query exp : expressions) {
					bq.add(exp, Occur.SHOULD);
				}
			}
			else if(type.equals("or")) {
				for(Query exp : expressions) {
					bq.add(exp, Occur.SHOULD);
				}
			}
			else if(type.equals("not")) {
				if(expressions.size() == 1) {
					bq.add(expressions.get(0), Occur.MUST_NOT);
				}
				else {
					bq.add(expressions.remove(0), Occur.MUST);
					for(Query exp : expressions) {
						bq.add(exp, Occur.MUST_NOT);
					}
				}
			}
		}
		return bq.build();
	}
	
	public static void printTree(Node ast) {
		printTree(ast, 0);
	}
	
	private static void printTree(Node ast, int depth) {
		BooleanExpression tree = (BooleanExpression) ast;
		System.out.print(StringUtils.repeat("\t", depth));
		System.out.println(ast.toString());
		
		depth++;
		if(tree.operands != null) {
			for(Node child : tree.operands) {
				printTree(child, depth);
			}
		}
	}
	
	public static void main(String...args) {
		String q = "(recyclicing OR waste) AND (food OR waste) NOT(waste time)";
	
		Set> queries = QueryUtils.parse(q);
		System.out.println(queries);
		
		System.out.println(QueryUtils.isBooleanExpression(q));
	}
	
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy