
net.sf.jsqlparser.util.cnfexpression.CNFConverter Maven / Gradle / Ivy
/*-
* #%L
* JSQLParser library
* %%
* Copyright (C) 2004 - 2019 JSQLParser
* %%
* Dual licensed under GNU LGPL 2.1 or Apache License 2.0
* #L%
*/
package net.sf.jsqlparser.util.cnfexpression;
import java.util.ArrayList;
import java.util.LinkedList;
import java.util.List;
import java.util.Queue;
import java.util.Stack;
import net.sf.jsqlparser.expression.Expression;
import net.sf.jsqlparser.expression.NotExpression;
/**
* This class handles the conversion from a normal expression tree into
* the CNF form.
*
* Here is the definition of CNF form:
* https://en.wikipedia.org/wiki/Conjunctive_normal_form
*
* Basically it will follow these steps:
*
* To help understanding, I will generate an example:
* Here is the original tree:
*                OR
*              /    \
*            OR      NOT
*           /  \      |
*         NOT   H    AND
*          |        /   \
*         NOT      G     OR
*          |            /  \
*          F           H   NOT
*                           |
*                           OR
*                          /  \
*                        AND   L
*                       /   \
*                     ( )   ( )
*                      |     |
*                      J     K
*
* 1. Rebuild the tree by replacing the binary "and" and "or" operators
* with their multi-operand counterparts, i.e. nodes that can hold
* multiple children. Also, leave out the parenthesis nodes between the
* conditional operators to make the tree uniform.
*
* After the transform, the result should be like this:
*                OR(M)
*               /     \
*           OR(M)      NOT
*           /   \       |
*         NOT    H    AND(M)
*          |          /    \
*         NOT        G    OR(M)
*          |              /   \
*          F             H    NOT
*                              |
*                            OR(M)
*                            /   \
*                        AND(M)   L
*                        /   \
*                       J     K
*
* 2. Push the not operators down to the bottom of the expression. That
* means each not operator will be the root of a subtree in which
* no "and" or "or" exists. Be sure to use De Morgan's laws
* and the double negation law.
*
* How to use De Morgan's law:
* For example, here is the original expression tree:
*          NOT
*           |
*         AND(M)
*         /   \
*        G     H
*
* After we apply De Morgan's law, the result should be like this:
*         OR(M)
*         /   \
*       NOT   NOT
*        |     |
*        G     H
*
* After the transform, the result should be like this:
*                 OR(M)
*               /       \
*           OR(M)        OR(M)
*           /   \        /    \
*          F     H     NOT    AND(M)
*                       |     /    \
*                       G   NOT    OR(M)
*                            |     /   \
*                            H  AND(M)  L
*                               /   \
*                              J     K
*
* 3. gather all the adjacent "and" or "or" operator together.
* After doing that, the expression tree will be presented as:
* all the and expression will be in either odd or even levels,
* this will be the same for the or operator.
*
* After the transform, the expression tree should be like this:
*                OR(M)
*             /  /   \   \
*            F  H   NOT   AND(M)
*                    |    /    \
*                    G  NOT    OR(M)
*                        |     /   \
*                        H  AND(M)  L
*                           /   \
*                          J     K
*
* 4. push the and operator upwards until the root is an and
* operator and all the children are or operators with multiple
* components. At this time we get the result: an expression in CNF form.
* How do we push and up? Use distribution law!
*
* For example, here is the way to push the and up and merge them.
*        OR
*       /  \
*     AND   L
*     / \
*    J   K
*
* In the normal form, it could be: (J AND K) OR L.
* If we apply the distribution law, we will get the result like this:
* (J OR L) AND (K OR L), the tree form of this should be like:
*         AND
*        /   \
*      OR     OR
*     /  \   /  \
*    J    L K    L
*
* So after we push the AND at the deepest level up and merge it with the
* existing and, we get this result.
*                OR(M)
*             /  /   \   \
*            F  H   NOT    AND(M)
*                    |    /   |   \
*                    G  NOT OR(M) OR(M)
*                        |  /  \   /  \
*                        H J    L K    L
*
* Now let us push the and up and we will get the result like this:
*                               AND(M)
*               /                  |                   \
*           OR(M)                OR(M)                OR(M)
*        /  /  \  \           / /  |  \ \          / /  |  \ \
*       F  H  NOT NOT        F H  NOT  J L        F H  NOT  K L
*              |   |               |                    |
*              G   H               G                    G
*
* 5. The last step, convert the Multiple Expression back to the binary
* form. Note the final tree shall be left-inclined.
*
* The final expression tree shall be like this:
*                  AND
*                 /   \
*              AND     ( )
*             /   \     |
*          ( )     ( ) part1
*           |       |
*           OR    part2
*          /  \
*        OR    NOT
*       /  \    |
*     OR   NOT  H
*    /  \   |
*   F    H  G
*
* part1:       OR
*             /  \
*           OR    L
*          /  \
*        OR    K
*       /  \
*     OR   NOT
*    /  \   |
*   F    H  G
*
* part2:       OR
*             /  \
*           OR    L
*          /  \
*        OR    J
*       /  \
*     OR   NOT
*    /  \   |
*   F    H  G
*
* @author messfish
*
*/
public class CNFConverter {

    /** Root of the expression tree under conversion; this is what {@link #convert} returns. */
    private Expression root;

    /**
     * Artificial parent of {@link #root}: a multi-and operator created in {@link #reorder} with
     * the root as its only child. It lets the root be replaced through the same
     * {@code setChild} call that is used for every other node.
     */
    private Expression dummy;

    /** Traversal cursor: the node currently being examined. Reset to {@link #root}. */
    private Expression temp1;

    /** Traversal cursor: the parent of {@link #temp1}. Reset to {@link #dummy}. */
    private Expression temp2;

    /** Set by the first call to {@link #convert}; an instance may only be used once. */
    private boolean isUsed = false;

    private final CloneHelper clone = new CloneHelper();

    /**
     * A (parent, child) pair of tree nodes together with the BFS level at which the pair was
     * discovered. Declared static because it never needs the enclosing converter.
     */
    private static final class Mule {
        private final Expression parent;
        private final Expression child;
        private final int level;

        private Mule(Expression parent, Expression child, int level) {
            this.parent = parent;
            this.child = child;
            this.level = level;
        }
    }

    /**
     * Converts the given expression into CNF using a fresh converter instance.
     *
     * @param expr the original expression tree.
     * @return the converted expression.
     */
    public static Expression convertToCNF(Expression expr) {
        CNFConverter cnf = new CNFConverter();
        return cnf.convert(expr);
    }

    /**
     * Takes an expression tree and converts it into CNF form by running the five steps
     * described in the class documentation, one private method per step.
     *
     * @param express the original expression tree.
     * @return the converted expression.
     * @throws IllegalStateException if this instance has already been used for a conversion.
     */
    private Expression convert(Expression express) {
        if (isUsed) {
            throw new IllegalStateException("The class could only be used once!");
        }
        isUsed = true;

        reorder(express);
        pushNotDown();
        /* gather() neither re-points root nor changes the temp cursors, so there is
         * nothing to restore after it. */
        gather();
        pushAndUp();
        changeBack();
        return root;
    }

    /**
     * Step 1: rebuilds the expression tree via {@link CloneHelper#modify} and hangs the result
     * under a fresh multi-and dummy parent.
     *
     * @param express the original expression tree.
     */
    private void reorder(Expression express) {
        root = clone.modify(express);
        List<Expression> list = new ArrayList<>();
        list.add(root);
        dummy = new MultiAndExpression(list);
    }

    /**
     * Step 2: pushes the not operators down. Sets the cursors to their starting point, runs the
     * recursive helper and then re-reads the (possibly replaced) root from the dummy parent.
     */
    private void pushNotDown() {
        temp1 = root;
        temp2 = dummy;
        /* the root is child 0 of the dummy, so a modification at the root
         * lands in the right slot of its parent. */
        pushNot(0);
        /* the root may have been replaced: fetch it again and reset the cursors. */
        root = ((MultiAndExpression) dummy).getChild(0);
        temp1 = root;
        temp2 = dummy;
    }

    /**
     * Recursive helper for pushing not operators down. Only the three logical operators matter:
     * for a multi-and / multi-or every child is visited in turn (with {@link #temp2} pointing at
     * the operator and {@link #temp1} at the child), for a not operator {@link #handleNot} is
     * invoked, and every other node is left alone.
     *
     * @param index the position of {@link #temp1} in the child list of {@link #temp2}.
     */
    private void pushNot(int index) {
        if (temp1 instanceof MultiAndExpression || temp1 instanceof MultiOrExpression) {
            MultipleExpression operator = (MultipleExpression) temp1;
            for (int i = 0; i < operator.size(); i++) {
                temp2 = operator;
                temp1 = operator.getChild(i);
                pushNot(i);
            }
        } else if (temp1 instanceof NotExpression) {
            handleNot(index);
        }
    }

    /**
     * Handles the not operator that {@link #temp1} currently points at. Consecutive not
     * operators are counted first: an even count cancels out completely, an odd count leaves
     * one not that is either kept on a non-logical child or pushed below a multi-and /
     * multi-or using De Morgan's law. The result replaces slot {@code index} of
     * {@link #temp2}.
     *
     * @param index the position of {@link #temp1} in the child list of {@link #temp2}.
     */
    private void handleNot(int index) {
        Expression child = ((NotExpression) temp1).getExpression();
        int nums = 1; // number of consecutive not operators seen so far.
        while (child instanceof NotExpression) {
            child = ((NotExpression) child).getExpression();
            nums++;
        }
        MultipleExpression parent = (MultipleExpression) temp2;

        if (nums % 2 == 0) {
            /* all the nots cancel: hook the operand directly under the parent and keep
             * descending. The operand is never a not here, so the index passed to
             * pushNot is not used. */
            parent.setChild(index, child);
            temp1 = child;
            pushNot(-1);
            return;
        }

        /* exactly one not is left to push. */
        if (child instanceof MultiAndExpression) {
            /* De Morgan: NOT(a AND b) becomes (NOT a) OR (NOT b). */
            temp1 = new MultiOrExpression(negateChildren((MultipleExpression) child));
        } else if (child instanceof MultiOrExpression) {
            /* De Morgan: NOT(a OR b) becomes (NOT a) AND (NOT b). */
            temp1 = new MultiAndExpression(negateChildren((MultipleExpression) child));
        } else {
            /* the operand is not a logical operator: keep a single not on top of it. */
            parent.setChild(index, new NotExpression(child));
            return;
        }
        parent.setChild(index, temp1);
        /* the freshly created nots may sit on top of further operators: keep pushing. */
        pushNot(-1);
    }

    /** Returns a new list holding every child of {@code operator} wrapped in a not operator. */
    private static List<Expression> negateChildren(MultipleExpression operator) {
        List<Expression> negated = new ArrayList<>();
        for (int i = 0; i < operator.size(); i++) {
            negated.add(new NotExpression(operator.getChild(i)));
        }
        return negated;
    }

    /**
     * Step 3: merges adjacent multi operators of the same kind. The tree below {@link #temp1}
     * is walked breadth-first; every multi-and absorbs its direct multi-and children and every
     * multi-or absorbs its direct multi-or children before its (now differently typed) children
     * are queued. Nodes that are neither multi-and nor multi-or are ignored.
     */
    private void gather() {
        Queue<Expression> queue = new LinkedList<>();
        queue.offer(temp1);
        while (!queue.isEmpty()) {
            Expression express = queue.poll();
            if (express instanceof MultiAndExpression || express instanceof MultiOrExpression) {
                MultipleExpression operator = (MultipleExpression) express;
                flatten(operator);
                /* standard BFS: no child is of the same kind as the operator any more. */
                for (int i = 0; i < operator.size(); i++) {
                    queue.offer(operator.getChild(i));
                }
            }
        }
    }

    /**
     * Replaces, in place, every child of {@code operator} that is of the same kind (multi-and
     * inside multi-and, multi-or inside multi-or) by that child's own children, repeating until
     * no such child is left.
     */
    private static void flatten(MultipleExpression operator) {
        boolean isAnd = operator instanceof MultiAndExpression;
        int index = 0;
        while (index < operator.size()) {
            Expression candidate = operator.getChild(index);
            boolean sameKind = isAnd
                    ? candidate instanceof MultiAndExpression
                    : candidate instanceof MultiOrExpression;
            if (!sameKind) {
                index++;
                continue;
            }
            /* splice the grandchildren in where the child used to be. The index is not
             * advanced, so the spliced-in nodes are examined next; everything before
             * it is already known not to be of the same kind. */
            operator.removeChild(index);
            MultipleExpression nested = (MultipleExpression) candidate;
            for (int i = 0; i < nested.size(); i++) {
                operator.addChild(index + i, nested.getChild(i));
            }
        }
    }

    /**
     * Step 4: walks the tree breadth-first and records every multi-or that sits directly under
     * a multi-and, together with its level, on a stack. {@link #pushAnd} then processes the
     * stack (deepest entries first), after which the root is re-read from the dummy parent and
     * the tree is gathered once more.
     */
    private void pushAndUp() {
        Queue<Mule> queue = new LinkedList<>();
        Stack<Mule> stack = new Stack<>();
        queue.offer(new Mule(temp2, temp1, 0));
        int level = 1;
        /* BFS, one level per outer iteration. */
        while (!queue.isEmpty()) {
            int size = queue.size();
            for (int i = 0; i < size; i++) {
                Mule mule = queue.poll();
                Expression parent = mule.parent;
                Expression child = mule.child;
                if (parent instanceof MultiAndExpression
                        && child instanceof MultiOrExpression) {
                    stack.push(mule);
                }
                /* the child is not necessarily a multiple expression. */
                if (child instanceof MultipleExpression) {
                    MultipleExpression multi = (MultipleExpression) child;
                    for (int j = 0; j < multi.size(); j++) {
                        Expression get = multi.getChild(j);
                        if (get instanceof MultipleExpression) {
                            queue.offer(new Mule(child, get, level));
                        }
                    }
                }
            }
            level++;
        }
        pushAnd(stack);
        /* the root may have been replaced: fetch it again and reset the cursors. */
        root = ((MultiAndExpression) dummy).getChild(0);
        temp1 = root;
        temp2 = dummy;
        /* pushAnd() does not gather after the last level it handles. */
        gather();
    }

    /**
     * Helper for step 4. Pops the recorded (multi-and parent, multi-or child) pairs; whenever
     * the level of the popped pair differs from the previous one the tree is made uniform with
     * {@link #gather}. For each pair the distribution law is applied repeatedly: the first
     * multi-and found inside the multi-or is removed, and the multi-or is replaced in its parent
     * by a new multi-and holding one copy of the remaining multi-or per operand of the removed
     * multi-and, each copy extended by that operand. The copies are queued so that further
     * multi-ands inside them are distributed as well.
     *
     * @param stack the pairs collected by {@link #pushAndUp}.
     */
    private void pushAnd(Stack<Mule> stack) {
        int level = 0;
        if (!stack.isEmpty()) {
            level = stack.peek().level;
        }
        while (!stack.isEmpty()) {
            Mule mule = stack.pop();
            /* a level is finished: make the tree uniform before moving on. */
            if (level != mule.level) {
                gather();
                level = mule.level;
            }
            Queue<Mule> queue = new LinkedList<>();
            /* the level is irrelevant from here on, so 0 is used as a placeholder. */
            queue.offer(new Mule(mule.parent, mule.child, 0));
            while (!queue.isEmpty()) {
                Mule get = queue.poll();
                /* only multi operators are ever put on the stack or the queue. */
                MultipleExpression children = (MultipleExpression) get.child;
                int index = 0;
                MultiAndExpression and = null;
                /* look for the first operand that is a multi-and. */
                for (; index < children.size(); index++) {
                    if (children.getChild(index) instanceof MultiAndExpression) {
                        and = (MultiAndExpression) children.getChild(index);
                        break;
                    }
                }
                if (index == children.size() || and == null) {
                    continue;
                }
                children.removeChild(index);
                MultipleExpression parents = (MultipleExpression) get.parent;
                List<Expression> list = new ArrayList<>();
                MultiAndExpression newand = new MultiAndExpression(list);
                parents.setChild(parents.getIndex(children), newand);
                for (int i = 0; i < and.size(); i++) {
                    MultipleExpression mtemp = (MultipleExpression) clone.shallowCopy(children);
                    mtemp.addChild(mtemp.size(), and.getChild(i));
                    newand.addChild(i, mtemp);
                    queue.offer(new Mule(newand, mtemp, 0));
                }
            }
        }
    }

    /**
     * Step 5: converts the multiple expressions back through {@link CloneHelper#changeBack}.
     * Nothing is done unless the root is a multi-and; otherwise each child of the root is
     * converted with the flag {@code true} and then the root itself with the flag
     * {@code false}. Per the class documentation the flag is expected to control whether a
     * parenthesis node is attached and the result is expected to be left inclined
     * (NOTE(review): confirm against CloneHelper).
     */
    private void changeBack() {
        if (!(root instanceof MultiAndExpression)) {
            return;
        }
        MultipleExpression temp = (MultipleExpression) root;
        for (int i = 0; i < temp.size(); i++) {
            temp.setChild(i, clone.changeBack(true, temp.getChild(i)));
        }
        root = clone.changeBack(false, temp);
    }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy