All downloads are free. Search and download functionality uses the official Maven repository.

com.almondtools.rexlex.pattern.CharClassNormalizer Maven / Gradle / Ivy

Go to download

Regular expression matchers, searcher, lexers based on deterministic finite automata

There is a newer version: 0.3.3
Show newest version
package com.almondtools.rexlex.pattern;

import static com.almondtools.rexlex.pattern.Pattern.ProCharNode.compact;

import java.util.ArrayList;
import java.util.IdentityHashMap;
import java.util.List;
import java.util.Map;

import com.almondtools.rexlex.pattern.Pattern.AlternativesNode;
import com.almondtools.rexlex.pattern.Pattern.BranchNode;
import com.almondtools.rexlex.pattern.Pattern.CharNode;
import com.almondtools.rexlex.pattern.Pattern.ComplementNode;
import com.almondtools.rexlex.pattern.Pattern.ConcatNode;
import com.almondtools.rexlex.pattern.Pattern.ConjunctiveNode;
import com.almondtools.rexlex.pattern.Pattern.DelegatorNode;
import com.almondtools.rexlex.pattern.Pattern.EmptyNode;
import com.almondtools.rexlex.pattern.Pattern.GroupNode;
import com.almondtools.rexlex.pattern.Pattern.LoopNode;
import com.almondtools.rexlex.pattern.Pattern.OptionalNode;
import com.almondtools.rexlex.pattern.Pattern.PatternNode;
import com.almondtools.rexlex.pattern.Pattern.PatternNodeVisitor;
import com.almondtools.rexlex.pattern.Pattern.ProCharNode;
import com.almondtools.rexlex.pattern.Pattern.RangeCharNode;
import com.almondtools.rexlex.pattern.Pattern.SingleCharNode;
import com.almondtools.rexlex.pattern.Pattern.StringNode;
import com.almondtools.util.collections.ArrayLists;
import com.almondtools.util.collections.Predicates;

/**
 * Pattern visitor that rewrites a pattern tree and afterwards unifies the
 * character class nodes collected during the rewrite.
 * <p>
 * Every {@code visitXxx} entry point behaves the same: it runs a fresh
 * {@link WithState} visitor over the node and then calls
 * {@link WithState#normalize(PatternNode)} on the result. Because a new
 * {@code WithState} is created per call, no bookkeeping is shared between calls.
 * <p>
 * NOTE(review): the generic type arguments in this class were reconstructed
 * from how the values are used; in particular {@code PatternNodeVisitor} is
 * assumed to be parameterised by its result type. Confirm against the
 * declarations in {@code Pattern}.
 */
public class CharClassNormalizer implements PatternNodeVisitor<PatternNode> {

	@Override
	public PatternNode visitAlternative(AlternativesNode node) {
		return visit(node);
	}

	@Override
	public PatternNode visitConjunctive(ConjunctiveNode node) {
		return visit(node);
	}

	@Override
	public PatternNode visitConcat(ConcatNode node) {
		return visit(node);
	}

	@Override
	public PatternNode visitLoop(LoopNode node) {
		return visit(node);
	}

	@Override
	public PatternNode visitOptional(OptionalNode node) {
		return visit(node);
	}

	@Override
	public PatternNode visitComplement(ComplementNode node) {
		return visit(node);
	}

	@Override
	public PatternNode visitProChar(ProCharNode node) {
		return visit(node);
	}

	@Override
	public PatternNode visitRangeChar(RangeCharNode node) {
		return visit(node);
	}

	@Override
	public PatternNode visitSingleChar(SingleCharNode node) {
		return visit(node);
	}

	@Override
	public PatternNode visitString(StringNode node) {
		return visit(node);
	}

	@Override
	public PatternNode visitEmpty(EmptyNode node) {
		return visit(node);
	}

	@Override
	public PatternNode visitGroup(GroupNode node) {
		return visit(node);
	}

	/**
	 * Shared implementation of all entry points: rewrites {@code node} with a
	 * new stateful visitor and then normalizes the rewritten tree.
	 *
	 * @param node the root of the (sub)tree to process
	 * @return the node returned by {@link WithState#normalize(PatternNode)}
	 * @throws UnsupportedOperationException if the tree contains a
	 *         {@link ConjunctiveNode} or a {@link ComplementNode}
	 */
	private PatternNode visit(PatternNode node) {
		WithState visitor = new WithState();
		PatternNode newNode = node.apply(visitor);
		return visitor.normalize(newNode);
	}

	/**
	 * Visitor doing the actual work. While walking the tree it records, for each
	 * char class it registers, the node that was its parent at that moment;
	 * {@link #normalize(PatternNode)} later uses this map to swap char classes
	 * inside their parents.
	 */
	private static class WithState implements PatternNodeVisitor<PatternNode> {

		/** Registered char class -> parent node; keys are compared by identity. */
		private final Map<ProCharNode, PatternNode> parents;

		public WithState() {
			parents = new IdentityHashMap<ProCharNode, PatternNode>();
		}

		/**
		 * Unifies every pair of char classes that were registered while visiting.
		 *
		 * @param node the rewritten root node
		 * @return {@code node} itself; changes are applied in place to the
		 *         registered parents
		 */
		public PatternNode normalize(PatternNode node) {
			// Work on a snapshot of the keys: unify() adds to and removes from 'parents'.
			List<ProCharNode> charClasses = new ArrayList<ProCharNode>(parents.keySet());
			for (int i = 0; i < charClasses.size(); i++) {
				ProCharNode proCharI = charClasses.get(i);
				// NOTE(review): j starts at i, so every char class is also unified
				// with itself - confirm whether i + 1 was intended.
				for (int j = i; j < charClasses.size(); j++) {
					ProCharNode proCharJ = charClasses.get(j);
					unify(proCharI, proCharJ);
				}
			}
			return node;
		}

		/**
		 * Replaces {@code c} in its registered parent by an {@link AlternativesNode}
		 * built from {@code replacement}, and registers the replacement nodes
		 * with that new alternatives node as their parent.
		 * <p>
		 * If {@code c} has no registered parent, or the parent is neither a
		 * {@link DelegatorNode} nor a {@link BranchNode}, the tree is left
		 * untouched; the replacement nodes are registered regardless.
		 *
		 * @param c the char class to replace; it is removed from the registry
		 * @param replacement the nodes to put in its place
		 */
		private void replace(ProCharNode c, ProCharNode... replacement) {
			PatternNode parent = parents.remove(c);
			List<ProCharNode> newnodes = listOf(replacement);
			AlternativesNode splitCharClass = new AlternativesNode(newnodes);
			if (parent instanceof DelegatorNode) {
				((DelegatorNode) parent).replaceSubNode(c, splitCharClass);
			} else if (parent instanceof BranchNode) {
				((BranchNode) parent).replaceSubNode(c, splitCharClass);
			}
			for (ProCharNode node : newnodes) {
				parents.put(node, splitCharClass);
			}
		}

		/**
		 * Builds a list from {@code replacement} using the
		 * {@code Predicates.notNull()} predicate.
		 * NOTE(review): presumably this drops {@code null} entries - verify
		 * against {@code ArrayLists.of}.
		 */
		private ArrayList<ProCharNode> listOf(ProCharNode... replacement) {
			return ArrayLists.of(Predicates.notNull(), replacement);
		}

		/**
		 * Splits two char classes along their intersection. Does nothing when
		 * {@code intersect} yields {@code null}; otherwise each class is replaced
		 * by the intersection plus its own remainder.
		 */
		private void unify(ProCharNode proCharI, ProCharNode proCharJ) {
			ProCharNode intersectionIJ = proCharI.intersect(proCharJ);
			if (intersectionIJ == null) {
				return;
			}

			ProCharNode remainderI = proCharI.minus(intersectionIJ);
			ProCharNode remainderJ = proCharJ.minus(intersectionIJ);

			replace(proCharI, intersectionIJ, remainderI);
			replace(proCharJ, intersectionIJ, remainderJ);
		}

		/**
		 * Visits all alternatives and registers the resulting char classes.
		 * NOTE(review): char classes are registered against the original
		 * {@code node}, even when a new node is returned - confirm.
		 *
		 * @return a new {@link AlternativesNode} if visiting produced a list that
		 *         is not {@code equals} to the old one, otherwise {@code node}
		 */
		@Override
		public PatternNode visitAlternative(AlternativesNode node) {
			List<? extends PatternNode> oldNodes = node.getSubNodes();
			List<PatternNode> subNodes = apply(oldNodes);
			for (PatternNode subnode : subNodes) {
				if (subnode instanceof ProCharNode) {
					addCharNode((ProCharNode) subnode, node);
				}
			}
			if (!oldNodes.equals(subNodes)) {
				return new AlternativesNode(subNodes);
			} else {
				return node;
			}
		}

		/** Conjunctions are not handled by this visitor. */
		@Override
		public PatternNode visitConjunctive(ConjunctiveNode node) {
			throw new UnsupportedOperationException();
		}

		/**
		 * Visits all concatenated parts and registers the resulting char classes.
		 * NOTE(review): char classes are registered against the original
		 * {@code node}, even when a new node is returned - confirm.
		 *
		 * @return a new {@link ConcatNode} if visiting produced a list that is
		 *         not {@code equals} to the old one, otherwise {@code node}
		 */
		@Override
		public PatternNode visitConcat(ConcatNode node) {
			List<? extends PatternNode> oldNodes = node.getSubNodes();
			List<PatternNode> subNodes = apply(oldNodes);
			for (PatternNode subnode : subNodes) {
				if (subnode instanceof ProCharNode) {
					addCharNode((ProCharNode) subnode, node);
				}
			}
			if (!oldNodes.equals(subNodes)) {
				return new ConcatNode(subNodes);
			} else {
				return node;
			}
		}

		/**
		 * Visits the loop body; a char class result is registered with the
		 * original loop as parent.
		 *
		 * @return a new {@link LoopNode} with the same bounds if the body was
		 *         replaced by a different instance, otherwise {@code node}
		 */
		@Override
		public PatternNode visitLoop(LoopNode node) {
			PatternNode oldNode = node.getSubNode();
			PatternNode subNode = oldNode.apply(this);
			if (subNode instanceof ProCharNode) {
				addCharNode((ProCharNode) subNode, node);
			}
			if (subNode != oldNode) {
				return new LoopNode(subNode, node.getFrom(), node.getTo());
			} else {
				return node;
			}
		}

		/**
		 * Visits the optional body; a char class result is registered with the
		 * original optional node as parent.
		 *
		 * @return a new {@link OptionalNode} if the body was replaced by a
		 *         different instance, otherwise {@code node}
		 */
		@Override
		public PatternNode visitOptional(OptionalNode node) {
			PatternNode oldNode = node.getSubNode();
			PatternNode subNode = oldNode.apply(this);
			if (subNode instanceof ProCharNode) {
				addCharNode((ProCharNode) subNode, node);
			}
			if (subNode != oldNode) {
				return new OptionalNode(subNode);
			} else {
				return node;
			}
		}

		/** Complements are not handled by this visitor. */
		@Override
		public PatternNode visitComplement(ComplementNode node) {
			throw new UnsupportedOperationException();
		}

		/**
		 * Expands a char class into an {@link AlternativesNode} over the
		 * compacted result of {@code node.toCharNodes()}.
		 */
		@Override
		public PatternNode visitProChar(ProCharNode node) {
			return new AlternativesNode(compact(node.toCharNodes()));
		}

		/** Range nodes are returned unchanged. */
		@Override
		public PatternNode visitRangeChar(RangeCharNode node) {
			return node;
		}

		/** Single char nodes are returned unchanged. */
		@Override
		public PatternNode visitSingleChar(SingleCharNode node) {
			return node;
		}

		/**
		 * Registers every char of the string with the string node as parent and
		 * returns the string node unchanged.
		 */
		@Override
		public PatternNode visitString(StringNode node) {
			for (SingleCharNode subnode : node.toChars()) {
				addCharNode(subnode, node);
			}
			return node;
		}

		/** Empty nodes are returned unchanged. */
		@Override
		public PatternNode visitEmpty(EmptyNode node) {
			return node;
		}

		/**
		 * Returns the visited sub node of the group; the group node itself is
		 * not part of the result.
		 */
		@Override
		public PatternNode visitGroup(GroupNode node) {
			PatternNode subnode = node.getSubNode().apply(this);
			return subnode;
		}

		/** Records {@code parent} as the parent of {@code child}. */
		private void addCharNode(ProCharNode child, PatternNode parent) {
			parents.put(child, parent);
		}

		/**
		 * Applies this visitor to each node in order.
		 *
		 * @param nodes the nodes to visit
		 * @return a new list holding the visit results in the same order
		 */
		private List<PatternNode> apply(List<? extends PatternNode> nodes) {
			List<PatternNode> as = new ArrayList<PatternNode>(nodes.size());
			for (PatternNode node : nodes) {
				as.add(node.apply(this));
			}
			return as;
		}
	}

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy