
// com.mifmif.common.regex.Generex Maven / Gradle / Ivy
// Go to download
// Show more of this group Show more artifacts with this name
// Show all versions of generex Show documentation
// Show all versions of generex Show documentation
// Generex Library for Strings generation based on RegExp
// The newest version!
/**
* Copyright 2014 y.mifrah
*
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.mifmif.common.regex;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Random;
import com.mifmif.common.regex.util.Iterable;
import com.mifmif.common.regex.util.Iterator;
import dk.brics.automaton.Automaton;
import dk.brics.automaton.RegExp;
import dk.brics.automaton.State;
import dk.brics.automaton.Transition;
/**
 * A Java utility class that helps generate string values matching a given
 * regular expression. It can generate all the values matched by the regex, a
 * random value, or only a specific string selected by its lexicographical
 * order.
 *
 * @author y.mifrah
 *
 */
public class Generex implements Iterable {

    /**
     * Substitution table applied to the user's regex before it is handed to
     * {@link RegExp}. Each key is itself a regex that matches a literal
     * backslash followed by a class letter (for example {@code \d}); each value
     * is the explicit bracket expression that replaces it.
     * NOTE(review): presumably needed because the automaton library does not
     * understand these shorthand classes -- confirm against its syntax docs.
     */
    private static final Map<String, String> PREDEFINED_CHARACTER_CLASSES = new HashMap<String, String>();

    static {
        PREDEFINED_CHARACTER_CLASSES.put("\\\\d", "[0-9]");
        PREDEFINED_CHARACTER_CLASSES.put("\\\\D", "[^0-9]");
        PREDEFINED_CHARACTER_CLASSES.put("\\\\s", "[ \t\n\f\r]");
        PREDEFINED_CHARACTER_CLASSES.put("\\\\S", "[^ \t\n\f\r]");
        PREDEFINED_CHARACTER_CLASSES.put("\\\\w", "[a-zA-Z_0-9]");
        PREDEFINED_CHARACTER_CLASSES.put("\\\\W", "[^a-zA-Z_0-9]");
    }

    private RegExp regExp;
    private Automaton automaton;
    private List<String> matchedStrings = new ArrayList<String>();
    private Node rootNode;
    private boolean isTransactionNodeBuilt;
    /** Number of {@code generate} invocations performed by the current traversal. */
    private int matchedStringCounter = 0;
    /** Number of {@code prepareTransactionNodes} invocations performed so far. */
    private int preparedTransactionNode;
    /** Single random source shared by every {@code random(...)} call on this instance. */
    private final Random random = new Random();

    /**
     * Builds a generator for the given regular expression. Occurrences of the
     * predefined character classes ({@code \d}, {@code \D}, {@code \s},
     * {@code \S}, {@code \w}, {@code \W}) are expanded to explicit bracket
     * expressions before the expression is compiled to an automaton.
     *
     * @param regex
     *            the regular expression to generate strings for
     */
    public Generex(String regex) {
        String expanded = regex;
        for (Map.Entry<String, String> characterClass : PREDEFINED_CHARACTER_CLASSES.entrySet()) {
            expanded = expanded.replaceAll(characterClass.getKey(), characterClass.getValue());
        }
        regExp = new RegExp(expanded);
        automaton = regExp.toAutomaton();
    }

    /**
     * Builds a generator directly on top of an existing automaton.
     *
     * @param automaton
     *            the automaton whose accepted strings are generated
     */
    public Generex(Automaton automaton) {
        this.automaton = automaton;
    }

    /**
     * Returns the string found at a given position in the sorted list of
     * matched strings.
     *
     * @param indexOrder
     *            position of the wanted string, {@code 1 <= indexOrder <= n}
     *            where {@code n} is the number of matched strings; {@code 0}
     *            is treated as {@code 1}
     * @return the matched string at position {@code indexOrder}, or an empty
     *         string if {@code indexOrder} is greater than {@code n}. If there
     *         is an infinite number of strings matching the regex, the method
     *         throws {@code StackOverflowError}.
     */
    public String getMatchedString(int indexOrder) {
        buildRootNode();
        if (indexOrder == 0)
            indexOrder = 1;
        String raw = buildStringFromNode(rootNode, indexOrder);
        // The raw value is wrapped by one placeholder character contributed by
        // the root node and one by the terminal accept node. Anything shorter
        // than that means no string was selected (index out of range).
        if (raw.length() < 2)
            return "";
        return raw.substring(1, raw.length() - 1);
    }

    /**
     * Recursively selects the character and child node that lead to the string
     * at position {@code indexOrder} among the strings reachable from
     * {@code node}.
     *
     * @param node
     *            node to descend from
     * @param indexOrder
     *            1-based position, relative to {@code node}, of the wanted string
     * @return the characters collected from {@code node} downwards; empty if
     *         no character or child covers {@code indexOrder}
     */
    private String buildStringFromNode(Node node, int indexOrder) {
        StringBuilder result = new StringBuilder();
        long passedStringNbr = 0;
        // Every character of this node's range heads the same number of strings.
        long step = node.getNbrMatchedString() / node.getNbrChar();
        // An int counter is used on purpose: a char counter wraps around to 0
        // when the range ends at Character.MAX_VALUE and would never terminate.
        for (int code = node.getMinChar(); code <= node.getMaxChar(); ++code) {
            passedStringNbr += step;
            if (passedStringNbr >= indexOrder) {
                passedStringNbr -= step;
                indexOrder -= passedStringNbr;
                result.append((char) code);
                break;
            }
        }
        long passedStringNbrInChildNode = 0;
        if (result.length() == 0)
            passedStringNbrInChildNode = passedStringNbr;
        for (Node childN : node.getNextNodes()) {
            passedStringNbrInChildNode += childN.getNbrMatchedString();
            if (passedStringNbrInChildNode >= indexOrder) {
                passedStringNbrInChildNode -= childN.getNbrMatchedString();
                indexOrder -= passedStringNbrInChildNode;
                result.append(buildStringFromNode(childN, indexOrder));
                break;
            }
        }
        return result.toString();
    }

    /**
     * @return first string in lexicographical order that is matched by the
     *         given pattern; an empty string if the node tree has no children.
     */
    public String getFirstMatch() {
        buildRootNode();
        StringBuilder result = new StringBuilder();
        Node node = rootNode;
        while (!node.getNextNodes().isEmpty()) {
            result.append(node.getMinChar());
            node = node.getNextNodes().get(0);
        }
        // The first collected character is the root node's placeholder, not
        // part of the match.
        if (result.length() == 0)
            return "";
        return result.substring(1);
    }

    /**
     * @return the number of strings that are matched by the given pattern.
     */
    public long matchedStringsSize() {
        // Make sure the node tree exists; it is built lazily.
        buildRootNode();
        return rootNode.getNbrMatchedString();
    }

    /**
     * Prepare the rootNode and its child nodes so that we can get a matched
     * string by index. The tree is built once; later calls return immediately.
     */
    private void buildRootNode() {
        if (isTransactionNodeBuilt)
            return;
        isTransactionNodeBuilt = true;
        rootNode = new Node();
        rootNode.setNbrChar(1);
        List<Node> nextNodes = prepareTransactionNodes(automaton.getInitialState());
        rootNode.setNextNodes(nextNodes);
        rootNode.updateNbrMatchedString();
    }

    /**
     * Depth-first traversal of the automaton that appends every accepted
     * string it reaches to {@code matchedStrings}.
     *
     * @param strMatch
     *            characters collected on the path to {@code state}
     * @param state
     *            state to continue from
     * @param limit
     *            maximum number of invocations of this method per traversal;
     *            once reached, further invocations return without doing anything
     */
    private void generate(String strMatch, State state, int limit) {
        if (matchedStringCounter == limit)
            return;
        ++matchedStringCounter;
        List<Transition> transitions = state.getSortedTransitions(true);
        if (transitions.size() == 0) {
            matchedStrings.add(strMatch);
            return;
        }
        if (state.isAccept()) {
            matchedStrings.add(strMatch);
        }
        for (Transition transition : transitions) {
            // int counter: a char counter would wrap around and loop forever
            // when the transition range ends at Character.MAX_VALUE.
            for (int code = transition.getMin(); code <= transition.getMax(); ++code) {
                generate(strMatch + (char) code, transition.getDest(), limit);
            }
        }
    }

    /**
     * Build the list of nodes that represent the possible transitions from the
     * given {@code state}. An accepting state contributes one extra node with
     * a single character and no children.
     *
     * @param state
     *            state whose outgoing transitions are converted to nodes
     * @return the nodes for {@code state}; empty once the internal call counter
     *         has reached {@code Integer.MAX_VALUE / 2}
     */
    private List<Node> prepareTransactionNodes(State state) {
        List<Node> transactionNodes = new ArrayList<Node>();
        if (preparedTransactionNode == Integer.MAX_VALUE / 2)
            return transactionNodes;
        ++preparedTransactionNode;
        if (state.isAccept()) {
            Node acceptedNode = new Node();
            acceptedNode.setNbrChar(1);
            transactionNodes.add(acceptedNode);
        }
        List<Transition> transitions = state.getSortedTransitions(true);
        for (Transition transition : transitions) {
            Node trsNode = new Node();
            int nbrChar = transition.getMax() - transition.getMin() + 1;
            trsNode.setNbrChar(nbrChar);
            trsNode.setMaxChar(transition.getMax());
            trsNode.setMinChar(transition.getMin());
            List<Node> nextNodes = prepareTransactionNodes(transition.getDest());
            trsNode.setNextNodes(nextNodes);
            transactionNodes.add(trsNode);
        }
        return transactionNodes;
    }

    /**
     * Generate all strings that match the given regex.
     *
     * @return the matched strings, in traversal order
     */
    public List<String> getAllMatchedStrings() {
        matchedStrings = new ArrayList<String>();
        // Restart the traversal budget; otherwise an earlier call on this
        // instance would cut this one short.
        matchedStringCounter = 0;
        generate("", automaton.getInitialState(), Integer.MAX_VALUE);
        return matchedStrings;
    }

    /**
     * Generate a sub-list of the strings that match the given regex, ordered
     * in lexicographical order. {@code limit} bounds the number of automaton
     * states visited by the traversal, so the returned list may hold fewer
     * than {@code limit} strings.
     *
     * @param limit
     *            traversal budget, see above
     * @return the matched strings collected within the budget
     */
    public List<String> getMatchedStrings(int limit) {
        matchedStrings = new ArrayList<String>();
        // Restart the traversal budget; otherwise an earlier call on this
        // instance would cut this one short.
        matchedStringCounter = 0;
        generate("", automaton.getInitialState(), limit);
        return matchedStrings;
    }

    /**
     * Generate and return a random string that matches the pattern used in
     * this Generex.
     *
     * @return a randomly generated string
     */
    public String random() {
        return prepareRandom("", automaton.getInitialState(), 1, Integer.MAX_VALUE);
    }

    /**
     * Generate and return a random string that matches the pattern used in
     * this Generex, aiming for a length of at least {@code minLength}. The
     * result is shorter when a state without outgoing transitions is reached
     * first.
     *
     * @param minLength
     *            minimum length below which generation does not stop voluntarily
     * @return a randomly generated string
     */
    public String random(int minLength) {
        return prepareRandom("", automaton.getInitialState(), minLength, Integer.MAX_VALUE);
    }

    /**
     * Generate and return a random string that matches the pattern used in
     * this Generex, aiming for a length between {@code minLength} and
     * {@code maxLength}. Generation stops at {@code maxLength} only if the
     * state reached at that length is accepting; otherwise it continues.
     *
     * @param minLength
     *            minimum length below which generation does not stop voluntarily
     * @param maxLength
     *            length at which generation stops if the current state is accepting
     * @return a randomly generated string
     */
    public String random(int minLength, int maxLength) {
        return prepareRandom("", automaton.getInitialState(), minLength, maxLength);
    }

    /**
     * Random walk over the automaton starting at {@code state}, appending one
     * character per step to {@code strMatch}.
     *
     * @param strMatch
     *            characters generated so far
     * @param state
     *            state the walk starts from
     * @param minLength
     *            an accepting state may end the walk early only once this
     *            length has been reached
     * @param maxLength
     *            an accepting state reached at exactly this length ends the walk
     * @return the generated string
     */
    private String prepareRandom(String strMatch, State state, int minLength, int maxLength) {
        StringBuilder generated = new StringBuilder(strMatch);
        State current = state;
        // Iterative walk: one loop turn per generated character, so long
        // results do not deepen the call stack.
        while (true) {
            List<Transition> transitions = current.getSortedTransitions(false);
            if (current.isAccept()) {
                if (generated.length() == maxLength) {
                    return generated.toString();
                }
                // Stop early with a probability of roughly 30%.
                if (random.nextDouble() > 0.7 && generated.length() >= minLength) {
                    return generated.toString();
                }
            }
            if (transitions.size() == 0) {
                return generated.toString();
            }
            Transition randomTransition = transitions.get(random.nextInt(transitions.size()));
            // Size of the inclusive range [min, max]. nextInt's bound is
            // exclusive, so the +1 is what makes the max character reachable.
            int rangeSize = randomTransition.getMax() - randomTransition.getMin() + 1;
            char randomChar = (char) (randomTransition.getMin() + random.nextInt(rangeSize));
            generated.append(randomChar);
            current = randomTransition.getDest();
        }
    }

    /**
     * @return an iterator created on the initial state of the underlying
     *         automaton
     */
    public Iterator iterator() {
        return new GenerexIterator(automaton.getInitialState());
    }
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy