// net.sf.jsimpletools.utils.StringGenerator (from artifact jsimpletools-core)
/*
* #%L
* jSimpleTools
* %%
* Copyright (C) 2011 - 2015 Eric-Karl Matteau
* %%
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public
* License along with this program. If not, see
* <http://www.gnu.org/licenses/gpl-3.0.html>.
* #L%
*/
package net.sf.jsimpletools.utils;
import java.util.Collections;
import java.util.HashSet;
import java.util.Random;
import java.util.Set;
import net.sf.jsimpletools.Errors;
import net.sf.jsimpletools.SimpleTestToolsException;
/**
* Random string generator. Creates a random string from provided regular expressions.
*
* This class provides basic functionality to generate random strings from a regular expression.
* It features a simplistic (and incomplete) regex parser. The basic design rules are:
*
* - Generated string is guaranteed to "match" the regex or to throw an exception.
* - Only basic (and deemed useful) regex features are supported.
* - Generated strings are guaranteed unique for a specific generator instance.
* - Generator is thread safe, uniqueness is guaranteed between threads.
*
* To use the generator, simply create an instance with one of the create static methods. The
* constructor is private and cannot be called directly. The basic create() method should be
* enough for most needs. Strings can then be generated by the randomFromRegex()
* method (a more or less complete explanation of what is supported is given a little further
* down). You can easily create a string with the random method (a simple 15 to 20 characters
* long random string) or the randomAlphabetic(int) method that will give an alphabetic string
* (mixed case, with first letter always a capital) of requested length.
*
* Uniqueness is currently ensured by simply generating strings until one that has not yet been
* generated is found (an internal set is used to keep track). If a regex with very
* little variation is used over and over again, there might be slight performance degradation
* or it might fail outright (the generator will give up after a certain number of attempts are
* made).
*
* The regular expressions that are supported need to remain pretty simple yet most of the basic
* things are supported.
*
* - What is NOT supported:
*
* - Groups are not supported. Parentheses will end up unchanged in the result.
* - Boundary matchers ("^$\b\B\A\G\Z\z") are not supported.
* - Escape boundaries ("\Q\E") are not supported.
* - Any logic that would require backtracking (except repetitions).
*
* - Any single character will end up unchanged in the result string.
* - Dot '.' character will generate any character between ! and ~ in the UTF-8 mapping plus
* the space.
* - Character classes (square brackets) are supported with the exception of escaped
* characters in the class.
*
* - Example, "[0-9]" is supported, but "[783\]]" will fail.
* - To add a backslash, escape it normally (it will be twice in the class, no biggie).
* - Place the caret '^' anywhere but at the beginning.
* - Place the dash '-' at the end.
* - Place the closing bracket ']' at the beginning (or just after the caret). ex: "[]$/]" or
* "[^]a-z]"
* - All characters have an equal chance of occurring (except the backslash with the previous
* trick).
*
*
* - Negated character classes are supported and will generate with what is available from the
* dot character (above) minus what is in the class.
* - Shorthand character classes are supported (\W, \w, \S, \s, \D and \d). \s will only
* generate a space.
* - Escaped characters will be unchanged in the result string.
* - Repetitions are mostly supported but, since groups are not supported they can only apply
* to single characters or character classes.
*
* - * will generate between 0 and 10 repetitions.
* - + will generate between 1 and 10 repetitions.
* - ? has a 50-50 chance of generating the preceding character.
* - {x} will generate exactly x repetitions.
* - {x,} will generate between x and x+5 repetitions.
* - {x,y} will generate between x and y repetitions.
* - Reluctant and Possessive quantifiers are not supported (behavior undefined).
*
*
*
*/
public class StringGenerator {

    /** Maximum number of generation attempts made before giving up on finding an unseen string. */
    private static final int MAX_TRY_COUNT_FOR_DUPLICATES = 50;

    /** Cache of parsed generator patterns, shared by every generator instance. */
    private static final StringGeneratorPatternCache patternCache =
            new StringGeneratorPatternCache();

    /** Source of randomness; final so the reference is safely published to other threads. */
    private final Random javaRandom;

    /** Every string this instance has generated and accepted as new; used to reject duplicates. */
    private final Set<String> allGenerated =
            Collections.synchronizedSet(new HashSet<String>());

    /**
     * Factory method. Creates a StringGenerator instance with a predictable and repeatable
     * generation pattern. Internal java.util.Random object initialized with default seed (0).
     *
     * @return a new generator seeded with 0
     */
    public static StringGenerator create() {
        return createWithSeed(0L);
    }

    /**
     * Factory method. Creates a StringGenerator instance with a random generation pattern.
     * Internal java.util.Random object initialized with a random seed.
     *
     * @return a new generator backed by a default-constructed java.util.Random
     */
    public static StringGenerator createWithRandomSeed() {
        return new StringGenerator(new Random());
    }

    /**
     * Factory method. Creates a StringGenerator instance with a predictable and repeatable
     * generation pattern. Internal java.util.Random object initialized with provided seed.
     *
     * @param seed
     *            the seed handed to the internal java.util.Random
     * @return a new generator seeded with {@code seed}
     */
    public static StringGenerator createWithSeed(long seed) {
        return new StringGenerator(new Random(seed));
    }

    /** Private on purpose: instances are obtained through the static factory methods. */
    private StringGenerator(Random random) {
        javaRandom = random;
    }

    /**
     * Returns a random string between 15 to 20 characters in length. This is a convenience
     * method instead of calling randomFromRegex(".{15,20}");
     *
     * @return the generated string
     * @see StringGenerator#randomFromRegex(String)
     * @see StringGenerator
     */
    public String random() {
        return randomFromRegex(".{15,20}");
    }

    /**
     * Returns a random alphabetic string without whitespace of provided length. This is a
     * convenience method instead of calling
     * randomFromRegex("[A-Z][a-zA-Z]{length-1}");
     *
     * @param length
     *            The length (one or greater) of the string to generate.
     * @return the generated string
     * @throws IllegalArgumentException
     *             if {@code length} is less than one.
     * @see StringGenerator#randomFromRegex(String)
     * @see StringGenerator
     */
    public String randomAlphabetic(int length) {
        if (length < 1) {
            throw new IllegalArgumentException("length must be one or greater.");
        }
        return randomFromRegex("[A-Z][a-zA-Z]{" + (length - 1) + "}");
    }

    /**
     * Returns a random string matching the provided regular expression. Regular expression must
     * be valid and not all valid expressions are supported. The string is guaranteed to be a
     * match for the regex though and the method will fail should that post-condition fail.
     *
     * More details in class comment.
     *
     * @param regex
     *            the regular expression the generated string has to match
     * @return a string that this instance has not returned before
     * @see StringGenerator
     * @throws SimpleTestToolsException
     *             if either the generated string does not match the requested regular
     *             expression (due to unsupported feature) or if the generator is unable to
     *             generate a unique string after a set number of attempts.
     */
    public String randomFromRegex(String regex) {
        StringGeneratorPattern generatorPattern = patternCache.getOrCreate(regex);
        for (int attempt = 0; attempt < MAX_TRY_COUNT_FOR_DUPLICATES; attempt++) {
            String generated = generate(generatorPattern);
            // Set.add() returns false for a string that was already recorded; in that case
            // the candidate is dropped and another one is generated.
            if (allGenerated.add(generated)) {
                generatorPattern.validate(generated);
                return generated;
            }
        }
        throw Errors.FAILED_GENERATING_UNIQUE_STRING.exception(MAX_TRY_COUNT_FOR_DUPLICATES, regex);
    }

    /**
     * Builds one candidate string by walking the pattern's node chain from its head node and
     * appending, for each node, a randomly drawn number of randomly picked values.
     */
    private String generate(StringGeneratorPattern pattern) {
        StringBuilder generated = new StringBuilder();
        for (StringGeneratorNode node = pattern.getHeadNode(); node != null; node = node.getNext()) {
            // The repeat count is drawn before the size check, so a node without values
            // still consumes one random number; this keeps seeded sequences unchanged.
            int repeatCount = getRepeatCount(node);
            int valueCount = node.getValueSize();
            if (valueCount > 0) {
                for (int i = 0; i < repeatCount; i++) {
                    generated.append(node.getValueAt(javaRandom.nextInt(valueCount)));
                }
            }
        }
        return generated.toString();
    }

    /**
     * Draws a repetition count between the node's minimum and maximum repeat, both inclusive.
     * NOTE(review): assumes the node reports max >= min; Random.nextInt(int) rejects a
     * non-positive bound - confirm the pattern parser guarantees this.
     */
    private int getRepeatCount(StringGeneratorNode node) {
        int min = node.getMinRepeat();
        int max = node.getMaxRepeat();
        return javaRandom.nextInt(max - min + 1) + min;
    }
}