All Downloads are FREE. Search and download functionalities are using the official Maven repository.

net.sf.jsimpletools.utils.StringGenerator Maven / Gradle / Ivy

There is a newer version: 0.3.1
Show newest version
/*
 * #%L
 * jSimpleTools
 * %%
 * Copyright (C) 2011 - 2015 Eric-Karl Matteau 
 * %%
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as
 * published by the Free Software Foundation, either version 3 of the
 * License, or (at your option) any later version.
 * 
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 * 
 * You should have received a copy of the GNU General Public
 * License along with this program.  If not, see
 * <http://www.gnu.org/licenses/gpl-3.0.html>.
 * #L%
 */
package net.sf.jsimpletools.utils;

import java.util.Collections;
import java.util.HashSet;
import java.util.Random;
import java.util.Set;

import net.sf.jsimpletools.Errors;
import net.sf.jsimpletools.SimpleTestToolsException;

/**
 * Random string generator. Creates a random string from provided regular expressions.
 * 

* This class provides basic functionality to generate random strings from a regular expression. * It features a simplistic (an incomplete) regex parser. The basic design rules are: *

    *
  • Generated string is guaranteed to "match" the regex or to throw an exception.
  • *
  • Only basic (and deemed useful) regex features are supported.
  • *
  • Generated strings are guaranteed unique for a specific generator instance.
  • *
  • Generator is thread safe, uniqueness is guaranteed between threads.
  • *
* To use the generator, simply create an instance with one of the create static methods. The * constructor is private and cannot be called directly. The basic create() method should be * enough for most needs. Strings can then be generated by the randomFromRegex() * method (a more or less complete explaining of what is supported is described a little further * down). You can easily create a string with the random method (a simple 15 to 20 characters * long random string) or the randomAlphabetic(int) method that will give a alphabetic string * (mixed case, with first letter always a capital) of requested length. *

* The way that is currently used to ensure uniqueness is to simply generate strings until one * that has not been yet generated (an internal map is used to keep track). If a regex with very * little variation is used over and over again, there might be slight performance degradation * or it might fail outright (the generator will give up after a certain number of attempts are * made). *

* The regular expressions that are supported need to remain pretty simple yet most of the basic * things are supported. *

    *
  • What is NOT supported: *
      *
    • Groups are not supported. Parentheses will end up unchanged in the result.
    • *
    • Boundary matchers ("^$\b\B\A\G\Z\z") are not supported.
    • *
    • Escape boundaries ("\Q\E") are not supported.
    • *
    • Any logic that would require backtracking (except repetitions). *
    *
  • Any single character will end up unchanged in the result string.
  • *
  • Dot '.' character will generate any character between ! and ~ in the UTF-8 mapping plus * the space.
  • *
  • Character classes (square brackets) are supported with the exception of escaped * characters in the class. *
      *
    • Example, "[0-9]" is supported, but "[783\]]" will fail.
    • *
    • To add a backslash, escape it normally (it will be twice in the class, no biggie).
    • *
    • Place the caret '^' anywhere but at the beginning.
    • *
    • Place the dash '-' at the end.
    • *
    • Place the closing bracket ']' at the beginning (or just after the caret). ex: "[]$/]" or * "[^]a-z]"
    • *
    • All characters have an equal chance of occurring (except the backslash with the previous * trick).
    • *
    *
  • *
  • Negated character classes are supported and will generate with what is available from the * dot character (above) minus what is in the class.
  • *
  • Shorthand character classes are supported (\W, \w, \S, \s, \D and \d). \s will only * generate a space.
  • *
  • Escaped characters will be unchanged in the result string.
  • *
  • Repetitions are mostly supported but, since groups are not supported they can only apply * to single characters or character classes. *
      *
    • * will generate between 0 and 10 repetitions.
    • *
    • + will generate between 1 and 10 repetitions.
    • *
    • ? has a 50-50 chance of generating the preceding character.
    • *
    • {x} will generate exactly x repetitions.
    • *
    • {x,} will generate between x and x+5 repetitions.
    • *
    • {x,y} will generate between x and y repetitions.
    • *
    • Reluctant and Possessive quantifiers are not supported (behavior undefined).
    • *
    *
  • *
*/ public class StringGenerator { private static int MAX_TRY_COUNT_FOR_DUPLICATES = 50; private static StringGeneratorPatternCache patternCache = new StringGeneratorPatternCache(); private Random javaRandom; private Set allGenerated = Collections.synchronizedSet(new HashSet()); /** * Factory method. Creates a StringGenerator instance with a predictable and repeatable * generation pattern. Internal java.util.Random object initialized with default seed (0). */ public static StringGenerator create() { return createWithSeed(0L); } /** * Factory method. Creates a StringGenerator instance with a random generation pattern. * Internal java.util.Random object initialized with a random seed. */ public static StringGenerator createWithRandomSeed() { return new StringGenerator(new Random()); } /** * Factory method. Creates a StringGenerator instance with a predictable and repeatable * generation pattern. Internal java.util.Random object initialized with provided seed. */ public static StringGenerator createWithSeed(long seed) { return new StringGenerator(new Random(seed)); } private StringGenerator(Random random) { javaRandom = random; } /** * Returns a random string between 15 to 20 characters in length. This is a convenience * method instead of calling randomFromRegex(".{15,20}"); * * @see StringGenerator#randomFromRegex(String); * @see StringGenerator */ public String random() { return randomFromRegex(".{15,20}"); } /** * Returns a random alphabetic string without whitespace of provided length. This is a * convenience method instead of calling * randomFromRegex("[A-Z][a-zA-Z]{length-1}"); * * @param length * The length (one or greater) of the string to generate. 
* @see StringGenerator#randomFromRegex(String) * @see StringGenerator */ public String randomAlphabetic(int length) { if (length < 1) { throw new IllegalArgumentException("length must be one or greater."); } return randomFromRegex("[A-Z][a-zA-Z]{" + (length - 1) + "}"); } /** * Returns a random string matching the provided regular expression. Regular expression must * be valid and not all valid expressions are supported. The string is guaranteed to be a * match for the regex though and the method will fail should that post-condition fail. *

* More details in class comment. * * @see StringGenerator * @throws SimpleTestToolsException * if either the generated string does not match the requested regular * expression (due to unsupported feature) or if the generator is unable to * generate a unique string after a set number of attempts. */ public String randomFromRegex(String regex) { StringGeneratorPattern generatorPattern = patternCache.getOrCreate(regex); for (int i = 0; i < MAX_TRY_COUNT_FOR_DUPLICATES; i++) { String generated = generate(generatorPattern); if (allGenerated.add(generated)) { generatorPattern.validate(generated); return generated; } } throw Errors.FAILED_GENERATING_UNIQUE_STRING.exception(MAX_TRY_COUNT_FOR_DUPLICATES, regex); } private String generate(StringGeneratorPattern pattern) { StringBuilder generated = new StringBuilder(); StringGeneratorNode node = pattern.getHeadNode(); while (node != null) { int repeatCount = getRepeatCount(node); if (node.getValueSize() > 0) { for (int i = 0; i < repeatCount; i++) { int nextValueIndex = javaRandom.nextInt(node.getValueSize()); generated.append(node.getValueAt(nextValueIndex)); } } node = node.getNext(); } return generated.toString(); } private int getRepeatCount(StringGeneratorNode node) { int min = node.getMinRepeat(); int max = node.getMaxRepeat(); return javaRandom.nextInt(max - min + 1) + min; } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy