/*
 * squidpony.WeightedLetterNamegen Maven / Gradle / Ivy
 * Go to download
 * Show more of this group Show more artifacts with this name
 * Show all versions of squidlib-util Show documentation
 * Show all versions of squidlib-util Show documentation
 * SquidLib platform-independent logic and utility code. Please refer to
 * https://github.com/SquidPony/SquidLib.
 */
package squidpony;
import regexodus.Category;
import squidpony.annotation.Beta;
import squidpony.squidmath.GWTRNG;
import squidpony.squidmath.IStatefulRNG;
import squidpony.squidmath.ProbabilityTable;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Set;
/**
* Based on work by Nolithius available at the following two sites
* https://github.com/Nolithius/weighted-letter-namegen
* http://code.google.com/p/weighted-letter-namegen/
*
* @author Eben Howard - http://squidpony.com - [email protected]
*/
@Beta
public class WeightedLetterNamegen {
//
public static final String[] VIKING_STYLE_NAMES = new String[]{
"Andor",
"Baatar",
"Beowulf",
"Drogo",
"Freya",
"Grog",
"Gruumsh",
"Grunt",
"Hodor",
"Hrothgar",
"Hrun",
"Korg",
"Lothar",
"Odin",
"Theodrin",
"Thor",
"Yngvar",
"Xandor"
};
//
//
public static final String[] STAR_WARS_STYLE_NAMES = new String[]{
"Lutoif Vap",
"Nasoi Seert",
"Jitpai",
"Sose",
"Vainau",
"Jairkau",
"Tirka Kist",
"Boush",
"Wofe",
"Voxin Voges",
"Koux Boiti",
"Loim",
"Gaungu",
"Mut Tep",
"Foimo Saispi",
"Toneeg Vaiba",
"Nix Nast",
"Gup Dangisp",
"Distark Toonausp",
"Tex Brinki",
"Kat Tosha",
"Tauna Foip",
"Frip Cex",
"Fexa Lun",
"Tafa",
"Zeesheerk",
"Cremoim Kixoop",
"Tago",
"Kesha Diplo"
};
//
//
public static final String[] COMMON_USA_MALE_NAMES = new String[]{
"James",
"John",
"Robert",
"Michael",
"William",
"David",
"Richard",
"Charles",
"Joseph",
"Tomas",
"Christopher",
"Daniel",
"Paul",
"Mark",
"Donald",
"George",
"Kenneth",
"Steven",
"Edward",
"Brian",
"Ronald",
"Anthony",
"Kevin",
"Jason",
"Matthew",
"Gary",
"Timothy",
"Jose",
"Larry",
"Jeffrey",
"Frank",
"Scott",
"Eric",
"Stephen",
"Andrew",
"Raymond",
"Gregory",
"Joshua",
"Jerry",
"Dennis",
"Walter",
"Patrick",
"Peter",
"Harold",
"Douglas",
"Henry",
"Carl",
"Arthur",
"Ryan",
"Roger"
};
//
//
public static final String[] COMMON_USA_FEMALE_NAMES = new String[]{
"Mary",
"Patricia",
"Linda",
"Barbara",
"Elizabeth",
"Jennifer",
"Maria",
"Susan",
"Margaret",
"Dorothy",
"Lisa",
"Nancy",
"Karen",
"Betty",
"Helen",
"Sandra",
"Donna",
"Carol",
"Ruth",
"Sharon",
"Michelle",
"Laura",
"Sarah",
"Kimberly",
"Deborah",
"Jessica",
"Shirley",
"Cynthia",
"Angela",
"Melissa",
"Brenda",
"Amy",
"Anna",
"Crystal",
"Virginia",
"Kathleen",
"Pamela",
"Martha",
"Becky",
"Amanda",
"Stephanie",
"Carolyn",
"Christine",
"Marie",
"Janet",
"Catherine",
"Frances",
"Ann",
"Joyce",
"Diane",
"Jane",
"Shauna",
"Trisha",
"Eileen",
"Danielle",
"Jacquelyn",
"Lynn",
"Hannah",
"Brittany"
};
//
//
public static final String[] COMMON_USA_LAST_NAMES = new String[]{
"Smith",
"Johnson",
"Williams",
"Brown",
"Jones",
"Miller",
"Davis",
"Wilson",
"Anderson",
"Taylor",
"Thomas",
"Moore",
"Martin",
"Jackson",
"Thompson",
"White",
"Clark",
"Lewis",
"Robinson",
"Walker",
"Willis",
"Carter",
"King",
"Lee",
"Grant",
"Howard",
"Morris",
"Bartlett",
"Paine",
"Wayne",
"Lorraine"
};
//
//
public static final String[] LOVECRAFT_MYTHOS_NAMES = new String[]{
"Koth",
"Ghlatelilt",
"Siarlut",
"Nyogongogg",
"Nyialan",
"Nyithiark",
"Lyun",
"Kethoshigr",
"Shobik",
"Tekogr",
"Hru-yn",
"Lya-ehibos",
"Hruna-oma-ult",
"Shabo'en",
"Shrashangal",
"Shukhaniark",
"Thaghum",
"Shrilang",
"Lukhungu'ith",
"Nyun",
"Nyia-ongin",
"Shogia-usun",
"Lyu-yl",
"Liathiagragr",
"Lyathagg",
"Hri'osurkut",
"Shothegh",
"No-orleshigh",
"Zvriangekh",
"Nyesashiv",
"Lyarkio",
"Le'akh",
"Liashi-en",
"Shurkano'um",
"Hrakhanoth",
"Ghlotsuban",
"Cthitughias",
"Ftanugh"
};
//
private static final char[] vowels = {'a', 'e', 'i', 'o'};//not using y because it looks strange as a vowel in names
private static final int LAST_LETTER_CANDIDATES_MAX = 52;
private IStatefulRNG rng;
private String[] names;
private int consonantLimit;
private ArrayList sizes;
private HashMap>> letters;
private ArrayList firstLetterSamples;
private ArrayList lastLetterSamples;
private DamerauLevenshteinAlgorithm dla = new DamerauLevenshteinAlgorithm(1, 1, 1, 1);
/**
* Creates the generator by seeding the provided list of names.
*
* @param names an array of Strings that are typical names to be emulated
*/
public WeightedLetterNamegen(String[] names) {
this(names, 2);
}
/**
* Creates the generator by seeding the provided list of names.
*
* @param names an array of Strings that are typical names to be emulated
* @param consonantLimit the maximum allowed consonants in a row
*/
public WeightedLetterNamegen(String[] names, int consonantLimit) {
this(names, consonantLimit, new GWTRNG());
}
/**
* Creates the generator by seeding the provided list of names. The given RNG will be used for
* all random decisions this has to make, so if it has the same state (and RandomnessSource) on
* different runs through the program, it will produce the same names reliably.
*
* @param names an array of Strings that are typical names to be emulated
* @param consonantLimit the maximum allowed consonants in a row
* @param rng the source of randomness to be used
*/
public WeightedLetterNamegen(String[] names, int consonantLimit, IStatefulRNG rng) {
this.names = names;
this.consonantLimit = consonantLimit;
this.rng = rng;
init();
}
/**
* Initialization, statistically measures letter likelihood.
*/
private void init() {
sizes = new ArrayList<>();
letters = new HashMap<>();
firstLetterSamples = new ArrayList<>();
lastLetterSamples = new ArrayList<>();
for (int i = 0; i < names.length - 1; i++) {
String name = names[i];
if (name == null || name.length() < 1) {
continue;
}
// (1) Insert size
sizes.add(name.length());
// (2) Grab first letter
firstLetterSamples.add(name.charAt(0));
// (3) Grab last letter
lastLetterSamples.add(name.charAt(name.length() - 1));
// (4) Process all letters
for (int n = 0; n < name.length() - 1; n++) {
char letter = name.charAt(n);
char nextLetter = name.charAt(n + 1);
// Create letter if it doesn't exist
HashMap> wl = letters.get(letter);
if (wl == null) {
wl = new HashMap<>();
letters.put(letter, wl);
}
ProbabilityTable wlg = wl.get(letter);
if (wlg == null) {
wlg = new ProbabilityTable<>(rng.getState());
wl.put(letter, wlg);
}
wlg.add(nextLetter, 1);
// If letter was uppercase (beginning of name), also add a lowercase entry
if (Category.Lu.contains(letter)) {
letter = Character.toLowerCase(letter);
wlg = wl.get(letter);
if (wlg == null) {
wlg = new ProbabilityTable<>(rng.getState());
wl.put(letter, wlg);
}
wlg.add(nextLetter, 1);
}
}
}
}
private StringBuilder generateInner(StringBuilder name) {
for (int runs = 0; runs < LAST_LETTER_CANDIDATES_MAX; runs++) {
name.setLength(0);
// Pick size
int size = rng.getRandomElement(sizes);
// Pick first letter
char latest = rng.getRandomElement(firstLetterSamples);
name.append(latest);
for (int i = 1; i < size - 2; i++) {
name.append(latest = getRandomNextLetter(latest));
}
// Attempt to find a last letter
for (int lastLetterFits = 0; lastLetterFits < LAST_LETTER_CANDIDATES_MAX; lastLetterFits++) {
char lastLetter = rng.getRandomElement(lastLetterSamples);
char intermediateLetterCandidate = getIntermediateLetter(latest, lastLetter);
// Only attach last letter if the candidate is valid (if no candidate, the antepenultimate letter always occurs at the end)
if (Category.L.contains(intermediateLetterCandidate)) {
name.append(intermediateLetterCandidate).append(lastLetter);
break;
}
}
// Check that the word has no triple letter sequences, and that the Levenshtein distance is kosher
if (validateGrouping(name) && checkLevenshtein(name)) {
return name;
}
}
name.setLength(0);
return name.append(rng.getRandomElement(names));
}
/**
* Gets one random String name.
*
* @return a single random String name
*/
public String generate() {
return generateInner(new StringBuilder(32)).toString();
}
/**
* Gets an ArrayList of random String names, sized to match amountToGenerate.
* @param amountToGenerate how many String items to include in the returned ArrayList
* @return an ArrayList of random String names
*/
public ArrayList generateList(int amountToGenerate) {
ArrayList result = new ArrayList<>();
StringBuilder name = new StringBuilder(32);
for (int i = 0; i < amountToGenerate; i++) {
result.add(generateInner(name).toString());
}
return result;
}
/**
* Gets an array of random String names, sized to match amountToGenerate.
*
* @param amountToGenerate how many String items to include in the returned array
* @return an array of random String names
*/
public String[] generate(int amountToGenerate)
{
return generateList(amountToGenerate).toArray(new String[0]);
}
/**
* Searches for the best fit letter between the letter before and the letter
* after (non-random). Used to determine penultimate letters in names.
*
* @param letterBefore The letter before the desired letter.
* @param letterAfter The letter after the desired letter.
* @return The best fit letter between the provided letters.
*/
private char getIntermediateLetter(char letterBefore, char letterAfter) {
if (Category.L.contains(letterBefore) && Category.L.contains(letterAfter)) {
// First grab all letters that come after the 'letterBefore'
HashMap> wl = letters.get(letterBefore);
if (wl == null) {
return getRandomNextLetter(letterBefore);
}
Set letterCandidates = wl.get(letterBefore).items();
char bestFitLetter = '\'';
int bestFitScore = 0;
// Step through candidates, and return best scoring letter
for (char letter : letterCandidates) {
wl = letters.get(letter);
if (wl == null) {
continue;
}
ProbabilityTable weightedLetterGroup = wl.get(letterBefore);
if (weightedLetterGroup != null) {
int letterCounter = weightedLetterGroup.weight(letterAfter);
if (letterCounter > bestFitScore) {
bestFitLetter = letter;
bestFitScore = letterCounter;
}
}
}
return bestFitLetter;
} else {
return '-';
}
}
/**
* Checks that no three letters happen in succession.
*
* @param name The name CharSequence
* @return True if no triple letter sequence is found.
*/
private boolean validateGrouping(CharSequence name) {
for (int i = 2; i < name.length(); i++) {
if (name.charAt(i) == name.charAt(i - 1) && name.charAt(i) == name.charAt(i - 2)) {
return false;
}
}
int consonants = 0;
for (int i = 0; i < name.length(); i++) {
if (isVowel(name.charAt(i))) {
consonants = 0;
} else {
if (++consonants > consonantLimit) {
return false;
}
}
}
return true;
}
private boolean isVowel(char c) {
switch(c)
{
case 'a':
case 'e':
case 'i':
case 'o':
case 'u':
return true;
default:
return false;
}
}
/**
* Checks that the Damerau-Levenshtein distance of this name is within a
* given bias from a name on the master list.
*
* @param name The name string.
* @return True if a name is found that is within the bias.
*/
private boolean checkLevenshtein(CharSequence name) {
int levenshteinBias = name.length() / 2;
for (String name1 : names) {
int levenshteinDistance = dla.execute(name, name1);
if (levenshteinDistance <= levenshteinBias) {
return true;
}
}
return false;
}
private char getRandomNextLetter(char letter) {
if (letters.containsKey(letter)) {
return letters.get(letter).get(letter).random();
} else {
return vowels[rng.next(2)]; // 2 bits, so ranging from 0 to 3
}
}
}