org.apache.commons.codec.language.bm.Rule Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of gwt-commons-codec Show documentation
Show all versions of gwt-commons-codec Show documentation
The Apache Commons Codec component contains encoders and decoders for
various formats such as Base16, Base32, Base64, digest, and Hexadecimal. In addition to these
widely used encoders and decoders, the codec package also maintains a
collection of phonetic encoding utilities.
This is a port for GWT that enables programs to use Apache Commons Codec
in the frontend as well, compiled to JavaScript by the GWT compiler.
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.commons.codec.language.bm;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.EnumMap;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Scanner;
import java.util.Set;
import org.apache.commons.codec.Resources;
import org.apache.commons.codec.language.bm.Languages.LanguageSet;
import com.google.gwt.core.shared.GwtIncompatible;
import com.google.gwt.regexp.shared.MatchResult;
import com.google.gwt.regexp.shared.RegExp;
/**
* A phoneme rule.
*
* Rules have a pattern, left context, right context, output phoneme, set of languages for which they apply
* and a logical flag indicating if all languages must be in play. A rule matches if:
*
* - the pattern matches at the current position
* - the string up until the beginning of the pattern matches the left context
* - the string from the end of the pattern matches the right context
* - logical is ALL and all languages are in scope; or
* - logical is any other value and at least one language is in scope
*
*
* Rules are typically generated by parsing rules resources. In normal use, there will be no need for the user
* to explicitly construct their own.
*
* Rules are immutable and thread-safe.
*
* Rules resources
*
* Rules are typically loaded from resource files. These are UTF-8 encoded text files. They are systematically
* named following the pattern:
*
 * {@code org/apache/commons/codec/language/bm/${NameType#getName}_${RuleType#getName}_${language}.txt}
*
* The format of these resources is the following:
*
* - Rules: whitespace separated, double-quoted strings. There should be 4 columns to each row, and these
* will be interpreted as:
*
* - pattern
* - left context
* - right context
* - phoneme
*
*
* - End-of-line comments: Any occurrence of '//' will cause all text following on that line to be discarded
* as a comment.
* - Multi-line comments: Any line starting with '/*' will start multi-line commenting mode. This will skip
* all content until a line ending in '*' and '/' is found.
* - Blank lines: All blank lines will be skipped.
*
*
* @since 1.6
*/
public class Rule {
/**
 * A piece of phoneme text together with the set of languages it is associated with.
 *
 * The text is held in a {@link StringBuilder} and can be extended in place via {@link #append(CharSequence)}.
 */
public static final class Phoneme implements PhonemeExpr {
    /**
     * Orders phonemes by their text only, comparing character by character. When one text is a prefix of
     * the other, the shorter text sorts first. The language set does not take part in the comparison.
     */
    public static final Comparator<Phoneme> COMPARATOR = new Comparator<Phoneme>() {
        @Override
        public int compare(final Phoneme o1, final Phoneme o2) {
            for (int i = 0; i < o1.phonemeText.length(); i++) {
                if (i >= o2.phonemeText.length()) {
                    // o2 is a proper prefix of o1, so o1 sorts after it.
                    return +1;
                }
                final int c = o1.phonemeText.charAt(i) - o2.phonemeText.charAt(i);
                if (c != 0) {
                    return c;
                }
            }
            // All of o1 matched; o1 is either equal to o2 or a proper prefix of it.
            if (o1.phonemeText.length() < o2.phonemeText.length()) {
                return -1;
            }
            return 0;
        }
    };

    private final StringBuilder phonemeText;
    private final Languages.LanguageSet languages;

    /**
     * Creates a phoneme from the given text and language set.
     *
     * @param phonemeText the phoneme text; its characters are copied into a new buffer
     * @param languages the language set, stored as given
     */
    public Phoneme(final CharSequence phonemeText, final Languages.LanguageSet languages) {
        this.phonemeText = new StringBuilder(phonemeText);
        this.languages = languages;
    }

    /**
     * Creates a phoneme whose text is the left text followed by the right text and whose language set is
     * the one of the left phoneme.
     *
     * @param phonemeLeft the phoneme supplying the leading text and the language set
     * @param phonemeRight the phoneme supplying the trailing text
     */
    public Phoneme(final Phoneme phonemeLeft, final Phoneme phonemeRight) {
        this(phonemeLeft.phonemeText, phonemeLeft.languages);
        this.phonemeText.append(phonemeRight.phonemeText);
    }

    /**
     * Creates a phoneme whose text is the left text followed by the right text, with an explicit language set.
     *
     * @param phonemeLeft the phoneme supplying the leading text
     * @param phonemeRight the phoneme supplying the trailing text
     * @param languages the language set of the new phoneme
     */
    public Phoneme(final Phoneme phonemeLeft, final Phoneme phonemeRight, final Languages.LanguageSet languages) {
        this(phonemeLeft.phonemeText, languages);
        this.phonemeText.append(phonemeRight.phonemeText);
    }

    /**
     * Appends text to this phoneme in place.
     *
     * @param str the text to append
     * @return this phoneme, for chaining
     */
    public Phoneme append(final CharSequence str) {
        this.phonemeText.append(str);
        return this;
    }

    /**
     * @return the language set of this phoneme
     */
    public Languages.LanguageSet getLanguages() {
        return this.languages;
    }

    /**
     * @return a single-element collection containing only this phoneme
     */
    @Override
    public Iterable<Phoneme> getPhonemes() {
        return Collections.singleton(this);
    }

    /**
     * @return the phoneme text; this is the internal buffer, not a copy
     */
    public CharSequence getPhonemeText() {
        return this.phonemeText;
    }

    /**
     * Deprecated since 1.9.
     *
     * @param right the Phoneme to join
     * @return a new Phoneme
     * @deprecated since 1.9
     */
    @Deprecated
    public Phoneme join(final Phoneme right) {
        return new Phoneme(this.phonemeText.toString() + right.phonemeText.toString(),
                this.languages.restrictTo(right.languages));
    }

    /**
     * Returns a new Phoneme with the same text but a union of its
     * current language set and the given one.
     *
     * @param lang the language set to merge
     * @return a new Phoneme
     */
    public Phoneme mergeWithLanguage(final LanguageSet lang) {
        return new Phoneme(this.phonemeText.toString(), this.languages.merge(lang));
    }

    /**
     * @return the phoneme text followed by the language set in square brackets
     */
    @Override
    public String toString() {
        return phonemeText.toString() + "[" + languages + "]";
    }
}
public interface PhonemeExpr {
Iterable getPhonemes();
}
/**
 * A {@link PhonemeExpr} backed by a list of phonemes.
 */
public static final class PhonemeList implements PhonemeExpr {
    private final List<Phoneme> phonemes;

    /**
     * Wraps the given list. The list is stored as given, not copied.
     *
     * @param phonemes the phonemes to expose
     */
    public PhonemeList(final List<Phoneme> phonemes) {
        this.phonemes = phonemes;
    }

    /**
     * @return the list passed to the constructor
     */
    @Override
    public List<Phoneme> getPhonemes() {
        return this.phonemes;
    }
}
/**
 * The small slice of pattern-matching behaviour this class relies on, kept behind an interface so that
 * different matching implementations can be plugged in.
 */
public interface RPattern {
    /**
     * Tests the given input against this pattern.
     *
     * @param input the character sequence to test
     * @return {@code true} if the input is accepted by this pattern
     */
    boolean isMatch(CharSequence input);
}
public static final RPattern ALL_STRINGS_RMATCHER = new RPattern() {
@Override
public boolean isMatch(final CharSequence input) {
return true;
}
};
public static final String ALL = "ALL";
private static final String DOUBLE_QUOTE = "\"";
private static final String HASH_INCLUDE = "#include";
private static final Map>>>> RULES =
new EnumMap<>(NameType.class);
/*
 * Fills RULES. For every name type and rule type this builds one rule table per language reported by
 * Languages.getInstance(nameType) and, for every rule type other than RULES, one extra table stored
 * under the key "common".
 */
static {
    for (final NameType nameType : NameType.values()) {
        final Map<RuleType, Map<String, Map<String, List<Rule>>>> byRuleType =
                new EnumMap<>(RuleType.class);
        for (final RuleType ruleType : RuleType.values()) {
            final Map<String, Map<String, List<Rule>>> byLanguage = new HashMap<>();
            final Languages languages = Languages.getInstance(nameType);
            for (final String language : languages.getLanguages()) {
                byLanguage.put(language, loadRules(nameType, ruleType, language));
            }
            if (!ruleType.equals(RuleType.RULES)) {
                // The "common" table is registered regardless of whether it is listed as a language.
                byLanguage.put("common", loadRules(nameType, ruleType, "common"));
            }
            byRuleType.put(ruleType, Collections.unmodifiableMap(byLanguage));
        }
        RULES.put(nameType, Collections.unmodifiableMap(byRuleType));
    }
}

/**
 * Builds a fresh rule table for one combination of name type, rule type and language.
 * Combinations without a matching rule set yield an empty table.
 */
private static Map<String, List<Rule>> loadRules(final NameType nameType, final RuleType ruleType,
        final String language) {
    final Map<String, List<Rule>> lines = new HashMap<>();
    switch (nameType) {
        case ASHKENAZI:
            loadAshkenaziRules(ruleType, language, lines);
            break;
        case GENERIC:
            loadGenericRules(ruleType, language, lines);
            break;
        case SEPHARDIC:
            loadSephardicRules(ruleType, language, lines);
            break;
        default:
            break;
    }
    return lines;
}

/** Adds the Ashkenazi rules for the given rule type and language to {@code lines}. */
private static void loadAshkenaziRules(final RuleType ruleType, final String language,
        final Map<String, List<Rule>> lines) {
    switch (ruleType) {
        case APPROX:
            switch (language) {
                case "any":
                    addAshApproxAny(lines);
                    break;
                case "common":
                    addAshExactApproxCommon(lines);
                    addAshApproxCommon(lines);
                    break;
                case "cyrillic":
                case "russian":
                    addAshApproxRussian(lines);
                    break;
                case "english":
                    addAshApproxEnglish(lines);
                    break;
                case "french":
                case "hungarian":
                case "spanish":
                    addAshApproxFrench(lines);
                    break;
                case "german":
                    addAshApproxGerman(lines);
                    break;
                case "polish":
                case "romanian":
                    addAshApproxPolish(lines);
                    break;
                default:
                    break;
            }
            break;
        case EXACT:
            switch (language) {
                case "any":
                case "german":
                    addAshExactAny(lines);
                    break;
                case "common":
                    addAshExactApproxCommon(lines);
                    addAshExactCommon(lines);
                    break;
                case "cyrillic":
                case "english":
                case "french":
                case "hungarian":
                case "romanian":
                case "russian":
                case "spanish":
                    addAshExactRussian(lines);
                    break;
                case "polish":
                    addAshExactPolish(lines);
                    break;
                default:
                    break;
            }
            break;
        case RULES:
            switch (language) {
                case "any":
                    addAshRulesAny(lines);
                    break;
                case "cyrillic":
                    addAshRulesCyrillic(lines);
                    break;
                case "english":
                    addAshRulesEnglish(lines);
                    break;
                case "french":
                    addAshRulesFrench(lines);
                    break;
                case "german":
                    addAshRulesGerman(lines);
                    break;
                case "hebrew":
                    addAshRulesHebrew(lines);
                    break;
                case "hungarian":
                    addAshRulesHungarian(lines);
                    break;
                case "polish":
                    addAshRulesPolish(lines);
                    break;
                case "romanian":
                    addAshRulesRomanian(lines);
                    break;
                case "russian":
                    addAshRulesRussian(lines);
                    break;
                case "spanish":
                    addAshRulesSpanish(lines);
                    break;
                default:
                    break;
            }
            break;
        default:
            break;
    }
}

/** Adds the generic rules for the given rule type and language to {@code lines}. */
private static void loadGenericRules(final RuleType ruleType, final String language,
        final Map<String, List<Rule>> lines) {
    switch (ruleType) {
        case APPROX:
            switch (language) {
                case "any":
                    addGenApproxAny(lines);
                    break;
                case "common":
                    addGenExactApproxCommon(lines);
                    addGenApproxCommon(lines);
                    break;
                case "arabic":
                    addGenApproxArabic(lines);
                    break;
                case "cyrillic":
                case "russian":
                    addGenApproxRussian(lines);
                    break;
                case "czech":
                case "dutch":
                case "french":
                case "greek":
                case "hungarian":
                case "italian":
                case "portugese":
                // The label above is the historical misspelling; the RULES tables use "portuguese",
                // so accept the correctly spelled name as well.
                case "portuguese":
                case "turkish":
                    addGenApproxFrench(lines);
                    break;
                case "english":
                    addGenApproxEnglish(lines);
                    break;
                case "german":
                    addGenApproxGerman(lines);
                    break;
                case "greeklatin":
                    addGenApproxFrench(lines);
                    addGenApproxGreekLatin(lines);
                    break;
                case "polish":
                case "romanian":
                    addGenApproxPolish(lines);
                    break;
                case "spanish":
                    addGenApproxSpanish(lines);
                    break;
                default:
                    break;
            }
            break;
        case EXACT:
            switch (language) {
                case "any":
                case "german":
                    addGenExactAny(lines);
                    break;
                case "arabic":
                    addGenExactArabic(lines);
                    break;
                case "common":
                    addGenExactApproxCommon(lines);
                    addGenExactCommon(lines);
                    break;
                case "cyrillic":
                case "czech":
                case "english":
                case "russian":
                    addGenExactRussian(lines);
                    break;
                case "greeklatin":
                    addGenExactGreeklatin(lines);
                    break;
                case "polish":
                    addGenExactPolish(lines);
                    break;
                case "spanish":
                    addGenExactSpanish(lines);
                    break;
                default:
                    break;
            }
            break;
        case RULES:
            switch (language) {
                case "any":
                    addGenRulesAny(lines);
                    break;
                case "arabic":
                    addGenRulesArabic(lines);
                    break;
                case "cyrillic":
                    addGenRulesCyrillic(lines);
                    break;
                case "czech":
                    addGenRulesCzech(lines);
                    break;
                case "dutch":
                    addGenRulesDutch(lines);
                    break;
                case "english":
                    addGenRulesEnglish(lines);
                    break;
                case "french":
                    addGenRulesFrench(lines);
                    break;
                case "german":
                    addGenRulesGerman(lines);
                    break;
                case "greek":
                    addGenRulesGreek(lines);
                    break;
                case "greeklatin":
                    addGenRulesGreeklatin(lines);
                    break;
                case "hebrew":
                    addGenRulesHebrew(lines);
                    break;
                case "hungarian":
                    addGenRulesHungarian(lines);
                    break;
                case "italian":
                    addGenRulesItalian(lines);
                    break;
                case "polish":
                    addGenRulesPolish(lines);
                    break;
                case "portuguese":
                    addGenRulesPortuguese(lines);
                    break;
                case "romanian":
                    addGenRulesRomanian(lines);
                    break;
                case "russian":
                    addGenRulesRussian(lines);
                    break;
                case "spanish":
                    addGenRulesSpanish(lines);
                    break;
                case "turkish":
                    addGenRulesTurkish(lines);
                    break;
                default:
                    break;
            }
            break;
        default:
            break;
    }
}

/** Adds the Sephardic rules for the given rule type and language to {@code lines}. */
private static void loadSephardicRules(final RuleType ruleType, final String language,
        final Map<String, List<Rule>> lines) {
    switch (ruleType) {
        case APPROX:
            switch (language) {
                case "any":
                    addSepApproxAny(lines);
                    break;
                case "common":
                    addSepExactApproxCommon(lines);
                    addSepApproxCommon(lines);
                    break;
                default:
                    break;
            }
            break;
        case EXACT:
            switch (language) {
                case "any":
                    addSepExactAny(lines);
                    break;
                case "common":
                    addSepExactApproxCommon(lines);
                    addSepExactCommon(lines);
                    break;
                default:
                    break;
            }
            break;
        case RULES:
            switch (language) {
                case "any":
                    addSepRulesAny(lines);
                    break;
                case "french":
                    addSepRulesFrench(lines);
                    break;
                case "hebrew":
                    addSepRulesHebrew(lines);
                    break;
                case "italian":
                    addSepRulesItalian(lines);
                    break;
                case "portuguese":
                    addSepRulesPortuguese(lines);
                    break;
                case "spanish":
                    addSepRulesSpanish(lines);
                    break;
                default:
                    break;
            }
            break;
        default:
            break;
    }
}
/**
 * Tells whether a character occurs anywhere in a character sequence.
 *
 * @param chars the sequence to search
 * @param input the character to look for
 * @return {@code true} if at least one position of {@code chars} holds {@code input}
 */
private static boolean contains(final CharSequence chars, final char input) {
    int index = 0;
    while (index < chars.length()) {
        if (input == chars.charAt(index)) {
            return true;
        }
        index++;
    }
    return false;
}
private static void addAshApproxAny(final Map> lines) {
addRule(lines, "b", "", "", "(b|v[spanish])");
addRule(lines, "J", "", "", "z");
addRule(lines, "aiB", "", "[bp]", "(D|Dm)");
addRule(lines, "AiB", "", "[bp]", "(D|Dm)");
addRule(lines, "oiB", "", "[bp]", "(D|Dm)");
addRule(lines, "OiB", "", "[bp]", "(D|Dm)");
addRule(lines, "uiB", "", "[bp]", "(D|Dm)");
addRule(lines, "UiB", "", "[bp]", "(D|Dm)");
addRule(lines, "eiB", "", "[bp]", "(D|Dm)");
addRule(lines, "EiB", "", "[bp]", "(D|Dm)");
addRule(lines, "iiB", "", "[bp]", "(D|Dm)");
addRule(lines, "IiB", "", "[bp]", "(D|Dm)");
addRule(lines, "aiB", "", "[dgkstvz]", "(D|Dn)");
addRule(lines, "AiB", "", "[dgkstvz]", "(D|Dn)");
addRule(lines, "oiB", "", "[dgkstvz]", "(D|Dn)");
addRule(lines, "OiB", "", "[dgkstvz]", "(D|Dn)");
addRule(lines, "uiB", "", "[dgkstvz]", "(D|Dn)");
addRule(lines, "UiB", "", "[dgkstvz]", "(D|Dn)");
addRule(lines, "eiB", "", "[dgkstvz]", "(D|Dn)");
addRule(lines, "EiB", "", "[dgkstvz]", "(D|Dn)");
addRule(lines, "iiB", "", "[dgkstvz]", "(D|Dn)");
addRule(lines, "IiB", "", "[dgkstvz]", "(D|Dn)");
addRule(lines, "B", "", "[bp]", "(o|om[polish]|im[polish])");
addRule(lines, "B", "", "[dgkstvz]", "(a|o|on[polish]|in[polish])");
addRule(lines, "B", "", "", "(a|o)");
addRule(lines, "aiF", "", "[bp]", "(D|Dm)");
addRule(lines, "AiF", "", "[bp]", "(D|Dm)");
addRule(lines, "oiF", "", "[bp]", "(D|Dm)");
addRule(lines, "OiF", "", "[bp]", "(D|Dm)");
addRule(lines, "uiF", "", "[bp]", "(D|Dm)");
addRule(lines, "UiF", "", "[bp]", "(D|Dm)");
addRule(lines, "eiF", "", "[bp]", "(D|Dm)");
addRule(lines, "EiF", "", "[bp]", "(D|Dm)");
addRule(lines, "iiF", "", "[bp]", "(D|Dm)");
addRule(lines, "IiF", "", "[bp]", "(D|Dm)");
addRule(lines, "aiF", "", "[dgkstvz]", "(D|Dn)");
addRule(lines, "AiF", "", "[dgkstvz]", "(D|Dn)");
addRule(lines, "oiF", "", "[dgkstvz]", "(D|Dn)");
addRule(lines, "OiF", "", "[dgkstvz]", "(D|Dn)");
addRule(lines, "uiF", "", "[dgkstvz]", "(D|Dn)");
addRule(lines, "UiF", "", "[dgkstvz]", "(D|Dn)");
addRule(lines, "eiF", "", "[dgkstvz]", "(D|Dn)");
addRule(lines, "EiF", "", "[dgkstvz]", "(D|Dn)");
addRule(lines, "iiF", "", "[dgkstvz]", "(D|Dn)");
addRule(lines, "IiF", "", "[dgkstvz]", "(D|Dn)");
addRule(lines, "F", "", "[bp]", "(i|im[polish]|om[polish])");
addRule(lines, "F", "", "[dgkstvz]", "(i|in[polish]|on[polish])");
addRule(lines, "F", "", "", "i");
addRule(lines, "P", "", "", "(o|u)");
addRule(lines, "I", "[aeiouAEIBFOUQY]", "", "i");
addRule(lines, "I", "", "[^aeiouAEBFIOU]e", "(Q[german]|i|D[english])");
addRule(lines, "I", "", "$", "i");
addRule(lines, "I", "", "[^k]$", "i");
addRule(lines, "Ik", "[lr]", "$", "(ik|Qk[german])");
addRule(lines, "Ik", "", "$", "ik");
addRule(lines, "sIts", "", "$", "(sits|sQts[german])");
addRule(lines, "Its", "", "$", "its");
addRule(lines, "I", "", "", "(Q[german]|i)");
addRule(lines, "lE", "[bdfgkmnprsStvzZ]", "$", "(li|il[english])");
addRule(lines, "lE", "[bdfgkmnprsStvzZ]", "", "(li|il[english]|lY[german])");
addRule(lines, "au", "", "", "(D|a|u)");
addRule(lines, "ou", "", "", "(D|o|u)");
addRule(lines, "ai", "", "", "(D|a|i)");
addRule(lines, "Ai", "", "", "(D|a|i)");
addRule(lines, "oi", "", "", "(D|o|i)");
addRule(lines, "Oi", "", "", "(D|o|i)");
addRule(lines, "ui", "", "", "(D|u|i)");
addRule(lines, "Ui", "", "", "(D|u|i)");
addRule(lines, "ei", "", "", "(D|i)");
addRule(lines, "Ei", "", "", "(D|i)");
addRule(lines, "iA", "", "$", "(ia|io)");
addRule(lines, "iA", "", "", "(ia|io|iY[german])");
addRule(lines, "A", "", "[^aeiouAEBFIOU]e", "(a|o|Y[german]|D[english])");
addRule(lines, "E", "i[^aeiouAEIOU]", "", "(i|Y[german]|[english])");
addRule(lines, "E", "a[^aeiouAEIOU]", "", "(i|Y[german]|[english])");
addRule(lines, "e", "", "[fklmnprstv]$", "i");
addRule(lines, "e", "", "ts$", "i");
addRule(lines, "e", "", "$", "i");
addRule(lines, "e", "[DaoiuAOIUQY]", "", "i");
addRule(lines, "e", "", "[aoAOQY]", "i");
addRule(lines, "e", "", "", "(i|Y[german])");
addRule(lines, "E", "", "[fklmnprst]$", "i");
addRule(lines, "E", "", "ts$", "i");
addRule(lines, "E", "", "$", "i");
addRule(lines, "E", "[DaoiuAOIUQY]", "", "i");
addRule(lines, "E", "", "[aoAOQY]", "i");
addRule(lines, "E", "", "", "(i|Y[german])");
addRule(lines, "a", "", "", "(a|o)");
addRule(lines, "O", "", "[fklmnprstv]$", "o");
addRule(lines, "O", "", "ts$", "o");
addRule(lines, "O", "", "$", "o");
addRule(lines, "O", "[oeiuQY]", "", "o");
addRule(lines, "O", "", "", "(o|Y[german])");
addRule(lines, "A", "", "[fklmnprst]$", "(a|o)");
addRule(lines, "A", "", "ts$", "(a|o)");
addRule(lines, "A", "", "$", "(a|o)");
addRule(lines, "A", "[oeiuQY]", "", "(a|o)");
addRule(lines, "A", "", "", "(a|o|Y[german])");
addRule(lines, "U", "", "$", "u");
addRule(lines, "U", "[DoiuQY]", "", "u");
addRule(lines, "U", "", "[^k]$", "u");
addRule(lines, "Uk", "[lr]", "$", "(uk|Qk[german])");
addRule(lines, "Uk", "", "$", "uk");
addRule(lines, "sUts", "", "$", "(suts|sQts[german])");
addRule(lines, "Uts", "", "$", "uts");
addRule(lines, "U", "", "", "(u|Q[german])");
}
private static void addAshApproxCommon(final Map> lines) {
addRule(lines, "n", "", "[bp]", "m");
addRule(lines, "h", "", "", "");
addRule(lines, "H", "", "", "(x|)");
addRule(lines, "F", "", "[bdgkpstvzZ]h", "e");
addRule(lines, "F", "", "[bdgkpstvzZ]x", "e");
addRule(lines, "B", "", "[bdgkpstvzZ]h", "a");
addRule(lines, "B", "", "[bdgkpstvzZ]x", "a");
addRule(lines, "e", "[bdfgklmnprsStvzZ]", "[ln]$", "");
addRule(lines, "i", "[bdfgklmnprsStvzZ]", "[ln]$", "");
addRule(lines, "E", "[bdfgklmnprsStvzZ]", "[ln]$", "");
addRule(lines, "I", "[bdfgklmnprsStvzZ]", "[ln]$", "");
addRule(lines, "F", "[bdfgklmnprsStvzZ]", "[ln]$", "");
addRule(lines, "Q", "[bdfgklmnprsStvzZ]", "[ln]$", "");
addRule(lines, "Y", "[bdfgklmnprsStvzZ]", "[ln]$", "");
addRule(lines, "e", "[bdfgklmnprsStvzZ]", "[ln][bdfgklmnprsStvzZ]", "");
addRule(lines, "i", "[bdfgklmnprsStvzZ]", "[ln][bdfgklmnprsStvzZ]", "");
addRule(lines, "E", "[bdfgklmnprsStvzZ]", "[ln][bdfgklmnprsStvzZ]", "");
addRule(lines, "I", "[bdfgklmnprsStvzZ]", "[ln][bdfgklmnprsStvzZ]", "");
addRule(lines, "F", "[bdfgklmnprsStvzZ]", "[ln][bdfgklmnprsStvzZ]", "");
addRule(lines, "Q", "[bdfgklmnprsStvzZ]", "[ln][bdfgklmnprsStvzZ]", "");
addRule(lines, "Y", "[bdfgklmnprsStvzZ]", "[ln][bdfgklmnprsStvzZ]", "");
addRule(lines, "lEs", "", "", "(lEs|lz)");
addRule(lines, "lE", "[bdfgkmnprStvzZ]", "", "(lE|l)");
addRule(lines, "aue", "", "", "D");
addRule(lines, "oue", "", "", "D");
addRule(lines, "AvE", "", "", "(D|AvE)");
addRule(lines, "Ave", "", "", "(D|Ave)");
addRule(lines, "avE", "", "", "(D|avE)");
addRule(lines, "ave", "", "", "(D|ave)");
addRule(lines, "OvE", "", "", "(D|OvE)");
addRule(lines, "Ove", "", "", "(D|Ove)");
addRule(lines, "ovE", "", "", "(D|ovE)");
addRule(lines, "ove", "", "", "(D|ove)");
addRule(lines, "ea", "", "", "(D|ea)");
addRule(lines, "EA", "", "", "(D|EA)");
addRule(lines, "Ea", "", "", "(D|Ea)");
addRule(lines, "eA", "", "", "(D|eA)");
addRule(lines, "aji", "", "", "D");
addRule(lines, "ajI", "", "", "D");
addRule(lines, "aje", "", "", "D");
addRule(lines, "ajE", "", "", "D");
addRule(lines, "Aji", "", "", "D");
addRule(lines, "AjI", "", "", "D");
addRule(lines, "Aje", "", "", "D");
addRule(lines, "AjE", "", "", "D");
addRule(lines, "oji", "", "", "D");
addRule(lines, "ojI", "", "", "D");
addRule(lines, "oje", "", "", "D");
addRule(lines, "ojE", "", "", "D");
addRule(lines, "Oji", "", "", "D");
addRule(lines, "OjI", "", "", "D");
addRule(lines, "Oje", "", "", "D");
addRule(lines, "OjE", "", "", "D");
addRule(lines, "eji", "", "", "D");
addRule(lines, "ejI", "", "", "D");
addRule(lines, "eje", "", "", "D");
addRule(lines, "ejE", "", "", "D");
addRule(lines, "Eji", "", "", "D");
addRule(lines, "EjI", "", "", "D");
addRule(lines, "Eje", "", "", "D");
addRule(lines, "EjE", "", "", "D");
addRule(lines, "uji", "", "", "D");
addRule(lines, "ujI", "", "", "D");
addRule(lines, "uje", "", "", "D");
addRule(lines, "ujE", "", "", "D");
addRule(lines, "Uji", "", "", "D");
addRule(lines, "UjI", "", "", "D");
addRule(lines, "Uje", "", "", "D");
addRule(lines, "UjE", "", "", "D");
addRule(lines, "iji", "", "", "D");
addRule(lines, "ijI", "", "", "D");
addRule(lines, "ije", "", "", "D");
addRule(lines, "ijE", "", "", "D");
addRule(lines, "Iji", "", "", "D");
addRule(lines, "IjI", "", "", "D");
addRule(lines, "Ije", "", "", "D");
addRule(lines, "IjE", "", "", "D");
addRule(lines, "aja", "", "", "D");
addRule(lines, "ajA", "", "", "D");
addRule(lines, "ajo", "", "", "D");
addRule(lines, "ajO", "", "", "D");
addRule(lines, "aju", "", "", "D");
addRule(lines, "ajU", "", "", "D");
addRule(lines, "Aja", "", "", "D");
addRule(lines, "AjA", "", "", "D");
addRule(lines, "Ajo", "", "", "D");
addRule(lines, "AjO", "", "", "D");
addRule(lines, "Aju", "", "", "D");
addRule(lines, "AjU", "", "", "D");
addRule(lines, "oja", "", "", "D");
addRule(lines, "ojA", "", "", "D");
addRule(lines, "ojo", "", "", "D");
addRule(lines, "ojO", "", "", "D");
addRule(lines, "Aju", "", "", "D");
addRule(lines, "AjU", "", "", "D");
addRule(lines, "Oja", "", "", "D");
addRule(lines, "OjA", "", "", "D");
addRule(lines, "Ojo", "", "", "D");
addRule(lines, "OjO", "", "", "D");
addRule(lines, "Aju", "", "", "D");
addRule(lines, "AjU", "", "", "D");
addRule(lines, "eja", "", "", "D");
addRule(lines, "ejA", "", "", "D");
addRule(lines, "ejo", "", "", "D");
addRule(lines, "ejO", "", "", "D");
addRule(lines, "Aju", "", "", "D");
addRule(lines, "AjU", "", "", "D");
addRule(lines, "Eja", "", "", "D");
addRule(lines, "EjA", "", "", "D");
addRule(lines, "Ejo", "", "", "D");
addRule(lines, "EjO", "", "", "D");
addRule(lines, "Aju", "", "", "D");
addRule(lines, "AjU", "", "", "D");
addRule(lines, "uja", "", "", "D");
addRule(lines, "ujA", "", "", "D");
addRule(lines, "ujo", "", "", "D");
addRule(lines, "ujO", "", "", "D");
addRule(lines, "Aju", "", "", "D");
addRule(lines, "AjU", "", "", "D");
addRule(lines, "Uja", "", "", "D");
addRule(lines, "UjA", "", "", "D");
addRule(lines, "Ujo", "", "", "D");
addRule(lines, "UjO", "", "", "D");
addRule(lines, "Aju", "", "", "D");
addRule(lines, "AjU", "", "", "D");
addRule(lines, "ija", "", "", "D");
addRule(lines, "ijA", "", "", "D");
addRule(lines, "ijo", "", "", "D");
addRule(lines, "ijO", "", "", "D");
addRule(lines, "Aju", "", "", "D");
addRule(lines, "AjU", "", "", "D");
addRule(lines, "Ija", "", "", "D");
addRule(lines, "IjA", "", "", "D");
addRule(lines, "Ijo", "", "", "D");
addRule(lines, "IjO", "", "", "D");
addRule(lines, "Aju", "", "", "D");
addRule(lines, "AjU", "", "", "D");
addRule(lines, "j", "", "", "i");
addRule(lines, "lYndEr", "", "$", "lYnder");
addRule(lines, "lander", "", "$", "lYnder");
addRule(lines, "lAndEr", "", "$", "lYnder");
addRule(lines, "lAnder", "", "$", "lYnder");
addRule(lines, "landEr", "", "$", "lYnder");
addRule(lines, "lender", "", "$", "lYnder");
addRule(lines, "lEndEr", "", "$", "lYnder");
addRule(lines, "lendEr", "", "$", "lYnder");
addRule(lines, "lEnder", "", "$", "lYnder");
addRule(lines, "bUrk", "", "$", "(burk|berk)");
addRule(lines, "burk", "", "$", "(burk|berk)");
addRule(lines, "bUrg", "", "$", "(burk|berk)");
addRule(lines, "burg", "", "$", "(burk|berk)");
addRule(lines, "s", "", "[rmnl]", "z");
addRule(lines, "S", "", "[rmnl]", "z");
addRule(lines, "s", "[rmnl]", "", "z");
addRule(lines, "S", "[rmnl]", "", "z");
addRule(lines, "dS", "", "$", "S");
addRule(lines, "dZ", "", "$", "S");
addRule(lines, "Z", "", "$", "S");
addRule(lines, "S", "", "$", "(S|s)");
addRule(lines, "z", "", "$", "(S|s)");
addRule(lines, "S", "", "", "s");
addRule(lines, "dZ", "", "", "z");
addRule(lines, "Z", "", "", "z");
}
private static void addAshApproxRussian(final Map> lines) {
addRule(lines, "I", "", "$", "i");
addRule(lines, "I", "", "[^k]$", "i");
addRule(lines, "Ik", "[lr]", "$", "(ik|Qk)");
addRule(lines, "Ik", "", "$", "ik");
addRule(lines, "sIts", "", "$", "(sits|sQts)");
addRule(lines, "Its", "", "$", "its");
addRule(lines, "I", "[aeiEIou]", "", "i");
addRule(lines, "I", "", "", "(i|Q)");
addRule(lines, "au", "", "", "(D|a|u)");
addRule(lines, "ou", "", "", "(D|o|u)");
addRule(lines, "ai", "", "", "(D|a|i)");
addRule(lines, "oi", "", "", "(D|o|i)");
addRule(lines, "ui", "", "", "(D|u|i)");
addRule(lines, "om", "", "[bp]", "(om|im)");
addRule(lines, "on", "", "[dgkstvz]", "(on|in)");
addRule(lines, "em", "", "[bp]", "(im|om)");
addRule(lines, "en", "", "[dgkstvz]", "(in|on)");
addRule(lines, "Em", "", "[bp]", "(im|Ym|om)");
addRule(lines, "En", "", "[dgkstvz]", "(in|Yn|on)");
addRule(lines, "a", "", "", "(a|o)");
addRule(lines, "e", "", "", "i");
addRule(lines, "E", "", "[fklmnprsStv]$", "i");
addRule(lines, "E", "", "ts$", "i");
addRule(lines, "E", "[DaoiuQ]", "", "i");
addRule(lines, "E", "", "[aoQ]", "i");
addRule(lines, "E", "", "", "(Y|i)");
}
private static void addAshApproxEnglish(final Map> lines) {
addRule(lines, "I", "", "[^aEIeiou]e", "(Q|i|D)");
addRule(lines, "I", "", "$", "i");
addRule(lines, "I", "[aEIeiou]", "", "i");
addRule(lines, "I", "", "[^k]$", "i");
addRule(lines, "Ik", "[lr]", "$", "(ik|Qk)");
addRule(lines, "Ik", "", "$", "ik");
addRule(lines, "sIts", "", "$", "(sits|sQts)");
addRule(lines, "Its", "", "$", "its");
addRule(lines, "I", "", "", "(i|Q)");
addRule(lines, "lE", "[bdfgkmnprsStvzZ]", "", "(il|li|lY)");
addRule(lines, "au", "", "", "(D|a|u)");
addRule(lines, "ou", "", "", "(D|o|u)");
addRule(lines, "ai", "", "", "(D|a|i)");
addRule(lines, "oi", "", "", "(D|o|i)");
addRule(lines, "ui", "", "", "(D|u|i)");
addRule(lines, "E", "D[^aeiEIou]", "", "(i|)");
addRule(lines, "e", "D[^aeiEIou]", "", "(i|)");
addRule(lines, "e", "", "", "i");
addRule(lines, "E", "", "[fklmnprsStv]$", "i");
addRule(lines, "E", "", "ts$", "i");
addRule(lines, "E", "[DaoiEuQY]", "", "i");
addRule(lines, "E", "", "[aoQY]", "i");
addRule(lines, "E", "", "", "(Y|i)");
addRule(lines, "a", "", "", "(a|o)");
}
private static void addAshApproxFrench(final Map> lines) {
addRule(lines, "I", "", "$", "i");
addRule(lines, "I", "[aEIeiou]", "", "i");
addRule(lines, "I", "", "[^k]$", "i");
addRule(lines, "Ik", "[lr]", "$", "(ik|Qk)");
addRule(lines, "Ik", "", "$", "ik");
addRule(lines, "sIts", "", "$", "(sits|sQts)");
addRule(lines, "Its", "", "$", "its");
addRule(lines, "I", "", "", "(i|Q)");
addRule(lines, "au", "", "", "(D|a|u)");
addRule(lines, "ou", "", "", "(D|o|u)");
addRule(lines, "ai", "", "", "(D|a|i)");
addRule(lines, "oi", "", "", "(D|o|i)");
addRule(lines, "ui", "", "", "(D|u|i)");
addRule(lines, "a", "", "", "(a|o)");
addRule(lines, "e", "", "", "i");
addRule(lines, "E", "", "[fklmnprsStv]$", "i");
addRule(lines, "E", "", "ts$", "i");
addRule(lines, "E", "[aoiuQ]", "", "i");
addRule(lines, "E", "", "[aoQ]", "i");
addRule(lines, "E", "", "", "(Y|i)");
}
private static void addAshApproxPolish(final Map> lines) {
addRule(lines, "aiB", "", "[bp]", "(D|Dm)");
addRule(lines, "oiB", "", "[bp]", "(D|Dm)");
addRule(lines, "uiB", "", "[bp]", "(D|Dm)");
addRule(lines, "eiB", "", "[bp]", "(D|Dm)");
addRule(lines, "EiB", "", "[bp]", "(D|Dm)");
addRule(lines, "iiB", "", "[bp]", "(D|Dm)");
addRule(lines, "IiB", "", "[bp]", "(D|Dm)");
addRule(lines, "aiB", "", "[dgkstvz]", "(D|Dn)");
addRule(lines, "oiB", "", "[dgkstvz]", "(D|Dn)");
addRule(lines, "uiB", "", "[dgkstvz]", "(D|Dn)");
addRule(lines, "eiB", "", "[dgkstvz]", "(D|Dn)");
addRule(lines, "EiB", "", "[dgkstvz]", "(D|Dn)");
addRule(lines, "iiB", "", "[dgkstvz]", "(D|Dn)");
addRule(lines, "IiB", "", "[dgkstvz]", "(D|Dn)");
addRule(lines, "B", "", "[bp]", "(o|om|im)");
addRule(lines, "B", "", "[dgkstvz]", "(o|on|in)");
addRule(lines, "B", "", "", "o");
addRule(lines, "aiF", "", "[bp]", "(D|Dm)");
addRule(lines, "oiF", "", "[bp]", "(D|Dm)");
addRule(lines, "uiF", "", "[bp]", "(D|Dm)");
addRule(lines, "eiF", "", "[bp]", "(D|Dm)");
addRule(lines, "EiF", "", "[bp]", "(D|Dm)");
addRule(lines, "iiF", "", "[bp]", "(D|Dm)");
addRule(lines, "IiF", "", "[bp]", "(D|Dm)");
addRule(lines, "aiF", "", "[dgkstvz]", "(D|Dn)");
addRule(lines, "oiF", "", "[dgkstvz]", "(D|Dn)");
addRule(lines, "uiF", "", "[dgkstvz]", "(D|Dn)");
addRule(lines, "eiF", "", "[dgkstvz]", "(D|Dn)");
addRule(lines, "EiF", "", "[dgkstvz]", "(D|Dn)");
addRule(lines, "iiF", "", "[dgkstvz]", "(D|Dn)");
addRule(lines, "IiF", "", "[dgkstvz]", "(D|Dn)");
addRule(lines, "F", "", "[bp]", "(i|im|om)");
addRule(lines, "F", "", "[dgkstvz]", "(i|in|on)");
addRule(lines, "F", "", "", "i");
addRule(lines, "P", "", "", "(o|u)");
addRule(lines, "I", "", "$", "i");
addRule(lines, "I", "", "[^k]$", "i");
addRule(lines, "Ik", "[lr]", "$", "(ik|Qk)");
addRule(lines, "Ik", "", "$", "ik");
addRule(lines, "sIts", "", "$", "(sits|sQts)");
addRule(lines, "Its", "", "$", "its");
addRule(lines, "I", "[aeiAEBFIou]", "", "i");
addRule(lines, "I", "", "", "(i|Q)");
addRule(lines, "au", "", "", "(D|a|u)");
addRule(lines, "ou", "", "", "(D|o|u)");
addRule(lines, "ai", "", "", "(D|a|i)");
addRule(lines, "oi", "", "", "(D|o|i)");
addRule(lines, "ui", "", "", "(D|u|i)");
addRule(lines, "a", "", "", "(a|o)");
addRule(lines, "e", "", "", "i");
addRule(lines, "E", "", "[fklmnprst]$", "i");
addRule(lines, "E", "", "ts$", "i");
addRule(lines, "E", "", "$", "i");
addRule(lines, "E", "[DaoiuQ]", "", "i");
addRule(lines, "E", "", "[aoQ]", "i");
addRule(lines, "E", "", "", "(Y|i)");
}
private static void addAshApproxGerman(final Map> lines) {
addRule(lines, "I", "", "$", "i");
addRule(lines, "I", "[aeiAEIOUouQY]", "", "i");
addRule(lines, "I", "", "[^k]$", "i");
addRule(lines, "Ik", "[lr]", "$", "(ik|Qk)");
addRule(lines, "Ik", "", "$", "ik");
addRule(lines, "sIts", "", "$", "(sits|sQts)");
addRule(lines, "Its", "", "$", "its");
addRule(lines, "I", "", "", "(Q|i)");
addRule(lines, "AU", "", "", "(D|a|u)");
addRule(lines, "aU", "", "", "(D|a|u)");
addRule(lines, "Au", "", "", "(D|a|u)");
addRule(lines, "au", "", "", "(D|a|u)");
addRule(lines, "ou", "", "", "(D|o|u)");
addRule(lines, "OU", "", "", "(D|o|u)");
addRule(lines, "oU", "", "", "(D|o|u)");
addRule(lines, "Ou", "", "", "(D|o|u)");
addRule(lines, "ai", "", "", "(D|a|i)");
addRule(lines, "Ai", "", "", "(D|a|i)");
addRule(lines, "oi", "", "", "(D|o|i)");
addRule(lines, "Oi", "", "", "(D|o|i)");
addRule(lines, "ui", "", "", "(D|u|i)");
addRule(lines, "Ui", "", "", "(D|u|i)");
addRule(lines, "e", "", "", "i");
addRule(lines, "E", "", "[fklmnprst]$", "i");
addRule(lines, "E", "", "ts$", "i");
addRule(lines, "E", "", "$", "i");
addRule(lines, "E", "[DaoAOUiuQY]", "", "i");
addRule(lines, "E", "", "[aoAOQY]", "i");
addRule(lines, "E", "", "", "(Y|i)");
addRule(lines, "O", "", "$", "o");
addRule(lines, "O", "", "[fklmnprst]$", "o");
addRule(lines, "O", "", "ts$", "o");
addRule(lines, "O", "[aoAOUeiuQY]", "", "o");
addRule(lines, "O", "", "", "(o|Y)");
addRule(lines, "a", "", "", "(a|o)");
addRule(lines, "A", "", "$", "(a|o)");
addRule(lines, "A", "", "[fklmnprst]$", "(a|o)");
addRule(lines, "A", "", "ts$", "(a|o)");
addRule(lines, "A", "[aoeOUiuQY]", "", "(a|o)");
addRule(lines, "A", "", "", "(a|o|Y)");
addRule(lines, "U", "", "$", "u");
addRule(lines, "U", "[DaoiuUQY]", "", "u");
addRule(lines, "U", "", "[^k]$", "u");
addRule(lines, "Uk", "[lr]", "$", "(uk|Qk)");
addRule(lines, "Uk", "", "$", "uk");
addRule(lines, "sUts", "", "$", "(suts|sQts)");
addRule(lines, "Uts", "", "$", "uts");
addRule(lines, "U", "", "", "(u|Q)");
}
/**
 * Feeds the rule table below to {@code addRule}, one call per row, in the listed order.
 * <p>
 * NOTE(review): the four columns are presumably pattern, left context, right context and
 * phoneme expression - confirm against {@code addRule}. Many rows carry an empty last
 * column; what an empty value means is decided by {@code addRule} and its consumers.
 *
 * @param lines rule container passed unchanged to every {@code addRule} call
 */
private static void addAshExactApproxCommon(final Map> lines) {
    final String[][] rows = {
        {"h", "", "$", ""},
        {"b", "", "[fktSs]", "p"},
        {"b", "", "p", ""},
        {"b", "", "$", "p"},
        {"p", "", "[gdZz]", "b"},
        {"p", "", "b", ""},
        {"v", "", "[pktSs]", "f"},
        {"v", "", "f", ""},
        {"v", "", "$", "f"},
        {"f", "", "[bgdZz]", "v"},
        {"f", "", "v", ""},
        {"g", "", "[pftSs]", "k"},
        {"g", "", "k", ""},
        {"g", "", "$", "k"},
        {"k", "", "[bdZz]", "g"},
        {"k", "", "g", ""},
        {"d", "", "[pfkSs]", "t"},
        {"d", "", "t", ""},
        {"d", "", "$", "t"},
        {"t", "", "[bgZz]", "d"},
        {"t", "", "d", ""},
        {"s", "", "dZ", ""},
        {"s", "", "tS", ""},
        {"z", "", "[pfkSt]", "s"},
        {"z", "", "[sSzZ]", ""},
        {"s", "", "[sSzZ]", ""},
        {"Z", "", "[sSzZ]", ""},
        {"S", "", "[sSzZ]", ""},
        {"jnm", "", "", "jm"},
        {"ji", "^", "", "i"},
        {"jI", "^", "", "I"},
        {"a", "", "[aAB]", ""},
        {"a", "[AB]", "", ""},
        {"A", "", "A", ""},
        {"B", "", "B", ""},
        {"b", "", "b", ""},
        {"d", "", "d", ""},
        {"f", "", "f", ""},
        {"g", "", "g", ""},
        {"k", "", "k", ""},
        {"l", "", "l", ""},
        {"m", "", "m", ""},
        {"n", "", "n", ""},
        {"p", "", "p", ""},
        {"r", "", "r", ""},
        {"t", "", "t", ""},
        {"v", "", "v", ""},
        {"z", "", "z", ""},
    };
    for (final String[] row : rows) {
        addRule(lines, row[0], row[1], row[2], row[3]);
    }
}
/**
 * Registers nine single-letter rules through {@code addRule}, in the listed order.
 * Every rule passes empty strings for the second and third rule arguments, so the
 * table only stores the first and last argument of each call.
 *
 * @param lines rule container passed unchanged to every {@code addRule} call
 */
private static void addAshExactAny(final Map> lines) {
    final String[][] pairs = {
        {"A", "a"},
        {"B", "a"},
        {"E", "e"},
        {"F", "e"},
        {"I", "i"},
        {"O", "o"},
        {"P", "o"},
        {"U", "u"},
        {"J", "l"},
    };
    for (final String[] pair : pairs) {
        addRule(lines, pair[0], "", "", pair[1]);
    }
}
/**
 * Feeds the ten rule rows below to {@code addRule}, one call per row, in the listed order.
 * <p>
 * NOTE(review): the four columns are presumably pattern, left context, right context and
 * phoneme expression - confirm against {@code addRule}.
 *
 * @param lines rule container passed unchanged to every {@code addRule} call
 */
private static void addAshExactCommon(final Map> lines) {
    final String[][] rows = {
        {"H", "", "", "h"},
        {"s", "[^t]", "[bgZd]", "z"},
        {"Z", "", "[pfkst]", "S"},
        {"Z", "", "$", "S"},
        {"S", "", "[bgzd]", "Z"},
        {"z", "", "$", "s"},
        {"ji", "[aAoOeEiIuU]", "", "j"},
        {"jI", "[aAoOeEiIuU]", "", "j"},
        {"je", "[aAoOeEiIuU]", "", "j"},
        {"jE", "[aAoOeEiIuU]", "", "j"},
    };
    for (final String[] row : rows) {
        addRule(lines, row[0], row[1], row[2], row[3]);
    }
}
/**
 * Registers two rules through {@code addRule}: "E" to "e", then "I" to "i".
 * Both calls pass empty strings for the second and third rule arguments.
 *
 * @param lines rule container passed unchanged to every {@code addRule} call
 */
private static void addAshExactRussian(final Map> lines) {
    final String[][] pairs = {
        {"E", "e"},
        {"I", "i"},
    };
    for (final String[] pair : pairs) {
        addRule(lines, pair[0], "", "", pair[1]);
    }
}
/**
 * Registers five single-letter rules through {@code addRule}, in the listed order.
 * Every call passes empty strings for the second and third rule arguments, so the
 * table only stores the first and last argument of each call.
 *
 * @param lines rule container passed unchanged to every {@code addRule} call
 */
private static void addAshExactPolish(final Map> lines) {
    final String[][] pairs = {
        {"B", "a"},
        {"F", "e"},
        {"P", "o"},
        {"E", "e"},
        {"I", "i"},
    };
    for (final String[] pair : pairs) {
        addRule(lines, pair[0], "", "", pair[1]);
    }
}
/**
 * Feeds the rule table below to {@code addRule}, one call per row, in the listed order.
 * <p>
 * NOTE(review): the four columns are presumably pattern, left context, right context and
 * phoneme expression, with bracketed suffixes such as {@code [polish]} acting as language
 * qualifiers - confirm against {@code addRule} and the rule parser.
 * The table is reproduced verbatim, including rows that repeat an earlier
 * pattern/context combination, because rule precedence may depend on the order.
 *
 * @param lines rule container passed unchanged to every {@code addRule} call
 */
private static void addAshRulesAny(final Map> lines) {
    final String[][] rows = {
        {"yna", "", "$", "(in[russian]|ina)"},
        {"ina", "", "$", "(in[russian]|ina)"},
        {"liova", "", "$", "(lof[russian]|lef[russian]|lova)"},
        {"lova", "", "$", "(lof[russian]|lef[russian]|lova)"},
        {"ova", "", "$", "(of[russian]|ova)"},
        {"eva", "", "$", "(ef[russian]|eva)"},
        {"aia", "", "$", "(aja|i[russian])"},
        {"aja", "", "$", "(aja|i[russian])"},
        {"aya", "", "$", "(aja|i[russian])"},
        {"lowa", "", "$", "(lova|lof[polish]|l[polish]|el[polish])"},
        {"kowa", "", "$", "(kova|kof[polish]|k[polish]|ek[polish])"},
        {"owa", "", "$", "(ova|of[polish]|)"},
        {"lowna", "", "$", "(lovna|levna|l[polish]|el[polish])"},
        {"kowna", "", "$", "(kovna|k[polish]|ek[polish])"},
        {"owna", "", "$", "(ovna|[polish])"},
        {"lówna", "", "$", "(l|el[polish])"},
        {"kówna", "", "$", "(k|ek[polish])"},
        {"ówna", "", "$", ""},
        {"a", "", "$", "(a|i[polish])"},
        {"rh", "^", "", "r"},
        {"ssch", "", "", "S"},
        {"chsch", "", "", "xS"},
        {"tsch", "", "", "tS"},
        {"sch", "", "[ei]", "(sk[romanian]|S|StS[russian])"},
        {"sch", "", "", "(S|StS[russian])"},
        {"ssh", "", "", "S"},
        {"sh", "", "[äöü]", "sh"},
        {"sh", "", "[aeiou]", "(S[russian+english]|sh)"},
        {"sh", "", "", "S"},
        {"kh", "", "", "(x[russian+english]|kh)"},
        {"chs", "", "", "(ks[german]|xs|tSs[russian+english])"},
        {"ch", "", "[ei]", "(x|k[romanian]|tS[russian+english])"},
        {"ch", "", "", "(x|tS[russian+english])"},
        {"ck", "", "", "(k|tsk[polish])"},
        {"czy", "", "", "tSi"},
        {"cze", "", "[bcdgkpstwzż]", "(tSe|tSF)"},
        {"ciewicz", "", "", "(tsevitS|tSevitS)"},
        {"siewicz", "", "", "(sevitS|SevitS)"},
        {"ziewicz", "", "", "(zevitS|ZevitS)"},
        {"riewicz", "", "", "rjevitS"},
        {"diewicz", "", "", "djevitS"},
        {"tiewicz", "", "", "tjevitS"},
        {"iewicz", "", "", "evitS"},
        {"ewicz", "", "", "evitS"},
        {"owicz", "", "", "ovitS"},
        {"icz", "", "", "itS"},
        {"cz", "", "", "tS"},
        {"cia", "", "[bcdgkpstwzż]", "(tSB[polish]|tsB)"},
        {"cia", "", "", "(tSa[polish]|tsa)"},
        {"cią", "", "[bp]", "(tSom[polish]|tsom)"},
        {"cią", "", "", "(tSon[polish]|tson)"},
        {"cię", "", "[bp]", "(tSem[polish]|tsem)"},
        {"cię", "", "", "(tSen[polish]|tsen)"},
        {"cie", "", "[bcdgkpstwzż]", "(tSF[polish]|tsF)"},
        {"cie", "", "", "(tSe[polish]|tse)"},
        {"cio", "", "", "(tSo[polish]|tso)"},
        {"ciu", "", "", "(tSu[polish]|tsu)"},
        {"ci", "", "$", "(tsi[polish]|tSi[polish+romanian]|tS[romanian]|si)"},
        {"ci", "", "", "(tsi[polish]|tSi[polish+romanian]|si)"},
        {"ce", "", "[bcdgkpstwzż]", "(tsF[polish]|tSe[polish+romanian]|se)"},
        {"ce", "", "", "(tSe[polish+romanian]|tse[polish]|se)"},
        {"cy", "", "", "(si|tsi[polish])"},
        {"ssz", "", "", "S"},
        {"sz", "", "", "S"},
        {"ssp", "", "", "(Sp[german]|sp)"},
        {"sp", "", "", "(Sp[german]|sp)"},
        {"sst", "", "", "(St[german]|st)"},
        {"st", "", "", "(St[german]|st)"},
        {"ss", "", "", "s"},
        {"sia", "", "[bcdgkpstwzż]", "(SB[polish]|sB[polish]|sja)"},
        {"sia", "", "", "(Sa[polish]|sja)"},
        {"sią", "", "[bp]", "(Som[polish]|som)"},
        {"sią", "", "", "(Son[polish]|son)"},
        {"się", "", "[bp]", "(Sem[polish]|sem)"},
        {"się", "", "", "(Sen[polish]|sen)"},
        {"sie", "", "[bcdgkpstwzż]", "(SF[polish]|sF|zi[german])"},
        {"sie", "", "", "(se|Se[polish]|zi[german])"},
        {"sio", "", "", "(So[polish]|so)"},
        {"siu", "", "", "(Su[polish]|sju)"},
        {"si", "", "", "(Si[polish]|si|zi[german])"},
        {"s", "", "[aeiouäöë]", "(s|z[german])"},
        {"gue", "", "", "ge"},
        {"gui", "", "", "gi"},
        {"guy", "", "", "gi"},
        {"gh", "", "[ei]", "(g[romanian]|gh)"},
        {"gauz", "", "$", "haus"},
        {"gaus", "", "$", "haus"},
        {"gol'ts", "", "$", "holts"},
        {"golts", "", "$", "holts"},
        {"gol'tz", "", "$", "holts"},
        {"goltz", "", "", "holts"},
        {"gol'ts", "^", "", "holts"},
        {"golts", "^", "", "holts"},
        {"gol'tz", "^", "", "holts"},
        {"goltz", "^", "", "holts"},
        {"gendler", "", "$", "hendler"},
        {"gejmer", "", "$", "hajmer"},
        {"gejm", "", "$", "hajm"},
        {"geymer", "", "$", "hajmer"},
        {"geym", "", "$", "hajm"},
        {"geimer", "", "$", "hajmer"},
        {"geim", "", "$", "hajm"},
        {"gof", "", "$", "hof"},
        {"ger", "", "$", "ger"},
        {"gen", "", "$", "gen"},
        {"gin", "", "$", "gin"},
        {"gie", "", "$", "(ge|gi[german]|ji[french])"},
        {"gie", "", "", "ge"},
        {"ge", "[yaeiou]", "", "(gE|xe[spanish]|dZe[english+romanian])"},
        {"gi", "[yaeiou]", "", "(gI|xi[spanish]|dZi[english+romanian])"},
        {"ge", "", "", "(gE|dZe[english+romanian]|hE[russian]|xe[spanish])"},
        {"gi", "", "", "(gI|dZi[english+romanian]|hI[russian]|xi[spanish])"},
        {"gy", "", "[aeouáéóúüöőű]", "(gi|dj[hungarian])"},
        {"gy", "", "", "(gi|d[hungarian])"},
        {"g", "[jyaeiou]", "[aouyei]", "g"},
        {"g", "", "[aouei]", "(g|h[russian])"},
        {"ej", "", "", "(aj|eZ[french+romanian]|ex[spanish])"},
        {"ej", "", "", "aj"},
        {"ly", "", "[au]", "l"},
        {"li", "", "[au]", "l"},
        {"lj", "", "[au]", "l"},
        {"lio", "", "", "(lo|le[russian])"},
        {"lyo", "", "", "(lo|le[russian])"},
        {"ll", "", "", "(l|J[spanish])"},
        {"j", "", "[aoeiuy]", "(j|dZ[english]|x[spanish]|Z[french+romanian])"},
        {"j", "", "", "(j|x[spanish])"},
        {"pf", "", "", "(pf|p|f)"},
        {"ph", "", "", "(ph|f)"},
        {"qu", "", "", "(kv[german]|k)"},
        {"rze", "t", "", "(Se[polish]|re)"},
        {"rze", "", "", "(rze|rtsE[german]|Ze[polish]|re[polish]|rZe[polish])"},
        {"rzy", "t", "", "(Si[polish]|ri)"},
        {"rzy", "", "", "(Zi[polish]|ri[polish]|rZi)"},
        {"rz", "t", "", "(S[polish]|r)"},
        {"rz", "", "", "(rz|rts[german]|Z[polish]|r[polish]|rZ[polish])"},
        {"tz", "", "$", "(ts|tS[english+german])"},
        {"tz", "^", "", "(ts|tS[english+german])"},
        {"tz", "", "", "(ts[english+german+russian]|tz)"},
        {"zh", "", "", "(Z|zh[polish]|tsh[german])"},
        {"zia", "", "[bcdgkpstwzż]", "(ZB[polish]|zB[polish]|zja)"},
        {"zia", "", "", "(Za[polish]|zja)"},
        {"zią", "", "[bp]", "(Zom[polish]|zom)"},
        {"zią", "", "", "(Zon[polish]|zon)"},
        {"zię", "", "[bp]", "(Zem[polish]|zem)"},
        {"zię", "", "", "(Zen[polish]|zen)"},
        {"zie", "", "[bcdgkpstwzż]", "(ZF[polish]|zF[polish]|ze|tsi[german])"},
        {"zie", "", "", "(ze|Ze[polish]|tsi[german])"},
        {"zio", "", "", "(Zo[polish]|zo)"},
        {"ziu", "", "", "(Zu[polish]|zju)"},
        {"zi", "", "", "(Zi[polish]|zi|tsi[german])"},
        {"thal", "", "$", "tal"},
        {"th", "^", "", "t"},
        {"th", "", "[aeiou]", "(t[german]|th)"},
        {"th", "", "", "t"},
        {"vogel", "", "", "(vogel|fogel[german])"},
        {"v", "^", "", "(v|f[german])"},
        {"h", "[aeiouyäöü]", "", ""},
        {"h", "", "", "(h|x[romanian+polish])"},
        {"h", "^", "", "(h|H[english+german])"},
        {"yi", "^", "", "i"},
        {"e", "in", "$", "(e|[french])"},
        {"ii", "", "$", "i"},
        {"iy", "", "$", "i"},
        {"yy", "", "$", "i"},
        {"yi", "", "$", "i"},
        {"yj", "", "$", "i"},
        {"ij", "", "$", "i"},
        {"aue", "", "", "aue"},
        {"oue", "", "", "oue"},
        {"au", "", "", "(au|o[french])"},
        {"ou", "", "", "(ou|u[french])"},
        {"ue", "", "", "(Q|uje[russian])"},
        {"ae", "", "", "(Y[german]|aje[russian]|ae)"},
        {"oe", "", "", "(Y[german]|oje[russian]|oe)"},
        {"ee", "", "", "(i[english]|aje[russian]|e)"},
        {"ei", "", "", "aj"},
        {"ey", "", "", "aj"},
        {"eu", "", "", "(aj[german]|oj[german]|eu)"},
        {"i", "[aou]", "", "j"},
        {"y", "[aou]", "", "j"},
        {"ie", "", "[bcdgkpstwzż]", "(i[german]|e[polish]|ije[russian]|je)"},
        {"ie", "", "", "(i[german]|e[polish]|ije[russian]|je)"},
        {"ye", "", "", "(je|ije[russian])"},
        {"i", "", "[au]", "j"},
        {"y", "", "[au]", "j"},
        {"io", "", "", "(jo|e[russian])"},
        {"yo", "", "", "(jo|e[russian])"},
        {"ea", "", "", "(ea|ja[romanian])"},
        {"e", "^", "", "(e|je[russian])"},
        {"oo", "", "", "(u[english]|o)"},
        {"uu", "", "", "u"},
        {"ć", "", "", "(tS[polish]|ts)"},
        {"ł", "", "", "l"},
        {"ń", "", "", "n"},
        {"ñ", "", "", "(n|nj[spanish])"},
        {"ś", "", "", "(S[polish]|s)"},
        {"ş", "", "", "S"},
        {"ţ", "", "", "ts"},
        {"ż", "", "", "Z"},
        {"ź", "", "", "(Z[polish]|z)"},
        {"où", "", "", "u"},
        {"ą", "", "[bp]", "om"},
        {"ą", "", "", "on"},
        {"ä", "", "", "(Y|e)"},
        {"á", "", "", "a"},
        {"ă", "", "", "(e[romanian]|a)"},
        {"à", "", "", "a"},
        {"â", "", "", "a"},
        {"é", "", "", "e"},
        {"è", "", "", "e"},
        {"ê", "", "", "e"},
        {"ę", "", "[bp]", "em"},
        {"ę", "", "", "en"},
        {"í", "", "", "i"},
        {"î", "", "", "i"},
        {"ö", "", "", "Y"},
        {"ő", "", "", "Y"},
        {"ó", "", "", "(u[polish]|o)"},
        {"ű", "", "", "Q"},
        {"ü", "", "", "Q"},
        {"ú", "", "", "u"},
        {"ű", "", "", "Q"},
        {"ß", "", "", "s"},
        {"'", "", "", ""},
        {"\"", "", "", ""},
        {"a", "", "[bcdgkpstwzż]", "(A|B[polish])"},
        {"e", "", "[bcdgkpstwzż]", "(E|F[polish])"},
        {"o", "", "[bcćdgklłmnńrsśtwzźż]", "(O|P[polish])"},
        {"a", "", "", "A"},
        {"b", "", "", "b"},
        {"c", "", "", "(k|ts[polish])"},
        {"d", "", "", "d"},
        {"e", "", "", "E"},
        {"f", "", "", "f"},
        {"g", "", "", "g"},
        {"h", "", "", "h"},
        {"i", "", "", "I"},
        {"j", "", "", "j"},
        {"k", "", "", "k"},
        {"l", "", "", "l"},
        {"m", "", "", "m"},
        {"n", "", "", "n"},
        {"o", "", "", "O"},
        {"p", "", "", "p"},
        {"q", "", "", "k"},
        {"r", "", "", "r"},
        {"s", "", "", "s"},
        {"t", "", "", "t"},
        {"u", "", "", "U"},
        {"v", "", "", "v"},
        {"w", "", "", "v"},
        {"x", "", "", "ks"},
        {"y", "", "", "i"},
        {"z", "", "", "(ts[german]|z)"},
    };
    for (final String[] row : rows) {
        addRule(lines, row[0], row[1], row[2], row[3]);
    }
}
/**
 * Feeds the rule table below to {@code addRule}, one call per row, in the listed order.
 * The first three columns use Cyrillic letters; the last column uses Latin letters.
 * <p>
 * NOTE(review): the four columns are presumably pattern, left context, right context and
 * phoneme expression - confirm against {@code addRule}. Row order is preserved because
 * rule precedence may depend on it.
 *
 * @param lines rule container passed unchanged to every {@code addRule} call
 */
private static void addAshRulesCyrillic(final Map> lines) {
    final String[][] rows = {
        {"ця", "", "", "tsa"},
        {"цю", "", "", "tsu"},
        {"циа", "", "", "tsa"},
        {"цие", "", "", "tse"},
        {"цио", "", "", "tso"},
        {"циу", "", "", "tsu"},
        {"сие", "", "", "se"},
        {"сио", "", "", "so"},
        {"зие", "", "", "ze"},
        {"зио", "", "", "zo"},
        {"гауз", "", "$", "haus"},
        {"гаус", "", "$", "haus"},
        {"гольц", "", "$", "holts"},
        {"геймер", "", "$", "hajmer"},
        {"гейм", "", "$", "hajm"},
        {"гоф", "", "$", "hof"},
        {"гер", "", "$", "ger"},
        {"ген", "", "$", "gen"},
        {"гин", "", "$", "gin"},
        {"г", "(й|ё|я|ю|ы|а|е|о|и|у)", "(а|е|о|и|у)", "g"},
        {"г", "", "(а|е|о|и|у)", "(g|h)"},
        {"ля", "", "", "la"},
        {"лю", "", "", "lu"},
        {"лё", "", "", "(le|lo)"},
        {"лио", "", "", "(le|lo)"},
        {"ле", "", "", "(lE|lo)"},
        {"ийе", "", "", "je"},
        {"ие", "", "", "je"},
        {"ыйе", "", "", "je"},
        {"ые", "", "", "je"},
        {"ий", "", "(а|о|у)", "j"},
        {"ый", "", "(а|о|у)", "j"},
        {"ий", "", "$", "i"},
        {"ый", "", "$", "i"},
        {"ё", "", "", "(e|jo)"},
        {"ей", "^", "", "(jaj|aj)"},
        {"е", "(а|е|о|у)", "", "je"},
        {"е", "^", "", "je"},
        {"эй", "", "", "aj"},
        {"ей", "", "", "aj"},
        {"ауе", "", "", "aue"},
        {"ауэ", "", "", "aue"},
        {"а", "", "", "a"},
        {"б", "", "", "b"},
        {"в", "", "", "v"},
        {"г", "", "", "g"},
        {"д", "", "", "d"},
        {"е", "", "", "E"},
        {"ж", "", "", "Z"},
        {"з", "", "", "z"},
        {"и", "", "", "I"},
        {"й", "", "", "j"},
        {"к", "", "", "k"},
        {"л", "", "", "l"},
        {"м", "", "", "m"},
        {"н", "", "", "n"},
        {"о", "", "", "o"},
        {"п", "", "", "p"},
        {"р", "", "", "r"},
        {"с", "", "с", ""},
        {"с", "", "", "s"},
        {"т", "", "", "t"},
        {"у", "", "", "u"},
        {"ф", "", "", "f"},
        {"х", "", "", "x"},
        {"ц", "", "", "ts"},
        {"ч", "", "", "tS"},
        {"ш", "", "", "S"},
        {"щ", "", "", "StS"},
        {"ъ", "", "", ""},
        {"ы", "", "", "I"},
        {"ь", "", "", ""},
        {"э", "", "", "E"},
        {"ю", "", "", "ju"},
        {"я", "", "", "ja"},
    };
    for (final String[] row : rows) {
        addRule(lines, row[0], row[1], row[2], row[3]);
    }
}
/**
 * Feeds the rule table below to {@code addRule}, one call per row, in the listed order.
 * <p>
 * NOTE(review): the four columns are presumably pattern, left context, right context and
 * phoneme expression - confirm against {@code addRule}. The table is reproduced verbatim,
 * including rows whose pattern and left context repeat an earlier row, because rule
 * precedence may depend on the order.
 *
 * @param lines rule container passed unchanged to every {@code addRule} call
 */
private static void addAshRulesEnglish(final Map> lines) {
    final String[][] rows = {
        {"tch", "", "", "tS"},
        {"ch", "", "", "(tS|x)"},
        {"ck", "", "", "k"},
        {"cc", "", "[iey]", "ks"},
        {"c", "", "c", ""},
        {"c", "", "[iey]", "s"},
        {"c", "", "", "k"},
        {"gh", "^", "", "g"},
        {"gh", "", "", "(g|f|w)"},
        {"gn", "", "", "(gn|n)"},
        {"g", "", "[iey]", "(g|dZ)"},
        {"th", "", "", "t"},
        {"kh", "", "", "x"},
        {"ph", "", "", "f"},
        {"sch", "", "", "(S|sk)"},
        {"sh", "", "", "S"},
        {"who", "^", "", "hu"},
        {"wh", "^", "", "w"},
        {"h", "", "$", ""},
        {"h", "", "[^aeiou]", ""},
        {"h", "^", "", "H"},
        {"h", "", "", "h"},
        {"j", "", "", "dZ"},
        {"kn", "^", "", "n"},
        {"mb", "", "$", "m"},
        {"ng", "", "$", "(N|ng)"},
        {"pn", "^", "", "(pn|n)"},
        {"ps", "^", "", "(ps|s)"},
        {"qu", "", "", "kw"},
        {"q", "", "", "k"},
        {"tia", "", "", "(So|Sa)"},
        {"tio", "", "", "So"},
        {"wr", "^", "", "r"},
        {"w", "", "", "(w|v)"},
        {"x", "^", "", "z"},
        {"x", "", "", "ks"},
        {"y", "^", "", "j"},
        {"y", "^", "[aeiouy]", "j"},
        {"yi", "^", "", "i"},
        {"aue", "", "", "aue"},
        {"oue", "", "", "(aue|oue)"},
        {"ai", "", "", "(aj|e)"},
        {"ay", "", "", "aj"},
        {"a", "", "[^aeiou]e", "aj"},
        {"a", "", "", "(e|o|a)"},
        {"ei", "", "", "(aj|i)"},
        {"ey", "", "", "(aj|i)"},
        {"ear", "", "", "ia"},
        {"ea", "", "", "(i|e)"},
        {"ee", "", "", "i"},
        {"e", "", "[^aeiou]e", "i"},
        {"e", "", "$", "(|E)"},
        {"e", "", "", "E"},
        {"ie", "", "", "i"},
        {"i", "", "[^aeiou]e", "aj"},
        {"i", "", "", "I"},
        {"oa", "", "", "ou"},
        {"oi", "", "", "oj"},
        {"oo", "", "", "u"},
        {"ou", "", "", "(u|ou)"},
        {"oy", "", "", "oj"},
        {"o", "", "[^aeiou]e", "ou"},
        {"o", "", "", "(o|a)"},
        {"u", "", "[^aeiou]e", "(ju|u)"},
        {"u", "", "r", "(e|u)"},
        {"u", "", "", "(u|a)"},
        {"y", "", "", "i"},
        {"b", "", "", "b"},
        {"d", "", "", "d"},
        {"f", "", "", "f"},
        {"g", "", "", "g"},
        {"k", "", "", "k"},
        {"l", "", "", "l"},
        {"m", "", "", "m"},
        {"n", "", "", "n"},
        {"p", "", "", "p"},
        {"r", "", "", "r"},
        {"s", "", "", "s"},
        {"t", "", "", "t"},
        {"v", "", "", "v"},
        {"z", "", "", "z"},
    };
    for (final String[] row : rows) {
        addRule(lines, row[0], row[1], row[2], row[3]);
    }
}
/**
 * Feeds the rule table below to {@code addRule}, one call per row, in the listed order.
 * <p>
 * NOTE(review): the four columns are presumably pattern, left context, right context and
 * phoneme expression - confirm against {@code addRule}. Row order is preserved because
 * rule precedence may depend on it.
 *
 * @param lines rule container passed unchanged to every {@code addRule} call
 */
private static void addAshRulesFrench(final Map> lines) {
    final String[][] rows = {
        {"kh", "", "", "x"},
        {"ph", "", "", "f"},
        {"ç", "", "", "s"},
        {"x", "", "", "ks"},
        {"ch", "", "", "S"},
        {"c", "", "[eiyéèê]", "s"},
        {"c", "", "", "k"},
        {"gn", "", "", "(n|gn)"},
        {"g", "", "[eiy]", "Z"},
        {"gue", "", "$", "k"},
        {"gu", "", "[eiy]", "g"},
        {"que", "", "$", "k"},
        {"qu", "", "", "k"},
        {"q", "", "", "k"},
        {"s", "[aeiouyéèê]", "[aeiouyéèê]", "z"},
        {"h", "[bdgt]", "", ""},
        {"h", "", "$", ""},
        {"j", "", "", "Z"},
        {"w", "", "", "v"},
        {"ouh", "", "[aioe]", "(v|uh)"},
        {"ou", "", "[aeio]", "v"},
        {"uo", "", "", "(vo|o)"},
        {"u", "", "[aeio]", "v"},
        {"aue", "", "", "aue"},
        {"eau", "", "", "o"},
        {"ai", "", "", "aj"},
        {"ay", "", "", "aj"},
        {"é", "", "", "e"},
        {"ê", "", "", "e"},
        {"è", "", "", "e"},
        {"à", "", "", "a"},
        {"â", "", "", "a"},
        {"où", "", "", "u"},
        {"ou", "", "", "u"},
        {"oi", "", "", "oj"},
        {"ei", "", "", "aj"},
        {"ey", "", "", "aj"},
        {"y", "[ou]", "", "j"},
        {"e", "", "$", "(e|)"},
        {"i", "", "[aou]", "j"},
        {"y", "", "[aoeu]", "j"},
        {"y", "", "", "i"},
        {"a", "", "", "a"},
        {"b", "", "", "b"},
        {"d", "", "", "d"},
        {"e", "", "", "E"},
        {"f", "", "", "f"},
        {"g", "", "", "g"},
        {"h", "", "", "h"},
        {"i", "", "", "I"},
        {"k", "", "", "k"},
        {"l", "", "", "l"},
        {"m", "", "", "m"},
        {"n", "", "", "n"},
        {"o", "", "", "o"},
        {"p", "", "", "p"},
        {"r", "", "", "r"},
        {"s", "", "", "s"},
        {"t", "", "", "t"},
        {"u", "", "", "u"},
        {"v", "", "", "v"},
        {"z", "", "", "z"},
    };
    for (final String[] row : rows) {
        addRule(lines, row[0], row[1], row[2], row[3]);
    }
}
/**
 * Feeds the rule table below to {@code addRule}, one call per row, in the listed order.
 * <p>
 * NOTE(review): the four columns are presumably pattern, left context, right context and
 * phoneme expression - confirm against {@code addRule}. Row order is preserved because
 * rule precedence may depend on it.
 *
 * @param lines rule container passed unchanged to every {@code addRule} call
 */
private static void addAshRulesGerman(final Map> lines) {
    final String[][] rows = {
        {"ziu", "", "", "tsu"},
        {"zia", "", "", "tsa"},
        {"zio", "", "", "tso"},
        {"ssch", "", "", "S"},
        {"chsch", "", "", "xS"},
        {"ewitsch", "", "$", "evitS"},
        {"owitsch", "", "$", "ovitS"},
        {"evitsch", "", "$", "evitS"},
        {"ovitsch", "", "$", "ovitS"},
        {"witsch", "", "$", "vitS"},
        {"vitsch", "", "$", "vitS"},
        {"sch", "", "", "S"},
        {"chs", "", "", "ks"},
        {"ch", "", "", "x"},
        {"ck", "", "", "k"},
        {"c", "", "[eiy]", "ts"},
        {"sp", "^", "", "Sp"},
        {"st", "^", "", "St"},
        {"ssp", "", "", "(Sp|sp)"},
        {"sp", "", "", "(Sp|sp)"},
        {"sst", "", "", "(St|st)"},
        {"st", "", "", "(St|st)"},
        {"pf", "", "", "(pf|p|f)"},
        {"ph", "", "", "(ph|f)"},
        {"qu", "", "", "kv"},
        {"ewitz", "", "$", "(evits|evitS)"},
        {"ewiz", "", "$", "(evits|evitS)"},
        {"evitz", "", "$", "(evits|evitS)"},
        {"eviz", "", "$", "(evits|evitS)"},
        {"owitz", "", "$", "(ovits|ovitS)"},
        {"owiz", "", "$", "(ovits|ovitS)"},
        {"ovitz", "", "$", "(ovits|ovitS)"},
        {"oviz", "", "$", "(ovits|ovitS)"},
        {"witz", "", "$", "(vits|vitS)"},
        {"wiz", "", "$", "(vits|vitS)"},
        {"vitz", "", "$", "(vits|vitS)"},
        {"viz", "", "$", "(vits|vitS)"},
        {"tz", "", "", "ts"},
        {"thal", "", "$", "tal"},
        {"th", "^", "", "t"},
        {"th", "", "[äöüaeiou]", "(t|th)"},
        {"th", "", "", "t"},
        {"rh", "^", "", "r"},
        {"h", "[aeiouyäöü]", "", ""},
        {"h", "^", "", "H"},
        {"ss", "", "", "s"},
        {"s", "", "[äöüaeiouy]", "(z|s)"},
        {"s", "[aeiouyäöüj]", "[aeiouyäöü]", "z"},
        {"ß", "", "", "s"},
        {"ij", "", "$", "i"},
        {"aue", "", "", "aue"},
        {"ue", "", "", "Q"},
        {"ae", "", "", "Y"},
        {"oe", "", "", "Y"},
        {"ü", "", "", "Q"},
        {"ä", "", "", "(Y|e)"},
        {"ö", "", "", "Y"},
        {"ei", "", "", "aj"},
        {"ey", "", "", "aj"},
        {"eu", "", "", "(aj|oj)"},
        {"i", "[aou]", "", "j"},
        {"y", "[aou]", "", "j"},
        {"ie", "", "", "I"},
        {"i", "", "[aou]", "j"},
        {"y", "", "[aoeu]", "j"},
        {"ñ", "", "", "n"},
        {"ã", "", "", "a"},
        {"ő", "", "", "o"},
        {"ű", "", "", "u"},
        {"ç", "", "", "s"},
        {"a", "", "", "A"},
        {"b", "", "", "b"},
        {"c", "", "", "k"},
        {"d", "", "", "d"},
        {"e", "", "", "E"},
        {"f", "", "", "f"},
        {"g", "", "", "g"},
        {"h", "", "", "h"},
        {"i", "", "", "I"},
        {"j", "", "", "j"},
        {"k", "", "", "k"},
        {"l", "", "", "l"},
        {"m", "", "", "m"},
        {"n", "", "", "n"},
        {"o", "", "", "O"},
        {"p", "", "", "p"},
        {"q", "", "", "k"},
        {"r", "", "", "r"},
        {"s", "", "", "s"},
        {"t", "", "", "t"},
        {"u", "", "", "U"},
        {"v", "", "", "(f|v)"},
        {"w", "", "", "v"},
        {"x", "", "", "ks"},
        {"y", "", "", "i"},
        {"z", "", "", "ts"},
    };
    for (final String[] row : rows) {
        addRule(lines, row[0], row[1], row[2], row[3]);
    }
}
/**
 * Feeds the rule table below to {@code addRule}, one call per row, in the listed order.
 * The first column holds Hebrew letters; the last column holds Latin letters and digits.
 * <p>
 * NOTE(review): the four columns are presumably pattern, left context, right context and
 * phoneme expression - confirm against {@code addRule}. Row order is preserved because
 * rule precedence may depend on it.
 *
 * @param lines rule container passed unchanged to every {@code addRule} call
 */
private static void addAshRulesHebrew(final Map> lines) {
    final String[][] rows = {
        {"אי", "", "", "i"},
        {"עי", "", "", "i"},
        {"עו", "", "", "VV"},
        {"או", "", "", "VV"},
        {"ג׳", "", "", "Z"},
        {"ד׳", "", "", "dZ"},
        {"א", "", "", "L"},
        {"ב", "", "", "b"},
        {"ג", "", "", "g"},
        {"ד", "", "", "d"},
        {"ה", "^", "", "1"},
        {"ה", "", "$", "1"},
        {"ה", "", "", ""},
        {"וו", "", "", "V"},
        {"וי", "", "", "WW"},
        {"ו", "", "", "W"},
        {"ז", "", "", "z"},
        {"ח", "", "", "X"},
        {"ט", "", "", "T"},
        {"יי", "", "", "i"},
        {"י", "", "", "i"},
        {"ך", "", "", "X"},
        {"כ", "^", "", "K"},
        {"כ", "", "", "k"},
        {"ל", "", "", "l"},
        {"ם", "", "", "m"},
        {"מ", "", "", "m"},
        {"ן", "", "", "n"},
        {"נ", "", "", "n"},
        {"ס", "", "", "s"},
        {"ע", "", "", "L"},
        {"ף", "", "", "f"},
        {"פ", "", "", "f"},
        {"ץ", "", "", "C"},
        {"צ", "", "", "C"},
        {"ק", "", "", "K"},
        {"ר", "", "", "r"},
        {"ש", "", "", "s"},
        {"ת", "", "", "TB"},
    };
    for (final String[] row : rows) {
        addRule(lines, row[0], row[1], row[2], row[3]);
    }
}
/**
 * Feeds the rule table below to {@code addRule}, one call per row, in the listed order.
 * <p>
 * NOTE(review): the four columns are presumably pattern, left context, right context and
 * phoneme expression - confirm against {@code addRule}. Row order is preserved because
 * rule precedence may depend on it.
 *
 * @param lines rule container passed unchanged to every {@code addRule} call
 */
private static void addAshRulesHungarian(final Map> lines) {
    final String[][] rows = {
        {"sz", "", "", "s"},
        {"zs", "", "", "Z"},
        {"cs", "", "", "tS"},
        {"ay", "", "", "(oj|aj)"},
        {"ai", "", "", "(oj|aj)"},
        {"aj", "", "", "(oj|aj)"},
        {"ei", "", "", "aj"},
        {"ey", "", "", "aj"},
        {"y", "[áo]", "", "j"},
        {"i", "[áo]", "", "j"},
        {"ee", "", "", "(aj|e)"},
        {"ely", "", "", "(aj|eli)"},
        {"ly", "", "", "(j|li)"},
        {"gy", "", "[aeouáéóúüöőű]", "dj"},
        {"gy", "", "", "(d|gi)"},
        {"ny", "", "[aeouáéóúüöőű]", "nj"},
        {"ny", "", "", "(n|ni)"},
        {"ty", "", "[aeouáéóúüöőű]", "tj"},
        {"ty", "", "", "(t|ti)"},
        {"qu", "", "", "(ku|kv)"},
        {"h", "", "$", ""},
        {"á", "", "", "a"},
        {"é", "", "", "e"},
        {"í", "", "", "i"},
        {"ó", "", "", "o"},
        {"ö", "", "", "Y"},
        {"ő", "", "", "Y"},
        {"ú", "", "", "u"},
        {"ü", "", "", "Q"},
        {"ű", "", "", "Q"},
        {"a", "", "", "a"},
        {"b", "", "", "b"},
        {"c", "", "", "ts"},
        {"d", "", "", "d"},
        {"e", "", "", "E"},
        {"f", "", "", "f"},
        {"g", "", "", "g"},
        {"h", "", "", "h"},
        {"i", "", "", "I"},
        {"j", "", "", "j"},
        {"k", "", "", "k"},
        {"l", "", "", "l"},
        {"m", "", "", "m"},
        {"n", "", "", "n"},
        {"o", "", "", "o"},
        {"p", "", "", "p"},
        {"q", "", "", "k"},
        {"r", "", "", "r"},
        {"s", "", "", "(S|s)"},
        {"t", "", "", "t"},
        {"u", "", "", "u"},
        {"v", "", "", "v"},
        {"w", "", "", "v"},
        {"x", "", "", "ks"},
        {"y", "", "", "i"},
        {"z", "", "", "z"},
    };
    for (final String[] row : rows) {
        addRule(lines, row[0], row[1], row[2], row[3]);
    }
}
/**
 * Registers every row of the table below on {@code lines}, issuing one {@code addRule} call
 * per row in exactly the listed order.
 *
 * <p>Each row carries the four string arguments that follow {@code lines} in the
 * {@code addRule} call (presumably pattern, left context, right context and phoneme
 * expression - confirm against {@code addRule}). The method name suggests this is the "ash"
 * rule set for Polish. Several rows share a pattern and differ only in their context
 * strings, so the order of the rows must not be changed.</p>
 *
 * <p>NOTE(review): the parameter type reads {@code Map>} in this copy of the file; its
 * generic arguments look truncated and should be restored from the original source.</p>
 *
 * @param lines rule collection handed unchanged to every {@code addRule} call
 */
private static void addAshRulesPolish(final Map> lines) {
    final String[][] table = {
        {"ska", "", "$", "ski"},
        {"cka", "", "$", "tski"},
        {"lowa", "", "$", "(lova|lof|l|el)"},
        {"kowa", "", "$", "(kova|kof|k|ek)"},
        {"owa", "", "$", "(ova|of|)"},
        {"lowna", "", "$", "(lovna|levna|l|el)"},
        {"kowna", "", "$", "(kovna|k|ek)"},
        {"owna", "", "$", "(ovna|)"},
        {"lówna", "", "$", "(l|el)"},
        {"kówna", "", "$", "(k|ek)"},
        {"ówna", "", "$", ""},
        {"a", "", "$", "(a|i)"},
        {"czy", "", "", "tSi"},
        {"cze", "", "[bcdgkpstwzż]", "(tSe|tSF)"},
        {"ciewicz", "", "", "(tsevitS|tSevitS)"},
        {"siewicz", "", "", "(sevitS|SevitS)"},
        {"ziewicz", "", "", "(zevitS|ZevitS)"},
        {"riewicz", "", "", "rjevitS"},
        {"diewicz", "", "", "djevitS"},
        {"tiewicz", "", "", "tjevitS"},
        {"iewicz", "", "", "evitS"},
        {"ewicz", "", "", "evitS"},
        {"owicz", "", "", "ovitS"},
        {"icz", "", "", "itS"},
        {"cz", "", "", "tS"},
        {"ch", "", "", "x"},
        {"cia", "", "[bcdgkpstwzż]", "(tSB|tsB)"},
        {"cia", "", "", "(tSa|tsa)"},
        {"cią", "", "[bp]", "(tSom|tsom)"},
        {"cią", "", "", "(tSon|tson)"},
        {"cię", "", "[bp]", "(tSem|tsem)"},
        {"cię", "", "", "(tSen|tsen)"},
        {"cie", "", "[bcdgkpstwzż]", "(tSF|tsF)"},
        {"cie", "", "", "(tSe|tse)"},
        {"cio", "", "", "(tSo|tso)"},
        {"ciu", "", "", "(tSu|tsu)"},
        {"ci", "", "", "(tSi|tsI)"},
        {"ć", "", "", "(tS|ts)"},
        {"ssz", "", "", "S"},
        {"sz", "", "", "S"},
        {"sia", "", "[bcdgkpstwzż]", "(SB|sB|sja)"},
        {"sia", "", "", "(Sa|sja)"},
        {"sią", "", "[bp]", "(Som|som)"},
        {"sią", "", "", "(Son|son)"},
        {"się", "", "[bp]", "(Sem|sem)"},
        {"się", "", "", "(Sen|sen)"},
        {"sie", "", "[bcdgkpstwzż]", "(SF|sF|se)"},
        {"sie", "", "", "(Se|se)"},
        {"sio", "", "", "(So|so)"},
        {"siu", "", "", "(Su|sju)"},
        {"si", "", "", "(Si|sI)"},
        {"ś", "", "", "(S|s)"},
        {"zia", "", "[bcdgkpstwzż]", "(ZB|zB|zja)"},
        {"zia", "", "", "(Za|zja)"},
        {"zią", "", "[bp]", "(Zom|zom)"},
        {"zią", "", "", "(Zon|zon)"},
        {"zię", "", "[bp]", "(Zem|zem)"},
        {"zię", "", "", "(Zen|zen)"},
        {"zie", "", "[bcdgkpstwzż]", "(ZF|zF)"},
        {"zie", "", "", "(Ze|ze)"},
        {"zio", "", "", "(Zo|zo)"},
        {"ziu", "", "", "(Zu|zju)"},
        {"zi", "", "", "(Zi|zI)"},
        // NOTE(review): the next two rows have the same pattern and the same contexts and differ
        // only in the last argument; the second may have been meant for "źe" - confirm before
        // changing, since any edit here alters the registered rules.
        {"że", "", "[bcdgkpstwzż]", "(Ze|ZF)"},
        {"że", "", "[bcdgkpstwzż]", "(Ze|ZF|ze|zF)"},
        {"że", "", "", "Ze"},
        {"źe", "", "", "(Ze|ze)"},
        {"ży", "", "", "Zi"},
        {"źi", "", "", "(Zi|zi)"},
        {"ż", "", "", "Z"},
        {"ź", "", "", "(Z|z)"},
        {"rze", "t", "", "(Se|re)"},
        {"rze", "", "", "(Ze|re|rZe)"},
        {"rzy", "t", "", "(Si|ri)"},
        {"rzy", "", "", "(Zi|ri|rZi)"},
        {"rz", "t", "", "(S|r)"},
        {"rz", "", "", "(Z|r|rZ)"},
        {"lio", "", "", "(lo|le)"},
        {"ł", "", "", "l"},
        {"ń", "", "", "n"},
        {"qu", "", "", "k"},
        {"s", "", "s", ""},
        {"ó", "", "", "(u|o)"},
        {"ą", "", "[bp]", "om"},
        {"ę", "", "[bp]", "em"},
        {"ą", "", "", "on"},
        {"ę", "", "", "en"},
        {"ije", "", "", "je"},
        {"yje", "", "", "je"},
        {"iie", "", "", "je"},
        {"yie", "", "", "je"},
        {"iye", "", "", "je"},
        {"yye", "", "", "je"},
        {"ij", "", "[aou]", "j"},
        {"yj", "", "[aou]", "j"},
        {"ii", "", "[aou]", "j"},
        {"yi", "", "[aou]", "j"},
        {"iy", "", "[aou]", "j"},
        {"yy", "", "[aou]", "j"},
        {"rie", "", "", "rje"},
        {"die", "", "", "dje"},
        {"tie", "", "", "tje"},
        {"ie", "", "[bcdgkpstwzż]", "F"},
        {"ie", "", "", "e"},
        {"aue", "", "", "aue"},
        {"au", "", "", "au"},
        {"ei", "", "", "aj"},
        {"ey", "", "", "aj"},
        {"ej", "", "", "aj"},
        {"ai", "", "", "aj"},
        {"ay", "", "", "aj"},
        {"aj", "", "", "aj"},
        {"i", "[ou]", "", "j"},
        {"y", "[ou]", "", "j"},
        {"i", "", "[aou]", "j"},
        {"y", "", "[aeou]", "j"},
        {"a", "", "[bcdgkpstwzż]", "B"},
        {"e", "", "[bcdgkpstwzż]", "(E|F)"},
        {"o", "", "[bcćdgklłmnńrsśtwzźż]", "P"},
        // Single-character patterns with empty contexts.
        {"a", "", "", "a"},
        {"b", "", "", "b"},
        {"c", "", "", "ts"},
        {"d", "", "", "d"},
        {"e", "", "", "E"},
        {"f", "", "", "f"},
        {"g", "", "", "g"},
        {"h", "", "", "(h|x)"},
        {"i", "", "", "I"},
        {"j", "", "", "j"},
        {"k", "", "", "k"},
        {"l", "", "", "l"},
        {"m", "", "", "m"},
        {"n", "", "", "n"},
        {"o", "", "", "o"},
        {"p", "", "", "p"},
        {"q", "", "", "k"},
        {"r", "", "", "r"},
        {"s", "", "", "s"},
        {"t", "", "", "t"},
        {"u", "", "", "u"},
        {"v", "", "", "v"},
        {"w", "", "", "v"},
        {"x", "", "", "ks"},
        {"y", "", "", "I"},
        {"z", "", "", "z"},
    };
    for (final String[] row : table) {
        addRule(lines, row[0], row[1], row[2], row[3]);
    }
}
/**
 * Registers every row of the table below on {@code lines}, issuing one {@code addRule} call
 * per row in exactly the listed order.
 *
 * <p>Each row carries the four string arguments that follow {@code lines} in the
 * {@code addRule} call (presumably pattern, left context, right context and phoneme
 * expression - confirm against {@code addRule}). The method name suggests this is the "ash"
 * rule set for Romanian. Rows that share a pattern differ only in context, so the order of
 * the rows must not be changed.</p>
 *
 * <p>NOTE(review): the parameter type reads {@code Map>} in this copy of the file; its
 * generic arguments look truncated and should be restored from the original source.</p>
 *
 * @param lines rule collection handed unchanged to every {@code addRule} call
 */
private static void addAshRulesRomanian(final Map> lines) {
    final String[][] table = {
        {"j", "", "", "Z"},
        {"ce", "", "", "tSe"},
        {"ci", "", "", "(tSi|tS)"},
        {"ch", "", "[ei]", "k"},
        {"ch", "", "", "x"},
        {"c", "", "", "k"},
        {"gi", "", "", "(dZi|dZ)"},
        {"g", "", "[ei]", "dZ"},
        {"gh", "", "", "g"},
        {"ei", "", "", "aj"},
        {"i", "[aou]", "", "j"},
        {"i", "", "[aeou]", "j"},
        {"ţ", "", "", "ts"},
        {"ş", "", "", "S"},
        {"h", "", "", "(x|h)"},
        {"qu", "", "", "k"},
        {"q", "", "", "k"},
        {"w", "", "", "v"},
        {"x", "", "", "ks"},
        {"y", "", "", "i"},
        {"î", "", "", "i"},
        {"ea", "", "", "ja"},
        {"ă", "", "", "(e|a)"},
        {"aue", "", "", "aue"},
        // Single-character patterns with empty contexts.
        {"a", "", "", "a"},
        {"b", "", "", "b"},
        {"d", "", "", "d"},
        {"e", "", "", "E"},
        {"f", "", "", "f"},
        {"g", "", "", "g"},
        {"i", "", "", "I"},
        {"k", "", "", "k"},
        {"l", "", "", "l"},
        {"m", "", "", "m"},
        {"n", "", "", "n"},
        {"o", "", "", "o"},
        {"p", "", "", "p"},
        {"r", "", "", "r"},
        {"s", "", "", "s"},
        {"t", "", "", "t"},
        {"u", "", "", "u"},
        {"v", "", "", "v"},
        {"z", "", "", "z"},
    };
    for (final String[] row : table) {
        addRule(lines, row[0], row[1], row[2], row[3]);
    }
}
/**
 * Registers every row of the table below on {@code lines}, issuing one {@code addRule} call
 * per row in exactly the listed order.
 *
 * <p>Each row carries the four string arguments that follow {@code lines} in the
 * {@code addRule} call (presumably pattern, left context, right context and phoneme
 * expression - confirm against {@code addRule}). The method name suggests this is the "ash"
 * rule set for Russian. Rows that share a pattern differ only in context, so the order of
 * the rows must not be changed.</p>
 *
 * <p>NOTE(review): the parameter type reads {@code Map>} in this copy of the file; its
 * generic arguments look truncated and should be restored from the original source.</p>
 *
 * @param lines rule collection handed unchanged to every {@code addRule} call
 */
private static void addAshRulesRussian(final Map> lines) {
    final String[][] table = {
        {"yna", "", "$", "(in|ina)"},
        {"ina", "", "$", "(in|ina)"},
        {"liova", "", "$", "(lof|lef)"},
        {"lova", "", "$", "(lof|lef|lova)"},
        {"ova", "", "$", "(of|ova)"},
        {"eva", "", "$", "(ef|ova)"},
        {"aia", "", "$", "(aja|i)"},
        {"aja", "", "$", "(aja|i)"},
        {"aya", "", "$", "(aja|i)"},
        {"tsya", "", "", "tsa"},
        {"tsyu", "", "", "tsu"},
        {"tsia", "", "", "tsa"},
        {"tsie", "", "", "tse"},
        {"tsio", "", "", "tso"},
        {"tsye", "", "", "tse"},
        {"tsyo", "", "", "tso"},
        {"tsiu", "", "", "tsu"},
        {"sie", "", "", "se"},
        {"sio", "", "", "so"},
        {"zie", "", "", "ze"},
        {"zio", "", "", "zo"},
        {"sye", "", "", "se"},
        {"syo", "", "", "so"},
        {"zye", "", "", "ze"},
        {"zyo", "", "", "zo"},
        {"gauz", "", "$", "haus"},
        {"gaus", "", "$", "haus"},
        {"gol'ts", "", "$", "holts"},
        {"golts", "", "$", "holts"},
        {"gol'tz", "", "$", "holts"},
        {"goltz", "", "$", "holts"},
        {"gejmer", "", "$", "hajmer"},
        {"gejm", "", "$", "hajm"},
        {"geimer", "", "$", "hajmer"},
        {"geim", "", "$", "hajm"},
        {"geymer", "", "$", "hajmer"},
        {"geym", "", "$", "hajm"},
        {"gendler", "", "$", "hendler"},
        {"gof", "", "$", "hof"},
        {"gojf", "", "$", "hojf"},
        {"goyf", "", "$", "hojf"},
        {"goif", "", "$", "hojf"},
        {"ger", "", "$", "ger"},
        {"gen", "", "$", "gen"},
        {"gin", "", "$", "gin"},
        {"gg", "", "", "g"},
        {"g", "[jaeoiuy]", "[aeoiu]", "g"},
        {"g", "", "[aeoiu]", "(g|h)"},
        {"kh", "", "", "x"},
        {"ch", "", "", "(tS|x)"},
        {"sch", "", "", "(StS|S)"},
        {"ssh", "", "", "S"},
        {"sh", "", "", "S"},
        {"zh", "", "", "Z"},
        {"tz", "", "$", "ts"},
        {"tz", "", "", "(ts|tz)"},
        {"c", "", "[iey]", "s"},
        {"c", "", "", "k"},
        {"qu", "", "", "(kv|k)"},
        {"q", "", "", "k"},
        {"s", "", "s", ""},
        {"w", "", "", "v"},
        {"x", "", "", "ks"},
        {"lya", "", "", "la"},
        {"lyu", "", "", "lu"},
        {"lia", "", "", "la"},
        {"liu", "", "", "lu"},
        {"lja", "", "", "la"},
        {"lju", "", "", "lu"},
        {"le", "", "", "(lo|lE)"},
        {"lyo", "", "", "(lo|le)"},
        {"lio", "", "", "(lo|le)"},
        {"ije", "", "", "je"},
        {"ie", "", "", "je"},
        {"iye", "", "", "je"},
        {"iie", "", "", "je"},
        {"yje", "", "", "je"},
        {"ye", "", "", "je"},
        {"yye", "", "", "je"},
        {"yie", "", "", "je"},
        {"ij", "", "[aou]", "j"},
        {"iy", "", "[aou]", "j"},
        {"ii", "", "[aou]", "j"},
        {"yj", "", "[aou]", "j"},
        {"yy", "", "[aou]", "j"},
        {"yi", "", "[aou]", "j"},
        {"io", "", "", "(jo|e)"},
        {"i", "", "[au]", "j"},
        {"i", "[aou]", "", "j"},
        {"ei", "", "", "aj"},
        {"ey", "", "", "aj"},
        {"ej", "", "", "aj"},
        {"yo", "", "", "(jo|e)"},
        {"y", "", "[au]", "j"},
        {"y", "[aiou]", "", "j"},
        {"ii", "", "$", "i"},
        {"iy", "", "$", "i"},
        {"yy", "", "$", "i"},
        {"yi", "", "$", "i"},
        {"yj", "", "$", "i"},
        {"ij", "", "$", "i"},
        {"e", "^", "", "(je|E)"},
        {"ee", "", "", "(aje|i)"},
        {"e", "[aou]", "", "je"},
        {"y", "", "", "I"},
        {"oo", "", "", "(oo|u)"},
        // Apostrophe and double-quote patterns, each with an empty last argument.
        {"'", "", "", ""},
        {"\"", "", "", ""},
        {"aue", "", "", "aue"},
        // Single-character patterns with empty contexts.
        {"a", "", "", "a"},
        {"b", "", "", "b"},
        {"d", "", "", "d"},
        {"e", "", "", "E"},
        {"f", "", "", "f"},
        {"g", "", "", "g"},
        {"h", "", "", "h"},
        {"i", "", "", "I"},
        {"j", "", "", "j"},
        {"k", "", "", "k"},
        {"l", "", "", "l"},
        {"m", "", "", "m"},
        {"n", "", "", "n"},
        {"o", "", "", "o"},
        {"p", "", "", "p"},
        {"r", "", "", "r"},
        {"s", "", "", "s"},
        {"t", "", "", "t"},
        {"u", "", "", "u"},
        {"v", "", "", "v"},
        {"z", "", "", "z"},
    };
    for (final String[] row : table) {
        addRule(lines, row[0], row[1], row[2], row[3]);
    }
}
/**
 * Registers every row of the table below on {@code lines}, issuing one {@code addRule} call
 * per row in exactly the listed order.
 *
 * <p>Each row carries the four string arguments that follow {@code lines} in the
 * {@code addRule} call (presumably pattern, left context, right context and phoneme
 * expression - confirm against {@code addRule}). The method name suggests this is the "ash"
 * rule set for Spanish. Rows that share a pattern differ only in context, so the order of
 * the rows must not be changed.</p>
 *
 * <p>NOTE(review): the parameter type reads {@code Map>} in this copy of the file; its
 * generic arguments look truncated and should be restored from the original source.</p>
 *
 * @param lines rule collection handed unchanged to every {@code addRule} call
 */
private static void addAshRulesSpanish(final Map> lines) {
    final String[][] table = {
        {"ñ", "", "", "(n|nj)"},
        {"ch", "", "", "(tS|dZ)"},
        {"h", "[bdgt]", "", ""},
        {"h", "", "$", ""},
        {"j", "", "", "x"},
        {"x", "", "", "ks"},
        {"ll", "", "", "(l|Z)"},
        {"w", "", "", "v"},
        {"v", "", "", "(b|v)"},
        {"b", "", "", "(b|v)"},
        {"m", "", "[bpvf]", "(m|n)"},
        {"c", "", "[ei]", "s"},
        {"c", "", "", "k"},
        {"z", "", "", "(z|s)"},
        {"gu", "", "[ei]", "(g|gv)"},
        {"g", "", "[ei]", "(x|g)"},
        {"qu", "", "", "k"},
        {"q", "", "", "k"},
        {"uo", "", "", "(vo|o)"},
        {"u", "", "[aei]", "v"},
        {"y", "", "", "(i|j|S|Z)"},
        {"ü", "", "", "v"},
        {"á", "", "", "a"},
        {"é", "", "", "e"},
        {"í", "", "", "i"},
        {"ó", "", "", "o"},
        {"ú", "", "", "u"},
        // Single-character patterns with empty contexts.
        {"a", "", "", "a"},
        {"d", "", "", "d"},
        {"e", "", "", "E"},
        {"f", "", "", "f"},
        {"g", "", "", "g"},
        {"h", "", "", "h"},
        {"i", "", "", "I"},
        {"k", "", "", "k"},
        {"l", "", "", "l"},
        {"m", "", "", "m"},
        {"n", "", "", "n"},
        {"o", "", "", "o"},
        {"p", "", "", "p"},
        {"r", "", "", "r"},
        {"s", "", "", "s"},
        {"t", "", "", "t"},
        {"u", "", "", "u"},
    };
    for (final String[] row : table) {
        addRule(lines, row[0], row[1], row[2], row[3]);
    }
}
/**
 * Registers every row of the table below on {@code lines}, issuing one {@code addRule} call
 * per row in exactly the listed order.
 *
 * <p>Each row carries the four string arguments that follow {@code lines} in the
 * {@code addRule} call (presumably pattern, left context, right context and phoneme
 * expression - confirm against {@code addRule}). The method name suggests this is the "gen"
 * approximate rule set for the "any" language. Many rows share a pattern and differ only in
 * context, so the order of the rows must not be changed.</p>
 *
 * <p>NOTE(review): the parameter type reads {@code Map>} in this copy of the file; its
 * generic arguments look truncated and should be restored from the original source.</p>
 *
 * @param lines rule collection handed unchanged to every {@code addRule} call
 */
private static void addGenApproxAny(final Map> lines) {
    final String[][] table = {
        {"mb", "", "", "(mb|b[greeklatin])"},
        {"mp", "", "", "(mp|b[greeklatin])"},
        {"ng", "", "", "(ng|g[greeklatin])"},
        {"B", "", "[fktSs]", "(p|f[spanish])"},
        {"B", "", "p", ""},
        {"B", "", "$", "(p|f[spanish])"},
        {"V", "", "[pktSs]", "(f|p[spanish])"},
        {"V", "", "f", ""},
        {"V", "", "$", "(f|p[spanish])"},
        {"B", "", "", "(b|v[spanish])"},
        {"V", "", "", "(v|b[spanish])"},
        {"t", "", "$", "(t|[french])"},
        {"g", "n", "$", "(g|[french])"},
        {"k", "n", "$", "(k|[french])"},
        {"p", "", "$", "(p|[french])"},
        {"r", "[Ee]", "$", "(r|[french])"},
        {"s", "", "$", "(s|[french])"},
        {"t", "[aeiouAEIOU]", "[^aeiouAEIOU]", "(t|[french])"},
        {"s", "[aeiouAEIOU]", "[^aeiouAEIOU]", "(s|[french])"},
        {"I", "[aeiouAEIBFOUQY]", "", "i"},
        {"I", "", "[^aeiouAEBFIOU]e", "(Q[german]|i|D[english])"},
        {"I", "", "$", "i"},
        {"I", "", "[^k]$", "i"},
        {"Ik", "[lr]", "$", "(ik|Qk[german])"},
        {"Ik", "", "$", "ik"},
        {"sIts", "", "$", "(sits|sQts[german])"},
        {"Its", "", "$", "its"},
        {"I", "", "", "(Q[german]|i)"},
        {"lEE", "[bdfgkmnprsStvzZ]", "", "(li|il[english])"},
        {"rEE", "[bdfgkmnprsStvzZ]", "", "(ri|ir[english])"},
        {"lE", "[bdfgkmnprsStvzZ]", "", "(li|il[english]|lY[german])"},
        {"rE", "[bdfgkmnprsStvzZ]", "", "(ri|ir[english]|rY[german])"},
        {"EE", "", "", "(i|)"},
        {"ea", "", "", "(D|a|i)"},
        {"au", "", "", "(D|a|u)"},
        {"ou", "", "", "(D|o|u)"},
        {"eu", "", "", "(D|e|u)"},
        {"ai", "", "", "(D|a|i)"},
        {"Ai", "", "", "(D|a|i)"},
        {"oi", "", "", "(D|o|i)"},
        {"Oi", "", "", "(D|o|i)"},
        {"ui", "", "", "(D|u|i)"},
        {"Ui", "", "", "(D|u|i)"},
        {"ei", "", "", "(D|i)"},
        {"Ei", "", "", "(D|i)"},
        {"iA", "", "$", "(ia|io)"},
        {"iA", "", "", "(ia|io|iY[german])"},
        {"A", "", "[^aeiouAEBFIOU]e", "(a|o|Y[german]|D[english])"},
        {"E", "i[^aeiouAEIOU]", "", "(i|Y[german]|[english])"},
        {"E", "a[^aeiouAEIOU]", "", "(i|Y[german]|[english])"},
        {"E", "", "[fklmnprst]$", "i"},
        {"E", "", "ts$", "i"},
        {"E", "", "$", "i"},
        {"E", "[DaoiuAOIUQY]", "", "i"},
        {"E", "", "[aoAOQY]", "i"},
        {"E", "", "", "(i|Y[german])"},
        {"P", "", "", "(o|u)"},
        {"O", "", "[fklmnprstv]$", "o"},
        {"O", "", "ts$", "o"},
        {"O", "", "$", "o"},
        {"O", "[oeiuQY]", "", "o"},
        // NOTE(review): here and for "A" and "U" below, two consecutive rows have the same
        // pattern with both contexts empty; both are kept because dropping either would change
        // what is registered - confirm whether the second row is intended.
        {"O", "", "", "(o|Y[german])"},
        {"O", "", "", "o"},
        {"A", "", "[fklmnprst]$", "(a|o)"},
        {"A", "", "ts$", "(a|o)"},
        {"A", "", "$", "(a|o)"},
        {"A", "[oeiuQY]", "", "(a|o)"},
        {"A", "", "", "(a|o|Y[german])"},
        {"A", "", "", "(a|o)"},
        {"U", "", "$", "u"},
        {"U", "[DoiuQY]", "", "u"},
        {"U", "", "[^k]$", "u"},
        {"Uk", "[lr]", "$", "(uk|Qk[german])"},
        {"Uk", "", "$", "uk"},
        {"sUts", "", "$", "(suts|sQts[german])"},
        {"Uts", "", "$", "uts"},
        {"U", "", "", "(u|Q[german])"},
        {"U", "", "", "u"},
        {"e", "", "[fklmnprstv]$", "i"},
        {"e", "", "ts$", "i"},
        {"e", "", "$", "i"},
        {"e", "[DaoiuAOIUQY]", "", "i"},
        {"e", "", "[aoAOQY]", "i"},
        {"e", "", "", "(i|Y[german])"},
        {"a", "", "", "(a|o)"},
    };
    for (final String[] row : table) {
        addRule(lines, row[0], row[1], row[2], row[3]);
    }
}
/**
 * Registers every row of the table below on {@code lines}, issuing one {@code addRule} call
 * per row in exactly the listed order.
 *
 * <p>Each row carries the four string arguments that follow {@code lines} in the
 * {@code addRule} call (presumably pattern, left context, right context and phoneme
 * expression - confirm against {@code addRule}). The method name suggests these are "gen"
 * rules shared by the exact and approximate rule types. Rows that share a pattern differ
 * only in context, so the order of the rows must not be changed.</p>
 *
 * <p>NOTE(review): the parameter type reads {@code Map>} in this copy of the file; its
 * generic arguments look truncated and should be restored from the original source.</p>
 *
 * @param lines rule collection handed unchanged to every {@code addRule} call
 */
private static void addGenExactApproxCommon(final Map> lines) {
    final String[][] table = {
        {"h", "", "$", ""},
        {"b", "", "[fktSs]", "p"},
        {"b", "", "p", ""},
        {"b", "", "$", "p"},
        {"p", "", "[vgdZz]", "b"},
        {"p", "", "b", ""},
        {"v", "", "[pktSs]", "f"},
        {"v", "", "f", ""},
        {"v", "", "$", "f"},
        {"f", "", "[vbgdZz]", "v"},
        {"f", "", "v", ""},
        {"g", "", "[pftSs]", "k"},
        {"g", "", "k", ""},
        {"g", "", "$", "k"},
        {"k", "", "[vbdZz]", "g"},
        {"k", "", "g", ""},
        {"d", "", "[pfkSs]", "t"},
        {"d", "", "t", ""},
        {"d", "", "$", "t"},
        {"t", "", "[vbgZz]", "d"},
        {"t", "", "d", ""},
        {"s", "", "dZ", ""},
        {"s", "", "tS", ""},
        {"z", "", "[pfkSt]", "s"},
        {"z", "", "[sSzZ]", ""},
        {"s", "", "[sSzZ]", ""},
        {"Z", "", "[sSzZ]", ""},
        {"S", "", "[sSzZ]", ""},
        {"jnm", "", "", "jm"},
        {"ji", "^", "", "i"},
        {"jI", "^", "", "I"},
        {"a", "", "[aA]", ""},
        {"a", "A", "", ""},
        {"A", "", "A", ""},
        // Rows whose third argument repeats the pattern letter and whose last argument is empty.
        {"b", "", "b", ""},
        {"d", "", "d", ""},
        {"f", "", "f", ""},
        {"g", "", "g", ""},
        {"j", "", "j", ""},
        {"k", "", "k", ""},
        {"l", "", "l", ""},
        {"m", "", "m", ""},
        {"n", "", "n", ""},
        {"p", "", "p", ""},
        {"r", "", "r", ""},
        {"t", "", "t", ""},
        {"v", "", "v", ""},
        {"z", "", "z", ""},
    };
    for (final String[] row : table) {
        addRule(lines, row[0], row[1], row[2], row[3]);
    }
}
/**
 * Registers every row of the table below on {@code lines}, issuing one {@code addRule} call
 * per row in exactly the listed order.
 *
 * <p>Each row carries the four string arguments that follow {@code lines} in the
 * {@code addRule} call (presumably pattern, left context, right context and phoneme
 * expression - confirm against {@code addRule}). The method name suggests these are the
 * common "gen" approximate rules. Rows that share a pattern differ only in context, so the
 * order of the rows must not be changed.</p>
 *
 * <p>NOTE(review): the parameter type reads {@code Map>} in this copy of the file; its
 * generic arguments look truncated and should be restored from the original source.</p>
 *
 * @param lines rule collection handed unchanged to every {@code addRule} call
 */
private static void addGenApproxCommon(final Map> lines) {
    final String[][] table = {
        {"van", "^", "[bp]", "(vam|)"},
        {"van", "^", "", "(van|)"},
        {"n", "", "[bp]", "m"},
        {"h", "", "", ""},
        {"H", "", "", "(x|)"},
        {"sen", "[rmnl]", "$", "(zn|zon)"},
        {"sen", "", "$", "(sn|son)"},
        {"sEn", "[rmnl]", "$", "(zn|zon)"},
        {"sEn", "", "$", "(sn|son)"},
        {"e", "[BbdfgklmnprsStvzZ]", "[ln]$", ""},
        {"i", "[BbdfgklmnprsStvzZ]", "[ln]$", ""},
        {"E", "[BbdfgklmnprsStvzZ]", "[ln]$", ""},
        {"I", "[BbdfgklmnprsStvzZ]", "[ln]$", ""},
        {"Q", "[BbdfgklmnprsStvzZ]", "[ln]$", ""},
        {"Y", "[BbdfgklmnprsStvzZ]", "[ln]$", ""},
        {"e", "[BbdfgklmnprsStvzZ]", "[ln][BbdfgklmnprsStvzZ]", ""},
        {"i", "[BbdfgklmnprsStvzZ]", "[ln][BbdfgklmnprsStvzZ]", ""},
        {"E", "[BbdfgklmnprsStvzZ]", "[ln][BbdfgklmnprsStvzZ]", ""},
        {"I", "[BbdfgklmnprsStvzZ]", "[ln][BbdfgklmnprsStvzZ]", ""},
        {"Q", "[BbdfgklmnprsStvzZ]", "[ln][BbdfgklmnprsStvzZ]", ""},
        {"Y", "[BbdfgklmnprsStvzZ]", "[ln][BbdfgklmnprsStvzZ]", ""},
        {"lEs", "", "", "(lEs|lz)"},
        {"lE", "[bdfgkmnprStvzZ]", "", "(lE|l)"},
        {"aue", "", "", "D"},
        {"oue", "", "", "D"},
        {"AvE", "", "", "(D|AvE)"},
        {"Ave", "", "", "(D|Ave)"},
        {"avE", "", "", "(D|avE)"},
        {"ave", "", "", "(D|ave)"},
        {"OvE", "", "", "(D|OvE)"},
        {"Ove", "", "", "(D|Ove)"},
        {"ovE", "", "", "(D|ovE)"},
        {"ove", "", "", "(D|ove)"},
        {"ea", "", "", "(D|ea)"},
        {"EA", "", "", "(D|EA)"},
        {"Ea", "", "", "(D|Ea)"},
        {"eA", "", "", "(D|eA)"},
        // Three-character "<letter>j<i|I|e|E>" patterns, all with "D" as the last argument.
        {"aji", "", "", "D"}, {"ajI", "", "", "D"},
        {"aje", "", "", "D"}, {"ajE", "", "", "D"},
        {"Aji", "", "", "D"}, {"AjI", "", "", "D"},
        {"Aje", "", "", "D"}, {"AjE", "", "", "D"},
        {"oji", "", "", "D"}, {"ojI", "", "", "D"},
        {"oje", "", "", "D"}, {"ojE", "", "", "D"},
        {"Oji", "", "", "D"}, {"OjI", "", "", "D"},
        {"Oje", "", "", "D"}, {"OjE", "", "", "D"},
        {"eji", "", "", "D"}, {"ejI", "", "", "D"},
        {"eje", "", "", "D"}, {"ejE", "", "", "D"},
        {"Eji", "", "", "D"}, {"EjI", "", "", "D"},
        {"Eje", "", "", "D"}, {"EjE", "", "", "D"},
        {"uji", "", "", "D"}, {"ujI", "", "", "D"},
        {"uje", "", "", "D"}, {"ujE", "", "", "D"},
        {"Uji", "", "", "D"}, {"UjI", "", "", "D"},
        {"Uje", "", "", "D"}, {"UjE", "", "", "D"},
        {"iji", "", "", "D"}, {"ijI", "", "", "D"},
        {"ije", "", "", "D"}, {"ijE", "", "", "D"},
        {"Iji", "", "", "D"}, {"IjI", "", "", "D"},
        {"Ije", "", "", "D"}, {"IjE", "", "", "D"},
        // Three-character "<letter>j<a|A|o|O|u|U>" patterns, all with "D" as the last argument.
        // NOTE(review): after the first two groups every group ends with "Aju"/"AjU" instead
        // of its own letter (e.g. "oju"/"ojU"). This looks like a copy-paste slip, but it is
        // reproduced verbatim because changing it would alter the registered rules - confirm
        // against the upstream rule data before touching it.
        {"aja", "", "", "D"}, {"ajA", "", "", "D"},
        {"ajo", "", "", "D"}, {"ajO", "", "", "D"},
        {"aju", "", "", "D"}, {"ajU", "", "", "D"},
        {"Aja", "", "", "D"}, {"AjA", "", "", "D"},
        {"Ajo", "", "", "D"}, {"AjO", "", "", "D"},
        {"Aju", "", "", "D"}, {"AjU", "", "", "D"},
        {"oja", "", "", "D"}, {"ojA", "", "", "D"},
        {"ojo", "", "", "D"}, {"ojO", "", "", "D"},
        {"Aju", "", "", "D"}, {"AjU", "", "", "D"},
        {"Oja", "", "", "D"}, {"OjA", "", "", "D"},
        {"Ojo", "", "", "D"}, {"OjO", "", "", "D"},
        {"Aju", "", "", "D"}, {"AjU", "", "", "D"},
        {"eja", "", "", "D"}, {"ejA", "", "", "D"},
        {"ejo", "", "", "D"}, {"ejO", "", "", "D"},
        {"Aju", "", "", "D"}, {"AjU", "", "", "D"},
        {"Eja", "", "", "D"}, {"EjA", "", "", "D"},
        {"Ejo", "", "", "D"}, {"EjO", "", "", "D"},
        {"Aju", "", "", "D"}, {"AjU", "", "", "D"},
        {"uja", "", "", "D"}, {"ujA", "", "", "D"},
        {"ujo", "", "", "D"}, {"ujO", "", "", "D"},
        {"Aju", "", "", "D"}, {"AjU", "", "", "D"},
        {"Uja", "", "", "D"}, {"UjA", "", "", "D"},
        {"Ujo", "", "", "D"}, {"UjO", "", "", "D"},
        {"Aju", "", "", "D"}, {"AjU", "", "", "D"},
        {"ija", "", "", "D"}, {"ijA", "", "", "D"},
        {"ijo", "", "", "D"}, {"ijO", "", "", "D"},
        {"Aju", "", "", "D"}, {"AjU", "", "", "D"},
        {"Ija", "", "", "D"}, {"IjA", "", "", "D"},
        {"Ijo", "", "", "D"}, {"IjO", "", "", "D"},
        {"Aju", "", "", "D"}, {"AjU", "", "", "D"},
        {"j", "", "", "i"},
        {"lYndEr", "", "$", "lYnder"},
        {"lander", "", "$", "lYnder"},
        {"lAndEr", "", "$", "lYnder"},
        {"lAnder", "", "$", "lYnder"},
        {"landEr", "", "$", "lYnder"},
        {"lender", "", "$", "lYnder"},
        {"lEndEr", "", "$", "lYnder"},
        {"lendEr", "", "$", "lYnder"},
        {"lEnder", "", "$", "lYnder"},
        {"burk", "", "$", "(burk|berk)"},
        {"bUrk", "", "$", "(burk|berk)"},
        {"burg", "", "$", "(burk|berk)"},
        {"bUrg", "", "$", "(burk|berk)"},
        {"Burk", "", "$", "(burk|berk)"},
        {"BUrk", "", "$", "(burk|berk)"},
        {"Burg", "", "$", "(burk|berk)"},
        {"BUrg", "", "$", "(burk|berk)"},
        {"s", "", "[rmnl]", "z"},
        {"S", "", "[rmnl]", "z"},
        {"s", "[rmnl]", "", "z"},
        {"S", "[rmnl]", "", "z"},
        {"dS", "", "$", "S"},
        {"dZ", "", "$", "S"},
        {"Z", "", "$", "S"},
        {"S", "", "$", "(S|s)"},
        {"z", "", "$", "(S|s)"},
        {"S", "", "", "s"},
        {"dZ", "", "", "z"},
        {"Z", "", "", "z"},
    };
    for (final String[] row : table) {
        addRule(lines, row[0], row[1], row[2], row[3]);
    }
}
/**
 * Registers every row of the table below on {@code lines}, issuing one {@code addRule} call
 * per row in exactly the listed order.
 *
 * <p>Each row carries the four string arguments that follow {@code lines} in the
 * {@code addRule} call (presumably pattern, left context, right context and phoneme
 * expression - confirm against {@code addRule}). The method name suggests this is the "gen"
 * approximate rule set for Arabic. The two "p" rows differ only in context, so the order of
 * the rows must not be changed.</p>
 *
 * <p>NOTE(review): the parameter type reads {@code Map>} in this copy of the file; its
 * generic arguments look truncated and should be restored from the original source.</p>
 *
 * @param lines rule collection handed unchanged to every {@code addRule} call
 */
private static void addGenApproxArabic(final Map> lines) {
    final String[][] table = {
        {"1a", "", "", "(D|a)"},
        {"1i", "", "", "(D|i|e)"},
        {"1u", "", "", "(D|u|o)"},
        {"j1", "", "", "(ja|je|jo|ju|j)"},
        {"1", "", "", "(a|e|i|o|u|)"},
        {"u", "", "", "(o|u)"},
        {"i", "", "", "(i|e)"},
        {"p", "", "$", "p"},
        {"p", "", "", "(p|b)"},
    };
    for (final String[] row : table) {
        addRule(lines, row[0], row[1], row[2], row[3]);
    }
}
/**
 * Registers every row of the table below on {@code lines}, issuing one {@code addRule} call
 * per row in exactly the listed order.
 *
 * <p>Each row carries the four string arguments that follow {@code lines} in the
 * {@code addRule} call (presumably pattern, left context, right context and phoneme
 * expression - confirm against {@code addRule}). The method name suggests this is the "gen"
 * approximate rule set for Russian. Rows that share a pattern differ only in context, so
 * the order of the rows must not be changed.</p>
 *
 * <p>NOTE(review): the parameter type reads {@code Map>} in this copy of the file; its
 * generic arguments look truncated and should be restored from the original source.</p>
 *
 * @param lines rule collection handed unchanged to every {@code addRule} call
 */
private static void addGenApproxRussian(final Map> lines) {
    final String[][] table = {
        {"I", "", "$", "i"},
        {"I", "", "[^k]$", "i"},
        {"Ik", "[lr]", "$", "(ik|Qk)"},
        {"Ik", "", "$", "ik"},
        {"sIts", "", "$", "(sits|sQts)"},
        {"Its", "", "$", "its"},
        {"I", "[aeiEIou]", "", "i"},
        {"I", "", "", "(i|Q)"},
        {"au", "", "", "(D|a|u)"},
        {"ou", "", "", "(D|o|u)"},
        {"ai", "", "", "(D|a|i)"},
        {"oi", "", "", "(D|o|i)"},
        {"ui", "", "", "(D|u|i)"},
        {"om", "", "[bp]", "(om|im)"},
        {"on", "", "[dgkstvz]", "(on|in)"},
        {"em", "", "[bp]", "(im|om)"},
        {"en", "", "[dgkstvz]", "(in|on)"},
        {"Em", "", "[bp]", "(im|Ym|om)"},
        {"En", "", "[dgkstvz]", "(in|Yn|on)"},
        {"a", "", "", "(a|o)"},
        {"e", "", "", "i"},
        {"E", "", "[fklmnprsStv]$", "i"},
        {"E", "", "ts$", "i"},
        {"E", "[DaoiuQ]", "", "i"},
        {"E", "", "[aoQ]", "i"},
        {"E", "", "", "(Y|i)"},
    };
    for (final String[] row : table) {
        addRule(lines, row[0], row[1], row[2], row[3]);
    }
}
/**
 * Registers every row of the table below on {@code lines}, issuing one {@code addRule} call
 * per row in exactly the listed order.
 *
 * <p>Each row carries the four string arguments that follow {@code lines} in the
 * {@code addRule} call (presumably pattern, left context, right context and phoneme
 * expression - confirm against {@code addRule}). The method name suggests this is the "gen"
 * approximate rule set for French. All rows here have empty second and third arguments.</p>
 *
 * <p>NOTE(review): the parameter type reads {@code Map>} in this copy of the file; its
 * generic arguments look truncated and should be restored from the original source.</p>
 *
 * @param lines rule collection handed unchanged to every {@code addRule} call
 */
private static void addGenApproxFrench(final Map> lines) {
    final String[][] table = {
        {"au", "", "", "(D|a|u)"},
        {"ou", "", "", "(D|o|u)"},
        {"ai", "", "", "(D|a|i)"},
        {"oi", "", "", "(D|o|i)"},
        {"ui", "", "", "(D|u|i)"},
        {"a", "", "", "(a|o)"},
        {"e", "", "", "i"},
    };
    for (final String[] row : table) {
        addRule(lines, row[0], row[1], row[2], row[3]);
    }
}
/**
 * Registers every row of the table below on {@code lines}, issuing one {@code addRule} call
 * per row in exactly the listed order.
 *
 * <p>Each row carries the four string arguments that follow {@code lines} in the
 * {@code addRule} call (presumably pattern, left context, right context and phoneme
 * expression - confirm against {@code addRule}). The method name suggests this is the "gen"
 * approximate rule set for English. Rows that share a pattern differ only in context, so
 * the order of the rows must not be changed.</p>
 *
 * <p>NOTE(review): the parameter type reads {@code Map>} in this copy of the file; its
 * generic arguments look truncated and should be restored from the original source.</p>
 *
 * @param lines rule collection handed unchanged to every {@code addRule} call
 */
private static void addGenApproxEnglish(final Map> lines) {
    final String[][] table = {
        {"I", "", "[^aEIeiou]e", "(Q|i|D)"},
        {"I", "", "$", "i"},
        {"I", "[aEIeiou]", "", "i"},
        {"I", "", "[^k]$", "i"},
        {"Ik", "[lr]", "$", "(ik|Qk)"},
        {"Ik", "", "$", "ik"},
        {"sIts", "", "$", "(sits|sQts)"},
        {"Its", "", "$", "its"},
        {"I", "", "", "(i|Q)"},
        {"lE", "[bdfgkmnprsStvzZ]", "", "(il|li|lY)"},
        {"au", "", "", "(D|a|u)"},
        {"ou", "", "", "(D|o|u)"},
        {"ai", "", "", "(D|a|i)"},
        {"oi", "", "", "(D|o|i)"},
        {"ui", "", "", "(D|u|i)"},
        {"E", "D[^aeiEIou]", "", "(i|)"},
        {"e", "D[^aeiEIou]", "", "(i|)"},
        {"e", "", "", "i"},
        {"E", "", "[fklmnprsStv]$", "i"},
        {"E", "", "ts$", "i"},
        {"E", "[DaoiEuQY]", "", "i"},
        {"E", "", "[aoQY]", "i"},
        {"E", "", "", "(Y|i)"},
        {"a", "", "", "(a|o)"},
    };
    for (final String[] row : table) {
        addRule(lines, row[0], row[1], row[2], row[3]);
    }
}
/**
 * Registers every row of the table below on {@code lines}, issuing one {@code addRule} call
 * per row in exactly the listed order.
 *
 * <p>Each row carries the four string arguments that follow {@code lines} in the
 * {@code addRule} call (presumably pattern, left context, right context and phoneme
 * expression - confirm against {@code addRule}). The method name suggests this is the "gen"
 * approximate rule set for German. Rows that share a pattern differ only in context, so the
 * order of the rows must not be changed.</p>
 *
 * <p>NOTE(review): the parameter type reads {@code Map>} in this copy of the file; its
 * generic arguments look truncated and should be restored from the original source.</p>
 *
 * @param lines rule collection handed unchanged to every {@code addRule} call
 */
private static void addGenApproxGerman(final Map> lines) {
    final String[][] table = {
        {"I", "", "$", "i"},
        {"I", "[aeiAEIOUouQY]", "", "i"},
        {"I", "", "[^k]$", "i"},
        {"Ik", "[lr]", "$", "(ik|Qk)"},
        {"Ik", "", "$", "ik"},
        {"sIts", "", "$", "(sits|sQts)"},
        {"Its", "", "$", "its"},
        {"I", "", "", "(Q|i)"},
        {"AU", "", "", "(D|a|u)"},
        {"aU", "", "", "(D|a|u)"},
        {"Au", "", "", "(D|a|u)"},
        {"au", "", "", "(D|a|u)"},
        {"ou", "", "", "(D|o|u)"},
        {"OU", "", "", "(D|o|u)"},
        {"oU", "", "", "(D|o|u)"},
        {"Ou", "", "", "(D|o|u)"},
        {"ai", "", "", "(D|a|i)"},
        {"Ai", "", "", "(D|a|i)"},
        {"oi", "", "", "(D|o|i)"},
        {"Oi", "", "", "(D|o|i)"},
        {"ui", "", "", "(D|u|i)"},
        {"Ui", "", "", "(D|u|i)"},
        {"e", "", "", "i"},
        {"E", "", "[fklmnprst]$", "i"},
        {"E", "", "ts$", "i"},
        {"E", "", "$", "i"},
        {"E", "[DaoAOUiuQY]", "", "i"},
        {"E", "", "[aoAOQY]", "i"},
        {"E", "", "", "(Y|i)"},
        {"O", "", "$", "o"},
        {"O", "", "[fklmnprst]$", "o"},
        {"O", "", "ts$", "o"},
        {"O", "[aoAOUeiuQY]", "", "o"},
        {"O", "", "", "(o|Y)"},
        {"a", "", "", "(a|o)"},
        {"A", "", "$", "(a|o)"},
        {"A", "", "[fklmnprst]$", "(a|o)"},
        {"A", "", "ts$", "(a|o)"},
        {"A", "[aoeOUiuQY]", "", "(a|o)"},
        {"A", "", "", "(a|o|Y)"},
        {"U", "", "$", "u"},
        {"U", "[DaoiuUQY]", "", "u"},
        {"U", "", "[^k]$", "u"},
        {"Uk", "[lr]", "$", "(uk|Qk)"},
        {"Uk", "", "$", "uk"},
        {"sUts", "", "$", "(suts|sQts)"},
        {"Uts", "", "$", "uts"},
        {"U", "", "", "(u|Q)"},
    };
    for (final String[] row : table) {
        addRule(lines, row[0], row[1], row[2], row[3]);
    }
}
private static void addGenApproxGreekLatin(final Map> lines) {
addRule(lines, "N", "", "", "");
}
private static void addGenApproxPolish(final Map> lines) {
addRule(lines, "aiB", "", "[bp]", "(D|Dm)");
addRule(lines, "oiB", "", "[bp]", "(D|Dm)");
addRule(lines, "uiB", "", "[bp]", "(D|Dm)");
addRule(lines, "eiB", "", "[bp]", "(D|Dm)");
addRule(lines, "EiB", "", "[bp]", "(D|Dm)");
addRule(lines, "iiB", "", "[bp]", "(D|Dm)");
addRule(lines, "IiB", "", "[bp]", "(D|Dm)");
addRule(lines, "aiB", "", "[dgkstvz]", "(D|Dn)");
addRule(lines, "oiB", "", "[dgkstvz]", "(D|Dn)");
addRule(lines, "uiB", "", "[dgkstvz]", "(D|Dn)");
addRule(lines, "eiB", "", "[dgkstvz]", "(D|Dn)");
addRule(lines, "EiB", "", "[dgkstvz]", "(D|Dn)");
addRule(lines, "iiB", "", "[dgkstvz]", "(D|Dn)");
addRule(lines, "IiB", "", "[dgkstvz]", "(D|Dn)");
addRule(lines, "B", "", "[bp]", "(o|om|im)");
addRule(lines, "B", "", "[dgkstvz]", "(o|on|in)");
addRule(lines, "B", "", "", "o");
addRule(lines, "aiF", "", "[bp]", "(D|Dm)");
addRule(lines, "oiF", "", "[bp]", "(D|Dm)");
addRule(lines, "uiF", "", "[bp]", "(D|Dm)");
addRule(lines, "eiF", "", "[bp]", "(D|Dm)");
addRule(lines, "EiF", "", "[bp]", "(D|Dm)");
addRule(lines, "iiF", "", "[bp]", "(D|Dm)");
addRule(lines, "IiF", "", "[bp]", "(D|Dm)");
addRule(lines, "aiF", "", "[dgkstvz]", "(D|Dn)");
addRule(lines, "oiF", "", "[dgkstvz]", "(D|Dn)");
addRule(lines, "uiF", "", "[dgkstvz]", "(D|Dn)");
addRule(lines, "eiF", "", "[dgkstvz]", "(D|Dn)");
addRule(lines, "EiF", "", "[dgkstvz]", "(D|Dn)");
addRule(lines, "iiF", "", "[dgkstvz]", "(D|Dn)");
addRule(lines, "IiF", "", "[dgkstvz]", "(D|Dn)");
addRule(lines, "F", "", "[bp]", "(i|im|om)");
addRule(lines, "F", "", "[dgkstvz]", "(i|in|on)");
addRule(lines, "F", "", "", "i");
addRule(lines, "P", "", "", "(o|u)");
addRule(lines, "I", "", "$", "i");
addRule(lines, "I", "", "[^k]$", "i");
addRule(lines, "Ik", "[lr]", "$", "(ik|Qk)");
addRule(lines, "Ik", "", "$", "ik");
addRule(lines, "sIts", "", "$", "(sits|sQts)");
addRule(lines, "Its", "", "$", "its");
addRule(lines, "I", "[aeiAEBFIou]", "", "i");
addRule(lines, "I", "", "", "(i|Q)");
addRule(lines, "au", "", "", "(D|a|u)");
addRule(lines, "ou", "", "", "(D|o|u)");
addRule(lines, "ai", "", "", "(D|a|i)");
addRule(lines, "oi", "", "", "(D|o|i)");
addRule(lines, "ui", "", "", "(D|u|i)");
addRule(lines, "a", "", "", "(a|o)");
addRule(lines, "e", "", "", "i");
addRule(lines, "E", "", "[fklmnprst]$", "i");
addRule(lines, "E", "", "ts$", "i");
addRule(lines, "E", "", "$", "i");
addRule(lines, "E", "[DaoiuQ]", "", "i");
addRule(lines, "E", "", "[aoQ]", "i");
addRule(lines, "E", "", "", "(Y|i)");
}
private static void addGenApproxSpanish(final Map> lines) {
addRule(lines, "B", "", "", "(b|v)");
addRule(lines, "V", "", "", "(b|v)");
}
private static void addGenExactAny(final Map> lines) {
addRule(lines, "EE", "", "$", "e");
addRule(lines, "A", "", "", "a");
addRule(lines, "E", "", "", "e");
addRule(lines, "I", "", "", "i");
addRule(lines, "O", "", "", "o");
addRule(lines, "P", "", "", "o");
addRule(lines, "U", "", "", "u");
addRule(lines, "B", "", "[fktSs]", "p");
addRule(lines, "B", "", "p", "");
addRule(lines, "B", "", "$", "p");
addRule(lines, "V", "", "[pktSs]", "f");
addRule(lines, "V", "", "f", "");
addRule(lines, "V", "", "$", "f");
addRule(lines, "B", "", "", "b");
addRule(lines, "V", "", "", "v");
}
private static void addGenExactArabic(final Map> lines) {
addRule(lines, "1", "", "", "");
}
private static void addGenExactCommon(final Map> lines) {
addRule(lines, "H", "", "", "");
addRule(lines, "s", "[^t]", "[bgZd]", "z");
addRule(lines, "Z", "", "[pfkst]", "S");
addRule(lines, "Z", "", "$", "S");
addRule(lines, "S", "", "[bgzd]", "Z");
addRule(lines, "z", "", "$", "s");
addRule(lines, "ji", "[aAoOeEiIuU]", "", "j");
addRule(lines, "jI", "[aAoOeEiIuU]", "", "j");
addRule(lines, "je", "[aAoOeEiIuU]", "", "j");
addRule(lines, "jE", "[aAoOeEiIuU]", "", "j");
}
private static void addGenExactRussian(final Map> lines) {
addRule(lines, "E", "", "", "e");
addRule(lines, "I", "", "", "i");
}
private static void addGenExactGreeklatin(final Map> lines) {
addRule(lines, "N", "", "", "n");
}
private static void addGenExactPolish(final Map> lines) {
addRule(lines, "B", "", "", "a");
addRule(lines, "F", "", "", "e");
addRule(lines, "P", "", "", "o");
addRule(lines, "E", "", "", "e");
addRule(lines, "I", "", "", "i");
}
private static void addGenExactSpanish(final Map> lines) {
addRule(lines, "B", "", "", "b");
addRule(lines, "V", "", "", "v");
}
private static void addGenRulesAny(final Map> lines) {
addRule(lines, "yna", "", "$", "(in[russian]|ina)");
addRule(lines, "ina", "", "$", "(in[russian]|ina)");
addRule(lines, "liova", "", "$", "(lova|lof[russian]|lef[russian])");
addRule(lines, "lova", "", "$", "(lova|lof[russian]|lef[russian]|l[czech]|el[czech])");
addRule(lines, "kova", "", "$", "(kova|kof[russian]|k[czech]|ek[czech])");
addRule(lines, "ova", "", "$", "(ova|of[russian]|[czech])");
addRule(lines, "ová", "", "$", "(ova|[czech])");
addRule(lines, "eva", "", "$", "(eva|ef[russian])");
addRule(lines, "aia", "", "$", "(aja|i[russian])");
addRule(lines, "aja", "", "$", "(aja|i[russian])");
addRule(lines, "aya", "", "$", "(aja|i[russian])");
addRule(lines, "lowa", "", "$", "(lova|lof[polish]|l[polish]|el[polish])");
addRule(lines, "kowa", "", "$", "(kova|kof[polish]|k[polish]|ek[polish])");
addRule(lines, "owa", "", "$", "(ova|of[polish]|)");
addRule(lines, "lowna", "", "$", "(lovna|levna|l[polish]|el[polish])");
addRule(lines, "kowna", "", "$", "(kovna|k[polish]|ek[polish])");
addRule(lines, "owna", "", "$", "(ovna|[polish])");
addRule(lines, "lówna", "", "$", "(l|el)");
addRule(lines, "kówna", "", "$", "(k|ek)");
addRule(lines, "ówna", "", "$", "");
addRule(lines, "á", "", "$", "(a|i[czech])");
addRule(lines, "a", "", "$", "(a|i[polish+czech])");
addRule(lines, "pf", "", "", "(pf|p|f)");
addRule(lines, "que", "", "$", "(k[french]|ke|kve)");
addRule(lines, "qu", "", "", "(kv|k)");
addRule(lines, "m", "", "[bfpv]", "(m|n)");
addRule(lines, "m", "[aeiouy]", "[aeiouy]", "m");
addRule(lines, "m", "[aeiouy]", "", "(m|n[french+portuguese])");
addRule(lines, "ly", "", "[au]", "l");
addRule(lines, "li", "", "[au]", "l");
addRule(lines, "lio", "", "", "(lo|le[russian])");
addRule(lines, "lyo", "", "", "(lo|le[russian])");
addRule(lines, "lt", "u", "$", "(lt|[french])");
addRule(lines, "v", "^", "", "(v|f[german]|b[spanish])");
addRule(lines, "ex", "", "[aáuiíoóeéêy]", "(ez[portuguese]|eS[portuguese]|eks|egz)");
addRule(lines, "ex", "", "[cs]", "(e[portuguese]|ek)");
addRule(lines, "x", "u", "$", "(ks|[french])");
addRule(lines, "ck", "", "", "(k|tsk[polish+czech])");
addRule(lines, "cz", "", "", "(tS|tsz[czech])");
addRule(lines, "rh", "^", "", "r");
addRule(lines, "dh", "^", "", "d");
addRule(lines, "bh", "^", "", "b");
addRule(lines, "ph", "", "", "(ph|f)");
addRule(lines, "kh", "", "", "(x[russian+english]|kh)");
addRule(lines, "lh", "", "", "(lh|l[portuguese])");
addRule(lines, "nh", "", "", "(nh|nj[portuguese])");
addRule(lines, "ssch", "", "", "S");
addRule(lines, "chsch", "", "", "xS");
addRule(lines, "tsch", "", "", "tS");
addRule(lines, "sch", "[aeiouy]", "[ei]", "(S|StS[russian]|sk[romanian+italian])");
addRule(lines, "sch", "[aeiouy]", "", "(S|StS[russian])");
addRule(lines, "sch", "", "[ei]", "(sk[romanian+italian]|S|StS[russian])");
addRule(lines, "sch", "", "", "(S|StS[russian])");
addRule(lines, "ssh", "", "", "S");
addRule(lines, "sh", "", "[äöü]", "sh");
addRule(lines, "sh", "", "[aeiou]", "(S[russian+english]|sh)");
addRule(lines, "sh", "", "", "S");
addRule(lines, "zh", "", "", "(Z[english+russian]|zh|tsh[german])");
addRule(lines, "chs", "", "", "(ks[german]|xs|tSs[russian+english])");
addRule(lines, "ch", "", "[ei]", "(x|tS[spanish+english+russian]|k[romanian+italian]|S[portuguese+french])");
addRule(lines, "ch", "", "", "(x|tS[spanish+english+russian]|S[portuguese+french])");
addRule(lines, "th", "^", "", "t");
addRule(lines, "th", "", "[äöüaeiou]", "(t[english+german+greeklatin]|th)");
addRule(lines, "th", "", "", "t");
addRule(lines, "gh", "", "[ei]", "(g[romanian+italian+greeklatin]|gh)");
addRule(lines, "ouh", "", "[aioe]", "(v[french]|uh)");
addRule(lines, "uh", "", "[aioe]", "(v|uh)");
addRule(lines, "h", ".", "$", "");
addRule(lines, "h", "[aeiouyäöü]", "", "");
addRule(lines, "h", "^", "", "(h|x[romanian+greeklatin]|H[english+romanian+polish+french+portuguese+italian+spanish])");
addRule(lines, "cia", "", "", "(tSa[polish]|tsa)");
addRule(lines, "cią", "", "[bp]", "(tSom|tsom)");
addRule(lines, "cią", "", "", "(tSon[polish]|tson)");
addRule(lines, "cię", "", "[bp]", "(tSem[polish]|tsem)");
addRule(lines, "cię", "", "", "(tSen[polish]|tsen)");
addRule(lines, "cie", "", "", "(tSe[polish]|tse)");
addRule(lines, "cio", "", "", "(tSo[polish]|tso)");
addRule(lines, "ciu", "", "", "(tSu[polish]|tsu)");
addRule(lines, "sci", "", "$", "(Si[italian]|stsi[polish+czech]|dZi[turkish]|tSi[polish+romanian]|tS[romanian]|si)");
addRule(lines, "sc", "", "[ei]", "(S[italian]|sts[polish+czech]|dZ[turkish]|tS[polish+romanian]|s)");
addRule(lines, "ci", "", "$", "(tsi[polish+czech]|dZi[turkish]|tSi[polish+romanian]|tS[romanian]|si)");
addRule(lines, "cy", "", "", "(si|tsi[polish])");
addRule(lines, "c", "", "[ei]", "(ts[polish+czech]|dZ[turkish]|tS[polish+romanian]|k[greeklatin]|s)");
addRule(lines, "sç", "", "[aeiou]", "(s|stS[turkish])");
addRule(lines, "ssz", "", "", "S");
addRule(lines, "sz", "^", "", "(S|s[hungarian])");
addRule(lines, "sz", "", "$", "(S|s[hungarian])");
addRule(lines, "sz", "", "", "(S|s[hungarian]|sts[german])");
addRule(lines, "ssp", "", "", "(Sp[german]|sp)");
addRule(lines, "sp", "", "", "(Sp[german]|sp)");
addRule(lines, "sst", "", "", "(St[german]|st)");
addRule(lines, "st", "", "", "(St[german]|st)");
addRule(lines, "ss", "", "", "s");
addRule(lines, "sj", "^", "", "S");
addRule(lines, "sj", "", "$", "S");
addRule(lines, "sj", "", "", "(sj|S[dutch]|sx[spanish]|sZ[romanian+turkish])");
addRule(lines, "sia", "", "", "(Sa[polish]|sa[polish]|sja)");
addRule(lines, "sią", "", "[bp]", "(Som[polish]|som)");
addRule(lines, "sią", "", "", "(Son[polish]|son)");
addRule(lines, "się", "", "[bp]", "(Sem[polish]|sem)");
addRule(lines, "się", "", "", "(Sen[polish]|sen)");
addRule(lines, "sie", "", "", "(se|sje|Se[polish]|zi[german])");
addRule(lines, "sio", "", "", "(So[polish]|so)");
addRule(lines, "siu", "", "", "(Su[polish]|sju)");
addRule(lines, "si", "[äöëaáuiíoóeéêy]", "", "(Si[polish]|si|zi[portuguese+french+italian+german])");
addRule(lines, "si", "", "", "(Si[polish]|si|zi[german])");
addRule(lines, "s", "[aáuiíoóeéêy]", "[aáuíoóeéêy]", "(s|z[portuguese+french+italian+german])");
addRule(lines, "s", "", "[aeouäöë]", "(s|z[german])");
addRule(lines, "s", "[aeiouy]", "[dglmnrv]", "(s|z|Z[portuguese]|[french])");
addRule(lines, "s", "", "[dglmnrv]", "(s|z|Z[portuguese])");
addRule(lines, "gue", "", "$", "(k[french]|gve)");
addRule(lines, "gu", "", "[ei]", "(g[french]|gv[portuguese+spanish])");
addRule(lines, "gu", "", "[ao]", "gv");
addRule(lines, "guy", "", "", "gi");
addRule(lines, "gli", "", "", "(glI|l[italian])");
addRule(lines, "gni", "", "", "(gnI|ni[italian+french])");
addRule(lines, "gn", "", "[aeou]", "(n[italian+french]|nj[italian+french]|gn)");
addRule(lines, "ggie", "", "", "(je[greeklatin]|dZe)");
addRule(lines, "ggi", "", "[aou]", "(j[greeklatin]|dZ)");
addRule(lines, "ggi", "[yaeiou]", "[aou]", "(gI|dZ[italian]|j[greeklatin])");
addRule(lines, "gge", "[yaeiou]", "", "(gE|xe[spanish]|gZe[portuguese+french]|dZe[english+romanian+italian+spanish]|je[greeklatin])");
addRule(lines, "ggi", "[yaeiou]", "", "(gI|xi[spanish]|gZi[portuguese+french]|dZi[english+romanian+italian+spanish]|i[greeklatin])");
addRule(lines, "ggi", "", "[aou]", "(gI|dZ[italian]|j[greeklatin])");
addRule(lines, "gie", "", "$", "(ge|gi[german]|ji[french]|dZe[italian])");
addRule(lines, "gie", "", "", "(ge|gi[german]|dZe[italian]|je[greeklatin])");
addRule(lines, "gi", "", "[aou]", "(i[greeklatin]|dZ)");
addRule(lines, "ge", "[yaeiou]", "", "(gE|xe[spanish]|Ze[portuguese+french]|dZe[english+romanian+italian+spanish])");
addRule(lines, "gi", "[yaeiou]", "", "(gI|xi[spanish]|Zi[portuguese+french]|dZi[english+romanian+italian+spanish])");
addRule(lines, "ge", "", "", "(gE|xe[spanish]|hE[russian]|je[greeklatin]|Ze[portuguese+french]|dZe[english+romanian+italian+spanish])");
addRule(lines, "gi", "", "", "(gI|xi[spanish]|hI[russian]|i[greeklatin]|Zi[portuguese+french]|dZi[english+romanian+italian+spanish])");
addRule(lines, "gy", "", "[aeouáéóúüöőű]", "(gi|dj[hungarian])");
addRule(lines, "gy", "", "", "(gi|d[hungarian])");
addRule(lines, "g", "[yaeiou]", "[aouyei]", "g");
addRule(lines, "g", "", "[aouei]", "(g|h[russian])");
addRule(lines, "ij", "", "", "(i|ej[dutch]|ix[spanish]|iZ[french+romanian+turkish+portuguese])");
addRule(lines, "j", "", "[aoeiuy]", "(j|dZ[english]|x[spanish]|Z[french+romanian+turkish+portuguese])");
addRule(lines, "rz", "t", "", "(S[polish]|r)");
addRule(lines, "rz", "", "", "(rz|rts[german]|Z[polish]|r[polish]|rZ[polish])");
addRule(lines, "tz", "", "$", "(ts|tS[english+german])");
addRule(lines, "tz", "^", "", "(ts[english+german+russian]|tS[english+german])");
addRule(lines, "tz", "", "", "(ts[english+german+russian]|tz)");
addRule(lines, "zia", "", "[bcdgkpstwzż]", "(Za[polish]|za[polish]|zja)");
addRule(lines, "zia", "", "", "(Za[polish]|zja)");
addRule(lines, "zią", "", "[bp]", "(Zom[polish]|zom)");
addRule(lines, "zią", "", "", "(Zon[polish]|zon)");
addRule(lines, "zię", "", "[bp]", "(Zem[polish]|zem)");
addRule(lines, "zię", "", "", "(Zen[polish]|zen)");
addRule(lines, "zie", "", "[bcdgkpstwzż]", "(Ze[polish]|ze[polish]|ze|tsi[german])");
addRule(lines, "zie", "", "", "(ze|Ze[polish]|tsi[german])");
addRule(lines, "zio", "", "", "(Zo[polish]|zo)");
addRule(lines, "ziu", "", "", "(Zu[polish]|zju)");
addRule(lines, "zi", "", "", "(Zi[polish]|zi|tsi[german]|dzi[italian]|tsi[italian]|si[spanish])");
addRule(lines, "z", "", "$", "(s|ts[german]|ts[italian]|S[portuguese])");
addRule(lines, "z", "", "[bdgv]", "(z|dz[italian]|Z[portuguese])");
addRule(lines, "z", "", "[ptckf]", "(s|ts[italian]|S[portuguese])");
addRule(lines, "aue", "", "", "aue");
addRule(lines, "oue", "", "", "(oue|ve[french])");
addRule(lines, "eau", "", "", "o");
addRule(lines, "ae", "", "", "(Y[german]|aje[russian]|ae)");
addRule(lines, "ai", "", "", "aj");
addRule(lines, "au", "", "", "(au|o[french])");
addRule(lines, "ay", "", "", "aj");
addRule(lines, "ão", "", "", "(au|an)");
addRule(lines, "ãe", "", "", "(aj|an)");
addRule(lines, "ãi", "", "", "(aj|an)");
addRule(lines, "ea", "", "", "(ea|ja[romanian])");
addRule(lines, "ee", "", "", "(i[english]|aje[russian]|e)");
addRule(lines, "ei", "", "", "(aj|ej)");
addRule(lines, "eu", "", "", "(eu|Yj[german]|ej[german]|oj[german]|Y[dutch])");
addRule(lines, "ey", "", "", "(aj|ej)");
addRule(lines, "ia", "", "", "ja");
addRule(lines, "ie", "", "", "(i[german]|e[polish]|ije[russian]|Q[dutch]|je)");
addRule(lines, "ii", "", "$", "i");
addRule(lines, "io", "", "", "(jo|e[russian])");
addRule(lines, "iu", "", "", "ju");
addRule(lines, "iy", "", "$", "i");
addRule(lines, "oe", "", "", "(Y[german]|oje[russian]|u[dutch]|oe)");
addRule(lines, "oi", "", "", "oj");
addRule(lines, "oo", "", "", "(u[english]|o)");
addRule(lines, "ou", "", "", "(ou|u[french+greeklatin]|au[dutch])");
addRule(lines, "où", "", "", "u");
addRule(lines, "oy", "", "", "oj");
addRule(lines, "õe", "", "", "(oj|on)");
addRule(lines, "ua", "", "", "va");
addRule(lines, "ue", "", "", "(Q[german]|uje[russian]|ve)");
addRule(lines, "ui", "", "", "(uj|vi|Y[dutch])");
addRule(lines, "uu", "", "", "(u|Q[dutch])");
addRule(lines, "uo", "", "", "(vo|o)");
addRule(lines, "uy", "", "", "uj");
addRule(lines, "ya", "", "", "ja");
addRule(lines, "ye", "", "", "(je|ije[russian])");
addRule(lines, "yi", "^", "", "i");
addRule(lines, "yi", "", "$", "i");
addRule(lines, "yo", "", "", "(jo|e[russian])");
addRule(lines, "yu", "", "", "ju");
addRule(lines, "yy", "", "$", "i");
addRule(lines, "i", "[áóéê]", "", "j");
addRule(lines, "y", "[áóéê]", "", "j");
addRule(lines, "e", "^", "", "(e|je[russian])");
addRule(lines, "e", "", "$", "(e|EE[english+french])");
addRule(lines, "ą", "", "[bp]", "om");
addRule(lines, "ą", "", "", "on");
addRule(lines, "ä", "", "", "(Y|e)");
addRule(lines, "á", "", "", "a");
addRule(lines, "à", "", "", "a");
addRule(lines, "â", "", "", "a");
addRule(lines, "ã", "", "", "(a|an)");
addRule(lines, "ă", "", "", "(e[romanian]|a)");
addRule(lines, "č", "", "", "tS");
addRule(lines, "ć", "", "", "(tS[polish]|ts)");
addRule(lines, "ç", "", "", "(s|tS[turkish])");
addRule(lines, "ď", "", "", "(d|dj[czech])");
addRule(lines, "ę", "", "[bp]", "em");
addRule(lines, "ę", "", "", "en");
addRule(lines, "é", "", "", "e");
addRule(lines, "è", "", "", "e");
addRule(lines, "ê", "", "", "e");
addRule(lines, "ě", "", "", "(e|je[czech])");
addRule(lines, "ğ", "", "", "");
addRule(lines, "í", "", "", "i");
addRule(lines, "î", "", "", "i");
addRule(lines, "ı", "", "", "(i|e[turkish]|[turkish])");
addRule(lines, "ł", "", "", "l");
addRule(lines, "ń", "", "", "(n|nj[polish])");
addRule(lines, "ñ", "", "", "(n|nj[spanish])");
addRule(lines, "ó", "", "", "(u[polish]|o)");
addRule(lines, "ô", "", "", "o");
addRule(lines, "õ", "", "", "(o|on[portuguese]|Y[hungarian])");
addRule(lines, "ò", "", "", "o");
addRule(lines, "ö", "", "", "Y");
addRule(lines, "ř", "", "", "(r|rZ[czech])");
addRule(lines, "ś", "", "", "(S[polish]|s)");
addRule(lines, "ş", "", "", "S");
addRule(lines, "š", "", "", "S");
addRule(lines, "ţ", "", "", "ts");
addRule(lines, "ť", "", "", "(t|tj[czech])");
addRule(lines, "ű", "", "", "Q");
addRule(lines, "ü", "", "", "(Q|u[portuguese+spanish])");
addRule(lines, "ú", "", "", "u");
addRule(lines, "ů", "", "", "u");
addRule(lines, "ù", "", "", "u");
addRule(lines, "ý", "", "", "i");
addRule(lines, "ż", "", "", "Z");
addRule(lines, "ź", "", "", "(Z[polish]|z)");
addRule(lines, "ß", "", "", "s");
addRule(lines, "'", "", "", "");
addRule(lines, "\"", "", "", "");
addRule(lines, "o", "", "[bcćdgklłmnńrsśtwzźż]", "(O|P[polish])");
addRule(lines, "a", "", "", "A");
addRule(lines, "b", "", "", "B");
addRule(lines, "c", "", "", "(k|ts[polish+czech]|dZ[turkish])");
addRule(lines, "d", "", "", "d");
addRule(lines, "e", "", "", "E");
addRule(lines, "f", "", "", "f");
addRule(lines, "g", "", "", "g");
addRule(lines, "h", "", "", "(h|x[romanian]|H[french+portuguese+italian+spanish])");
addRule(lines, "i", "", "", "I");
addRule(lines, "j", "", "", "(j|x[spanish]|Z[french+romanian+turkish+portuguese])");
addRule(lines, "k", "", "", "k");
addRule(lines, "l", "", "", "l");
addRule(lines, "m", "", "", "m");
addRule(lines, "n", "", "", "n");
addRule(lines, "o", "", "", "O");
addRule(lines, "p", "", "", "p");
addRule(lines, "q", "", "", "k");
addRule(lines, "r", "", "", "r");
addRule(lines, "s", "", "", "(s|S[portuguese])");
addRule(lines, "t", "", "", "t");
addRule(lines, "u", "", "", "U");
addRule(lines, "v", "", "", "V");
addRule(lines, "w", "", "", "(v|w[english+dutch])");
addRule(lines, "x", "", "", "(ks|gz|S[portuguese+spanish])");
addRule(lines, "y", "", "", "i");
addRule(lines, "z", "", "", "(z|ts[german]|dz[italian]|ts[italian]|s[spanish])");
}
private static void addGenRulesArabic(final Map> lines) {
addRule(lines, "ا", "", "", "a");
addRule(lines, "ب", "", "$", "b");
addRule(lines, "ب", "", "", "b1");
addRule(lines, "ت", "", "$", "t");
addRule(lines, "ت", "", "", "t1");
addRule(lines, "ث", "", "$", "t");
addRule(lines, "ث", "", "", "t1");
addRule(lines, "ج", "", "$", "(dZ|Z)");
addRule(lines, "ج", "", "", "(dZ1|Z1)");
addRule(lines, "ح", "^", "", "1");
addRule(lines, "ح", "", "$", "1");
addRule(lines, "ح", "", "", "(h1|1)");
addRule(lines, "خ", "", "$", "x");
addRule(lines, "خ", "", "", "x1");
addRule(lines, "د", "", "$", "d");
addRule(lines, "د", "", "", "d1");
addRule(lines, "ذ", "", "$", "d");
addRule(lines, "ذ", "", "", "d1");
addRule(lines, "ر", "", "$", "r");
addRule(lines, "ر", "", "", "r1");
addRule(lines, "ز", "", "$", "z");
addRule(lines, "ز", "", "", "z1");
addRule(lines, "س", "", "$", "s");
addRule(lines, "س", "", "", "s1");
addRule(lines, "ش", "", "$", "S");
addRule(lines, "ش", "", "", "S1");
addRule(lines, "ص", "", "$", "s");
addRule(lines, "ص", "", "", "s1");
addRule(lines, "ض", "", "$", "d");
addRule(lines, "ض", "", "", "d1");
addRule(lines, "ط", "", "$", "t");
addRule(lines, "ط", "", "", "t1");
addRule(lines, "ظ", "", "$", "z");
addRule(lines, "ظ", "", "", "z1");
addRule(lines, "ع", "^", "", "1");
addRule(lines, "ع", "", "$", "1");
addRule(lines, "ع", "", "", "(h1|1)");
addRule(lines, "غ", "", "$", "g");
addRule(lines, "غ", "", "", "g1");
addRule(lines, "ف", "", "$", "f");
addRule(lines, "ف", "", "", "f1");
addRule(lines, "ق", "", "$", "k");
addRule(lines, "ق", "", "", "k1");
addRule(lines, "ك", "", "$", "k");
addRule(lines, "ك", "", "", "k1");
addRule(lines, "ل", "", "$", "l");
addRule(lines, "ل", "", "", "l1");
addRule(lines, "م", "", "$", "m");
addRule(lines, "م", "", "", "m1");
addRule(lines, "ن", "", "$", "n");
addRule(lines, "ن", "", "", "n1");
addRule(lines, "ه", "^", "", "1");
addRule(lines, "ه", "", "$", "1");
addRule(lines, "ه", "", "", "(h1|1)");
addRule(lines, "و", "", "$", "(u|v)");
addRule(lines, "و", "", "", "(u|v1)");
addRule(lines, "ي", "", "$", "(i|j)");
addRule(lines, "ي", "", "", "(i|j1)");
}
private static void addGenRulesCyrillic(final Map> lines) {
addRule(lines, "ця", "", "", "tsa");
addRule(lines, "цю", "", "", "tsu");
addRule(lines, "циа", "", "", "tsa");
addRule(lines, "цие", "", "", "tse");
addRule(lines, "цио", "", "", "tso");
addRule(lines, "циу", "", "", "tsu");
addRule(lines, "сие", "", "", "se");
addRule(lines, "сио", "", "", "so");
addRule(lines, "зие", "", "", "ze");
addRule(lines, "зио", "", "", "zo");
addRule(lines, "с", "", "с", "");
addRule(lines, "гауз", "", "$", "haus");
addRule(lines, "гаус", "", "$", "haus");
addRule(lines, "гольц", "", "$", "holts");
addRule(lines, "геймер", "", "$", "(hejmer|hajmer)");
addRule(lines, "гейм", "", "$", "(hejm|hajm)");
addRule(lines, "гоф", "", "$", "hof");
addRule(lines, "гер", "", "$", "ger");
addRule(lines, "ген", "", "$", "gen");
addRule(lines, "гин", "", "$", "gin");
addRule(lines, "г", "(й|ё|я|ю|ы|а|е|о|и|у)", "(а|е|о|и|у)", "g");
addRule(lines, "г", "", "(а|е|о|и|у)", "(g|h)");
addRule(lines, "ля", "", "", "la");
addRule(lines, "лю", "", "", "lu");
addRule(lines, "лё", "", "", "(le|lo)");
addRule(lines, "лио", "", "", "(le|lo)");
addRule(lines, "ле", "", "", "(lE|lo)");
addRule(lines, "ийе", "", "", "je");
addRule(lines, "ие", "", "", "je");
addRule(lines, "ыйе", "", "", "je");
addRule(lines, "ые", "", "", "je");
addRule(lines, "ий", "", "(а|о|у)", "j");
addRule(lines, "ый", "", "(а|о|у)", "j");
addRule(lines, "ий", "", "$", "i");
addRule(lines, "ый", "", "$", "i");
addRule(lines, "ей", "^", "", "(jej|ej)");
addRule(lines, "е", "(а|е|о|у)", "", "je");
addRule(lines, "е", "^", "", "je");
addRule(lines, "эй", "", "", "ej");
addRule(lines, "ей", "", "", "ej");
addRule(lines, "ауе", "", "", "aue");
addRule(lines, "ауэ", "", "", "aue");
addRule(lines, "а", "", "", "a");
addRule(lines, "б", "", "", "b");
addRule(lines, "в", "", "", "v");
addRule(lines, "г", "", "", "g");
addRule(lines, "д", "", "", "d");
addRule(lines, "е", "", "", "E");
addRule(lines, "ё", "", "", "(e|jo)");
addRule(lines, "ж", "", "", "Z");
addRule(lines, "з", "", "", "z");
addRule(lines, "и", "", "", "I");
addRule(lines, "й", "", "", "j");
addRule(lines, "к", "", "", "k");
addRule(lines, "л", "", "", "l");
addRule(lines, "м", "", "", "m");
addRule(lines, "н", "", "", "n");
addRule(lines, "о", "", "", "o");
addRule(lines, "п", "", "", "p");
addRule(lines, "р", "", "", "r");
addRule(lines, "с", "", "", "s");
addRule(lines, "т", "", "", "t");
addRule(lines, "у", "", "", "u");
addRule(lines, "ф", "", "", "f");
addRule(lines, "х", "", "", "x");
addRule(lines, "ц", "", "", "ts");
addRule(lines, "ч", "", "", "tS");
addRule(lines, "ш", "", "", "S");
addRule(lines, "щ", "", "", "StS");
addRule(lines, "ъ", "", "", "");
addRule(lines, "ы", "", "", "I");
addRule(lines, "ь", "", "", "");
addRule(lines, "э", "", "", "E");
addRule(lines, "ю", "", "", "ju");
addRule(lines, "я", "", "", "ja");
}
private static void addGenRulesCzech(final Map> lines) {
addRule(lines, "ch", "", "", "x");
addRule(lines, "qu", "", "", "(k|kv)");
addRule(lines, "aue", "", "", "aue");
addRule(lines, "ei", "", "", "(ej|aj)");
addRule(lines, "i", "[aou]", "", "j");
addRule(lines, "i", "", "[aeou]", "j");
addRule(lines, "č", "", "", "tS");
addRule(lines, "š", "", "", "S");
addRule(lines, "ň", "", "", "n");
addRule(lines, "ť", "", "", "(t|tj)");
addRule(lines, "ď", "", "", "(d|dj)");
addRule(lines, "ř", "", "", "(r|rZ)");
addRule(lines, "á", "", "", "a");
addRule(lines, "é", "", "", "e");
addRule(lines, "í", "", "", "i");
addRule(lines, "ó", "", "", "o");
addRule(lines, "ú", "", "", "u");
addRule(lines, "ý", "", "", "i");
addRule(lines, "ě", "", "", "(e|je)");
addRule(lines, "ů", "", "", "u");
addRule(lines, "a", "", "", "a");
addRule(lines, "b", "", "", "b");
addRule(lines, "c", "", "", "ts");
addRule(lines, "d", "", "", "d");
addRule(lines, "e", "", "", "E");
addRule(lines, "f", "", "", "f");
addRule(lines, "g", "", "", "g");
addRule(lines, "h", "", "", "(h|g)");
addRule(lines, "i", "", "", "I");
addRule(lines, "j", "", "", "j");
addRule(lines, "k", "", "", "k");
addRule(lines, "l", "", "", "l");
addRule(lines, "m", "", "", "m");
addRule(lines, "n", "", "", "n");
addRule(lines, "o", "", "", "o");
addRule(lines, "p", "", "", "p");
addRule(lines, "q", "", "", "(k|kv)");
addRule(lines, "r", "", "", "r");
addRule(lines, "s", "", "", "s");
addRule(lines, "t", "", "", "t");
addRule(lines, "u", "", "", "u");
addRule(lines, "v", "", "", "v");
addRule(lines, "w", "", "", "v");
addRule(lines, "x", "", "", "ks");
addRule(lines, "y", "", "", "i");
addRule(lines, "z", "", "", "z");
}
private static void addGenRulesDutch(final Map> lines) {
addRule(lines, "ssj", "", "", "S");
addRule(lines, "sj", "", "", "S");
addRule(lines, "ch", "", "", "x");
addRule(lines, "c", "", "[eiy]", "ts");
addRule(lines, "ck", "", "", "k");
addRule(lines, "pf", "", "", "(pf|p|f)");
addRule(lines, "ph", "", "", "(ph|f)");
addRule(lines, "qu", "", "", "kv");
addRule(lines, "th", "^", "", "t");
addRule(lines, "th", "", "[äöüaeiou]", "(t|th)");
addRule(lines, "th", "", "", "t");
addRule(lines, "ss", "", "", "s");
addRule(lines, "h", "[aeiouy]", "", "");
addRule(lines, "aue", "", "", "aue");
addRule(lines, "ou", "", "", "au");
addRule(lines, "ie", "", "", "(Q|i)");
addRule(lines, "uu", "", "", "(Q|u)");
addRule(lines, "ee", "", "", "e");
addRule(lines, "eu", "", "", "(Y|Yj)");
addRule(lines, "aa", "", "", "a");
addRule(lines, "oo", "", "", "o");
addRule(lines, "oe", "", "", "u");
addRule(lines, "ij", "", "", "ej");
addRule(lines, "ui", "", "", "(Y|uj)");
addRule(lines, "ei", "", "", "(ej|aj)");
addRule(lines, "i", "", "[aou]", "j");
addRule(lines, "y", "", "[aeou]", "j");
addRule(lines, "i", "[aou]", "", "j");
addRule(lines, "y", "[aeou]", "", "j");
addRule(lines, "a", "", "", "a");
addRule(lines, "b", "", "", "b");
addRule(lines, "c", "", "", "k");
addRule(lines, "d", "", "", "d");
addRule(lines, "e", "", "", "e");
addRule(lines, "f", "", "", "f");
addRule(lines, "g", "", "", "(g|x)");
addRule(lines, "h", "", "", "h");
addRule(lines, "i", "", "", "(i|Q)");
addRule(lines, "j", "", "", "j");
addRule(lines, "k", "", "", "k");
addRule(lines, "l", "", "", "l");
addRule(lines, "m", "", "", "m");
addRule(lines, "n", "", "", "n");
addRule(lines, "o", "", "", "o");
addRule(lines, "p", "", "", "p");
addRule(lines, "q", "", "", "k");
addRule(lines, "r", "", "", "r");
addRule(lines, "s", "", "", "s");
addRule(lines, "t", "", "", "t");
addRule(lines, "u", "", "", "(u|Q)");
addRule(lines, "v", "", "", "v");
addRule(lines, "w", "", "", "(w|v)");
addRule(lines, "x", "", "", "ks");
addRule(lines, "y", "", "", "i");
addRule(lines, "z", "", "", "z");
}
private static void addGenRulesEnglish(final Map> lines) {
addRule(lines, "�", "", "", "");
addRule(lines, "'", "", "", "");
addRule(lines, "mc", "^", "", "mak");
addRule(lines, "tz", "", "", "ts");
addRule(lines, "tch", "", "", "tS");
addRule(lines, "ch", "", "", "(tS|x)");
addRule(lines, "ck", "", "", "k");
addRule(lines, "cc", "", "[iey]", "ks");
addRule(lines, "c", "", "c", "");
addRule(lines, "c", "", "[iey]", "s");
addRule(lines, "gh", "^", "", "g");
addRule(lines, "gh", "", "", "(g|f|w)");
addRule(lines, "gn", "", "", "(gn|n)");
addRule(lines, "g", "", "[iey]", "(g|dZ)");
addRule(lines, "th", "", "", "t");
addRule(lines, "kh", "", "", "x");
addRule(lines, "ph", "", "", "f");
addRule(lines, "sch", "", "", "(S|sk)");
addRule(lines, "sh", "", "", "S");
addRule(lines, "who", "^", "", "hu");
addRule(lines, "wh", "^", "", "w");
addRule(lines, "h", "", "$", "");
addRule(lines, "h", "", "[^aeiou]", "");
addRule(lines, "h", "^", "", "H");
addRule(lines, "kn", "^", "", "n");
addRule(lines, "mb", "", "$", "m");
addRule(lines, "ng", "", "$", "(N|ng)");
addRule(lines, "pn", "^", "", "(pn|n)");
addRule(lines, "ps", "^", "", "(ps|s)");
addRule(lines, "qu", "", "", "kw");
addRule(lines, "tia", "", "", "(So|Sa)");
addRule(lines, "tio", "", "", "So");
addRule(lines, "wr", "^", "", "r");
addRule(lines, "x", "^", "", "z");
addRule(lines, "y", "^", "", "j");
addRule(lines, "y", "^", "[aeiouy]", "j");
addRule(lines, "yi", "^", "", "i");
addRule(lines, "aue", "", "", "aue");
addRule(lines, "oue", "", "", "(aue|oue)");
addRule(lines, "ai", "", "", "(aj|ej|e)");
addRule(lines, "ay", "", "", "(aj|ej)");
addRule(lines, "a", "", "[^aeiou]e", "ej");
addRule(lines, "ei", "", "", "(ej|aj|i)");
addRule(lines, "ey", "", "", "(ej|aj|i)");
addRule(lines, "ear", "", "", "ia");
addRule(lines, "ea", "", "", "(i|e)");
addRule(lines, "ee", "", "", "i");
addRule(lines, "e", "", "[^aeiou]e", "i");
addRule(lines, "e", "", "$", "(|E)");
addRule(lines, "ie", "", "", "i");
addRule(lines, "i", "", "[^aeiou]e", "aj");
addRule(lines, "oa", "", "", "ou");
addRule(lines, "oi", "", "", "oj");
addRule(lines, "oo", "", "", "u");
addRule(lines, "ou", "", "", "(u|ou)");
addRule(lines, "oy", "", "", "oj");
addRule(lines, "o", "", "[^aeiou]e", "ou");
addRule(lines, "u", "", "[^aeiou]e", "(ju|u)");
addRule(lines, "u", "", "r", "(e|u)");
addRule(lines, "a", "", "", "(e|o|a)");
addRule(lines, "b", "", "", "b");
addRule(lines, "c", "", "", "k");
addRule(lines, "d", "", "", "d");
addRule(lines, "e", "", "", "E");
addRule(lines, "f", "", "", "f");
addRule(lines, "g", "", "", "g");
addRule(lines, "h", "", "", "h");
addRule(lines, "i", "", "", "I");
addRule(lines, "j", "", "", "dZ");
addRule(lines, "k", "", "", "k");
addRule(lines, "l", "", "", "l");
addRule(lines, "m", "", "", "m");
addRule(lines, "n", "", "", "n");
addRule(lines, "o", "", "", "(o|a)");
addRule(lines, "p", "", "", "p");
addRule(lines, "q", "", "", "k");
addRule(lines, "r", "", "", "r");
addRule(lines, "s", "", "", "s");
addRule(lines, "t", "", "", "t");
addRule(lines, "u", "", "", "(u|a)");
addRule(lines, "v", "", "", "v");
addRule(lines, "w", "", "", "(w|v)");
addRule(lines, "x", "", "", "ks");
addRule(lines, "y", "", "", "i");
addRule(lines, "z", "", "", "z");
}
/**
 * Feeds the hard-coded rule table of this method (French "gen" rules, going by the method
 * name) into {@code lines}.
 *
 * <p>Each row holds the four string arguments of one {@code addRule} call (presumably pattern,
 * left context, right context and phoneme expression; confirm against {@code addRule}). Rows
 * are submitted strictly from top to bottom, so the row order must not be changed.</p>
 *
 * @param lines rule container handed unchanged to every {@code addRule} call
 */
private static void addGenRulesFrench(final Map> lines) {
    final String[][] rows = {
        {"lt", "u", "$", "(lt|)"},
        {"c", "n", "$", "(k|)"},
        {"d", "", "$", "(t|)"},
        {"g", "n", "$", "(k|)"},
        {"p", "", "$", "(p|)"},
        {"r", "e", "$", "(r|)"},
        {"t", "", "$", "(t|)"},
        {"z", "", "$", "(s|)"},
        {"ds", "", "$", "(ds|)"},
        {"ps", "", "$", "(ps|)"},
        {"rs", "e", "$", "(rs|)"},
        {"ts", "", "$", "(ts|)"},
        {"s", "", "$", "(s|)"},
        {"x", "u", "$", "(ks|)"},
        {"s", "[aeéèêiou]", "[^aeéèêiou]", "(s|)"},
        {"t", "[aeéèêiou]", "[^aeéèêiou]", "(t|)"},
        {"kh", "", "", "x"},
        {"ph", "", "", "f"},
        {"ç", "", "", "s"},
        {"x", "", "", "ks"},
        {"ch", "", "", "S"},
        {"c", "", "[eiyéèê]", "s"},
        {"gn", "", "", "(n|gn)"},
        {"g", "", "[eiy]", "Z"},
        {"gue", "", "$", "k"},
        {"gu", "", "[eiy]", "g"},
        {"aill", "", "e", "aj"},
        {"ll", "", "e", "(l|j)"},
        {"que", "", "$", "k"},
        {"qu", "", "", "k"},
        {"s", "[aeiouyéèê]", "[aeiouyéèê]", "z"},
        {"h", "[bdgt]", "", ""},
        {"m", "[aeiouy]", "[aeiouy]", "m"},
        {"m", "[aeiouy]", "", "(m|n)"},
        {"ou", "", "[aeio]", "v"},
        {"u", "", "[aeio]", "v"},
        {"aue", "", "", "aue"},
        {"eau", "", "", "o"},
        {"au", "", "", "(o|au)"},
        {"ai", "", "", "(e|aj)"},
        {"ay", "", "", "(e|aj)"},
        {"é", "", "", "e"},
        {"ê", "", "", "e"},
        {"è", "", "", "e"},
        {"à", "", "", "a"},
        {"â", "", "", "a"},
        {"où", "", "", "u"},
        {"ou", "", "", "u"},
        {"oi", "", "", "(oj|va)"},
        {"ei", "", "", "(aj|ej|e)"},
        {"ey", "", "", "(aj|ej|e)"},
        {"eu", "", "", "(ej|Y)"},
        {"y", "[ou]", "", "j"},
        {"e", "", "$", "(e|)"},
        {"i", "", "[aou]", "j"},
        {"y", "", "[aoeu]", "j"},
        {"a", "", "", "a"},
        {"b", "", "", "b"},
        {"c", "", "", "k"},
        {"d", "", "", "d"},
        {"e", "", "", "e"},
        {"f", "", "", "f"},
        {"g", "", "", "g"},
        {"h", "", "", "h"},
        {"i", "", "", "i"},
        {"j", "", "", "Z"},
        {"k", "", "", "k"},
        {"l", "", "", "l"},
        {"m", "", "", "m"},
        {"n", "", "", "n"},
        {"o", "", "", "o"},
        {"p", "", "", "p"},
        {"q", "", "", "k"},
        {"r", "", "", "r"},
        {"s", "", "", "s"},
        {"t", "", "", "t"},
        {"u", "", "", "(u|Q)"},
        {"v", "", "", "v"},
        {"w", "", "", "v"},
        {"y", "", "", "i"},
        {"z", "", "", "z"},
    };
    // One addRule call per row, in table order.
    for (final String[] row : rows) {
        addRule(lines, row[0], row[1], row[2], row[3]);
    }
}
/**
 * Feeds the hard-coded rule table of this method (German "gen" rules, going by the method
 * name) into {@code lines}.
 *
 * <p>Each row holds the four string arguments of one {@code addRule} call (presumably pattern,
 * left context, right context and phoneme expression; confirm against {@code addRule}). Rows
 * are submitted strictly from top to bottom, so the row order must not be changed.</p>
 *
 * @param lines rule container handed unchanged to every {@code addRule} call
 */
private static void addGenRulesGerman(final Map> lines) {
    final String[][] rows = {
        {"ewitsch", "", "$", "evitS"},
        {"owitsch", "", "$", "ovitS"},
        {"evitsch", "", "$", "evitS"},
        {"ovitsch", "", "$", "ovitS"},
        {"witsch", "", "$", "vitS"},
        {"vitsch", "", "$", "vitS"},
        {"ssch", "", "", "S"},
        {"chsch", "", "", "xS"},
        {"sch", "", "", "S"},
        {"ziu", "", "", "tsu"},
        {"zia", "", "", "tsa"},
        {"zio", "", "", "tso"},
        {"chs", "", "", "ks"},
        {"ch", "", "", "x"},
        {"ck", "", "", "k"},
        {"c", "", "[eiy]", "ts"},
        {"sp", "^", "", "Sp"},
        {"st", "^", "", "St"},
        {"ssp", "", "", "(Sp|sp)"},
        {"sp", "", "", "(Sp|sp)"},
        {"sst", "", "", "(St|st)"},
        {"st", "", "", "(St|st)"},
        {"pf", "", "", "(pf|p|f)"},
        {"ph", "", "", "(ph|f)"},
        {"qu", "", "", "kv"},
        {"ewitz", "", "$", "(evits|evitS)"},
        {"ewiz", "", "$", "(evits|evitS)"},
        {"evitz", "", "$", "(evits|evitS)"},
        {"eviz", "", "$", "(evits|evitS)"},
        {"owitz", "", "$", "(ovits|ovitS)"},
        {"owiz", "", "$", "(ovits|ovitS)"},
        {"ovitz", "", "$", "(ovits|ovitS)"},
        {"oviz", "", "$", "(ovits|ovitS)"},
        {"witz", "", "$", "(vits|vitS)"},
        {"wiz", "", "$", "(vits|vitS)"},
        {"vitz", "", "$", "(vits|vitS)"},
        {"viz", "", "$", "(vits|vitS)"},
        {"tz", "", "", "ts"},
        {"thal", "", "$", "tal"},
        {"th", "^", "", "t"},
        {"th", "", "[äöüaeiou]", "(t|th)"},
        {"th", "", "", "t"},
        {"rh", "^", "", "r"},
        {"h", "[aeiouyäöü]", "", ""},
        {"h", "^", "", "H"},
        {"ss", "", "", "s"},
        {"s", "", "[äöüaeiouy]", "(z|s)"},
        {"s", "[aeiouyäöüj]", "[aeiouyäöü]", "z"},
        {"ß", "", "", "s"},
        {"ij", "", "$", "i"},
        {"aue", "", "", "aue"},
        {"ue", "", "", "Q"},
        {"ae", "", "", "Y"},
        {"oe", "", "", "Y"},
        {"ü", "", "", "Q"},
        {"ä", "", "", "(Y|e)"},
        {"ö", "", "", "Y"},
        {"ei", "", "", "(aj|ej)"},
        {"ey", "", "", "(aj|ej)"},
        {"eu", "", "", "(Yj|ej|aj|oj)"},
        {"i", "[aou]", "", "j"},
        {"y", "[aou]", "", "j"},
        {"ie", "", "", "I"},
        {"i", "", "[aou]", "j"},
        {"y", "", "[aoeu]", "j"},
        {"ñ", "", "", "n"},
        {"ã", "", "", "a"},
        {"ő", "", "", "o"},
        {"ű", "", "", "u"},
        {"ç", "", "", "s"},
        {"a", "", "", "A"},
        {"b", "", "", "b"},
        {"c", "", "", "k"},
        {"d", "", "", "d"},
        {"e", "", "", "E"},
        {"f", "", "", "f"},
        {"g", "", "", "g"},
        {"h", "", "", "h"},
        {"i", "", "", "I"},
        {"j", "", "", "j"},
        {"k", "", "", "k"},
        {"l", "", "", "l"},
        {"m", "", "", "m"},
        {"n", "", "", "n"},
        {"o", "", "", "O"},
        {"p", "", "", "p"},
        {"q", "", "", "k"},
        {"r", "", "", "r"},
        {"s", "", "", "s"},
        {"t", "", "", "t"},
        {"u", "", "", "U"},
        {"v", "", "", "(f|v)"},
        {"w", "", "", "v"},
        {"x", "", "", "ks"},
        {"y", "", "", "i"},
        {"z", "", "", "ts"},
    };
    // One addRule call per row, in table order.
    for (final String[] row : rows) {
        addRule(lines, row[0], row[1], row[2], row[3]);
    }
}
/**
 * Feeds the hard-coded rule table of this method (Greek-script "gen" rules, going by the
 * method name) into {@code lines}.
 *
 * <p>Each row holds the four string arguments of one {@code addRule} call (presumably pattern,
 * left context, right context and phoneme expression; confirm against {@code addRule}). Rows
 * are submitted strictly from top to bottom, so the row order must not be changed.</p>
 *
 * @param lines rule container handed unchanged to every {@code addRule} call
 */
private static void addGenRulesGreek(final Map> lines) {
    final String[][] rows = {
        {"αυ", "", "$", "af"},
        {"αυ", "", "(κ|π|σ|τ|φ|θ|χ|ψ)", "af"},
        {"αυ", "", "", "av"},
        {"ευ", "", "$", "ef"},
        {"ευ", "", "(κ|π|σ|τ|φ|θ|χ|ψ)", "ef"},
        {"ευ", "", "", "ev"},
        {"ηυ", "", "$", "if"},
        {"ηυ", "", "(κ|π|σ|τ|φ|θ|χ|ψ)", "if"},
        {"ηυ", "", "", "iv"},
        {"ου", "", "", "u"},
        {"αι", "", "", "aj"},
        {"ει", "", "", "ej"},
        {"οι", "", "", "oj"},
        {"ωι", "", "", "oj"},
        {"ηι", "", "", "ej"},
        {"υι", "", "", "i"},
        {"γγ", "(ε|ι|η|α|ο|ω|υ)", "(ε|ι|η)", "(nj|j)"},
        {"γγ", "", "(ε|ι|η)", "j"},
        {"γγ", "(ε|ι|η|α|ο|ω|υ)", "", "(ng|g)"},
        {"γγ", "", "", "g"},
        {"γκ", "^", "", "g"},
        {"γκ", "(ε|ι|η|α|ο|ω|υ)", "(ε|ι|η)", "(nj|j)"},
        {"γκ", "", "(ε|ι|η)", "j"},
        {"γκ", "(ε|ι|η|α|ο|ω|υ)", "", "(ng|g)"},
        {"γκ", "", "", "g"},
        {"γι", "", "(α|ο|ω|υ)", "j"},
        {"γι", "", "", "(gi|i)"},
        {"γε", "", "(α|ο|ω|υ)", "j"},
        {"γε", "", "", "(ge|je)"},
        {"κζ", "", "", "gz"},
        {"τζ", "", "", "dz"},
        {"σ", "", "(β|γ|δ|μ|ν|ρ)", "z"},
        {"μβ", "", "", "(mb|b)"},
        {"μπ", "^", "", "b"},
        {"μπ", "(ε|ι|η|α|ο|ω|υ)", "", "mb"},
        {"μπ", "", "", "b"},
        {"ντ", "^", "", "d"},
        {"ντ", "(ε|ι|η|α|ο|ω|υ)", "", "(nd|nt)"},
        {"ντ", "", "", "(nt|d)"},
        {"ά", "", "", "a"},
        {"έ", "", "", "e"},
        {"ή", "", "", "(i|e)"},
        {"ί", "", "", "i"},
        {"ό", "", "", "o"},
        {"ύ", "", "", "(Q|i|u)"},
        {"ώ", "", "", "o"},
        {"ΰ", "", "", "(Q|i|u)"},
        {"ϋ", "", "", "(Q|i|u)"},
        {"ϊ", "", "", "j"},
        {"α", "", "", "a"},
        {"β", "", "", "(v|b)"},
        {"γ", "", "", "g"},
        {"δ", "", "", "d"},
        {"ε", "", "", "e"},
        {"ζ", "", "", "z"},
        {"η", "", "", "(i|e)"},
        {"ι", "", "", "i"},
        {"κ", "", "", "k"},
        {"λ", "", "", "l"},
        {"μ", "", "", "m"},
        {"ν", "", "", "n"},
        {"ξ", "", "", "ks"},
        {"ο", "", "", "o"},
        {"π", "", "", "p"},
        {"ρ", "", "", "r"},
        {"σ", "", "", "s"},
        {"ς", "", "", "s"},
        {"τ", "", "", "t"},
        {"υ", "", "", "(Q|i|u)"},
        {"φ", "", "", "f"},
        {"θ", "", "", "t"},
        {"χ", "", "", "x"},
        {"ψ", "", "", "ps"},
        {"ω", "", "", "o"},
    };
    // One addRule call per row, in table order.
    for (final String[] row : rows) {
        addRule(lines, row[0], row[1], row[2], row[3]);
    }
}
/**
 * Feeds the hard-coded rule table of this method (Latin-script Greek "gen" rules, going by the
 * method name) into {@code lines}.
 *
 * <p>Each row holds the four string arguments of one {@code addRule} call (presumably pattern,
 * left context, right context and phoneme expression; confirm against {@code addRule}). Rows
 * are submitted strictly from top to bottom, so the row order must not be changed.</p>
 *
 * @param lines rule container handed unchanged to every {@code addRule} call
 */
private static void addGenRulesGreeklatin(final Map> lines) {
    final String[][] rows = {
        {"au", "", "$", "af"},
        {"au", "", "[kpstfh]", "af"},
        {"au", "", "", "av"},
        {"eu", "", "$", "ef"},
        {"eu", "", "[kpstfh]", "ef"},
        {"eu", "", "", "ev"},
        {"ou", "", "", "u"},
        {"gge", "[aeiouy]", "", "(nje|je)"},
        {"ggi", "[aeiouy]", "[aou]", "(nj|j)"},
        {"ggi", "[aeiouy]", "", "(ni|i)"},
        {"gge", "", "", "je"},
        {"ggi", "", "", "i"},
        {"gg", "[aeiouy]", "", "(ng|g)"},
        {"gg", "", "", "g"},
        {"gk", "^", "", "g"},
        {"gke", "[aeiouy]", "", "(nje|je)"},
        {"gki", "[aeiouy]", "", "(ni|i)"},
        {"gke", "", "", "je"},
        {"gki", "", "", "i"},
        {"gk", "[aeiouy]", "", "(ng|g)"},
        {"gk", "", "", "g"},
        {"nghi", "", "[aouy]", "Nj"},
        {"nghi", "", "", "(Ngi|Ni)"},
        {"nghe", "", "[aouy]", "Nj"},
        {"nghe", "", "", "(Nje|Nge)"},
        {"ghi", "", "[aouy]", "j"},
        {"ghi", "", "", "(gi|i)"},
        {"ghe", "", "[aouy]", "j"},
        {"ghe", "", "", "(je|ge)"},
        {"ngh", "", "", "Ng"},
        {"gh", "", "", "g"},
        {"ngi", "", "[aouy]", "Nj"},
        {"ngi", "", "", "(Ngi|Ni)"},
        {"nge", "", "[aouy]", "Nj"},
        {"nge", "", "", "(Nje|Nge)"},
        {"gi", "", "[aouy]", "j"},
        {"gi", "", "", "(gi|i)"},
        {"ge", "", "[aouy]", "j"},
        {"ge", "", "", "(je|ge)"},
        {"ng", "", "", "Ng"},
        {"i", "", "[aeou]", "j"},
        {"i", "[aeou]", "", "j"},
        {"y", "", "[aeou]", "j"},
        {"y", "[aeou]", "", "j"},
        {"yi", "", "[aeou]", "j"},
        {"yi", "", "", "i"},
        {"ch", "", "", "x"},
        {"kh", "", "", "x"},
        {"dh", "", "", "d"},
        {"dj", "", "", "dZ"},
        {"ph", "", "", "f"},
        {"th", "", "", "t"},
        {"kz", "", "", "gz"},
        {"tz", "", "", "dz"},
        {"s", "", "[bgdmnr]", "z"},
        {"mb", "", "", "(mb|b)"},
        {"mp", "^", "", "b"},
        {"mp", "[aeiouy]", "", "mp"},
        {"mp", "", "", "b"},
        {"nt", "^", "", "d"},
        {"nt", "[aeiouy]", "", "(nd|nt)"},
        {"nt", "", "", "(nt|d)"},
        {"á", "", "", "a"},
        {"é", "", "", "e"},
        {"í", "", "", "i"},
        {"ó", "", "", "o"},
        {"óu", "", "", "u"},
        {"ú", "", "", "u"},
        {"ý", "", "", "(i|Q|u)"},
        {"a", "", "", "a"},
        {"b", "", "", "(b|v)"},
        {"c", "", "", "k"},
        {"d", "", "", "d"},
        {"e", "", "", "e"},
        {"f", "", "", "f"},
        {"g", "", "", "g"},
        {"h", "", "", "x"},
        {"i", "", "", "i"},
        {"j", "", "", "(j|Z)"},
        {"k", "", "", "k"},
        {"l", "", "", "l"},
        {"m", "", "", "m"},
        {"n", "", "", "n"},
        {"ο", "", "", "o"},
        {"p", "", "", "p"},
        {"q", "", "", "k"},
        {"r", "", "", "r"},
        {"s", "", "", "s"},
        {"t", "", "", "t"},
        {"u", "", "", "u"},
        {"v", "", "", "v"},
        {"w", "", "", "v"},
        {"x", "", "", "ks"},
        {"y", "", "", "(i|Q|u)"},
        {"z", "", "", "z"},
    };
    // One addRule call per row, in table order.
    for (final String[] row : rows) {
        addRule(lines, row[0], row[1], row[2], row[3]);
    }
}
/**
 * Feeds the hard-coded rule table of this method (Hebrew "gen" rules, going by the method
 * name) into {@code lines}.
 *
 * <p>Each row holds the four string arguments of one {@code addRule} call (presumably pattern,
 * left context, right context and phoneme expression; confirm against {@code addRule}). Rows
 * are submitted strictly from top to bottom, so the row order must not be changed.</p>
 *
 * @param lines rule container handed unchanged to every {@code addRule} call
 */
private static void addGenRulesHebrew(final Map> lines) {
    final String[][] rows = {
        {"אי", "", "", "i"},
        {"עי", "", "", "i"},
        {"עו", "", "", "VV"},
        {"או", "", "", "VV"},
        {"ג׳", "", "", "Z"},
        {"ד׳", "", "", "dZ"},
        {"א", "", "", "L"},
        {"ב", "", "", "b"},
        {"ג", "", "", "g"},
        {"ד", "", "", "d"},
        {"ה", "^", "", "1"},
        {"ה", "", "$", "1"},
        {"ה", "", "", ""},
        {"וו", "", "", "V"},
        {"וי", "", "", "WW"},
        {"ו", "", "", "W"},
        {"ז", "", "", "z"},
        {"ח", "", "", "X"},
        {"ט", "", "", "T"},
        {"יי", "", "", "i"},
        {"י", "", "", "i"},
        {"ך", "", "", "X"},
        {"כ", "^", "", "K"},
        {"כ", "", "", "k"},
        {"ל", "", "", "l"},
        {"ם", "", "", "m"},
        {"מ", "", "", "m"},
        {"ן", "", "", "n"},
        {"נ", "", "", "n"},
        {"ס", "", "", "s"},
        {"ע", "", "", "L"},
        {"ף", "", "", "f"},
        {"פ", "", "", "f"},
        {"ץ", "", "", "C"},
        {"צ", "", "", "C"},
        {"ק", "", "", "K"},
        {"ר", "", "", "r"},
        {"ש", "", "", "s"},
        {"ת", "", "", "TB"},
    };
    // One addRule call per row, in table order.
    for (final String[] row : rows) {
        addRule(lines, row[0], row[1], row[2], row[3]);
    }
}
/**
 * Feeds the hard-coded rule table of this method (Hungarian "gen" rules, going by the method
 * name) into {@code lines}.
 *
 * <p>Each row holds the four string arguments of one {@code addRule} call (presumably pattern,
 * left context, right context and phoneme expression; confirm against {@code addRule}). Rows
 * are submitted strictly from top to bottom, so the row order must not be changed.</p>
 *
 * @param lines rule container handed unchanged to every {@code addRule} call
 */
private static void addGenRulesHungarian(final Map> lines) {
    final String[][] rows = {
        {"sz", "", "", "s"},
        {"zs", "", "", "Z"},
        {"cs", "", "", "tS"},
        {"ay", "", "", "(oj|aj)"},
        {"ai", "", "", "(oj|aj)"},
        {"aj", "", "", "(oj|aj)"},
        {"ei", "", "", "(aj|ej)"},
        {"ey", "", "", "(aj|ej)"},
        {"y", "[áo]", "", "j"},
        {"i", "[áo]", "", "j"},
        {"ee", "", "", "(ej|e)"},
        {"ely", "", "", "(ej|eli)"},
        {"ly", "", "", "(j|li)"},
        {"gy", "", "[aeouáéóúüöőű]", "dj"},
        {"gy", "", "", "(d|gi)"},
        {"ny", "", "[aeouáéóúüöőű]", "nj"},
        {"ny", "", "", "(n|ni)"},
        {"ty", "", "[aeouáéóúüöőű]", "tj"},
        {"ty", "", "", "(t|ti)"},
        {"qu", "", "", "(ku|kv)"},
        {"h", "", "$", ""},
        {"á", "", "", "a"},
        {"é", "", "", "e"},
        {"í", "", "", "i"},
        {"ó", "", "", "o"},
        {"ú", "", "", "u"},
        {"ö", "", "", "Y"},
        {"ő", "", "", "Y"},
        {"ü", "", "", "Q"},
        {"ű", "", "", "Q"},
        {"a", "", "", "a"},
        {"b", "", "", "b"},
        {"c", "", "", "ts"},
        {"d", "", "", "d"},
        {"e", "", "", "E"},
        {"f", "", "", "f"},
        {"g", "", "", "g"},
        {"h", "", "", "h"},
        {"i", "", "", "I"},
        {"j", "", "", "j"},
        {"k", "", "", "k"},
        {"l", "", "", "l"},
        {"m", "", "", "m"},
        {"n", "", "", "n"},
        {"o", "", "", "o"},
        {"p", "", "", "p"},
        {"q", "", "", "k"},
        {"r", "", "", "r"},
        {"s", "", "", "(S|s)"},
        {"t", "", "", "t"},
        {"u", "", "", "u"},
        {"v", "", "", "v"},
        {"w", "", "", "v"},
        {"x", "", "", "ks"},
        {"y", "", "", "i"},
        {"z", "", "", "z"},
    };
    // One addRule call per row, in table order.
    for (final String[] row : rows) {
        addRule(lines, row[0], row[1], row[2], row[3]);
    }
}
/**
 * Registers the hard-coded rule table of this method (Italian "gen" rules, going by the method
 * name) in {@code lines}.
 *
 * <p>Every statement is one {@code addRule} call whose four string arguments are presumably
 * pattern, left context, right context and phoneme expression (confirm against
 * {@code addRule}). The calls are order-sensitive and must stay in this sequence.</p>
 *
 * @param lines rule container handed unchanged to every {@code addRule} call
 */
private static void addGenRulesItalian(final Map> lines) {
addRule(lines, "kh", "", "", "x");
addRule(lines, "gli", "", "", "(l|gli)");
addRule(lines, "gn", "", "[aeou]", "(n|nj|gn)");
addRule(lines, "gni", "", "", "(ni|gni)");
addRule(lines, "gi", "", "[aeou]", "dZ");
addRule(lines, "gg", "", "[ei]", "dZ");
addRule(lines, "g", "", "[ei]", "dZ");
addRule(lines, "h", "[bdgt]", "", "g");
addRule(lines, "h", "", "$", "");
addRule(lines, "ci", "", "[aeou]", "tS");
addRule(lines, "ch", "", "[ei]", "k");
addRule(lines, "sc", "", "[ei]", "S");
addRule(lines, "cc", "", "[ei]", "tS");
addRule(lines, "c", "", "[ei]", "tS");
addRule(lines, "s", "[aeiou]", "[aeiou]", "z");
addRule(lines, "i", "[aeou]", "", "j");
addRule(lines, "i", "", "[aeou]", "j");
addRule(lines, "y", "[aeou]", "", "j");
addRule(lines, "y", "", "[aeou]", "j");
addRule(lines, "qu", "", "", "k");
addRule(lines, "uo", "", "", "(vo|o)");
addRule(lines, "u", "", "[aei]", "v");
// These four patterns used to be U+FFFD (REPLACEMENT CHARACTER), the residue of a mis-decoded
// source table: two duplicate pairs that could never match a real letter. They are restored
// to the accented vowels of the BMPM Italian table; the e/o targets are unchanged.
addRule(lines, "è", "", "", "e");
addRule(lines, "é", "", "", "e");
addRule(lines, "ò", "", "", "o");
addRule(lines, "ó", "", "", "o");
addRule(lines, "a", "", "", "a");
addRule(lines, "b", "", "", "b");
addRule(lines, "c", "", "", "k");
addRule(lines, "d", "", "", "d");
addRule(lines, "e", "", "", "e");
addRule(lines, "f", "", "", "f");
addRule(lines, "g", "", "", "g");
addRule(lines, "h", "", "", "h");
addRule(lines, "i", "", "", "i");
addRule(lines, "j", "", "", "(Z|dZ|j)");
addRule(lines, "k", "", "", "k");
addRule(lines, "l", "", "", "l");
addRule(lines, "m", "", "", "m");
addRule(lines, "n", "", "", "n");
addRule(lines, "o", "", "", "o");
addRule(lines, "p", "", "", "p");
addRule(lines, "q", "", "", "k");
addRule(lines, "r", "", "", "r");
addRule(lines, "s", "", "", "s");
addRule(lines, "t", "", "", "t");
addRule(lines, "u", "", "", "u");
addRule(lines, "v", "", "", "v");
addRule(lines, "w", "", "", "v");
addRule(lines, "x", "", "", "ks");
addRule(lines, "y", "", "", "i");
addRule(lines, "z", "", "", "(ts|dz)");
}
/**
 * Feeds the hard-coded rule table of this method (Polish "gen" rules, going by the method
 * name) into {@code lines}.
 *
 * <p>Each row holds the four string arguments of one {@code addRule} call (presumably pattern,
 * left context, right context and phoneme expression; confirm against {@code addRule}). Rows
 * are submitted strictly from top to bottom, so the row order must not be changed.</p>
 *
 * @param lines rule container handed unchanged to every {@code addRule} call
 */
private static void addGenRulesPolish(final Map> lines) {
    final String[][] rows = {
        {"ska", "", "$", "ski"},
        {"cka", "", "$", "tski"},
        {"lowa", "", "$", "(lova|lof|l|el)"},
        {"kowa", "", "$", "(kova|kof|k|ek)"},
        {"owa", "", "$", "(ova|of|)"},
        {"lowna", "", "$", "(lovna|levna|l|el)"},
        {"kowna", "", "$", "(kovna|k|ek)"},
        {"owna", "", "$", "(ovna|)"},
        {"lówna", "", "$", "(l|el)"},
        {"kówna", "", "$", "(k|ek)"},
        {"ówna", "", "$", ""},
        {"a", "", "$", "(a|i)"},
        {"czy", "", "", "tSi"},
        {"cze", "", "[bcdgkpstwzż]", "(tSe|tSF)"},
        {"ciewicz", "", "", "(tsevitS|tSevitS)"},
        {"siewicz", "", "", "(sevitS|SevitS)"},
        {"ziewicz", "", "", "(zevitS|ZevitS)"},
        {"riewicz", "", "", "rjevitS"},
        {"diewicz", "", "", "djevitS"},
        {"tiewicz", "", "", "tjevitS"},
        {"iewicz", "", "", "evitS"},
        {"ewicz", "", "", "evitS"},
        {"owicz", "", "", "ovitS"},
        {"icz", "", "", "itS"},
        {"cz", "", "", "tS"},
        {"ch", "", "", "x"},
        {"cia", "", "[bcdgkpstwzż]", "(tSB|tsB)"},
        {"cia", "", "", "(tSa|tsa)"},
        {"cią", "", "[bp]", "(tSom|tsom)"},
        {"cią", "", "", "(tSon|tson)"},
        {"cię", "", "[bp]", "(tSem|tsem)"},
        {"cię", "", "", "(tSen|tsen)"},
        {"cie", "", "[bcdgkpstwzż]", "(tSF|tsF)"},
        {"cie", "", "", "(tSe|tse)"},
        {"cio", "", "", "(tSo|tso)"},
        {"ciu", "", "", "(tSu|tsu)"},
        {"ci", "", "", "(tSi|tsI)"},
        {"ć", "", "", "(tS|ts)"},
        {"ssz", "", "", "S"},
        {"sz", "", "", "S"},
        {"sia", "", "[bcdgkpstwzż]", "(SB|sB|sja)"},
        {"sia", "", "", "(Sa|sja)"},
        {"sią", "", "[bp]", "(Som|som)"},
        {"sią", "", "", "(Son|son)"},
        {"się", "", "[bp]", "(Sem|sem)"},
        {"się", "", "", "(Sen|sen)"},
        {"sie", "", "[bcdgkpstwzż]", "(SF|sF|se)"},
        {"sie", "", "", "(Se|se)"},
        {"sio", "", "", "(So|so)"},
        {"siu", "", "", "(Su|sju)"},
        {"si", "", "", "(Si|sI)"},
        {"ś", "", "", "(S|s)"},
        {"zia", "", "[bcdgkpstwzż]", "(ZB|zB|zja)"},
        {"zia", "", "", "(Za|zja)"},
        {"zią", "", "[bp]", "(Zom|zom)"},
        {"zią", "", "", "(Zon|zon)"},
        {"zię", "", "[bp]", "(Zem|zem)"},
        {"zię", "", "", "(Zen|zen)"},
        {"zie", "", "[bcdgkpstwzż]", "(ZF|zF)"},
        {"zie", "", "", "(Ze|ze)"},
        {"zio", "", "", "(Zo|zo)"},
        {"ziu", "", "", "(Zu|zju)"},
        {"zi", "", "", "(Zi|zI)"},
        {"że", "", "[bcdgkpstwzż]", "(Ze|ZF)"},
        {"że", "", "[bcdgkpstwzż]", "(Ze|ZF|ze|zF)"},
        {"że", "", "", "Ze"},
        {"źe", "", "", "(Ze|ze)"},
        {"ży", "", "", "Zi"},
        {"źi", "", "", "(Zi|zi)"},
        {"ż", "", "", "Z"},
        {"ź", "", "", "(Z|z)"},
        {"rze", "t", "", "(Se|re)"},
        {"rze", "", "", "(Ze|re|rZe)"},
        {"rzy", "t", "", "(Si|ri)"},
        {"rzy", "", "", "(Zi|ri|rZi)"},
        {"rz", "t", "", "(S|r)"},
        {"rz", "", "", "(Z|r|rZ)"},
        {"lio", "", "", "(lo|le)"},
        {"ł", "", "", "l"},
        {"ń", "", "", "n"},
        {"qu", "", "", "k"},
        {"s", "", "s", ""},
        {"ó", "", "", "(u|o)"},
        {"ą", "", "[bp]", "om"},
        {"ę", "", "[bp]", "em"},
        {"ą", "", "", "on"},
        {"ę", "", "", "en"},
        {"ije", "", "", "je"},
        {"yje", "", "", "je"},
        {"iie", "", "", "je"},
        {"yie", "", "", "je"},
        {"iye", "", "", "je"},
        {"yye", "", "", "je"},
        {"ij", "", "[aou]", "j"},
        {"yj", "", "[aou]", "j"},
        {"ii", "", "[aou]", "j"},
        {"yi", "", "[aou]", "j"},
        {"iy", "", "[aou]", "j"},
        {"yy", "", "[aou]", "j"},
        {"rie", "", "", "rje"},
        {"die", "", "", "dje"},
        {"tie", "", "", "tje"},
        {"ie", "", "[bcdgkpstwzż]", "F"},
        {"ie", "", "", "e"},
        {"aue", "", "", "aue"},
        {"au", "", "", "au"},
        {"ei", "", "", "aj"},
        {"ey", "", "", "aj"},
        {"ej", "", "", "aj"},
        {"ai", "", "", "aj"},
        {"ay", "", "", "aj"},
        {"aj", "", "", "aj"},
        {"i", "[aeou]", "", "j"},
        {"y", "[aeou]", "", "j"},
        {"i", "", "[aou]", "j"},
        {"y", "", "[aeou]", "j"},
        {"a", "", "[bcdgkpstwzż]", "B"},
        {"e", "", "[bcdgkpstwzż]", "(E|F)"},
        {"o", "", "[bcćdgklłmnńrsśtwzźż]", "P"},
        {"a", "", "", "a"},
        {"b", "", "", "b"},
        {"c", "", "", "ts"},
        {"d", "", "", "d"},
        {"e", "", "", "E"},
        {"f", "", "", "f"},
        {"g", "", "", "g"},
        {"h", "", "", "(h|x)"},
        {"i", "", "", "I"},
        {"j", "", "", "j"},
        {"k", "", "", "k"},
        {"l", "", "", "l"},
        {"m", "", "", "m"},
        {"n", "", "", "n"},
        {"o", "", "", "o"},
        {"p", "", "", "p"},
        {"q", "", "", "k"},
        {"r", "", "", "r"},
        {"s", "", "", "s"},
        {"t", "", "", "t"},
        {"u", "", "", "u"},
        {"v", "", "", "v"},
        {"w", "", "", "v"},
        {"x", "", "", "ks"},
        {"y", "", "", "I"},
        {"z", "", "", "z"},
    };
    // One addRule call per row, in table order.
    for (final String[] row : rows) {
        addRule(lines, row[0], row[1], row[2], row[3]);
    }
}
/**
 * Feeds the hard-coded rule table of this method (Portuguese "gen" rules, going by the method
 * name) into {@code lines}.
 *
 * <p>Each row holds the four string arguments of one {@code addRule} call (presumably pattern,
 * left context, right context and phoneme expression; confirm against {@code addRule}). Rows
 * are submitted strictly from top to bottom, so the row order must not be changed.</p>
 *
 * @param lines rule container handed unchanged to every {@code addRule} call
 */
private static void addGenRulesPortuguese(final Map> lines) {
    final String[][] rows = {
        {"kh", "", "", "x"},
        {"ch", "", "", "S"},
        {"ss", "", "", "s"},
        {"sc", "", "[ei]", "s"},
        {"sç", "", "[aou]", "s"},
        {"ç", "", "", "s"},
        {"c", "", "[ei]", "s"},
        {"s", "^", "", "s"},
        {"s", "[aáuiíoóeéêy]", "[aáuiíoóeéêy]", "z"},
        {"s", "", "[dglmnrv]", "(Z|S)"},
        {"z", "", "$", "(Z|s|S)"},
        {"z", "", "[bdgv]", "(Z|z)"},
        {"z", "", "[ptckf]", "(s|S|z)"},
        {"gu", "", "[eiu]", "g"},
        {"gu", "", "[ao]", "gv"},
        {"g", "", "[ei]", "Z"},
        {"qu", "", "[eiu]", "k"},
        {"qu", "", "[ao]", "kv"},
        {"uo", "", "", "(vo|o|u)"},
        {"u", "", "[aei]", "v"},
        {"lh", "", "", "l"},
        {"nh", "", "", "nj"},
        {"h", "[bdgt]", "", ""},
        {"h", "", "$", ""},
        {"ex", "", "[aáuiíoóeéêy]", "(ez|eS|eks)"},
        {"ex", "", "[cs]", "e"},
        {"y", "[aáuiíoóeéê]", "", "j"},
        {"y", "", "[aeiíou]", "j"},
        {"m", "", "[bcdfglnprstv]", "(m|n)"},
        {"m", "", "$", "(m|n)"},
        {"ão", "", "", "(au|an|on)"},
        {"ãe", "", "", "(aj|an)"},
        {"ãi", "", "", "(aj|an)"},
        {"õe", "", "", "(oj|on)"},
        {"i", "[aáuoóeéê]", "", "j"},
        {"i", "", "[aeou]", "j"},
        {"â", "", "", "a"},
        {"à", "", "", "a"},
        {"á", "", "", "a"},
        {"ã", "", "", "(a|an|on)"},
        {"é", "", "", "e"},
        {"ê", "", "", "e"},
        {"í", "", "", "i"},
        {"ô", "", "", "o"},
        {"ó", "", "", "o"},
        {"õ", "", "", "(o|on)"},
        {"ú", "", "", "u"},
        {"ü", "", "", "u"},
        {"aue", "", "", "aue"},
        {"a", "", "", "a"},
        {"b", "", "", "b"},
        {"c", "", "", "k"},
        {"d", "", "", "d"},
        {"e", "", "", "(e|i)"},
        {"f", "", "", "f"},
        {"g", "", "", "g"},
        {"h", "", "", "h"},
        {"i", "", "", "i"},
        {"j", "", "", "Z"},
        {"k", "", "", "k"},
        {"l", "", "", "l"},
        {"m", "", "", "m"},
        {"n", "", "", "n"},
        {"o", "", "", "(o|u)"},
        {"p", "", "", "p"},
        {"q", "", "", "k"},
        {"r", "", "", "r"},
        {"s", "", "", "S"},
        {"t", "", "", "t"},
        {"u", "", "", "u"},
        {"v", "", "", "v"},
        {"w", "", "", "v"},
        {"x", "", "", "(S|ks)"},
        {"y", "", "", "i"},
        {"z", "", "", "z"},
    };
    // One addRule call per row, in table order.
    for (final String[] row : rows) {
        addRule(lines, row[0], row[1], row[2], row[3]);
    }
}
/**
 * Feeds the hard-coded rule table of this method (Romanian "gen" rules, going by the method
 * name) into {@code lines}.
 *
 * <p>Each row holds the four string arguments of one {@code addRule} call (presumably pattern,
 * left context, right context and phoneme expression; confirm against {@code addRule}). Rows
 * are submitted strictly from top to bottom, so the row order must not be changed.</p>
 *
 * @param lines rule container handed unchanged to every {@code addRule} call
 */
private static void addGenRulesRomanian(final Map> lines) {
    final String[][] rows = {
        {"ce", "", "", "tSe"},
        {"ci", "", "", "(tSi|tS)"},
        {"ch", "", "[ei]", "k"},
        {"ch", "", "", "x"},
        {"gi", "", "", "(dZi|dZ)"},
        {"g", "", "[ei]", "dZ"},
        {"gh", "", "", "g"},
        {"i", "[aeou]", "", "j"},
        {"i", "", "[aeou]", "j"},
        {"ţ", "", "", "ts"},
        {"ş", "", "", "S"},
        {"qu", "", "", "k"},
        {"î", "", "", "i"},
        {"ea", "", "", "ja"},
        {"ă", "", "", "(e|a)"},
        {"aue", "", "", "aue"},
        {"a", "", "", "a"},
        {"b", "", "", "b"},
        {"c", "", "", "k"},
        {"d", "", "", "d"},
        {"e", "", "", "E"},
        {"f", "", "", "f"},
        {"g", "", "", "g"},
        {"h", "", "", "(x|h)"},
        {"i", "", "", "I"},
        {"j", "", "", "Z"},
        {"k", "", "", "k"},
        {"l", "", "", "l"},
        {"m", "", "", "m"},
        {"n", "", "", "n"},
        {"o", "", "", "o"},
        {"p", "", "", "p"},
        {"q", "", "", "k"},
        {"r", "", "", "r"},
        {"s", "", "", "s"},
        {"t", "", "", "t"},
        {"u", "", "", "u"},
        {"v", "", "", "v"},
        {"w", "", "", "v"},
        {"x", "", "", "ks"},
        {"y", "", "", "i"},
        {"z", "", "", "z"},
    };
    // One addRule call per row, in table order.
    for (final String[] row : rows) {
        addRule(lines, row[0], row[1], row[2], row[3]);
    }
}
/**
 * Feeds the hard-coded rule table of this method (Russian "gen" rules, going by the method
 * name) into {@code lines}.
 *
 * <p>Each row holds the four string arguments of one {@code addRule} call (presumably pattern,
 * left context, right context and phoneme expression; confirm against {@code addRule}). Rows
 * are submitted strictly from top to bottom, so the row order must not be changed.</p>
 *
 * @param lines rule container handed unchanged to every {@code addRule} call
 */
private static void addGenRulesRussian(final Map> lines) {
    final String[][] rows = {
        {"yna", "", "$", "(in|ina)"},
        {"ina", "", "$", "(in|ina)"},
        {"liova", "", "$", "(lof|lef)"},
        {"lova", "", "$", "(lof|lef|lova)"},
        {"ova", "", "$", "(of|ova)"},
        {"eva", "", "$", "(ef|ova)"},
        {"aia", "", "$", "(aja|i)"},
        {"aja", "", "$", "(aja|i)"},
        {"aya", "", "$", "(aja|i)"},
        {"tsya", "", "", "tsa"},
        {"tsyu", "", "", "tsu"},
        {"tsia", "", "", "tsa"},
        {"tsie", "", "", "tse"},
        {"tsio", "", "", "tso"},
        {"tsye", "", "", "tse"},
        {"tsyo", "", "", "tso"},
        {"tsiu", "", "", "tsu"},
        {"sie", "", "", "se"},
        {"sio", "", "", "so"},
        {"zie", "", "", "ze"},
        {"zio", "", "", "zo"},
        {"sye", "", "", "se"},
        {"syo", "", "", "so"},
        {"zye", "", "", "ze"},
        {"zyo", "", "", "zo"},
        {"ger", "", "$", "ger"},
        {"gen", "", "$", "gen"},
        {"gin", "", "$", "gin"},
        {"gg", "", "", "g"},
        {"g", "[jaeoiuy]", "[aeoiu]", "g"},
        {"g", "", "[aeoiu]", "(g|h)"},
        {"kh", "", "", "x"},
        {"ch", "", "", "(tS|x)"},
        {"sch", "", "", "(StS|S)"},
        {"ssh", "", "", "S"},
        {"sh", "", "", "S"},
        {"zh", "", "", "Z"},
        {"tz", "", "$", "ts"},
        {"tz", "", "", "(ts|tz)"},
        {"c", "", "[iey]", "s"},
        {"qu", "", "", "(kv|k)"},
        {"s", "", "s", ""},
        {"lya", "", "", "la"},
        {"lyu", "", "", "lu"},
        {"lia", "", "", "la"},
        {"liu", "", "", "lu"},
        {"lja", "", "", "la"},
        {"lju", "", "", "lu"},
        {"le", "", "", "(lo|lE)"},
        {"lyo", "", "", "(lo|le)"},
        {"lio", "", "", "(lo|le)"},
        {"ije", "", "", "je"},
        {"ie", "", "", "je"},
        {"iye", "", "", "je"},
        {"iie", "", "", "je"},
        {"yje", "", "", "je"},
        {"ye", "", "", "je"},
        {"yye", "", "", "je"},
        {"yie", "", "", "je"},
        {"ij", "", "[aou]", "j"},
        {"iy", "", "[aou]", "j"},
        {"ii", "", "[aou]", "j"},
        {"yj", "", "[aou]", "j"},
        {"yy", "", "[aou]", "j"},
        {"yi", "", "[aou]", "j"},
        {"io", "", "", "(jo|e)"},
        {"i", "", "[au]", "j"},
        {"i", "[aeou]", "", "j"},
        {"yo", "", "", "(jo|e)"},
        {"y", "", "[au]", "j"},
        {"y", "[aeiou]", "", "j"},
        {"ii", "", "$", "i"},
        {"iy", "", "$", "i"},
        {"yy", "", "$", "i"},
        {"yi", "", "$", "i"},
        {"yj", "", "$", "i"},
        {"ij", "", "$", "i"},
        {"e", "^", "", "(je|E)"},
        {"ee", "", "", "(aje|i)"},
        {"e", "[aou]", "", "je"},
        {"oo", "", "", "(oo|u)"},
        {"'", "", "", ""},
        {"\"", "", "", ""},
        {"aue", "", "", "aue"},
        {"a", "", "", "a"},
        {"b", "", "", "b"},
        {"c", "", "", "k"},
        {"d", "", "", "d"},
        {"e", "", "", "E"},
        {"f", "", "", "f"},
        {"g", "", "", "g"},
        {"h", "", "", "h"},
        {"i", "", "", "I"},
        {"j", "", "", "j"},
        {"k", "", "", "k"},
        {"l", "", "", "l"},
        {"m", "", "", "m"},
        {"n", "", "", "n"},
        {"o", "", "", "o"},
        {"p", "", "", "p"},
        {"q", "", "", "k"},
        {"r", "", "", "r"},
        {"s", "", "", "s"},
        {"t", "", "", "t"},
        {"u", "", "", "u"},
        {"v", "", "", "v"},
        {"w", "", "", "v"},
        {"x", "", "", "ks"},
        {"y", "", "", "I"},
        {"z", "", "", "z"},
    };
    // One addRule call per row, in table order.
    for (final String[] row : rows) {
        addRule(lines, row[0], row[1], row[2], row[3]);
    }
}
private static void addGenRulesSpanish(final Map> lines) {
addRule(lines, "ñ", "", "", "(n|nj)");
addRule(lines, "ny", "", "", "nj");
addRule(lines, "ç", "", "", "s");
addRule(lines, "ig", "[aeiou]", "", "(tS|ig)");
addRule(lines, "ix", "[aeiou]", "", "S");
addRule(lines, "tx", "", "", "tS");
addRule(lines, "tj", "", "$", "tS");
addRule(lines, "tj", "", "", "dZ");
addRule(lines, "tg", "", "", "(tg|dZ)");
addRule(lines, "ch", "", "", "(tS|dZ)");
addRule(lines, "bh", "", "", "b");
addRule(lines, "h", "[dgt]", "", "");
addRule(lines, "h", "", "$", "");
addRule(lines, "m", "", "[bpvf]", "(m|n)");
addRule(lines, "c", "", "[ei]", "s");
addRule(lines, "gu", "", "[ei]", "(g|gv)");
addRule(lines, "g", "", "[ei]", "(x|g|dZ)");
addRule(lines, "qu", "", "", "k");
addRule(lines, "uo", "", "", "(vo|o)");
addRule(lines, "u", "", "[aei]", "v");
addRule(lines, "ü", "", "", "v");
addRule(lines, "á", "", "", "a");
addRule(lines, "é", "", "", "e");
addRule(lines, "í", "", "", "i");
addRule(lines, "ó", "", "", "o");
addRule(lines, "ú", "", "", "u");
addRule(lines, "à", "", "", "a");
addRule(lines, "è", "", "", "e");
addRule(lines, "ò", "", "", "o");
addRule(lines, "a", "", "", "a");
addRule(lines, "b", "", "", "B");
addRule(lines, "c", "", "", "k");
addRule(lines, "d", "", "", "d");
addRule(lines, "e", "", "", "e");
addRule(lines, "f", "", "", "f");
addRule(lines, "g", "", "", "g");
addRule(lines, "h", "", "", "h");
addRule(lines, "i", "", "", "i");
addRule(lines, "j", "", "", "(x|Z)");
addRule(lines, "k", "", "", "k");
addRule(lines, "l", "", "", "l");
addRule(lines, "m", "", "", "m");
addRule(lines, "n", "", "", "n");
addRule(lines, "o", "", "", "o");
addRule(lines, "p", "", "", "p");
addRule(lines, "q", "", "", "k");
addRule(lines, "r", "", "", "r");
addRule(lines, "s", "", "", "s");
addRule(lines, "t", "", "", "t");
addRule(lines, "u", "", "", "u");
addRule(lines, "v", "", "", "V");
addRule(lines, "w", "", "", "v");
addRule(lines, "x", "", "", "(ks|gz|S)");
addRule(lines, "y", "", "", "(i|j)");
addRule(lines, "z", "", "", "(z|s)");
}
private static void addGenRulesTurkish(final Map> lines) {
addRule(lines, "ç", "", "", "tS");
addRule(lines, "ğ", "", "", "");
addRule(lines, "ş", "", "", "S");
addRule(lines, "ü", "", "", "Q");
addRule(lines, "ö", "", "", "Y");
addRule(lines, "ı", "", "", "(e|i|)");
addRule(lines, "a", "", "", "a");
addRule(lines, "b", "", "", "b");
addRule(lines, "c", "", "", "dZ");
addRule(lines, "d", "", "", "d");
addRule(lines, "e", "", "", "e");
addRule(lines, "f", "", "", "f");
addRule(lines, "g", "", "", "g");
addRule(lines, "h", "", "", "h");
addRule(lines, "i", "", "", "i");
addRule(lines, "j", "", "", "Z");
addRule(lines, "k", "", "", "k");
addRule(lines, "l", "", "", "l");
addRule(lines, "m", "", "", "m");
addRule(lines, "n", "", "", "n");
addRule(lines, "o", "", "", "o");
addRule(lines, "p", "", "", "p");
addRule(lines, "q", "", "", "k");
addRule(lines, "r", "", "", "r");
addRule(lines, "s", "", "", "s");
addRule(lines, "t", "", "", "t");
addRule(lines, "u", "", "", "u");
addRule(lines, "v", "", "", "v");
addRule(lines, "w", "", "", "v");
addRule(lines, "x", "", "", "ks");
addRule(lines, "y", "", "", "j");
addRule(lines, "z", "", "", "z");
}
private static void addSepApproxAny(final Map> lines) {
addRule(lines, "E", "", "", "");
}
private static void addSepExactApproxCommon(final Map> lines) {
addRule(lines, "h", "", "$", "");
addRule(lines, "b", "", "[fktSs]", "p");
addRule(lines, "b", "", "p", "");
addRule(lines, "b", "", "$", "p");
addRule(lines, "p", "", "[vgdZz]", "b");
addRule(lines, "p", "", "b", "");
addRule(lines, "v", "", "[pktSs]", "f");
addRule(lines, "v", "", "f", "");
addRule(lines, "v", "", "$", "f");
addRule(lines, "f", "", "[vbgdZz]", "v");
addRule(lines, "f", "", "v", "");
addRule(lines, "g", "", "[pftSs]", "k");
addRule(lines, "g", "", "k", "");
addRule(lines, "g", "", "$", "k");
addRule(lines, "k", "", "[vbdZz]", "g");
addRule(lines, "k", "", "g", "");
addRule(lines, "d", "", "[pfkSs]", "t");
addRule(lines, "d", "", "t", "");
addRule(lines, "d", "", "$", "t");
addRule(lines, "t", "", "[vbgZz]", "d");
addRule(lines, "t", "", "d", "");
addRule(lines, "s", "", "dZ", "");
addRule(lines, "s", "", "tS", "");
addRule(lines, "z", "", "[pfkSt]", "s");
addRule(lines, "z", "", "[sSzZ]", "");
addRule(lines, "s", "", "[sSzZ]", "");
addRule(lines, "Z", "", "[sSzZ]", "");
addRule(lines, "S", "", "[sSzZ]", "");
addRule(lines, "nm", "", "", "m");
addRule(lines, "ji", "^", "", "i");
addRule(lines, "a", "", "a", "");
addRule(lines, "b", "", "b", "");
addRule(lines, "d", "", "d", "");
addRule(lines, "e", "", "e", "");
addRule(lines, "f", "", "f", "");
addRule(lines, "g", "", "g", "");
addRule(lines, "i", "", "i", "");
addRule(lines, "k", "", "k", "");
addRule(lines, "l", "", "l", "");
addRule(lines, "m", "", "m", "");
addRule(lines, "n", "", "n", "");
addRule(lines, "o", "", "o", "");
addRule(lines, "p", "", "p", "");
addRule(lines, "r", "", "r", "");
addRule(lines, "t", "", "t", "");
addRule(lines, "u", "", "u", "");
addRule(lines, "v", "", "v", "");
addRule(lines, "z", "", "z", "");
}
private static void addSepApproxCommon(final Map> lines) {
addRule(lines, "bens", "^", "", "(binz|s)");
addRule(lines, "benS", "^", "", "(binz|s)");
addRule(lines, "ben", "^", "", "(bin|)");
addRule(lines, "abens", "^", "", "(abinz|binz|s)");
addRule(lines, "abenS", "^", "", "(abinz|binz|s)");
addRule(lines, "aben", "^", "", "(abin|bin|)");
addRule(lines, "els", "^", "", "(ilz|alz|s)");
addRule(lines, "elS", "^", "", "(ilz|alz|s)");
addRule(lines, "el", "^", "", "(il|al|)");
addRule(lines, "als", "^", "", "(alz|s)");
addRule(lines, "alS", "^", "", "(alz|s)");
addRule(lines, "al", "^", "", "(al|)");
addRule(lines, "del", "^", "", "(dil|)");
addRule(lines, "dela", "^", "", "(dila|)");
addRule(lines, "da", "^", "", "(da|)");
addRule(lines, "de", "^", "", "(di|)");
addRule(lines, "oa", "", "", "(va|a|D)");
addRule(lines, "oe", "", "", "(vi|D)");
addRule(lines, "ae", "", "", "D");
addRule(lines, "n", "", "[bp]", "m");
addRule(lines, "h", "", "", "(|h|f)");
addRule(lines, "x", "", "", "h");
addRule(lines, "aja", "^", "", "(Da|ia)");
addRule(lines, "aje", "^", "", "(Di|Da|i|ia)");
addRule(lines, "aji", "^", "", "(Di|i)");
addRule(lines, "ajo", "^", "", "(Du|Da|iu|ia)");
addRule(lines, "aju", "^", "", "(Du|iu)");
addRule(lines, "aj", "", "", "D");
addRule(lines, "ej", "", "", "D");
addRule(lines, "oj", "", "", "D");
addRule(lines, "uj", "", "", "D");
addRule(lines, "au", "", "", "D");
addRule(lines, "eu", "", "", "D");
addRule(lines, "ou", "", "", "D");
addRule(lines, "a", "^", "", "(a|)");
addRule(lines, "ja", "^", "", "ia");
addRule(lines, "je", "^", "", "i");
addRule(lines, "jo", "^", "", "(iu|ia)");
addRule(lines, "ju", "^", "", "iu");
addRule(lines, "ja", "", "", "a");
addRule(lines, "je", "", "", "i");
addRule(lines, "ji", "", "", "i");
addRule(lines, "jo", "", "", "u");
addRule(lines, "ju", "", "", "u");
addRule(lines, "j", "", "", "i");
addRule(lines, "s", "", "[rmnl]", "z");
addRule(lines, "S", "", "[rmnl]", "z");
addRule(lines, "s", "[rmnl]", "", "z");
addRule(lines, "S", "[rmnl]", "", "z");
addRule(lines, "dS", "", "$", "S");
addRule(lines, "dZ", "", "$", "S");
addRule(lines, "Z", "", "$", "S");
addRule(lines, "S", "", "$", "(S|s)");
addRule(lines, "z", "", "$", "(S|s)");
addRule(lines, "S", "", "", "s");
addRule(lines, "dZ", "", "", "z");
addRule(lines, "Z", "", "", "z");
addRule(lines, "i", "", "$", "(i|)");
addRule(lines, "e", "", "", "i");
addRule(lines, "o", "", "$", "(a|u)");
addRule(lines, "o", "", "", "u");
addRule(lines, "B", "", "", "b");
addRule(lines, "V", "", "", "v");
addRule(lines, "p", "^", "", "b");
}
private static void addSepExactAny(final Map> lines) {
addRule(lines, "E", "", "", "e");
}
private static void addSepExactCommon(final Map> lines) {
addRule(lines, "h", "", "", "");
addRule(lines, "s", "[^t]", "[bgZd]", "z");
addRule(lines, "Z", "", "[pfkst]", "S");
addRule(lines, "Z", "", "$", "S");
addRule(lines, "S", "", "[bgzd]", "Z");
addRule(lines, "z", "", "$", "s");
addRule(lines, "B", "", "", "b");
addRule(lines, "V", "", "", "v");
}
private static void addSepRulesAny(final Map> lines) {
addRule(lines, "ph", "", "", "f");
addRule(lines, "sh", "", "", "S");
addRule(lines, "kh", "", "", "x");
addRule(lines, "gli", "", "", "(gli|l[italian])");
addRule(lines, "gni", "", "", "(gni|ni[italian+french])");
addRule(lines, "gn", "", "[aeou]", "(n[italian+french]|nj[italian+french]|gn)");
addRule(lines, "gh", "", "", "g");
addRule(lines, "dh", "", "", "d");
addRule(lines, "bh", "", "", "b");
addRule(lines, "th", "", "", "t");
addRule(lines, "lh", "", "", "l");
addRule(lines, "nh", "", "", "nj");
addRule(lines, "ig", "[aeiou]", "", "(ig|tS[spanish])");
addRule(lines, "ix", "[aeiou]", "", "S");
addRule(lines, "tx", "", "", "tS");
addRule(lines, "tj", "", "$", "tS");
addRule(lines, "tj", "", "", "dZ");
addRule(lines, "tg", "", "", "(tg|dZ[spanish])");
addRule(lines, "gi", "", "[aeou]", "dZ");
addRule(lines, "g", "", "y", "Z");
addRule(lines, "gg", "", "[ei]", "(gZ[portuguese+french]|dZ[italian+spanish]|x[spanish])");
addRule(lines, "g", "", "[ei]", "(Z[portuguese+french]|dZ[italian+spanish]|x[spanish])");
addRule(lines, "guy", "", "", "gi");
addRule(lines, "gue", "", "$", "(k[french]|ge)");
addRule(lines, "gu", "", "[ei]", "(g|gv)");
addRule(lines, "gu", "", "[ao]", "gv");
addRule(lines, "ñ", "", "", "(n|nj)");
addRule(lines, "ny", "", "", "nj");
addRule(lines, "sc", "", "[ei]", "(s|S[italian])");
addRule(lines, "sç", "", "[aeiou]", "s");
addRule(lines, "ss", "", "", "s");
addRule(lines, "ç", "", "", "s");
addRule(lines, "ch", "", "[ei]", "(k[italian]|S[portuguese+french]|tS[spanish]|dZ[spanish])");
addRule(lines, "ch", "", "", "(S|tS[spanish]|dZ[spanish])");
addRule(lines, "ci", "", "[aeou]", "(tS[italian]|si)");
addRule(lines, "cc", "", "[eiyéèê]", "(tS[italian]|ks[portuguese+french+spanish])");
addRule(lines, "c", "", "[eiyéèê]", "(tS[italian]|s[portuguese+french+spanish])");
addRule(lines, "s", "^", "", "s");
addRule(lines, "s", "[aáuiíoóeéêy]", "[aáuiíoóeéêy]", "(s[spanish]|z[portuguese+french+italian])");
addRule(lines, "s", "", "[dglmnrv]", "(z|Z[portuguese])");
addRule(lines, "z", "", "$", "(s|ts[italian]|S[portuguese])");
addRule(lines, "z", "", "[bdgv]", "(z|dz[italian]|Z[portuguese])");
addRule(lines, "z", "", "[ptckf]", "(s|ts[italian]|S[portuguese])");
addRule(lines, "z", "", "", "(z|dz[italian]|ts[italian]|s[spanish])");
addRule(lines, "que", "", "$", "(k[french]|ke)");
addRule(lines, "qu", "", "[eiu]", "k");
addRule(lines, "qu", "", "[ao]", "(kv|k)");
addRule(lines, "ex", "", "[aáuiíoóeéêy]", "(ez[portuguese]|eS[portuguese]|eks|egz)");
addRule(lines, "ex", "", "[cs]", "(e[portuguese]|ek)");
addRule(lines, "m", "", "[cdglnrst]", "(m|n[portuguese])");
addRule(lines, "m", "", "[bfpv]", "(m|n[portuguese+spanish])");
addRule(lines, "m", "", "$", "(m|n[portuguese])");
addRule(lines, "b", "^", "", "(b|V[spanish])");
addRule(lines, "v", "^", "", "(v|B[spanish])");
addRule(lines, "eau", "", "", "o");
addRule(lines, "ouh", "", "[aioe]", "(v[french]|uh)");
addRule(lines, "uh", "", "[aioe]", "(v|uh)");
addRule(lines, "ou", "", "[aioe]", "v");
addRule(lines, "uo", "", "", "(vo|o)");
addRule(lines, "u", "", "[aie]", "v");
addRule(lines, "i", "[aáuoóeéê]", "", "j");
addRule(lines, "i", "", "[aeou]", "j");
addRule(lines, "y", "[aáuiíoóeéê]", "", "j");
addRule(lines, "y", "", "[aeiíou]", "j");
addRule(lines, "e", "", "$", "(e|E[french])");
addRule(lines, "ão", "", "", "(au|an)");
addRule(lines, "ãe", "", "", "(aj|an)");
addRule(lines, "ãi", "", "", "(aj|an)");
addRule(lines, "õe", "", "", "(oj|on)");
addRule(lines, "où", "", "", "u");
addRule(lines, "ou", "", "", "(ou|u[french])");
addRule(lines, "â", "", "", "a");
addRule(lines, "à", "", "", "a");
addRule(lines, "á", "", "", "a");
addRule(lines, "ã", "", "", "(a|an)");
addRule(lines, "é", "", "", "e");
addRule(lines, "ê", "", "", "e");
addRule(lines, "è", "", "", "e");
addRule(lines, "í", "", "", "i");
addRule(lines, "î", "", "", "i");
addRule(lines, "ô", "", "", "o");
addRule(lines, "ó", "", "", "o");
addRule(lines, "õ", "", "", "(o|on)");
addRule(lines, "ò", "", "", "o");
addRule(lines, "ú", "", "", "u");
addRule(lines, "ü", "", "", "u");
addRule(lines, "a", "", "", "a");
addRule(lines, "b", "", "", "(b|v[spanish])");
addRule(lines, "c", "", "", "k");
addRule(lines, "d", "", "", "d");
addRule(lines, "e", "", "", "e");
addRule(lines, "f", "", "", "f");
addRule(lines, "g", "", "", "g");
addRule(lines, "h", "", "", "h");
addRule(lines, "i", "", "", "i");
addRule(lines, "j", "", "", "(x[spanish]|Z)");
addRule(lines, "k", "", "", "k");
addRule(lines, "l", "", "", "l");
addRule(lines, "m", "", "", "m");
addRule(lines, "n", "", "", "n");
addRule(lines, "o", "", "", "o");
addRule(lines, "p", "", "", "p");
addRule(lines, "q", "", "", "k");
addRule(lines, "r", "", "", "r");
addRule(lines, "s", "", "", "(s|S[portuguese])");
addRule(lines, "t", "", "", "t");
addRule(lines, "u", "", "", "u");
addRule(lines, "v", "", "", "(v|b[spanish])");
addRule(lines, "w", "", "", "v");
addRule(lines, "x", "", "", "(ks|gz|S[portuguese+spanish])");
addRule(lines, "y", "", "", "i");
addRule(lines, "z", "", "", "z");
}
private static void addSepRulesFrench(final Map> lines) {
addRule(lines, "kh", "", "", "x");
addRule(lines, "ph", "", "", "f");
addRule(lines, "ç", "", "", "s");
addRule(lines, "x", "", "", "ks");
addRule(lines, "ch", "", "", "S");
addRule(lines, "c", "", "[eiyéèê]", "s");
addRule(lines, "c", "", "", "k");
addRule(lines, "gn", "", "", "(n|gn)");
addRule(lines, "g", "", "[eiy]", "Z");
addRule(lines, "gue", "", "$", "k");
addRule(lines, "gu", "", "[eiy]", "g");
addRule(lines, "que", "", "$", "k");
addRule(lines, "qu", "", "", "k");
addRule(lines, "q", "", "", "k");
addRule(lines, "s", "[aeiouyéèê]", "[aeiouyéèê]", "z");
addRule(lines, "h", "[bdgt]", "", "");
addRule(lines, "h", "", "$", "");
addRule(lines, "j", "", "", "Z");
addRule(lines, "w", "", "", "v");
addRule(lines, "ouh", "", "[aioe]", "(v|uh)");
addRule(lines, "ou", "", "[aeio]", "v");
addRule(lines, "uo", "", "", "(vo|o)");
addRule(lines, "u", "", "[aeio]", "v");
addRule(lines, "aue", "", "", "aue");
addRule(lines, "eau", "", "", "o");
addRule(lines, "ai", "", "", "aj");
addRule(lines, "ay", "", "", "aj");
addRule(lines, "é", "", "", "e");
addRule(lines, "ê", "", "", "e");
addRule(lines, "è", "", "", "e");
addRule(lines, "à", "", "", "a");
addRule(lines, "â", "", "", "a");
addRule(lines, "où", "", "", "u");
addRule(lines, "ou", "", "", "u");
addRule(lines, "oi", "", "", "oj");
addRule(lines, "ei", "", "", "ej");
addRule(lines, "ey", "", "", "ej");
addRule(lines, "y", "[ou]", "", "j");
addRule(lines, "e", "", "$", "(e|)");
addRule(lines, "i", "", "[aou]", "j");
addRule(lines, "y", "", "[aoeu]", "j");
addRule(lines, "y", "", "", "i");
addRule(lines, "a", "", "", "a");
addRule(lines, "b", "", "", "b");
addRule(lines, "d", "", "", "d");
addRule(lines, "e", "", "", "e");
addRule(lines, "f", "", "", "f");
addRule(lines, "g", "", "", "g");
addRule(lines, "h", "", "", "h");
addRule(lines, "i", "", "", "i");
addRule(lines, "k", "", "", "k");
addRule(lines, "l", "", "", "l");
addRule(lines, "m", "", "", "m");
addRule(lines, "n", "", "", "n");
addRule(lines, "o", "", "", "o");
addRule(lines, "p", "", "", "p");
addRule(lines, "r", "", "", "r");
addRule(lines, "s", "", "", "s");
addRule(lines, "t", "", "", "t");
addRule(lines, "u", "", "", "u");
addRule(lines, "v", "", "", "v");
addRule(lines, "z", "", "", "z");
}
private static void addSepRulesHebrew(final Map> lines) {
addRule(lines, "אי", "", "", "i");
addRule(lines, "עי", "", "", "i");
addRule(lines, "עו", "", "", "VV");
addRule(lines, "או", "", "", "VV");
addRule(lines, "ג׳", "", "", "Z");
addRule(lines, "ד׳", "", "", "dZ");
addRule(lines, "א", "", "", "L");
addRule(lines, "ב", "", "", "b");
addRule(lines, "ג", "", "", "g");
addRule(lines, "ד", "", "", "d");
addRule(lines, "ה", "^", "", "1");
addRule(lines, "ה", "", "$", "1");
addRule(lines, "ה", "", "", "");
addRule(lines, "וו", "", "", "V");
addRule(lines, "וי", "", "", "WW");
addRule(lines, "ו", "", "", "W");
addRule(lines, "ז", "", "", "z");
addRule(lines, "ח", "", "", "X");
addRule(lines, "ט", "", "", "T");
addRule(lines, "יי", "", "", "i");
addRule(lines, "י", "", "", "i");
addRule(lines, "ך", "", "", "X");
addRule(lines, "כ", "^", "", "K");
addRule(lines, "כ", "", "", "k");
addRule(lines, "ל", "", "", "l");
addRule(lines, "ם", "", "", "m");
addRule(lines, "מ", "", "", "m");
addRule(lines, "ן", "", "", "n");
addRule(lines, "נ", "", "", "n");
addRule(lines, "ס", "", "", "s");
addRule(lines, "ע", "", "", "L");
addRule(lines, "ף", "", "", "f");
addRule(lines, "פ", "", "", "f");
addRule(lines, "ץ", "", "", "C");
addRule(lines, "צ", "", "", "C");
addRule(lines, "ק", "", "", "K");
addRule(lines, "ר", "", "", "r");
addRule(lines, "ש", "", "", "s");
addRule(lines, "ת", "", "", "T");
}
private static void addSepRulesItalian(final Map> lines) {
addRule(lines, "kh", "", "", "x");
addRule(lines, "gli", "", "", "(l|gli)");
addRule(lines, "gn", "", "[aeou]", "(n|nj|gn)");
addRule(lines, "gni", "", "", "(ni|gni)");
addRule(lines, "gi", "", "[aeou]", "dZ");
addRule(lines, "gg", "", "[ei]", "dZ");
addRule(lines, "g", "", "[ei]", "dZ");
addRule(lines, "h", "[bdgt]", "", "g");
addRule(lines, "ci", "", "[aeou]", "tS");
addRule(lines, "ch", "", "[ei]", "k");
addRule(lines, "sc", "", "[ei]", "S");
addRule(lines, "cc", "", "[ei]", "tS");
addRule(lines, "c", "", "[ei]", "tS");
addRule(lines, "s", "[aeiou]", "[aeiou]", "z");
addRule(lines, "i", "[aeou]", "", "j");
addRule(lines, "i", "", "[aeou]", "j");
addRule(lines, "y", "[aeou]", "", "j");
addRule(lines, "y", "", "[aeou]", "j");
addRule(lines, "qu", "", "", "k");
addRule(lines, "uo", "", "", "(vo|o)");
addRule(lines, "u", "", "[aei]", "v");
addRule(lines, "�", "", "", "e");
addRule(lines, "�", "", "", "e");
addRule(lines, "�", "", "", "o");
addRule(lines, "�", "", "", "o");
addRule(lines, "a", "", "", "a");
addRule(lines, "b", "", "", "b");
addRule(lines, "c", "", "", "k");
addRule(lines, "d", "", "", "d");
addRule(lines, "e", "", "", "e");
addRule(lines, "f", "", "", "f");
addRule(lines, "g", "", "", "g");
addRule(lines, "h", "", "", "h");
addRule(lines, "i", "", "", "i");
addRule(lines, "j", "", "", "(Z|dZ|j)");
addRule(lines, "k", "", "", "k");
addRule(lines, "l", "", "", "l");
addRule(lines, "m", "", "", "m");
addRule(lines, "n", "", "", "n");
addRule(lines, "o", "", "", "o");
addRule(lines, "p", "", "", "p");
addRule(lines, "q", "", "", "k");
addRule(lines, "r", "", "", "r");
addRule(lines, "s", "", "", "s");
addRule(lines, "t", "", "", "t");
addRule(lines, "u", "", "", "u");
addRule(lines, "v", "", "", "v");
addRule(lines, "w", "", "", "v");
addRule(lines, "x", "", "", "ks");
addRule(lines, "y", "", "", "i");
addRule(lines, "z", "", "", "(ts|dz)");
}
private static void addSepRulesPortuguese(final Map> lines) {
addRule(lines, "kh", "", "", "x");
addRule(lines, "ch", "", "", "S");
addRule(lines, "ss", "", "", "s");
addRule(lines, "sc", "", "[ei]", "s");
addRule(lines, "sç", "", "[aou]", "s");
addRule(lines, "ç", "", "", "s");
addRule(lines, "c", "", "[ei]", "s");
addRule(lines, "s", "^", "", "s");
addRule(lines, "s", "[aáuiíoóeéêy]", "[aáuiíoóeéêy]", "z");
addRule(lines, "s", "", "[dglmnrv]", "(Z|S)");
addRule(lines, "z", "", "$", "(Z|s|S)");
addRule(lines, "z", "", "[bdgv]", "(Z|z)");
addRule(lines, "z", "", "[ptckf]", "(s|S|z)");
addRule(lines, "gu", "", "[eiu]", "g");
addRule(lines, "gu", "", "[ao]", "gv");
addRule(lines, "g", "", "[ei]", "Z");
addRule(lines, "qu", "", "[eiu]", "k");
addRule(lines, "qu", "", "[ao]", "kv");
addRule(lines, "uo", "", "", "(vo|o|u)");
addRule(lines, "u", "", "[aei]", "v");
addRule(lines, "lh", "", "", "l");
addRule(lines, "nh", "", "", "nj");
addRule(lines, "h", "[bdgt]", "", "");
addRule(lines, "ex", "", "[aáuiíoóeéêy]", "(ez|eS|eks)");
addRule(lines, "ex", "", "[cs]", "e");
addRule(lines, "y", "[aáuiíoóeéê]", "", "j");
addRule(lines, "y", "", "[aeiíou]", "j");
addRule(lines, "m", "", "[bcdfglnprstv]", "(m|n)");
addRule(lines, "m", "", "$", "(m|n)");
addRule(lines, "ão", "", "", "(au|an|on)");
addRule(lines, "ãe", "", "", "(aj|an)");
addRule(lines, "ãi", "", "", "(aj|an)");
addRule(lines, "õe", "", "", "(oj|on)");
addRule(lines, "i", "[aáuoóeéê]", "", "j");
addRule(lines, "i", "", "[aeou]", "j");
addRule(lines, "â", "", "", "a");
addRule(lines, "à", "", "", "a");
addRule(lines, "á", "", "", "a");
addRule(lines, "ã", "", "", "(a|an|on)");
addRule(lines, "é", "", "", "e");
addRule(lines, "ê", "", "", "e");
addRule(lines, "í", "", "", "i");
addRule(lines, "ô", "", "", "o");
addRule(lines, "ó", "", "", "o");
addRule(lines, "õ", "", "", "(o|on)");
addRule(lines, "ú", "", "", "u");
addRule(lines, "ü", "", "", "u");
addRule(lines, "aue", "", "", "aue");
addRule(lines, "a", "", "", "a");
addRule(lines, "b", "", "", "b");
addRule(lines, "c", "", "", "k");
addRule(lines, "d", "", "", "d");
addRule(lines, "e", "", "", "(e|i)");
addRule(lines, "f", "", "", "f");
addRule(lines, "g", "", "", "g");
addRule(lines, "h", "", "", "h");
addRule(lines, "i", "", "", "i");
addRule(lines, "j", "", "", "Z");
addRule(lines, "k", "", "", "k");
addRule(lines, "l", "", "", "l");
addRule(lines, "m", "", "", "m");
addRule(lines, "n", "", "", "n");
addRule(lines, "o", "", "", "(o|u)");
addRule(lines, "p", "", "", "p");
addRule(lines, "q", "", "", "k");
addRule(lines, "r", "", "", "r");
addRule(lines, "s", "", "", "S");
addRule(lines, "t", "", "", "t");
addRule(lines, "u", "", "", "u");
addRule(lines, "v", "", "", "v");
addRule(lines, "w", "", "", "v");
addRule(lines, "x", "", "", "(S|ks)");
addRule(lines, "y", "", "", "i");
addRule(lines, "z", "", "", "z");
}
private static void addSepRulesSpanish(final Map> lines) {
addRule(lines, "ñ", "", "", "(n|nj)");
addRule(lines, "ny", "", "", "nj");
addRule(lines, "ç", "", "", "s");
addRule(lines, "ig", "[aeiou]", "", "(tS|ig)");
addRule(lines, "ix", "[aeiou]", "", "S");
addRule(lines, "tx", "", "", "tS");
addRule(lines, "tj", "", "$", "tS");
addRule(lines, "tj", "", "", "dZ");
addRule(lines, "tg", "", "", "(tg|dZ)");
addRule(lines, "ch", "", "", "(tS|dZ)");
addRule(lines, "bh", "", "", "b");
addRule(lines, "h", "[dgt]", "", "");
addRule(lines, "j", "", "", "(x|Z)");
addRule(lines, "x", "", "", "(ks|gz|S)");
addRule(lines, "w", "", "", "v");
addRule(lines, "v", "^", "", "(B|v)");
addRule(lines, "b", "^", "", "(b|V)");
addRule(lines, "v", "", "", "(b|v)");
addRule(lines, "b", "", "", "(b|v)");
addRule(lines, "m", "", "[bpvf]", "(m|n)");
addRule(lines, "c", "", "[ei]", "s");
addRule(lines, "c", "", "", "k");
addRule(lines, "z", "", "", "(z|s)");
addRule(lines, "gu", "", "[ei]", "(g|gv)");
addRule(lines, "g", "", "[ei]", "(x|g|dZ)");
addRule(lines, "qu", "", "", "k");
addRule(lines, "q", "", "", "k");
addRule(lines, "uo", "", "", "(vo|o)");
addRule(lines, "u", "", "[aei]", "v");
addRule(lines, "y", "", "", "(i|j)");
addRule(lines, "ü", "", "", "v");
addRule(lines, "á", "", "", "a");
addRule(lines, "é", "", "", "e");
addRule(lines, "í", "", "", "i");
addRule(lines, "ó", "", "", "o");
addRule(lines, "ú", "", "", "u");
addRule(lines, "à", "", "", "a");
addRule(lines, "è", "", "", "e");
addRule(lines, "ò", "", "", "o");
addRule(lines, "a", "", "", "a");
addRule(lines, "d", "", "", "d");
addRule(lines, "e", "", "", "e");
addRule(lines, "f", "", "", "f");
addRule(lines, "g", "", "", "g");
addRule(lines, "h", "", "", "h");
addRule(lines, "i", "", "", "i");
addRule(lines, "k", "", "", "k");
addRule(lines, "l", "", "", "l");
addRule(lines, "m", "", "", "m");
addRule(lines, "n", "", "", "n");
addRule(lines, "o", "", "", "o");
addRule(lines, "p", "", "", "p");
addRule(lines, "r", "", "", "r");
addRule(lines, "s", "", "", "s");
addRule(lines, "t", "", "", "t");
addRule(lines, "u", "", "", "u");
}
private static void addRule(Map> lines, String pat,
String lCon, String rCon, String ph) {
final Rule r = new Rule(pat, lCon, rCon, parsePhonemeExpr(ph));
final String patternKey = r.pattern.substring(0,1);
List rules = lines.get(patternKey);
if (rules == null) {
rules = new ArrayList< >();
lines.put(patternKey, rules);
}
rules.add(r);
}
/**
 * Builds the classpath location of the rule file for a name type, rule type and language.
 *
 * @param nameType
 *            the name type whose name forms the first part of the file name
 * @param rt
 *            the rule type whose name forms the second part of the file name
 * @param lang
 *            the language forming the third part of the file name
 * @return the resource path of the rule file
 */
@GwtIncompatible("incompatible method")
private static String createResourceName(final NameType nameType, final RuleType rt, final String lang) {
    final String namePart = nameType.getName();
    final String rulePart = rt.getName();
    return String.format("org/apache/commons/codec/language/bm/%s_%s_%s.txt", namePart, rulePart, lang);
}
/**
 * Opens a scanner over the rule file for a name type, rule type and language.
 *
 * @param nameType
 *            the name type selecting the rule file
 * @param rt
 *            the rule type selecting the rule file
 * @param lang
 *            the language selecting the rule file
 * @return a scanner reading the resource with {@code ResourceConstants.ENCODING}
 */
@GwtIncompatible("incompatible method")
private static Scanner createScanner(final NameType nameType, final RuleType rt, final String lang) {
    return new Scanner(
            Resources.getInputStream(createResourceName(nameType, rt, lang)),
            ResourceConstants.ENCODING);
}
/**
 * Opens a scanner over the resource file named after the given language or include name.
 *
 * @param lang
 *            the base name of the resource file, without the {@code .txt} extension
 * @return a scanner reading the resource with {@code ResourceConstants.ENCODING}
 */
@GwtIncompatible("incompatible method")
private static Scanner createScanner(final String lang) {
    return new Scanner(
            Resources.getInputStream(String.format("org/apache/commons/codec/language/bm/%s.txt", lang)),
            ResourceConstants.ENCODING);
}
/**
 * Tests whether a character sequence ends with the given suffix.
 *
 * @param input
 *            the sequence to inspect
 * @param suffix
 *            the suffix to look for
 * @return {@code true} if the last {@code suffix.length()} characters of {@code input} equal
 *         {@code suffix}; an empty suffix always matches
 */
private static boolean endsWith(final CharSequence input, final CharSequence suffix) {
    final int suffixLength = suffix.length();
    // Index in input at which the suffix would have to begin.
    final int offset = input.length() - suffixLength;
    if (offset < 0) {
        return false;
    }
    for (int k = 0; k < suffixLength; k++) {
        if (input.charAt(offset + k) != suffix.charAt(k)) {
            return false;
        }
    }
    return true;
}
/**
* Gets rules for a combination of name type, rule type and languages.
*
* @param nameType
* the NameType to consider
* @param rt
* the RuleType to consider
* @param langs
* the set of languages to consider
* @return a list of Rules that apply
*/
public static List getInstance(final NameType nameType, final RuleType rt,
final Languages.LanguageSet langs) {
final Map> ruleMap = getInstanceMap(nameType, rt, langs);
final List allRules = new ArrayList<>();
for (final List rules : ruleMap.values()) {
allRules.addAll(rules);
}
return allRules;
}
/**
 * Gets rules for a combination of name type, rule type and a single language.
 *
 * @param nameType
 *            the NameType to consider
 * @param rt
 *            the RuleType to consider
 * @param lang
 *            the language to consider
 * @return a list of Rules that apply
 */
public static List<Rule> getInstance(final NameType nameType, final RuleType rt, final String lang) {
    // Wrap the single language in a one-element language set and delegate.
    return getInstance(nameType, rt, LanguageSet.from(new HashSet<>(Arrays.asList(lang))));
}
/**
* Gets rules for a combination of name type, rule type and languages.
*
* @param nameType
* the NameType to consider
* @param rt
* the RuleType to consider
* @param langs
* the set of languages to consider
* @return a map containing all Rules that apply, grouped by the first character of the rule pattern
* @since 1.9
*/
public static Map> getInstanceMap(final NameType nameType, final RuleType rt,
final Languages.LanguageSet langs) {
return langs.isSingleton() ? getInstanceMap(nameType, rt, langs.getAny()) :
getInstanceMap(nameType, rt, Languages.ANY);
}
/**
* Gets rules for a combination of name type, rule type and a single language.
*
* @param nameType
* the NameType to consider
* @param rt
* the RuleType to consider
* @param lang
* the language to consider
* @return a map containing all Rules that apply, grouped by the first character of the rule pattern
* @since 1.9
*/
public static Map> getInstanceMap(final NameType nameType, final RuleType rt,
final String lang) {
final Map> rules = RULES.get(nameType).get(rt).get(lang);
if (rules == null) {
throw new IllegalArgumentException("No rules found for "
+ nameType.getName() + ", " + rt.getName() + ", " + lang + ".");
}
return rules;
}
/**
 * Parses a single phoneme, optionally followed by a bracketed language restriction such as
 * {@code tS[italian+spanish]}.
 *
 * @param ph
 *            the phoneme text
 * @return a phoneme restricted to the listed languages, or to {@code Languages.ANY_LANGUAGE}
 *         when no bracket is present
 * @throws IllegalArgumentException
 *             if the text contains {@code [} but does not end in {@code ]}
 */
private static Phoneme parsePhoneme(final String ph) {
    final int open = ph.indexOf("[");
    if (open >= 0) {
        if (!ph.endsWith("]")) {
            throw new IllegalArgumentException("Phoneme expression contains a '[' but does not end in ']'");
        }
        final String before = ph.substring(0, open);
        // Text between the brackets is a '+'-separated list of language names.
        final String in = ph.substring(open + 1, ph.length() - 1);
        final Set<String> langs = new HashSet<>(Arrays.asList(in.split("[+]")));
        return new Phoneme(before, Languages.LanguageSet.from(langs));
    }
    return new Phoneme(ph, Languages.ANY_LANGUAGE);
}
/**
 * Parses a phoneme expression: either a single phoneme or a parenthesised list of
 * {@code |}-separated alternatives such as {@code (m|n[portuguese])}.
 *
 * @param ph
 *            the phoneme expression text
 * @return a {@code PhonemeList} for a parenthesised expression, otherwise a single phoneme
 * @throws IllegalArgumentException
 *             if the text starts with {@code (} but does not end with {@code )}, or an
 *             alternative is rejected by {@code parsePhoneme}
 */
private static PhonemeExpr parsePhonemeExpr(final String ph) {
    if (ph.startsWith("(")) { // we have a bracketed list of options
        if (!ph.endsWith(")")) {
            throw new IllegalArgumentException("Phoneme starts with '(' so must end with ')'");
        }
        final List<Phoneme> phs = new ArrayList<>();
        final String body = ph.substring(1, ph.length() - 1);
        for (final String part : body.split("[|]")) {
            phs.add(parsePhoneme(part));
        }
        // A leading or trailing '|' denotes an additional empty alternative.
        if (body.startsWith("|") || body.endsWith("|")) {
            phs.add(new Phoneme("", Languages.ANY_LANGUAGE));
        }
        return new PhonemeList(phs);
    }
    return parsePhoneme(ph);
}
@GwtIncompatible("incompatible method")
private static Map> parseRules(final Scanner scanner, final String location) {
final Map> lines = new HashMap<>();
int currentLine = 0;
boolean inMultilineComment = false;
while (scanner.hasNextLine()) {
currentLine++;
final String rawLine = scanner.nextLine();
String line = rawLine;
if (inMultilineComment) {
if (line.endsWith(ResourceConstants.EXT_CMT_END)) {
inMultilineComment = false;
}
} else {
if (line.startsWith(ResourceConstants.EXT_CMT_START)) {
inMultilineComment = true;
} else {
// discard comments
final int cmtI = line.indexOf(ResourceConstants.CMT);
if (cmtI >= 0) {
line = line.substring(0, cmtI);
}
// trim leading-trailing whitespace
line = line.trim();
if (line.length() == 0) {
continue; // empty lines can be safely skipped
}
if (line.startsWith(HASH_INCLUDE)) {
// include statement
final String incl = line.substring(HASH_INCLUDE.length()).trim();
if (incl.contains(" ")) {
throw new IllegalArgumentException("Malformed import statement '" + rawLine + "' in " +
location);
}
try (final Scanner hashIncludeScanner = createScanner(incl)) {
lines.putAll(parseRules(hashIncludeScanner, location + "->" + incl));
}
} else {
// rule
final String[] parts = line.split("\\s+");
if (parts.length != 4) {
throw new IllegalArgumentException("Malformed rule statement split into " + parts.length +
" parts: " + rawLine + " in " + location);
}
try {
final String pat = stripQuotes(parts[0]);
final String lCon = stripQuotes(parts[1]);
final String rCon = stripQuotes(parts[2]);
final PhonemeExpr ph = parsePhonemeExpr(stripQuotes(parts[3]));
final int cLine = currentLine;
final Rule r = new Rule(pat, lCon, rCon, ph) {
private final int myLine = cLine;
private final String loc = location;
@Override
public String toString() {
final StringBuilder sb = new StringBuilder();
sb.append("Rule");
sb.append("{line=").append(myLine);
sb.append(", loc='").append(loc).append('\'');
sb.append(", pat='").append(pat).append('\'');
sb.append(", lcon='").append(lCon).append('\'');
sb.append(", rcon='").append(rCon).append('\'');
sb.append('}');
return sb.toString();
}
};
final String patternKey = r.pattern.substring(0,1);
List rules = lines.get(patternKey);
if (rules == null) {
rules = new ArrayList<>();
lines.put(patternKey, rules);
}
rules.add(r);
} catch (final IllegalArgumentException e) {
throw new IllegalStateException("Problem parsing line '" + currentLine + "' in " +
location, e);
}
}
}
}
}
return lines;
}
/**
 * Attempts to compile the regex into direct string ops, falling back to a GWT {@link RegExp} in the worst case.
 *
 * <p>Direct matchers are produced for expressions made of an optional leading {@code ^}, an
 * optional trailing {@code $} and either plain text or a single character class such as
 * {@code [abc]} or {@code [^abc]}. Anything else is handed to {@link RegExp}.</p>
 *
 * @param regex
 *            the regular expression to compile
 * @return an RPattern that will match this regex
 */
private static RPattern pattern(final String regex) {
    final boolean startsWith = regex.startsWith("^");
    final boolean endsWith = regex.endsWith("$");
    final String content = regex.substring(startsWith ? 1 : 0, endsWith ? regex.length() - 1 : regex.length());
    final boolean boxes = content.contains("[");
    if (!boxes) {
        if (startsWith && endsWith) {
            // exact match
            if (content.length() == 0) {
                // empty
                return new RPattern() {
                    @Override
                    public boolean isMatch(final CharSequence input) {
                        return input.length() == 0;
                    }
                };
            }
            return new RPattern() {
                @Override
                public boolean isMatch(final CharSequence input) {
                    // Compare by content so a non-String CharSequence can match as well.
                    return content.equals(input.toString());
                }
            };
        } else if ((startsWith || endsWith) && content.length() == 0) {
            // matches every string
            return ALL_STRINGS_RMATCHER;
        } else if (startsWith) {
            // matches from start
            return new RPattern() {
                @Override
                public boolean isMatch(final CharSequence input) {
                    return startsWith(input, content);
                }
            };
        } else if (endsWith) {
            // matches from end
            return new RPattern() {
                @Override
                public boolean isMatch(final CharSequence input) {
                    return endsWith(input, content);
                }
            };
        }
    } else {
        final boolean startsWithBox = content.startsWith("[");
        final boolean endsWithBox = content.endsWith("]");
        if (startsWithBox && endsWithBox) {
            String boxContent = content.substring(1, content.length() - 1);
            if (!boxContent.contains("[")) {
                // box containing alternatives
                final boolean negate = boxContent.startsWith("^");
                if (negate) {
                    boxContent = boxContent.substring(1);
                }
                final String bContent = boxContent;
                final boolean shouldMatch = !negate;
                if (startsWith && endsWith) {
                    // exact match
                    return new RPattern() {
                        @Override
                        public boolean isMatch(final CharSequence input) {
                            return input.length() == 1 && contains(bContent, input.charAt(0)) == shouldMatch;
                        }
                    };
                } else if (startsWith) {
                    // first char
                    return new RPattern() {
                        @Override
                        public boolean isMatch(final CharSequence input) {
                            return input.length() > 0 && contains(bContent, input.charAt(0)) == shouldMatch;
                        }
                    };
                } else if (endsWith) {
                    // last char
                    return new RPattern() {
                        @Override
                        public boolean isMatch(final CharSequence input) {
                            return input.length() > 0 &&
                                   contains(bContent, input.charAt(input.length() - 1)) == shouldMatch;
                        }
                    };
                }
            }
        }
    }
    // General case: compile once and search the input for a match.
    return new RPattern() {
        private final RegExp pattern = RegExp.compile(regex);
        @Override
        public boolean isMatch(final CharSequence input) {
            // toString() instead of a String cast: input may be any CharSequence.
            final MatchResult matcher = pattern.exec(input.toString());
            return (matcher != null);
        }
    };
}
/**
 * Tests whether {@code input} begins with the characters of {@code prefix}.
 *
 * @param input
 *            the character sequence to inspect
 * @param prefix
 *            the characters expected at the beginning of {@code input}
 * @return true if {@code prefix} is not longer than {@code input} and every character of {@code prefix} equals
 *         the character of {@code input} at the same index, false otherwise
 */
private static boolean startsWith(final CharSequence input, final CharSequence prefix) {
    final int prefixLength = prefix.length();
    // a prefix longer than the input can never match
    boolean matches = prefixLength <= input.length();
    for (int pos = 0; matches && pos < prefixLength; pos++) {
        matches = input.charAt(pos) == prefix.charAt(pos);
    }
    return matches;
}
/**
 * Removes at most one leading and at most one trailing {@code DOUBLE_QUOTE} from the given string.
 * The trailing check is made on the text that remains after the leading quote has been removed.
 *
 * @param str
 *            the text to strip
 * @return the text without the surrounding quotes
 */
private static String stripQuotes(String str) {
    final String withoutLeading = str.startsWith(DOUBLE_QUOTE) ? str.substring(1) : str;
    return withoutLeading.endsWith(DOUBLE_QUOTE)
            ? withoutLeading.substring(0, withoutLeading.length() - 1)
            : withoutLeading;
}
/** The literal text this rule applies to. */
private final String pattern;

/** Matcher built from the left context expression, anchored at its end. */
private final RPattern lContext;

/** Matcher built from the right context expression, anchored at its start. */
private final RPattern rContext;

/** The phoneme expression associated with this rule. */
private final PhonemeExpr phoneme;

/**
 * Creates a new rule.
 * <p>
 * The left context is compiled with a {@code $} appended and the right context with a {@code ^} prepended.
 * </p>
 *
 * @param pattern
 *            the pattern
 * @param lContext
 *            the left context
 * @param rContext
 *            the right context
 * @param phoneme
 *            the resulting phoneme
 */
public Rule(final String pattern, final String lContext, final String rContext, final PhonemeExpr phoneme) {
    final String anchoredLeft = lContext + "$";
    final String anchoredRight = "^" + rContext;

    this.pattern = pattern;
    this.lContext = pattern(anchoredLeft);
    this.rContext = pattern(anchoredRight);
    this.phoneme = phoneme;
}
/**
 * Returns the matcher for the left context, i.e. the expression that has to match the text to the left of the
 * pattern.
 *
 * @return the left context matcher
 */
public RPattern getLContext() {
    return lContext;
}
/**
 * Returns the pattern of this rule. The pattern is a string literal that has to match exactly.
 *
 * @return the pattern
 */
public String getPattern() {
    return pattern;
}
/**
 * Returns the phoneme of this rule, which is the phoneme associated with a match of the pattern.
 *
 * @return the phoneme
 */
public PhonemeExpr getPhoneme() {
    return phoneme;
}
/**
 * Returns the matcher for the right context, i.e. the expression that has to match the text to the right of the
 * pattern.
 *
 * @return the right context matcher
 */
public RPattern getRContext() {
    return rContext;
}
/**
 * Decides if the pattern and context match the input starting at a position. It is a match if the
 * {@code lContext} matches {@code input} up to {@code i}, {@code pattern} matches at i and
 * {@code rContext} matches from the end of the match of {@code pattern} to the end of {@code input}.
 * <p>
 * The pattern is compared character by character, so the result does not depend on how the concrete
 * {@code CharSequence} implementation defines {@code equals}.
 * </p>
 *
 * @param input
 *            the input String
 * @param i
 *            the int position within the input
 * @return true if the pattern and left/right context match, false otherwise
 * @throws IndexOutOfBoundsException
 *             if {@code i} is negative
 */
public boolean patternAndContextMatches(final CharSequence input, final int i) {
    if (i < 0) {
        throw new IndexOutOfBoundsException("Can not match pattern at negative indexes");
    }

    final int patternLength = this.pattern.length();
    final int inputLength = input.length();
    // written as a subtraction so that a very large i can not overflow i + patternLength
    if (i > inputLength - patternLength) {
        // not enough room for the pattern to match
        return false;
    }
    final int ipl = i + patternLength;

    // evaluate the pattern, left context and right context
    // fail early if any of the evaluations is not successful
    for (int offset = 0; offset < patternLength; offset++) {
        if (input.charAt(i + offset) != this.pattern.charAt(offset)) {
            return false;
        }
    }
    if (!this.rContext.isMatch(input.subSequence(ipl, inputLength))) {
        return false;
    }
    return this.lContext.isMatch(input.subSequence(0, i));
}
}