All downloads are free. Search and download functionality uses the official Maven repository.

org.apache.commons.codec.language.bm.Rule Maven / Gradle / Ivy

Go to download

The Apache Commons Codec component contains encoders and decoders for various formats such as Base16, Base32, Base64, digest, and Hexadecimal. In addition to these widely used encoders and decoders, the codec package also maintains a collection of phonetic encoding utilities. This is a port for GWT, which enables programs to use Apache Commons Codec in the front end as well, where it is compiled to JavaScript by the GWT compiler.

There is a newer version: 1.17.1-0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.commons.codec.language.bm;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.EnumMap;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Scanner;
import java.util.Set;

import org.apache.commons.codec.Resources;
import org.apache.commons.codec.language.bm.Languages.LanguageSet;

import com.google.gwt.core.shared.GwtIncompatible;
import com.google.gwt.regexp.shared.MatchResult;
import com.google.gwt.regexp.shared.RegExp;

/**
 * A phoneme rule.
 * <p>
 * Rules have a pattern, left context, right context, output phoneme, set of languages for which they apply
 * and a logical flag indicating if all languages must be in play. A rule matches if:
 * </p>
 * <ul>
 * <li>the pattern matches at the current position</li>
 * <li>the string up until the beginning of the pattern matches the left context</li>
 * <li>the string from the end of the pattern matches the right context</li>
 * <li>logical is ALL and all languages are in scope; or</li>
 * <li>logical is any other value and at least one language is in scope</li>
 * </ul>
 * <p>
 * Rules are typically generated by parsing rules resources. In normal use, there will be no need for the user
 * to explicitly construct their own.
 * </p>
 * <p>
 * Rules are immutable and thread-safe.
 * </p>
 * <p>
 * <b>Rules resources</b>
 * </p>
 * <p>
 * Rules are typically loaded from resource files. These are UTF-8 encoded text files. They are systematically
 * named following the pattern:
 * </p>
 * <blockquote>
 * org/apache/commons/codec/language/bm/${NameType#getName}_${RuleType#getName}_${language}.txt
 * </blockquote>
 * <p>
 * The format of these resources is the following:
 * </p>
 * <ul>
 * <li><b>Rules:</b> whitespace separated, double-quoted strings. There should be 4 columns to each row, and these
 * will be interpreted as:
 * <ol>
 * <li>pattern</li>
 * <li>left context</li>
 * <li>right context</li>
 * <li>phoneme</li>
 * </ol>
 * </li>
 * <li><b>End-of-line comments:</b> Any occurrence of '//' will cause all text following on that line to be discarded
 * as a comment.</li>
 * <li><b>Multi-line comments:</b> Any line starting with '/*' will start multi-line commenting mode. This will skip
 * all content until a line ending in '*' and '/' is found.</li>
 * <li><b>Blank lines:</b> All blank lines will be skipped.</li>
 * </ul>
* * @since 1.6 */ public class Rule { public static final class Phoneme implements PhonemeExpr { public static final Comparator COMPARATOR = new Comparator() { @Override public int compare(final Phoneme o1, final Phoneme o2) { for (int i = 0; i < o1.phonemeText.length(); i++) { if (i >= o2.phonemeText.length()) { return +1; } final int c = o1.phonemeText.charAt(i) - o2.phonemeText.charAt(i); if (c != 0) { return c; } } if (o1.phonemeText.length() < o2.phonemeText.length()) { return -1; } return 0; } }; private final StringBuilder phonemeText; private final Languages.LanguageSet languages; public Phoneme(final CharSequence phonemeText, final Languages.LanguageSet languages) { this.phonemeText = new StringBuilder(phonemeText); this.languages = languages; } public Phoneme(final Phoneme phonemeLeft, final Phoneme phonemeRight) { this(phonemeLeft.phonemeText, phonemeLeft.languages); this.phonemeText.append(phonemeRight.phonemeText); } public Phoneme(final Phoneme phonemeLeft, final Phoneme phonemeRight, final Languages.LanguageSet languages) { this(phonemeLeft.phonemeText, languages); this.phonemeText.append(phonemeRight.phonemeText); } public Phoneme append(final CharSequence str) { this.phonemeText.append(str); return this; } public Languages.LanguageSet getLanguages() { return this.languages; } @Override public Iterable getPhonemes() { return Collections.singleton(this); } public CharSequence getPhonemeText() { return this.phonemeText; } /** * Deprecated since 1.9. * * @param right the Phoneme to join * @return a new Phoneme * @deprecated since 1.9 */ @Deprecated public Phoneme join(final Phoneme right) { return new Phoneme(this.phonemeText.toString() + right.phonemeText.toString(), this.languages.restrictTo(right.languages)); } /** * Returns a new Phoneme with the same text but a union of its * current language set and the given one. 
* * @param lang the language set to merge * @return a new Phoneme */ public Phoneme mergeWithLanguage(final LanguageSet lang) { return new Phoneme(this.phonemeText.toString(), this.languages.merge(lang)); } @Override public String toString() { return phonemeText.toString() + "[" + languages + "]"; } } public interface PhonemeExpr { Iterable getPhonemes(); } public static final class PhonemeList implements PhonemeExpr { private final List phonemes; public PhonemeList(final List phonemes) { this.phonemes = phonemes; } @Override public List getPhonemes() { return this.phonemes; } } /** * A minimal wrapper around the functionality of Pattern that we use, to allow for alternate implementations. */ public interface RPattern { boolean isMatch(CharSequence input); } public static final RPattern ALL_STRINGS_RMATCHER = new RPattern() { @Override public boolean isMatch(final CharSequence input) { return true; } }; public static final String ALL = "ALL"; private static final String DOUBLE_QUOTE = "\""; private static final String HASH_INCLUDE = "#include"; private static final Map>>>> RULES = new EnumMap<>(NameType.class); static { for (final NameType s : NameType.values()) { final Map>>> rts = new EnumMap<>(RuleType.class); for (final RuleType rt : RuleType.values()) { final Map>> rs = new HashMap<>(); final Languages ls = Languages.getInstance(s); for (final String l : ls.getLanguages()) { final Map> lines = new HashMap<>(); switch(s) { case ASHKENAZI: switch(rt) { case APPROX: switch(l) { case "any": addAshApproxAny(lines); break; case "common": addAshExactApproxCommon(lines); addAshApproxCommon(lines); break; case "cyrillic": addAshApproxRussian(lines); break; case "english": addAshApproxEnglish(lines); break; case "french": addAshApproxFrench(lines); break; case "german": addAshApproxGerman(lines); break; case "hungarian": addAshApproxFrench(lines); break; case "polish": addAshApproxPolish(lines); break; case "romanian": addAshApproxPolish(lines); break; case "russian": 
addAshApproxRussian(lines); break; case "spanish": addAshApproxFrench(lines); break; default: break; } break; case EXACT: switch(l) { case "any": addAshExactAny(lines); break; case "common": addAshExactApproxCommon(lines); addAshExactCommon(lines); break; case "cyrillic": addAshExactRussian(lines); break; case "english": addAshExactRussian(lines); break; case "french": addAshExactRussian(lines); break; case "german": addAshExactAny(lines); break; case "hungarian": addAshExactRussian(lines); break; case "polish": addAshExactPolish(lines); break; case "romanian": addAshExactRussian(lines); break; case "russian": addAshExactRussian(lines); break; case "spanish": addAshExactRussian(lines); break; default: break; } break; case RULES: switch(l) { case "any": addAshRulesAny(lines); break; case "cyrillic": addAshRulesCyrillic(lines); break; case "english": addAshRulesEnglish(lines); break; case "french": addAshRulesFrench(lines); break; case "german": addAshRulesGerman(lines); break; case "hebrew": addAshRulesHebrew(lines); break; case "hungarian": addAshRulesHungarian(lines); break; case "polish": addAshRulesPolish(lines); break; case "romanian": addAshRulesRomanian(lines); break; case "russian": addAshRulesRussian(lines); break; case "spanish": addAshRulesSpanish(lines); break; default: break; } break; default: break; } break; case GENERIC: switch(rt) { case APPROX: switch(l) { case "any": addGenApproxAny(lines); break; case "common": addGenExactApproxCommon(lines); addGenApproxCommon(lines); break; case "arabic": addGenApproxArabic(lines); break; case "cyrillic": addGenApproxRussian(lines); break; case "czech": addGenApproxFrench(lines); break; case "dutch": addGenApproxFrench(lines); break; case "english": addGenApproxEnglish(lines); break; case "french": addGenApproxFrench(lines); break; case "german": addGenApproxGerman(lines); break; case "greek": addGenApproxFrench(lines); break; case "greeklatin": addGenApproxFrench(lines); addGenApproxGreekLatin(lines); break; 
case "hungarian": addGenApproxFrench(lines); break; case "italian": addGenApproxFrench(lines); break; case "polish": addGenApproxPolish(lines); break; case "portugese": addGenApproxFrench(lines); break; case "romanian": addGenApproxPolish(lines); break; case "russian": addGenApproxRussian(lines); break; case "spanish": addGenApproxSpanish(lines); break; case "turkish": addGenApproxFrench(lines); break; default: break; } break; case EXACT: switch(l) { case "any": addGenExactAny(lines); break; case "arabic": addGenExactArabic(lines); break; case "common": addGenExactApproxCommon(lines); addGenExactCommon(lines); break; case "cyrillic": addGenExactRussian(lines); break; case "czech": addGenExactRussian(lines); break; case "english": addGenExactRussian(lines); break; case "german": addGenExactAny(lines); break; case "greeklatin": addGenExactGreeklatin(lines); break; case "polish": addGenExactPolish(lines); break; case "russian": addGenExactRussian(lines); break; case "spanish": addGenExactSpanish(lines); break; default: break; } break; case RULES: switch(l) { case "any": addGenRulesAny(lines); break; case "arabic": addGenRulesArabic(lines); break; case "cyrillic": addGenRulesCyrillic(lines); break; case "czech": addGenRulesCzech(lines); break; case "dutch": addGenRulesDutch(lines); break; case "english": addGenRulesEnglish(lines); break; case "french": addGenRulesFrench(lines); break; case "german": addGenRulesGerman(lines); break; case "greek": addGenRulesGreek(lines); break; case "greeklatin": addGenRulesGreeklatin(lines); break; case "hebrew": addGenRulesHebrew(lines); break; case "hungarian": addGenRulesHungarian(lines); break; case "italian": addGenRulesItalian(lines); break; case "polish": addGenRulesPolish(lines); break; case "portuguese": addGenRulesPortuguese(lines); break; case "romanian": addGenRulesRomanian(lines); break; case "russian": addGenRulesRussian(lines); break; case "spanish": addGenRulesSpanish(lines); break; case "turkish": 
addGenRulesTurkish(lines); break; default: break; } break; default: break; } break; case SEPHARDIC: switch(rt) { case APPROX: switch(l) { case "any": addSepApproxAny(lines); break; case "common": addSepExactApproxCommon(lines); addSepApproxCommon(lines); break; default: break; } break; case EXACT: switch(l) { case "any": addSepExactAny(lines); break; case "common": addSepExactApproxCommon(lines); addSepExactCommon(lines); break; default: break; } break; case RULES: switch(l) { case "any": addSepRulesAny(lines); break; case "french": addSepRulesFrench(lines); break; case "hebrew": addSepRulesHebrew(lines); break; case "italian": addSepRulesItalian(lines); break; case "portuguese": addSepRulesPortuguese(lines); break; case "spanish": addSepRulesSpanish(lines); break; default: break; } break; default: break; } break; default: break; } rs.put(l, lines); } if (!rt.equals(RuleType.RULES)) { final Map> lines = new HashMap<>(); switch(s) { case ASHKENAZI: switch(rt) { case APPROX: addAshExactApproxCommon(lines); addAshApproxCommon(lines); break; case EXACT: addAshExactApproxCommon(lines); addAshExactCommon(lines); break; default: break; } break; case GENERIC: switch(rt) { case APPROX: addGenExactApproxCommon(lines); addGenApproxCommon(lines); break; case EXACT: addGenExactApproxCommon(lines); addGenExactCommon(lines); break; default: break; } break; case SEPHARDIC: switch(rt) { case APPROX: addSepExactApproxCommon(lines); addSepApproxCommon(lines); break; case EXACT: addSepExactApproxCommon(lines); addSepExactCommon(lines); break; default: break; } break; default: break; } rs.put("common", lines); } rts.put(rt, Collections.unmodifiableMap(rs)); } RULES.put(s, Collections.unmodifiableMap(rts)); } } private static boolean contains(final CharSequence chars, final char input) { for (int i = 0; i < chars.length(); i++) { if (chars.charAt(i) == input) { return true; } } return false; } private static void addAshApproxAny(final Map> lines) { addRule(lines, "b", "", "", 
"(b|v[spanish])"); addRule(lines, "J", "", "", "z"); addRule(lines, "aiB", "", "[bp]", "(D|Dm)"); addRule(lines, "AiB", "", "[bp]", "(D|Dm)"); addRule(lines, "oiB", "", "[bp]", "(D|Dm)"); addRule(lines, "OiB", "", "[bp]", "(D|Dm)"); addRule(lines, "uiB", "", "[bp]", "(D|Dm)"); addRule(lines, "UiB", "", "[bp]", "(D|Dm)"); addRule(lines, "eiB", "", "[bp]", "(D|Dm)"); addRule(lines, "EiB", "", "[bp]", "(D|Dm)"); addRule(lines, "iiB", "", "[bp]", "(D|Dm)"); addRule(lines, "IiB", "", "[bp]", "(D|Dm)"); addRule(lines, "aiB", "", "[dgkstvz]", "(D|Dn)"); addRule(lines, "AiB", "", "[dgkstvz]", "(D|Dn)"); addRule(lines, "oiB", "", "[dgkstvz]", "(D|Dn)"); addRule(lines, "OiB", "", "[dgkstvz]", "(D|Dn)"); addRule(lines, "uiB", "", "[dgkstvz]", "(D|Dn)"); addRule(lines, "UiB", "", "[dgkstvz]", "(D|Dn)"); addRule(lines, "eiB", "", "[dgkstvz]", "(D|Dn)"); addRule(lines, "EiB", "", "[dgkstvz]", "(D|Dn)"); addRule(lines, "iiB", "", "[dgkstvz]", "(D|Dn)"); addRule(lines, "IiB", "", "[dgkstvz]", "(D|Dn)"); addRule(lines, "B", "", "[bp]", "(o|om[polish]|im[polish])"); addRule(lines, "B", "", "[dgkstvz]", "(a|o|on[polish]|in[polish])"); addRule(lines, "B", "", "", "(a|o)"); addRule(lines, "aiF", "", "[bp]", "(D|Dm)"); addRule(lines, "AiF", "", "[bp]", "(D|Dm)"); addRule(lines, "oiF", "", "[bp]", "(D|Dm)"); addRule(lines, "OiF", "", "[bp]", "(D|Dm)"); addRule(lines, "uiF", "", "[bp]", "(D|Dm)"); addRule(lines, "UiF", "", "[bp]", "(D|Dm)"); addRule(lines, "eiF", "", "[bp]", "(D|Dm)"); addRule(lines, "EiF", "", "[bp]", "(D|Dm)"); addRule(lines, "iiF", "", "[bp]", "(D|Dm)"); addRule(lines, "IiF", "", "[bp]", "(D|Dm)"); addRule(lines, "aiF", "", "[dgkstvz]", "(D|Dn)"); addRule(lines, "AiF", "", "[dgkstvz]", "(D|Dn)"); addRule(lines, "oiF", "", "[dgkstvz]", "(D|Dn)"); addRule(lines, "OiF", "", "[dgkstvz]", "(D|Dn)"); addRule(lines, "uiF", "", "[dgkstvz]", "(D|Dn)"); addRule(lines, "UiF", "", "[dgkstvz]", "(D|Dn)"); addRule(lines, "eiF", "", "[dgkstvz]", "(D|Dn)"); addRule(lines, "EiF", "", 
"[dgkstvz]", "(D|Dn)"); addRule(lines, "iiF", "", "[dgkstvz]", "(D|Dn)"); addRule(lines, "IiF", "", "[dgkstvz]", "(D|Dn)"); addRule(lines, "F", "", "[bp]", "(i|im[polish]|om[polish])"); addRule(lines, "F", "", "[dgkstvz]", "(i|in[polish]|on[polish])"); addRule(lines, "F", "", "", "i"); addRule(lines, "P", "", "", "(o|u)"); addRule(lines, "I", "[aeiouAEIBFOUQY]", "", "i"); addRule(lines, "I", "", "[^aeiouAEBFIOU]e", "(Q[german]|i|D[english])"); addRule(lines, "I", "", "$", "i"); addRule(lines, "I", "", "[^k]$", "i"); addRule(lines, "Ik", "[lr]", "$", "(ik|Qk[german])"); addRule(lines, "Ik", "", "$", "ik"); addRule(lines, "sIts", "", "$", "(sits|sQts[german])"); addRule(lines, "Its", "", "$", "its"); addRule(lines, "I", "", "", "(Q[german]|i)"); addRule(lines, "lE", "[bdfgkmnprsStvzZ]", "$", "(li|il[english])"); addRule(lines, "lE", "[bdfgkmnprsStvzZ]", "", "(li|il[english]|lY[german])"); addRule(lines, "au", "", "", "(D|a|u)"); addRule(lines, "ou", "", "", "(D|o|u)"); addRule(lines, "ai", "", "", "(D|a|i)"); addRule(lines, "Ai", "", "", "(D|a|i)"); addRule(lines, "oi", "", "", "(D|o|i)"); addRule(lines, "Oi", "", "", "(D|o|i)"); addRule(lines, "ui", "", "", "(D|u|i)"); addRule(lines, "Ui", "", "", "(D|u|i)"); addRule(lines, "ei", "", "", "(D|i)"); addRule(lines, "Ei", "", "", "(D|i)"); addRule(lines, "iA", "", "$", "(ia|io)"); addRule(lines, "iA", "", "", "(ia|io|iY[german])"); addRule(lines, "A", "", "[^aeiouAEBFIOU]e", "(a|o|Y[german]|D[english])"); addRule(lines, "E", "i[^aeiouAEIOU]", "", "(i|Y[german]|[english])"); addRule(lines, "E", "a[^aeiouAEIOU]", "", "(i|Y[german]|[english])"); addRule(lines, "e", "", "[fklmnprstv]$", "i"); addRule(lines, "e", "", "ts$", "i"); addRule(lines, "e", "", "$", "i"); addRule(lines, "e", "[DaoiuAOIUQY]", "", "i"); addRule(lines, "e", "", "[aoAOQY]", "i"); addRule(lines, "e", "", "", "(i|Y[german])"); addRule(lines, "E", "", "[fklmnprst]$", "i"); addRule(lines, "E", "", "ts$", "i"); addRule(lines, "E", "", "$", "i"); 
addRule(lines, "E", "[DaoiuAOIUQY]", "", "i"); addRule(lines, "E", "", "[aoAOQY]", "i"); addRule(lines, "E", "", "", "(i|Y[german])"); addRule(lines, "a", "", "", "(a|o)"); addRule(lines, "O", "", "[fklmnprstv]$", "o"); addRule(lines, "O", "", "ts$", "o"); addRule(lines, "O", "", "$", "o"); addRule(lines, "O", "[oeiuQY]", "", "o"); addRule(lines, "O", "", "", "(o|Y[german])"); addRule(lines, "A", "", "[fklmnprst]$", "(a|o)"); addRule(lines, "A", "", "ts$", "(a|o)"); addRule(lines, "A", "", "$", "(a|o)"); addRule(lines, "A", "[oeiuQY]", "", "(a|o)"); addRule(lines, "A", "", "", "(a|o|Y[german])"); addRule(lines, "U", "", "$", "u"); addRule(lines, "U", "[DoiuQY]", "", "u"); addRule(lines, "U", "", "[^k]$", "u"); addRule(lines, "Uk", "[lr]", "$", "(uk|Qk[german])"); addRule(lines, "Uk", "", "$", "uk"); addRule(lines, "sUts", "", "$", "(suts|sQts[german])"); addRule(lines, "Uts", "", "$", "uts"); addRule(lines, "U", "", "", "(u|Q[german])"); } private static void addAshApproxCommon(final Map> lines) { addRule(lines, "n", "", "[bp]", "m"); addRule(lines, "h", "", "", ""); addRule(lines, "H", "", "", "(x|)"); addRule(lines, "F", "", "[bdgkpstvzZ]h", "e"); addRule(lines, "F", "", "[bdgkpstvzZ]x", "e"); addRule(lines, "B", "", "[bdgkpstvzZ]h", "a"); addRule(lines, "B", "", "[bdgkpstvzZ]x", "a"); addRule(lines, "e", "[bdfgklmnprsStvzZ]", "[ln]$", ""); addRule(lines, "i", "[bdfgklmnprsStvzZ]", "[ln]$", ""); addRule(lines, "E", "[bdfgklmnprsStvzZ]", "[ln]$", ""); addRule(lines, "I", "[bdfgklmnprsStvzZ]", "[ln]$", ""); addRule(lines, "F", "[bdfgklmnprsStvzZ]", "[ln]$", ""); addRule(lines, "Q", "[bdfgklmnprsStvzZ]", "[ln]$", ""); addRule(lines, "Y", "[bdfgklmnprsStvzZ]", "[ln]$", ""); addRule(lines, "e", "[bdfgklmnprsStvzZ]", "[ln][bdfgklmnprsStvzZ]", ""); addRule(lines, "i", "[bdfgklmnprsStvzZ]", "[ln][bdfgklmnprsStvzZ]", ""); addRule(lines, "E", "[bdfgklmnprsStvzZ]", "[ln][bdfgklmnprsStvzZ]", ""); addRule(lines, "I", "[bdfgklmnprsStvzZ]", "[ln][bdfgklmnprsStvzZ]", ""); 
addRule(lines, "F", "[bdfgklmnprsStvzZ]", "[ln][bdfgklmnprsStvzZ]", ""); addRule(lines, "Q", "[bdfgklmnprsStvzZ]", "[ln][bdfgklmnprsStvzZ]", ""); addRule(lines, "Y", "[bdfgklmnprsStvzZ]", "[ln][bdfgklmnprsStvzZ]", ""); addRule(lines, "lEs", "", "", "(lEs|lz)"); addRule(lines, "lE", "[bdfgkmnprStvzZ]", "", "(lE|l)"); addRule(lines, "aue", "", "", "D"); addRule(lines, "oue", "", "", "D"); addRule(lines, "AvE", "", "", "(D|AvE)"); addRule(lines, "Ave", "", "", "(D|Ave)"); addRule(lines, "avE", "", "", "(D|avE)"); addRule(lines, "ave", "", "", "(D|ave)"); addRule(lines, "OvE", "", "", "(D|OvE)"); addRule(lines, "Ove", "", "", "(D|Ove)"); addRule(lines, "ovE", "", "", "(D|ovE)"); addRule(lines, "ove", "", "", "(D|ove)"); addRule(lines, "ea", "", "", "(D|ea)"); addRule(lines, "EA", "", "", "(D|EA)"); addRule(lines, "Ea", "", "", "(D|Ea)"); addRule(lines, "eA", "", "", "(D|eA)"); addRule(lines, "aji", "", "", "D"); addRule(lines, "ajI", "", "", "D"); addRule(lines, "aje", "", "", "D"); addRule(lines, "ajE", "", "", "D"); addRule(lines, "Aji", "", "", "D"); addRule(lines, "AjI", "", "", "D"); addRule(lines, "Aje", "", "", "D"); addRule(lines, "AjE", "", "", "D"); addRule(lines, "oji", "", "", "D"); addRule(lines, "ojI", "", "", "D"); addRule(lines, "oje", "", "", "D"); addRule(lines, "ojE", "", "", "D"); addRule(lines, "Oji", "", "", "D"); addRule(lines, "OjI", "", "", "D"); addRule(lines, "Oje", "", "", "D"); addRule(lines, "OjE", "", "", "D"); addRule(lines, "eji", "", "", "D"); addRule(lines, "ejI", "", "", "D"); addRule(lines, "eje", "", "", "D"); addRule(lines, "ejE", "", "", "D"); addRule(lines, "Eji", "", "", "D"); addRule(lines, "EjI", "", "", "D"); addRule(lines, "Eje", "", "", "D"); addRule(lines, "EjE", "", "", "D"); addRule(lines, "uji", "", "", "D"); addRule(lines, "ujI", "", "", "D"); addRule(lines, "uje", "", "", "D"); addRule(lines, "ujE", "", "", "D"); addRule(lines, "Uji", "", "", "D"); addRule(lines, "UjI", "", "", "D"); addRule(lines, "Uje", "", "", 
"D"); addRule(lines, "UjE", "", "", "D"); addRule(lines, "iji", "", "", "D"); addRule(lines, "ijI", "", "", "D"); addRule(lines, "ije", "", "", "D"); addRule(lines, "ijE", "", "", "D"); addRule(lines, "Iji", "", "", "D"); addRule(lines, "IjI", "", "", "D"); addRule(lines, "Ije", "", "", "D"); addRule(lines, "IjE", "", "", "D"); addRule(lines, "aja", "", "", "D"); addRule(lines, "ajA", "", "", "D"); addRule(lines, "ajo", "", "", "D"); addRule(lines, "ajO", "", "", "D"); addRule(lines, "aju", "", "", "D"); addRule(lines, "ajU", "", "", "D"); addRule(lines, "Aja", "", "", "D"); addRule(lines, "AjA", "", "", "D"); addRule(lines, "Ajo", "", "", "D"); addRule(lines, "AjO", "", "", "D"); addRule(lines, "Aju", "", "", "D"); addRule(lines, "AjU", "", "", "D"); addRule(lines, "oja", "", "", "D"); addRule(lines, "ojA", "", "", "D"); addRule(lines, "ojo", "", "", "D"); addRule(lines, "ojO", "", "", "D"); addRule(lines, "Aju", "", "", "D"); addRule(lines, "AjU", "", "", "D"); addRule(lines, "Oja", "", "", "D"); addRule(lines, "OjA", "", "", "D"); addRule(lines, "Ojo", "", "", "D"); addRule(lines, "OjO", "", "", "D"); addRule(lines, "Aju", "", "", "D"); addRule(lines, "AjU", "", "", "D"); addRule(lines, "eja", "", "", "D"); addRule(lines, "ejA", "", "", "D"); addRule(lines, "ejo", "", "", "D"); addRule(lines, "ejO", "", "", "D"); addRule(lines, "Aju", "", "", "D"); addRule(lines, "AjU", "", "", "D"); addRule(lines, "Eja", "", "", "D"); addRule(lines, "EjA", "", "", "D"); addRule(lines, "Ejo", "", "", "D"); addRule(lines, "EjO", "", "", "D"); addRule(lines, "Aju", "", "", "D"); addRule(lines, "AjU", "", "", "D"); addRule(lines, "uja", "", "", "D"); addRule(lines, "ujA", "", "", "D"); addRule(lines, "ujo", "", "", "D"); addRule(lines, "ujO", "", "", "D"); addRule(lines, "Aju", "", "", "D"); addRule(lines, "AjU", "", "", "D"); addRule(lines, "Uja", "", "", "D"); addRule(lines, "UjA", "", "", "D"); addRule(lines, "Ujo", "", "", "D"); addRule(lines, "UjO", "", "", "D"); 
addRule(lines, "Aju", "", "", "D"); addRule(lines, "AjU", "", "", "D"); addRule(lines, "ija", "", "", "D"); addRule(lines, "ijA", "", "", "D"); addRule(lines, "ijo", "", "", "D"); addRule(lines, "ijO", "", "", "D"); addRule(lines, "Aju", "", "", "D"); addRule(lines, "AjU", "", "", "D"); addRule(lines, "Ija", "", "", "D"); addRule(lines, "IjA", "", "", "D"); addRule(lines, "Ijo", "", "", "D"); addRule(lines, "IjO", "", "", "D"); addRule(lines, "Aju", "", "", "D"); addRule(lines, "AjU", "", "", "D"); addRule(lines, "j", "", "", "i"); addRule(lines, "lYndEr", "", "$", "lYnder"); addRule(lines, "lander", "", "$", "lYnder"); addRule(lines, "lAndEr", "", "$", "lYnder"); addRule(lines, "lAnder", "", "$", "lYnder"); addRule(lines, "landEr", "", "$", "lYnder"); addRule(lines, "lender", "", "$", "lYnder"); addRule(lines, "lEndEr", "", "$", "lYnder"); addRule(lines, "lendEr", "", "$", "lYnder"); addRule(lines, "lEnder", "", "$", "lYnder"); addRule(lines, "bUrk", "", "$", "(burk|berk)"); addRule(lines, "burk", "", "$", "(burk|berk)"); addRule(lines, "bUrg", "", "$", "(burk|berk)"); addRule(lines, "burg", "", "$", "(burk|berk)"); addRule(lines, "s", "", "[rmnl]", "z"); addRule(lines, "S", "", "[rmnl]", "z"); addRule(lines, "s", "[rmnl]", "", "z"); addRule(lines, "S", "[rmnl]", "", "z"); addRule(lines, "dS", "", "$", "S"); addRule(lines, "dZ", "", "$", "S"); addRule(lines, "Z", "", "$", "S"); addRule(lines, "S", "", "$", "(S|s)"); addRule(lines, "z", "", "$", "(S|s)"); addRule(lines, "S", "", "", "s"); addRule(lines, "dZ", "", "", "z"); addRule(lines, "Z", "", "", "z"); } private static void addAshApproxRussian(final Map> lines) { addRule(lines, "I", "", "$", "i"); addRule(lines, "I", "", "[^k]$", "i"); addRule(lines, "Ik", "[lr]", "$", "(ik|Qk)"); addRule(lines, "Ik", "", "$", "ik"); addRule(lines, "sIts", "", "$", "(sits|sQts)"); addRule(lines, "Its", "", "$", "its"); addRule(lines, "I", "[aeiEIou]", "", "i"); addRule(lines, "I", "", "", "(i|Q)"); addRule(lines, "au", "", "", 
"(D|a|u)"); addRule(lines, "ou", "", "", "(D|o|u)"); addRule(lines, "ai", "", "", "(D|a|i)"); addRule(lines, "oi", "", "", "(D|o|i)"); addRule(lines, "ui", "", "", "(D|u|i)"); addRule(lines, "om", "", "[bp]", "(om|im)"); addRule(lines, "on", "", "[dgkstvz]", "(on|in)"); addRule(lines, "em", "", "[bp]", "(im|om)"); addRule(lines, "en", "", "[dgkstvz]", "(in|on)"); addRule(lines, "Em", "", "[bp]", "(im|Ym|om)"); addRule(lines, "En", "", "[dgkstvz]", "(in|Yn|on)"); addRule(lines, "a", "", "", "(a|o)"); addRule(lines, "e", "", "", "i"); addRule(lines, "E", "", "[fklmnprsStv]$", "i"); addRule(lines, "E", "", "ts$", "i"); addRule(lines, "E", "[DaoiuQ]", "", "i"); addRule(lines, "E", "", "[aoQ]", "i"); addRule(lines, "E", "", "", "(Y|i)"); } private static void addAshApproxEnglish(final Map> lines) { addRule(lines, "I", "", "[^aEIeiou]e", "(Q|i|D)"); addRule(lines, "I", "", "$", "i"); addRule(lines, "I", "[aEIeiou]", "", "i"); addRule(lines, "I", "", "[^k]$", "i"); addRule(lines, "Ik", "[lr]", "$", "(ik|Qk)"); addRule(lines, "Ik", "", "$", "ik"); addRule(lines, "sIts", "", "$", "(sits|sQts)"); addRule(lines, "Its", "", "$", "its"); addRule(lines, "I", "", "", "(i|Q)"); addRule(lines, "lE", "[bdfgkmnprsStvzZ]", "", "(il|li|lY)"); addRule(lines, "au", "", "", "(D|a|u)"); addRule(lines, "ou", "", "", "(D|o|u)"); addRule(lines, "ai", "", "", "(D|a|i)"); addRule(lines, "oi", "", "", "(D|o|i)"); addRule(lines, "ui", "", "", "(D|u|i)"); addRule(lines, "E", "D[^aeiEIou]", "", "(i|)"); addRule(lines, "e", "D[^aeiEIou]", "", "(i|)"); addRule(lines, "e", "", "", "i"); addRule(lines, "E", "", "[fklmnprsStv]$", "i"); addRule(lines, "E", "", "ts$", "i"); addRule(lines, "E", "[DaoiEuQY]", "", "i"); addRule(lines, "E", "", "[aoQY]", "i"); addRule(lines, "E", "", "", "(Y|i)"); addRule(lines, "a", "", "", "(a|o)"); } private static void addAshApproxFrench(final Map> lines) { addRule(lines, "I", "", "$", "i"); addRule(lines, "I", "[aEIeiou]", "", "i"); addRule(lines, "I", "", "[^k]$", "i"); 
addRule(lines, "Ik", "[lr]", "$", "(ik|Qk)"); addRule(lines, "Ik", "", "$", "ik"); addRule(lines, "sIts", "", "$", "(sits|sQts)"); addRule(lines, "Its", "", "$", "its"); addRule(lines, "I", "", "", "(i|Q)"); addRule(lines, "au", "", "", "(D|a|u)"); addRule(lines, "ou", "", "", "(D|o|u)"); addRule(lines, "ai", "", "", "(D|a|i)"); addRule(lines, "oi", "", "", "(D|o|i)"); addRule(lines, "ui", "", "", "(D|u|i)"); addRule(lines, "a", "", "", "(a|o)"); addRule(lines, "e", "", "", "i"); addRule(lines, "E", "", "[fklmnprsStv]$", "i"); addRule(lines, "E", "", "ts$", "i"); addRule(lines, "E", "[aoiuQ]", "", "i"); addRule(lines, "E", "", "[aoQ]", "i"); addRule(lines, "E", "", "", "(Y|i)"); } private static void addAshApproxPolish(final Map> lines) { addRule(lines, "aiB", "", "[bp]", "(D|Dm)"); addRule(lines, "oiB", "", "[bp]", "(D|Dm)"); addRule(lines, "uiB", "", "[bp]", "(D|Dm)"); addRule(lines, "eiB", "", "[bp]", "(D|Dm)"); addRule(lines, "EiB", "", "[bp]", "(D|Dm)"); addRule(lines, "iiB", "", "[bp]", "(D|Dm)"); addRule(lines, "IiB", "", "[bp]", "(D|Dm)"); addRule(lines, "aiB", "", "[dgkstvz]", "(D|Dn)"); addRule(lines, "oiB", "", "[dgkstvz]", "(D|Dn)"); addRule(lines, "uiB", "", "[dgkstvz]", "(D|Dn)"); addRule(lines, "eiB", "", "[dgkstvz]", "(D|Dn)"); addRule(lines, "EiB", "", "[dgkstvz]", "(D|Dn)"); addRule(lines, "iiB", "", "[dgkstvz]", "(D|Dn)"); addRule(lines, "IiB", "", "[dgkstvz]", "(D|Dn)"); addRule(lines, "B", "", "[bp]", "(o|om|im)"); addRule(lines, "B", "", "[dgkstvz]", "(o|on|in)"); addRule(lines, "B", "", "", "o"); addRule(lines, "aiF", "", "[bp]", "(D|Dm)"); addRule(lines, "oiF", "", "[bp]", "(D|Dm)"); addRule(lines, "uiF", "", "[bp]", "(D|Dm)"); addRule(lines, "eiF", "", "[bp]", "(D|Dm)"); addRule(lines, "EiF", "", "[bp]", "(D|Dm)"); addRule(lines, "iiF", "", "[bp]", "(D|Dm)"); addRule(lines, "IiF", "", "[bp]", "(D|Dm)"); addRule(lines, "aiF", "", "[dgkstvz]", "(D|Dn)"); addRule(lines, "oiF", "", "[dgkstvz]", "(D|Dn)"); addRule(lines, "uiF", "", "[dgkstvz]", 
"(D|Dn)"); addRule(lines, "eiF", "", "[dgkstvz]", "(D|Dn)"); addRule(lines, "EiF", "", "[dgkstvz]", "(D|Dn)"); addRule(lines, "iiF", "", "[dgkstvz]", "(D|Dn)"); addRule(lines, "IiF", "", "[dgkstvz]", "(D|Dn)"); addRule(lines, "F", "", "[bp]", "(i|im|om)"); addRule(lines, "F", "", "[dgkstvz]", "(i|in|on)"); addRule(lines, "F", "", "", "i"); addRule(lines, "P", "", "", "(o|u)"); addRule(lines, "I", "", "$", "i"); addRule(lines, "I", "", "[^k]$", "i"); addRule(lines, "Ik", "[lr]", "$", "(ik|Qk)"); addRule(lines, "Ik", "", "$", "ik"); addRule(lines, "sIts", "", "$", "(sits|sQts)"); addRule(lines, "Its", "", "$", "its"); addRule(lines, "I", "[aeiAEBFIou]", "", "i"); addRule(lines, "I", "", "", "(i|Q)"); addRule(lines, "au", "", "", "(D|a|u)"); addRule(lines, "ou", "", "", "(D|o|u)"); addRule(lines, "ai", "", "", "(D|a|i)"); addRule(lines, "oi", "", "", "(D|o|i)"); addRule(lines, "ui", "", "", "(D|u|i)"); addRule(lines, "a", "", "", "(a|o)"); addRule(lines, "e", "", "", "i"); addRule(lines, "E", "", "[fklmnprst]$", "i"); addRule(lines, "E", "", "ts$", "i"); addRule(lines, "E", "", "$", "i"); addRule(lines, "E", "[DaoiuQ]", "", "i"); addRule(lines, "E", "", "[aoQ]", "i"); addRule(lines, "E", "", "", "(Y|i)"); } private static void addAshApproxGerman(final Map> lines) { addRule(lines, "I", "", "$", "i"); addRule(lines, "I", "[aeiAEIOUouQY]", "", "i"); addRule(lines, "I", "", "[^k]$", "i"); addRule(lines, "Ik", "[lr]", "$", "(ik|Qk)"); addRule(lines, "Ik", "", "$", "ik"); addRule(lines, "sIts", "", "$", "(sits|sQts)"); addRule(lines, "Its", "", "$", "its"); addRule(lines, "I", "", "", "(Q|i)"); addRule(lines, "AU", "", "", "(D|a|u)"); addRule(lines, "aU", "", "", "(D|a|u)"); addRule(lines, "Au", "", "", "(D|a|u)"); addRule(lines, "au", "", "", "(D|a|u)"); addRule(lines, "ou", "", "", "(D|o|u)"); addRule(lines, "OU", "", "", "(D|o|u)"); addRule(lines, "oU", "", "", "(D|o|u)"); addRule(lines, "Ou", "", "", "(D|o|u)"); addRule(lines, "ai", "", "", "(D|a|i)"); addRule(lines, 
"Ai", "", "", "(D|a|i)"); addRule(lines, "oi", "", "", "(D|o|i)"); addRule(lines, "Oi", "", "", "(D|o|i)"); addRule(lines, "ui", "", "", "(D|u|i)"); addRule(lines, "Ui", "", "", "(D|u|i)"); addRule(lines, "e", "", "", "i"); addRule(lines, "E", "", "[fklmnprst]$", "i"); addRule(lines, "E", "", "ts$", "i"); addRule(lines, "E", "", "$", "i"); addRule(lines, "E", "[DaoAOUiuQY]", "", "i"); addRule(lines, "E", "", "[aoAOQY]", "i"); addRule(lines, "E", "", "", "(Y|i)"); addRule(lines, "O", "", "$", "o"); addRule(lines, "O", "", "[fklmnprst]$", "o"); addRule(lines, "O", "", "ts$", "o"); addRule(lines, "O", "[aoAOUeiuQY]", "", "o"); addRule(lines, "O", "", "", "(o|Y)"); addRule(lines, "a", "", "", "(a|o)"); addRule(lines, "A", "", "$", "(a|o)"); addRule(lines, "A", "", "[fklmnprst]$", "(a|o)"); addRule(lines, "A", "", "ts$", "(a|o)"); addRule(lines, "A", "[aoeOUiuQY]", "", "(a|o)"); addRule(lines, "A", "", "", "(a|o|Y)"); addRule(lines, "U", "", "$", "u"); addRule(lines, "U", "[DaoiuUQY]", "", "u"); addRule(lines, "U", "", "[^k]$", "u"); addRule(lines, "Uk", "[lr]", "$", "(uk|Qk)"); addRule(lines, "Uk", "", "$", "uk"); addRule(lines, "sUts", "", "$", "(suts|sQts)"); addRule(lines, "Uts", "", "$", "uts"); addRule(lines, "U", "", "", "(u|Q)"); } private static void addAshExactApproxCommon(final Map> lines) { addRule(lines, "h", "", "$", ""); addRule(lines, "b", "", "[fktSs]", "p"); addRule(lines, "b", "", "p", ""); addRule(lines, "b", "", "$", "p"); addRule(lines, "p", "", "[gdZz]", "b"); addRule(lines, "p", "", "b", ""); addRule(lines, "v", "", "[pktSs]", "f"); addRule(lines, "v", "", "f", ""); addRule(lines, "v", "", "$", "f"); addRule(lines, "f", "", "[bgdZz]", "v"); addRule(lines, "f", "", "v", ""); addRule(lines, "g", "", "[pftSs]", "k"); addRule(lines, "g", "", "k", ""); addRule(lines, "g", "", "$", "k"); addRule(lines, "k", "", "[bdZz]", "g"); addRule(lines, "k", "", "g", ""); addRule(lines, "d", "", "[pfkSs]", "t"); addRule(lines, "d", "", "t", ""); addRule(lines, 
"d", "", "$", "t"); addRule(lines, "t", "", "[bgZz]", "d"); addRule(lines, "t", "", "d", ""); addRule(lines, "s", "", "dZ", ""); addRule(lines, "s", "", "tS", ""); addRule(lines, "z", "", "[pfkSt]", "s"); addRule(lines, "z", "", "[sSzZ]", ""); addRule(lines, "s", "", "[sSzZ]", ""); addRule(lines, "Z", "", "[sSzZ]", ""); addRule(lines, "S", "", "[sSzZ]", ""); addRule(lines, "jnm", "", "", "jm"); addRule(lines, "ji", "^", "", "i"); addRule(lines, "jI", "^", "", "I"); addRule(lines, "a", "", "[aAB]", ""); addRule(lines, "a", "[AB]", "", ""); addRule(lines, "A", "", "A", ""); addRule(lines, "B", "", "B", ""); addRule(lines, "b", "", "b", ""); addRule(lines, "d", "", "d", ""); addRule(lines, "f", "", "f", ""); addRule(lines, "g", "", "g", ""); addRule(lines, "k", "", "k", ""); addRule(lines, "l", "", "l", ""); addRule(lines, "m", "", "m", ""); addRule(lines, "n", "", "n", ""); addRule(lines, "p", "", "p", ""); addRule(lines, "r", "", "r", ""); addRule(lines, "t", "", "t", ""); addRule(lines, "v", "", "v", ""); addRule(lines, "z", "", "z", ""); } private static void addAshExactAny(final Map> lines) { addRule(lines, "A", "", "", "a"); addRule(lines, "B", "", "", "a"); addRule(lines, "E", "", "", "e"); addRule(lines, "F", "", "", "e"); addRule(lines, "I", "", "", "i"); addRule(lines, "O", "", "", "o"); addRule(lines, "P", "", "", "o"); addRule(lines, "U", "", "", "u"); addRule(lines, "J", "", "", "l"); } private static void addAshExactCommon(final Map> lines) { addRule(lines, "H", "", "", "h"); addRule(lines, "s", "[^t]", "[bgZd]", "z"); addRule(lines, "Z", "", "[pfkst]", "S"); addRule(lines, "Z", "", "$", "S"); addRule(lines, "S", "", "[bgzd]", "Z"); addRule(lines, "z", "", "$", "s"); addRule(lines, "ji", "[aAoOeEiIuU]", "", "j"); addRule(lines, "jI", "[aAoOeEiIuU]", "", "j"); addRule(lines, "je", "[aAoOeEiIuU]", "", "j"); addRule(lines, "jE", "[aAoOeEiIuU]", "", "j"); } private static void addAshExactRussian(final Map> lines) { addRule(lines, "E", "", "", "e"); 
addRule(lines, "I", "", "", "i");
    }

    /**
     * Adds the hard-coded "Ash exact Polish" rule entries to {@code lines}, one
     * {@code addRule} call per entry and in the listed order. {@code addRule} is
     * defined outside this section; its four string arguments are presumably
     * pattern, left context, right context and phoneme expression (TODO confirm).
     *
     * @param lines rule map to populate (type arguments restored; this copy of the
     *              file carried the garbled {@code Map>})
     */
    private static void addAshExactPolish(final Map<String, List<Rule>> lines) {
        addRule(lines, "B", "", "", "a");
        addRule(lines, "F", "", "", "e");
        addRule(lines, "P", "", "", "o");
        addRule(lines, "E", "", "", "e");
        addRule(lines, "I", "", "", "i");
    }

    /**
     * Adds the hard-coded "Ash rules any" entries to {@code lines} via
     * {@code addRule}, in the listed order. Several phoneme strings carry a
     * bracketed language tag such as {@code [polish]}; how the tag is interpreted
     * is decided by code outside this section.
     *
     * @param lines rule map to populate (type arguments restored from the garbled {@code Map>})
     */
    private static void addAshRulesAny(final Map<String, List<Rule>> lines) {
        addRule(lines, "yna", "", "$", "(in[russian]|ina)");
        addRule(lines, "ina", "", "$", "(in[russian]|ina)");
        addRule(lines, "liova", "", "$", "(lof[russian]|lef[russian]|lova)");
        addRule(lines, "lova", "", "$", "(lof[russian]|lef[russian]|lova)");
        addRule(lines, "ova", "", "$", "(of[russian]|ova)");
        addRule(lines, "eva", "", "$", "(ef[russian]|eva)");
        addRule(lines, "aia", "", "$", "(aja|i[russian])");
        addRule(lines, "aja", "", "$", "(aja|i[russian])");
        addRule(lines, "aya", "", "$", "(aja|i[russian])");
        addRule(lines, "lowa", "", "$", "(lova|lof[polish]|l[polish]|el[polish])");
        addRule(lines, "kowa", "", "$", "(kova|kof[polish]|k[polish]|ek[polish])");
        addRule(lines, "owa", "", "$", "(ova|of[polish]|)");
        addRule(lines, "lowna", "", "$", "(lovna|levna|l[polish]|el[polish])");
        addRule(lines, "kowna", "", "$", "(kovna|k[polish]|ek[polish])");
        addRule(lines, "owna", "", "$", "(ovna|[polish])");
        addRule(lines, "lówna", "", "$", "(l|el[polish])");
        addRule(lines, "kówna", "", "$", "(k|ek[polish])");
        addRule(lines, "ówna", "", "$", "");
        addRule(lines, "a", "", "$", "(a|i[polish])");
        addRule(lines, "rh", "^", "", "r");
        addRule(lines, "ssch", "", "", "S");
        addRule(lines, "chsch", "", "", "xS");
        addRule(lines, "tsch", "", "", "tS");
        addRule(lines, "sch", "", "[ei]", "(sk[romanian]|S|StS[russian])");
        addRule(lines, "sch", "", "", "(S|StS[russian])");
        addRule(lines, "ssh", "", "", "S");
        addRule(lines, "sh", "", "[äöü]", "sh");
        addRule(lines, "sh", "", "[aeiou]", "(S[russian+english]|sh)");
        addRule(lines, "sh", "", "", "S");
        addRule(lines, "kh", "", "", "(x[russian+english]|kh)");
        addRule(lines, "chs", "", "", "(ks[german]|xs|tSs[russian+english])");
        addRule(lines, "ch", "", "[ei]", "(x|k[romanian]|tS[russian+english])");
        addRule(lines, "ch", "", "", "(x|tS[russian+english])");
        addRule(lines, "ck", "", "", "(k|tsk[polish])");
        addRule(lines, "czy", "", "", "tSi");
        addRule(lines, "cze", "", "[bcdgkpstwzż]", "(tSe|tSF)");
        addRule(lines, "ciewicz", "", "", "(tsevitS|tSevitS)");
        addRule(lines, "siewicz", "", "", "(sevitS|SevitS)");
        addRule(lines, "ziewicz", "", "", "(zevitS|ZevitS)");
        addRule(lines, "riewicz", "", "", "rjevitS");
        addRule(lines, "diewicz", "", "", "djevitS");
        addRule(lines, "tiewicz", "", "", "tjevitS");
        addRule(lines, "iewicz", "", "", "evitS");
        addRule(lines, "ewicz", "", "", "evitS");
        addRule(lines, "owicz", "", "", "ovitS");
        addRule(lines, "icz", "", "", "itS");
        addRule(lines, "cz", "", "", "tS");
        addRule(lines, "cia", "", "[bcdgkpstwzż]", "(tSB[polish]|tsB)");
        addRule(lines, "cia", "", "", "(tSa[polish]|tsa)");
        addRule(lines, "cią", "", "[bp]", "(tSom[polish]|tsom)");
        addRule(lines, "cią", "", "", "(tSon[polish]|tson)");
        addRule(lines, "cię", "", "[bp]", "(tSem[polish]|tsem)");
        addRule(lines, "cię", "", "", "(tSen[polish]|tsen)");
        addRule(lines, "cie", "", "[bcdgkpstwzż]", "(tSF[polish]|tsF)");
        addRule(lines, "cie", "", "", "(tSe[polish]|tse)");
        addRule(lines, "cio", "", "", "(tSo[polish]|tso)");
        addRule(lines, "ciu", "", "", "(tSu[polish]|tsu)");
        addRule(lines, "ci", "", "$", "(tsi[polish]|tSi[polish+romanian]|tS[romanian]|si)");
        addRule(lines, "ci", "", "", "(tsi[polish]|tSi[polish+romanian]|si)");
        addRule(lines, "ce", "", "[bcdgkpstwzż]", "(tsF[polish]|tSe[polish+romanian]|se)");
        addRule(lines, "ce", "", "", "(tSe[polish+romanian]|tse[polish]|se)");
        addRule(lines, "cy", "", "", "(si|tsi[polish])");
        addRule(lines, "ssz", "", "", "S");
        addRule(lines, "sz", "", "", "S");
        addRule(lines, "ssp", "", "", "(Sp[german]|sp)");
        addRule(lines, "sp", "", "", "(Sp[german]|sp)");
        addRule(lines, "sst", "", "", "(St[german]|st)");
        addRule(lines, "st", "", "", "(St[german]|st)");
        addRule(lines, "ss", "", "", "s");
addRule(lines, "sia", "", "[bcdgkpstwzż]", "(SB[polish]|sB[polish]|sja)");
        addRule(lines, "sia", "", "", "(Sa[polish]|sja)");
        addRule(lines, "sią", "", "[bp]", "(Som[polish]|som)");
        addRule(lines, "sią", "", "", "(Son[polish]|son)");
        addRule(lines, "się", "", "[bp]", "(Sem[polish]|sem)");
        addRule(lines, "się", "", "", "(Sen[polish]|sen)");
        addRule(lines, "sie", "", "[bcdgkpstwzż]", "(SF[polish]|sF|zi[german])");
        addRule(lines, "sie", "", "", "(se|Se[polish]|zi[german])");
        addRule(lines, "sio", "", "", "(So[polish]|so)");
        addRule(lines, "siu", "", "", "(Su[polish]|sju)");
        addRule(lines, "si", "", "", "(Si[polish]|si|zi[german])");
        addRule(lines, "s", "", "[aeiouäöë]", "(s|z[german])");
        addRule(lines, "gue", "", "", "ge");
        addRule(lines, "gui", "", "", "gi");
        addRule(lines, "guy", "", "", "gi");
        addRule(lines, "gh", "", "[ei]", "(g[romanian]|gh)");
        addRule(lines, "gauz", "", "$", "haus");
        addRule(lines, "gaus", "", "$", "haus");
        addRule(lines, "gol'ts", "", "$", "holts");
        addRule(lines, "golts", "", "$", "holts");
        addRule(lines, "gol'tz", "", "$", "holts");
        addRule(lines, "goltz", "", "", "holts");
        addRule(lines, "gol'ts", "^", "", "holts");
        addRule(lines, "golts", "^", "", "holts");
        addRule(lines, "gol'tz", "^", "", "holts");
        addRule(lines, "goltz", "^", "", "holts");
        addRule(lines, "gendler", "", "$", "hendler");
        addRule(lines, "gejmer", "", "$", "hajmer");
        addRule(lines, "gejm", "", "$", "hajm");
        addRule(lines, "geymer", "", "$", "hajmer");
        addRule(lines, "geym", "", "$", "hajm");
        addRule(lines, "geimer", "", "$", "hajmer");
        addRule(lines, "geim", "", "$", "hajm");
        addRule(lines, "gof", "", "$", "hof");
        addRule(lines, "ger", "", "$", "ger");
        addRule(lines, "gen", "", "$", "gen");
        addRule(lines, "gin", "", "$", "gin");
        addRule(lines, "gie", "", "$", "(ge|gi[german]|ji[french])");
        addRule(lines, "gie", "", "", "ge");
        addRule(lines, "ge", "[yaeiou]", "", "(gE|xe[spanish]|dZe[english+romanian])");
        addRule(lines, "gi", "[yaeiou]", "", "(gI|xi[spanish]|dZi[english+romanian])");
        addRule(lines, "ge", "", "", "(gE|dZe[english+romanian]|hE[russian]|xe[spanish])");
        addRule(lines, "gi", "", "", "(gI|dZi[english+romanian]|hI[russian]|xi[spanish])");
        addRule(lines, "gy", "", "[aeouáéóúüöőű]", "(gi|dj[hungarian])");
        addRule(lines, "gy", "", "", "(gi|d[hungarian])");
        addRule(lines, "g", "[jyaeiou]", "[aouyei]", "g");
        addRule(lines, "g", "", "[aouei]", "(g|h[russian])");
        addRule(lines, "ej", "", "", "(aj|eZ[french+romanian]|ex[spanish])");
        addRule(lines, "ej", "", "", "aj");
        addRule(lines, "ly", "", "[au]", "l");
        addRule(lines, "li", "", "[au]", "l");
        addRule(lines, "lj", "", "[au]", "l");
        addRule(lines, "lio", "", "", "(lo|le[russian])");
        addRule(lines, "lyo", "", "", "(lo|le[russian])");
        addRule(lines, "ll", "", "", "(l|J[spanish])");
        addRule(lines, "j", "", "[aoeiuy]", "(j|dZ[english]|x[spanish]|Z[french+romanian])");
        addRule(lines, "j", "", "", "(j|x[spanish])");
        addRule(lines, "pf", "", "", "(pf|p|f)");
        addRule(lines, "ph", "", "", "(ph|f)");
        addRule(lines, "qu", "", "", "(kv[german]|k)");
        addRule(lines, "rze", "t", "", "(Se[polish]|re)");
        addRule(lines, "rze", "", "", "(rze|rtsE[german]|Ze[polish]|re[polish]|rZe[polish])");
        addRule(lines, "rzy", "t", "", "(Si[polish]|ri)");
        addRule(lines, "rzy", "", "", "(Zi[polish]|ri[polish]|rZi)");
        addRule(lines, "rz", "t", "", "(S[polish]|r)");
        addRule(lines, "rz", "", "", "(rz|rts[german]|Z[polish]|r[polish]|rZ[polish])");
        addRule(lines, "tz", "", "$", "(ts|tS[english+german])");
        addRule(lines, "tz", "^", "", "(ts|tS[english+german])");
        addRule(lines, "tz", "", "", "(ts[english+german+russian]|tz)");
        addRule(lines, "zh", "", "", "(Z|zh[polish]|tsh[german])");
        addRule(lines, "zia", "", "[bcdgkpstwzż]", "(ZB[polish]|zB[polish]|zja)");
        addRule(lines, "zia", "", "", "(Za[polish]|zja)");
        addRule(lines, "zią", "", "[bp]", "(Zom[polish]|zom)");
        addRule(lines, "zią", "", "", "(Zon[polish]|zon)");
        addRule(lines, "zię", "", "[bp]", "(Zem[polish]|zem)");
        addRule(lines, "zię", "", "", "(Zen[polish]|zen)");
        addRule(lines, "zie", "", "[bcdgkpstwzż]", "(ZF[polish]|zF[polish]|ze|tsi[german])");
        addRule(lines, "zie", "", "", "(ze|Ze[polish]|tsi[german])");
        addRule(lines, "zio", "", "", "(Zo[polish]|zo)");
        addRule(lines, "ziu", "", "", "(Zu[polish]|zju)");
        addRule(lines, "zi", "", "", "(Zi[polish]|zi|tsi[german])");
        addRule(lines, "thal", "", "$", "tal");
        addRule(lines, "th", "^", "", "t");
        addRule(lines, "th", "", "[aeiou]", "(t[german]|th)");
        addRule(lines, "th", "", "", "t");
        addRule(lines, "vogel", "", "", "(vogel|fogel[german])");
        addRule(lines, "v", "^", "", "(v|f[german])");
        addRule(lines, "h", "[aeiouyäöü]", "", "");
        addRule(lines, "h", "", "", "(h|x[romanian+polish])");
        addRule(lines, "h", "^", "", "(h|H[english+german])");
        addRule(lines, "yi", "^", "", "i");
        addRule(lines, "e", "in", "$", "(e|[french])");
        addRule(lines, "ii", "", "$", "i");
        addRule(lines, "iy", "", "$", "i");
        addRule(lines, "yy", "", "$", "i");
        addRule(lines, "yi", "", "$", "i");
        addRule(lines, "yj", "", "$", "i");
        addRule(lines, "ij", "", "$", "i");
        addRule(lines, "aue", "", "", "aue");
        addRule(lines, "oue", "", "", "oue");
        addRule(lines, "au", "", "", "(au|o[french])");
        addRule(lines, "ou", "", "", "(ou|u[french])");
        addRule(lines, "ue", "", "", "(Q|uje[russian])");
        addRule(lines, "ae", "", "", "(Y[german]|aje[russian]|ae)");
        addRule(lines, "oe", "", "", "(Y[german]|oje[russian]|oe)");
        addRule(lines, "ee", "", "", "(i[english]|aje[russian]|e)");
        addRule(lines, "ei", "", "", "aj");
        addRule(lines, "ey", "", "", "aj");
        addRule(lines, "eu", "", "", "(aj[german]|oj[german]|eu)");
        addRule(lines, "i", "[aou]", "", "j");
        addRule(lines, "y", "[aou]", "", "j");
        addRule(lines, "ie", "", "[bcdgkpstwzż]", "(i[german]|e[polish]|ije[russian]|je)");
        addRule(lines, "ie", "", "", "(i[german]|e[polish]|ije[russian]|je)");
        addRule(lines, "ye", "", "", "(je|ije[russian])");
        addRule(lines, "i", "", "[au]", "j");
        addRule(lines, "y", "", "[au]", "j");
        addRule(lines, "io", "", "", "(jo|e[russian])");
        addRule(lines, "yo", "", "", "(jo|e[russian])");
        addRule(lines, "ea", "", "", "(ea|ja[romanian])");
        addRule(lines, "e", "^", "", "(e|je[russian])");
        addRule(lines, "oo", "", "", "(u[english]|o)");
        addRule(lines, "uu", "", "", "u");
        addRule(lines, "ć", "", "", "(tS[polish]|ts)");
        addRule(lines, "ł", "", "", "l");
        addRule(lines, "ń", "", "", "n");
        addRule(lines, "ñ", "", "", "(n|nj[spanish])");
        addRule(lines, "ś", "", "", "(S[polish]|s)");
        addRule(lines, "ş", "", "", "S");
        addRule(lines, "ţ", "", "", "ts");
        addRule(lines, "ż", "", "", "Z");
        addRule(lines, "ź", "", "", "(Z[polish]|z)");
        addRule(lines, "où", "", "", "u");
        addRule(lines, "ą", "", "[bp]", "om");
        addRule(lines, "ą", "", "", "on");
        addRule(lines, "ä", "", "", "(Y|e)");
        addRule(lines, "á", "", "", "a");
        addRule(lines, "ă", "", "", "(e[romanian]|a)");
        addRule(lines, "à", "", "", "a");
        addRule(lines, "â", "", "", "a");
        addRule(lines, "é", "", "", "e");
        addRule(lines, "è", "", "", "e");
        addRule(lines, "ê", "", "", "e");
        addRule(lines, "ę", "", "[bp]", "em");
        addRule(lines, "ę", "", "", "en");
        addRule(lines, "í", "", "", "i");
        addRule(lines, "î", "", "", "i");
        addRule(lines, "ö", "", "", "Y");
        addRule(lines, "ő", "", "", "Y");
        addRule(lines, "ó", "", "", "(u[polish]|o)");
        addRule(lines, "ű", "", "", "Q");
        addRule(lines, "ü", "", "", "Q");
        addRule(lines, "ú", "", "", "u");
        addRule(lines, "ű", "", "", "Q");
        addRule(lines, "ß", "", "", "s");
        addRule(lines, "'", "", "", "");
        addRule(lines, "\"", "", "", "");
        addRule(lines, "a", "", "[bcdgkpstwzż]", "(A|B[polish])");
        addRule(lines, "e", "", "[bcdgkpstwzż]", "(E|F[polish])");
        addRule(lines, "o", "", "[bcćdgklłmnńrsśtwzźż]", "(O|P[polish])");
        addRule(lines, "a", "", "", "A");
        addRule(lines, "b", "", "", "b");
        addRule(lines, "c", "", "", "(k|ts[polish])");
        addRule(lines, "d", "", "", "d");
        addRule(lines, "e", "", "", "E");
        addRule(lines, "f", "", "", "f");
        addRule(lines, "g", "", "", "g");
        addRule(lines, "h", "", "", "h");
        addRule(lines, "i", "", "", "I");
        addRule(lines, "j", "", "", "j");
        addRule(lines, "k", "", "", "k");
        addRule(lines, "l", "", "", "l");
        addRule(lines, "m", "", "", "m");
        addRule(lines, "n", "", "", "n");
        addRule(lines, "o", "", "", "O");
        addRule(lines, "p", "", "", "p");
        addRule(lines, "q", "", "", "k");
        addRule(lines, "r", "", "", "r");
        addRule(lines, "s", "", "", "s");
        addRule(lines, "t", "", "", "t");
        addRule(lines, "u", "", "", "U");
        addRule(lines, "v", "", "", "v");
        addRule(lines, "w", "", "", "v");
        addRule(lines, "x", "", "", "ks");
        addRule(lines, "y", "", "", "i");
        addRule(lines, "z", "", "", "(ts[german]|z)");
    }

    /**
     * Adds the hard-coded "Ash rules Cyrillic" entries to {@code lines}, one
     * {@code addRule} call per entry and in the listed order. {@code addRule} is
     * defined outside this section; its four string arguments are presumably
     * pattern, left context, right context and phoneme expression (TODO confirm).
     *
     * @param lines rule map to populate (type arguments restored; this copy of the
     *              file carried the garbled {@code Map>})
     */
    private static void addAshRulesCyrillic(final Map<String, List<Rule>> lines) {
        addRule(lines, "ця", "", "", "tsa");
        addRule(lines, "цю", "", "", "tsu");
        addRule(lines, "циа", "", "", "tsa");
        addRule(lines, "цие", "", "", "tse");
        addRule(lines, "цио", "", "", "tso");
        addRule(lines, "циу", "", "", "tsu");
        addRule(lines, "сие", "", "", "se");
        addRule(lines, "сио", "", "", "so");
        addRule(lines, "зие", "", "", "ze");
        addRule(lines, "зио", "", "", "zo");
        addRule(lines, "гауз", "", "$", "haus");
        addRule(lines, "гаус", "", "$", "haus");
        addRule(lines, "гольц", "", "$", "holts");
        addRule(lines, "геймер", "", "$", "hajmer");
        addRule(lines, "гейм", "", "$", "hajm");
        addRule(lines, "гоф", "", "$", "hof");
        addRule(lines, "гер", "", "$", "ger");
        addRule(lines, "ген", "", "$", "gen");
        addRule(lines, "гин", "", "$", "gin");
        addRule(lines, "г", "(й|ё|я|ю|ы|а|е|о|и|у)", "(а|е|о|и|у)", "g");
        addRule(lines, "г", "", "(а|е|о|и|у)", "(g|h)");
        addRule(lines, "ля", "", "", "la");
        addRule(lines, "лю", "", "", "lu");
        addRule(lines, "лё", "", "", "(le|lo)");
        addRule(lines, "лио", "", "", "(le|lo)");
        addRule(lines, "ле", "", "", "(lE|lo)");
        addRule(lines, "ийе", "", "", "je");
        addRule(lines, "ие", "", "", "je");
        addRule(lines, "ыйе", "", "", "je");
        addRule(lines, "ые", "", "", "je");
        addRule(lines, "ий", "", "(а|о|у)", "j");
        addRule(lines, "ый", "", "(а|о|у)", "j");
        addRule(lines, "ий", "", "$", "i");
        addRule(lines, "ый", "", "$", "i");
        addRule(lines, "ё", "", "", "(e|jo)");
        addRule(lines, "ей", "^", "", "(jaj|aj)");
        addRule(lines, "е", "(а|е|о|у)", "", "je");
        addRule(lines, "е", "^", "", "je");
        addRule(lines, "эй", "", "", "aj");
        addRule(lines, "ей", "", "", "aj");
        addRule(lines, "ауе", "", "", "aue");
        addRule(lines, "ауэ", "", "", "aue");
        addRule(lines, "а", "", "", "a");
        addRule(lines, "б", "", "", "b");
        addRule(lines, "в", "", "", "v");
        addRule(lines, "г", "", "", "g");
        addRule(lines, "д", "", "", "d");
        addRule(lines, "е", "", "", "E");
        addRule(lines, "ж", "", "", "Z");
        addRule(lines, "з", "", "", "z");
        addRule(lines, "и", "", "", "I");
        addRule(lines, "й", "", "", "j");
        addRule(lines, "к", "", "", "k");
        addRule(lines, "л", "", "", "l");
        addRule(lines, "м", "", "", "m");
        addRule(lines, "н", "", "", "n");
        addRule(lines, "о", "", "", "o");
        addRule(lines, "п", "", "", "p");
        addRule(lines, "р", "", "", "r");
        addRule(lines, "с", "", "с", "");
        addRule(lines, "с", "", "", "s");
        addRule(lines, "т", "", "", "t");
        addRule(lines, "у", "", "", "u");
        addRule(lines, "ф", "", "", "f");
        addRule(lines, "х", "", "", "x");
        addRule(lines, "ц", "", "", "ts");
        addRule(lines, "ч", "", "", "tS");
        addRule(lines, "ш", "", "", "S");
        addRule(lines, "щ", "", "", "StS");
        addRule(lines, "ъ", "", "", "");
        addRule(lines, "ы", "", "", "I");
        addRule(lines, "ь", "", "", "");
        addRule(lines, "э", "", "", "E");
        addRule(lines, "ю", "", "", "ju");
        addRule(lines, "я", "", "", "ja");
    }

    /**
     * Adds the hard-coded "Ash rules English" entries to {@code lines} via
     * {@code addRule}, in the listed order.
     *
     * @param lines rule map to populate (type arguments restored from the garbled {@code Map>})
     */
    private static void addAshRulesEnglish(final Map<String, List<Rule>> lines) {
        addRule(lines, "tch", "", "", "tS");
        addRule(lines, "ch", "", "", "(tS|x)");
        addRule(lines, "ck", "", "", "k");
        addRule(lines, "cc", "", "[iey]", "ks");
        addRule(lines, "c", "", "c", "");
        addRule(lines, "c", "", "[iey]", "s");
        addRule(lines, "c", "", "", "k");
        addRule(lines, "gh", "^", "", "g");
        addRule(lines, "gh", "", "", "(g|f|w)");
        addRule(lines, "gn", "", "", "(gn|n)");
        addRule(lines, "g", "", "[iey]", "(g|dZ)");
        addRule(lines, "th", "", "", "t");
        addRule(lines, "kh", "", "", "x");
        addRule(lines, "ph", "", "", "f");
        addRule(lines, "sch", "", "", "(S|sk)");
        addRule(lines, "sh", "", "", "S");
        addRule(lines, "who", "^", "", "hu");
        addRule(lines, "wh", "^", "", "w");
        addRule(lines, "h", "", "$", "");
        addRule(lines, "h", "", "[^aeiou]", "");
        addRule(lines, "h", "^", "", "H");
        addRule(lines, "h", "", "", "h");
        addRule(lines, "j", "", "", "dZ");
        addRule(lines, "kn", "^", "", "n");
        addRule(lines, "mb", "", "$", "m");
        addRule(lines, "ng", "", "$", "(N|ng)");
        addRule(lines, "pn", "^", "", "(pn|n)");
        addRule(lines, "ps", "^", "", "(ps|s)");
        addRule(lines, "qu", "", "", "kw");
        addRule(lines, "q", "", "", "k");
        addRule(lines, "tia", "", "", "(So|Sa)");
        addRule(lines, "tio", "", "", "So");
        addRule(lines, "wr", "^", "", "r");
        addRule(lines, "w", "", "", "(w|v)");
        addRule(lines, "x", "^", "", "z");
        addRule(lines, "x", "", "", "ks");
        addRule(lines, "y", "^", "", "j");
        addRule(lines, "y", "^", "[aeiouy]", "j");
        addRule(lines, "yi", "^", "", "i");
        addRule(lines, "aue", "", "", "aue");
        addRule(lines, "oue", "", "", "(aue|oue)");
        addRule(lines, "ai", "", "", "(aj|e)");
        addRule(lines, "ay", "", "", "aj");
        addRule(lines, "a", "", "[^aeiou]e", "aj");
        addRule(lines, "a", "", "", "(e|o|a)");
        addRule(lines, "ei", "", "", "(aj|i)");
        addRule(lines, "ey", "", "", "(aj|i)");
        addRule(lines, "ear", "", "", "ia");
        addRule(lines, "ea", "", "", "(i|e)");
        addRule(lines, "ee", "", "", "i");
        addRule(lines, "e", "", "[^aeiou]e", "i");
        addRule(lines, "e", "", "$", "(|E)");
        addRule(lines, "e", "", "", "E");
        addRule(lines, "ie", "", "", "i");
        addRule(lines, "i", "", "[^aeiou]e", "aj");
        addRule(lines, "i", "", "", "I");
        addRule(lines, "oa", "", "", "ou");
        addRule(lines, "oi", "", "", "oj");
        addRule(lines, "oo", "", "", "u");
        addRule(lines, "ou", "", "", "(u|ou)");
        addRule(lines, "oy", "", "", "oj");
        addRule(lines, "o", "", "[^aeiou]e", "ou");
        addRule(lines, "o", "", "", "(o|a)");
        addRule(lines, "u", "", "[^aeiou]e", "(ju|u)");
        addRule(lines, "u", "", "r", "(e|u)");
addRule(lines, "u", "", "", "(u|a)"); addRule(lines, "y", "", "", "i"); addRule(lines, "b", "", "", "b"); addRule(lines, "d", "", "", "d"); addRule(lines, "f", "", "", "f"); addRule(lines, "g", "", "", "g"); addRule(lines, "k", "", "", "k"); addRule(lines, "l", "", "", "l"); addRule(lines, "m", "", "", "m"); addRule(lines, "n", "", "", "n"); addRule(lines, "p", "", "", "p"); addRule(lines, "r", "", "", "r"); addRule(lines, "s", "", "", "s"); addRule(lines, "t", "", "", "t"); addRule(lines, "v", "", "", "v"); addRule(lines, "z", "", "", "z"); } private static void addAshRulesFrench(final Map> lines) { addRule(lines, "kh", "", "", "x"); addRule(lines, "ph", "", "", "f"); addRule(lines, "ç", "", "", "s"); addRule(lines, "x", "", "", "ks"); addRule(lines, "ch", "", "", "S"); addRule(lines, "c", "", "[eiyéèê]", "s"); addRule(lines, "c", "", "", "k"); addRule(lines, "gn", "", "", "(n|gn)"); addRule(lines, "g", "", "[eiy]", "Z"); addRule(lines, "gue", "", "$", "k"); addRule(lines, "gu", "", "[eiy]", "g"); addRule(lines, "que", "", "$", "k"); addRule(lines, "qu", "", "", "k"); addRule(lines, "q", "", "", "k"); addRule(lines, "s", "[aeiouyéèê]", "[aeiouyéèê]", "z"); addRule(lines, "h", "[bdgt]", "", ""); addRule(lines, "h", "", "$", ""); addRule(lines, "j", "", "", "Z"); addRule(lines, "w", "", "", "v"); addRule(lines, "ouh", "", "[aioe]", "(v|uh)"); addRule(lines, "ou", "", "[aeio]", "v"); addRule(lines, "uo", "", "", "(vo|o)"); addRule(lines, "u", "", "[aeio]", "v"); addRule(lines, "aue", "", "", "aue"); addRule(lines, "eau", "", "", "o"); addRule(lines, "ai", "", "", "aj"); addRule(lines, "ay", "", "", "aj"); addRule(lines, "é", "", "", "e"); addRule(lines, "ê", "", "", "e"); addRule(lines, "è", "", "", "e"); addRule(lines, "à", "", "", "a"); addRule(lines, "â", "", "", "a"); addRule(lines, "où", "", "", "u"); addRule(lines, "ou", "", "", "u"); addRule(lines, "oi", "", "", "oj"); addRule(lines, "ei", "", "", "aj"); addRule(lines, "ey", "", "", "aj"); addRule(lines, "y", 
"[ou]", "", "j"); addRule(lines, "e", "", "$", "(e|)"); addRule(lines, "i", "", "[aou]", "j"); addRule(lines, "y", "", "[aoeu]", "j"); addRule(lines, "y", "", "", "i"); addRule(lines, "a", "", "", "a"); addRule(lines, "b", "", "", "b"); addRule(lines, "d", "", "", "d"); addRule(lines, "e", "", "", "E"); addRule(lines, "f", "", "", "f"); addRule(lines, "g", "", "", "g"); addRule(lines, "h", "", "", "h"); addRule(lines, "i", "", "", "I"); addRule(lines, "k", "", "", "k"); addRule(lines, "l", "", "", "l"); addRule(lines, "m", "", "", "m"); addRule(lines, "n", "", "", "n"); addRule(lines, "o", "", "", "o"); addRule(lines, "p", "", "", "p"); addRule(lines, "r", "", "", "r"); addRule(lines, "s", "", "", "s"); addRule(lines, "t", "", "", "t"); addRule(lines, "u", "", "", "u"); addRule(lines, "v", "", "", "v"); addRule(lines, "z", "", "", "z"); } private static void addAshRulesGerman(final Map> lines) { addRule(lines, "ziu", "", "", "tsu"); addRule(lines, "zia", "", "", "tsa"); addRule(lines, "zio", "", "", "tso"); addRule(lines, "ssch", "", "", "S"); addRule(lines, "chsch", "", "", "xS"); addRule(lines, "ewitsch", "", "$", "evitS"); addRule(lines, "owitsch", "", "$", "ovitS"); addRule(lines, "evitsch", "", "$", "evitS"); addRule(lines, "ovitsch", "", "$", "ovitS"); addRule(lines, "witsch", "", "$", "vitS"); addRule(lines, "vitsch", "", "$", "vitS"); addRule(lines, "sch", "", "", "S"); addRule(lines, "chs", "", "", "ks"); addRule(lines, "ch", "", "", "x"); addRule(lines, "ck", "", "", "k"); addRule(lines, "c", "", "[eiy]", "ts"); addRule(lines, "sp", "^", "", "Sp"); addRule(lines, "st", "^", "", "St"); addRule(lines, "ssp", "", "", "(Sp|sp)"); addRule(lines, "sp", "", "", "(Sp|sp)"); addRule(lines, "sst", "", "", "(St|st)"); addRule(lines, "st", "", "", "(St|st)"); addRule(lines, "pf", "", "", "(pf|p|f)"); addRule(lines, "ph", "", "", "(ph|f)"); addRule(lines, "qu", "", "", "kv"); addRule(lines, "ewitz", "", "$", "(evits|evitS)"); addRule(lines, "ewiz", "", "$", 
"(evits|evitS)"); addRule(lines, "evitz", "", "$", "(evits|evitS)"); addRule(lines, "eviz", "", "$", "(evits|evitS)"); addRule(lines, "owitz", "", "$", "(ovits|ovitS)"); addRule(lines, "owiz", "", "$", "(ovits|ovitS)"); addRule(lines, "ovitz", "", "$", "(ovits|ovitS)"); addRule(lines, "oviz", "", "$", "(ovits|ovitS)"); addRule(lines, "witz", "", "$", "(vits|vitS)"); addRule(lines, "wiz", "", "$", "(vits|vitS)"); addRule(lines, "vitz", "", "$", "(vits|vitS)"); addRule(lines, "viz", "", "$", "(vits|vitS)"); addRule(lines, "tz", "", "", "ts"); addRule(lines, "thal", "", "$", "tal"); addRule(lines, "th", "^", "", "t"); addRule(lines, "th", "", "[äöüaeiou]", "(t|th)"); addRule(lines, "th", "", "", "t"); addRule(lines, "rh", "^", "", "r"); addRule(lines, "h", "[aeiouyäöü]", "", ""); addRule(lines, "h", "^", "", "H"); addRule(lines, "ss", "", "", "s"); addRule(lines, "s", "", "[äöüaeiouy]", "(z|s)"); addRule(lines, "s", "[aeiouyäöüj]", "[aeiouyäöü]", "z"); addRule(lines, "ß", "", "", "s"); addRule(lines, "ij", "", "$", "i"); addRule(lines, "aue", "", "", "aue"); addRule(lines, "ue", "", "", "Q"); addRule(lines, "ae", "", "", "Y"); addRule(lines, "oe", "", "", "Y"); addRule(lines, "ü", "", "", "Q"); addRule(lines, "ä", "", "", "(Y|e)"); addRule(lines, "ö", "", "", "Y"); addRule(lines, "ei", "", "", "aj"); addRule(lines, "ey", "", "", "aj"); addRule(lines, "eu", "", "", "(aj|oj)"); addRule(lines, "i", "[aou]", "", "j"); addRule(lines, "y", "[aou]", "", "j"); addRule(lines, "ie", "", "", "I"); addRule(lines, "i", "", "[aou]", "j"); addRule(lines, "y", "", "[aoeu]", "j"); addRule(lines, "ñ", "", "", "n"); addRule(lines, "ã", "", "", "a"); addRule(lines, "ő", "", "", "o"); addRule(lines, "ű", "", "", "u"); addRule(lines, "ç", "", "", "s"); addRule(lines, "a", "", "", "A"); addRule(lines, "b", "", "", "b"); addRule(lines, "c", "", "", "k"); addRule(lines, "d", "", "", "d"); addRule(lines, "e", "", "", "E"); addRule(lines, "f", "", "", "f"); addRule(lines, "g", "", "", "g"); 
addRule(lines, "h", "", "", "h");
        addRule(lines, "i", "", "", "I");
        addRule(lines, "j", "", "", "j");
        addRule(lines, "k", "", "", "k");
        addRule(lines, "l", "", "", "l");
        addRule(lines, "m", "", "", "m");
        addRule(lines, "n", "", "", "n");
        addRule(lines, "o", "", "", "O");
        addRule(lines, "p", "", "", "p");
        addRule(lines, "q", "", "", "k");
        addRule(lines, "r", "", "", "r");
        addRule(lines, "s", "", "", "s");
        addRule(lines, "t", "", "", "t");
        addRule(lines, "u", "", "", "U");
        addRule(lines, "v", "", "", "(f|v)");
        addRule(lines, "w", "", "", "v");
        addRule(lines, "x", "", "", "ks");
        addRule(lines, "y", "", "", "i");
        addRule(lines, "z", "", "", "ts");
    }

    /**
     * Adds the hard-coded "Ash rules Hebrew" entries to {@code lines}, one
     * {@code addRule} call per entry and in the listed order. {@code addRule} is
     * defined outside this section; its four string arguments are presumably
     * pattern, left context, right context and phoneme expression (TODO confirm).
     *
     * @param lines rule map to populate (type arguments restored; this copy of the
     *              file carried the garbled {@code Map>})
     */
    private static void addAshRulesHebrew(final Map<String, List<Rule>> lines) {
        addRule(lines, "אי", "", "", "i");
        addRule(lines, "עי", "", "", "i");
        addRule(lines, "עו", "", "", "VV");
        addRule(lines, "או", "", "", "VV");
        addRule(lines, "ג׳", "", "", "Z");
        addRule(lines, "ד׳", "", "", "dZ");
        addRule(lines, "א", "", "", "L");
        addRule(lines, "ב", "", "", "b");
        addRule(lines, "ג", "", "", "g");
        addRule(lines, "ד", "", "", "d");
        addRule(lines, "ה", "^", "", "1");
        addRule(lines, "ה", "", "$", "1");
        addRule(lines, "ה", "", "", "");
        addRule(lines, "וו", "", "", "V");
        addRule(lines, "וי", "", "", "WW");
        addRule(lines, "ו", "", "", "W");
        addRule(lines, "ז", "", "", "z");
        addRule(lines, "ח", "", "", "X");
        addRule(lines, "ט", "", "", "T");
        addRule(lines, "יי", "", "", "i");
        addRule(lines, "י", "", "", "i");
        addRule(lines, "ך", "", "", "X");
        addRule(lines, "כ", "^", "", "K");
        addRule(lines, "כ", "", "", "k");
        addRule(lines, "ל", "", "", "l");
        addRule(lines, "ם", "", "", "m");
        addRule(lines, "מ", "", "", "m");
        addRule(lines, "ן", "", "", "n");
        addRule(lines, "נ", "", "", "n");
        addRule(lines, "ס", "", "", "s");
        addRule(lines, "ע", "", "", "L");
        addRule(lines, "ף", "", "", "f");
        addRule(lines, "פ", "", "", "f");
        addRule(lines, "ץ", "", "", "C");
        addRule(lines, "צ", "", "", "C");
        addRule(lines, "ק", "", "", "K");
        addRule(lines, "ר", "", "", "r");
        addRule(lines, "ש", "", "", "s");
        addRule(lines, "ת", "", "", "TB");
    }

    /**
     * Adds the hard-coded "Ash rules Hungarian" entries to {@code lines} via
     * {@code addRule}, in the listed order.
     *
     * @param lines rule map to populate (type arguments restored from the garbled {@code Map>})
     */
    private static void addAshRulesHungarian(final Map<String, List<Rule>> lines) {
        addRule(lines, "sz", "", "", "s");
        addRule(lines, "zs", "", "", "Z");
        addRule(lines, "cs", "", "", "tS");
        addRule(lines, "ay", "", "", "(oj|aj)");
        addRule(lines, "ai", "", "", "(oj|aj)");
        addRule(lines, "aj", "", "", "(oj|aj)");
        addRule(lines, "ei", "", "", "aj");
        addRule(lines, "ey", "", "", "aj");
        addRule(lines, "y", "[áo]", "", "j");
        addRule(lines, "i", "[áo]", "", "j");
        addRule(lines, "ee", "", "", "(aj|e)");
        addRule(lines, "ely", "", "", "(aj|eli)");
        addRule(lines, "ly", "", "", "(j|li)");
        addRule(lines, "gy", "", "[aeouáéóúüöőű]", "dj");
        addRule(lines, "gy", "", "", "(d|gi)");
        addRule(lines, "ny", "", "[aeouáéóúüöőű]", "nj");
        addRule(lines, "ny", "", "", "(n|ni)");
        addRule(lines, "ty", "", "[aeouáéóúüöőű]", "tj");
        addRule(lines, "ty", "", "", "(t|ti)");
        addRule(lines, "qu", "", "", "(ku|kv)");
        addRule(lines, "h", "", "$", "");
        addRule(lines, "á", "", "", "a");
        addRule(lines, "é", "", "", "e");
        addRule(lines, "í", "", "", "i");
        addRule(lines, "ó", "", "", "o");
        addRule(lines, "ö", "", "", "Y");
        addRule(lines, "ő", "", "", "Y");
        addRule(lines, "ú", "", "", "u");
        addRule(lines, "ü", "", "", "Q");
        addRule(lines, "ű", "", "", "Q");
        addRule(lines, "a", "", "", "a");
        addRule(lines, "b", "", "", "b");
        addRule(lines, "c", "", "", "ts");
        addRule(lines, "d", "", "", "d");
        addRule(lines, "e", "", "", "E");
        addRule(lines, "f", "", "", "f");
        addRule(lines, "g", "", "", "g");
        addRule(lines, "h", "", "", "h");
        addRule(lines, "i", "", "", "I");
        addRule(lines, "j", "", "", "j");
        addRule(lines, "k", "", "", "k");
        addRule(lines, "l", "", "", "l");
        addRule(lines, "m", "", "", "m");
        addRule(lines, "n", "", "", "n");
        addRule(lines, "o", "", "", "o");
        addRule(lines, "p", "", "", "p");
        addRule(lines, "q", "", "", "k");
        addRule(lines, "r", "", "", "r");
        addRule(lines, "s", "", "", "(S|s)");
        addRule(lines, "t", "", "", "t");
        addRule(lines, "u", "", "", "u");
addRule(lines, "v", "", "", "v");
        addRule(lines, "w", "", "", "v");
        addRule(lines, "x", "", "", "ks");
        addRule(lines, "y", "", "", "i");
        addRule(lines, "z", "", "", "z");
    }

    /**
     * Adds the hard-coded "Ash rules Polish" entries to {@code lines}, one
     * {@code addRule} call per entry and in the listed order. {@code addRule} is
     * defined outside this section; its four string arguments are presumably
     * pattern, left context, right context and phoneme expression (TODO confirm).
     *
     * @param lines rule map to populate (type arguments restored; this copy of the
     *              file carried the garbled {@code Map>})
     */
    private static void addAshRulesPolish(final Map<String, List<Rule>> lines) {
        addRule(lines, "ska", "", "$", "ski");
        addRule(lines, "cka", "", "$", "tski");
        addRule(lines, "lowa", "", "$", "(lova|lof|l|el)");
        addRule(lines, "kowa", "", "$", "(kova|kof|k|ek)");
        addRule(lines, "owa", "", "$", "(ova|of|)");
        addRule(lines, "lowna", "", "$", "(lovna|levna|l|el)");
        addRule(lines, "kowna", "", "$", "(kovna|k|ek)");
        addRule(lines, "owna", "", "$", "(ovna|)");
        addRule(lines, "lówna", "", "$", "(l|el)");
        addRule(lines, "kówna", "", "$", "(k|ek)");
        addRule(lines, "ówna", "", "$", "");
        addRule(lines, "a", "", "$", "(a|i)");
        addRule(lines, "czy", "", "", "tSi");
        addRule(lines, "cze", "", "[bcdgkpstwzż]", "(tSe|tSF)");
        addRule(lines, "ciewicz", "", "", "(tsevitS|tSevitS)");
        addRule(lines, "siewicz", "", "", "(sevitS|SevitS)");
        addRule(lines, "ziewicz", "", "", "(zevitS|ZevitS)");
        addRule(lines, "riewicz", "", "", "rjevitS");
        addRule(lines, "diewicz", "", "", "djevitS");
        addRule(lines, "tiewicz", "", "", "tjevitS");
        addRule(lines, "iewicz", "", "", "evitS");
        addRule(lines, "ewicz", "", "", "evitS");
        addRule(lines, "owicz", "", "", "ovitS");
        addRule(lines, "icz", "", "", "itS");
        addRule(lines, "cz", "", "", "tS");
        addRule(lines, "ch", "", "", "x");
        addRule(lines, "cia", "", "[bcdgkpstwzż]", "(tSB|tsB)");
        addRule(lines, "cia", "", "", "(tSa|tsa)");
        addRule(lines, "cią", "", "[bp]", "(tSom|tsom)");
        addRule(lines, "cią", "", "", "(tSon|tson)");
        addRule(lines, "cię", "", "[bp]", "(tSem|tsem)");
        addRule(lines, "cię", "", "", "(tSen|tsen)");
        addRule(lines, "cie", "", "[bcdgkpstwzż]", "(tSF|tsF)");
        addRule(lines, "cie", "", "", "(tSe|tse)");
        addRule(lines, "cio", "", "", "(tSo|tso)");
        addRule(lines, "ciu", "", "", "(tSu|tsu)");
        addRule(lines, "ci", "", "", "(tSi|tsI)");
        addRule(lines, "ć", "", "", "(tS|ts)");
        addRule(lines, "ssz", "", "", "S");
        addRule(lines, "sz", "", "", "S");
        addRule(lines, "sia", "", "[bcdgkpstwzż]", "(SB|sB|sja)");
        addRule(lines, "sia", "", "", "(Sa|sja)");
        addRule(lines, "sią", "", "[bp]", "(Som|som)");
        addRule(lines, "sią", "", "", "(Son|son)");
        addRule(lines, "się", "", "[bp]", "(Sem|sem)");
        addRule(lines, "się", "", "", "(Sen|sen)");
        addRule(lines, "sie", "", "[bcdgkpstwzż]", "(SF|sF|se)");
        addRule(lines, "sie", "", "", "(Se|se)");
        addRule(lines, "sio", "", "", "(So|so)");
        addRule(lines, "siu", "", "", "(Su|sju)");
        addRule(lines, "si", "", "", "(Si|sI)");
        addRule(lines, "ś", "", "", "(S|s)");
        addRule(lines, "zia", "", "[bcdgkpstwzż]", "(ZB|zB|zja)");
        addRule(lines, "zia", "", "", "(Za|zja)");
        addRule(lines, "zią", "", "[bp]", "(Zom|zom)");
        addRule(lines, "zią", "", "", "(Zon|zon)");
        addRule(lines, "zię", "", "[bp]", "(Zem|zem)");
        addRule(lines, "zię", "", "", "(Zen|zen)");
        addRule(lines, "zie", "", "[bcdgkpstwzż]", "(ZF|zF)");
        addRule(lines, "zie", "", "", "(Ze|ze)");
        addRule(lines, "zio", "", "", "(Zo|zo)");
        addRule(lines, "ziu", "", "", "(Zu|zju)");
        addRule(lines, "zi", "", "", "(Zi|zI)");
        addRule(lines, "że", "", "[bcdgkpstwzż]", "(Ze|ZF)");
        addRule(lines, "że", "", "[bcdgkpstwzż]", "(Ze|ZF|ze|zF)");
        addRule(lines, "że", "", "", "Ze");
        addRule(lines, "źe", "", "", "(Ze|ze)");
        addRule(lines, "ży", "", "", "Zi");
        addRule(lines, "źi", "", "", "(Zi|zi)");
        addRule(lines, "ż", "", "", "Z");
        addRule(lines, "ź", "", "", "(Z|z)");
        addRule(lines, "rze", "t", "", "(Se|re)");
        addRule(lines, "rze", "", "", "(Ze|re|rZe)");
        addRule(lines, "rzy", "t", "", "(Si|ri)");
        addRule(lines, "rzy", "", "", "(Zi|ri|rZi)");
        addRule(lines, "rz", "t", "", "(S|r)");
        addRule(lines, "rz", "", "", "(Z|r|rZ)");
        addRule(lines, "lio", "", "", "(lo|le)");
        addRule(lines, "ł", "", "", "l");
        addRule(lines, "ń", "", "", "n");
        addRule(lines, "qu", "", "", "k");
        addRule(lines, "s", "", "s", "");
        addRule(lines, "ó", "", "", "(u|o)");
        addRule(lines, "ą", "", "[bp]", "om");
        addRule(lines, "ę", "", "[bp]", "em");
// Remaining entries of a rule list whose method header lies above this span (kept in the original single-line layout).
addRule(lines, "ą", "", "", "on"); addRule(lines, "ę", "", "", "en"); addRule(lines, "ije", "", "", "je"); addRule(lines, "yje", "", "", "je"); addRule(lines, "iie", "", "", "je"); addRule(lines, "yie", "", "", "je"); addRule(lines, "iye", "", "", "je"); addRule(lines, "yye", "", "", "je"); addRule(lines, "ij", "", "[aou]", "j"); addRule(lines, "yj", "", "[aou]", "j"); addRule(lines, "ii", "", "[aou]", "j"); addRule(lines, "yi", "", "[aou]", "j"); addRule(lines, "iy", "", "[aou]", "j"); addRule(lines, "yy", "", "[aou]", "j"); addRule(lines, "rie", "", "", "rje"); addRule(lines, "die", "", "", "dje"); addRule(lines, "tie", "", "", "tje"); addRule(lines, "ie", "", "[bcdgkpstwzż]", "F"); addRule(lines, "ie", "", "", "e"); addRule(lines, "aue", "", "", "aue"); addRule(lines, "au", "", "", "au"); addRule(lines, "ei", "", "", "aj"); addRule(lines, "ey", "", "", "aj"); addRule(lines, "ej", "", "", "aj"); addRule(lines, "ai", "", "", "aj"); addRule(lines, "ay", "", "", "aj"); addRule(lines, "aj", "", "", "aj"); addRule(lines, "i", "[ou]", "", "j"); addRule(lines, "y", "[ou]", "", "j"); addRule(lines, "i", "", "[aou]", "j"); addRule(lines, "y", "", "[aeou]", "j"); addRule(lines, "a", "", "[bcdgkpstwzż]", "B"); addRule(lines, "e", "", "[bcdgkpstwzż]", "(E|F)"); addRule(lines, "o", "", "[bcćdgklłmnńrsśtwzźż]", "P"); addRule(lines, "a", "", "", "a"); addRule(lines, "b", "", "", "b"); addRule(lines, "c", "", "", "ts"); addRule(lines, "d", "", "", "d"); addRule(lines, "e", "", "", "E"); addRule(lines, "f", "", "", "f"); addRule(lines, "g", "", "", "g"); addRule(lines, "h", "", "", "(h|x)"); addRule(lines, "i", "", "", "I"); addRule(lines, "j", "", "", "j"); addRule(lines, "k", "", "", "k"); addRule(lines, "l", "", "", "l"); addRule(lines, "m", "", "", "m"); addRule(lines, "n", "", "", "n"); addRule(lines, "o", "", "", "o"); addRule(lines, "p", "", "", "p"); addRule(lines, "q", "", "", "k"); addRule(lines, "r", "", "", "r"); addRule(lines, "s", "", "", "s"); addRule(lines, "t", 
"", "", "t"); addRule(lines, "u", "", "", "u"); addRule(lines, "v", "", "", "v"); addRule(lines, "w", "", "", "v"); addRule(lines, "x", "", "", "ks"); addRule(lines, "y", "", "", "I"); addRule(lines, "z", "", "", "z"); }

    // NOTE(review): the parameter type of the methods below read "Map>" (generic arguments lost);
    // it is written out as Map<String, List<Rule>> here - confirm it matches addRule's parameter.
    // The four string arguments of addRule are presumably pattern, left context, right context
    // and phoneme expression - verify against addRule.

    /**
     * Passes each entry of the hard-coded table below to {@code addRule}, one call per
     * entry and in the order written.
     *
     * <p>NOTE(review): going by the method name this is the "ash" rule table for Romanian;
     * confirm against the caller.
     *
     * @param lines rule collection handed unchanged to every {@code addRule} call
     */
    private static void addAshRulesRomanian(final Map<String, List<Rule>> lines) {
        addRule(lines, "j", "", "", "Z");
        addRule(lines, "ce", "", "", "tSe");
        addRule(lines, "ci", "", "", "(tSi|tS)");
        addRule(lines, "ch", "", "[ei]", "k");
        addRule(lines, "ch", "", "", "x");
        addRule(lines, "c", "", "", "k");
        addRule(lines, "gi", "", "", "(dZi|dZ)");
        addRule(lines, "g", "", "[ei]", "dZ");
        addRule(lines, "gh", "", "", "g");
        addRule(lines, "ei", "", "", "aj");
        addRule(lines, "i", "[aou]", "", "j");
        addRule(lines, "i", "", "[aeou]", "j");
        addRule(lines, "ţ", "", "", "ts");
        addRule(lines, "ş", "", "", "S");
        addRule(lines, "h", "", "", "(x|h)");
        addRule(lines, "qu", "", "", "k");
        addRule(lines, "q", "", "", "k");
        addRule(lines, "w", "", "", "v");
        addRule(lines, "x", "", "", "ks");
        addRule(lines, "y", "", "", "i");
        addRule(lines, "î", "", "", "i");
        addRule(lines, "ea", "", "", "ja");
        addRule(lines, "ă", "", "", "(e|a)");
        addRule(lines, "aue", "", "", "aue");
        addRule(lines, "a", "", "", "a");
        addRule(lines, "b", "", "", "b");
        addRule(lines, "d", "", "", "d");
        addRule(lines, "e", "", "", "E");
        addRule(lines, "f", "", "", "f");
        addRule(lines, "g", "", "", "g");
        addRule(lines, "i", "", "", "I");
        addRule(lines, "k", "", "", "k");
        addRule(lines, "l", "", "", "l");
        addRule(lines, "m", "", "", "m");
        addRule(lines, "n", "", "", "n");
        addRule(lines, "o", "", "", "o");
        addRule(lines, "p", "", "", "p");
        addRule(lines, "r", "", "", "r");
        addRule(lines, "s", "", "", "s");
        addRule(lines, "t", "", "", "t");
        addRule(lines, "u", "", "", "u");
        addRule(lines, "v", "", "", "v");
        addRule(lines, "z", "", "", "z");
    }

    /**
     * Passes each entry of the hard-coded table below to {@code addRule}, one call per
     * entry and in the order written.
     *
     * <p>NOTE(review): going by the method name this is the "ash" rule table for Russian;
     * confirm against the caller.
     *
     * @param lines rule collection handed unchanged to every {@code addRule} call
     */
    private static void addAshRulesRussian(final Map<String, List<Rule>> lines) {
        // The rule list continues on the following source lines; entries keep the original single-line layout.
        addRule(lines, "yna", "", "$", "(in|ina)"); addRule(lines, "ina", "", "$", "(in|ina)"); addRule(lines, "liova", "", "$", "(lof|lef)"); 
addRule(lines, "lova", "", "$", "(lof|lef|lova)"); addRule(lines, "ova", "", "$", "(of|ova)"); addRule(lines, "eva", "", "$", "(ef|ova)"); addRule(lines, "aia", "", "$", "(aja|i)"); addRule(lines, "aja", "", "$", "(aja|i)"); addRule(lines, "aya", "", "$", "(aja|i)"); addRule(lines, "tsya", "", "", "tsa"); addRule(lines, "tsyu", "", "", "tsu"); addRule(lines, "tsia", "", "", "tsa"); addRule(lines, "tsie", "", "", "tse"); addRule(lines, "tsio", "", "", "tso"); addRule(lines, "tsye", "", "", "tse"); addRule(lines, "tsyo", "", "", "tso"); addRule(lines, "tsiu", "", "", "tsu"); addRule(lines, "sie", "", "", "se"); addRule(lines, "sio", "", "", "so"); addRule(lines, "zie", "", "", "ze"); addRule(lines, "zio", "", "", "zo"); addRule(lines, "sye", "", "", "se"); addRule(lines, "syo", "", "", "so"); addRule(lines, "zye", "", "", "ze"); addRule(lines, "zyo", "", "", "zo"); addRule(lines, "gauz", "", "$", "haus"); addRule(lines, "gaus", "", "$", "haus"); addRule(lines, "gol'ts", "", "$", "holts"); addRule(lines, "golts", "", "$", "holts"); addRule(lines, "gol'tz", "", "$", "holts"); addRule(lines, "goltz", "", "$", "holts"); addRule(lines, "gejmer", "", "$", "hajmer"); addRule(lines, "gejm", "", "$", "hajm"); addRule(lines, "geimer", "", "$", "hajmer"); addRule(lines, "geim", "", "$", "hajm"); addRule(lines, "geymer", "", "$", "hajmer"); addRule(lines, "geym", "", "$", "hajm"); addRule(lines, "gendler", "", "$", "hendler"); addRule(lines, "gof", "", "$", "hof"); addRule(lines, "gojf", "", "$", "hojf"); addRule(lines, "goyf", "", "$", "hojf"); addRule(lines, "goif", "", "$", "hojf"); addRule(lines, "ger", "", "$", "ger"); addRule(lines, "gen", "", "$", "gen"); addRule(lines, "gin", "", "$", "gin"); addRule(lines, "gg", "", "", "g"); addRule(lines, "g", "[jaeoiuy]", "[aeoiu]", "g"); addRule(lines, "g", "", "[aeoiu]", "(g|h)"); addRule(lines, "kh", "", "", "x"); addRule(lines, "ch", "", "", "(tS|x)"); addRule(lines, "sch", "", "", "(StS|S)"); addRule(lines, "ssh", "", "", "S"); 
addRule(lines, "sh", "", "", "S"); addRule(lines, "zh", "", "", "Z"); addRule(lines, "tz", "", "$", "ts"); addRule(lines, "tz", "", "", "(ts|tz)"); addRule(lines, "c", "", "[iey]", "s"); addRule(lines, "c", "", "", "k"); addRule(lines, "qu", "", "", "(kv|k)"); addRule(lines, "q", "", "", "k"); addRule(lines, "s", "", "s", ""); addRule(lines, "w", "", "", "v"); addRule(lines, "x", "", "", "ks"); addRule(lines, "lya", "", "", "la"); addRule(lines, "lyu", "", "", "lu"); addRule(lines, "lia", "", "", "la"); addRule(lines, "liu", "", "", "lu"); addRule(lines, "lja", "", "", "la"); addRule(lines, "lju", "", "", "lu"); addRule(lines, "le", "", "", "(lo|lE)"); addRule(lines, "lyo", "", "", "(lo|le)"); addRule(lines, "lio", "", "", "(lo|le)"); addRule(lines, "ije", "", "", "je"); addRule(lines, "ie", "", "", "je"); addRule(lines, "iye", "", "", "je"); addRule(lines, "iie", "", "", "je"); addRule(lines, "yje", "", "", "je"); addRule(lines, "ye", "", "", "je"); addRule(lines, "yye", "", "", "je"); addRule(lines, "yie", "", "", "je"); addRule(lines, "ij", "", "[aou]", "j"); addRule(lines, "iy", "", "[aou]", "j"); addRule(lines, "ii", "", "[aou]", "j"); addRule(lines, "yj", "", "[aou]", "j"); addRule(lines, "yy", "", "[aou]", "j"); addRule(lines, "yi", "", "[aou]", "j"); addRule(lines, "io", "", "", "(jo|e)"); addRule(lines, "i", "", "[au]", "j"); addRule(lines, "i", "[aou]", "", "j"); addRule(lines, "ei", "", "", "aj"); addRule(lines, "ey", "", "", "aj"); addRule(lines, "ej", "", "", "aj"); addRule(lines, "yo", "", "", "(jo|e)"); addRule(lines, "y", "", "[au]", "j"); addRule(lines, "y", "[aiou]", "", "j"); addRule(lines, "ii", "", "$", "i"); addRule(lines, "iy", "", "$", "i"); addRule(lines, "yy", "", "$", "i"); addRule(lines, "yi", "", "$", "i"); addRule(lines, "yj", "", "$", "i"); addRule(lines, "ij", "", "$", "i"); addRule(lines, "e", "^", "", "(je|E)"); addRule(lines, "ee", "", "", "(aje|i)"); addRule(lines, "e", "[aou]", "", "je"); addRule(lines, "y", "", "", "I"); 
// Remaining entries of the rule list opened on the preceding source lines (kept in the original single-line layout).
addRule(lines, "oo", "", "", "(oo|u)"); addRule(lines, "'", "", "", ""); addRule(lines, "\"", "", "", ""); addRule(lines, "aue", "", "", "aue"); addRule(lines, "a", "", "", "a"); addRule(lines, "b", "", "", "b"); addRule(lines, "d", "", "", "d"); addRule(lines, "e", "", "", "E"); addRule(lines, "f", "", "", "f"); addRule(lines, "g", "", "", "g"); addRule(lines, "h", "", "", "h"); addRule(lines, "i", "", "", "I"); addRule(lines, "j", "", "", "j"); addRule(lines, "k", "", "", "k"); addRule(lines, "l", "", "", "l"); addRule(lines, "m", "", "", "m"); addRule(lines, "n", "", "", "n"); addRule(lines, "o", "", "", "o"); addRule(lines, "p", "", "", "p"); addRule(lines, "r", "", "", "r"); addRule(lines, "s", "", "", "s"); addRule(lines, "t", "", "", "t"); addRule(lines, "u", "", "", "u"); addRule(lines, "v", "", "", "v"); addRule(lines, "z", "", "", "z"); }

    // NOTE(review): the parameter type of the methods below read "Map>" (generic arguments lost);
    // it is written out as Map<String, List<Rule>> here - confirm it matches addRule's parameter.
    // The four string arguments of addRule are presumably pattern, left context, right context
    // and phoneme expression - verify against addRule.

    /**
     * Passes each entry of the hard-coded table below to {@code addRule}, one call per
     * entry and in the order written.
     *
     * <p>NOTE(review): going by the method name this is the "ash" rule table for Spanish;
     * confirm against the caller.
     *
     * @param lines rule collection handed unchanged to every {@code addRule} call
     */
    private static void addAshRulesSpanish(final Map<String, List<Rule>> lines) {
        addRule(lines, "ñ", "", "", "(n|nj)");
        addRule(lines, "ch", "", "", "(tS|dZ)");
        addRule(lines, "h", "[bdgt]", "", "");
        addRule(lines, "h", "", "$", "");
        addRule(lines, "j", "", "", "x");
        addRule(lines, "x", "", "", "ks");
        addRule(lines, "ll", "", "", "(l|Z)");
        addRule(lines, "w", "", "", "v");
        addRule(lines, "v", "", "", "(b|v)");
        addRule(lines, "b", "", "", "(b|v)");
        addRule(lines, "m", "", "[bpvf]", "(m|n)");
        addRule(lines, "c", "", "[ei]", "s");
        addRule(lines, "c", "", "", "k");
        addRule(lines, "z", "", "", "(z|s)");
        addRule(lines, "gu", "", "[ei]", "(g|gv)");
        addRule(lines, "g", "", "[ei]", "(x|g)");
        addRule(lines, "qu", "", "", "k");
        addRule(lines, "q", "", "", "k");
        addRule(lines, "uo", "", "", "(vo|o)");
        addRule(lines, "u", "", "[aei]", "v");
        addRule(lines, "y", "", "", "(i|j|S|Z)");
        addRule(lines, "ü", "", "", "v");
        addRule(lines, "á", "", "", "a");
        addRule(lines, "é", "", "", "e");
        addRule(lines, "í", "", "", "i");
        addRule(lines, "ó", "", "", "o");
        addRule(lines, "ú", "", "", "u");
        addRule(lines, "a", "", "", "a");
        addRule(lines, "d", "", "", "d");
        addRule(lines, "e", "", "", "E");
        addRule(lines, "f", "", "", "f");
        addRule(lines, "g", "", "", "g");
        addRule(lines, "h", "", "", "h");
        addRule(lines, "i", "", "", "I");
        addRule(lines, "k", "", "", "k");
        addRule(lines, "l", "", "", "l");
        addRule(lines, "m", "", "", "m");
        addRule(lines, "n", "", "", "n");
        addRule(lines, "o", "", "", "o");
        addRule(lines, "p", "", "", "p");
        addRule(lines, "r", "", "", "r");
        addRule(lines, "s", "", "", "s");
        addRule(lines, "t", "", "", "t");
        addRule(lines, "u", "", "", "u");
    }

    /**
     * Passes each entry of the hard-coded table below to {@code addRule}, one call per
     * entry and in the order written. Several phoneme strings carry a bracketed
     * language tag such as {@code [german]} or {@code [french]}.
     *
     * <p>NOTE(review): going by the method name this is the "gen" approximate-match table
     * for language "any"; confirm against the caller.
     *
     * @param lines rule collection handed unchanged to every {@code addRule} call
     */
    private static void addGenApproxAny(final Map<String, List<Rule>> lines) {
        // The rule list continues on the following source lines; entries keep the original single-line layout.
        addRule(lines, "mb", "", "", "(mb|b[greeklatin])"); addRule(lines, "mp", "", "", "(mp|b[greeklatin])"); addRule(lines, "ng", "", "", "(ng|g[greeklatin])"); addRule(lines, "B", "", "[fktSs]", "(p|f[spanish])"); addRule(lines, "B", "", "p", ""); addRule(lines, "B", "", "$", "(p|f[spanish])"); addRule(lines, "V", "", "[pktSs]", "(f|p[spanish])"); addRule(lines, "V", "", "f", ""); addRule(lines, "V", "", "$", "(f|p[spanish])"); addRule(lines, "B", "", "", "(b|v[spanish])"); addRule(lines, "V", "", "", "(v|b[spanish])"); addRule(lines, "t", "", "$", "(t|[french])"); addRule(lines, "g", "n", "$", "(g|[french])"); addRule(lines, "k", "n", "$", "(k|[french])"); addRule(lines, "p", "", "$", "(p|[french])"); addRule(lines, "r", "[Ee]", "$", "(r|[french])"); addRule(lines, "s", "", "$", "(s|[french])"); addRule(lines, "t", "[aeiouAEIOU]", "[^aeiouAEIOU]", "(t|[french])"); addRule(lines, "s", "[aeiouAEIOU]", "[^aeiouAEIOU]", "(s|[french])"); addRule(lines, "I", "[aeiouAEIBFOUQY]", "", "i"); addRule(lines, "I", "", "[^aeiouAEBFIOU]e", "(Q[german]|i|D[english])"); addRule(lines, "I", "", "$", "i"); addRule(lines, "I", "", "[^k]$", "i"); addRule(lines, "Ik", "[lr]", "$", "(ik|Qk[german])"); addRule(lines, "Ik", "", "$", "ik"); addRule(lines, "sIts", "", "$", "(sits|sQts[german])"); addRule(lines, "Its", "", "$", "its"); addRule(lines, "I", "", "", "(Q[german]|i)"); addRule(lines, "lEE", "[bdfgkmnprsStvzZ]", "", 
"(li|il[english])"); addRule(lines, "rEE", "[bdfgkmnprsStvzZ]", "", "(ri|ir[english])"); addRule(lines, "lE", "[bdfgkmnprsStvzZ]", "", "(li|il[english]|lY[german])"); addRule(lines, "rE", "[bdfgkmnprsStvzZ]", "", "(ri|ir[english]|rY[german])"); addRule(lines, "EE", "", "", "(i|)"); addRule(lines, "ea", "", "", "(D|a|i)"); addRule(lines, "au", "", "", "(D|a|u)"); addRule(lines, "ou", "", "", "(D|o|u)"); addRule(lines, "eu", "", "", "(D|e|u)"); addRule(lines, "ai", "", "", "(D|a|i)"); addRule(lines, "Ai", "", "", "(D|a|i)"); addRule(lines, "oi", "", "", "(D|o|i)"); addRule(lines, "Oi", "", "", "(D|o|i)"); addRule(lines, "ui", "", "", "(D|u|i)"); addRule(lines, "Ui", "", "", "(D|u|i)"); addRule(lines, "ei", "", "", "(D|i)"); addRule(lines, "Ei", "", "", "(D|i)"); addRule(lines, "iA", "", "$", "(ia|io)"); addRule(lines, "iA", "", "", "(ia|io|iY[german])"); addRule(lines, "A", "", "[^aeiouAEBFIOU]e", "(a|o|Y[german]|D[english])"); addRule(lines, "E", "i[^aeiouAEIOU]", "", "(i|Y[german]|[english])"); addRule(lines, "E", "a[^aeiouAEIOU]", "", "(i|Y[german]|[english])"); addRule(lines, "E", "", "[fklmnprst]$", "i"); addRule(lines, "E", "", "ts$", "i"); addRule(lines, "E", "", "$", "i"); addRule(lines, "E", "[DaoiuAOIUQY]", "", "i"); addRule(lines, "E", "", "[aoAOQY]", "i"); addRule(lines, "E", "", "", "(i|Y[german])"); addRule(lines, "P", "", "", "(o|u)"); addRule(lines, "O", "", "[fklmnprstv]$", "o"); addRule(lines, "O", "", "ts$", "o"); addRule(lines, "O", "", "$", "o"); addRule(lines, "O", "[oeiuQY]", "", "o"); addRule(lines, "O", "", "", "(o|Y[german])"); addRule(lines, "O", "", "", "o"); addRule(lines, "A", "", "[fklmnprst]$", "(a|o)"); addRule(lines, "A", "", "ts$", "(a|o)"); addRule(lines, "A", "", "$", "(a|o)"); addRule(lines, "A", "[oeiuQY]", "", "(a|o)"); addRule(lines, "A", "", "", "(a|o|Y[german])"); addRule(lines, "A", "", "", "(a|o)"); addRule(lines, "U", "", "$", "u"); addRule(lines, "U", "[DoiuQY]", "", "u"); addRule(lines, "U", "", "[^k]$", "u"); 
// Remaining entries of the rule list opened on the preceding source lines (kept in the original single-line layout).
addRule(lines, "Uk", "[lr]", "$", "(uk|Qk[german])"); addRule(lines, "Uk", "", "$", "uk"); addRule(lines, "sUts", "", "$", "(suts|sQts[german])"); addRule(lines, "Uts", "", "$", "uts"); addRule(lines, "U", "", "", "(u|Q[german])"); addRule(lines, "U", "", "", "u"); addRule(lines, "e", "", "[fklmnprstv]$", "i"); addRule(lines, "e", "", "ts$", "i"); addRule(lines, "e", "", "$", "i"); addRule(lines, "e", "[DaoiuAOIUQY]", "", "i"); addRule(lines, "e", "", "[aoAOQY]", "i"); addRule(lines, "e", "", "", "(i|Y[german])"); addRule(lines, "a", "", "", "(a|o)"); }

    // NOTE(review): the parameter type of the methods below read "Map>" (generic arguments lost);
    // it is written out as Map<String, List<Rule>> here - confirm it matches addRule's parameter.
    // The four string arguments of addRule are presumably pattern, left context, right context
    // and phoneme expression - verify against addRule.

    /**
     * Passes each entry of the hard-coded table below to {@code addRule}, one call per
     * entry and in the order written. Many entries map a letter to the empty string
     * when the same letter follows it.
     *
     * <p>NOTE(review): going by the method name this table is shared by the "gen" exact and
     * approximate rule sets; confirm against the caller.
     *
     * @param lines rule collection handed unchanged to every {@code addRule} call
     */
    private static void addGenExactApproxCommon(final Map<String, List<Rule>> lines) {
        addRule(lines, "h", "", "$", "");
        addRule(lines, "b", "", "[fktSs]", "p");
        addRule(lines, "b", "", "p", "");
        addRule(lines, "b", "", "$", "p");
        addRule(lines, "p", "", "[vgdZz]", "b");
        addRule(lines, "p", "", "b", "");
        addRule(lines, "v", "", "[pktSs]", "f");
        addRule(lines, "v", "", "f", "");
        addRule(lines, "v", "", "$", "f");
        addRule(lines, "f", "", "[vbgdZz]", "v");
        addRule(lines, "f", "", "v", "");
        addRule(lines, "g", "", "[pftSs]", "k");
        addRule(lines, "g", "", "k", "");
        addRule(lines, "g", "", "$", "k");
        addRule(lines, "k", "", "[vbdZz]", "g");
        addRule(lines, "k", "", "g", "");
        addRule(lines, "d", "", "[pfkSs]", "t");
        addRule(lines, "d", "", "t", "");
        addRule(lines, "d", "", "$", "t");
        addRule(lines, "t", "", "[vbgZz]", "d");
        addRule(lines, "t", "", "d", "");
        addRule(lines, "s", "", "dZ", "");
        addRule(lines, "s", "", "tS", "");
        addRule(lines, "z", "", "[pfkSt]", "s");
        addRule(lines, "z", "", "[sSzZ]", "");
        addRule(lines, "s", "", "[sSzZ]", "");
        addRule(lines, "Z", "", "[sSzZ]", "");
        addRule(lines, "S", "", "[sSzZ]", "");
        addRule(lines, "jnm", "", "", "jm");
        addRule(lines, "ji", "^", "", "i");
        addRule(lines, "jI", "^", "", "I");
        addRule(lines, "a", "", "[aA]", "");
        addRule(lines, "a", "A", "", "");
        addRule(lines, "A", "", "A", "");
        addRule(lines, "b", "", "b", "");
        addRule(lines, "d", "", "d", "");
        addRule(lines, "f", "", "f", "");
        addRule(lines, "g", "", "g", "");
        addRule(lines, "j", "", "j", "");
        addRule(lines, "k", "", "k", "");
        addRule(lines, "l", "", "l", "");
        addRule(lines, "m", "", "m", "");
        addRule(lines, "n", "", "n", "");
        addRule(lines, "p", "", "p", "");
        addRule(lines, "r", "", "r", "");
        addRule(lines, "t", "", "t", "");
        addRule(lines, "v", "", "v", "");
        addRule(lines, "z", "", "z", "");
    }

    /**
     * Passes each entry of the hard-coded table below to {@code addRule}, one call per
     * entry and in the order written.
     *
     * <p>NOTE(review): going by the method name this is the common table of the "gen"
     * approximate rule set; confirm against the caller.
     *
     * @param lines rule collection handed unchanged to every {@code addRule} call
     */
    private static void addGenApproxCommon(final Map<String, List<Rule>> lines) {
        // The rule list continues on the following source lines; entries keep the original single-line layout.
        addRule(lines, "van", "^", "[bp]", "(vam|)"); addRule(lines, "van", "^", "", "(van|)"); addRule(lines, "n", "", "[bp]", "m"); addRule(lines, "h", "", "", ""); addRule(lines, "H", "", "", "(x|)"); addRule(lines, "sen", "[rmnl]", "$", "(zn|zon)"); addRule(lines, "sen", "", "$", "(sn|son)"); addRule(lines, "sEn", "[rmnl]", "$", "(zn|zon)"); addRule(lines, "sEn", "", "$", "(sn|son)"); addRule(lines, "e", "[BbdfgklmnprsStvzZ]", "[ln]$", ""); addRule(lines, "i", "[BbdfgklmnprsStvzZ]", "[ln]$", ""); addRule(lines, "E", "[BbdfgklmnprsStvzZ]", "[ln]$", ""); addRule(lines, "I", "[BbdfgklmnprsStvzZ]", "[ln]$", ""); addRule(lines, "Q", "[BbdfgklmnprsStvzZ]", "[ln]$", ""); addRule(lines, "Y", "[BbdfgklmnprsStvzZ]", "[ln]$", ""); addRule(lines, "e", "[BbdfgklmnprsStvzZ]", "[ln][BbdfgklmnprsStvzZ]", ""); addRule(lines, "i", "[BbdfgklmnprsStvzZ]", "[ln][BbdfgklmnprsStvzZ]", ""); addRule(lines, "E", "[BbdfgklmnprsStvzZ]", "[ln][BbdfgklmnprsStvzZ]", ""); addRule(lines, "I", "[BbdfgklmnprsStvzZ]", "[ln][BbdfgklmnprsStvzZ]", ""); addRule(lines, "Q", "[BbdfgklmnprsStvzZ]", "[ln][BbdfgklmnprsStvzZ]", ""); addRule(lines, "Y", "[BbdfgklmnprsStvzZ]", "[ln][BbdfgklmnprsStvzZ]", ""); addRule(lines, "lEs", "", "", "(lEs|lz)"); addRule(lines, "lE", "[bdfgkmnprStvzZ]", "", "(lE|l)"); addRule(lines, "aue", "", "", "D"); addRule(lines, "oue", "", "", "D"); addRule(lines, "AvE", "", "", "(D|AvE)"); addRule(lines, "Ave", "", "", "(D|Ave)"); addRule(lines, "avE", "", "", "(D|avE)"); addRule(lines, "ave", "", "", "(D|ave)"); addRule(lines, "OvE", "", "", "(D|OvE)"); addRule(lines, "Ove", 
"", "", "(D|Ove)"); addRule(lines, "ovE", "", "", "(D|ovE)"); addRule(lines, "ove", "", "", "(D|ove)"); addRule(lines, "ea", "", "", "(D|ea)"); addRule(lines, "EA", "", "", "(D|EA)"); addRule(lines, "Ea", "", "", "(D|Ea)"); addRule(lines, "eA", "", "", "(D|eA)"); addRule(lines, "aji", "", "", "D"); addRule(lines, "ajI", "", "", "D"); addRule(lines, "aje", "", "", "D"); addRule(lines, "ajE", "", "", "D"); addRule(lines, "Aji", "", "", "D"); addRule(lines, "AjI", "", "", "D"); addRule(lines, "Aje", "", "", "D"); addRule(lines, "AjE", "", "", "D"); addRule(lines, "oji", "", "", "D"); addRule(lines, "ojI", "", "", "D"); addRule(lines, "oje", "", "", "D"); addRule(lines, "ojE", "", "", "D"); addRule(lines, "Oji", "", "", "D"); addRule(lines, "OjI", "", "", "D"); addRule(lines, "Oje", "", "", "D"); addRule(lines, "OjE", "", "", "D"); addRule(lines, "eji", "", "", "D"); addRule(lines, "ejI", "", "", "D"); addRule(lines, "eje", "", "", "D"); addRule(lines, "ejE", "", "", "D"); addRule(lines, "Eji", "", "", "D"); addRule(lines, "EjI", "", "", "D"); addRule(lines, "Eje", "", "", "D"); addRule(lines, "EjE", "", "", "D"); addRule(lines, "uji", "", "", "D"); addRule(lines, "ujI", "", "", "D"); addRule(lines, "uje", "", "", "D"); addRule(lines, "ujE", "", "", "D"); addRule(lines, "Uji", "", "", "D"); addRule(lines, "UjI", "", "", "D"); addRule(lines, "Uje", "", "", "D"); addRule(lines, "UjE", "", "", "D"); addRule(lines, "iji", "", "", "D"); addRule(lines, "ijI", "", "", "D"); addRule(lines, "ije", "", "", "D"); addRule(lines, "ijE", "", "", "D"); addRule(lines, "Iji", "", "", "D"); addRule(lines, "IjI", "", "", "D"); addRule(lines, "Ije", "", "", "D"); addRule(lines, "IjE", "", "", "D"); addRule(lines, "aja", "", "", "D"); addRule(lines, "ajA", "", "", "D"); addRule(lines, "ajo", "", "", "D"); addRule(lines, "ajO", "", "", "D"); addRule(lines, "aju", "", "", "D"); addRule(lines, "ajU", "", "", "D"); addRule(lines, "Aja", "", "", "D"); addRule(lines, "AjA", "", "", "D"); 
addRule(lines, "Ajo", "", "", "D"); addRule(lines, "AjO", "", "", "D"); addRule(lines, "Aju", "", "", "D"); addRule(lines, "AjU", "", "", "D"); addRule(lines, "oja", "", "", "D"); addRule(lines, "ojA", "", "", "D"); addRule(lines, "ojo", "", "", "D"); addRule(lines, "ojO", "", "", "D"); addRule(lines, "Aju", "", "", "D"); addRule(lines, "AjU", "", "", "D"); addRule(lines, "Oja", "", "", "D"); addRule(lines, "OjA", "", "", "D"); addRule(lines, "Ojo", "", "", "D"); addRule(lines, "OjO", "", "", "D"); addRule(lines, "Aju", "", "", "D"); addRule(lines, "AjU", "", "", "D"); addRule(lines, "eja", "", "", "D"); addRule(lines, "ejA", "", "", "D"); addRule(lines, "ejo", "", "", "D"); addRule(lines, "ejO", "", "", "D"); addRule(lines, "Aju", "", "", "D"); addRule(lines, "AjU", "", "", "D"); addRule(lines, "Eja", "", "", "D"); addRule(lines, "EjA", "", "", "D"); addRule(lines, "Ejo", "", "", "D"); addRule(lines, "EjO", "", "", "D"); addRule(lines, "Aju", "", "", "D"); addRule(lines, "AjU", "", "", "D"); addRule(lines, "uja", "", "", "D"); addRule(lines, "ujA", "", "", "D"); addRule(lines, "ujo", "", "", "D"); addRule(lines, "ujO", "", "", "D"); addRule(lines, "Aju", "", "", "D"); addRule(lines, "AjU", "", "", "D"); addRule(lines, "Uja", "", "", "D"); addRule(lines, "UjA", "", "", "D"); addRule(lines, "Ujo", "", "", "D"); addRule(lines, "UjO", "", "", "D"); addRule(lines, "Aju", "", "", "D"); addRule(lines, "AjU", "", "", "D"); addRule(lines, "ija", "", "", "D"); addRule(lines, "ijA", "", "", "D"); addRule(lines, "ijo", "", "", "D"); addRule(lines, "ijO", "", "", "D"); addRule(lines, "Aju", "", "", "D"); addRule(lines, "AjU", "", "", "D"); addRule(lines, "Ija", "", "", "D"); addRule(lines, "IjA", "", "", "D"); addRule(lines, "Ijo", "", "", "D"); addRule(lines, "IjO", "", "", "D"); addRule(lines, "Aju", "", "", "D"); addRule(lines, "AjU", "", "", "D"); addRule(lines, "j", "", "", "i"); addRule(lines, "lYndEr", "", "$", "lYnder"); addRule(lines, "lander", "", "$", "lYnder"); 
// Remaining entries of the rule list opened on the preceding source lines (kept in the original single-line layout).
addRule(lines, "lAndEr", "", "$", "lYnder"); addRule(lines, "lAnder", "", "$", "lYnder"); addRule(lines, "landEr", "", "$", "lYnder"); addRule(lines, "lender", "", "$", "lYnder"); addRule(lines, "lEndEr", "", "$", "lYnder"); addRule(lines, "lendEr", "", "$", "lYnder"); addRule(lines, "lEnder", "", "$", "lYnder"); addRule(lines, "burk", "", "$", "(burk|berk)"); addRule(lines, "bUrk", "", "$", "(burk|berk)"); addRule(lines, "burg", "", "$", "(burk|berk)"); addRule(lines, "bUrg", "", "$", "(burk|berk)"); addRule(lines, "Burk", "", "$", "(burk|berk)"); addRule(lines, "BUrk", "", "$", "(burk|berk)"); addRule(lines, "Burg", "", "$", "(burk|berk)"); addRule(lines, "BUrg", "", "$", "(burk|berk)"); addRule(lines, "s", "", "[rmnl]", "z"); addRule(lines, "S", "", "[rmnl]", "z"); addRule(lines, "s", "[rmnl]", "", "z"); addRule(lines, "S", "[rmnl]", "", "z"); addRule(lines, "dS", "", "$", "S"); addRule(lines, "dZ", "", "$", "S"); addRule(lines, "Z", "", "$", "S"); addRule(lines, "S", "", "$", "(S|s)"); addRule(lines, "z", "", "$", "(S|s)"); addRule(lines, "S", "", "", "s"); addRule(lines, "dZ", "", "", "z"); addRule(lines, "Z", "", "", "z"); }

    // NOTE(review): the parameter type of the methods below read "Map>" (generic arguments lost);
    // it is written out as Map<String, List<Rule>> here - confirm it matches addRule's parameter.
    // The four string arguments of addRule are presumably pattern, left context, right context
    // and phoneme expression - verify against addRule.

    /**
     * Passes each entry of the hard-coded table below to {@code addRule}, one call per
     * entry and in the order written.
     *
     * <p>NOTE(review): going by the method name this is the Arabic table of the "gen"
     * approximate rule set; confirm against the caller.
     *
     * @param lines rule collection handed unchanged to every {@code addRule} call
     */
    private static void addGenApproxArabic(final Map<String, List<Rule>> lines) {
        addRule(lines, "1a", "", "", "(D|a)");
        addRule(lines, "1i", "", "", "(D|i|e)");
        addRule(lines, "1u", "", "", "(D|u|o)");
        addRule(lines, "j1", "", "", "(ja|je|jo|ju|j)");
        addRule(lines, "1", "", "", "(a|e|i|o|u|)");
        addRule(lines, "u", "", "", "(o|u)");
        addRule(lines, "i", "", "", "(i|e)");
        addRule(lines, "p", "", "$", "p");
        addRule(lines, "p", "", "", "(p|b)");
    }

    /**
     * Passes each entry of the hard-coded table below to {@code addRule}, one call per
     * entry and in the order written.
     *
     * <p>NOTE(review): going by the method name this is the Russian table of the "gen"
     * approximate rule set; confirm against the caller.
     *
     * @param lines rule collection handed unchanged to every {@code addRule} call
     */
    private static void addGenApproxRussian(final Map<String, List<Rule>> lines) {
        addRule(lines, "I", "", "$", "i");
        addRule(lines, "I", "", "[^k]$", "i");
        addRule(lines, "Ik", "[lr]", "$", "(ik|Qk)");
        addRule(lines, "Ik", "", "$", "ik");
        addRule(lines, "sIts", "", "$", "(sits|sQts)");
        addRule(lines, "Its", "", "$", "its");
        addRule(lines, "I", "[aeiEIou]", "", "i");
        addRule(lines, "I", "", "", "(i|Q)");
        addRule(lines, "au", "", "", "(D|a|u)");
        addRule(lines, "ou", "", "", "(D|o|u)");
        addRule(lines, "ai", "", "", "(D|a|i)");
        addRule(lines, "oi", "", "", "(D|o|i)");
        addRule(lines, "ui", "", "", "(D|u|i)");
        addRule(lines, "om", "", "[bp]", "(om|im)");
        addRule(lines, "on", "", "[dgkstvz]", "(on|in)");
        addRule(lines, "em", "", "[bp]", "(im|om)");
        addRule(lines, "en", "", "[dgkstvz]", "(in|on)");
        addRule(lines, "Em", "", "[bp]", "(im|Ym|om)");
        addRule(lines, "En", "", "[dgkstvz]", "(in|Yn|on)");
        addRule(lines, "a", "", "", "(a|o)");
        addRule(lines, "e", "", "", "i");
        addRule(lines, "E", "", "[fklmnprsStv]$", "i");
        addRule(lines, "E", "", "ts$", "i");
        addRule(lines, "E", "[DaoiuQ]", "", "i");
        addRule(lines, "E", "", "[aoQ]", "i");
        addRule(lines, "E", "", "", "(Y|i)");
    }

    /**
     * Passes each entry of the hard-coded table below to {@code addRule}, one call per
     * entry and in the order written.
     *
     * <p>NOTE(review): going by the method name this is the French table of the "gen"
     * approximate rule set; confirm against the caller.
     *
     * @param lines rule collection handed unchanged to every {@code addRule} call
     */
    private static void addGenApproxFrench(final Map<String, List<Rule>> lines) {
        addRule(lines, "au", "", "", "(D|a|u)");
        addRule(lines, "ou", "", "", "(D|o|u)");
        addRule(lines, "ai", "", "", "(D|a|i)");
        addRule(lines, "oi", "", "", "(D|o|i)");
        addRule(lines, "ui", "", "", "(D|u|i)");
        addRule(lines, "a", "", "", "(a|o)");
        addRule(lines, "e", "", "", "i");
    }

    /**
     * Passes each entry of the hard-coded table below to {@code addRule}, one call per
     * entry and in the order written.
     *
     * <p>NOTE(review): going by the method name this is the English table of the "gen"
     * approximate rule set; confirm against the caller.
     *
     * @param lines rule collection handed unchanged to every {@code addRule} call
     */
    private static void addGenApproxEnglish(final Map<String, List<Rule>> lines) {
        // The rule list continues on the following source line; entries keep the original single-line layout.
        addRule(lines, "I", "", "[^aEIeiou]e", "(Q|i|D)"); addRule(lines, "I", "", "$", "i"); addRule(lines, "I", "[aEIeiou]", "", "i"); addRule(lines, "I", "", "[^k]$", "i"); addRule(lines, "Ik", "[lr]", "$", "(ik|Qk)"); addRule(lines, "Ik", "", "$", "ik"); addRule(lines, "sIts", "", "$", "(sits|sQts)"); addRule(lines, "Its", "", "$", "its"); addRule(lines, "I", "", "", "(i|Q)"); addRule(lines, "lE", "[bdfgkmnprsStvzZ]", "", "(il|li|lY)"); addRule(lines, "au", "", "", "(D|a|u)"); addRule(lines, "ou", "", "", "(D|o|u)"); addRule(lines, "ai", "", "", "(D|a|i)"); addRule(lines, "oi", "", "", "(D|o|i)"); addRule(lines, "ui", "", "", "(D|u|i)"); addRule(lines, "E", "D[^aeiEIou]", "", "(i|)"); addRule(lines, "e", "D[^aeiEIou]", "", "(i|)"); addRule(lines, "e", "", "", "i"); addRule(lines, "E", "", "[fklmnprsStv]$", "i"); addRule(lines, "E", "", "ts$", "i"); 
// Remaining entries of the rule list opened on the preceding source line (kept in the original single-line layout).
addRule(lines, "E", "[DaoiEuQY]", "", "i"); addRule(lines, "E", "", "[aoQY]", "i"); addRule(lines, "E", "", "", "(Y|i)"); addRule(lines, "a", "", "", "(a|o)"); }

    // NOTE(review): the parameter type of the methods below read "Map>" (generic arguments lost);
    // it is written out as Map<String, List<Rule>> here - confirm it matches addRule's parameter.
    // The four string arguments of addRule are presumably pattern, left context, right context
    // and phoneme expression - verify against addRule.

    /**
     * Passes each entry of the hard-coded table below to {@code addRule}, one call per
     * entry and in the order written.
     *
     * <p>NOTE(review): going by the method name this is the German table of the "gen"
     * approximate rule set; confirm against the caller.
     *
     * @param lines rule collection handed unchanged to every {@code addRule} call
     */
    private static void addGenApproxGerman(final Map<String, List<Rule>> lines) {
        addRule(lines, "I", "", "$", "i");
        addRule(lines, "I", "[aeiAEIOUouQY]", "", "i");
        addRule(lines, "I", "", "[^k]$", "i");
        addRule(lines, "Ik", "[lr]", "$", "(ik|Qk)");
        addRule(lines, "Ik", "", "$", "ik");
        addRule(lines, "sIts", "", "$", "(sits|sQts)");
        addRule(lines, "Its", "", "$", "its");
        addRule(lines, "I", "", "", "(Q|i)");
        addRule(lines, "AU", "", "", "(D|a|u)");
        addRule(lines, "aU", "", "", "(D|a|u)");
        addRule(lines, "Au", "", "", "(D|a|u)");
        addRule(lines, "au", "", "", "(D|a|u)");
        addRule(lines, "ou", "", "", "(D|o|u)");
        addRule(lines, "OU", "", "", "(D|o|u)");
        addRule(lines, "oU", "", "", "(D|o|u)");
        addRule(lines, "Ou", "", "", "(D|o|u)");
        addRule(lines, "ai", "", "", "(D|a|i)");
        addRule(lines, "Ai", "", "", "(D|a|i)");
        addRule(lines, "oi", "", "", "(D|o|i)");
        addRule(lines, "Oi", "", "", "(D|o|i)");
        addRule(lines, "ui", "", "", "(D|u|i)");
        addRule(lines, "Ui", "", "", "(D|u|i)");
        addRule(lines, "e", "", "", "i");
        addRule(lines, "E", "", "[fklmnprst]$", "i");
        addRule(lines, "E", "", "ts$", "i");
        addRule(lines, "E", "", "$", "i");
        addRule(lines, "E", "[DaoAOUiuQY]", "", "i");
        addRule(lines, "E", "", "[aoAOQY]", "i");
        addRule(lines, "E", "", "", "(Y|i)");
        addRule(lines, "O", "", "$", "o");
        addRule(lines, "O", "", "[fklmnprst]$", "o");
        addRule(lines, "O", "", "ts$", "o");
        addRule(lines, "O", "[aoAOUeiuQY]", "", "o");
        addRule(lines, "O", "", "", "(o|Y)");
        addRule(lines, "a", "", "", "(a|o)");
        addRule(lines, "A", "", "$", "(a|o)");
        addRule(lines, "A", "", "[fklmnprst]$", "(a|o)");
        addRule(lines, "A", "", "ts$", "(a|o)");
        addRule(lines, "A", "[aoeOUiuQY]", "", "(a|o)");
        addRule(lines, "A", "", "", "(a|o|Y)");
        addRule(lines, "U", "", "$", "u");
        addRule(lines, "U", "[DaoiuUQY]", "", "u");
        addRule(lines, "U", "", "[^k]$", "u");
        addRule(lines, "Uk", "[lr]", "$", "(uk|Qk)");
        addRule(lines, "Uk", "", "$", "uk");
        addRule(lines, "sUts", "", "$", "(suts|sQts)");
        addRule(lines, "Uts", "", "$", "uts");
        addRule(lines, "U", "", "", "(u|Q)");
    }

    /**
     * Passes the single hard-coded entry below to {@code addRule}; it maps "N" to the
     * empty string.
     *
     * <p>NOTE(review): going by the method name this is the Greek/Latin table of the "gen"
     * approximate rule set; confirm against the caller.
     *
     * @param lines rule collection handed unchanged to the {@code addRule} call
     */
    private static void addGenApproxGreekLatin(final Map<String, List<Rule>> lines) {
        addRule(lines, "N", "", "", "");
    }

    /**
     * Passes each entry of the hard-coded table below to {@code addRule}, one call per
     * entry and in the order written.
     *
     * <p>NOTE(review): going by the method name this is the Polish table of the "gen"
     * approximate rule set; confirm against the caller.
     *
     * @param lines rule collection handed unchanged to every {@code addRule} call
     */
    private static void addGenApproxPolish(final Map<String, List<Rule>> lines) {
        addRule(lines, "aiB", "", "[bp]", "(D|Dm)");
        addRule(lines, "oiB", "", "[bp]", "(D|Dm)");
        addRule(lines, "uiB", "", "[bp]", "(D|Dm)");
        addRule(lines, "eiB", "", "[bp]", "(D|Dm)");
        addRule(lines, "EiB", "", "[bp]", "(D|Dm)");
        addRule(lines, "iiB", "", "[bp]", "(D|Dm)");
        addRule(lines, "IiB", "", "[bp]", "(D|Dm)");
        addRule(lines, "aiB", "", "[dgkstvz]", "(D|Dn)");
        addRule(lines, "oiB", "", "[dgkstvz]", "(D|Dn)");
        addRule(lines, "uiB", "", "[dgkstvz]", "(D|Dn)");
        addRule(lines, "eiB", "", "[dgkstvz]", "(D|Dn)");
        addRule(lines, "EiB", "", "[dgkstvz]", "(D|Dn)");
        addRule(lines, "iiB", "", "[dgkstvz]", "(D|Dn)");
        addRule(lines, "IiB", "", "[dgkstvz]", "(D|Dn)");
        addRule(lines, "B", "", "[bp]", "(o|om|im)");
        addRule(lines, "B", "", "[dgkstvz]", "(o|on|in)");
        addRule(lines, "B", "", "", "o");
        addRule(lines, "aiF", "", "[bp]", "(D|Dm)");
        addRule(lines, "oiF", "", "[bp]", "(D|Dm)");
        addRule(lines, "uiF", "", "[bp]", "(D|Dm)");
        addRule(lines, "eiF", "", "[bp]", "(D|Dm)");
        addRule(lines, "EiF", "", "[bp]", "(D|Dm)");
        addRule(lines, "iiF", "", "[bp]", "(D|Dm)");
        addRule(lines, "IiF", "", "[bp]", "(D|Dm)");
        addRule(lines, "aiF", "", "[dgkstvz]", "(D|Dn)");
        addRule(lines, "oiF", "", "[dgkstvz]", "(D|Dn)");
        addRule(lines, "uiF", "", "[dgkstvz]", "(D|Dn)");
        addRule(lines, "eiF", "", "[dgkstvz]", "(D|Dn)");
        addRule(lines, "EiF", "", "[dgkstvz]", "(D|Dn)");
        addRule(lines, "iiF", "", "[dgkstvz]", "(D|Dn)");
        addRule(lines, "IiF", "", "[dgkstvz]", "(D|Dn)");
        addRule(lines, "F", "", "[bp]", "(i|im|om)");
        addRule(lines, "F", "", "[dgkstvz]", "(i|in|on)");
        addRule(lines, "F", "", "", "i");
        addRule(lines, "P", "", "", "(o|u)");
        addRule(lines, "I", "", "$", "i");
        addRule(lines, "I", "", "[^k]$", "i");
        addRule(lines, "Ik", "[lr]", "$", "(ik|Qk)");
        addRule(lines, "Ik", "", "$", "ik");
        addRule(lines, "sIts", "", "$", "(sits|sQts)");
        addRule(lines, "Its", "", "$", "its");
        addRule(lines, "I", "[aeiAEBFIou]", "", "i");
        addRule(lines, "I", "", "", "(i|Q)");
        addRule(lines, "au", "", "", "(D|a|u)");
        addRule(lines, "ou", "", "", "(D|o|u)");
        addRule(lines, "ai", "", "", "(D|a|i)");
        addRule(lines, "oi", "", "", "(D|o|i)");
        addRule(lines, "ui", "", "", "(D|u|i)");
        addRule(lines, "a", "", "", "(a|o)");
        addRule(lines, "e", "", "", "i");
        addRule(lines, "E", "", "[fklmnprst]$", "i");
        addRule(lines, "E", "", "ts$", "i");
        addRule(lines, "E", "", "$", "i");
        addRule(lines, "E", "[DaoiuQ]", "", "i");
        addRule(lines, "E", "", "[aoQ]", "i");
        addRule(lines, "E", "", "", "(Y|i)");
    }

    /**
     * Passes the two hard-coded entries below to {@code addRule}; both "B" and "V" map
     * to the alternatives {@code (b|v)}.
     *
     * <p>NOTE(review): going by the method name this is the Spanish table of the "gen"
     * approximate rule set; confirm against the caller.
     *
     * @param lines rule collection handed unchanged to every {@code addRule} call
     */
    private static void addGenApproxSpanish(final Map<String, List<Rule>> lines) {
        addRule(lines, "B", "", "", "(b|v)");
        addRule(lines, "V", "", "", "(b|v)");
    }

    /**
     * Passes each entry of the hard-coded table below to {@code addRule}, one call per
     * entry and in the order written.
     *
     * <p>NOTE(review): going by the method name this is the "gen" exact-match table for
     * language "any"; confirm against the caller.
     *
     * @param lines rule collection handed unchanged to every {@code addRule} call
     */
    private static void addGenExactAny(final Map<String, List<Rule>> lines) {
        addRule(lines, "EE", "", "$", "e");
        addRule(lines, "A", "", "", "a");
        addRule(lines, "E", "", "", "e");
        addRule(lines, "I", "", "", "i");
        addRule(lines, "O", "", "", "o");
        addRule(lines, "P", "", "", "o");
        addRule(lines, "U", "", "", "u");
        addRule(lines, "B", "", "[fktSs]", "p");
        addRule(lines, "B", "", "p", "");
        addRule(lines, "B", "", "$", "p");
        addRule(lines, "V", "", "[pktSs]", "f");
        addRule(lines, "V", "", "f", "");
        addRule(lines, "V", "", "$", "f");
        addRule(lines, "B", "", "", "b");
        addRule(lines, "V", "", "", "v");
    }

    /**
     * Passes the single hard-coded entry below to {@code addRule}; it maps "1" to the
     * empty string.
     *
     * <p>NOTE(review): going by the method name this is the Arabic table of the "gen"
     * exact rule set; confirm against the caller.
     *
     * @param lines rule collection handed unchanged to the {@code addRule} call
     */
    private static void addGenExactArabic(final Map<String, List<Rule>> lines) {
        addRule(lines, "1", "", "", "");
    }

    /**
     * Passes each entry of the hard-coded table below to {@code addRule}, one call per
     * entry and in the order written.
     *
     * <p>NOTE(review): going by the method name this is the common table of the "gen"
     * exact rule set; confirm against the caller.
     *
     * @param lines rule collection handed unchanged to every {@code addRule} call
     */
    private static void addGenExactCommon(final Map<String, List<Rule>> lines) {
        addRule(lines, "H", "", "", "");
        addRule(lines, "s", "[^t]", "[bgZd]", "z");
        addRule(lines, "Z", "", "[pfkst]", "S");
        addRule(lines, "Z", "", "$", "S");
        addRule(lines, "S", "", "[bgzd]", "Z");
        addRule(lines, "z", "", "$", "s");
        addRule(lines, "ji", "[aAoOeEiIuU]", "", "j");
        addRule(lines, "jI", "[aAoOeEiIuU]", "", "j");
        addRule(lines, "je", "[aAoOeEiIuU]", "", "j");
        addRule(lines, "jE", "[aAoOeEiIuU]", "", "j");
    }

    /**
     * Passes the two hard-coded entries below to {@code addRule}: "E" to "e" and "I" to "i".
     *
     * <p>NOTE(review): going by the method name this is the Russian table of the "gen"
     * exact rule set; confirm against the caller.
     *
     * @param lines rule collection handed unchanged to every {@code addRule} call
     */
    private static void addGenExactRussian(final Map<String, List<Rule>> lines) {
        addRule(lines, "E", "", "", "e");
        addRule(lines, "I", "", "", "i");
    }

    /**
     * Passes the single hard-coded entry below to {@code addRule}; it maps "N" to "n".
     *
     * <p>NOTE(review): going by the method name this is the Greek/Latin table of the "gen"
     * exact rule set; confirm against the caller.
     *
     * @param lines rule collection handed unchanged to the {@code addRule} call
     */
    private static void addGenExactGreeklatin(final Map<String, List<Rule>> lines) {
        addRule(lines, "N", "", "", "n");
    }

    /**
     * Passes each entry of the hard-coded table below to {@code addRule}, one call per
     * entry and in the order written.
     *
     * <p>NOTE(review): going by the method name this is the Polish table of the "gen"
     * exact rule set; confirm against the caller.
     *
     * @param lines rule collection handed unchanged to every {@code addRule} call
     */
    private static void addGenExactPolish(final Map<String, List<Rule>> lines) {
        addRule(lines, "B", "", "", "a");
        addRule(lines, "F", "", "", "e");
        addRule(lines, "P", "", "", "o");
        addRule(lines, "E", "", "", "e");
        addRule(lines, "I", "", "", "i");
    }

    /**
     * Passes the two hard-coded entries below to {@code addRule}: "B" to "b" and "V" to "v".
     *
     * <p>NOTE(review): going by the method name this is the Spanish table of the "gen"
     * exact rule set; confirm against the caller.
     *
     * @param lines rule collection handed unchanged to every {@code addRule} call
     */
    private static void addGenExactSpanish(final Map<String, List<Rule>> lines) {
        addRule(lines, "B", "", "", "b");
        addRule(lines, "V", "", "", "v");
    }

    /**
     * Passes each entry of the hard-coded table below to {@code addRule}, one call per
     * entry and in the order written. Several phoneme strings carry a bracketed
     * language tag such as {@code [russian]} or {@code [polish]}.
     *
     * <p>NOTE(review): going by the method name this is the main "gen" rule table for
     * language "any"; confirm against the caller.
     *
     * @param lines rule collection handed unchanged to every {@code addRule} call
     */
    private static void addGenRulesAny(final Map<String, List<Rule>> lines) {
        // The rule list continues on the following source lines; entries keep the original single-line layout.
        addRule(lines, "yna", "", "$", "(in[russian]|ina)"); addRule(lines, "ina", "", "$", "(in[russian]|ina)"); addRule(lines, "liova", "", "$", "(lova|lof[russian]|lef[russian])"); addRule(lines, "lova", "", "$", "(lova|lof[russian]|lef[russian]|l[czech]|el[czech])"); addRule(lines, "kova", "", "$", "(kova|kof[russian]|k[czech]|ek[czech])"); addRule(lines, "ova", "", "$", "(ova|of[russian]|[czech])"); addRule(lines, "ová", "", "$", "(ova|[czech])"); addRule(lines, "eva", "", "$", "(eva|ef[russian])"); addRule(lines, "aia", "", "$", "(aja|i[russian])"); addRule(lines, "aja", "", "$", "(aja|i[russian])"); addRule(lines, "aya", "", "$", "(aja|i[russian])"); addRule(lines, "lowa", "", "$", "(lova|lof[polish]|l[polish]|el[polish])"); addRule(lines, "kowa", "", "$", "(kova|kof[polish]|k[polish]|ek[polish])"); addRule(lines, "owa", "", "$", "(ova|of[polish]|)"); addRule(lines, "lowna", "", "$", "(lovna|levna|l[polish]|el[polish])"); addRule(lines, "kowna", "", "$", "(kovna|k[polish]|ek[polish])"); addRule(lines, "owna", "", "$", "(ovna|[polish])"); addRule(lines, "lówna", "", "$", "(l|el)"); addRule(lines, "kówna", "", "$", "(k|ek)"); addRule(lines, "ówna", "", "$", ""); addRule(lines, "á", "", "$", "(a|i[czech])"); 
addRule(lines, "a", "", "$", "(a|i[polish+czech])"); addRule(lines, "pf", "", "", "(pf|p|f)"); addRule(lines, "que", "", "$", "(k[french]|ke|kve)"); addRule(lines, "qu", "", "", "(kv|k)"); addRule(lines, "m", "", "[bfpv]", "(m|n)"); addRule(lines, "m", "[aeiouy]", "[aeiouy]", "m"); addRule(lines, "m", "[aeiouy]", "", "(m|n[french+portuguese])"); addRule(lines, "ly", "", "[au]", "l"); addRule(lines, "li", "", "[au]", "l"); addRule(lines, "lio", "", "", "(lo|le[russian])"); addRule(lines, "lyo", "", "", "(lo|le[russian])"); addRule(lines, "lt", "u", "$", "(lt|[french])"); addRule(lines, "v", "^", "", "(v|f[german]|b[spanish])"); addRule(lines, "ex", "", "[aáuiíoóeéêy]", "(ez[portuguese]|eS[portuguese]|eks|egz)"); addRule(lines, "ex", "", "[cs]", "(e[portuguese]|ek)"); addRule(lines, "x", "u", "$", "(ks|[french])"); addRule(lines, "ck", "", "", "(k|tsk[polish+czech])"); addRule(lines, "cz", "", "", "(tS|tsz[czech])"); addRule(lines, "rh", "^", "", "r"); addRule(lines, "dh", "^", "", "d"); addRule(lines, "bh", "^", "", "b"); addRule(lines, "ph", "", "", "(ph|f)"); addRule(lines, "kh", "", "", "(x[russian+english]|kh)"); addRule(lines, "lh", "", "", "(lh|l[portuguese])"); addRule(lines, "nh", "", "", "(nh|nj[portuguese])"); addRule(lines, "ssch", "", "", "S"); addRule(lines, "chsch", "", "", "xS"); addRule(lines, "tsch", "", "", "tS"); addRule(lines, "sch", "[aeiouy]", "[ei]", "(S|StS[russian]|sk[romanian+italian])"); addRule(lines, "sch", "[aeiouy]", "", "(S|StS[russian])"); addRule(lines, "sch", "", "[ei]", "(sk[romanian+italian]|S|StS[russian])"); addRule(lines, "sch", "", "", "(S|StS[russian])"); addRule(lines, "ssh", "", "", "S"); addRule(lines, "sh", "", "[äöü]", "sh"); addRule(lines, "sh", "", "[aeiou]", "(S[russian+english]|sh)"); addRule(lines, "sh", "", "", "S"); addRule(lines, "zh", "", "", "(Z[english+russian]|zh|tsh[german])"); addRule(lines, "chs", "", "", "(ks[german]|xs|tSs[russian+english])"); addRule(lines, "ch", "", "[ei]", 
"(x|tS[spanish+english+russian]|k[romanian+italian]|S[portuguese+french])"); addRule(lines, "ch", "", "", "(x|tS[spanish+english+russian]|S[portuguese+french])"); addRule(lines, "th", "^", "", "t"); addRule(lines, "th", "", "[äöüaeiou]", "(t[english+german+greeklatin]|th)"); addRule(lines, "th", "", "", "t"); addRule(lines, "gh", "", "[ei]", "(g[romanian+italian+greeklatin]|gh)"); addRule(lines, "ouh", "", "[aioe]", "(v[french]|uh)"); addRule(lines, "uh", "", "[aioe]", "(v|uh)"); addRule(lines, "h", ".", "$", ""); addRule(lines, "h", "[aeiouyäöü]", "", ""); addRule(lines, "h", "^", "", "(h|x[romanian+greeklatin]|H[english+romanian+polish+french+portuguese+italian+spanish])"); addRule(lines, "cia", "", "", "(tSa[polish]|tsa)"); addRule(lines, "cią", "", "[bp]", "(tSom|tsom)"); addRule(lines, "cią", "", "", "(tSon[polish]|tson)"); addRule(lines, "cię", "", "[bp]", "(tSem[polish]|tsem)"); addRule(lines, "cię", "", "", "(tSen[polish]|tsen)"); addRule(lines, "cie", "", "", "(tSe[polish]|tse)"); addRule(lines, "cio", "", "", "(tSo[polish]|tso)"); addRule(lines, "ciu", "", "", "(tSu[polish]|tsu)"); addRule(lines, "sci", "", "$", "(Si[italian]|stsi[polish+czech]|dZi[turkish]|tSi[polish+romanian]|tS[romanian]|si)"); addRule(lines, "sc", "", "[ei]", "(S[italian]|sts[polish+czech]|dZ[turkish]|tS[polish+romanian]|s)"); addRule(lines, "ci", "", "$", "(tsi[polish+czech]|dZi[turkish]|tSi[polish+romanian]|tS[romanian]|si)"); addRule(lines, "cy", "", "", "(si|tsi[polish])"); addRule(lines, "c", "", "[ei]", "(ts[polish+czech]|dZ[turkish]|tS[polish+romanian]|k[greeklatin]|s)"); addRule(lines, "sç", "", "[aeiou]", "(s|stS[turkish])"); addRule(lines, "ssz", "", "", "S"); addRule(lines, "sz", "^", "", "(S|s[hungarian])"); addRule(lines, "sz", "", "$", "(S|s[hungarian])"); addRule(lines, "sz", "", "", "(S|s[hungarian]|sts[german])"); addRule(lines, "ssp", "", "", "(Sp[german]|sp)"); addRule(lines, "sp", "", "", "(Sp[german]|sp)"); addRule(lines, "sst", "", "", "(St[german]|st)"); 
addRule(lines, "st", "", "", "(St[german]|st)"); addRule(lines, "ss", "", "", "s"); addRule(lines, "sj", "^", "", "S"); addRule(lines, "sj", "", "$", "S"); addRule(lines, "sj", "", "", "(sj|S[dutch]|sx[spanish]|sZ[romanian+turkish])"); addRule(lines, "sia", "", "", "(Sa[polish]|sa[polish]|sja)"); addRule(lines, "sią", "", "[bp]", "(Som[polish]|som)"); addRule(lines, "sią", "", "", "(Son[polish]|son)"); addRule(lines, "się", "", "[bp]", "(Sem[polish]|sem)"); addRule(lines, "się", "", "", "(Sen[polish]|sen)"); addRule(lines, "sie", "", "", "(se|sje|Se[polish]|zi[german])"); addRule(lines, "sio", "", "", "(So[polish]|so)"); addRule(lines, "siu", "", "", "(Su[polish]|sju)"); addRule(lines, "si", "[äöëaáuiíoóeéêy]", "", "(Si[polish]|si|zi[portuguese+french+italian+german])"); addRule(lines, "si", "", "", "(Si[polish]|si|zi[german])"); addRule(lines, "s", "[aáuiíoóeéêy]", "[aáuíoóeéêy]", "(s|z[portuguese+french+italian+german])"); addRule(lines, "s", "", "[aeouäöë]", "(s|z[german])"); addRule(lines, "s", "[aeiouy]", "[dglmnrv]", "(s|z|Z[portuguese]|[french])"); addRule(lines, "s", "", "[dglmnrv]", "(s|z|Z[portuguese])"); addRule(lines, "gue", "", "$", "(k[french]|gve)"); addRule(lines, "gu", "", "[ei]", "(g[french]|gv[portuguese+spanish])"); addRule(lines, "gu", "", "[ao]", "gv"); addRule(lines, "guy", "", "", "gi"); addRule(lines, "gli", "", "", "(glI|l[italian])"); addRule(lines, "gni", "", "", "(gnI|ni[italian+french])"); addRule(lines, "gn", "", "[aeou]", "(n[italian+french]|nj[italian+french]|gn)"); addRule(lines, "ggie", "", "", "(je[greeklatin]|dZe)"); addRule(lines, "ggi", "", "[aou]", "(j[greeklatin]|dZ)"); addRule(lines, "ggi", "[yaeiou]", "[aou]", "(gI|dZ[italian]|j[greeklatin])"); addRule(lines, "gge", "[yaeiou]", "", "(gE|xe[spanish]|gZe[portuguese+french]|dZe[english+romanian+italian+spanish]|je[greeklatin])"); addRule(lines, "ggi", "[yaeiou]", "", "(gI|xi[spanish]|gZi[portuguese+french]|dZi[english+romanian+italian+spanish]|i[greeklatin])"); addRule(lines, 
"ggi", "", "[aou]", "(gI|dZ[italian]|j[greeklatin])"); addRule(lines, "gie", "", "$", "(ge|gi[german]|ji[french]|dZe[italian])"); addRule(lines, "gie", "", "", "(ge|gi[german]|dZe[italian]|je[greeklatin])"); addRule(lines, "gi", "", "[aou]", "(i[greeklatin]|dZ)"); addRule(lines, "ge", "[yaeiou]", "", "(gE|xe[spanish]|Ze[portuguese+french]|dZe[english+romanian+italian+spanish])"); addRule(lines, "gi", "[yaeiou]", "", "(gI|xi[spanish]|Zi[portuguese+french]|dZi[english+romanian+italian+spanish])"); addRule(lines, "ge", "", "", "(gE|xe[spanish]|hE[russian]|je[greeklatin]|Ze[portuguese+french]|dZe[english+romanian+italian+spanish])"); addRule(lines, "gi", "", "", "(gI|xi[spanish]|hI[russian]|i[greeklatin]|Zi[portuguese+french]|dZi[english+romanian+italian+spanish])"); addRule(lines, "gy", "", "[aeouáéóúüöőű]", "(gi|dj[hungarian])"); addRule(lines, "gy", "", "", "(gi|d[hungarian])"); addRule(lines, "g", "[yaeiou]", "[aouyei]", "g"); addRule(lines, "g", "", "[aouei]", "(g|h[russian])"); addRule(lines, "ij", "", "", "(i|ej[dutch]|ix[spanish]|iZ[french+romanian+turkish+portuguese])"); addRule(lines, "j", "", "[aoeiuy]", "(j|dZ[english]|x[spanish]|Z[french+romanian+turkish+portuguese])"); addRule(lines, "rz", "t", "", "(S[polish]|r)"); addRule(lines, "rz", "", "", "(rz|rts[german]|Z[polish]|r[polish]|rZ[polish])"); addRule(lines, "tz", "", "$", "(ts|tS[english+german])"); addRule(lines, "tz", "^", "", "(ts[english+german+russian]|tS[english+german])"); addRule(lines, "tz", "", "", "(ts[english+german+russian]|tz)"); addRule(lines, "zia", "", "[bcdgkpstwzż]", "(Za[polish]|za[polish]|zja)"); addRule(lines, "zia", "", "", "(Za[polish]|zja)"); addRule(lines, "zią", "", "[bp]", "(Zom[polish]|zom)"); addRule(lines, "zią", "", "", "(Zon[polish]|zon)"); addRule(lines, "zię", "", "[bp]", "(Zem[polish]|zem)"); addRule(lines, "zię", "", "", "(Zen[polish]|zen)"); addRule(lines, "zie", "", "[bcdgkpstwzż]", "(Ze[polish]|ze[polish]|ze|tsi[german])"); addRule(lines, "zie", "", "", 
"(ze|Ze[polish]|tsi[german])"); addRule(lines, "zio", "", "", "(Zo[polish]|zo)"); addRule(lines, "ziu", "", "", "(Zu[polish]|zju)"); addRule(lines, "zi", "", "", "(Zi[polish]|zi|tsi[german]|dzi[italian]|tsi[italian]|si[spanish])"); addRule(lines, "z", "", "$", "(s|ts[german]|ts[italian]|S[portuguese])"); addRule(lines, "z", "", "[bdgv]", "(z|dz[italian]|Z[portuguese])"); addRule(lines, "z", "", "[ptckf]", "(s|ts[italian]|S[portuguese])"); addRule(lines, "aue", "", "", "aue"); addRule(lines, "oue", "", "", "(oue|ve[french])"); addRule(lines, "eau", "", "", "o"); addRule(lines, "ae", "", "", "(Y[german]|aje[russian]|ae)"); addRule(lines, "ai", "", "", "aj"); addRule(lines, "au", "", "", "(au|o[french])"); addRule(lines, "ay", "", "", "aj"); addRule(lines, "ão", "", "", "(au|an)"); addRule(lines, "ãe", "", "", "(aj|an)"); addRule(lines, "ãi", "", "", "(aj|an)"); addRule(lines, "ea", "", "", "(ea|ja[romanian])"); addRule(lines, "ee", "", "", "(i[english]|aje[russian]|e)"); addRule(lines, "ei", "", "", "(aj|ej)"); addRule(lines, "eu", "", "", "(eu|Yj[german]|ej[german]|oj[german]|Y[dutch])"); addRule(lines, "ey", "", "", "(aj|ej)"); addRule(lines, "ia", "", "", "ja"); addRule(lines, "ie", "", "", "(i[german]|e[polish]|ije[russian]|Q[dutch]|je)"); addRule(lines, "ii", "", "$", "i"); addRule(lines, "io", "", "", "(jo|e[russian])"); addRule(lines, "iu", "", "", "ju"); addRule(lines, "iy", "", "$", "i"); addRule(lines, "oe", "", "", "(Y[german]|oje[russian]|u[dutch]|oe)"); addRule(lines, "oi", "", "", "oj"); addRule(lines, "oo", "", "", "(u[english]|o)"); addRule(lines, "ou", "", "", "(ou|u[french+greeklatin]|au[dutch])"); addRule(lines, "où", "", "", "u"); addRule(lines, "oy", "", "", "oj"); addRule(lines, "õe", "", "", "(oj|on)"); addRule(lines, "ua", "", "", "va"); addRule(lines, "ue", "", "", "(Q[german]|uje[russian]|ve)"); addRule(lines, "ui", "", "", "(uj|vi|Y[dutch])"); addRule(lines, "uu", "", "", "(u|Q[dutch])"); addRule(lines, "uo", "", "", "(vo|o)"); 
addRule(lines, "uy", "", "", "uj"); addRule(lines, "ya", "", "", "ja"); addRule(lines, "ye", "", "", "(je|ije[russian])"); addRule(lines, "yi", "^", "", "i"); addRule(lines, "yi", "", "$", "i"); addRule(lines, "yo", "", "", "(jo|e[russian])"); addRule(lines, "yu", "", "", "ju"); addRule(lines, "yy", "", "$", "i"); addRule(lines, "i", "[áóéê]", "", "j"); addRule(lines, "y", "[áóéê]", "", "j"); addRule(lines, "e", "^", "", "(e|je[russian])"); addRule(lines, "e", "", "$", "(e|EE[english+french])"); addRule(lines, "ą", "", "[bp]", "om"); addRule(lines, "ą", "", "", "on"); addRule(lines, "ä", "", "", "(Y|e)"); addRule(lines, "á", "", "", "a"); addRule(lines, "à", "", "", "a"); addRule(lines, "â", "", "", "a"); addRule(lines, "ã", "", "", "(a|an)"); addRule(lines, "ă", "", "", "(e[romanian]|a)"); addRule(lines, "č", "", "", "tS"); addRule(lines, "ć", "", "", "(tS[polish]|ts)"); addRule(lines, "ç", "", "", "(s|tS[turkish])"); addRule(lines, "ď", "", "", "(d|dj[czech])"); addRule(lines, "ę", "", "[bp]", "em"); addRule(lines, "ę", "", "", "en"); addRule(lines, "é", "", "", "e"); addRule(lines, "è", "", "", "e"); addRule(lines, "ê", "", "", "e"); addRule(lines, "ě", "", "", "(e|je[czech])"); addRule(lines, "ğ", "", "", ""); addRule(lines, "í", "", "", "i"); addRule(lines, "î", "", "", "i"); addRule(lines, "ı", "", "", "(i|e[turkish]|[turkish])"); addRule(lines, "ł", "", "", "l"); addRule(lines, "ń", "", "", "(n|nj[polish])"); addRule(lines, "ñ", "", "", "(n|nj[spanish])"); addRule(lines, "ó", "", "", "(u[polish]|o)"); addRule(lines, "ô", "", "", "o"); addRule(lines, "õ", "", "", "(o|on[portuguese]|Y[hungarian])"); addRule(lines, "ò", "", "", "o"); addRule(lines, "ö", "", "", "Y"); addRule(lines, "ř", "", "", "(r|rZ[czech])"); addRule(lines, "ś", "", "", "(S[polish]|s)"); addRule(lines, "ş", "", "", "S"); addRule(lines, "š", "", "", "S"); addRule(lines, "ţ", "", "", "ts"); addRule(lines, "ť", "", "", "(t|tj[czech])"); addRule(lines, "ű", "", "", "Q"); addRule(lines, "ü", "", 
"", "(Q|u[portuguese+spanish])"); addRule(lines, "ú", "", "", "u"); addRule(lines, "ů", "", "", "u"); addRule(lines, "ù", "", "", "u"); addRule(lines, "ý", "", "", "i"); addRule(lines, "ż", "", "", "Z"); addRule(lines, "ź", "", "", "(Z[polish]|z)"); addRule(lines, "ß", "", "", "s"); addRule(lines, "'", "", "", ""); addRule(lines, "\"", "", "", ""); addRule(lines, "o", "", "[bcćdgklłmnńrsśtwzźż]", "(O|P[polish])"); addRule(lines, "a", "", "", "A"); addRule(lines, "b", "", "", "B"); addRule(lines, "c", "", "", "(k|ts[polish+czech]|dZ[turkish])"); addRule(lines, "d", "", "", "d"); addRule(lines, "e", "", "", "E"); addRule(lines, "f", "", "", "f"); addRule(lines, "g", "", "", "g"); addRule(lines, "h", "", "", "(h|x[romanian]|H[french+portuguese+italian+spanish])"); addRule(lines, "i", "", "", "I"); addRule(lines, "j", "", "", "(j|x[spanish]|Z[french+romanian+turkish+portuguese])"); addRule(lines, "k", "", "", "k"); addRule(lines, "l", "", "", "l"); addRule(lines, "m", "", "", "m"); addRule(lines, "n", "", "", "n"); addRule(lines, "o", "", "", "O"); addRule(lines, "p", "", "", "p"); addRule(lines, "q", "", "", "k"); addRule(lines, "r", "", "", "r"); addRule(lines, "s", "", "", "(s|S[portuguese])"); addRule(lines, "t", "", "", "t"); addRule(lines, "u", "", "", "U"); addRule(lines, "v", "", "", "V"); addRule(lines, "w", "", "", "(v|w[english+dutch])"); addRule(lines, "x", "", "", "(ks|gz|S[portuguese+spanish])"); addRule(lines, "y", "", "", "i"); addRule(lines, "z", "", "", "(z|ts[german]|dz[italian]|ts[italian]|s[spanish])"); } private static void addGenRulesArabic(final Map> lines) { addRule(lines, "ا", "", "", "a"); addRule(lines, "ب", "", "$", "b"); addRule(lines, "ب", "", "", "b1"); addRule(lines, "ت", "", "$", "t"); addRule(lines, "ت", "", "", "t1"); addRule(lines, "ث", "", "$", "t"); addRule(lines, "ث", "", "", "t1"); addRule(lines, "ج", "", "$", "(dZ|Z)"); addRule(lines, "ج", "", "", "(dZ1|Z1)"); addRule(lines, "ح", "^", "", "1"); addRule(lines, "ح", "", "$", 
"1"); addRule(lines, "ح", "", "", "(h1|1)"); addRule(lines, "خ", "", "$", "x"); addRule(lines, "خ", "", "", "x1"); addRule(lines, "د", "", "$", "d"); addRule(lines, "د", "", "", "d1"); addRule(lines, "ذ", "", "$", "d"); addRule(lines, "ذ", "", "", "d1"); addRule(lines, "ر", "", "$", "r"); addRule(lines, "ر", "", "", "r1"); addRule(lines, "ز", "", "$", "z"); addRule(lines, "ز", "", "", "z1"); addRule(lines, "س", "", "$", "s"); addRule(lines, "س", "", "", "s1"); addRule(lines, "ش", "", "$", "S"); addRule(lines, "ش", "", "", "S1"); addRule(lines, "ص", "", "$", "s"); addRule(lines, "ص", "", "", "s1"); addRule(lines, "ض", "", "$", "d"); addRule(lines, "ض", "", "", "d1"); addRule(lines, "ط", "", "$", "t"); addRule(lines, "ط", "", "", "t1"); addRule(lines, "ظ", "", "$", "z"); addRule(lines, "ظ", "", "", "z1"); addRule(lines, "ع", "^", "", "1"); addRule(lines, "ع", "", "$", "1"); addRule(lines, "ع", "", "", "(h1|1)"); addRule(lines, "غ", "", "$", "g"); addRule(lines, "غ", "", "", "g1"); addRule(lines, "ف", "", "$", "f"); addRule(lines, "ف", "", "", "f1"); addRule(lines, "ق", "", "$", "k"); addRule(lines, "ق", "", "", "k1"); addRule(lines, "ك", "", "$", "k"); addRule(lines, "ك", "", "", "k1"); addRule(lines, "ل", "", "$", "l"); addRule(lines, "ل", "", "", "l1"); addRule(lines, "م", "", "$", "m"); addRule(lines, "م", "", "", "m1"); addRule(lines, "ن", "", "$", "n"); addRule(lines, "ن", "", "", "n1"); addRule(lines, "ه", "^", "", "1"); addRule(lines, "ه", "", "$", "1"); addRule(lines, "ه", "", "", "(h1|1)"); addRule(lines, "و", "", "$", "(u|v)"); addRule(lines, "و", "", "", "(u|v1)"); addRule(lines, "ي‎", "", "$", "(i|j)"); addRule(lines, "ي‎", "", "", "(i|j1)"); } private static void addGenRulesCyrillic(final Map> lines) { addRule(lines, "ця", "", "", "tsa"); addRule(lines, "цю", "", "", "tsu"); addRule(lines, "циа", "", "", "tsa"); addRule(lines, "цие", "", "", "tse"); addRule(lines, "цио", "", "", "tso"); addRule(lines, "циу", "", "", "tsu"); addRule(lines, "сие", "", "", 
"se"); addRule(lines, "сио", "", "", "so"); addRule(lines, "зие", "", "", "ze"); addRule(lines, "зио", "", "", "zo"); addRule(lines, "с", "", "с", ""); addRule(lines, "гауз", "", "$", "haus"); addRule(lines, "гаус", "", "$", "haus"); addRule(lines, "гольц", "", "$", "holts"); addRule(lines, "геймер", "", "$", "(hejmer|hajmer)"); addRule(lines, "гейм", "", "$", "(hejm|hajm)"); addRule(lines, "гоф", "", "$", "hof"); addRule(lines, "гер", "", "$", "ger"); addRule(lines, "ген", "", "$", "gen"); addRule(lines, "гин", "", "$", "gin"); addRule(lines, "г", "(й|ё|я|ю|ы|а|е|о|и|у)", "(а|е|о|и|у)", "g"); addRule(lines, "г", "", "(а|е|о|и|у)", "(g|h)"); addRule(lines, "ля", "", "", "la"); addRule(lines, "лю", "", "", "lu"); addRule(lines, "лё", "", "", "(le|lo)"); addRule(lines, "лио", "", "", "(le|lo)"); addRule(lines, "ле", "", "", "(lE|lo)"); addRule(lines, "ийе", "", "", "je"); addRule(lines, "ие", "", "", "je"); addRule(lines, "ыйе", "", "", "je"); addRule(lines, "ые", "", "", "je"); addRule(lines, "ий", "", "(а|о|у)", "j"); addRule(lines, "ый", "", "(а|о|у)", "j"); addRule(lines, "ий", "", "$", "i"); addRule(lines, "ый", "", "$", "i"); addRule(lines, "ей", "^", "", "(jej|ej)"); addRule(lines, "е", "(а|е|о|у)", "", "je"); addRule(lines, "е", "^", "", "je"); addRule(lines, "эй", "", "", "ej"); addRule(lines, "ей", "", "", "ej"); addRule(lines, "ауе", "", "", "aue"); addRule(lines, "ауэ", "", "", "aue"); addRule(lines, "а", "", "", "a"); addRule(lines, "б", "", "", "b"); addRule(lines, "в", "", "", "v"); addRule(lines, "г", "", "", "g"); addRule(lines, "д", "", "", "d"); addRule(lines, "е", "", "", "E"); addRule(lines, "ё", "", "", "(e|jo)"); addRule(lines, "ж", "", "", "Z"); addRule(lines, "з", "", "", "z"); addRule(lines, "и", "", "", "I"); addRule(lines, "й", "", "", "j"); addRule(lines, "к", "", "", "k"); addRule(lines, "л", "", "", "l"); addRule(lines, "м", "", "", "m"); addRule(lines, "н", "", "", "n"); addRule(lines, "о", "", "", "o"); addRule(lines, "п", "", "", 
"p"); addRule(lines, "р", "", "", "r"); addRule(lines, "с", "", "", "s"); addRule(lines, "т", "", "", "t"); addRule(lines, "у", "", "", "u"); addRule(lines, "ф", "", "", "f"); addRule(lines, "х", "", "", "x"); addRule(lines, "ц", "", "", "ts"); addRule(lines, "ч", "", "", "tS"); addRule(lines, "ш", "", "", "S"); addRule(lines, "щ", "", "", "StS"); addRule(lines, "ъ", "", "", ""); addRule(lines, "ы", "", "", "I"); addRule(lines, "ь", "", "", ""); addRule(lines, "э", "", "", "E"); addRule(lines, "ю", "", "", "ju"); addRule(lines, "я", "", "", "ja"); } private static void addGenRulesCzech(final Map> lines) { addRule(lines, "ch", "", "", "x"); addRule(lines, "qu", "", "", "(k|kv)"); addRule(lines, "aue", "", "", "aue"); addRule(lines, "ei", "", "", "(ej|aj)"); addRule(lines, "i", "[aou]", "", "j"); addRule(lines, "i", "", "[aeou]", "j"); addRule(lines, "č", "", "", "tS"); addRule(lines, "š", "", "", "S"); addRule(lines, "ň", "", "", "n"); addRule(lines, "ť", "", "", "(t|tj)"); addRule(lines, "ď", "", "", "(d|dj)"); addRule(lines, "ř", "", "", "(r|rZ)"); addRule(lines, "á", "", "", "a"); addRule(lines, "é", "", "", "e"); addRule(lines, "í", "", "", "i"); addRule(lines, "ó", "", "", "o"); addRule(lines, "ú", "", "", "u"); addRule(lines, "ý", "", "", "i"); addRule(lines, "ě", "", "", "(e|je)"); addRule(lines, "ů", "", "", "u"); addRule(lines, "a", "", "", "a"); addRule(lines, "b", "", "", "b"); addRule(lines, "c", "", "", "ts"); addRule(lines, "d", "", "", "d"); addRule(lines, "e", "", "", "E"); addRule(lines, "f", "", "", "f"); addRule(lines, "g", "", "", "g"); addRule(lines, "h", "", "", "(h|g)"); addRule(lines, "i", "", "", "I"); addRule(lines, "j", "", "", "j"); addRule(lines, "k", "", "", "k"); addRule(lines, "l", "", "", "l"); addRule(lines, "m", "", "", "m"); addRule(lines, "n", "", "", "n"); addRule(lines, "o", "", "", "o"); addRule(lines, "p", "", "", "p"); addRule(lines, "q", "", "", "(k|kv)"); addRule(lines, "r", "", "", "r"); addRule(lines, "s", "", "", "s"); 
addRule(lines, "t", "", "", "t"); addRule(lines, "u", "", "", "u"); addRule(lines, "v", "", "", "v"); addRule(lines, "w", "", "", "v"); addRule(lines, "x", "", "", "ks"); addRule(lines, "y", "", "", "i"); addRule(lines, "z", "", "", "z"); } private static void addGenRulesDutch(final Map> lines) { addRule(lines, "ssj", "", "", "S"); addRule(lines, "sj", "", "", "S"); addRule(lines, "ch", "", "", "x"); addRule(lines, "c", "", "[eiy]", "ts"); addRule(lines, "ck", "", "", "k"); addRule(lines, "pf", "", "", "(pf|p|f)"); addRule(lines, "ph", "", "", "(ph|f)"); addRule(lines, "qu", "", "", "kv"); addRule(lines, "th", "^", "", "t"); addRule(lines, "th", "", "[äöüaeiou]", "(t|th)"); addRule(lines, "th", "", "", "t"); addRule(lines, "ss", "", "", "s"); addRule(lines, "h", "[aeiouy]", "", ""); addRule(lines, "aue", "", "", "aue"); addRule(lines, "ou", "", "", "au"); addRule(lines, "ie", "", "", "(Q|i)"); addRule(lines, "uu", "", "", "(Q|u)"); addRule(lines, "ee", "", "", "e"); addRule(lines, "eu", "", "", "(Y|Yj)"); addRule(lines, "aa", "", "", "a"); addRule(lines, "oo", "", "", "o"); addRule(lines, "oe", "", "", "u"); addRule(lines, "ij", "", "", "ej"); addRule(lines, "ui", "", "", "(Y|uj)"); addRule(lines, "ei", "", "", "(ej|aj)"); addRule(lines, "i", "", "[aou]", "j"); addRule(lines, "y", "", "[aeou]", "j"); addRule(lines, "i", "[aou]", "", "j"); addRule(lines, "y", "[aeou]", "", "j"); addRule(lines, "a", "", "", "a"); addRule(lines, "b", "", "", "b"); addRule(lines, "c", "", "", "k"); addRule(lines, "d", "", "", "d"); addRule(lines, "e", "", "", "e"); addRule(lines, "f", "", "", "f"); addRule(lines, "g", "", "", "(g|x)"); addRule(lines, "h", "", "", "h"); addRule(lines, "i", "", "", "(i|Q)"); addRule(lines, "j", "", "", "j"); addRule(lines, "k", "", "", "k"); addRule(lines, "l", "", "", "l"); addRule(lines, "m", "", "", "m"); addRule(lines, "n", "", "", "n"); addRule(lines, "o", "", "", "o"); addRule(lines, "p", "", "", "p"); addRule(lines, "q", "", "", "k"); 
addRule(lines, "r", "", "", "r");
    addRule(lines, "s", "", "", "s");
    addRule(lines, "t", "", "", "t");
    addRule(lines, "u", "", "", "(u|Q)");
    addRule(lines, "v", "", "", "v");
    addRule(lines, "w", "", "", "(w|v)");
    addRule(lines, "x", "", "", "ks");
    addRule(lines, "y", "", "", "i");
    addRule(lines, "z", "", "", "z");
  }

  /**
   * Registers this table's rules in {@code lines}, in the order written below: apostrophes
   * first, then consonant groups, then vowel groups, then one entry per letter a-z.
   *
   * <p>NOTE(review): the parameter type is written as {@code Map<String, List<Rule>>} on the
   * assumption that it matches the first parameter of {@code addRule}; the four strings passed
   * to {@code addRule} presumably are pattern, left context, right context and phoneme
   * expression. Confirm both against {@code addRule}.
   *
   * @param lines rule collection handed to {@code addRule} for every entry
   */
  private static void addGenRulesEnglish(final Map<String, List<Rule>> lines) {
    // The first pattern used to be the literal U+FFFD replacement character, i.e. the residue of
    // a mis-decoded character, which can never match real input. It sits next to the ASCII
    // apostrophe rule and produces the same empty result, so it is taken to be the typographic
    // apostrophe (as in O'Neill written with U+2019). Written as an escape so that it survives
    // any further re-encoding of this file. NOTE(review): confirm against the upstream rule file.
    addRule(lines, "\u2019", "", "", "");
    addRule(lines, "'", "", "", "");
    addRule(lines, "mc", "^", "", "mak");
    addRule(lines, "tz", "", "", "ts");
    addRule(lines, "tch", "", "", "tS");
    addRule(lines, "ch", "", "", "(tS|x)");
    addRule(lines, "ck", "", "", "k");
    addRule(lines, "cc", "", "[iey]", "ks");
    addRule(lines, "c", "", "c", "");
    addRule(lines, "c", "", "[iey]", "s");
    addRule(lines, "gh", "^", "", "g");
    addRule(lines, "gh", "", "", "(g|f|w)");
    addRule(lines, "gn", "", "", "(gn|n)");
    addRule(lines, "g", "", "[iey]", "(g|dZ)");
    addRule(lines, "th", "", "", "t");
    addRule(lines, "kh", "", "", "x");
    addRule(lines, "ph", "", "", "f");
    addRule(lines, "sch", "", "", "(S|sk)");
    addRule(lines, "sh", "", "", "S");
    addRule(lines, "who", "^", "", "hu");
    addRule(lines, "wh", "^", "", "w");
    addRule(lines, "h", "", "$", "");
    addRule(lines, "h", "", "[^aeiou]", "");
    addRule(lines, "h", "^", "", "H");
    addRule(lines, "kn", "^", "", "n");
    addRule(lines, "mb", "", "$", "m");
    addRule(lines, "ng", "", "$", "(N|ng)");
    addRule(lines, "pn", "^", "", "(pn|n)");
    addRule(lines, "ps", "^", "", "(ps|s)");
    addRule(lines, "qu", "", "", "kw");
    addRule(lines, "tia", "", "", "(So|Sa)");
    addRule(lines, "tio", "", "", "So");
    addRule(lines, "wr", "^", "", "r");
    addRule(lines, "x", "^", "", "z");
    addRule(lines, "y", "^", "", "j");
    addRule(lines, "y", "^", "[aeiouy]", "j");
    addRule(lines, "yi", "^", "", "i");
    addRule(lines, "aue", "", "", "aue");
    addRule(lines, "oue", "", "", "(aue|oue)");
    addRule(lines, "ai", "", "", "(aj|ej|e)");
    addRule(lines, "ay", "", "", "(aj|ej)");
    addRule(lines, "a", "", "[^aeiou]e", "ej");
    addRule(lines, "ei", "", "", "(ej|aj|i)");
    addRule(lines, "ey", "", "", "(ej|aj|i)");
    addRule(lines, "ear", "", "", "ia");
    addRule(lines, "ea", "", "", "(i|e)");
    addRule(lines, "ee", "", "", "i");
    addRule(lines, "e", "", "[^aeiou]e", "i");
    addRule(lines, "e", "", "$", "(|E)");
    addRule(lines, "ie", "", "", "i");
    addRule(lines, "i", "", "[^aeiou]e", "aj");
    addRule(lines, "oa", "", "", "ou");
    addRule(lines, "oi", "", "", "oj");
    addRule(lines, "oo", "", "", "u");
    addRule(lines, "ou", "", "", "(u|ou)");
    addRule(lines, "oy", "", "", "oj");
    addRule(lines, "o", "", "[^aeiou]e", "ou");
    addRule(lines, "u", "", "[^aeiou]e", "(ju|u)");
    addRule(lines, "u", "", "r", "(e|u)");
    addRule(lines, "a", "", "", "(e|o|a)");
    addRule(lines, "b", "", "", "b");
    addRule(lines, "c", "", "", "k");
    addRule(lines, "d", "", "", "d");
    addRule(lines, "e", "", "", "E");
    addRule(lines, "f", "", "", "f");
    addRule(lines, "g", "", "", "g");
    addRule(lines, "h", "", "", "h");
    addRule(lines, "i", "", "", "I");
    addRule(lines, "j", "", "", "dZ");
    addRule(lines, "k", "", "", "k");
    addRule(lines, "l", "", "", "l");
    addRule(lines, "m", "", "", "m");
    addRule(lines, "n", "", "", "n");
    addRule(lines, "o", "", "", "(o|a)");
    addRule(lines, "p", "", "", "p");
    addRule(lines, "q", "", "", "k");
    addRule(lines, "r", "", "", "r");
    addRule(lines, "s", "", "", "s");
    addRule(lines, "t", "", "", "t");
    addRule(lines, "u", "", "", "(u|a)");
    addRule(lines, "v", "", "", "v");
    addRule(lines, "w", "", "", "(w|v)");
    addRule(lines, "x", "", "", "ks");
    addRule(lines, "y", "", "", "i");
    addRule(lines, "z", "", "", "z");
  }

  /**
   * Registers this table's rules in {@code lines}, in the order written below.
   *
   * <p>The opening entries all have "$" as third argument and offer an empty alternative in
   * their result, e.g. "(t|)". NOTE(review): presumably these model optionally silent
   * word-final consonants; confirm against the code that interprets the rule arguments.
   *
   * @param lines rule collection handed to {@code addRule} for every entry
   */
  private static void addGenRulesFrench(final Map<String, List<Rule>> lines) {
    addRule(lines, "lt", "u", "$", "(lt|)");
    addRule(lines, "c", "n", "$", "(k|)");
    addRule(lines, "d", "", "$", "(t|)");
    addRule(lines, "g", "n", "$", "(k|)");
    addRule(lines, "p", "", "$", "(p|)");
    addRule(lines, "r", "e", "$", "(r|)");
    addRule(lines, "t", "", "$", "(t|)");
    addRule(lines, "z", "", "$", "(s|)");
    addRule(lines, "ds", "", "$", "(ds|)");
    addRule(lines, "ps", "", "$", "(ps|)");
addRule(lines, "rs", "e", "$", "(rs|)"); addRule(lines, "ts", "", "$", "(ts|)"); addRule(lines, "s", "", "$", "(s|)"); addRule(lines, "x", "u", "$", "(ks|)"); addRule(lines, "s", "[aeéèêiou]", "[^aeéèêiou]", "(s|)"); addRule(lines, "t", "[aeéèêiou]", "[^aeéèêiou]", "(t|)"); addRule(lines, "kh", "", "", "x"); addRule(lines, "ph", "", "", "f"); addRule(lines, "ç", "", "", "s"); addRule(lines, "x", "", "", "ks"); addRule(lines, "ch", "", "", "S"); addRule(lines, "c", "", "[eiyéèê]", "s"); addRule(lines, "gn", "", "", "(n|gn)"); addRule(lines, "g", "", "[eiy]", "Z"); addRule(lines, "gue", "", "$", "k"); addRule(lines, "gu", "", "[eiy]", "g"); addRule(lines, "aill", "", "e", "aj"); addRule(lines, "ll", "", "e", "(l|j)"); addRule(lines, "que", "", "$", "k"); addRule(lines, "qu", "", "", "k"); addRule(lines, "s", "[aeiouyéèê]", "[aeiouyéèê]", "z"); addRule(lines, "h", "[bdgt]", "", ""); addRule(lines, "m", "[aeiouy]", "[aeiouy]", "m"); addRule(lines, "m", "[aeiouy]", "", "(m|n)"); addRule(lines, "ou", "", "[aeio]", "v"); addRule(lines, "u", "", "[aeio]", "v"); addRule(lines, "aue", "", "", "aue"); addRule(lines, "eau", "", "", "o"); addRule(lines, "au", "", "", "(o|au)"); addRule(lines, "ai", "", "", "(e|aj)"); addRule(lines, "ay", "", "", "(e|aj)"); addRule(lines, "é", "", "", "e"); addRule(lines, "ê", "", "", "e"); addRule(lines, "è", "", "", "e"); addRule(lines, "à", "", "", "a"); addRule(lines, "â", "", "", "a"); addRule(lines, "où", "", "", "u"); addRule(lines, "ou", "", "", "u"); addRule(lines, "oi", "", "", "(oj|va)"); addRule(lines, "ei", "", "", "(aj|ej|e)"); addRule(lines, "ey", "", "", "(aj|ej|e)"); addRule(lines, "eu", "", "", "(ej|Y)"); addRule(lines, "y", "[ou]", "", "j"); addRule(lines, "e", "", "$", "(e|)"); addRule(lines, "i", "", "[aou]", "j"); addRule(lines, "y", "", "[aoeu]", "j"); addRule(lines, "a", "", "", "a"); addRule(lines, "b", "", "", "b"); addRule(lines, "c", "", "", "k"); addRule(lines, "d", "", "", "d"); addRule(lines, "e", "", "", "e"); 
addRule(lines, "f", "", "", "f"); addRule(lines, "g", "", "", "g"); addRule(lines, "h", "", "", "h"); addRule(lines, "i", "", "", "i"); addRule(lines, "j", "", "", "Z"); addRule(lines, "k", "", "", "k"); addRule(lines, "l", "", "", "l"); addRule(lines, "m", "", "", "m"); addRule(lines, "n", "", "", "n"); addRule(lines, "o", "", "", "o"); addRule(lines, "p", "", "", "p"); addRule(lines, "q", "", "", "k"); addRule(lines, "r", "", "", "r"); addRule(lines, "s", "", "", "s"); addRule(lines, "t", "", "", "t"); addRule(lines, "u", "", "", "(u|Q)"); addRule(lines, "v", "", "", "v"); addRule(lines, "w", "", "", "v"); addRule(lines, "y", "", "", "i"); addRule(lines, "z", "", "", "z"); } private static void addGenRulesGerman(final Map> lines) { addRule(lines, "ewitsch", "", "$", "evitS"); addRule(lines, "owitsch", "", "$", "ovitS"); addRule(lines, "evitsch", "", "$", "evitS"); addRule(lines, "ovitsch", "", "$", "ovitS"); addRule(lines, "witsch", "", "$", "vitS"); addRule(lines, "vitsch", "", "$", "vitS"); addRule(lines, "ssch", "", "", "S"); addRule(lines, "chsch", "", "", "xS"); addRule(lines, "sch", "", "", "S"); addRule(lines, "ziu", "", "", "tsu"); addRule(lines, "zia", "", "", "tsa"); addRule(lines, "zio", "", "", "tso"); addRule(lines, "chs", "", "", "ks"); addRule(lines, "ch", "", "", "x"); addRule(lines, "ck", "", "", "k"); addRule(lines, "c", "", "[eiy]", "ts"); addRule(lines, "sp", "^", "", "Sp"); addRule(lines, "st", "^", "", "St"); addRule(lines, "ssp", "", "", "(Sp|sp)"); addRule(lines, "sp", "", "", "(Sp|sp)"); addRule(lines, "sst", "", "", "(St|st)"); addRule(lines, "st", "", "", "(St|st)"); addRule(lines, "pf", "", "", "(pf|p|f)"); addRule(lines, "ph", "", "", "(ph|f)"); addRule(lines, "qu", "", "", "kv"); addRule(lines, "ewitz", "", "$", "(evits|evitS)"); addRule(lines, "ewiz", "", "$", "(evits|evitS)"); addRule(lines, "evitz", "", "$", "(evits|evitS)"); addRule(lines, "eviz", "", "$", "(evits|evitS)"); addRule(lines, "owitz", "", "$", "(ovits|ovitS)"); 
addRule(lines, "owiz", "", "$", "(ovits|ovitS)"); addRule(lines, "ovitz", "", "$", "(ovits|ovitS)"); addRule(lines, "oviz", "", "$", "(ovits|ovitS)"); addRule(lines, "witz", "", "$", "(vits|vitS)"); addRule(lines, "wiz", "", "$", "(vits|vitS)"); addRule(lines, "vitz", "", "$", "(vits|vitS)"); addRule(lines, "viz", "", "$", "(vits|vitS)"); addRule(lines, "tz", "", "", "ts"); addRule(lines, "thal", "", "$", "tal"); addRule(lines, "th", "^", "", "t"); addRule(lines, "th", "", "[äöüaeiou]", "(t|th)"); addRule(lines, "th", "", "", "t"); addRule(lines, "rh", "^", "", "r"); addRule(lines, "h", "[aeiouyäöü]", "", ""); addRule(lines, "h", "^", "", "H"); addRule(lines, "ss", "", "", "s"); addRule(lines, "s", "", "[äöüaeiouy]", "(z|s)"); addRule(lines, "s", "[aeiouyäöüj]", "[aeiouyäöü]", "z"); addRule(lines, "ß", "", "", "s"); addRule(lines, "ij", "", "$", "i"); addRule(lines, "aue", "", "", "aue"); addRule(lines, "ue", "", "", "Q"); addRule(lines, "ae", "", "", "Y"); addRule(lines, "oe", "", "", "Y"); addRule(lines, "ü", "", "", "Q"); addRule(lines, "ä", "", "", "(Y|e)"); addRule(lines, "ö", "", "", "Y"); addRule(lines, "ei", "", "", "(aj|ej)"); addRule(lines, "ey", "", "", "(aj|ej)"); addRule(lines, "eu", "", "", "(Yj|ej|aj|oj)"); addRule(lines, "i", "[aou]", "", "j"); addRule(lines, "y", "[aou]", "", "j"); addRule(lines, "ie", "", "", "I"); addRule(lines, "i", "", "[aou]", "j"); addRule(lines, "y", "", "[aoeu]", "j"); addRule(lines, "ñ", "", "", "n"); addRule(lines, "ã", "", "", "a"); addRule(lines, "ő", "", "", "o"); addRule(lines, "ű", "", "", "u"); addRule(lines, "ç", "", "", "s"); addRule(lines, "a", "", "", "A"); addRule(lines, "b", "", "", "b"); addRule(lines, "c", "", "", "k"); addRule(lines, "d", "", "", "d"); addRule(lines, "e", "", "", "E"); addRule(lines, "f", "", "", "f"); addRule(lines, "g", "", "", "g"); addRule(lines, "h", "", "", "h"); addRule(lines, "i", "", "", "I"); addRule(lines, "j", "", "", "j"); addRule(lines, "k", "", "", "k"); addRule(lines, "l", 
"", "", "l"); addRule(lines, "m", "", "", "m"); addRule(lines, "n", "", "", "n"); addRule(lines, "o", "", "", "O"); addRule(lines, "p", "", "", "p"); addRule(lines, "q", "", "", "k"); addRule(lines, "r", "", "", "r"); addRule(lines, "s", "", "", "s"); addRule(lines, "t", "", "", "t"); addRule(lines, "u", "", "", "U"); addRule(lines, "v", "", "", "(f|v)"); addRule(lines, "w", "", "", "v"); addRule(lines, "x", "", "", "ks"); addRule(lines, "y", "", "", "i"); addRule(lines, "z", "", "", "ts"); } private static void addGenRulesGreek(final Map> lines) { addRule(lines, "αυ", "", "$", "af"); addRule(lines, "αυ", "", "(κ|π|σ|τ|φ|θ|χ|ψ)", "af"); addRule(lines, "αυ", "", "", "av"); addRule(lines, "ευ", "", "$", "ef"); addRule(lines, "ευ", "", "(κ|π|σ|τ|φ|θ|χ|ψ)", "ef"); addRule(lines, "ευ", "", "", "ev"); addRule(lines, "ηυ", "", "$", "if"); addRule(lines, "ηυ", "", "(κ|π|σ|τ|φ|θ|χ|ψ)", "if"); addRule(lines, "ηυ", "", "", "iv"); addRule(lines, "ου", "", "", "u"); addRule(lines, "αι", "", "", "aj"); addRule(lines, "ει", "", "", "ej"); addRule(lines, "οι", "", "", "oj"); addRule(lines, "ωι", "", "", "oj"); addRule(lines, "ηι", "", "", "ej"); addRule(lines, "υι", "", "", "i"); addRule(lines, "γγ", "(ε|ι|η|α|ο|ω|υ)", "(ε|ι|η)", "(nj|j)"); addRule(lines, "γγ", "", "(ε|ι|η)", "j"); addRule(lines, "γγ", "(ε|ι|η|α|ο|ω|υ)", "", "(ng|g)"); addRule(lines, "γγ", "", "", "g"); addRule(lines, "γκ", "^", "", "g"); addRule(lines, "γκ", "(ε|ι|η|α|ο|ω|υ)", "(ε|ι|η)", "(nj|j)"); addRule(lines, "γκ", "", "(ε|ι|η)", "j"); addRule(lines, "γκ", "(ε|ι|η|α|ο|ω|υ)", "", "(ng|g)"); addRule(lines, "γκ", "", "", "g"); addRule(lines, "γι", "", "(α|ο|ω|υ)", "j"); addRule(lines, "γι", "", "", "(gi|i)"); addRule(lines, "γε", "", "(α|ο|ω|υ)", "j"); addRule(lines, "γε", "", "", "(ge|je)"); addRule(lines, "κζ", "", "", "gz"); addRule(lines, "τζ", "", "", "dz"); addRule(lines, "σ", "", "(β|γ|δ|μ|ν|ρ)", "z"); addRule(lines, "μβ", "", "", "(mb|b)"); addRule(lines, "μπ", "^", "", "b"); addRule(lines, "μπ", 
"(ε|ι|η|α|ο|ω|υ)", "", "mb"); addRule(lines, "μπ", "", "", "b"); addRule(lines, "ντ", "^", "", "d"); addRule(lines, "ντ", "(ε|ι|η|α|ο|ω|υ)", "", "(nd|nt)"); addRule(lines, "ντ", "", "", "(nt|d)"); addRule(lines, "ά", "", "", "a"); addRule(lines, "έ", "", "", "e"); addRule(lines, "ή", "", "", "(i|e)"); addRule(lines, "ί", "", "", "i"); addRule(lines, "ό", "", "", "o"); addRule(lines, "ύ", "", "", "(Q|i|u)"); addRule(lines, "ώ", "", "", "o"); addRule(lines, "ΰ", "", "", "(Q|i|u)"); addRule(lines, "ϋ", "", "", "(Q|i|u)"); addRule(lines, "ϊ", "", "", "j"); addRule(lines, "α", "", "", "a"); addRule(lines, "β", "", "", "(v|b)"); addRule(lines, "γ", "", "", "g"); addRule(lines, "δ", "", "", "d"); addRule(lines, "ε", "", "", "e"); addRule(lines, "ζ", "", "", "z"); addRule(lines, "η", "", "", "(i|e)"); addRule(lines, "ι", "", "", "i"); addRule(lines, "κ", "", "", "k"); addRule(lines, "λ", "", "", "l"); addRule(lines, "μ", "", "", "m"); addRule(lines, "ν", "", "", "n"); addRule(lines, "ξ", "", "", "ks"); addRule(lines, "ο", "", "", "o"); addRule(lines, "π", "", "", "p"); addRule(lines, "ρ", "", "", "r"); addRule(lines, "σ", "", "", "s"); addRule(lines, "ς", "", "", "s"); addRule(lines, "τ", "", "", "t"); addRule(lines, "υ", "", "", "(Q|i|u)"); addRule(lines, "φ", "", "", "f"); addRule(lines, "θ", "", "", "t"); addRule(lines, "χ", "", "", "x"); addRule(lines, "ψ", "", "", "ps"); addRule(lines, "ω", "", "", "o"); } private static void addGenRulesGreeklatin(final Map> lines) { addRule(lines, "au", "", "$", "af"); addRule(lines, "au", "", "[kpstfh]", "af"); addRule(lines, "au", "", "", "av"); addRule(lines, "eu", "", "$", "ef"); addRule(lines, "eu", "", "[kpstfh]", "ef"); addRule(lines, "eu", "", "", "ev"); addRule(lines, "ou", "", "", "u"); addRule(lines, "gge", "[aeiouy]", "", "(nje|je)"); addRule(lines, "ggi", "[aeiouy]", "[aou]", "(nj|j)"); addRule(lines, "ggi", "[aeiouy]", "", "(ni|i)"); addRule(lines, "gge", "", "", "je"); addRule(lines, "ggi", "", "", "i"); addRule(lines, 
"gg", "[aeiouy]", "", "(ng|g)"); addRule(lines, "gg", "", "", "g"); addRule(lines, "gk", "^", "", "g"); addRule(lines, "gke", "[aeiouy]", "", "(nje|je)"); addRule(lines, "gki", "[aeiouy]", "", "(ni|i)"); addRule(lines, "gke", "", "", "je"); addRule(lines, "gki", "", "", "i"); addRule(lines, "gk", "[aeiouy]", "", "(ng|g)"); addRule(lines, "gk", "", "", "g"); addRule(lines, "nghi", "", "[aouy]", "Nj"); addRule(lines, "nghi", "", "", "(Ngi|Ni)"); addRule(lines, "nghe", "", "[aouy]", "Nj"); addRule(lines, "nghe", "", "", "(Nje|Nge)"); addRule(lines, "ghi", "", "[aouy]", "j"); addRule(lines, "ghi", "", "", "(gi|i)"); addRule(lines, "ghe", "", "[aouy]", "j"); addRule(lines, "ghe", "", "", "(je|ge)"); addRule(lines, "ngh", "", "", "Ng"); addRule(lines, "gh", "", "", "g"); addRule(lines, "ngi", "", "[aouy]", "Nj"); addRule(lines, "ngi", "", "", "(Ngi|Ni)"); addRule(lines, "nge", "", "[aouy]", "Nj"); addRule(lines, "nge", "", "", "(Nje|Nge)"); addRule(lines, "gi", "", "[aouy]", "j"); addRule(lines, "gi", "", "", "(gi|i)"); addRule(lines, "ge", "", "[aouy]", "j"); addRule(lines, "ge", "", "", "(je|ge)"); addRule(lines, "ng", "", "", "Ng"); addRule(lines, "i", "", "[aeou]", "j"); addRule(lines, "i", "[aeou]", "", "j"); addRule(lines, "y", "", "[aeou]", "j"); addRule(lines, "y", "[aeou]", "", "j"); addRule(lines, "yi", "", "[aeou]", "j"); addRule(lines, "yi", "", "", "i"); addRule(lines, "ch", "", "", "x"); addRule(lines, "kh", "", "", "x"); addRule(lines, "dh", "", "", "d"); addRule(lines, "dj", "", "", "dZ"); addRule(lines, "ph", "", "", "f"); addRule(lines, "th", "", "", "t"); addRule(lines, "kz", "", "", "gz"); addRule(lines, "tz", "", "", "dz"); addRule(lines, "s", "", "[bgdmnr]", "z"); addRule(lines, "mb", "", "", "(mb|b)"); addRule(lines, "mp", "^", "", "b"); addRule(lines, "mp", "[aeiouy]", "", "mp"); addRule(lines, "mp", "", "", "b"); addRule(lines, "nt", "^", "", "d"); addRule(lines, "nt", "[aeiouy]", "", "(nd|nt)"); addRule(lines, "nt", "", "", "(nt|d)"); 
addRule(lines, "á", "", "", "a"); addRule(lines, "é", "", "", "e"); addRule(lines, "í", "", "", "i"); addRule(lines, "ó", "", "", "o"); addRule(lines, "óu", "", "", "u"); addRule(lines, "ú", "", "", "u"); addRule(lines, "ý", "", "", "(i|Q|u)"); addRule(lines, "a", "", "", "a"); addRule(lines, "b", "", "", "(b|v)"); addRule(lines, "c", "", "", "k"); addRule(lines, "d", "", "", "d"); addRule(lines, "e", "", "", "e"); addRule(lines, "f", "", "", "f"); addRule(lines, "g", "", "", "g"); addRule(lines, "h", "", "", "x"); addRule(lines, "i", "", "", "i"); addRule(lines, "j", "", "", "(j|Z)"); addRule(lines, "k", "", "", "k"); addRule(lines, "l", "", "", "l"); addRule(lines, "m", "", "", "m"); addRule(lines, "n", "", "", "n"); addRule(lines, "ο", "", "", "o"); addRule(lines, "p", "", "", "p"); addRule(lines, "q", "", "", "k"); addRule(lines, "r", "", "", "r"); addRule(lines, "s", "", "", "s"); addRule(lines, "t", "", "", "t"); addRule(lines, "u", "", "", "u"); addRule(lines, "v", "", "", "v"); addRule(lines, "w", "", "", "v"); addRule(lines, "x", "", "", "ks"); addRule(lines, "y", "", "", "(i|Q|u)"); addRule(lines, "z", "", "", "z"); }

    /**
     * Populates {@code lines} with the "gen" rule table for Hebrew (per the method name) by
     * issuing one {@code addRule} call per rule, in the order listed.
     *
     * <p>NOTE(review): {@code addRule} is declared outside this excerpt; its four string arguments
     * look like pattern, left context, right context and phoneme expression - confirm.
     *
     * @param lines rule collection handed unchanged to every {@code addRule} call
     */
    private static void addGenRulesHebrew(final Map> lines) {
        addRule(lines, "אי", "", "", "i");
        addRule(lines, "עי", "", "", "i");
        addRule(lines, "עו", "", "", "VV");
        addRule(lines, "או", "", "", "VV");
        addRule(lines, "ג׳", "", "", "Z");
        addRule(lines, "ד׳", "", "", "dZ");
        addRule(lines, "א", "", "", "L");
        addRule(lines, "ב", "", "", "b");
        addRule(lines, "ג", "", "", "g");
        addRule(lines, "ד", "", "", "d");
        addRule(lines, "ה", "^", "", "1");
        addRule(lines, "ה", "", "$", "1");
        addRule(lines, "ה", "", "", "");
        addRule(lines, "וו", "", "", "V");
        addRule(lines, "וי", "", "", "WW");
        addRule(lines, "ו", "", "", "W");
        addRule(lines, "ז", "", "", "z");
        addRule(lines, "ח", "", "", "X");
        addRule(lines, "ט", "", "", "T");
        addRule(lines, "יי", "", "", "i");
        addRule(lines, "י", "", "", "i");
        addRule(lines, "ך", "", "", "X");
        addRule(lines, "כ", "^", "", "K");
        addRule(lines, "כ", "", "", "k");
        addRule(lines, "ל", "", "", "l");
        addRule(lines, "ם", "", "", "m");
        addRule(lines, "מ", "", "", "m");
        addRule(lines, "ן", "", "", "n");
        addRule(lines, "נ", "", "", "n");
        addRule(lines, "ס", "", "", "s");
        addRule(lines, "ע", "", "", "L");
        addRule(lines, "ף", "", "", "f");
        addRule(lines, "פ", "", "", "f");
        addRule(lines, "ץ", "", "", "C");
        addRule(lines, "צ", "", "", "C");
        addRule(lines, "ק", "", "", "K");
        addRule(lines, "ר", "", "", "r");
        addRule(lines, "ש", "", "", "s");
        addRule(lines, "ת", "", "", "TB");
    }

    /**
     * Populates {@code lines} with the "gen" rule table for Hungarian (per the method name) by
     * issuing one {@code addRule} call per rule, in the order listed.
     *
     * <p>NOTE(review): {@code addRule} is declared outside this excerpt; its four string arguments
     * look like pattern, left context, right context and phoneme expression - confirm.
     *
     * @param lines rule collection handed unchanged to every {@code addRule} call
     */
    private static void addGenRulesHungarian(final Map> lines) {
        // Multi-letter and context-dependent patterns.
        addRule(lines, "sz", "", "", "s");
        addRule(lines, "zs", "", "", "Z");
        addRule(lines, "cs", "", "", "tS");
        addRule(lines, "ay", "", "", "(oj|aj)");
        addRule(lines, "ai", "", "", "(oj|aj)");
        addRule(lines, "aj", "", "", "(oj|aj)");
        addRule(lines, "ei", "", "", "(aj|ej)");
        addRule(lines, "ey", "", "", "(aj|ej)");
        addRule(lines, "y", "[áo]", "", "j");
        addRule(lines, "i", "[áo]", "", "j");
        addRule(lines, "ee", "", "", "(ej|e)");
        addRule(lines, "ely", "", "", "(ej|eli)");
        addRule(lines, "ly", "", "", "(j|li)");
        addRule(lines, "gy", "", "[aeouáéóúüöőű]", "dj");
        addRule(lines, "gy", "", "", "(d|gi)");
        addRule(lines, "ny", "", "[aeouáéóúüöőű]", "nj");
        addRule(lines, "ny", "", "", "(n|ni)");
        addRule(lines, "ty", "", "[aeouáéóúüöőű]", "tj");
        addRule(lines, "ty", "", "", "(t|ti)");
        addRule(lines, "qu", "", "", "(ku|kv)");
        addRule(lines, "h", "", "$", "");

        // Accented vowels.
        addRule(lines, "á", "", "", "a");
        addRule(lines, "é", "", "", "e");
        addRule(lines, "í", "", "", "i");
        addRule(lines, "ó", "", "", "o");
        addRule(lines, "ú", "", "", "u");
        addRule(lines, "ö", "", "", "Y");
        addRule(lines, "ő", "", "", "Y");
        addRule(lines, "ü", "", "", "Q");
        addRule(lines, "ű", "", "", "Q");

        // Single unaccented letters.
        addRule(lines, "a", "", "", "a");
        addRule(lines, "b", "", "", "b");
        addRule(lines, "c", "", "", "ts");
        addRule(lines, "d", "", "", "d");
        addRule(lines, "e", "", "", "E");
        addRule(lines, "f", "", "", "f");
        addRule(lines, "g", "", "", "g");
        addRule(lines, "h", "", "", "h");
        addRule(lines, "i", "", "", "I");
        addRule(lines, "j", "", "", "j");
        addRule(lines, "k", "", "", "k");
        addRule(lines, "l", "", "", "l");
        addRule(lines, "m", "", "", "m");
        addRule(lines, "n", "", "", "n");
        addRule(lines, "o", "", "", "o");
        addRule(lines, "p", "", "", "p");
        addRule(lines, "q", "", "", "k");
        addRule(lines, "r", "", "", "r");
        addRule(lines, "s", "", "", "(S|s)");
        addRule(lines, "t", "", "", "t");
        addRule(lines, "u", "", "", "u");
        addRule(lines, "v", "", "", "v");
        addRule(lines, "w", "", "", "v");
        addRule(lines, "x", "", "", "ks");
        addRule(lines, "y", "", "", "i");
        addRule(lines, "z", "", "", "z");
    }

    /**
     * Populates {@code lines} with the "gen" rule table for Italian (per the method name) by
     * issuing one {@code addRule} call per rule, in the order listed.
     *
     * <p>NOTE(review): {@code addRule} is declared outside this excerpt; its four string arguments
     * look like pattern, left context, right context and phoneme expression - confirm.
     *
     * @param lines rule collection handed unchanged to every {@code addRule} call
     */
    private static void addGenRulesItalian(final Map> lines) {
        // Multi-letter and context-dependent patterns.
        addRule(lines, "kh", "", "", "x");
        addRule(lines, "gli", "", "", "(l|gli)");
        addRule(lines, "gn", "", "[aeou]", "(n|nj|gn)");
        addRule(lines, "gni", "", "", "(ni|gni)");
        addRule(lines, "gi", "", "[aeou]", "dZ");
        addRule(lines, "gg", "", "[ei]", "dZ");
        addRule(lines, "g", "", "[ei]", "dZ");
        addRule(lines, "h", "[bdgt]", "", "g");
        addRule(lines, "h", "", "$", "");
        addRule(lines, "ci", "", "[aeou]", "tS");
        addRule(lines, "ch", "", "[ei]", "k");
        addRule(lines, "sc", "", "[ei]", "S");
        addRule(lines, "cc", "", "[ei]", "tS");
        addRule(lines, "c", "", "[ei]", "tS");
        addRule(lines, "s", "[aeiou]", "[aeiou]", "z");
        addRule(lines, "i", "[aeou]", "", "j");
        addRule(lines, "i", "", "[aeou]", "j");
        addRule(lines, "y", "[aeou]", "", "j");
        addRule(lines, "y", "", "[aeou]", "j");
        addRule(lines, "qu", "", "", "k");
        addRule(lines, "uo", "", "", "(vo|o)");
        addRule(lines, "u", "", "[aei]", "v");

        // Accented vowels. These four patterns previously held U+FFFD replacement characters
        // (mis-decoded text), which made the two "e" rules and the two "o" rules exact duplicates
        // that could at best match a literal U+FFFD. They are restored to the accented forms of
        // e and o, written literally like the "è"/"ò" patterns in the Spanish table of this file.
        addRule(lines, "è", "", "", "e");
        addRule(lines, "é", "", "", "e");
        addRule(lines, "ò", "", "", "o");
        addRule(lines, "ó", "", "", "o");

        // Single unaccented letters.
        addRule(lines, "a", "", "", "a");
        addRule(lines, "b", "", "", "b");
        addRule(lines, "c", "", "", "k");
        addRule(lines, "d", "", "", "d");
        addRule(lines, "e", "", "", "e");
        addRule(lines, "f", "", "", "f");
        addRule(lines, "g", "", "", "g");
        addRule(lines, "h", "", "", "h");
        addRule(lines, "i", "", "", "i");
        addRule(lines, "j", "", "", "(Z|dZ|j)");
        addRule(lines, "k", "", "", "k");
        addRule(lines, "l", "", "", "l");
        addRule(lines, "m", "", "", "m");
        addRule(lines, "n", "", "", "n");
        addRule(lines, "o", "", "", "o");
        addRule(lines, "p", "", "", "p");
        addRule(lines, "q", "", "", "k");
        addRule(lines, "r", "", "", "r");
        addRule(lines, "s", "", "", "s");
        addRule(lines, "t", "", "", "t");
        addRule(lines, "u", "", "", "u");
        addRule(lines, "v", "", "", "v");
        addRule(lines, "w", "", "", "v");
        addRule(lines, "x", "", "", "ks");
        addRule(lines, "y", "", "", "i");
        addRule(lines, "z", "", "", "(ts|dz)");
    }

    private static void addGenRulesPolish(final Map> lines) { addRule(lines, "ska", "", "$", "ski"); addRule(lines, "cka", "", "$", "tski"); addRule(lines, "lowa", "", "$", "(lova|lof|l|el)"); addRule(lines, "kowa", "", "$", "(kova|kof|k|ek)"); addRule(lines, "owa", "", "$", "(ova|of|)"); addRule(lines, "lowna", "", "$", "(lovna|levna|l|el)"); addRule(lines, "kowna", "", "$", "(kovna|k|ek)"); addRule(lines, "owna", "", "$", "(ovna|)"); addRule(lines, "lówna", "", "$", "(l|el)"); addRule(lines, "kówna", "", "$", "(k|ek)"); addRule(lines, "ówna", "", "$", ""); addRule(lines, "a", "", "$", "(a|i)"); addRule(lines, "czy", "", "", "tSi"); addRule(lines, "cze", "", "[bcdgkpstwzż]", "(tSe|tSF)"); addRule(lines, "ciewicz", "", "", "(tsevitS|tSevitS)"); addRule(lines, "siewicz", "", "", "(sevitS|SevitS)"); addRule(lines, "ziewicz", "", "", "(zevitS|ZevitS)"); addRule(lines, "riewicz", "", "", "rjevitS"); addRule(lines, "diewicz", "", "", "djevitS"); addRule(lines, "tiewicz", "", "", "tjevitS"); addRule(lines, "iewicz", "", "", "evitS"); addRule(lines, "ewicz", "", "", "evitS"); addRule(lines, "owicz", "", "", "ovitS"); addRule(lines, "icz", "", "", "itS"); addRule(lines, "cz", "", "", "tS"); addRule(lines, "ch", "", "", "x"); addRule(lines, "cia", "", "[bcdgkpstwzż]", "(tSB|tsB)"); addRule(lines, "cia", "", "", "(tSa|tsa)"); addRule(lines,
"cią", "", "[bp]", "(tSom|tsom)"); addRule(lines, "cią", "", "", "(tSon|tson)"); addRule(lines, "cię", "", "[bp]", "(tSem|tsem)"); addRule(lines, "cię", "", "", "(tSen|tsen)"); addRule(lines, "cie", "", "[bcdgkpstwzż]", "(tSF|tsF)"); addRule(lines, "cie", "", "", "(tSe|tse)"); addRule(lines, "cio", "", "", "(tSo|tso)"); addRule(lines, "ciu", "", "", "(tSu|tsu)"); addRule(lines, "ci", "", "", "(tSi|tsI)"); addRule(lines, "ć", "", "", "(tS|ts)"); addRule(lines, "ssz", "", "", "S"); addRule(lines, "sz", "", "", "S"); addRule(lines, "sia", "", "[bcdgkpstwzż]", "(SB|sB|sja)"); addRule(lines, "sia", "", "", "(Sa|sja)"); addRule(lines, "sią", "", "[bp]", "(Som|som)"); addRule(lines, "sią", "", "", "(Son|son)"); addRule(lines, "się", "", "[bp]", "(Sem|sem)"); addRule(lines, "się", "", "", "(Sen|sen)"); addRule(lines, "sie", "", "[bcdgkpstwzż]", "(SF|sF|se)"); addRule(lines, "sie", "", "", "(Se|se)"); addRule(lines, "sio", "", "", "(So|so)"); addRule(lines, "siu", "", "", "(Su|sju)"); addRule(lines, "si", "", "", "(Si|sI)"); addRule(lines, "ś", "", "", "(S|s)"); addRule(lines, "zia", "", "[bcdgkpstwzż]", "(ZB|zB|zja)"); addRule(lines, "zia", "", "", "(Za|zja)"); addRule(lines, "zią", "", "[bp]", "(Zom|zom)"); addRule(lines, "zią", "", "", "(Zon|zon)"); addRule(lines, "zię", "", "[bp]", "(Zem|zem)"); addRule(lines, "zię", "", "", "(Zen|zen)"); addRule(lines, "zie", "", "[bcdgkpstwzż]", "(ZF|zF)"); addRule(lines, "zie", "", "", "(Ze|ze)"); addRule(lines, "zio", "", "", "(Zo|zo)"); addRule(lines, "ziu", "", "", "(Zu|zju)"); addRule(lines, "zi", "", "", "(Zi|zI)"); addRule(lines, "że", "", "[bcdgkpstwzż]", "(Ze|ZF)"); addRule(lines, "że", "", "[bcdgkpstwzż]", "(Ze|ZF|ze|zF)"); addRule(lines, "że", "", "", "Ze"); addRule(lines, "źe", "", "", "(Ze|ze)"); addRule(lines, "ży", "", "", "Zi"); addRule(lines, "źi", "", "", "(Zi|zi)"); addRule(lines, "ż", "", "", "Z"); addRule(lines, "ź", "", "", "(Z|z)"); addRule(lines, "rze", "t", "", "(Se|re)"); addRule(lines, "rze", "", "", 
"(Ze|re|rZe)"); addRule(lines, "rzy", "t", "", "(Si|ri)"); addRule(lines, "rzy", "", "", "(Zi|ri|rZi)"); addRule(lines, "rz", "t", "", "(S|r)"); addRule(lines, "rz", "", "", "(Z|r|rZ)"); addRule(lines, "lio", "", "", "(lo|le)"); addRule(lines, "ł", "", "", "l"); addRule(lines, "ń", "", "", "n"); addRule(lines, "qu", "", "", "k"); addRule(lines, "s", "", "s", ""); addRule(lines, "ó", "", "", "(u|o)"); addRule(lines, "ą", "", "[bp]", "om"); addRule(lines, "ę", "", "[bp]", "em"); addRule(lines, "ą", "", "", "on"); addRule(lines, "ę", "", "", "en"); addRule(lines, "ije", "", "", "je"); addRule(lines, "yje", "", "", "je"); addRule(lines, "iie", "", "", "je"); addRule(lines, "yie", "", "", "je"); addRule(lines, "iye", "", "", "je"); addRule(lines, "yye", "", "", "je"); addRule(lines, "ij", "", "[aou]", "j"); addRule(lines, "yj", "", "[aou]", "j"); addRule(lines, "ii", "", "[aou]", "j"); addRule(lines, "yi", "", "[aou]", "j"); addRule(lines, "iy", "", "[aou]", "j"); addRule(lines, "yy", "", "[aou]", "j"); addRule(lines, "rie", "", "", "rje"); addRule(lines, "die", "", "", "dje"); addRule(lines, "tie", "", "", "tje"); addRule(lines, "ie", "", "[bcdgkpstwzż]", "F"); addRule(lines, "ie", "", "", "e"); addRule(lines, "aue", "", "", "aue"); addRule(lines, "au", "", "", "au"); addRule(lines, "ei", "", "", "aj"); addRule(lines, "ey", "", "", "aj"); addRule(lines, "ej", "", "", "aj"); addRule(lines, "ai", "", "", "aj"); addRule(lines, "ay", "", "", "aj"); addRule(lines, "aj", "", "", "aj"); addRule(lines, "i", "[aeou]", "", "j"); addRule(lines, "y", "[aeou]", "", "j"); addRule(lines, "i", "", "[aou]", "j"); addRule(lines, "y", "", "[aeou]", "j"); addRule(lines, "a", "", "[bcdgkpstwzż]", "B"); addRule(lines, "e", "", "[bcdgkpstwzż]", "(E|F)"); addRule(lines, "o", "", "[bcćdgklłmnńrsśtwzźż]", "P"); addRule(lines, "a", "", "", "a"); addRule(lines, "b", "", "", "b"); addRule(lines, "c", "", "", "ts"); addRule(lines, "d", "", "", "d"); addRule(lines, "e", "", "", "E"); addRule(lines, 
"f", "", "", "f"); addRule(lines, "g", "", "", "g"); addRule(lines, "h", "", "", "(h|x)"); addRule(lines, "i", "", "", "I"); addRule(lines, "j", "", "", "j"); addRule(lines, "k", "", "", "k"); addRule(lines, "l", "", "", "l"); addRule(lines, "m", "", "", "m"); addRule(lines, "n", "", "", "n"); addRule(lines, "o", "", "", "o"); addRule(lines, "p", "", "", "p"); addRule(lines, "q", "", "", "k"); addRule(lines, "r", "", "", "r"); addRule(lines, "s", "", "", "s"); addRule(lines, "t", "", "", "t"); addRule(lines, "u", "", "", "u"); addRule(lines, "v", "", "", "v"); addRule(lines, "w", "", "", "v"); addRule(lines, "x", "", "", "ks"); addRule(lines, "y", "", "", "I"); addRule(lines, "z", "", "", "z"); } private static void addGenRulesPortuguese(final Map> lines) { addRule(lines, "kh", "", "", "x"); addRule(lines, "ch", "", "", "S"); addRule(lines, "ss", "", "", "s"); addRule(lines, "sc", "", "[ei]", "s"); addRule(lines, "sç", "", "[aou]", "s"); addRule(lines, "ç", "", "", "s"); addRule(lines, "c", "", "[ei]", "s"); addRule(lines, "s", "^", "", "s"); addRule(lines, "s", "[aáuiíoóeéêy]", "[aáuiíoóeéêy]", "z"); addRule(lines, "s", "", "[dglmnrv]", "(Z|S)"); addRule(lines, "z", "", "$", "(Z|s|S)"); addRule(lines, "z", "", "[bdgv]", "(Z|z)"); addRule(lines, "z", "", "[ptckf]", "(s|S|z)"); addRule(lines, "gu", "", "[eiu]", "g"); addRule(lines, "gu", "", "[ao]", "gv"); addRule(lines, "g", "", "[ei]", "Z"); addRule(lines, "qu", "", "[eiu]", "k"); addRule(lines, "qu", "", "[ao]", "kv"); addRule(lines, "uo", "", "", "(vo|o|u)"); addRule(lines, "u", "", "[aei]", "v"); addRule(lines, "lh", "", "", "l"); addRule(lines, "nh", "", "", "nj"); addRule(lines, "h", "[bdgt]", "", ""); addRule(lines, "h", "", "$", ""); addRule(lines, "ex", "", "[aáuiíoóeéêy]", "(ez|eS|eks)"); addRule(lines, "ex", "", "[cs]", "e"); addRule(lines, "y", "[aáuiíoóeéê]", "", "j"); addRule(lines, "y", "", "[aeiíou]", "j"); addRule(lines, "m", "", "[bcdfglnprstv]", "(m|n)"); addRule(lines, "m", "", "$", "(m|n)"); 
addRule(lines, "ão", "", "", "(au|an|on)"); addRule(lines, "ãe", "", "", "(aj|an)"); addRule(lines, "ãi", "", "", "(aj|an)"); addRule(lines, "õe", "", "", "(oj|on)"); addRule(lines, "i", "[aáuoóeéê]", "", "j"); addRule(lines, "i", "", "[aeou]", "j"); addRule(lines, "â", "", "", "a"); addRule(lines, "à", "", "", "a"); addRule(lines, "á", "", "", "a"); addRule(lines, "ã", "", "", "(a|an|on)"); addRule(lines, "é", "", "", "e"); addRule(lines, "ê", "", "", "e"); addRule(lines, "í", "", "", "i"); addRule(lines, "ô", "", "", "o"); addRule(lines, "ó", "", "", "o"); addRule(lines, "õ", "", "", "(o|on)"); addRule(lines, "ú", "", "", "u"); addRule(lines, "ü", "", "", "u"); addRule(lines, "aue", "", "", "aue"); addRule(lines, "a", "", "", "a"); addRule(lines, "b", "", "", "b"); addRule(lines, "c", "", "", "k"); addRule(lines, "d", "", "", "d"); addRule(lines, "e", "", "", "(e|i)"); addRule(lines, "f", "", "", "f"); addRule(lines, "g", "", "", "g"); addRule(lines, "h", "", "", "h"); addRule(lines, "i", "", "", "i"); addRule(lines, "j", "", "", "Z"); addRule(lines, "k", "", "", "k"); addRule(lines, "l", "", "", "l"); addRule(lines, "m", "", "", "m"); addRule(lines, "n", "", "", "n"); addRule(lines, "o", "", "", "(o|u)"); addRule(lines, "p", "", "", "p"); addRule(lines, "q", "", "", "k"); addRule(lines, "r", "", "", "r"); addRule(lines, "s", "", "", "S"); addRule(lines, "t", "", "", "t"); addRule(lines, "u", "", "", "u"); addRule(lines, "v", "", "", "v"); addRule(lines, "w", "", "", "v"); addRule(lines, "x", "", "", "(S|ks)"); addRule(lines, "y", "", "", "i"); addRule(lines, "z", "", "", "z"); } private static void addGenRulesRomanian(final Map> lines) { addRule(lines, "ce", "", "", "tSe"); addRule(lines, "ci", "", "", "(tSi|tS)"); addRule(lines, "ch", "", "[ei]", "k"); addRule(lines, "ch", "", "", "x"); addRule(lines, "gi", "", "", "(dZi|dZ)"); addRule(lines, "g", "", "[ei]", "dZ"); addRule(lines, "gh", "", "", "g"); addRule(lines, "i", "[aeou]", "", "j"); addRule(lines, "i", 
"", "[aeou]", "j"); addRule(lines, "ţ", "", "", "ts"); addRule(lines, "ş", "", "", "S"); addRule(lines, "qu", "", "", "k"); addRule(lines, "î", "", "", "i"); addRule(lines, "ea", "", "", "ja"); addRule(lines, "ă", "", "", "(e|a)"); addRule(lines, "aue", "", "", "aue"); addRule(lines, "a", "", "", "a"); addRule(lines, "b", "", "", "b"); addRule(lines, "c", "", "", "k"); addRule(lines, "d", "", "", "d"); addRule(lines, "e", "", "", "E"); addRule(lines, "f", "", "", "f"); addRule(lines, "g", "", "", "g"); addRule(lines, "h", "", "", "(x|h)"); addRule(lines, "i", "", "", "I"); addRule(lines, "j", "", "", "Z"); addRule(lines, "k", "", "", "k"); addRule(lines, "l", "", "", "l"); addRule(lines, "m", "", "", "m"); addRule(lines, "n", "", "", "n"); addRule(lines, "o", "", "", "o"); addRule(lines, "p", "", "", "p"); addRule(lines, "q", "", "", "k"); addRule(lines, "r", "", "", "r"); addRule(lines, "s", "", "", "s"); addRule(lines, "t", "", "", "t"); addRule(lines, "u", "", "", "u"); addRule(lines, "v", "", "", "v"); addRule(lines, "w", "", "", "v"); addRule(lines, "x", "", "", "ks"); addRule(lines, "y", "", "", "i"); addRule(lines, "z", "", "", "z"); } private static void addGenRulesRussian(final Map> lines) { addRule(lines, "yna", "", "$", "(in|ina)"); addRule(lines, "ina", "", "$", "(in|ina)"); addRule(lines, "liova", "", "$", "(lof|lef)"); addRule(lines, "lova", "", "$", "(lof|lef|lova)"); addRule(lines, "ova", "", "$", "(of|ova)"); addRule(lines, "eva", "", "$", "(ef|ova)"); addRule(lines, "aia", "", "$", "(aja|i)"); addRule(lines, "aja", "", "$", "(aja|i)"); addRule(lines, "aya", "", "$", "(aja|i)"); addRule(lines, "tsya", "", "", "tsa"); addRule(lines, "tsyu", "", "", "tsu"); addRule(lines, "tsia", "", "", "tsa"); addRule(lines, "tsie", "", "", "tse"); addRule(lines, "tsio", "", "", "tso"); addRule(lines, "tsye", "", "", "tse"); addRule(lines, "tsyo", "", "", "tso"); addRule(lines, "tsiu", "", "", "tsu"); addRule(lines, "sie", "", "", "se"); addRule(lines, "sio", "", 
"", "so"); addRule(lines, "zie", "", "", "ze"); addRule(lines, "zio", "", "", "zo"); addRule(lines, "sye", "", "", "se"); addRule(lines, "syo", "", "", "so"); addRule(lines, "zye", "", "", "ze"); addRule(lines, "zyo", "", "", "zo"); addRule(lines, "ger", "", "$", "ger"); addRule(lines, "gen", "", "$", "gen"); addRule(lines, "gin", "", "$", "gin"); addRule(lines, "gg", "", "", "g"); addRule(lines, "g", "[jaeoiuy]", "[aeoiu]", "g"); addRule(lines, "g", "", "[aeoiu]", "(g|h)"); addRule(lines, "kh", "", "", "x"); addRule(lines, "ch", "", "", "(tS|x)"); addRule(lines, "sch", "", "", "(StS|S)"); addRule(lines, "ssh", "", "", "S"); addRule(lines, "sh", "", "", "S"); addRule(lines, "zh", "", "", "Z"); addRule(lines, "tz", "", "$", "ts"); addRule(lines, "tz", "", "", "(ts|tz)"); addRule(lines, "c", "", "[iey]", "s"); addRule(lines, "qu", "", "", "(kv|k)"); addRule(lines, "s", "", "s", ""); addRule(lines, "lya", "", "", "la"); addRule(lines, "lyu", "", "", "lu"); addRule(lines, "lia", "", "", "la"); addRule(lines, "liu", "", "", "lu"); addRule(lines, "lja", "", "", "la"); addRule(lines, "lju", "", "", "lu"); addRule(lines, "le", "", "", "(lo|lE)"); addRule(lines, "lyo", "", "", "(lo|le)"); addRule(lines, "lio", "", "", "(lo|le)"); addRule(lines, "ije", "", "", "je"); addRule(lines, "ie", "", "", "je"); addRule(lines, "iye", "", "", "je"); addRule(lines, "iie", "", "", "je"); addRule(lines, "yje", "", "", "je"); addRule(lines, "ye", "", "", "je"); addRule(lines, "yye", "", "", "je"); addRule(lines, "yie", "", "", "je"); addRule(lines, "ij", "", "[aou]", "j"); addRule(lines, "iy", "", "[aou]", "j"); addRule(lines, "ii", "", "[aou]", "j"); addRule(lines, "yj", "", "[aou]", "j"); addRule(lines, "yy", "", "[aou]", "j"); addRule(lines, "yi", "", "[aou]", "j"); addRule(lines, "io", "", "", "(jo|e)"); addRule(lines, "i", "", "[au]", "j"); addRule(lines, "i", "[aeou]", "", "j"); addRule(lines, "yo", "", "", "(jo|e)"); addRule(lines, "y", "", "[au]", "j"); addRule(lines, "y", 
"[aeiou]", "", "j"); addRule(lines, "ii", "", "$", "i"); addRule(lines, "iy", "", "$", "i"); addRule(lines, "yy", "", "$", "i"); addRule(lines, "yi", "", "$", "i"); addRule(lines, "yj", "", "$", "i"); addRule(lines, "ij", "", "$", "i"); addRule(lines, "e", "^", "", "(je|E)"); addRule(lines, "ee", "", "", "(aje|i)"); addRule(lines, "e", "[aou]", "", "je"); addRule(lines, "oo", "", "", "(oo|u)"); addRule(lines, "'", "", "", ""); addRule(lines, "\"", "", "", ""); addRule(lines, "aue", "", "", "aue"); addRule(lines, "a", "", "", "a"); addRule(lines, "b", "", "", "b"); addRule(lines, "c", "", "", "k"); addRule(lines, "d", "", "", "d"); addRule(lines, "e", "", "", "E"); addRule(lines, "f", "", "", "f"); addRule(lines, "g", "", "", "g"); addRule(lines, "h", "", "", "h"); addRule(lines, "i", "", "", "I"); addRule(lines, "j", "", "", "j"); addRule(lines, "k", "", "", "k"); addRule(lines, "l", "", "", "l"); addRule(lines, "m", "", "", "m"); addRule(lines, "n", "", "", "n"); addRule(lines, "o", "", "", "o"); addRule(lines, "p", "", "", "p"); addRule(lines, "q", "", "", "k"); addRule(lines, "r", "", "", "r"); addRule(lines, "s", "", "", "s"); addRule(lines, "t", "", "", "t"); addRule(lines, "u", "", "", "u"); addRule(lines, "v", "", "", "v"); addRule(lines, "w", "", "", "v"); addRule(lines, "x", "", "", "ks"); addRule(lines, "y", "", "", "I"); addRule(lines, "z", "", "", "z"); } private static void addGenRulesSpanish(final Map> lines) { addRule(lines, "ñ", "", "", "(n|nj)"); addRule(lines, "ny", "", "", "nj"); addRule(lines, "ç", "", "", "s"); addRule(lines, "ig", "[aeiou]", "", "(tS|ig)"); addRule(lines, "ix", "[aeiou]", "", "S"); addRule(lines, "tx", "", "", "tS"); addRule(lines, "tj", "", "$", "tS"); addRule(lines, "tj", "", "", "dZ"); addRule(lines, "tg", "", "", "(tg|dZ)"); addRule(lines, "ch", "", "", "(tS|dZ)"); addRule(lines, "bh", "", "", "b"); addRule(lines, "h", "[dgt]", "", ""); addRule(lines, "h", "", "$", ""); addRule(lines, "m", "", "[bpvf]", "(m|n)"); 
addRule(lines, "c", "", "[ei]", "s"); addRule(lines, "gu", "", "[ei]", "(g|gv)"); addRule(lines, "g", "", "[ei]", "(x|g|dZ)"); addRule(lines, "qu", "", "", "k"); addRule(lines, "uo", "", "", "(vo|o)"); addRule(lines, "u", "", "[aei]", "v"); addRule(lines, "ü", "", "", "v"); addRule(lines, "á", "", "", "a"); addRule(lines, "é", "", "", "e"); addRule(lines, "í", "", "", "i"); addRule(lines, "ó", "", "", "o"); addRule(lines, "ú", "", "", "u"); addRule(lines, "à", "", "", "a"); addRule(lines, "è", "", "", "e"); addRule(lines, "ò", "", "", "o"); addRule(lines, "a", "", "", "a"); addRule(lines, "b", "", "", "B"); addRule(lines, "c", "", "", "k"); addRule(lines, "d", "", "", "d"); addRule(lines, "e", "", "", "e"); addRule(lines, "f", "", "", "f"); addRule(lines, "g", "", "", "g"); addRule(lines, "h", "", "", "h"); addRule(lines, "i", "", "", "i"); addRule(lines, "j", "", "", "(x|Z)"); addRule(lines, "k", "", "", "k"); addRule(lines, "l", "", "", "l"); addRule(lines, "m", "", "", "m"); addRule(lines, "n", "", "", "n"); addRule(lines, "o", "", "", "o"); addRule(lines, "p", "", "", "p"); addRule(lines, "q", "", "", "k"); addRule(lines, "r", "", "", "r"); addRule(lines, "s", "", "", "s"); addRule(lines, "t", "", "", "t"); addRule(lines, "u", "", "", "u"); addRule(lines, "v", "", "", "V"); addRule(lines, "w", "", "", "v"); addRule(lines, "x", "", "", "(ks|gz|S)"); addRule(lines, "y", "", "", "(i|j)"); addRule(lines, "z", "", "", "(z|s)"); } private static void addGenRulesTurkish(final Map> lines) { addRule(lines, "ç", "", "", "tS"); addRule(lines, "ğ", "", "", ""); addRule(lines, "ş", "", "", "S"); addRule(lines, "ü", "", "", "Q"); addRule(lines, "ö", "", "", "Y"); addRule(lines, "ı", "", "", "(e|i|)"); addRule(lines, "a", "", "", "a"); addRule(lines, "b", "", "", "b"); addRule(lines, "c", "", "", "dZ"); addRule(lines, "d", "", "", "d"); addRule(lines, "e", "", "", "e"); addRule(lines, "f", "", "", "f"); addRule(lines, "g", "", "", "g"); addRule(lines, "h", "", "", "h"); 
addRule(lines, "i", "", "", "i"); addRule(lines, "j", "", "", "Z"); addRule(lines, "k", "", "", "k"); addRule(lines, "l", "", "", "l"); addRule(lines, "m", "", "", "m"); addRule(lines, "n", "", "", "n"); addRule(lines, "o", "", "", "o"); addRule(lines, "p", "", "", "p"); addRule(lines, "q", "", "", "k"); addRule(lines, "r", "", "", "r"); addRule(lines, "s", "", "", "s"); addRule(lines, "t", "", "", "t"); addRule(lines, "u", "", "", "u"); addRule(lines, "v", "", "", "v"); addRule(lines, "w", "", "", "v"); addRule(lines, "x", "", "", "ks"); addRule(lines, "y", "", "", "j"); addRule(lines, "z", "", "", "z"); }

    /**
     * Adds the single rule of this table to {@code lines}: pattern {@code "E"} with the three
     * remaining string arguments all empty.
     *
     * <p>NOTE(review): {@code addRule} is declared outside this excerpt; its four string arguments
     * look like pattern, left context, right context and phoneme expression - confirm.
     *
     * @param lines rule collection handed unchanged to {@code addRule}
     */
    private static void addSepApproxAny(final Map> lines) {
        addRule(lines, "E", "", "", "");
    }

    /**
     * Adds this table's rules to {@code lines}, one {@code addRule} call per rule, in the order
     * listed. Every rule except {@code "nm"} and {@code "ji"} leaves the second string argument
     * empty and sets the third; the closing run pairs a letter with itself in the third argument
     * and passes an empty last argument.
     *
     * <p>NOTE(review): {@code addRule} is declared outside this excerpt; its four string arguments
     * look like pattern, left context, right context and phoneme expression - confirm.
     *
     * @param lines rule collection handed unchanged to every {@code addRule} call
     */
    private static void addSepExactApproxCommon(final Map> lines) {
        addRule(lines, "h", "", "$", "");
        addRule(lines, "b", "", "[fktSs]", "p");
        addRule(lines, "b", "", "p", "");
        addRule(lines, "b", "", "$", "p");
        addRule(lines, "p", "", "[vgdZz]", "b");
        addRule(lines, "p", "", "b", "");
        addRule(lines, "v", "", "[pktSs]", "f");
        addRule(lines, "v", "", "f", "");
        addRule(lines, "v", "", "$", "f");
        addRule(lines, "f", "", "[vbgdZz]", "v");
        addRule(lines, "f", "", "v", "");
        addRule(lines, "g", "", "[pftSs]", "k");
        addRule(lines, "g", "", "k", "");
        addRule(lines, "g", "", "$", "k");
        addRule(lines, "k", "", "[vbdZz]", "g");
        addRule(lines, "k", "", "g", "");
        addRule(lines, "d", "", "[pfkSs]", "t");
        addRule(lines, "d", "", "t", "");
        addRule(lines, "d", "", "$", "t");
        addRule(lines, "t", "", "[vbgZz]", "d");
        addRule(lines, "t", "", "d", "");
        addRule(lines, "s", "", "dZ", "");
        addRule(lines, "s", "", "tS", "");
        addRule(lines, "z", "", "[pfkSt]", "s");
        addRule(lines, "z", "", "[sSzZ]", "");
        addRule(lines, "s", "", "[sSzZ]", "");
        addRule(lines, "Z", "", "[sSzZ]", "");
        addRule(lines, "S", "", "[sSzZ]", "");
        addRule(lines, "nm", "", "", "m");
        addRule(lines, "ji", "^", "", "i");

        // Same letter as pattern and as third argument, empty last argument.
        addRule(lines, "a", "", "a", "");
        addRule(lines, "b", "", "b", "");
        addRule(lines, "d", "", "d", "");
        addRule(lines, "e", "", "e", "");
        addRule(lines, "f", "", "f", "");
        addRule(lines, "g", "", "g", "");
        addRule(lines, "i", "", "i", "");
        addRule(lines, "k", "", "k", "");
        addRule(lines, "l", "", "l", "");
        addRule(lines, "m", "", "m", "");
        addRule(lines, "n", "", "n", "");
        addRule(lines, "o", "", "o", "");
        addRule(lines, "p", "", "p", "");
        addRule(lines, "r", "", "r", "");
        addRule(lines, "t", "", "t", "");
        addRule(lines, "u", "", "u", "");
        addRule(lines, "v", "", "v", "");
        addRule(lines, "z", "", "z", "");
    }

    /**
     * Adds this table's rules to {@code lines}, one {@code addRule} call per rule, in the order
     * listed. The opening rules all pass {@code "^"} as the second string argument.
     *
     * <p>NOTE(review): {@code addRule} is declared outside this excerpt; its four string arguments
     * look like pattern, left context, right context and phoneme expression - confirm.
     *
     * @param lines rule collection handed unchanged to every {@code addRule} call
     */
    private static void addSepApproxCommon(final Map> lines) {
        // Rules with "^" as the second argument.
        addRule(lines, "bens", "^", "", "(binz|s)");
        addRule(lines, "benS", "^", "", "(binz|s)");
        addRule(lines, "ben", "^", "", "(bin|)");
        addRule(lines, "abens", "^", "", "(abinz|binz|s)");
        addRule(lines, "abenS", "^", "", "(abinz|binz|s)");
        addRule(lines, "aben", "^", "", "(abin|bin|)");
        addRule(lines, "els", "^", "", "(ilz|alz|s)");
        addRule(lines, "elS", "^", "", "(ilz|alz|s)");
        addRule(lines, "el", "^", "", "(il|al|)");
        addRule(lines, "als", "^", "", "(alz|s)");
        addRule(lines, "alS", "^", "", "(alz|s)");
        addRule(lines, "al", "^", "", "(al|)");
        addRule(lines, "del", "^", "", "(dil|)");
        addRule(lines, "dela", "^", "", "(dila|)");
        addRule(lines, "da", "^", "", "(da|)");
        addRule(lines, "de", "^", "", "(di|)");

        addRule(lines, "oa", "", "", "(va|a|D)");
        addRule(lines, "oe", "", "", "(vi|D)");
        addRule(lines, "ae", "", "", "D");
        addRule(lines, "n", "", "[bp]", "m");
        addRule(lines, "h", "", "", "(|h|f)");
        addRule(lines, "x", "", "", "h");
        addRule(lines, "aja", "^", "", "(Da|ia)");
        addRule(lines, "aje", "^", "", "(Di|Da|i|ia)");
        addRule(lines, "aji", "^", "", "(Di|i)");
        addRule(lines, "ajo", "^", "", "(Du|Da|iu|ia)");
        addRule(lines, "aju", "^", "", "(Du|iu)");
        addRule(lines, "aj", "", "", "D");
        addRule(lines, "ej", "", "", "D");
        addRule(lines, "oj", "", "", "D");
        addRule(lines, "uj", "", "", "D");
        addRule(lines, "au", "", "", "D");
        addRule(lines, "eu", "", "", "D");
        addRule(lines, "ou", "", "", "D");
        addRule(lines, "a", "^", "", "(a|)");
        addRule(lines, "ja", "^", "", "ia");
        addRule(lines, "je", "^", "", "i");
        addRule(lines, "jo", "^", "", "(iu|ia)");
        addRule(lines, "ju", "^", "", "iu");
        addRule(lines, "ja", "", "", "a");
        addRule(lines, "je", "", "", "i");
        addRule(lines, "ji", "", "", "i");
        addRule(lines, "jo", "", "", "u");
        addRule(lines, "ju", "", "", "u");
        addRule(lines, "j", "", "", "i");
        addRule(lines, "s", "", "[rmnl]", "z");
        addRule(lines, "S", "", "[rmnl]", "z");
        addRule(lines, "s", "[rmnl]", "", "z");
        addRule(lines, "S", "[rmnl]", "", "z");
        addRule(lines, "dS", "", "$", "S");
        addRule(lines, "dZ", "", "$", "S");
        addRule(lines, "Z", "", "$", "S");
        addRule(lines, "S", "", "$", "(S|s)");
        addRule(lines, "z", "", "$", "(S|s)");
        addRule(lines, "S", "", "", "s");
        addRule(lines, "dZ", "", "", "z");
        addRule(lines, "Z", "", "", "z");
        addRule(lines, "i", "", "$", "(i|)");
        addRule(lines, "e", "", "", "i");
        addRule(lines, "o", "", "$", "(a|u)");
        addRule(lines, "o", "", "", "u");
        addRule(lines, "B", "", "", "b");
        addRule(lines, "V", "", "", "v");
        addRule(lines, "p", "^", "", "b");
    }

    /**
     * Adds the single rule of this table to {@code lines}: pattern {@code "E"} with empty second
     * and third string arguments and {@code "e"} as the last argument.
     *
     * <p>NOTE(review): {@code addRule} is declared outside this excerpt; its four string arguments
     * look like pattern, left context, right context and phoneme expression - confirm.
     *
     * @param lines rule collection handed unchanged to {@code addRule}
     */
    private static void addSepExactAny(final Map> lines) {
        addRule(lines, "E", "", "", "e");
    }

    /**
     * Adds this table's eight rules to {@code lines}, one {@code addRule} call per rule, in the
     * order listed.
     *
     * <p>NOTE(review): {@code addRule} is declared outside this excerpt; its four string arguments
     * look like pattern, left context, right context and phoneme expression - confirm.
     *
     * @param lines rule collection handed unchanged to every {@code addRule} call
     */
    private static void addSepExactCommon(final Map> lines) {
        addRule(lines, "h", "", "", "");
        addRule(lines, "s", "[^t]", "[bgZd]", "z");
        addRule(lines, "Z", "", "[pfkst]", "S");
        addRule(lines, "Z", "", "$", "S");
        addRule(lines, "S", "", "[bgzd]", "Z");
        addRule(lines, "z", "", "$", "s");
        addRule(lines, "B", "", "", "b");
        addRule(lines, "V", "", "", "v");
    }

    private static void addSepRulesAny(final Map> lines) { addRule(lines, "ph", "", "", "f"); addRule(lines, "sh", "", "", "S"); addRule(lines, "kh", "", "", "x"); addRule(lines, "gli", "", "", "(gli|l[italian])"); addRule(lines, "gni", "", "", "(gni|ni[italian+french])"); addRule(lines, "gn", "", "[aeou]", "(n[italian+french]|nj[italian+french]|gn)"); addRule(lines, "gh", "", "", "g"); addRule(lines, "dh", "", "", "d"); addRule(lines, "bh", "", "", "b"); addRule(lines, "th", "", "",
"t"); addRule(lines, "lh", "", "", "l"); addRule(lines, "nh", "", "", "nj"); addRule(lines, "ig", "[aeiou]", "", "(ig|tS[spanish])"); addRule(lines, "ix", "[aeiou]", "", "S"); addRule(lines, "tx", "", "", "tS"); addRule(lines, "tj", "", "$", "tS"); addRule(lines, "tj", "", "", "dZ"); addRule(lines, "tg", "", "", "(tg|dZ[spanish])"); addRule(lines, "gi", "", "[aeou]", "dZ"); addRule(lines, "g", "", "y", "Z"); addRule(lines, "gg", "", "[ei]", "(gZ[portuguese+french]|dZ[italian+spanish]|x[spanish])"); addRule(lines, "g", "", "[ei]", "(Z[portuguese+french]|dZ[italian+spanish]|x[spanish])"); addRule(lines, "guy", "", "", "gi"); addRule(lines, "gue", "", "$", "(k[french]|ge)"); addRule(lines, "gu", "", "[ei]", "(g|gv)"); addRule(lines, "gu", "", "[ao]", "gv"); addRule(lines, "ñ", "", "", "(n|nj)"); addRule(lines, "ny", "", "", "nj"); addRule(lines, "sc", "", "[ei]", "(s|S[italian])"); addRule(lines, "sç", "", "[aeiou]", "s"); addRule(lines, "ss", "", "", "s"); addRule(lines, "ç", "", "", "s"); addRule(lines, "ch", "", "[ei]", "(k[italian]|S[portuguese+french]|tS[spanish]|dZ[spanish])"); addRule(lines, "ch", "", "", "(S|tS[spanish]|dZ[spanish])"); addRule(lines, "ci", "", "[aeou]", "(tS[italian]|si)"); addRule(lines, "cc", "", "[eiyéèê]", "(tS[italian]|ks[portuguese+french+spanish])"); addRule(lines, "c", "", "[eiyéèê]", "(tS[italian]|s[portuguese+french+spanish])"); addRule(lines, "s", "^", "", "s"); addRule(lines, "s", "[aáuiíoóeéêy]", "[aáuiíoóeéêy]", "(s[spanish]|z[portuguese+french+italian])"); addRule(lines, "s", "", "[dglmnrv]", "(z|Z[portuguese])"); addRule(lines, "z", "", "$", "(s|ts[italian]|S[portuguese])"); addRule(lines, "z", "", "[bdgv]", "(z|dz[italian]|Z[portuguese])"); addRule(lines, "z", "", "[ptckf]", "(s|ts[italian]|S[portuguese])"); addRule(lines, "z", "", "", "(z|dz[italian]|ts[italian]|s[spanish])"); addRule(lines, "que", "", "$", "(k[french]|ke)"); addRule(lines, "qu", "", "[eiu]", "k"); addRule(lines, "qu", "", "[ao]", "(kv|k)"); addRule(lines, 
"ex", "", "[aáuiíoóeéêy]", "(ez[portuguese]|eS[portuguese]|eks|egz)"); addRule(lines, "ex", "", "[cs]", "(e[portuguese]|ek)"); addRule(lines, "m", "", "[cdglnrst]", "(m|n[portuguese])"); addRule(lines, "m", "", "[bfpv]", "(m|n[portuguese+spanish])"); addRule(lines, "m", "", "$", "(m|n[portuguese])"); addRule(lines, "b", "^", "", "(b|V[spanish])"); addRule(lines, "v", "^", "", "(v|B[spanish])"); addRule(lines, "eau", "", "", "o"); addRule(lines, "ouh", "", "[aioe]", "(v[french]|uh)"); addRule(lines, "uh", "", "[aioe]", "(v|uh)"); addRule(lines, "ou", "", "[aioe]", "v"); addRule(lines, "uo", "", "", "(vo|o)"); addRule(lines, "u", "", "[aie]", "v"); addRule(lines, "i", "[aáuoóeéê]", "", "j"); addRule(lines, "i", "", "[aeou]", "j"); addRule(lines, "y", "[aáuiíoóeéê]", "", "j"); addRule(lines, "y", "", "[aeiíou]", "j"); addRule(lines, "e", "", "$", "(e|E[french])"); addRule(lines, "ão", "", "", "(au|an)"); addRule(lines, "ãe", "", "", "(aj|an)"); addRule(lines, "ãi", "", "", "(aj|an)"); addRule(lines, "õe", "", "", "(oj|on)"); addRule(lines, "où", "", "", "u"); addRule(lines, "ou", "", "", "(ou|u[french])"); addRule(lines, "â", "", "", "a"); addRule(lines, "à", "", "", "a"); addRule(lines, "á", "", "", "a"); addRule(lines, "ã", "", "", "(a|an)"); addRule(lines, "é", "", "", "e"); addRule(lines, "ê", "", "", "e"); addRule(lines, "è", "", "", "e"); addRule(lines, "í", "", "", "i"); addRule(lines, "î", "", "", "i"); addRule(lines, "ô", "", "", "o"); addRule(lines, "ó", "", "", "o"); addRule(lines, "õ", "", "", "(o|on)"); addRule(lines, "ò", "", "", "o"); addRule(lines, "ú", "", "", "u"); addRule(lines, "ü", "", "", "u"); addRule(lines, "a", "", "", "a"); addRule(lines, "b", "", "", "(b|v[spanish])"); addRule(lines, "c", "", "", "k"); addRule(lines, "d", "", "", "d"); addRule(lines, "e", "", "", "e"); addRule(lines, "f", "", "", "f"); addRule(lines, "g", "", "", "g"); addRule(lines, "h", "", "", "h"); addRule(lines, "i", "", "", "i"); addRule(lines, "j", "", "", 
"(x[spanish]|Z)"); addRule(lines, "k", "", "", "k"); addRule(lines, "l", "", "", "l"); addRule(lines, "m", "", "", "m"); addRule(lines, "n", "", "", "n"); addRule(lines, "o", "", "", "o"); addRule(lines, "p", "", "", "p"); addRule(lines, "q", "", "", "k"); addRule(lines, "r", "", "", "r"); addRule(lines, "s", "", "", "(s|S[portuguese])"); addRule(lines, "t", "", "", "t"); addRule(lines, "u", "", "", "u"); addRule(lines, "v", "", "", "(v|b[spanish])"); addRule(lines, "w", "", "", "v"); addRule(lines, "x", "", "", "(ks|gz|S[portuguese+spanish])"); addRule(lines, "y", "", "", "i"); addRule(lines, "z", "", "", "z"); } private static void addSepRulesFrench(final Map> lines) { addRule(lines, "kh", "", "", "x"); addRule(lines, "ph", "", "", "f"); addRule(lines, "ç", "", "", "s"); addRule(lines, "x", "", "", "ks"); addRule(lines, "ch", "", "", "S"); addRule(lines, "c", "", "[eiyéèê]", "s"); addRule(lines, "c", "", "", "k"); addRule(lines, "gn", "", "", "(n|gn)"); addRule(lines, "g", "", "[eiy]", "Z"); addRule(lines, "gue", "", "$", "k"); addRule(lines, "gu", "", "[eiy]", "g"); addRule(lines, "que", "", "$", "k"); addRule(lines, "qu", "", "", "k"); addRule(lines, "q", "", "", "k"); addRule(lines, "s", "[aeiouyéèê]", "[aeiouyéèê]", "z"); addRule(lines, "h", "[bdgt]", "", ""); addRule(lines, "h", "", "$", ""); addRule(lines, "j", "", "", "Z"); addRule(lines, "w", "", "", "v"); addRule(lines, "ouh", "", "[aioe]", "(v|uh)"); addRule(lines, "ou", "", "[aeio]", "v"); addRule(lines, "uo", "", "", "(vo|o)"); addRule(lines, "u", "", "[aeio]", "v"); addRule(lines, "aue", "", "", "aue"); addRule(lines, "eau", "", "", "o"); addRule(lines, "ai", "", "", "aj"); addRule(lines, "ay", "", "", "aj"); addRule(lines, "é", "", "", "e"); addRule(lines, "ê", "", "", "e"); addRule(lines, "è", "", "", "e"); addRule(lines, "à", "", "", "a"); addRule(lines, "â", "", "", "a"); addRule(lines, "où", "", "", "u"); addRule(lines, "ou", "", "", "u"); addRule(lines, "oi", "", "", "oj"); addRule(lines, "ei", 
"", "", "ej"); addRule(lines, "ey", "", "", "ej"); addRule(lines, "y", "[ou]", "", "j"); addRule(lines, "e", "", "$", "(e|)"); addRule(lines, "i", "", "[aou]", "j"); addRule(lines, "y", "", "[aoeu]", "j"); addRule(lines, "y", "", "", "i"); addRule(lines, "a", "", "", "a"); addRule(lines, "b", "", "", "b"); addRule(lines, "d", "", "", "d"); addRule(lines, "e", "", "", "e"); addRule(lines, "f", "", "", "f"); addRule(lines, "g", "", "", "g"); addRule(lines, "h", "", "", "h"); addRule(lines, "i", "", "", "i"); addRule(lines, "k", "", "", "k"); addRule(lines, "l", "", "", "l"); addRule(lines, "m", "", "", "m"); addRule(lines, "n", "", "", "n"); addRule(lines, "o", "", "", "o"); addRule(lines, "p", "", "", "p"); addRule(lines, "r", "", "", "r"); addRule(lines, "s", "", "", "s"); addRule(lines, "t", "", "", "t"); addRule(lines, "u", "", "", "u"); addRule(lines, "v", "", "", "v"); addRule(lines, "z", "", "", "z"); } private static void addSepRulesHebrew(final Map> lines) { addRule(lines, "אי", "", "", "i"); addRule(lines, "עי", "", "", "i"); addRule(lines, "עו", "", "", "VV"); addRule(lines, "או", "", "", "VV"); addRule(lines, "ג׳", "", "", "Z"); addRule(lines, "ד׳", "", "", "dZ"); addRule(lines, "א", "", "", "L"); addRule(lines, "ב", "", "", "b"); addRule(lines, "ג", "", "", "g"); addRule(lines, "ד", "", "", "d"); addRule(lines, "ה", "^", "", "1"); addRule(lines, "ה", "", "$", "1"); addRule(lines, "ה", "", "", ""); addRule(lines, "וו", "", "", "V"); addRule(lines, "וי", "", "", "WW"); addRule(lines, "ו", "", "", "W"); addRule(lines, "ז", "", "", "z"); addRule(lines, "ח", "", "", "X"); addRule(lines, "ט", "", "", "T"); addRule(lines, "יי", "", "", "i"); addRule(lines, "י", "", "", "i"); addRule(lines, "ך", "", "", "X"); addRule(lines, "כ", "^", "", "K"); addRule(lines, "כ", "", "", "k"); addRule(lines, "ל", "", "", "l"); addRule(lines, "ם", "", "", "m"); addRule(lines, "מ", "", "", "m"); addRule(lines, "ן", "", "", "n"); addRule(lines, "נ", "", "", "n"); addRule(lines, 
"ס", "", "", "s"); addRule(lines, "ע", "", "", "L"); addRule(lines, "ף", "", "", "f"); addRule(lines, "פ", "", "", "f"); addRule(lines, "ץ", "", "", "C"); addRule(lines, "צ", "", "", "C"); addRule(lines, "ק", "", "", "K"); addRule(lines, "ר", "", "", "r"); addRule(lines, "ש", "", "", "s"); addRule(lines, "ת", "", "", "T"); } private static void addSepRulesItalian(final Map> lines) { addRule(lines, "kh", "", "", "x"); addRule(lines, "gli", "", "", "(l|gli)"); addRule(lines, "gn", "", "[aeou]", "(n|nj|gn)"); addRule(lines, "gni", "", "", "(ni|gni)"); addRule(lines, "gi", "", "[aeou]", "dZ"); addRule(lines, "gg", "", "[ei]", "dZ"); addRule(lines, "g", "", "[ei]", "dZ"); addRule(lines, "h", "[bdgt]", "", "g"); addRule(lines, "ci", "", "[aeou]", "tS"); addRule(lines, "ch", "", "[ei]", "k"); addRule(lines, "sc", "", "[ei]", "S"); addRule(lines, "cc", "", "[ei]", "tS"); addRule(lines, "c", "", "[ei]", "tS"); addRule(lines, "s", "[aeiou]", "[aeiou]", "z"); addRule(lines, "i", "[aeou]", "", "j"); addRule(lines, "i", "", "[aeou]", "j"); addRule(lines, "y", "[aeou]", "", "j"); addRule(lines, "y", "", "[aeou]", "j"); addRule(lines, "qu", "", "", "k"); addRule(lines, "uo", "", "", "(vo|o)"); addRule(lines, "u", "", "[aei]", "v"); addRule(lines, "�", "", "", "e"); addRule(lines, "�", "", "", "e"); addRule(lines, "�", "", "", "o"); addRule(lines, "�", "", "", "o"); addRule(lines, "a", "", "", "a"); addRule(lines, "b", "", "", "b"); addRule(lines, "c", "", "", "k"); addRule(lines, "d", "", "", "d"); addRule(lines, "e", "", "", "e"); addRule(lines, "f", "", "", "f"); addRule(lines, "g", "", "", "g"); addRule(lines, "h", "", "", "h"); addRule(lines, "i", "", "", "i"); addRule(lines, "j", "", "", "(Z|dZ|j)"); addRule(lines, "k", "", "", "k"); addRule(lines, "l", "", "", "l"); addRule(lines, "m", "", "", "m"); addRule(lines, "n", "", "", "n"); addRule(lines, "o", "", "", "o"); addRule(lines, "p", "", "", "p"); addRule(lines, "q", "", "", "k"); addRule(lines, "r", "", "", "r"); 
addRule(lines, "s", "", "", "s"); addRule(lines, "t", "", "", "t"); addRule(lines, "u", "", "", "u"); addRule(lines, "v", "", "", "v"); addRule(lines, "w", "", "", "v"); addRule(lines, "x", "", "", "ks"); addRule(lines, "y", "", "", "i"); addRule(lines, "z", "", "", "(ts|dz)"); } private static void addSepRulesPortuguese(final Map> lines) { addRule(lines, "kh", "", "", "x"); addRule(lines, "ch", "", "", "S"); addRule(lines, "ss", "", "", "s"); addRule(lines, "sc", "", "[ei]", "s"); addRule(lines, "sç", "", "[aou]", "s"); addRule(lines, "ç", "", "", "s"); addRule(lines, "c", "", "[ei]", "s"); addRule(lines, "s", "^", "", "s"); addRule(lines, "s", "[aáuiíoóeéêy]", "[aáuiíoóeéêy]", "z"); addRule(lines, "s", "", "[dglmnrv]", "(Z|S)"); addRule(lines, "z", "", "$", "(Z|s|S)"); addRule(lines, "z", "", "[bdgv]", "(Z|z)"); addRule(lines, "z", "", "[ptckf]", "(s|S|z)"); addRule(lines, "gu", "", "[eiu]", "g"); addRule(lines, "gu", "", "[ao]", "gv"); addRule(lines, "g", "", "[ei]", "Z"); addRule(lines, "qu", "", "[eiu]", "k"); addRule(lines, "qu", "", "[ao]", "kv"); addRule(lines, "uo", "", "", "(vo|o|u)"); addRule(lines, "u", "", "[aei]", "v"); addRule(lines, "lh", "", "", "l"); addRule(lines, "nh", "", "", "nj"); addRule(lines, "h", "[bdgt]", "", ""); addRule(lines, "ex", "", "[aáuiíoóeéêy]", "(ez|eS|eks)"); addRule(lines, "ex", "", "[cs]", "e"); addRule(lines, "y", "[aáuiíoóeéê]", "", "j"); addRule(lines, "y", "", "[aeiíou]", "j"); addRule(lines, "m", "", "[bcdfglnprstv]", "(m|n)"); addRule(lines, "m", "", "$", "(m|n)"); addRule(lines, "ão", "", "", "(au|an|on)"); addRule(lines, "ãe", "", "", "(aj|an)"); addRule(lines, "ãi", "", "", "(aj|an)"); addRule(lines, "õe", "", "", "(oj|on)"); addRule(lines, "i", "[aáuoóeéê]", "", "j"); addRule(lines, "i", "", "[aeou]", "j"); addRule(lines, "â", "", "", "a"); addRule(lines, "à", "", "", "a"); addRule(lines, "á", "", "", "a"); addRule(lines, "ã", "", "", "(a|an|on)"); addRule(lines, "é", "", "", "e"); addRule(lines, "ê", "", "", 
"e"); addRule(lines, "í", "", "", "i"); addRule(lines, "ô", "", "", "o"); addRule(lines, "ó", "", "", "o"); addRule(lines, "õ", "", "", "(o|on)"); addRule(lines, "ú", "", "", "u"); addRule(lines, "ü", "", "", "u"); addRule(lines, "aue", "", "", "aue"); addRule(lines, "a", "", "", "a"); addRule(lines, "b", "", "", "b"); addRule(lines, "c", "", "", "k"); addRule(lines, "d", "", "", "d"); addRule(lines, "e", "", "", "(e|i)"); addRule(lines, "f", "", "", "f"); addRule(lines, "g", "", "", "g"); addRule(lines, "h", "", "", "h"); addRule(lines, "i", "", "", "i"); addRule(lines, "j", "", "", "Z"); addRule(lines, "k", "", "", "k"); addRule(lines, "l", "", "", "l"); addRule(lines, "m", "", "", "m"); addRule(lines, "n", "", "", "n"); addRule(lines, "o", "", "", "(o|u)"); addRule(lines, "p", "", "", "p"); addRule(lines, "q", "", "", "k"); addRule(lines, "r", "", "", "r"); addRule(lines, "s", "", "", "S"); addRule(lines, "t", "", "", "t"); addRule(lines, "u", "", "", "u"); addRule(lines, "v", "", "", "v"); addRule(lines, "w", "", "", "v"); addRule(lines, "x", "", "", "(S|ks)"); addRule(lines, "y", "", "", "i"); addRule(lines, "z", "", "", "z"); } private static void addSepRulesSpanish(final Map> lines) { addRule(lines, "ñ", "", "", "(n|nj)"); addRule(lines, "ny", "", "", "nj"); addRule(lines, "ç", "", "", "s"); addRule(lines, "ig", "[aeiou]", "", "(tS|ig)"); addRule(lines, "ix", "[aeiou]", "", "S"); addRule(lines, "tx", "", "", "tS"); addRule(lines, "tj", "", "$", "tS"); addRule(lines, "tj", "", "", "dZ"); addRule(lines, "tg", "", "", "(tg|dZ)"); addRule(lines, "ch", "", "", "(tS|dZ)"); addRule(lines, "bh", "", "", "b"); addRule(lines, "h", "[dgt]", "", ""); addRule(lines, "j", "", "", "(x|Z)"); addRule(lines, "x", "", "", "(ks|gz|S)"); addRule(lines, "w", "", "", "v"); addRule(lines, "v", "^", "", "(B|v)"); addRule(lines, "b", "^", "", "(b|V)"); addRule(lines, "v", "", "", "(b|v)"); addRule(lines, "b", "", "", "(b|v)"); addRule(lines, "m", "", "[bpvf]", "(m|n)"); 
addRule(lines, "c", "", "[ei]", "s"); addRule(lines, "c", "", "", "k"); addRule(lines, "z", "", "", "(z|s)"); addRule(lines, "gu", "", "[ei]", "(g|gv)"); addRule(lines, "g", "", "[ei]", "(x|g|dZ)"); addRule(lines, "qu", "", "", "k"); addRule(lines, "q", "", "", "k"); addRule(lines, "uo", "", "", "(vo|o)"); addRule(lines, "u", "", "[aei]", "v"); addRule(lines, "y", "", "", "(i|j)"); addRule(lines, "ü", "", "", "v"); addRule(lines, "á", "", "", "a"); addRule(lines, "é", "", "", "e"); addRule(lines, "í", "", "", "i"); addRule(lines, "ó", "", "", "o"); addRule(lines, "ú", "", "", "u"); addRule(lines, "à", "", "", "a"); addRule(lines, "è", "", "", "e"); addRule(lines, "ò", "", "", "o"); addRule(lines, "a", "", "", "a"); addRule(lines, "d", "", "", "d"); addRule(lines, "e", "", "", "e"); addRule(lines, "f", "", "", "f"); addRule(lines, "g", "", "", "g"); addRule(lines, "h", "", "", "h"); addRule(lines, "i", "", "", "i"); addRule(lines, "k", "", "", "k"); addRule(lines, "l", "", "", "l"); addRule(lines, "m", "", "", "m"); addRule(lines, "n", "", "", "n"); addRule(lines, "o", "", "", "o"); addRule(lines, "p", "", "", "p"); addRule(lines, "r", "", "", "r"); addRule(lines, "s", "", "", "s"); addRule(lines, "t", "", "", "t"); addRule(lines, "u", "", "", "u"); } private static void addRule(Map> lines, String pat, String lCon, String rCon, String ph) { final Rule r = new Rule(pat, lCon, rCon, parsePhonemeExpr(ph)); final String patternKey = r.pattern.substring(0,1); List rules = lines.get(patternKey); if (rules == null) { rules = new ArrayList< >(); lines.put(patternKey, rules); } rules.add(r); } @GwtIncompatible("incompatible method") private static String createResourceName(final NameType nameType, final RuleType rt, final String lang) { return String.format("org/apache/commons/codec/language/bm/%s_%s_%s.txt", nameType.getName(), rt.getName(), lang); } @GwtIncompatible("incompatible method") private static Scanner createScanner(final NameType nameType, final RuleType rt, 
final String lang) { final String resName = createResourceName(nameType, rt, lang); return new Scanner(Resources.getInputStream(resName), ResourceConstants.ENCODING); } @GwtIncompatible("incompatible method") private static Scanner createScanner(final String lang) { final String resName = String.format("org/apache/commons/codec/language/bm/%s.txt", lang); return new Scanner(Resources.getInputStream(resName), ResourceConstants.ENCODING); } private static boolean endsWith(final CharSequence input, final CharSequence suffix) { if (suffix.length() > input.length()) { return false; } for (int i = input.length() - 1, j = suffix.length() - 1; j >= 0; i--, j--) { if (input.charAt(i) != suffix.charAt(j)) { return false; } } return true; } /** * Gets rules for a combination of name type, rule type and languages. * * @param nameType * the NameType to consider * @param rt * the RuleType to consider * @param langs * the set of languages to consider * @return a list of Rules that apply */ public static List getInstance(final NameType nameType, final RuleType rt, final Languages.LanguageSet langs) { final Map> ruleMap = getInstanceMap(nameType, rt, langs); final List allRules = new ArrayList<>(); for (final List rules : ruleMap.values()) { allRules.addAll(rules); } return allRules; } /** * Gets rules for a combination of name type, rule type and a single language. * * @param nameType * the NameType to consider * @param rt * the RuleType to consider * @param lang * the language to consider * @return a list of Rules that apply */ public static List getInstance(final NameType nameType, final RuleType rt, final String lang) { return getInstance(nameType, rt, LanguageSet.from(new HashSet<>(Arrays.asList(lang)))); } /** * Gets rules for a combination of name type, rule type and languages. 
* * @param nameType * the NameType to consider * @param rt * the RuleType to consider * @param langs * the set of languages to consider * @return a map containing all Rules that apply, grouped by the first character of the rule pattern * @since 1.9 */ public static Map> getInstanceMap(final NameType nameType, final RuleType rt, final Languages.LanguageSet langs) { return langs.isSingleton() ? getInstanceMap(nameType, rt, langs.getAny()) : getInstanceMap(nameType, rt, Languages.ANY); } /** * Gets rules for a combination of name type, rule type and a single language. * * @param nameType * the NameType to consider * @param rt * the RuleType to consider * @param lang * the language to consider * @return a map containing all Rules that apply, grouped by the first character of the rule pattern * @since 1.9 */ public static Map> getInstanceMap(final NameType nameType, final RuleType rt, final String lang) { final Map> rules = RULES.get(nameType).get(rt).get(lang); if (rules == null) { throw new IllegalArgumentException("No rules found for " + nameType.getName() + ", " + rt.getName() + ", " + lang + "."); } return rules; } private static Phoneme parsePhoneme(final String ph) { final int open = ph.indexOf("["); if (open >= 0) { if (!ph.endsWith("]")) { throw new IllegalArgumentException("Phoneme expression contains a '[' but does not end in ']'"); } final String before = ph.substring(0, open); final String in = ph.substring(open + 1, ph.length() - 1); final Set langs = new HashSet<>(Arrays.asList(in.split("[+]"))); return new Phoneme(before, Languages.LanguageSet.from(langs)); } return new Phoneme(ph, Languages.ANY_LANGUAGE); } private static PhonemeExpr parsePhonemeExpr(final String ph) { if (ph.startsWith("(")) { // we have a bracketed list of options if (!ph.endsWith(")")) { throw new IllegalArgumentException("Phoneme starts with '(' so must end with ')'"); } final List phs = new ArrayList<>(); final String body = ph.substring(1, ph.length() - 1); for (final String part 
: body.split("[|]")) { phs.add(parsePhoneme(part)); } if (body.startsWith("|") || body.endsWith("|")) { phs.add(new Phoneme("", Languages.ANY_LANGUAGE)); } return new PhonemeList(phs); } return parsePhoneme(ph); } @GwtIncompatible("incompatible method") private static Map> parseRules(final Scanner scanner, final String location) { final Map> lines = new HashMap<>(); int currentLine = 0; boolean inMultilineComment = false; while (scanner.hasNextLine()) { currentLine++; final String rawLine = scanner.nextLine(); String line = rawLine; if (inMultilineComment) { if (line.endsWith(ResourceConstants.EXT_CMT_END)) { inMultilineComment = false; } } else { if (line.startsWith(ResourceConstants.EXT_CMT_START)) { inMultilineComment = true; } else { // discard comments final int cmtI = line.indexOf(ResourceConstants.CMT); if (cmtI >= 0) { line = line.substring(0, cmtI); } // trim leading-trailing whitespace line = line.trim(); if (line.length() == 0) { continue; // empty lines can be safely skipped } if (line.startsWith(HASH_INCLUDE)) { // include statement final String incl = line.substring(HASH_INCLUDE.length()).trim(); if (incl.contains(" ")) { throw new IllegalArgumentException("Malformed import statement '" + rawLine + "' in " + location); } try (final Scanner hashIncludeScanner = createScanner(incl)) { lines.putAll(parseRules(hashIncludeScanner, location + "->" + incl)); } } else { // rule final String[] parts = line.split("\\s+"); if (parts.length != 4) { throw new IllegalArgumentException("Malformed rule statement split into " + parts.length + " parts: " + rawLine + " in " + location); } try { final String pat = stripQuotes(parts[0]); final String lCon = stripQuotes(parts[1]); final String rCon = stripQuotes(parts[2]); final PhonemeExpr ph = parsePhonemeExpr(stripQuotes(parts[3])); final int cLine = currentLine; final Rule r = new Rule(pat, lCon, rCon, ph) { private final int myLine = cLine; private final String loc = location; @Override public String toString() { 
final StringBuilder sb = new StringBuilder();
                                    sb.append("Rule");
                                    sb.append("{line=").append(myLine);
                                    sb.append(", loc='").append(loc).append('\'');
                                    sb.append(", pat='").append(pat).append('\'');
                                    sb.append(", lcon='").append(lCon).append('\'');
                                    sb.append(", rcon='").append(rCon).append('\'');
                                    sb.append('}');
                                    return sb.toString();
                                }
                            };
                            // group the rule under the first character of its pattern
                            final String patternKey = r.pattern.substring(0, 1);
                            List<Rule> rules = lines.get(patternKey);
                            if (rules == null) {
                                rules = new ArrayList<>();
                                lines.put(patternKey, rules);
                            }
                            rules.add(r);
                        } catch (final IllegalArgumentException e) {
                            throw new IllegalStateException("Problem parsing line '" + currentLine + "' in " +
                                                            location, e);
                        }
                    }
                }
            }
        }

        return lines;
    }

    /**
     * Attempts to compile the regex into direct string ops, falling back to a compiled regular
     * expression in the worst case.
     *
     * @param regex
     *            the regular expression to compile
     * @return an RPattern that will match this regex
     */
    private static RPattern pattern(final String regex) {
        final boolean startsWith = regex.startsWith("^");
        final boolean endsWith = regex.endsWith("$");
        final String content = regex.substring(startsWith ? 1 : 0, endsWith ? regex.length() - 1 : regex.length());
        final boolean boxes = content.contains("[");

        if (!boxes) {
            if (startsWith && endsWith) {
                // exact match
                if (content.length() == 0) {
                    // empty
                    return new RPattern() {
                        @Override
                        public boolean isMatch(final CharSequence input) {
                            return input.length() == 0;
                        }
                    };
                }
                return new RPattern() {
                    @Override
                    public boolean isMatch(final CharSequence input) {
                        // compare by content: CharSequence.equals(String) is false for any
                        // CharSequence implementation other than String itself
                        return content.equals(input.toString());
                    }
                };
            } else if ((startsWith || endsWith) && content.length() == 0) {
                // matches every string
                return ALL_STRINGS_RMATCHER;
            } else if (startsWith) {
                // matches from start
                return new RPattern() {
                    @Override
                    public boolean isMatch(final CharSequence input) {
                        return startsWith(input, content);
                    }
                };
            } else if (endsWith) {
                // matches from end
                return new RPattern() {
                    @Override
                    public boolean isMatch(final CharSequence input) {
                        return endsWith(input, content);
                    }
                };
            }
        } else {
            final boolean startsWithBox = content.startsWith("[");
            final boolean endsWithBox = content.endsWith("]");

            if (startsWithBox && endsWithBox) {
                String boxContent = content.substring(1, content.length() - 1);
                if (!boxContent.contains("[")) {
                    // box containing alternatives
                    final boolean negate = boxContent.startsWith("^");
                    if (negate) {
                        boxContent = boxContent.substring(1);
                    }
                    final String bContent = boxContent;
                    final boolean shouldMatch = !negate;

                    if (startsWith && endsWith) {
                        // exact match
                        return new RPattern() {
                            @Override
                            public boolean isMatch(final CharSequence input) {
                                return input.length() == 1 && contains(bContent, input.charAt(0)) == shouldMatch;
                            }
                        };
                    } else if (startsWith) {
                        // first char
                        return new RPattern() {
                            @Override
                            public boolean isMatch(final CharSequence input) {
                                return input.length() > 0 && contains(bContent, input.charAt(0)) == shouldMatch;
                            }
                        };
                    } else if (endsWith) {
                        // last char
                        return new RPattern() {
                            @Override
                            public boolean isMatch(final CharSequence input) {
                                return input.length() > 0 &&
                                       contains(bContent, input.charAt(input.length() - 1)) == shouldMatch;
                            }
                        };
                    }
                }
            }
        }

return new RPattern() {
            private final RegExp pattern = RegExp.compile(regex);

            @Override
            public boolean isMatch(final CharSequence input) {
                // toString() rather than a (String) cast, which throws ClassCastException for any
                // CharSequence implementation other than String
                final MatchResult matcher = pattern.exec(input.toString());
                return (matcher != null);
            }
        };
    }

    /**
     * Tests whether {@code input} starts with {@code prefix}, comparing character by character.
     *
     * @param input
     *            the sequence to inspect
     * @param prefix
     *            the expected leading characters
     * @return true if {@code input} starts with {@code prefix}
     */
    private static boolean startsWith(final CharSequence input, final CharSequence prefix) {
        if (prefix.length() > input.length()) {
            return false;
        }
        for (int i = 0; i < prefix.length(); i++) {
            if (input.charAt(i) != prefix.charAt(i)) {
                return false;
            }
        }
        return true;
    }

    /**
     * Removes one leading and one trailing {@code DOUBLE_QUOTE} from a string, each only if present.
     *
     * @param str
     *            the text to strip
     * @return the text without its surrounding quotes
     */
    private static String stripQuotes(String str) {
        if (str.startsWith(DOUBLE_QUOTE)) {
            str = str.substring(1);
        }

        if (str.endsWith(DOUBLE_QUOTE)) {
            str = str.substring(0, str.length() - 1);
        }

        return str;
    }

    private final RPattern lContext;

    private final String pattern;

    private final PhonemeExpr phoneme;

    private final RPattern rContext;

    /**
     * Creates a new rule.
     *
     * @param pattern
     *            the pattern
     * @param lContext
     *            the left context
     * @param rContext
     *            the right context
     * @param phoneme
     *            the resulting phoneme
     */
    public Rule(final String pattern, final String lContext, final String rContext, final PhonemeExpr phoneme) {
        this.pattern = pattern;
        // the left context is anchored at its end and the right context at its start, i.e. both are
        // anchored at the side that touches the pattern
        this.lContext = pattern(lContext + "$");
        this.rContext = pattern("^" + rContext);
        this.phoneme = phoneme;
    }

    /**
     * Gets the left context. This is a regular expression that must match to the left of the pattern.
     *
     * @return the left context Pattern
     */
    public RPattern getLContext() {
        return this.lContext;
    }

    /**
     * Gets the pattern. This is a string-literal that must exactly match.
     *
     * @return the pattern
     */
    public String getPattern() {
        return this.pattern;
    }

    /**
     * Gets the phoneme. If the rule matches, this is the phoneme associated with the pattern match.
     *
     * @return the phoneme
     */
    public PhonemeExpr getPhoneme() {
        return this.phoneme;
    }

    /**
     * Gets the right context. This is a regular expression that must match to the right of the pattern.
     *
     * @return the right context Pattern
     */
    public RPattern getRContext() {
        return this.rContext;
    }

    /**
     * Decides if the pattern and context match the input starting at a position. It is a match if the
     * {@code lContext} matches {@code input} up to {@code i}, {@code pattern} matches at i and
     * {@code rContext} matches from the end of the match of {@code pattern} to the end of {@code input}.
     *
     * @param input
     *            the input String
     * @param i
     *            the int position within the input
     * @return true if the pattern and left/right context match, false otherwise
     * @throws IndexOutOfBoundsException
     *             if {@code i} is negative
     */
    public boolean patternAndContextMatches(final CharSequence input, final int i) {
        if (i < 0) {
            throw new IndexOutOfBoundsException("Can not match pattern at negative indexes");
        }

        final int patternLength = this.pattern.length();
        final int ipl = i + patternLength;

        if (ipl > input.length()) {
            // not enough room for the pattern to match
            return false;
        }

        // evaluate the pattern, left context and right context
        // fail early if any of the evaluations is not successful
        // (compare by content: a subSequence of a non-String CharSequence never equals() a String)
        if (!this.pattern.equals(input.subSequence(i, ipl).toString())) {
            return false;
        } else if (!this.rContext.isMatch(input.subSequence(ipl, input.length()))) {
            return false;
        }
        return this.lContext.isMatch(input.subSequence(0, i));
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy