// All downloads are free. The search and download features use the official Maven repository.
//
// com.hfg.util.StringUtil (Maven / Gradle / Ivy)
//
// There is a newer version: 20240423
// Show newest version
package com.hfg.util;


import java.text.DecimalFormat;
import java.text.NumberFormat;
import java.text.ParsePosition;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.regex.Pattern;
import java.util.regex.Matcher;
import java.util.Collection;

import com.hfg.math.Range;
import com.hfg.util.collection.CollectionUtil;

//------------------------------------------------------------------------------
/**
 * General String utility functions.
 *
 * @author J. Alex Taylor, hairyfatguy.com
 */
//------------------------------------------------------------------------------
// com.hfg XML/HTML Coding Library
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
//
// J. Alex Taylor, President, Founder, CEO, COO, CFO, OOPS hairyfatguy.com
// [email protected]
//------------------------------------------------------------------------------

public class StringUtil
{
   private static final Pattern sWhitespacePattern         = Pattern.compile("[\\s\u00A0]+"); // \u00A0 is the non-breaking space character
   private static final Pattern sTrailingWhitespacePattern = Pattern.compile("[\\s\u00A0]*$"); // \u00A0 is the non-breaking space character
   private static final Pattern sSingleQuotePattern        = Pattern.compile("(^|[^\\\\])'");
   private static final Pattern sDoubleQuotePattern        = Pattern.compile("(^|[^\\\\])\"");
   private static final Pattern sContainsLowerCasePattern  = Pattern.compile("[a-z]");
   private static final Pattern sContainsUpperCasePattern  = Pattern.compile("[A-Z]");

   private static final String sLineSeparator = System.getProperty("line.separator");

   private static Map> sUnicodeBlockRangeMap;
   private static Map> sUnicodeScriptMap;

   //**************************************************************************
   // PUBLIC FUNCTIONS
   //**************************************************************************

   //---------------------------------------------------------------------------
   /**
    Returns the platform line separator that was read from the "line.separator"
    system property when this class was initialized.
    @return the cached line separator string
    */
   public static String getSystemLineSeparator()
   {
      final String separator = sLineSeparator;
      return separator;
   }

   //---------------------------------------------------------------------------
   /**
    Tests whether a character sequence holds anything other than whitespace.
    @param inString the value to examine (may be null)
    @return true if inString is non-null and non-empty after String.trim()
    */
   public static boolean isSet(CharSequence inString)
   {
      if (null == inString)
      {
         return false;
      }

      return ! inString.toString().trim().isEmpty();
   }

   //---------------------------------------------------------------------------
   /**
    Concatenates the elements of a String[] with a single space between
    neighbouring elements.
    @param pieces the values to join (may be null)
    @return the joined string as produced by join(String[], String)
    */
   public static String join(String[] pieces)
   {
      final String singleSpace = " ";
      return join(pieces, singleSpace);
   }

   //---------------------------------------------------------------------------
   /**
    Concatenates the elements of a String[] with the given separator between
    neighbouring elements.
    @param pieces the values to join; null yields an empty string
    @param inSeperator the text placed between elements
    @return the joined string (never null)
    */
   public static String join(String[] pieces, String inSeperator)
   {
      if (null == pieces)
      {
         return "";
      }

      StringBuilder joined = new StringBuilder();
      boolean first = true;
      for (String piece : pieces)
      {
         if (first)
         {
            first = false;
         }
         else
         {
            joined.append(inSeperator);
         }

         joined.append(piece);
      }

      return joined.toString();
   }

   //---------------------------------------------------------------------------
   /**
    Concatenates the string forms of the elements of an Object[] with the given
    separator between neighbouring elements.
    @param inArray the values to join; null yields an empty string
    @param inSeperator the text placed between elements
    @return the joined string (never null)
    */
   public static String join(Object[] inArray, String inSeperator)
   {
      if (null == inArray)
      {
         return "";
      }

      StringBuilder joined = new StringBuilder();
      boolean first = true;
      for (Object element : inArray)
      {
         if (first)
         {
            first = false;
         }
         else
         {
            joined.append(inSeperator);
         }

         joined.append(element);
      }

      return joined.toString();
   }

   //---------------------------------------------------------------------------
   /**
    Concatenates the values of an int[] with the given separator between
    neighbouring values.
    @param inArray the values to join; null yields an empty string
    @param inSeperator the text placed between values
    @return the joined string (never null)
    */
   public static String join(int[] inArray, String inSeperator)
   {
      if (null == inArray)
      {
         return "";
      }

      StringBuilder joined = new StringBuilder();
      boolean first = true;
      for (int value : inArray)
      {
         if (first)
         {
            first = false;
         }
         else
         {
            joined.append(inSeperator);
         }

         joined.append(value);
      }

      return joined.toString();
   }

   //---------------------------------------------------------------------------
   /**
    Concatenates the string forms of the elements of a Collection with the given
    separator between neighbouring elements.
    @param pieces the values to join (may be null)
    @param inSeperator the text placed between elements
    @return the joined string, or null if the collection is null or has no elements
    */
   public static String join(Collection pieces, String inSeperator)
   {
      if (null == pieces)
      {
         return null;
      }

      // The builder is only created once a first element is seen, so an empty
      // collection yields null rather than an empty string.
      StringBuilder joined = null;
      for (Object piece : pieces)
      {
         if (null == joined)
         {
            joined = new StringBuilder();
         }
         else
         {
            joined.append(inSeperator);
         }

         joined.append(piece);
      }

      return (null == joined ? null : joined.toString());
   }

   //---------------------------------------------------------------------------
   /**
    Concatenates the elements of a Collection with the given separator, passing
    every element through singleQuote() first.
    Author: Esther Ting
    @param pieces the values to join (may be null)
    @param inSeperator the text placed between the quoted elements
    @return the joined string, or null if the collection is null or has no elements
    */
   public static String joinIncludeSingleQuote(Collection pieces, String inSeperator)
   {
      if (null == pieces)
      {
         return null;
      }

      // Created lazily so that an empty collection yields null.
      StringBuilder joined = null;
      for (Object piece : pieces)
      {
         if (null == joined)
         {
            joined = new StringBuilder();
         }
         else
         {
            joined.append(inSeperator);
         }

         joined.append(StringUtil.singleQuote(piece));
      }

      return (null == joined ? null : joined.toString());
   }


   //---------------------------------------------------------------------------
   /**
    Returns a String composed of inString repeated inNum times.
    @param inString the text to repeat; a null value is rendered as "null"
    @param inNum the number of repetitions; zero or a negative value yields an empty string
    @return the repeated text (never null)
    */
   public static String polyString(String inString, int inNum)
   {
      // A negative count used to reach the buffer constructor and fail with
      // NegativeArraySizeException; treat it like zero repetitions instead.
      if (inNum <= 0)
      {
         return "";
      }

      // String.valueOf() keeps the long-standing behaviour of appending "null" for a null input.
      String unit = String.valueOf(inString);

      // Size the buffer for the whole result (computed as long to avoid int overflow).
      long capacity = Math.min((long) unit.length() * inNum, (long) Integer.MAX_VALUE - 8);
      StringBuilder buffer = new StringBuilder((int) capacity);

      for (int i = 0; i < inNum; i++)
      {
         buffer.append(unit);
      }

      return buffer.toString();
   }

   //---------------------------------------------------------------------------
   /**
    Returns a String composed of inChar repeated inNum times.
    @param inChar the character to repeat
    @param inNum the number of repetitions; zero or a negative value yields an empty string
    @return the repeated character sequence (never null)
    */
   public static String polyChar(char inChar, int inNum)
   {
      // A negative count used to reach the buffer constructor and fail with
      // NegativeArraySizeException; treat it like zero repetitions instead.
      if (inNum <= 0)
      {
         return "";
      }

      StringBuilder buffer = new StringBuilder(inNum);
      for (int i = 0; i < inNum; i++)
      {
         buffer.append(inChar);
      }

      return buffer.toString();
   }

   //---------------------------------------------------------------------------
   /**
    Returns a String where all instances of inTarget in inString are replaced by
    inReplacement. Both inTarget and inReplacement are treated literally (no
    regular expression or group-reference processing).
    @param inString the text to process (may be null)
    @param inTarget the literal text to look for; if null, no replacement is done
    @param inReplacement the literal text to substitute; if null, no replacement is done
    @return the processed text, or null if inString is null
    */
   public static String replaceAll(CharSequence inString, String inTarget, String inReplacement)
   {
      if (null == inString)
      {
         return null;
      }

      String outString = inString.toString();

      if (inTarget != null
          && inReplacement != null)
      {
         // String.replace() scans left to right and never re-examines replaced text.
         // The previous hand-rolled backward scan stepped back by only one character,
         // so overlapping occurrences (e.g. "aa" in "aaa") were applied on top of
         // already-replaced text and corrupted the result.
         outString = outString.replace(inTarget, inReplacement);
      }

      return outString;
   }

   //---------------------------------------------------------------------------
   /**
    Returns a String where every portion of inString that matches the regular
    expression inRegexpTarget is replaced by inReplacement.
    @param inString the text to process (may be null)
    @param inRegexpTarget the regular expression source; if null, no replacement is done
    @param inReplacement the replacement passed to Matcher.replaceAll(); if null, no replacement is done
    @return the processed text, or null if inString is null
    */
   public static String replaceAllRegexp(CharSequence inString, String inRegexpTarget, String inReplacement)
   {
      if (null == inString)
      {
         return null;
      }

      if (null == inRegexpTarget
          || null == inReplacement)
      {
         return inString.toString();
      }

      Pattern compiledTarget = Pattern.compile(inRegexpTarget);

      return replaceAllRegexp(inString, compiledTarget, inReplacement);
   }

   //---------------------------------------------------------------------------
   /**
    Returns a String where every portion of inString that matches inPattern is
    replaced by inReplacement.
    @param inString the text to process (may be null)
    @param inPattern the compiled pattern to match; if null, no replacement is done
    @param inReplacement the replacement passed to Matcher.replaceAll(); if null, no replacement is done
    @return the processed text, or null if inString is null
    */
   public static String replaceAllRegexp(CharSequence inString, Pattern inPattern, String inReplacement)
   {
      if (null == inString)
      {
         return null;
      }

      if (null == inPattern
          || null == inReplacement)
      {
         return inString.toString();
      }

      return inPattern.matcher(inString).replaceAll(inReplacement);
   }

   //---------------------------------------------------------------------------
   /**
    Removes whitespace (including the non-breaking space) from the end of the
    specified text.
    @param inString the text to trim (may be null)
    @return the trimmed text, or null if inString is null
    */
   public static String trimTrailingWhitespace(CharSequence inString)
   {
      final String nothing = "";
      return replaceAllRegexp(inString, sTrailingWhitespacePattern, nothing);
   }

   //---------------------------------------------------------------------------
   /**
    Removes every run of whitespace (including the non-breaking space) from the
    specified text.
    @param inString the text to process (may be null)
    @return the text without whitespace, or null if inString is null
    */
   public static String removeWhitespace(CharSequence inString)
   {
      final String nothing = "";
      return replaceAllRegexp(inString, sWhitespacePattern, nothing);
   }

   //---------------------------------------------------------------------------
   /**
    Replaces every run of whitespace (including the non-breaking space) in the
    specified text with the given replacement.
    @param inString the text to process (may be null)
    @param inWhitespaceReplacementString the replacement for each whitespace run; null is treated as ""
    @return the processed text, or null if inString is null
    */
   public static String replaceWhitespace(CharSequence inString, CharSequence inWhitespaceReplacementString)
   {
      String replacement = "";
      if (inWhitespaceReplacementString != null)
      {
         replacement = inWhitespaceReplacementString.toString();
      }

      return replaceAllRegexp(inString, sWhitespacePattern, replacement);
   }

   //---------------------------------------------------------------------------
   /**
    Returns the string form of the specified object surrounded by double quotes,
    with a backslash inserted before every internal double quote that is not
    already immediately preceded by a backslash.
    @param inObject the value to quote; null yields an empty pair of quotes
    @return the quoted text (never null)
    */
   public static String quote(Object inObject)
   {
      StringBuilder buffer = new StringBuilder("\"");
      if (inObject != null)
      {
         String value = inObject.toString();
         for (int i = 0; i < value.length(); i++)
         {
            char theChar = value.charAt(i);

            // Decide from the ORIGINAL preceding character. The earlier regexp-based
            // version consumed that character as part of the match, so the second of
            // two adjacent quotes was left unescaped.
            if ('"' == theChar
                && (0 == i || value.charAt(i - 1) != '\\'))
            {
               buffer.append('\\');
            }

            buffer.append(theChar);
         }
      }

      buffer.append('"');
      return buffer.toString();
   }


   //---------------------------------------------------------------------------
   /**
    Returns the string form of the specified object surrounded by single quotes,
    with a backslash inserted before every internal single quote that is not
    already immediately preceded by a backslash.
    @param inObject the value to quote; null yields an empty pair of quotes
    @return the quoted text (never null)
    */
   public static String singleQuote(Object inObject)
   {
      StringBuilder buffer = new StringBuilder("'");
      if (inObject != null)
      {
         String value = inObject.toString();
         for (int i = 0; i < value.length(); i++)
         {
            char theChar = value.charAt(i);

            // Decide from the ORIGINAL preceding character. The earlier regexp-based
            // version consumed that character as part of the match, so the second of
            // two adjacent quotes was left unescaped.
            if ('\'' == theChar
                && (0 == i || value.charAt(i - 1) != '\\'))
            {
               buffer.append('\\');
            }

            buffer.append(theChar);
         }
      }

      buffer.append('\'');
      return buffer.toString();
   }

   //---------------------------------------------------------------------------
   /**
    Returns whether or not the specified String is contained within surrounding
    quotes, i.e. it starts and ends with a double quote or starts and ends with
    a single quote.
    @param inString the text to examine (may be null)
    @return true if the text is at least two characters long and enclosed in a matching pair of quotes
    */
   public static boolean isQuoted(CharSequence inString)
   {
      if (null == inString)
      {
         return false;
      }

      String string = inString.toString();

      // A lone quote character both "starts" and "ends" with a quote but encloses
      // nothing; reporting it as quoted made unquote() fail in substring().
      if (string.length() < 2)
      {
         return false;
      }

      char first = string.charAt(0);
      char last  = string.charAt(string.length() - 1);

      return (first == last
              && ('"' == first || '\'' == first));
   }

   //---------------------------------------------------------------------------
   /**
    Returns the specified String while removing surrounding quotes if present.
    @param inString the text to process (may be null)
    @return the text without its enclosing quote pair, the unchanged text if it
            is not quoted, or null if inString is null
    */
   public static String unquote(CharSequence inString)
   {
      if (null == inString)
      {
         return null;
      }

      String result = inString.toString();

      // The length check keeps substring() in range for a value that consists of
      // a single quote character.
      if (result.length() >= 2
          && isQuoted(result))
      {
         result = result.substring(1, result.length() - 1);
      }

      return result;
   }

   //---------------------------------------------------------------------------
   /**
    Returns the number of times the specified character appears in the specified String.
    @param inString the text to scan; null yields 0
    @param inChar the character to count (whitespace characters are counted like any other)
    @return the number of occurrences
    */
   public static int getCharCount(CharSequence inString, char inChar)
   {
      int count = 0;

      // Only null is excluded here. The previous isSet() guard also rejected
      // whitespace-only text, so counting spaces or tabs in such text returned 0.
      if (inString != null)
      {
         final int length = inString.length();
         for (int i = 0; i < length; i++)
         {
            if (inString.charAt(i) == inChar)
            {
               count++;
            }
         }
      }

      return count;
   }

   //---------------------------------------------------------------------------
   /**
    Returns the characters of the specified text in the order produced by
    ArrayUtil.shuffle().
    @param inString the text whose characters are to be rearranged (must not be null)
    @return a new String built from the shuffled characters
    */
   public static String scramble(CharSequence inString)
   {
      final char[] shuffled = inString.toString().toCharArray();

      ArrayUtil.shuffle(shuffled);

      return String.valueOf(shuffled);
   }

   //---------------------------------------------------------------------------
   /**
    Applies every entry of the substitution map to the specified text via
    String.replaceAll(). Each key is therefore interpreted as a regular
    expression and each value as a regexp replacement string. Entries are
    applied one after another in the map's iteration order, so a later entry
    also sees text produced by an earlier one.
    @param inString the text to process (must not be null)
    @param inSubstitutionMap map of regular expression to replacement (must not be null)
    @return the text after all substitutions have been applied
    */
   public static String applySubstitutionMap(CharSequence inString, Map<String, String> inSubstitutionMap)
   {
      String substitutedString = inString.toString();

      for (Map.Entry<String, String> substitution : inSubstitutionMap.entrySet())
      {
         substitutedString = substitutedString.replaceAll(substitution.getKey(), substitution.getValue());
      }

      return substitutedString;
   }

   //---------------------------------------------------------------------------
   /**
    Replaces delimited tokens in the specified text. A token is the shortest
    non-empty run of characters found between two occurrences of inDelimiter;
    the token together with both delimiters is replaced by the value that the
    substitution map holds for the token. Replacement text is not rescanned.
    Note that inDelimiter is inserted into a regular expression as-is, so
    regexp metacharacters in it must be escaped by the caller.
    @param inString the text to process (must not be null)
    @param inSubstitutionMap map of token name to replacement text (must not be null)
    @param inDelimiter the regular expression fragment that surrounds each token
    @return the text with all tokens substituted
    @throws RuntimeException if the text contains tokens that have no entry in the map
    */
   public static String applySubstitutionMap(CharSequence inString, Map<String, String> inSubstitutionMap, String inDelimiter)
   {
      StringBuilder buffer = new StringBuilder(inString.toString());

      Pattern pattern = Pattern.compile("(" + inDelimiter + "(.+?)" + inDelimiter + ")");

      Set<String> unsubstitutedTokens = new HashSet<>(25);

      // The matcher reads the (mutable) buffer; find(int) resets it, so every
      // search sees the buffer's current content.
      Matcher m = pattern.matcher(buffer);
      int index = 0;
      while (m.find(index))
      {
         String token = m.group(2);
         if (inSubstitutionMap.containsKey(token))
         {
            String replacementString = inSubstitutionMap.get(token);

            buffer.replace(m.start(1), m.end(1), replacementString);

            // Continue right after the inserted text.
            index = m.start(1) + replacementString.length();
         }
         else
         {
            unsubstitutedTokens.add(token);

            // Continue right after the closing delimiter. Skipping one extra character
            // here could step past the end of the buffer (making find() throw
            // IndexOutOfBoundsException) and could miss a token that starts immediately
            // after this one.
            index = m.end(1);
         }
      }

      if (CollectionUtil.hasValues(unsubstitutedTokens))
      {
         throw new RuntimeException("Unsubstituted tokens: [" + StringUtil.join(unsubstitutedTokens, ", ") + "]!");
      }

      return buffer.toString();
   }

   //---------------------------------------------------------------------------
   /**
    Removes everything that looks like a tag from the specified text: each
    '&lt;' together with the shortest following run of characters up to the
    next '&gt;' on the same line.
    @param inString the text to process (may be null)
    @return the text without tags, or null if inString is null
    */
   public static String stripHTMLTags(String inString)
   {
      if (null == inString)
      {
         return null;
      }

      return inString.replaceAll("\\<.*?\\>", "");
   }

   //---------------------------------------------------------------------------
   /**
    Limits a string to a maximum number of characters. A string that is longer
    than the limit is cut off at the limit; any other string is returned as-is.
    @param inString the original string value (may be null)
    @param inMaxLength the maximum length allowed for the string; negative values are treated as 0
    @return the processed string value, or null if inString is null
    */
   public static String truncate(String inString, int inMaxLength)
   {
      if (null == inString)
      {
         return null;
      }

      final int limit = Math.max(inMaxLength, 0);
      if (inString.length() <= limit)
      {
         return inString;
      }

      return inString.substring(0, limit);
   }

   //---------------------------------------------------------------------------
   /**
    Truncates the string and adds an ellipsis (the single U+2026 character) if it
    exceeds the specified length. Otherwise the original string is returned.
    @param inString the string to be truncated
    @param inMaxLength the length beyond which the specified string should be
                       truncated; negative values are treated as 0
    @return  the truncated string, or null if inString is null, empty, or
             contains only whitespace
    */
   public static String truncateWithEllipsis(String inString, int inMaxLength)
   {
      String result = null;

      if (StringUtil.isSet(inString))
      {
         // Clamp like truncate() does; a negative length used to make substring() throw.
         final int limit = Math.max(inMaxLength, 0);

         if (inString.length() <= limit)
         {
            // No need to truncate
            result = inString;
         }
         else
         {
            result = inString.substring(0, limit) + '\u2026';
         }
      }

      return result;
   }

   //---------------------------------------------------------------------------
   /**
    Reports whether the specified string contains at least one of the letters a-z.
    @param inString the string value to be checked (may be null)
    @return true if a lower-case letter a-z is present; false otherwise or if inString is null
    */
   public static boolean containsLowerCase(String inString)
   {
      if (null == inString)
      {
         return false;
      }

      return sContainsLowerCasePattern.matcher(inString).find();
   }

   //---------------------------------------------------------------------------
   /**
    Reports whether the specified string contains at least one of the letters A-Z.
    @param inString the string value to be checked (may be null)
    @return true if an upper-case letter A-Z is present; false otherwise or if inString is null
    */
   public static boolean containsUpperCase(String inString)
   {
      if (null == inString)
      {
         return false;
      }

      return sContainsUpperCasePattern.matcher(inString).find();
   }

   //---------------------------------------------------------------------------
   /**
    Upper-cases the first character of the input string and leaves the rest
    untouched.
    @param inString the string value to be capitalized
    @return  the capitalized string; the input itself if it is null, empty, or
             contains only whitespace
    */
   public static String capitalize(String inString)
   {
      if (! isSet(inString))
      {
         return inString;
      }

      String remainder = "";
      if (inString.length() > 1)
      {
         remainder = inString.substring(1);
      }

      return Character.toUpperCase(inString.charAt(0)) + remainder;
   }


   //---------------------------------------------------------------------------
   /**
    Renders the decimal form of the specified number with Unicode superscript
    characters.
    @param inNumericValue the numeric value to be superscripted
    @return  the unicode string for the specified number as a superscript
    */
   public static String toSuperscript(int inNumericValue)
   {
      final String decimalDigits = inNumericValue + "";
      return toSuperscript(decimalDigits);
   }

   //---------------------------------------------------------------------------
   /**
    Converts each character of the specified string to its Unicode superscript
    counterpart. Supported input characters are the digits 0-9, '+', '-', '=',
    '(', ')' and 'n'.
    @param inString the value to be superscripted (must not be null)
    @return  the unicode string for the specified value as a superscript
    @throws RuntimeException if the string contains a character without a supported superscript form
    */
   public static String toSuperscript(String inString)
   {
      // Parallel lookup tables: the character at position p of plainChars maps to
      // the character at position p of raisedChars.
      final String plainChars  = "0123456789+-=()n";
      final String raisedChars = "\u2070\u00B9\u00B2\u00B3\u2074\u2075\u2076\u2077\u2078\u2079\u207A\u207B\u207C\u207D\u207E\u207F";

      StringBuilder buffer = new StringBuilder();
      for (int i = 0; i < inString.length(); i++)
      {
         char plainChar = inString.charAt(i);

         int position = plainChars.indexOf(plainChar);
         if (position < 0)
         {
            throw new RuntimeException("Unsupported superscript char: " + StringUtil.singleQuote(plainChar) + "!");
         }

         buffer.append(raisedChars.charAt(position));
      }

      return buffer.toString();
   }

   //---------------------------------------------------------------------------
   /**
    Renders the decimal form of the specified number with Unicode subscript
    characters.
    @param inNumericValue the numeric value to be subscripted
    @return  the unicode string for the specified number as a subscript
    */
   public static String toSubscript(int inNumericValue)
   {
      final String decimalDigits = inNumericValue + "";
      return toSubscript(decimalDigits);
   }

   //---------------------------------------------------------------------------
   /**
    Converts each character of the specified string to its Unicode subscript
    counterpart. Supported input characters are the digits 0-9, '+', '-', '=',
    '(', ')', the letters a, e, o, x, h, k, l, m, n, p, s, t and the schwa (U+0259).
    @param inString the value to be subscripted (must not be null)
    @return  the unicode string for the specified value as a subscript
    @throws RuntimeException if the string contains a character without a supported subscript form
    */
   public static String toSubscript(String inString)
   {
      // Parallel lookup tables: the character at position p of plainChars maps to
      // the character at position p of loweredChars.
      final String plainChars   = "0123456789" + "+-=()" + "aeox\u0259" + "hklmnpst";
      final String loweredChars = "\u2080\u2081\u2082\u2083\u2084\u2085\u2086\u2087\u2088\u2089"
                                  + "\u208A\u208B\u208C\u208D\u208E"
                                  + "\u2090\u2091\u2092\u2093\u2094"
                                  + "\u2095\u2096\u2097\u2098\u2099\u209A\u209B\u209C";

      StringBuilder buffer = new StringBuilder();
      for (int i = 0; i < inString.length(); i++)
      {
         char plainChar = inString.charAt(i);

         int position = plainChars.indexOf(plainChar);
         if (position < 0)
         {
            throw new RuntimeException("Unsupported subscript char: " + StringUtil.singleQuote(plainChar) + "!");
         }

         buffer.append(loweredChars.charAt(position));
      }

      return buffer.toString();
   }

   //---------------------------------------------------------------------------
   /**
    Splits the specified text into lines at each "\n" or "\r\n". As with
    String.split(), trailing empty lines are not included in the result.
    @param inString the text to split (may be null)
    @return the lines of the text, or null if inString is null
    */
   public static String[] lines(CharSequence inString)
   {
      if (null == inString)
      {
         return null;
      }

      final String text = inString.toString();
      return text.split("\\r?\\n");
   }

   //---------------------------------------------------------------------------
   /**
    Inserts line breaks (the system line separator) into the specified text so
    that lines are broken at around inMaxLineLength characters. A break is made
    at the last break opportunity found when scanning backwards from the line
    limit: a space is replaced by the line separator, while after a ',', ';' or
    '.' the line separator is inserted. If the scanned window contains no break
    opportunity (e.g. a very long word), the line is allowed to run on until the
    next one.
    @param inString the text to wrap (may be null)
    @param inMaxLineLength the target line length; if it is not positive the text is returned unchanged
    @return the wrapped text, or null if inString is null
    */
   public static String wrap(String inString, int inMaxLineLength)
   {
      // Without a positive line length the scan window below would be empty and
      // the loop could never advance.
      if (null == inString
          || inMaxLineLength <= 0)
      {
         return inString;
      }

      StringBuilderPlus buffer = new StringBuilderPlus(inString);

      int index = inMaxLineLength;
      while (index < buffer.length())
      {
         boolean lineBroken = false;

         for (int i = index; i > index - inMaxLineLength && ! lineBroken; i--)
         {
            char theChar = buffer.charAt(i);

            if (theChar == ' ')
            {
               buffer.replace(i, i + 1, sLineSeparator);
               index = i + inMaxLineLength + 1;
               lineBroken = true;
            }
            else if (theChar == ','
                     || theChar == ';'
                     || theChar == '.')
            {
               buffer.insert(i + 1, sLineSeparator);
               index = i + inMaxLineLength + 1;
               lineBroken = true;
            }
         }

         if (! lineBroken)
         {
            // No break opportunity in this window. Slide the window forward by one
            // character so the loop always makes progress; previously the index was
            // left unchanged here and the method never returned.
            index++;
         }
      }

      return buffer.toString();
   }

   //---------------------------------------------------------------------------
   /**
    Builds a string of the specified length by repeatedly picking a character
    at a Math.random()-chosen position of the specified alphabet.
    @param inAlphabet a string of the allowed characters
    @param inLength the length of the random string to be composed
    @return  the random string
    */
   public static String generateRandomString(CharSequence inAlphabet, int inLength)
   {
      StringBuilderPlus buffer = new StringBuilderPlus();

      int remaining = inLength;
      while (remaining-- > 0)
      {
         int position = (int) Math.floor(Math.random() * inAlphabet.length());
         buffer.append(inAlphabet.charAt(position));
      }

      return buffer.toString();
   }

   //---------------------------------------------------------------------------
   /**
    Builds a string from randomly chosen code points of the specified Unicode
    script. Each chosen code point is appended via Character.toChars(), so
    supplementary code points contribute two chars to the result.
    @param inScript the Unicode script to draw code points from
    @param inLength the number of code points to append
    @return  the random string
    */
   public static String generateRandomString(Character.UnicodeScript inScript, int inLength)
   {
      List<Integer> unicodeCodePoints = getCharactersForUnicodeScript(inScript);

      StringBuilderPlus buffer = new StringBuilderPlus();
      for (int i = 0; i < inLength; i++)
      {
         int decimalValue = CollectionUtil.getRandomListItem(unicodeCodePoints);

         buffer.append(new String(Character.toChars(decimalValue)));
      }

      return buffer.toString();
   }

   //---------------------------------------------------------------------------
   // https://en.wikipedia.org/wiki/Template:Unicode_chart_Egyptian_Hieroglyphs
   // 𓀁𓃠𓈷𓎼𓄊𓁖𓋊𓇚𓋻𓏰𓍪𓋼𓊠𓇙𓈓𓍄𓉨𓁹𓀻𓍝𓇻𓋨𓈳𓊄𓄯𓈺
   /**
    Builds a random string from code points of the EGYPTIAN_HIEROGLYPHS Unicode script.
    @param inLength the number of code points to generate
    @return  the random string
    */
   public static String generateRandomEgyptianHieroglyphics(int inLength)
   {
      final Character.UnicodeScript hieroglyphs = Character.UnicodeScript.EGYPTIAN_HIEROGLYPHS;
      return generateRandomString(hieroglyphs, inLength);
   }


   //---------------------------------------------------------------------------
   /**
    Returns the code point range recorded for the specified Unicode block. The
    block-to-range table is built on the first call.
    @param inBlock the Unicode block to look up
    @return the range of code points for the block, or null if the table has no entry for it
    */
   public static synchronized Range<Integer> getRangeForUnicodeBlock(Character.UnicodeBlock inBlock)
   {
      if (null == sUnicodeBlockRangeMap)
      {
         determineUnicodeBlockRanges();
      }

      return sUnicodeBlockRangeMap.get(inBlock);
   }

   //---------------------------------------------------------------------------
   /**
    Returns the code points recorded for the specified Unicode script. The
    script-to-code-points table is built on the first call. The returned list is
    the table's own list, not a copy.
    @param inScript the Unicode script to look up
    @return the code points of the script, or null if the table has no entry for it
    */
   public static synchronized List<Integer> getCharactersForUnicodeScript(Character.UnicodeScript inScript)
   {
      if (null == sUnicodeScriptMap)
      {
         buildUnicodeScriptMap();
      }

      return sUnicodeScriptMap.get(inScript);
   }

   //---------------------------------------------------------------------------
   /**
    Formats a number for the default locale. Use for formatting large numbers
    with grouping separators. Ex: 20,512 instead of 20512
    @param inValue the number to format (may be null)
    @return the formatted number, or null if inValue is null
    */
   // Not synchronized: no shared state is touched, and holding the class lock
   // here would make callers wait behind the Unicode table builders.
   public static String generateLocalizedNumberString(Number inValue)
   {
      return generateLocalizedNumberString(inValue, Locale.getDefault());
   }

   //---------------------------------------------------------------------------
   /**
    Formats a number for the specified locale. Use for formatting large numbers
    with grouping separators. Ex: 20,512 instead of 20512
    @param inValue the number to format (may be null)
    @param inLocale the locale whose number format is to be used
    @return the formatted number, or null if inValue is null
    */
   // Not synchronized: a fresh NumberFormat is obtained per call and no shared
   // state is touched, so holding the class lock only made callers wait behind
   // the Unicode table builders.
   public static String generateLocalizedNumberString(Number inValue, Locale inLocale)
   {
      if (null == inValue)
      {
         return null;
      }

      return NumberFormat.getInstance(inLocale).format(inValue);
   }

   //---------------------------------------------------------------------------
   /**
    Reports whether the specified text (ignoring leading and trailing
    whitespace) represents a number. The text qualifies if Double.parseDouble()
    accepts it or if the default locale's number format can parse it in its
    entirety (e.g. "1,234" in a US locale).
    @param inString the text to examine (may be null)
    @return true if the text is a number; false otherwise, including for null, empty, or blank text
    */
   public static boolean isNumber(CharSequence inString)
   {
      boolean result = false;
      if (inString != null)
      {
         String s = inString.toString().trim();

         try
         {
            Double.parseDouble(s);
            result = true;
         }
         catch (NumberFormatException e)
         {
            // Attempt to deal with localized number strings
            NumberFormat format = NumberFormat.getInstance();
            ParsePosition parsePosition = new ParsePosition(0);

            Number parsedValue = format.parse(s, parsePosition);

            // parse() returns null when nothing could be parsed. Checking only the
            // parse position is not enough: for empty text the position (0) equals
            // the length (0), which used to report "" as a number.
            if (parsedValue != null
                && parsePosition.getIndex() == s.length())
            {
               result = true;
            }
         }
      }

      return result;
   }

   //---------------------------------------------------------------------------
   /**
    Reports whether none of the array's values contains non-whitespace content
    (as judged by isSet()).
    @param inArray the values to examine (may be null)
    @return false as soon as one value is set; true otherwise, including for a null or empty array
    */
   public static boolean allValuesAreEmpty(String[] inArray)
   {
      if (inArray != null)
      {
         for (String candidate : inArray)
         {
            if (StringUtil.isSet(candidate))
            {
               return false;
            }
         }
      }

      return true;
   }

   //---------------------------------------------------------------------------
   /**
    * Calculates the Levenshtein distance (the minimum number of single-character
    * insertions, deletions and substitutions) between two Strings. A null
    * argument is treated like an empty string.
    * Algorithm based on Apache Commons implementation.
    * @param inString1 the first string for comparison
    * @param inString2 the second string for comparison
    * @return the computed Levenshtein distance
    */
   public static int computeLevenshteinDistance(CharSequence inString1, CharSequence inString2)
   {
      if (null == inString1)
      {
         return (null == inString2 ? 0 : inString2.length());
      }

      if (null == inString2)
      {
         return inString1.length();
      }

      // Keep the working row as small as possible by indexing it with the shorter string.
      CharSequence shorter = inString1;
      CharSequence longer  = inString2;
      if (shorter.length() > longer.length())
      {
         shorter = inString2;
         longer  = inString1;
      }

      final int shortLength = shorter.length();
      final int longLength  = longer.length();

      // Single-row dynamic programming table: row[c] holds the distance between the
      // first c chars of the shorter string and the processed prefix of the longer one.
      final int[] row = new int[shortLength + 1];
      for (int col = 0; col <= shortLength; col++)
      {
         row[col] = col;
      }

      for (int consumed = 1; consumed <= longLength; consumed++)
      {
         final char longChar = longer.charAt(consumed - 1);

         // Value of the cell diagonally up-left of the one being computed.
         int diagonal = row[0];
         row[0] = consumed;

         for (int col = 1; col <= shortLength; col++)
         {
            final int above = row[col];

            final int substitution   = diagonal + (shorter.charAt(col - 1) == longChar ? 0 : 1);
            final int insertOrDelete = Math.min(row[col - 1], above) + 1;

            row[col] = Math.min(insertOrDelete, substitution);
            diagonal = above;
         }
      }

      return row[shortLength];
   }

   //**************************************************************************
   // PRIVATE FUNCTIONS
   //**************************************************************************

   //---------------------------------------------------------------------------
   /**
    Builds the table that maps each Unicode block to its range of code points by
    walking all code points from Character.MIN_CODE_POINT to
    Character.MAX_CODE_POINT and recording where the block changes. Code points
    for which Character.UnicodeBlock.of() returns null (no defined block) are
    skipped. The table is assigned to sUnicodeBlockRangeMap only once complete.
    */
   private static void determineUnicodeBlockRanges()
   {
      Map<Character.UnicodeBlock, Range<Integer>> blockRangeMap = new HashMap<>(300);

      Character.UnicodeBlock currentUnicodeBlock = null;
      Range<Integer> currentCodePointRange = null;
      for (int decimalValue = Character.MIN_CODE_POINT; decimalValue <= Character.MAX_CODE_POINT; decimalValue++)
      {
         Character.UnicodeBlock unicodeBlock = Character.UnicodeBlock.of(decimalValue);

         boolean sameBlock = (null == unicodeBlock ? null == currentUnicodeBlock
                                                   : unicodeBlock.equals(currentUnicodeBlock));
         if (! sameBlock)
         {
            // Close the range of the block that just ended (if there was one).
            if (currentUnicodeBlock != null)
            {
               currentCodePointRange.setEnd(decimalValue - 1);
               blockRangeMap.put(currentUnicodeBlock, currentCodePointRange);
            }

            currentUnicodeBlock = unicodeBlock;
            currentCodePointRange = null;

            // Only open a range for a real block. Previously every code point outside
            // a block allocated a new Range and stored it under a null key.
            if (unicodeBlock != null)
            {
               currentCodePointRange = new Range<>();
               currentCodePointRange.setStart(decimalValue);
            }
         }
      }

      // Close the block that extends to the last code point (if any).
      if (currentUnicodeBlock != null)
      {
         currentCodePointRange.setEnd(Character.MAX_CODE_POINT);
         blockRangeMap.put(currentUnicodeBlock, currentCodePointRange);
      }

      sUnicodeBlockRangeMap = blockRangeMap;
   }

   //---------------------------------------------------------------------------
   /**
    Builds the table that maps each Unicode script to the list of its code
    points by walking all code points from Character.MIN_CODE_POINT to
    Character.MAX_CODE_POINT. The table is assigned to sUnicodeScriptMap only
    once complete, so a failure part-way through does not leave a partial table
    behind.
    */
   private static void buildUnicodeScriptMap()
   {
      Map<Character.UnicodeScript, List<Integer>> scriptMap = new HashMap<>(300);

      for (int decimalValue = Character.MIN_CODE_POINT; decimalValue <= Character.MAX_CODE_POINT; decimalValue++)
      {
         Character.UnicodeScript unicodeScript = Character.UnicodeScript.of(decimalValue);
         if (unicodeScript != null)
         {
            List<Integer> codePoints = scriptMap.get(unicodeScript);
            if (null == codePoints)
            {
               codePoints = new ArrayList<>();
               scriptMap.put(unicodeScript, codePoints);
            }

            codePoints.add(decimalValue);
         }
      }

      // Save memory by trimming the character lists to size
      for (List<Integer> codePoints : scriptMap.values())
      {
         // Every list in the map was created above as an ArrayList.
         ((ArrayList<Integer>) codePoints).trimToSize();
      }

      sUnicodeScriptMap = scriptMap;
   }
}




// © 2015 - 2024 Weber Informatics LLC | Privacy Policy