All downloads are free. Search and download functionality uses the official Maven repository.

com.upokecenter.text.DomainUtility Maven / Gradle / Ivy

package com.upokecenter.text;
/*
Written by Peter O. in 2014.
Any copyright is dedicated to the Public Domain.
http://creativecommons.org/publicdomain/zero/1.0/
If you like this, you should donate to Peter O.
at: http://upokecenter.dreamhosters.com/articles/donate-now-2/
 */

import com.upokecenter.util.*;

    /**
     * Utility methods for domain names: a Punycode (RFC 3492) encoder, a
     * decoder, and a calculator for the encoded length. All methods are static;
     * the class cannot be instantiated.
     */
  final class DomainUtility {
    // Punycode parameters, as given in RFC 3492, section 5.
    private static final int BASE = 36;
    private static final int TMIN = 1;
    private static final int TMAX = 26;
    private static final int SKEW = 38;
    private static final int DAMP = 700;
    private static final int INITIAL_BIAS = 72;
    private static final int INITIAL_N = 128;

    /** ACE prefix written in front of every encoded label. */
    private static final String ACE_PREFIX = "xn--";

    /** Maps a Punycode digit value (0-35) to its lowercase character. */
    private static final String PunycodeAlphabet =
      "abcdefghijklmnopqrstuvwxyz0123456789";

    /**
     * Maps an ASCII character (0-127) to its Punycode digit value, or -1 if
     * the character is not a Punycode digit. Letters of either case map to
     * 0-25 and the digits '0'-'9' map to 26-35.
     */
    private static final int[] valueDigitValues = {
      -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
      -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
      -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
      26, 27, 28, 29, 30, 31, 32, 33, 34, 35, -1, -1, -1, -1, -1, -1,
      -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
      15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1,
      -1, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14,
      15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1 };

    private DomainUtility() {
    }

    /**
     * Validates a (string, start, end) triple shared by the public methods.
     * @throws NullPointerException The parameter {@code str} is null.
     * @throws IllegalArgumentException Either {@code index} or {@code
     * endIndex} is less than 0 or greater than {@code str} 's length, or
     * {@code endIndex} is less than {@code index} .
     */
    private static void CheckRange(String str, int index, int endIndex) {
      if (str == null) {
        throw new NullPointerException("str");
      }
      if (index < 0) {
        throw new IllegalArgumentException("index (" + index +
          ") is less than 0");
      }
      if (index > str.length()) {
        throw new IllegalArgumentException("index (" + index +
          ") is more than " + str.length());
      }
      if (endIndex < 0) {
        throw new IllegalArgumentException("endIndex (" + endIndex +
          ") is less than 0");
      }
      if (endIndex > str.length()) {
        throw new IllegalArgumentException("endIndex (" + endIndex +
          ") is more than " + str.length());
      }
      if (endIndex < index) {
        throw new IllegalArgumentException("endIndex (" + endIndex +
          ") is less than " + index);
      }
    }

    /**
     * Reads the code point that starts at {@code index}, never looking at or
     * beyond {@code endIndex}.
     * @return The code point; 0xfffd if the character at {@code index} is a
     * surrogate that is not part of a pair lying wholly before {@code
     * endIndex}; or -1 if {@code index} is negative or not less than {@code
     * endIndex}.
     * @throws NullPointerException The parameter {@code str} is null.
     */
    private static int CodePointAt(String str, int index, int endIndex) {
      if (str == null) {
        throw new NullPointerException("str");
      }
      if (index < 0 || index >= endIndex) {
        return -1;
      }
      int c = str.charAt(index);
      if ((c & 0xfc00) == 0xd800 && index + 1 < endIndex &&
          str.charAt(index + 1) >= 0xdc00 && str.charAt(index + 1) <= 0xdfff) {
        // Combine the surrogate pair into one code point
        return 0x10000 + ((c - 0xd800) << 10) +
          (str.charAt(index + 1) - 0xdc00);
      }
      if ((c & 0xf800) == 0xd800) {
        // Unpaired surrogate
        return 0xfffd;
      }
      return c;
    }

    /**
     * Digit threshold "t" for position {@code k} under the given bias (RFC
     * 3492, section 6.1): clamps {@code k - bias} to the range TMIN..TMAX.
     */
    private static int Threshold(int k, int bias) {
      if (k <= bias) {
        return TMIN;
      }
      return (k >= bias + TMAX) ? TMAX : (k - bias);
    }

    /**
     * Bias adaptation function of RFC 3492, section 6.1.
     * @param delta The delta that was just encoded or decoded.
     * @param numPoints Number of code points handled so far, including the
     * one the delta belongs to.
     * @param firstTime True if this is the first delta of the string.
     * @return The new bias.
     */
    private static int AdaptBias(int delta, int numPoints, boolean firstTime) {
      delta = firstTime ? delta / DAMP : delta >> 1;
      delta += delta / numPoints;
      int k = 0;
      // 455 is ((BASE - TMIN) * TMAX) / 2
      while (delta > ((BASE - TMIN) * TMAX) / 2) {
        delta /= BASE - TMIN;
        k += BASE;
      }
      return k + (((BASE - TMIN + 1) * delta) / (delta + SKEW));
    }

    /**
     * Gets the Punycode length of a string (Punycode is defined in RFC 3492).
     * @param str A string containing the desired portion to get the length for.
     * @param index Zero-based index showing where the desired portion of "str"
     * begins.
     * @param endIndex Zero-based index showing where the desired portion of "str"
     * ends. The character before this index is the last character.
     * @return The Punycode length of the encoded string. If the string contains
     * non-ASCII characters, returns the Punycode length plus 4 (the length
     * of the ACE prefix). If there are only ASCII characters, returns the
     * length of the string. Returns -1 if an overflow error occurs.
     * @throws NullPointerException The parameter {@code str} is null.
     * @throws IllegalArgumentException Either {@code index} or {@code
     * endIndex} is less than 0 or greater than {@code str} 's length, or
     * {@code index} is greater than {@code endIndex} .
     */
    public static int PunycodeLength(String str, int index, int endIndex) {
      CheckRange(str, index, endIndex);
      // Find the first non-ASCII character; if there is none, nothing
      // needs to be encoded
      int firstIndex = index;
      while (firstIndex < endIndex && str.charAt(firstIndex) < 0x80) {
        ++firstIndex;
      }
      if (firstIndex == endIndex) {
        return endIndex - index;
      }
      int n = INITIAL_N;
      int delta = 0;
      int bias = INITIAL_BIAS;
      int h = 0;
      int codePointLength = 0;
      int outputLength = ACE_PREFIX.length();
      int tmpIndex = index;
      while (tmpIndex < endIndex) {
        int c = CodePointAt(str, tmpIndex, endIndex);
        ++codePointLength;
        if (c < 0x80) {
          // Basic (ASCII) code points are copied as is
          ++outputLength;
          ++h;
        }
        tmpIndex += (c >= 0x10000) ? 2 : 1;
      }
      if (h != 0) {
        // Delimiter between the basic code points and the deltas
        ++outputLength;
      }
      int b = h;
      // Everything before firstIndex is ASCII, hence below every "n"; those
      // code points are added to delta in bulk rather than re-scanned
      int basicsBeforeFirstNonbasic = firstIndex - index;
      while (h < codePointLength) {
        int min = 0x110000;
        tmpIndex = firstIndex;
        while (tmpIndex < endIndex) {
          int c = CodePointAt(str, tmpIndex, endIndex);
          tmpIndex += (c >= 0x10000) ? 2 : 1;
          if (c >= n && c < min) {
            min = c;
          }
        }
        int d = min - n;
        if (d > Integer.MAX_VALUE / (h + 1)) {
          return -1;
        }
        d *= h + 1;
        n = min;
        if (d > Integer.MAX_VALUE - delta) {
          return -1;
        }
        delta += d;
        if (basicsBeforeFirstNonbasic > Integer.MAX_VALUE - delta) {
          return -1;
        }
        delta += basicsBeforeFirstNonbasic;
        tmpIndex = firstIndex;
        while (tmpIndex < endIndex) {
          int c = CodePointAt(str, tmpIndex, endIndex);
          tmpIndex += (c >= 0x10000) ? 2 : 1;
          if (c < n) {
            if (delta == Integer.MAX_VALUE) {
              return -1;
            }
            ++delta;
          } else if (c == n) {
            // Count the digits of delta as a variable-length integer
            int q = delta;
            int k = BASE;
            while (true) {
              int t = Threshold(k, bias);
              if (q < t) {
                break;
              }
              ++outputLength;
              q -= t;
              q /= BASE - t;
              k += BASE;
            }
            ++outputLength;
            bias = AdaptBias(delta, h + 1, h == b);
            delta = 0;
            ++h;
          }
        }
        ++n;
        ++delta;
      }
      return outputLength;
    }

    /**
     * Decodes a portion of a string from Punycode (RFC 3492). The portion
     * must not include the "xn--" prefix. ASCII uppercase letters among the
     * basic code points are converted to lowercase.
     * @param str A string containing the portion to decode.
     * @param index Zero-based index where the portion begins.
     * @param endIndex Zero-based index where the portion ends (exclusive).
     * @return The decoded string; an empty string if the portion is empty; or
     * null if the portion is not valid Punycode or an overflow occurs.
     * @throws NullPointerException The parameter {@code str} is null.
     * @throws IllegalArgumentException Either {@code index} or {@code
     * endIndex} is less than 0 or greater than {@code str} 's length, or
     * {@code index} is greater than {@code endIndex} .
     */
    static String PunycodeDecode(String str, int index, int endIndex) {
      CheckRange(str, index, endIndex);
      if (index == endIndex) {
        return "";
      }
      int lastHyphen = endIndex - 1;
      while (lastHyphen >= index && str.charAt(lastHyphen) != '-') {
        --lastHyphen;
      }
      // The output is assembled as code points, not UTF-16 units, because
      // the insertion positions Punycode produces are code point positions.
      // Every output code point uses up at least one input character, so
      // the portion's length is a safe capacity.
      int[] output = new int[endIndex - index];
      int outputLength = 0;
      // Copy the basic code points before the last hyphen (no-op if there
      // is no hyphen, since lastHyphen is then less than index)
      for (int k = index; k < lastHyphen; ++k) {
        int c = str.charAt(k);
        if (c >= 0x80) {
          return null;  // Non-basic character found
        }
        if (c >= 0x41 && c <= 0x5a) {
          // convert to lowercase
          c += 0x20;
        }
        output[outputLength++] = c;
      }
      int pos = (lastHyphen >= index) ? lastHyphen + 1 : index;
      int i = 0;
      int n = INITIAL_N;
      int bias = INITIAL_BIAS;
      while (pos < endIndex) {
        // RFC 3492, section 6.2: "oldi" is the value of i before this delta
        // is read (not the position in the input)
        int old = i;
        int w = 1;
        int k = BASE;
        while (true) {
          if (pos >= endIndex) {
            return null;
          }
          char c = str.charAt(pos);
          ++pos;
          if (c >= 0x80) {
            return null;
          }
          int digit = valueDigitValues[(int)c];
          if (digit < 0) {
            return null;
          }
          if (digit > Integer.MAX_VALUE / w) {
            return null;
          }
          int temp = digit * w;
          if (i > Integer.MAX_VALUE - temp) {
            return null;
          }
          i += temp;
          int t = Threshold(k, bias);
          if (digit < t) {
            break;
          }
          temp = BASE - t;
          if (w > Integer.MAX_VALUE / temp) {
            return null;
          }
          w *= temp;
          k += BASE;
        }
        int futureLength = outputLength + 1;
        bias = AdaptBias(i - old, futureLength, old == 0);
        int idiv = i / futureLength;
        if (n > Integer.MAX_VALUE - idiv) {
          return null;
        }
        n += idiv;
        if (n > 0x10ffff) {
          return null;
        }
        i %= futureLength;
        // Insert n at code point position i
        System.arraycopy(output, i, output, i + 1, outputLength - i);
        output[i] = n;
        ++outputLength;
        ++i;
      }
      StringBuilder builder = new StringBuilder(outputLength);
      for (int k = 0; k < outputLength; ++k) {
        builder.appendCodePoint(output[k]);
      }
      return builder.toString();
    }

    /**
     * Encodes an entire string with {@link #PunycodeEncodePortion}.
     * @param str The string to encode.
     * @return See {@link #PunycodeEncodePortion}.
     * @throws NullPointerException The parameter {@code str} is null.
     */
    static String PunycodeEncode(String str) {
      if (str == null) {
        throw new NullPointerException("str");
      }
      return PunycodeEncodePortion(str, 0, str.length());
    }

    /**
     * Encodes a portion of a string to Punycode (RFC 3492), adding the "xn--"
     * prefix. ASCII uppercase letters are converted to lowercase. Unpaired
     * surrogates, including a high surrogate whose partner lies at or beyond
     * {@code endIndex}, are encoded as U+FFFD.
     * @param str A string containing the portion to encode.
     * @param index Zero-based index where the portion begins.
     * @param endIndex Zero-based index where the portion ends (exclusive).
     * @return The portion unchanged if it consists only of ASCII characters
     * other than uppercase letters. If it is all ASCII but has uppercase
     * letters, "xn--" followed by the lowercased portion (without a trailing
     * hyphen). Otherwise, "xn--" followed by the Punycode encoding. Returns
     * null if an overflow error occurs.
     * @throws NullPointerException The parameter {@code str} is null.
     * @throws IllegalArgumentException Either {@code index} or {@code
     * endIndex} is less than 0 or greater than {@code str} 's length, or
     * {@code index} is greater than {@code endIndex} .
     */
    static String PunycodeEncodePortion(
      String str,
      int index,
      int endIndex) {
      CheckRange(str, index, endIndex);
      // Uppercase ASCII also needs the slow path, since it must be
      // converted to lowercase
      boolean needsWork = false;
      for (int k = index; k < endIndex; ++k) {
        char ch = str.charAt(k);
        if (ch >= 0x80 || (ch >= 0x41 && ch <= 0x5a)) {
          needsWork = true;
          break;
        }
      }
      if (!needsWork) {
        return str.substring(index, endIndex);
      }
      int n = INITIAL_N;
      int delta = 0;
      int bias = INITIAL_BIAS;
      int h = 0;
      int firstIndex = -1;
      int codePointLength = 0;
      StringBuilder builder = new StringBuilder();
      builder.append(ACE_PREFIX);
      int tmpIndex = index;
      while (tmpIndex < endIndex) {
        // Bounded by endIndex so that nothing outside the portion is
        // read, in agreement with PunycodeLength
        int c = CodePointAt(str, tmpIndex, endIndex);
        ++codePointLength;
        if (c >= 0x41 && c <= 0x5a) {
          // This is an uppercase ASCII character,
          // convert to lowercase
          builder.append((char)(c + 0x20));
          ++h;
        } else if (c < 0x80) {
          // This is a basic (ASCII) code point
          builder.append((char)c);
          ++h;
        } else if (firstIndex < 0) {
          firstIndex = tmpIndex;
        }
        tmpIndex += (c >= 0x10000) ? 2 : 1;
      }
      if (firstIndex < 0) {
        // No non-basic code points
        // (NOTE: Not encoded with "-" at end)
        return builder.toString();
      }
      int b = h;
      // Everything before firstIndex is ASCII, hence below every "n"; those
      // code points are added to delta in bulk rather than re-scanned
      int basicsBeforeFirstNonbasic = firstIndex - index;
      if (h != 0) {
        builder.append('-');
      }
      while (h < codePointLength) {
        int min = 0x110000;
        tmpIndex = firstIndex;
        while (tmpIndex < endIndex) {
          int c = CodePointAt(str, tmpIndex, endIndex);
          tmpIndex += (c >= 0x10000) ? 2 : 1;
          if (c >= n && c < min) {
            min = c;
          }
        }
        int d = min - n;
        if (d > Integer.MAX_VALUE / (h + 1)) {
          return null;
        }
        d *= h + 1;
        n = min;
        if (d > Integer.MAX_VALUE - delta) {
          return null;
        }
        delta += d;
        if (basicsBeforeFirstNonbasic > Integer.MAX_VALUE - delta) {
          return null;
        }
        delta += basicsBeforeFirstNonbasic;
        tmpIndex = firstIndex;
        while (tmpIndex < endIndex) {
          int c = CodePointAt(str, tmpIndex, endIndex);
          tmpIndex += (c >= 0x10000) ? 2 : 1;
          if (c < n) {
            if (delta == Integer.MAX_VALUE) {
              return null;
            }
            ++delta;
          } else if (c == n) {
            // Write delta as a variable-length integer
            int q = delta;
            int k = BASE;
            while (true) {
              int t = Threshold(k, bias);
              if (q < t) {
                break;
              }
              int digit = t + ((q - t) % (BASE - t));
              builder.append(PunycodeAlphabet.charAt(digit));
              q -= t;
              q /= BASE - t;
              k += BASE;
            }
            builder.append(PunycodeAlphabet.charAt(q));
            bias = AdaptBias(delta, h + 1, h == b);
            delta = 0;
            ++h;
          }
        }
        ++n;
        ++delta;
      }
      return builder.toString();
    }
  }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy