All Downloads are FREE. Search and download functionalities are using the official Maven repository.

jdk.internal.icu.impl.Punycode Maven / Gradle / Ivy

There is a newer version: 17.alpha.0.57
Show newest version
/*
 * Copyright (c) 2005, 2020, Oracle and/or its affiliates. All rights reserved.
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
 *
 * This code is free software; you can redistribute it and/or modify it
 * under the terms of the GNU General Public License version 2 only, as
 * published by the Free Software Foundation.  Oracle designates this
 * particular file as subject to the "Classpath" exception as provided
 * by Oracle in the LICENSE file that accompanied this code.
 *
 * This code is distributed in the hope that it will be useful, but WITHOUT
 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 * version 2 for more details (a copy is included in the LICENSE file that
 * accompanied this code).
 *
 * You should have received a copy of the GNU General Public License version
 * 2 along with this work; if not, write to the Free Software Foundation,
 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
 * or visit www.oracle.com if you need additional information or have any
 * questions.
 */
/*
 *******************************************************************************
 * Copyright (C) 2003-2004, International Business Machines Corporation and    *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 */
//
// CHANGELOG
//      2005-05-19 Edward Wang
//          - copy this file from icu4jsrc_3_2/src/com/ibm/icu/text/Punycode.java
//          - move from package com.ibm.icu.text to package sun.net.idn
//          - use ParseException instead of StringPrepParseException
//      2007-08-14 Martin Buchholz
//          - remove redundant casts
//
package jdk.internal.icu.impl;

import java.text.ParseException;
import jdk.internal.icu.lang.UCharacter;
import jdk.internal.icu.text.UTF16;

/**
 * Ported code from ICU punycode.c
 * @author ram
 */

/* Package Private class */
public final class Punycode {

    /* Punycode parameters for Bootstring */
    private static final int BASE           = 36;
    private static final int TMIN           = 1;
    private static final int TMAX           = 26;
    private static final int SKEW           = 38;
    private static final int DAMP           = 700;
    private static final int INITIAL_BIAS   = 72;
    private static final int INITIAL_N      = 0x80;

    /* "Basic" Unicode/ASCII code points */
    private static final int HYPHEN         = 0x2d;
    private static final int DELIMITER      = HYPHEN;

    private static final int ZERO           = 0x30;
    private static final int NINE           = 0x39;

    private static final int SMALL_A        = 0x61;
    private static final int SMALL_Z        = 0x7a;

    private static final int CAPITAL_A      = 0x41;
    private static final int CAPITAL_Z      = 0x5a;

    //  TODO: eliminate the 256 limitation
    private static final int MAX_CP_COUNT   = 256;

    private static final int UINT_MAGIC     = 0x80000000;
    private static final long ULONG_MAGIC   = 0x8000000000000000L;

    private static int adaptBias(int delta, int length, boolean firstTime){
        if(firstTime){
            delta /=DAMP;
        }else{
            delta /=  2;
        }
        delta += delta/length;

        int count=0;
        for(; delta>((BASE-TMIN)*TMAX)/2; count+=BASE) {
            delta/=(BASE-TMIN);
        }

        return count+(((BASE-TMIN+1)*delta)/(delta+SKEW));
    }

    /**
     * basicToDigit[] contains the numeric value of a basic code
     * point (for use in representing integers) in the range 0 to
     * BASE-1, or -1 if b is does not represent a value.
     */
    static final int[]    basicToDigit= new int[]{
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,

        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        26, 27, 28, 29, 30, 31, 32, 33, 34, 35, -1, -1, -1, -1, -1, -1,

        -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,
        15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1,

        -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14,
        15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, -1, -1, -1, -1, -1,

        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,

        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,

        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,

        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
        -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1
    };

    private static char asciiCaseMap(char b, boolean uppercase) {
        if(uppercase) {
            if(SMALL_A<=b && b<=SMALL_Z) {
                b-=(SMALL_A-CAPITAL_A);
            }
        } else {
            if(CAPITAL_A<=b && b<=CAPITAL_Z) {
                b+=(SMALL_A-CAPITAL_A);
            }
        }
        return b;
    }

    /**
     * digitToBasic() returns the basic code point whose value
     * (when used for representing integers) is d, which must be in the
     * range 0 to BASE-1. The lowercase form is used unless the uppercase flag is
     * nonzero, in which case the uppercase form is used.
     */
    private static char digitToBasic(int digit, boolean uppercase) {
        /*  0..25 map to ASCII a..z or A..Z */
        /* 26..35 map to ASCII 0..9         */
        if(digit<26) {
            if(uppercase) {
                return (char)(CAPITAL_A+digit);
            } else {
                return (char)(SMALL_A+digit);
            }
        } else {
            return (char)((ZERO-26)+digit);
        }
    }
    /**
     * Converts Unicode to Punycode.
     * The input string must not contain single, unpaired surrogates.
     * The output will be represented as an array of ASCII code points.
     *
     * @param src
     * @param caseFlags
     * @return
     * @throws ParseException
     */
    public static StringBuffer encode(StringBuffer src, boolean[] caseFlags) throws ParseException{

        int[] cpBuffer = new int[MAX_CP_COUNT];
        int n, delta, handledCPCount, basicLength, destLength, bias, j, m, q, k, t, srcCPCount;
        char c, c2;
        int srcLength = src.length();
        int destCapacity = MAX_CP_COUNT;
        char[] dest = new char[destCapacity];
        StringBuffer result = new StringBuffer();
        /*
         * Handle the basic code points and
         * convert extended ones to UTF-32 in cpBuffer (caseFlag in sign bit):
         */
        srcCPCount=destLength=0;

        for(j=0; j0) {
            if(destLength state to , but guard against overflow:
             */
            if(m-n>(0x7fffffff-MAX_CP_COUNT-delta)/(handledCPCount+1)) {
                throw new RuntimeException("Internal program error");
            }
            delta+=(m-n)*(handledCPCount+1);
            n=m;

            /* Encode a sequence of same code points n */
            for(j=0; jTMAX) {
                            t=TMAX;
                        }
                        */

                        t=k-bias;
                        if(t=(bias+TMAX)) {
                            t=TMAX;
                        }

                        if(q0;) {
            if(src.charAt(--j)==DELIMITER) {
                break;
            }
        }
        destLength=basicLength=destCPCount=j;

        while(j>0) {
            b=src.charAt(--j);
            if(!isBasic(b)) {
                throw new ParseException("Illegal char found", -1);
            }

            if(j0 ? basicLength+1 : 0; in=srcLength) {
                    throw new ParseException("Illegal char found", -1);
                }

                digit=basicToDigit[(byte)src.charAt(in++)];
                if(digit<0) {
                    throw new ParseException("Invalid char found", -1);
                }
                if(digit>(0x7fffffff-i)/w) {
                    /* integer overflow */
                    throw new ParseException("Illegal char found", -1);
                }

                i+=digit*w;
                t=k-bias;
                if(t=(bias+TMAX)) {
                    t=TMAX;
                }
                if(digit0x7fffffff/(BASE-t)) {
                    /* integer overflow */
                    throw new ParseException("Illegal char found", -1);
                }
                w*=BASE-t;
            }

            /*
             * Modification from sample code:
             * Increments destCPCount here,
             * where needed instead of in for() loop tail.
             */
            ++destCPCount;
            bias=adaptBias(i-oldi, destCPCount, (oldi==0));

            /*
             * i was supposed to wrap around from (incremented) destCPCount to 0,
             * incrementing n each time, so we'll fix that now:
             */
            if(i/destCPCount>(0x7fffffff-n)) {
                /* integer overflow */
                throw new ParseException("Illegal char found", -1);
            }

            n+=i/destCPCount;
            i%=destCPCount;
            /* not needed for Punycode: */
            /* if (decode_digit(n) <= BASE) return punycode_invalid_input; */

            if(n>0x10ffff || isSurrogate(n)) {
                /* Unicode code point overflow */
                throw new ParseException("Illegal char found", -1);
            }

            /* Insert n at position i of the output: */
            cpLength=UTF16.getCharCount(n);
            if((destLength+cpLength)1) {
                        firstSupplementaryIndex=codeUnitIndex;
                    } else {
                        ++firstSupplementaryIndex;
                    }
                } else {
                    codeUnitIndex=firstSupplementaryIndex;
                    codeUnitIndex=UTF16.moveCodePointOffset(dest, 0, destLength, codeUnitIndex, i-codeUnitIndex);
                }

                /* use the UChar index codeUnitIndex instead of the code point index i */
                if(codeUnitIndex




© 2015 - 2025 Weber Informatics LLC | Privacy Policy