tech.mlsql.tool.ZOrderingBytesUtil Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of mlsql-sql-profiler-2.4_2.11 Show documentation
The newest version!
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 * 
 * http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package tech.mlsql.tool;

import sun.misc.Unsafe;

import java.nio.charset.Charset;

/**
 * Every column is represented as 8 bytes.
 * Z-ordering is limited to a maximum of 1024 bytes, which means a single z-ordering
 * index can cover at most 1024 / 8 = 128 fields.
 */
public class ZOrderingBytesUtil {
    /** Shared {@code sun.misc.Unsafe} handle obtained from {@code UnsafeAccess}; used by {@link #compareTo}. */
    static final Unsafe theUnsafe;
    /** Number of bytes in a {@code long}; also the fixed width of every encoded column. */
    public static final int SIZEOF_LONG = Long.SIZE / Byte.SIZE;

    /** Charset used by {@link #utf8To8Byte(String)}; resolved once instead of on every call. */
    private static final Charset UTF_8 = Charset.forName("utf-8");

    static {
        theUnsafe = UnsafeAccess.theUnsafe;

        // sanity check - this should never fail
        if (theUnsafe.arrayIndexScale(byte[].class) != 1) {
            throw new AssertionError();
        }
    }

    /**
     * Serializes an {@code int} as 4 bytes, most significant byte first (big-endian).
     *
     * @param val value to serialize
     * @return a new 4-byte array
     */
    public static byte[] toBytes(int val) {
        return new byte[]{
                (byte) (val >>> 24),
                (byte) (val >>> 16),
                (byte) (val >>> 8),
                (byte) val
        };
    }

    /**
     * Serializes a {@code long} as 8 bytes, most significant byte first (big-endian).
     *
     * @param val value to serialize
     * @return a new 8-byte array
     */
    public static byte[] toBytes(long val) {
        byte[] b = new byte[SIZEOF_LONG];
        for (int i = SIZEOF_LONG - 1; i >= 0; i--) {
            b[i] = (byte) val;
            val >>>= 8;
        }
        return b;
    }

    /**
     * Encodes an {@code int} as 8 bytes whose unsigned lexicographic order matches the
     * numeric order of the input.
     *
     * <p>Only the sign bit of the two's-complement value is flipped: negatives end up
     * below non-negatives and the relative order inside each group is unchanged. The
     * previous sign-magnitude conversion reversed the order of negative values and
     * mapped {@code Integer.MIN_VALUE} onto the encoding of {@code 0}.
     *
     * @param a value to encode
     * @return 8 bytes: four leading zero bytes followed by the 4 encoded bytes
     */
    public static byte[] intTo8Byte(int a) {
        return paddingTo8Byte(toBytes(a ^ Integer.MIN_VALUE));
    }

    /**
     * Encodes a {@code long} as 8 bytes whose unsigned lexicographic order matches the
     * numeric order of the input.
     *
     * <p>Only the sign bit of the two's-complement value is flipped; see
     * {@link #intTo8Byte(int)} for the rationale.
     *
     * @param a value to encode
     * @return a new 8-byte array
     */
    public static byte[] longTo8Byte(long a) {
        return toBytes(a ^ Long.MIN_VALUE);
    }

    /**
     * Serializes a {@code double} as the 8 big-endian bytes of its raw IEEE 754 bit pattern.
     *
     * @param d value to serialize
     * @return a new 8-byte array
     */
    public static byte[] toBytes(final double d) {
        // Encode it as a long
        return toBytes(Double.doubleToRawLongBits(d));
    }

    /**
     * Encodes a {@code double} as 8 bytes whose unsigned lexicographic order matches the
     * numeric order of the input.
     *
     * <ol>
     *   <li>Take the raw IEEE 754 bits.</li>
     *   <li>If the sign bit is clear, flip only the sign bit.</li>
     *   <li>If the sign bit is set, flip every bit.</li>
     * </ol>
     *
     * <p>The decision is made on the sign bit rather than on {@code a > 0} / {@code a < 0},
     * so {@code 0.0}, {@code -0.0} and NaN are transformed as well. {@code -0.0} sorts
     * immediately below {@code 0.0}; a NaN with a clear sign bit sorts above
     * {@code +Infinity}, one with the sign bit set sorts below {@code -Infinity}.
     *
     * @param a value to encode
     * @return a new 8-byte array
     */
    public static byte[] doubleTo8Byte(double a) {
        long bits = Double.doubleToRawLongBits(a);
        // Arithmetic shift yields all ones for a set sign bit and zero otherwise;
        // OR-ing in the sign bit makes the mask "sign bit only" for non-negative input.
        long mask = (bits >> 63) | Long.MIN_VALUE;
        return toBytes(bits ^ mask);
    }

    /**
     * Encodes a string as the first 8 bytes of its UTF-8 representation.
     *
     * <p>Longer strings are truncated to 8 bytes (the cut may fall inside a multi-byte
     * character). Shorter strings are padded with trailing zero bytes so that the
     * unsigned lexicographic order of the result follows the byte order of the strings;
     * padding at the front would order short strings by length first.
     * {@code null} is treated like the empty string.
     *
     * @param a string to encode, may be {@code null}
     * @return a new 8-byte array
     */
    public static byte[] utf8To8Byte(String a) {
        byte[] result = new byte[SIZEOF_LONG];
        if (a != null) {
            byte[] utf8 = a.getBytes(UTF_8);
            System.arraycopy(utf8, 0, result, 0, Math.min(utf8.length, SIZEOF_LONG));
        }
        return result;
    }

    /**
     * Bit-interleaves two 8-byte values into one 16-byte value.
     *
     * <p>Reading the result from its most significant bit, the bits alternate
     * {@code buffer1}, {@code buffer2}, {@code buffer1}, ... Only the first 8 bytes of
     * each argument are read.
     *
     * @param buffer1 first value, at least 8 bytes
     * @param buffer2 second value, at least 8 bytes
     * @return a new 16-byte array
     */
    public static byte[] interleave8Byte(byte[] buffer1, byte[] buffer2) {
        byte[] result = new byte[2 * SIZEOF_LONG];
        for (int i = 0; i < SIZEOF_LONG; i++) {
            byte[] pair = interleaveByte(buffer1[i], buffer2[i]);
            result[2 * i] = pair[0];
            result[2 * i + 1] = pair[1];
        }
        return result;
    }

    /**
     * Returns {@code a} with the bit at position {@code apos} replaced by the bit of
     * {@code b} at position {@code bpos}. Positions count from the most significant
     * bit: 0 is the highest bit, 7 the lowest.
     *
     * <p>The source bit is extracted as an {@code int} and the target bit is set or
     * cleared explicitly. The earlier shift-and-XOR form sign-extended the intermediate
     * byte when {@code bpos == 0}, which cleared a target bit that was already set.
     *
     * @param a    byte to update
     * @param apos target bit position in {@code a}, expected in 0..7
     * @param b    byte to read from
     * @param bpos source bit position in {@code b}, expected in 0..7
     * @return the updated copy of {@code a}
     */
    public static byte updatePos(byte a, int apos, byte b, int bpos) {
        int sourceBit = (b >> (7 - bpos)) & 1;
        int targetMask = 1 << (7 - apos);
        return (byte) (sourceBit == 1 ? (a | targetMask) : (a & ~targetMask));
    }

    /**
     * Bit-interleaves any number of 8-byte attributes.
     *
     * <p>For every bit position, starting at the most significant bit, one bit is taken
     * from each attribute in array order and appended to the result.
     *
     * @param buffer one array per attribute, each at least 8 bytes
     * @return a new array of {@code 8 * buffer.length} bytes
     */
    public static byte[] interleaveMulti8Byte(byte[][] buffer) {
        int attributesNum = buffer.length;
        byte[] result = new byte[SIZEOF_LONG * attributesNum];

        // Index of the next bit to write in the result, counted from the first byte's MSB.
        int resBitPos = 0;

        for (int bitStep = 0; bitStep < Long.SIZE; bitStep++) {
            // Byte, and bit inside that byte, that bitStep addresses in every attribute.
            int srcBytePos = bitStep / Byte.SIZE;
            int srcBitPos = bitStep % Byte.SIZE;

            for (int i = 0; i < attributesNum; i++) {
                int dstBytePos = resBitPos / Byte.SIZE;
                int dstBitPos = resBitPos % Byte.SIZE;
                result[dstBytePos] = updatePos(result[dstBytePos], dstBitPos, buffer[i][srcBytePos], srcBitPos);
                resBitPos++;
            }
        }
        return result;
    }

    /**
     * Bit-interleaves two bytes into 16 bits using the 64-bit multiply technique.
     *
     * <p>Reading the result from its most significant bit, the sequence is
     * {@code x7 y7 x6 y6 ... x0 y0}: with bit 0 as the least significant bit,
     * {@code x} occupies the odd positions and {@code y} the even ones.
     *
     * @param x byte whose bits go first in each pair
     * @param y byte whose bits go second in each pair
     * @return a new 2-byte array, high byte first
     */
    public static byte[] interleaveByte(byte x, byte y) {
        long z = ((spreadBits(y) >> 49) & 0x5555) | ((spreadBits(x) >> 48) & 0xAAAA);
        return new byte[]{(byte) (z >>> 8), (byte) z};
    }

    /**
     * Multiply steps of the interleave technique for a single byte. The byte is widened
     * as an unsigned value first: sign extension of bytes with the high bit set would
     * corrupt the replicated pattern and select the wrong bits.
     */
    private static long spreadBits(byte value) {
        long unsigned = value & 0xFFL;
        return (unsigned * 0x0101010101010101L & 0x8040201008040201L) * 0x0102040810204081L;
    }

    /**
     * Renders every byte as 8 binary digits (most significant bit first), each byte
     * followed by a single space.
     *
     * @param b bytes to render
     * @return the binary text; empty for an empty array
     */
    public static String toString(final byte[] b) {
        StringBuilder sb = new StringBuilder(b.length * (Byte.SIZE + 1));
        for (byte value : b) {
            for (int bit = Byte.SIZE - 1; bit >= 0; bit--) {
                sb.append(((value >> bit) & 1) == 0 ? '0' : '1');
            }
            sb.append(' ');
        }
        return sb.toString();
    }

    /**
     * Lexicographically compares two byte ranges, treating each byte as unsigned.
     * Adapted from HBase.
     *
     * @param buffer1 left array
     * @param offset1 start of the left range
     * @param length1 length of the left range
     * @param buffer2 right array
     * @param offset2 start of the right range
     * @param length2 length of the right range
     * @return a negative value, zero or a positive value when the left range sorts
     *         before, equal to or after the right range; when one range is a prefix
     *         of the other the result is {@code length1 - length2}
     */
    public static int compareTo(byte[] buffer1, int offset1, int length1,
                                byte[] buffer2, int offset2, int length2) {

        // Short circuit equal case
        if (buffer1 == buffer2 &&
                offset1 == offset2 &&
                length1 == length2) {
            return 0;
        }
        final int stride = 8;
        final int minLength = Math.min(length1, length2);
        int strideLimit = minLength & ~(stride - 1);
        final long offset1Adj = offset1 + UnsafeAccess.BYTE_ARRAY_BASE_OFFSET;
        final long offset2Adj = offset2 + UnsafeAccess.BYTE_ARRAY_BASE_OFFSET;
        int i;

        /*
         * Compare 8 bytes at a time. Benchmarking on x86 shows a stride of 8 bytes is no slower
         * than 4 bytes even on 32-bit. On the other hand, it is substantially faster on 64-bit.
         */
        for (i = 0; i < strideLimit; i += stride) {
            long lw = theUnsafe.getLong(buffer1, offset1Adj + i);
            long rw = theUnsafe.getLong(buffer2, offset2Adj + i);
            if (lw != rw) {
                if (!UnsafeAccess.LITTLE_ENDIAN) {
                    // Adding Long.MIN_VALUE turns the signed comparison into an unsigned one.
                    return ((lw + Long.MIN_VALUE) < (rw + Long.MIN_VALUE)) ? -1 : 1;
                }

                /*
                 * We want to compare only the first index where left[index] != right[index]. This
                 * corresponds to the least significant nonzero byte in lw ^ rw, since lw and rw are
                 * little-endian. Long.numberOfTrailingZeros(diff) tells us the least significant
                 * nonzero bit, and zeroing out the first three bits of L.nTZ gives us the shift to get
                 * that least significant nonzero byte. This comparison logic is based on UnsignedBytes
                 * comparator from guava v21
                 */
                int n = Long.numberOfTrailingZeros(lw ^ rw) & ~0x7;
                return ((int) ((lw >>> n) & 0xFF)) - ((int) ((rw >>> n) & 0xFF));
            }
        }

        // The epilogue to cover the last (minLength % stride) elements.
        for (; i < minLength; i++) {
            int a = (buffer1[offset1 + i] & 0xFF);
            int b = (buffer2[offset2 + i] & 0xFF);
            if (a != b) {
                return a - b;
            }
        }
        return length1 - length2;
    }

    /**
     * Fits a big-endian number into exactly 8 bytes: an 8-byte input is returned as is,
     * a longer one is cut to its first 8 bytes, a shorter one gets leading zero bytes.
     */
    private static byte[] paddingTo8Byte(byte[] a) {
        if (a.length == SIZEOF_LONG) {
            return a;
        }
        byte[] result = new byte[SIZEOF_LONG];
        if (a.length > SIZEOF_LONG) {
            System.arraycopy(a, 0, result, 0, SIZEOF_LONG);
        } else {
            System.arraycopy(a, 0, result, SIZEOF_LONG - a.length, a.length);
        }
        return result;
    }
}