All downloads are free. Search and download functionality uses the official Maven repository.

io.trino.hive.formats.DistinctMapKeys Maven / Gradle / Ivy

There is a newer version: 458
Show newest version
/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package io.trino.hive.formats;

import io.trino.spi.TrinoException;
import io.trino.spi.block.Block;
import io.trino.spi.type.MapType;

import java.util.Arrays;

import static io.trino.spi.StandardErrorCode.NOT_SUPPORTED;

/**
 * Finds the positions in a block of map keys that hold distinct, non-null keys.
 *
 * <p>Keys are compared with the equality operator of the supplied {@link MapType} and located
 * through an open-addressing hash table with linear probing. The scratch arrays are kept
 * between calls and reused, so an instance carries mutable state with no synchronization and
 * the array returned by {@link #selectDistinctKeys(Block)} is overwritten by the next call.
 */
public class DistinctMapKeys
{
    // Number of hash table slots allocated per key; keeps the table at most half full
    private static final int HASH_MULTIPLIER = 2;

    private final MapType mapType;
    // When true, the last occurrence of a duplicated key wins; otherwise the first one does.
    // NOTE(review): the name looks like a typo for "useLastEntry"; kept as is.
    private final boolean userLastEntry;

    // Reusable scratch space; both arrays only ever grow
    private boolean[] distinctBuffer = new boolean[0];
    private int[] hashTableBuffer = new int[0];

    /**
     * @param mapType map type whose key hash code and key equality operators are used
     * @param userLastEntry if true, the last occurrence of a duplicated key is selected;
     * if false, the first occurrence is selected
     */
    public DistinctMapKeys(MapType mapType, boolean userLastEntry)
    {
        this.mapType = mapType;
        this.userLastEntry = userLastEntry;
    }

    /**
     * Determines which keys are distinct and thus should be copied through to the final results.
     *
     * <p>Null keys are never marked as distinct. For duplicated keys exactly one occurrence is
     * marked: the first one, or the last one when {@code userLastEntry} is set.
     *
     * @param keyBlock block holding the map keys
     * @return the internal reusable buffer; entry {@code i} is true when position {@code i} should
     * be kept. The array may be longer than the key count (extra entries are false) and its
     * contents are replaced by the next call on this instance.
     * @throws TrinoException if there is an indeterminate key
     */
    // NOTE: this is a fork of the logic in MapHashTables
    public boolean[] selectDistinctKeys(Block keyBlock)
    {
        int keyCount = keyBlock.getPositionCount();
        int hashTableSize = keyCount * HASH_MULTIPLIER;

        if (distinctBuffer.length < keyCount) {
            distinctBuffer = new boolean[calculateBufferSize(keyCount)];
        }

        if (hashTableBuffer.length < hashTableSize) {
            hashTableBuffer = new int[calculateBufferSize(hashTableSize)];
        }
        boolean[] distinct = distinctBuffer;
        // The whole buffer is cleared because it is handed to the caller, who must not see
        // stale results from a previous, larger key block
        Arrays.fill(distinct, false);
        int[] hashTable = hashTableBuffer;
        // Only slots [0, hashTableSize) are ever probed, so there is no need to reset the
        // rest of a buffer that was grown for an earlier, larger key block
        Arrays.fill(hashTable, 0, hashTableSize, -1);

        for (int i = 0; i < keyCount; i++) {
            // Nulls are not marked as distinct and thus are ignored
            if (keyBlock.isNull(i)) {
                continue;
            }
            int hash = getHashPosition(keyBlock, i, hashTableSize);
            while (true) {
                // Empty slot: first time this key is seen
                if (hashTable[hash] == -1) {
                    hashTable[hash] = i;
                    distinct[i] = true;
                    break;
                }

                // assuming maps with indeterminate keys are not supported
                Boolean isDuplicateKey = isSameKey(keyBlock, i, hashTable[hash]);

                // this will only happen if there is an indeterminate key
                if (isDuplicateKey == null) {
                    throw new TrinoException(NOT_SUPPORTED, "map key cannot be null or contain nulls");
                }

                // duplicate keys are ignored
                if (isDuplicateKey) {
                    if (userLastEntry) {
                        // Move the "distinct" mark from the earlier occurrence to this one
                        int duplicateIndex = hashTable[hash];
                        distinct[duplicateIndex] = false;

                        hashTable[hash] = i;
                        distinct[i] = true;
                    }
                    break;
                }

                // Slot taken by a different key: linear probe with wrap-around
                hash++;
                if (hash == hashTableSize) {
                    hash = 0;
                }
            }
        }
        return distinct;
    }

    /**
     * Compares two keys of the same block using the map type's key equality operator.
     *
     * @return true or false when the comparison is conclusive, or null when it is indeterminate
     */
    private Boolean isSameKey(Block keyBlock, int leftPosition, int rightPosition)
    {
        try {
            return (Boolean) mapType.getKeyBlockEqual().invokeExact(keyBlock, leftPosition, keyBlock, rightPosition);
        }
        catch (RuntimeException | Error e) {
            // Unchecked failures, including fatal JVM errors, propagate unchanged
            throw e;
        }
        catch (Throwable throwable) {
            throw new RuntimeException(throwable);
        }
    }

    /**
     * Computes the initial hash table slot for the key at the given position.
     *
     * @throws IllegalArgumentException if the key at {@code position} is null
     */
    private int getHashPosition(Block keyBlock, int position, int hashTableSize)
    {
        if (keyBlock.isNull(position)) {
            throw new IllegalArgumentException("map keys cannot be null");
        }

        long hashCode;
        try {
            hashCode = (long) mapType.getKeyBlockHashCode().invokeExact(keyBlock, position);
        }
        catch (RuntimeException | Error e) {
            // Unchecked failures, including fatal JVM errors, propagate unchanged
            throw e;
        }
        catch (Throwable throwable) {
            throw new RuntimeException(throwable);
        }

        return computePosition(hashCode, hashTableSize);
    }

    // This function reduces the 64 bit hashcode to [0, hashTableSize) uniformly. It first reduces the hashcode to 32-bit
    // integer x then normalize it to x / 2^32 * hashSize to reduce the range of x from [0, 2^32) to [0, hashTableSize)
    private static int computePosition(long hashcode, int hashTableSize)
    {
        return (int) ((Integer.toUnsignedLong(Long.hashCode(hashcode)) * hashTableSize) >> 32);
    }

    /**
     * Picks the allocation size for a scratch buffer: at least 128, otherwise {@code value}
     * rounded up to a power of two.
     */
    private static int calculateBufferSize(int value)
    {
        if (value < 128) {
            return 128;
        }

        int highestOneBit = Integer.highestOneBit(value);
        if (value == highestOneBit) {
            return value;
        }
        return highestOneBit << 1;
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy