All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.marklogic.mapreduce.utilities.LegacyAssignmentPolicy Maven / Gradle / Ivy

There is a newer version: 11.3.1
Show newest version
/*
 * Copyright (c) 2020 MarkLogic Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.marklogic.mapreduce.utilities;

import java.math.BigInteger;
import java.text.Normalizer;
import java.util.LinkedHashSet;

import com.marklogic.mapreduce.DocumentURI;

/**
 * Legacy Assignment Policy for fastload
 */
public class LegacyAssignmentPolicy extends AssignmentPolicy {
    private static final long HASH64_STEP = 15485863;
    private static final long HASH64_SEED = 0x39a51471f80aabf7l;
    private static final BigInteger URI_KEY_HASH = hash64("uri()");

    protected String[] forests;

    public LegacyAssignmentPolicy() {
    }

    public LegacyAssignmentPolicy(LinkedHashSet uForests) {
        this.uForests = uForests;
        forests = uForests.toArray(new String[uForests.size()]);
        policy = Kind.LEGACY;
    }

    public static int getPlacementId(DocumentURI uri, int size) {
        switch (size) {
        case 0:
            throw new IllegalArgumentException("getPlacementId(size = 0)");
        case 1:
            return 0;
        default:
            String nk = normalize(uri.getUri());
            BigInteger uriKey = getUriKey(nk);
            long u = uriKey.longValue();
            for (int i = 8; i <= 56; i += 8) {
                u += rotl(uriKey, i);
            }
            long v = (0xffff + size) / size;
            return (int) ((u & 0xffff) / v);
        }
    }

    protected static String normalize(String uri) {
        return Normalizer.normalize(uri, Normalizer.Form.NFC);
    }

    public static BigInteger getUriKey(String uri) {
        BigInteger value = hash64(uri).multiply(BigInteger.valueOf(5)).add(
            URI_KEY_HASH);
        byte[] valueBytes = value.toByteArray();
        byte[] longBytes = new byte[8];
        System.arraycopy(valueBytes, valueBytes.length - longBytes.length,
            longBytes, 0, longBytes.length);
        BigInteger key = new BigInteger(1, longBytes);
        return key;
    }

    private static BigInteger hash64(String str) {
        BigInteger value = BigInteger.valueOf(HASH64_SEED);

        for (int cp, i = 0; i < str.length(); i += Character.charCount(cp)) {
            cp = str.codePointAt(i);
            value = value.add(BigInteger.valueOf(cp)).multiply(
                BigInteger.valueOf(HASH64_STEP));
        }
        byte[] valueBytes = value.toByteArray();
        byte[] longBytes = new byte[8];
        System.arraycopy(valueBytes, valueBytes.length - longBytes.length,
            longBytes, 0, longBytes.length);
        BigInteger hash = new BigInteger(1, longBytes);
        return hash;
    }

    public static long rotl(BigInteger value, int shift) {
        return value.shiftLeft(shift).xor(value.shiftRight(64 - shift))
            .longValue();
    }

    @Override
    public int getPlacementForestIndex(DocumentURI uri) {
        return getPlacementId(uri, forests.length);
    }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy