org.apache.cassandra.dht.tokenallocator.NoReplicationTokenAllocator Maven / Gradle / Ivy

Show more of this group Show more artifacts with this name
Show all versions of cassandra-all Show documentation
The Apache Cassandra Project develops a highly scalable second-generation distributed database, bringing together Dynamo's fully distributed design and Bigtable's ColumnFamily-based data model.
There is a newer version: 5.0-rc2
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.cassandra.dht.tokenallocator;

import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.NavigableMap;
import java.util.PriorityQueue;
import java.util.Queue;
import java.util.Set;

import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Queues;

import org.apache.cassandra.dht.IPartitioner;
import org.apache.cassandra.dht.Token;

public class NoReplicationTokenAllocator extends TokenAllocatorBase
{
    PriorityQueue> sortedUnits = Queues.newPriorityQueue();
    Map>> tokensInUnits = Maps.newHashMap();

    private static final double MAX_TAKEOVER_RATIO = 0.90;
    private static final double MIN_TAKEOVER_RATIO = 1.0 - MAX_TAKEOVER_RATIO;

    public NoReplicationTokenAllocator(NavigableMap sortedTokens,
                                       ReplicationStrategy strategy,
                                       IPartitioner partitioner)
    {
        super(sortedTokens, strategy, partitioner);
    }

    /**
     * Construct the token ring as a CircularList of TokenInfo,
     * and populate the ownership of the UnitInfo's provided
     */
    private TokenInfo createTokenInfos(Map> units)
    {
        if (units.isEmpty())
            return null;

        // build the circular list
        TokenInfo prev = null;
        TokenInfo first = null;
        for (Map.Entry en : sortedTokens.entrySet())
        {
            Token t = en.getKey();
            UnitInfo ni = units.get(en.getValue());
            TokenInfo ti = new TokenInfo<>(t, ni);
            first = ti.insertAfter(first, prev);
            prev = ti;
        }

        TokenInfo curr = first;
        tokensInUnits.clear();
        sortedUnits.clear();
        do
        {
            populateTokenInfoAndAdjustUnit(curr);
            curr = curr.next;
        } while (curr != first);

        for (UnitInfo unitInfo : units.values())
        {
            sortedUnits.add(new Weighted(unitInfo.ownership, unitInfo));
        }

        return first;
    }

    /**
     * Used in tests.
     */
    protected void createTokenInfos()
    {
        createTokenInfos(createUnitInfos(Maps.newHashMap()));
    }

    private void populateTokenInfoAndAdjustUnit(TokenInfo token)
    {
        token.replicationStart = token.prevInRing().token;
        token.replicationThreshold = token.token;
        token.replicatedOwnership = token.replicationStart.size(token.token);
        token.owningUnit.ownership += token.replicatedOwnership;

        PriorityQueue> unitTokens = tokensInUnits.get(token.owningUnit.unit);
        if (unitTokens == null)
        {
            unitTokens = Queues.newPriorityQueue();
            tokensInUnits.put(token.owningUnit.unit, unitTokens);
        }
        unitTokens.add(new Weighted(token.replicatedOwnership, token));
    }

    private Collection generateRandomTokens(UnitInfo newUnit, int numTokens, Map> unitInfos)
    {
        Set tokens = new HashSet<>(numTokens);
        while (tokens.size() < numTokens)
        {
            Token token = partitioner.getRandomToken();
            if (!sortedTokens.containsKey(token))
            {
                tokens.add(token);
                sortedTokens.put(token, newUnit.unit);
            }
        }
        unitInfos.put(newUnit.unit, newUnit);
        createTokenInfos(unitInfos);
        return tokens;
    }

    public Collection addUnit(Unit newUnit, int numTokens)
    {
        assert !tokensInUnits.containsKey(newUnit);

        Map groups = Maps.newHashMap();
        UnitInfo newUnitInfo = new UnitInfo<>(newUnit, 0, groups, strategy);
        Map> unitInfos = createUnitInfos(groups);

        if (unitInfos.isEmpty())
            return generateRandomTokens(newUnitInfo, numTokens, unitInfos);

        if (numTokens > sortedTokens.size())
            return generateRandomTokens(newUnitInfo, numTokens, unitInfos);

        TokenInfo head = createTokenInfos(unitInfos);

        // Select the nodes we will work with, extract them from sortedUnits and calculate targetAverage
        double targetAverage = 0.0;
        double sum = 0.0;
        List> unitsToChange = new ArrayList<>();

        for (int i = 0; i < numTokens; i++)
        {
            Weighted unit = sortedUnits.peek();

            if (unit == null)
                break;

            sum += unit.weight;
            double average = sum / (unitsToChange.size() + 2); // unit and newUnit must be counted
            if (unit.weight <= average)
                // No point to include later nodes, target can only decrease from here.
                break;

            sortedUnits.remove();
            unitsToChange.add(unit);
            targetAverage = average;
        }

        List newTokens = Lists.newArrayListWithCapacity(numTokens);

        int nr = 0;
        // calculate the tokens
        for (Weighted unit : unitsToChange)
        {
            // TODO: Any better ways to assign how many tokens to change in each node?
            int tokensToChange = numTokens / unitsToChange.size() + (nr < numTokens % unitsToChange.size() ? 1 : 0);

            Queue> unitTokens = tokensInUnits.get(unit.value.unit);
            List> tokens = Lists.newArrayListWithCapacity(tokensToChange);

            double workWeight = 0;
            // Extract biggest vnodes and calculate how much weight we can work with.
            for (int i = 0; i < tokensToChange; i++)
            {
                Weighted wt = unitTokens.remove();
                tokens.add(wt);
                workWeight += wt.weight;
                unit.value.ownership -= wt.weight;
            }

            double toTakeOver = unit.weight - targetAverage;
            // Split toTakeOver proportionally between the vnodes.
            for (Weighted wt : tokens)
            {
                double slice;
                Token token;

                if (toTakeOver < workWeight)
                {
                    // Spread decrease.
                    slice = toTakeOver / workWeight;

                    if (slice < MIN_TAKEOVER_RATIO)
                        slice = MIN_TAKEOVER_RATIO;
                    if (slice > MAX_TAKEOVER_RATIO)
                        slice = MAX_TAKEOVER_RATIO;
                }
                else
                {
                    slice = MAX_TAKEOVER_RATIO;
                }
                token = partitioner.split(wt.value.prevInRing().token, wt.value.token, slice);

                //Token selected, now change all data
                sortedTokens.put(token, newUnit);

                TokenInfo ti = new TokenInfo<>(token, newUnitInfo);

                ti.insertAfter(head, wt.value.prevInRing());

                populateTokenInfoAndAdjustUnit(ti);
                populateTokenInfoAndAdjustUnit(wt.value);
                newTokens.add(token);
            }

            // adjust the weight for current unit
            sortedUnits.add(new Weighted<>(unit.value.ownership, unit.value));
            ++nr;
        }
        sortedUnits.add(new Weighted<>(newUnitInfo.ownership, newUnitInfo));

        return newTokens;
    }

    /**
     * For testing, remove the given unit preserving correct state of the allocator.
     */
    void removeUnit(Unit n)
    {
        Iterator> it = sortedUnits.iterator();
        while (it.hasNext())
        {
            if (it.next().value.unit.equals(n))
            {
                it.remove();
                break;
            }
        }

        PriorityQueue> tokenInfos = tokensInUnits.remove(n);
        Collection tokens = Lists.newArrayListWithCapacity(tokenInfos.size());
        for (Weighted tokenInfo : tokenInfos)
        {
            tokens.add(tokenInfo.value.token);
        }
        sortedTokens.keySet().removeAll(tokens);
    }

    public int getReplicas()
    {
        return 1;
    }
}