All downloads are free. Search and download functionality uses the official Maven repository.

com.aerospike.client.cluster.Partition Maven / Gradle / Ivy

The newest version!
/*
 * Copyright 2012-2023 Aerospike, Inc.
 *
 * Portions may be licensed to Aerospike, Inc. under one or more contributor
 * license agreements WHICH ARE COMPATIBLE WITH THE APACHE LICENSE, VERSION 2.0.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */
package com.aerospike.client.cluster;

import java.util.HashMap;
import java.util.concurrent.atomic.AtomicReferenceArray;

import com.aerospike.client.AerospikeException;
import com.aerospike.client.Key;
import com.aerospike.client.command.Buffer;
import com.aerospike.client.policy.Policy;
import com.aerospike.client.policy.Replica;
import com.aerospike.client.query.PartitionStatus;

public final class Partition {

	public static Partition write(Cluster cluster, Policy policy, Key key) {
		// Must copy hashmap reference for copy on write semantics to work.
		HashMap map = cluster.partitionMap;
		Partitions partitions = map.get(key.namespace);

		if (partitions == null) {
			throw new AerospikeException.InvalidNamespace(key.namespace, map.size());
		}

		return new Partition(partitions, key, policy.replica, null, false);
	}

	public static Partition read(Cluster cluster, Policy policy, Key key) {
		// Must copy hashmap reference for copy on write semantics to work.
		HashMap map = cluster.partitionMap;
		Partitions partitions = map.get(key.namespace);

		if (partitions == null) {
			throw new AerospikeException.InvalidNamespace(key.namespace, map.size());
		}

		Replica replica;
		boolean linearize;

		if (partitions.scMode) {
			switch (policy.readModeSC) {
			case SESSION:
				replica = Replica.MASTER;
				linearize = false;
				break;

			case LINEARIZE:
				replica = policy.replica == Replica.PREFER_RACK ? Replica.SEQUENCE : policy.replica;
				linearize = true;
				break;

			default:
				replica = policy.replica;
				linearize = false;
				break;
			}
		}
		else {
			replica = policy.replica;
			linearize = false;
		}
		return new Partition(partitions, key, replica, null, linearize);
	}

	public static Replica getReplicaSC(Policy policy) {
		switch (policy.readModeSC) {
		case SESSION:
			return Replica.MASTER;

		case LINEARIZE:
			return policy.replica == Replica.PREFER_RACK ? Replica.SEQUENCE : policy.replica;

		default:
			return policy.replica;
		}
	}

	public static Node getNodeBatchWrite(
		Cluster cluster,
		Key key,
		Replica replica,
		Node prevNode,
		int sequence
	) {
		HashMap map = cluster.partitionMap;
		Partitions partitions = map.get(key.namespace);

		if (partitions == null) {
			throw new AerospikeException.InvalidNamespace(key.namespace, map.size());
		}

		Partition p = new Partition(partitions, key, replica, prevNode, false);
		p.sequence = sequence;
		return p.getNodeWrite(cluster);
	}

	public static Node getNodeBatchRead(
		Cluster cluster,
		Key key,
		Replica replica,
		Replica replicaSC,
		Node prevNode,
		int sequence,
		int sequenceSC
	) {
		// Must copy hashmap reference for copy on write semantics to work.
		HashMap map = cluster.partitionMap;
		Partitions partitions = map.get(key.namespace);

		if (partitions == null) {
			throw new AerospikeException.InvalidNamespace(key.namespace, map.size());
		}

		if (partitions.scMode) {
			replica = replicaSC;
			sequence = sequenceSC;
		}

		Partition p = new Partition(partitions, key, replica, prevNode, false);
		p.sequence = sequence;
		return p.getNodeRead(cluster);
	}

	private Partitions partitions;
	private final String namespace;
	private final Replica replica;
	private Node prevNode;
	private int partitionId;
	private int sequence;
	private final boolean linearize;

	private Partition(Partitions partitions, Key key, Replica replica, Node prevNode, boolean linearize) {
		this.partitions = partitions;
		this.namespace = key.namespace;
		this.replica = replica;
		this.prevNode = prevNode;
		this.linearize = linearize;
		this.partitionId = getPartitionId(key.digest);
	}

	public Partition(String namespace, Replica replica) {
		this.namespace = namespace;
		this.replica = replica;
		this.linearize = false;
	}

	public static int getPartitionId(byte[] digest) {
		// CAN'T USE MOD directly - mod will give negative numbers.
		// First AND makes positive and negative correctly, then mod.
		return (Buffer.littleBytesToInt(digest, 0) & 0xFFFF) % Node.PARTITIONS;
	}

	public Node getNodeQuery(Cluster cluster, Partitions partitions, PartitionStatus ps) {
		this.partitions = partitions;
		this.partitionId = ps.id;
		this.sequence = ps.sequence;
		this.prevNode = ps.node;

		Node node = getNodeRead(cluster);
		ps.node = node;
		ps.sequence = this.sequence;
		ps.retry = false;
		return node;
	}

	public Node getNodeRead(Cluster cluster) {
		switch (replica) {
		default:
		case SEQUENCE:
			return getSequenceNode(cluster);

		case PREFER_RACK:
			return getRackNode(cluster);

		case MASTER:
			return getMasterNode(cluster);

		case MASTER_PROLES:
			return getMasterProlesNode(cluster);

		case RANDOM:
			return cluster.getRandomNode();
		}
	}

	public Node getNodeWrite(Cluster cluster) {
		switch (replica) {
		default:
		case SEQUENCE:
		case PREFER_RACK:
			return getSequenceNode(cluster);

		case MASTER:
		case MASTER_PROLES:
		case RANDOM:
			return getMasterNode(cluster);
		}
	}

	public void prepareRetryRead(boolean timeout) {
		if (! timeout || !linearize) {
			sequence++;
		}
	}

	public void prepareRetryWrite(boolean timeout) {
		if (! timeout) {
			sequence++;
		}
	}

	private Node getSequenceNode(Cluster cluster) {
		AtomicReferenceArray[] replicas = partitions.replicas;
		int max = replicas.length;

		for (int i = 0; i < max; i++) {
			int index = sequence % max;
			Node node = replicas[index].get(partitionId);

			if (node != null && node.isActive()) {
				return node;
			}
			sequence++;
		}
		Node[] nodeArray = cluster.getNodes();
		throw new AerospikeException.InvalidNode(nodeArray.length, this);
	}

	private Node getRackNode(Cluster cluster) {
		AtomicReferenceArray[] replicas = partitions.replicas;
		int max = replicas.length;
		int seq1 = 0;
		int seq2 = 0;
		Node fallback1 = null;
		Node fallback2 = null;

		for (int rackId : cluster.rackIds) {
			int seq = sequence;

			for (int i = 0; i < max; i++) {
				int index = seq % max;
				Node node = replicas[index].get(partitionId);
				// Log.info("Try " + rackId + ',' + index + ',' + prevNode + ',' + node + ',' + node.hasRack(namespace, rackId));

				if (node != null) {
					// Avoid retrying on node where command failed
					// even if node is the only one on the same rack.
					if (node != prevNode) {
						if (node.hasRack(namespace, rackId)) {
							if (node.isActive()) {
								// Log.info("Found node on same rack: " + node);
								prevNode = node;
								sequence = seq;
								return node;
							}
						}
						else if (fallback1 == null && node.isActive()) {
							// Meets all criteria except not on same rack.
							fallback1 = node;
							seq1 = seq;
						}
					}
					else if (fallback2 == null && node.isActive()){
						// Previous node is the least desirable fallback.
						fallback2 = node;
						seq2 = seq;
					}
				}
				seq++;
			}
		}

		// Return node on a different rack if it exists.
		if (fallback1 != null) {
			// Log.info("Found fallback node: " + fallback1);
			prevNode = fallback1;
			sequence = seq1;
			return fallback1;
		}

		// Return previous node if it still exists.
		if (fallback2 != null) {
			// Log.info("Found previous node: " + fallback2);
			prevNode = fallback2;
			sequence = seq2;
			return fallback2;
		}

		// Failed to find suitable node.
		Node[] nodeArray = cluster.getNodes();
		throw new AerospikeException.InvalidNode(nodeArray.length, this);
	}

	private Node getMasterNode(Cluster cluster) {
		Node node = partitions.replicas[0].get(partitionId);

		if (node != null && node.isActive()) {
			return node;
		}
		Node[] nodeArray = cluster.getNodes();
		throw new AerospikeException.InvalidNode(nodeArray.length, this);
	}

	private Node getMasterProlesNode(Cluster cluster) {
		AtomicReferenceArray[] replicas = partitions.replicas;

		for (int i = 0; i < replicas.length; i++) {
			int index = Math.abs(cluster.replicaIndex.getAndIncrement() % replicas.length);
			Node node = replicas[index].get(partitionId);

			if (node != null && node.isActive()) {
				return node;
			}
		}
		Node[] nodeArray = cluster.getNodes();
		throw new AerospikeException.InvalidNode(nodeArray.length, this);
	}

	@Override
	public String toString() {
		return namespace + ':' + partitionId;
	}

	@Override
	public int hashCode() {
		final int prime = 31;
		int result = prime + namespace.hashCode();
		result = prime * result + partitionId;
		return result;
	}

	@Override
	public boolean equals(Object obj) {
		if (this == obj) {
			return true;
		}
		if (obj == null || getClass() != obj.getClass()) {
			return false;
		}
		Partition other = (Partition) obj;
		return this.namespace.equals(other.namespace) && this.partitionId == other.partitionId;
	}
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy