All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.elasticsearch.test.disruption.NetworkDisruption Maven / Gradle / Ivy

There is a newer version: 8.16.0
Show newest version
/*
 * Licensed to Elasticsearch under one or more contributor
 * license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright
 * ownership. Elasticsearch licenses this file to you under
 * the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.elasticsearch.test.disruption;

import com.carrotsearch.randomizedtesting.generators.RandomPicks;

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.elasticsearch.cluster.ClusterState;
import org.elasticsearch.cluster.NodeConnectionsService;
import org.elasticsearch.cluster.service.ClusterService;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.common.util.set.Sets;
import org.elasticsearch.test.InternalTestCluster;
import org.elasticsearch.test.transport.MockTransportService;
import org.elasticsearch.transport.ConnectTransportException;
import org.elasticsearch.transport.TransportService;

import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.Random;
import java.util.Set;
import java.util.function.BiConsumer;

import static org.junit.Assert.assertFalse;

/**
 * Network disruptions are modeled using two components:
 * 1) the {@link DisruptedLinks} represents the links in the network that are to be disrupted
 * 2) the {@link NetworkLinkDisruptionType} represents the failure mode that is to be applied to the links
 */
public class NetworkDisruption implements ServiceDisruptionScheme {

    private final Logger logger = LogManager.getLogger(NetworkDisruption.class);

    private final DisruptedLinks disruptedLinks;
    private final NetworkLinkDisruptionType networkLinkDisruptionType;

    protected volatile InternalTestCluster cluster;
    protected volatile boolean activeDisruption = false;

    /**
     * Creates a network disruption from the two components that model it.
     *
     * @param disruptedLinks            the links in the network that are to be disrupted
     * @param networkLinkDisruptionType the failure mode that is applied to those links
     */
    public NetworkDisruption(DisruptedLinks disruptedLinks, NetworkLinkDisruptionType networkLinkDisruptionType) {
        this.networkLinkDisruptionType = networkLinkDisruptionType;
        this.disruptedLinks = disruptedLinks;
    }

    /**
     * Returns the links that this disruption was constructed with.
     */
    public DisruptedLinks getDisruptedLinks() {
        return this.disruptedLinks;
    }

    /**
     * Returns the failure mode that this disruption was constructed with.
     */
    public NetworkLinkDisruptionType getNetworkLinkDisruptionType() {
        return this.networkLinkDisruptionType;
    }

    /**
     * Remembers the given cluster as the one this scheme operates on. No disruption rules are installed here; that is done
     * by {@link #startDisrupting()}.
     *
     * @param cluster the test cluster whose nodes will be looked up when rules are applied or removed
     */
    @Override
    public void applyToCluster(InternalTestCluster cluster) {
        this.cluster = cluster;
    }

    /**
     * Detaches this scheme by stopping any active disruption.
     *
     * @param cluster not consulted by this method; {@link #stopDisrupting()} works on the cluster stored by
     *                {@link #applyToCluster}
     */
    @Override
    public void removeFromCluster(InternalTestCluster cluster) {
        this.stopDisrupting();
    }

    /**
     * Removes this scheme from the cluster and then checks that the cluster is healthy again.
     *
     * @param cluster the cluster that is passed on to {@link #removeFromCluster} and {@link #ensureHealthy}
     */
    @Override
    public void removeAndEnsureHealthy(InternalTestCluster cluster) {
        this.removeFromCluster(cluster);
        this.ensureHealthy(cluster);
    }

    /**
     * Checks that the cluster is healthy after the disruption: first waits for the expected number of nodes, then triggers
     * reconnection between all nodes. Must only be called while no disruption is active (checked with an assertion).
     *
     * @param cluster the cluster to check
     */
    public void ensureHealthy(InternalTestCluster cluster) {
        assert activeDisruption == false;
        this.ensureNodeCount(cluster);
        NetworkDisruption.ensureFullyConnectedCluster(cluster);
    }

    /**
     * Asks every node of the cluster to connect to all nodes contained in its own current cluster state.
     *
     * Some network disruptions may leave nodes that are not the master disconnected from each other.
     * {@link org.elasticsearch.cluster.NodeConnectionsService} will eventually reconnect but it's
     * handy to be able to ensure this happens faster
     *
     * @param cluster the cluster whose nodes should reconnect to each other
     */
    public static void ensureFullyConnectedCluster(InternalTestCluster cluster) {
        for (String nodeName : cluster.getNodeNames()) {
            // the state as seen by this particular node, not a global view
            final ClusterState localState = cluster.getInstance(ClusterService.class, nodeName).state();
            final NodeConnectionsService connections = cluster.getInstance(NodeConnectionsService.class, nodeName);
            connections.connectToNodes(localState.nodes());
        }
    }

    /**
     * Asserts that a cluster health request which waits for as many nodes as {@code cluster.size()} and for no relocating
     * shards does not time out.
     *
     * @param cluster the cluster to send the health request to
     */
    protected void ensureNodeCount(InternalTestCluster cluster) {
        final boolean timedOut = cluster.client().admin().cluster().prepareHealth()
            .setWaitForNodes(String.valueOf(cluster.size()))
            .setWaitForNoRelocatingShards(true)
            .get()
            .isTimedOut();
        assertFalse("cluster failed to form after disruption was healed", timedOut);
    }

    /**
     * No-op: this scheme takes no per-node action here. Disruption rules are only installed by {@link #startDisrupting()}.
     */
    @Override
    public synchronized void applyToNode(String node, InternalTestCluster cluster) {
        // intentionally empty
    }

    /**
     * Removes the disruption from every disrupted link that has the given node as one of its endpoints, in both
     * directions (node to others, and others to node).
     *
     * @param node1   name of the node whose links are healed
     * @param cluster cluster whose node names are used to enumerate the other endpoint of each link
     */
    @Override
    public synchronized void removeFromNode(String node1, InternalTestCluster cluster) {
        // include the node name so the log tells which node was healed
        logger.info("stop disrupting node [{}] (disruption type: {}, disrupted links: {})",
            node1, networkLinkDisruptionType, disruptedLinks);
        final String[] singleNode = new String[] { node1 };
        final String[] allNodes = cluster.getNodeNames();
        applyToNodes(singleNode, allNodes, networkLinkDisruptionType::removeDisruption);
        applyToNodes(allNodes, singleNode, networkLinkDisruptionType::removeDisruption);
    }

    /**
     * No-op: nothing is done by this scheme when this callback is invoked.
     */
    @Override
    public synchronized void testClusterClosed() {
        // intentionally empty
    }

    /**
     * Applies the disruption type to every disrupted link between the nodes of the cluster and marks the disruption as
     * active afterwards.
     */
    @Override
    public synchronized void startDisrupting() {
        logger.info("start disrupting (disruption type: {}, disrupted links: {})", networkLinkDisruptionType, disruptedLinks);
        final String[] sources = cluster.getNodeNames();
        final String[] targets = cluster.getNodeNames();
        applyToNodes(sources, targets, networkLinkDisruptionType::applyDisruption);
        activeDisruption = true;
    }

    /**
     * Removes the disruption from every disrupted link between the nodes of the cluster and marks the disruption as
     * inactive. Does nothing when no disruption is currently active.
     */
    @Override
    public synchronized void stopDisrupting() {
        if (activeDisruption) {
            logger.info("stop disrupting (disruption scheme: {}, disrupted links: {})", networkLinkDisruptionType, disruptedLinks);
            final String[] sources = cluster.getNodeNames();
            final String[] targets = cluster.getNodeNames();
            applyToNodes(sources, targets, networkLinkDisruptionType::removeDisruption);
            activeDisruption = false;
        }
    }

    /**
     * Applies action to all disrupted links between two sets of nodes.
     *
     * A pair (node1, node2) is passed to the consumer only if both nodes participate in {@link DisruptedLinks#nodes()},
     * the two nodes are different, and {@link DisruptedLinks#disrupt} returns true for the pair.
     *
     * @param nodes1   names of the nodes acting as the first argument (source) of each link
     * @param nodes2   names of the nodes acting as the second argument (target) of each link
     * @param consumer action invoked with the transport services of the first and second node of every selected pair
     */
    private void applyToNodes(String[] nodes1, String[] nodes2, BiConsumer<MockTransportService, MockTransportService> consumer) {
        for (String node1 : nodes1) {
            if (disruptedLinks.nodes().contains(node1) == false) {
                continue;
            }
            for (String node2 : nodes2) {
                if (disruptedLinks.nodes().contains(node2) == false || node1.equals(node2)) {
                    continue;
                }
                if (disruptedLinks.disrupt(node1, node2)) {
                    consumer.accept(transport(node1), transport(node2));
                }
            }
        }
    }

    /**
     * Returns the expected time to heal as reported by the configured disruption type.
     */
    @Override
    public TimeValue expectedTimeToHeal() {
        final NetworkLinkDisruptionType disruptionType = this.networkLinkDisruptionType;
        return disruptionType.expectedTimeToHeal();
    }

    /**
     * Looks up the transport service of the given node in the stored cluster and casts it to {@link MockTransportService}.
     *
     * @param node name of the node
     */
    private MockTransportService transport(String node) {
        final TransportService service = cluster.getInstance(TransportService.class, node);
        return (MockTransportService) service;
    }

    /**
     * Describes this disruption by its disruption type and its disrupted links.
     */
    @Override
    public String toString() {
        return new StringBuilder("network disruption (disruption type: ")
            .append(networkLinkDisruptionType)
            .append(", disrupted links: ")
            .append(disruptedLinks)
            .append(")")
            .toString();
    }

    /**
     * Represents a set of nodes with connections between nodes that are to be disrupted
     */
    public abstract static class DisruptedLinks {
        private final Set<String> nodes;

        /**
         * Collects the union of all given node sets. The union is stored in a fresh set, so later changes to the
         * argument sets are not reflected by {@link #nodes()}.
         *
         * @param nodeSets sets of node names that can participate in disruptions
         */
        @SafeVarargs // the varargs array is only read, never stored or exposed
        protected DisruptedLinks(Set<String>... nodeSets) {
            Set<String> allNodes = new HashSet<>();
            for (Set<String> nodeSet : nodeSets) {
                allNodes.addAll(nodeSet);
            }
            this.nodes = allNodes;
        }

        /**
         * Set of all nodes that can participate in disruptions
         */
        public Set<String> nodes() {
            return nodes;
        }

        /**
         * Returns true iff network should be disrupted between the two nodes
         */
        public abstract boolean disrupt(String node1, String node2);
    }

    /**
     * Creates two partitions with symmetric failures
     */
    public static class TwoPartitions extends DisruptedLinks {

        protected final Set<String> nodesSideOne;
        protected final Set<String> nodesSideTwo;

        /**
         * Creates two partitions that consist of a single node each.
         */
        public TwoPartitions(String node1, String node2) {
            this(Collections.singleton(node1), Collections.singleton(node2));
        }

        /**
         * Creates two partitions from the given node sets. Both sets are expected to be non-empty and disjoint; this is
         * checked with assertions only. The sets are stored as passed, without copying.
         *
         * @param nodesSideOne node names of the first partition
         * @param nodesSideTwo node names of the second partition
         */
        public TwoPartitions(Set<String> nodesSideOne, Set<String> nodesSideTwo) {
            super(nodesSideOne, nodesSideTwo);
            this.nodesSideOne = nodesSideOne;
            this.nodesSideTwo = nodesSideTwo;
            assert nodesSideOne.isEmpty() == false;
            assert nodesSideTwo.isEmpty() == false;
            assert Sets.haveEmptyIntersection(nodesSideOne, nodesSideTwo);
        }

        /**
         * Randomly splits the given nodes into two non-empty partitions.
         */
        public static TwoPartitions random(Random random, String... nodes) {
            return random(random, Sets.newHashSet(nodes));
        }

        /**
         * Randomly splits the given nodes into two non-empty partitions.
         *
         * @param random source of randomness
         * @param nodes  node names to distribute; at least two are expected (checked with an assertion)
         */
        public static TwoPartitions random(Random random, Set<String> nodes) {
            assert nodes.size() >= 2 : "two partitions topology requires at least 2 nodes";
            Set<String> nodesSideOne = new HashSet<>();
            Set<String> nodesSideTwo = new HashSet<>();
            for (String node : nodes) {
                // the first two nodes seed one side each so that neither side ends up empty
                if (nodesSideOne.isEmpty()) {
                    nodesSideOne.add(node);
                } else if (nodesSideTwo.isEmpty()) {
                    nodesSideTwo.add(node);
                } else if (random.nextBoolean()) {
                    nodesSideOne.add(node);
                } else {
                    nodesSideTwo.add(node);
                }
            }
            return new TwoPartitions(nodesSideOne, nodesSideTwo);
        }

        /**
         * Returns true iff the two nodes are on opposite sides, regardless of argument order.
         */
        @Override
        public boolean disrupt(String node1, String node2) {
            return (nodesSideOne.contains(node1) && nodesSideTwo.contains(node2))
                || (nodesSideOne.contains(node2) && nodesSideTwo.contains(node1));
        }

        /**
         * Returns an unmodifiable view of the first partition.
         */
        public Set<String> getNodesSideOne() {
            return Collections.unmodifiableSet(nodesSideOne);
        }

        /**
         * Returns an unmodifiable view of the second partition.
         */
        public Set<String> getNodesSideTwo() {
            return Collections.unmodifiableSet(nodesSideTwo);
        }

        /**
         * Returns the larger partition; side one is returned when both sides have the same size.
         */
        public Collection<String> getMajoritySide() {
            if (nodesSideOne.size() >= nodesSideTwo.size()) {
                return getNodesSideOne();
            } else {
                return getNodesSideTwo();
            }
        }

        /**
         * Returns the smaller partition; side two is returned when both sides have the same size.
         */
        public Collection<String> getMinoritySide() {
            if (nodesSideOne.size() >= nodesSideTwo.size()) {
                return getNodesSideTwo();
            } else {
                return getNodesSideOne();
            }
        }

        @Override
        public String toString() {
            return "two partitions (partition 1: " + nodesSideOne + " and partition 2: " + nodesSideTwo + ")";
        }
    }

    /**
     * Creates two partitions with symmetric failures and a bridge node that can connect to both of the partitions
     */
    public static class Bridge extends DisruptedLinks {

        private final String bridgeNode;
        private final Set<String> nodesSideOne;
        private final Set<String> nodesSideTwo;

        /**
         * Creates a bridge topology. Both sides are expected to be non-empty, disjoint and to not contain the bridge
         * node; this is checked with assertions only. The sets are stored as passed, without copying.
         *
         * @param bridgeNode   name of the node that stays connected to both sides
         * @param nodesSideOne node names of the first partition
         * @param nodesSideTwo node names of the second partition
         */
        public Bridge(String bridgeNode, Set<String> nodesSideOne, Set<String> nodesSideTwo) {
            super(Collections.singleton(bridgeNode), nodesSideOne, nodesSideTwo);
            this.bridgeNode = bridgeNode;
            this.nodesSideOne = nodesSideOne;
            this.nodesSideTwo = nodesSideTwo;
            assert nodesSideOne.isEmpty() == false;
            assert nodesSideTwo.isEmpty() == false;
            assert Sets.haveEmptyIntersection(nodesSideOne, nodesSideTwo);
            assert nodesSideOne.contains(bridgeNode) == false && nodesSideTwo.contains(bridgeNode) == false;
        }

        /**
         * Picks a random bridge node and randomly splits the remaining nodes into two non-empty partitions.
         */
        public static Bridge random(Random random, String... nodes) {
            return random(random, Sets.newHashSet(nodes));
        }

        /**
         * Picks a random bridge node and randomly splits the remaining nodes into two non-empty partitions.
         *
         * @param random source of randomness
         * @param nodes  node names to distribute; at least three are expected (checked with an assertion)
         */
        public static Bridge random(Random random, Set<String> nodes) {
            assert nodes.size() >= 3 : "bridge topology requires at least 3 nodes";
            String bridgeNode = RandomPicks.randomFrom(random, nodes);
            Set<String> nodesSideOne = new HashSet<>();
            Set<String> nodesSideTwo = new HashSet<>();
            for (String node : nodes) {
                if (node.equals(bridgeNode) == false) {
                    // the first two non-bridge nodes seed one side each so that neither side ends up empty
                    if (nodesSideOne.isEmpty()) {
                        nodesSideOne.add(node);
                    } else if (nodesSideTwo.isEmpty()) {
                        nodesSideTwo.add(node);
                    } else if (random.nextBoolean()) {
                        nodesSideOne.add(node);
                    } else {
                        nodesSideTwo.add(node);
                    }
                }
            }
            return new Bridge(bridgeNode, nodesSideOne, nodesSideTwo);
        }

        /**
         * Returns true iff the two nodes are on opposite sides, regardless of argument order. A node that is on neither
         * side (such as the bridge node) never yields true.
         */
        @Override
        public boolean disrupt(String node1, String node2) {
            return (nodesSideOne.contains(node1) && nodesSideTwo.contains(node2))
                || (nodesSideOne.contains(node2) && nodesSideTwo.contains(node1));
        }

        /**
         * Returns the name of the bridge node.
         */
        public String getBridgeNode() {
            return bridgeNode;
        }

        /**
         * Returns an unmodifiable view of the first partition.
         */
        public Set<String> getNodesSideOne() {
            return Collections.unmodifiableSet(nodesSideOne);
        }

        /**
         * Returns an unmodifiable view of the second partition.
         */
        public Set<String> getNodesSideTwo() {
            return Collections.unmodifiableSet(nodesSideTwo);
        }

        @Override
        public String toString() {
            return "bridge partition (super connected node: [" + bridgeNode + "], partition 1: " + nodesSideOne +
                " and partition 2: " + nodesSideTwo + ")";
        }
    }

    /**
     * Disrupts the link between any two of the given nodes: {@link #disrupt} returns true for every pair.
     */
    public static class IsolateAllNodes extends DisruptedLinks {

        /**
         * @param nodes names of the nodes that participate in the disruption
         */
        public IsolateAllNodes(Set<String> nodes) {
            super(nodes);
        }

        @Override
        public boolean disrupt(String node1, String node2) {
            return true;
        }
    }

    /**
     * Abstract class representing various types of network disruptions. Instances of this class override the {@link #applyDisruption}
     * method to apply their specific disruption type to requests that are sent from a source to a target node.
     */
    public abstract static class NetworkLinkDisruptionType {

        /**
         * Applies network disruption for requests sent from the node represented by the source transport service to the node represented
         * by the target transport service.
         *
         * @param sourceTransportService source transport service from which requests are sent
         * @param targetTransportService target transport service to which requests are sent
         */
        public abstract void applyDisruption(MockTransportService sourceTransportService, MockTransportService targetTransportService);

        /**
         * Removes network disruption that was added by {@link #applyDisruption}. This default implementation clears the rule
         * that the source transport service holds for the target transport service.
         *
         * @param sourceTransportService source transport service from which requests are sent
         * @param targetTransportService target transport service to which requests are sent
         */
        public void removeDisruption(MockTransportService sourceTransportService, MockTransportService targetTransportService) {
            sourceTransportService.clearRule(targetTransportService);
        }

        /**
         * Returns expected time to heal after disruption has been removed. Defaults to instant healing (zero milliseconds);
         * subclasses may override this.
         */
        public TimeValue expectedTimeToHeal() {
            return TimeValue.timeValueMillis(0);
        }

    }

    /**
     * Simulates a network disconnect. Sending a request from source to target node throws a {@link ConnectTransportException}.
     */
    public static class NetworkDisconnect extends NetworkLinkDisruptionType {

        /**
         * Registers a fail-to-send/no-connect rule for the target on the source transport service.
         *
         * @param sourceTransportService source transport service on which the rule is registered
         * @param targetTransportService target transport service the rule refers to
         */
        @Override
        public void applyDisruption(MockTransportService sourceTransportService, MockTransportService targetTransportService) {
            sourceTransportService.addFailToSendNoConnectRule(targetTransportService);
        }

        @Override
        public String toString() {
            return "network disconnects";
        }
    }

    /**
     * Simulates an unresponsive target node by dropping requests sent from source to target node.
     */
    public static class NetworkUnresponsive extends NetworkLinkDisruptionType {

        /**
         * Registers an unresponsive rule for the target on the source transport service.
         *
         * @param sourceTransportService source transport service on which the rule is registered
         * @param targetTransportService target transport service the rule refers to
         */
        @Override
        public void applyDisruption(MockTransportService sourceTransportService, MockTransportService targetTransportService) {
            sourceTransportService.addUnresponsiveRule(targetTransportService);
        }

        @Override
        public String toString() {
            return "network unresponsive";
        }
    }

    /**
     * Simulates slow or congested network. Delivery of requests that are sent from source to target node are delayed by a configurable
     * time amount.
     */
    public static class NetworkDelay extends NetworkLinkDisruptionType {

        // NOTE(review): these are mutable public statics; left non-final to stay source-compatible with code that may reassign them
        public static TimeValue DEFAULT_DELAY_MIN = TimeValue.timeValueSeconds(10);
        public static TimeValue DEFAULT_DELAY_MAX = TimeValue.timeValueSeconds(90);

        private final TimeValue delay;

        /**
         * Delays requests by a fixed time value.
         *
         * @param delay time to delay requests
         */
        public NetworkDelay(TimeValue delay) {
            this.delay = delay;
        }

        /**
         * Delays requests by a random but fixed time value between {@link #DEFAULT_DELAY_MIN} and {@link #DEFAULT_DELAY_MAX}.
         *
         * @param random instance to use for randomization of delay
         */
        public static NetworkDelay random(Random random) {
            return random(random, DEFAULT_DELAY_MIN, DEFAULT_DELAY_MAX);
        }

        /**
         * Delays requests by a random but fixed time value between delayMin (inclusive) and delayMax (exclusive). If both
         * bounds are equal, that value is used and the random instance is not consulted.
         *
         * @param random   instance to use for randomization of delay
         * @param delayMin minimum delay
         * @param delayMax maximum delay
         * @throws IllegalArgumentException if delayMax is smaller than delayMin
         */
        public static NetworkDelay random(Random random, TimeValue delayMin, TimeValue delayMax) {
            final long minMillis = delayMin.millis();
            final long maxMillis = delayMax.millis();
            if (maxMillis < minMillis) {
                throw new IllegalArgumentException(
                    "delayMax [" + delayMax + "] must not be smaller than delayMin [" + delayMin + "]");
            }
            final long range = maxMillis - minMillis;
            final long offset;
            if (range == 0) {
                offset = 0;
            } else if (range <= Integer.MAX_VALUE) {
                // same draw as before for ranges that fit into an int, so seeded runs stay reproducible
                offset = random.nextInt((int) range);
            } else {
                // an int cast would overflow here; reduce a random long into [0, range) instead
                offset = Math.floorMod(random.nextLong(), range);
            }
            return new NetworkDelay(TimeValue.timeValueMillis(minMillis + offset));
        }

        /**
         * Registers an unresponsive rule with the configured delay for the target on the source transport service.
         *
         * @param sourceTransportService source transport service on which the rule is registered
         * @param targetTransportService target transport service the rule refers to
         */
        @Override
        public void applyDisruption(MockTransportService sourceTransportService, MockTransportService targetTransportService) {
            sourceTransportService.addUnresponsiveRule(targetTransportService, delay);
        }

        /**
         * Returns the configured delay as the expected time to heal.
         */
        @Override
        public TimeValue expectedTimeToHeal() {
            return delay;
        }

        @Override
        public String toString() {
            return "network delays for [" + delay + "]";
        }
    }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy