All downloads are free. The search and download functionality uses the official Maven repository.

com.vmware.xenon.services.common.NodeGroupUtils Maven / Gradle / Ivy

There is a newer version: 1.6.18
Show newest version
/*
 * Copyright (c) 2014-2015 VMware, Inc. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License.  You may obtain a copy of
 * the License at http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed
 * under the License is distributed on an "AS IS" BASIS, without warranties or
 * conditions of any kind, EITHER EXPRESS OR IMPLIED.  See the License for the
 * specific language governing permissions and limitations under the License.
 */

package com.vmware.xenon.services.common;

import static java.util.stream.Collectors.toMap;

import java.net.URI;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.TimeoutException;
import java.util.logging.Level;

import com.vmware.xenon.common.NodeSelectorService.SelectAndForwardRequest;
import com.vmware.xenon.common.NodeSelectorService.SelectOwnerResponse;
import com.vmware.xenon.common.Operation;
import com.vmware.xenon.common.Operation.CompletionHandler;
import com.vmware.xenon.common.OperationJoin;
import com.vmware.xenon.common.OperationJoin.JoinedCompletionHandler;
import com.vmware.xenon.common.Service;
import com.vmware.xenon.common.ServiceErrorResponse;
import com.vmware.xenon.common.ServiceHost;
import com.vmware.xenon.common.ServiceHost.ServiceHostState;
import com.vmware.xenon.common.ServiceStats;
import com.vmware.xenon.common.ServiceStats.ServiceStat;
import com.vmware.xenon.common.UriUtils;
import com.vmware.xenon.common.Utils;
import com.vmware.xenon.services.common.NodeGroupBroadcastResult.PeerNodeResult;
import com.vmware.xenon.services.common.NodeGroupService.NodeGroupState;

public final class NodeGroupUtils {
    /**
     * Name of the system property that overrides {@code OPERATION_TIMEOUT_SECONDS}
     */
    public static final String PROPERTY_NAME_OPERATION_TIMEOUT_SECONDS =
            Utils.PROPERTY_NAME_PREFIX + "NodeGroupUtils.operationTimeoutSeconds";

    /**
     * Upper bound, in seconds, applied to the operations issued by the service availability
     * check. Read once from the system property above; when the property is not set it
     * falls back to one third of {@link ServiceHostState#DEFAULT_OPERATION_TIMEOUT_MICROS},
     * converted to whole seconds.
     */
    private static final long OPERATION_TIMEOUT_SECONDS = Long.getLong(
            PROPERTY_NAME_OPERATION_TIMEOUT_SECONDS,
            TimeUnit.MICROSECONDS.toSeconds(
                    ServiceHostState.DEFAULT_OPERATION_TIMEOUT_MICROS / 3));

    /**
     * Static utility holder, not meant to be instantiated
     */
    private NodeGroupUtils() {
    }

    /**
     * Convenience overload of
     * {@link #checkServiceAvailability(CompletionHandler, ServiceHost, String, String)}
     * that takes the host, self link and peer node selector path from the supplied service.
     *
     * The check issues a GET to service/stats and looks for
     * {@link Service#STAT_NAME_AVAILABLE}, on the node selected as owner by the node
     * selector co-located with the service.
     *
     * This method should be used only on replicated, owner selected factory services
     *
     * For xenon based applications, use {@link ServiceHost#registerForServiceAvailability(CompletionHandler, boolean, String...)}
     * with boolean set to true.
     */
    public static void checkServiceAvailability(CompletionHandler ch, Service s) {
        ServiceHost serviceHost = s.getHost();
        String selfLink = s.getSelfLink();
        String selectorPath = s.getPeerNodeSelectorPath();
        checkServiceAvailability(ch, serviceHost, selfLink, selectorPath);
    }

    /**
     * Convenience overload of
     * {@link #checkServiceAvailability(CompletionHandler, ServiceHost, URI, String)}
     * that builds the service URI from the supplied host and service link.
     *
     * The check issues a GET to service/stats and looks for
     * {@link Service#STAT_NAME_AVAILABLE}, on the node selected as owner by the node
     * selector co-located with the service.
     *
     * This method should be used only on replicated, owner selected factory services
     *
     * For xenon based applications, use {@link ServiceHost#registerForServiceAvailability(CompletionHandler, boolean, String...)}
     * with boolean set to true.
     *
     * @throws IllegalArgumentException if link is null
     */
    public static void checkServiceAvailability(CompletionHandler ch, ServiceHost host,
            String link, String selectorPath) {
        if (link != null) {
            checkServiceAvailability(ch, host, UriUtils.buildUri(host, link), selectorPath);
            return;
        }
        throw new IllegalArgumentException("link is required");
    }

    /**
     * Issues a GET to service/stats and looks for {@link Service#STAT_NAME_AVAILABLE}
     * The request is issued on the node selected as owner, by the node selector co-located
     * with the service.
     *
     * The sequence is: a POST with a {@link SelectAndForwardRequest} (keyed by the service
     * path) is sent to the node selector to find the owner; then the stats GET is sent to
     * the owner. The supplied handler is invoked with a null failure only if the available
     * stat exists and its latest value is not {@link Service#STAT_VALUE_FALSE}. In every
     * other case, including a response that carries no stats or no owner reference, the
     * handler is invoked with a non null failure.
     *
     * Both operations expire after the smaller of the host operation timeout and the
     * timeout configured through {@link #PROPERTY_NAME_OPERATION_TIMEOUT_SECONDS}.
     *
     * This method should be used only on replicated, owner selected factory services
     *
     * For xenon based applications, use {@link ServiceHost#registerForServiceAvailability(CompletionHandler, boolean, String...)}
     * with boolean set to true.
     *
     * @throws IllegalArgumentException if service or selectorPath is null
     */
    public static void checkServiceAvailability(CompletionHandler ch, ServiceHost host,
            URI service,
            String selectorPath) {
        // validate everything up front, consistent with the link based overload
        if (service == null) {
            throw new IllegalArgumentException("service is required");
        }
        if (selectorPath == null) {
            throw new IllegalArgumentException("selectorPath is required");
        }

        URI statsUri = UriUtils.buildStatsUri(service);

        long timeoutMicros = Math.min(host.getOperationTimeoutMicros(),
                TimeUnit.SECONDS.toMicros(OPERATION_TIMEOUT_SECONDS));

        // Create operation to retrieve stats. This completion will execute after
        // we determine the owner node
        Operation get = Operation.createGet(statsUri).setCompletion((o, e) -> {
            if (e != null) {
                host.log(Level.WARNING, "%s to %s failed: %s",
                        o.getAction(), o.getUri(), e.toString());
                ch.handle(null, e);
                return;
            }

            // a response without stats is treated the same as a missing stat, so the
            // caller is always notified instead of the lambda failing with a NPE
            ServiceStats s = o.getBody(ServiceStats.class);
            ServiceStat availableStat = null;
            if (s != null && s.entries != null) {
                availableStat = s.entries.get(Service.STAT_NAME_AVAILABLE);
            }

            if (availableStat == null || availableStat.latestValue == Service.STAT_VALUE_FALSE) {
                ch.handle(o, new IllegalStateException("not available"));
                return;
            }

            ch.handle(o, null);
        });
        get.setReferer(host.getPublicUri())
                .setExpiration(Utils.fromNowMicrosUtc(timeoutMicros));

        URI nodeSelector = UriUtils.buildUri(service, selectorPath);
        SelectAndForwardRequest req = new SelectAndForwardRequest();
        req.key = service.getPath();

        Operation selectPost = Operation.createPost(nodeSelector)
                .setReferer(host.getPublicUri())
                .setExpiration(get.getExpirationMicrosUtc())
                .setBodyNoCloning(req);
        selectPost.setCompletion((o, e) -> {
            if (e != null) {
                host.log(Level.WARNING, "SelectOwner for %s to %s failed: %s",
                        req.key, nodeSelector, e.toString());
                ch.handle(get, e);
                return;
            }

            SelectOwnerResponse selectRsp = o.getBody(SelectOwnerResponse.class);
            if (selectRsp == null || selectRsp.ownerNodeGroupReference == null) {
                // without an owner reference the stats GET can not be addressed; report
                // the condition to the caller rather than failing inside this lambda
                host.log(Level.WARNING, "SelectOwner for %s to %s returned no owner",
                        req.key, nodeSelector);
                ch.handle(get, new IllegalStateException("owner not available"));
                return;
            }

            // re-target the stats GET at the owner node, keeping the stats path
            URI serviceOnOwner = UriUtils.buildUri(selectRsp.ownerNodeGroupReference,
                    statsUri.getPath());
            get.setUri(serviceOnOwner).sendWith(host);
        }).sendWith(host);
    }

    /**
     * Convergence check intended for a host that is not itself a member of the node group:
     * a request is sent to the node group service of every peer, and the parent operation
     * succeeds only when all of them agree on the last membership update.
     *
     * Unlike {@link #checkConvergence(ServiceHost, NodeGroupState, Operation)}, no entry
     * for the supplied host is looked up in the node group state.
     */
    public static void checkConvergenceFromAnyHost(ServiceHost host, NodeGroupState ngs,
            Operation parentOp) {
        // no self node validation: go straight to the peer fan-out
        checkConvergenceAcrossPeers(host, ngs, parentOp);
    }

    /**
     * Convergence check for a host that is a member of the node group: a request is sent
     * to the node group service of every peer, and the parent operation succeeds only when
     * all of them agree on the last membership update.
     *
     * The supplied host must be listed in the node group state, otherwise the parent
     * operation is failed. A group made of this single node, with a membership quorum of
     * one, completes immediately without issuing any request.
     */
    public static void checkConvergence(ServiceHost host, NodeGroupState ngs, Operation parentOp) {
        NodeState localNode = ngs.nodes.get(host.getId());
        if (localNode == null) {
            parentOp.fail(new IllegalStateException("Self node is required"));
            return;
        }

        boolean isStandAlone = ngs.nodes.size() == 1 && localNode.membershipQuorum == 1;
        if (isStandAlone) {
            // nothing to converge with
            parentOp.complete();
        } else {
            checkConvergenceAcrossPeers(host, ngs, parentOp);
        }
    }

    /**
     * Sends a GET to the node group service of every node that is not marked unavailable
     * in the supplied state, and compares the membership update time reported by each.
     *
     * The parent operation is completed when all responses carry the same membership
     * update time. It is failed when no node is available, when any peer request fails
     * (the message lists the failing nodes), or when the reported times differ. A response
     * without a body is recorded with an update time of -1.
     *
     * Each peer request expires at the earlier of the node group peer request timeout and
     * the parent operation expiration.
     */
    private static void checkConvergenceAcrossPeers(ServiceHost host, NodeGroupState ngs,
            Operation parentOp) {

        // confirm that all available nodes have converged on membership update time
        JoinedCompletionHandler joinedCompletion = (ops, failures) -> {
            if (failures != null) {
                StringBuilder errorRsp = new StringBuilder();
                try {
                    for (Operation op : ops.values()) {
                        if (op.getStatusCode() < Operation.STATUS_CODE_FAILURE_THRESHOLD) {
                            continue;
                        }
                        String error = op.getStatusCode() + "";
                        if (op.hasBody()) {
                            ServiceErrorResponse rsp = op.getErrorResponseBody();
                            String msg = rsp != null ? rsp.message : "";
                            error = msg + ":" + op.getStatusCode();
                        }
                        errorRsp.append("node ")
                                .append(op.getUri())
                                .append(" failed with:")
                                .append(error)
                                .append("\n");
                    }
                } catch (Exception e) {
                    // building the details is best effort: the parent operation is still
                    // failed below with whatever was collected, but the problem is logged
                    // instead of being silently dropped
                    host.log(Level.WARNING, "Failure collecting peer error details: %s",
                            e.toString());
                }
                parentOp.fail(new IllegalStateException("Failures: " + errorRsp.toString()));
                return;
            }

            // per peer update time, kept only to produce a useful error message
            Map<URI, Long> membershipUpdateTimes = new HashMap<>();
            // distinct update times; more than one entry means the peers disagree
            Set<Long> uniqueTimes = new HashSet<>();
            for (Operation peerOp : ops.values()) {
                if (!peerOp.hasBody()) {
                    membershipUpdateTimes.put(peerOp.getUri(), -1L);
                    uniqueTimes.add(-1L);
                    continue;
                }
                NodeGroupState rsp = peerOp.getBody(NodeGroupState.class);
                membershipUpdateTimes.put(peerOp.getUri(), rsp.membershipUpdateTimeMicros);
                uniqueTimes.add(rsp.membershipUpdateTimeMicros);
            }

            if (uniqueTimes.size() > 1) {
                String error = String.format("Membership times not converged: %s",
                        membershipUpdateTimes);
                parentOp.fail(new IllegalStateException(error));
                return;
            }

            parentOp.complete();
        };

        List<Operation> peerOps = new ArrayList<>();
        for (NodeState ns : ngs.nodes.values()) {
            if (NodeState.isUnAvailable(ns)) {
                continue;
            }

            long expirationMicros = Math.min(
                    Utils.fromNowMicrosUtc(ngs.config.peerRequestTimeoutMicros),
                    parentOp.getExpirationMicrosUtc());
            Operation peerOp = Operation.createGet(ns.groupReference)
                    .transferRefererFrom(parentOp)
                    .setExpiration(expirationMicros);
            if (host.getId().equals(ns.id)) {
                // optimization, use in process fast path for self host
                peerOp.setUri(UriUtils.buildUri(host.getUri(), ns.groupReference.getPath()));
            }
            peerOps.add(peerOp);
        }

        if (peerOps.isEmpty()) {
            parentOp.fail(new IllegalStateException("no available nodes"));
            return;
        }

        OperationJoin.create(peerOps).setCompletion(joinedCompletion).sendWith(host);
    }

    /**
     * Retrieves the node group state with a GET to the supplied node group reference, then
     * runs the convergence check across the peers listed in that state.
     *
     * The parent operation is failed with the GET failure if the state can not be
     * retrieved. No self node validation is done on the retrieved state.
     */
    public static void checkConvergence(ServiceHost host, URI nodegroupReference, Operation parentOp) {
        CompletionHandler onGroupState = (getOp, failure) -> {
            if (failure == null) {
                NodeGroupState groupState = getOp.getBody(NodeGroupState.class);
                checkConvergenceAcrossPeers(host, groupState, parentOp);
                return;
            }
            parentOp.fail(failure);
        };

        Operation getGroupState = Operation.createGet(nodegroupReference)
                .transferRefererFrom(parentOp)
                .setCompletion(onGroupState);
        getGroupState.sendWith(host);
    }

    /**
     * Decides whether the node group membership has been unchanged for long enough to be
     * considered stable.
     *
     * The local membership update time must be at least
     * {@code stableGroupMaintenanceIntervalCount * maintIntervalMicros} in the past. The
     * interval count is part of the node group configuration, see {@link NodeGroupService}.
     *
     * Returns false if the supplied host is not listed in the state. A group made of this
     * single node, with a membership quorum of one, is always considered settled.
     */
    public static boolean isMembershipSettled(ServiceHost host, long maintIntervalMicros,
            NodeGroupState localState) {
        NodeState localNode = localState.nodes.get(host.getId());
        if (localNode == null) {
            return false;
        }

        boolean isStandAlone = localNode.membershipQuorum == 1 && localState.nodes.size() == 1;
        if (isStandAlone) {
            // single, stand-alone node, skip update time check
            return true;
        }

        long quietPeriodMicros = localState.config.stableGroupMaintenanceIntervalCount
                * maintIntervalMicros;
        long settledAtMicros = localState.localMembershipUpdateTimeMicros + quietPeriodMicros;

        // settled once the current time has reached the end of the quiet period
        return Utils.getSystemNowMicrosUtc() - settledAtMicros >= 0;
    }

    /**
     * Counts the available nodes in the supplied node group state and compares the count
     * with the membership quorum of the local node.
     *
     * Returns false if the supplied host is not listed in the state. When the state lists
     * a single node, that node is counted as available without inspecting it.
     */
    public static boolean hasMembershipQuorum(ServiceHost host, NodeGroupState ngs) {
        NodeState localNode = ngs.nodes.get(host.getId());
        if (localNode == null) {
            return false;
        }

        int availableCount = 0;
        if (ngs.nodes.size() != 1) {
            for (NodeState peer : ngs.nodes.values()) {
                if (NodeState.isAvailable(peer, localNode.id, false)) {
                    availableCount++;
                }
            }
        } else {
            availableCount = 1;
        }

        return availableCount >= localNode.membershipQuorum;
    }

    /**
     * Evaluates current node group state and returns true if requests should be processed,
     * forwarded, etc
     * Both conditions must hold:
     *
     * 1) The node group membership has been stable for the configured period, see
     * {@link #isMembershipSettled(ServiceHost, long, NodeGroupState)}
     *
     * 2) The number of available node group members is >= membershipQuorum, see
     * {@link #hasMembershipQuorum(ServiceHost, NodeGroupState)}
     */
    public static boolean isNodeGroupAvailable(ServiceHost host, NodeGroupState localState) {
        long maintIntervalMicros = host.getMaintenanceIntervalMicros();

        // cheap check first: it does not walk all nodes in the group
        if (!isMembershipSettled(host, maintIntervalMicros, localState)) {
            return false;
        }
        return hasMembershipQuorum(host, localState);
    }

    /**
     * Completes the supplied operation once the host reports the replicated service at the
     * given path as available.
     *
     * While the check fails it is retried, one host maintenance interval apart, until the
     * supplied operation has expired; the operation is then failed with a
     * {@link TimeoutException}.
     *
     * @see ServiceHost#registerForServiceAvailability(CompletionHandler, boolean, String...)
     * @see #checkServiceAvailability(CompletionHandler, ServiceHost, URI, String)
     */
    public static void registerForReplicatedServiceAvailability(ServiceHost host, Operation op,
            String servicePath, String nodeSelectorPath) {

        CompletionHandler onAvailabilityCheck = (checkOp, failure) -> {
            if (failure == null) {
                op.complete();
                return;
            }

            boolean isExpired = op.getExpirationMicrosUtc() < Utils.getSystemNowMicrosUtc();
            if (isExpired) {
                op.fail(new TimeoutException("Failed to check replicated service availability"));
                return;
            }

            // service is not yet available, reschedule
            host.scheduleCore(
                    () -> registerForReplicatedServiceAvailability(host, op, servicePath,
                            nodeSelectorPath),
                    host.getMaintenanceIntervalMicros(), TimeUnit.MICROSECONDS);
        };

        host.checkReplicatedServiceAvailable(onAvailabilityCheck, servicePath, nodeSelectorPath);
    }

    /**
     * Convert {@link NodeGroupBroadcastResponse} to {@link NodeGroupBroadcastResult}
     *
     * The node counts and quorum are copied over, and one {@link PeerNodeResult} is
     * produced per entry of the json responses (success) and of the failures. Each result
     * is associated with a host id and node group URI by matching the scheme and authority
     * of its request URI against those of the selected nodes; both stay null when no
     * selected node matches.
     *
     * @param response broadcast response to convert
     * @return a new result instance
     */
    public static NodeGroupBroadcastResult toBroadcastResult(NodeGroupBroadcastResponse response) {
        NodeGroupBroadcastResult result = new NodeGroupBroadcastResult();

        result.totalNodeCount = response.nodeCount;
        result.availableNodeCount = response.availableNodeCount;
        result.unavailableNodeCount = response.nodeCount - response.availableNodeCount;
        result.membershipQuorum = response.membershipQuorum;

        // entry example: "http://127.0.0.1:8000" => "host1"
        Map<URI, String> hostIdByUrl = response.selectedNodes.entrySet().stream()
                .collect(toMap(entry -> toHostUri(entry.getValue()), Map.Entry::getKey));

        // for successful responses
        for (Entry<URI, String> entry : response.jsonResponses.entrySet()) {
            PeerNodeResult singleResult = newPeerNodeResult(entry.getKey(), hostIdByUrl,
                    response.selectedNodes);
            singleResult.json = entry.getValue();
            singleResult.errorResponse = null;

            result.allResponses.add(singleResult);
            result.successResponses.add(singleResult);
        }

        // for failure responses
        for (Entry<URI, ServiceErrorResponse> entry : response.failures.entrySet()) {
            ServiceErrorResponse errorResponse = entry.getValue();
            PeerNodeResult singleResult = newPeerNodeResult(entry.getKey(), hostIdByUrl,
                    response.selectedNodes);
            singleResult.json = null;
            singleResult.errorResponse = errorResponse;

            result.allResponses.add(singleResult);
            result.failureResponses.add(singleResult);
            result.failureErrorResponses.add(errorResponse);
        }

        return result;
    }

    /**
     * Creates a peer result with the request URI, host id and node group URI populated;
     * the caller fills in the json or error response.
     */
    private static PeerNodeResult newPeerNodeResult(URI requestUri, Map<URI, String> hostIdByUrl,
            Map<String, URI> selectedNodes) {
        String hostId = hostIdByUrl.get(toHostUri(requestUri));

        PeerNodeResult peerResult = new PeerNodeResult();
        peerResult.requestUri = requestUri;
        peerResult.hostId = hostId;
        // unknown host: skip the lookup, not every map implementation accepts a null key
        peerResult.nodeGroupUri = hostId == null ? null : selectedNodes.get(hostId);
        return peerResult;
    }

    /**
     * Reduces a URI to its scheme and authority, for example
     * "http://127.0.0.1:8000/core/examples?q=1" becomes "http://127.0.0.1:8000".
     */
    private static URI toHostUri(URI uri) {
        String scheme = uri.getScheme();
        String authority = uri.getRawAuthority();
        if (scheme != null && authority != null) {
            // built from the URI components: removing the path with a string replace
            // would remove every occurrence of it (a path of "/" strips all slashes)
            // and would keep the query
            return URI.create(scheme + "://" + authority);
        }

        // no scheme or authority to rebuild from, fall back to removing the path text
        String path = uri.getPath();
        if (path == null || path.isEmpty()) {
            return uri;
        }
        return URI.create(uri.toString().replace(path, ""));
    }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy