All Downloads are FREE. Search and download functionalities are using the official Maven repository.

oracle.kv.util.PingCollector Maven / Gradle / Ivy

Go to download

NoSQL Database Server - supplies build and runtime support for the server (store) side of the Oracle NoSQL Database.

There is a newer version: 18.3.10
Show newest version
/*-
 * Copyright (C) 2011, 2018 Oracle and/or its affiliates. All rights reserved.
 *
 * This file was distributed by Oracle as part of a version of Oracle NoSQL
 * Database made available at:
 *
 * http://www.oracle.com/technetwork/database/database-technologies/nosqldb/downloads/index.html
 *
 * Please see the LICENSE file included in the top-level directory of the
 * appropriate version of Oracle NoSQL Database for a copy of the license and
 * additional information.
 */
package oracle.kv.util;

import java.rmi.NotBoundException;
import java.rmi.RemoteException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

import oracle.kv.impl.admin.AdminFaultException;
import oracle.kv.impl.admin.AdminStatus;
import oracle.kv.impl.admin.CommandServiceAPI;
import oracle.kv.impl.admin.param.Parameters;
import oracle.kv.impl.admin.param.StorageNodeParams;
import oracle.kv.impl.arb.ArbNodeStatus;
import oracle.kv.impl.arb.admin.ArbNodeAdminAPI;
import oracle.kv.impl.monitor.views.ServiceChange;
import oracle.kv.impl.rep.RepNodeStatus;
import oracle.kv.impl.rep.admin.RepNodeAdminAPI;
import oracle.kv.impl.security.login.LoginManager;
import oracle.kv.impl.sna.StorageNodeAgentAPI;
import oracle.kv.impl.sna.StorageNodeStatus;
import oracle.kv.impl.topo.AdminId;
import oracle.kv.impl.topo.ArbNode;
import oracle.kv.impl.topo.ArbNodeId;
import oracle.kv.impl.topo.RepGroup;
import oracle.kv.impl.topo.RepGroupId;
import oracle.kv.impl.topo.RepNode;
import oracle.kv.impl.topo.RepNodeId;
import oracle.kv.impl.topo.ResourceId;
import oracle.kv.impl.topo.StorageNode;
import oracle.kv.impl.topo.StorageNodeId;
import oracle.kv.impl.topo.Topology;
import oracle.kv.impl.util.ConfigurableService.ServiceStatus;
import oracle.kv.impl.util.registry.RegistryUtils;
import oracle.kv.util.Ping.Problem;

/**
 * PingCollector visits each service in the store and requests ping status. The
 * collector is created with a topology instance to direct its collection
 * efforts. It is meant for one time use; detected problems are accumulated and
 * are not cleared between calls to
 *
 * - Since admin services are not in the topology, a Parameters instance
 * is used to find admins. The Parameters instance may be null, in which case
 * the collector omits admins.
 *
 * - The collector will use a login manager if one is supplied to contact
 * admin services.
 *
 * The collector is meant to be robust and to continue on past any component
 * failure. If one component of the store can't be reached, the collector
 * saves information about the problem and carries on.
 */
public class PingCollector {

    private static final int MAX_N_THREADS = 10;

    /*
     * The action to take when the collector visits a storage node
     */
    private interface StorageNodeCallback {
        void nodeCallback(StorageNode sn, StorageNodeStatus status);
    }

    /*
     * The action to take when the collector visits a replication node
     */
    private interface RepNodeCallback {
        void nodeCallback(RepNode rn, RepNodeStatus status);
    }

    /*
     * The action to take when the collector visits an Admin service.
     */
    private interface AdminCallback {
        void nodeCallback(AdminId aId, AdminInfo info);
    }

    /*
     * The action to take when the collector visits an arbiter node
     */
    private interface ArbNodeCallback {
        void nodeCallback(ArbNode an, ArbNodeStatus status);
    }

    /* A struct for packaging information returned from an Admin service. */
    class AdminInfo {
        final StorageNodeId snId;
        AdminStatus adminStatus;

        AdminInfo(StorageNodeId snId) {
            this.snId = snId;
        }
    }

    /* Collections of status information */
    private Map snMap;
    private Map rnMap;
    private Map anMap;

    /*
     * AdminMap and monitoredChanges are collected at the same time. AdminMap *
     * holds admin statuses obtained by pinging each Admin service, while *
     * monitoredChanges holds the collection of service changes held by the
     * admin monitoring system.
     */
    private Map adminMap;
    private Map monitoredChanges;

    /* Map of problems encountered when pinging components of the store. */
    private final List problems =
        Collections.synchronizedList(new ArrayList());

    private final Topology topo;
    private final Parameters params;
    private final LoginManager adminLoginManager;

    PingCollector(Topology topo,
                  Parameters params,
                  LoginManager adminLoginManager) {

        this.topo = topo;
        this.params = params;
        this.adminLoginManager = adminLoginManager;
    }

    public PingCollector(Topology topo) {
        this(topo, null, null);
    }

    Map getSNMap() {
        if (snMap == null) {
            /*
             * Note that a synchronized HashMap is used rather than
             * ConcurrentHashMap because we need to be able to support null
             * status values, and ConcurrentHashMap does not support null
             * values.
             */
            snMap = Collections.synchronizedMap(
                new HashMap());
            forEachStorageNode(new StorageNodeCallback() {
                    @Override
                    public void nodeCallback(StorageNode sn,
                                             StorageNodeStatus status) {
                        snMap.put(sn, status);
                    }
                });
        }
        return snMap;
    }

    Map getRNMap() {
        if (rnMap == null) {
            /*
             * Note that a synchronized TreeMap is used rather than
             * ConcurrentHashMap because we need to be able to support null
             * status values, and ConcurrentHashMap does not support null
             * values.
             */
            rnMap = Collections.synchronizedMap(
                new TreeMap());
            forEachRepNode(new RepNodeCallback() {
                    @Override
                    public void nodeCallback(RepNode rn, RepNodeStatus status) {
                        rnMap.put(rn, status);
                    }
                });
        }
        return rnMap;
    }

    Map getAdminMap() {
        if (adminMap == null) {
            /*
             * Note that a synchronized HashMap is used rather than
             * ConcurrentHashMap because we need to be able to support null
             * status values, and ConcurrentHashMap does not support null
             * values.
             */
            adminMap = Collections.synchronizedMap(
                new HashMap());
            monitoredChanges = Collections.synchronizedMap(
                new HashMap());
            forEachAdmin(new AdminCallback() {
                    @Override
                    public void nodeCallback(AdminId aId,
                                             AdminInfo info) {
                        adminMap.put(aId, info);
                    }
                });
        }
        return adminMap;
    }

    Map getANMap() {
        if (anMap == null) {
            anMap = new TreeMap();
            forEachArbNode(new ArbNodeCallback() {
                    @Override
                    public void nodeCallback(ArbNode an, ArbNodeStatus status) {
                        anMap.put(an, status);
                    }
                });
        }
        return anMap;
    }

    Map getMonitoredChanges() {
        if (monitoredChanges == null) {
            getAdminMap();
        }
        return monitoredChanges;
    }

    List getProblems() {
        return problems;
    }

    /**
     * Collect all statuses, and then return a service status map of all the
     * SNs, RNs, and optionally admins, that make up the topology.
     * @return a map of all node resource ids to their statuses.
     */
    public Map getTopologyStatus() {

        final Map ret =
            new HashMap();

        for (Map.Entry e :
                 getSNMap().entrySet()) {
            ServiceStatus status =
                e.getValue() == null ? ServiceStatus.UNREACHABLE:
                e.getValue().getServiceStatus();

            ret.put(e.getKey().getResourceId(), status);
        }

        for (Map.Entry e :
                 getRNMap().entrySet()) {
            ServiceStatus status =
                e.getValue() == null ? ServiceStatus.UNREACHABLE:
                e.getValue().getServiceStatus();

            ret.put(e.getKey().getResourceId(), status);
        }

        for (Map.Entry e :
                 getAdminMap().entrySet()) {
            ServiceStatus status = null;
            if (e.getValue() == null) {
                status = ServiceStatus.UNREACHABLE;
            } else {
                AdminStatus aStatus = e.getValue().adminStatus;
                status = (aStatus == null) ? ServiceStatus.UNREACHABLE:
                    aStatus.getServiceStatus();
            }
            ret.put(e.getKey(), status);
        }

        return ret;
    }

    /**
     * Collects status for a given replication group and returns the
     * replication node which is the master for the replication group. If the
     * master is not found or there are more than one node that thinks it's
     * master, @code null is returned.
     *
     * @return a replication node or @code null
     */
    public RepNode getMaster(RepGroupId rgId) {

        final List master =
            Collections.synchronizedList(new ArrayList());

        forEachRepNodeInShard
            (rgId,
             new RepNodeCallback() {
                 @Override
                 public void nodeCallback(RepNode rn, RepNodeStatus status) {
                     if ((status != null) &&
                         status.getReplicationState().isMaster()) {
                         master.add(rn);
                     }
                 }
             });
        return (master.size() == 1) ? master.get(0) : null;
    }

    /**
     * Find the master of this shard and return its full name and its
     * haport. Returns null if no master found.
     */
    public RNNameHAPort getMasterNamePort(RepGroupId rgId) {

        final List namePort =
            Collections.synchronizedList(new ArrayList());

        forEachRepNodeInShard
            (rgId, new RepNodeCallback() {
                    @Override
                    public void nodeCallback(RepNode rn, RepNodeStatus status) {
                        if ((status != null) &&
                            status.getReplicationState().isMaster()) {
                            String rnName = rn.getResourceId().getFullName();
                            namePort.add(new RNNameHAPort
                                         (rnName, status.getHAHostPort()));
                        }
                    }
                });
        return (namePort.size() == 1) ? namePort.get(0) : null;
    }

    /* A struct for returning information about a master of a shard */
    public class RNNameHAPort {
        private final String fullName;
        private final String haHostPort;

        RNNameHAPort(String fullName, String haHostPort) {
            this.fullName = fullName;
            this.haHostPort = haHostPort;
        }

        public String getFullName() {
            return fullName;
        }

        public String getHAHostPort() {
            return haHostPort;
        }
    }

    /**
     * Return a map of replication node status for each RN in the shard. Note
     * that the status value may be null if the RN is not responsive.
     */
    public Map getRepNodeStatus(RepGroupId rgId) {

        /*
         * Get status for each node in the shard
         * Note that a synchronized HashMap is used rather than
         * ConcurrentHashMap because we need to be able to support null
         * status values, and ConcurrentHashMap does not support null
         * values.
         */
        final Map statusMap =
            Collections.synchronizedMap(
                new HashMap());

        forEachRepNodeInShard
            (rgId, new RepNodeCallback() {
                    @Override
                    public void nodeCallback(RepNode rn, RepNodeStatus status) {
                        statusMap.put(rn.getResourceId(), status);
                    }
                });
        return statusMap;
    }

    /**
     * Return a map of node status for each AN in the shard. Note
     * that the status value may be null if the AN is not responsive.
     */
    public Map getArbNodeStatus(RepGroupId rgId) {

        /* Get status for each node in the shard */
        final Map statusMap =
            new HashMap();

        forEachArbNodeInShard
            (rgId, new ArbNodeCallback() {
                    @Override
                    public void nodeCallback(ArbNode an, ArbNodeStatus status) {
                        statusMap.put(an.getResourceId(), status);
                    }
                });
        return statusMap;
    }

    private void forEachStorageNode(final StorageNodeCallback callback) {

        ExecutorService executor = Executors.newFixedThreadPool(MAX_N_THREADS);
        Collection> tasks = new ArrayList>();
        /* LoginManager not needed for ping */
        final RegistryUtils regUtils = new RegistryUtils(topo,
                                                         (LoginManager) null);

        for (final StorageNode sn : topo.getStorageNodeMap().getAll()) {
            tasks.add(new Callable() {
                @Override
                public Void call() throws Exception {
                    StorageNodeStatus status = null;
                    try {
                        StorageNodeAgentAPI sna =
                            regUtils.getStorageNodeAgent(sn.getResourceId());
                        status = sna.ping();
                    } catch (RemoteException re) {
                        problems.add(new Problem(sn.getResourceId(),
                                                 sn.getHostname(),
                                                 sn.getRegistryPort(),
                                                 "Can't call ping for SN: ",
                                                 re));
                    } catch (NotBoundException e) {
                        problems.add(new Problem(sn.getResourceId(),
                                                 sn.getHostname(),
                                                 sn.getRegistryPort(),
                                                 "No RMI service for SN",
                                                 e));
                    } finally {
                        callback.nodeCallback(sn, status);
                    }
                    return null;
                }
            });
        }

        try {
            executor.invokeAll(tasks);
        } catch (InterruptedException e) {
            /* ignore */
        } finally {
            executor.shutdown();
        }
    }

    private void forEachRepNode(RepNodeCallback callback) {

        for (RepGroup rg : topo.getRepGroupMap().getAll()) {
            forEachRepNodeInShard(rg.getResourceId(), callback);
        }
    }

    private void forEachRepNodeInShard(RepGroupId rgId,
                                       final RepNodeCallback callback) {

        final RepGroup group = topo.get(rgId);

        if (group == null) {
            return;
        }

        ExecutorService executor = Executors.newFixedThreadPool(MAX_N_THREADS);
        Collection> tasks = new ArrayList>();
        /* LoginManager not needed for ping */
        final RegistryUtils regUtils = new RegistryUtils(topo,
                                                         (LoginManager) null);

        for (final RepNode rn : group.getRepNodes()) {
            tasks.add(new Callable() {
                @Override
                public Void call() throws Exception {
                    RepNodeStatus status = null;
                    StorageNode sn = topo.get(rn.getStorageNodeId());
                    try {
                        RepNodeAdminAPI rna =
                            regUtils.getRepNodeAdmin(rn.getResourceId());
                        status = rna.ping();
                    } catch (RemoteException re) {
                        problems.add(new Problem(rn.getResourceId(),
                                                 sn.getHostname(),
                                                 sn.getRegistryPort(),
                                                 "Can't call ping for RN:" +
                                                 re));
                    } catch (NotBoundException e) {
                        problems.add(new Problem(rn.getResourceId(),
                                                 sn.getHostname(),
                                                 sn.getRegistryPort(),
                                                 "No RMI service for RN: " +
                                                 e));
                    } finally {
                        callback.nodeCallback(rn, status);
                    }
                    return null;
                }
            });
        }

        try {
            executor.invokeAll(tasks);
        } catch (InterruptedException e) {
            /* ignore */
        } finally {
            executor.shutdown();
        }
    }


    private void forEachAdmin(final AdminCallback callback) {
        if (params == null) {
            return;
        }
        ExecutorService executor = Executors.newFixedThreadPool(MAX_N_THREADS);
        Collection> tasks = new ArrayList>();
        for (final AdminId aId : params.getAdminIds()) {
            tasks.add(new Callable() {
                @Override
                public Void call() throws Exception {
                    final StorageNodeId snId =
                        params.get(aId).getStorageNodeId();
                    final StorageNodeParams snp = params.get(snId);
                    String hostname = snp.getHostname();
                    int port = snp.getRegistryPort();
                    final AdminInfo info = new AdminInfo(snId);
                    try {
                        final CommandServiceAPI admin =
                            RegistryUtils.getAdmin(hostname, port,
                                                   adminLoginManager);
                        info.adminStatus = admin.getAdminStatus();

                        /*
                         * Ask the Admin master for the latest service status
                         * map, which is maintained as part of the Admin's
                         * monitoring infrastructure.
                         */
                        if (info.adminStatus.getIsAuthoritativeMaster()) {
                            Map monitorMap =
                                admin.getStatusMap();
                            monitoredChanges.putAll(monitorMap);
                        }
                    } catch (AdminFaultException afe) {
                        /*
                         * Note that admin.getAdminStatus() may throw an AFE
                         * wrapping a SAE in case of network issues, which is
                         * caused by RE or NBE.
                         */
                        problems.add(new Problem(aId, hostname, port,
                                                 "Can't get status for Admin:",
                                                 afe));
                    } catch (RemoteException re) {
                        problems.add(new Problem(aId, hostname, port,
                                                 "Can't get status for Admin:",
                                                 re));
                    } catch (NotBoundException e) {
                        problems.add(new Problem(aId, hostname, port,
                                                 "No RMI Service for Admin:",
                                                 e));
                    } finally {
                        callback.nodeCallback(aId, info);
                    }
                    return null;
                }
            });
        }

        try {
            executor.invokeAll(tasks);
        } catch (InterruptedException e) {
            /* ignore */
        } finally {
            executor.shutdown();
        }
    }

    private void forEachArbNodeInShard(RepGroupId rgId,
                                       ArbNodeCallback callback) {

        final RepGroup group = topo.get(rgId);

        if (group == null) {
            return;
        }

        /* LoginManager not needed for ping */
        final RegistryUtils regUtils = new RegistryUtils(topo,
                                                         (LoginManager) null);

        for (ArbNode an : group.getArbNodes()) {
            ArbNodeStatus status = null;
            StorageNode sn = topo.get(an.getStorageNodeId());
            try {
                ArbNodeAdminAPI ana =
                    regUtils.getArbNodeAdmin(an.getResourceId());
                status = ana.ping();
            } catch (RemoteException re) {
                problems.add(new Problem(an.getResourceId(),
                                         sn.getHostname(),
                                         sn.getRegistryPort(),
                                         "Can't call ping for AN:" + re));
            } catch (NotBoundException e) {
                problems.add(new Problem(an.getResourceId(),
                                         sn.getHostname(),
                                         sn.getRegistryPort(),
                                         "No RMI service for AN: " + e));
            }
            callback.nodeCallback(an, status);
        }
    }

    private void forEachArbNode(ArbNodeCallback callback) {

        for (RepGroup rg : topo.getRepGroupMap().getAll()) {
            forEachArbNodeInShard(rg.getResourceId(), callback);
        }
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy