
oracle.kv.impl.as.AggregationService Maven / Gradle / Ivy


NoSQL Database Server - supplies build and runtime support for the server (store) side of the Oracle NoSQL Database.

/*-
 * Copyright (C) 2011, 2018 Oracle and/or its affiliates. All rights reserved.
 *
 * This file was distributed by Oracle as part of a version of Oracle NoSQL
 * Database made available at:
 *
 * http://www.oracle.com/technetwork/database/database-technologies/nosqldb/downloads/index.html
 *
 * Please see the LICENSE file included in the top-level directory of the
 * appropriate version of Oracle NoSQL Database for a copy of the license and
 * additional information.
 */

package oracle.kv.impl.as;

import static java.util.concurrent.TimeUnit.MILLISECONDS;
import static java.util.concurrent.TimeUnit.SECONDS;
import static oracle.kv.KVVersion.CURRENT_VERSION;

import java.io.IOException;
import java.io.PrintStream;
import java.rmi.NotBoundException;
import java.rmi.RemoteException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.TreeSet;
import java.util.concurrent.Callable;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
/*import java.util.concurrent.atomic.AtomicLong;*/
import java.util.logging.Level;
import java.util.logging.Logger;

import oracle.kv.KVStore;
import oracle.kv.KVStoreConfig;
import oracle.kv.KVStoreException;
import oracle.kv.KVStoreFactory;
import oracle.kv.StatementResult;
import oracle.kv.impl.admin.CommandJsonUtils;
import oracle.kv.impl.admin.CommandServiceAPI;
import oracle.kv.impl.api.KVStoreImpl;
import oracle.kv.impl.as.AggregationService.Status.Beacon;
import oracle.kv.impl.rep.admin.RepNodeAdminAPI;
import oracle.kv.impl.rep.admin.ResourceInfo;
import oracle.kv.impl.rep.admin.ResourceInfo.RateRecord;
import oracle.kv.impl.rep.admin.ResourceInfo.UsageRecord;
import oracle.kv.impl.security.login.LoginManager;
import oracle.kv.impl.topo.RepGroup;
import oracle.kv.impl.topo.RepNode;
import oracle.kv.impl.topo.Topology;
import oracle.kv.impl.util.CommandParser;
import oracle.kv.impl.util.HostPort;
import oracle.kv.impl.util.JsonUtils;
import oracle.kv.impl.util.RateLimitingLogger;
import oracle.kv.impl.util.ScheduleStart;
import oracle.kv.impl.util.TopologyLocator;
import oracle.kv.impl.util.client.ClientLoggerUtils;
import oracle.kv.impl.util.registry.RegistryUtils;
import oracle.kv.table.Row;
import oracle.kv.table.Table;
import oracle.kv.table.TableAPI;
import oracle.kv.table.TimeToLive;
import oracle.kv.util.Ping.ExitCode;
import oracle.kv.util.shell.ShellCommandResult;

import org.codehaus.jackson.map.ObjectWriter;
import org.codehaus.jackson.node.ObjectNode;

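/**
 * Service which monitors resource usage of a store. The service periodically
 * collects table size information, polls the RNs for throughput data, sends
 * usage records back to the RNs when size or throughput limits are exceeded,
 * and records peak throughput in the PeakThroughput table.
 */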
public class AggregationService {

    /*
     * Peak throughput history table definition.
     */
    public static final String PEAK_TABLE_NAME = "PeakThroughput";
    public static final int PEAK_TABLE_VERSION = 1;
    public static final String PEAK_TABLE_ID_FIELD_NAME = "id";
    public static final String PEAK_START_SECOND_FIELD_NAME = "startSecond";
    public static final String PEAK_READ_KB_FIELD_NAME = "peakReadKB";
    public static final String PEAK_WRITE_KB_FIELD_NAME = "peakWriteKB";

    private static final Logger logger =
        ClientLoggerUtils.getLogger(AggregationService.class, "as");
    
    private final RateLimitingLogger<String> rateLimitLogger;

    private final KVStore kvStore;
    private final TableAPI tableAPI;

    private final TableSizeAggregator tableAggregator;
    private final ScheduledExecutorService executor;
    private final int throughputPollPeriodSec;
    private final int tableSizePollPeriodSec;
    private final int peakThroughputCollectionPeriodSec;
    private final TimeToLive peakThroughputTTL;

    private LoginManager loginManager;

    private volatile boolean stop = false;
    private volatile boolean started = false;
    private volatile Topology topology;

    private volatile Collection<UsageRecord> sizeUsageRecords = null;

    /*
     * Map of table ID to peak throughput records. The map is replaced at
     * the start of each collection period.
     */
    private volatile Map<Long, PeakRecord> peakRecords;

    /*
     * Starting second for the peak data set. This is initialized to
     * MAX_VALUE and set to the earliest time found.
     */
    /* TODO - Because of the restriction to Java 6, RateRecord cannot
     * implement LongUnaryOperator, which would allow the use of AtomicLong
     * for peakStartSecond. So we need to synchronize setting and access.
     */
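    /*
     * A minimal sketch of the Java 8 alternative to the synchronized
     * methods below (hypothetical code):
     *
     *   peakStartSecond.getAndUpdate(s -> Math.min(s, second)); // update
     *   long start = peakStartSecond.getAndSet(Long.MAX_VALUE); // reset
     */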
  /*private final AtomicLong peakStartSecond = new AtomicLong(Long.MAX_VALUE);*/
    private long peakStartSecond = Long.MAX_VALUE;

    /* Cached handle to the peak throughput table */
    private Table peakTable = null;

    /*
     * Set by a caller if an instance of the AS is created in-process.
     * It is used to allow a clean stop/shutdown.
     */
    private Thread aggThread;

    /*
     * Saves error messages and the worst health code the AS has run into,
     * until it is reset to a new Status. It is retrieved and reset
     * periodically by ASManager to report AS health data.
     */
    private Status status;

    public AggregationService(String storeName,
                              List<String> hostPorts,
                              int throughputPollPeriodSec,
                              int tableSizePollPeriodSec,
                              int peakThroughputCollectionPeriodSec,
                              int peakThroughputTTLDay,
                              int maxThreads)
            throws KVStoreException {
        if (throughputPollPeriodSec < 1) {
            throw new IllegalArgumentException("Throughput poll period" +
                                               " must be > 0");
        }
        if (tableSizePollPeriodSec < 1) {
            throw new IllegalArgumentException("Table size poll period" +
                                               " must be > 0");
        }
        if (peakThroughputCollectionPeriodSec < 1) {
            throw new IllegalArgumentException("Peak throughput collection" +
                                               " period must be > 0");
        }
        if (peakThroughputTTLDay < 1) {
            throw new IllegalArgumentException("Peak throughput TTL" +
                                               " must be > 0");
        }
        this.throughputPollPeriodSec = throughputPollPeriodSec;
        this.tableSizePollPeriodSec = tableSizePollPeriodSec;
        this.peakThroughputCollectionPeriodSec =
                                        peakThroughputCollectionPeriodSec;
        peakThroughputTTL = TimeToLive.ofDays(peakThroughputTTLDay);
        peakRecords = new ConcurrentHashMap<>();

        rateLimitLogger = new RateLimitingLogger<>(60 * 1000, 10, logger);
        
        logger.log(Level.INFO,
                   "Starting AggregationService {0} for {1}," +
                   " throughput poll period: {2} seconds," +
                   " table size poll period: {3} seconds," +
                   " peak throughput collection period: {4} seconds," +
                   " peak throughput TTL: {5} days",
                   new Object[]{CURRENT_VERSION.getNumericVersionString(),
                                storeName, throughputPollPeriodSec,
                                tableSizePollPeriodSec,
                                peakThroughputCollectionPeriodSec,
                                peakThroughputTTLDay});

        final KVStoreConfig kvConfig = new KVStoreConfig(storeName,
                                                         hostPorts.get(0));
        kvStore = KVStoreFactory.getStore(kvConfig);
        tableAPI = kvStore.getTableAPI();
        loginManager = KVStoreImpl.getLoginManager(kvStore);

        topology = findTopo(hostPorts, maxThreads);
        assert topology != null;
        logger.log(Level.INFO, "Initial topology seq# {0}",
                   topology.getSequenceNumber());

        tableAggregator = new TableSizeAggregator(tableAPI, logger);
        executor = Executors.newScheduledThreadPool(maxThreads);
        status = new Status();
    }

    /* Synchronously execute the polling loop. */
    public void startPolling() throws InterruptedException {
        if (started) {
            return;
        }
        started = true;
        start();
    }

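    /*
     * Schedules the table size collection and peak throughput export tasks,
     * then runs the throughput polling loop until stop is set.
     */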
    private void start()
        throws InterruptedException {

        try {
            /*
             * Schedule a task to collect size information. At each call to
             * getTableSizes, sizeUsageRecords will be set to the latest size
             * information. The usage records, if any, are sent to the
             * RNs when polling for throughput information.
             *
             * Polling is delayed slightly so that it starts after the
             * key stats collection on the server is done (or at least
             * started). This assumes the server uses calculateDelay()
             * to control its scanning.
             */
            final long pollPeriodMillis =
                                      SECONDS.toMillis(tableSizePollPeriodSec);
            final long initialDelayMillis =
                    ScheduleStart.calculateDelay(pollPeriodMillis,
                                                 System.currentTimeMillis()) +
                    pollPeriodMillis/4;
            final long initialDelaySeconds =
                MILLISECONDS.toSeconds(initialDelayMillis);
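            /*
             * For example, assuming calculateDelay() returns the time until
             * the next period boundary: with a one hour poll period and a
             * current time of 10:15, the delay is 45 minutes plus a quarter
             * period (15 minutes), so the first collection runs at 11:15.
             */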
            
            executor.scheduleAtFixedRate(new Runnable() {
                @Override
                public void run() {
                    try {
                        sizeUsageRecords = tableAggregator.getTableSizes(
                            tableSizePollPeriodSec * 1000,
                            AggregationService.this);
                    } catch (Exception e) {
                        logger.log(Level.SEVERE,
                                  "Unexpected exception collecting table sizes",
                                   e);
                        stop = true;
                    }
                }
            }, initialDelaySeconds, tableSizePollPeriodSec, SECONDS);

            /*
             * Schedule a task to export peak throughput information.
             */
            executor.scheduleAtFixedRate(new Runnable() {
                @Override
                public void run() {
                    try {
                        exportPeakThroughput();
                    } catch (Exception e) {
                        /*
                         * Note that we do not treat failures exporting peak
                         * throughput as fatal. We log the failure and the
                         * export will be retried at the end of the next
                         * collection period. If the failures continue,
                         * hopefully the health monitor will take action.
                         */
                        logger.log(Level.WARNING,
                                   "Unexpected exception exporting peak" +
                                   " throughput", e);
                    }
                }
            },
            /*
             * Delay the initial run so that there is time to collect
             * peak data.
             */
            peakThroughputCollectionPeriodSec,  /* initialDelay */
            peakThroughputCollectionPeriodSec,  /* period */
            SECONDS);

            /*
             * Initialize the last call to be a poll period ago. This should
             * get the usual history to start things off.
             */
            final long periodMillis = SECONDS.toMillis(throughputPollPeriodSec);
            long lastCallMillis = System.currentTimeMillis() - periodMillis;

            /*
             * Throughput polling loop. Note that table size limits are sent
             * to the RNs here, not in the thread collecting size info.
             */
            while (!stop) {
                final long startMillis = System.currentTimeMillis();
                pollThroughput(lastCallMillis);
                lastCallMillis = startMillis;

                /*
                 * If the poll took less than the period, sleep for remaining
                 * time.
                 */
                final long finishMillis = System.currentTimeMillis();
                final long durationMillis = finishMillis - startMillis;
                final long healthyMillis = periodMillis / 2;
                final long remaining = periodMillis - durationMillis;
                if (remaining < 0) {
                    recordHealth(Beacon.RED,
                                 "Throughput collection did not complete " +
                                 "within polling period");
                } else if (durationMillis > healthyMillis) {
                    recordHealth(Beacon.YELLOW,
                                 "Throughput collection did not complete " +
                                 "within 50% of the polling period");
                }
                if (!stop && (remaining > 0)) {
                    Thread.sleep(remaining);
                }
            }
        } finally {
            executor.shutdownNow();
        }
        logger.info("Shutdown");
    }

    /**
     * Polls the RNs for throughput information. During the poll the size
     * records, if any, are sent to each node. Once the throughput information
     * is collected, the rates are aggregated and if any limits are exceeded
     * the limit records are sent to all of the RNs.
     */
    private void pollThroughput(long lastCallMillis)
            throws InterruptedException {

        /*
         * Create a local reference to sizeUsageRecords since it may change.
         * The size limit records only need to be sent once. So if there
         * are no errors calling the RNs, the records can be cleared. It will
         * be OK to modify the collection using this reference (see below)
         * since the contents are not changed elsewhere.
         */
        final Collection<UsageRecord> sizeRecords = sizeUsageRecords;

        /*
         * Call all nodes to get resource info since lastCallMillis. Send
         * any table size limit records.
         */
        final List<Future<ResourceInfo>> results =
                                    callAllNodes(lastCallMillis, sizeRecords);
        if (results == null) {
            return;
        }

        final TreeSet<RateRecord> records = new TreeSet<>();

        /*  Keep track of errors so we can clear the size limit records. */
        int errors = 0;

        /*
         * Collect all of the returned rate records. They are sorted by second.
         */
        for (Future<ResourceInfo> f : results) {
            if (!f.isDone() || f.isCancelled()) {
                final String result = f.isCancelled() ? "cancelled" :
                                                        "incomplete";
                rateLimitLogger.log(result, Level.WARNING,
                                    "Task did not complete: " + result);
                errors++;
                /* Skip get(), which would throw for a cancelled task */
                continue;
            }
            try {
                final ResourceInfo info = f.get();
                if (info == null) {
                    errors++;
                    continue;
                }
                records.addAll(info.getRateRecords());
            } catch (Exception ex) {
                errors++;
                rateLimitLogger.log(ex.getClass().getName(),
                                    Level.WARNING,
                                    "Task failed " + ex.getLocalizedMessage());
            }
        }
        logger.log(Level.FINE, "Collected {0} records, {1} errors",
                   new Object[]{records.size(), errors});
        if (errors > 0) {
            final String errWord = (errors > 1 ? " errors" : " error");
            recordHealth(Beacon.RED, "Polling throughput had " +
                                     errors + errWord);
        }

        /*
         * Iterate through the rate records in second order. For each
         * second accumulate the rates in the accumulators map. Then
         * see if any were over for that second. Any overages will generate
         * a record in the overageRecords map.
         */
        final Map<Long, ThroughputAccumulator> accumulators = new HashMap<>();
        final Map<Long, UsageRecord> throughputUsageMap = new HashMap<>();

        long second = 0L;
        for (RateRecord rr : records) {
            /* Set second to earliest seen. Uses LongUnaryOperator to update */
            /*peakStartSecond.getAndUpdate(rr);*/
            updatePeakStartSecond(rr.getSecond());

            if (second != rr.getSecond()) {
                /* new second, see if any of the accumulated records are over */
                getUsageRecords(accumulators, throughputUsageMap);
                second = rr.getSecond();
                updatePeakRecords(accumulators);
                accumulators.clear();
            }
            final long tableId = rr.getTableId();
            final ThroughputAccumulator accumulator = accumulators.get(tableId);
            if (accumulator == null) {
                accumulators.put(tableId, new ThroughputAccumulator(rr));
            } else {
                accumulator.add(rr);
            }
        }
        updatePeakRecords(accumulators);
        getUsageRecords(accumulators, throughputUsageMap);

        /*
         * If there were throughput overages, send them. Passing a 0 for
         * lastCall indicates that we do not want throughput data to be
         * sent back.
         */
        if (!throughputUsageMap.isEmpty()) {
            callAllNodes(0, new ArrayList<>(throughputUsageMap.values()));
        }

        /*
         * If there were no errors, we can safely remove size limit records.
         */
        if ((errors == 0) && (sizeRecords != null)) {
            sizeRecords.clear();
        }
    }

    private synchronized void updatePeakStartSecond(long second) {
        if (second < peakStartSecond) {
            peakStartSecond = second;
        }
    }

    private synchronized long getAndResetPeakStartSecond() {
        final long ret = peakStartSecond;
        peakStartSecond = Long.MAX_VALUE;
        return ret;
    }

    /**
     * Creates usage records from the specified accumulators.
     */
    private void getUsageRecords(
                        Map<Long, ThroughputAccumulator> accumulators,
                        Map<Long, UsageRecord> throughputUsageMap) {
        for (ThroughputAccumulator ta : accumulators.values()) {
            if (ta.isOver()) {
                /*
                 * There may already be a record for this table. If so just
                 * skip as there is no need for more than one.
                 */
                if (!throughputUsageMap.containsKey(ta.tableId)) {
                    throughputUsageMap.put(ta.tableId,
                                       new UsageRecord(ta.tableId,
                                                       ta.readKB, ta.writeKB));
                }
            }
        }
    }

    /**
     * Updates the peak record from the specified throughput accumulators.
     * Peak records are created and added to the peakRecords map as needed.
     */
    private void updatePeakRecords(
                        Map<Long, ThroughputAccumulator> accumulators) {
        for (ThroughputAccumulator ta : accumulators.values()) {
            /*
             * Track peak throughput which is independent of the second. Note
             * that peakRecords may be refreshed between the get and set. That
             * is OK because pr will also be missing from the new (empty) map.
             */
            final PeakRecord pr = peakRecords.get(ta.tableId);
            if (pr == null) {
                peakRecords.put(ta.tableId,
                                new PeakRecord(ta.readKB, ta.writeKB));
            } else {
                pr.update(ta.readKB, ta.writeKB);
            }
        }
    }

    /**
     * Calls getResourceInfo on all RNs in the store, returning the list of
     * futures with the results. The lastCall and usageRecords are passed to
     * the getResourceInfo method. If usageRecords is empty, null is sent.
     */
    private List<Future<ResourceInfo>>
                    callAllNodes(long lastCall,
                                 Collection<UsageRecord> usageRecords)
                            throws InterruptedException {
        /* Send null if there are no records */
        final Collection<UsageRecord> usageRecord =
              ((usageRecords != null) && usageRecords.isEmpty()) ? null :
                                                                   usageRecords;

        final List<Callable<ResourceInfo>> tasks = new ArrayList<>();
        for (RepGroup rg : topology.getRepGroupMap().getAll()) {
            /* Generate tasks for each group */
            final RepGroup group = topology.get(rg.getResourceId());
            if (group == null) {
                logger.log(Level.INFO, "{0} missing from topo seq# {1}",
                           new Object[]{rg, topology.getSequenceNumber()});
                continue;
            }

            /* LoginManager not needed ??? */
            final RegistryUtils regUtils = new RegistryUtils(topology,
                                                             loginManager);
            for (final RepNode rn : group.getRepNodes()) {
                tasks.add(new Callable<ResourceInfo>() {
                    @Override
                    public ResourceInfo call() throws Exception {
                        try {
                            final RepNodeAdminAPI rna =
                                  regUtils.getRepNodeAdmin(rn.getResourceId());

                            // TODO - do something with this?
                            //rna.getInfo().getSoftwareVersion();
                            final ResourceInfo info =
                                rna.exchangeResourceInfo(lastCall, usageRecord);
                            // TODO - info can be null????
                            checkTopology(info, rna);
                            return info;
                        } catch (Exception e) {
                            logger.log(Level.WARNING,
                                       "Unexpected exception calling " +
                                       rn.getResourceId(), e);
                        }
                        /* Returning null will be recorded as an error */
                        return null;
                    }
                });
            }
        }
        return tasks.isEmpty() ? Collections.emptyList() :
                                 executor.invokeAll(tasks,
                                                    throughputPollPeriodSec,
                                                    TimeUnit.SECONDS);
    }

    /**
     * Writes a row to the peak throughput table for each non-empty
     * PeakRecord. The peakRecords map is recreated, and peakStartSecond is
     * reset.
     */
    private void exportPeakThroughput() throws Exception {
        //final int startSecond = (int)peakStartSecond.getAndSet(Long.MAX_VALUE);
        final int startSecond = (int)getAndResetPeakStartSecond();
        final Map<Long, PeakRecord> prMap = peakRecords;
        peakRecords = new ConcurrentHashMap<>();

        final Table table = getPeakTable();
        assert table != null;

        for (Entry<Long, PeakRecord> e : prMap.entrySet()) {
            final PeakRecord pr = e.getValue();

            if (pr.hasPeak()) {
                final long tableId = e.getKey();
                logger.log(Level.FINE, "Peak for {0} starting at {1} {2}",
                           new Object[]{tableId, startSecond, pr});
                final Row row = table.createRow();
                row.put(PEAK_TABLE_ID_FIELD_NAME, tableId);
                row.put(PEAK_START_SECOND_FIELD_NAME, startSecond);
                row.put(PEAK_READ_KB_FIELD_NAME, pr.peakReadKB);
                row.put(PEAK_WRITE_KB_FIELD_NAME, pr.peakWriteKB);
                /* Set the TTL in case it is different from the table default */
                row.setTTL(peakThroughputTTL);
                tableAPI.put(row, null, null);
            }
        }
    }

    /**
     * Gets the peak throughput table handle. The table is created if it does
     * not exist. The table handle is cached.
     */
    private Table getPeakTable() throws Exception {
        if (peakTable != null) {
            return peakTable;
        }

        peakTable = tableAPI.getTable(PEAK_TABLE_NAME);
        if (peakTable != null) {
            final int tableVersion =
                        Integer.parseInt(peakTable.getDescription());
            logger.log(Level.FINE, "Found " + PEAK_TABLE_NAME +
                       " version {0}", tableVersion);
            if (tableVersion > PEAK_TABLE_VERSION) {
                throw new Exception(PEAK_TABLE_NAME + " is at version " +
                                    tableVersion + ", please upgrade the " +
                                    "aggregation service");
            }
            /*
             * TODO - Currently changing the default TTL on a table does not
             * affect existing records. If this changes, it would be worth
             * checking if the input TTL is different than the table's default
             * and if so change the table default.
             */
            return peakTable;
        }

        logger.info("Creating peak table");
        final String createDML =
                "CREATE TABLE " + PEAK_TABLE_NAME + " " +
                "COMMENT \"" + PEAK_TABLE_VERSION + "\" (" +
                        PEAK_TABLE_ID_FIELD_NAME + " LONG, " +
                        PEAK_START_SECOND_FIELD_NAME + " INTEGER, " +
                        PEAK_READ_KB_FIELD_NAME + " INTEGER, " +
                        PEAK_WRITE_KB_FIELD_NAME + " INTEGER, " +
                        "PRIMARY KEY(SHARD(" + PEAK_TABLE_ID_FIELD_NAME + "), "+
                                     PEAK_START_SECOND_FIELD_NAME + ")) " +
                "USING TTL " + peakThroughputTTL.getValue() + " DAYS";

        final StatementResult result = kvStore.executeSync(createDML);
        if (!result.isSuccessful()) {
            throw new Exception("Failed to create " +
                                PEAK_TABLE_NAME + ": " + result);
        }

        peakTable = tableAPI.getTable(PEAK_TABLE_NAME);
        if (peakTable == null) {
            throw new Exception("Unable to get " + PEAK_TABLE_NAME);
        }
        return peakTable;
    }

    /*
     * Object to record per-table peak throughput information.
     */
    private static class PeakRecord {
        private int peakReadKB;
        private int peakWriteKB;

        private PeakRecord(int readKB, int writeKB) {
            peakReadKB = readKB;
            peakWriteKB = writeKB;
        }

        /*
         * Updates the peak read and write data if the input values are
         * greater.
         */
        private void update(int readKB, int writeKB) {
            if (readKB > peakReadKB) {
                peakReadKB = readKB;
            }
            if (writeKB > peakWriteKB) {
                peakWriteKB = writeKB;
            }
        }

        /*
         * Returns true if the record has non-zero peak read or write
         * throughput data.
         */
        private boolean hasPeak() {
            return peakReadKB > 0 || peakWriteKB > 0;
        }

        @Override
        public String toString() {
            return "PeakRecord[" + peakReadKB + ", " + peakWriteKB + "]";
        }
    }

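    /*
     * Accumulates the read and write KB reported for a single table during
     * one second and compares the totals against the table's limits.
     */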
    private static class ThroughputAccumulator {
        private final long tableId;
        private final int readLimitKB;
        private final int writeLimitKB;

        private int readKB;
        private int writeKB;

        ThroughputAccumulator(RateRecord rr) {
            tableId = rr.getTableId();
            readLimitKB = rr.getReadLimitKB();
            writeLimitKB = rr.getWriteLimitKB();
            add(rr);
        }

        private void add(RateRecord rr) {
            assert rr.getTableId() == tableId;
            readKB += rr.getReadKB();
            writeKB += rr.getWriteKB();
        }

        public boolean isOver() {
            return readKB > readLimitKB ||
                   writeKB > writeLimitKB;
        }

        @Override
        public String toString() {
            return "ThroughputAccumulator[" + tableId + ", " +
                   readKB + ", " + readLimitKB + ", " +
                   writeKB + ", " + writeLimitKB + "]";
        }
    }

    private Topology findTopo(List<String> hostPorts, int maxThreads)
        throws KVStoreException {

        if (hostPorts == null) {
            throw new IllegalArgumentException("null hosts ports");
        }

        String[] hostPortsArray = new String[hostPorts.size()];
        hostPortsArray = hostPorts.toArray(hostPortsArray);

        /* Search available SNs for a topology */
        Topology newtopo = null;

        /*
         * The search for a new topo is confined to SNs that host RNs. If
         * Admins live on SNs which don't host RNs, we'll be delayed in
         * seeing a new topo; we'd have to wait for that to be propagated to
         * the RNs. That's ok; by design, the system will propagate topos to
         * RNs in a timely fashion, and it's not worth adding complications
         * for the unusual case of an Admin-only SN.
         */
        try {
            newtopo = TopologyLocator.get(hostPortsArray, 0,
                                          loginManager, null);
        } catch (KVStoreException topoLocEx) {
            /* had a problem getting a topology - try using the Admins */
            newtopo = searchAdminsForTopo(hostPortsArray, maxThreads);

            /* Still can't find a topology */
            if (newtopo == null) {
                throw topoLocEx;
            }
        } catch (Exception e) {
            logger.log(Level.WARNING, "Exception locating topology: {0}", e);
        }
        return newtopo;
    }

    /**
     * Given a set of SNs, find an Admin service from which to obtain a
     * topology.
     */
    private Topology searchAdminsForTopo(String[] hostPortStrings,
                                         int maxThreads) {
        final HostPort[] targetHPs = HostPort.parse(hostPortStrings);

        /* Look for admins to get topology */
        final Collection<Callable<Topology>> tasks = new ArrayList<>();
        for (final HostPort hp : targetHPs) {
            tasks.add(new Callable<Topology>() {
                @Override
                public Topology call() throws Exception {
                    try {
                        final CommandServiceAPI admin =
                            getAdmin(hp.hostname(), hp.port());
                        return admin.getTopology();
                    } catch (RemoteException re) {
                        logger.log(Level.SEVERE,
                                   "Exception attempting to contact Admin {0}",
                                   re);
                        /*
                         * Rethrow the exception to signal that this task
                         * failed to get a topology.
                         */
                        throw re;
                    }
                }
            });
        }

        final ExecutorService es =
                Executors.newFixedThreadPool(maxThreads);
        try {
            /*
             * Return the topology obtained by the first task to complete
             * successfully.
             */
            return es.invokeAny(tasks);
        } catch (Exception e) {
            /*
             * If invokeAny throws, all tasks failed: no Admins could be
             * contacted, though there should be some in the list.
             */
            logger.severe("Searching for topology, can't contact any " +
                          "Admin services in the store");
            return null;
        } finally {
            es.shutdownNow();
        }
    }

    /**
     * Get the CommandService on this particular SN.
     */
    private CommandServiceAPI getAdmin(String snHostname, int snRegistryPort)
        throws NotBoundException, RemoteException {
        /*
         * Use login manager first, if it is available.
         */
        if (loginManager != null) {
            return RegistryUtils.getAdmin(snHostname, snRegistryPort,
                                          loginManager);
        }

        /*
         * Non-secure case.
         */
        return RegistryUtils.getAdmin(snHostname, snRegistryPort, null);
    }

    /**
     * Checks to see if the topology needs to be updated. The info object
     * contains the topo sequence number at that node. Check it against
     * the topo we have. If it is newer get the topo from the RN.
     */
    private void checkTopology(ResourceInfo info, RepNodeAdminAPI rna)
        throws RemoteException {
        if (info == null) {
            return;
        }
        if (topology.getSequenceNumber() >= info.getTopoSeqNum()) {
            return;
        }
        logger.log(Level.FINE, "Need to update topo, {0} < {1}",
                   new Object[]{topology.getSequenceNumber(),
                                info.getTopoSeqNum()});
        final Topology newTopo = rna.getTopology();
        synchronized (this) {
            if (topology.getSequenceNumber() < newTopo.getSequenceNumber()) {
                logger.log(Level.FINE, "Updating to topology seq# {0}",
                           newTopo.getSequenceNumber());
                topology = newTopo;
            }
        }
    }

    /**
     * Gets the AS status, which holds the list of error messages and the
     * worst health code the AS has run into, and then resets the status to
     * GREEN.
     */
    public synchronized Status getAndResetStatus() {
        Status oldStatus = status;
        status = new Status();
        return oldStatus;
    }

    synchronized void recordHealth(Beacon newBeacon, String msg) {
        status.recordHealth(newBeacon, msg);
    }

    /**
     * The aggregation of AS status. It records error messages and the worst
     * health code the AS has encountered since the status was last reset.
     * The beacon only escalates: once set to RED it stays RED, and once set
     * to YELLOW it cannot drop back to GREEN until the status is reset.
     */
    public static final class Status {
        private Beacon beacon;
        private static final int ERROR_SIZE_LIMIT = 100;
        private final LinkedList<String> errors;

        Status() {
            beacon = Beacon.GREEN;
            errors = new LinkedList<>();
        }

        void recordHealth(Beacon newBeacon, String msg) {
            if (newBeacon.ordinal() > beacon.ordinal()) {
                beacon = newBeacon;
            }
            final String error = System.currentTimeMillis() + " " +
                newBeacon.name() + " " + msg;
            while (errors.size() >= ERROR_SIZE_LIMIT) {
                errors.poll();
            }
            errors.offer(error);
        }

        /**
         * Gets the Beacon representing the worst status the AS has run into.
         */
        public Beacon getBeacon() {
            return beacon;
        }

        /**
         * Returns an ordered list of error messages that the AS has
         * encountered.
         */
        public List<String> getErrors() {
            return errors;
        }

        /**
         * Represents the AS health level.
         */
        public static enum Beacon {
            GREEN, YELLOW, RED
        }
    }

    public static final String COMMAND_NAME = "aggregationservice";
    public static final String COMMAND_DESC =
                                    "monitors resource usage of a store";
    private static final String HELPER_HOSTS_FLAG = "-helper-hosts";
    private static final String THROUGHPUT_POLL_PERIOD_FLAG =
                                                    "-throughput-poll-period";
    private static final int THROUGHPUT_POLL_PERIOD_DEFAULT_SEC = 5;
    private static final String TABLE_SIZE_POLL_PERIOD_FLAG =
                                                    "-table-size-poll-period";
    private static final int TABLE_SIZE_POLL_PERIOD_DEFAULT_SEC = 3600;
    private static final String PEAK_THROUGHPUT_COLLECTION_PERIOD_FLAG =
                                           "-peak-throughput-collection-period";
    private static final int PEAK_THROUGHPUT_COLLECTION_PERIOD_DEFAULT_SEC = 60;
    private static final String PEAK_THROUGHPUT_TTL_FLAG =
                                                        "-peak-throughput-ttl";
    private static final int PEAK_THROUGHPUT_DEFAULT_TTL_DAY = 14;

    private static final String MAX_THREADS_FLAG = "-max-threads";
    private static final int MAX_THREADS_DEFAULT = 10;
    public static final String COMMAND_ARGS =
        CommandParser.getHostUsage() + " " +
        CommandParser.getPortUsage() + " or\n\t" +
        HELPER_HOSTS_FLAG + " <host:port[,host:port]*>\n\t" +
        THROUGHPUT_POLL_PERIOD_FLAG + " <seconds>\n\t" +
        TABLE_SIZE_POLL_PERIOD_FLAG + " <seconds>\n\t" +
        PEAK_THROUGHPUT_COLLECTION_PERIOD_FLAG + " <seconds>\n\t" +
        PEAK_THROUGHPUT_TTL_FLAG + " <days>\n\t" +
        MAX_THREADS_FLAG + " <threads>\n\t" +
        CommandParser.optional(CommandParser.JSON_FLAG);
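    /*
     * A sketch of a possible invocation (the store name, hosts, and ports
     * are hypothetical; -store is assumed to be the standard CommandParser
     * store-name flag):
     *
     *   java -cp kvstore.jar oracle.kv.impl.as.AggregationService \
     *       -store mystore -helper-hosts node1:5000,node2:5000 \
     *       -throughput-poll-period 5 -table-size-poll-period 3600
     */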

    private static class AggregationServiceParser extends CommandParser {
        private String helperHosts = null;
        private int throughputPollPeriodSec =
                                             THROUGHPUT_POLL_PERIOD_DEFAULT_SEC;
        private int tableSizePollPeriodSec = TABLE_SIZE_POLL_PERIOD_DEFAULT_SEC;
        private int peakThroughputCollectionPeriodSec =
                                  PEAK_THROUGHPUT_COLLECTION_PERIOD_DEFAULT_SEC;
        private int peakThroughputTTLDay = PEAK_THROUGHPUT_DEFAULT_TTL_DAY;
        private int maxThreads = MAX_THREADS_DEFAULT;

        AggregationServiceParser(String[] args1) {
            super(args1);
        }

        @Override
        public void usage(String errorMsg) {
            /*
             * Note that you can't really test illegal arguments in a
             * threaded unit test -- the call to exit(..) when
             * dontExit is true doesn't kill the process, and the error
             * message gets lost. Still worth using dontExit so the
             * unit test process doesn't die, but unit testing of bad
             * arg handling has to happen with a process.
             */
            if (!getJson()) {
                if (errorMsg != null) {
                    System.err.println(errorMsg);
                }
                System.err.println(KVSTORE_USAGE_PREFIX + COMMAND_NAME +
                                   "\n\t" + COMMAND_ARGS);
            }
            exit(errorMsg, ExitCode.EXIT_USAGE, System.err,
                 getJsonVersion());
        }

        @Override
        protected boolean checkArg(String arg) {
            if (arg.equals(HELPER_HOSTS_FLAG)) {
                helperHosts = nextArg(arg);
                return true;
            }

            if (arg.equals(THROUGHPUT_POLL_PERIOD_FLAG)) {
                throughputPollPeriodSec = nextIntArg(arg);
                return true;
            }

            if (arg.equals(TABLE_SIZE_POLL_PERIOD_FLAG)) {
                tableSizePollPeriodSec = nextIntArg(arg);
                return true;
            }

            if (arg.equals(PEAK_THROUGHPUT_COLLECTION_PERIOD_FLAG)) {
                peakThroughputCollectionPeriodSec = nextIntArg(arg);
                return true;
            }

            if (arg.equals(PEAK_THROUGHPUT_TTL_FLAG)) {
                peakThroughputTTLDay = nextIntArg(arg);
                return true;
            }

            if (arg.equals(MAX_THREADS_FLAG)) {
                maxThreads = nextIntArg(arg);
                return true;
            }

            return false;
        }

        @Override
        protected void verifyArgs() {
            /* Check that only one of helper hosts or host/port is supplied */
            if (helperHosts != null &&
                (getHostname() != null || (getRegistryPort() != 0))) {
                usage("Only one of either " +  HELPER_HOSTS_FLAG + " or " +
                      HOST_FLAG + " plus " + PORT_FLAG +
                      " may be specified");
            }

            if (helperHosts == null) {
                if (getHostname() == null) {
                    missingArg(HOST_FLAG);
                }
                if (getRegistryPort() == 0) {
                    missingArg(PORT_FLAG);
                }
            } else {
                /*
                 * Helper hosts have been supplied - validate the
                 * argument.
                 */
                try {
                    validateHelperHosts(helperHosts);
                } catch (IllegalArgumentException e) {
                    usage("Illegal value for " + HELPER_HOSTS_FLAG );
                }
            }
        }

        /**
         * Validates that each helper host entry is in the form
         * hostname:port.
         */
        private void validateHelperHosts(String helperHostVal)
            throws IllegalArgumentException {

            if (helperHostVal == null) {
                throw new IllegalArgumentException
                    ("helper hosts cannot be null");
            }
            HostPort.parse(helperHostVal.split(","));
        }

        /**
         * Returns a list of host:port strings. Assumes that an argument
         * to helperHosts has already been validated.
         */
        List<String> createHostPortList() {
            final String[] hosts;
            if (helperHosts != null) {
                hosts = helperHosts.split(",");
            } else {
                hosts = new String[1];
                hosts[0] = getHostname() + ":" + getRegistryPort();
            }
            final HostPort[] hps = HostPort.parse(hosts);
            final List<String> hpList = new ArrayList<>();
            for (HostPort hp : hps) {
                hpList.add(hp.toString());
            }
            return hpList;
        }
    }

    public static void main(String[] args) {
        final AggregationServiceParser asp = new AggregationServiceParser(args);
        try {
            asp.parseArgs();
        } catch (Exception e) {
            exit("Argument error: " + e.getMessage(),
                 ExitCode.EXIT_USAGE,
                 System.err, CommandParser.getJsonVersion(args));
            return;
        }

        try {
            new AggregationService(asp.getStoreName(),
                                   asp.createHostPortList(),
                                   asp.throughputPollPeriodSec,
                                   asp.tableSizePollPeriodSec,
                                   asp.peakThroughputCollectionPeriodSec,
                                   asp.peakThroughputTTLDay,
                                   asp.maxThreads).start();
        } catch (Exception e) {
            exit("Error: " + e.getMessage(),
                 ExitCode.EXIT_UNEXPECTED,
                 System.err, asp.getJsonVersion());
        }
        exit("Service exit", ExitCode.EXIT_OK, System.out,
             asp.getJsonVersion());
    }

    /*
     * The next few methods enable starting and stopping an in-process instance
     * of AggregationService. This can be used by test code to test generation
     * of throttling exceptions, for example. The in-process instance creates a
     * thread to provide context for the polling loop in start().
     *
     * The mechanism is:
     *   AggregationService as = createAggregationService(...);
     *    // do tests
     *   as.stop(); // shutdown
     */

    /**
     * Stops an in-process instance of this service. It sets the state to "stop"
     * which tells the polling loop to end and then waits for the thread to
     * exit.
     */
    public void stop() {
        stop = true;
        if (aggThread != null) {
            try {
                aggThread.join(10*1000);
            } catch (InterruptedException ie) {
                /* ignore */
            }
            aggThread = null;
        }
    }

    /**
     * Used to set the thread being used for the polling loop for an in-process
     * AS.
     */
    private void setThread(Thread aggThread) {
        this.aggThread = aggThread;
    }

    public static AggregationService createAggregationService(
        String storeName,
        String[] hostPorts,
        int throughputPollPeriodSec,
        int tableSizePollPeriodSec,
        int peakThroughputCollectionPeriodSec,
        int peakThroughputTTLDay,
        int maxThreads) throws Exception {

        final AggregationService as =
            new AggregationService(storeName,
                                   Arrays.asList(hostPorts),
                                   throughputPollPeriodSec,
                                   tableSizePollPeriodSec,
                                   peakThroughputCollectionPeriodSec,
                                   peakThroughputTTLDay,
                                   maxThreads);

        /*
         * This thread provides context for the polling loop used by start()
         */
        final Thread aggThread = new Thread() {
                @Override
                public void run() {
                    try {
                        as.start();
                    } catch (InterruptedException ie) {
                        logger.log(Level.SEVERE,
                                   "AggregationService failed to start: {0}",
                                   ie);
                    }
                }
            };

        aggThread.start();

        /* set the thread in the instance to allow clean stop */
        as.setThread(aggThread);
        return as;
    }

    /**
     * Exit the process with the appropriate exit code, generating the
     * appropriate message.
     */
    private static void exit(String msg,
                             ExitCode exitCode,
                             PrintStream ps,
                             int jsonVersion) {
        if ((msg != null) && (ps != null)) {
            if (jsonVersion == CommandParser.JSON_V2) {
                displayExitJson(msg, exitCode, ps);
            } else if (jsonVersion == CommandParser.JSON_V1) {
                displayExitJsonV1(msg, exitCode, ps);
            } else {
                ps.println(msg);
            }
        }
        System.exit(exitCode.value());
    }

    private static final String EXIT_CODE_FIELD_V1 = "exit_code";

    private static final String EXIT_CODE_FIELD = "exitCode";

    private static void displayExitJsonV1(String msg,
                                          ExitCode exitCode,
                                          PrintStream ps) {
        final ObjectNode on = JsonUtils.createObjectNode();
        on.put(CommandJsonUtils.FIELD_OPERATION, "aggregationservice");
        on.put(CommandJsonUtils.FIELD_RETURN_CODE,
               exitCode.getErrorCode().getValue());
        final String description =
                        (msg == null) ? exitCode.getDescription() :
                                        exitCode.getDescription() + " - " + msg;
        on.put(CommandJsonUtils.FIELD_DESCRIPTION, description);
        on.put(EXIT_CODE_FIELD_V1, exitCode.value());

        /* print the json node. */
        final ObjectWriter writer = JsonUtils.createWriter(true /* pretty */);
        try {
            ps.println(writer.writeValueAsString(on));
        } catch (IOException e) {
            ps.println(e);
        }
    }

    private static void displayExitJson(String msg,
                                        ExitCode exitCode,
                                        PrintStream ps) {
        final ShellCommandResult scr =
            ShellCommandResult.getDefault("aggregationservice");
        scr.setReturnCode(exitCode.getErrorCode().getValue());
        final String description =
            (msg == null) ? exitCode.getDescription() :
                            exitCode.getDescription() + " - " + msg;
        scr.setDescription(description);
        final ObjectNode on = JsonUtils.createObjectNode();
        on.put(EXIT_CODE_FIELD, exitCode.value());
        scr.setReturnValue(on);

        try {
            ps.println(scr.convertToJson());
        } catch (IOException e) {
            ps.println(e);
        }
    }
}



