com.persistit.AlertMonitor Maven / Gradle / Ivy
Show all versions of akiban-persistit Show documentation
/**
* Copyright © 2012 Akiban Technologies, Inc. All rights reserved.
*
* This program and the accompanying materials are made available
* under the terms of the Eclipse Public License v1.0 which
* accompanies this distribution, and is available at
* http://www.eclipse.org/legal/epl-v10.html
*
* This program may also be available under different license terms.
* For more information, see www.akiban.com or contact [email protected].
*
* Contributors:
* Akiban Technologies, Inc.
*/
package com.persistit;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.SortedMap;
import java.util.TreeMap;
import java.util.concurrent.Executors;
import java.util.concurrent.atomic.AtomicLong;
import java.util.regex.Pattern;
import javax.management.MBeanNotificationInfo;
import javax.management.Notification;
import javax.management.NotificationBroadcasterSupport;
import javax.management.ObjectName;
import com.persistit.logging.LogBase;
import com.persistit.logging.PersistitLogMessage.LogItem;
import com.persistit.mxbeans.AlertMonitorMXBean;
import com.persistit.util.Util;
/**
*
* Manage the process of accumulating and logging abnormal events such as
* IOExceptions and measurements outside of expected thresholds. This class
* notifies agents by (a) writing log messages and (b) sending JMX
* {@link Notification}s.
*
*
* This mechanism is designed to organize events generated by background
* threads, sometimes very frequently. For example, if a disk fails, the
* JOURNAL_FLUSHER thread may receive many IOExceptions per second. These are
* treated as Events in this class, and log messages are written periodically
* but far less frequently. But in addition, this class is also capable of
* sending a Notification
to a management client which may be
* useful in making the problem known in a timely fashion
*
*
* Events are collated by category names. For each category value there
* is a separate {@link History} of events pertaining to that category. By
* separately aggregating events of different categories this class avoids
* discarding important, but older events belonging to one category when
* receiving new events in a different category. For example, IOExceptions
* generated frequently by the JOURNAL_FLUSHER do not overwrite previous events
* indicating that the JOURNAL_COPIER has fallen behind.
*
*
* @author peter
*
*/
public final class AlertMonitor extends NotificationBroadcasterSupport implements AlertMonitorMXBean {
private final static int DEFAULT_HISTORY_LENGTH = 10;
private final static int MINIMUM_HISTORY_LENGTH = 1;
private final static int MAXIMUM_HISTORY_LENGTH = 1000;
private final static long DEFAULT_WARN_INTERVAL = 600000;
private final static long MINIMUM_WARN_INTERVAL = 1000;
private final static long MAXIMUM_WARN_INTERVAL = 86400000;
private final static long DEFAULT_ERROR_INTERVAL = 15000;
private final static long MINIMUM_ERROR_INTERVAL = 1000;
private final static long MAXIMUM_ERROR_INTERVAL = 86400000;
/**
* Severity of an event
*/
public enum AlertLevel {
/**
* Normal state
*/
NORMAL,
/**
* Warning - system is running but could be experiencing trouble that
* could lead to an error. Example: too many journal files, disk is
* filling up, pruning is falling behind etc.
*/
WARN,
/**
* Error - system is generating errors and failing. Example: disk is
* full.
*/
ERROR,
}
protected final static String EVENT_FORMAT = "event %,5d: %s";
protected final static String AGGREGATION_FORMAT = "Minimum=%,d Maximum=%,d Total=%,d";
protected final static String EXTRA_FORMAT = "Extra=%s";
/**
* Aggregation of {@link Event}s in a category. The history retains the
* first event, the most recent N events where N is determined by the
* {@link AlertMonitor#getHistoryLength()} attribute, the count of events
* and other fields used to determine how frequently new notifications are
* issued.
*
* @author peter
*/
public class History {
private AlertLevel _level = AlertLevel.NORMAL;
private final List _eventList = new ArrayList();
private volatile long _firstEventTime = Long.MAX_VALUE;
private volatile long _lastWarnLogTime = Long.MIN_VALUE;
private volatile long _lastErrorLogTime = Long.MIN_VALUE;
private volatile int _reportedCount;
private volatile Event _firstEvent;
private volatile int _count;
/**
* @return a formated report showing the most recent {@link Event}
*/
@Override
public String toString() {
final StringBuilder sb = new StringBuilder();
synchronized (AlertMonitor.this) {
final Event event = getLastEvent();
if (_count > 0) {
sb.append(String.format(EVENT_FORMAT, _count, event == null ? "missing" : format(event)));
}
}
return sb.toString();
}
/**
* @return a formated report showing all of the retained {@link Event}s
*/
public String getDetailedHistory() {
final StringBuilder sb = new StringBuilder();
synchronized (AlertMonitor.this) {
final int size = _eventList.size();
if (_count > 0) {
sb.append(String.format(EVENT_FORMAT, 1, format(_firstEvent)));
for (int index = _count > size ? 0 : 1; index < size; index++) {
if (sb.length() > 0) {
sb.append(Util.NEW_LINE);
}
sb.append(String.format(EVENT_FORMAT, _count - size + index + 1, format(_eventList.get(index))));
}
}
}
return sb.toString();
}
/**
* @return the current alert level of this monitor
*/
public AlertLevel getLevel() {
return _level;
}
/**
* @return time of the first event
*/
public long getFirstEventTime() {
return _firstEventTime;
}
/**
* @return time of the last event
*/
public long getLastEventTime() {
synchronized (AlertMonitor.this) {
if (_eventList.size() > 0) {
return _eventList.get(_eventList.size() - 1)._time;
} else {
return Long.MIN_VALUE;
}
}
}
/**
* @return interval between first and last recorded event, in seconds
*/
public long getDuration() {
final long latest = getLastEventTime();
if (latest == Long.MIN_VALUE) {
return 0;
}
return (latest - _firstEventTime) / 1000;
}
/**
* @return the recent history
*/
public List getEventList() {
synchronized (AlertMonitor.this) {
return new ArrayList(_eventList);
}
}
/**
* @return The first event added to this history, or null
* if there have been no events.
*/
public Event getFirstEvent() {
return _firstEvent;
}
/**
* @return The last event added to this history, or null
if
* there have been no events.
*/
public Event getLastEvent() {
synchronized (AlertMonitor.this) {
if (_eventList.isEmpty()) {
return null;
} else {
return _eventList.get(_eventList.size() - 1);
}
}
}
/**
* @return the count of events posted to this history
*/
public int getCount() {
return _count;
}
/**
* Issue a log messages and notifications as needed to signify an
* ongoing condition. This method is called periodically. It keeps track
* of when the last message was added to the log and writes a new
* recurring message only as frequently as allowed by
* {@link AlertMonitor#getErrorLogTimeInterval()} or
* {@link AlertMonitor#getWarnLogTimeInterval()}.
*
* @param now
* current system time as returned by
* {@link System#currentTimeMillis()}
* @param force
* if true
and the level is WARN or ERROR, this
* method emits a notification regardless of whether the
* notification interval has elapsed; otherwise this method
* does nothing unless the interval has elapsed.
*/
public void poll(final long now, final boolean force) {
final int count = getCount();
if (count > _reportedCount) {
switch (_level) {
case ERROR:
if (force || now > _lastErrorLogTime + _errorLogTimeInterval) {
_lastErrorLogTime = now;
log(this);
sendNotification(this);
_reportedCount = count;
}
break;
case WARN:
if (force || now > _lastWarnLogTime + _warnLogTimeInterval) {
_lastWarnLogTime = now;
log(this);
sendNotification(this);
_reportedCount = count;
}
break;
default:
// Ignore the NORMAL case
}
}
}
/**
* Add an Event at the specified level
*
* @param event
* the Event
to add
* @param level
* the AlertLevel
that should be assigned to it
*/
private void addEvent(final Event event) {
trim(_historyLength - 1);
_eventList.add(event);
_count++;
if (event.getTime() < _firstEventTime) {
_firstEventTime = event.getTime();
_firstEvent = event;
}
event.added(this);
_level = event.getLevel();
}
/**
* Remove events until the event list is no larger than the supplied
* size
*
* @param size
* maximum number of events to retain on the event list
*/
private void trim(final int size) {
while (_eventList.size() > size) {
_eventList.get(0).removed(this);
_eventList.remove(0);
}
}
}
/**
* Holder for event data including the event arguments and the time the
* event was posted.
*/
public static class Event {
private final AlertLevel _level;
private final LogItem _logItem;
private final Object[] _args;
private final long _time;
/**
* Construct an Event
for the specified {@link LogItem} and
* arguments with the current system time.
*
* @param level
* AlertLevel assigned to this event by the posting process
* @param logItem
* LogItem
to be used in logging this event
* @param args
* arguments specific to the LogItem
*/
public Event(final AlertLevel level, final LogItem logItem, final Object... args) {
this(level, System.currentTimeMillis(), logItem, args);
}
/**
* Construct an Event
for the specified {@link LogItem} and
* arguments with the specified system time.
*
* @param level
* AlertLevel assigned to this event by the posting process
* @param time
* System time in milliseconds at which the event occurred
* @param logItem
* LogItem
to be used in logging this event
* @param args
* arguments specific to the LogItem
*/
public Event(final AlertLevel level, final long time, final LogItem logItem, final Object... args) {
_level = level;
_logItem = logItem;
_args = args;
_time = time;
}
/**
*
* @return the AlertLevel assigned to this Event.
*/
public AlertLevel getLevel() {
return _level;
}
/**
* @return The {@link LogItem} assigned to the event
*/
public LogItem getLogItem() {
return _logItem;
}
/**
*
* @return the system time at which the event occurred
*/
public long getTime() {
return _time;
}
/**
*
* @return arguments supplied with the event and used to format a log or
* notification message to describe this event
*/
public Object[] getArgs() {
return _args;
}
/**
*
* @return the first element of the argument array, or
* null if the array is empty
*/
public Object getFirstArg() {
return _args.length > 0 ? _args[0] : null;
}
/**
* Called when this Event
is added to a
* History
. By default this method does nothing, but an
* Event subclass can use this method to compute statistics or perform
* other custom operations. This method is called after the event has
* been added.
*
* @param history
* the History
to which this event has been
* added
*/
protected void added(final History history) {
// Default: do nothing
}
/**
* Called when this Event
is removed from a
* History
. By default this method does nothing, but an
* Event subclass can use this method to compute statistics or perform
* other custom operations. This method is called before the event has
* been removed.
*
* @param history
* the History
from which this event will be
* removed
*/
protected void removed(final History history) {
// Default: do nothing
}
public String logMessage() {
return _logItem.logMessage(_args);
}
/**
* @return a description of the event with human-readable time and log
* message
*/
@Override
public String toString() {
return Util.date(_time) + " " + logMessage();
}
}
public static class AlertRecord {
final String _category;
final AlertLevel _level;
final long _time;
final String _message;
private AlertRecord(final String category, final Event event) {
_category = category;
_level = event.getLevel();
_time = event.getTime();
_message = event.logMessage();
}
/**
* @return the category
*/
public String getCategory() {
return _category;
}
/**
* @return the level
*/
public AlertLevel getLevel() {
return _level;
}
/**
* @return the _time
*/
public long getTime() {
return _time;
}
/**
* @return the message
*/
public String getMessage() {
return _message;
}
}
final static String NOTIFICATION_TYPE = "com.persistit.AlertMonitor";
private final Map _historyMap = new TreeMap();
private volatile long _warnLogTimeInterval = DEFAULT_WARN_INTERVAL;
private volatile long _errorLogTimeInterval = DEFAULT_ERROR_INTERVAL;
private volatile int _historyLength = DEFAULT_HISTORY_LENGTH;
private final AtomicLong _notificationSequence = new AtomicLong();
private volatile ObjectName _objectName;
/**
* Constructor
*/
AlertMonitor() {
super(Executors.newCachedThreadPool());
}
/**
* Record the ObjectName
with which this instance was
* registered as an MBean
*
* @param on
* The ObjectName
*/
void setObjectName(final ObjectName on) {
_objectName = on;
}
/**
* @return the ObjectName
with which this instance was
* registered as an MBean
*/
ObjectName getObjectName() {
return _objectName;
}
/**
* Post an event. If necessary, create a {@link History} for the specified
* category and then add the event to it. Subsequently, poll the
* History
to cause any pending notifications or log messages
* to be emitted.
*
* @param event
* A Event object describing what happened
* @param category
* A String describing the nature of the event. A separate
* event-history is maintained for each unique category.
*/
public synchronized final void post(final Event event, final String category) {
History history = _historyMap.get(category);
if (history == null) {
history = new History();
_historyMap.put(category, history);
}
history.addEvent(event);
history.poll(event.getTime(), false);
}
/**
* Restore this alert monitor to level {@link AlertLevel#NORMAL} with and
* remove all histories for all categories. The logging time intervals and
* history length are not changed.
*/
@Override
public synchronized void reset() {
_historyMap.clear();
}
/**
* @return the interval in milliseconds between successive log entries for
* this monitor when its {@link AlertLevel#WARN}.
*/
@Override
public long getWarnLogTimeInterval() {
return _warnLogTimeInterval;
}
/**
* Set the interval between successive log entries for this monitor when its
* {@link AlertLevel#WARN}.
*
* @param warnLogTimeInterval
* the interval in milliseconds
*/
@Override
public void setWarnLogTimeInterval(final long warnLogTimeInterval) {
Util.rangeCheck(warnLogTimeInterval, MINIMUM_WARN_INTERVAL, MAXIMUM_WARN_INTERVAL);
_warnLogTimeInterval = warnLogTimeInterval;
}
/**
* @return the interval in milliseconds between successive log entries for
* this monitor when its {@link AlertLevel#ERROR}.
*/
@Override
public long getErrorLogTimeInterval() {
return _errorLogTimeInterval;
}
/**
* Set the interval between successive log entries for this monitor when its
* {@link AlertLevel#ERROR}.
*
* @param errorLogTimeInterval
* the interval in milliseconds
*/
@Override
public void setErrorLogTimeInterval(final long errorLogTimeInterval) {
Util.rangeCheck(errorLogTimeInterval, MINIMUM_ERROR_INTERVAL, MAXIMUM_ERROR_INTERVAL);
_errorLogTimeInterval = errorLogTimeInterval;
}
/**
* @see #setHistoryLength(int)
* @return the number of events to retain in the History
per
* category.
*/
@Override
public int getHistoryLength() {
return _historyLength;
}
/**
* @param name
* Category name
* @return the History
for that category or null
* if the specified category has no History
.
*/
public synchronized History getHistory(final String name) {
return _historyMap.get(name);
}
/**
* @return Map sorted by category name of all History elements.
*/
public synchronized SortedMap getHistoryMap() {
return new TreeMap(_historyMap);
}
/**
* Set the number of Event
s per category to keep in the
* History
. Once the number of events exceeds this count,
* earlier events are removed when new events are added. (However, the first
* Event
added is retained and get accessed via
* {@link History#getFirstEvent()} even when subsequent Event
s
* are removed.) The default value is 10.
*
* @param historyLength
* the historyLength to set
*/
@Override
public synchronized void setHistoryLength(final int historyLength) {
Util.rangeCheck(historyLength, MINIMUM_HISTORY_LENGTH, MAXIMUM_HISTORY_LENGTH);
_historyLength = historyLength;
for (final History history : _historyMap.values()) {
history.trim(historyLength);
}
}
/**
* Generate a list of event data suitable for the Akiban Server in-memory
* tables implementation.
*
* @return List of AlertRecord elements.
*/
public synchronized AlertRecord[] getAlertRecordArray() {
final List list = new ArrayList();
for (final Map.Entry entry : _historyMap.entrySet()) {
for (final Event event : entry.getValue().getEventList()) {
list.add(new AlertRecord(entry.getKey(), event));
}
}
return list.toArray(new AlertRecord[list.size()]);
}
/**
* Called periodically to issue any pending log messages or notifications.
*
* @param force
* Indicates whether to issue pending log messages and
* notifications prior to expiration of their respective
* notification time intervals.
*/
@Override
public synchronized void poll(final boolean force) {
for (final History history : _historyMap.values()) {
history.poll(System.currentTimeMillis(), force);
}
}
/**
* @return A human-readable summary of all History
instances.
*/
@Override
public synchronized String toString() {
final StringBuilder sb = new StringBuilder();
for (final Map.Entry entry : _historyMap.entrySet()) {
sb.append(String.format("%12s: %s\n", entry.getKey(), entry.getValue()));
}
return sb.toString();
}
/**
* @see #toString()
* @return A human-readable summary of all History
instances.
*/
@Override
public String getSummary() {
return toString();
}
/**
* @return a detailed history report for the specified category
* @param select
* The category name(s) to include, optionally with wildcards '*'
* and '?'
*/
@Override
public synchronized String getDetailedHistory(final String select) {
final Pattern pattern = Util.pattern(select, true);
final StringBuilder sb = new StringBuilder();
for (final Map.Entry entry : _historyMap.entrySet()) {
if (pattern.matcher(entry.getKey()).matches()) {
sb.append(String.format("%s:\n", entry.getKey()));
sb.append(entry.getValue().getDetailedHistory());
}
}
return sb.toString();
}
/**
* @return the String value of the highest current {@link AlertLevel} among
* all categories being maintained
*/
@Override
public synchronized String getAlertLevel() {
AlertLevel level = AlertLevel.NORMAL;
for (final Map.Entry entry : _historyMap.entrySet()) {
if (entry.getValue().getLevel().compareTo(level) > 0) {
level = entry.getValue().getLevel();
}
}
return level.toString();
}
/**
* @return the metadata needed by an MBeanServer to know what notifications
* may be sent by this MBean
*/
@Override
public MBeanNotificationInfo[] getNotificationInfo() {
final String[] types = new String[] { NOTIFICATION_TYPE };
final String name = Notification.class.getName();
final String description = "Alert raised by Akiban PersistIT";
final MBeanNotificationInfo info = new MBeanNotificationInfo(types, name, description);
return new MBeanNotificationInfo[] { info };
}
/**
* Emit a log message. If there has been only one Event, log it as a
* standard log message. If there have been multiple Events, emit a
* recurring event message.
*
* @param history
*/
private void log(final History history) {
final Event event = history.getLastEvent();
if (event != null && event.getLogItem().isEnabled()) {
if (history.getCount() == 1) {
event.getLogItem().log(event.getArgs());
} else {
event.getLogItem().logRecurring(history.getCount(), history.getDuration(), event.getArgs());
}
}
}
/**
* Broadcast a JMX Notification.
*
* @param history
*/
private void sendNotification(final History history) {
final Event event = history.getLastEvent();
if (event != null && event.getLogItem().isEnabled()) {
final String description = LogBase.recurring(event.getLogItem().logMessage(event.getArgs()),
history.getCount(), history.getDuration());
final Notification notification = new Notification(NOTIFICATION_TYPE, getClass().getName(),
_notificationSequence.incrementAndGet(), description);
sendNotification(notification);
}
}
/**
*
* Format an event object as a String. By default this method returns a
* String containing the time of the event formatted as a compact Date/Time
* conversion followed by the event arguments concatenated in a
* comma-limited list. For example,
*
*
* 2012-01-13 16:52:05 SomeException, SomeInfo1, SomeInfo2
*
*
* for an event that happened on March 13, 2012 at 4:25:05pm for which there
* were three argument elements.
*
*
* @param event
* @return
*/
private String format(final Event event) {
return event == null ? "null" : event.toString();
}
}