All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.hazelcast.spi.impl.operationservice.impl.InvocationMonitor Maven / Gradle / Ivy

/*
 * Copyright (c) 2008-2016, Hazelcast, Inc. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.hazelcast.spi.impl.operationservice.impl;

import com.hazelcast.core.MemberLeftException;
import com.hazelcast.instance.GroupProperties;
import com.hazelcast.instance.GroupProperty;
import com.hazelcast.instance.HazelcastThreadGroup;
import com.hazelcast.instance.MemberImpl;
import com.hazelcast.internal.metrics.MetricsRegistry;
import com.hazelcast.internal.metrics.Probe;
import com.hazelcast.logging.ILogger;
import com.hazelcast.nio.Address;
import com.hazelcast.spi.ExecutionService;
import com.hazelcast.spi.impl.operationexecutor.OperationHostileThread;
import com.hazelcast.util.Clock;
import com.hazelcast.util.EmptyStatement;
import com.hazelcast.util.counters.SwCounter;

import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.TimeUnit;
import java.util.logging.Level;

import static com.hazelcast.instance.OutOfMemoryErrorDispatcher.inspectOutputMemoryError;
import static com.hazelcast.internal.metrics.ProbeLevel.MANDATORY;
import static com.hazelcast.util.counters.SwCounter.newSwCounter;
import static java.util.logging.Level.FINE;
import static java.util.logging.Level.INFO;

/**
 * The InvocationMonitor monitors all pending invocations and determines if there are any problems like timeouts. It uses the
 * {@link InvocationRegistry} to access the pending invocations.
 * 

* An experimental feature to support debugging is the slow invocation detector. So it can log any invocation that takes * more than x seconds. See {@link GroupProperty#SLOW_INVOCATION_DETECTOR_THRESHOLD_MILLIS} for more information. */ public class InvocationMonitor { private static final long ON_MEMBER_LEFT_DELAY_MS = 1111; private static final int SCAN_DELAY_MILLIS = 1000; private final long backupTimeoutMillis; private final long slowInvocationThresholdMs; private final InvocationRegistry invocationRegistry; private final ExecutionService executionService; private final MonitorThread monitorThread; private final ILogger logger; @Probe(name = "invocations.backupTimeouts", level = MANDATORY) private final SwCounter backupTimeoutsCount = newSwCounter(); @Probe(name = "invocations.normalTimeouts", level = MANDATORY) private final SwCounter normalTimeoutsCount = newSwCounter(); public InvocationMonitor(InvocationRegistry invocationRegistry, ILogger logger, GroupProperties props, HazelcastThreadGroup hzThreadGroup, ExecutionService executionService, MetricsRegistry metricsRegistry) { this.invocationRegistry = invocationRegistry; this.logger = logger; this.executionService = executionService; this.backupTimeoutMillis = props.getMillis(GroupProperty.OPERATION_BACKUP_TIMEOUT_MILLIS); this.slowInvocationThresholdMs = initSlowInvocationThresholdMs(props); this.monitorThread = new MonitorThread(hzThreadGroup); metricsRegistry.scanAndRegister(this, "operation"); monitorThread.start(); } private long initSlowInvocationThresholdMs(GroupProperties props) { long thresholdMs = props.getMillis(GroupProperty.SLOW_INVOCATION_DETECTOR_THRESHOLD_MILLIS); if (thresholdMs > -1) { logger.info("Slow invocation detector enabled, using threshold: " + thresholdMs + " ms"); } return thresholdMs; } public void shutdown() { monitorThread.shutdown(); } public void awaitTermination(long timeoutMillis) throws InterruptedException { monitorThread.join(timeoutMillis); } public void onMemberLeft(MemberImpl member) { // postpone notifying invocations since real response may arrive in the mean time. Runnable task = new OnMemberLeftTask(member); executionService.schedule(task, ON_MEMBER_LEFT_DELAY_MS, TimeUnit.MILLISECONDS); } /** * The MonitorThread iterates over all pending invocations and sees what needs to be done *

* But it should also check if a 'is still running' check needs to be done. This removed complexity from * the invocation.waitForResponse which is too complicated too understand. *

* This class needs to implement the {@link OperationHostileThread} interface to make sure that the OperationExecutor * is not going to schedule any operations on this task due to retry. */ private final class MonitorThread extends Thread implements OperationHostileThread { private volatile boolean shutdown; private MonitorThread(HazelcastThreadGroup hzThreadGroup) { super(hzThreadGroup.getInternalThreadGroup(), hzThreadGroup.getThreadNamePrefix("InvocationMonitorThread")); } public void shutdown() { shutdown = true; interrupt(); } @Override public void run() { try { while (!shutdown) { scan(); if (!shutdown) { sleep(); } } } catch (Throwable t) { inspectOutputMemoryError(t); logger.severe("Failed to run", t); } } private void sleep() { try { Thread.sleep(SCAN_DELAY_MILLIS); } catch (InterruptedException ignore) { // can safely be ignored. If this thread wants to shut down, it will read the shutdown variable. EmptyStatement.ignore(ignore); } } private void scan() { if (invocationRegistry.size() == 0) { return; } long now = Clock.currentTimeMillis(); int backupTimeouts = 0; int invocationTimeouts = 0; int invocationCount = 0; Set> invocations = invocationRegistry.entrySet(); Iterator> iterator = invocations.iterator(); while (iterator.hasNext()) { invocationCount++; if (shutdown) { return; } Map.Entry entry = iterator.next(); Long callId = entry.getKey(); Invocation invocation = entry.getValue(); /* * The reason for the following if check is a workaround for the problem explained below. * * Problematic scenario : * If an invocation with callId 1 is retried twice for any reason, * two new innovations created and registered to invocation registry with callId’s 2 and 3 respectively. * Both new invocations are sharing the same operation * When one of the new invocations, say the one with callId 2 finishes, it de-registers itself from the * invocation registry. * When doing the de-registration it sets the shared operation’s callId to 0. * After that when the invocation with the callId 3 completes, it tries to de-register itself from * invocation registry * but fails to do so since the invocation callId and the callId on the operation is not matching anymore * When InvocationMonitor thread kicks in, it sees that there is an invocation in the registry, * and asks whether invocation is finished or not. * Even if the remote node replies with invocation is timed out, * It can’t be de-registered from the registry because of aforementioned non-matching callId scenario. * * Workaround: * When InvocationMonitor kicks in, it will do a check for invocations that are completed * but their callId's are not matching with their operations. If any invocation found for that type, * it is removed from the invocation registry. * * */ if (!callIdMatches(callId, invocation) && isDone(invocation)) { iterator.remove(); continue; } detectSlowInvocation(now, invocation); if (checkInvocationTimeout(invocation)) { invocationTimeouts++; } if (checkBackupTimeout(invocation)) { backupTimeouts++; } } backupTimeoutsCount.inc(backupTimeouts); normalTimeoutsCount.inc(invocationTimeouts); log(invocationCount, backupTimeouts, invocationTimeouts); } private boolean callIdMatches(long callId, Invocation invocation) { return callId == invocation.op.getCallId(); } private boolean isDone(Invocation invocation) { return invocation.invocationFuture.isDone(); } private void detectSlowInvocation(long now, Invocation invocation) { if (slowInvocationThresholdMs > 0) { long durationMs = now - invocation.op.getInvocationTime(); if (durationMs > slowInvocationThresholdMs) { logger.info("Slow invocation: duration=" + durationMs + " ms, operation=" + invocation.op.getClass().getName() + " inv:" + invocation); } } } private boolean checkInvocationTimeout(Invocation invocation) { try { return invocation.checkInvocationTimeout(); } catch (Throwable t) { inspectOutputMemoryError(t); logger.severe("Failed to handle operation timeout of invocation:" + invocation, t); return false; } } private boolean checkBackupTimeout(Invocation invocation) { try { return invocation.checkBackupTimeout(backupTimeoutMillis); } catch (Throwable t) { inspectOutputMemoryError(t); logger.severe("Failed to handle backup timeout of invocation:" + invocation, t); return false; } } private void log(int invocationCount, int backupTimeouts, int invocationTimeouts) { Level logLevel = null; if (backupTimeouts > 0 || invocationTimeouts > 0) { logLevel = INFO; } else if (logger.isFineEnabled()) { logLevel = FINE; } if (logLevel != null) { logger.log(logLevel, "Invocations:" + invocationCount + " timeouts:" + invocationTimeouts + " backup-timeouts:" + backupTimeouts); } } } private final class OnMemberLeftTask implements Runnable { private final MemberImpl leftMember; public OnMemberLeftTask(MemberImpl leftMember) { this.leftMember = leftMember; } @Override public void run() { for (Invocation invocation : invocationRegistry.invocations()) { if (hasMemberLeft(invocation)) { invocation.notifyError(new MemberLeftException(leftMember)); } } } private boolean hasMemberLeft(Invocation invocation) { MemberImpl targetMember = invocation.targetMember; if (targetMember == null) { Address invTarget = invocation.invTarget; return leftMember.getAddress().equals(invTarget); } else { return leftMember.getUuid().equals(targetMember.getUuid()); } } } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy