All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.netflix.eureka.registry.PeerAwareInstanceRegistryImpl Maven / Gradle / Ivy

There is a newer version: 2.0.4
Show newest version
/*
 * Copyright 2012 Netflix, Inc.
 *
 *    Licensed under the Apache License, Version 2.0 (the "License");
 *    you may not use this file except in compliance with the License.
 *    You may obtain a copy of the License at
 *
 *        http://www.apache.org/licenses/LICENSE-2.0
 *
 *    Unless required by applicable law or agreed to in writing, software
 *    distributed under the License is distributed on an "AS IS" BASIS,
 *    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *    See the License for the specific language governing permissions and
 *    limitations under the License.
 */

package com.netflix.eureka.registry;

import com.netflix.discovery.util.SpectatorUtil;
import com.netflix.spectator.api.Spectator;
import java.net.URI;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.Timer;
import java.util.TimerTask;

import com.netflix.appinfo.AmazonInfo;
import com.netflix.appinfo.AmazonInfo.MetaDataKey;
import com.netflix.appinfo.ApplicationInfoManager;
import com.netflix.appinfo.DataCenterInfo;
import com.netflix.appinfo.DataCenterInfo.Name;
import com.netflix.appinfo.InstanceInfo;
import com.netflix.appinfo.InstanceInfo.InstanceStatus;
import com.netflix.appinfo.LeaseInfo;
import com.netflix.discovery.EurekaClient;
import com.netflix.discovery.EurekaClientConfig;
import com.netflix.discovery.shared.Application;
import com.netflix.discovery.shared.Applications;
import com.netflix.eureka.registry.rule.DownOrStartingRule;
import com.netflix.eureka.registry.rule.FirstMatchWinsCompositeRule;
import com.netflix.eureka.registry.rule.InstanceStatusOverrideRule;
import com.netflix.eureka.registry.rule.LeaseExistsRule;
import com.netflix.eureka.registry.rule.OverrideExistsRule;
import com.netflix.eureka.resources.CurrentRequestVersion;
import com.netflix.eureka.EurekaServerConfig;
import com.netflix.eureka.Version;
import com.netflix.eureka.cluster.PeerEurekaNode;
import com.netflix.eureka.cluster.PeerEurekaNodes;
import com.netflix.eureka.lease.Lease;
import com.netflix.eureka.resources.ASGResource.ASGStatus;
import com.netflix.eureka.resources.ServerCodecs;
import com.netflix.eureka.transport.EurekaServerHttpClientFactory;
import com.netflix.eureka.util.MeasuredRate;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import jakarta.inject.Inject;
import jakarta.inject.Singleton;

import static com.netflix.eureka.Names.METRIC_REGISTRY_PREFIX;

/**
 * Handles replication of all operations to {@link AbstractInstanceRegistry} to peer
 * Eureka nodes to keep them all in sync.
 *
 * 

* Primary operations that are replicated are the * Registers,Renewals,Cancels,Expirations and Status Changes *

* *

* When the eureka server starts up it tries to fetch all the registry * information from the peer eureka nodes.If for some reason this operation * fails, the server does not allow the user to get the registry information for * a period specified in * {@link com.netflix.eureka.EurekaServerConfig#getWaitTimeInMsWhenSyncEmpty()}. *

* *

* One important thing to note about renewals.If the renewal drops more * than the specified threshold as specified in * {@link com.netflix.eureka.EurekaServerConfig#getRenewalPercentThreshold()} within a period of * {@link com.netflix.eureka.EurekaServerConfig#getRenewalThresholdUpdateIntervalMs()}, eureka * perceives this as a danger and stops expiring instances. *

* * @author Karthik Ranganathan, Greg Kim * */ @Singleton public class PeerAwareInstanceRegistryImpl extends AbstractInstanceRegistry implements PeerAwareInstanceRegistry { private static final Logger logger = LoggerFactory.getLogger(PeerAwareInstanceRegistryImpl.class); private static final String US_EAST_1 = "us-east-1"; private static final int PRIME_PEER_NODES_RETRY_MS = 30000; private long startupTime = 0; private boolean peerInstancesTransferEmptyOnStartup = true; public enum Action { Heartbeat, Register, Cancel, StatusUpdate, DeleteStatusOverride; private final com.netflix.spectator.api.Timer timer = Spectator.globalRegistry().timer(this.name()); public com.netflix.spectator.api.Timer getTimer() { return timer; } } private static final Comparator APP_COMPARATOR = new Comparator() { public int compare(Application l, Application r) { return l.getName().compareTo(r.getName()); } }; private final MeasuredRate numberOfReplicationsLastMin; protected final EurekaClient eurekaClient; protected volatile PeerEurekaNodes peerEurekaNodes; private final InstanceStatusOverrideRule instanceStatusOverrideRule; private final Timer timer = new Timer( "ReplicaAwareInstanceRegistry - RenewalThresholdUpdater", true); @Inject public PeerAwareInstanceRegistryImpl( EurekaServerConfig serverConfig, EurekaClientConfig clientConfig, ServerCodecs serverCodecs, EurekaClient eurekaClient, EurekaServerHttpClientFactory eurekaServerHttpClientFactory ) { super(serverConfig, clientConfig, serverCodecs, eurekaServerHttpClientFactory); this.eurekaClient = eurekaClient; this.numberOfReplicationsLastMin = new MeasuredRate(1000 * 60 * 1); // We first check if the instance is STARTING or DOWN, then we check explicit overrides, // then we check the status of a potentially existing lease. this.instanceStatusOverrideRule = new FirstMatchWinsCompositeRule(new DownOrStartingRule(), new OverrideExistsRule(overriddenInstanceStatusMap), new LeaseExistsRule()); SpectatorUtil.monitoredValue(METRIC_REGISTRY_PREFIX + "shouldAllowAccess", this, PeerAwareInstanceRegistryImpl::shouldAllowAccessMetric); SpectatorUtil.monitoredValue(METRIC_REGISTRY_PREFIX + "isLeaseExpirationEnabled", this, PeerAwareInstanceRegistryImpl::isLeaseExpirationEnabledMetric); SpectatorUtil.monitoredValue(METRIC_REGISTRY_PREFIX + "isSelfPreservationModeEnabled", this, PeerAwareInstanceRegistryImpl::isSelfPreservationModeEnabledMetric); SpectatorUtil.monitoredValue("numOfReplicationsInLastMin", this, PeerAwareInstanceRegistryImpl::getNumOfReplicationsInLastMin); SpectatorUtil.monitoredValue("isBelowRenewThreshold", this, PeerAwareInstanceRegistryImpl::isBelowRenewThresold); SpectatorUtil.monitoredValue("localRegistrySize", this, PeerAwareInstanceRegistryImpl::getLocalRegistrySize); } @Override protected InstanceStatusOverrideRule getInstanceInfoOverrideRule() { return this.instanceStatusOverrideRule; } @Override public void init(PeerEurekaNodes peerEurekaNodes) throws Exception { this.numberOfReplicationsLastMin.start(); this.peerEurekaNodes = peerEurekaNodes; initializedResponseCache(); scheduleRenewalThresholdUpdateTask(); initRemoteRegionRegistry(); } /** * Perform all cleanup and shutdown operations. */ @Override public void shutdown() { try { peerEurekaNodes.shutdown(); } catch (Throwable t) { logger.error("Cannot shutdown ReplicaAwareInstanceRegistry", t); } numberOfReplicationsLastMin.stop(); timer.cancel(); super.shutdown(); } /** * Schedule the task that updates renewal threshold periodically. * The renewal threshold would be used to determine if the renewals drop * dramatically because of network partition and to protect expiring too * many instances at a time. * */ private void scheduleRenewalThresholdUpdateTask() { timer.schedule(new TimerTask() { @Override public void run() { updateRenewalThreshold(); } }, serverConfig.getRenewalThresholdUpdateIntervalMs(), serverConfig.getRenewalThresholdUpdateIntervalMs()); } /** * Populates the registry information from a peer eureka node. This * operation fails over to other nodes until the list is exhausted if the * communication fails. */ @Override public int syncUp() { // Copy entire entry from neighboring DS node int count = 0; for (int i = 0; ((i < serverConfig.getRegistrySyncRetries()) && (count == 0)); i++) { if (i > 0) { try { Thread.sleep(serverConfig.getRegistrySyncRetryWaitMs()); } catch (InterruptedException e) { logger.warn("Interrupted during registry transfer.."); break; } } Applications apps = eurekaClient.getApplications(); for (Application app : apps.getRegisteredApplications()) { for (InstanceInfo instance : app.getInstances()) { try { if (isRegisterable(instance)) { register(instance, instance.getLeaseInfo().getDurationInSecs(), true); count++; } } catch (Throwable t) { logger.error("During DS init copy", t); } } } } return count; } @Override public void openForTraffic(ApplicationInfoManager applicationInfoManager, int count) { // Renewals happen every 30 seconds and for a minute it should be a factor of 2. this.expectedNumberOfClientsSendingRenews = count; updateRenewsPerMinThreshold(); logger.info("Got {} instances from neighboring DS node", count); logger.info("Renew threshold is: {}", numberOfRenewsPerMinThreshold); this.startupTime = System.currentTimeMillis(); if (count > 0) { this.peerInstancesTransferEmptyOnStartup = false; } DataCenterInfo.Name selfName = applicationInfoManager.getInfo().getDataCenterInfo().getName(); boolean isAws = Name.Amazon == selfName; if (isAws && serverConfig.shouldPrimeAwsReplicaConnections()) { logger.info("Priming AWS connections for all replicas.."); primeAwsReplicas(applicationInfoManager); } logger.info("Changing status to UP"); applicationInfoManager.setInstanceStatus(InstanceStatus.UP); super.postInit(); } /** * Prime connections for Aws replicas. *

* Sometimes when the eureka servers comes up, AWS firewall may not allow * the network connections immediately. This will cause the outbound * connections to fail, but the inbound connections continue to work. What * this means is the clients would have switched to this node (after EIP * binding) and so the other eureka nodes will expire all instances that * have been switched because of the lack of outgoing heartbeats from this * instance. *

*

* The best protection in this scenario is to block and wait until we are * able to ping all eureka nodes successfully atleast once. Until then we * won't open up the traffic. *

*/ private void primeAwsReplicas(ApplicationInfoManager applicationInfoManager) { boolean areAllPeerNodesPrimed = false; while (!areAllPeerNodesPrimed) { String peerHostName = null; try { Application eurekaApps = this.getApplication(applicationInfoManager.getInfo().getAppName(), false); if (eurekaApps == null) { areAllPeerNodesPrimed = true; logger.info("No peers needed to prime."); return; } for (PeerEurekaNode node : peerEurekaNodes.getPeerEurekaNodes()) { for (InstanceInfo peerInstanceInfo : eurekaApps.getInstances()) { LeaseInfo leaseInfo = peerInstanceInfo.getLeaseInfo(); // If the lease is expired - do not worry about priming if (System.currentTimeMillis() > (leaseInfo .getRenewalTimestamp() + (leaseInfo .getDurationInSecs() * 1000)) + (2 * 60 * 1000)) { continue; } peerHostName = peerInstanceInfo.getHostName(); logger.info("Trying to send heartbeat for the eureka server at {} to make sure the " + "network channels are open", peerHostName); // Only try to contact the eureka nodes that are in this instance's registry - because // the other instances may be legitimately down if (peerHostName.equalsIgnoreCase(new URI(node.getServiceUrl()).getHost())) { node.heartbeat( peerInstanceInfo.getAppName(), peerInstanceInfo.getId(), peerInstanceInfo, null, true); } } } areAllPeerNodesPrimed = true; } catch (Throwable e) { logger.error("Could not contact {}", peerHostName, e); try { Thread.sleep(PRIME_PEER_NODES_RETRY_MS); } catch (InterruptedException e1) { logger.warn("Interrupted while priming : ", e1); areAllPeerNodesPrimed = true; } } } } /** * Checks to see if the registry access is allowed or the server is in a * situation where it does not all getting registry information. The server * does not return registry information for a period specified in * {@link EurekaServerConfig#getWaitTimeInMsWhenSyncEmpty()}, if it cannot * get the registry information from the peer eureka nodes at start up. * * @return false - if the instances count from a replica transfer returned * zero and if the wait time has not elapsed, otherwise returns true */ @Override public boolean shouldAllowAccess(boolean remoteRegionRequired) { if (this.peerInstancesTransferEmptyOnStartup) { if (!(System.currentTimeMillis() > this.startupTime + serverConfig.getWaitTimeInMsWhenSyncEmpty())) { return false; } } if (remoteRegionRequired) { for (RemoteRegionRegistry remoteRegionRegistry : this.regionNameVSRemoteRegistry.values()) { if (!remoteRegionRegistry.isReadyForServingData()) { return false; } } } return true; } public boolean shouldAllowAccess() { return shouldAllowAccess(true); } public double shouldAllowAccessMetric() { return shouldAllowAccess() ? 1 : 0; } /** * @deprecated use {@link com.netflix.eureka.cluster.PeerEurekaNodes#getPeerEurekaNodes()} directly. * * Gets the list of peer eureka nodes which is the list to replicate * information to. * * @return the list of replica nodes. */ @Deprecated public List getReplicaNodes() { return Collections.unmodifiableList(peerEurekaNodes.getPeerEurekaNodes()); } /* * (non-Javadoc) * * @see com.netflix.eureka.registry.InstanceRegistry#cancel(java.lang.String, * java.lang.String, long, boolean) */ @Override public boolean cancel(final String appName, final String id, final boolean isReplication) { if (super.cancel(appName, id, isReplication)) { replicateToPeers(Action.Cancel, appName, id, null, null, isReplication); return true; } return false; } /** * Registers the information about the {@link InstanceInfo} and replicates * this information to all peer eureka nodes. If this is replication event * from other replica nodes then it is not replicated. * * @param info * the {@link InstanceInfo} to be registered and replicated. * @param isReplication * true if this is a replication event from other replica nodes, * false otherwise. */ @Override public void register(final InstanceInfo info, final boolean isReplication) { int leaseDuration = Lease.DEFAULT_DURATION_IN_SECS; if (info.getLeaseInfo() != null && info.getLeaseInfo().getDurationInSecs() > 0) { leaseDuration = info.getLeaseInfo().getDurationInSecs(); } super.register(info, leaseDuration, isReplication); replicateToPeers(Action.Register, info.getAppName(), info.getId(), info, null, isReplication); } /* * (non-Javadoc) * * @see com.netflix.eureka.registry.InstanceRegistry#renew(java.lang.String, * java.lang.String, long, boolean) */ public boolean renew(final String appName, final String id, final boolean isReplication) { if (super.renew(appName, id, isReplication)) { replicateToPeers(Action.Heartbeat, appName, id, null, null, isReplication); return true; } return false; } /* * (non-Javadoc) * * @see com.netflix.eureka.registry.InstanceRegistry#statusUpdate(java.lang.String, * java.lang.String, com.netflix.appinfo.InstanceInfo.InstanceStatus, * java.lang.String, boolean) */ @Override public boolean statusUpdate(final String appName, final String id, final InstanceStatus newStatus, String lastDirtyTimestamp, final boolean isReplication) { if (super.statusUpdate(appName, id, newStatus, lastDirtyTimestamp, isReplication)) { replicateToPeers(Action.StatusUpdate, appName, id, null, newStatus, isReplication); return true; } return false; } @Override public boolean deleteStatusOverride(String appName, String id, InstanceStatus newStatus, String lastDirtyTimestamp, boolean isReplication) { if (super.deleteStatusOverride(appName, id, newStatus, lastDirtyTimestamp, isReplication)) { replicateToPeers(Action.DeleteStatusOverride, appName, id, null, null, isReplication); return true; } return false; } /** * Replicate the ASG status updates to peer eureka nodes. If this * event is a replication from other nodes, then it is not replicated to * other nodes. * * @param asgName the asg name for which the status needs to be replicated. * @param newStatus the {@link ASGStatus} information that needs to be replicated. * @param isReplication true if this is a replication event from other nodes, false otherwise. */ @Override public void statusUpdate(final String asgName, final ASGStatus newStatus, final boolean isReplication) { // If this is replicated from an other node, do not try to replicate again. if (isReplication) { return; } for (final PeerEurekaNode node : peerEurekaNodes.getPeerEurekaNodes()) { replicateASGInfoToReplicaNodes(asgName, newStatus, node); } } @Override public boolean isLeaseExpirationEnabled() { if (!isSelfPreservationModeEnabled()) { // The self preservation mode is disabled, hence allowing the instances to expire. return true; } return numberOfRenewsPerMinThreshold > 0 && getNumOfRenewsInLastMin() > numberOfRenewsPerMinThreshold; } public double isLeaseExpirationEnabledMetric() { return isLeaseExpirationEnabled() ? 1 : 0; } /** * Checks to see if the self-preservation mode is enabled. * *

* The self-preservation mode is enabled if the expected number of renewals * per minute {@link #getNumOfRenewsInLastMin()} is lesser than the expected * threshold which is determined by {@link #getNumOfRenewsPerMinThreshold()} * . Eureka perceives this as a danger and stops expiring instances as this * is most likely because of a network event. The mode is disabled only when * the renewals get back to above the threshold or if the flag * {@link EurekaServerConfig#shouldEnableSelfPreservation()} is set to * false. *

* * @return true if the self-preservation mode is enabled, false otherwise. */ @Override public boolean isSelfPreservationModeEnabled() { return serverConfig.shouldEnableSelfPreservation(); } public double isSelfPreservationModeEnabledMetric() { return isSelfPreservationModeEnabled() ? 1 : 0; } @Override public InstanceInfo getNextServerFromEureka(String virtualHostname, boolean secure) { // TODO Auto-generated method stub return null; } /** * Updates the renewal threshold based on the current number of * renewals. The threshold is a percentage as specified in * {@link EurekaServerConfig#getRenewalPercentThreshold()} of renewals * received per minute {@link #getNumOfRenewsInLastMin()}. */ private void updateRenewalThreshold() { try { Applications apps = eurekaClient.getApplications(); int count = 0; for (Application app : apps.getRegisteredApplications()) { for (InstanceInfo instance : app.getInstances()) { if (this.isRegisterable(instance)) { ++count; } } } synchronized (lock) { // Update threshold only if the threshold is greater than the // current expected threshold or if self preservation is disabled. if ((count) > (serverConfig.getRenewalPercentThreshold() * expectedNumberOfClientsSendingRenews) || (!this.isSelfPreservationModeEnabled())) { this.expectedNumberOfClientsSendingRenews = count; updateRenewsPerMinThreshold(); } } logger.info("Current renewal threshold is : {}", numberOfRenewsPerMinThreshold); } catch (Throwable e) { logger.error("Cannot update renewal threshold", e); } } /** * Gets the list of all {@link Applications} from the registry in sorted * lexical order of {@link Application#getName()}. * * @return the list of {@link Applications} in lexical order. */ @Override public List getSortedApplications() { List apps = new ArrayList<>(getApplications().getRegisteredApplications()); Collections.sort(apps, APP_COMPARATOR); return apps; } /** * Gets the number of renewals in the last minute. * * @return a long value representing the number of renewals in the last minute. */ public long getNumOfReplicationsInLastMin() { return numberOfReplicationsLastMin.getCount(); } /** * Checks if the number of renewals is lesser than threshold. * * @return 0 if the renewals are greater than threshold, 1 otherwise. */ @Override public int isBelowRenewThresold() { if ((getNumOfRenewsInLastMin() <= numberOfRenewsPerMinThreshold) && ((this.startupTime > 0) && (System.currentTimeMillis() > this.startupTime + (serverConfig.getWaitTimeInMsWhenSyncEmpty())))) { return 1; } else { return 0; } } /** * Checks if an instance is registerable in this region. Instances from other regions are rejected. * * @param instanceInfo th instance info information of the instance * @return true, if it can be registered in this server, false otherwise. */ public boolean isRegisterable(InstanceInfo instanceInfo) { DataCenterInfo datacenterInfo = instanceInfo.getDataCenterInfo(); String serverRegion = clientConfig.getRegion(); if (AmazonInfo.class.isInstance(datacenterInfo)) { AmazonInfo info = AmazonInfo.class.cast(instanceInfo.getDataCenterInfo()); String availabilityZone = info.get(MetaDataKey.availabilityZone); // Can be null for dev environments in non-AWS data center if (availabilityZone == null && US_EAST_1.equalsIgnoreCase(serverRegion)) { return true; } else if ((availabilityZone != null) && (availabilityZone.contains(serverRegion))) { // If in the same region as server, then consider it registerable return true; } } return true; // Everything non-amazon is registrable. } /** * Replicates all eureka actions to peer eureka nodes except for replication * traffic to this node. * */ private void replicateToPeers(Action action, String appName, String id, InstanceInfo info /* optional */, InstanceStatus newStatus /* optional */, boolean isReplication) { final long time = SpectatorUtil.time(action.getTimer()); try { if (isReplication) { numberOfReplicationsLastMin.increment(); } // If it is a replication already, do not replicate again as this will create a poison replication if (peerEurekaNodes == Collections.EMPTY_LIST || isReplication) { return; } for (final PeerEurekaNode node : peerEurekaNodes.getPeerEurekaNodes()) { // If the url represents this host, do not replicate to yourself. if (peerEurekaNodes.isThisMyUrl(node.getServiceUrl())) { continue; } replicateInstanceActionsToPeers(action, appName, id, info, newStatus, node); } } finally { SpectatorUtil.record(action.getTimer(), time); } } /** * Replicates all instance changes to peer eureka nodes except for * replication traffic to this node. * */ private void replicateInstanceActionsToPeers(Action action, String appName, String id, InstanceInfo info, InstanceStatus newStatus, PeerEurekaNode node) { try { InstanceInfo infoFromRegistry; CurrentRequestVersion.set(Version.V2); switch (action) { case Cancel: node.cancel(appName, id); break; case Heartbeat: InstanceStatus overriddenStatus = overriddenInstanceStatusMap.get(id); infoFromRegistry = getInstanceByAppAndId(appName, id, false); node.heartbeat(appName, id, infoFromRegistry, overriddenStatus, false); break; case Register: node.register(info); break; case StatusUpdate: infoFromRegistry = getInstanceByAppAndId(appName, id, false); node.statusUpdate(appName, id, newStatus, infoFromRegistry); break; case DeleteStatusOverride: infoFromRegistry = getInstanceByAppAndId(appName, id, false); node.deleteStatusOverride(appName, id, infoFromRegistry); break; } } catch (Throwable t) { logger.error("Cannot replicate information to {} for action {}", node.getServiceUrl(), action.name(), t); } finally { CurrentRequestVersion.remove(); } } /** * Replicates all ASG status changes to peer eureka nodes except for * replication traffic to this node. */ private void replicateASGInfoToReplicaNodes(final String asgName, final ASGStatus newStatus, final PeerEurekaNode node) { CurrentRequestVersion.set(Version.V2); try { node.statusUpdate(asgName, newStatus); } catch (Throwable e) { logger.error("Cannot replicate ASG status information to {}", node.getServiceUrl(), e); } finally { CurrentRequestVersion.remove(); } } @Override public long getLocalRegistrySize() { return super.getLocalRegistrySize(); } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy