org.apache.hadoop.hbase.ipc.RpcServer Maven / Gradle / Ivy
Show all versions of hbase-server Show documentation
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.ipc;
import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.HADOOP_SECURITY_AUTHORIZATION;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.net.BindException;
import java.net.InetAddress;
import java.net.InetSocketAddress;
import java.net.ServerSocket;
import java.net.Socket;
import java.net.SocketException;
import java.net.UnknownHostException;
import java.nio.ByteBuffer;
import java.nio.channels.CancelledKeyException;
import java.nio.channels.Channels;
import java.nio.channels.ClosedChannelException;
import java.nio.channels.GatheringByteChannel;
import java.nio.channels.ReadableByteChannel;
import java.nio.channels.SelectionKey;
import java.nio.channels.Selector;
import java.nio.channels.ServerSocketChannel;
import java.nio.channels.SocketChannel;
import java.nio.channels.WritableByteChannel;
import java.security.PrivilegedExceptionAction;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentLinkedDeque;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantLock;
import javax.security.sasl.Sasl;
import javax.security.sasl.SaslException;
import javax.security.sasl.SaslServer;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.CellScanner;
import org.apache.hadoop.hbase.DoNotRetryIOException;
import org.apache.hadoop.hbase.HBaseIOException;
import org.apache.hadoop.hbase.HBaseInterfaceAudience;
import org.apache.hadoop.hbase.HConstants;
import org.apache.hadoop.hbase.HRegionInfo;
import org.apache.hadoop.hbase.Server;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Operation;
import org.apache.hadoop.hbase.codec.Codec;
import org.apache.hadoop.hbase.exceptions.RegionMovedException;
import org.apache.hadoop.hbase.io.ByteBufferOutputStream;
import org.apache.hadoop.hbase.io.BoundedByteBufferPool;
import org.apache.hadoop.hbase.monitoring.MonitoredRPCHandler;
import org.apache.hadoop.hbase.protobuf.ProtobufUtil;
import org.apache.hadoop.hbase.protobuf.generated.RPCProtos.CellBlockMeta;
import org.apache.hadoop.hbase.protobuf.generated.RPCProtos.ConnectionHeader;
import org.apache.hadoop.hbase.protobuf.generated.RPCProtos.ExceptionResponse;
import org.apache.hadoop.hbase.protobuf.generated.RPCProtos.RequestHeader;
import org.apache.hadoop.hbase.protobuf.generated.RPCProtos.ResponseHeader;
import org.apache.hadoop.hbase.protobuf.generated.RPCProtos.UserInformation;
import org.apache.hadoop.hbase.protobuf.generated.RPCProtos.VersionInfo;
import org.apache.hadoop.hbase.regionserver.HRegionServer;
import org.apache.hadoop.hbase.security.AccessDeniedException;
import org.apache.hadoop.hbase.security.AuthMethod;
import org.apache.hadoop.hbase.security.HBasePolicyProvider;
import org.apache.hadoop.hbase.security.HBaseSaslRpcServer;
import org.apache.hadoop.hbase.security.User;
import org.apache.hadoop.hbase.security.HBaseSaslRpcServer.SaslDigestCallbackHandler;
import org.apache.hadoop.hbase.security.HBaseSaslRpcServer.SaslGssCallbackHandler;
import org.apache.hadoop.hbase.security.SaslStatus;
import org.apache.hadoop.hbase.security.SaslUtil;
import org.apache.hadoop.hbase.security.UserProvider;
import org.apache.hadoop.hbase.security.token.AuthenticationTokenSecretManager;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.Counter;
import org.apache.hadoop.hbase.util.Pair;
import org.apache.hadoop.io.BytesWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.io.WritableUtils;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.security.UserGroupInformation;
import org.apache.hadoop.security.UserGroupInformation.AuthenticationMethod;
import org.apache.hadoop.security.authorize.AuthorizationException;
import org.apache.hadoop.security.authorize.PolicyProvider;
import org.apache.hadoop.security.authorize.ProxyUsers;
import org.apache.hadoop.security.authorize.ServiceAuthorizationManager;
import org.apache.hadoop.security.token.SecretManager;
import org.apache.hadoop.security.token.SecretManager.InvalidToken;
import org.apache.hadoop.security.token.TokenIdentifier;
import org.apache.hadoop.util.StringUtils;
import org.codehaus.jackson.map.ObjectMapper;
import org.apache.htrace.TraceInfo;
import com.google.common.util.concurrent.ThreadFactoryBuilder;
import com.google.protobuf.BlockingService;
import com.google.protobuf.CodedInputStream;
import com.google.protobuf.Descriptors.MethodDescriptor;
import com.google.protobuf.Message;
import com.google.protobuf.ServiceException;
import com.google.protobuf.TextFormat;
/**
* An RPC server that hosts protobuf described Services.
*
* An RpcServer instance has a Listener that hosts the socket. Listener has fixed number
* of Readers in an ExecutorPool, 10 by default. The Listener does an accept and then
* round robin a Reader is chosen to do the read. The reader is registered on Selector. Read does
* total read off the channel and the parse from which it makes a Call. The call is wrapped in a
* CallRunner and passed to the scheduler to be run. Reader goes back to see if more to be done
* and loops till done.
*
* Scheduler can be variously implemented but default simple scheduler has handlers to which it
* has given the queues into which calls (i.e. CallRunner instances) are inserted. Handlers run
* taking from the queue. They run the CallRunner#run method on each item gotten from queue
* and keep taking while the server is up.
*
* CallRunner#run executes the call. When done, asks the included Call to put itself on new
* queue for Responder to pull from and return result to client.
*
* @see RpcClientImpl
*/
@InterfaceAudience.LimitedPrivate({HBaseInterfaceAudience.COPROC, HBaseInterfaceAudience.PHOENIX})
@InterfaceStability.Evolving
public class RpcServer implements RpcServerInterface {
public static final Log LOG = LogFactory.getLog(RpcServer.class);
private static final CallQueueTooBigException CALL_QUEUE_TOO_BIG_EXCEPTION
= new CallQueueTooBigException();
private final boolean authorize;
private boolean isSecurityEnabled;
public static final byte CURRENT_VERSION = 0;
/**
* How many calls/handler are allowed in the queue.
*/
static final int DEFAULT_MAX_CALLQUEUE_LENGTH_PER_HANDLER = 10;
/**
* The maximum size that we can hold in the RPC queue
*/
private static final int DEFAULT_MAX_CALLQUEUE_SIZE = 1024 * 1024 * 1024;
private static final String WARN_DELAYED_CALLS = "hbase.ipc.warn.delayedrpc.number";
private static final int DEFAULT_WARN_DELAYED_CALLS = 1000;
private final int warnDelayedCalls;
private AtomicInteger delayedCalls;
private final IPCUtil ipcUtil;
private static final String AUTH_FAILED_FOR = "Auth failed for ";
private static final String AUTH_SUCCESSFUL_FOR = "Auth successful for ";
private static final Log AUDITLOG = LogFactory.getLog("SecurityLogger." +
Server.class.getName());
protected SecretManager secretManager;
protected ServiceAuthorizationManager authManager;
/** This is set to Call object before Handler invokes an RPC and ybdie
* after the call returns.
*/
protected static final ThreadLocal CurCall = new ThreadLocal();
/** Keeps MonitoredRPCHandler per handler thread. */
static final ThreadLocal MONITORED_RPC
= new ThreadLocal();
protected final InetSocketAddress bindAddress;
protected int port; // port we listen on
private int readThreads; // number of read threads
protected int maxIdleTime; // the maximum idle time after
// which a client may be
// disconnected
protected int thresholdIdleConnections; // the number of idle
// connections after which we
// will start cleaning up idle
// connections
int maxConnectionsToNuke; // the max number of
// connections to nuke
// during a cleanup
protected MetricsHBaseServer metrics;
protected final Configuration conf;
private int maxQueueSize;
protected int socketSendBufferSize;
protected final boolean tcpNoDelay; // if T then disable Nagle's Algorithm
protected final boolean tcpKeepAlive; // if T then use keepalives
protected final long purgeTimeout; // in milliseconds
/**
* This flag is used to indicate to sub threads when they should go down. When we call
* {@link #start()}, all threads started will consult this flag on whether they should
* keep going. It is set to false when {@link #stop()} is called.
*/
volatile boolean running = true;
/**
* This flag is set to true after all threads are up and 'running' and the server is then opened
* for business by the call to {@link #start()}.
*/
volatile boolean started = false;
/**
* This is a running count of the size of all outstanding calls by size.
*/
protected final Counter callQueueSize = new Counter();
protected final List connectionList =
Collections.synchronizedList(new LinkedList());
//maintain a list
//of client connections
private Listener listener = null;
protected Responder responder = null;
protected AuthenticationTokenSecretManager authTokenSecretMgr = null;
protected int numConnections = 0;
protected HBaseRPCErrorHandler errorHandler = null;
private static final String WARN_RESPONSE_TIME = "hbase.ipc.warn.response.time";
private static final String WARN_RESPONSE_SIZE = "hbase.ipc.warn.response.size";
/** Default value for above params */
private static final int DEFAULT_WARN_RESPONSE_TIME = 10000; // milliseconds
private static final int DEFAULT_WARN_RESPONSE_SIZE = 100 * 1024 * 1024;
private static final ObjectMapper MAPPER = new ObjectMapper();
private final int warnResponseTime;
private final int warnResponseSize;
private final Server server;
private final List services;
private final RpcScheduler scheduler;
private UserProvider userProvider;
private final BoundedByteBufferPool reservoir;
/**
* Datastructure that holds all necessary to a method invocation and then afterward, carries
* the result.
*/
class Call implements RpcCallContext {
protected int id; // the client's call id
protected BlockingService service;
protected MethodDescriptor md;
protected RequestHeader header;
protected Message param; // the parameter passed
// Optional cell data passed outside of protobufs.
protected CellScanner cellScanner;
protected Connection connection; // connection to client
protected long timestamp; // the time received when response is null
// the time served when response is not null
/**
* Chain of buffers to send as response.
*/
protected BufferChain response;
protected boolean delayResponse;
protected Responder responder;
protected boolean delayReturnValue; // if the return value should be
// set at call completion
protected long size; // size of current call
protected boolean isError;
protected TraceInfo tinfo;
private ByteBuffer cellBlock = null;
private User user;
private InetAddress remoteAddress;
Call(int id, final BlockingService service, final MethodDescriptor md, RequestHeader header,
Message param, CellScanner cellScanner, Connection connection, Responder responder,
long size, TraceInfo tinfo, final InetAddress remoteAddress) {
this.id = id;
this.service = service;
this.md = md;
this.header = header;
this.param = param;
this.cellScanner = cellScanner;
this.connection = connection;
this.timestamp = System.currentTimeMillis();
this.response = null;
this.delayResponse = false;
this.responder = responder;
this.isError = false;
this.size = size;
this.tinfo = tinfo;
this.user = connection.user == null? null: userProvider.create(connection.user);
this.remoteAddress = remoteAddress;
}
/**
* Call is done. Execution happened and we returned results to client. It is now safe to
* cleanup.
*/
void done() {
if (this.cellBlock != null) {
// Return buffer to reservoir now we are done with it.
reservoir.putBuffer(this.cellBlock);
this.cellBlock = null;
}
this.connection.decRpcCount(); // Say that we're done with this call.
}
@Override
public String toString() {
return toShortString() + " param: " +
(this.param != null? ProtobufUtil.getShortTextFormat(this.param): "") +
" connection: " + connection.toString();
}
protected RequestHeader getHeader() {
return this.header;
}
/*
* Short string representation without param info because param itself could be huge depends on
* the payload of a command
*/
String toShortString() {
String serviceName = this.connection.service != null ?
this.connection.service.getDescriptorForType().getName() : "null";
return "callId: " + this.id + " service: " + serviceName +
" methodName: " + ((this.md != null) ? this.md.getName() : "n/a") +
" size: " + StringUtils.TraditionalBinaryPrefix.long2String(this.size, "", 1) +
" connection: " + connection.toString();
}
String toTraceString() {
String serviceName = this.connection.service != null ?
this.connection.service.getDescriptorForType().getName() : "";
String methodName = (this.md != null) ? this.md.getName() : "";
return serviceName + "." + methodName;
}
protected synchronized void setSaslTokenResponse(ByteBuffer response) {
this.response = new BufferChain(response);
}
protected synchronized void setResponse(Object m, final CellScanner cells,
Throwable t, String errorMsg) {
if (this.isError) return;
if (t != null) this.isError = true;
BufferChain bc = null;
try {
ResponseHeader.Builder headerBuilder = ResponseHeader.newBuilder();
// Presume it a pb Message. Could be null.
Message result = (Message)m;
// Call id.
headerBuilder.setCallId(this.id);
if (t != null) {
ExceptionResponse.Builder exceptionBuilder = ExceptionResponse.newBuilder();
exceptionBuilder.setExceptionClassName(t.getClass().getName());
exceptionBuilder.setStackTrace(errorMsg);
exceptionBuilder.setDoNotRetry(t instanceof DoNotRetryIOException);
if (t instanceof RegionMovedException) {
// Special casing for this exception. This is only one carrying a payload.
// Do this instead of build a generic system for allowing exceptions carry
// any kind of payload.
RegionMovedException rme = (RegionMovedException)t;
exceptionBuilder.setHostname(rme.getHostname());
exceptionBuilder.setPort(rme.getPort());
}
// Set the exception as the result of the method invocation.
headerBuilder.setException(exceptionBuilder.build());
}
// Pass reservoir to buildCellBlock. Keep reference to returne so can add it back to the
// reservoir when finished. This is hacky and the hack is not contained but benefits are
// high when we can avoid a big buffer allocation on each rpc.
this.cellBlock = ipcUtil.buildCellBlock(this.connection.codec,
this.connection.compressionCodec, cells, reservoir);
if (this.cellBlock != null) {
CellBlockMeta.Builder cellBlockBuilder = CellBlockMeta.newBuilder();
// Presumes the cellBlock bytebuffer has been flipped so limit has total size in it.
cellBlockBuilder.setLength(this.cellBlock.limit());
headerBuilder.setCellBlockMeta(cellBlockBuilder.build());
}
Message header = headerBuilder.build();
// Organize the response as a set of bytebuffers rather than collect it all together inside
// one big byte array; save on allocations.
ByteBuffer bbHeader = IPCUtil.getDelimitedMessageAsByteBuffer(header);
ByteBuffer bbResult = IPCUtil.getDelimitedMessageAsByteBuffer(result);
int totalSize = bbHeader.capacity() + (bbResult == null? 0: bbResult.limit()) +
(this.cellBlock == null? 0: this.cellBlock.limit());
ByteBuffer bbTotalSize = ByteBuffer.wrap(Bytes.toBytes(totalSize));
bc = new BufferChain(bbTotalSize, bbHeader, bbResult, this.cellBlock);
if (connection.useWrap) {
bc = wrapWithSasl(bc);
}
} catch (IOException e) {
LOG.warn("Exception while creating response " + e);
}
this.response = bc;
}
private BufferChain wrapWithSasl(BufferChain bc)
throws IOException {
if (!this.connection.useSasl) return bc;
// Looks like no way around this; saslserver wants a byte array. I have to make it one.
// THIS IS A BIG UGLY COPY.
byte [] responseBytes = bc.getBytes();
byte [] token;
// synchronization may be needed since there can be multiple Handler
// threads using saslServer to wrap responses.
synchronized (connection.saslServer) {
token = connection.saslServer.wrap(responseBytes, 0, responseBytes.length);
}
if (LOG.isTraceEnabled()) {
LOG.trace("Adding saslServer wrapped token of size " + token.length
+ " as call response.");
}
ByteBuffer bbTokenLength = ByteBuffer.wrap(Bytes.toBytes(token.length));
ByteBuffer bbTokenBytes = ByteBuffer.wrap(token);
return new BufferChain(bbTokenLength, bbTokenBytes);
}
@Override
public synchronized void endDelay(Object result) throws IOException {
assert this.delayResponse;
assert this.delayReturnValue || result == null;
this.delayResponse = false;
delayedCalls.decrementAndGet();
if (this.delayReturnValue) {
this.setResponse(result, null, null, null);
}
this.responder.doRespond(this);
}
@Override
public synchronized void endDelay() throws IOException {
this.endDelay(null);
}
@Override
public synchronized void startDelay(boolean delayReturnValue) {
assert !this.delayResponse;
this.delayResponse = true;
this.delayReturnValue = delayReturnValue;
int numDelayed = delayedCalls.incrementAndGet();
if (numDelayed > warnDelayedCalls) {
LOG.warn("Too many delayed calls: limit " + warnDelayedCalls + " current " + numDelayed);
}
}
@Override
public synchronized void endDelayThrowing(Throwable t) throws IOException {
this.setResponse(null, null, t, StringUtils.stringifyException(t));
this.delayResponse = false;
this.sendResponseIfReady();
}
@Override
public synchronized boolean isDelayed() {
return this.delayResponse;
}
@Override
public synchronized boolean isReturnValueDelayed() {
return this.delayReturnValue;
}
@Override
public boolean isClientCellBlockSupport() {
return this.connection != null && this.connection.codec != null;
}
@Override
public long disconnectSince() {
if (!connection.channel.isOpen()) {
return System.currentTimeMillis() - timestamp;
} else {
return -1L;
}
}
public long getSize() {
return this.size;
}
/**
* If we have a response, and delay is not set, then respond
* immediately. Otherwise, do not respond to client. This is
* called by the RPC code in the context of the Handler thread.
*/
public synchronized void sendResponseIfReady() throws IOException {
if (!this.delayResponse) {
this.responder.doRespond(this);
}
}
public UserGroupInformation getRemoteUser() {
return connection.user;
}
@Override
public User getRequestUser() {
return user;
}
@Override
public String getRequestUserName() {
User user = getRequestUser();
return user == null? null: user.getShortName();
}
@Override
public InetAddress getRemoteAddress() {
return remoteAddress;
}
@Override
public VersionInfo getClientVersionInfo() {
return connection.getVersionInfo();
}
}
/** Listens on the socket. Creates jobs for the handler threads*/
private class Listener extends Thread {
private ServerSocketChannel acceptChannel = null; //the accept channel
private Selector selector = null; //the selector that we use for the server
private Reader[] readers = null;
private int currentReader = 0;
private Random rand = new Random();
private long lastCleanupRunTime = 0; //the last time when a cleanup connec-
//-tion (for idle connections) ran
private long cleanupInterval = 10000; //the minimum interval between
//two cleanup runs
private int backlogLength;
private ExecutorService readPool;
public Listener(final String name) throws IOException {
super(name);
backlogLength = conf.getInt("hbase.ipc.server.listen.queue.size", 128);
// Create a new server socket and set to non blocking mode
acceptChannel = ServerSocketChannel.open();
acceptChannel.configureBlocking(false);
// Bind the server socket to the binding addrees (can be different from the default interface)
bind(acceptChannel.socket(), bindAddress, backlogLength);
port = acceptChannel.socket().getLocalPort(); //Could be an ephemeral port
// create a selector;
selector= Selector.open();
readers = new Reader[readThreads];
readPool = Executors.newFixedThreadPool(readThreads,
new ThreadFactoryBuilder().setNameFormat(
"RpcServer.reader=%d,bindAddress=" + bindAddress.getHostName() +
",port=" + port).setDaemon(true).build());
for (int i = 0; i < readThreads; ++i) {
Reader reader = new Reader();
readers[i] = reader;
readPool.execute(reader);
}
LOG.info(getName() + ": started " + readThreads + " reader(s).");
// Register accepts on the server socket with the selector.
acceptChannel.register(selector, SelectionKey.OP_ACCEPT);
this.setName("RpcServer.listener,port=" + port);
this.setDaemon(true);
}
private class Reader implements Runnable {
private volatile boolean adding = false;
private final Selector readSelector;
Reader() throws IOException {
this.readSelector = Selector.open();
}
@Override
public void run() {
try {
doRunLoop();
} finally {
try {
readSelector.close();
} catch (IOException ioe) {
LOG.error(getName() + ": error closing read selector in " + getName(), ioe);
}
}
}
private synchronized void doRunLoop() {
while (running) {
try {
readSelector.select();
while (adding) {
this.wait(1000);
}
Iterator iter = readSelector.selectedKeys().iterator();
while (iter.hasNext()) {
SelectionKey key = iter.next();
iter.remove();
if (key.isValid()) {
if (key.isReadable()) {
doRead(key);
}
}
}
} catch (InterruptedException e) {
LOG.debug("Interrupted while sleeping");
return;
} catch (IOException ex) {
LOG.info(getName() + ": IOException in Reader", ex);
}
}
}
/**
* This gets reader into the state that waits for the new channel
* to be registered with readSelector. If it was waiting in select()
* the thread will be woken up, otherwise whenever select() is called
* it will return even if there is nothing to read and wait
* in while(adding) for finishAdd call
*/
public void startAdd() {
adding = true;
readSelector.wakeup();
}
public synchronized SelectionKey registerChannel(SocketChannel channel)
throws IOException {
return channel.register(readSelector, SelectionKey.OP_READ);
}
public synchronized void finishAdd() {
adding = false;
this.notify();
}
}
/** cleanup connections from connectionList. Choose a random range
* to scan and also have a limit on the number of the connections
* that will be cleanedup per run. The criteria for cleanup is the time
* for which the connection was idle. If 'force' is true then all
* connections will be looked at for the cleanup.
* @param force all connections will be looked at for cleanup
*/
private void cleanupConnections(boolean force) {
if (force || numConnections > thresholdIdleConnections) {
long currentTime = System.currentTimeMillis();
if (!force && (currentTime - lastCleanupRunTime) < cleanupInterval) {
return;
}
int start = 0;
int end = numConnections - 1;
if (!force) {
start = rand.nextInt() % numConnections;
end = rand.nextInt() % numConnections;
int temp;
if (end < start) {
temp = start;
start = end;
end = temp;
}
}
int i = start;
int numNuked = 0;
while (i <= end) {
Connection c;
synchronized (connectionList) {
try {
c = connectionList.get(i);
} catch (Exception e) {return;}
}
if (c.timedOut(currentTime)) {
if (LOG.isDebugEnabled())
LOG.debug(getName() + ": disconnecting client " + c.getHostAddress());
closeConnection(c);
numNuked++;
end--;
//noinspection UnusedAssignment
c = null;
if (!force && numNuked == maxConnectionsToNuke) break;
}
else i++;
}
lastCleanupRunTime = System.currentTimeMillis();
}
}
@Override
public void run() {
LOG.info(getName() + ": starting");
while (running) {
SelectionKey key = null;
try {
selector.select(); // FindBugs IS2_INCONSISTENT_SYNC
Iterator iter = selector.selectedKeys().iterator();
while (iter.hasNext()) {
key = iter.next();
iter.remove();
try {
if (key.isValid()) {
if (key.isAcceptable())
doAccept(key);
}
} catch (IOException ignored) {
if (LOG.isTraceEnabled()) LOG.trace("ignored", ignored);
}
key = null;
}
} catch (OutOfMemoryError e) {
if (errorHandler != null) {
if (errorHandler.checkOOME(e)) {
LOG.info(getName() + ": exiting on OutOfMemoryError");
closeCurrentConnection(key, e);
cleanupConnections(true);
return;
}
} else {
// we can run out of memory if we have too many threads
// log the event and sleep for a minute and give
// some thread(s) a chance to finish
LOG.warn(getName() + ": OutOfMemoryError in server select", e);
closeCurrentConnection(key, e);
cleanupConnections(true);
try {
Thread.sleep(60000);
} catch (InterruptedException ex) {
LOG.debug("Interrupted while sleeping");
return;
}
}
} catch (Exception e) {
closeCurrentConnection(key, e);
}
cleanupConnections(false);
}
LOG.info(getName() + ": stopping");
synchronized (this) {
try {
acceptChannel.close();
selector.close();
} catch (IOException ignored) {
if (LOG.isTraceEnabled()) LOG.trace("ignored", ignored);
}
selector= null;
acceptChannel= null;
// clean up all connections
while (!connectionList.isEmpty()) {
closeConnection(connectionList.remove(0));
}
}
}
private void closeCurrentConnection(SelectionKey key, Throwable e) {
if (key != null) {
Connection c = (Connection)key.attachment();
if (c != null) {
if (LOG.isDebugEnabled()) {
LOG.debug(getName() + ": disconnecting client " + c.getHostAddress() +
(e != null ? " on error " + e.getMessage() : ""));
}
closeConnection(c);
key.attach(null);
}
}
}
InetSocketAddress getAddress() {
return (InetSocketAddress)acceptChannel.socket().getLocalSocketAddress();
}
void doAccept(SelectionKey key) throws IOException, OutOfMemoryError {
Connection c;
ServerSocketChannel server = (ServerSocketChannel) key.channel();
SocketChannel channel;
while ((channel = server.accept()) != null) {
try {
channel.configureBlocking(false);
channel.socket().setTcpNoDelay(tcpNoDelay);
channel.socket().setKeepAlive(tcpKeepAlive);
} catch (IOException ioe) {
channel.close();
throw ioe;
}
Reader reader = getReader();
try {
reader.startAdd();
SelectionKey readKey = reader.registerChannel(channel);
c = getConnection(channel, System.currentTimeMillis());
readKey.attach(c);
synchronized (connectionList) {
connectionList.add(numConnections, c);
numConnections++;
}
if (LOG.isDebugEnabled())
LOG.debug(getName() + ": connection from " + c.toString() +
"; # active connections: " + numConnections);
} finally {
reader.finishAdd();
}
}
}
void doRead(SelectionKey key) throws InterruptedException {
int count;
Connection c = (Connection) key.attachment();
if (c == null) {
return;
}
c.setLastContact(System.currentTimeMillis());
try {
count = c.readAndProcess();
if (count > 0) {
c.setLastContact(System.currentTimeMillis());
}
} catch (InterruptedException ieo) {
throw ieo;
} catch (Exception e) {
if (LOG.isDebugEnabled()) {
LOG.debug(getName() + ": Caught exception while reading:" + e.getMessage());
}
count = -1; //so that the (count < 0) block is executed
}
if (count < 0) {
if (LOG.isDebugEnabled()) {
LOG.debug(getName() + ": DISCONNECTING client " + c.toString() +
" because read count=" + count +
". Number of active connections: " + numConnections);
}
closeConnection(c);
}
}
synchronized void doStop() {
if (selector != null) {
selector.wakeup();
Thread.yield();
}
if (acceptChannel != null) {
try {
acceptChannel.socket().close();
} catch (IOException e) {
LOG.info(getName() + ": exception in closing listener socket. " + e);
}
}
readPool.shutdownNow();
}
// The method that will return the next reader to work with
// Simplistic implementation of round robin for now
Reader getReader() {
currentReader = (currentReader + 1) % readers.length;
return readers[currentReader];
}
}
// Sends responses of RPC back to clients.
protected class Responder extends Thread {
private final Selector writeSelector;
private final Set writingCons =
Collections.newSetFromMap(new ConcurrentHashMap());
Responder() throws IOException {
this.setName("RpcServer.responder");
this.setDaemon(true);
writeSelector = Selector.open(); // create a selector
}
@Override
public void run() {
LOG.info(getName() + ": starting");
try {
doRunLoop();
} finally {
LOG.info(getName() + ": stopping");
try {
writeSelector.close();
} catch (IOException ioe) {
LOG.error(getName() + ": couldn't close write selector", ioe);
}
}
}
/**
* Take the list of the connections that want to write, and register them
* in the selector.
*/
private void registerWrites() {
Iterator it = writingCons.iterator();
while (it.hasNext()) {
Connection c = it.next();
it.remove();
SelectionKey sk = c.channel.keyFor(writeSelector);
try {
if (sk == null) {
try {
c.channel.register(writeSelector, SelectionKey.OP_WRITE, c);
} catch (ClosedChannelException e) {
// ignore: the client went away.
if (LOG.isTraceEnabled()) LOG.trace("ignored", e);
}
} else {
sk.interestOps(SelectionKey.OP_WRITE);
}
} catch (CancelledKeyException e) {
// ignore: the client went away.
if (LOG.isTraceEnabled()) LOG.trace("ignored", e);
}
}
}
/**
* Add a connection to the list that want to write,
*/
public void registerForWrite(Connection c) {
if (writingCons.add(c)) {
writeSelector.wakeup();
}
}
private void doRunLoop() {
long lastPurgeTime = 0; // last check for old calls.
while (running) {
try {
registerWrites();
int keyCt = writeSelector.select(purgeTimeout);
if (keyCt == 0) {
continue;
}
Set keys = writeSelector.selectedKeys();
Iterator iter = keys.iterator();
while (iter.hasNext()) {
SelectionKey key = iter.next();
iter.remove();
try {
if (key.isValid() && key.isWritable()) {
doAsyncWrite(key);
}
} catch (IOException e) {
LOG.debug(getName() + ": asyncWrite", e);
}
}
lastPurgeTime = purge(lastPurgeTime);
} catch (OutOfMemoryError e) {
if (errorHandler != null) {
if (errorHandler.checkOOME(e)) {
LOG.info(getName() + ": exiting on OutOfMemoryError");
return;
}
} else {
//
// we can run out of memory if we have too many threads
// log the event and sleep for a minute and give
// some thread(s) a chance to finish
//
LOG.warn(getName() + ": OutOfMemoryError in server select", e);
try {
Thread.sleep(60000);
} catch (InterruptedException ex) {
LOG.debug("Interrupted while sleeping");
return;
}
}
} catch (Exception e) {
LOG.warn(getName() + ": exception in Responder " +
StringUtils.stringifyException(e), e);
}
}
LOG.info(getName() + ": stopped");
}
/**
* If there were some calls that have not been sent out for a
* long time, we close the connection.
* @return the time of the purge.
*/
private long purge(long lastPurgeTime) {
long now = System.currentTimeMillis();
if (now < lastPurgeTime + purgeTimeout) {
return lastPurgeTime;
}
ArrayList conWithOldCalls = new ArrayList();
// get the list of channels from list of keys.
synchronized (writeSelector.keys()) {
for (SelectionKey key : writeSelector.keys()) {
Connection connection = (Connection) key.attachment();
if (connection == null) {
throw new IllegalStateException("Coding error: SelectionKey key without attachment.");
}
Call call = connection.responseQueue.peekFirst();
if (call != null && now > call.timestamp + purgeTimeout) {
conWithOldCalls.add(call.connection);
}
}
}
// Seems safer to close the connection outside of the synchronized loop...
for (Connection connection : conWithOldCalls) {
closeConnection(connection);
}
return now;
}
private void doAsyncWrite(SelectionKey key) throws IOException {
Connection connection = (Connection) key.attachment();
if (connection == null) {
throw new IOException("doAsyncWrite: no connection");
}
if (key.channel() != connection.channel) {
throw new IOException("doAsyncWrite: bad channel");
}
if (processAllResponses(connection)) {
try {
// We wrote everything, so we don't need to be told when the socket is ready for
// write anymore.
key.interestOps(0);
} catch (CancelledKeyException e) {
/* The Listener/reader might have closed the socket.
* We don't explicitly cancel the key, so not sure if this will
* ever fire.
* This warning could be removed.
*/
LOG.warn("Exception while changing ops : " + e);
}
}
}
/**
* Process the response for this call. You need to have the lock on
* {@link org.apache.hadoop.hbase.ipc.RpcServer.Connection#responseWriteLock}
*
* @param call the call
* @return true if we proceed the call fully, false otherwise.
* @throws IOException
*/
private boolean processResponse(final Call call) throws IOException {
boolean error = true;
try {
// Send as much data as we can in the non-blocking fashion
long numBytes = channelWrite(call.connection.channel, call.response);
if (numBytes < 0) {
throw new HBaseIOException("Error writing on the socket " +
"for the call:" + call.toShortString());
}
error = false;
} finally {
if (error) {
LOG.debug(getName() + call.toShortString() + ": output error -- closing");
closeConnection(call.connection);
}
}
if (!call.response.hasRemaining()) {
call.done();
return true;
} else {
return false; // Socket can't take more, we will have to come back.
}
}
/**
* Process all the responses for this connection
*
* @return true if all the calls were processed or that someone else is doing it.
* false if there * is still some work to do. In this case, we expect the caller to
* delay us.
* @throws IOException
*/
private boolean processAllResponses(final Connection connection) throws IOException {
// We want only one writer on the channel for a connection at a time.
connection.responseWriteLock.lock();
try {
for (int i = 0; i < 20; i++) {
// protection if some handlers manage to need all the responder
Call call = connection.responseQueue.pollFirst();
if (call == null) {
return true;
}
if (!processResponse(call)) {
connection.responseQueue.addFirst(call);
return false;
}
}
} finally {
connection.responseWriteLock.unlock();
}
return connection.responseQueue.isEmpty();
}
//
// Enqueue a response from the application.
//
void doRespond(Call call) throws IOException {
boolean added = false;
// If there is already a write in progress, we don't wait. This allows to free the handlers
// immediately for other tasks.
if (call.connection.responseQueue.isEmpty() && call.connection.responseWriteLock.tryLock()) {
try {
if (call.connection.responseQueue.isEmpty()) {
// If we're alone, we can try to do a direct call to the socket. It's
// an optimisation to save on context switches and data transfer between cores..
if (processResponse(call)) {
return; // we're done.
}
// Too big to fit, putting ahead.
call.connection.responseQueue.addFirst(call);
added = true; // We will register to the selector later, outside of the lock.
}
} finally {
call.connection.responseWriteLock.unlock();
}
}
if (!added) {
call.connection.responseQueue.addLast(call);
}
call.responder.registerForWrite(call.connection);
// set the serve time when the response has to be sent later
call.timestamp = System.currentTimeMillis();
}
}
@SuppressWarnings("serial")
public static class CallQueueTooBigException extends IOException {
CallQueueTooBigException() {
super();
}
}
/** Reads calls from a connection and queues them for handling. */
@edu.umd.cs.findbugs.annotations.SuppressWarnings(
value="VO_VOLATILE_INCREMENT",
justification="False positive according to http://sourceforge.net/p/findbugs/bugs/1032/")
public class Connection {
// If initial preamble with version and magic has been read or not.
private boolean connectionPreambleRead = false;
// If the connection header has been read or not.
private boolean connectionHeaderRead = false;
protected SocketChannel channel;
private ByteBuffer data;
private ByteBuffer dataLengthBuffer;
protected final ConcurrentLinkedDeque responseQueue = new ConcurrentLinkedDeque();
private final Lock responseWriteLock = new ReentrantLock();
private Counter rpcCount = new Counter(); // number of outstanding rpcs
private long lastContact;
private InetAddress addr;
protected Socket socket;
// Cache the remote host & port info so that even if the socket is
// disconnected, we can say where it used to connect to.
protected String hostAddress;
protected int remotePort;
ConnectionHeader connectionHeader;
/**
* Codec the client asked use.
*/
private Codec codec;
/**
* Compression codec the client asked us use.
*/
private CompressionCodec compressionCodec;
BlockingService service;
protected UserGroupInformation user = null;
private AuthMethod authMethod;
private boolean saslContextEstablished;
private boolean skipInitialSaslHandshake;
private ByteBuffer unwrappedData;
// When is this set? FindBugs wants to know! Says NP
private ByteBuffer unwrappedDataLengthBuffer = ByteBuffer.allocate(4);
boolean useSasl;
SaslServer saslServer;
private boolean useWrap = false;
// Fake 'call' for failed authorization response
private static final int AUTHORIZATION_FAILED_CALLID = -1;
private final Call authFailedCall =
new Call(AUTHORIZATION_FAILED_CALLID, null, null, null, null, null, this, null, 0, null,
null);
private ByteArrayOutputStream authFailedResponse =
new ByteArrayOutputStream();
// Fake 'call' for SASL context setup
private static final int SASL_CALLID = -33;
private final Call saslCall =
new Call(SASL_CALLID, this.service, null, null, null, null, this, null, 0, null, null);
public UserGroupInformation attemptingUser = null; // user name before auth
public Connection(SocketChannel channel, long lastContact) {
this.channel = channel;
this.lastContact = lastContact;
this.data = null;
this.dataLengthBuffer = ByteBuffer.allocate(4);
this.socket = channel.socket();
this.addr = socket.getInetAddress();
if (addr == null) {
this.hostAddress = "*Unknown*";
} else {
this.hostAddress = addr.getHostAddress();
}
this.remotePort = socket.getPort();
if (socketSendBufferSize != 0) {
try {
socket.setSendBufferSize(socketSendBufferSize);
} catch (IOException e) {
LOG.warn("Connection: unable to set socket send buffer size to " +
socketSendBufferSize);
}
}
}
@Override
public String toString() {
return getHostAddress() + ":" + remotePort;
}
public String getHostAddress() {
return hostAddress;
}
public InetAddress getHostInetAddress() {
return addr;
}
public int getRemotePort() {
return remotePort;
}
public void setLastContact(long lastContact) {
this.lastContact = lastContact;
}
public VersionInfo getVersionInfo() {
if (connectionHeader.hasVersionInfo()) {
return connectionHeader.getVersionInfo();
}
return null;
}
/* Return true if the connection has no outstanding rpc */
private boolean isIdle() {
return rpcCount.get() == 0;
}
/* Decrement the outstanding RPC count */
protected void decRpcCount() {
rpcCount.decrement();
}
/* Increment the outstanding RPC count */
protected void incRpcCount() {
rpcCount.increment();
}
protected boolean timedOut(long currentTime) {
return isIdle() && currentTime - lastContact > maxIdleTime;
}
private UserGroupInformation getAuthorizedUgi(String authorizedId)
throws IOException {
if (authMethod == AuthMethod.DIGEST) {
TokenIdentifier tokenId = HBaseSaslRpcServer.getIdentifier(authorizedId,
secretManager);
UserGroupInformation ugi = tokenId.getUser();
if (ugi == null) {
throw new AccessDeniedException(
"Can't retrieve username from tokenIdentifier.");
}
ugi.addTokenIdentifier(tokenId);
return ugi;
} else {
return UserGroupInformation.createRemoteUser(authorizedId);
}
}
private void saslReadAndProcess(byte[] saslToken) throws IOException,
InterruptedException {
if (saslContextEstablished) {
if (LOG.isTraceEnabled())
LOG.trace("Have read input token of size " + saslToken.length
+ " for processing by saslServer.unwrap()");
if (!useWrap) {
processOneRpc(saslToken);
} else {
byte [] plaintextData = saslServer.unwrap(saslToken, 0, saslToken.length);
processUnwrappedData(plaintextData);
}
} else {
byte[] replyToken;
try {
if (saslServer == null) {
switch (authMethod) {
case DIGEST:
if (secretManager == null) {
throw new AccessDeniedException(
"Server is not configured to do DIGEST authentication.");
}
saslServer = Sasl.createSaslServer(AuthMethod.DIGEST
.getMechanismName(), null, SaslUtil.SASL_DEFAULT_REALM,
SaslUtil.SASL_PROPS, new SaslDigestCallbackHandler(
secretManager, this));
break;
default:
UserGroupInformation current = UserGroupInformation.getCurrentUser();
String fullName = current.getUserName();
if (LOG.isDebugEnabled()) {
LOG.debug("Kerberos principal name is " + fullName);
}
final String names[] = SaslUtil.splitKerberosName(fullName);
if (names.length != 3) {
throw new AccessDeniedException(
"Kerberos principal name does NOT have the expected "
+ "hostname part: " + fullName);
}
current.doAs(new PrivilegedExceptionAction