All Downloads are FREE. Search and download functionalities are using the official Maven repository.

net.spy.memcached.protocol.TCPMemcachedNodeImpl Maven / Gradle / Ivy

The newest version!
/*
 * arcus-java-client : Arcus Java client
 * Copyright 2010-2014 NAVER Corp.
 * Copyright 2014-2021 JaM2in Co., Ltd.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package net.spy.memcached.protocol;

import java.io.IOException;
import java.net.SocketAddress;
import java.nio.Buffer;
import java.nio.ByteBuffer;
import java.nio.channels.SelectionKey;
import java.nio.channels.SocketChannel;
import java.util.ArrayList;
import java.util.Collection;
import java.util.StringTokenizer;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.locks.Lock;
import java.util.concurrent.locks.ReentrantLock;

import net.spy.memcached.ArcusReplNodeAddress;
import net.spy.memcached.MemcachedNode;
import net.spy.memcached.MemcachedReplicaGroup;
import net.spy.memcached.compat.SpyObject;
import net.spy.memcached.ops.Operation;
import net.spy.memcached.ops.OperationState;

/**
 * Represents a node within the memcached cluster, along with its buffering
 * and operation queues.
 */
public abstract class TCPMemcachedNodeImpl extends SpyObject
        implements MemcachedNode {

  /* Display name: the name given to the constructor followed by the address. */
  private String nodeName;
  private SocketAddress socketAddress;
  private final ByteBuffer rbuf;
  private final ByteBuffer wbuf;
  /*
   * Operation queues. The element type is Operation: every consumer in this
   * class reads the elements as Operation, so raw queue types cannot compile.
   */
  protected final BlockingQueue<Operation> writeQ;
  private final BlockingQueue<Operation> readQ;
  private final BlockingQueue<Operation> inputQueue;
  private final long opQueueMaxBlockTime;
  // This has been declared volatile so it can be used as an availability
  // indicator.
  private volatile int reconnectAttempt = 1;
  private boolean isFirstConnecting = true;
  private SocketChannel channel;
  /* Number of bytes in wbuf that still have to be written to the channel. */
  private int toWrite = 0;
  protected Operation optimizedOp = null;
  private volatile SelectionKey sk = null;
  private boolean shouldAuth = false;
  private CountDownLatch authLatch;
  /* Operations drained from inputQueue while authentication is pending. */
  private ArrayList<Operation> reconnectBlocked;
  private String version = null;
  private boolean isAsciiProtocol = true;
  private boolean enabledMGetOp = false;
  private boolean enabledMGetsOp = false;
  private boolean enabledSpaceSeparate = false;

  // operation Future.get timeout counter
  private final AtomicInteger continuousTimeout = new AtomicInteger(0);
  private boolean toRatioEnabled = false;
  private int[] toCountArray;
  private static final int MAX_TOCOUNT = 100;   /* to count array size */
  private int toCountIdx;         /* to count array index */
  private int toRatioMax;         /* maximum timeout ratio */
  private int toRatioNow;         /* current timeout ratio */
  /* Guards toCountArray, toCountIdx, toRatioMax and toRatioNow. */
  private final Lock toRatioLock = new ReentrantLock();

  /* # of operations added into inputQueue as a hint.
   * If we need a correct count, AtomicLong object must be used.
   */
  private volatile long addOpCount;

  /* ENABLE_REPLICATION if */
  private MemcachedReplicaGroup replicaGroup;
  /* ENABLE_REPLICATION end */

  /**
   * Clears the sliding window used to track the timeout ratio: zeroes every
   * slot and resets the index, the maximum and the current ratio.
   * Does nothing unless the timeout ratio has been enabled.
   */
  private void resetTimeoutRatioCount() {
    if (!toRatioEnabled) {
      return;
    }
    toRatioLock.lock();
    try {
      for (int i = 0; i < MAX_TOCOUNT; i++) {
        toCountArray[i] = 0;
      }
      toCountIdx = -1; // the next recorded sample pre-increments to slot 0
      toRatioMax = 0;
      toRatioNow = 0;
    } finally {
      // Release in finally so a failure above can never leave the lock held.
      toRatioLock.unlock();
    }
  }

  /**
   * Records the outcome of one operation in the circular timeout window.
   * The slot being overwritten is first subtracted from the running count,
   * then a timeout (if any) is stored in it and added back.
   * Does nothing unless the timeout ratio has been enabled.
   *
   * @param timedOut {@code true} if the operation timed out
   */
  private void addTimeoutRatioCount(boolean timedOut) {
    if (!toRatioEnabled) {
      return;
    }
    toRatioLock.lock();
    try {
      // Advance the circular index, wrapping at the window size.
      if ((++toCountIdx) >= MAX_TOCOUNT) {
        toCountIdx = 0;
      }
      // Evict the sample that falls out of the window.
      if (toCountArray[toCountIdx] > 0) {
        toRatioNow -= toCountArray[toCountIdx];
        toCountArray[toCountIdx] = 0;
      }
      if (timedOut) {
        toCountArray[toCountIdx] = 1;
        toRatioNow += 1;
        if (toRatioNow > toRatioMax) {
          toRatioMax = toRatioNow;
        }
      }
    } finally {
      // Release in finally so a failure above can never leave the lock held.
      toRatioLock.unlock();
    }
  }

  /**
   * Creates a node bound to the given address and channel.
   *
   * @param name node name; combined with {@code sa} to form the display name
   * @param sa address of the node (must not be null)
   * @param c channel used to talk to the node (must not be null)
   * @param bufSize size in bytes of both the read and the write buffer (&gt; 0)
   * @param rq queue holding operations that are being read
   * @param wq queue holding operations that are waiting to be written
   * @param iq queue that receives newly added operations
   * @param opQueueMaxBlockTime maximum time in milliseconds {@link #addOp}
   *                            waits for room in the input queue
   * @param waitForAuth whether operations must wait for authentication
   * @param asciiProtocol whether the ASCII protocol is in use
   */
  public TCPMemcachedNodeImpl(String name,
                              SocketAddress sa, SocketChannel c,
                              int bufSize, BlockingQueue<Operation> rq,
                              BlockingQueue<Operation> wq,
                              BlockingQueue<Operation> iq,
                              long opQueueMaxBlockTime, boolean waitForAuth,
                              boolean asciiProtocol) {
    super();
    assert sa != null : "No SocketAddress";
    assert c != null : "No SocketChannel";
    assert bufSize > 0 : "Invalid buffer size: " + bufSize;
    assert rq != null : "No operation read queue";
    assert wq != null : "No operation write queue";
    assert iq != null : "No input queue";

    nodeName = name + " " + sa;
    setSocketAddress(sa);
    setChannel(c);
    rbuf = ByteBuffer.allocate(bufSize);
    wbuf = ByteBuffer.allocate(bufSize);
    // The cast to Buffer keeps the call bound to Buffer.clear().
    ((Buffer) getWbuf()).clear();
    readQ = rq;
    writeQ = wq;
    inputQueue = iq;
    addOpCount = 0;
    this.opQueueMaxBlockTime = opQueueMaxBlockTime;
    shouldAuth = waitForAuth;
    isAsciiProtocol = asciiProtocol;
    // Creates the auth latch; must run after the queues have been assigned.
    setupForAuth("init authentication");
  }

  /**
   * Moves operations from the input queue to the write queue, taking no more
   * than the write queue currently has room for.
   */
  public final void copyInputQueue() {
    Collection<Operation> tmp = new ArrayList<Operation>();

    // don't drain more than we have space to place
    inputQueue.drainTo(tmp, writeQ.remainingCapacity());

    writeQ.addAll(tmp);
  }

  /**
   * Empties the input queue.
   *
   * @return the operations that were in the input queue, in queue order
   */
  public Collection<Operation> destroyInputQueue() {
    Collection<Operation> rv = new ArrayList<Operation>();
    inputQueue.drainTo(rv);
    return rv;
  }

  /**
   * Empties the given queue and, when requested, prepares every drained
   * operation for being sent again.
   *
   * @param queue the queue to drain
   * @param resend if {@code true}, operations that are still queued for or
   *               in the middle of writing and have a buffer get that buffer
   *               reset; all other operations are re-initialized
   * @return the drained operations, in queue order
   */
  private Collection<Operation> destroyQueue(BlockingQueue<Operation> queue,
                                             boolean resend) {
    Collection<Operation> rv = new ArrayList<Operation>();
    queue.drainTo(rv);
    if (resend) {
      for (Operation o : rv) {
        if ((o.getState() == OperationState.WRITE_QUEUED ||
             o.getState() == OperationState.WRITING) && o.getBuffer() != null) {
          ((Buffer) o.getBuffer()).reset(); // buffer offset reset
        } else {
          o.initialize(); // write completed or not yet initialized
        }
      }
    }

    return rv;
  }

  /**
   * Empties the write queue.
   *
   * @param resend if {@code true}, the drained operations are prepared for
   *               being sent again
   * @return the operations that were in the write queue
   */
  public Collection<Operation> destroyWriteQueue(boolean resend) {
    return destroyQueue(writeQ, resend);
  }

  /**
   * Empties the read queue.
   *
   * @param resend if {@code true}, the drained operations are prepared for
   *               being sent again
   * @return the operations that were in the read queue
   */
  public Collection<Operation> destroyReadQueue(boolean resend) {
    return destroyQueue(readQ, resend);
  }

  /**
   * Resets this node's in-flight state so that pending work can be resent.
   *
   * <p>The buffer of the current write operation is reset (unless that
   * operation is about to be cancelled), every operation in the read queue is
   * cancelled, write operations are cancelled too when requested or when
   * authentication is required, and finally both I/O buffers and the pending
   * byte count are cleared.
   *
   * @param cancelWrite if {@code true}, all pending write operations are
   *                    cancelled as well
   * @param cause the message passed to {@code cancel} for each cancelled
   *              operation
   */
  public final void setupResend(boolean cancelWrite, String cause) {
    // First, reset the current write op, or cancel it if we should
    // be authenticating
    Operation op = getCurrentWriteOp();
    if ((cancelWrite || shouldAuth) && op != null) {
      /*
       * Do not cancel the operation.
       * There is no reason to cancel it first
       * and it will be cancelled in the code below.
       */
    } else if (op != null) {
      ByteBuffer buf = op.getBuffer();
      if (buf != null) {
        ((Buffer) buf).reset();
      } else {
        /* This case cannot happen. */
        getLogger().warn("No buffer for current write op, removing");
        removeCurrentWriteOp();
      }
    }

    // Now cancel all the pending read operations.  Might be better
    // to requeue them.
    while (hasReadOp()) {
      op = removeCurrentReadOp();
      // An operation that is still being written is also present in the read
      // queue (getNextWritableOp adds it there); it is handled as a write op.
      if (op != getCurrentWriteOp()) {
        getLogger().warn("Discarding partially completed op: %s", op);
        op.cancel(cause);
      }
    }

    // Same condition as the first branch above: drop every pending write.
    while ((cancelWrite || shouldAuth) && hasWriteOp()) {
      op = removeCurrentWriteOp();
      getLogger().warn("Discarding partially completed op: %s", op);
      op.cancel(cause);
    }

    ((Buffer) getWbuf()).clear();
    ((Buffer) getRbuf()).clear();
    toWrite = 0;
  }

  /**
   * Pulls newly added operations into the write queue and discards any
   * cancelled operations sitting at the head of the write path.
   *
   * @return {@code true} if a non-cancelled write operation is pending
   */
  private boolean preparePending() {
    // Refill the write queue from the input queue first.
    copyInputQueue();

    // Skip over cancelled operations until a live one (or nothing) is found.
    Operation head;
    while ((head = getCurrentWriteOp()) != null && head.isCancelled()) {
      getLogger().info("Removing cancelled operation: %s", head);
      removeCurrentWriteOp();
    }
    return head != null;
  }

  /**
   * Copies as many bytes as possible from pending write operations into the
   * node's write buffer and leaves the buffer flipped, ready to be written.
   *
   * <p>Nothing is copied while bytes from a previous fill are still pending
   * ({@code toWrite != 0}) or while the read queue has no remaining capacity.
   *
   * @param shouldOptimize if {@code true}, {@link #optimize()} is invoked each
   *                       time an operation has been copied completely
   */
  public final void fillWriteBuffer(boolean shouldOptimize) {
    if (toWrite == 0 && readQ.remainingCapacity() > 0) {
      ((Buffer) getWbuf()).clear();
      Operation o = getNextWritableOp();
      while (o != null && toWrite < getWbuf().capacity()) {
        assert o.getState() == OperationState.WRITING;

        ByteBuffer obuf = o.getBuffer();
        assert obuf != null : "Didn't get a write buffer from " + o;
        // Copy only what fits; an operation larger than the remaining space
        // keeps its unread bytes in its own buffer for the next fill.
        int bytesToCopy = Math.min(getWbuf().remaining(),
                obuf.remaining());
        byte[] b = new byte[bytesToCopy];
        obuf.get(b);
        getWbuf().put(b);
        getLogger().debug("After copying stuff from %s: %s",
                o, getWbuf());
        if (!o.getBuffer().hasRemaining()) {
          // The operation has been copied completely: take it off the write
          // path and look for the next one.
          o.writeComplete();
          transitionWriteItem();

          preparePending();
          if (shouldOptimize) {
            optimize();
          }

          // Starting another operation adds it to the read queue, so stop
          // when that queue is full.
          if (readQ.remainingCapacity() > 0) {
            o = getNextWritableOp();
          } else {
            o = null;
          }
        }
        toWrite += bytesToCopy;
      }
      ((Buffer) getWbuf()).flip();
      assert toWrite <= getWbuf().capacity()
              : "toWrite exceeded capacity: " + this;
      assert toWrite == getWbuf().remaining()
              : "Expected " + toWrite + " remaining, got "
              + getWbuf().remaining();
    } else {
      // Also reached when the read queue has no remaining capacity.
      getLogger().debug("Buffer is full, skipping");
    }
  }

  /**
   * Takes the current operation off the write path once it has been written.
   */
  public final void transitionWriteItem() {
    final Operation finished = removeCurrentWriteOp();
    assert finished != null : "There is no write item to transition";
    getLogger().debug("Finished writing %s", finished);
  }

  /**
   * Hook called by {@link #fillWriteBuffer(boolean)} after an operation has
   * been copied completely, when optimization was requested.
   * NOTE(review): presumably implementations combine queued operations into
   * {@code optimizedOp} - confirm against the subclasses.
   */
  protected abstract void optimize();

  /**
   * Looks at the operation at the head of the read queue without removing it.
   *
   * @return the head of the read queue, or {@code null} if the queue is empty
   */
  public final Operation getCurrentReadOp() {
    final Operation head = readQ.peek();
    return head;
  }

  /**
   * Removes the operation at the head of the read queue.
   *
   * @return the removed operation
   * @throws java.util.NoSuchElementException if the read queue is empty
   */
  public final Operation removeCurrentReadOp() {
    final Operation head = readQ.remove();
    return head;
  }

  /**
   * Returns the operation that is next in line for writing: the optimized
   * operation if one is set, otherwise the head of the write queue.
   *
   * @return the current write operation, or {@code null} if there is none
   */
  public final Operation getCurrentWriteOp() {
    if (optimizedOp != null) {
      return optimizedOp;
    }
    return writeQ.peek();
  }

  /**
   * Finds the operation to write next. Cancelled operations that have not
   * started writing are dropped; the first live queued operation is switched
   * to the writing state and added to the read queue.
   *
   * @return the operation to write, or {@code null} if nothing is pending;
   *         an operation that is no longer in the queued state is returned
   *         unchanged
   */
  private Operation getNextWritableOp() {
    Operation candidate = getCurrentWriteOp();
    while (candidate != null
            && candidate.getState() == OperationState.WRITE_QUEUED) {
      if (!candidate.isCancelled()) {
        candidate.writing();
        readQ.add(candidate);
        return candidate;
      }
      getLogger().debug("Not writing cancelled op.");
      Operation dropped = removeCurrentWriteOp();
      assert candidate == dropped;
      candidate = getCurrentWriteOp();
    }
    return candidate;
  }

  /**
   * Removes the current write operation: the optimized operation if one is
   * set (which is then cleared), otherwise the head of the write queue.
   *
   * @return the removed operation
   * @throws java.util.NoSuchElementException if no optimized operation is set
   *         and the write queue is empty
   */
  public final Operation removeCurrentWriteOp() {
    if (optimizedOp != null) {
      final Operation taken = optimizedOp;
      optimizedOp = null;
      return taken;
    }
    return writeQ.remove();
  }

  /**
   * Tells whether the read queue holds at least one operation.
   *
   * @return {@code true} if the read queue is not empty
   */
  public final boolean hasReadOp() {
    final boolean nothingToRead = readQ.isEmpty();
    return !nothingToRead;
  }

  /**
   * Tells whether anything is waiting to be written.
   *
   * @return {@code true} if an optimized operation is set or the write queue
   *         is not empty
   */
  public final boolean hasWriteOp() {
    return optimizedOp != null || !writeQ.isEmpty();
  }

  /**
   * Adds an operation to the input queue.
   *
   * <p>Waits up to one second for authentication to finish; if it does not,
   * the operation is cancelled and the method returns normally. Otherwise
   * waits up to {@code opQueueMaxBlockTime} milliseconds for room in the
   * input queue.
   *
   * @param op the operation to add
   * @throws IllegalStateException if the input queue stays full for the
   *         whole wait, or if the calling thread is interrupted while waiting
   *         (the interrupt status is restored and the
   *         {@link InterruptedException} is attached as the cause)
   */
  public final void addOp(Operation op) {
    try {
      if (!authLatch.await(1, TimeUnit.SECONDS)) {
        op.cancel("authentication timeout");
        getLogger().warn(
                "Operation canceled because authentication " +
                        "or reconnection and authentication has " +
                        "taken more than one second to complete.");
        getLogger().debug("Canceled operation %s", op.toString());
        return;
      }
      if (!inputQueue.offer(op, opQueueMaxBlockTime,
              TimeUnit.MILLISECONDS)) {
        throw new IllegalStateException("Timed out waiting to add "
                + op + "(max wait=" + opQueueMaxBlockTime + "ms)");
      }
      // Only a hint: the increment of this volatile counter is not atomic.
      addOpCount += 1;
    } catch (InterruptedException e) {
      // Restore the interrupted status
      Thread.currentThread().interrupt();
      // Keep the original exception as the cause so the stack trace of the
      // interruption is not lost.
      throw new IllegalStateException("Interrupted while waiting to add "
              + op, e);
    }
  }

  /**
   * Puts an operation at the front of the input queue by draining the queue
   * behind it and re-adding everything.
   *
   * @param op the operation that must be processed before the queued ones
   */
  public final void insertOp(Operation op) {
    ArrayList<Operation> tmp = new ArrayList<Operation>(
            inputQueue.size() + 1);
    tmp.add(op);
    inputQueue.drainTo(tmp);
    inputQueue.addAll(tmp);
    // Only a hint: the increment of this volatile counter is not atomic.
    addOpCount += 1;
  }

  /**
   * Computes the selector interest set that matches this node's state.
   *
   * @return {@code OP_CONNECT} while the channel is not connected; otherwise
   *         {@code OP_READ} if operations await a response, combined with
   *         {@code OP_WRITE} if bytes or operations await writing
   */
  public final int getSelectionOps() {
    if (!getChannel().isConnected()) {
      return SelectionKey.OP_CONNECT;
    }
    int interest = 0;
    if (hasReadOp()) {
      interest |= SelectionKey.OP_READ;
    }
    if (toWrite > 0 || hasWriteOp()) {
      interest |= SelectionKey.OP_WRITE;
    }
    return interest;
  }

  /**
   * Returns the display name of this node: the name passed to the
   * constructor followed by a space and the socket address.
   */
  public final String getNodeName() {
    return nodeName;
  }

  /**
   * Returns the {@code rbuf} byte buffer allocated in the constructor.
   */
  public final ByteBuffer getRbuf() {
    return rbuf;
  }

  /**
   * Returns the write buffer, i.e. the buffer that
   * {@link #fillWriteBuffer(boolean)} fills and {@link #writeSome()} sends
   * to the channel.
   */
  public final ByteBuffer getWbuf() {
    return wbuf;
  }

  /**
   * Stores the address of this node. An {@code ArcusReplNodeAddress} is
   * copied through its copy constructor; any other address is stored as is.
   *
   * @param sa the address to store
   */
  private final void setSocketAddress(SocketAddress sa) {
    /* ENABLE_REPLICATION if */
    if (sa instanceof ArcusReplNodeAddress) {
      // Keep a private copy instead of sharing the caller's instance.
      socketAddress = new ArcusReplNodeAddress((ArcusReplNodeAddress) sa);
      return;
    }
    /* ENABLE_REPLICATION end */
    socketAddress = sa;
  }

  /**
   * Returns the address stored for this node.
   */
  public final SocketAddress getSocketAddress() {
    return socketAddress;
  }

  /**
   * Tells whether this node is currently usable.
   *
   * @return {@code true} if no reconnect is pending and the channel exists
   *         and is connected
   */
  public final boolean isActive() {
    if (reconnectAttempt != 0) {
      return false;
    }
    if (getChannel() == null) {
      return false;
    }
    return getChannel().isConnected();
  }

  /**
   * Returns {@code true} until {@link #reconnecting()} or
   * {@link #connected()} has been called for the first time.
   */
  public final boolean isFirstConnecting() {
    return isFirstConnecting;
  }

  /**
   * Records one more reconnect attempt and resets the timeout statistics
   * (continuous timeout counter and timeout ratio window).
   */
  public final void reconnecting() {
    // NOTE(review): ++ on a volatile is not atomic; presumably only one
    // thread calls this - confirm.
    reconnectAttempt++;
    isFirstConnecting = false;
    continuousTimeout.set(0);
    resetTimeoutRatioCount();
  }

  /**
   * Marks the node as connected: clears the reconnect counter (which
   * {@link #isActive()} requires to be zero) and resets the timeout
   * statistics.
   */
  public final void connected() {
    reconnectAttempt = 0;
    isFirstConnecting = false;
    continuousTimeout.set(0);
    resetTimeoutRatioCount();
  }

  /**
   * Returns the reconnect attempt counter. It starts at 1, is incremented by
   * {@link #reconnecting()} and set to 0 by {@link #connected()}.
   */
  public final int getReconnectCount() {
    return reconnectAttempt;
  }

  /**
   * Describes the node for logging: name, queue sizes, head operations,
   * pending byte count and the interest set of a valid selection key.
   */
  @Override
  public final String toString() {
    // Read the volatile key once. It can be set to null concurrently (see
    // shutdown()), so re-reading it after the null check could throw a
    // NullPointerException. fixupOps() takes the same snapshot.
    final SelectionKey key = getSk();
    int sops = 0;
    if (key != null && key.isValid()) {
      sops = key.interestOps();
    }
    // NOTE(review): optimizedOp is handed out by getCurrentWriteOp(), so
    // counting it under #Rops rather than #Wops looks questionable - confirm.
    int rsize = readQ.size() + (optimizedOp == null ? 0 : 1);
    int wsize = writeQ.size();
    int isize = inputQueue.size();
    return "{QA name=" + nodeName
            + ", #Rops=" + rsize
            + ", #Wops=" + wsize
            + ", #iq=" + isize
            + ", topRop=" + getCurrentReadOp()
            + ", topWop=" + getCurrentWriteOp()
            + ", toWrite=" + toWrite
            + ", interested=" + sops + "}";
  }

  /**
   * Associates a channel and its selection key with this node.
   *
   * @param ch the channel to use from now on
   * @param skey the selection key registered for {@code ch}
   */
  public final void registerChannel(SocketChannel ch, SelectionKey skey) {
    setChannel(ch);
    setSk(skey);
  }

  /**
   * Replaces the channel of this node. With assertions enabled, replacing a
   * channel that is still open fails.
   *
   * @param to the new channel
   */
  public final void setChannel(SocketChannel to) {
    assert channel == null || !channel.isOpen()
            : "Attempting to overwrite channel";
    channel = to;
  }

  /**
   * Returns the channel currently associated with this node.
   */
  public final SocketChannel getChannel() {
    return channel;
  }

  /**
   * Sets the selection key of this node.
   *
   * @param to the new selection key
   */
  public final void setSk(SelectionKey to) {
    sk = to;
  }

  /**
   * Returns the current selection key; it is {@code null} initially and
   * after {@link #shutdown()} has closed the channel.
   */
  public final SelectionKey getSk() {
    return sk;
  }

  /**
   * Stores the server version and, for the ASCII protocol, derives which
   * optional commands the server supports from it.
   *
   * @param vr version string whose first two dot-separated tokens are the
   *           numeric major and minor version; a version containing the
   *           letter {@code E} is treated as an enterprise version
   * @throws NumberFormatException if the major or minor token is not numeric
   * @throws java.util.NoSuchElementException if fewer than two tokens exist
   */
  public final void setVersion(String vr) {
    version = vr;
    final StringTokenizer parts = new StringTokenizer(version, ".");
    final int major = Integer.parseInt(parts.nextToken());
    final int minor = Integer.parseInt(parts.nextToken());
    final boolean enterprise = version.contains("E");
    if (!isAsciiProtocol) {
      return;
    }
    setEnableMGetOp(major, minor, enterprise);
    setEnableMGetsOp(major, minor, enterprise);
    setEnableSpaceSeparate(major, minor, enterprise);
  }

  /**
   * Returns the version string passed to {@link #setVersion(String)}, or
   * {@code null} if none has been set.
   */
  public final String getVersion() {
    return version;
  }

  /**
   * Sets the {@code enabledMGetOp} flag from the server version: it is
   * enabled for versions after 0.6 (enterprise) or after 1.10 (otherwise).
   */
  private void setEnableMGetOp(int majorVersion, int minorVersion, boolean isEnterprise) {
    final int baseMajor = isEnterprise ? 0 : 1;
    final int baseMinor = isEnterprise ? 6 : 10;
    enabledMGetOp = majorVersion > baseMajor
            || (majorVersion == baseMajor && minorVersion > baseMinor);
  }

  /**
   * Sets the {@code enabledMGetsOp} flag from the server version: it is
   * enabled for versions after 0.8 (enterprise) or after 1.12 (otherwise).
   */
  private void setEnableMGetsOp(int majorVersion, int minorVersion, boolean isEnterprise) {
    final int baseMajor = isEnterprise ? 0 : 1;
    final int baseMinor = isEnterprise ? 8 : 12;
    enabledMGetsOp = majorVersion > baseMajor
            || (majorVersion == baseMajor && minorVersion > baseMinor);
  }

  /**
   * Sets the {@code enabledSpaceSeparate} flag from the server version: it is
   * enabled for versions after 0.6 (enterprise) or after 1.10 (otherwise).
   */
  private void setEnableSpaceSeparate(int majorVersion, int minorVersion,
                                            boolean isEnterprise) {
    final int baseMajor = isEnterprise ? 0 : 1;
    final int baseMinor = isEnterprise ? 6 : 10;
    enabledSpaceSeparate = majorVersion > baseMajor
            || (majorVersion == baseMajor && minorVersion > baseMinor);
  }

  /**
   * Returns the {@code enabledMGetOp} flag derived from the server version
   * by {@link #setVersion(String)}; {@code false} until then.
   */
  public final boolean enabledMGetOp() {
    return enabledMGetOp;
  }

  /**
   * Returns the {@code enabledMGetsOp} flag derived from the server version
   * by {@link #setVersion(String)}; {@code false} until then.
   */
  public final boolean enabledMGetsOp() {
    return enabledMGetsOp;
  }

  /**
   * Returns the {@code enabledSpaceSeparate} flag derived from the server
   * version by {@link #setVersion(String)}; {@code false} until then.
   */
  public final boolean enabledSpaceSeparate() {
    return enabledSpaceSeparate;
  }

  /**
   * Returns the number of bytes placed in the write buffer that have not yet
   * been written to the channel.
   */
  public final int getBytesRemainingToWrite() {
    return toWrite;
  }

  /**
   * Writes as much of the write buffer as the channel accepts and lowers the
   * pending byte count accordingly.
   *
   * @return the number of bytes written
   * @throws IOException if the channel write fails
   */
  public final int writeSome() throws IOException {
    final int written = channel.write(wbuf);
    assert written >= 0 : "Wrote negative bytes?";
    toWrite = toWrite - written;
    assert toWrite >= 0
            : "toWrite went negative after writing " + written
            + " bytes for " + this;
    getLogger().debug("Wrote %d bytes", written);
    return written;
  }

  /**
   * Records the outcome of an operation in the timeout statistics.
   *
   * <p>While the node is active the outcome is added to the timeout ratio
   * window. The continuous timeout counter is incremented for a timeout on
   * an active node and reset to zero in every other case.
   *
   * @param timedOut {@code true} if the operation timed out
   */
  public void setContinuousTimeout(boolean timedOut) {
    if (isActive()) {
      addTimeoutRatioCount(timedOut);
    }
    if (!timedOut || !isActive()) {
      continuousTimeout.set(0);
    } else {
      continuousTimeout.incrementAndGet();
    }
  }

  /**
   * Returns the current value of the continuous timeout counter.
   */
  public int getContinuousTimeout() {
    return continuousTimeout.get();
  }

  /**
   * Turns on timeout ratio tracking with a fresh, empty window.
   *
   * <p>The window array is created before the flag is raised and the whole
   * initialization runs under the ratio lock, so a thread that sees the flag
   * and then takes the lock always finds an allocated window.
   */
  public void enableTimeoutRatio() {
    toRatioLock.lock();
    try {
      toCountArray = new int[MAX_TOCOUNT];
      toRatioEnabled = true;
      // Takes the same lock again; toRatioLock is a ReentrantLock.
      resetTimeoutRatioCount();
    } finally {
      toRatioLock.unlock();
    }
  }

  /**
   * Returns the number of timeouts currently recorded in the ratio window.
   *
   * @return the current timeout count, or {@code -1} if timeout ratio
   *         tracking is not enabled
   */
  public int getTimeoutRatioNow() {
    if (!toRatioEnabled) {
      return -1; // invalid
    }
    toRatioLock.lock();
    try {
      return toRatioNow;
    } finally {
      toRatioLock.unlock();
    }
  }

  /**
   * Updates the interest set of the selection key so it matches the node's
   * current state; does nothing but log when there is no valid key.
   */
  public final void fixupOps() {
    // The key may be replaced at any time because of a node failure, so work
    // on a single snapshot of the volatile field.
    final SelectionKey key = sk;
    if (key == null || !key.isValid()) {
      getLogger().debug("Selection key is not valid.");
      return;
    }
    final int interest = getSelectionOps();
    getLogger().debug("Setting interested opts to %d", interest);
    key.interestOps(interest);
  }

  /**
   * Signals that authentication has finished: operations that were held back
   * while authenticating are returned to the input queue and threads waiting
   * in {@link #addOp} are released.
   */
  public final void authComplete() {
    if (reconnectBlocked != null && reconnectBlocked.size() > 0) {
      inputQueue.addAll(reconnectBlocked);
    }
    // Forget the flushed operations. setupForAuth() replaces this list only
    // when the input queue is non-empty, so without this reset a later
    // authentication cycle would queue the same operations a second time.
    reconnectBlocked = null;
    authLatch.countDown();
  }

  /**
   * Prepares the node for a (re)authentication round.
   *
   * <p>When authentication is required, a new closed latch blocks
   * {@link #addOp}, operations waiting in the input queue are set aside until
   * {@link #authComplete()}, and in-flight state is reset. Otherwise an
   * already open latch is installed.
   *
   * @param cause the message used when operations are cancelled
   */
  public final void setupForAuth(String cause) {
    if (shouldAuth) {
      authLatch = new CountDownLatch(1);
      if (inputQueue.size() > 0) {
        reconnectBlocked = new ArrayList<Operation>(
                inputQueue.size() + 1);
        inputQueue.drainTo(reconnectBlocked);
      }
      assert (inputQueue.size() == 0);
      setupResend(false, cause);
    } else {
      authLatch = new CountDownLatch(0);
    }
  }

  /**
   * Closes the channel, if there is one, and drops the selection key.
   * A warning is logged when unwritten bytes are left behind.
   *
   * @throws IOException if closing the channel fails
   */
  public final void shutdown() throws IOException {
    if (channel == null) {
      return;
    }
    channel.close();
    sk = null;
    if (toWrite > 0) {
      getLogger().warn(
              "Shut down with %d bytes remaining to write",
              toWrite);
    }
    getLogger().debug("Shut down channel %s", channel);
  }

  /**
   * Returns the number of operations currently in the input queue.
   */
  public int getInputQueueSize() {
    return inputQueue.size();
  }

  /**
   * Returns the number of operations currently in the write queue; an
   * operation held in {@code optimizedOp} is not included.
   */
  public int getWriteQueueSize() {
    return writeQ.size();
  }

  /**
   * Returns the number of operations currently in the read queue.
   */
  public int getReadQueueSize() {
    return readQ.size();
  }

  /**
   * Builds a one-line status summary: total added operations, the sizes of
   * the input, write and read queues, the continuous timeout count and the
   * current timeout ratio.
   */
  @Override
  public String getStatus() {
    return "#Tops=" + addOpCount
            + " #iq=" + getInputQueueSize()
            + " #Wops=" + getWriteQueueSize()
            + " #Rops=" + getReadQueueSize()
            + " #CT=" + getContinuousTimeout()
            + " #TR=" + getTimeoutRatioNow();
  }

  /* ENABLE_REPLICATION if */
  /**
   * Sets the replica group associated with this node.
   *
   * @param g the replica group
   */
  public void setReplicaGroup(MemcachedReplicaGroup g) {
    replicaGroup = g;
  }

  /**
   * Returns the replica group set through
   * {@link #setReplicaGroup(MemcachedReplicaGroup)}, or {@code null} if none
   * has been set.
   */
  public MemcachedReplicaGroup getReplicaGroup() {
    return replicaGroup;
  }

  /**
   * Removes every operation from this node and collects them in one queue,
   * ordered as: read queue, then write path, then input queue.
   *
   * @return a new queue holding all operations that were pending on this node
   */
  private BlockingQueue<Operation> getAllOperations() {
    BlockingQueue<Operation> allOp = new LinkedBlockingQueue<Operation>();

    if (hasReadOp()) {
      readQ.drainTo(allOp);
    }

    while (hasWriteOp()) {
      /* large byte operation
       * may exist write queue & read queue
       */
      Operation op = removeCurrentWriteOp();
      if (!allOp.contains(op)) {
        allOp.add(op);
      } else {
        getLogger().warn("Duplicate operation exist in " + this + " : " + op);
      }
    }

    if (inputQueue.size() > 0) {
      inputQueue.drainTo(allOp);
    }

    return allOp;
  }

  /**
   * Takes over operations from another node and puts them into this node's
   * write queue.
   *
   * <p>Each operation is bound to this node and prepared for resending: an
   * operation that is queued for or in the middle of writing and has a buffer
   * gets that buffer reset; any other operation is re-initialized and has its
   * state reset. Operations that do not fit into the write queue are
   * cancelled.
   *
   * @param allOp the operations to take over
   * @return the number of operations actually added to the write queue
   */
  public int addAllOpToWriteQ(BlockingQueue<Operation> allOp) {
    int movedOpCount = 0;
    for (Operation op : allOp) {
      op.setHandlingNode(this);
      if ((op.getState() == OperationState.WRITE_QUEUED ||
           op.getState() == OperationState.WRITING) && op.getBuffer() != null) {
        ((Buffer) op.getBuffer()).reset(); // buffer offset reset
      } else {
        op.initialize(); // write completed or not yet initialized
        op.resetState(); // reset operation state
      }
      if (writeQ.offer(op)) {
        op.setMoved(true);
        movedOpCount++;
      } else {
        op.cancel("by moving operations");
      }
    }
    // Only a hint: the update of this volatile counter is not atomic.
    addOpCount += movedOpCount;
    return movedOpCount;
  }

  /**
   * Moves every operation pending on this node to another node.
   *
   * @param toNode the node that takes over the operations
   * @return the number of operations the target node accepted; the rest were
   *         cancelled by the target node
   */
  public int moveOperations(final MemcachedNode toNode) {
    BlockingQueue<Operation> allOp = getAllOperations();
    int opCount = allOp.size();
    int movedOpCount = 0;

    if (opCount > 0) {
      movedOpCount = toNode.addAllOpToWriteQ(allOp);
      getLogger().info("Total %d operations have been moved to %s "
              + "and %d operations have been canceled.",
          movedOpCount, toNode, opCount - movedOpCount);
    }

    return movedOpCount;
  }
  /* ENABLE_REPLICATION end */
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy