org.apache.hadoop.hdfs.client.impl.BlockReaderRemote
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hdfs.client.impl;

import java.io.BufferedInputStream;
import java.io.BufferedOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.EnumSet;

import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.fs.FSInputChecker;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.ReadOption;
import org.apache.hadoop.hdfs.BlockReader;
import org.apache.hadoop.hdfs.PeerCache;
import org.apache.hadoop.hdfs.net.Peer;
import org.apache.hadoop.hdfs.protocol.Block;
import org.apache.hadoop.hdfs.protocol.DatanodeID;
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
import org.apache.hadoop.hdfs.protocol.datatransfer.DataTransferProtoUtil;
import org.apache.hadoop.hdfs.protocol.datatransfer.PacketHeader;
import org.apache.hadoop.hdfs.protocol.datatransfer.Sender;
import org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.BlockOpResponseProto;
import org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.ReadOpChecksumInfoProto;
import org.apache.hadoop.hdfs.protocol.proto.DataTransferProtos.Status;
import org.apache.hadoop.hdfs.protocolPB.PBHelperClient;
import org.apache.hadoop.hdfs.security.token.block.BlockTokenIdentifier;
import org.apache.hadoop.hdfs.server.datanode.CachingStrategy;
import org.apache.hadoop.hdfs.shortcircuit.ClientMmap;
import org.apache.hadoop.io.IOUtils;
import org.apache.hadoop.security.token.Token;
import org.apache.hadoop.util.DataChecksum;
import org.apache.htrace.core.TraceScope;
import org.apache.htrace.core.Tracer;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;


/**
 * @deprecated this is an old implementation that is being left around
 * in case any issues spring up with the new {@link BlockReaderRemote2}
 * implementation.
 * It will be removed in the next release.
 */
@InterfaceAudience.Private
@Deprecated
public class BlockReaderRemote extends FSInputChecker implements BlockReader {
  static final Logger LOG = LoggerFactory.getLogger(BlockReaderRemote.class);

  private final Peer peer;
  private final DatanodeID datanodeID;
  private final DataInputStream in;
  private DataChecksum checksum;

  /** offset in block of the last chunk received */
  private long lastChunkOffset = -1;
  private long lastChunkLen = -1;
  private long lastSeqNo = -1;

  /** offset in block where reader wants to actually read */
  private long startOffset;

  private final long blockId;

  /** offset in block of first chunk - may be less than startOffset
   if startOffset is not chunk-aligned */
  private final long firstChunkOffset;

  private final int bytesPerChecksum;
  private final int checksumSize;

  /**
   * The total number of bytes we need to transfer from the DN.
   * This is the amount that the user has requested plus some padding
   * at the beginning so that the read can begin on a chunk boundary.
   */
  private final long bytesNeededToFinish;
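  // Illustrative: with bytesPerChecksum = 512, a read of len = 1000 bytes at
  // startOffset = 700 is served from firstChunkOffset = 512, so
  // bytesNeededToFinish = 1000 + (700 - 512) = 1188.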

  private boolean eos = false;
  private boolean sentStatusCode = false;

  ByteBuffer checksumBytes = null;
  /** Amount of unread data in the current received packet */
  int dataLeft = 0;

  private final PeerCache peerCache;

  private final Tracer tracer;

  private final int networkDistance;

  /* FSInputChecker interface */

  /* Same interface as java.io.InputStream#read(),
   * used by DFSInputStream#read().
   * This violates one rule when there is a checksum error:
   * "Read should not modify user buffer before successful read",
   * because it first reads the data into the user buffer and then checks
   * the checksum.
   */
  @Override
  public synchronized int read(byte[] buf, int off, int len)
      throws IOException {

    // This has to be set here, *before* the skip, since we can
    // hit EOS during the skip, in the case that our entire read
    // is smaller than the checksum chunk.
    boolean eosBefore = eos;

    //for the first read, skip the extra bytes at the front.
    if (lastChunkLen < 0 && startOffset > firstChunkOffset && len > 0) {
      // Skip these bytes. But don't call this.skip()!
      int toSkip = (int)(startOffset - firstChunkOffset);
      if ( super.readAndDiscard(toSkip) != toSkip ) {
        // should never happen
        throw new IOException("Could not skip required number of bytes");
      }
    }

    int nRead = super.read(buf, off, len);

    // if eos was set in the previous read, send a status code to the DN
    if (eos && !eosBefore && nRead >= 0) {
      if (needChecksum()) {
        sendReadResult(peer, Status.CHECKSUM_OK);
      } else {
        sendReadResult(peer, Status.SUCCESS);
      }
    }
    return nRead;
  }

  @Override
  public synchronized long skip(long n) throws IOException {
    /* How can we make sure we don't throw a ChecksumException, at least
     * in the majority of cases? This one throws. */
    long nSkipped = 0;
    while (nSkipped < n) {
      int toSkip = (int)Math.min(n-nSkipped, Integer.MAX_VALUE);
      int ret = readAndDiscard(toSkip);
      if (ret <= 0) {
        return nSkipped;
      }
      nSkipped += ret;
    }
    return nSkipped;
  }

  @Override
  public int read() throws IOException {
    throw new IOException("read() is not expected to be invoked. " +
        "Use read(buf, off, len) instead.");
  }

  @Override
  public boolean seekToNewSource(long targetPos) throws IOException {
    /* Checksum errors are handled outside the BlockReader.
     * DFSInputStream does not always call 'seekToNewSource'. In the
     * case of pread(), it just tries a different replica without seeking.
     */
    return false;
  }

  @Override
  public void seek(long pos) throws IOException {
    throw new IOException("Seek() is not supported in BlockInputChecker");
  }

  @Override
  protected long getChunkPosition(long pos) {
    throw new RuntimeException("getChunkPosition() is not supported, " +
        "since seek is not required");
  }

  /**
   * Makes sure that checksumBytes has enough capacity
   * and limit is set to the number of checksum bytes needed
   * to be read.
   */
  private void adjustChecksumBytes(int dataLen) {
    int requiredSize =
        ((dataLen + bytesPerChecksum - 1)/bytesPerChecksum)*checksumSize;
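    // e.g. dataLen = 1000, bytesPerChecksum = 512, checksumSize = 4 (CRC32):
    // requiredSize = ((1000 + 511) / 512) * 4 = 8 bytes, one 4-byte checksum
    // per (possibly partial) 512-byte chunk.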
    if (checksumBytes == null || requiredSize > checksumBytes.capacity()) {
      checksumBytes = ByteBuffer.wrap(new byte[requiredSize]);
    } else {
      checksumBytes.clear();
    }
    checksumBytes.limit(requiredSize);
  }

  @Override
  protected synchronized int readChunk(long pos, byte[] buf, int offset,
      int len, byte[] checksumBuf)
      throws IOException {
    try (TraceScope ignored = tracer.newScope(
        "BlockReaderRemote#readChunk(" + blockId + ")")) {
      return readChunkImpl(pos, buf, offset, len, checksumBuf);
    }
  }

  private synchronized int readChunkImpl(long pos, byte[] buf, int offset,
      int len, byte[] checksumBuf)
      throws IOException {
    // Read one chunk.
    if (eos) {
      // Already hit EOF
      return -1;
    }

    // Read one DATA_CHUNK.
    long chunkOffset = lastChunkOffset;
    if ( lastChunkLen > 0 ) {
      chunkOffset += lastChunkLen;
    }

    // pos is relative to the start of the first chunk of the read.
    // chunkOffset is relative to the start of the block.
    // This makes sure that the read passed from FSInputChecker is
    // for the same chunk we expect to be reading from the DN.
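    // e.g. with firstChunkOffset = 512: before any chunk is read, pos = 0 and
    // chunkOffset = 512; after one full 512-byte chunk, pos = 512 and
    // chunkOffset = 1024, so pos + firstChunkOffset == chunkOffset holds.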
    if ( (pos + firstChunkOffset) != chunkOffset ) {
      throw new IOException("Mismatch in pos : " + pos + " + " +
          firstChunkOffset + " != " + chunkOffset);
    }

    // Read next packet if the previous packet has been read completely.
    if (dataLeft <= 0) {
      //Read packet headers.
      PacketHeader header = new PacketHeader();
      header.readFields(in);

      LOG.debug("DFSClient readChunk got header {}", header);

      // Sanity check the lengths
      if (!header.sanityCheck(lastSeqNo)) {
        throw new IOException("BlockReader: error in packet header " +
            header);
      }

      lastSeqNo = header.getSeqno();
      dataLeft = header.getDataLen();
      adjustChecksumBytes(header.getDataLen());
      if (header.getDataLen() > 0) {
        IOUtils.readFully(in, checksumBytes.array(), 0,
            checksumBytes.limit());
      }
    }

    // Sanity checks
    assert len >= bytesPerChecksum;
    assert checksum != null;
    assert checksumSize == 0 || (checksumBuf.length % checksumSize == 0);


    int checksumsToRead, bytesToRead;

    if (checksumSize > 0) {

      // How many chunks left in our packet - this is a ceiling
      // since we may have a partial chunk at the end of the file
      int chunksLeft = (dataLeft - 1) / bytesPerChecksum + 1;

      // How many chunks we can fit in databuffer
      //  - note this is a floor since we always read full chunks
      int chunksCanFit = Math.min(len / bytesPerChecksum,
          checksumBuf.length / checksumSize);

      // How many chunks should we read
      checksumsToRead = Math.min(chunksLeft, chunksCanFit);
      // How many bytes should we actually read
      bytesToRead = Math.min(
          checksumsToRead * bytesPerChecksum, // full chunks
          dataLeft); // in case we have a partial
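      // Worked example: dataLeft = 1000, len = 4096, bytesPerChecksum = 512,
      // checksumBuf.length = 32, checksumSize = 4 => chunksLeft = 2,
      // chunksCanFit = min(8, 8) = 8, checksumsToRead = 2, and
      // bytesToRead = min(1024, 1000) = 1000 (the last chunk is partial).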
    } else {
      // no checksum
      bytesToRead = Math.min(dataLeft, len);
      checksumsToRead = 0;
    }

    if ( bytesToRead > 0 ) {
      // Assert we have enough space
      assert bytesToRead <= len;
      assert checksumBytes.remaining() >= checksumSize * checksumsToRead;
      assert checksumBuf.length >= checksumSize * checksumsToRead;
      IOUtils.readFully(in, buf, offset, bytesToRead);
      checksumBytes.get(checksumBuf, 0, checksumSize * checksumsToRead);
    }

    dataLeft -= bytesToRead;
    assert dataLeft >= 0;

    lastChunkOffset = chunkOffset;
    lastChunkLen = bytesToRead;

    // If there's no data left in the current packet after satisfying
    // this read, and we have satisfied the client read, we expect
    // an empty packet header from the DN to signify this.
    // Note that pos + bytesToRead may in fact be greater since the
    // DN finishes off the entire last chunk.
    if (dataLeft == 0 &&
        pos + bytesToRead >= bytesNeededToFinish) {

      // Read header
      PacketHeader hdr = new PacketHeader();
      hdr.readFields(in);

      if (!hdr.isLastPacketInBlock() ||
          hdr.getDataLen() != 0) {
        throw new IOException("Expected empty end-of-read packet! Header: " +
            hdr);
      }

      eos = true;
    }

    if ( bytesToRead == 0 ) {
      return -1;
    }

    return bytesToRead;
  }

  private BlockReaderRemote(String file, String bpid, long blockId,
      DataInputStream in, DataChecksum checksum, boolean verifyChecksum,
      long startOffset, long firstChunkOffset, long bytesToRead, Peer peer,
      DatanodeID datanodeID, PeerCache peerCache, Tracer tracer,
      int networkDistance) {
    // Path is used only for printing block and file information in debug
    super(new Path("/" + Block.BLOCK_FILE_PREFIX + blockId +
            ":" + bpid + ":of:"+ file)/*too non path-like?*/,
        1, verifyChecksum,
        checksum.getChecksumSize() > 0? checksum : null,
        checksum.getBytesPerChecksum(),
        checksum.getChecksumSize());

    this.peer = peer;
    this.datanodeID = datanodeID;
    this.in = in;
    this.checksum = checksum;
    this.startOffset = Math.max( startOffset, 0 );
    this.blockId = blockId;

    // The total number of bytes that we need to transfer from the DN is
    // the amount that the user wants (bytesToRead), plus the padding at
    // the beginning in order to chunk-align. Note that the DN may elect
    // to send more than this amount if the read starts/ends mid-chunk.
    this.bytesNeededToFinish = bytesToRead + (startOffset - firstChunkOffset);

    this.firstChunkOffset = firstChunkOffset;
    lastChunkOffset = firstChunkOffset;
    lastChunkLen = -1;

    bytesPerChecksum = this.checksum.getBytesPerChecksum();
    checksumSize = this.checksum.getChecksumSize();
    this.peerCache = peerCache;
    this.tracer = tracer;
    this.networkDistance = networkDistance;
  }

  /**
   * Create a new BlockReader specifically to satisfy a read.
   * This method also sends the OP_READ_BLOCK request.
   *
   * @param file  File location
   * @param block  The block object
   * @param blockToken  The block token for security
   * @param startOffset  The read offset, relative to block head
   * @param len  The number of bytes to read
   * @param bufferSize  The IO buffer size (not the client buffer size)
   * @param verifyChecksum  Whether to verify checksum
   * @param clientName  Client name
   * @return New BlockReader instance (never null; errors are thrown as IOException).
   */
  public static BlockReaderRemote newBlockReader(String file,
      ExtendedBlock block,
      Token<BlockTokenIdentifier> blockToken,
      long startOffset, long len,
      int bufferSize, boolean verifyChecksum,
      String clientName, Peer peer,
      DatanodeID datanodeID,
      PeerCache peerCache,
      CachingStrategy cachingStrategy,
      Tracer tracer, int networkDistance)
      throws IOException {
    // in and out will be closed when sock is closed (by the caller)
    final DataOutputStream out =
        new DataOutputStream(new BufferedOutputStream(peer.getOutputStream()));
    new Sender(out).readBlock(block, blockToken, clientName, startOffset, len,
        verifyChecksum, cachingStrategy);

    //
    // Get bytes in block, set streams
    //

    DataInputStream in = new DataInputStream(
        new BufferedInputStream(peer.getInputStream(), bufferSize));

    BlockOpResponseProto status = BlockOpResponseProto.parseFrom(
        PBHelperClient.vintPrefixed(in));
    BlockReaderRemote2.checkSuccess(status, peer, block, file);
    ReadOpChecksumInfoProto checksumInfo =
        status.getReadOpChecksumInfo();
    DataChecksum checksum = DataTransferProtoUtil.fromProto(
        checksumInfo.getChecksum());
    //Warning when we get CHECKSUM_NULL?

    // Read the first chunk offset.
    long firstChunkOffset = checksumInfo.getChunkOffset();

    if ( firstChunkOffset < 0 || firstChunkOffset > startOffset ||
        firstChunkOffset <= (startOffset - checksum.getBytesPerChecksum())) {
      throw new IOException("BlockReader: error in first chunk offset (" +
          firstChunkOffset + ") startOffset is " +
          startOffset + " for file " + file);
    }
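    // e.g. with bytesPerChecksum = 512 and startOffset = 700, the DN is
    // expected to reply with the chunk-aligned offset 512, which falls in
    // the accepted range (188, 700].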

    return new BlockReaderRemote(file, block.getBlockPoolId(), block.getBlockId(),
        in, checksum, verifyChecksum, startOffset, firstChunkOffset, len,
        peer, datanodeID, peerCache, tracer, networkDistance);
  }

  @Override
  public synchronized void close() throws IOException {
    startOffset = -1;
    checksum = null;
    if (peerCache != null & sentStatusCode) {
      peerCache.put(datanodeID, peer);
    } else {
      peer.close();
    }

    // in will be closed when its Socket is closed.
  }

  @Override
  public void readFully(byte[] buf, int readOffset, int amtToRead)
      throws IOException {
    IOUtils.readFully(this, buf, readOffset, amtToRead);
  }

  @Override
  public int readAll(byte[] buf, int offset, int len) throws IOException {
    return readFully(this, buf, offset, len);
  }

  /**
   * When the reader reaches end of the read, it sends a status response
   * (e.g. CHECKSUM_OK) to the DN. Failure to do so could lead to the DN
   * closing our connection (which we will re-open), but won't affect
   * data correctness.
   */
  void sendReadResult(Peer peer, Status statusCode) {
    assert !sentStatusCode : "already sent status code to " + peer;
    try {
      BlockReaderRemote2.writeReadResult(peer.getOutputStream(), statusCode);
      sentStatusCode = true;
    } catch (IOException e) {
      // It's ok not to be able to send this. But something is probably wrong.
      LOG.info("Could not send read status (" + statusCode + ") to datanode " +
          peer.getRemoteAddressString() + ": " + e.getMessage());
    }
  }

  @Override
  public int read(ByteBuffer buf) throws IOException {
    throw new UnsupportedOperationException("readDirect unsupported in BlockReaderRemote");
  }

  @Override
  public int available() {
    // An optimistic estimate of how much data is available
    // to us without doing network I/O.
    return BlockReaderRemote2.TCP_WINDOW_SIZE;
  }

  @Override
  public boolean isShortCircuit() {
    return false;
  }

  @Override
  public ClientMmap getClientMmap(EnumSet<ReadOption> opts) {
    return null;
  }

  @Override
  public int getNetworkDistance() {
    return networkDistance;
  }
}
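
For reference, a minimal sketch of how a caller might drive this reader. It is illustrative only: the peer, token, cache, strategy and tracer values are assumed to be supplied by DFSClient internals, and the wrapper class below is hypothetical, not part of Hadoop.

import java.io.IOException;

import org.apache.hadoop.hdfs.BlockReader;
import org.apache.hadoop.hdfs.PeerCache;
import org.apache.hadoop.hdfs.client.impl.BlockReaderRemote;
import org.apache.hadoop.hdfs.net.Peer;
import org.apache.hadoop.hdfs.protocol.DatanodeID;
import org.apache.hadoop.hdfs.protocol.ExtendedBlock;
import org.apache.hadoop.hdfs.security.token.block.BlockTokenIdentifier;
import org.apache.hadoop.hdfs.server.datanode.CachingStrategy;
import org.apache.hadoop.security.token.Token;
import org.apache.htrace.core.Tracer;

/** Hypothetical caller showing the newBlockReader() handshake and cleanup. */
class BlockReaderRemoteUsageSketch {
  static byte[] readBlockPrefix(String file, ExtendedBlock block,
      Token<BlockTokenIdentifier> token, Peer peer, DatanodeID dnId,
      PeerCache cache, CachingStrategy strategy, Tracer tracer)
      throws IOException {
    // Sends OP_READ_BLOCK to the DN and parses the checksum negotiation.
    BlockReader reader = BlockReaderRemote.newBlockReader(
        file, block, token,
        0L, 4096L,            // startOffset, len
        4096, true,           // bufferSize, verifyChecksum
        "example-client", peer, dnId, cache, strategy, tracer,
        0 /* networkDistance */);
    try {
      byte[] buf = new byte[4096];
      // Reading everything that was requested reaches EOS, at which point the
      // reader sends CHECKSUM_OK (or SUCCESS) back to the DN.
      reader.readFully(buf, 0, buf.length);
      return buf;
    } finally {
      reader.close();  // returns the peer to the cache once the status was sent
    }
  }
}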