All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.hadoop.hdfs.shortcircuit.ShortCircuitReplica Maven / Gradle / Ivy

There is a newer version: 3.2.0-9
Show newest version
/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hdfs.shortcircuit;

import java.io.FileInputStream;
import java.io.IOException;
import java.nio.MappedByteBuffer;
import java.nio.channels.FileChannel;
import java.nio.channels.FileChannel.MapMode;

import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.hdfs.ExtendedBlockId;
import org.apache.hadoop.hdfs.server.datanode.BlockMetadataHeader;
import org.apache.hadoop.hdfs.shortcircuit.ShortCircuitShm.Slot;
import org.apache.hadoop.hdfs.util.IOUtilsClient;
import org.apache.hadoop.io.nativeio.NativeIO;
import org.apache.hadoop.util.Time;

import io.prestosql.hadoop.$internal.com.google.common.annotations.VisibleForTesting;
import io.prestosql.hadoop.$internal.com.google.common.base.Preconditions;

import io.prestosql.hadoop.$internal.org.slf4j.Logger;
import io.prestosql.hadoop.$internal.org.slf4j.LoggerFactory;

/**
 * A ShortCircuitReplica object contains file descriptors for a block that
 * we are reading via short-circuit local reads.
 *
 * The file descriptors can be shared between multiple threads because
 * all the operations we perform are stateless-- i.e., we use pread
 * instead of read, to avoid using the shared position state.
 */
@InterfaceAudience.Private
public class ShortCircuitReplica {
  public static final Logger LOG = LoggerFactory.getLogger(
      ShortCircuitCache.class);

  /**
   * Identifies this ShortCircuitReplica object.
   */
  final ExtendedBlockId key;

  /**
   * The block data input stream.
   */
  private final FileInputStream dataStream;

  /**
   * The block metadata input stream.
   *
   * TODO: make this nullable if the file has no checksums on disk.
   */
  private final FileInputStream metaStream;

  /**
   * Block metadata header.
   */
  private final BlockMetadataHeader metaHeader;

  /**
   * The cache we belong to.
   */
  private final ShortCircuitCache cache;

  /**
   * Monotonic time at which the replica was created.
   */
  private final long creationTimeMs;

  /**
   * If non-null, the shared memory slot associated with this replica.
   */
  private final Slot slot;

  /**
   * Current mmap state.
   *
   * Protected by the cache lock.
   */
  Object mmapData;

  /**
   * True if this replica has been purged from the cache; false otherwise.
   *
   * Protected by the cache lock.
   */
  boolean purged = false;

  /**
   * Number of external references to this replica.  Replicas are referenced
   * by the cache, BlockReaderLocal instances, and by ClientMmap instances.
   * The number starts at 2 because when we create a replica, it is referenced
   * by both the cache and the requester.
   *
   * Protected by the cache lock.
   */
  int refCount = 2;

  /**
   * The monotonic time in nanoseconds at which the replica became evictable, or
   * null if it is not evictable.
   *
   * Protected by the cache lock.
   */
  private Long evictableTimeNs = null;

  public ShortCircuitReplica(ExtendedBlockId key,
      FileInputStream dataStream, FileInputStream metaStream,
      ShortCircuitCache cache, long creationTimeMs, Slot slot) throws IOException {
    this.key = key;
    this.dataStream = dataStream;
    this.metaStream = metaStream;
    this.metaHeader =
          BlockMetadataHeader.preadHeader(metaStream.getChannel());
    if (metaHeader.getVersion() != 1) {
      throw new IOException("invalid metadata header version " +
          metaHeader.getVersion() + ".  Can only handle version 1.");
    }
    this.cache = cache;
    this.creationTimeMs = creationTimeMs;
    this.slot = slot;
  }

  /**
   * Decrement the reference count.
   */
  public void unref() {
    cache.unref(this);
  }

  /**
   * Check if the replica is stale.
   *
   * Must be called with the cache lock held.
   */
  boolean isStale() {
    if (slot != null) {
      // Check staleness by looking at the shared memory area we use to
      // communicate with the DataNode.
      boolean stale = !slot.isValid();
      LOG.trace("{}: checked shared memory segment.  isStale={}", this, stale);
      return stale;
    } else {
      // Fall back to old, time-based staleness method.
      long deltaMs = Time.monotonicNow() - creationTimeMs;
      long staleThresholdMs = cache.getStaleThresholdMs();
      if (deltaMs > staleThresholdMs) {
        LOG.trace("{} is stale because it's {} ms old and staleThreadholdMS={}",
            this, deltaMs, staleThresholdMs);
        return true;
      } else {
        LOG.trace("{} is not stale because it's only {} ms old "
            + "and staleThresholdMs={}",  this, deltaMs, staleThresholdMs);
        return false;
      }
    }
  }

  /**
   * Try to add a no-checksum anchor to our shared memory slot.
   *
   * It is only possible to add this anchor when the block is mlocked on the Datanode.
   * The DataNode will not munlock the block until the number of no-checksum anchors
   * for the block reaches zero.
   *
   * This method does not require any synchronization.
   *
   * @return     True if we successfully added a no-checksum anchor.
   */
  public boolean addNoChecksumAnchor() {
    if (slot == null) {
      return false;
    }
    boolean result = slot.addAnchor();
    LOG.trace("{}: {} no-checksum anchor to slot {}",
        this, result ? "added" : "could not add", slot);
    return result;
  }

  /**
   * Remove a no-checksum anchor for our shared memory slot.
   *
   * This method does not require any synchronization.
   */
  public void removeNoChecksumAnchor() {
    if (slot != null) {
      slot.removeAnchor();
    }
  }

  /**
   * Check if the replica has an associated mmap that has been fully loaded.
   *
   * Must be called with the cache lock held.
   */
  @VisibleForTesting
  public boolean hasMmap() {
    return ((mmapData != null) && (mmapData instanceof MappedByteBuffer));
  }

  /**
   * Free the mmap associated with this replica.
   *
   * Must be called with the cache lock held.
   */
  void munmap() {
    MappedByteBuffer mmap = (MappedByteBuffer)mmapData;
    NativeIO.POSIX.munmap(mmap);
    mmapData = null;
  }

  /**
   * Close the replica.
   *
   * Must be called after there are no more references to the replica in the
   * cache or elsewhere.
   */
  void close() {
    String suffix = "";

    Preconditions.checkState(refCount == 0,
        "tried to close replica with refCount %d: %s", refCount, this);
    refCount = -1;
    Preconditions.checkState(purged,
        "tried to close unpurged replica %s", this);
    if (hasMmap()) {
      munmap();
      if (LOG.isTraceEnabled()) {
        suffix += "  munmapped.";
      }
    }
    IOUtilsClient.cleanupWithLogger(LOG, dataStream, metaStream);
    if (slot != null) {
      cache.scheduleSlotReleaser(slot);
      if (LOG.isTraceEnabled()) {
        suffix += "  scheduling " + slot + " for later release.";
      }
    }
    LOG.trace("closed {}{}", this, suffix);
  }

  public FileInputStream getDataStream() {
    return dataStream;
  }

  public FileInputStream getMetaStream() {
    return metaStream;
  }

  public BlockMetadataHeader getMetaHeader() {
    return metaHeader;
  }

  public ExtendedBlockId getKey() {
    return key;
  }

  public ClientMmap getOrCreateClientMmap(boolean anchor) {
    return cache.getOrCreateClientMmap(this, anchor);
  }

  MappedByteBuffer loadMmapInternal() {
    try {
      FileChannel channel = dataStream.getChannel();
      MappedByteBuffer mmap = channel.map(MapMode.READ_ONLY, 0,
          Math.min(Integer.MAX_VALUE, channel.size()));
      LOG.trace("{}: created mmap of size {}", this, channel.size());
      return mmap;
    } catch (IOException e) {
      LOG.warn(this + ": mmap error", e);
      return null;
    } catch (RuntimeException e) {
      LOG.warn(this + ": mmap error", e);
      return null;
    }
  }

  /**
   * Get the evictable time in nanoseconds.
   *
   * Note: you must hold the cache lock to call this function.
   *
   * @return the evictable time in nanoseconds.
   */
  public Long getEvictableTimeNs() {
    return evictableTimeNs;
  }

  /**
   * Set the evictable time in nanoseconds.
   *
   * Note: you must hold the cache lock to call this function.
   *
   * @param evictableTimeNs   The evictable time in nanoseconds, or null
   *                          to set no evictable time.
   */
  void setEvictableTimeNs(Long evictableTimeNs) {
    this.evictableTimeNs = evictableTimeNs;
  }

  @VisibleForTesting
  public Slot getSlot() {
    return slot;
  }

  /**
   * Convert the replica to a string for debugging purposes.
   * Note that we can't take the lock here.
   */
  @Override
  public String toString() {
    return "ShortCircuitReplica{" + "key=" + key
        + ", metaHeader.version=" + metaHeader.getVersion()
        + ", metaHeader.checksum=" + metaHeader.getChecksum()
        + ", ident=" + "0x" + Integer.toHexString(System.identityHashCode(this))
        + ", creationTimeMs=" + creationTimeMs + "}";
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy