All downloads are free. Search and download functionality uses the official Maven repository.

com.qubole.rubix.core.CachedReadRequestChain Maven / Gradle / Ivy

There is a newer version: 0.3.20
Show newest version
/**
 * Copyright (c) 2019. Qubole Inc
 * Licensed under the Apache License, Version 2.0 (the License);
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * http://www.apache.org/licenses/LICENSE-2.0
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an AS IS BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License. See accompanying LICENSE file.
 */
package com.qubole.rubix.core;

import com.google.common.annotations.VisibleForTesting;
import com.qubole.rubix.common.metrics.CachingFileSystemMetrics;
import com.qubole.rubix.common.metrics.CustomMetricsReporterProvider;
import com.qubole.rubix.spi.BookKeeperFactory;
import com.qubole.rubix.spi.CacheUtil;
import com.qubole.rubix.spi.RetryingPooledBookkeeperClient;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.util.DirectBufferPool;

import java.io.FileInputStream;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;

import static com.google.common.base.Preconditions.checkState;
import static com.qubole.rubix.spi.CacheUtil.UNKONWN_GENERATION_NUMBER;

/**
 * Read request chain that serves a batch of read requests from the local cached copy of a remote file.
 *
 * <p>Every request is copied from the cached file into the request's destination buffer through a
 * buffer borrowed from a {@link DirectBufferPool}. If reading the cached file fails for any reason
 * other than a cancellation, the whole batch is re-read from the remote file system and the file's
 * metadata is invalidated through the BookKeeper client.
 *
 * Created by stagra on 4/1/16.
 */
public class CachedReadRequestChain extends ReadRequestChain
{
  private String remotePath;
  private long readFromCache; // bytes served from the local cached file
  private FileSystem.Statistics statistics;
  private FileSystem remoteFileSystem;
  // Non-null only while a fallback read from the remote file system is in flight.
  // volatile because cancel() reads it and may run on a different thread than call().
  private volatile DirectReadRequestChain directReadChain;
  private Configuration conf;
  private long directDataRead; // bytes served by the fallback remote read
  private BookKeeperFactory factory;

  private DirectBufferPool bufferPool;
  private int directBufferSize;
  private int corruptedFileCount;

  private static final Log log = LogFactory.getLog(CachedReadRequestChain.class);

  /**
   * @param remoteFileSystem file system used for the fallback read when the cached file cannot be read
   * @param remotePath path of the remote file; also used to resolve the local cached file
   * @param bufferPool pool the intermediate direct buffer is borrowed from and returned to
   * @param directBufferSize size requested from {@code bufferPool} for the intermediate buffer
   * @param statistics file system statistics to update with the bytes read from cache; may be null
   * @param conf configuration used to resolve the local path and to create the BookKeeper client
   * @param factory factory for the BookKeeper client used to invalidate file metadata
   * @param generationNumber generation number handed to the parent chain
   */
  public CachedReadRequestChain(FileSystem remoteFileSystem,
      String remotePath,
      DirectBufferPool bufferPool,
      int directBufferSize,
      FileSystem.Statistics statistics,
      Configuration conf,
      BookKeeperFactory factory,
      int generationNumber)
  {
    super(generationNumber);
    this.conf = conf;
    this.remotePath = remotePath;
    this.remoteFileSystem = remoteFileSystem;
    this.bufferPool = bufferPool;
    this.directBufferSize = directBufferSize;
    this.statistics = statistics;
    this.factory = factory;
  }

  /**
   * Test-only constructor: uses a fresh buffer pool, a 100 byte intermediate buffer and no statistics.
   */
  @VisibleForTesting
  public CachedReadRequestChain(FileSystem remoteFileSystem, String remotePath, Configuration conf, BookKeeperFactory factory, int generationNumber)
  {
    this(remoteFileSystem, remotePath, new DirectBufferPool(), 100, null, conf, factory, generationNumber);
  }

  @VisibleForTesting
  public CachedReadRequestChain()
  {
    //Dummy constructor for testing #testConsequtiveRequest method.
    super(UNKONWN_GENERATION_NUMBER);
  }

  /**
   * Executes all read requests of this chain against the local cached file.
   *
   * <p>On any failure other than a {@code CancelledException} the requests are re-read from the remote
   * file system and the cached file's metadata is invalidated once the local file handles are closed.
   *
   * @return number of bytes read: from the cached file on success, or from the remote file system
   *         when the fallback path was taken
   * @throws IOException if the chain was cancelled or the fallback remote read fails
   */
  public Long call() throws IOException
  {
    // TODO: any exception here should not cause workload to fail
    // rather should be retried and eventually read from backend
    if (log.isDebugEnabled()) {
      log.debug(String.format("Read Request threadName: %s, Cached read Executor threadName: %s", threadName, Thread.currentThread().getName()));
    }
    Thread.currentThread().setName(threadName);

    if (readRequests.size() == 0) {
      return 0L;
    }

    checkState(isLocked, "Trying to execute Chain without locking");

    RandomAccessFile raf = null;
    FileInputStream fis = null;
    FileChannel fileChannel = null;
    boolean needsInvalidation = false;
    String localCachedFile = CacheUtil.getLocalPath(remotePath, conf, generationNumber);

    ByteBuffer directBuffer = bufferPool.getBuffer(directBufferSize);
    try {
      raf = new RandomAccessFile(localCachedFile, "r");
      fis = new FileInputStream(raf.getFD());
      fileChannel = fis.getChannel();

      for (ReadRequest readRequest : readRequests) {
        if (cancelled) {
          propagateCancel(this.getClass().getName());
        }
        int expectedLength = readRequest.getActualReadLengthIntUnsafe();
        if (log.isDebugEnabled()) {
          log.debug(String.format("Processing readrequest %d-%d, length %d", readRequest.actualReadStart, readRequest.actualReadEnd, expectedLength));
        }

        int nread = copyFromCachedFile(fileChannel, directBuffer, readRequest);

        if (log.isDebugEnabled()) {
          log.debug(String.format("CachedFileRead copied data [%d - %d] at buffer offset %d",
                  readRequest.getActualReadStart(),
                  readRequest.getActualReadStart() + nread,
                  readRequest.getDestBufferOffset()));
        }

        // A short read means the cached file does not hold the data it is supposed to hold
        if (nread != expectedLength) {
          throw new InvalidationRequiredException("Cached read length didn't match with requested read length for file");
        }
        readFromCache += nread;
      }
      if (log.isDebugEnabled()) {
        log.debug(String.format("Read %d bytes from cached file", readFromCache));
      }
    }
    catch (Exception ex) {
      if (ex instanceof CancelledException) {
        throw ex;
      }
      log.error(String.format("Fall back to read from object store for %s .Could not read data from cached file : ", localCachedFile), ex);
      CustomMetricsReporterProvider.getCustomMetricsReporter().addMetric(CachingFileSystemMetrics.LOCAL_FALLBACK_TO_DIRECT_READ);
      needsInvalidation = true;
      directDataRead = readFromRemoteFileSystem();
      return directDataRead;
    }
    finally {
      bufferPool.returnBuffer(directBuffer);

      // Close every handle independently: a failure to close one must not leak the others,
      // skip the invalidation/statistics below, or replace the result being returned.
      closeQuietly(fis);
      closeQuietly(fileChannel);
      closeQuietly(raf);

      // We are calling invalidateMetadata from finally block to make sure fileChannel is closed before we delete the file
      if (needsInvalidation) {
        corruptedFileCount++;
        invalidateMetadata();
      }

      if (statistics != null) {
        statistics.incrementBytesRead(readFromCache);
      }
    }
    return readFromCache;
  }

  /**
   * Cancels this chain and, if a fallback remote read is currently running, that read as well.
   */
  @Override
  public void cancel()
  {
    super.cancel();
    // Read the field once: call() resets it to null when the fallback read finishes,
    // so checking and dereferencing the field separately could hit a NullPointerException.
    DirectReadRequestChain activeDirectChain = directReadChain;
    if (activeDirectChain != null) {
      activeDirectChain.cancel();
    }
  }

  /**
   * Copies the byte range of a single read request from the cached file into the request's
   * destination buffer, going through {@code directBuffer} in chunks of at most its capacity.
   *
   * @return number of bytes actually copied; less than the requested length if the file ran short
   */
  private int copyFromCachedFile(FileChannel fileChannel, ByteBuffer directBuffer, ReadRequest readRequest) throws IOException
  {
    int expectedLength = readRequest.getActualReadLengthIntUnsafe();
    int nread = 0;
    while (nread < expectedLength) {
      directBuffer.clear();
      // Cap the read at what is still needed so no bytes beyond the request are fetched and discarded
      directBuffer.limit(Math.min(expectedLength - nread, directBuffer.capacity()));
      int nbytes = fileChannel.read(directBuffer, readRequest.getActualReadStart() + nread);
      if (nbytes <= 0) {
        break;
      }
      directBuffer.flip();
      directBuffer.get(readRequest.getDestBuffer(), readRequest.getDestBufferOffset() + nread, nbytes);
      nread += nbytes;
    }
    return nread;
  }

  /**
   * Closes a read-only resource, logging instead of propagating any failure. A null resource is ignored.
   */
  private static void closeQuietly(AutoCloseable resource)
  {
    if (resource == null) {
      return;
    }
    try {
      resource.close();
    }
    catch (Exception e) {
      log.warn("Could not close cached file resource : ", e);
    }
  }

  /**
   * Asks BookKeeper to invalidate the metadata of the remote path. Best effort: failures are only logged.
   */
  private void invalidateMetadata()
  {
    try (RetryingPooledBookkeeperClient client = factory.createBookKeeperClient(conf)) {
      client.invalidateFileMetadata(remotePath);
    }
    catch (Exception e) {
      log.error("Could not Invalidate Corrupted File " + remotePath + " Error : ", e);
    }
  }

  /**
   * Re-reads every request of this chain directly from the remote file system.
   *
   * @return number of bytes read by the direct chain, or 0 if this chain was already cancelled
   * @throws IOException if opening or reading the remote file fails
   */
  private long readFromRemoteFileSystem() throws IOException
  {
    // Setting the cached read data to zero as we are reading the whole request from remote object store
    readFromCache = 0;

    if (cancelled) {
      return 0;
    }

    try (FSDataInputStream inputStream = remoteFileSystem.open(new Path(remotePath))) {
      DirectReadRequestChain chain = new DirectReadRequestChain(inputStream);
      // Publish the chain so that cancel() can reach it while the remote read is running
      directReadChain = chain;
      for (ReadRequest readRequest : readRequests) {
        chain.addReadRequest(readRequest);
      }
      chain.lock();
      return chain.call();
    }
    finally {
      // Clear on failure too, so cancel() never targets a chain whose stream is already closed
      directReadChain = null;
    }
  }

  /**
   * Reports the outcome of this chain. When the fallback remote read produced data, the bytes and
   * requests are reported as direct reads and nothing is reported under cached reads.
   */
  public ReadRequestChainStats getStats()
  {
    boolean servedFromCache = directDataRead == 0;
    return new ReadRequestChainStats()
            .setCachedRRCDataRead(servedFromCache ? readFromCache : 0)  // If read directly then do not report it under cached reads
            .setCachedRRCRequests(servedFromCache ? requests : 0)
            .setDirectRRCDataRead(directDataRead)
            .setDirectRRCRequests(servedFromCache ? 0 : requests)
            .setCorruptedFileCount(corruptedFileCount);
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy