
org.apache.hadoop.fs.swift.snative.SwiftNativeOutputStream Maven / Gradle / Ivy


This module contains code to support integration with OpenStack. Currently this consists of a filesystem client to read data from and write data to an OpenStack Swift object store.
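This class ships in the hadoop-openstack artifact (org.apache.hadoop:hadoop-openstack). For orientation, here is a minimal sketch of how the stream is normally obtained: callers go through the standard Hadoop FileSystem API with a swift:// URI rather than constructing SwiftNativeOutputStream directly. The service name, endpoint, credentials, and paths below are placeholder assumptions, and the configuration keys follow the fs.swift.service.<name>.* pattern described in the hadoop-openstack documentation; verify the exact names against your Hadoop release.

import java.io.OutputStream;
import java.nio.charset.StandardCharsets;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class SwiftWriteExample {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Hypothetical Swift service named "example"; keys follow the
    // fs.swift.service.<name>.* pattern used by hadoop-openstack.
    conf.set("fs.swift.service.example.auth.url",
        "https://auth.example.org/v2.0/tokens");              // placeholder
    conf.set("fs.swift.service.example.tenant", "demo");      // placeholder
    conf.set("fs.swift.service.example.username", "user");    // placeholder
    conf.set("fs.swift.service.example.password", "secret");  // placeholder

    // swift://<container>.<service>/<path>
    Path dest = new Path("swift://mycontainer.example/logs/sample.txt");
    FileSystem fs = FileSystem.get(dest.toUri(), conf);

    // fs.create() returns a stream backed by SwiftNativeOutputStream:
    // bytes are buffered to local disk and uploaded to Swift on close().
    try (OutputStream out = fs.create(dest)) {
      out.write("hello swift".getBytes(StandardCharsets.UTF_8));
    }
  }
}

The source of the class follows.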

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.fs.swift.snative;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.swift.exceptions.SwiftConnectionClosedException;
import org.apache.hadoop.fs.swift.exceptions.SwiftException;
import org.apache.hadoop.fs.swift.exceptions.SwiftInternalStateException;
import org.apache.hadoop.fs.swift.util.SwiftUtils;

import java.io.BufferedOutputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStream;

/**
 * Output stream that buffers data on local disk.
 * Writes to Swift on the close() method, unless the
 * file is large enough to be written as partitions.
 * In that case, the first partition is written on the first write that pushes
 * data over the partition size, as may later writes. The close() then causes
 * the final partition to be written, along with a partition manifest.
 */
class SwiftNativeOutputStream extends OutputStream {
  public static final int ATTEMPT_LIMIT = 3;
  private long filePartSize;
  private static final Logger LOG =
      LoggerFactory.getLogger(SwiftNativeOutputStream.class);
  private Configuration conf;
  private String key;
  private File backupFile;
  private OutputStream backupStream;
  private SwiftNativeFileSystemStore nativeStore;
  private boolean closed;
  private int partNumber;
  private long blockOffset;
  private long bytesWritten;
  private long bytesUploaded;
  private boolean partUpload = false;
  final byte[] oneByte = new byte[1];

  /**
   * Create an output stream
   * @param conf configuration to use
   * @param nativeStore native store to write through
   * @param key the key to write
   * @param partSizeKB the partition size in kilobytes
   * @throws IOException IO problems creating the local buffer file
   */
  @SuppressWarnings("IOResourceOpenedButNotSafelyClosed")
  public SwiftNativeOutputStream(Configuration conf,
                                 SwiftNativeFileSystemStore nativeStore,
                                 String key,
                                 long partSizeKB) throws IOException {
    this.conf = conf;
    this.key = key;
    this.backupFile = newBackupFile();
    this.nativeStore = nativeStore;
    this.backupStream = new BufferedOutputStream(new FileOutputStream(backupFile));
    this.partNumber = 1;
    this.blockOffset = 0;
    this.filePartSize = 1024L * partSizeKB;
  }

  private File newBackupFile() throws IOException {
    File dir = new File(conf.get("hadoop.tmp.dir"));
    if (!dir.mkdirs() && !dir.exists()) {
      throw new SwiftException("Cannot create Swift buffer directory: " + dir);
    }
    File result = File.createTempFile("output-", ".tmp", dir);
    result.deleteOnExit();
    return result;
  }

  /**
   * Flush the local backing stream.
   * This does not trigger a flush of data to the remote blobstore.
   * @throws IOException failure flushing the local backing stream
   */
  @Override
  public void flush() throws IOException {
    backupStream.flush();
  }

  /**
   * check that the output stream is open
   *
   * @throws SwiftException if it is not
   */
  private synchronized void verifyOpen() throws SwiftException {
    if (closed) {
      throw new SwiftConnectionClosedException();
    }
  }

  /**
   * Close the stream. This will trigger the upload of all locally cached
   * data to the remote blobstore.
   * @throws IOException IO problems uploading the data.
   */
  @Override
  public synchronized void close() throws IOException {
    if (closed) {
      return;
    }

    try {
      closed = true;
      //formally declare as closed.
      backupStream.close();
      backupStream = null;
      Path keypath = new Path(key);
      if (partUpload) {
        partUpload(true);
        nativeStore.createManifestForPartUpload(keypath);
      } else {
        uploadOnClose(keypath);
      }
    } finally {
      delete(backupFile);
      backupFile = null;
    }
    assert backupStream == null: "backup stream has been reopened";
  }

  /**
   * Upload a file when closed, either in one go, or, if the file is
   * already partitioned, by uploading the remaining partition and a manifest.
   * @param keypath key as a path
   * @throws IOException IO Problems
   */
  private void uploadOnClose(Path keypath) throws IOException {
    boolean uploadSuccess = false;
    int attempt = 0;
    while (!uploadSuccess) {
      try {
        ++attempt;
        bytesUploaded += uploadFileAttempt(keypath, attempt);
        uploadSuccess = true;
      } catch (IOException e) {
        LOG.info("Upload failed " + e, e);
        if (attempt > ATTEMPT_LIMIT) {
          throw e;
        }
      }
    }
  }

  @SuppressWarnings("IOResourceOpenedButNotSafelyClosed")
  private long uploadFileAttempt(Path keypath, int attempt) throws IOException {
    long uploadLen = backupFile.length();
    SwiftUtils.debug(LOG, "Closing write of file %s;" +
                          " localfile=%s of length %d - attempt %d",
                     key,
                     backupFile,
                     uploadLen,
                     attempt);

    nativeStore.uploadFile(keypath,
                           new FileInputStream(backupFile),
                           uploadLen);
    return uploadLen;
  }

  @Override
  protected void finalize() throws Throwable {
    if(!closed) {
      LOG.warn("stream not closed");
    }
    if (backupFile != null) {
      LOG.warn("Leaking backing file " + backupFile);
    }
  }

  private void delete(File file) {
    if (file != null) {
      SwiftUtils.debug(LOG, "deleting %s", file);
      if (!file.delete()) {
        LOG.warn("Could not delete " + file);
      }
    }
  }

  @Override
  public void write(int b) throws IOException {
    //insert to a one byte array
    oneByte[0] = (byte) b;
    //then delegate to the array writing routine
    write(oneByte, 0, 1);
  }

  @Override
  public synchronized void write(byte[] buffer, int offset, int len) throws
                                                                     IOException {
    //validate args
    if (offset < 0 || len < 0 || (offset + len) > buffer.length) {
      throw new IndexOutOfBoundsException("Invalid offset/length for write");
    }
    //validate the output stream
    verifyOpen();
    SwiftUtils.debug(LOG, " write(offset=%d, len=%d)", offset, len);

    // if the size of file is greater than the partition limit
    while (blockOffset + len >= filePartSize) {
      // - then partition the blob and upload as many partitions
      // as are needed.
      //how many bytes to write for this partition.
      int subWriteLen = (int) (filePartSize - blockOffset);
      if (subWriteLen < 0 || subWriteLen > len) {
        throw new SwiftInternalStateException("Invalid subwrite len: "
                                              + subWriteLen
                                              + " -buffer len: " + len);
      }
      writeToBackupStream(buffer, offset, subWriteLen);
      //move the offset along and length down
      offset += subWriteLen;
      len -= subWriteLen;
      //now upload the partition that has just been filled up
      // (this also sets blockOffset=0)
      partUpload(false);
    }
    //any remaining data is now written
    writeToBackupStream(buffer, offset, len);
  }

  /**
   * Write to the backup stream.
   * Guarantees:
   * <ol>
   *   <li>backupStream is open</li>
   *   <li>blockOffset + len &lt; filePartSize</li>
   * </ol>
   * @param buffer buffer to write
   * @param offset offset in buffer
   * @param len length of write.
   * @throws IOException backup stream write failing
   */
  private void writeToBackupStream(byte[] buffer, int offset, int len)
      throws IOException {
    assert len >= 0 : "remainder to write is negative";
    SwiftUtils.debug(LOG, " writeToBackupStream(offset=%d, len=%d)", offset, len);
    if (len == 0) {
      //no remainder -downgrade to no-op
      return;
    }

    //write the new data out to the backup stream
    backupStream.write(buffer, offset, len);
    //increment the counters
    blockOffset += len;
    bytesWritten += len;
  }

  /**
   * Upload a single partition. This deletes the local backing file,
   * and re-opens it to create a new one.
   * @param closingUpload is this the final upload of the file
   * @throws IOException on IO problems
   */
  @SuppressWarnings("IOResourceOpenedButNotSafelyClosed")
  private void partUpload(boolean closingUpload) throws IOException {
    if (backupStream != null) {
      backupStream.close();
    }

    if (closingUpload && partUpload && backupFile.length() == 0) {
      //skipping the upload if
      // - it is close time
      // - the final partition is 0 bytes long
      // - one part has already been written
      SwiftUtils.debug(LOG, "skipping upload of 0 byte final partition");
      delete(backupFile);
    } else {
      partUpload = true;
      boolean uploadSuccess = false;
      int attempt = 0;
      while (!uploadSuccess) {
        try {
          ++attempt;
          bytesUploaded += uploadFilePartAttempt(attempt);
          uploadSuccess = true;
        } catch (IOException e) {
          LOG.info("Upload failed " + e, e);
          if (attempt > ATTEMPT_LIMIT) {
            throw e;
          }
        }
      }
      delete(backupFile);
      partNumber++;
      blockOffset = 0;
      if (!closingUpload) {
        //if not the final upload, create a new output stream
        backupFile = newBackupFile();
        backupStream =
            new BufferedOutputStream(new FileOutputStream(backupFile));
      }
    }
  }

  @SuppressWarnings("IOResourceOpenedButNotSafelyClosed")
  private long uploadFilePartAttempt(int attempt) throws IOException {
    long uploadLen = backupFile.length();
    SwiftUtils.debug(LOG, "Uploading part %d of file %s;" +
                          " localfile=%s of length %d - attempt %d",
                     partNumber,
                     key,
                     backupFile,
                     uploadLen,
                     attempt);
    nativeStore.uploadFilePart(new Path(key),
                               partNumber,
                               new FileInputStream(backupFile),
                               uploadLen);
    return uploadLen;
  }

  /**
   * Get the file partition size
   * @return the partition size
   */
  long getFilePartSize() {
    return filePartSize;
  }

  /**
   * Query the number of partitions written.
   * This is intended for testing.
   * @return the number of partitions already written to the remote FS
   */
  synchronized int getPartitionsWritten() {
    return partNumber - 1;
  }

  /**
   * Get the number of bytes written to the output stream.
   * This should always be greater than or equal to bytesUploaded.
   * @return the number of bytes written to this stream
   */
  long getBytesWritten() {
    return bytesWritten;
  }

  /**
   * Get the number of bytes uploaded to the remote Swift cluster.
   * bytesWritten - bytesUploaded = the number of bytes left to upload.
   * @return the number of bytes written to the remote endpoint
   */
  long getBytesUploaded() {
    return bytesUploaded;
  }

  @Override
  public String toString() {
    return "SwiftNativeOutputStream{" +
           "key='" + key + '\'' +
           ", backupFile=" + backupFile +
           ", closed=" + closed +
           ", filePartSize=" + filePartSize +
           ", partNumber=" + partNumber +
           ", blockOffset=" + blockOffset +
           ", partUpload=" + partUpload +
           ", nativeStore=" + nativeStore +
           ", bytesWritten=" + bytesWritten +
           ", bytesUploaded=" + bytesUploaded +
           '}';
  }
}
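
The partSizeKB value passed to the constructor comes from filesystem configuration rather than from callers of the stream. Below is a minimal sketch of how the partition size could be tuned and how it maps to the partitioning behaviour implemented above; the fs.swift.partsize key name (a size in KB) is an assumption taken from the hadoop-openstack documentation and should be verified against SwiftProtocolConstants in your Hadoop release.

import org.apache.hadoop.conf.Configuration;

public class SwiftPartitionSizeExample {
  public static void main(String[] args) {
    Configuration conf = new Configuration();
    // Assumed key name for the partition size, expressed in KB.
    conf.setLong("fs.swift.partsize", 1024);           // 1024 KB => 1 MB parts

    // Mirrors the constructor: filePartSize = 1024L * partSizeKB (bytes).
    long partSizeKB = conf.getLong("fs.swift.partsize", 1024);
    long filePartSize = 1024L * partSizeKB;

    // Writing 2.5 MB through the stream with a 1 MB part size uploads two full
    // partitions during write(); close() uploads the final 0.5 MB partition
    // plus a manifest, so getPartitionsWritten() would then report 3.
    long bytesToWrite = 5L * 512 * 1024;
    System.out.println("full partitions during write(): "
        + bytesToWrite / filePartSize);
  }
}

With the default (larger) partition size, small files never trigger partUpload() during write() and are uploaded in a single PUT on close().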



