All downloads are free. Search and download functionality uses the official Maven repository.

alluxio.underfs.obs.OBSUnderFileSystem Maven / Gradle / Ivy

There is a newer version: 313
Show newest version
/*
 * The Alluxio Open Foundation licenses this work under the Apache License, version 2.0
 * (the "License"). You may not use this work except in compliance with the License, which is
 * available at www.apache.org/licenses/LICENSE-2.0
 *
 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND,
 * either express or implied, as more fully set forth in the License.
 *
 * See the NOTICE file distributed with this work for information regarding copyright ownership.
 */

package alluxio.underfs.obs;

import alluxio.AlluxioURI;
import alluxio.Constants;
import alluxio.PositionReader;
import alluxio.conf.PropertyKey;
import alluxio.retry.RetryPolicy;
import alluxio.underfs.ObjectUnderFileSystem;
import alluxio.underfs.UfsFileStatus;
import alluxio.underfs.UfsStatus;
import alluxio.underfs.UnderFileSystem;
import alluxio.underfs.UnderFileSystemConfiguration;
import alluxio.underfs.options.OpenOptions;
import alluxio.util.UnderFileSystemUtils;
import alluxio.util.executor.ExecutorServiceFactories;
import alluxio.util.io.PathUtils;

import com.google.common.base.Preconditions;
import com.google.common.base.Suppliers;
import com.google.common.util.concurrent.ListeningExecutorService;
import com.google.common.util.concurrent.MoreExecutors;
import com.obs.services.ObsClient;
import com.obs.services.exception.ObsException;
import com.obs.services.model.AbortMultipartUploadRequest;
import com.obs.services.model.DeleteObjectsRequest;
import com.obs.services.model.DeleteObjectsResult;
import com.obs.services.model.KeyAndVersion;
import com.obs.services.model.ListMultipartUploadsRequest;
import com.obs.services.model.ListObjectsRequest;
import com.obs.services.model.MultipartUpload;
import com.obs.services.model.MultipartUploadListing;
import com.obs.services.model.ObjectListing;
import com.obs.services.model.ObjectMetadata;
import com.obs.services.model.ObsObject;
import com.obs.services.model.SetObjectMetadataRequest;
import com.obs.services.model.fs.RenameRequest;
import com.obs.services.model.fs.RenameResult;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.ByteArrayInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ExecutorService;
import java.util.function.Supplier;
import java.util.stream.Collectors;
import javax.annotation.concurrent.ThreadSafe;

/**
 * Huawei OBS {@link UnderFileSystem} implementation.
 */
@ThreadSafe
public class OBSUnderFileSystem extends ObjectUnderFileSystem {
  private static final Logger LOG = LoggerFactory.getLogger(OBSUnderFileSystem.class);

  /**
   * Suffix for an empty file to flag it as a directory.
   */
  private static final String FOLDER_SUFFIX = PATH_SEPARATOR;

  /**
   * Huawei OBS client.
   */
  private final ObsClient mClient;

  /**
   * Bucket name of user's configured Alluxio bucket.
   */
  private final String mBucketName;

  /**
   * Bucket type as handed to the constructor. The value "pfs" (compared case-insensitively)
   * switches on the PFS-specific code paths of this class.
   */
  private final String mBucketType;

  /** Supplies the executor service for the streaming upload; the pool is built on first use. */
  private final Supplier<ListeningExecutorService> mStreamingUploadExecutor;

  /** Supplies the executor service for the multipart upload; the pool is built on first use. */
  private final Supplier<ListeningExecutorService> mMultipartUploadExecutor;

  /**
   * Builds an {@link OBSUnderFileSystem} from the given URI and configuration.
   *
   * <p>The access key, secret key, endpoint and bucket type must all be set in {@code conf};
   * each one is verified with a precondition check before any of them is read.
   *
   * @param uri the {@link AlluxioURI} for this UFS
   * @param conf the configuration for this UFS
   * @return the created {@link OBSUnderFileSystem} instance
   */
  public static OBSUnderFileSystem createInstance(AlluxioURI uri,
      UnderFileSystemConfiguration conf) {
    // All of these properties are mandatory; they are checked in this fixed order.
    PropertyKey[] requiredKeys = {
        PropertyKey.OBS_ACCESS_KEY,
        PropertyKey.OBS_SECRET_KEY,
        PropertyKey.OBS_ENDPOINT,
        PropertyKey.OBS_BUCKET_TYPE,
    };
    for (PropertyKey requiredKey : requiredKeys) {
      Preconditions.checkArgument(conf.isSet(requiredKey),
          "Property %s is required to connect to OBS", requiredKey);
    }

    String type = conf.getString(PropertyKey.OBS_BUCKET_TYPE);
    ObsClient client = new ObsClientExt(
        conf.getString(PropertyKey.OBS_ACCESS_KEY),
        conf.getString(PropertyKey.OBS_SECRET_KEY),
        conf.getString(PropertyKey.OBS_ENDPOINT),
        conf.getMountSpecificConf());
    String bucket = UnderFileSystemUtils.getBucketName(uri);
    return new OBSUnderFileSystem(uri, client, bucket, type, conf);
  }

  /**
   * Constructor for {@link OBSUnderFileSystem}.
   *
   * @param uri the {@link AlluxioURI} for this UFS
   * @param obsClient Huawei OBS client
   * @param bucketName bucket name of user's configured Alluxio bucket
   * @param bucketType bucket type string; "pfs" (any case) enables the PFS-specific behavior
   * @param conf configuration for this UFS
   */
  protected OBSUnderFileSystem(AlluxioURI uri, ObsClient obsClient, String bucketName,
      String bucketType, UnderFileSystemConfiguration conf) {
    super(uri, conf);
    mClient = obsClient;
    mBucketName = bucketName;
    mBucketType = bucketType;

    // Neither thread pool is created here; each one is built the first time it is requested.
    mStreamingUploadExecutor = lazyUploadExecutor(conf,
        PropertyKey.UNDERFS_OBS_STREAMING_UPLOAD_THREADS,
        "alluxio-obs-streaming-upload-worker");
    mMultipartUploadExecutor = lazyUploadExecutor(conf,
        PropertyKey.UNDERFS_OBS_MULTIPART_UPLOAD_THREADS,
        "alluxio-obs-multipart-upload-worker");
  }

  /**
   * Creates a memoizing supplier of a fixed-size upload thread pool.
   *
   * @param conf configuration the thread count is read from when the pool is first requested
   * @param threadCountKey property holding the number of threads of the pool
   * @param workerName name passed to the fixed thread pool factory
   * @return supplier that builds the listening executor once and then returns the same instance
   */
  private static Supplier<ListeningExecutorService> lazyUploadExecutor(
      UnderFileSystemConfiguration conf, PropertyKey threadCountKey, String workerName) {
    return Suppliers.memoize(() -> {
      int threadCount = conf.getInt(threadCountKey);
      ExecutorService pool =
          ExecutorServiceFactories.fixedThreadPool(workerName, threadCount).create();
      return MoreExecutors.listeningDecorator(pool);
    });
  }

  /**
   * Aborts stale intermediate multipart uploads in the bucket.
   *
   * <p>Every multipart upload whose initiation date is older than the current time minus
   * {@link PropertyKey#UNDERFS_OBS_INTERMEDIATE_UPLOAD_CLEAN_AGE} is aborted. The listing is
   * paged: each page is processed before the next one is requested, so the last page is
   * handled as well and no extra request is issued once the listing is no longer truncated.
   */
  @Override
  public void cleanup() {
    long cleanAge = mUfsConf.getMs(PropertyKey.UNDERFS_OBS_INTERMEDIATE_UPLOAD_CLEAN_AGE);
    Date cleanBefore = new Date(System.currentTimeMillis() - cleanAge);
    ListMultipartUploadsRequest request = new ListMultipartUploadsRequest(mBucketName);
    MultipartUploadListing uploadListing;
    do {
      uploadListing = mClient.listMultipartUploads(request);
      for (MultipartUpload upload : uploadListing.getMultipartTaskList()) {
        if (upload.getInitiatedDate().before(cleanBefore)) {
          mClient.abortMultipartUpload(new AbortMultipartUploadRequest(
              mBucketName, upload.getObjectKey(), upload.getUploadId()));
        }
      }
      // Resume after the last entry of this page. Both "next" markers are needed; using the
      // marker that produced the current page would list the same entries again.
      request.setKeyMarker(uploadListing.getNextKeyMarker());
      request.setUploadIdMarker(uploadListing.getNextUploadIdMarker());
    } while (uploadListing.isTruncated());
  }

  /**
   * @return the type identifier of this under file system, always {@code "obs"}
   */
  @Override
  public String getUnderFSType() {
    final String type = "obs";
    return type;
  }

  /**
   * Creates a position reader for an object in the configured bucket.
   *
   * @param path the path of the file; a UFS prefix, if present, is stripped to get the key
   * @param fileLength the length passed on to the reader
   * @return a new {@link OBSPositionReader} for the object
   */
  @Override
  public PositionReader openPositionRead(String path, long fileLength) {
    String objectKey = stripPrefixIfPresent(path);
    return new OBSPositionReader(mClient, mBucketName, objectKey, fileLength);
  }

  /**
   * Does nothing, because there is currently no ACL integration.
   *
   * @param path ignored
   * @param user ignored
   * @param group ignored
   */
  @Override
  public void setOwner(String path, String user, String group) {
    // Intentionally empty: no-op.
  }

  /**
   * Adds or overwrites one metadata entry of an object while keeping its existing entries.
   *
   * <p>The current metadata is read, copied into a new metadata request, extended with the
   * given entry and written back.
   *
   * @param path the object key handed to the OBS client as is
   * @param name the metadata name to set
   * @param value the metadata value to set
   * @throws IOException if the OBS client fails to read or write the metadata
   */
  @Override
  public void setObjectTagging(String path, String name, String value) throws IOException {
    try {
      ObjectMetadata metadata = mClient.getObjectMetadata(mBucketName, path);
      SetObjectMetadataRequest request = new SetObjectMetadataRequest(mBucketName, path);
      // It's a read-and-update race condition. When there is a competitive conflict scenario,
      // it may lead to inconsistent final results. The final conflict occurs in UFS,
      // UFS will determine the final result.
      for (Map.Entry<String, Object> meta : metadata.getMetadata().entrySet()) {
        request.addUserMetadata(meta.getKey(), String.valueOf(meta.getValue()));
      }
      request.addUserMetadata(name, value);
      mClient.setObjectMetadata(request);
    } catch (ObsException e) {
      // Surface client failures through the declared checked exception, as getObjectTags does.
      throw new IOException("Failed to set attribute of the object", e);
    }
  }

  /**
   * Reads the metadata of an object and returns it as string key/value pairs.
   *
   * @param path the object key handed to the OBS client as is
   * @return an unmodifiable map from metadata name to the string form of its value
   * @throws IOException if the OBS client fails to read the metadata
   */
  @Override
  public Map<String, String> getObjectTags(String path) throws IOException {
    try {
      ObjectMetadata metadata = mClient.getObjectMetadata(mBucketName, path);
      Map<String, String> tags = new HashMap<>();
      for (Map.Entry<String, Object> meta : metadata.getMetadata().entrySet()) {
        tags.put(meta.getKey(), String.valueOf(meta.getValue()));
      }
      return Collections.unmodifiableMap(tags);
    } catch (ObsException e) {
      throw new IOException("Failed to get attribute of the object", e);
    }
  }

  /**
   * Does nothing, because there is currently no ACL integration.
   *
   * @param path ignored
   * @param mode ignored
   */
  @Override
  public void setMode(String path, short mode) throws IOException {
    // Intentionally empty: no-op.
  }

  /**
   * Copies an object to another key within the configured bucket.
   *
   * @param src the source key
   * @param dst the destination key
   * @return true if the copy succeeded, false if the OBS client raised an {@link ObsException}
   */
  @Override
  protected boolean copyObject(String src, String dst) {
    try {
      LOG.debug("Copying {} to {}", src, dst);
      mClient.copyObject(mBucketName, src, mBucketName, dst);
      return true;
    } catch (ObsException e) {
      // The failure is reported through the logger only; nothing is written to stdout.
      LOG.error("Failed to copy file {} to {}", src, dst, e);
      return false;
    }
  }

  /**
   * Stores a zero-length object under the given key.
   *
   * @param key the key of the object to create
   * @return true on success, false if the OBS client raised an {@link ObsException}
   */
  @Override
  public boolean createEmptyObject(String key) {
    try {
      ObjectMetadata emptyMeta = new ObjectMetadata();
      emptyMeta.setContentLength(0L);
      mClient.putObject(mBucketName, key, new ByteArrayInputStream(new byte[0]), emptyMeta);
    } catch (ObsException e) {
      LOG.error("Failed to create object: {}", key, e);
      return false;
    }
    return true;
  }

  /**
   * Opens an output stream that writes the object with the given key.
   *
   * <p>The stream type depends on the configuration: streaming upload takes precedence over
   * multipart upload, and a plain {@link OBSOutputStream} is used when neither is enabled.
   *
   * @param key the key of the object to write
   * @return the output stream for the object
   */
  @Override
  protected OutputStream createObject(String key) throws IOException {
    boolean streamingUpload =
        mUfsConf.getBoolean(PropertyKey.UNDERFS_OBS_STREAMING_UPLOAD_ENABLED);
    if (streamingUpload) {
      return new OBSLowLevelOutputStream(mBucketName, key, mClient,
          mStreamingUploadExecutor.get(), mUfsConf);
    }
    // The multipart switch is only consulted when streaming upload is disabled.
    boolean multipartUpload =
        mUfsConf.getBoolean(PropertyKey.UNDERFS_OBS_MULTIPART_UPLOAD_ENABLED);
    if (multipartUpload) {
      return new OBSMultipartUploadOutputStream(mBucketName, key, mClient,
          mMultipartUploadExecutor.get(), mUfsConf);
    }
    return new OBSOutputStream(mBucketName, key, mClient,
        mUfsConf.getList(PropertyKey.TMP_DIRS));
  }

  /**
   * Deletes a single object from the configured bucket.
   *
   * @param key the key of the object to delete
   * @return true if the delete call returned normally, false if it raised an
   *         {@link ObsException}
   */
  @Override
  protected boolean deleteObject(String key) {
    boolean deleted;
    try {
      mClient.deleteObject(mBucketName, key);
      deleted = true;
    } catch (ObsException e) {
      LOG.error("Failed to delete {}", key, e);
      deleted = false;
    }
    return deleted;
  }

  /**
   * Deletes a batch of objects with a single request.
   *
   * @param keys the keys of the objects to delete
   * @return the keys the service reported as deleted
   * @throws IOException if the OBS client fails to execute the batch delete
   */
  @Override
  protected List<String> deleteObjects(List<String> keys) throws IOException {
    KeyAndVersion[] kvs = keys.stream()
        .map(KeyAndVersion::new)
        .toArray(KeyAndVersion[]::new);
    // NOTE(review): the second argument is presumably the SDK's "quiet" flag; with false the
    // response is expected to list every deleted key - confirm against the OBS SDK.
    DeleteObjectsRequest request = new DeleteObjectsRequest(mBucketName, false, kvs);
    try {
      DeleteObjectsResult result = mClient.deleteObjects(request);
      return result.getDeletedObjectResults()
          .stream()
          .map(DeleteObjectsResult.DeleteObjectResult::getObjectKey)
          .collect(Collectors.toList());
    } catch (ObsException e) {
      throw new IOException("Failed to delete objects", e);
    }
  }

  /**
   * @return the suffix that flags an empty object as a directory
   */
  @Override
  protected String getFolderSuffix() {
    final String suffix = FOLDER_SUFFIX;
    return suffix;
  }

  /**
   * Requests the first chunk of a listing below the given key.
   *
   * @param key the key to list; it is normalized with the path separator first
   * @param recursive when true no delimiter is used, otherwise the path separator is the
   *        delimiter
   * @return the first listing chunk, or null if the underlying list call yielded no result
   */
  @Override
  protected ObjectListingChunk getObjectListingChunk(String key, boolean recursive)
      throws IOException {
    String prefix = PathUtils.normalizePath(key, PATH_SEPARATOR);
    // A key that normalizes to the bare separator is listed with an empty prefix.
    if (prefix.equals(PATH_SEPARATOR)) {
      prefix = "";
    }
    String delimiter;
    if (recursive) {
      delimiter = "";
    } else {
      delimiter = PATH_SEPARATOR;
    }

    ListObjectsRequest request = new ListObjectsRequest(mBucketName);
    request.setPrefix(prefix);
    request.setMaxKeys(getListingChunkLength(mUfsConf));
    request.setDelimiter(delimiter);

    ObjectListing listing = getObjectListingChunk(request);
    if (listing == null) {
      return null;
    }
    return new OBSObjectListingChunk(request, listing);
  }

  /**
   * Executes a list request and returns its result.
   *
   * <p>In a PFS bucket a listing without objects is turned into null when the request prefix
   * is not a directory. Any exception is logged and also results in null.
   *
   * @param request the list request to execute
   * @return the listing, or null as described above
   */
  protected ObjectListing getObjectListingChunk(ListObjectsRequest request) {
    try {
      ObjectListing listing = mClient.listObjects(request);
      boolean emptyNonDirectoryInPfs = isEnvironmentPFS()
          && listing.getObjects().isEmpty()
          && !isDirectory(request.getPrefix());
      return emptyNonDirectoryInPfs ? null : listing;
    } catch (Exception e) {
      LOG.warn("Failed to list path {}", request.getPrefix(), e);
      return null;
    }
  }

  /**
   * Decides from the "mode" metadata entry whether an object is a directory.
   *
   * <p>A missing, non-numeric or negative mode is treated as "not a directory" instead of
   * raising an unchecked exception, because the callers only handle {@link ObsException}.
   *
   * @param meta the object metadata to inspect
   * @return true if the mode has the directory bit {@code 0x4000} set
   */
  private boolean isDirectoryInPFS(ObjectMetadata meta) {
    Object modeValue = meta.getMetadata().get("mode");
    if (modeValue == null) {
      return false;
    }
    int mode;
    try {
      mode = Integer.parseInt(modeValue.toString());
    } catch (NumberFormatException e) {
      LOG.warn("Ignoring non-numeric mode {} in object metadata", modeValue);
      return false;
    }
    if (mode < 0) {
      return false;
    }
    // 0x4000 equals octal 040000; presumably the POSIX S_IFDIR file type bit - TODO confirm.
    final int directoryBit = 0x004000;
    return (directoryBit & mode) != 0;
  }

  /**
   * @return true if the configured bucket type is "pfs" (case-insensitive); a null bucket type
   *         counts as not PFS
   */
  private boolean isEnvironmentPFS() {
    // Literal first, so a null bucket type yields false instead of a NullPointerException.
    return "pfs".equalsIgnoreCase(mBucketType);
  }

  /**
   * Customized {@link ObjectListingChunk} backed by one OBS listing result and the request
   * that produced it.
   */
  private final class OBSObjectListingChunk implements ObjectListingChunk {
    /** The request that produced {@link #mResult}; reused to fetch the following chunk. */
    final ListObjectsRequest mRequest;
    /** The listing result wrapped by this chunk, never null. */
    final ObjectListing mResult;

    /**
     * @param request the request that produced the result
     * @param result the listing result to wrap
     * @throws IOException if the result is null
     */
    OBSObjectListingChunk(ListObjectsRequest request, ObjectListing result) throws IOException {
      mRequest = request;
      mResult = result;
      if (mResult == null) {
        throw new IOException("OBS listing result is null");
      }
    }

    /**
     * @return one status per listed object; the modification time is null when the object
     *         metadata carries no last-modified date
     */
    @Override
    public ObjectStatus[] getObjectStatuses() {
      List<ObsObject> objects = mResult.getObjects();
      ObjectStatus[] ret = new ObjectStatus[objects.size()];
      int i = 0;
      for (ObsObject obj : objects) {
        ObjectMetadata meta = obj.getMetadata();
        Date lastModifiedDate = meta.getLastModified();
        Long lastModifiedTime = lastModifiedDate == null ? null : lastModifiedDate.getTime();
        ret[i++] = new ObjectStatus(obj.getObjectKey(), meta.getEtag(),
            meta.getContentLength(), lastModifiedTime);
      }
      return ret;
    }

    /**
     * @return the common prefixes of the listing result as an array
     */
    @Override
    public String[] getCommonPrefixes() {
      List<String> res = mResult.getCommonPrefixes();
      return res.toArray(new String[0]);
    }

    /**
     * Fetches the chunk following this one.
     *
     * @return the next chunk, or null if this result is not truncated or the client returned
     *         no listing
     * @throws IOException if the OBS client fails to list the next chunk
     */
    @Override
    public ObjectListingChunk getNextChunk() throws IOException {
      if (mResult.isTruncated()) {
        mRequest.setMarker(mResult.getNextMarker());
        ObjectListing nextResult;
        try {
          nextResult = mClient.listObjects(mRequest);
        } catch (ObsException e) {
          // Report client failures through the declared checked exception.
          throw new IOException("Failed to list next chunk of " + mRequest.getPrefix(), e);
        }
        if (nextResult != null) {
          return new OBSObjectListingChunk(mRequest, nextResult);
        }
      }
      return null;
    }

    /**
     * @return whether the wrapped result is truncated, i.e. more chunks can be fetched
     */
    @Override
    public Boolean hasNextChunk() {
      return mResult.isTruncated();
    }
  }

  /**
   * Looks up the status of the object stored under the given key.
   *
   * @param key the object key
   * @return the object status, or null if no metadata is returned, if the metadata lookup
   *         raises an {@link ObsException}, or - in a PFS bucket - if the key refers to a
   *         directory or ends with the path separator
   */
  @Override
  protected ObjectStatus getObjectStatus(String key) {
    try {
      ObjectMetadata meta = mClient.getObjectMetadata(mBucketName, key);
      if (meta == null) {
        return null;
      }
      // When in PFS environment:
      // 1. Directory will be explicitly created and have object meta.
      // 2. File will have object meta even if there is `/` at
      //    the end of the file name (e.g. `/dir1/file1/`).
      // In both cases no object status must be reported.
      if (isEnvironmentPFS() && (isDirectoryInPFS(meta) || key.endsWith(PATH_SEPARATOR))) {
        return null;
      }
      Date modified = meta.getLastModified();
      Long modifiedMs = null;
      if (modified != null) {
        modifiedMs = modified.getTime();
      }
      return new ObjectStatus(key, meta.getEtag(), meta.getContentLength(), modifiedMs);
    } catch (ObsException e) {
      LOG.warn("Failed to get Object {}, return null", key, e);
      return null;
    }
  }

  /**
   * Returns the status of a file or directory.
   *
   * @param path the path to look up
   * @return the directory status if the path is a directory, otherwise the file status built
   *         from the object status and the default permissions
   * @throws FileNotFoundException if the path is not a directory and no object status can be
   *         obtained for it
   * @throws IOException if the directory check or directory status lookup fails
   */
  @Override
  public UfsStatus getStatus(String path) throws IOException {
    if (isDirectory(path)) {
      return getDirectoryStatus(path);
    }
    ObjectStatus status = getObjectStatus(stripPrefixIfPresent(path));
    if (status == null) {
      // getObjectStatus returns null for missing or unreadable objects; fail with a checked
      // exception rather than dereferencing null below.
      throw new FileNotFoundException("Failed to get status of " + path);
    }
    ObjectPermissions permissions = getPermissions();
    return new UfsFileStatus(path, status.getContentHash(), status.getContentLength(),
        status.getLastModifiedTimeMs(), permissions.getOwner(), permissions.getGroup(),
        permissions.getMode(), mUfsConf.getBytes(PropertyKey.USER_BLOCK_SIZE_BYTES_DEFAULT));
  }

  /**
   * Checks whether a path is a directory.
   *
   * <p>Outside of PFS buckets the superclass decides. In a PFS bucket the root is always a
   * directory, and any other path is one when its object metadata says so.
   *
   * @param path the path to check
   * @return true if the path is a directory; false otherwise, including when the metadata
   *         lookup raises an {@link ObsException}
   */
  @Override
  public boolean isDirectory(String path) throws IOException {
    if (!isEnvironmentPFS()) {
      return super.isDirectory(path);
    }
    if (isRoot(path)) {
      return true;
    }
    String objectKey = stripPrefixIfPresent(path);
    try {
      ObjectMetadata meta = mClient.getObjectMetadata(mBucketName, objectKey);
      return meta != null && isDirectoryInPFS(meta);
    } catch (ObsException e) {
      LOG.warn("Failed to get Object {}", objectKey, e);
      return false;
    }
  }

  /**
   * Returns default permissions, because there is currently no ACL integration.
   *
   * @return permissions with empty owner and group and the default file system mode
   */
  @Override
  protected ObjectPermissions getPermissions() {
    String owner = "";
    String group = "";
    return new ObjectPermissions(owner, group, Constants.DEFAULT_FILE_SYSTEM_MODE);
  }

  /**
   * @return the OBS header constant followed by the bucket name
   */
  @Override
  protected String getRootKey() {
    String rootKey = Constants.HEADER_OBS + mBucketName;
    return rootKey;
  }

  /**
   * Opens an input stream on the object with the given key.
   *
   * @param key the key of the object to read
   * @param options open options; the offset is used as the start position of the stream
   * @param retryPolicy the retry policy handed to the stream
   * @return an {@link OBSInputStream} for the object
   * @throws IOException if creating the stream raises an {@link ObsException}
   */
  @Override
  protected InputStream openObject(String key, OpenOptions options,
      RetryPolicy retryPolicy) throws IOException {
    try {
      return new OBSInputStream(mBucketName, key, mClient, options.getOffset(), retryPolicy,
          mUfsConf.getBytes(PropertyKey.UNDERFS_OBJECT_STORE_MULTI_RANGE_CHUNK_SIZE));
    } catch (ObsException e) {
      // Keep the original exception as the cause so its stack trace and details survive.
      throw new IOException(e.getMessage(), e);
    }
  }

  /**
   * Renames a directory.
   *
   * <p>Outside of PFS buckets the superclass implementation is used. In a PFS bucket the
   * folder is renamed with a single rename request of the OBS client.
   *
   * @param src the source directory path
   * @param dst the destination directory path
   * @return true if the rename succeeded; false if the response status is not a success code
   *         or the client raised an {@link ObsException}
   */
  @Override
  public boolean renameDirectory(String src, String dst) throws IOException {
    if (!isEnvironmentPFS()) {
      return super.renameDirectory(src, dst);
    }
    try {
      String srcKey = stripPrefixIfPresent(src);
      String dstKey = stripPrefixIfPresent(dst);
      RenameResult response = mClient.renameFolder(
          new RenameRequest(mBucketName, srcKey, dstKey));
      if (!isSuccessResponse(response.getStatusCode())) {
        LOG.error("Failed to rename directory from {} to {}.", src, dst);
        return false;
      }
      return true;
    } catch (ObsException e) {
      LOG.error("Failed to rename directory from {} to {}.", src, dst, e);
      return false;
    }
  }

  /**
   * Tells whether an HTTP status code counts as success.
   *
   * @param statusCode the status code of a response
   * @return true for 200 OK, 201 Created and 204 No Content, false for anything else
   */
  private boolean isSuccessResponse(int statusCode) {
    switch (statusCode) {
      case 200:
      case 201:
      case 204:
        return true;
      default:
        return false;
    }
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy