All downloads are free. Search and download functionality uses the official Maven repository.

com.google.cloud.hadoop.gcsio.MetadataReadOnlyGoogleCloudStorage Maven / Gradle / Ivy

Go to download

An implementation of org.apache.hadoop.fs.FileSystem targeting Google Cloud Storage

There is a newer version: 3.0.4
Show newest version
/**
 * Copyright 2014 Google Inc. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.google.cloud.hadoop.gcsio;

import com.google.cloud.hadoop.util.LogUtil;
import com.google.common.base.Function;
import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;

import java.io.IOException;
import java.nio.channels.WritableByteChannel;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

/**
 * MetadataReadOnlyGoogleCloudStorage holds a collection of Storage object/bucket metadata entries
 * and serves listObjectNames, listObjectInfo, getItemInfos, and getItemInfo exclusively from the
 * in-memory cache. All other operations will throw an UnsupportedOperationException. For the time
 * being, such unsupported operations include bucket info list operations like listBucketInfo and
 * listBucketNames.
 *
 * <p>This instance will supplement fake GoogleCloudStorageItemInfos for pure implicit directories
 * if there is no metadata entry for the directory object itself. This means that some directory
 * objects will list info that is inconsistent with an actual listing.
 */
public class MetadataReadOnlyGoogleCloudStorage
    implements GoogleCloudStorage {
  // Logger.
  public static final LogUtil log = new LogUtil(MetadataReadOnlyGoogleCloudStorage.class);

  // Cache of all metadata held by this instance. It is populated at construction time; in
  // addition, listObjectInfo inserts fake entries for implicit directories it discovers, so the
  // cache is not strictly immutable after construction.
  private final DirectoryListCache resourceCache = new InMemoryDirectoryListCache();

  // Maps an item info to its object name; used to derive name listings from info listings.
  // TODO(user): Consolidate this with the equivalent object in LaggedGoogleCloudStorage.java.
  private static final Function<GoogleCloudStorageItemInfo, String> ITEM_INFO_TO_NAME =
      new Function<GoogleCloudStorageItemInfo, String>() {
        @Override
        public String apply(GoogleCloudStorageItemInfo itemInfo) {
          return itemInfo.getObjectName();
        }
      };

  /**
   * Constructs a MetadataReadOnlyGoogleCloudStorage which can be used for temporary contexts
   * where only object metadata read operations will be used through the GoogleCloudStorage
   * interface.
   *
   * @param itemInfos The collection of item infos with which to serve all list/get object requests.
   * @throws NullPointerException if {@code itemInfos} is null.
   * @throws IOException declared by the signature; presumably propagated from the cache calls.
   */
  public MetadataReadOnlyGoogleCloudStorage(Collection<GoogleCloudStorageItemInfo> itemInfos)
      throws IOException {
    Preconditions.checkNotNull(itemInfos, "itemInfos must not be null");

    // Entries never expire for this use case.
    resourceCache.getMutableConfig().setMaxEntryAgeMillis(Long.MAX_VALUE);
    resourceCache.getMutableConfig().setMaxInfoAgeMillis(Long.MAX_VALUE);

    log.debug("Populating cache with %d entries.", itemInfos.size());
    for (GoogleCloudStorageItemInfo itemInfo : itemInfos) {
      resourceCache.putResourceId(itemInfo.getResourceId()).setItemInfo(itemInfo);
    }
  }

  /** Unsupported; always throws UnsupportedOperationException. */
  @Override
  public WritableByteChannel create(final StorageResourceId resourceId)
      throws IOException {
    throw new UnsupportedOperationException();
  }

  /** Unsupported; always throws UnsupportedOperationException. */
  @Override
  public WritableByteChannel create(StorageResourceId resourceId, CreateObjectOptions options)
      throws IOException {
    throw new UnsupportedOperationException();
  }

  /** Unsupported; always throws UnsupportedOperationException. */
  @Override
  public void createEmptyObject(StorageResourceId resourceId)
      throws IOException {
    throw new UnsupportedOperationException();
  }

  /** Unsupported; always throws UnsupportedOperationException. */
  @Override
  public void createEmptyObject(StorageResourceId resourceId, CreateObjectOptions options)
      throws IOException {
    throw new UnsupportedOperationException();
  }

  /** Unsupported; always throws UnsupportedOperationException. */
  @Override
  public void createEmptyObjects(List<StorageResourceId> resourceIds)
      throws IOException {
    throw new UnsupportedOperationException();
  }

  /** Unsupported; always throws UnsupportedOperationException. */
  @Override
  public void createEmptyObjects(List<StorageResourceId> resourceIds, CreateObjectOptions options)
      throws IOException {
    throw new UnsupportedOperationException();
  }

  /** Unsupported; always throws UnsupportedOperationException. */
  @Override
  public SeekableReadableByteChannel open(StorageResourceId resourceId)
      throws IOException {
    throw new UnsupportedOperationException();
  }

  /** Unsupported; always throws UnsupportedOperationException. */
  @Override
  public void create(String bucketName)
      throws IOException {
    throw new UnsupportedOperationException();
  }

  /** Unsupported; always throws UnsupportedOperationException. */
  @Override
  public void deleteBuckets(List<String> bucketNames)
      throws IOException {
    throw new UnsupportedOperationException();
  }

  /** Unsupported; always throws UnsupportedOperationException. */
  @Override
  public void deleteObjects(List<StorageResourceId> fullObjectNames)
      throws IOException {
    throw new UnsupportedOperationException();
  }

  /** Unsupported; always throws UnsupportedOperationException. */
  @Override
  public void copy(String srcBucketName, List<String> srcObjectNames,
      String dstBucketName, List<String> dstObjectNames)
      throws IOException {
    throw new UnsupportedOperationException();
  }

  /** Unsupported; always throws UnsupportedOperationException. */
  @Override
  public List<String> listBucketNames()
      throws IOException {
    throw new UnsupportedOperationException();
  }

  /** Unsupported; always throws UnsupportedOperationException. */
  @Override
  public List<GoogleCloudStorageItemInfo> listBucketInfo()
      throws IOException {
    throw new UnsupportedOperationException();
  }

  /**
   * Lists object names from the cache; equivalent to the four-argument overload called with
   * GoogleCloudStorage.MAX_RESULTS_UNLIMITED.
   */
  @Override
  public List<String> listObjectNames(
      String bucketName, String objectNamePrefix, String delimiter)
      throws IOException {
    return listObjectNames(bucketName, objectNamePrefix, delimiter,
        GoogleCloudStorage.MAX_RESULTS_UNLIMITED);
  }

  /**
   * Lists object names by running listObjectInfo with the same arguments and projecting each
   * resulting item info to its object name.
   *
   * @return a transformed view (via Lists.transform) over the list produced by listObjectInfo.
   */
  @Override
  public List<String> listObjectNames(
      String bucketName, String objectNamePrefix, String delimiter,
      long maxResults)
      throws IOException {
    log.debug("listObjectNames(%s, %s, %s, %d)",
        bucketName, objectNamePrefix, delimiter, maxResults);
    return Lists.transform(
        listObjectInfo(bucketName, objectNamePrefix, delimiter, maxResults),
        ITEM_INFO_TO_NAME);
  }

  /**
   * Uses shared prefix-matching logic to filter entries from the metadata cache. For implicit
   * prefix matches with no corresponding real directory object, adds a fake directory object
   * with creationTime == 0. Delegates to the four-argument overload with
   * GoogleCloudStorage.MAX_RESULTS_UNLIMITED.
   */
  @Override
  public List<GoogleCloudStorageItemInfo> listObjectInfo(
      final String bucketName, String objectNamePrefix, String delimiter)
      throws IOException {
    return listObjectInfo(bucketName, objectNamePrefix, delimiter,
        GoogleCloudStorage.MAX_RESULTS_UNLIMITED);
  }

  /**
   * Uses shared prefix-matching logic to filter entries from the metadata cache. For implicit
   * prefix matches with no corresponding real directory object, adds a fake directory object
   * with creationTime == 0.
   *
   * <p>Side effect: each fake directory entry that is returned is also inserted into the cache so
   * that a later getItemInfo call for it finds an entry.
   *
   * <p>NOTE(review): {@code maxResults} is only logged here; the returned list is never truncated
   * to it. Confirm whether any caller relies on a limit being applied.
   *
   * @return a new mutable list of matched item infos followed by any fake directory infos; empty
   *     if the cache returned null for the listing.
   * @throws IllegalStateException if a matched cache entry has no item info attached.
   */
  @Override
  public List<GoogleCloudStorageItemInfo> listObjectInfo(
      final String bucketName, String objectNamePrefix, String delimiter,
      long maxResults)
      throws IOException {
    log.debug("listObjectInfo(%s, %s, %s, %d)",
        bucketName, objectNamePrefix, delimiter, maxResults);
    List<GoogleCloudStorageItemInfo> allObjectInfos = new ArrayList<>();
    Set<String> retrievedNames = new HashSet<>();
    // Filled in by getObjectList with the prefixes matched under the given delimiter.
    Set<String> prefixes = new HashSet<>();
    List<CacheEntry> cachedObjects = resourceCache.getObjectList(
        bucketName, objectNamePrefix, delimiter, prefixes);
    if (cachedObjects != null) {
      // Pull the itemInfos out of all the matched entries; in our usage here of DirectoryListCache,
      // we expect the info to *always* be available.
      for (CacheEntry entry : cachedObjects) {
        GoogleCloudStorageItemInfo info = entry.getItemInfo();
        Preconditions.checkState(
            info != null, "Cache entry missing info for name '%s'!", entry.getResourceId());
        allObjectInfos.add(info);
        retrievedNames.add(entry.getResourceId().getObjectName());
      }

      // Check whether each raw prefix already had a valid real entry; if not, we'll add a fake
      // directory object, but we'll keep track of it.
      for (String prefix : prefixes) {
        if (!retrievedNames.contains(prefix)) {
          log.debug("Found implicit directory '%s'. Adding fake entry for it.", prefix);
          GoogleCloudStorageItemInfo fakeInfo = new GoogleCloudStorageItemInfo(
              new StorageResourceId(bucketName, prefix), 0, 0, null, null);
          allObjectInfos.add(fakeInfo);
          retrievedNames.add(prefix);

          // Also add a concrete object for each implicit directory, since the caller may decide
          // to call getItemInfo sometime later after listing it.
          resourceCache.putResourceId(fakeInfo.getResourceId()).setItemInfo(fakeInfo);
        }
      }
    }
    return allObjectInfos;
  }

  /**
   * Pure fetch from cache: looks up each resource id with getItemInfo and returns the results in
   * the same order as the input list.
   */
  @Override
  public List<GoogleCloudStorageItemInfo> getItemInfos(List<StorageResourceId> resourceIds)
      throws IOException {
    // Pass the list itself rather than calling toString() eagerly, consistent with getItemInfo.
    log.debug("getItemInfos(%s)", resourceIds);
    List<GoogleCloudStorageItemInfo> infos = new ArrayList<>();
    for (StorageResourceId resourceId : resourceIds) {
      infos.add(getItemInfo(resourceId));
    }
    return infos;
  }

  /** Unsupported; always throws UnsupportedOperationException. */
  @Override
  public List<GoogleCloudStorageItemInfo> updateItems(List<UpdatableItemInfo> itemInfoList)
      throws IOException {
    throw new UnsupportedOperationException();
  }

  /**
   * Pure fetch from cache.
   *
   * @return the cached item info, or the result of
   *     GoogleCloudStorageImpl.createItemInfoForNotFound if the cache has no entry for the id.
   * @throws IllegalStateException if a cache entry exists but has no item info attached.
   */
  @Override
  public GoogleCloudStorageItemInfo getItemInfo(StorageResourceId resourceId)
      throws IOException {
    log.debug("getItemInfo(%s)", resourceId);
    CacheEntry entry = resourceCache.getCacheEntry(resourceId);
    if (entry == null) {
      // TODO(user): Move the createItemInfoForNotFound method into GoogleCloudStorageItemInfo.
      return GoogleCloudStorageImpl.createItemInfoForNotFound(resourceId);
    } else {
      GoogleCloudStorageItemInfo info = entry.getItemInfo();
      Preconditions.checkState(
          info != null, "Cache entry missing info for name '%s'!", entry.getResourceId());
      return info;
    }
  }

  /** Only logs the call; no resources are released here. */
  @Override
  public void close() {
    log.debug("close()");
  }

  /** Unsupported; always throws UnsupportedOperationException. */
  @Override
  public void waitForBucketEmpty(String bucketName)
      throws IOException {
    throw new UnsupportedOperationException();
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy