All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.google.cloud.hadoop.gcsio.testing.InMemoryGoogleCloudStorage Maven / Gradle / Ivy

/*
 * Copyright 2014 Google Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.google.cloud.hadoop.gcsio.testing;

import static com.google.cloud.hadoop.gcsio.GoogleCloudStorageExceptions.createFileNotFoundException;
import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Strings.isNullOrEmpty;

import com.google.api.client.util.Clock;
import com.google.cloud.hadoop.gcsio.CreateBucketOptions;
import com.google.cloud.hadoop.gcsio.CreateObjectOptions;
import com.google.cloud.hadoop.gcsio.FolderInfo;
import com.google.cloud.hadoop.gcsio.GoogleCloudStorage;
import com.google.cloud.hadoop.gcsio.GoogleCloudStorageExceptions;
import com.google.cloud.hadoop.gcsio.GoogleCloudStorageImpl;
import com.google.cloud.hadoop.gcsio.GoogleCloudStorageItemInfo;
import com.google.cloud.hadoop.gcsio.GoogleCloudStorageOptions;
import com.google.cloud.hadoop.gcsio.GoogleCloudStorageReadOptions;
import com.google.cloud.hadoop.gcsio.GoogleCloudStorageStrings;
import com.google.cloud.hadoop.gcsio.ListFolderOptions;
import com.google.cloud.hadoop.gcsio.ListObjectOptions;
import com.google.cloud.hadoop.gcsio.StorageResourceId;
import com.google.cloud.hadoop.gcsio.UpdatableItemInfo;
import com.google.errorprone.annotations.CanIgnoreReturnValue;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.net.URI;
import java.nio.ByteBuffer;
import java.nio.channels.SeekableByteChannel;
import java.nio.channels.WritableByteChannel;
import java.nio.file.FileAlreadyExistsException;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.stream.Collectors;

/**
 * InMemoryGoogleCloudStorage overrides the public methods of GoogleCloudStorage by implementing all
 * the equivalent bucket/object semantics with local in-memory storage.
 */
public class InMemoryGoogleCloudStorage implements GoogleCloudStorage {

  private static final CreateObjectOptions EMPTY_OBJECT_CREATE_OPTIONS =
      CreateObjectOptions.DEFAULT_OVERWRITE.toBuilder()
          .setEnsureEmptyObjectsMetadataMatch(false)
          .build();

  // Mapping from bucketName to structs representing a bucket.
  private final Map bucketLookup = new TreeMap<>();
  private final GoogleCloudStorageOptions storageOptions;
  private final Clock clock;

  public InMemoryGoogleCloudStorage() {
    this(getInMemoryGoogleCloudStorageOptions());
  }

  public InMemoryGoogleCloudStorage(GoogleCloudStorageOptions storageOptions) {
    this(storageOptions, Clock.SYSTEM);
  }

  public InMemoryGoogleCloudStorage(GoogleCloudStorageOptions storageOptions, Clock clock) {
    this.storageOptions = storageOptions;
    this.clock = clock;
  }

  public static GoogleCloudStorageOptions getInMemoryGoogleCloudStorageOptions() {
    return GoogleCloudStorageOptions.builder().setAppName("GHFS/in-memory").build();
  }

  @Override
  public GoogleCloudStorageOptions getOptions() {
    return storageOptions;
  }

  private boolean validateBucketName(String bucketName) {
    // Validation as per https://developers.google.com/storage/docs/bucketnaming
    if (isNullOrEmpty(bucketName)) {
      return false;
    }

    if (bucketName.length() < 3) {
      return false;
    }

    if (!bucketName.matches("^[a-z0-9][a-z0-9_.-]*[a-z0-9]$")) {
      return false;
    }

    return bucketName.length() <= 63;

    // TODO(user): Handle dots and names longer than 63, but less than 222.
  }

  private boolean validateObjectName(String objectName) {
    // Validation as per https://developers.google.com/storage/docs/bucketnaming
    // Object names must be less than 1024 bytes and may not contain
    // CR or LF characters.
    return !(objectName.length() > 1024
        || objectName.indexOf((char) 0x0A) > -1
        || objectName.indexOf((char) 0x0D) > -1);
  }

  @Override
  public synchronized WritableByteChannel create(
      StorageResourceId resourceId, CreateObjectOptions options) throws IOException {
    if (!bucketLookup.containsKey(resourceId.getBucketName())) {
      throw new IOException(
          String.format(
              "Tried to insert object '%s' into nonexistent bucket '%s'",
              resourceId.getObjectName(), resourceId.getBucketName()));
    }
    if (!validateObjectName(resourceId.getObjectName())) {
      throw new IOException("Error creating object. Invalid name: " + resourceId.getObjectName());
    }
    if (resourceId.hasGenerationId() && resourceId.getGenerationId() != 0L) {
      GoogleCloudStorageItemInfo itemInfo = getItemInfo(resourceId);
      if (itemInfo.getContentGeneration() != resourceId.getGenerationId()) {
        throw new IOException(
            String.format(
                "Required generationId '%d' doesn't match existing '%d' for '%s'",
                resourceId.getGenerationId(), itemInfo.getContentGeneration(), resourceId));
      }
    }
    if (!options.isOverwriteExisting() || resourceId.getGenerationId() == 0L) {
      if (getItemInfo(resourceId).exists()) {
        throw new FileAlreadyExistsException(String.format("%s exists.", resourceId));
      }
    }
    InMemoryObjectEntry entry =
        new InMemoryObjectEntry(
            resourceId.getBucketName(),
            resourceId.getObjectName(),
            clock.currentTimeMillis(),
            clock.currentTimeMillis(),
            options.getContentType(),
            options.getContentEncoding(),
            options.getMetadata());
    bucketLookup.get(resourceId.getBucketName()).add(entry);
    return entry.getWriteChannel();
  }

  @Override
  public synchronized void createBucket(String bucketName, CreateBucketOptions options)
      throws IOException {
    if (!validateBucketName(bucketName)) {
      throw new IOException("Error creating bucket. Invalid name: " + bucketName);
    }
    if (bucketLookup.containsKey(bucketName)) {
      throw new FileAlreadyExistsException("Bucket '" + bucketName + "' already exists");
    }
    bucketLookup.put(
        bucketName,
        new InMemoryBucketEntry(
            bucketName, clock.currentTimeMillis(), clock.currentTimeMillis(), options));
  }

  @Override
  public synchronized void createEmptyObject(StorageResourceId resourceId) throws IOException {
    createEmptyObject(resourceId, EMPTY_OBJECT_CREATE_OPTIONS);
  }

  @Override
  public synchronized void createEmptyObject(
      StorageResourceId resourceId, CreateObjectOptions options) throws IOException {
    // TODO(user): Since this class is not performance-tuned, we'll just delegate to the
    // write-channel version of the method.
    create(resourceId, options).close();
  }

  @Override
  public synchronized void createEmptyObjects(List resourceIds)
      throws IOException {
    createEmptyObjects(resourceIds, EMPTY_OBJECT_CREATE_OPTIONS);
  }

  @Override
  public synchronized void createEmptyObjects(
      List resourceIds, CreateObjectOptions options) throws IOException {
    for (StorageResourceId resourceId : resourceIds) {
      createEmptyObject(resourceId, options);
    }
  }

  @Override
  public SeekableByteChannel open(
      StorageResourceId resourceId, GoogleCloudStorageReadOptions readOptions) throws IOException {
    return open(getItemInfo(resourceId), readOptions);
  }

  @Override
  public SeekableByteChannel open(
      GoogleCloudStorageItemInfo itemInfo, GoogleCloudStorageReadOptions readOptions)
      throws IOException {
    if (!itemInfo.exists()) {
      IOException notFoundException =
          createFileNotFoundException(
              itemInfo.getBucketName(), itemInfo.getObjectName(), /* cause= */ null);

      if (readOptions.isFastFailOnNotFoundEnabled()) {
        throw notFoundException;
      }

      // We'll need to simulate a lazy-evaluating byte channel which only detects nonexistence
      // on size() and read(ByteBuffer) calls.
      return new SeekableByteChannel() {
        private long position = 0;
        private boolean isOpen = true;

        @Override
        public long position() {
          return position;
        }

        @CanIgnoreReturnValue
        @Override
        public SeekableByteChannel position(long newPosition) {
          position = newPosition;
          return this;
        }

        @Override
        public int read(ByteBuffer dst) throws IOException {
          throw notFoundException;
        }

        @Override
        public long size() throws IOException {
          throw notFoundException;
        }

        @Override
        public SeekableByteChannel truncate(long size) {
          throw new UnsupportedOperationException("Cannot mutate read-only channel");
        }

        @Override
        public int write(ByteBuffer src) {
          throw new UnsupportedOperationException("Cannot mutate read-only channel");
        }

        @Override
        public void close() {
          isOpen = false;
        }

        @Override
        public boolean isOpen() {
          return isOpen;
        }
      };
    }

    return bucketLookup
        .get(itemInfo.getBucketName())
        .get(itemInfo.getObjectName())
        .getReadChannel(itemInfo.getBucketName(), itemInfo.getObjectName(), readOptions);
  }

  @Override
  public synchronized void deleteBuckets(List bucketNames) throws IOException {
    boolean hasError = false;
    for (String bucketName : bucketNames) {
      // TODO(user): Enforcement of not being able to delete non-empty buckets should probably
      // also
      // be in here, but gcsfs handles it explicitly when it calls listObjectInfo.
      if (bucketLookup.containsKey(bucketName)) {
        bucketLookup.remove(bucketName);
      } else {
        hasError = true;
      }
      hasError = hasError || !validateBucketName(bucketName);
    }

    if (hasError) {
      throw new IOException("Error deleting");
    }
  }

  @Override
  public synchronized void deleteObjects(List fullObjectNames)
      throws IOException {

    for (StorageResourceId resourceId : fullObjectNames) {
      if (!validateObjectName(resourceId.getObjectName())) {
        throw new IOException("Error deleting object. Invalid name: " + resourceId.getObjectName());
      }
    }

    for (StorageResourceId fullObjectName : fullObjectNames) {
      String bucketName = fullObjectName.getBucketName();
      String objectName = fullObjectName.getObjectName();
      if (fullObjectName.hasGenerationId()) {
        GoogleCloudStorageItemInfo existingInfo = getItemInfo(fullObjectName);
        if (existingInfo.getContentGeneration() != fullObjectName.getGenerationId()) {
          throw new IOException(
              String.format(
                  "Required generationId '%d' doesn't match existing '%d' for '%s'",
                  fullObjectName.getGenerationId(),
                  existingInfo.getContentGeneration(),
                  fullObjectName));
        }
      }
      bucketLookup.get(bucketName).remove(objectName);
    }
  }

  @Override
  public void deleteFolders(List folders) throws IOException {
    throw new IOException("Not implemented");
  }

  @Override
  public synchronized void copy(
      String srcBucketName,
      List srcObjectNames,
      String dstBucketName,
      List dstObjectNames)
      throws IOException {
    GoogleCloudStorageImpl.validateCopyArguments(
        srcBucketName, srcObjectNames, dstBucketName, dstObjectNames, this);

    // Gather FileNotFoundExceptions for individual objects, but only throw a single combined
    // exception at the end.
    // TODO(user): Add a unittest for this entire class to test for the behavior of partial
    // failures; there is no way to do so in GCSFSIT because it only indirectly calls GCS.copy.
    List innerExceptions = new ArrayList<>();

    // Perform the copy operations.
    for (int i = 0; i < srcObjectNames.size(); i++) {
      // Due to the metadata-copy semantics of GCS, we copy the object container, but not the
      // byte[]
      // contents; the write-once constraint means this behavior is indistinguishable from a
      // deep
      // copy, but the behavior might have to become complicated if GCS ever supports appends.
      if (!getItemInfo(new StorageResourceId(srcBucketName, srcObjectNames.get(i))).exists()) {
        innerExceptions.add(
            createFileNotFoundException(srcBucketName, srcObjectNames.get(i), /* cause= */ null));
        continue;
      }

      InMemoryObjectEntry srcObject = bucketLookup.get(srcBucketName).get(srcObjectNames.get(i));
      bucketLookup
          .get(dstBucketName)
          .add(srcObject.getShallowCopy(dstBucketName, dstObjectNames.get(i)));
    }

    if (!innerExceptions.isEmpty()) {
      throw GoogleCloudStorageExceptions.createCompositeException(innerExceptions);
    }
  }

  @Override
  public synchronized List listBucketNames() {
    return new ArrayList<>(bucketLookup.keySet());
  }

  @Override
  public synchronized List listBucketInfo() {
    List bucketInfos = new ArrayList<>();
    for (InMemoryBucketEntry entry : bucketLookup.values()) {
      bucketInfos.add(entry.getInfo());
    }
    return bucketInfos;
  }

  private synchronized List listObjectNames(
      String bucketName, String objectNamePrefix, ListObjectOptions listOptions) {
    InMemoryBucketEntry bucketEntry = bucketLookup.get(bucketName);
    if (bucketEntry == null) {
      return new ArrayList<>();
    }
    Set uniqueNames = new TreeSet<>();
    for (String objectName : bucketEntry.getObjectNames()) {
      String processedName =
          GoogleCloudStorageStrings.matchListPrefix(objectNamePrefix, objectName, listOptions);
      if (processedName != null) {
        uniqueNames.add(processedName);
      }
      if (listOptions.getMaxResults() > 0 && uniqueNames.size() >= listOptions.getMaxResults()) {
        break;
      }
    }
    if (listOptions.isIncludePrefix() && !uniqueNames.isEmpty() && objectNamePrefix != null) {
      uniqueNames.add(objectNamePrefix);
    }
    return new ArrayList<>(uniqueNames);
  }

  @Override
  public ListPage listObjectInfoPage(
      String bucketName, String objectNamePrefix, ListObjectOptions listOptions, String pageToken)
      throws IOException {
    // TODO: implement pagination
    return new ListPage<>(
        listObjectInfo(bucketName, objectNamePrefix, listOptions), /* nextPageToken= */ null);
  }

  @Override
  public ListPage listFolderInfoForPrefixPage(
      String bucketName,
      String objectNamePrefix,
      ListFolderOptions listFolderOptions,
      String pageToken)
      throws IOException {
    throw new IOException("Not implemented");
  }

  @Override
  public synchronized List listObjectInfo(
      String bucketName, String objectNamePrefix, ListObjectOptions listOptions)
      throws IOException {
    // Since we're just in memory, we can do the naive implementation of just listing names and
    // then calling getItemInfo for each.
    List listedNames =
        listObjectNames(
            bucketName,
            objectNamePrefix,
            listOptions.toBuilder().setMaxResults(MAX_RESULTS_UNLIMITED).build());
    List listedInfo = new ArrayList<>();
    for (String objectName : listedNames) {
      GoogleCloudStorageItemInfo itemInfo =
          getItemInfo(new StorageResourceId(bucketName, objectName));
      if (itemInfo.exists()) {
        listedInfo.add(itemInfo);
      } else if (itemInfo.getResourceId().isStorageObject()) {
        listedInfo.add(
            GoogleCloudStorageItemInfo.createInferredDirectory(itemInfo.getResourceId()));
      }
      if (listOptions.getMaxResults() > 0 && listedInfo.size() >= listOptions.getMaxResults()) {
        break;
      }
    }
    return listedInfo;
  }

  public void renameHnFolder(URI src, URI dst) throws IOException {
    throw new IOException("Not implemented");
  }

  @Override
  public boolean isHnBucket(URI src) throws IOException {
    return false;
  }

  @Override
  public synchronized GoogleCloudStorageItemInfo getItemInfo(StorageResourceId resourceId)
      throws IOException {
    if (resourceId.isRoot()) {
      return GoogleCloudStorageItemInfo.ROOT_INFO;
    }

    if (resourceId.isBucket()) {
      if (bucketLookup.containsKey(resourceId.getBucketName())) {
        return bucketLookup.get(resourceId.getBucketName()).getInfo();
      }
    } else {
      if (!validateObjectName(resourceId.getObjectName())) {
        throw new IOException(
            String.format("Invalid object name: '%s'", resourceId.getObjectName()));
      }
      if (bucketLookup.containsKey(resourceId.getBucketName())
          && bucketLookup.get(resourceId.getBucketName()).get(resourceId.getObjectName()) != null) {
        return bucketLookup
            .get(resourceId.getBucketName())
            .get(resourceId.getObjectName())
            .getInfo();
      }
    }
    return GoogleCloudStorageItemInfo.createNotFound(resourceId);
  }

  @Override
  public synchronized List getItemInfos(
      List resourceIds) throws IOException {
    List itemInfos = new ArrayList<>();
    for (StorageResourceId resourceId : resourceIds) {
      try {
        itemInfos.add(getItemInfo(resourceId));
      } catch (IOException ioe) {
        throw new IOException("Error getting StorageObject", ioe);
      }
    }
    return itemInfos;
  }

  @Override
  public List updateItems(List itemInfoList)
      throws IOException {
    List itemInfos = new ArrayList<>();
    for (UpdatableItemInfo updatableItemInfo : itemInfoList) {
      StorageResourceId resourceId = updatableItemInfo.getStorageResourceId();
      checkArgument(
          !resourceId.isRoot() && !resourceId.isBucket(),
          "Can't update item on GCS Root or bucket resources");
      if (!validateObjectName(resourceId.getObjectName())) {
        throw new IOException("Error accessing");
      }
      if (bucketLookup.containsKey(resourceId.getBucketName())
          && bucketLookup.get(resourceId.getBucketName()).get(resourceId.getObjectName()) != null) {
        InMemoryObjectEntry objectEntry =
            bucketLookup.get(resourceId.getBucketName()).get(resourceId.getObjectName());
        objectEntry.patchMetadata(updatableItemInfo.getMetadata());
        itemInfos.add(getItemInfo(resourceId));
      } else {
        throw new IOException(String.format("Error getting StorageObject %s", resourceId));
      }
    }
    return itemInfos;
  }

  @Override
  public void close() {}

  @Override
  public void compose(
      String bucketName, List sources, String destination, String contentType)
      throws IOException {
    List sourceResourcesIds =
        sources.stream()
            .map(s -> new StorageResourceId(bucketName, s))
            .collect(Collectors.toList());
    StorageResourceId destinationId = new StorageResourceId(bucketName, destination);
    CreateObjectOptions options =
        CreateObjectOptions.DEFAULT_OVERWRITE.toBuilder().setContentType(contentType).build();
    composeObjects(sourceResourcesIds, destinationId, options);
  }

  @Override
  public GoogleCloudStorageItemInfo composeObjects(
      List sources, StorageResourceId destination, CreateObjectOptions options)
      throws IOException {
    checkArgument(
        sources.size() <= MAX_COMPOSE_OBJECTS,
        "Can not compose more than %s sources",
        MAX_COMPOSE_OBJECTS);
    ByteArrayOutputStream tempOutput = new ByteArrayOutputStream();
    for (StorageResourceId sourceId : sources) {
      // TODO(user): If we change to also set generationIds for source objects in the base
      // GoogleCloudStorageImpl, make sure to also add a generationId check here.
      try (SeekableByteChannel sourceChannel = open(sourceId)) {
        byte[] bufferArray = new byte[4 * 1024 * 1024];
        int bytesRead;
        do {
          ByteBuffer buffer = ByteBuffer.wrap(bufferArray);
          bytesRead = sourceChannel.read(buffer);
          tempOutput.write(bufferArray, 0, buffer.position());
        } while (bytesRead >= 0);
      }
    }

    // If destination.hasGenerationId(), it'll automatically get enforced here by the create()
    // implementation.
    try (WritableByteChannel destChannel = create(destination, options)) {
      destChannel.write(ByteBuffer.wrap(tempOutput.toByteArray()));
    }
    return getItemInfo(destination);
  }

  @Override
  public Map getStatistics() {
    throw new UnsupportedOperationException("not implemented");
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy