All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.zeppelin.notebook.repo.S3NotebookRepo Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.zeppelin.notebook.repo;

import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.zeppelin.conf.ZeppelinConfiguration;
import org.apache.zeppelin.conf.ZeppelinConfiguration.ConfVars;
import org.apache.zeppelin.notebook.Note;
import org.apache.zeppelin.notebook.NoteInfo;
import org.apache.zeppelin.user.AuthenticationInfo;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.amazonaws.AmazonClientException;
import com.amazonaws.ClientConfiguration;
import com.amazonaws.ClientConfigurationFactory;
import com.amazonaws.auth.AWSCredentialsProvider;
import com.amazonaws.auth.DefaultAWSCredentialsProviderChain;
import com.amazonaws.services.s3.AmazonS3;
import com.amazonaws.services.s3.AmazonS3Client;
import com.amazonaws.services.s3.S3ClientOptions;
import com.amazonaws.services.s3.AmazonS3EncryptionClient;
import com.amazonaws.services.s3.model.CannedAccessControlList;
import com.amazonaws.services.s3.model.CryptoConfiguration;
import com.amazonaws.services.s3.model.EncryptionMaterialsProvider;
import com.amazonaws.services.s3.model.GetObjectRequest;
import com.amazonaws.services.s3.model.KMSEncryptionMaterialsProvider;
import com.amazonaws.services.s3.model.ListObjectsRequest;
import com.amazonaws.services.s3.model.ObjectListing;
import com.amazonaws.services.s3.model.ObjectMetadata;
import com.amazonaws.services.s3.model.PutObjectRequest;
import com.amazonaws.regions.Region;
import com.amazonaws.regions.Regions;
import com.amazonaws.services.s3.model.S3Object;
import com.amazonaws.services.s3.model.S3ObjectSummary;

/**
 * Backend for storing Notebooks on S3
 */
public class S3NotebookRepo implements NotebookRepo {
  private static final Logger LOGGER = LoggerFactory.getLogger(S3NotebookRepo.class);

  // Use a credential provider chain so that instance profiles can be utilized
  // on an EC2 instance. The order of locations where credentials are searched
  // is documented here
  //
  //    http://docs.aws.amazon.com/AWSJavaSDK/latest/javadoc/com/amazonaws/
  //        auth/DefaultAWSCredentialsProviderChain.html
  //
  // In summary, the order is:
  //
  //  1. Environment Variables - AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY
  //  2. Java System Properties - aws.accessKeyId and aws.secretKey
  //  3. Credential profiles file at the default location (~/.aws/credentials)
  //       shared by all AWS SDKs and the AWS CLI
  //  4. Instance profile credentials delivered through the Amazon EC2 metadata service
  private AmazonS3 s3client;
  private String bucketName;
  private String user;
  private boolean useServerSideEncryption;
  private CannedAccessControlList objectCannedAcl;
  private ZeppelinConfiguration conf;
  private String rootFolder;

  public S3NotebookRepo() {

  }

  public void init(ZeppelinConfiguration conf) throws IOException {
    this.conf = conf;
    bucketName = conf.getS3BucketName();
    user = conf.getS3User();
    rootFolder = user + "/notebook";
    useServerSideEncryption = conf.isS3ServerSideEncryption();
    if (StringUtils.isNotBlank(conf.getS3CannedAcl())) {
        objectCannedAcl = CannedAccessControlList.valueOf(conf.getS3CannedAcl());
    }

    // always use the default provider chain
    AWSCredentialsProvider credentialsProvider = new DefaultAWSCredentialsProviderChain();
    CryptoConfiguration cryptoConf = new CryptoConfiguration();
    String keyRegion = conf.getS3KMSKeyRegion();

    if (StringUtils.isNotBlank(keyRegion)) {
      cryptoConf.setAwsKmsRegion(Region.getRegion(Regions.fromName(keyRegion)));
    }

    ClientConfiguration cliConf = createClientConfiguration();
    
    // see if we should be encrypting data in S3
    String kmsKeyID = conf.getS3KMSKeyID();
    if (kmsKeyID != null) {
      // use the AWS KMS to encrypt data
      KMSEncryptionMaterialsProvider emp = new KMSEncryptionMaterialsProvider(kmsKeyID);
      this.s3client = new AmazonS3EncryptionClient(credentialsProvider, emp, cliConf, cryptoConf);
    }
    else if (conf.getS3EncryptionMaterialsProviderClass() != null) {
      // use a custom encryption materials provider class
      EncryptionMaterialsProvider emp = createCustomProvider(conf);
      this.s3client = new AmazonS3EncryptionClient(credentialsProvider, emp, cliConf, cryptoConf);
    }
    else {
      // regular S3
      this.s3client = new AmazonS3Client(credentialsProvider, cliConf);
    }
    s3client.setS3ClientOptions(S3ClientOptions.builder().setPathStyleAccess(conf.isS3PathStyleAccess()).build());

    // set S3 endpoint to use
    s3client.setEndpoint(conf.getS3Endpoint());
  }

  /**
   * Create an instance of a custom encryption materials provider class
   * which supplies encryption keys to use when reading/writing data in S3.
   */
  private EncryptionMaterialsProvider createCustomProvider(ZeppelinConfiguration conf)
      throws IOException {
    // use a custom encryption materials provider class
    String empClassname = conf.getS3EncryptionMaterialsProviderClass();
    EncryptionMaterialsProvider emp;
    try {
      Object empInstance = Class.forName(empClassname).newInstance();
      if (empInstance instanceof EncryptionMaterialsProvider) {
        emp = (EncryptionMaterialsProvider) empInstance;
      }
      else {
        throw new IOException("Class " + empClassname + " does not implement "
                + EncryptionMaterialsProvider.class.getName());
      }
    }
    catch (Exception e) {
      throw new IOException("Unable to instantiate encryption materials provider class "
              + empClassname + ": " + e, e);
    }

    return emp;
  }

  /**
   * Create AWS client configuration and return it.
   * @return AWS client configuration
   */
  private ClientConfiguration createClientConfiguration() {
    ClientConfigurationFactory configFactory = new ClientConfigurationFactory();
    ClientConfiguration config = configFactory.getConfig();

    String s3SignerOverride = conf.getS3SignerOverride();
    if (StringUtils.isNotBlank(s3SignerOverride)) {
      config.setSignerOverride(s3SignerOverride);
    }

    return config;
  }

  @Override
  public Map list(AuthenticationInfo subject) throws IOException {
    Map notesInfo = new HashMap<>();
    try {
      ListObjectsRequest listObjectsRequest = new ListObjectsRequest()
              .withBucketName(bucketName)
              .withPrefix(user + "/" + "notebook");
      ObjectListing objectListing;
      do {
        objectListing = s3client.listObjects(listObjectsRequest);
        for (S3ObjectSummary objectSummary : objectListing.getObjectSummaries()) {
          if (objectSummary.getKey().endsWith(".zpln")) {
            try {
              NoteInfo info = getNoteInfo(objectSummary.getKey());
              notesInfo.put(info.getId(), info);
            } catch (IOException e) {
              LOGGER.warn(e.getMessage());
            }
          }
        }
        listObjectsRequest.setMarker(objectListing.getNextMarker());
      } while (objectListing.isTruncated());
    } catch (AmazonClientException ace) {
      throw new IOException("Fail to list objects in S3", ace);
    }
    return notesInfo;
  }

  private NoteInfo getNoteInfo(String key) throws IOException {
    return new NoteInfo(getNoteId(key), getNotePath(rootFolder, key));
  }

  @Override
  public Note get(String noteId, String notePath, AuthenticationInfo subject) throws IOException {
    S3Object s3object;
    try {
      s3object = s3client.getObject(new GetObjectRequest(bucketName,
          rootFolder + "/" + buildNoteFileName(noteId, notePath)));
    }
    catch (AmazonClientException ace) {
      throw new IOException("Fail to get note: " + notePath + " from S3", ace);
    }
    try (InputStream ins = s3object.getObjectContent()) {
      String json = IOUtils.toString(ins, conf.getString(ConfVars.ZEPPELIN_ENCODING));
      return Note.fromJson(noteId, json);
    }
  }

  @Override
  public void save(Note note, AuthenticationInfo subject) throws IOException {
    String json = note.toJson();
    String key = rootFolder + "/" + buildNoteFileName(note);
    File file = File.createTempFile("note", "zpln");
    try {
      Writer writer = new OutputStreamWriter(new FileOutputStream(file));
      writer.write(json);
      writer.close();
      PutObjectRequest putRequest = new PutObjectRequest(bucketName, key, file);
      if (useServerSideEncryption) {
        // Request server-side encryption.
        ObjectMetadata objectMetadata = new ObjectMetadata();
        objectMetadata.setSSEAlgorithm(ObjectMetadata.AES_256_SERVER_SIDE_ENCRYPTION);
        putRequest.setMetadata(objectMetadata);
      }
      if (objectCannedAcl != null) {
          putRequest.withCannedAcl(objectCannedAcl);
      }
      s3client.putObject(putRequest);
    }
    catch (AmazonClientException ace) {
      throw new IOException("Fail to store note: " + note.getPath() + " in S3", ace);
    }
    finally {
      FileUtils.deleteQuietly(file);
    }
  }

  @Override
  public void move(String noteId, String notePath, String newNotePath,
                   AuthenticationInfo subject) throws IOException {
    String key = rootFolder + "/" + buildNoteFileName(noteId, notePath);
    String newKey = rootFolder + "/" + buildNoteFileName(noteId, newNotePath);
    s3client.copyObject(bucketName, key, bucketName, newKey);
    s3client.deleteObject(bucketName, key);
  }

  @Override
  public void move(String folderPath, String newFolderPath, AuthenticationInfo subject) throws IOException {
    try {
      ListObjectsRequest listObjectsRequest = new ListObjectsRequest()
              .withBucketName(bucketName)
              .withPrefix(rootFolder + folderPath + "/");
      ObjectListing objectListing;
      do {
        objectListing = s3client.listObjects(listObjectsRequest);
        for (S3ObjectSummary objectSummary : objectListing.getObjectSummaries()) {
          if (objectSummary.getKey().endsWith(".zpln")) {
            String noteId = getNoteId(objectSummary.getKey());
            String notePath = getNotePath(rootFolder, objectSummary.getKey());
            String newNotePath = newFolderPath + notePath.substring(folderPath.length());
            move(noteId, notePath, newNotePath, subject);
          }
        }
        listObjectsRequest.setMarker(objectListing.getNextMarker());
      } while (objectListing.isTruncated());
    } catch (AmazonClientException ace) {
      throw new IOException("Fail to move folder: " + folderPath + " to " + newFolderPath  + " in S3" , ace);
    }
  }

  @Override
  public void remove(String noteId, String notePath, AuthenticationInfo subject)
      throws IOException {
    try {
      s3client.deleteObject(bucketName, rootFolder + "/" + buildNoteFileName(noteId, notePath));
    } catch (AmazonClientException ace) {
      throw new IOException("Fail to remove note: " + notePath + " from S3", ace);
    }
  }

  @Override
  public void remove(String folderPath, AuthenticationInfo subject) throws IOException {
    final ListObjectsRequest listObjectsRequest = new ListObjectsRequest()
            .withBucketName(bucketName).withPrefix(rootFolder + folderPath + "/");
    try {
      ObjectListing objects = s3client.listObjects(listObjectsRequest);
      do {
        for (S3ObjectSummary objectSummary : objects.getObjectSummaries()) {
          s3client.deleteObject(bucketName, objectSummary.getKey());
        }
        objects = s3client.listNextBatchOfObjects(objects);
      } while (objects.isTruncated());
    } catch (AmazonClientException ace) {
      throw new IOException("Unable to remove folder " + folderPath  + " in S3", ace);
    }
  }

  @Override
  public void close() {
    if (s3client != null) {
      s3client.shutdown();
    }
  }

  @Override
  public List getSettings(AuthenticationInfo subject) {
    LOGGER.warn("Method not implemented");
    return Collections.emptyList();
  }

  @Override
  public void updateSettings(Map settings, AuthenticationInfo subject) {
    LOGGER.warn("Method not implemented");
  }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy