All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.solr.gcs.GCSBackupRepository Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.solr.gcs;

import static java.net.HttpURLConnection.HTTP_PRECON_FAILED;

import com.google.auth.oauth2.GoogleCredentials;
import com.google.cloud.ReadChannel;
import com.google.cloud.WriteChannel;
import com.google.cloud.storage.Blob;
import com.google.cloud.storage.BlobId;
import com.google.cloud.storage.BlobInfo;
import com.google.cloud.storage.Storage;
import com.google.cloud.storage.StorageException;
import com.google.cloud.storage.StorageOptions;
import java.io.EOFException;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.lang.invoke.MethodHandles;
import java.net.URI;
import java.net.URISyntaxException;
import java.nio.ByteBuffer;
import java.nio.channels.Channels;
import java.nio.channels.WritableByteChannel;
import java.nio.file.FileAlreadyExistsException;
import java.nio.file.NoSuchFileException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.List;
import java.util.Objects;
import java.util.stream.Collectors;
import org.apache.lucene.codecs.CodecUtil;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.store.BufferedIndexInput;
import org.apache.lucene.store.ChecksumIndexInput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.solr.common.params.CoreAdminParams;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.core.DirectoryFactory;
import org.apache.solr.core.backup.repository.AbstractBackupRepository;
import org.apache.solr.core.backup.repository.BackupRepository;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * {@link BackupRepository} implementation that stores files in Google Cloud Storage ("GCS").
 *
 * <p>The string form of every {@link URI} handed to this class is used as the blob name inside the
 * configured bucket. A "directory" is represented by a blob whose name ends with {@code /}.
 */
public class GCSBackupRepository extends AbstractBackupRepository {
  private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());

  /**
   * Index files strictly larger than this many bytes are streamed through a write channel; smaller
   * ones are uploaded from a single in-memory byte array.
   */
  private static final int LARGE_BLOB_THRESHOLD_BYTE_SIZE = 5 * 1024 * 1024;

  private static final Storage.BlobWriteOption[] NO_WRITE_OPTIONS = new Storage.BlobWriteOption[0];

  /** GCS client; created by {@link #initStorage()}. */
  protected Storage storage;

  /** Bucket that holds every blob read or written by this repository. */
  protected String bucketName = null;

  /** Path of the credential file, or {@code null} when none was configured. */
  protected String credentialPath = null;

  /** Size of the buffer (and channel chunk) used when streaming data to GCS. */
  protected int writeBufferSizeBytes;

  /** Size of the buffer (and channel chunk) used when reading data from GCS. */
  protected int readBufferSizeBytes;

  /** Builder the GCS client is created from; populated by {@link #init(NamedList)}. */
  protected StorageOptions.Builder storageOptionsBuilder = null;

  /**
   * Creates the GCS client on first use and caches it in {@link #storage}.
   *
   * <p>When {@link #credentialPath} is set the credentials are loaded from that file; otherwise a
   * warning is logged and the client is built from {@link #storageOptionsBuilder} as-is.
   *
   * @return the cached client if one already exists, otherwise the newly built one
   * @throws IllegalStateException if loading the credentials fails with an {@link IOException}
   */
  protected Storage initStorage() {
    if (storage != null) return storage;

    try {
      if (credentialPath != null) {
        log.info("Creating GCS client using credential at {}", credentialPath);
        // 'GoogleCredentials.fromStream' closes the input stream, so we don't
        GoogleCredentials credential =
            GoogleCredentials.fromStream(new FileInputStream(credentialPath));
        storageOptionsBuilder.setCredentials(credential);
      } else {
        // nowarn compile time string concatenation
        log.warn(GCSConfigParser.potentiallyMissingCredentialMsg()); // nowarn
      }
      storage = storageOptionsBuilder.build().getService();
    } catch (IOException e) {
      throw new IllegalStateException(e);
    }
    return storage;
  }

  /**
   * Reads the repository configuration (bucket, credential path, buffer sizes, storage options)
   * and creates the GCS client.
   *
   * <p>NOTE(review): the parameter is declared as a raw {@code NamedList} here; confirm against
   * the superclass signature before parameterizing it.
   *
   * @param args plugin arguments, forwarded to the superclass
   */
  @Override
  public void init(NamedList args) {
    super.init(args);
    final GCSConfigParser configReader = new GCSConfigParser();
    final GCSConfigParser.GCSConfig parsedConfig = configReader.parseConfiguration(config);

    this.bucketName = parsedConfig.getBucketName();
    this.credentialPath = parsedConfig.getCredentialPath();
    this.writeBufferSizeBytes = parsedConfig.getWriteBufferSize();
    this.readBufferSizeBytes = parsedConfig.getReadBufferSize();
    this.storageOptionsBuilder = parsedConfig.getStorageOptionsBuilder();

    initStorage();
  }

  /**
   * Looks up a value in the repository configuration.
   *
   * @param name configuration key
   * @param <T> type the caller expects; the cast is unchecked
   * @return whatever the configuration holds under {@code name}
   */
  @Override
  @SuppressWarnings("unchecked")
  public <T> T getConfigProperty(String name) {
    return (T) this.config.get(name);
  }

  /**
   * Parses {@code location} into a {@link URI}.
   *
   * @throws NullPointerException if {@code location} is null
   * @throws IllegalArgumentException if {@code location} is not a syntactically valid URI
   */
  @Override
  public URI createURI(String location) {
    Objects.requireNonNull(location);

    URI result;
    try {
      result = new URI(location);
    } catch (URISyntaxException e) {
      throw new IllegalArgumentException("Error on creating URI", e);
    }

    return result;
  }

  /**
   * Same as {@link #createURI(String)}, but guarantees that the result ends with {@code /}.
   *
   * @throws NullPointerException if {@code location} is null
   */
  @Override
  public URI createDirectoryURI(String location) {
    Objects.requireNonNull(location);

    if (!location.endsWith("/")) {
      location += "/";
    }

    return createURI(location);
  }

  /**
   * Appends the given components to {@code baseUri}, separated by single {@code /} characters, and
   * normalizes the result. Null and empty components are skipped.
   */
  @Override
  public URI resolve(URI baseUri, String... pathComponents) {
    StringBuilder builder = new StringBuilder(baseUri.toString());
    for (String path : pathComponents) {
      if (path != null && !path.isEmpty()) {
        // The length check keeps an empty base URI from triggering an index-out-of-bounds error.
        if (builder.length() > 0 && builder.charAt(builder.length() - 1) != '/') {
          builder.append('/');
        }
        builder.append(path);
      }
    }

    return URI.create(builder.toString()).normalize();
  }

  /**
   * Same as {@link #resolve(URI, String...)}, but guarantees that the last component (or the base
   * URI itself when no components are given) ends with {@code /}.
   */
  @Override
  public URI resolveDirectory(URI baseUri, String... pathComponents) {
    if (pathComponents.length == 0) {
      if (!baseUri.getPath().endsWith("/")) {
        baseUri = URI.create(baseUri + "/");
      }
      return resolve(baseUri, pathComponents);
    }

    // Work on a copy so that an array passed in by the caller is never modified.
    final String[] components = pathComponents.clone();
    final int last = components.length - 1;
    if (!components[last].endsWith("/")) {
      components[last] = components[last] + "/";
    }
    return resolve(baseUri, components);
  }

  @Override
  public boolean exists(URI path) throws IOException {
    return exists(path.toString());
  }

  /**
   * Checks whether {@code path} exists as a file or as a directory.
   *
   * <p>A path equal to the {@link CoreAdminParams#BACKUP_LOCATION} config property is always
   * reported as existing. A path ending in {@code /} is looked up as-is; any other path is looked
   * up both as-is and with a trailing {@code /}.
   *
   * @param path blob name to look for
   * @return true if a matching blob was found
   */
  public boolean exists(String path) throws IOException {
    if (path.equals(getConfigProperty(CoreAdminParams.BACKUP_LOCATION))) {
      return true;
    }

    if (path.endsWith("/")) {
      return storage.get(bucketName, path, Storage.BlobGetOption.fields()) != null;
    }

    final String directoryPath = path + "/";
    return storage.get(bucketName, path, Storage.BlobGetOption.fields()) != null
        || storage.get(bucketName, directoryPath, Storage.BlobGetOption.fields()) != null;
  }

  /**
   * Classifies {@code path} as a directory when it ends with {@code /} or when a blob named
   * {@code path + "/"} exists. Every other path is reported as a file; the file itself is not
   * looked up.
   */
  @Override
  public PathType getPathType(URI path) throws IOException {
    if (path.toString().endsWith("/")) return PathType.DIRECTORY;

    Blob blob = storage.get(bucketName, path.toString() + "/", Storage.BlobGetOption.fields());
    if (blob != null) return PathType.DIRECTORY;

    return PathType.FILE;
  }

  /**
   * Lists the blobs returned for the directory {@code path} when listing in "current directory"
   * mode.
   *
   * @return the listed names relative to {@code path}, with any trailing {@code /} removed; the
   *     entry for the directory itself (empty relative name) is left out
   */
  @Override
  public String[] listAll(URI path) throws IOException {
    final String pathStr = appendTrailingSeparatorIfNecessary(path.toString());

    final List<String> result = new ArrayList<>();
    storage
        .list(
            bucketName,
            Storage.BlobListOption.currentDirectory(),
            Storage.BlobListOption.prefix(pathStr),
            Storage.BlobListOption.fields())
        .iterateAll()
        .forEach(
            blob -> {
              assert blob.getName().startsWith(pathStr);
              final String suffixName = blob.getName().substring(pathStr.length());
              if (!suffixName.isEmpty()) {
                // Remove trailing '/' if present
                if (suffixName.endsWith("/")) {
                  result.add(suffixName.substring(0, suffixName.length() - 1));
                } else {
                  result.add(suffixName);
                }
              }
            });

    return result.toArray(new String[0]);
  }

  /**
   * Opens {@code fileName} inside {@code dirPath} for reading, buffering {@link
   * #readBufferSizeBytes} bytes at a time.
   *
   * @throws NoSuchFileException if the blob does not exist
   */
  @Override
  public IndexInput openInput(URI dirPath, String fileName, IOContext ctx) throws IOException {
    return openInput(dirPath, fileName, ctx, readBufferSizeBytes);
  }

  /**
   * Opens a buffered {@link IndexInput} backed by a GCS read channel.
   *
   * @param bufferSize used both as the Lucene buffer size and as the channel chunk size
   * @throws NoSuchFileException if the blob does not exist
   */
  private IndexInput openInput(URI dirPath, String fileName, IOContext ctx, int bufferSize)
      throws IOException {
    final String blobName = resolve(dirPath, fileName).toString();

    final BlobId blobId = BlobId.of(bucketName, blobName);
    final Blob blob = storage.get(blobId, Storage.BlobGetOption.fields(Storage.BlobField.SIZE));
    if (blob == null) {
      // A null lookup result means the blob is missing; report that instead of failing with an NPE.
      throw new NoSuchFileException(blobName);
    }
    final ReadChannel readChannel = blob.reader();
    readChannel.setChunkSize(bufferSize);

    return new BufferedIndexInput(blobName, bufferSize) {

      @Override
      public long length() {
        return blob.getSize();
      }

      @Override
      protected void readInternal(ByteBuffer b) throws IOException {
        // A single channel read may return fewer bytes than requested, so keep reading until the
        // buffer is full.
        while (b.hasRemaining()) {
          if (readChannel.read(b) < 0) {
            throw new EOFException("read past EOF: " + this);
          }
        }
      }

      @Override
      protected void seekInternal(long pos) throws IOException {
        readChannel.seek(pos);
      }

      @Override
      public void close() throws IOException {
        readChannel.close();
      }
    };
  }

  /**
   * Opens a stream that writes to the blob named by {@code path}. Closing the stream closes the
   * underlying write channel.
   */
  @Override
  public OutputStream createOutput(URI path) throws IOException {
    final BlobInfo blobInfo = BlobInfo.newBuilder(bucketName, path.toString()).build();
    final WriteChannel writeChannel = storage.writer(blobInfo, getDefaultBlobWriteOptions());

    return Channels.newOutputStream(
        new WritableByteChannel() {
          @Override
          public int write(ByteBuffer src) throws IOException {
            return writeChannel.write(src);
          }

          @Override
          public boolean isOpen() {
            return writeChannel.isOpen();
          }

          @Override
          public void close() throws IOException {
            writeChannel.close();
          }
        });
  }

  /** Creates the directory marker blob for {@code path} unless it already exists. */
  @Override
  public void createDirectory(URI path) throws IOException {
    final String name = appendTrailingSeparatorIfNecessary(path.toString());
    if (!exists(name)) {
      storage.create(BlobInfo.newBuilder(bucketName, name).build());
    }
  }

  /** Deletes every blob whose name starts with the directory prefix of {@code path}. */
  @Override
  public void deleteDirectory(URI path) throws IOException {
    final List<BlobId> blobIds = allBlobsAtDir(path);
    if (!blobIds.isEmpty()) {
      storage.delete(blobIds);
    } else {
      log.debug("Path:{} doesn't have any blobs", path);
    }
  }

  /**
   * Collects the ids of all blobs whose name starts with {@code path} plus a trailing {@code /}.
   *
   * @return the matching blob ids; empty when nothing matches
   */
  protected List<BlobId> allBlobsAtDir(URI path) throws IOException {
    final String pathStr = appendTrailingSeparatorIfNecessary(path.toString());

    final List<BlobId> result = new ArrayList<>();
    storage
        .list(bucketName, Storage.BlobListOption.prefix(pathStr), Storage.BlobListOption.fields())
        .iterateAll()
        .forEach(blob -> result.add(blob.getBlobId()));

    return result;
  }

  /**
   * Deletes the named files from the directory {@code path}.
   *
   * @param path directory that contains the files
   * @param files file names relative to {@code path}; nothing happens when empty
   */
  @Override
  public void delete(URI path, Collection<String> files) {
    if (files.isEmpty()) {
      return;
    }
    final String prefix = appendTrailingSeparatorIfNecessary(path.toString());
    final List<BlobId> blobDeletes =
        files.stream()
            .map(file -> BlobId.of(bucketName, prefix + file))
            .collect(Collectors.toList());
    storage.delete(blobDeletes);
  }

  /**
   * Uploads an index file from {@code sourceDir} to {@code destDir}/{@code destFileName}.
   *
   * <p>When {@code shouldVerifyChecksum} is set the file is read through a checksum input and its
   * footer is verified while copying.
   *
   * @throws CorruptIndexException if the file is not longer than a codec footer
   * @throws FileAlreadyExistsException if the upload fails with a "precondition failed" response
   */
  @Override
  public void copyIndexFileFrom(
      Directory sourceDir, String sourceFileName, URI destDir, String destFileName)
      throws IOException {
    String blobName = destDir.toString();
    blobName = appendTrailingSeparatorIfNecessary(blobName);
    blobName += destFileName;
    final BlobInfo blobInfo = BlobInfo.newBuilder(bucketName, blobName).build();
    try (IndexInput input =
        shouldVerifyChecksum
            ? sourceDir.openChecksumInput(sourceFileName, DirectoryFactory.IOCONTEXT_NO_CACHE)
            : sourceDir.openInput(sourceFileName, DirectoryFactory.IOCONTEXT_NO_CACHE)) {
      if (input.length() <= CodecUtil.footerLength()) {
        throw new CorruptIndexException("file is too small:" + input.length(), input);
      }
      if (input.length() > LARGE_BLOB_THRESHOLD_BYTE_SIZE) {
        writeBlobResumable(blobInfo, input);
      } else {
        // The length is at most LARGE_BLOB_THRESHOLD_BYTE_SIZE here, so the int cast cannot
        // overflow.
        writeBlobMultipart(blobInfo, input, (int) input.length());
      }
    }
  }

  /**
   * Downloads {@code sourceRepo}/{@code sourceFileName} into {@code dest} as {@code destFileName}.
   *
   * <p>Failures are propagated to the caller instead of being logged and dropped, so a failed copy
   * cannot be mistaken for a successful one.
   *
   * @throws IOException if reading the blob or writing the destination file fails
   */
  @Override
  public void copyIndexFileTo(
      URI sourceRepo, String sourceFileName, Directory dest, String destFileName)
      throws IOException {
    final String blobName =
        appendTrailingSeparatorIfNecessary(sourceRepo.toString()) + sourceFileName;
    final BlobId blobId = BlobId.of(bucketName, blobName);
    try (final ReadChannel readChannel = storage.reader(blobId);
        IndexOutput output =
            dest.createOutput(destFileName, DirectoryFactory.IOCONTEXT_NO_CACHE)) {
      final ByteBuffer buffer = ByteBuffer.allocate(readBufferSizeBytes);
      while (readChannel.read(buffer) > 0) {
        buffer.flip();
        output.writeBytes(buffer.array(), buffer.position(), buffer.remaining());
        buffer.clear();
      }
    }
  }

  @Override
  public void close() throws IOException {}

  /**
   * Uploads a small file in one request from an in-memory byte array. The upload is sent with a
   * "does not exist" target option.
   *
   * @param blobSize total length of {@code indexInput} in bytes, footer included
   * @throws FileAlreadyExistsException if the upload fails with a "precondition failed" response
   */
  private void writeBlobMultipart(BlobInfo blobInfo, IndexInput indexInput, int blobSize)
      throws IOException {
    byte[] bytes = new byte[blobSize];
    if (shouldVerifyChecksum) {
      // Read everything except the footer, verify the footer against the bytes read so far, then
      // write a footer carrying the verified checksum into the tail of the array.
      indexInput.readBytes(bytes, 0, blobSize - CodecUtil.footerLength());
      long checksum = CodecUtil.checkFooter((ChecksumIndexInput) indexInput);
      ByteBuffer footerBuffer =
          ByteBuffer.wrap(bytes, blobSize - CodecUtil.footerLength(), CodecUtil.footerLength());
      writeFooter(checksum, footerBuffer);
    } else {
      indexInput.readBytes(bytes, 0, blobSize);
    }
    try {
      storage.create(blobInfo, bytes, Storage.BlobTargetOption.doesNotExist());
    } catch (final StorageException se) {
      if (se.getCode() == HTTP_PRECON_FAILED) {
        throw new FileAlreadyExistsException(blobInfo.getBlobId().getName(), null, se.getMessage());
      }
      throw se;
    }
  }

  /**
   * Streams a large file to GCS through a write channel, {@link #writeBufferSizeBytes} bytes at a
   * time.
   *
   * @throws FileAlreadyExistsException if the upload fails with a "precondition failed" response
   */
  private void writeBlobResumable(BlobInfo blobInfo, IndexInput indexInput) throws IOException {
    try {
      // NOTE(review): the channel is closed on the success path only. Presumably closing it after
      // a failure would finalize a truncated blob; confirm before switching to
      // try-with-resources.
      final WriteChannel writeChannel = storage.writer(blobInfo, getDefaultBlobWriteOptions());

      ByteBuffer buffer = ByteBuffer.allocate(writeBufferSizeBytes);
      writeChannel.setChunkSize(writeBufferSizeBytes);

      // When verifying, the footer is not copied verbatim; it is checked and re-written below.
      long remain =
          shouldVerifyChecksum
              ? indexInput.length() - CodecUtil.footerLength()
              : indexInput.length();
      while (remain > 0) {
        // reading
        int byteReads = (int) Math.min(buffer.capacity(), remain);
        indexInput.readBytes(buffer.array(), 0, byteReads);
        buffer.position(byteReads);
        buffer.flip();

        // writing
        writeChannel.write(buffer);
        buffer.clear();
        remain -= byteReads;
      }
      if (shouldVerifyChecksum) {
        long checksum = CodecUtil.checkFooter((ChecksumIndexInput) indexInput);
        ByteBuffer bytes = getFooter(checksum);
        writeChannel.write(bytes);
      }
      writeChannel.close();
    } catch (final StorageException se) {
      if (se.getCode() == HTTP_PRECON_FAILED) {
        throw new FileAlreadyExistsException(blobInfo.getBlobId().getName(), null, se.getMessage());
      }
      throw se;
    }
  }

  /** Returns a new buffer, ready for reading, that holds a codec footer for {@code checksum}. */
  private ByteBuffer getFooter(long checksum) throws IOException {
    ByteBuffer buffer = ByteBuffer.allocate(CodecUtil.footerLength());
    writeFooter(checksum, buffer);
    return buffer;
  }

  /**
   * Writes a codec footer for {@code checksum} into {@code buffer} at its current position, then
   * flips the buffer.
   */
  private void writeFooter(long checksum, ByteBuffer buffer) throws IOException {
    // CodecUtil.writeFooter only accepts an IndexOutput, so adapt the buffer to one that reports
    // the given checksum.
    IndexOutput out =
        new IndexOutput("", "") {

          @Override
          public void writeByte(byte b) throws IOException {
            buffer.put(b);
          }

          @Override
          public void writeBytes(byte[] b, int offset, int length) throws IOException {
            buffer.put(b, offset, length);
          }

          @Override
          public void close() throws IOException {}

          @Override
          public long getFilePointer() {
            return 0;
          }

          @Override
          public long getChecksum() throws IOException {
            return checksum;
          }
        };
    CodecUtil.writeFooter(out);
    buffer.flip();
  }

  /**
   * Write options applied whenever a write channel is opened; none by default. Subclasses may
   * override this method to supply their own.
   */
  protected Storage.BlobWriteOption[] getDefaultBlobWriteOptions() {
    return NO_WRITE_OPTIONS;
  }

  /** Returns {@code blobName} with a trailing {@code /} appended unless it already has one. */
  private String appendTrailingSeparatorIfNecessary(String blobName) {
    if (!blobName.endsWith("/")) {
      return blobName + "/";
    }
    return blobName;
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy