All downloads are free. Search and download functionality uses the official Maven repository.

org.apache.druid.storage.s3.S3TaskLogs Maven / Gradle / Ivy

There is a newer version: 31.0.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.druid.storage.s3;

import com.amazonaws.services.s3.model.AmazonS3Exception;
import com.amazonaws.services.s3.model.GetObjectRequest;
import com.amazonaws.services.s3.model.ObjectMetadata;
import com.google.common.base.Optional;
import com.google.common.base.Throwables;
import com.google.inject.Inject;
import org.apache.druid.common.utils.CurrentTimeMillisSupplier;
import org.apache.druid.java.util.common.IOE;
import org.apache.druid.java.util.common.StringUtils;
import org.apache.druid.java.util.common.logger.Logger;
import org.apache.druid.tasklogs.TaskLogs;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.util.Date;

/**
 * Provides task logs archived on S3.
 *
 * <p>Every task file (log, reports, status, payload) is stored in the bucket returned by
 * {@link S3TaskLogsConfig#getS3Bucket()} under the key {@code <s3Prefix>/<taskid>/<filename>};
 * see {@link #getTaskLogKey(String, String)}.
 */
public class S3TaskLogs implements TaskLogs
{
  private static final Logger log = new Logger(S3TaskLogs.class);

  private final ServerSideEncryptingAmazonS3 service;
  private final S3TaskLogsConfig config;
  private final S3InputDataConfig inputDataConfig;
  private final CurrentTimeMillisSupplier timeSupplier;

  /**
   * @param service         S3 client used for all reads, uploads and deletions
   * @param config          supplies the bucket, key prefix and ACL setting for task files
   * @param inputDataConfig supplies the maximum listing length used when deleting task files
   * @param timeSupplier    source of "now" for {@link #killAll()}
   */
  @Inject
  public S3TaskLogs(
      ServerSideEncryptingAmazonS3 service,
      S3TaskLogsConfig config,
      S3InputDataConfig inputDataConfig,
      CurrentTimeMillisSupplier timeSupplier
  )
  {
    this.service = service;
    this.config = config;
    this.inputDataConfig = inputDataConfig;
    this.timeSupplier = timeSupplier;
  }

  /**
   * Streams the {@code log} file of a task.
   *
   * @param taskid task whose log is requested
   * @param offset if non-negative, the byte position to start from; if negative, the number of
   *               bytes to read from the end of the file. An offset whose magnitude is at least
   *               the file length results in the whole file being streamed.
   * @return the log content, or absent if S3 reports that the object or bucket does not exist
   * @throws IOException if the object could not be read
   */
  @Override
  public Optional<InputStream> streamTaskLog(final String taskid, final long offset) throws IOException
  {
    final String taskKey = getTaskLogKey(taskid, "log");
    return streamTaskFileWithRetry(offset, taskKey);
  }

  /**
   * Streams the whole {@code report.json} file of a task.
   *
   * @return the report content, or absent if S3 reports that the object or bucket does not exist
   * @throws IOException if the object could not be read
   */
  @Override
  public Optional<InputStream> streamTaskReports(String taskid) throws IOException
  {
    final String taskKey = getTaskLogKey(taskid, "report.json");
    return streamTaskFileWithRetry(0, taskKey);
  }

  /**
   * Streams the whole {@code status.json} file of a task.
   *
   * @return the status content, or absent if S3 reports that the object or bucket does not exist
   * @throws IOException if the object could not be read
   */
  @Override
  public Optional<InputStream> streamTaskStatus(String taskid) throws IOException
  {
    final String taskKey = getTaskLogKey(taskid, "status.json");
    return streamTaskFileWithRetry(0, taskKey);
  }

  /**
   * Uploads the task payload file as {@code task.json} under the task's key prefix.
   *
   * @throws IOException if the upload fails with an {@link IOException}
   */
  @Override
  public void pushTaskPayload(String taskid, File taskPayloadFile) throws IOException
  {
    final String taskKey = getTaskLogKey(taskid, "task.json");
    log.info("Pushing task payload [%s] to location [%s]", taskPayloadFile, taskKey);
    pushTaskFile(taskPayloadFile, taskKey);
  }

  /**
   * Streams the whole {@code task.json} payload file of a task.
   *
   * @return the payload content, or absent if S3 reports that the object or bucket does not exist
   * @throws IOException if the object could not be read
   */
  @Override
  public Optional<InputStream> streamTaskPayload(String taskid) throws IOException
  {
    final String taskKey = getTaskLogKey(taskid, "task.json");
    return streamTaskFileWithRetry(0, taskKey);
  }

  /**
   * Runs {@link #streamTaskFile(long, String)} through {@link S3Utils#retryS3Operation},
   * using the retry conditions defined in {@link S3Utils#S3RETRY}. Any exception that escapes
   * the retry wrapper is rethrown as an {@link IOE} with the original exception as its cause.
   */
  private Optional<InputStream> streamTaskFileWithRetry(final long offset, String taskKey) throws IOException
  {
    try {
      return S3Utils.retryS3Operation(() -> streamTaskFile(offset, taskKey));
    }
    catch (Exception e) {
      throw new IOE(e, "Failed to stream logs for task[%s] starting at offset[%d]", taskKey, offset);
    }
  }

  /**
   * Opens a ranged read on the object stored at {@code taskKey}.
   *
   * <p>The object's metadata is fetched first so that the byte range can be computed from its
   * content length:
   * <ul>
   *   <li>{@code offset >= length} or {@code offset <= -length}: read from byte 0;</li>
   *   <li>{@code 0 <= offset < length}: read from {@code offset};</li>
   *   <li>{@code -length < offset < 0}: read the last {@code -offset} bytes.</li>
   * </ul>
   * The range always ends at the last byte of the object.
   *
   * @return the object content, or absent when S3 answers with status 404 or with error code
   *         {@code NoSuchKey} / {@code NoSuchBucket}; any other {@link AmazonS3Exception} is
   *         rethrown unchanged
   */
  private Optional<InputStream> streamTaskFile(final long offset, String taskKey)
  {
    try {
      final ObjectMetadata objectMetadata = service.getObjectMetadata(config.getS3Bucket(), taskKey);

      final long contentLength = objectMetadata.getContentLength();
      // NOTE(review): for an empty object this produces the range (0, -1) - confirm how the
      // S3 client handles that request.
      final long end = contentLength - 1;

      final long start;
      if (offset >= contentLength || offset <= -contentLength) {
        start = 0;
      } else if (offset >= 0) {
        start = offset;
      } else {
        start = contentLength + offset;
      }

      // The request is constrained to the ETag that was just read from the metadata,
      // presumably so that the range computed above is never applied to a different
      // version of the object.
      final GetObjectRequest request = new GetObjectRequest(config.getS3Bucket(), taskKey)
          .withMatchingETagConstraint(ensureQuotated(objectMetadata.getETag()))
          .withRange(start, end);

      // NOTE(review): the AWS SDK documents that getObject may return null when a constraint
      // is not met; that would surface here as a NullPointerException - confirm whether the
      // wrapped client can do so.
      return Optional.of(service.getObject(request).getObjectContent());
    }
    catch (AmazonS3Exception e) {
      if (404 == e.getStatusCode()
          || "NoSuchKey".equals(e.getErrorCode())
          || "NoSuchBucket".equals(e.getErrorCode())) {
        return Optional.absent();
      } else {
        throw e;
      }
    }
  }

  /**
   * Wraps {@code eTag} in double quotes when it neither starts nor ends with one.
   *
   * @param eTag entity tag, may be null
   * @return the quoted tag; the input unchanged if it already starts or ends with a double
   *         quote; null if the input is null
   */
  static String ensureQuotated(String eTag)
  {
    if (eTag != null) {
      if (!eTag.startsWith("\"") && !eTag.endsWith("\"")) {
        return "\"" + eTag + "\"";
      }
    }
    return eTag;
  }

  /**
   * Uploads the task log file as {@code log} under the task's key prefix.
   *
   * @throws IOException if the upload fails with an {@link IOException}
   */
  @Override
  public void pushTaskLog(final String taskid, final File logFile) throws IOException
  {
    final String taskKey = getTaskLogKey(taskid, "log");
    log.info("Pushing task log %s to: %s", logFile, taskKey);
    pushTaskFile(logFile, taskKey);
  }

  /**
   * Uploads the task report file as {@code report.json} under the task's key prefix.
   *
   * @throws IOException if the upload fails with an {@link IOException}
   */
  @Override
  public void pushTaskReports(String taskid, File reportFile) throws IOException
  {
    final String taskKey = getTaskLogKey(taskid, "report.json");
    log.info("Pushing task reports %s to: %s", reportFile, taskKey);
    pushTaskFile(reportFile, taskKey);
  }

  /**
   * Uploads the task status file as {@code status.json} under the task's key prefix.
   *
   * @throws IOException if the upload fails with an {@link IOException}
   */
  @Override
  public void pushTaskStatus(String taskid, File statusFile) throws IOException
  {
    final String taskKey = getTaskLogKey(taskid, "status.json");
    log.info("Pushing task status %s to: %s", statusFile, taskKey);
    pushTaskFile(statusFile, taskKey);
  }

  /**
   * Uploads {@code logFile} to {@code taskKey} in the configured bucket via
   * {@link S3Utils#retryS3Operation}.
   *
   * @throws IOException      if the upload ultimately fails with an {@link IOException}
   * @throws RuntimeException wrapping any other exception raised by the upload
   */
  private void pushTaskFile(final File logFile, String taskKey) throws IOException
  {
    try {
      S3Utils.retryS3Operation(
          () -> {
            S3Utils.uploadFileIfPossible(service, config.getDisableAcl(), config.getS3Bucket(), taskKey, logFile);
            return null;
          }
      );
    }
    catch (Exception e) {
      // Let IOExceptions through unchanged so callers see the declared type; wrap the rest.
      Throwables.propagateIfInstanceOf(e, IOException.class);
      throw new RuntimeException(e);
    }
  }

  /**
   * Builds the S3 key of a task file: {@code <s3Prefix>/<taskid>/<filename>}.
   */
  String getTaskLogKey(String taskid, String filename)
  {
    return StringUtils.format("%s/%s/%s", config.getS3Prefix(), taskid, filename);
  }

  /**
   * Deletes task files under the configured prefix by delegating to
   * {@link #killOlderThan(long)} with the current time from the injected time supplier.
   *
   * @throws IOException if the deletion fails
   */
  @Override
  public void killAll() throws IOException
  {
    log.info(
        "Deleting all task logs from s3 location [bucket: '%s' prefix: '%s'].",
        config.getS3Bucket(),
        config.getS3Prefix()
    );

    long now = timeSupplier.getAsLong();
    killOlderThan(now);
  }

  /**
   * Deletes every object under the configured prefix whose last-modified time is strictly
   * before {@code timestamp}.
   *
   * @param timestamp cut-off in milliseconds since the epoch
   * @throws IOException wrapping any exception raised while listing or deleting
   */
  @Override
  public void killOlderThan(long timestamp) throws IOException
  {
    log.info(
        "Deleting all task logs from s3 location [bucket: '%s' prefix: '%s'] older than %s.",
        config.getS3Bucket(),
        config.getS3Prefix(),
        new Date(timestamp)
    );
    try {
      S3Utils.deleteObjectsInPath(
          service,
          inputDataConfig.getMaxListingLength(),
          config.getS3Bucket(),
          config.getS3Prefix(),
          (object) -> object.getLastModified().getTime() < timestamp
      );
    }
    catch (Exception e) {
      log.error("Error occurred while deleting task log files from s3. Error: %s", e.getMessage());
      throw new IOException(e);
    }
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy