All downloads are FREE. Search and download functionality uses the official Maven repository.

org.apache.druid.storage.google.GoogleTaskLogs Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.druid.storage.google;

import com.google.api.client.http.InputStreamContent;
import com.google.common.base.Optional;
import com.google.inject.Inject;
import org.apache.druid.common.utils.CurrentTimeMillisSupplier;
import org.apache.druid.java.util.common.IOE;
import org.apache.druid.java.util.common.RE;
import org.apache.druid.java.util.common.RetryUtils;
import org.apache.druid.java.util.common.logger.Logger;
import org.apache.druid.tasklogs.TaskLogs;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.nio.file.Files;
import java.util.Date;

/**
 * {@link TaskLogs} implementation that keeps task logs, task reports and task status files as objects in a Google
 * storage bucket.
 *
 * Objects are stored under {@code <prefix>/<taskid>}, with every {@code ':'} in the task id replaced by {@code '_'}.
 * Reports and status files additionally carry the {@code .report.json} and {@code .status.json} suffixes.
 */
public class GoogleTaskLogs implements TaskLogs
{
  private static final Logger LOG = new Logger(GoogleTaskLogs.class);

  /**
   * Use 1MB upload buffer, rather than the default of 15 MB in the API client. Mainly because MMs may upload logs
   * in parallel, and typically have small heaps. The default-sized 15 MB buffers add up quickly.
   */
  static final int UPLOAD_BUFFER_SIZE = 1024 * 1024;

  private final GoogleTaskLogsConfig config;
  private final GoogleStorage storage;
  private final GoogleInputDataConfig inputDataConfig;
  private final CurrentTimeMillisSupplier timeSupplier;

  @Inject
  public GoogleTaskLogs(
      GoogleTaskLogsConfig config,
      GoogleStorage storage,
      GoogleInputDataConfig inputDataConfig,
      CurrentTimeMillisSupplier timeSupplier
  )
  {
    this.config = config;
    this.storage = storage;
    this.inputDataConfig = inputDataConfig;
    this.timeSupplier = timeSupplier;
  }

  /**
   * Uploads the log file of the given task.
   *
   * @param taskid  id of the task the log belongs to
   * @param logFile local file to upload
   * @throws IOException if the local file cannot be opened or the upload fails with an I/O error
   */
  @Override
  public void pushTaskLog(final String taskid, final File logFile) throws IOException
  {
    final String taskKey = getTaskLogKey(taskid);
    LOG.info("Pushing task log %s to: %s", logFile, taskKey);
    pushTaskFile(logFile, taskKey);
  }

  /**
   * Uploads the reports file of the given task.
   *
   * @param taskid     id of the task the reports belong to
   * @param reportFile local file to upload
   * @throws IOException if the local file cannot be opened or the upload fails with an I/O error
   */
  @Override
  public void pushTaskReports(String taskid, File reportFile) throws IOException
  {
    final String taskKey = getTaskReportKey(taskid);
    LOG.info("Pushing task reports %s to: %s", reportFile, taskKey);
    pushTaskFile(reportFile, taskKey);
  }

  /**
   * Uploads the status file of the given task.
   *
   * @param taskid     id of the task the status belongs to
   * @param statusFile local file to upload
   * @throws IOException if the local file cannot be opened or the upload fails with an I/O error
   */
  @Override
  public void pushTaskStatus(String taskid, File statusFile) throws IOException
  {
    final String taskKey = getTaskStatusKey(taskid);
    LOG.info("Pushing task status %s to: %s", statusFile, taskKey);
    pushTaskFile(statusFile, taskKey);
  }

  /**
   * Uploads {@code logFile} to {@code taskKey} in the configured bucket, retrying according to
   * {@link GoogleUtils#isRetryable}.
   *
   * @param logFile local file to upload
   * @param taskKey object key to write to
   * @throws IOException if the local file cannot be opened or the upload ultimately fails with an I/O error
   */
  private void pushTaskFile(final File logFile, final String taskKey) throws IOException
  {
    // Open (and immediately close) the file once up front so that a missing or unreadable local file is reported
    // right away with its original IOException, instead of being handed to the retry policy.
    Files.newInputStream(logFile.toPath()).close();

    try {
      RetryUtils.retry(
          (RetryUtils.Task<Void>) () -> {
            // An InputStream can only be read once, so every attempt gets its own stream. Sharing one stream
            // across attempts would make a retry resume from wherever the failed attempt stopped reading.
            try (final InputStream fileStream = Files.newInputStream(logFile.toPath())) {
              final InputStreamContent mediaContent = new InputStreamContent("text/plain", fileStream);
              mediaContent.setLength(logFile.length());
              storage.insert(config.getBucket(), taskKey, mediaContent, UPLOAD_BUFFER_SIZE);
            }
            return null;
          },
          GoogleUtils::isRetryable,
          1,
          5
      );
    }
    catch (IOException e) {
      throw e;
    }
    catch (Exception e) {
      throw new RE(e, "Failed to upload [%s] to [%s]", logFile, taskKey);
    }
  }

  /**
   * Streams the log of the given task.
   *
   * @param taskid id of the task
   * @param offset where to start reading; see {@link #streamTaskFile} for how it is interpreted
   * @return the stream, or absent if no log object exists for the task
   * @throws IOException if the object cannot be inspected or opened
   */
  @Override
  public Optional<InputStream> streamTaskLog(final String taskid, final long offset) throws IOException
  {
    final String taskKey = getTaskLogKey(taskid);
    return streamTaskFile(offset, taskKey);
  }

  /**
   * Streams the reports of the given task from the beginning.
   *
   * @param taskid id of the task
   * @return the stream, or absent if no reports object exists for the task
   * @throws IOException if the object cannot be inspected or opened
   */
  @Override
  public Optional<InputStream> streamTaskReports(String taskid) throws IOException
  {
    final String taskKey = getTaskReportKey(taskid);
    return streamTaskFile(0, taskKey);
  }

  /**
   * Streams the status of the given task from the beginning.
   *
   * @param taskid id of the task
   * @return the stream, or absent if no status object exists for the task
   * @throws IOException if the object cannot be inspected or opened
   */
  @Override
  public Optional<InputStream> streamTaskStatus(String taskid) throws IOException
  {
    final String taskKey = getTaskStatusKey(taskid);
    return streamTaskFile(0, taskKey);
  }

  /**
   * Opens a stream over the object stored at {@code taskKey} in the configured bucket.
   *
   * The start position is derived from {@code offset} as follows:
   * <ul>
   *   <li>positive and smaller than the object size: start at {@code offset};</li>
   *   <li>negative with a magnitude smaller than the object size: start that many bytes before the end;</li>
   *   <li>anything else (zero, or out of range in either direction): start at the beginning.</li>
   * </ul>
   *
   * @param offset  requested start position, interpreted as described above
   * @param taskKey object key to read
   * @return the stream, or absent if the object does not exist
   * @throws IOException if checking, sizing or opening the object fails
   */
  private Optional<InputStream> streamTaskFile(final long offset, String taskKey)
      throws IOException
  {
    try {
      if (!storage.exists(config.getBucket(), taskKey)) {
        return Optional.absent();
      }

      final long length = storage.size(config.getBucket(), taskKey);
      try {
        final long start;

        if (offset > 0 && offset < length) {
          start = offset;
        } else if (offset < 0 && offset > -length) {
          // Written as "offset > -length" rather than "-offset < length": negating Long.MIN_VALUE overflows back
          // to Long.MIN_VALUE, which would otherwise pass the check and produce a negative start position.
          start = length + offset;
        } else {
          start = 0;
        }

        return Optional.of(new GoogleByteSource(storage, config.getBucket(), taskKey).openStream(start));
      }
      catch (Exception e) {
        throw new IOException(e);
      }
    }
    catch (IOException e) {
      throw new IOE(e, "Failed to stream logs from: %s", taskKey);
    }
  }

  /**
   * Returns the object key of the log for {@code taskid}.
   */
  private String getTaskLogKey(String taskid)
  {
    return config.getPrefix() + "/" + taskid.replace(':', '_');
  }

  /**
   * Returns the object key of the reports file for {@code taskid}.
   */
  private String getTaskReportKey(String taskid)
  {
    return config.getPrefix() + "/" + taskid.replace(':', '_') + ".report.json";
  }

  /**
   * Returns the object key of the status file for {@code taskid}.
   */
  private String getTaskStatusKey(String taskid)
  {
    return config.getPrefix() + "/" + taskid.replace(':', '_') + ".status.json";
  }

  /**
   * Deletes every object under the configured prefix that was last updated before the current time reported by
   * the injected time supplier.
   *
   * @throws IOException if the deletion fails
   */
  @Override
  public void killAll() throws IOException
  {
    LOG.info(
        "Deleting all task logs from gs location [bucket: '%s' prefix: '%s'].",
        config.getBucket(),
        config.getPrefix()
    );

    long now = timeSupplier.getAsLong();
    killOlderThan(now);
  }

  /**
   * Deletes every object under the configured prefix whose last update time is strictly before
   * {@code timestampMs}.
   *
   * @param timestampMs cutoff, in milliseconds since the epoch
   * @throws IOException wrapping whatever failure the deletion raised
   */
  @Override
  public void killOlderThan(long timestampMs) throws IOException
  {
    LOG.info(
        "Deleting all task logs from gs location [bucket: '%s' prefix: '%s'] older than %s.",
        config.getBucket(),
        config.getPrefix(),
        new Date(timestampMs)
    );
    try {
      GoogleUtils.deleteObjectsInPath(
          storage,
          inputDataConfig,
          config.getBucket(),
          config.getPrefix(),
          (object) -> object.getLastUpdateTimeMillis() < timestampMs
      );
    }
    catch (Exception e) {
      LOG.error("Error occurred while deleting task log files from gs. Error: %s", e.getMessage());
      throw new IOException(e);
    }
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy