/*-
 * -\-\-
 * Spotify Styx Common
 * --
 * Copyright (C) 2016 Spotify AB
 * --
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *      http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * -/-/-
 */

package com.spotify.styx.storage;

import static com.spotify.styx.serialization.Json.deserializeEvent;
import static com.spotify.styx.serialization.Json.serialize;

import com.google.cloud.datastore.DatastoreException;
import com.google.common.base.Strings;
import com.google.common.base.Throwables;
import com.google.common.collect.Sets;
import com.spotify.styx.model.Event;
import com.spotify.styx.model.SequenceEvent;
import com.spotify.styx.model.WorkflowId;
import com.spotify.styx.model.WorkflowInstance;
import com.spotify.styx.model.data.WorkflowInstanceExecutionData;
import com.spotify.styx.util.ResourceNotFoundException;
import com.spotify.styx.util.RunnableWithException;
import java.io.IOException;
import java.time.Duration;
import java.util.List;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.SortedSet;
import java.util.TreeSet;
import java.util.stream.Collectors;
import okio.ByteString;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.filter.FirstKeyOnlyFilter;
import org.apache.hadoop.hbase.util.Bytes;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * A Google Bigtable backend for {@link AggregateStorage}.
 */
public class BigtableStorage {

  private static final Logger LOG = LoggerFactory.getLogger(BigtableStorage.class);

  public static final TableName EVENTS_TABLE_NAME = TableName.valueOf("styx_events");

  public static final byte[] EVENT_CF = Bytes.toBytes("event");
  public static final byte[] EVENT_QUALIFIER = Bytes.toBytes("event");

  public static final int MAX_BIGTABLE_RETRIES = 100;

  private final Connection connection;
  private final Duration retryBaseDelay;

  BigtableStorage(Connection connection, Duration retryBaseDelay) {
    this.connection = Objects.requireNonNull(connection);
    this.retryBaseDelay = Objects.requireNonNull(retryBaseDelay);
  }

  SortedSet<SequenceEvent> readEvents(WorkflowInstance workflowInstance) throws IOException {
    try (final Table eventsTable = connection.getTable(EVENTS_TABLE_NAME)) {
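      // Event rows are keyed "<instance-key>#<counter>", so a prefix scan over
      // "<instance-key>#" returns exactly this instance's events.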
      final Scan scan = new Scan()
          .setRowPrefixFilter(Bytes.toBytes(workflowInstance.toKey() + '#'));

      final SortedSet<SequenceEvent> set = newSortedEventSet();

      for (Result result : eventsTable.getScanner(scan)) {
        set.add(parseEventResult(result));
      }
      return set;
    }
  }

  void writeEvent(SequenceEvent sequenceEvent) throws IOException {
    storeWithRetries(() -> {
      try (final Table eventsTable = connection.getTable(EVENTS_TABLE_NAME)) {
        final String workflowInstanceKey = sequenceEvent.event().workflowInstance().toKey();
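        // Zero-padding the counter to eight digits makes lexicographic row-key
        // ordering match the numeric event order within an instance.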
        final String keyString = String.format("%s#%08d", workflowInstanceKey, sequenceEvent.counter());
        final byte[] key = Bytes.toBytes(keyString);
        final Put put = new Put(key, sequenceEvent.timestamp());

        final byte[] eventBytes = serialize(sequenceEvent.event()).toByteArray();
        put.addColumn(EVENT_CF, EVENT_QUALIFIER, eventBytes);
        eventsTable.put(put);
      }
    });
  }

  List<WorkflowInstanceExecutionData> executionData(WorkflowId workflowId, String offset, int limit)
      throws IOException {
    try (final Table eventsTable = connection.getTable(EVENTS_TABLE_NAME)) {
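      // Only the row keys are consumed below; FirstKeyOnlyFilter limits each
      // row to its first cell to minimize the data returned by the scan.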
      final Scan scan = new Scan()
          .setRowPrefixFilter(Bytes.toBytes(workflowId.toKey() + '#'))
          .setFilter(new FirstKeyOnlyFilter());

      if (!Strings.isNullOrEmpty(offset)) {
        final WorkflowInstance offsetInstance = WorkflowInstance.create(workflowId, offset);
        scan.setStartRow(Bytes.toBytes(offsetInstance.toKey() + '#'));
      }

      final Set<WorkflowInstance> workflowInstancesSet = Sets.newHashSet();
      try (ResultScanner scanner = eventsTable.getScanner(scan)) {
        Result result = scanner.next();
        while (result != null) {
          final String key = Bytes.toString(result.getRow());
          final int lastHash = key.lastIndexOf('#');

          final WorkflowInstance wfi = WorkflowInstance.parseKey(key.substring(0, lastHash));
          workflowInstancesSet.add(wfi);
          if (workflowInstancesSet.size() == limit) {
            break;
          }

          result = scanner.next();
        }
      }

      return executionData(workflowInstancesSet);
    }
  }

  List<WorkflowInstanceExecutionData> executionData(WorkflowId workflowId, String start, String stop)
      throws IOException {
    try (final Table eventsTable = connection.getTable(EVENTS_TABLE_NAME)) {
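      // Bounded scan: the start row is inclusive and, when a stop parameter is
      // given, the stop row is exclusive; both stay within the workflow prefix.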
      final Scan scan = new Scan()
          .setRowPrefixFilter(Bytes.toBytes(workflowId.toKey() + '#'))
          .setFilter(new FirstKeyOnlyFilter());

      final WorkflowInstance startRow = WorkflowInstance.create(workflowId, start);
      scan.setStartRow(Bytes.toBytes(startRow.toKey() + '#'));

      if (!Strings.isNullOrEmpty(stop)) {
        final WorkflowInstance stopRow = WorkflowInstance.create(workflowId, stop);
        scan.setStopRow(Bytes.toBytes(stopRow.toKey() + '#'));
      }

      final Set<WorkflowInstance> workflowInstancesSet = Sets.newHashSet();
      try (ResultScanner scanner = eventsTable.getScanner(scan)) {
        Result result = scanner.next();
        while (result != null) {
          final String key = Bytes.toString(result.getRow());
          final int lastHash = key.lastIndexOf('#');

          final WorkflowInstance wfi = WorkflowInstance.parseKey(key.substring(0, lastHash));
          workflowInstancesSet.add(wfi);

          result = scanner.next();
        }
      }

      return executionData(workflowInstancesSet);
    }
  }

  Optional<Long> getLatestStoredCounter(WorkflowInstance workflowInstance)
      throws IOException {
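    // readEvents returns events sorted by counter, so reducing to the last
    // element yields the event with the highest stored counter.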
    final SortedSet<SequenceEvent> storedEvents = readEvents(workflowInstance);
    final Optional<SequenceEvent> lastStoredEvent = storedEvents.stream().reduce((a, b) -> b);
    if (lastStoredEvent.isPresent()) {
      return Optional.of(lastStoredEvent.get().counter());
    } else {
      return Optional.empty();
    }
  }

  WorkflowInstanceExecutionData executionData(WorkflowInstance workflowInstance) throws IOException {
    SortedSet<SequenceEvent> events = readEvents(workflowInstance);
    if (events.isEmpty()) {
      throw new IOException("Workflow instance not found");
    }

    return WorkflowInstanceExecutionData.fromEvents(events);
  }

  private List<WorkflowInstanceExecutionData> executionData(
      Set<WorkflowInstance> workflowInstancesSet) {
    return workflowInstancesSet.parallelStream()
        .map(workflowInstance -> {
          try {
            return executionData(workflowInstance);
          } catch (IOException e) {
            throw Throwables.propagate(e);
          }
        })
        .sorted(WorkflowInstanceExecutionData.COMPARATOR)
        .collect(Collectors.toList());
  }

  private SequenceEvent parseEventResult(Result r) throws IOException {
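    // Rebuild the SequenceEvent from its row key (instance key plus counter),
    // the stored cell timestamp, and the serialized event payload.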
    final String key = Bytes.toString(r.getRow());
    final long timestamp = r.getColumnLatestCell(EVENT_CF, EVENT_QUALIFIER).getTimestamp();
    final byte[] value = r.getValue(EVENT_CF, EVENT_QUALIFIER);
    final Event event = deserializeEvent(ByteString.of(value));
    return SequenceEvent.parseKey(key, event, timestamp);
  }

  private void storeWithRetries(RunnableWithException storingOperation) throws IOException {
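    // Runs the operation up to MAX_BIGTABLE_RETRIES times, sleeping the fixed
    // base delay between attempts; a missing resource fails fast without retry.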
    int storeRetries = 0;
    boolean succeeded = false;

    while (storeRetries < MAX_BIGTABLE_RETRIES && !succeeded) {
      try {
        storingOperation.run();
        succeeded = true;
      } catch (ResourceNotFoundException e) {
        throw e;
      } catch (DatastoreException | IOException e) {
        storeRetries++;
        if (storeRetries == MAX_BIGTABLE_RETRIES) {
          throw e;
        }
        LOG.warn(String.format("Failed to write to Bigtable (attempt #%d)", storeRetries), e);
        try {
          Thread.sleep(retryBaseDelay.toMillis());
        } catch (InterruptedException e1) {
          Thread.currentThread().interrupt();
          throw Throwables.propagate(e1);
        }
      }
    }
  }

  private static TreeSet<SequenceEvent> newSortedEventSet() {
    return Sets.newTreeSet(SequenceEvent.COUNTER_COMPARATOR);
  }
}
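
A minimal usage sketch, not part of the original file: it assumes the bigtable-hbase client's BigtableConfiguration.connect(projectId, instanceId) helper and a WorkflowId.create(componentId, id) factory; the project, instance, and workflow names are placeholders. Because the constructor and readEvents are package-private, a caller like this would sit in the same com.spotify.styx.storage package.

package com.spotify.styx.storage;

import com.google.cloud.bigtable.hbase.BigtableConfiguration;
import com.spotify.styx.model.SequenceEvent;
import com.spotify.styx.model.WorkflowId;
import com.spotify.styx.model.WorkflowInstance;
import java.time.Duration;
import java.util.SortedSet;
import org.apache.hadoop.hbase.client.Connection;

class BigtableStorageExample {

  public static void main(String[] args) throws Exception {
    // "my-project" and "my-instance" are placeholders for a real GCP project
    // and Bigtable instance.
    try (Connection connection = BigtableConfiguration.connect("my-project", "my-instance")) {
      final BigtableStorage storage = new BigtableStorage(connection, Duration.ofSeconds(1));

      // Hypothetical component, workflow, and parameter identifiers.
      final WorkflowId workflowId = WorkflowId.create("my-component", "my-workflow");
      final WorkflowInstance instance = WorkflowInstance.create(workflowId, "2016-01-01");

      // Events come back sorted by counter (see newSortedEventSet above).
      final SortedSet<SequenceEvent> events = storage.readEvents(instance);
      events.forEach(e -> System.out.println(e.counter() + ": " + e.event()));
    }
  }
}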