/*
* -\-\-
* Spotify Styx Common
* --
* Copyright (C) 2016 Spotify AB
* --
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
* -/-/-
*/
package com.spotify.styx.storage;

import com.google.cloud.datastore.DatastoreException;
import com.google.common.base.Throwables;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import com.spotify.styx.model.Event;
import com.spotify.styx.model.EventSerializer;
import com.spotify.styx.model.ExecutionStatus;
import com.spotify.styx.model.SequenceEvent;
import com.spotify.styx.model.WorkflowExecutionInfo;
import com.spotify.styx.model.WorkflowId;
import com.spotify.styx.model.WorkflowInstance;
import com.spotify.styx.model.WorkflowInstanceExecutionData;
import com.spotify.styx.util.ResourceNotFoundException;
import com.spotify.styx.util.RunnableWithException;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.Connection;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.client.Table;
import org.apache.hadoop.hbase.filter.FirstKeyOnlyFilter;
import org.apache.hadoop.hbase.util.Bytes;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.time.Duration;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.SortedSet;
import java.util.TreeSet;
import okio.ByteString;

/**
 * A backend for {@link AggregateStorage} backed by Google Bigtable.
 */
public class BigtableStorage {

private static final Logger LOG = LoggerFactory.getLogger(BigtableStorage.class);
// todo: remove when not used from API
public static final TableName EXECUTION_INFO_TABLE_NAME = TableName.valueOf("execution_info");
public static final TableName EVENTS_TABLE_NAME = TableName.valueOf("styx_events");
public static final byte[] INFO_CF = Bytes.toBytes("info");
public static final byte[] STATUS_QUALIFIER = Bytes.toBytes("status");
public static final byte[] PODNAME_QUALIFIER = Bytes.toBytes("pod_name"); // for backwards compatibility
public static final byte[] EXECUTION_ID_QUALIFIER = Bytes.toBytes("execution_id");
public static final byte[] EVENT_CF = Bytes.toBytes("event");
public static final byte[] EVENT_QUALIFIER = Bytes.toBytes("event");
public static final int MAX_BIGTABLE_RETRIES = 100;
private final Connection connection;
private final Duration retryBaseDelay;
private final EventSerializer eventSerializer = new EventSerializer();
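
  /**
   * Creates a storage instance on top of an already-established Bigtable connection.
   *
   * @param connection     an HBase-API {@link Connection} to the Bigtable cluster
   * @param retryBaseDelay the delay to sleep between retried store operations
   */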
BigtableStorage(Connection connection, Duration retryBaseDelay) {
this.connection = Objects.requireNonNull(connection);
this.retryBaseDelay = Objects.requireNonNull(retryBaseDelay);
}
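
  /**
   * Reads all stored events for a workflow instance.
   *
   * <p>Scans the events table for rows prefixed with {@code <instance-key>#} and returns the
   * parsed events sorted by their sequence counter.
   */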
  SortedSet<SequenceEvent> readEvents(WorkflowInstance workflowInstance) throws IOException {
final Table eventsTable = connection.getTable(EVENTS_TABLE_NAME);
final Scan scan = new Scan()
.setRowPrefixFilter(Bytes.toBytes(workflowInstance.toKey() + '#'));
    final SortedSet<SequenceEvent> set = newSortedEventSet();
for (Result result : eventsTable.getScanner(scan)) {
set.add(parseEventResult(result));
}
return set;
}
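
  /**
   * Stores a single sequence event.
   *
   * <p>The row key is the workflow instance key followed by the zero-padded sequence counter,
   * e.g. {@code <instance-key>#00000042}, and the cell timestamp is set to the event timestamp.
   * The write is retried via {@link #storeWithRetries(RunnableWithException)}.
   */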
void writeEvent(SequenceEvent sequenceEvent) throws IOException {
storeWithRetries(() -> {
final Table eventsTable = connection.getTable(EVENTS_TABLE_NAME);
final String workflowInstanceKey = sequenceEvent.event().workflowInstance().toKey();
      final String keyString = String.format("%s#%08d", workflowInstanceKey, sequenceEvent.counter());
final byte[] key = Bytes.toBytes(keyString);
final Put put = new Put(key, sequenceEvent.timestamp());
final byte[] eventBytes = eventSerializer.convert(sequenceEvent.event()).toByteArray();
put.addColumn(EVENT_CF, EVENT_QUALIFIER, eventBytes);
eventsTable.put(put);
});
}
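
  /**
   * Loads execution data for all instances of a workflow.
   *
   * <p>A {@link FirstKeyOnlyFilter} keeps the scan cheap: only one cell per row is fetched, just
   * enough to recover the distinct workflow instance keys before loading each instance in full.
   */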
  List<WorkflowInstanceExecutionData> executionData(WorkflowId workflowId) throws IOException {
final Table eventsTable = connection.getTable(EVENTS_TABLE_NAME);
final Scan scan = new Scan()
.setRowPrefixFilter(Bytes.toBytes(workflowId.toKey() + '#'))
.setFilter(new FirstKeyOnlyFilter());
    final Set<WorkflowInstance> workflowInstancesSet = Sets.newHashSet();
for (Result result : eventsTable.getScanner(scan)) {
final String key = new String(result.getRow());
final int lastHash = key.lastIndexOf('#');
final WorkflowInstance wfi = WorkflowInstance.parseKey(key.substring(0, lastHash));
workflowInstancesSet.add(wfi);
}
    final List<WorkflowInstanceExecutionData> workflowInstanceDataList = Lists.newArrayList();
for (WorkflowInstance workflowInstance : workflowInstancesSet) {
workflowInstanceDataList.add(executionData(workflowInstance));
}
workflowInstanceDataList.sort(WorkflowInstanceExecutionData.COMPARATOR);
return workflowInstanceDataList;
}
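
  /**
   * Returns the sequence counter of the most recently stored event for a workflow instance,
   * if any events are stored. The reduction {@code (a, b) -> b} picks the last element of the
   * counter-ordered event set.
   */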
  Optional<Long> getLatestStoredCounter(WorkflowInstance workflowInstance) throws IOException {
    final SortedSet<SequenceEvent> storedEvents = readEvents(workflowInstance);
    final Optional<SequenceEvent> lastStoredEvent = storedEvents.stream().reduce((a, b) -> b);
if (lastStoredEvent.isPresent()) {
return Optional.of(lastStoredEvent.get().counter());
} else {
return Optional.empty();
}
}
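
  /**
   * Loads execution data for a single workflow instance, built from its stored event sequence.
   *
   * @throws IOException if no events are stored for the instance
   */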
WorkflowInstanceExecutionData executionData(WorkflowInstance workflowInstance) throws IOException {
    final SortedSet<SequenceEvent> events = readEvents(workflowInstance);
if (events.isEmpty()) {
throw new IOException("Workflow instance not found");
}
return WorkflowInstanceExecutionData.fromEvents(events);
}
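
  /**
   * Stores execution status and, when present, the execution id for a workflow execution.
   * Note that this write is not wrapped in {@link #storeWithRetries(RunnableWithException)}.
   */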
void store(WorkflowExecutionInfo workflowExecutionInfo) throws IOException {
final Table execInfo = connection.getTable(EXECUTION_INFO_TABLE_NAME);
final byte[] key = Bytes.toBytes(workflowExecutionInfo.toKey());
final Put put = new Put(key);
put.addColumn(INFO_CF, STATUS_QUALIFIER, Bytes.toBytes(workflowExecutionInfo.executionStatus().toString()));
if (workflowExecutionInfo.executionId().isPresent()) {
put.addColumn(INFO_CF, EXECUTION_ID_QUALIFIER, Bytes.toBytes(workflowExecutionInfo.executionId().get()));
}
execInfo.put(put);
}
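
  /**
   * Reads all execution info rows for a workflow, grouped per workflow instance. Each
   * instance's list is sorted by {@link WorkflowExecutionInfo#WHEN_COMPARATOR}.
   */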
  Map<WorkflowInstance, List<WorkflowExecutionInfo>> getExecutionInfo(WorkflowId workflowId)
      throws IOException {
final Table execInfo = connection.getTable(EXECUTION_INFO_TABLE_NAME);
final Scan scan = new Scan()
.setRowPrefixFilter(Bytes.toBytes(workflowId.toKey() + '#'));
    final Map<WorkflowInstance, List<WorkflowExecutionInfo>> map = new HashMap<>();
for (Result r : execInfo.getScanner(scan)) {
final WorkflowExecutionInfo workflowExecutionInfo = parseExecutionInfoResult(r);
final WorkflowInstance workflowInstance = workflowExecutionInfo.workflowInstance();
map.computeIfAbsent(workflowInstance, (ignore) -> Lists.newArrayList())
.add(workflowExecutionInfo);
}
map.forEach((key, list) -> list.sort(WorkflowExecutionInfo.WHEN_COMPARATOR));
return map;
}
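
  /**
   * Reads all execution info rows for a single workflow instance, sorted by
   * {@link WorkflowExecutionInfo#WHEN_COMPARATOR}.
   */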
  List<WorkflowExecutionInfo> getExecutionInfo(WorkflowInstance workflowInstance)
      throws IOException {
final Table execInfo = connection.getTable(EXECUTION_INFO_TABLE_NAME);
final Scan scan = new Scan()
.setRowPrefixFilter(Bytes.toBytes(workflowInstance.toKey() + '#'));
    final List<WorkflowExecutionInfo> executionInfos = Lists.newArrayList();
for (Result r : execInfo.getScanner(scan)) {
final WorkflowExecutionInfo workflowExecutionInfo = parseExecutionInfoResult(r);
executionInfos.add(workflowExecutionInfo);
}
executionInfos.sort(WorkflowExecutionInfo.WHEN_COMPARATOR);
return executionInfos;
}
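
  /**
   * Parses a row from the execution info table. Reads the execution id from the
   * {@code execution_id} column, falling back to the legacy {@code pod_name} column for rows
   * written by older versions.
   */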
private WorkflowExecutionInfo parseExecutionInfoResult(Result r) throws IOException {
final String key = new String(r.getRow());
final byte[] statusValue = r.getValue(INFO_CF, STATUS_QUALIFIER);
final byte[] executionIdValue = r.getValue(INFO_CF, EXECUTION_ID_QUALIFIER);
final byte[] podNameValue = r.getValue(INFO_CF, PODNAME_QUALIFIER); // for backwards compatibility
final String status = statusValue == null ? "" : new String(statusValue);
final String executionId;
if (executionIdValue != null) {
executionId = new String(executionIdValue);
} else if (podNameValue != null) {
executionId = new String(podNameValue);
} else {
executionId = "";
}
final ExecutionStatus executionStatus = ExecutionStatus.valueOf(status);
final WorkflowExecutionInfo workflowExecutionInfo;
try {
workflowExecutionInfo = WorkflowExecutionInfo.parseKey(key, executionStatus, executionId);
} catch (Throwable t) {
throw new IOException("Failed to parse execution info key: " + key + ". " + t.getMessage(), t);
}
return workflowExecutionInfo;
}
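
  /**
   * Parses a row from the events table into a {@link SequenceEvent}, deserializing the event
   * payload and using the cell timestamp as the event timestamp.
   */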
private SequenceEvent parseEventResult(Result r) throws IOException {
final String key = new String(r.getRow());
final long timestamp = r.getColumnLatestCell(EVENT_CF, EVENT_QUALIFIER).getTimestamp();
final byte[] value = r.getValue(EVENT_CF, EVENT_QUALIFIER);
final Event event = eventSerializer.convert(ByteString.of(value));
return SequenceEvent.parseKey(key, event, timestamp);
}
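
  /**
   * Runs a store operation, retrying up to {@link #MAX_BIGTABLE_RETRIES} times with a fixed
   * delay of {@code retryBaseDelay} between attempts. {@link ResourceNotFoundException} is
   * rethrown immediately; other failures are logged and retried.
   *
   * <p>A minimal usage sketch (hypothetical call site, not from the original source):
   * <pre>{@code
   * storeWithRetries(() -> someTable.put(somePut));
   * }</pre>
   */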
private void storeWithRetries(RunnableWithException storingOperation) throws IOException {
int storeRetries = 0;
boolean succeeded = false;
while (storeRetries < MAX_BIGTABLE_RETRIES && !succeeded) {
try {
storingOperation.run();
succeeded = true;
} catch (ResourceNotFoundException e) {
throw e;
} catch (DatastoreException | IOException e) {
storeRetries++;
if (storeRetries == MAX_BIGTABLE_RETRIES) {
throw e;
}
LOG.warn(String.format("Failed to read/write from/to Bigtable (attempt #%d)", storeRetries), e);
try {
Thread.sleep(retryBaseDelay.toMillis());
} catch (InterruptedException e1) {
throw Throwables.propagate(e1);
}
}
}
}

  private static TreeSet<SequenceEvent> newSortedEventSet() {
return Sets.newTreeSet(SequenceEvent.COUNTER_COMPARATOR);
}
}