io.datakernel.crdt.local.CrdtStorageFs Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of datakernel-crdt Show documentation
Conflict-free replicated data type implementation for DataKernel Framework.
/*
* Copyright (C) 2015-2019 SoftIndex LLC.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.datakernel.crdt.local;
import io.datakernel.async.service.EventloopService;
import io.datakernel.bytebuf.ByteBuf;
import io.datakernel.bytebuf.ByteBufQueue;
import io.datakernel.common.Initializable;
import io.datakernel.crdt.*;
import io.datakernel.crdt.primitives.CrdtType;
import io.datakernel.csp.ChannelConsumer;
import io.datakernel.csp.ChannelSupplier;
import io.datakernel.datastream.StreamConsumer;
import io.datakernel.datastream.StreamDataAcceptor;
import io.datakernel.datastream.StreamSupplier;
import io.datakernel.datastream.csp.ChannelDeserializer;
import io.datakernel.datastream.csp.ChannelSerializer;
import io.datakernel.datastream.processor.StreamFilter;
import io.datakernel.datastream.processor.StreamMapper;
import io.datakernel.datastream.processor.StreamReducerSimple;
import io.datakernel.datastream.processor.StreamReducers;
import io.datakernel.datastream.stats.StreamStats;
import io.datakernel.datastream.stats.StreamStatsBasic;
import io.datakernel.datastream.stats.StreamStatsDetailed;
import io.datakernel.eventloop.Eventloop;
import io.datakernel.eventloop.jmx.EventloopJmxMBeanEx;
import io.datakernel.jmx.api.JmxAttribute;
import io.datakernel.jmx.api.JmxOperation;
import io.datakernel.promise.Promise;
import io.datakernel.promise.Promises;
import io.datakernel.promise.jmx.PromiseStats;
import io.datakernel.remotefs.FileMetadata;
import io.datakernel.remotefs.FsClient;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.time.Duration;
import java.util.*;
import java.util.function.Function;
import java.util.stream.Stream;
import static java.nio.charset.StandardCharsets.UTF_8;
import static java.util.stream.Collectors.toList;
public final class CrdtStorageFs, S> implements CrdtStorage,
Initializable>, EventloopService, EventloopJmxMBeanEx {
private static final Logger logger = LoggerFactory.getLogger(CrdtStorageFs.class);
private final Eventloop eventloop;
private final FsClient client;
private final CrdtFunction function;
private final CrdtDataSerializer serializer;
private Function namingStrategy = ext -> UUID.randomUUID().toString() + "." + ext;
private Duration consolidationMargin = Duration.ofMinutes(30);
private FsClient consolidationFolderClient;
private FsClient tombstoneFolderClient;
private CrdtFilter filter = $ -> true;
// region JMX
private boolean detailedStats;
private final StreamStatsBasic> uploadStats = StreamStats.basic();
private final StreamStatsDetailed> uploadStatsDetailed = StreamStats.detailed();
private final StreamStatsBasic> downloadStats = StreamStats.basic();
private final StreamStatsDetailed> downloadStatsDetailed = StreamStats.detailed();
private final StreamStatsBasic removeStats = StreamStats.basic();
private final StreamStatsDetailed removeStatsDetailed = StreamStats.detailed();
private final PromiseStats consolidationStats = PromiseStats.create(Duration.ofMinutes(5));
// endregion
// region creators
private CrdtStorageFs(
Eventloop eventloop,
FsClient client,
FsClient consolidationFolderClient, FsClient tombstoneFolderClient, CrdtDataSerializer serializer, CrdtFunction function
) {
this.eventloop = eventloop;
this.client = client;
this.function = function;
this.serializer = serializer;
this.consolidationFolderClient = consolidationFolderClient;
this.tombstoneFolderClient = tombstoneFolderClient;
}
public static , S> CrdtStorageFs create(
Eventloop eventloop, FsClient client,
CrdtDataSerializer serializer,
CrdtFunction function
) {
return new CrdtStorageFs<>(eventloop, client, client.subfolder(".consolidation"), client.subfolder(".tombstones"), serializer, function);
}
public static , S extends CrdtType> CrdtStorageFs create(
Eventloop eventloop, FsClient client,
CrdtDataSerializer serializer
) {
return new CrdtStorageFs<>(eventloop, client, client.subfolder(".consolidation"), client.subfolder(".tombstones"), serializer, CrdtFunction.ofCrdtType());
}
public CrdtStorageFs withConsolidationMargin(Duration consolidationMargin) {
this.consolidationMargin = consolidationMargin;
return this;
}
public CrdtStorageFs withNamingStrategy(Function namingStrategy) {
this.namingStrategy = namingStrategy;
return this;
}
public CrdtStorageFs withConsolidationFolder(String subfolder) {
consolidationFolderClient = client.subfolder(subfolder);
return this;
}
public CrdtStorageFs withTombstoneFolder(String subfolder) {
tombstoneFolderClient = client.subfolder(subfolder);
return this;
}
public CrdtStorageFs withConsolidationFolderClient(FsClient consolidationFolderClient) {
this.consolidationFolderClient = consolidationFolderClient;
return this;
}
public CrdtStorageFs withFilter(CrdtFilter filter) {
this.filter = filter;
return this;
}
public CrdtStorageFs withTombstoneFolderClient(FsClient tombstoneFolderClient) {
this.tombstoneFolderClient = tombstoneFolderClient;
return this;
}
// endregion
@NotNull
@Override
public Eventloop getEventloop() {
return eventloop;
}
@Override
public Promise>> upload() {
return client.upload(namingStrategy.apply("bin"))
.map(consumer -> StreamConsumer.>ofSupplier(supplier -> supplier
.transformWith(detailedStats ? uploadStatsDetailed : uploadStats)
.transformWith(ChannelSerializer.create(serializer))
.streamTo(consumer))
.withLateBinding());
}
@Override
public Promise>> download(long timestamp) {
return Promises.toTuple(client.list("*"), tombstoneFolderClient.list("*"))
.map(f -> {
StreamReducerSimple, CrdtData, CrdtAccumulator> reducer =
StreamReducerSimple.create(x -> x.key, Comparator.naturalOrder(), new CrdtReducer());
Stream stream = f.getValue1().stream();
Stream> files = (timestamp == 0 ? stream : stream.filter(m -> m.getTimestamp() >= timestamp))
.map(meta -> ChannelSupplier.ofPromise(client.download(meta.getName()))
.transformWith(ChannelDeserializer.create(serializer))
.transformWith(StreamMapper.create(data -> {
S partial = function.extract(data.getState(), timestamp);
return partial != null ? new CrdtReducingData<>(data.getKey(), partial, meta.getTimestamp()) : null;
}))
.transformWith(StreamFilter.create(Objects::nonNull))
.streamTo(reducer.newInput()));
stream = f.getValue2().stream();
Stream> tombstones = (timestamp == 0 ? stream : stream.filter(m -> m.getTimestamp() >= timestamp))
.map(meta -> ChannelSupplier.ofPromise(tombstoneFolderClient.download(meta.getName()))
.transformWith(ChannelDeserializer.create(serializer.getKeySerializer()))
.transformWith(StreamMapper.create(key -> new CrdtReducingData<>(key, (S) null, meta.getTimestamp())))
.streamTo(reducer.newInput()));
Promise process = Promises.all(Stream.concat(files, tombstones));
return reducer.getOutput()
.transformWith(detailedStats ? downloadStatsDetailed : downloadStats)
.withLateBinding();
});
}
@Override
public Promise> remove() {
return tombstoneFolderClient.upload(namingStrategy.apply("tomb"))
.map(consumer -> StreamConsumer.ofSupplier(supplier -> supplier
.transformWith(detailedStats ? removeStatsDetailed : removeStats)
.transformWith(ChannelSerializer.create(serializer.getKeySerializer()))
.streamTo(consumer))
.withLateBinding());
}
@Override
public Promise ping() {
return client.ping();
}
@NotNull
@Override
public Promise start() {
return Promise.complete();
}
@NotNull
@Override
public Promise stop() {
return Promise.complete();
}
public Promise consolidate() {
long barrier = eventloop.currentInstant().minus(consolidationMargin).toEpochMilli();
Set blacklist = new HashSet<>();
return consolidationFolderClient.list("*")
.then(list ->
Promises.all(list.stream()
.filter(meta -> meta.getTimestamp() > barrier)
.map(meta -> ChannelSupplier.ofPromise(client.download(meta.getName()))
.toCollector(ByteBufQueue.collector())
.whenResult(byteBuf -> blacklist.addAll(Arrays.asList(byteBuf.asString(UTF_8).split("\n"))))
.toVoid())))
.then($ -> client.list("*"))
.then(list -> {
String name = namingStrategy.apply("bin");
List files = list.stream()
.map(FileMetadata::getName)
.filter(fileName -> !blacklist.contains(fileName))
.collect(toList());
String dump = String.join("\n", files);
logger.info("started consolidating into {} from {}", name, files);
String metafile = namingStrategy.apply("dump");
return consolidationFolderClient.upload(metafile)
.then(consumer ->
ChannelSupplier.of(ByteBuf.wrapForReading(dump.getBytes(UTF_8)))
.streamTo(consumer))
.then($ -> download())
.then(producer -> producer
.transformWith(ChannelSerializer.create(serializer))
.streamTo(ChannelConsumer.ofPromise(client.upload(name))))
.then($ -> tombstoneFolderClient.list("*")
.map(fileList -> Promises.sequence(fileList.stream()
.map(file -> () -> tombstoneFolderClient.delete(file.getName()))))
)
.then($ -> consolidationFolderClient.delete(metafile))
.then($ -> Promises.all(files.stream().map(client::delete)));
})
.whenComplete(consolidationStats.recordStats());
}
static class CrdtReducingData, S> {
final K key;
@Nullable
final S state;
final long timestamp;
CrdtReducingData(K key, @Nullable S state, long timestamp) {
this.key = key;
this.state = state;
this.timestamp = timestamp;
}
}
static class CrdtAccumulator {
@Nullable
S state;
long maxAppendTimestamp;
long maxRemoveTimestamp;
CrdtAccumulator(@Nullable S state, long maxAppendTimestamp, long maxRemoveTimestamp) {
this.state = state;
this.maxAppendTimestamp = maxAppendTimestamp;
this.maxRemoveTimestamp = maxRemoveTimestamp;
}
}
class CrdtReducer implements StreamReducers.Reducer, CrdtData, CrdtAccumulator> {
@Override
public CrdtAccumulator onFirstItem(StreamDataAcceptor> stream, K key, CrdtReducingData firstValue) {
if (firstValue.state != null) {
return new CrdtAccumulator<>(firstValue.state, firstValue.timestamp, 0);
}
return new CrdtAccumulator<>(null, 0, firstValue.timestamp);
}
@Override
public CrdtAccumulator onNextItem(StreamDataAcceptor> stream, K key, CrdtReducingData nextValue, CrdtAccumulator accumulator) {
if (nextValue.state != null) {
accumulator.state = accumulator.state != null ? function.merge(accumulator.state, nextValue.state) : nextValue.state;
if (nextValue.timestamp > accumulator.maxAppendTimestamp) {
accumulator.maxAppendTimestamp = nextValue.timestamp;
}
} else if (nextValue.timestamp > accumulator.maxRemoveTimestamp) {
accumulator.maxRemoveTimestamp = nextValue.timestamp;
}
return accumulator;
}
@Override
public void onComplete(StreamDataAcceptor> stream, K key, CrdtAccumulator accumulator) {
if (accumulator.state != null
&& accumulator.maxRemoveTimestamp < accumulator.maxAppendTimestamp
&& filter.test(accumulator.state)) {
stream.accept(new CrdtData<>(key, accumulator.state));
}
}
}
// region JMX
@JmxOperation
public void startDetailedMonitoring() {
detailedStats = true;
}
@JmxOperation
public void stopDetailedMonitoring() {
detailedStats = false;
}
@JmxAttribute
public StreamStatsBasic getUploadStats() {
return uploadStats;
}
@JmxAttribute
public StreamStatsDetailed getUploadStatsDetailed() {
return uploadStatsDetailed;
}
@JmxAttribute
public StreamStatsBasic getDownloadStats() {
return downloadStats;
}
@JmxAttribute
public StreamStatsDetailed getDownloadStatsDetailed() {
return downloadStatsDetailed;
}
@JmxAttribute
public StreamStatsBasic getRemoveStats() {
return removeStats;
}
@JmxAttribute
public StreamStatsDetailed getRemoveStatsDetailed() {
return removeStatsDetailed;
}
@JmxAttribute
public PromiseStats getConsolidationStats() {
return consolidationStats;
}
// endregion
}