All downloads are free. The search and download features use the official Maven repository.
Please wait. This can take a few minutes...
Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance.
Project price: only $1
You can buy this project and download or modify it as often as you want.
com.uid2.shared.optout.OptOutCloudSync Maven / Gradle / Ivy
package com.uid2.shared.optout;
import com.uid2.shared.Const;
import com.uid2.shared.Utils;
import com.uid2.shared.cloud.CloudStorageException;
import com.uid2.shared.cloud.CloudUtils;
import com.uid2.shared.cloud.ICloudStorage;
import com.uid2.shared.vertx.ICloudSync;
import io.vertx.core.Vertx;
import io.vertx.core.json.JsonObject;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.net.MalformedURLException;
import java.net.URL;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.time.Instant;
import java.time.temporal.ChronoUnit;
import java.util.*;
import java.util.concurrent.atomic.AtomicReference;
import java.util.function.Consumer;
import java.util.function.Function;
import java.util.stream.Collectors;
public class OptOutCloudSync implements ICloudSync {
private static final Logger LOGGER = LoggerFactory.getLogger(OptOutCloudSync.class);
private final boolean fullSync;
private final String cloudFolder;
private final String deltaConsumerDir;
private final String partitionConsumerDir;
private final String cloudPartitionFolder;
private final String cloudDeltaRootFolder;
private final String cloudSyntheticFolder;
private final int deltaBacktrackInDays;
private final int replicaId;
private final int maxReplicas;
private final FileUtils fileUtils;
private final boolean syntheticLogsEnabled;
private final int syntheticLogsCount;
private Vertx vertx = null;
private String eventMergeDelta = null;
private Consumer> handlerIndexUpdate = null;
private AtomicReference>>> handlersNewCloudPaths = new AtomicReference<>(new ArrayList<>());
public OptOutCloudSync(JsonObject jsonConfig, boolean fullSync) {
this.fullSync = fullSync;
this.cloudFolder = CloudUtils.normalizDirPath(jsonConfig.getString(Const.Config.OptOutS3FolderProp));
this.deltaConsumerDir = OptOutUtils.getDeltaConsumerDir(jsonConfig);
this.partitionConsumerDir = OptOutUtils.getPartitionConsumerDir(jsonConfig);
assert cloudFolder != null && deltaConsumerDir != null && partitionConsumerDir != null;
this.cloudPartitionFolder = this.cloudFolder + "partition/";
this.cloudDeltaRootFolder = this.cloudFolder + "delta/";
this.fileUtils = new FileUtils(jsonConfig);
this.deltaBacktrackInDays = jsonConfig.getInteger(Const.Config.OptOutDeltaBacktrackInDaysProp);
assert this.deltaBacktrackInDays > 0;
this.replicaId = OptOutUtils.getReplicaId(jsonConfig);
this.maxReplicas = jsonConfig.getInteger(Const.Config.OptOutProducerMaxReplicasProp, 0);
assert this.maxReplicas > 0;
boolean isSyntheticLogsEnabled;
try {
isSyntheticLogsEnabled = jsonConfig.getBoolean(Const.Config.OptOutSyntheticLogsEnabledProp);
} catch (Exception ex){
isSyntheticLogsEnabled = false;
}
this.syntheticLogsEnabled = isSyntheticLogsEnabled;
if (isSyntheticLogsEnabled) {
this.syntheticLogsCount = jsonConfig.getInteger(Const.Config.OptOutSyntheticLogsCountProp);
this.cloudSyntheticFolder = this.cloudFolder + "synthetic/";
} else {
this.syntheticLogsCount = 0;
this.cloudSyntheticFolder = null;
}
this.mkdirsBlocking();
}
@Override
public String toCloudPath(String path) {
if (OptOutUtils.isDeltaFile(path)) {
return newCloudPathForDelta(path);
} else if (OptOutUtils.isPartitionFile(path)) {
return newCloudPathForPartition(path);
} else {
return null;
}
}
@Override
public String toLocalPath(String path) {
if (path.startsWith("http")) {
try {
URL url = new URL(path);
// use the URL path to convert to local file
path = url.getPath();
} catch (MalformedURLException e) {
LOGGER.error("Unable to parse preSignedUrl (" + path + "): " + e.getMessage(), e);
}
}
if (OptOutUtils.isDeltaFile(path)) {
return newLocalPathForDelta(path);
} else if (OptOutUtils.isPartitionFile(path)) {
return newLocalForPartition(path);
} else if (syntheticLogsEnabled && OptOutUtils.isSyntheticFile(path)) {
// synthetic file is a partition file (pre-sorted), we can handle it the same as partition file
return newLocalForPartition(path);
} else {
return null;
}
}
@Override
public boolean refresh(Instant now, ICloudStorage fsCloud, ICloudStorage fsLocal, Consumer> handleDownloads, Consumer> handleDeletes) throws CloudStorageException {
// list local cached paths
List cachedPathList = new ArrayList<>();
localListFiles(fsLocal, this.deltaConsumerDir, OptOutUtils.prefixDeltaFile, cachedPathList);
localListFiles(fsLocal, this.partitionConsumerDir, OptOutUtils.prefixPartitionFile, cachedPathList);
// list cloud paths
List cloudPathList = this.cloudListRelevantFiles(fsCloud);
// saving a copy of refreshed files for both remote and local
Set cloudPaths = new HashSet<>(cloudPathList);
Set cachedPaths = new HashSet<>(cachedPathList);
// saving a map that can map localPath back to cloudPath
// this is needed because with preSignedURL cloud -> local conversion is lossy
Map localToCloud = cloudPaths.stream()
.collect(Collectors.toMap(this::toLocalPath, Function.identity()));
// convert cloudPath to localPath, and remove cached local files
Set missing = cloudPaths.stream()
.map(this::toLocalPath)
.collect(Collectors.toSet());
missing.removeAll(cachedPaths);
// use local to cloud map to retrieve list of cloud files to download
missing = missing.stream()
.map(p -> localToCloud.get(p))
.collect(Collectors.toSet());
// invoke callback to handle downloads
handleDownloads.accept(missing);
Set deletes = cachedPaths.stream()
.filter(f -> !localToCloud.keySet().contains(f))
.collect(Collectors.toSet());
// invoke callback to delete files
handleDeletes.accept(deletes);
// provide cloudPaths to registered handlers
for (Consumer> handler : this.handlersNewCloudPaths.get()) {
handler.accept(cloudPaths);
}
// return true when there are no missing files && no deleting files
boolean inSync = missing.size() == 0 && deletes.size() == 0;
if (inSync) {
if (this.enableDeltaMerging()) {
Collection deltasToMerge = this.getDeltasToMerge(now, cloudPaths, cachedPaths);
if (deltasToMerge != null) {
if (deltasToMerge.size() == 0) {
LOGGER.warn("Skip partition produce due to no delta files found between now and last partition");
} else {
LOGGER.debug("sending " + this.eventMergeDelta);
vertx.eventBus().send(this.eventMergeDelta, Utils.toJson(deltasToMerge));
}
}
} else if (this.handlerIndexUpdate != null) {
this.handlerIndexUpdate.accept(cachedPaths);
}
}
return inSync;
}
public Object registerNewCloudPathsHandler(Consumer> handler) {
List>> newHandlerList = new ArrayList<>(this.handlersNewCloudPaths.get());
newHandlerList.add(handler);
this.handlersNewCloudPaths.set(Collections.unmodifiableList(newHandlerList));
return handler;
}
public void unregisterNewCloudPathsHandler(Object handler) {
Consumer> typedHandler = (Consumer>) handler;
List>> newHandlerList = new ArrayList<>(this.handlersNewCloudPaths.get());
newHandlerList.remove(typedHandler);
this.handlersNewCloudPaths.set(Collections.unmodifiableList(newHandlerList));
}
public void registerNewCachedPathsHandler(Consumer> handler) {
if (handlerIndexUpdate != null) throw new UnsupportedOperationException("already set");
this.handlerIndexUpdate = handler;
}
public boolean enableDeltaMerging() {
return this.vertx != null && this.eventMergeDelta != null;
}
public void enableDeltaMerging(Vertx vertx, String event) {
this.eventMergeDelta = event;
this.vertx = vertx;
}
private Collection getDeltasToMerge(Instant now, Set cloudPaths, Set cachedPaths) {
LOGGER.trace("getDeltasToMerge: evaluating...");
// get last partition timestamp
Instant tsLast = OptOutUtils.lastPartitionTimestamp(cachedPaths);
// get new partition timestamp
Instant ts = fileUtils.truncateToPartitionCutoffTime(now);
// if partition time is before last partition, no need to produce new partition
if (ts.isBefore(tsLast)) {
LOGGER.trace("getDeltasToMerge: found recent last partition at " + ts + ", skipping");
return null;
}
// if new partition time has not passed yet, no ned to produce new partition
if (ts.isAfter(now)) {
LOGGER.info("getDeltasToMerge: next partition scheduled at " + ts + ", too early for next partition");
return null;
}
// each replica will have its time range to take turns (grace period is set to 3 * delta interval)
int replicaInTurn = (int) (now.getEpochSecond() - ts.getEpochSecond()) / fileUtils.lookbackGracePeriod();
if (replicaInTurn >= this.maxReplicas) replicaInTurn %= this.maxReplicas;
if (replicaInTurn != this.replicaId) {
LOGGER.info("getDeltasToMerge: replica " + replicaInTurn + " needs to produce partition, this is replica " + this.replicaId);
return null;
}
// find delta files that falls in the time window
Instant tsOld = tsLast.equals(Instant.EPOCH) ? tsLast : tsLast.minusSeconds(fileUtils.lookbackGracePeriod());
Instant tsNew = now;
HashSet cached = new HashSet<>(fileUtils.filterFileInRange(cachedPaths, tsOld, tsNew).stream()
.filter(OptOutUtils::isDeltaFile).collect(Collectors.toList()));
HashSet remote = new HashSet<>(fileUtils.filterFileInRange(cloudPaths, tsOld, tsNew).stream()
.map(this::toLocalPath)
.filter(OptOutUtils::isDeltaFile).collect(Collectors.toList()));
// skip if not all delta files within the time window has already been downloaded
if (!cached.equals(remote)) return null;
// return the list of delta files
Collection ret = Collections.unmodifiableSet(cached);
LOGGER.info("getDeltasToMerge found " + ret.size() + " delta files to merge");
return ret;
}
private String newCloudPathForDelta(String fileToUpload) {
Path path = Paths.get(fileToUpload);
String fileName = path.getFileName().toString();
String ts = OptOutUtils.getFileTimestamp(path).toString(); // 2020-12-06T02:49:39.606119Z
String dateStr = ts.substring(0, 10);
return CloudUtils.normalizeFilePath(Paths.get(this.cloudFolder, "delta", dateStr, fileName));
}
private String newCloudPathForPartition(String fileToUpload) {
Path path = Paths.get(fileToUpload);
String fileName = path.getFileName().toString();
return CloudUtils.normalizeFilePath(Paths.get(this.cloudFolder, "partition", fileName));
}
private String newLocalPathForDelta(String fileToDownload) {
Path path = Paths.get(fileToDownload);
String fileName = path.getFileName().toString();
return Paths.get(this.deltaConsumerDir, fileName).toString();
}
private String newLocalForPartition(String fileToDownload) {
Path path = Paths.get(fileToDownload);
String fileName = path.getFileName().toString();
return Paths.get(this.partitionConsumerDir, fileName).toString();
}
private String getCloudDeltaFolder(Instant day) {
day.truncatedTo(ChronoUnit.DAYS);
String dateStr = OptOutUtils.getDateStr(day) + "/";
return this.cloudDeltaRootFolder + dateStr;
}
private void localListFiles(ICloudStorage fsLocal, String dirToScan, String filePrefix,
List cachedPaths) throws CloudStorageException {
List found = fsLocal.list(dirToScan);
for (String f : found) {
Path p = Paths.get(f);
if (!p.getFileName().toString().startsWith(filePrefix)) {
LOGGER.warn("Not under " + filePrefix + ", unknown file " + f);
} else if (OptOutUtils.getFileTimestamp(f) == null) {
LOGGER.warn("Unrecognized timestamp, unknown file " + f);
}
cachedPaths.add(f);
}
}
private List cloudListRelevantFiles(ICloudStorage cloudStorage) throws CloudStorageException {
if (fullSync) {
List fileList = cloudStorage.list(this.cloudPartitionFolder);
fileList.addAll(cloudStorage.list(this.cloudDeltaRootFolder));
if (syntheticLogsEnabled) fileList.addAll(listSyntheticLogs(cloudStorage));
return fileList;
}
// list all partitions
List cloudFiles = cloudStorage.list(this.cloudPartitionFolder);
Instant tsLast = OptOutUtils.lastPartitionTimestamp(cloudFiles);
if (tsLast == Instant.EPOCH) {
// if there are no partition yet, list all delta files under delta root
cloudFiles.addAll(cloudStorage.list(this.cloudDeltaRootFolder));
} else {
Instant t = tsLast.minus(this.deltaBacktrackInDays, ChronoUnit.DAYS);
Instant now = Instant.now();
// list all deltas generated after before (the date last partition file is created - N days)
while (t.isBefore(now)) {
String deltaPrefixToList = this.getCloudDeltaFolder(t);
cloudFiles.addAll(cloudStorage.list(deltaPrefixToList));
t = t.plus(1, ChronoUnit.DAYS);
}
}
if (this.syntheticLogsEnabled) {
cloudFiles.addAll(this.listSyntheticLogs(cloudStorage));
}
return cloudFiles;
}
private List listSyntheticLogs(ICloudStorage cloudStorage) throws CloudStorageException {
return cloudStorage.list(this.cloudSyntheticFolder).stream()
.sorted()
.limit(this.syntheticLogsCount)
.collect(Collectors.toList());
}
private void mkdirsBlocking() {
Utils.ensureDirectoryExists(this.deltaConsumerDir);
Utils.ensureDirectoryExists(this.partitionConsumerDir);
}
}