// org.vertexium.accumulo.tools.DeleteHistoricalLegacyStreamingPropertyValueData (Maven / Gradle / Ivy)
package org.vertexium.accumulo.tools;
import org.apache.accumulo.core.client.BatchWriter;
import org.apache.accumulo.core.client.MutationsRejectedException;
import org.apache.accumulo.core.client.Scanner;
import org.apache.accumulo.core.data.Key;
import org.apache.accumulo.core.data.Mutation;
import org.apache.accumulo.core.data.Value;
import org.vertexium.Authorizations;
import org.vertexium.VertexiumException;
import org.vertexium.accumulo.AccumuloGraph;
import org.vertexium.accumulo.keys.DataTableRowKey;
import org.vertexium.util.VertexiumLogger;
import org.vertexium.util.VertexiumLoggerFactory;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
import java.util.Map;
import static org.vertexium.accumulo.ElementMutationBuilder.EMPTY_TEXT;
/**
 * Maintenance tool that scans the Accumulo data table and deletes older versions of
 * legacy streaming property value (SPV) entries, keeping only the newest
 * {@link Options#getVersionsToKeep()} entries of each group.
 *
 * <p>Only entries whose column family and column qualifier are both empty, and whose row id
 * splits into at least three parts on {@link DataTableRowKey#VALUE_SEPARATOR}, are considered.
 * Entries are grouped by the first three parts of the row id (presumably these identify a
 * single property; verify against {@code DataTableRowKey}) and ordered by Accumulo timestamp.
 *
 * <p>To run in the Vertexium CLI:
 * <pre>
 * d = new org.vertexium.accumulo.tools.DeleteHistoricalLegacyStreamingPropertyValueData(g)
 * options = new org.vertexium.accumulo.tools.DeleteHistoricalLegacyStreamingPropertyValueData.Options()
 * options.setDryRun(false)
 * options.setVersionsToKeep(1)
 * d.execute(options, auths)
 * </pre>
 */
public class DeleteHistoricalLegacyStreamingPropertyValueData {
    private static final VertexiumLogger LOGGER = VertexiumLoggerFactory.getLogger(DeleteHistoricalLegacyStreamingPropertyValueData.class);

    /** Number of scanned entries between writer flushes / progress log lines. */
    private static final int PROGRESS_INTERVAL = 10000;

    private final AccumuloGraph graph;

    /**
     * @param graph the graph whose data table will be scanned and cleaned up
     */
    public DeleteHistoricalLegacyStreamingPropertyValueData(AccumuloGraph graph) {
        this.graph = graph;
    }

    /**
     * Scans the whole data table and deletes (or, in dry-run mode, only logs) the historical
     * entries that exceed the configured number of versions to keep.
     *
     * @param options        controls dry-run mode and how many versions of each group survive
     * @param authorizations authorizations used for the scan
     * @throws VertexiumException if scanning or writing the delete mutations fails
     */
    public void execute(Options options, Authorizations authorizations) {
        try {
            org.apache.accumulo.core.security.Authorizations accumuloAuthorizations = graph.toAccumuloAuthorizations(authorizations);
            Scanner scanner = graph.getConnector().createScanner(graph.getDataTableName(), accumuloAuthorizations);
            try {
                BatchWriter writer = graph.getConnector().createBatchWriter(
                    graph.getDataTableName(),
                    graph.getConfiguration().createBatchWriterConfig()
                );
                try {
                    scanAndDelete(scanner, writer, options);
                } finally {
                    // close() flushes pending mutations and releases the writer's resources;
                    // the writer is created here, so it is this method's job to close it.
                    writer.close();
                }
            } finally {
                // Nested finally guarantees the scanner is released even if the writer
                // could not be created or failed while closing.
                scanner.close();
            }
        } catch (Exception ex) {
            throw new VertexiumException("Could not delete old SPV data", ex);
        }
    }

    /**
     * Walks the scanner, collecting consecutive keys that share a row id prefix, and hands
     * each completed group to {@link #deleteRows(BatchWriter, List, Options)}.
     */
    private void scanAndDelete(Scanner scanner, BatchWriter writer, Options options) throws MutationsRejectedException {
        String lastRowIdPrefix = null;
        List<Key> rowsToDelete = new ArrayList<>();
        // long: a full-table scan can exceed Integer.MAX_VALUE entries
        long rowCount = 0;
        for (Map.Entry<Key, Value> row : scanner) {
            Key key = row.getKey();
            if (rowCount % PROGRESS_INTERVAL == 0) {
                writer.flush();
                LOGGER.debug("looking at row: %s (row count: %d)", key.getRow().toString(), rowCount);
            }
            rowCount++;

            // Only entries with an empty column family and qualifier are candidates.
            if (!EMPTY_TEXT.equals(key.getColumnFamily())) {
                continue;
            }
            if (!EMPTY_TEXT.equals(key.getColumnQualifier())) {
                continue;
            }

            String rowId = key.getRow().toString();
            String[] rowIdParts = rowId.split("" + DataTableRowKey.VALUE_SEPARATOR);
            if (rowIdParts.length < 3) {
                continue;
            }

            // The scan is ordered by row, so a prefix change means the previous group is complete.
            if (lastRowIdPrefix == null || !isSameProperty(lastRowIdPrefix, rowId)) {
                deleteRows(writer, rowsToDelete, options);
                rowsToDelete.clear();
                lastRowIdPrefix = toRowIdPrefix(rowIdParts);
            }
            rowsToDelete.add(key);
        }
        // Process the final group, which no prefix change will ever trigger.
        deleteRows(writer, rowsToDelete, options);
    }

    /** Joins the first three row id parts back together with the value separator. */
    private String toRowIdPrefix(String[] rowIdParts) {
        return rowIdParts[0]
            + DataTableRowKey.VALUE_SEPARATOR
            + rowIdParts[1]
            + DataTableRowKey.VALUE_SEPARATOR
            + rowIdParts[2];
    }

    /**
     * @return true if {@code rowId} equals the prefix or extends it with a separator,
     * i.e. it belongs to the group currently being collected
     */
    private boolean isSameProperty(String lastRowIdPrefix, String rowId) {
        return rowId.startsWith(lastRowIdPrefix + DataTableRowKey.VALUE_SEPARATOR)
            || lastRowIdPrefix.equals(rowId);
    }

    /**
     * Sorts the group by timestamp (oldest first, in place) and deletes every entry except
     * the newest {@code versionsToKeep}. In dry-run mode nothing is written; the entries that
     * would be deleted and the ones that would be kept are only logged.
     *
     * @param writer       destination for the delete mutations
     * @param rowsToDelete keys of one group; may be empty
     * @param options      dry-run flag and number of versions to keep
     * @throws MutationsRejectedException if the writer rejects a mutation
     */
    private void deleteRows(BatchWriter writer, List<Key> rowsToDelete, Options options) throws MutationsRejectedException {
        rowsToDelete.sort(Comparator.comparingLong(Key::getTimestamp));
        // Clamp so a negative versionsToKeep behaves like 0 and the subtraction cannot overflow.
        int keepCount = Math.max(0, options.getVersionsToKeep());
        int deleteCount = Math.max(0, rowsToDelete.size() - keepCount);
        for (int i = 0; i < rowsToDelete.size(); i++) {
            Key key = rowsToDelete.get(i);
            if (i < deleteCount) {
                LOGGER.debug("deleting row: %s", key.getRow().toString());
                if (!options.isDryRun()) {
                    Mutation mutation = new Mutation(key.getRow());
                    mutation.putDelete(
                        key.getColumnFamily(),
                        key.getColumnQualifier(),
                        key.getColumnVisibilityParsed(),
                        key.getTimestamp()
                    );
                    writer.addMutation(mutation);
                }
            } else if (options.isDryRun()) {
                LOGGER.debug("skipping row: %s", key.getRow().toString());
            }
        }
    }

    /**
     * Settings for {@link #execute(Options, Authorizations)}. Defaults to a dry run that
     * keeps one version.
     */
    public static class Options {
        private int versionsToKeep = 1;
        private boolean dryRun = true;

        /**
         * @return how many of the newest entries of each group are kept
         */
        public int getVersionsToKeep() {
            return versionsToKeep;
        }

        /**
         * @param versionsToKeep how many of the newest entries of each group to keep;
         *                       values below zero are treated as zero when deleting
         * @return this instance, for chaining
         */
        public Options setVersionsToKeep(int versionsToKeep) {
            this.versionsToKeep = versionsToKeep;
            return this;
        }

        /**
         * @return true if deletions are only logged and not written
         */
        public boolean isDryRun() {
            return dryRun;
        }

        /**
         * @param dryRun true to only log what would be deleted, false to write the deletes
         * @return this instance, for chaining
         */
        public Options setDryRun(boolean dryRun) {
            this.dryRun = dryRun;
            return this;
        }
    }
}