All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.vertexium.accumulo.tools.DeleteHistoricalLegacyStreamingPropertyValueData Maven / Gradle / Ivy

There is a newer version: 4.10.0
Show newest version
package org.vertexium.accumulo.tools;

import org.apache.accumulo.core.client.BatchWriter;
import org.apache.accumulo.core.client.MutationsRejectedException;
import org.apache.accumulo.core.client.Scanner;
import org.apache.accumulo.core.data.Key;
import org.apache.accumulo.core.data.Mutation;
import org.apache.accumulo.core.data.Value;
import org.vertexium.Authorizations;
import org.vertexium.VertexiumException;
import org.vertexium.accumulo.AccumuloGraph;
import org.vertexium.accumulo.keys.DataTableRowKey;
import org.vertexium.util.VertexiumLogger;
import org.vertexium.util.VertexiumLoggerFactory;

import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
import java.util.Map;

import static org.vertexium.accumulo.ElementMutationBuilder.EMPTY_TEXT;

/**
 * To run in the Vertexium CLI
 * 

* d = new org.vertexium.accumulo.tools.DeleteHistoricalLegacyStreamingPropertyValueData(g) * options = new org.vertexium.accumulo.tools.DeleteHistoricalLegacyStreamingPropertyValueData.Options() * options.setDryRun(false) * options.setVersionsToKeep(1) * d.execute(options, auths) */ public class DeleteHistoricalLegacyStreamingPropertyValueData { private static final VertexiumLogger LOGGER = VertexiumLoggerFactory.getLogger(DeleteHistoricalLegacyStreamingPropertyValueData.class); private final AccumuloGraph graph; public DeleteHistoricalLegacyStreamingPropertyValueData(AccumuloGraph graph) { this.graph = graph; } public void execute(Options options, Authorizations authorizations) { try { org.apache.accumulo.core.security.Authorizations accumuloAuthorizations = graph.toAccumuloAuthorizations(authorizations); Scanner scanner = graph.getConnector().createScanner(graph.getDataTableName(), accumuloAuthorizations); BatchWriter writer = graph.getConnector().createBatchWriter( graph.getDataTableName(), graph.getConfiguration().createBatchWriterConfig() ); String lastRowIdPrefix = null; List rowsToDelete = new ArrayList<>(); try { int rowCount = 0; for (Map.Entry row : scanner) { if (rowCount % 10000 == 0) { writer.flush(); LOGGER.debug("looking at row: %s (row count: %d)", row.getKey().getRow().toString(), rowCount); } rowCount++; if (!EMPTY_TEXT.equals(row.getKey().getColumnFamily())) { continue; } if (!EMPTY_TEXT.equals(row.getKey().getColumnQualifier())) { continue; } String rowId = row.getKey().getRow().toString(); String[] rowIdParts = rowId.split("" + DataTableRowKey.VALUE_SEPARATOR); if (rowIdParts.length < 3) { continue; } if (lastRowIdPrefix == null || !isSameProperty(lastRowIdPrefix, rowId)) { deleteRows(writer, rowsToDelete, options); rowsToDelete.clear(); lastRowIdPrefix = rowIdParts[0] + DataTableRowKey.VALUE_SEPARATOR + rowIdParts[1] + DataTableRowKey.VALUE_SEPARATOR + rowIdParts[2]; } rowsToDelete.add(row.getKey()); } deleteRows(writer, rowsToDelete, options); } finally { writer.flush(); scanner.close(); } } catch (Exception ex) { throw new VertexiumException("Could not delete old SPV data", ex); } } private boolean isSameProperty(String lastRowIdPrefix, String rowId) { return rowId.startsWith(lastRowIdPrefix + DataTableRowKey.VALUE_SEPARATOR) || lastRowIdPrefix.equals(rowId); } private void deleteRows(BatchWriter writer, List rowsToDelete, Options options) throws MutationsRejectedException { rowsToDelete.sort(Comparator.comparingLong(Key::getTimestamp)); int i = 0; for (Key key : rowsToDelete) { if (i < rowsToDelete.size() - options.getVersionsToKeep()) { LOGGER.debug("deleting row: %s", key.getRow().toString()); if (!options.isDryRun()) { Mutation mutation = new Mutation(key.getRow()); mutation.putDelete( key.getColumnFamily(), key.getColumnQualifier(), key.getColumnVisibilityParsed(), key.getTimestamp() ); writer.addMutation(mutation); } } else { if (options.isDryRun()) { LOGGER.debug("skipping row: %s", key.getRow().toString()); } } i++; } } public static class Options { private int versionsToKeep = 1; private boolean dryRun = true; public int getVersionsToKeep() { return versionsToKeep; } public Options setVersionsToKeep(int versionsToKeep) { this.versionsToKeep = versionsToKeep; return this; } public boolean isDryRun() { return dryRun; } public Options setDryRun(boolean dryRun) { this.dryRun = dryRun; return this; } } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy