/*
 * All downloads are free. Search and download functionality uses the official Maven repository.
 *
 * org.vertexium.accumulo.StreamingPropertyValueTableData Maven / Gradle / Ivy
 *
 * There is a newer version: 4.10.0
 * Show newest version
 */
package org.vertexium.accumulo;

import com.google.common.collect.Lists;
import com.google.common.primitives.Longs;
import org.apache.accumulo.core.client.IteratorSetting;
import org.apache.accumulo.core.client.ScannerBase;
import org.apache.accumulo.core.client.TableNotFoundException;
import org.apache.accumulo.core.data.Key;
import org.apache.accumulo.core.data.Range;
import org.apache.accumulo.core.data.Value;
import org.apache.accumulo.core.iterators.user.TimestampFilter;
import org.apache.accumulo.core.trace.Span;
import org.apache.accumulo.core.trace.Trace;
import org.apache.hadoop.io.Text;
import org.vertexium.VertexiumException;
import org.vertexium.accumulo.util.RangeUtils;
import org.vertexium.property.StreamingPropertyValue;
import org.vertexium.util.ByteRingBuffer;

import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.Map;

public class StreamingPropertyValueTableData extends StreamingPropertyValue {
    private static final long serialVersionUID = 1897402273830254711L;
    public static final Text METADATA_COLUMN_FAMILY = new Text("a"); // this should sort before the data
    public static final Text DATA_COLUMN_FAMILY = new Text("d");
    public static final Text METADATA_LENGTH_COLUMN_QUALIFIER = new Text("length");
    private final AccumuloGraph graph;
    private final String dataRowKey;
    private Long length;
    private final long timestamp;

    public StreamingPropertyValueTableData(
        AccumuloGraph graph,
        String dataRowKey,
        Class valueType,
        Long length,
        long timestamp
    ) {
        super(valueType);
        this.graph = graph;
        this.dataRowKey = dataRowKey;
        this.length = length;
        this.timestamp = timestamp;
    }

    @Override
    public Long getLength() {
        return length;
    }

    @Override
    public InputStream getInputStream() {
        return new DataTableInputStream();
    }

    private class DataTableInputStream extends InputStream {
        private final ByteRingBuffer buffer = new ByteRingBuffer(1024 * 1024);
        private long timerStartTime;
        private Span trace;
        private ScannerBase scanner;
        private Iterator> scannerIterator;
        private long previousLoadedDataLength;
        private long loadedDataLength;
        private boolean closed;

        private long markRowIndex = 0;
        private long markByteOffsetInRow = 0;
        private long markLoadedDataLength = 0;
        private long currentDataRowIndex = -1;
        private long currentByteOffsetInRow;

        @Override
        public int read(byte[] dest, int off, int len) throws IOException {
            if (len == 0) {
                return 0;
            }
            len = Math.min(len, buffer.getSize());
            while (buffer.getUsed() == 0 && loadMoreData()) {

            }
            if (buffer.getUsed() == 0) {
                return -1;
            }

            int bytesRead = buffer.read(dest, off, len);
            currentByteOffsetInRow += bytesRead;
            return bytesRead;
        }

        @Override
        public int read() throws IOException {
            if (buffer.getUsed() < 1) {
                loadMoreData();
                if (buffer.getUsed() == 0) {
                    return -1;
                }
            }
            currentByteOffsetInRow++;
            return buffer.read();
        }

        @Override
        public void close() throws IOException {
            if (closed) {
                return;
            }
            scannerIterator = null;
            if (scanner != null) {
                scanner.close();
                scanner = null;
            }
            if (trace != null) {
                trace.stop();
                trace = null;
            }

            graph.getGraphLogger().logEndIterator(System.currentTimeMillis() - timerStartTime);
            super.close();
            closed = true;
        }

        private boolean loadMoreData() throws IOException {
            if (closed) {
                return false;
            }
            Iterator> it = getScannerIterator();
            while (true) {
                if (!it.hasNext()) {
                    close();
                    return false;
                }
                Map.Entry column = it.next();
                if (column.getKey().getColumnFamily().equals(METADATA_COLUMN_FAMILY)) {
                    if (column.getKey().getColumnQualifier().equals(METADATA_LENGTH_COLUMN_QUALIFIER)) {
                        length = Longs.fromByteArray(column.getValue().get());
                        continue;
                    }

                    throw new VertexiumException("unexpected metadata column qualifier: " + column.getKey().getColumnQualifier() + " (row: " + column.getKey().getRow() + ")");
                }

                if (column.getKey().getColumnFamily().equals(DATA_COLUMN_FAMILY)) {
                    currentDataRowIndex++;
                    currentByteOffsetInRow = 0;

                    byte[] data = column.getValue().get();
                    if (length == null) {
                        throw new VertexiumException("unexpected missing length (row: " + column.getKey().getRow() + ")");
                    }
                    long len = Math.min(data.length, length - loadedDataLength);
                    buffer.write(data, 0, (int) len);
                    previousLoadedDataLength = loadedDataLength;
                    loadedDataLength += len;
                    return true;
                }

                throw new VertexiumException("unexpected column family: " + column.getKey().getColumnFamily() + " (row: " + column.getKey().getRow() + ")");
            }
        }

        private Iterator> getScannerIterator() throws IOException {
            if (closed) {
                throw new IOException("stream already closed");
            }
            if (scannerIterator != null) {
                return scannerIterator;
            }
            scannerIterator = getScanner().iterator();
            return scannerIterator;
        }

        private ScannerBase getScanner() throws IOException {
            if (closed) {
                throw new IOException("stream already closed");
            }
            if (scanner != null) {
                return scanner;
            }
            ArrayList ranges = Lists.newArrayList(RangeUtils.createRangeFromString(dataRowKey));

            timerStartTime = System.currentTimeMillis();
            try {
                scanner = graph.createBatchScanner(graph.getDataTableName(), ranges, new org.apache.accumulo.core.security.Authorizations());
            } catch (TableNotFoundException ex) {
                throw new VertexiumException("Could not create scanner", ex);
            }

            IteratorSetting iteratorSetting = new IteratorSetting(
                80,
                TimestampFilter.class.getSimpleName(),
                TimestampFilter.class
            );
            TimestampFilter.setStart(iteratorSetting, timestamp, true);
            TimestampFilter.setEnd(iteratorSetting, timestamp, true);
            scanner.addScanIterator(iteratorSetting);

            graph.getGraphLogger().logStartIterator(graph.getDataTableName(), scanner);
            trace = Trace.start("streamingPropertyValueTableData");
            trace.data("dataRowKeyCount", Integer.toString(1));
            return scanner;
        }

        @Override
        public synchronized void mark(int readlimit) {
            markRowIndex = Math.max(0, currentDataRowIndex);
            markByteOffsetInRow = currentByteOffsetInRow;
            markLoadedDataLength = previousLoadedDataLength;
        }

        @Override
        public synchronized void reset() throws IOException {
            buffer.clear();
            if (scannerIterator != null) {
                scannerIterator = null;
            }

            closed = false;

            currentDataRowIndex = -1;
            currentByteOffsetInRow = 0;
            loadedDataLength = markLoadedDataLength;

            Iterator> it = getScannerIterator();
            while (true) {
                if (!it.hasNext()) {
                    close();
                    return;
                }
                Map.Entry column = it.next();
                if (column.getKey().getColumnFamily().equals(DATA_COLUMN_FAMILY)) {
                    currentDataRowIndex++;
                    currentByteOffsetInRow = 0;
                    if (currentDataRowIndex == markRowIndex) {
                        byte[] data = column.getValue().get();
                        long len = Math.min(data.length, length - loadedDataLength);
                        buffer.write(data, 0, (int) len);
                        loadedDataLength += len;
                        while (currentByteOffsetInRow != markByteOffsetInRow) {
                            buffer.read();
                            currentByteOffsetInRow++;
                        }
                        return;
                    }
                }
            }
        }

        @Override
        public boolean markSupported() {
            return true;
        }
    }
}




// © 2015 - 2024 Weber Informatics LLC | Privacy Policy