io.questdb.cairo.TableBlockWriter

/*******************************************************************************
 *     ___                  _   ____  ____
 *    / _ \ _   _  ___  ___| |_|  _ \| __ )
 *   | | | | | | |/ _ \/ __| __| | | |  _ \
 *   | |_| | |_| |  __/\__ \ |_| |_| | |_) |
 *    \__\_\\__,_|\___||___/\__|____/|____/
 *
 *  Copyright (c) 2014-2019 Appsicle
 *  Copyright (c) 2019-2020 QuestDB
 *
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *  http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 *
 ******************************************************************************/

package io.questdb.cairo;

import static io.questdb.cairo.TableUtils.iFile;

import java.io.Closeable;
import java.util.Arrays;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.locks.LockSupport;

import io.questdb.MessageBus;
import io.questdb.cairo.sql.RecordMetadata;
import io.questdb.cairo.vm.VmUtils;
import io.questdb.log.Log;
import io.questdb.log.LogFactory;
import io.questdb.mp.AbstractQueueConsumerJob;
import io.questdb.mp.RingQueue;
import io.questdb.mp.Sequence;
import io.questdb.std.Files;
import io.questdb.std.FilesFacade;
import io.questdb.std.LongList;
import io.questdb.std.LongObjHashMap;
import io.questdb.std.Misc;
import io.questdb.std.ObjList;
import io.questdb.std.Unsafe;
import io.questdb.std.Vect;
import io.questdb.std.datetime.microtime.Timestamps;
import io.questdb.std.str.Path;

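/**
 * Appends pre-formed page frames of column data directly to a table's
 * partition files. Data copies and variable-size column index rebuilds are
 * distributed to worker threads through the table block writer queue on the
 * {@link MessageBus}, falling back to the calling thread when the queue is full.
 *
 * A minimal usage sketch (the {@code TableWriter}, timestamps and native page
 * frame addresses are assumed to be supplied by the caller):
 * <pre>
 *     TableBlockWriter blockWriter = ...; // engine-managed, package-private
 *     blockWriter.open(tableWriter);
 *     blockWriter.startPageFrame(timestampLo);
 *     blockWriter.appendPageFrameColumn(columnIndex, pageFrameSize, sourceAddress);
 *     blockWriter.commit(); // or cancel() to roll back
 * </pre>
 */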
public class TableBlockWriter implements Closeable {
    private static final Log LOG = LogFactory.getLog(TableBlockWriter.class);
    private static final Timestamps.TimestampFloorMethod NO_PARTITIONING_FLOOR = (ts) -> 0;
    private final CharSequence root;
    private final FilesFacade ff;
    private final int mkDirMode;
    private final RingQueue<TableBlockWriterTaskHolder> queue;
    private final Sequence pubSeq;
    private final LongList columnRowsAdded = new LongList();
    private final LongObjHashMap<PartitionBlockWriter> partitionBlockWriterByTimestamp = new LongObjHashMap<>();
    private final ObjList<PartitionBlockWriter> partitionBlockWriters = new ObjList<>();
    private final ObjList<TableBlockWriterTask> concurrentTasks = new ObjList<>();
    private final AtomicInteger nCompletedConcurrentTasks = new AtomicInteger();
    private TableWriter writer;
    private RecordMetadata metadata;
    private int columnCount;
    private int partitionBy;
    private Timestamps.TimestampFloorMethod timestampFloorMethod;
    private int timestampColumnIndex;
    private long firstTimestamp;
    private long lastTimestamp;
    private int nextPartitionBlockWriterIndex;
    private int nEnqueuedConcurrentTasks;
    private PartitionBlockWriter partWriter;

    TableBlockWriter(CairoConfiguration configuration, MessageBus messageBus) {
        root = configuration.getRoot();
        this.ff = configuration.getFilesFacade();
        this.mkDirMode = configuration.getMkDirMode();
        queue = messageBus.getTableBlockWriterQueue();
        pubSeq = messageBus.getTableBlockWriterPubSeq();
    }

    public void appendPageFrameColumn(int columnIndex, long pageFrameSize, long sourceAddress) {
        LOG.info().$("appending data").$(" [tableName=").$(writer.getTableName()).$(", columnIndex=").$(columnIndex).$(", pageFrameSize=").$(pageFrameSize).$(']').$();
        if (columnIndex == timestampColumnIndex) {
            long firstBlockTimestamp = Unsafe.getUnsafe().getLong(sourceAddress);
            if (firstBlockTimestamp < firstTimestamp) {
                firstTimestamp = firstBlockTimestamp;
            }
            long addr = sourceAddress + pageFrameSize - Long.BYTES;
            long lastBlockTimestamp = Unsafe.getUnsafe().getLong(addr);
            if (lastBlockTimestamp > lastTimestamp) {
                lastTimestamp = lastBlockTimestamp;
            }
        }
        partWriter.appendPageFrameColumn(columnIndex, pageFrameSize, sourceAddress);
    }

    public void cancel() {
        completePendingConcurrentTasks(true);
        writer.cancelRow();
        for (int n = 0; n < nextPartitionBlockWriterIndex; n++) {
            partitionBlockWriters.getQuick(n).cancel();
        }
        writer.purgeUnusedPartitions();
        LOG.info().$("cancelled new block [table=").$(writer.getTableName()).$(']').$();
        clear();
    }

    @Override
    public void close() {
        clear();
        Misc.freeObjList(partitionBlockWriters);
        partitionBlockWriters.clear();
    }

    public void commit() {
        LOG.info().$("committing block write").$(" [tableName=").$(writer.getTableName()).$(", firstTimestamp=").$ts(firstTimestamp).$(", lastTimestamp=").$ts(lastTimestamp).$(']').$();
        // Need to complete all data tasks before we can start index tasks
        completePendingConcurrentTasks(false);
        for (int n = 0; n < nextPartitionBlockWriterIndex; n++) {
            partitionBlockWriters.getQuick(n).startCommitAppendedBlock();
        }
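        // Index-rebuild tasks were just enqueued by startCommitAppendedBlock();
        // wait for them before the per-partition row counts are totalled.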
        completePendingConcurrentTasks(false);
        for (int n = 0; n < nextPartitionBlockWriterIndex; n++) {
            partitionBlockWriters.getQuick(n).completeCommitAppendedBlock();
        }
        writer.commitBlock(firstTimestamp);
        LOG.info().$("committed new block [table=").$(writer.getTableName()).$(']').$();
        clear();
    }

    public void startPageFrame(long timestampLo) {
        partWriter = getPartitionBlockWriter(timestampLo);
        partWriter.startPageFrame(timestampLo);
    }

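    // Maps [mapOffset, mapOffset + mapSz) of the file, rounding the offset down
    // to a page boundary as mmap requires, and returns a pointer to mapOffset
    // itself. For example (assuming a 4096-byte page size): mapOffset = 10_000
    // maps from 8192 with 1808 extra leading bytes and returns address + 1808.
    // The file is pre-allocated so the mapping never extends past EOF.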
    private static long mapFile(FilesFacade ff, long fd, final long mapOffset, final long mapSz) {
        long alignedMapOffset = (mapOffset / ff.getPageSize()) * ff.getPageSize();
        long addressOffsetDueToAlignment = mapOffset - alignedMapOffset;
        long alignedMapSz = mapSz + addressOffsetDueToAlignment;
        long fileSz = ff.length(fd);
        long minFileSz = mapOffset + alignedMapSz;
        if (fileSz < minFileSz) {
            if (!ff.allocate(fd, minFileSz)) {
                throw CairoException.instance(ff.errno()).put("Could not allocate file for append fd=").put(fd).put(", offset=").put(mapOffset).put(", size=")
                        .put(mapSz);
            }
        }
        long address = ff.mmap(fd, alignedMapSz, alignedMapOffset, Files.MAP_RW);
        if (address == -1) {
            int errno = ff.errno();
            throw CairoException.instance(errno).put("Could not mmap append fd=").put(fd).put(", offset=").put(mapOffset).put(", size=").put(mapSz).put(", errno=")
                    .put(errno);
        }
        assert (address / ff.getPageSize()) * ff.getPageSize() == address; // address MUST be page aligned
        return address + addressOffsetDueToAlignment;
    }

    private static void unmapFile(FilesFacade ff, final long address, final long mapSz) {
        long alignedAddress = (address / ff.getPageSize()) * ff.getPageSize();
        long alignedMapSz = mapSz + address - alignedAddress;
        ff.munmap(alignedAddress, alignedMapSz);
    }

    void clear() {
        if (nCompletedConcurrentTasks.get() < nEnqueuedConcurrentTasks) {
            LOG.error().$("new block should have been either committed or cancelled [table=").$(writer.getTableName()).$(']').$();
            completePendingConcurrentTasks(true);
        }
        metadata = null;
        writer = null;
        partWriter = null;
        for (int i = 0; i < nextPartitionBlockWriterIndex; i++) {
            partitionBlockWriters.getQuick(i).clear();
        }
        nextPartitionBlockWriterIndex = 0;
        partitionBlockWriterByTimestamp.clear();
    }

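    // Drains tasks that no worker has picked up yet by running (or cancelling)
    // them on the calling thread; the CAS on task.ready guarantees each task
    // executes at most once even if a worker claims it concurrently. Then
    // spin-parks until the completion counter catches up with the enqueue count.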
    private void completePendingConcurrentTasks(boolean cancel) {
        if (nCompletedConcurrentTasks.get() < nEnqueuedConcurrentTasks) {
            for (int n = 0; n < nEnqueuedConcurrentTasks; n++) {
                TableBlockWriterTask task = concurrentTasks.getQuick(n);
                if (cancel) {
                    task.cancel();
                } else {
                    task.run();
                }
            }
        }

        while (nCompletedConcurrentTasks.get() < nEnqueuedConcurrentTasks) {
            LockSupport.parkNanos(0);
        }
        nEnqueuedConcurrentTasks = 0;
        nCompletedConcurrentTasks.set(0);
    }

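    // Publishes the task to the block writer queue. pubSeq.next() returning -1
    // means the queue is full, in which case the task runs on the calling
    // thread rather than blocking; any other negative value (contention)
    // simply retries.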
    private void enqueueConcurrentTask(TableBlockWriterTask task) {
        assert concurrentTasks.getQuick(nEnqueuedConcurrentTasks) == task;
        assert !task.ready.get();
        task.ready.set(true);
        nEnqueuedConcurrentTasks++;

        do {
            long seq = pubSeq.next();
            if (seq >= 0) {
                try {
                    queue.get(seq).task = task;
                } finally {
                    pubSeq.done(seq);
                }
                return;
            }
            if (seq == -1) {
                task.run();
                return;
            }
        } while (true);
    }

    private TableBlockWriterTask getConcurrentTask() {
        if (concurrentTasks.size() <= nEnqueuedConcurrentTasks) {
            concurrentTasks.extendAndSet(nEnqueuedConcurrentTasks, new TableBlockWriterTask());
        }
        return concurrentTasks.getQuick(nEnqueuedConcurrentTasks);
    }

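    // Resolves the writer for the partition containing the given timestamp,
    // keyed by the partition-floored timestamp. Writers are pooled in
    // partitionBlockWriters and reused across blocks to avoid reallocation.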
    private PartitionBlockWriter getPartitionBlockWriter(long timestamp) {
        long timestampLo = timestampFloorMethod.floor(timestamp);
        PartitionBlockWriter partWriter = partitionBlockWriterByTimestamp.get(timestampLo);
        if (null == partWriter) {
            assert nextPartitionBlockWriterIndex <= partitionBlockWriters.size();
            if (nextPartitionBlockWriterIndex == partitionBlockWriters.size()) {
                partWriter = new PartitionBlockWriter();
                partitionBlockWriters.extendAndSet(nextPartitionBlockWriterIndex, partWriter);
            } else {
                partWriter = partitionBlockWriters.getQuick(nextPartitionBlockWriterIndex);
            }
            nextPartitionBlockWriterIndex++;
            partitionBlockWriterByTimestamp.put(timestampLo, partWriter);
            partWriter.of(timestampLo);
        }

        return partWriter;
    }

    void open(TableWriter writer) {
        this.writer = writer;
        metadata = writer.getMetadata();
        columnCount = metadata.getColumnCount();
        partitionBy = writer.getPartitionBy();
        columnRowsAdded.ensureCapacity(columnCount);
        timestampColumnIndex = metadata.getTimestampIndex();
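        // If there is no designated timestamp column, appendPageFrameColumn()
        // never updates these, so the sentinels flow unchanged into commitBlock().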
        firstTimestamp = timestampColumnIndex >= 0 ? Long.MAX_VALUE : Long.MIN_VALUE;
        lastTimestamp = timestampColumnIndex >= 0 ? Long.MIN_VALUE : 0;
        nEnqueuedConcurrentTasks = 0;
        nCompletedConcurrentTasks.set(0);
        switch (partitionBy) {
            case PartitionBy.DAY:
                timestampFloorMethod = Timestamps.FLOOR_DD;
                break;
            case PartitionBy.MONTH:
                timestampFloorMethod = Timestamps.FLOOR_MM;
                break;
            case PartitionBy.YEAR:
                timestampFloorMethod = Timestamps.FLOOR_YYYY;
                break;
            default:
                timestampFloorMethod = NO_PARTITIONING_FLOOR;
                break;
        }
        LOG.info().$("started new block [table=").$(writer.getTableName()).$(']').$();
    }

    private enum TaskType {
        AppendBlock, GenerateStringIndex, GenerateBinaryIndex
    }

    private static class PartitionStruct {
        private static final int MAPPING_STRUCT_ENTRY_P2 = 3;
        private static final int INITIAL_ADDITIONAL_MAPPINGS = 4;
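        // mappingData packs 8 longs per column, addressed via getMappingDataIndex():
        // 0: data fd, 1: index fd (-1 when none), 2: mapping start address,
        // 3: mapping size, 4: column start offset, 5: column append offset,
        // 6: rows added, 7: field size as a power of two (-1 for var-size types).
        // Superseded mappings are appended after the per-column entries as
        // (start, size) pairs.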
        private long[] mappingData = null;
        private int columnCount;
        private int nAdditionalMappings;

        private void addAdditionalMapping(long start, long size) {
            int i = getMappingDataIndex(columnCount, nAdditionalMappings << 1);
            nAdditionalMappings++;
            int minSz = i + nAdditionalMappings << 1;
            if (mappingData.length < minSz) {
                long[] newMappingData = new long[minSz + (INITIAL_ADDITIONAL_MAPPINGS << 1)];
                System.arraycopy(mappingData, 0, newMappingData, 0, mappingData.length);
                mappingData = newMappingData;
            }
            mappingData[i++] = start;
            mappingData[i] = size;
        }

        private void clear() {
            Arrays.fill(mappingData, 0);
        }

        private long getAdditionalMappingSize(int nMapping) {
            int i = getMappingDataIndex(columnCount, (nMapping << 1) + 1);
            return mappingData[i];
        }

        private long getAdditionalMappingStart(int nMapping) {
            int i = getMappingDataIndex(columnCount, nMapping << 1);
            return mappingData[i];
        }

        private long getColumnAppendOffset(int columnIndex) {
            return mappingData[getMappingDataIndex(columnIndex, 5)];
        }

        private long getColumnDataFd(int columnIndex) {
            return mappingData[getMappingDataIndex(columnIndex, 0)];
        }

        private int getColumnFieldSizePow2(int columnIndex) {
            return (int) mappingData[getMappingDataIndex(columnIndex, 7)];
        }

        private long getColumnIndexFd(int columnIndex) {
            return mappingData[getMappingDataIndex(columnIndex, 1)];
        }

        private long getColumnMappingSize(int columnIndex) {
            return mappingData[getMappingDataIndex(columnIndex, 3)];
        }

        private long getColumnMappingStart(int columnIndex) {
            return mappingData[getMappingDataIndex(columnIndex, 2)];
        }

        private long getColumnNRowsAdded(int columnIndex) {
            return mappingData[getMappingDataIndex(columnIndex, 6)];
        }

        private long getColumnStartOffset(int columnIndex) {
            return mappingData[getMappingDataIndex(columnIndex, 4)];
        }

        private int getMappingDataIndex(int columnIndex, int fieldIndex) {
            return (columnIndex << MAPPING_STRUCT_ENTRY_P2) + fieldIndex;
        }

        private int getnAdditionalMappings() {
            return nAdditionalMappings;
        }

        private void of(int columnCount) {
            this.columnCount = columnCount;
            nAdditionalMappings = 0;
            int MAPPING_STRUCT_ENTRY_SIZE = 1 << MAPPING_STRUCT_ENTRY_P2;
            int sz = columnCount * MAPPING_STRUCT_ENTRY_SIZE;
            if (mappingData == null || mappingData.length < sz) {
                sz += INITIAL_ADDITIONAL_MAPPINGS << 1;
                mappingData = new long[sz];
            }
        }

        private void setColumnAppendOffset(int columnIndex, long offset) {
            mappingData[getMappingDataIndex(columnIndex, 5)] = offset;
        }

        private void setColumnDataFd(int columnIndex, long fd) {
            mappingData[getMappingDataIndex(columnIndex, 0)] = fd;
        }

        private void setColumnFieldSizePow2(int columnIndex, int fieldSizePow2) {
            mappingData[getMappingDataIndex(columnIndex, 7)] = fieldSizePow2;
        }

        private void setColumnIndexFd(int columnIndex, long fd) {
            mappingData[getMappingDataIndex(columnIndex, 1)] = fd;
        }

        private void setColumnMappingSize(int columnIndex, long size) {
            mappingData[getMappingDataIndex(columnIndex, 3)] = size;
        }

        private void setColumnMappingStart(int columnIndex, long address) {
            mappingData[getMappingDataIndex(columnIndex, 2)] = address;
        }

        private void setColumnNRowsAdded(int columnIndex, long nRowsAdded) {
            mappingData[getMappingDataIndex(columnIndex, 6)] = nRowsAdded;
        }

        private void setColumnStartOffset(int columnIndex, long offset) {
            mappingData[getMappingDataIndex(columnIndex, 4)] = offset;
        }
    }

    public static class TableBlockWriterTaskHolder {
        private TableBlockWriterTask task;
    }

    public static class TableBlockWriterJob extends AbstractQueueConsumerJob<TableBlockWriterTaskHolder> {
        public TableBlockWriterJob(MessageBus messageBus) {
            super(messageBus.getTableBlockWriterQueue(), messageBus.getTableBlockWriterSubSeq());
        }

        @Override
        protected boolean doRun(int workerId, long cursor) {
            try {
                final TableBlockWriterTaskHolder holder = queue.get(cursor);
                boolean useful = holder.task.run();
                holder.task = null;
                return useful;
            } finally {
                subSeq.done(cursor);
            }
        }
    }

    private class PartitionBlockWriter implements Closeable {
        private final PartitionStruct partitionStruct = new PartitionStruct();
        private final LongList columnTops = new LongList();
        private final Path path = new Path();
        private long timestampLo;
        private long timestampHi;
        private boolean opened;

        @Override
        public void close() {
            clear();
            path.close();
        }

        private void openPartition() {
            assert !opened;
            partitionStruct.of(columnCount);
            path.of(root).concat(writer.getTableName());
            timestampHi = TableUtils.setPathForPartition(path, partitionBy, timestampLo, true);
            int plen = path.length();
            try {
                if (ff.mkdirs(path.slash$(), mkDirMode) != 0) {
                    throw CairoException.instance(ff.errno()).put("Could not create directory: ").put(path);
                }

                assert columnCount > 0;
                columnTops.setAll(columnCount, -1);
                for (int columnIndex = 0; columnIndex < columnCount; columnIndex++) {
                    final CharSequence name = metadata.getColumnName(columnIndex);
                    final long appendOffset = writer.getPrimaryAppendOffset(timestampLo, columnIndex);
                    partitionStruct.setColumnStartOffset(columnIndex, appendOffset);
                    partitionStruct.setColumnAppendOffset(columnIndex, appendOffset);

                    partitionStruct.setColumnDataFd(columnIndex, TableUtils.openFileRWOrFail(ff, TableUtils.dFile(path.trimTo(plen), name)));
                    int columnType = metadata.getColumnType(columnIndex);
                    switch (columnType) {
                        case ColumnType.STRING:
                        case ColumnType.BINARY:
                            partitionStruct.setColumnIndexFd(columnIndex, TableUtils.openFileRWOrFail(ff, iFile(path.trimTo(plen), name)));
                            partitionStruct.setColumnFieldSizePow2(columnIndex, -1);
                            break;
                        default:
                            partitionStruct.setColumnIndexFd(columnIndex, -1);
                            partitionStruct.setColumnFieldSizePow2(columnIndex, ColumnType.pow2SizeOf(columnType));
                            break;
                    }
                }

                opened = true;
                LOG.info().$("opened partition to '").$(path).$('\'').$();
            } catch (Throwable ex) {
                closePartition();
                throw ex;
            } finally {
                path.trimTo(plen);
            }
        }

        private void closePartition() {
            try {
                for (int columnIndex = 0; columnIndex < columnCount; columnIndex++) {
                    long fd = partitionStruct.getColumnDataFd(columnIndex);
                    if (fd > 0) {
                        ff.close(fd);
                    }
                    fd = partitionStruct.getColumnIndexFd(columnIndex);
                    if (fd > 0) {
                        ff.close(fd);
                    }
                    long address = partitionStruct.getColumnMappingStart(columnIndex);
                    if (address != 0) {
                        long sz = partitionStruct.getColumnMappingSize(columnIndex);
                        unmapFile(ff, address, sz);
                        partitionStruct.setColumnMappingStart(columnIndex, 0);
                    }
                }
                int nAdditionalMappings = partitionStruct.getnAdditionalMappings();
                for (int i = 0; i < nAdditionalMappings; i++) {
                    long address = partitionStruct.getAdditionalMappingStart(i);
                    long sz = partitionStruct.getAdditionalMappingSize(i);
                    unmapFile(ff, address, sz);
                }
            } finally {
                partitionStruct.clear();
                opened = false;
            }
        }

        private void appendPageFrameColumn(int columnIndex, long pageFrameSize, long sourceAddress) {
            if (sourceAddress != 0) {
                long appendOffset = partitionStruct.getColumnAppendOffset(columnIndex);
                long nextAppendOffset = appendOffset + pageFrameSize;
                partitionStruct.setColumnAppendOffset(columnIndex, nextAppendOffset);

                long destAddress;
                long columnStartAddress = partitionStruct.getColumnMappingStart(columnIndex);
                if (columnStartAddress == 0) {
                    assert appendOffset == partitionStruct.getColumnStartOffset(columnIndex);
                    long mapSz = Math.max(pageFrameSize, ff.getMapPageSize());
                    long address = mapFile(ff, partitionStruct.getColumnDataFd(columnIndex), appendOffset, mapSz);
                    partitionStruct.setColumnMappingStart(columnIndex, address);
                    partitionStruct.setColumnMappingSize(columnIndex, mapSz);
                    columnStartAddress = address;
                    destAddress = columnStartAddress;
                } else {
                    long initialOffset = partitionStruct.getColumnStartOffset(columnIndex);
                    assert initialOffset < appendOffset;
                    final long minMapSz = nextAppendOffset - initialOffset;
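                    // The current mapping may still be the destination of
                    // in-flight copy tasks, so it is recorded rather than
                    // unmapped here; closePartition() releases it later.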
                    if (minMapSz > partitionStruct.getColumnMappingSize(columnIndex)) {
                        partitionStruct.addAdditionalMapping(
                                partitionStruct.getColumnMappingStart(columnIndex),
                                partitionStruct.getColumnMappingSize(columnIndex)
                        );
                        final long address = mapFile(
                                ff,
                                partitionStruct.getColumnDataFd(columnIndex),
                                partitionStruct.getColumnStartOffset(columnIndex),
                                minMapSz
                        );
                        partitionStruct.setColumnMappingStart(columnIndex, address);
                        partitionStruct.setColumnMappingSize(columnIndex, minMapSz);
                    }
                    destAddress = partitionStruct.getColumnMappingStart(columnIndex) + appendOffset - initialOffset;
                }

                TableBlockWriterTask task = getConcurrentTask();
                task.assignAppendPageFrameColumn(destAddress, pageFrameSize, sourceAddress);
                enqueueConcurrentTask(task);
            } else {
                partWriter.setColumnTop(columnIndex, pageFrameSize);
            }
        }

        private void cancel() {
            clear();
        }

        private void clear() {
            if (opened) {
                closePartition();
            }
            columnTops.clear();
        }

        private void completeCommitAppendedBlock() {
            long nRowsAdded = 0;
            for (int columnIndex = 0; columnIndex < columnCount; columnIndex++) {
                long nColRowsAdded = partitionStruct.getColumnNRowsAdded(columnIndex);
                assert nColRowsAdded >= 0;
                if (nColRowsAdded > nRowsAdded) {
                    nRowsAdded = nColRowsAdded;
                }
            }
            long blockLastTimestamp = Math.min(timestampHi, lastTimestamp);
            LOG.info().$("committing ").$(nRowsAdded).$(" rows to partition at ").$(path).$(" [firstTimestamp=").$ts(timestampLo).$(", lastTimestamp=").$ts(timestampHi).$(']').$();
            writer.startAppendedBlock(timestampLo, blockLastTimestamp, nRowsAdded, columnTops);
        }

        private void completeUpdateSymbolCache(int columnIndex, long colNRowsAdded) {
            final long address = partitionStruct.getColumnMappingStart(columnIndex);
            assert address > 0;
            final int nSymbols = Vect.maxInt(address, colNRowsAdded) + 1;
            SymbolMapWriter symWriter = writer.getSymbolMapWriter(columnIndex);
            if (nSymbols > symWriter.getSymbolCount()) {
                symWriter.commitAppendedBlock(nSymbols - symWriter.getSymbolCount());
            }
        }

        private void of(long timestampLo) {
            this.timestampLo = timestampLo;
            openPartition();
            columnTops.ensureCapacity(columnCount);
        }

        private void setColumnTop(int columnIndex, long columnTop) {
            columnTops.set(columnIndex, columnTop);
        }

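        // First phase of the commit: enqueue index-rebuild tasks for STRING and
        // BINARY columns (their row counts arrive asynchronously, hence the -1
        // placeholder) and derive row counts for fixed-size columns directly
        // from the appended byte range.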
        private void startCommitAppendedBlock() {
            for (int columnIndex = 0; columnIndex < columnCount; columnIndex++) {
                int columnType = metadata.getColumnType(columnIndex);
                long offsetLo = partitionStruct.getColumnStartOffset(columnIndex);
                long offsetHi = partitionStruct.getColumnAppendOffset(columnIndex);

                // Add binary and string indexes
                switch (columnType) {
                    case ColumnType.STRING:
                    case ColumnType.BINARY: {
                        TableBlockWriterTask task = getConcurrentTask();
                        if (offsetHi != offsetLo) {
                            long columnDataAddressLo = partitionStruct.getColumnMappingStart(columnIndex);
                            assert offsetHi - offsetLo <= partitionStruct.getColumnMappingSize(columnIndex);
                            long columnDataAddressHi = columnDataAddressLo + offsetHi - offsetLo;

                            long indexFd = partitionStruct.getColumnIndexFd(columnIndex);
                            long indexOffsetLo = writer.getSecondaryAppendOffset(timestampLo, columnIndex);

                            if (columnType == ColumnType.STRING) {
                                task.assignUpdateStringIndex(columnDataAddressLo, columnDataAddressHi, offsetLo, indexFd, indexOffsetLo, columnIndex, partitionStruct);
                            } else {
                                task.assignUpdateBinaryIndex(columnDataAddressLo, columnDataAddressHi, offsetLo, indexFd, indexOffsetLo, columnIndex, partitionStruct);
                            }
                            partitionStruct.setColumnNRowsAdded(columnIndex, -1);
                            enqueueConcurrentTask(task);
                        } else {
                            partitionStruct.setColumnNRowsAdded(columnIndex, 0);
                        }
                        break;
                    }

                    case ColumnType.SYMBOL: {
                        long colNRowsAdded = (offsetHi - offsetLo) >> partitionStruct.getColumnFieldSizePow2(columnIndex);
                        partitionStruct.setColumnNRowsAdded(columnIndex, colNRowsAdded);
                        completeUpdateSymbolCache(columnIndex, colNRowsAdded);
                        break;
                    }

                    default: {
                        long colNRowsAdded = (offsetHi - offsetLo) >> partitionStruct.getColumnFieldSizePow2(columnIndex);
                        partitionStruct.setColumnNRowsAdded(columnIndex, colNRowsAdded);
                        break;
                    }
                }
            }
        }

        private void startPageFrame(long timestamp) {
            assert opened;
            assert timestamp == Long.MIN_VALUE || timestamp >= timestampLo;
            assert timestamp <= timestampHi;
            timestampLo = timestamp;
        }
    }

    private class TableBlockWriterTask {
        private final AtomicBoolean ready = new AtomicBoolean(false);
        private TaskType taskType;
        private long sourceAddress;
        private long sourceSizeOrEnd;
        private long destAddress;
        private long sourceInitialOffset;
        private long indexFd;
        private long indexOffsetLo;
        private int columnIndex;
        private PartitionStruct partitionStruct;

        private void assignAppendPageFrameColumn(long destAddress, long pageFrameLength, long sourceAddress) {
            taskType = TaskType.AppendBlock;
            this.destAddress = destAddress;
            this.sourceSizeOrEnd = pageFrameLength;
            this.sourceAddress = sourceAddress;
        }

        private void assignUpdateBinaryIndex(
                long columnDataAddressLo,
                long columnDataAddressHi,
                long columnDataOffsetLo,
                long indexFd,
                long indexOffsetLo,
                int columnIndex,
                PartitionStruct partitionStruct
        ) {
            taskType = TaskType.GenerateBinaryIndex;
            this.sourceAddress = columnDataAddressLo;
            this.sourceSizeOrEnd = columnDataAddressHi;
            this.sourceInitialOffset = columnDataOffsetLo;
            this.indexFd = indexFd;
            this.indexOffsetLo = indexOffsetLo;
            this.columnIndex = columnIndex;
            this.partitionStruct = partitionStruct;
        }

        private void assignUpdateStringIndex(
                long columnDataAddressLo,
                long columnDataAddressHi,
                long columnDataOffsetLo,
                long indexFd,
                long indexOffsetLo,
                int columnIndex,
                PartitionStruct partitionStruct
        ) {
            taskType = TaskType.GenerateStringIndex;
            this.sourceAddress = columnDataAddressLo;
            this.sourceSizeOrEnd = columnDataAddressHi;
            this.sourceInitialOffset = columnDataOffsetLo;
            this.indexFd = indexFd;
            this.indexOffsetLo = indexOffsetLo;
            this.columnIndex = columnIndex;
            this.partitionStruct = partitionStruct;
        }

        private void cancel() {
            if (ready.compareAndSet(true, false)) {
                nCompletedConcurrentTasks.incrementAndGet();
            }
        }

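        // The index stores one 8-byte offset per record. Every binary record
        // carries an 8-byte length prefix, so the data byte range is a safe
        // upper bound for the index mapping size.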
        private void completeUpdateBinaryIndex(
                long columnDataAddressLo,
                long columnDataAddressHi,
                long columnDataOffsetLo,
                long indexFd,
                long indexOffsetLo,
                int columnIndex,
                PartitionStruct partitionStruct
        ) {
            long indexMappingSz = (columnDataAddressHi - columnDataAddressLo);
            long indexMappingStart = mapFile(ff, indexFd, indexOffsetLo, indexMappingSz);

            long offset = columnDataOffsetLo;
            long columnDataAddress = columnDataAddressLo;
            long columnIndexAddress = indexMappingStart;
            long nRowsAdded = 0;
            while (columnDataAddress < columnDataAddressHi) {
                assert columnIndexAddress + Long.BYTES <= (indexMappingStart + indexMappingSz);
                nRowsAdded++;
                Unsafe.getUnsafe().putLong(columnIndexAddress, offset);
                columnIndexAddress += Long.BYTES;
                // TODO: remove branching similar to how this is done for strings
                long binLen = Unsafe.getUnsafe().getLong(columnDataAddress);
                long sz;
                if (binLen == TableUtils.NULL_LEN) {
                    sz = Long.BYTES;
                } else {
                    sz = Long.BYTES + binLen;
                }
                columnDataAddress += sz;
                offset += sz;
            }

            partitionStruct.setColumnNRowsAdded(columnIndex, nRowsAdded);
            unmapFile(ff, indexMappingStart, indexMappingSz);
        }

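        // Each string record carries a 4-byte length prefix, so twice the data
        // byte range bounds the 8 bytes needed per index entry.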
        private void completeUpdateStringIndex(
                long columnDataAddressLo,
                long columnDataAddressHi,
                long columnDataOffsetLo,
                long indexFd,
                long indexOffsetLo,
                int columnIndex,
                PartitionStruct partitionStruct
        ) {
            final long indexMappingSz = (columnDataAddressHi - columnDataAddressLo) * 2;
            final long indexMappingStart = mapFile(ff, indexFd, indexOffsetLo, indexMappingSz);
            long offset = columnDataOffsetLo;
            long columnDataAddress = columnDataAddressLo;
            long columnIndexAddress = indexMappingStart;
            long nRowsAdded = 0;
            while (columnDataAddress < columnDataAddressHi) {
                assert columnIndexAddress + Long.BYTES <= (indexMappingStart + indexMappingSz);
                nRowsAdded++;
                Unsafe.getUnsafe().putLong(columnIndexAddress, offset);
                columnIndexAddress += Long.BYTES;
                final int strLen = Unsafe.getUnsafe().getInt(columnDataAddress);
                // Adding 1 to the length turns NULL_LEN (-1) into 0, so a null
                // string contributes just VmUtils.STRING_LENGTH_BYTES.
                // For non-negative lengths the +1 over-counts by 2 bytes; the
                // inverted sign bit below (2 for non-null, 0 for null) corrects that.
                final long bit = ((strLen >>> 30) & 0x02) ^ 0x02;
                final long sz = (VmUtils.STRING_LENGTH_BYTES + 2L * (strLen + 1) - bit);
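                // Worked example (assuming STRING_LENGTH_BYTES == 4):
                // strLen = -1 (null): bit = 0, sz = 4 + 2*0 - 0 = 4 (prefix only);
                // strLen = 5:         bit = 2, sz = 4 + 2*6 - 2 = 14 = 4 + 5 chars * 2 bytes.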
                columnDataAddress += sz;
                offset += sz;
            }

            partitionStruct.setColumnNRowsAdded(columnIndex, nRowsAdded);
            unmapFile(ff, indexMappingStart, indexMappingSz);
        }

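        // Claims the task by flipping ready via CAS, so a task drained inline by
        // completePendingConcurrentTasks() and the same task picked up by a
        // worker cannot both execute it; the loser simply returns false.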
        private boolean run() {
            if (ready.compareAndSet(true, false)) {
                try {
                    switch (taskType) {
                        case AppendBlock:
                            Vect.memcpy(sourceAddress, destAddress, sourceSizeOrEnd);
                            return true;

                        case GenerateStringIndex:
                            completeUpdateStringIndex(sourceAddress, sourceSizeOrEnd, sourceInitialOffset, indexFd, indexOffsetLo, columnIndex, partitionStruct);
                            return true;

                        case GenerateBinaryIndex:
                            completeUpdateBinaryIndex(sourceAddress, sourceSizeOrEnd, sourceInitialOffset, indexFd, indexOffsetLo, columnIndex, partitionStruct);
                            return true;
                    }
                } finally {
                    nCompletedConcurrentTasks.incrementAndGet();
                }
            }

            return false;
        }
    }
}