// All downloads are free. Search and download functionality uses the official Maven repository.
//
// io.questdb.cairo.wal.seq.TableTransactionLog Maven / Gradle / Ivy

/*******************************************************************************
 *     ___                  _   ____  ____
 *    / _ \ _   _  ___  ___| |_|  _ \| __ )
 *   | | | | | | |/ _ \/ __| __| | | |  _ \
 *   | |_| | |_| |  __/\__ \ |_| |_| | |_) |
 *    \__\_\\__,_|\___||___/\__|____/|____/
 *
 *  Copyright (c) 2014-2019 Appsicle
 *  Copyright (c) 2019-2023 QuestDB
 *
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *  http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 *
 ******************************************************************************/

package io.questdb.cairo.wal.seq;

import io.questdb.cairo.CairoException;
import io.questdb.cairo.MemorySerializer;
import io.questdb.cairo.TableToken;
import io.questdb.cairo.TableUtils;
import io.questdb.cairo.vm.MemoryFCRImpl;
import io.questdb.cairo.vm.Vm;
import io.questdb.cairo.vm.api.MemoryCMARW;
import io.questdb.griffin.engine.ops.AlterOperation;
import io.questdb.log.Log;
import io.questdb.log.LogFactory;
import io.questdb.std.*;
import io.questdb.std.str.Path;
import io.questdb.std.str.StringSink;
import org.jetbrains.annotations.NotNull;

import java.io.Closeable;
import java.lang.ThreadLocal;
import java.util.concurrent.atomic.AtomicLong;

import static io.questdb.cairo.TableUtils.openSmallFile;
import static io.questdb.cairo.wal.WalUtils.*;

public class TableTransactionLog implements Closeable {
    // Reserved header space: eight longs (64 bytes).
    public final static int HEADER_RESERVED = 8 * Long.BYTES;
    // Header offset of the long holding the highest committed txn. It sits right after the
    // 4-byte format version int that open() writes at offset 0.
    public static final long MAX_TXN_OFFSET = Integer.BYTES;
    // Total header length; the first record starts at this offset.
    public static final long HEADER_SIZE = MAX_TXN_OFFSET + Long.BYTES + HEADER_RESERVED;
    // WAL id stored in records written by beginMetadataChangeEntry().
    public static final int STRUCTURAL_CHANGE_WAL_ID = -1;
    private static final Log LOG = LogFactory.getLog(TableTransactionLog.class);
    // Field offsets inside one fixed-size record, in on-disk order:
    //   structure version (long) | WAL id (int) | segment id (int) | segment txn (int) | commit timestamp (long)
    private static final long TX_LOG_STRUCTURE_VERSION_OFFSET = 0L;
    private static final long TX_LOG_WAL_ID_OFFSET = TX_LOG_STRUCTURE_VERSION_OFFSET + Long.BYTES;
    private static final long TX_LOG_SEGMENT_OFFSET = TX_LOG_WAL_ID_OFFSET + Integer.BYTES;
    private static final long TX_LOG_SEGMENT_TXN_OFFSET = TX_LOG_SEGMENT_OFFSET + Integer.BYTES;
    private static final long TX_LOG_COMMIT_TIMESTAMP_OFFSET = TX_LOG_SEGMENT_TXN_OFFSET + Integer.BYTES;
    // Size of one record: offset of the last field plus its width.
    public static final long RECORD_SIZE = TX_LOG_COMMIT_TIMESTAMP_OFFSET + Long.BYTES;
    // Per-thread reusable instances. The element type must be spelled out: with a raw
    // ThreadLocal, get() returns Object and the typed assignments in this class do not compile.
    // Target of readTableMetadataChangeLog() deserialization.
    private static final ThreadLocal<AlterOperation> tlAlterOperation = new ThreadLocal<>();
    // Cursor returned by getTableMetadataChangeLog().
    private static final ThreadLocal<TableMetadataChangeLogImpl> tlStructChangeCursor = new ThreadLocal<>();
    // Cursor returned by getCursor().
    private static final ThreadLocal<TransactionLogCursorImpl> tlTransactionLogCursor = new ThreadLocal<>();
    // Files facade used by open() and handed to the cursors this log creates.
    private final FilesFacade ff;
    // In-memory copy of the highest committed txn; mirrored into the file at MAX_TXN_OFFSET.
    private final AtomicLong maxTxn = new AtomicLong();
    // Copy of the path passed to open(); cursors are opened relative to it.
    private final StringSink rootPath = new StringSink();
    // Transaction records (file TXNLOG_FILE_NAME).
    private final MemoryCMARW txnMem = Vm.getCMARWInstance();
    // Serialized structure changes, each prefixed with an int length (file TXNLOG_FILE_NAME_META_VAR).
    private final MemoryCMARW txnMetaMem = Vm.getCMARWInstance();
    // One long per structure version: offsets into txnMetaMem (file TXNLOG_FILE_NAME_META_INX).
    private final MemoryCMARW txnMetaMemIndex = Vm.getCMARWInstance();

    /**
     * Creates a log that is not attached to any files yet; {@link #open(Path)} attaches it.
     *
     * @param ff files facade used by {@code open()} and passed on to the cursors created by this log
     */
    TableTransactionLog(FilesFacade ff) {
        this.ff = ff;
    }

    /**
     * Closes the three mapped memories if the log is open; does nothing otherwise.
     * Before closing, logs an error when the max txn stored in the file differs from the
     * in-memory counter.
     */
    @Override
    public void close() {
        if (!txnMem.isOpen()) {
            return;
        }

        final long persistedMaxTxn = txnMem.getLong(MAX_TXN_OFFSET);
        if (persistedMaxTxn != maxTxn.get()) {
            LOG.error().$("Max txn in the file ").$(persistedMaxTxn).$(" but in memory is ").$(maxTxn.get()).$();
        }

        txnMem.close(false);
        txnMetaMem.close(false);
        txnMetaMemIndex.close(false);
    }

    /**
     * Re-opens the log files unless the log is already open and the max txn stored in the
     * file equals the in-memory counter.
     *
     * @param path path passed on to {@code open()}
     * @return {@code false} when nothing had to be done, {@code true} when the files were (re)opened
     */
    public boolean reload(Path path) {
        final boolean wasOpen = txnMem.isOpen();
        if (wasOpen && txnMem.getLong(MAX_TXN_OFFSET) == maxTxn.get()) {
            // file and memory agree, keep the current mappings
            return false;
        }

        if (wasOpen) {
            txnMem.close(false);
            txnMetaMem.close(false);
            txnMetaMemIndex.close(false);
        }
        open(path);
        return true;
    }

    /**
     * Calls {@code sync(false)} on all three memories: the metadata index first, then the
     * metadata payload, then the transaction records.
     */
    public void sync() {
        txnMetaMemIndex.sync(false);
        txnMetaMem.sync(false);
        txnMem.sync(false);
    }

    /**
     * Opens {@code fileName} under {@code path} read-only via {@code TableUtils.openRO}.
     * The path is restored to its original length before returning, whether or not the open succeeds.
     *
     * @return the file descriptor returned by {@code TableUtils.openRO}
     */
    private static int openFileRO(final FilesFacade ff, final Path path, final String fileName) {
        final int savedLen = path.length();
        path.concat(fileName).$();
        try {
            return TableUtils.openRO(ff, path, LOG);
        } finally {
            // undo the concat so the caller can keep using the path
            path.trimTo(savedLen);
        }
    }

    /**
     * Returns the calling thread's metadata change log cursor, creating it on first use.
     * The returned instance is not positioned on any file.
     */
    @NotNull
    static TableMetadataChangeLog getTableMetadataChangeLog() {
        TableMetadataChangeLogImpl changeLog = tlStructChangeCursor.get();
        if (changeLog != null) {
            return changeLog;
        }
        changeLog = new TableMetadataChangeLogImpl();
        tlStructChangeCursor.set(changeLog);
        return changeLog;
    }

    /**
     * Appends one data transaction record and then publishes it by bumping the max txn
     * stored in the file header.
     *
     * @param structureVersion structure version stored in the record
     * @param walId            WAL id stored in the record
     * @param segmentId        segment id stored in the record
     * @param segmentTxn       segment txn stored in the record
     * @param timestamp        commit timestamp stored in the record
     * @return the txn number assigned to the new record (numbering starts at 1)
     */
    long addEntry(long structureVersion, int walId, int segmentId, int segmentTxn, long timestamp) {
        // fields are appended in the on-disk record order
        txnMem.putLong(structureVersion);
        txnMem.putInt(walId);
        txnMem.putInt(segmentId);
        txnMem.putInt(segmentTxn);
        txnMem.putLong(timestamp);

        // store fence between the record writes and the header update; presumably so that a
        // reader never sees the new max txn before the record bytes - TODO confirm
        Unsafe.getUnsafe().storeFence();
        long maxTxn = this.maxTxn.incrementAndGet();
        txnMem.putLong(MAX_TXN_OFFSET, maxTxn);
        txnMem.sync(false);
        // Transactions are 1 based here
        return maxTxn;
    }

    /**
     * Writes a structure change: a log record tagged with {@link #STRUCTURAL_CHANGE_WAL_ID},
     * the serialized change prefixed by its int length, and an index entry pointing at the end
     * of that payload. The max txn is not touched here; see {@code endMetadataChangeEntry()}.
     *
     * @param newStructureVersion structure version stored in the record; asserted to equal the
     *                            number of longs already present in the index
     * @param serializer          writes {@code instance} into the metadata memory
     * @param instance            object to serialize
     * @param timestamp           commit timestamp stored in the record
     */
    void beginMetadataChangeEntry(long newStructureVersion, MemorySerializer serializer, Object instance, long timestamp) {
        assert newStructureVersion == txnMetaMemIndex.getAppendOffset() / Long.BYTES;

        // log record; segment id and segment txn are both written as -1
        txnMem.putLong(newStructureVersion);
        txnMem.putInt(STRUCTURAL_CHANGE_WAL_ID);
        txnMem.putInt(-1);
        txnMem.putInt(-1);
        txnMem.putLong(timestamp);

        // length placeholder, patched once the payload size is known
        txnMetaMem.putInt(0);
        final long payloadLo = txnMetaMem.getAppendOffset();
        serializer.toSink(instance, txnMetaMem);
        final long payloadHi = txnMetaMem.getAppendOffset();
        final int payloadLen = (int) (payloadHi - payloadLo);
        txnMetaMem.putInt(payloadLo - Integer.BYTES, payloadLen);

        // index entry: offset just past this payload
        txnMetaMemIndex.putLong(payloadLo + payloadLen);
    }

    /**
     * Publishes the record written by {@code beginMetadataChangeEntry()}: syncs all three
     * memories, then increments the max txn and stores it in the file header.
     *
     * @return the txn number assigned to the structure change (numbering starts at 1)
     */
    long endMetadataChangeEntry() {
        sync();

        // store fence before the header update
        Unsafe.getUnsafe().storeFence();

        // Transactions are 1 based here
        long nextTxn = maxTxn.incrementAndGet();
        // NOTE(review): unlike addEntry(), there is no sync after this header write - confirm intended
        txnMem.putLong(MAX_TXN_OFFSET, nextTxn);

        return nextTxn;
    }

    /**
     * Returns the calling thread's transaction cursor, (re)opened on this log's files and
     * positioned at {@code txnLo}. The same instance is handed out on every call made from
     * a given thread.
     *
     * @param txnLo txn the cursor is positioned at; iteration continues from the one after it
     */
    TransactionLogCursor getCursor(long txnLo) {
        final Path path = Path.PATH.get().of(rootPath);
        final TransactionLogCursorImpl cached = tlTransactionLogCursor.get();
        if (cached != null) {
            return cached.of(ff, txnLo, path);
        }

        final TransactionLogCursorImpl created = new TransactionLogCursorImpl(ff, txnLo, path);
        tlTransactionLogCursor.set(created);
        return created;
    }

    /**
     * Positions the calling thread's metadata change log cursor on this log's files.
     *
     * @param tableToken         token reported by the returned cursor
     * @param structureVersionLo structure version the changes are read from
     * @param serializer         deserializer for the stored changes
     * @return the thread-local cursor, after its {@code of()} call completed
     */
    @NotNull
    TableMetadataChangeLog getTableMetadataChangeLog(TableToken tableToken, long structureVersionLo, MemorySerializer serializer) {
        final TableMetadataChangeLogImpl changeLog = (TableMetadataChangeLogImpl) getTableMetadataChangeLog();
        final Path root = Path.getThreadLocal(rootPath);
        changeLog.of(ff, tableToken, structureVersionLo, serializer, root);
        return changeLog;
    }

    /**
     * @return the current value of the in-memory max txn counter
     */
    long lastTxn() {
        return maxTxn.get();
    }

    /**
     * Maps the three log files found under {@code path}, loads the max txn from the header
     * and moves every append position to the end of the committed data. When the stored max
     * txn is 0 the header and the first index entry are (re)written.
     *
     * @param path root path the log file names are resolved against; also remembered for
     *             opening cursors later
     */
    void open(Path path) {
        rootPath.clear();
        path.toSink(rootPath);

        final int rootLen = path.length();
        openSmallFile(ff, path, rootLen, txnMem, TXNLOG_FILE_NAME, MemoryTag.MMAP_TX_LOG);
        openSmallFile(ff, path, rootLen, txnMetaMem, TXNLOG_FILE_NAME_META_VAR, MemoryTag.MMAP_TX_LOG);
        openSmallFile(ff, path, rootLen, txnMetaMemIndex, TXNLOG_FILE_NAME_META_INX, MemoryTag.MMAP_TX_LOG);

        final long committedTxn = txnMem.getLong(MAX_TXN_OFFSET);
        maxTxn.set(committedTxn);

        if (committedTxn != 0) {
            // Existing log: the last record tells which structure version is the latest,
            // and the index entry for that version tells where the metadata payload ends.
            final long lastRecordOffset = HEADER_SIZE + (committedTxn - 1) * RECORD_SIZE;
            final long maxStructureVersion = txnMem.getLong(lastRecordOffset + TX_LOG_STRUCTURE_VERSION_OFFSET);
            txnMem.jumpTo(lastRecordOffset + RECORD_SIZE);

            final long indexOffset = maxStructureVersion * Long.BYTES;
            final long metaAppendOffset = txnMetaMemIndex.getLong(indexOffset);
            txnMetaMemIndex.jumpTo(indexOffset + Long.BYTES);
            txnMetaMem.jumpTo(metaAppendOffset);
            return;
        }

        // No committed transactions: write format version, zero max txn and one zeroed long,
        // then start appending records right after the header.
        txnMem.jumpTo(0L);
        txnMem.putInt(WAL_FORMAT_VERSION);
        txnMem.putLong(0L);
        txnMem.putLong(0L);
        txnMem.jumpTo(HEADER_SIZE);

        // The index holds N + 1 entries for N structure changes; the first entry is always 0.
        txnMetaMemIndex.jumpTo(0L);
        txnMetaMemIndex.putLong(0L);
        txnMetaMem.jumpTo(0L);
    }

    /**
     * Deserializes the structure change stored for {@code structureVersion} into the calling
     * thread's reusable {@link AlterOperation}. As a side effect, the append position of the
     * metadata memory is moved to the end of the record that was read.
     *
     * @param structureVersion index entry to look the record up by
     * @param serializer       deserializer for the stored payload
     * @return the thread-local operation, populated from the record
     * @throws CairoException when the stored record length is negative or exceeds {@code Files.PAGE_SIZE}
     */
    AlterOperation readTableMetadataChangeLog(long structureVersion, MemorySerializer serializer) {
        final long recordOffset = txnMetaMemIndex.getLong(structureVersion * Long.BYTES);
        final int payloadLen = txnMetaMem.getInt(recordOffset);
        if (payloadLen < 0 || payloadLen > Files.PAGE_SIZE) {
            throw CairoException.critical(0).put("invalid sequencer txn metadata [offset=").put(recordOffset).put(", recordSize=").put(payloadLen).put(']');
        }

        // payload follows the int length prefix
        final long payloadLo = recordOffset + Integer.BYTES;
        final long payloadHi = payloadLo + payloadLen;

        AlterOperation target = tlAlterOperation.get();
        if (target == null) {
            target = new AlterOperation();
            tlAlterOperation.set(target);
        }

        serializer.fromSink(target, txnMetaMem, payloadLo, payloadHi);
        txnMetaMem.jumpTo(payloadHi);
        return target;
    }

    /**
     * Read-only cursor over serialized structure changes. {@code of()} maps the metadata
     * payload file up to the end of the latest change; {@code next()} then deserializes one
     * length-prefixed record at a time into a single reused {@link AlterOperation}.
     */
    private static class TableMetadataChangeLogImpl implements TableMetadataChangeLog {
        // reused for every record returned by next()
        private final AlterOperation alterOp = new AlterOperation();
        private final MemoryFCRImpl txnMetaMem = new MemoryFCRImpl();
        private FilesFacade ff;
        private MemorySerializer serializer;
        private TableToken tableToken;
        // base address of the mapping, 0 when nothing is mapped
        private long txnMetaAddress;
        // offset of the next record to read
        private long txnMetaOffset;
        // end offset of the data to read; also the length of the mapping
        private long txnMetaOffsetHi;

        /**
         * Unmaps the payload file if it is mapped and resets both offsets to 0, which makes
         * {@code hasNext()} return {@code false}.
         */
        @Override
        public void close() {
            if (txnMetaAddress > 0) {
                ff.munmap(txnMetaAddress, txnMetaOffsetHi, MemoryTag.MMAP_TX_LOG_CURSOR);
                txnMetaAddress = 0;
            }
            txnMetaOffset = 0;
            txnMetaOffsetHi = 0;
        }

        @Override
        public TableToken getTableToken() {
            return tableToken;
        }

        /**
         * @return {@code true} while the read offset is below the end offset
         */
        @Override
        public boolean hasNext() {
            return txnMetaOffsetHi > txnMetaOffset;
        }

        /**
         * Deserializes the record at the current offset and advances past it.
         *
         * @return the reused operation instance, populated from the record
         * @throws CairoException when the stored record length is negative or exceeds {@code Files.PAGE_SIZE}
         */
        @Override
        public TableMetadataChange next() {
            final int payloadLen = txnMetaMem.getInt(txnMetaOffset);
            if (payloadLen < 0 || payloadLen > Files.PAGE_SIZE) {
                throw CairoException.critical(0).put("invalid sequencer txn metadata [offset=").put(txnMetaOffset).put(", recordSize=").put(payloadLen).put(']');
            }

            // step over the int length prefix
            txnMetaOffset += Integer.BYTES;
            final long payloadHi = txnMetaOffset + payloadLen;
            serializer.fromSink(alterOp, txnMetaMem, txnMetaOffset, payloadHi);
            txnMetaOffset = payloadHi;
            return alterOp;
        }

        /**
         * Releases the previous state and positions the cursor on the structure changes
         * stored after {@code structureVersionLo}. When the latest stored structure version
         * is not greater than {@code structureVersionLo} the cursor is left empty.
         *
         * @param ff                 files facade used for all file access
         * @param tableToken         token reported by {@code getTableToken()}
         * @param structureVersionLo structure version to start reading from
         * @param serializer         deserializer used by {@code next()}
         * @param path               root path of the log files; restored to its original length
         * @throws CairoException when the txn count or the start offset cannot be read, when the
         *                        end offset is not greater than the start offset, or when
         *                        mapping the payload file fails
         */
        public void of(
                FilesFacade ff,
                TableToken tableToken,
                long structureVersionLo,
                MemorySerializer serializer,
                @Transient final Path path
        ) {
            this.tableToken = tableToken;

            // drop whatever the previous use of this instance left mapped
            close();

            this.ff = ff;
            this.serializer = serializer;

            int txnFd = -1;
            int txnMetaFd = -1;
            int txnMetaIndexFd = -1;
            try {
                txnFd = openFileRO(ff, path, TXNLOG_FILE_NAME);
                txnMetaFd = openFileRO(ff, path, TXNLOG_FILE_NAME_META_VAR);
                txnMetaIndexFd = openFileRO(ff, path, TXNLOG_FILE_NAME_META_INX);

                final long txnCount = ff.readNonNegativeLong(txnFd, MAX_TXN_OFFSET);
                if (txnCount > -1L) {
                    // the last record carries the latest structure version
                    final long lastRecordOffset = HEADER_SIZE + (txnCount - 1) * RECORD_SIZE;
                    final long maxStructureVersion = ff.readNonNegativeLong(txnFd, lastRecordOffset + TX_LOG_STRUCTURE_VERSION_OFFSET);
                    if (maxStructureVersion <= structureVersionLo) {
                        // Nothing newer than what the caller already has: empty cursor, not an error.
                        txnMetaOffset = txnMetaOffsetHi = 0;
                        return;
                    }

                    txnMetaOffset = ff.readNonNegativeLong(txnMetaIndexFd, structureVersionLo * Long.BYTES);
                    if (txnMetaOffset > -1L) {
                        txnMetaOffsetHi = ff.readNonNegativeLong(txnMetaIndexFd, maxStructureVersion * Long.BYTES);
                        if (txnMetaOffsetHi > txnMetaOffset) {
                            // map from the start of the file up to the end of the latest change
                            txnMetaAddress = ff.mmap(
                                    txnMetaFd,
                                    txnMetaOffsetHi,
                                    0L,
                                    Files.MAP_RO,
                                    MemoryTag.MMAP_TX_LOG_CURSOR
                            );
                            if (txnMetaAddress >= 0) {
                                txnMetaMem.of(txnMetaAddress, txnMetaOffsetHi);
                                return;
                            }
                            // mapping failed: reset state and fall through to the error below
                            txnMetaAddress = 0;
                            close();
                        }
                    }
                }

                throw CairoException.critical(0).put("expected to read table structure changes but there is no saved in the sequencer [structureVersionLo=").put(structureVersionLo).put(']');
            } finally {
                // the mapping, if any, stays valid after the descriptors are closed
                ff.close(txnFd);
                ff.close(txnMetaFd);
                ff.close(txnMetaIndexFd);
            }
        }
    }

    /**
     * Read-only, memory-mapped cursor over the fixed-size records of the transaction log
     * file. The record of txn {@code k} (1-based) lives at
     * {@code HEADER_SIZE + (k - 1) * RECORD_SIZE}. The cursor starts positioned on
     * {@code txnLo}; the first successful {@code hasNext()} moves it to {@code txnLo + 1}.
     * <p>
     * Instances are reused through a thread-local, so resource handling is defensive:
     * {@code close()} may be called any number of times and {@code of()} releases whatever
     * the previous use still holds before opening the file again.
     */
    private static class TransactionLogCursorImpl implements TransactionLogCursor {
        // base address of the mapping; 0 when nothing is mapped
        private long address;
        // descriptor of the log file; -1 when no file is open (the default 0 is a valid fd)
        private int fd = -1;
        private FilesFacade ff;
        // txn the cursor currently points at
        private long txn;
        // number of records covered by the mapping, as read from the file header
        private long txnCount;
        // txn that toTop() rewinds to
        private long txnLo;
        // file offset of the current record
        private long txnOffset;

        public TransactionLogCursorImpl(FilesFacade ff, long txnLo, final Path path) {
            of(ff, txnLo, path);
        }

        /**
         * Closes the file descriptor and unmaps the file. Safe to call repeatedly and on an
         * instance that failed to open or map: each resource is released at most once.
         */
        @Override
        public void close() {
            if (fd > -1) {
                ff.close(fd);
                fd = -1;
            }
            // a failed mmap leaves a non-positive address, there is nothing to unmap then
            if (address > 0) {
                ff.munmap(address, getMappedLen(), MemoryTag.MMAP_TX_LOG_CURSOR);
                address = 0;
            }
        }

        @Override
        public long getCommitTimestamp() {
            return Unsafe.getUnsafe().getLong(address + txnOffset + TX_LOG_COMMIT_TIMESTAMP_OFFSET);
        }

        @Override
        public int getSegmentId() {
            return Unsafe.getUnsafe().getInt(address + txnOffset + TX_LOG_SEGMENT_OFFSET);
        }

        @Override
        public int getSegmentTxn() {
            return Unsafe.getUnsafe().getInt(address + txnOffset + TX_LOG_SEGMENT_TXN_OFFSET);
        }

        @Override
        public long getStructureVersion() {
            return Unsafe.getUnsafe().getLong(address + txnOffset + TX_LOG_STRUCTURE_VERSION_OFFSET);
        }

        @Override
        public long getTxn() {
            return txn;
        }

        @Override
        public int getWalId() {
            return Unsafe.getUnsafe().getInt(address + txnOffset + TX_LOG_WAL_ID_OFFSET);
        }

        /**
         * Advances to the next record. When the current mapping is exhausted, the txn count
         * is re-read from the file and, if it has grown, the mapping is extended before
         * trying again.
         *
         * @return {@code true} when the cursor moved to a new record
         */
        @Override
        public boolean hasNext() {
            if (hasNext(getMappedLen())) {
                return true;
            }

            final long newTxnCount = ff.readNonNegativeLong(fd, MAX_TXN_OFFSET);
            if (newTxnCount > txnCount) {
                remap(newTxnCount);
                return hasNext(getMappedLen());
            }
            return false;
        }

        /**
         * Re-reads the txn count; when it has grown, extends the mapping, steps the record
         * offset back by one record and sets {@code txnLo} to {@code txn - 1}.
         *
         * @return {@code true} when the file had grown and the position was adjusted
         */
        @Override
        public boolean setPosition() {
            final long newTxnCount = ff.readNonNegativeLong(fd, MAX_TXN_OFFSET);
            if (newTxnCount > txnCount) {
                remap(newTxnCount);

                this.txnLo = txn - 1;
                // NOTE(review): txn itself is not decremented together with txnOffset - confirm intended
                this.txnOffset -= RECORD_SIZE;
                return true;
            }
            return false;
        }

        /**
         * Positions the cursor directly on the record of {@code txn}.
         */
        @Override
        public void setPosition(long txn) {
            this.txnOffset = HEADER_SIZE + (txn - 1) * RECORD_SIZE;
            this.txn = txn;
        }

        /**
         * Rewinds to {@code txnLo}; does nothing when the txn count could not be read.
         */
        @Override
        public void toTop() {
            if (txnCount > -1L) {
                this.txnOffset = HEADER_SIZE + (txnLo - 1) * RECORD_SIZE;
                this.txn = txnLo;
            }
        }

        // Length of the mapping: header plus txnCount records.
        private long getMappedLen() {
            return txnCount * RECORD_SIZE + HEADER_SIZE;
        }

        // Moves one record forward when the record after the current one lies fully inside mappedLen.
        private boolean hasNext(long mappedLen) {
            if (txnOffset + 2 * RECORD_SIZE <= mappedLen) {
                txnOffset += RECORD_SIZE;
                txn++;
                return true;
            }
            return false;
        }

        /**
         * (Re)opens the cursor on the log file under {@code path} and positions it on {@code txnLo}.
         * Resources still held from a previous use of this instance are released first, so a
         * caller that forgot to close the cursor does not leak a descriptor and a mapping.
         * When the txn count cannot be read, the file is left unmapped.
         */
        @NotNull
        private TransactionLogCursorImpl of(FilesFacade ff, long txnLo, Path path) {
            // must run before the fields below are overwritten: it uses the old ff, fd,
            // address and txnCount
            close();

            this.ff = ff;
            this.fd = openFileRO(ff, path, TXNLOG_FILE_NAME);
            this.txnCount = ff.readNonNegativeLong(fd, MAX_TXN_OFFSET);
            if (txnCount > -1L) {
                this.address = ff.mmap(fd, getMappedLen(), 0, Files.MAP_RO, MemoryTag.MMAP_TX_LOG_CURSOR);
                this.txnOffset = HEADER_SIZE + (txnLo - 1) * RECORD_SIZE;
            }
            this.txnLo = txnLo;
            txn = txnLo;
            return this;
        }

        // Extends the mapping to cover newTxnCount records.
        // NOTE(review): assumes a mapping already exists; when of() could not read the txn
        // count, address is 0 here - confirm mremap copes with that.
        private void remap(long newTxnCount) {
            final long oldSize = getMappedLen();
            txnCount = newTxnCount;
            final long newSize = getMappedLen();
            address = ff.mremap(fd, address, oldSize, newSize, 0, Files.MAP_RO, MemoryTag.MMAP_TX_LOG_CURSOR);
        }
    }
}




// © 2015 - 2025 Weber Informatics LLC | Privacy Policy