/*******************************************************************************
 *     ___                  _   ____  ____
 *    / _ \ _   _  ___  ___| |_|  _ \| __ )
 *   | | | | | | |/ _ \/ __| __| | | |  _ \
 *   | |_| | |_| |  __/\__ \ |_| |_| | |_) |
 *    \__\_\\__,_|\___||___/\__|____/|____/
 *
 *  Copyright (c) 2014-2019 Appsicle
 *  Copyright (c) 2019-2023 QuestDB
 *
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *  http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 *
 ******************************************************************************/

package io.questdb.cutlass.text;

import io.questdb.cairo.*;
import io.questdb.cairo.security.AllowAllSecurityContext;
import io.questdb.cairo.sql.ExecutionCircuitBreaker;
import io.questdb.cairo.sql.RecordMetadata;
import io.questdb.cairo.vm.Vm;
import io.questdb.cairo.vm.api.MemoryCMARW;
import io.questdb.cutlass.text.types.TimestampAdapter;
import io.questdb.cutlass.text.types.TypeAdapter;
import io.questdb.griffin.engine.functions.columns.ColumnUtils;
import io.questdb.log.Log;
import io.questdb.log.LogFactory;
import io.questdb.std.*;
import io.questdb.std.str.*;
import org.jetbrains.annotations.Nullable;

import static io.questdb.cairo.TableUtils.TXN_FILE_NAME;
import static io.questdb.cutlass.text.ParallelCsvFileImporter.createTable;

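/**
 * A single unit of work in the parallel CSV import (COPY) pipeline. A worker configures the task
 * for one import phase via an ofPhase* method and then calls {@link #run}, which dispatches to
 * the matching phase runner and records the resulting status and, on failure, an error message.
 */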
public class CopyTask {

    public static final byte NO_PHASE = -1;
    public static final byte PHASE_ANALYZE_FILE_STRUCTURE = 9;
    public static final byte PHASE_ATTACH_PARTITIONS = 8;
    public static final byte PHASE_BOUNDARY_CHECK = 1;
    public static final byte PHASE_BUILD_SYMBOL_INDEX = 6;
    public static final byte PHASE_CLEANUP = 10;
    public static final byte PHASE_INDEXING = 2;
    public static final byte PHASE_MOVE_PARTITIONS = 7;
    public static final byte PHASE_PARTITION_IMPORT = 3;
    public static final byte PHASE_SETUP = 0;
    public static final byte PHASE_SYMBOL_TABLE_MERGE = 4;
    public static final byte PHASE_UPDATE_SYMBOL_KEYS = 5;
    public static final byte STATUS_CANCELLED = 3;
    public static final byte STATUS_FAILED = 2;
    public static final byte STATUS_FINISHED = 1;
    public static final byte STATUS_STARTED = 0;
    private static final Log LOG = LogFactory.getLog(CopyTask.class);
    private static final IntObjHashMap<String> PHASE_NAME_MAP = new IntObjHashMap<>();
    private static final IntObjHashMap<String> STATUS_NAME_MAP = new IntObjHashMap<>();

    private final PhaseBoundaryCheck phaseBoundaryCheck = new PhaseBoundaryCheck();
    private final PhaseBuildSymbolIndex phaseBuildSymbolIndex = new PhaseBuildSymbolIndex();
    private final PhaseIndexing phaseIndexing = new PhaseIndexing();
    private final PhasePartitionImport phasePartitionImport = new PhasePartitionImport();
    private final PhaseSymbolTableMerge phaseSymbolTableMerge = new PhaseSymbolTableMerge();
    private final PhaseUpdateSymbolKeys phaseUpdateSymbolKeys = new PhaseUpdateSymbolKeys();
    private int chunkIndex;
    private @Nullable ExecutionCircuitBreaker circuitBreaker;
    private @Nullable CharSequence errorMessage;
    private byte phase;
    private byte status;

    public static String getPhaseName(byte phase) {
        return PHASE_NAME_MAP.get(phase);
    }

    public static String getStatusName(byte status) {
        return STATUS_NAME_MAP.get(status);
    }

    public void clear() {
        if (phase == PHASE_BOUNDARY_CHECK) {
            phaseBoundaryCheck.clear();
        } else if (phase == PHASE_INDEXING) {
            phaseIndexing.clear();
        } else if (phase == PHASE_PARTITION_IMPORT) {
            phasePartitionImport.clear();
        } else if (phase == PHASE_SYMBOL_TABLE_MERGE) {
            phaseSymbolTableMerge.clear();
        } else if (phase == PHASE_UPDATE_SYMBOL_KEYS) {
            phaseUpdateSymbolKeys.clear();
        } else if (phase == PHASE_BUILD_SYMBOL_INDEX) {
            phaseBuildSymbolIndex.clear();
        } else {
            throw TextException.$("Unexpected phase ").put(phase);
        }
    }

    public PhaseIndexing getBuildPartitionIndexPhase() {
        return phaseIndexing;
    }

    public int getChunkIndex() {
        return chunkIndex;
    }

    public PhaseBoundaryCheck getCountQuotesPhase() {
        return phaseBoundaryCheck;
    }

    public @Nullable CharSequence getErrorMessage() {
        return errorMessage;
    }

    public PhasePartitionImport getImportPartitionDataPhase() {
        return phasePartitionImport;
    }

    public byte getPhase() {
        return phase;
    }

    public byte getStatus() {
        return status;
    }

    public boolean isCancelled() {
        return this.status == STATUS_CANCELLED;
    }

    public boolean isFailed() {
        return this.status == STATUS_FAILED;
    }

    public void ofPhaseBoundaryCheck(final FilesFacade ff, Path path, long chunkStart, long chunkEnd) {
        this.phase = PHASE_BOUNDARY_CHECK;
        this.phaseBoundaryCheck.of(ff, path, chunkStart, chunkEnd);
    }

    public void ofPhaseBuildSymbolIndex(
            CairoEngine cairoEngine,
            TableStructure tableStructure,
            CharSequence root,
            int index,
            RecordMetadata metadata
    ) {
        this.phase = PHASE_BUILD_SYMBOL_INDEX;
        this.phaseBuildSymbolIndex.of(cairoEngine, tableStructure, root, index, metadata);
    }

    public void ofPhaseIndexing(
            long chunkStart,
            long chunkEnd,
            long lineNumber,
            int index,
            CharSequence inputFileName,
            CharSequence importRoot,
            int partitionBy,
            byte columnDelimiter,
            int timestampIndex,
            TimestampAdapter adapter,
            boolean ignoreHeader,
            int atomicity
    ) {
        this.phase = PHASE_INDEXING;
        this.phaseIndexing.of(
                chunkStart,
                chunkEnd,
                lineNumber,
                index,
                inputFileName,
                importRoot,
                partitionBy,
                columnDelimiter,
                timestampIndex,
                adapter,
                ignoreHeader,
                atomicity
        );
    }

    public void ofPhaseSymbolTableMerge(
            CairoConfiguration cfg,
            CharSequence importRoot,
            TableWriter writer,
            TableToken tableToken,
            CharSequence column,
            int columnIndex,
            int symbolColumnIndex,
            int tmpTableCount,
            int partitionBy
    ) {
        this.phase = PHASE_SYMBOL_TABLE_MERGE;
        this.phaseSymbolTableMerge.of(
                cfg,
                importRoot,
                writer,
                tableToken,
                column,
                columnIndex,
                symbolColumnIndex,
                tmpTableCount,
                partitionBy
        );
    }

    public void ofPhaseUpdateSymbolKeys(
            CairoEngine cairoEngine,
            TableStructure tableStructure,
            int index,
            long partitionSize,
            long partitionTimestamp,
            CharSequence root,
            CharSequence columnName,
            int symbolCount
    ) {
        this.phase = PHASE_UPDATE_SYMBOL_KEYS;
        this.phaseUpdateSymbolKeys.of(
                cairoEngine,
                tableStructure,
                index,
                partitionSize,
                partitionTimestamp,
                root,
                columnName,
                symbolCount
        );
    }

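    // Executes the currently configured phase. Returns true on success; on cancellation or failure
    // it records the status and error message and returns false instead of propagating the error.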
    public boolean run(
            TextLexerWrapper lf,
            CsvFileIndexer indexer,
            DirectCharSink utf8Sink,
            DirectLongList unmergedIndexes,
            long fileBufAddr,
            long fileBufSize,
            Path p1,
            Path p2
    ) {
        try {
            LOG.debug().$("starting [phase=").$(getPhaseName(phase)).$(",index=").$(chunkIndex).I$();

            this.status = STATUS_STARTED;
            this.errorMessage = null;

            throwIfCancelled();

            if (phase == PHASE_BOUNDARY_CHECK) {
                phaseBoundaryCheck.run(fileBufAddr, fileBufSize);
            } else if (phase == PHASE_INDEXING) {
                phaseIndexing.run(indexer, fileBufAddr, fileBufSize);
            } else if (phase == PHASE_PARTITION_IMPORT) {
                phasePartitionImport.run(lf, fileBufAddr, fileBufSize, utf8Sink, unmergedIndexes, p1, p2);
            } else if (phase == PHASE_SYMBOL_TABLE_MERGE) {
                phaseSymbolTableMerge.run(p1);
            } else if (phase == PHASE_UPDATE_SYMBOL_KEYS) {
                phaseUpdateSymbolKeys.run(p1);
            } else if (phase == PHASE_BUILD_SYMBOL_INDEX) {
                phaseBuildSymbolIndex.run();
            } else {
                throw TextException.$("Unexpected phase ").put(phase);
            }

            LOG.debug().$("finished [phase=").$(getPhaseName(phase)).$(",index=").$(chunkIndex).I$();
        } catch (TextImportException e) {
            this.status = STATUS_CANCELLED;
            this.errorMessage = e.getMessage();
            LOG.error().$("Import cancelled [phase=").$(getPhaseName(e.getPhase())).I$();
            return false;
        } catch (Throwable t) {
            LOG.error()
                    .$("could not import [phase=").$(getPhaseName(phase))
                    .$(", ex=").$(t)
                    .I$();
            this.status = STATUS_FAILED;
            this.errorMessage = t.getMessage();
            return false;
        }

        return true;
    }

    public void setChunkIndex(int chunkIndex) {
        this.chunkIndex = chunkIndex;
    }

    public void setCircuitBreaker(@Nullable ExecutionCircuitBreaker circuitBreaker) {
        this.circuitBreaker = circuitBreaker;
    }

    private TextImportException getCancelException() {
        TextImportException ex = TextImportException.instance(this.phase, "Cancelled");
        ex.setCancelled(true);
        return ex;
    }

    private void throwIfCancelled() throws TextImportException {
        if (circuitBreaker != null && circuitBreaker.checkIfTripped()) {
            throw getCancelException();
        }
    }

    void ofPhasePartitionImport(
            CairoEngine cairoEngine,
            TableStructure targetTableStructure,
            ObjList<TypeAdapter> types,
            int atomicity,
            byte columnDelimiter,
            CharSequence importRoot,
            CharSequence inputFileName,
            int index,
            int lo,
            int hi,
            final ObjList<ParallelCsvFileImporter.PartitionInfo> partitions
    ) {
        this.phase = PHASE_PARTITION_IMPORT;
        this.phasePartitionImport.of(
                cairoEngine,
                targetTableStructure,
                types,
                atomicity,
                columnDelimiter,
                importRoot,
                inputFileName,
                index,
                lo,
                hi,
                partitions
        );
    }

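    // Scans a byte range of the input file, counting double quotes and newlines. Newline counts and
    // first-newline offsets are tracked separately for even and odd quote parity because a chunk may
    // start inside a quoted field; the caller can then pick whichever parity reflects the true quote
    // state at the chunk boundary when deciding where rows begin.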
    public static class PhaseBoundaryCheck {
        private long chunkEnd;
        private long chunkStart;
        private FilesFacade ff;
        private long newLineCountEven;
        private long newLineCountOdd;
        private long newLineOffsetEven;
        private long newLineOffsetOdd;
        private Path path;
        private long quoteCount;

        public void clear() {
            this.ff = null;
            this.path = null;
            this.chunkStart = -1;
            this.chunkEnd = -1;
        }

        public long getNewLineCountEven() {
            return newLineCountEven;
        }

        public long getNewLineCountOdd() {
            return newLineCountOdd;
        }

        public long getNewLineOffsetEven() {
            return newLineOffsetEven;
        }

        public long getNewLineOffsetOdd() {
            return newLineOffsetOdd;
        }

        public long getQuoteCount() {
            return quoteCount;
        }

        public void of(final FilesFacade ff, Path path, long chunkStart, long chunkEnd) {
            assert ff != null;
            assert path != null;
            assert chunkStart >= 0 && chunkEnd > chunkStart;

            this.ff = ff;
            this.path = path;
            this.chunkStart = chunkStart;
            this.chunkEnd = chunkEnd;
        }

        public void run(long fileBufPtr, long fileBufSize) throws TextException {
            long offset = chunkStart;

            //output vars
            long quotes = 0;
            long[] nlCount = new long[2];
            long[] nlFirst = new long[]{-1, -1};

            long read;
            long ptr;
            long hi;

            int fd = TableUtils.openRO(ff, path, LOG);
            ff.fadvise(fd, chunkStart, chunkEnd - chunkStart, Files.POSIX_FADV_SEQUENTIAL);
            try {
                do {
                    long leftToRead = Math.min(chunkEnd - offset, fileBufSize);
                    read = (int) ff.read(fd, fileBufPtr, leftToRead, offset);
                    if (read < 1) {
                        break;
                    }
                    hi = fileBufPtr + read;
                    ptr = fileBufPtr;

                    while (ptr < hi) {
                        final byte c = Unsafe.getUnsafe().getByte(ptr++);
                        if (c == '"') {
                            quotes++;
                        } else if (c == '\n') {
                            nlCount[(int) (quotes & 1)]++;
                            if (nlFirst[(int) (quotes & 1)] == -1) {
                                nlFirst[(int) (quotes & 1)] = offset + (ptr - fileBufPtr);
                            }
                        }
                    }

                    offset += read;
                } while (offset < chunkEnd);

                if (read < 0 || offset < chunkEnd) {
                    throw TextException
                            .$("could not read import file [path='").put(path)
                            .put("', offset=").put(offset)
                            .put(", errno=").put(ff.errno())
                            .put(']');
                }
            } finally {
                ff.close(fd);
            }

            this.quoteCount = quotes;
            this.newLineCountEven = nlCount[0];
            this.newLineCountOdd = nlCount[1];
            this.newLineOffsetEven = nlFirst[0];
            this.newLineOffsetOdd = nlFirst[1];
        }
    }

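    // Re-opens one temporary shard table ("<tableName>_<index>") under the supplied root and adds an
    // index for every column that the metadata marks as indexed.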
    public static class PhaseBuildSymbolIndex {
        private final StringSink tableNameSink = new StringSink();
        private CairoEngine cairoEngine;
        private int index;
        private RecordMetadata metadata;
        private CharSequence root;
        private TableStructure tableStructure;

        public void clear() {
            this.cairoEngine = null;
            this.tableStructure = null;
            this.root = null;
            this.index = -1;
            this.metadata = null;
        }

        public void of(
                CairoEngine cairoEngine,
                TableStructure tableStructure,
                CharSequence root,
                int index, RecordMetadata metadata
        ) {
            this.cairoEngine = cairoEngine;
            this.tableStructure = tableStructure;
            this.root = root;
            this.index = index;
            this.metadata = metadata;
        }

        public void run() {
            final CairoConfiguration configuration = cairoEngine.getConfiguration();

            tableNameSink.clear();
            tableNameSink.put(tableStructure.getTableName()).put('_').put(index);
            String tableName = tableNameSink.toString();
            TableToken tableToken = new TableToken(tableName, tableName, (int) cairoEngine.getTableIdGenerator().getNextId(), false, false, false);

            final int columnCount = metadata.getColumnCount();
            try (
                    TableWriter w = new TableWriter(
                            configuration,
                            tableToken,
                            cairoEngine.getMessageBus(),
                            null,
                            true,
                            DefaultLifecycleManager.INSTANCE,
                            root,
                            cairoEngine.getDdlListener(tableToken),
                            cairoEngine.getSnapshotAgent(),
                            cairoEngine.getMetrics()
                    )
            ) {
                for (int i = 0; i < columnCount; i++) {
                    if (metadata.isColumnIndexed(i)) {
                        w.addIndex(metadata.getColumnName(i), metadata.getIndexValueBlockCapacity(i));
                    }
                }
            }
        }
    }

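    // Merges the symbol values collected in each temporary shard table into the target table's symbol
    // map for one column, and writes a key remap file (SYMBOL_KEY_REMAP_FILE_SUFFIX) that
    // PhaseUpdateSymbolKeys later uses to translate shard-local symbol keys into the merged keys.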
    public static class PhaseSymbolTableMerge {
        private CairoConfiguration cfg;
        private CharSequence column;
        private int columnIndex;
        private CharSequence importRoot;
        private int partitionBy;
        private int symbolColumnIndex;
        private TableToken tableToken;
        private int tmpTableCount;
        private TableWriter writer;

        public void clear() {
            this.cfg = null;
            this.importRoot = null;
            this.writer = null;
            this.tableToken = null;
            this.column = null;
            this.columnIndex = -1;
            this.symbolColumnIndex = -1;
            this.tmpTableCount = -1;
            this.partitionBy = -1;
        }

        public void of(
                CairoConfiguration cfg,
                CharSequence importRoot,
                TableWriter writer,
                TableToken tableToken,
                CharSequence column,
                int columnIndex,
                int symbolColumnIndex,
                int tmpTableCount,
                int partitionBy
        ) {
            this.cfg = cfg;
            this.importRoot = importRoot;
            this.writer = writer;
            this.tableToken = tableToken;
            this.column = column;
            this.columnIndex = columnIndex;
            this.symbolColumnIndex = symbolColumnIndex;
            this.tmpTableCount = tmpTableCount;
            this.partitionBy = partitionBy;
        }

        public void run(Path path) {
            final FilesFacade ff = cfg.getFilesFacade();
            path.of(importRoot).concat(tableToken.getTableName());
            int plen = path.size();
            for (int i = 0; i < tmpTableCount; i++) {
                path.trimTo(plen);
                path.putAscii('_').put(i);
                int tableLen = path.size();
                try (TxReader txFile = new TxReader(ff).ofRO(path.concat(TXN_FILE_NAME).$(), partitionBy)) {
                    path.trimTo(tableLen);
                    txFile.unsafeLoadAll();
                    int symbolCount = txFile.getSymbolValueCount(symbolColumnIndex);
                    try (
                            SymbolMapReaderImpl reader = new SymbolMapReaderImpl(
                                    cfg,
                                    path,
                                    column,
                                    TableUtils.COLUMN_NAME_TXN_NONE, symbolCount
                            );
                            MemoryCMARW mem = Vm.getSmallCMARWInstance(
                                    ff,
                                    path.concat(column).put(TableUtils.SYMBOL_KEY_REMAP_FILE_SUFFIX).$(),
                                    MemoryTag.MMAP_IMPORT,
                                    cfg.getWriterFileOpenOpts()
                            )
                    ) {
                        // It is possible to skip the symbol rewrite when symbols do not clash.
                        // In our benchmarks, rewriting symbols takes a tiny fraction of the time compared to everything else,
                        // so we don't need to optimise this yet.
                        SymbolMapWriter.mergeSymbols(writer.getSymbolMapWriter(columnIndex), reader, mem);
                    }
                }
            }
        }
    }

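    // Rewrites the symbol keys stored in one shard partition: the symbol column's data file and the
    // remap file produced by PhaseSymbolTableMerge are memory-mapped, and each locally assigned key is
    // replaced with its table-wide key.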
    public static class PhaseUpdateSymbolKeys {
        CharSequence columnName;
        int index;
        long partitionSize;
        long partitionTimestamp;
        CharSequence root;
        int symbolCount;
        private CairoEngine cairoEngine;
        private TableStructure tableStructure;

        public void clear() {
            this.cairoEngine = null;
            this.tableStructure = null;
            this.index = -1;
            this.partitionSize = -1;
            this.partitionTimestamp = -1;
            this.root = null;
            this.columnName = null;
            this.symbolCount = -1;
        }

        public void of(
                CairoEngine cairoEngine,
                TableStructure tableStructure,
                int index,
                long partitionSize,
                long partitionTimestamp,
                CharSequence root,
                CharSequence columnName,
                int symbolCount
        ) {
            this.cairoEngine = cairoEngine;
            this.tableStructure = tableStructure;
            this.index = index;
            this.partitionSize = partitionSize;
            this.partitionTimestamp = partitionTimestamp;
            this.root = root;
            this.columnName = columnName;
            this.symbolCount = symbolCount;
        }

        public void run(Path path) {
            final FilesFacade ff = cairoEngine.getConfiguration().getFilesFacade();

            TableToken tableToken = cairoEngine.verifyTableName(tableStructure.getTableName());
            path.of(root).concat(tableToken.getTableName()).put('_').put(index);
            int plen = path.size();
            TableUtils.setPathForPartition(path.slash(), tableStructure.getPartitionBy(), partitionTimestamp, -1);
            path.concat(columnName).put(TableUtils.FILE_SUFFIX_D);

            long columnMemory = 0;
            long columnMemorySize = 0;
            long remapTableMemory = 0;
            long remapTableMemorySize = 0;
            int columnFd = -1;
            int remapFd = -1;
            try {
                columnFd = TableUtils.openFileRWOrFail(ff, path.$(), CairoConfiguration.O_NONE);
                columnMemorySize = ff.length(columnFd);

                path.trimTo(plen);
                path.concat(columnName).put(TableUtils.SYMBOL_KEY_REMAP_FILE_SUFFIX);
                remapFd = TableUtils.openFileRWOrFail(ff, path.$(), CairoConfiguration.O_NONE);
                remapTableMemorySize = ff.length(remapFd);

                if (columnMemorySize >= Integer.BYTES && remapTableMemorySize >= Integer.BYTES) {
                    columnMemory = TableUtils.mapRW(ff, columnFd, columnMemorySize, MemoryTag.MMAP_IMPORT);
                    remapTableMemory = TableUtils.mapRW(ff, remapFd, remapTableMemorySize, MemoryTag.MMAP_IMPORT);
                    long columnMemSize = partitionSize * Integer.BYTES;
                    long remapMemSize = (long) symbolCount * Integer.BYTES;
                    ColumnUtils.symbolColumnUpdateKeys(columnMemory, columnMemSize, remapTableMemory, remapMemSize);
                }
            } finally {
                ff.close(columnFd);
                ff.close(remapFd);
                if (columnMemory > 0) {
                    ff.munmap(columnMemory, columnMemorySize, MemoryTag.MMAP_IMPORT);
                }
                if (remapTableMemory > 0) {
                    ff.munmap(remapTableMemory, remapTableMemorySize, MemoryTag.MMAP_IMPORT);
                }
            }
        }
    }

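    // Indexes a chunk of the input file: CsvFileIndexer extracts the designated timestamp from each
    // line and writes per-partition index chunks under the import root (each entry pairing a line's
    // timestamp with its offset and length in the file), while partition keys and sizes are collected
    // for the merge performed by the partition import phase.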
    public class PhaseIndexing {
        //stores partition key and size for all indexed partitions
        private final LongList partitionKeysAndSizes = new LongList();
        private TimestampAdapter adapter;
        private int atomicity;
        private long chunkEnd;
        private long chunkStart;
        private byte columnDelimiter;
        private long errorCount;
        private boolean ignoreHeader;
        private CharSequence importRoot;
        private int index;
        private CharSequence inputFileName;
        private long lineCount;
        private long lineNumber;
        private int partitionBy;
        private int timestampIndex;

        public void clear() {
            this.chunkStart = -1;
            this.chunkEnd = -1;
            this.lineNumber = -1;
            this.lineCount = 0;
            this.errorCount = 0;

            this.index = -1;
            this.inputFileName = null;
            this.importRoot = null;
            this.partitionBy = -1;
            this.columnDelimiter = (byte) -1;
            this.timestampIndex = -1;
            this.adapter = null;
            this.ignoreHeader = false;
            this.atomicity = -1;
        }

        public long getErrorCount() {
            return errorCount;
        }

        public long getLineCount() {
            return lineCount;
        }

        public LongList getPartitionKeysAndSizes() {
            return partitionKeysAndSizes;
        }

        public void of(
                long chunkStart,
                long chunkEnd,
                long lineNumber,
                int index,
                CharSequence inputFileName,
                CharSequence importRoot,
                int partitionBy,
                byte columnDelimiter,
                int timestampIndex,
                TimestampAdapter adapter,
                boolean ignoreHeader,
                int atomicity
        ) {
            assert chunkStart >= 0 && chunkEnd > chunkStart;
            assert lineNumber >= 0;

            this.chunkStart = chunkStart;
            this.chunkEnd = chunkEnd;
            this.lineNumber = lineNumber;

            this.index = index;
            this.inputFileName = inputFileName;
            this.importRoot = importRoot;
            this.partitionBy = partitionBy;
            this.columnDelimiter = columnDelimiter;
            this.timestampIndex = timestampIndex;
            this.adapter = adapter;
            this.ignoreHeader = ignoreHeader;
            this.atomicity = atomicity;
        }

        public void run(CsvFileIndexer indexer, long fileBufAddr, long fileBufSize) throws TextException {
            try {
                indexer.of(
                        inputFileName,
                        importRoot,
                        index,
                        partitionBy,
                        columnDelimiter,
                        timestampIndex,
                        adapter,
                        ignoreHeader,
                        atomicity,
                        circuitBreaker
                );
                indexer.index(chunkStart, chunkEnd, lineNumber, partitionKeysAndSizes, fileBufAddr, fileBufSize);
                lineCount = indexer.getLineCount();
                errorCount = indexer.getErrorCount();
            } catch (TextException e) {
                if (indexer.isCancelled()) {
                    throw getCancelException();
                } else {
                    throw e;
                }
            } finally {
                indexer.clear();
            }
        }
    }

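    // Imports the data for a range of partitions [lo, hi): for each partition the index chunks produced
    // by PhaseIndexing are merged, the indexed lines are read back from the source CSV (via io_uring
    // when available, plain reads otherwise), parsed, and appended to a temporary shard table named
    // "<tableName>_<index>" under the import root.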
    public class PhasePartitionImport {
        private final LongList importedRows = new LongList();
        private final LongList offsets = new LongList();
        private final StringSink tableNameSink = new StringSink();
        private int atomicity;
        private byte columnDelimiter;
        private CairoEngine engine;
        private long errors;
        private int hi;
        private CharSequence importRoot;
        private int index;
        private CharSequence inputFileName;
        private int lo;
        private long offset;
        private ObjList<ParallelCsvFileImporter.PartitionInfo> partitions;
        private long rowsHandled;
        private long rowsImported;
        private TableWriter tableWriterRef;
        private TableStructure targetTableStructure;
        private TimestampAdapter timestampAdapter;
        private int timestampIndex;
        private ObjList<TypeAdapter> types;
        private DirectCharSink utf8Sink;
        private final CsvTextLexer.Listener onFieldsPartitioned = this::onFieldsPartitioned;

        public void clear() {
            this.engine = null;
            this.targetTableStructure = null;
            this.types = null;
            this.atomicity = -1;
            this.columnDelimiter = (byte) -1;
            this.importRoot = null;
            this.inputFileName = null;
            this.index = -1;
            this.partitions = null;
            this.timestampIndex = -1;
            this.timestampAdapter = null;

            this.offset = 0;
            this.importedRows.clear();
            this.tableNameSink.clear();
            this.rowsHandled = 0;
            this.rowsImported = 0;
            this.errors = 0;

            this.utf8Sink = null;
        }

        public long getErrors() {
            return errors;
        }

        public LongList getImportedRows() {
            return importedRows;
        }

        public long getRowsHandled() {
            return rowsHandled;
        }

        public long getRowsImported() {
            return rowsImported;
        }

        public void run(
                TextLexerWrapper lf,
                long fileBufAddr,
                long fileBufSize,
                DirectCharSink utf8Sink,
                DirectLongList unmergedIndexes,
                Path path,
                Path tmpPath
        ) throws TextException {

            this.utf8Sink = utf8Sink;

            final CairoConfiguration configuration = engine.getConfiguration();
            final FilesFacade ff = configuration.getFilesFacade();

            tableNameSink.clear();
            tableNameSink.put(targetTableStructure.getTableName()).put('_').put(index);
            String publicTableName = tableNameSink.toString();
            TableToken tableToken = new TableToken(publicTableName, publicTableName, (int) engine.getTableIdGenerator().getNextId(), false, false, false);
            createTable(ff, configuration.getMkDirMode(), importRoot, tableToken.getDirName(), publicTableName, targetTableStructure, 0, AllowAllSecurityContext.INSTANCE);

            try (
                    TableWriter writer = new TableWriter(
                            configuration,
                            tableToken,
                            engine.getMessageBus(),
                            null,
                            true,
                            DefaultLifecycleManager.INSTANCE,
                            importRoot,
                            engine.getDdlListener(tableToken),
                            engine.getSnapshotAgent(),
                            engine.getMetrics()
                    )
            ) {
                tableWriterRef = writer;
                AbstractTextLexer lexer = lf.getLexer(columnDelimiter);
                lexer.setTableName(tableNameSink);
                lexer.setSkipLinesWithExtraValues(false);

                long prevErrors;
                try {
                    for (int i = lo; i < hi; i++) {
                        throwIfCancelled();

                        lexer.clear();
                        prevErrors = errors;

                        final CharSequence name = partitions.getQuick(i).name;
                        path.of(importRoot).concat(name);
                        mergePartitionIndexAndImportData(
                                ff,
                                configuration.getIOURingFacade(),
                                configuration.isIOURingEnabled(),
                                path,
                                lexer,
                                fileBufAddr,
                                fileBufSize,
                                utf8Sink,
                                unmergedIndexes,
                                tmpPath
                        );

                        long newErrors = errors - prevErrors;
                        long imported = atomicity == Atomicity.SKIP_ROW ? lexer.getLineCount() - newErrors : lexer.getLineCount();
                        importedRows.add(i);
                        importedRows.add(imported);
                        rowsHandled += lexer.getLineCount();
                        rowsImported += imported;

                        LOG.info()
                                .$("imported data [temp_table=").$(tableNameSink)
                                .$(", partition=").$(name)
                                .$(", lines=").$(lexer.getLineCount())
                                .$(", errors=").$(newErrors)
                                .I$();
                    }
                } finally {
                    writer.commit();
                }
            }
        }

        private void consumeIOURing(
                FilesFacade ff,
                long sqeMin,
                AbstractTextLexer lexer,
                long fileBufAddr,
                LongList offsets,
                IOURing ring,
                int cc,
                Path tmpPath
        ) {
            int submitted = ring.submit();
            assert submitted == cc;

            long nextCqe = sqeMin;
            int writtenMax = 0;
            // consume submitted tasks
            for (int i = 0; i < submitted; i++) {

                while (!ring.nextCqe()) {
                    Os.pause();
                }

                if (ring.getCqeRes() < 0) {
                    throw TextException
                            .$("could not read from file [path='").put(tmpPath)
                            .put("', errno=").put(ff.errno())
                            .put(", offset=").put(offset)
                            .put("]");
                }

                if (ring.getCqeId() == nextCqe) {
                    // only parse lines in order of submissions
                    nextCqe++;
                    parseLinesAndWrite(lexer, fileBufAddr, offsets, writtenMax);
                    writtenMax++;
                }
            }

            // if reads completed out of order, writtenMax will be less than submitted
            for (int i = writtenMax; i < submitted; i++) {
                parseLinesAndWrite(lexer, fileBufAddr, offsets, i);
            }
        }

        private TableWriter.Row getRow(DirectUtf8Sequence dus, long offset) {
            final long timestamp;
            try {
                timestamp = timestampAdapter.getTimestamp(dus);
            } catch (Throwable e) {
                if (atomicity == Atomicity.SKIP_ALL) {
                    throw TextException.$("could not parse timestamp [offset=").put(offset).put(", msg=").put(e.getMessage()).put(']');
                } else {
                    logError(offset, timestampIndex, dus);
                    return null;
                }
            }
            return tableWriterRef.newRow(timestamp);
        }

        private void importPartitionData(
                final IOURingFacade rf,
                final boolean ioURingEnabled,
                final AbstractTextLexer lexer,
                long address,
                long size,
                long fileBufAddr,
                long fileBufSize,
                DirectCharSink utf8Sink,
                Path tmpPath
        ) throws TextException {
            if (ioURingEnabled && rf.isAvailable()) {
                importPartitionDataURing(
                        rf,
                        lexer,
                        address,
                        size,
                        fileBufAddr,
                        fileBufSize,
                        utf8Sink,
                        tmpPath
                );
            } else {
                importPartitionDataVanilla(
                        lexer,
                        address,
                        size,
                        fileBufAddr,
                        fileBufSize,
                        utf8Sink,
                        tmpPath
                );
            }
        }

        private void importPartitionDataURing(
                final IOURingFacade rf,
                AbstractTextLexer lexer,
                long address,
                long size,
                long fileBufAddr,
                long fileBufSize,
                DirectCharSink utf8Sink,
                Path tmpPath
        ) {
            final CairoConfiguration configuration = engine.getConfiguration();
            final FilesFacade ff = configuration.getFilesFacade();

            offsets.clear();
            lexer.setupBeforeExactLines(onFieldsPartitioned);

            int fd = -1;
            try {
                tmpPath.of(configuration.getSqlCopyInputRoot()).concat(inputFileName).$();
                utf8Sink.clear();
                fd = TableUtils.openRO(ff, tmpPath, LOG);

                final long len = ff.length(fd);
                if (len == -1) {
                    throw CairoException.critical(ff.errno())
                            .put("could not get length of file [path=").put(tmpPath)
                            .put(']');
                }

                ff.fadvise(fd, 0, len, Files.POSIX_FADV_RANDOM);

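                // each index entry's second long packs the line length into the top 16 bits and the
                // file offset into the lower 48 bits; MASK clears the length bits to recover the offset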
                final long MASK = ~((255L) << 56 | (255L) << 48);
                final long count = size / (2 * Long.BYTES);

                int ringCapacity = 32;
                long sqeMin = 0;
                long sqeMax = -1;
                try (IOURing ring = rf.newInstance(ringCapacity)) {
                    long addr = fileBufAddr;
                    long lim = fileBufAddr + fileBufSize;
                    int cc = 0;
                    int bytesToRead;
                    int additionalLines;
                    for (long i = 0; i < count; i++) {
                        throwIfCancelled();

                        final long lengthAndOffset = Unsafe.getUnsafe().getLong(address + i * 2L * Long.BYTES + Long.BYTES);
                        final int lineLength = (int) (lengthAndOffset >>> 48);
                        // the offset is used by the callback to report errors
                        offset = lengthAndOffset & MASK;
                        bytesToRead = lineLength;

                        // schedule reads until we either run out of ring capacity or exhaust our read buffer

                        if (cc == ringCapacity || (cc > 0 && addr + lineLength > lim)) {
                            // we are out of ring capacity or our buffer is exhausted
                            consumeIOURing(ff, sqeMin, lexer, fileBufAddr, offsets, ring, cc, tmpPath);

                            cc = 0;
                            addr = fileBufAddr;
                            offsets.clear();
                            sqeMin = sqeMax + 1;
                        }
                        if (addr + lineLength > lim) {
                            throw TextException.$("buffer overflow [path='").put(tmpPath)
                                    .put("', lineLength=").put(lineLength)
                                    .put(", fileBufSize=").put(fileBufSize)
                                    .put("]");
                        }

                        // try to coalesce subsequent lines into the same read if they're sequential
                        additionalLines = 0;
                        for (long j = i + 1; j < count; j++) {
                            long nextLengthAndOffset = Unsafe.getUnsafe().getLong(address + j * 2L * Long.BYTES + Long.BYTES);
                            int nextLineLength = (int) (nextLengthAndOffset >>> 48);
                            long nextOffset = nextLengthAndOffset & MASK;

                            // line indexing stops on first EOL char, e.g. \r, but it could be followed by \n
                            long diff = nextOffset - offset - bytesToRead;
                            int nextBytesToRead = ((int) diff) + nextLineLength;

                            if (diff > -1 && diff < 2 && addr + bytesToRead + nextBytesToRead <= lim) {
                                bytesToRead += nextBytesToRead;
                                additionalLines++;
                            } else {
                                break;
                            }
                        }
                        i += additionalLines;

                        sqeMax = ring.enqueueRead(fd, offset, addr, bytesToRead);
                        if (sqeMax == -1) {
                            throw TextException.$("io_uring error [path='").put(tmpPath)
                                    .put("', cqeRes=").put(-ring.getCqeRes())
                                    .put("]");
                        }

                        offsets.add(addr - fileBufAddr, bytesToRead);

                        cc++;
                        addr += bytesToRead;
                    } // for

                    // check if something is enqueued
                    if (cc > 0) {
                        consumeIOURing(ff, sqeMin, lexer, fileBufAddr, offsets, ring, cc, tmpPath);
                    }
                }

            } finally {
                ff.close(fd);
            }
        }

        private void importPartitionDataVanilla(
                AbstractTextLexer lexer,
                long address,
                long size,
                long fileBufAddr,
                long fileBufSize,
                DirectCharSink utf8Sink,
                Path tmpPath
        ) {
            final CairoConfiguration configuration = engine.getConfiguration();
            final FilesFacade ff = configuration.getFilesFacade();

            lexer.setupBeforeExactLines(onFieldsPartitioned);

            int fd = -1;
            try {
                tmpPath.of(configuration.getSqlCopyInputRoot()).concat(inputFileName).$();
                utf8Sink.clear();
                fd = TableUtils.openRO(ff, tmpPath, LOG);

                final long len = ff.length(fd);
                if (len == -1) {
                    throw CairoException.critical(ff.errno()).put(
                                    "could not get length of file [path=").put(tmpPath)
                            .put(']');
                }

                ff.fadvise(fd, 0, len, Files.POSIX_FADV_RANDOM);

                final long MASK = ~((255L) << 56 | (255L) << 48);
                final long count = size / (2 * Long.BYTES);
                int bytesToRead;
                int additionalLines;

                for (long i = 0; i < count; i++) {
                    throwIfCancelled();

                    long lengthAndOffset = Unsafe.getUnsafe().getLong(address + i * 2L * Long.BYTES + Long.BYTES);
                    int lineLength = (int) (lengthAndOffset >>> 48);
                    offset = lengthAndOffset & MASK;
                    bytesToRead = lineLength;

                    // try to coalesce subsequent lines into the same read if they're sequential
                    additionalLines = 0;
                    for (long j = i + 1; j < count; j++) {
                        long nextLengthAndOffset = Unsafe.getUnsafe().getLong(address + j * 2L * Long.BYTES + Long.BYTES);
                        int nextLineLength = (int) (nextLengthAndOffset >>> 48);
                        long nextOffset = nextLengthAndOffset & MASK;

                        // line indexing stops on first EOL char, e.g. \r, but it could be followed by \n
                        long diff = nextOffset - offset - bytesToRead;
                        int nextBytesToRead = (int) (diff + nextLineLength);
                        if (diff > -1 && diff < 2 && bytesToRead + nextBytesToRead <= fileBufSize) {
                            bytesToRead += nextBytesToRead;
                            additionalLines++;
                        } else {
                            break;
                        }
                    }
                    i += additionalLines;

                    if (bytesToRead > fileBufSize) {
                        throw TextException
                                .$("buffer overflow [path='").put(tmpPath)
                                .put("', bytesToRead=").put(bytesToRead)
                                .put(", fileBufSize=").put(fileBufSize)
                                .put("]");
                    }

                    long n = ff.read(fd, fileBufAddr, bytesToRead, offset);
                    if (n > 0) {
                        // at this phase there is no way for lines to be split across buffers
                        lexer.parseExactLines(fileBufAddr, fileBufAddr + n);
                    } else {
                        throw TextException
                                .$("could not read from file [path='").put(tmpPath)
                                .put("', errno=").put(ff.errno())
                                .put(", offset=").put(offset)
                                .put("]");
                    }
                }
            } finally {
                ff.close(fd);
            }
        }

        private void logError(long offset, int column, final DirectUtf8Sequence dus) {
            LOG.error()
                    .$("type syntax [type=").$(ColumnType.nameOf(types.getQuick(column).getType()))
                    .$(", offset=").$(offset)
                    .$(", column=").$(column)
                    .$(", value='").$(dus)
                    .$("']").$();
        }

        private void mergePartitionIndexAndImportData(
                final FilesFacade ff,
                final IOURingFacade rf,
                boolean ioURingEnabled,
                Path partitionPath,
                final AbstractTextLexer lexer,
                long fileBufAddr,
                long fileBufSize,
                DirectCharSink utf8Sink,
                DirectLongList unmergedIndexes,
                Path tmpPath
        ) throws TextException {
            unmergedIndexes.clear();
            partitionPath.slash$();
            int partitionLen = partitionPath.size();

            long mergedIndexSize = -1;
            long mergeIndexAddr = 0;
            int fd = -1;
            try {
                mergedIndexSize = openIndexChunks(ff, partitionPath, unmergedIndexes, partitionLen);

                if (unmergedIndexes.size() > 2) { // there's more than one chunk, so we have to merge
                    partitionPath.trimTo(partitionLen);
                    partitionPath.concat(CsvFileIndexer.INDEX_FILE_NAME).$();

                    fd = TableUtils.openFileRWOrFail(ff, partitionPath, CairoConfiguration.O_NONE);
                    mergeIndexAddr = TableUtils.mapRW(ff, fd, mergedIndexSize, MemoryTag.MMAP_IMPORT);

                    Vect.mergeLongIndexesAsc(unmergedIndexes.getAddress(), (int) unmergedIndexes.size() / 2, mergeIndexAddr);
                    // release chunk memory because it's been copied to merge area
                    unmap(ff, unmergedIndexes);

                    importPartitionData(
                            rf,
                            ioURingEnabled,
                            lexer,
                            mergeIndexAddr,
                            mergedIndexSize,
                            fileBufAddr,
                            fileBufSize,
                            utf8Sink,
                            tmpPath
                    );
                } else { // we can use the single chunk as is
                    importPartitionData(
                            rf,
                            ioURingEnabled,
                            lexer,
                            unmergedIndexes.get(0),
                            mergedIndexSize,
                            fileBufAddr,
                            fileBufSize,
                            utf8Sink,
                            tmpPath
                    );
                }
            } finally {
                ff.close(fd);
                ff.munmap(mergeIndexAddr, mergedIndexSize, MemoryTag.MMAP_IMPORT);
                unmap(ff, unmergedIndexes);
            }
        }

        private boolean onField(
                long offset,
                final DirectUtf8Sequence dus,
                TableWriter.Row w,
                int fieldIndex
        ) throws TextException {
            TypeAdapter type = this.types.getQuick(fieldIndex);
            try {
                type.write(w, fieldIndex, dus, utf8Sink);
            } catch (NumericException | Utf8Exception | ImplicitCastException ignore) {
                errors++;
                logError(offset, fieldIndex, dus);
                switch (atomicity) {
                    case Atomicity.SKIP_ALL:
                        tableWriterRef.rollback();
                        throw TextException.$("bad syntax [line offset=").put(offset).put(",column=").put(fieldIndex).put(']');
                    case Atomicity.SKIP_ROW:
                        w.cancel();
                        return true;
                    default: // SKIP column
                        break;
                }
            } catch (Exception e) {
                throw TextException.$("unexpected error [line offset=").put(offset).put(",column=").put(fieldIndex).put(",message=").put(e.getMessage()).put(']');
            }
            return false;
        }

        private void onFieldsPartitioned(long line, ObjList<DirectUtf8String> values, int valuesLength) {
            assert tableWriterRef != null;
            DirectUtf8Sequence dus = values.getQuick(timestampIndex);
            final TableWriter.Row w = getRow(dus, offset);
            if (w == null) {
                return;
            }
            for (int i = 0; i < valuesLength; i++) {
                dus = values.getQuick(i);
                if (i == timestampIndex || dus.size() == 0) {
                    continue;
                }
                if (onField(offset, dus, w, i)) return;
            }
            w.append();
        }

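        // memory-maps every index chunk file found in the partition directory, appending
        // (address, entry count) pairs to mergeIndexes, and returns the combined size in bytes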
        private long openIndexChunks(FilesFacade ff, Path partitionPath, DirectLongList mergeIndexes, int partitionLen) {
            long mergedIndexSize = 0;
            long chunk = ff.findFirst(partitionPath);
            if (chunk > 0) {
                try {
                    do {
                        // chunk loop
                        long chunkName = ff.findName(chunk);
                        long chunkType = ff.findType(chunk);
                        if (chunkType == Files.DT_FILE) {
                            partitionPath.trimTo(partitionLen);
                            partitionPath.concat(chunkName).$();

                            int fd = TableUtils.openRO(ff, partitionPath, LOG);
                            long size = 0;
                            long address = -1;

                            try {
                                size = ff.length(fd);
                                if (size < 1) {
                                    throw TextException.$("index chunk is empty [path='").put(partitionPath).put(']');
                                }
                                address = TableUtils.mapRO(ff, fd, size, MemoryTag.MMAP_IMPORT);
                                mergeIndexes.add(address);
                                mergeIndexes.add(size / CsvFileIndexer.INDEX_ENTRY_SIZE);
                                mergedIndexSize += size;
                            } catch (Throwable t) {
                                if (address != -1) { //release mem if it can't be added to mergeIndexes
                                    ff.munmap(address, size, MemoryTag.MMAP_IMPORT);
                                }
                                throw t;
                            } finally {
                                ff.close(fd);
                            }
                        }
                    } while (ff.findNext(chunk) > 0);
                } finally {
                    ff.findClose(chunk);
                }
            }
            return mergedIndexSize;
        }

        private void parseLinesAndWrite(AbstractTextLexer lexer, long fileBufAddr, LongList offsets, int j) {
            final long lo = fileBufAddr + offsets.getQuick(j * 2);
            final long hi = lo + offsets.getQuick(j * 2 + 1);
            lexer.parseExactLines(lo, hi);
        }

        private void unmap(FilesFacade ff, DirectLongList mergeIndexes) {
            for (long i = 0, sz = mergeIndexes.size() / 2; i < sz; i++) {
                final long addr = mergeIndexes.get(2 * i);
                final long size = mergeIndexes.get(2 * i + 1) * CsvFileIndexer.INDEX_ENTRY_SIZE;
                ff.munmap(addr, size, MemoryTag.MMAP_IMPORT);
            }
            mergeIndexes.clear();
        }

        void of(
                CairoEngine cairoEngine,
                TableStructure targetTableStructure,
                ObjList<TypeAdapter> types,
                int atomicity,
                byte columnDelimiter,
                CharSequence importRoot,
                CharSequence inputFileName,
                int index,
                int lo,
                int hi,
                final ObjList<ParallelCsvFileImporter.PartitionInfo> partitions
        ) {
            this.engine = cairoEngine;
            this.targetTableStructure = targetTableStructure;
            this.types = types;
            this.atomicity = atomicity;
            this.columnDelimiter = columnDelimiter;
            this.importRoot = importRoot;
            this.inputFileName = inputFileName;
            this.index = index;
            this.lo = lo;
            this.hi = hi;
            this.partitions = partitions;
            this.timestampIndex = targetTableStructure.getTimestampIndex();
            this.timestampAdapter = (timestampIndex > -1 && timestampIndex < types.size()) ? (TimestampAdapter) types.getQuick(timestampIndex) : null;
            this.errors = 0;
            this.importedRows.clear();
        }
    }

    static {
        PHASE_NAME_MAP.put(PHASE_SETUP, "setup");
        PHASE_NAME_MAP.put(PHASE_BOUNDARY_CHECK, "boundary_check");
        PHASE_NAME_MAP.put(PHASE_INDEXING, "indexing");
        PHASE_NAME_MAP.put(PHASE_PARTITION_IMPORT, "partition_import");
        PHASE_NAME_MAP.put(PHASE_SYMBOL_TABLE_MERGE, "symbol_table_merge");
        PHASE_NAME_MAP.put(PHASE_UPDATE_SYMBOL_KEYS, "update_symbol_keys");
        PHASE_NAME_MAP.put(PHASE_BUILD_SYMBOL_INDEX, "build_symbol_index");
        PHASE_NAME_MAP.put(PHASE_MOVE_PARTITIONS, "move_partitions");
        PHASE_NAME_MAP.put(PHASE_ATTACH_PARTITIONS, "attach_partitions");
        PHASE_NAME_MAP.put(PHASE_ANALYZE_FILE_STRUCTURE, "analyze_file_structure");
        PHASE_NAME_MAP.put(PHASE_CLEANUP, "cleanup");

        STATUS_NAME_MAP.put(STATUS_STARTED, "started");
        STATUS_NAME_MAP.put(STATUS_FINISHED, "finished");
        STATUS_NAME_MAP.put(STATUS_FAILED, "failed");
        STATUS_NAME_MAP.put(STATUS_CANCELLED, "cancelled");
    }
}