
/*******************************************************************************
* ___ _ ____ ____
* / _ \ _ _ ___ ___| |_| _ \| __ )
* | | | | | | |/ _ \/ __| __| | | | _ \
* | |_| | |_| | __/\__ \ |_| |_| | |_) |
* \__\_\\__,_|\___||___/\__|____/|____/
*
* Copyright (c) 2014-2019 Appsicle
* Copyright (c) 2019-2024 QuestDB
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
******************************************************************************/
package io.questdb.cutlass.text;
import io.questdb.cairo.CairoConfiguration;
import io.questdb.cairo.CairoException;
import io.questdb.cairo.PartitionBy;
import io.questdb.cairo.TableUtils;
import io.questdb.cairo.sql.ExecutionCircuitBreaker;
import io.questdb.cairo.vm.MemoryPMARImpl;
import io.questdb.cairo.vm.Vm;
import io.questdb.cutlass.text.types.TimestampAdapter;
import io.questdb.cutlass.text.types.TypeManager;
import io.questdb.log.Log;
import io.questdb.log.LogFactory;
import io.questdb.std.*;
import io.questdb.std.datetime.DateFormat;
import io.questdb.std.datetime.microtime.Timestamps;
import io.questdb.std.datetime.millitime.DateFormatUtils;
import io.questdb.std.str.*;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;
import java.io.Closeable;
/**
 * Scans a whole input CSV file, or a chunk of it, and builds per-partition index files,
 * e.g. workDir/targetTable/2022-06/0_1.
 * These indexes are chunked to limit memory usage and are later merged into index.m.
 * It is a simplified version of the text lexer that only parses the timestamp field,
 * buffering it in a small roll buffer when the field straddles a read-buffer boundary.
 */
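// A minimal usage sketch (hypothetical driver code; in QuestDB the parallel CSV
// import pipeline, e.g. ParallelCsvFileImporter, wires these calls up):
//   CsvFileIndexer indexer = new CsvFileIndexer(configuration);
//   indexer.of(inputFileName, importRoot, workerIndex, PartitionBy.DAY, (byte) ',',
//           timestampIndex, timestampAdapter, true, Atomicity.SKIP_ROW, null);
//   indexer.index(chunkLo, chunkHi, 0, partitionKeysAndSizes, bufAddr, bufSize);
//   indexer.clear();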
public class CsvFileIndexer implements Closeable, Mutable {
public static final long INDEX_ENTRY_SIZE = 2 * Long.BYTES;
public static final CharSequence INDEX_FILE_NAME = "index.m";
private static final Log LOG = LogFactory.getLog(CsvFileIndexer.class);
private static final long MASK_CR = SwarUtils.broadcast((byte) '\r');
private static final long MASK_NEW_LINE = SwarUtils.broadcast((byte) '\n');
private static final long MASK_QUOTE = SwarUtils.broadcast((byte) '"');
// A guess at how long a timestamp string could be, including a long day or month name, etc.
// Since we are only interested in the timestamp field/column, there is no point buffering the whole line;
// we copy the field into the roll buffer only when the current field is the designated timestamp.
private static final int MAX_TIMESTAMP_LENGTH = 100;
private final CairoConfiguration configuration;
private final int dirMode;
private final FilesFacade ff;
private final int fieldRollBufLen;
private final CharSequence inputRoot;
private final long maxIndexChunkSize;
private final ObjList<IndexOutputFile> outputFileDenseList = new ObjList<>();
// maps partitionFloors to output file descriptors
private final LongObjHashMap<IndexOutputFile> outputFileLookupMap = new LongObjHashMap<>();
// timestamp field of current line
private final DirectUtf8String timestampField;
// used for timestamp parsing
private final TypeManager typeManager;
// used for timestamp parsing
private final DirectUtf16Sink utf16Sink;
private final DirectUtf8Sink utf8Sink;
private boolean cancelled = false;
private @Nullable ExecutionCircuitBreaker circuitBreaker;
private byte columnDelimiter;
private long columnDelimiterMask;
private boolean delayedOutQuote;
private boolean eol;
private int errorCount = 0;
private boolean failOnTsError;
// input file descriptor (cached between initial boundary scan & indexing phases)
private long fd = -1;
private long fieldHi;
private int fieldIndex;
// these two are pointers either into file read buffer or roll buffer
private long fieldLo;
private long fieldRollBufCur;
private long fieldRollBufPtr;
// if set to true then ignore first line of input file
private boolean header;
private CharSequence importRoot;
private boolean inQuote;
private int index;
private CharSequence inputFileName;
// fields taken & adjusted from textLexer
private long lastLineStart;
private long lastQuotePos = -1;
private long lineCount;
private long lineNumber;
// file offset of current start of buffered block
private long offset;
private DateFormat partitionDirFormatMethod;
// used to map timestamp to output file
private PartitionBy.PartitionFloorMethod partitionFloorMethod;
// work dir path
private Path path;
private boolean rollBufferUnusable = false;
private long sortBufferLength;
private long sortBufferPtr;
// adapter used to parse timestamp column
private TimestampAdapter timestampAdapter;
// position of timestamp column in csv (0-based)
private int timestampIndex;
private long timestampValue;
private boolean useFieldRollBuf = false;
public CsvFileIndexer(CairoConfiguration configuration) {
try {
this.configuration = configuration;
final TextConfiguration textConfiguration = configuration.getTextConfiguration();
int utf8SinkSize = textConfiguration.getUtf8SinkSize();
this.utf16Sink = new DirectUtf16Sink(utf8SinkSize);
this.utf8Sink = new DirectUtf8Sink(utf8SinkSize);
this.typeManager = new TypeManager(textConfiguration, utf16Sink, utf8Sink);
this.ff = configuration.getFilesFacade();
this.dirMode = configuration.getMkDirMode();
this.inputRoot = configuration.getSqlCopyInputRoot();
this.maxIndexChunkSize = configuration.getSqlCopyMaxIndexChunkSize();
this.fieldRollBufLen = MAX_TIMESTAMP_LENGTH;
this.fieldRollBufPtr = Unsafe.malloc(fieldRollBufLen, MemoryTag.NATIVE_IMPORT);
this.fieldRollBufCur = fieldRollBufPtr;
this.timestampField = new DirectUtf8String();
this.failOnTsError = false;
this.path = new Path();
this.sortBufferPtr = -1;
this.sortBufferLength = 0;
} catch (Throwable t) {
close();
throw t;
}
}
@Override
public final void clear() {
fieldLo = 0;
eol = false;
fieldIndex = 0;
inQuote = false;
delayedOutQuote = false;
lineNumber = 0;
lineCount = 0;
fieldRollBufCur = fieldRollBufPtr;
useFieldRollBuf = false;
rollBufferUnusable = false;
header = false;
errorCount = 0;
offset = -1;
Misc.clear(timestampField);
lastQuotePos = -1;
timestampValue = Long.MIN_VALUE;
inputFileName = null;
importRoot = null;
timestampAdapter = null;
timestampIndex = -1;
partitionFloorMethod = null;
partitionDirFormatMethod = null;
columnDelimiter = -1;
columnDelimiterMask = 0;
closeOutputFiles();
closeSortBuffer();
if (ff != null && ff.close(fd)) {
fd = -1;
}
failOnTsError = false;
if (path != null) {
path.trimTo(0);
}
circuitBreaker = null;
cancelled = false;
}
@Override
public void close() {
fieldRollBufPtr = Unsafe.free(fieldRollBufPtr, fieldRollBufLen, MemoryTag.NATIVE_IMPORT);
path = Misc.free(path);
Misc.clear(typeManager);
Misc.free(utf16Sink);
Misc.free(utf8Sink);
clear();
}
public int getErrorCount() {
return errorCount;
}
public long getLineCount() {
return lineCount;
}
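/**
 * Scans the [chunkLo, chunkHi) byte range of the input file, reading it through
 * the caller-supplied buffer, and appends one (timestamp, packed length|offset)
 * entry per data line to the per-partition index files.
 */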
public void index(
long chunkLo,
long chunkHi,
long lineNumber,
LongList partitionKeysAndSizes,
long fileBufAddr,
long fileBufSize
) {
assert chunkHi > 0;
assert chunkLo >= 0 && chunkLo < chunkHi;
openInputFile();
this.offset = chunkLo;
long read;
this.lastLineStart = offset;
this.lineNumber = lineNumber;
try {
do {
if (circuitBreaker != null && circuitBreaker.checkIfTripped()) {
this.cancelled = true;
throw TextException.$("Cancelled");
}
long leftToRead = Math.min(chunkHi - offset, fileBufSize);
read = (int) ff.read(fd, fileBufAddr, leftToRead, offset);
if (read < 1) {
break;
}
parse(fileBufAddr, fileBufAddr + read);
offset += read;
} while (offset < chunkHi);
if (read < 0 || offset < chunkHi) {
throw TextException
.$("could not read file [path='").put(path)
.put("', offset=").put(offset)
.put(", errno=").put(ff.errno())
.put(']');
} else {
parseLast();
}
collectPartitionStats(partitionKeysAndSizes);
sortAndCloseOutputFiles();
} finally {
closeOutputFiles(); // close without sorting if there's an error
closeSortBuffer();
}
this.lineCount = this.lineNumber - lineNumber;
LOG.info()
.$("finished chunk [chunkLo=").$(chunkLo)
.$(", chunkHi=").$(chunkHi)
.$(", lines=").$(lineCount)
.$(", errors=").$(errorCount)
.I$();
}
public void indexLine(long ptr, long lo) throws TextException {
// this is fine because Long.MIN_VALUE is treated as null and would be rejected by partitioned tables
if (timestampValue == Long.MIN_VALUE) {
return;
}
long lineStartOffset = lastLineStart;
long length = offset + ptr - lo - lastLineStart;
if (length >= (1L << 16)) {
LOG.error().$("row exceeds maximum line length (65k) for parallel import [line=").$(lineNumber)
.$(", length=").$(length).I$();
errorCount++;
return;
}
// the second long stores:
// the length as a 16-bit unsigned number, followed by
// the offset as a 48-bit unsigned number,
// allowing imports of files up to 256TB with rows up to 65KB long
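// e.g. a 100-byte line starting at file offset 4096 packs to (100L << 48) | 4096L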
long lengthAndOffset = (length << 48 | lineStartOffset);
long partitionKey = partitionFloorMethod.floor(timestampValue);
long mapKey = partitionKey / Timestamps.HOUR_MICROS; // remove trailing zeros to avoid excessive collisions in the hash map
final IndexOutputFile target;
int keyIndex = outputFileLookupMap.keyIndex(mapKey);
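// LongObjHashMap convention: a non-negative keyIndex means the key is absent
// (it identifies the free slot), while a negative keyIndex addresses an existing entry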
if (keyIndex > -1) {
target = prepareTargetFile(partitionKey);
outputFileDenseList.add(target);
outputFileLookupMap.putAt(keyIndex, mapKey, target);
} else {
target = outputFileLookupMap.valueAt(keyIndex);
}
if (target.indexChunkSize == maxIndexChunkSize) {
target.nextChunk(ff, getPartitionIndexPrefix(partitionKey));
}
target.putEntry(timestampValue, lengthAndOffset, length);
}
public boolean isCancelled() {
return cancelled;
}
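/**
 * Prepares the indexer for a new input file; the import root, column delimiter,
 * designated timestamp column and its adapter must be known up front.
 */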
public void of(
CharSequence inputFileName,
CharSequence importRoot,
int index,
int partitionBy,
byte columnDelimiter,
int timestampIndex,
TimestampAdapter adapter,
boolean ignoreHeader,
int atomicity,
@Nullable ExecutionCircuitBreaker circuitBreaker
) {
this.inputFileName = inputFileName;
this.importRoot = importRoot;
this.partitionFloorMethod = PartitionBy.getPartitionFloorMethod(partitionBy);
this.partitionDirFormatMethod = PartitionBy.getPartitionDirFormatMethod(partitionBy);
this.offset = 0;
this.columnDelimiter = columnDelimiter;
this.columnDelimiterMask = SwarUtils.broadcast(columnDelimiter);
if (timestampIndex < 0) {
throw TextException.$("Timestamp index is not set [value=").put(timestampIndex).put(']');
}
this.timestampIndex = timestampIndex;
this.timestampAdapter = adapter;
this.header = ignoreHeader;
this.index = index;
this.failOnTsError = (atomicity == Atomicity.SKIP_ALL);
this.timestampValue = Long.MIN_VALUE;
this.circuitBreaker = circuitBreaker;
}
public void parseLast() {
if (useFieldRollBuf) {
if (inQuote && lastQuotePos < fieldHi) {
errorCount++;
LOG.info().$("quote is missing [table=").$("tableName").$(']').$();
} else {
this.fieldHi++;
stashField(fieldIndex, 0);
triggerLine(0);
}
}
}
public void sort(long srcFd, long srcSize) {
if (srcSize < 1) {
return;
}
long srcAddress = -1;
try {
srcAddress = TableUtils.mapRW(ff, srcFd, srcSize, MemoryTag.MMAP_IMPORT);
if (sortBufferPtr == -1) {
sortBufferPtr = Unsafe.malloc(maxIndexChunkSize, MemoryTag.NATIVE_IMPORT);
sortBufferLength = maxIndexChunkSize;
}
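// sorts the 16-byte (timestamp, packed length|offset) entries in place by their
// first long, i.e. by timestamp; sortBufferPtr supplies the scratch space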
Vect.radixSortLongIndexAscInPlace(srcAddress, srcSize / INDEX_ENTRY_SIZE, sortBufferPtr);
} finally {
if (srcAddress != -1) {
ff.munmap(srcAddress, srcSize, MemoryTag.MMAP_IMPORT);
}
}
}
private void checkEol(long lo) {
if (eol) {
uneol(lo);
}
}
private void clearRollBuffer(long ptr) {
useFieldRollBuf = false;
fieldRollBufCur = fieldRollBufPtr;
this.fieldLo = this.fieldHi = ptr;
}
private void closeOutputFiles() {
Misc.freeObjListAndClear(outputFileDenseList);
Misc.clear(outputFileLookupMap);
}
private void closeSortBuffer() {
if (sortBufferPtr != -1) {
Unsafe.free(sortBufferPtr, sortBufferLength, MemoryTag.NATIVE_IMPORT);
sortBufferPtr = -1;
sortBufferLength = 0;
}
}
private void collectPartitionStats(LongList partitionKeysAndSizes) {
partitionKeysAndSizes.setPos(0);
for (int i = 0, n = outputFileDenseList.size(); i < n; i++) {
final IndexOutputFile value = outputFileDenseList.getQuick(i);
partitionKeysAndSizes.add(value.partitionKey, value.dataSize);
}
}
private void eol(long ptr, byte c) {
if (c == '\n' || c == '\r') {
eol = true;
rollBufferUnusable = false;
clearRollBuffer(ptr);
fieldIndex = 0;
lineNumber++;
}
}
private boolean fitsInBuffer(int requiredLength) {
if (requiredLength > fieldRollBufLen) {
LOG.info()
.$("timestamp column value too long [path=").$(inputFileName)
.$(", line=").$(lineNumber)
.$(", requiredLen=").$(requiredLength)
.$(", rollLimit=").$(fieldRollBufLen)
.$(']').$();
errorCount++;
rollBufferUnusable = true;
return false;
}
return true;
}
private Path getPartitionIndexDir(long partitionKey) {
path.of(importRoot).slash();
partitionDirFormatMethod.format(partitionKey, DateFormatUtils.EN_LOCALE, null, path);
return path;
}
private Path getPartitionIndexPrefix(long partitionKey) {
return getPartitionIndexDir(partitionKey).slash().put(index);
}
private void onColumnDelimiter(long lo, long ptr) {
checkEol(lo);
if (inQuote) {
return;
}
stashField(fieldIndex++, ptr);
}
private void onLineEnd(long ptr, long lo) {
if (inQuote) {
return;
}
if (eol) {
this.fieldLo = this.fieldHi;
return;
}
stashField(fieldIndex, ptr);
indexLine(ptr, lo);
triggerLine(ptr);
}
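// Inside a quoted field a '"' either closes the field or, when immediately
// followed by another '"', is an escaped quote; delayedOutQuote defers that
// decision until the next byte is seen (resolved in parse()).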
private void onQuote() {
if (inQuote) {
delayedOutQuote = !delayedOutQuote;
lastQuotePos = this.fieldHi;
} else if (fieldHi - fieldLo == 1) {
inQuote = true;
this.fieldLo = this.fieldHi;
}
}
private void parse(long lo, long hi) {
this.fieldHi = useFieldRollBuf ? fieldRollBufCur : (this.fieldLo = lo);
long ptr = lo;
while (ptr < hi) {
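// SWAR fast path: probe 8 bytes at once; markZeroBytes flags any byte equal to
// '\n', '\r', '"' or the column delimiter, letting us skip uninteresting
// stretches without per-byte branching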
if (!rollBufferUnusable && !useFieldRollBuf && !delayedOutQuote && ptr < hi - 7) {
long word = Unsafe.getUnsafe().getLong(ptr);
long zeroBytesWord = SwarUtils.markZeroBytes(word ^ MASK_NEW_LINE)
| SwarUtils.markZeroBytes(word ^ MASK_CR)
| SwarUtils.markZeroBytes(word ^ MASK_QUOTE)
| SwarUtils.markZeroBytes(word ^ columnDelimiterMask);
if (zeroBytesWord == 0) {
ptr += 7;
this.fieldHi += 7;
continue;
} else {
int firstIndex = SwarUtils.indexOfFirstMarkedByte(zeroBytesWord);
ptr += firstIndex;
this.fieldHi += firstIndex;
}
}
final byte b = Unsafe.getUnsafe().getByte(ptr++);
if (rollBufferUnusable) {
eol(ptr, b);
continue;
}
if (useFieldRollBuf) {
putToRollBuf(b);
if (rollBufferUnusable) {
continue;
}
}
this.fieldHi++;
if (delayedOutQuote && b != '"') {
inQuote = delayedOutQuote = false;
}
if (b == columnDelimiter) {
onColumnDelimiter(lo, ptr);
} else if (b == '"') {
checkEol(lo);
onQuote();
} else if (b == '\n' || b == '\r') {
onLineEnd(ptr, lo);
} else {
checkEol(lo);
}
}
if (useFieldRollBuf) {
return;
}
if (eol) {
this.fieldLo = 0;
} else if (fieldIndex == timestampIndex) {
rollField(hi);
}
}
private void parseTimestamp() {
try {
timestampValue = timestampAdapter.getTimestamp(timestampField);
} catch (Exception e) {
if (failOnTsError) {
throw TextException.$("could not parse timestamp [line=").put(lineNumber).put(", column=").put(timestampIndex).put(']');
} else {
LOG.error().$("could not parse timestamp [line=").$(lineNumber).$(", column=").$(timestampIndex).I$();
errorCount++;
}
}
}
@NotNull
private IndexOutputFile prepareTargetFile(long partitionKey) {
getPartitionIndexDir(partitionKey);
path.slash();
if (!ff.exists(path.$())) {
int result = ff.mkdir(path.$(), dirMode);
if (result != 0 && !ff.exists(path.$())) { // ignore failure if another worker has already created the dir
throw TextException.$("Couldn't create partition dir [path='").put(path).put("']");
}
}
path.put(index);
return new IndexOutputFile(ff, path, partitionKey);
}
private void putToRollBuf(byte c) {
if (fitsInBuffer((int) (fieldRollBufCur - fieldRollBufPtr + 1L))) {
Unsafe.getUnsafe().putByte(fieldRollBufCur++, c);
}
}
// roll timestamp field if it's split over read buffer boundaries
private void rollField(long hi) {
// lastLineStart is an offset from 'lo'
// 'lo' is the address of incoming buffer
int length = (int) (hi - fieldLo);
if (length > 0 && fitsInBuffer(length)) {
assert fieldLo + length <= hi;
Vect.memcpy(fieldRollBufPtr, fieldLo, length);
fieldRollBufCur = fieldRollBufPtr + length;
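// re-point fieldLo/fieldHi (and lastQuotePos, if set) from the exhausted read buffer into the roll buffer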
shift(fieldLo - fieldRollBufPtr);
useFieldRollBuf = true;
}
}
private void shift(long d) {
this.fieldLo -= d;
this.fieldHi -= d;
if (lastQuotePos > -1) {
this.lastQuotePos -= d;
}
}
private void sortAndCloseOutputFiles() {
for (int i = 0, n = outputFileDenseList.size(); i < n; i++) {
outputFileDenseList.getQuick(i).sortAndClose();
}
this.outputFileDenseList.clear();
this.outputFileLookupMap.clear();
}
private void stashField(int fieldIndex, long ptr) {
if (fieldIndex == timestampIndex && !header) {
if (lastQuotePos > -1) {
timestampField.of(fieldLo, lastQuotePos - 1);
} else {
timestampField.of(fieldLo, fieldHi - 1);
}
parseTimestamp();
if (useFieldRollBuf) {
clearRollBuffer(ptr);
}
}
this.lastQuotePos = -1;
this.fieldLo = this.fieldHi;
}
private void triggerLine(long ptr) {
eol = true;
fieldIndex = 0;
if (useFieldRollBuf) {
clearRollBuffer(ptr);
}
if (header) {
header = false;
return;
}
lineNumber++;
timestampValue = Long.MIN_VALUE;
}
private void uneol(long lo) {
eol = false;
this.lastLineStart = this.offset + (this.fieldLo - lo);
}
void openInputFile() {
if (fd > -1) {
return;
}
path.of(inputRoot).slash().concat(inputFileName);
this.fd = TableUtils.openRO(ff, path.$(), LOG);
long len = ff.length(fd);
if (len == -1) {
throw CairoException.critical(ff.errno()).put(
"could not get length of file [path=").put(path)
.put(']');
}
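// hint to the OS that the file will be read sequentially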
ff.fadvise(fd, 0, len, Files.POSIX_FADV_SEQUENTIAL);
}
class IndexOutputFile implements Closeable {
final MemoryPMARImpl memory;
final long partitionKey;
int chunkNumber;
long dataSize; // partition data size in bytes
long indexChunkSize;
IndexOutputFile(FilesFacade ff, Path path, long partitionKey) {
this.partitionKey = partitionKey;
this.indexChunkSize = 0;
this.chunkNumber = 0;
this.dataSize = 0;
this.memory = new MemoryPMARImpl(configuration);
nextChunk(ff, path);
}
@Override
public void close() {
if (memory.isOpen()) {
memory.close(true, Vm.TRUNCATE_TO_POINTER);
}
}
public void nextChunk(FilesFacade ff, Path path) {
if (memory.isOpen()) {
sortAndClose();
}
chunkNumber++; // produces file names like $workerIndex_$chunkIndex, e.g. 1_1
indexChunkSize = 0;
path.put('_').put(chunkNumber);
LPSZ lpsz = path.$();
if (ff.exists(lpsz)) {
throw TextException.$("index file already exists [path=").put(path).put(']');
} else {
LOG.debug().$("created import index file [path='").$(path).$("']").$();
}
this.memory.of(ff, lpsz, ff.getMapPageSize(), MemoryTag.MMAP_DEFAULT, CairoConfiguration.O_NONE);
}
private void sortAndClose() {
if (memory.isOpen()) {
CsvFileIndexer.this.sort(memory.getFd(), indexChunkSize);
memory.close(true, Vm.TRUNCATE_TO_POINTER);
}
}
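// writes one 16-byte index entry: the timestamp followed by the packed
// length|offset word; 'length' only contributes to the partition data size stat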
void putEntry(long timestamp, long offset, long length) {
memory.putLong128(timestamp, offset);
indexChunkSize += INDEX_ENTRY_SIZE;
dataSize += length;
}
}
}