All downloads are free. Search and download functionality uses the official Maven repository.

io.questdb.cutlass.text.CairoTextWriter Maven / Gradle / Ivy

There is a newer version: 5.0.1
Show newest version
/*******************************************************************************
 *     ___                  _   ____  ____
 *    / _ \ _   _  ___  ___| |_|  _ \| __ )
 *   | | | | | | |/ _ \/ __| __| | | |  _ \
 *   | |_| | |_| |  __/\__ \ |_| |_| | |_) |
 *    \__\_\\__,_|\___||___/\__|____/|____/
 *
 *  Copyright (c) 2014-2019 Appsicle
 *  Copyright (c) 2019-2020 QuestDB
 *
 *  Licensed under the Apache License, Version 2.0 (the "License");
 *  you may not use this file except in compliance with the License.
 *  You may obtain a copy of the License at
 *
 *  http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing, software
 *  distributed under the License is distributed on an "AS IS" BASIS,
 *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *  See the License for the specific language governing permissions and
 *  limitations under the License.
 *
 ******************************************************************************/

package io.questdb.cutlass.text;

import io.questdb.cairo.*;
import io.questdb.cairo.sql.RecordMetadata;
import io.questdb.cutlass.text.types.*;
import io.questdb.log.Log;
import io.questdb.log.LogFactory;
import io.questdb.log.LogRecord;
import io.questdb.std.*;
import io.questdb.std.str.DirectByteCharSequence;
import io.questdb.std.str.DirectCharSink;
import io.questdb.std.str.Path;

import java.io.Closeable;

public class CairoTextWriter implements Closeable, Mutable {
    // Logger shared by every instance of this class.
    private static final Log LOG = LogFactory.getLog(CairoTextWriter.class);
    // Engine configuration (obtained from the engine in the constructor); supplies
    // the default symbol cache flag and symbol capacity for newly created tables.
    private final CairoConfiguration configuration;
    // Used to query table status, create/remove tables and obtain table writers.
    private final CairoEngine engine;
    // Per-column count of values that failed to parse; seeded in prepareTable()
    // and incremented in logError().
    private final LongList columnErrorCounts = new LongList();
    // Allocated in the constructor with the configured UTF-8 sink size and closed in close().
    private final DirectCharSink utf8Sink;
    // Passed to the engine when a table is created; closed in close().
    private final AppendMemory appendMemory = new AppendMemory();
    // Supplied by the caller; handed to the engine for status/create/remove calls.
    // It is not closed by this class.
    private final Path path;
    // Describes the table to create from the detected column names and types.
    private final TableStructureAdapter tableStructureAdapter = new TableStructureAdapter();
    // Source of adapters for actual table column types when they differ from detected ones.
    private final TypeManager typeManager;
    // --- import parameters, all assigned by of(...) ---
    private CharSequence tableName;
    // Opened in prepareTable(), released in clear(); null when no import is in progress.
    private TableWriter writer;
    // Value of writer.size() captured in prepareTable(); baseline for getWrittenLineCount().
    private long _size;
    // When true an existing table is removed and re-created instead of appended to.
    private boolean overwrite;
    // Checked in commit(); see that method for how the flag is handled.
    private boolean durable;
    // One of the Atomicity constants; decides what happens when a value fails to parse.
    private int atomicity;
    // Partitioning value reported by TableStructureAdapter for newly created tables.
    private int partitionBy;
    // Index of timestampIndexCol among the column names, or -1 when no timestamp
    // column was requested; computed by TableStructureAdapter.of(...).
    private int timestampIndex;
    // Name of the requested timestamp column, or null for none.
    private CharSequence timestampIndexCol;
    // Per-column adapters used to write text values into rows.
    private ObjList types;
    // Listener used when no timestamp adapter is set (see getTextListener()).
    private final TextLexer.Listener nonPartitionedListener = this::onFieldsNonPartitioned;
    // Assigned in prepareTable() when the timestamp column resolves to a TIMESTAMP adapter.
    private TimestampAdapter timestampAdapter;
    // Listener used when a timestamp adapter is set (see getTextListener()).
    private final TextLexer.Listener partitionedListener = this::onFieldsPartitioned;
    // Pool of adapters that wrap a DateAdapter so a detected DATE can be written
    // into a TIMESTAMP column; cleared in clear().
    private final ObjectPool dateToTimestampAdapterPool = new ObjectPool<>(DateToTimestampAdapter::new, 4);

    /**
     * Creates a text writer bound to the given engine.
     *
     * @param engine            engine used to inspect, create and open tables; its
     *                          configuration is captured here as well
     * @param path              path instance passed on to engine calls
     * @param textConfiguration source of the UTF-8 sink size
     * @param typeManager       provider of type adapters for existing table columns
     */
    public CairoTextWriter(
            CairoEngine engine,
            Path path,
            TextConfiguration textConfiguration,
            TypeManager typeManager
    ) {
        // plain references first
        this.engine = engine;
        this.path = path;
        this.typeManager = typeManager;

        // derived state; the configuration is resolved before the sink is allocated
        this.configuration = engine.getConfiguration();
        this.utf8Sink = new DirectCharSink(textConfiguration.getUtf8SinkSize());
    }

    /**
     * Resets the per-import state so that this instance can be reused:
     * returns pooled date-to-timestamp adapters, releases the table writer,
     * clears the per-column error counters, forgets the timestamp adapter and
     * zeroes the row-count baseline.
     */
    @Override
    public void clear() {
        dateToTimestampAdapterPool.clear();
        writer = Misc.free(writer);
        columnErrorCounts.clear();
        // prepareTable() only ever assigns the adapter, it never unsets it.
        // Without this reset an import with a timestamp column followed by one
        // without would keep selecting the partitioned listener in
        // getTextListener() and use a stale adapter and column index.
        timestampAdapter = null;
        _size = 0;
    }

    /**
     * Releases everything held by this writer: runs {@link #clear()} first and
     * then closes the UTF-8 sink and the append memory.
     */
    @Override
    public void close() {
        clear();
        utf8Sink.close();
        appendMemory.close();
    }

    /**
     * Commits rows written so far. Does nothing when no writer is open.
     * <p>
     * NOTE(review): when {@code durable} is set the only statement executed is
     * {@code assert false}; with assertions disabled nothing is committed at
     * all. Confirm whether durable commits are meant to be unsupported here.
     */
    public void commit() {
        if (writer == null) {
            return;
        }
        if (durable) {
            assert false;
            return;
        }
        writer.commit();
    }

    /**
     * @return the live list of per-column parse error counts; this is the
     * internal list, not a copy
     */
    public LongList getColumnErrorCounts() {
        return columnErrorCounts;
    }

    /**
     * @return metadata of the table being written, or {@code null} when no
     * writer is currently open
     */
    public RecordMetadata getMetadata() {
        if (writer != null) {
            return writer.getMetadata();
        }
        return null;
    }

    /**
     * @return the partitioning value supplied to {@code of(...)}
     */
    public int getPartitionBy() {
        return partitionBy;
    }

    /**
     * @return the target table name supplied to {@code of(...)}
     */
    public CharSequence getTableName() {
        return tableName;
    }

    /**
     * Picks the lexer listener matching the current import: the partitioned
     * listener when a timestamp adapter is set, the non-partitioned one otherwise.
     *
     * @return listener that receives parsed lines
     */
    public TextLexer.Listener getTextListener() {
        if (timestampAdapter == null) {
            return nonPartitionedListener;
        }
        return partitionedListener;
    }

    /**
     * @return the writer's current size minus the size captured when the table
     * was prepared, or 0 when no writer is open
     */
    public long getWrittenLineCount() {
        if (writer == null) {
            return 0;
        }
        return writer.size() - _size;
    }

    /**
     * Stores the parameters of the next import. Nothing is opened or created
     * here; the values are consumed later by table preparation, the line
     * listeners and {@code commit()}.
     *
     * @param name              target table name
     * @param overwrite         when true an existing table is removed and re-created
     * @param durable           flag checked by {@code commit()}
     * @param atomicity         one of the {@code Atomicity} constants, selects the
     *                          reaction to a value that fails to parse
     * @param partitionBy       partitioning value used when a table is created
     * @param timestampIndexCol name of the timestamp column, or null for none
     */
    public void of(CharSequence name, boolean overwrite, boolean durable, int atomicity, int partitionBy, CharSequence timestampIndexCol) {
        this.tableName = name;
        this.overwrite = overwrite;
        this.durable = durable;
        this.atomicity = atomicity;
        this.partitionBy = partitionBy;
        this.timestampIndexCol = timestampIndexCol;
    }

    /**
     * Lexer callback for imports without a timestamp adapter: writes one parsed
     * line as a new row. Empty values are left unset. A value that fails to
     * parse is logged and counted, then handled according to {@code atomicity}:
     * SKIP_ALL rolls the writer back and throws, SKIP_ROW cancels the row, any
     * other setting skips just that column.
     *
     * @param line         line number, used for error reporting
     * @param values       parsed field values
     * @param valuesLength number of fields to read from {@code values}
     */
    public void onFieldsNonPartitioned(long line, ObjList values, int valuesLength) {
        final TableWriter.Row row = writer.newRow();
        for (int col = 0; col < valuesLength; col++) {
            final DirectByteCharSequence value = values.getQuick(col);
            if (value.length() != 0) {
                try {
                    types.getQuick(col).write(row, col, value);
                } catch (Exception ignore) {
                    logError(line, col, value);
                    if (atomicity == Atomicity.SKIP_ALL) {
                        writer.rollback();
                        throw CairoException.instance(0).put("bad syntax [line=").put(line).put(", col=").put(col).put(']');
                    }
                    if (atomicity == Atomicity.SKIP_ROW) {
                        row.cancel();
                        return;
                    }
                    // any other atomicity: skip this column only and carry on
                }
            }
        }
        row.append();
    }

    /**
     * Lexer callback for imports with a timestamp adapter: the timestamp field
     * is parsed first and used to open the row; the remaining non-empty fields
     * are then written. If the timestamp cannot be parsed the error is logged
     * and the line is dropped. Other parse failures are logged and handled
     * according to {@code atomicity}: SKIP_ALL rolls back and throws, SKIP_ROW
     * cancels the row, any other setting skips just that column.
     *
     * @param line         line number, used for error reporting
     * @param values       parsed field values
     * @param valuesLength number of fields to read from {@code values}
     */
    public void onFieldsPartitioned(long line, ObjList values, int valuesLength) {
        final int timestampIndex = this.timestampIndex;
        final DirectByteCharSequence timestampValue = values.getQuick(timestampIndex);

        final TableWriter.Row row;
        try {
            row = writer.newRow(timestampAdapter.getTimestamp(timestampValue));
        } catch (NumericException e) {
            logError(line, timestampIndex, timestampValue);
            return;
        }

        for (int col = 0; col < valuesLength; col++) {
            final DirectByteCharSequence value = values.getQuick(col);
            if (col != timestampIndex && value.length() != 0) {
                try {
                    types.getQuick(col).write(row, col, value);
                } catch (Exception ignore) {
                    logError(line, col, value);
                    if (atomicity == Atomicity.SKIP_ALL) {
                        writer.rollback();
                        throw CairoException.instance(0).put("bad syntax [line=").put(line).put(", col=").put(col).put(']');
                    }
                    if (atomicity == Atomicity.SKIP_ROW) {
                        row.cancel();
                        return;
                    }
                    // any other atomicity: skip this column only and carry on
                }
            }
        }
        row.append();
    }

    /**
     * Creates the target table from the detected column names and types and
     * adopts the detected types as the adapters used for writing.
     *
     * @param names                detected column names
     * @param detectedTypes        detected column type adapters
     * @param cairoSecurityContext security context passed to the engine
     * @throws TextException when the requested timestamp column is missing or
     *                       is not a timestamp
     */
    private void createTable(
            ObjList names,
            ObjList detectedTypes,
            CairoSecurityContext cairoSecurityContext
    ) throws TextException {
        // resolving the structure also validates the timestamp column
        final TableStructureAdapter structure = tableStructureAdapter.of(names, detectedTypes);
        engine.creatTable(cairoSecurityContext, appendMemory, path, structure);
        this.types = detectedTypes;
    }

    /**
     * Logs a value that could not be parsed as its column type and increments
     * the error counter of that column.
     *
     * @param line line number of the offending value
     * @param i    column index
     * @param dbcs raw text of the value
     */
    private void logError(long line, int i, DirectByteCharSequence dbcs) {
        final LogRecord record = LOG.error();
        record.$("type syntax [type=")
                .$(ColumnType.nameOf(types.getQuick(i).getType()))
                .$("]\n\t")
                .$('[').$(line).$(':').$(i).$("] -> ").$(dbcs)
                .$();
        columnErrorCounts.increment(i);
    }

    /**
     * Logs, at info level, that the type detected for a column does not match
     * the table's column type.
     *
     * @param i column index
     */
    private void logTypeError(int i) {
        final LogRecord record = LOG.info();
        record.$("mis-detected [table=").$(tableName)
                .$(", column=").$(i)
                .$(", type=").$(ColumnType.nameOf(this.types.getQuick(i).getType()))
                .$(']')
                .$();
    }

    /**
     * Opens a writer for the existing target table and replaces detected type
     * adapters with ones that match the table's actual column types.
     * <p>
     * The writer is closed on every failure path, including unexpected
     * exceptions raised while adapters are being replaced, so a failed import
     * does not leave the writer open.
     *
     * @param cairoSecurityContext security context passed to the engine
     * @param detectedTypes        detected type adapters; modified in place and
     *                             adopted as the adapters used for writing
     * @return the open writer; the caller owns it
     * @throws CairoException when the text has more columns than the table or
     *                        a target column is of BINARY type
     */
    private TableWriter openWriterAndOverrideImportTypes(
            CairoSecurityContext cairoSecurityContext,
            ObjList detectedTypes
    ) {

        final TableWriter tableWriter = engine.getWriter(cairoSecurityContext, tableName);
        // set right before an explicit close so the catch below never closes twice
        boolean writerClosed = false;
        try {
            final RecordMetadata metadata = tableWriter.getMetadata();

            // now, compare column count.
            // Cannot continue if the text has more columns than the table

            if (metadata.getColumnCount() < detectedTypes.size()) {
                writerClosed = true;
                tableWriter.close();
                throw CairoException.instance(0)
                        .put("column count mismatch [textColumnCount=").put(detectedTypes.size())
                        .put(", tableColumnCount=").put(metadata.getColumnCount())
                        .put(", table=").put(tableName)
                        .put(']');
            }

            this.types = detectedTypes;

            // now overwrite detected types with actual table column types
            for (int i = 0, n = this.types.size(); i < n; i++) {
                final int columnType = metadata.getColumnType(i);
                final int detectedType = this.types.getQuick(i).getType();
                if (detectedType != columnType) {
                    // when DATE type is mis-detected as STRING we
                    // would have neither date format nor locale to
                    // use when populating this field
                    switch (columnType) {
                        case ColumnType.DATE:
                            logTypeError(i);
                            this.types.setQuick(i, BadDateAdapter.INSTANCE);
                            break;
                        case ColumnType.TIMESTAMP:
                            if (detectedType == ColumnType.DATE) {
                                // a detected DATE can feed a TIMESTAMP column through a converting adapter
                                this.types.setQuick(i, dateToTimestampAdapterPool.next().of((DateAdapter) this.types.getQuick(i)));
                            } else {
                                logTypeError(i);
                                this.types.setQuick(i, BadTimestampAdapter.INSTANCE);
                            }
                            break;
                        case ColumnType.BINARY:
                            writerClosed = true;
                            tableWriter.close();
                            throw CairoException.instance(0).put("cannot import text into BINARY column [index=").put(i).put(']');
                        default:
                            this.types.setQuick(i, typeManager.getTypeAdapter(columnType));
                            break;
                    }
                }
            }

            return tableWriter;
        } catch (Throwable e) {
            // anticipated failures have already closed the writer above, in the
            // original close-then-throw order; anything else is released here
            if (!writerClosed) {
                tableWriter.close();
            }
            throw e;
        }
    }

    /**
     * Makes the target table ready for writing and opens its writer.
     * <ul>
     * <li>table does not exist: it is created from the detected structure;</li>
     * <li>table exists and {@code overwrite} is set: it is removed and re-created;</li>
     * <li>table exists otherwise: it is opened and detected types are aligned
     * with the table's column types;</li>
     * <li>any other status: the name is rejected as reserved.</li>
     * </ul>
     * Afterwards the row-count baseline and per-column error counters are
     * initialised and, when the timestamp column resolves to a TIMESTAMP
     * adapter, that adapter is remembered for partitioned writes.
     *
     * @param cairoSecurityContext security context passed to the engine
     * @param names                detected column names
     * @param detectedTypes        detected column type adapters
     * @throws TextException when the requested timestamp column is missing or
     *                       is not a timestamp
     */
    void prepareTable(
            CairoSecurityContext cairoSecurityContext,
            ObjList names,
            ObjList detectedTypes
    ) throws TextException {
        assert writer == null;

        if (detectedTypes.size() == 0) {
            throw CairoException.instance(0).put("cannot determine text structure");
        }

        final int status = engine.getStatus(cairoSecurityContext, path, tableName);
        final boolean exists = status == TableUtils.TABLE_EXISTS;
        if (!exists && status != TableUtils.TABLE_DOES_NOT_EXIST) {
            throw CairoException.instance(0).put("name is reserved [table=").put(tableName).put(']');
        }

        if (exists && !overwrite) {
            // append to the existing table using its own column types
            writer = openWriterAndOverrideImportTypes(cairoSecurityContext, detectedTypes);
            tableStructureAdapter.of(names, detectedTypes);
        } else {
            // fresh table: drop the old one first when overwriting
            if (exists) {
                engine.remove(cairoSecurityContext, path, tableName);
            }
            createTable(names, detectedTypes, cairoSecurityContext);
            writer = engine.getWriter(cairoSecurityContext, tableName);
        }

        _size = writer.size();
        columnErrorCounts.seed(writer.getMetadata().getColumnCount(), 0);

        final boolean hasTimestampColumn = timestampIndex != -1;
        if (hasTimestampColumn && types.getQuick(timestampIndex).getType() == ColumnType.TIMESTAMP) {
            timestampAdapter = (TimestampAdapter) types.getQuick(timestampIndex);
        }
    }

    /**
     * Presents the detected column names and types, together with the import
     * parameters of the enclosing writer, as a {@link TableStructure}.
     * Columns are reported as neither indexed nor sequential; symbol settings
     * come from the engine configuration.
     */
    private class TableStructureAdapter implements TableStructure {
        // detected column names, set by of(...)
        private ObjList names;
        // detected column type adapters, set by of(...)
        private ObjList types;

        /** @return number of detected columns */
        @Override
        public int getColumnCount() {
            return types.size();
        }

        /** @return detected name of the column at {@code columnIndex} */
        @Override
        public CharSequence getColumnName(int columnIndex) {
            return names.getQuick(columnIndex);
        }

        /** @return type reported by the adapter of the column at {@code columnIndex} */
        @Override
        public int getColumnType(int columnIndex) {
            return types.getQuick(columnIndex).getType();
        }

        /** @return always 0 */
        @Override
        public int getIndexBlockCapacity(int columnIndex) {
            return 0;
        }

        /** @return always false */
        @Override
        public boolean isIndexed(int columnIndex) {
            return false;
        }

        /** @return always false */
        @Override
        public boolean isSequential(int columnIndex) {
            return false;
        }

        /** @return the enclosing writer's partitioning value */
        @Override
        public int getPartitionBy() {
            return partitionBy;
        }

        /** @return the configured default symbol cache flag, same for every column */
        @Override
        public boolean getSymbolCacheFlag(int columnIndex) {
            return configuration.getDefaultSymbolCacheFlag();
        }

        /** @return the configured default symbol capacity, same for every column */
        @Override
        public int getSymbolCapacity(int columnIndex) {
            return configuration.getDefaultSymbolCapacity();
        }

        /** @return the enclosing writer's target table name */
        @Override
        public CharSequence getTableName() {
            return tableName;
        }

        /** @return the timestamp column index resolved by {@code of(...)}, -1 for none */
        @Override
        public int getTimestampIndex() {
            return timestampIndex;
        }

        /**
         * Binds this adapter to the detected structure and resolves the
         * enclosing writer's timestamp column index from the requested
         * timestamp column name.
         *
         * @param names detected column names
         * @param types detected column type adapters
         * @return this adapter
         * @throws TextException when the requested timestamp column is not
         *                       among {@code names} or is not of TIMESTAMP type
         */
        TableStructureAdapter of(ObjList names, ObjList types) throws TextException {
            this.names = names;
            this.types = types;

            // no timestamp column requested
            if (timestampIndexCol == null) {
                timestampIndex = -1;
                return this;
            }

            // the index is stored before validation
            timestampIndex = names.indexOf(timestampIndexCol);
            if (timestampIndex == -1) {
                throw TextException.$("invalid timestamp column '").put(timestampIndexCol).put('\'');
            }

            final boolean isTimestamp = types.getQuick(timestampIndex).getType() == ColumnType.TIMESTAMP;
            if (!isTimestamp) {
                throw TextException.$("not a timestamp '").put(timestampIndexCol).put('\'');
            }
            return this;
        }
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy