
/*******************************************************************************
*     ___                  _   ____  ____
*    / _ \ _   _  ___  ___| |_|  _ \| __ )
*   | | | | | | |/ _ \/ __| __| | | |  _ \
*   | |_| | |_| |  __/\__ \ |_| |_| | |_) |
*    \__\_\\__,_|\___||___/\__|____/|____/
*
* Copyright (c) 2014-2019 Appsicle
* Copyright (c) 2019-2024 QuestDB
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
******************************************************************************/
package io.questdb.cutlass.text;
import io.questdb.MessageBus;
import io.questdb.cairo.*;
import io.questdb.cairo.sql.ExecutionCircuitBreaker;
import io.questdb.cairo.sql.RecordMetadata;
import io.questdb.cairo.vm.Vm;
import io.questdb.cairo.vm.api.MemoryMARW;
import io.questdb.cutlass.text.types.*;
import io.questdb.log.Log;
import io.questdb.log.LogFactory;
import io.questdb.mp.Job;
import io.questdb.mp.RingQueue;
import io.questdb.mp.Sequence;
import io.questdb.std.*;
import io.questdb.std.datetime.DateFormat;
import io.questdb.std.datetime.millitime.DateFormatUtils;
import io.questdb.std.str.DirectUtf16Sink;
import io.questdb.std.str.DirectUtf8Sink;
import io.questdb.std.str.Path;
import io.questdb.std.str.StringSink;
import org.jetbrains.annotations.Nullable;
import org.jetbrains.annotations.TestOnly;
import java.io.Closeable;
import java.io.File;
import java.io.IOException;
import java.util.function.Consumer;
import static io.questdb.cairo.TableUtils.TXN_FILE_NAME;
/**
* Class is responsible for importing large, unordered CSV files into partitioned tables.
* It does the following (in parallel):
* - splits the file into N chunks and scans them in parallel to find the correct line start of each chunk
* - scans each chunk and extracts timestamps and line offsets to per-partition index files
* (index files are stored as $inputWorkDir/$inputFileName/$partitionName/$workerId_$chunkNumber),
* then sorts each file by timestamp value
* - merges all partition index chunks into one index file per partition (index.m)
* - loads partitions into separate tables using the merged indexes (one table per worker)
* - scans all symbol columns to build a per-column global symbol table
* - remaps all symbol values
* - moves and attaches partitions from the temp tables to the target table
* - removes the temp tables and index files
*/
public class ParallelCsvFileImporter implements Closeable, Mutable {
private static final int DEFAULT_MIN_CHUNK_SIZE = 300 * 1024 * 1024;
private static final String LOCK_REASON = "parallel import";
private static final Log LOG = LogFactory.getLog(ParallelCsvFileImporter.class);
private static final int NO_INDEX = -1;
private final CairoEngine cairoEngine;
// holds result of first phase - boundary scanning
// count of quotes, even new lines, odd new lines, offset to first even newline, offset to first odd newline
private final LongList chunkStats;
private final Sequence collectSeq;
private final CairoConfiguration configuration;
private final FilesFacade ff;
// holds input for second phase - indexing: offset and start line number for each chunk
private final LongList indexChunkStats;
private final Path inputFilePath;
private final CharSequence inputRoot;
private final CharSequence inputWorkRoot;
private final CopyJob localImportJob;
private final ObjectPool<OtherToTimestampAdapter> otherToTimestampAdapterPool;
private final LongList partitionKeysAndSizes;
private final StringSink partitionNameSink;
private final ObjList<PartitionInfo> partitions;
private final Sequence pubSeq;
private final RingQueue<CopyTask> queue;
private final IntList symbolCapacities;
private final TableStructureAdapter targetTableStructure;
// stores 3 values per task : index, lo, hi (lo, hi are indexes in partitionNames)
private final IntList taskDistribution;
private final TextDelimiterScanner textDelimiterScanner;
private final TextMetadataDetector textMetadataDetector;
private final Path tmpPath;
private final TypeManager typeManager;
private final DirectUtf16Sink utf16Sink;
private final DirectUtf8Sink utf8Sink;
private final int workerCount;
private int atomicity;
private ExecutionCircuitBreaker circuitBreaker;
private byte columnDelimiter;
private boolean createdWorkDir;
private CharSequence errorMessage;
private long errors;
private boolean forceHeader;
private long importId;
// path to the import work directory, usually $inputWorkRoot/$tableName
private CharSequence importRoot;
// name of file to process in inputRoot dir
private CharSequence inputFileName;
// incremented in phase 2
private long linesIndexed;
private RecordMetadata metadata;
private int minChunkSize = DEFAULT_MIN_CHUNK_SIZE;
private int partitionBy;
private byte phase = CopyTask.PHASE_SETUP;
private long phaseErrors;
// row stats are incremented in phase 3
private long rowsHandled;
private long rowsImported;
private long startMs; // start time of current phase (in millis)
// import status variables
private byte status = CopyTask.STATUS_STARTED;
private final Consumer<CopyTask> checkStatusRef = this::updateStatus;
private final Consumer<CopyTask> collectChunkStatsRef = this::collectChunkStats;
private final Consumer<CopyTask> collectStubRef = this::collectStub;
private final Consumer<CopyTask> collectDataImportStatsRef = this::collectDataImportStats;
private final Consumer<CopyTask> collectIndexStatsRef = this::collectIndexStats;
private PhaseStatusReporter statusReporter;
// input params start
private CharSequence tableName;
private TableToken tableToken;
private boolean targetTableCreated;
private int targetTableStatus;
private int taskCount;
private TimestampAdapter timestampAdapter;
// name of timestamp column
private CharSequence timestampColumn;
// input params end
// index of timestamp column in input file
private int timestampIndex;
private TableWriter writer;
public ParallelCsvFileImporter(CairoEngine cairoEngine, int workerCount) {
if (workerCount < 1) {
throw TextImportException.instance(CopyTask.PHASE_SETUP, "Invalid worker count set [value=").put(workerCount).put(']');
}
MessageBus bus = cairoEngine.getMessageBus();
RingQueue<CopyTask> queue = bus.getTextImportQueue();
if (queue.getCycle() < 1) {
throw TextImportException.instance(CopyTask.PHASE_SETUP, "Parallel import queue size cannot be zero!");
}
this.cairoEngine = cairoEngine;
this.workerCount = workerCount;
this.queue = queue;
this.pubSeq = bus.getTextImportPubSeq();
this.collectSeq = bus.getTextImportColSeq();
try {
this.localImportJob = new CopyJob(bus);
this.configuration = cairoEngine.getConfiguration();
this.ff = configuration.getFilesFacade();
this.inputRoot = configuration.getSqlCopyInputRoot();
this.inputWorkRoot = configuration.getSqlCopyInputWorkRoot();
TextConfiguration textConfiguration = configuration.getTextConfiguration();
int utf8SinkSize = textConfiguration.getUtf8SinkSize();
this.utf16Sink = new DirectUtf16Sink(utf8SinkSize);
this.utf8Sink = new DirectUtf8Sink(utf8SinkSize);
this.typeManager = new TypeManager(textConfiguration, utf16Sink, utf8Sink);
this.textDelimiterScanner = new TextDelimiterScanner(textConfiguration);
this.textMetadataDetector = new TextMetadataDetector(typeManager, textConfiguration);
this.targetTableStructure = new TableStructureAdapter(configuration);
this.targetTableStatus = -1;
this.targetTableCreated = false;
this.atomicity = Atomicity.SKIP_COL;
this.createdWorkDir = false;
this.otherToTimestampAdapterPool = new ObjectPool<>(OtherToTimestampAdapter::new, 4);
this.inputFilePath = new Path();
this.tmpPath = new Path();
this.chunkStats = new LongList();
this.indexChunkStats = new LongList();
this.partitionKeysAndSizes = new LongList();
this.partitionNameSink = new StringSink();
this.partitions = new ObjList<>();
this.taskDistribution = new IntList();
this.symbolCapacities = new IntList();
} catch (Throwable t) {
close();
throw t;
}
}
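// Typical usage (a minimal sketch; "engine", "securityContext", "importId" and
// "workerCount" are assumed to come from the calling context, and "trades.csv"
// must already reside under the configured copy input root):
//
//   try (ParallelCsvFileImporter importer = new ParallelCsvFileImporter(engine, workerCount)) {
//       importer.of(
//               "trades",          // target table name
//               "trades.csv",      // file name under inputRoot
//               importId,
//               PartitionBy.DAY,
//               (byte) ',',        // column delimiter
//               "ts",              // timestamp column name
//               null,              // timestamp format; adapter is detected during structure analysis
//               false,             // forceHeader
//               null,              // no circuit breaker
//               Atomicity.SKIP_ROW
//       );
//       importer.process(securityContext);
//   }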
// Load balances existing partitions between given number of workers using partition sizes.
// Returns number of tasks.
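// Example: sizes [9, 7, 5, 3] split between 2 workers are sorted descending and
// each partition goes to the least-loaded worker so far:
// 9 -> worker 0 (sum 9), 7 -> worker 1 (sum 7), 5 -> worker 1 (sum 12), 3 -> worker 0 (sum 12),
// i.e. two tasks with 12 bytes each.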
public static int assignPartitions(ObjList<PartitionInfo> partitions, int workerCount) {
partitions.sort((p1, p2) -> Long.compare(p2.bytes, p1.bytes));
long[] workerSums = new long[workerCount];
for (int i = 0, n = partitions.size(); i < n; i++) {
int minIdx = -1;
long minSum = Long.MAX_VALUE;
for (int j = 0; j < workerCount; j++) {
if (workerSums[j] == 0) {
minIdx = j;
break;
} else if (workerSums[j] < minSum) {
minSum = workerSums[j];
minIdx = j;
}
}
workerSums[minIdx] += partitions.getQuick(i).bytes;
partitions.getQuick(i).taskId = minIdx;
}
partitions.sort((p1, p2) -> {
long workerIdDiff = p1.taskId - p2.taskId;
if (workerIdDiff != 0) {
return (int) workerIdDiff;
}
return Long.compare(p1.key, p2.key);
});
int taskIds = 0;
for (int i = 0, n = workerSums.length; i < n; i++) {
if (workerSums[i] != 0) {
taskIds++;
}
}
return taskIds;
}
public static void createTable(
final FilesFacade ff,
int mkDirMode,
final CharSequence root,
final CharSequence tableDir,
final CharSequence tableName,
TableStructure structure,
int tableId,
SecurityContext securityContext
) {
try (Path path = new Path()) {
switch (TableUtils.exists(ff, path, root, tableDir)) {
case TableUtils.TABLE_EXISTS:
if (!ff.rmdir(path)) {
LOG.error()
.$("could not overwrite table [tableName='").utf8(tableName)
.$("',path='").$(path)
.$(", errno=").$(ff.errno())
.I$();
throw CairoException.critical(ff.errno()).put("could not overwrite [tableName=").put(tableName).put("]");
}
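// intentional fall-through: once the existing table directory has been
// removed, the table is recreated by the case below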
case TableUtils.TABLE_DOES_NOT_EXIST:
securityContext.authorizeTableCreate();
try (MemoryMARW memory = Vm.getMARWInstance()) {
TableUtils.createTable(
ff,
root,
mkDirMode,
memory,
path,
tableDir,
structure,
ColumnType.VERSION,
tableId
);
}
break;
default:
throw TextException.$("name is reserved [tableName=").put(tableName).put(']');
}
}
}
@Override
public void clear() {
writer = Misc.free(writer);
metadata = null;
importId = -1;
Misc.clear(chunkStats);
Misc.clear(indexChunkStats);
Misc.clear(partitionKeysAndSizes);
Misc.clear(partitionNameSink);
Misc.clear(taskDistribution);
Misc.clear(utf16Sink);
Misc.clear(utf8Sink);
Misc.clear(typeManager);
Misc.clear(symbolCapacities);
Misc.clear(textMetadataDetector);
Misc.clear(otherToTimestampAdapterPool);
Misc.clear(partitions);
linesIndexed = 0;
rowsHandled = 0;
rowsImported = 0;
errors = 0;
phaseErrors = 0;
inputFileName = null;
tableName = null;
tableToken = null;
timestampColumn = null;
timestampIndex = -1;
partitionBy = -1;
columnDelimiter = -1;
timestampAdapter = null;
forceHeader = false;
status = CopyTask.STATUS_STARTED;
phase = CopyTask.PHASE_SETUP;
errorMessage = null;
targetTableStatus = -1;
targetTableCreated = false;
atomicity = Atomicity.SKIP_COL;
taskCount = -1;
createdWorkDir = false;
}
@Override
public void close() {
clear();
Misc.free(this.inputFilePath);
Misc.free(this.tmpPath);
Misc.free(utf16Sink);
Misc.free(this.utf8Sink);
Misc.free(this.textMetadataDetector);
Misc.free(this.textDelimiterScanner);
Misc.free(this.localImportJob);
}
public void of(
String tableName,
String inputFileName,
long importId,
int partitionBy,
byte columnDelimiter,
String timestampColumn,
String timestampFormat,
boolean forceHeader,
ExecutionCircuitBreaker circuitBreaker,
int atomicity
) {
clear();
this.circuitBreaker = circuitBreaker;
this.tableName = tableName;
this.tableToken = cairoEngine.lockTableName(tableName, false);
if (tableToken == null) {
tableToken = cairoEngine.verifyTableName(tableName);
}
this.importRoot = tmpPath.of(inputWorkRoot).concat(tableToken).toString();
this.inputFileName = inputFileName;
this.timestampColumn = timestampColumn;
this.partitionBy = partitionBy;
this.columnDelimiter = columnDelimiter;
if (timestampFormat != null) {
DateFormat dateFormat = typeManager.getInputFormatConfiguration().getTimestampFormatFactory().get(timestampFormat);
this.timestampAdapter = (TimestampAdapter) typeManager.nextTimestampAdapter(
false,
dateFormat,
configuration.getTextConfiguration().getDefaultDateLocale()
);
}
this.forceHeader = forceHeader;
this.timestampIndex = -1;
this.status = CopyTask.STATUS_STARTED;
this.phase = CopyTask.PHASE_SETUP;
this.targetTableStatus = -1;
this.targetTableCreated = false;
this.atomicity = Atomicity.isValid(atomicity) ? atomicity : Atomicity.SKIP_ROW;
this.importId = importId;
inputFilePath.of(inputRoot).concat(inputFileName).$();
}
@TestOnly
public void of(
String tableName,
String inputFileName,
long importId,
int partitionBy,
byte columnDelimiter,
String timestampColumn,
String tsFormat,
boolean forceHeader,
ExecutionCircuitBreaker circuitBreaker
) {
of(
tableName,
inputFileName,
importId,
partitionBy,
columnDelimiter,
timestampColumn,
tsFormat,
forceHeader,
circuitBreaker,
Atomicity.SKIP_COL
);
}
@TestOnly
public void of(
String tableName,
String inputFileName,
long importId,
int partitionBy,
byte columnDelimiter,
String timestampColumn,
String timestampFormat,
boolean forceHeader
) {
of(
tableName,
inputFileName,
importId,
partitionBy,
columnDelimiter,
timestampColumn,
timestampFormat,
forceHeader,
null,
Atomicity.SKIP_COL
);
}
public void parseStructure(long fd, SecurityContext securityContext) throws TextImportException {
phasePrologue(CopyTask.PHASE_ANALYZE_FILE_STRUCTURE);
final CairoConfiguration configuration = cairoEngine.getConfiguration();
final int textAnalysisMaxLines = configuration.getTextConfiguration().getTextAnalysisMaxLines();
int len = configuration.getSqlCopyBufferSize();
long buf = Unsafe.malloc(len, MemoryTag.NATIVE_IMPORT);
try (TextLexerWrapper tlw = new TextLexerWrapper(configuration.getTextConfiguration())) {
long n = ff.read(fd, buf, len, 0);
if (n > 0) {
if (columnDelimiter < 0) {
columnDelimiter = textDelimiterScanner.scan(buf, buf + n);
}
AbstractTextLexer lexer = tlw.getLexer(columnDelimiter);
lexer.setSkipLinesWithExtraValues(false);
final ObjList<CharSequence> names = new ObjList<>();
final ObjList<TypeAdapter> types = new ObjList<>();
if (timestampColumn != null && timestampAdapter != null) {
names.add(timestampColumn);
types.add(timestampAdapter);
}
textMetadataDetector.of(tableName, names, types, forceHeader);
lexer.parse(buf, buf + n, textAnalysisMaxLines, textMetadataDetector);
textMetadataDetector.evaluateResults(lexer.getLineCount(), lexer.getErrorCount());
forceHeader = textMetadataDetector.isHeader();
prepareTable(
textMetadataDetector.getColumnNames(),
textMetadataDetector.getColumnTypes(),
inputFilePath,
typeManager,
securityContext
);
phaseEpilogue(CopyTask.PHASE_ANALYZE_FILE_STRUCTURE);
} else {
throw TextException.$("could not read from file '").put(inputFilePath).put("' to analyze structure");
}
} catch (CairoException e) {
throw TextImportException.instance(CopyTask.PHASE_ANALYZE_FILE_STRUCTURE, e.getFlyweightMessage(), e.getErrno());
} catch (TextException e) {
throw TextImportException.instance(CopyTask.PHASE_ANALYZE_FILE_STRUCTURE, e.getFlyweightMessage());
} finally {
Unsafe.free(buf, len, MemoryTag.NATIVE_IMPORT);
}
}
// returns list with N chunk boundaries
public LongList phaseBoundaryCheck(long fileLength) throws TextImportException {
phasePrologue(CopyTask.PHASE_BOUNDARY_CHECK);
assert (workerCount > 0 && minChunkSize > 0);
if (workerCount == 1) {
indexChunkStats.setPos(0);
indexChunkStats.add(0);
indexChunkStats.add(0);
indexChunkStats.add(fileLength);
indexChunkStats.add(0);
phaseEpilogue(CopyTask.PHASE_BOUNDARY_CHECK);
return indexChunkStats;
}
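// Chunk size is the per-worker share of the file (rounded up), but never below
// minChunkSize; e.g. a 1000MB file with 4 workers and a 300MB minimum gives
// chunkSize = 300MB and 4 chunks of 300/300/300/100MB.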
long chunkSize = Math.max(minChunkSize, (fileLength + workerCount - 1) / workerCount);
final int chunks = (int) Math.max((fileLength + chunkSize - 1) / chunkSize, 1);
int queuedCount = 0;
int collectedCount = 0;
chunkStats.setPos(chunks * 5);
chunkStats.zero(0);
for (int i = 0; i < chunks; i++) {
final long chunkLo = i * chunkSize;
final long chunkHi = Long.min(chunkLo + chunkSize, fileLength);
while (true) {
final long seq = pubSeq.next();
if (seq > -1) {
final CopyTask task = queue.get(seq);
task.setChunkIndex(i);
task.setCircuitBreaker(circuitBreaker);
task.ofPhaseBoundaryCheck(ff, inputFilePath, chunkLo, chunkHi);
pubSeq.done(seq);
queuedCount++;
break;
} else {
collectedCount += collect(queuedCount - collectedCount, collectChunkStatsRef);
}
}
}
collectedCount += collect(queuedCount - collectedCount, collectChunkStatsRef);
assert collectedCount == queuedCount;
processChunkStats(fileLength, chunks);
phaseEpilogue(CopyTask.PHASE_BOUNDARY_CHECK);
return indexChunkStats;
}
public void phaseIndexing() throws TextException {
phasePrologue(CopyTask.PHASE_INDEXING);
int queuedCount = 0;
int collectedCount = 0;
createWorkDir();
boolean forceHeader = this.forceHeader;
for (int i = 0, n = indexChunkStats.size() - 2; i < n; i += 2) {
int colIdx = i / 2;
final long chunkLo = indexChunkStats.get(i);
final long lineNumber = indexChunkStats.get(i + 1);
final long chunkHi = indexChunkStats.get(i + 2);
while (true) {
final long seq = pubSeq.next();
if (seq > -1) {
final CopyTask task = queue.get(seq);
task.setChunkIndex(colIdx);
task.setCircuitBreaker(circuitBreaker);
task.ofPhaseIndexing(
chunkLo,
chunkHi,
lineNumber,
colIdx,
inputFileName,
importRoot,
partitionBy,
columnDelimiter,
timestampIndex,
timestampAdapter,
forceHeader,
atomicity
);
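// only the first chunk may interpret its first line as a header;
// all subsequent chunks must index every line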
if (forceHeader) {
forceHeader = false;
}
pubSeq.done(seq);
queuedCount++;
break;
} else {
collectedCount += collect(queuedCount - collectedCount, collectIndexStatsRef);
}
}
}
collectedCount += collect(queuedCount - collectedCount, collectIndexStatsRef);
assert collectedCount == queuedCount;
processIndexStats();
phaseEpilogue(CopyTask.PHASE_INDEXING);
}
public void process(SecurityContext securityContext) throws TextImportException {
final long startMs = getCurrentTimeMs();
long fd = -1;
try {
try {
updateImportStatus(CopyTask.STATUS_STARTED, Numbers.LONG_NULL, Numbers.LONG_NULL, 0);
try {
fd = TableUtils.openRO(ff, inputFilePath.$(), LOG);
} catch (CairoException e) {
throw TextImportException.instance(CopyTask.PHASE_SETUP, e.getFlyweightMessage(), e.getErrno());
}
long length = ff.length(fd);
if (length < 1) {
throw TextImportException.instance(CopyTask.PHASE_SETUP, "ignored empty input file [file='").put(inputFilePath).put(']');
}
try {
parseStructure(fd, securityContext);
phaseBoundaryCheck(length);
phaseIndexing();
phasePartitionImport();
phaseSymbolTableMerge();
phaseUpdateSymbolKeys();
phaseBuildSymbolIndex();
movePartitions();
attachPartitions();
updateImportStatus(CopyTask.STATUS_FINISHED, rowsHandled, rowsImported, errors);
} catch (Throwable t) {
cleanUp();
throw t;
} finally {
closeWriter();
if (createdWorkDir) {
removeWorkDir();
}
}
// the catch clauses below convert remaining exception types to TextImportException
} catch (CairoException e) {
throw TextImportException.instance(CopyTask.PHASE_CLEANUP, e.getFlyweightMessage(), e.getErrno());
} catch (TextException e) {
throw TextImportException.instance(CopyTask.PHASE_CLEANUP, e.getFlyweightMessage());
} finally {
ff.close(fd);
}
} catch (TextImportException e) {
LOG.error()
.$("could not import [phase=").$(CopyTask.getPhaseName(e.getPhase()))
.$(", ex=").$(e.getFlyweightMessage())
.I$();
throw e;
}
long endMs = getCurrentTimeMs();
LOG.info()
.$("import complete [importId=").$hexPadded(importId)
.$(", file=`").$(inputFilePath).$('`')
.$("', time=").$((endMs - startMs) / 1000).$("s").I$();
}
public void setMinChunkSize(int minChunkSize) {
this.minChunkSize = minChunkSize;
}
public void setStatusReporter(final PhaseStatusReporter reporter) {
this.statusReporter = reporter;
}
public void updateImportStatus(byte status, long rowsHandled, long rowsImported, long errors) {
if (this.statusReporter != null) {
this.statusReporter.report(CopyTask.NO_PHASE, status, null, rowsHandled, rowsImported, errors);
}
}
public void updatePhaseStatus(byte phase, byte status, @Nullable final CharSequence msg) {
if (this.statusReporter != null) {
this.statusReporter.report(phase, status, msg, Numbers.LONG_NULL, Numbers.LONG_NULL, phaseErrors);
}
}
private void attachPartitions() throws TextImportException {
phasePrologue(CopyTask.PHASE_ATTACH_PARTITIONS);
// Go in descending order; attaching the last partition is more expensive than the others
for (int i = partitions.size() - 1; i > -1; i--) {
PartitionInfo partition = partitions.getQuick(i);
if (partition.importedRows == 0) {
continue;
}
final CharSequence partitionDirName = partition.name;
try {
final long timestamp = PartitionBy.parsePartitionDirName(partitionDirName, partitionBy);
writer.attachPartition(timestamp, partition.importedRows);
} catch (CairoException e) {
throw TextImportException.instance(CopyTask.PHASE_ATTACH_PARTITIONS, "could not attach [partition='")
.put(partitionDirName).put("', msg=")
.put('[').put(e.getErrno()).put("] ").put(e.getFlyweightMessage()).put(']');
}
}
phaseEpilogue(CopyTask.PHASE_ATTACH_PARTITIONS);
}
private void cleanUp() {
if (targetTableStatus == TableUtils.TABLE_EXISTS && writer != null) {
writer.truncate();
}
closeWriter();
if (targetTableStatus == TableUtils.TABLE_DOES_NOT_EXIST && targetTableCreated) {
cairoEngine.drop(tmpPath, tableToken);
}
if (tableToken != null) {
cairoEngine.unlockTableName(tableToken);
}
}
private void closeWriter() {
writer = Misc.free(writer);
metadata = null;
}
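// Drains up to queuedCount completed tasks from the collect sequence. When no
// result is ready yet, the calling thread runs the local import job itself
// (work stealing) instead of busy-waiting.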
private int collect(int queuedCount, Consumer<CopyTask> consumer) {
int collectedCount = 0;
while (collectedCount < queuedCount) {
final long seq = collectSeq.next();
if (seq > -1) {
CopyTask task = queue.get(seq);
consumer.accept(task);
task.clear();
collectSeq.done(seq);
collectedCount += 1;
} else {
stealWork();
}
}
return collectedCount;
}
private void collectChunkStats(final CopyTask task) {
updateStatus(task);
final CopyTask.PhaseBoundaryCheck phaseBoundaryCheck = task.getCountQuotesPhase();
final int chunkOffset = 5 * task.getChunkIndex();
chunkStats.set(chunkOffset, phaseBoundaryCheck.getQuoteCount());
chunkStats.set(chunkOffset + 1, phaseBoundaryCheck.getNewLineCountEven());
chunkStats.set(chunkOffset + 2, phaseBoundaryCheck.getNewLineCountOdd());
chunkStats.set(chunkOffset + 3, phaseBoundaryCheck.getNewLineOffsetEven());
chunkStats.set(chunkOffset + 4, phaseBoundaryCheck.getNewLineOffsetOdd());
}
private void collectDataImportStats(final CopyTask task) {
updateStatus(task);
final CopyTask.PhasePartitionImport phase = task.getImportPartitionDataPhase();
LongList rows = phase.getImportedRows();
for (int i = 0, n = rows.size(); i < n; i += 2) {
partitions.get((int) rows.get(i)).importedRows = rows.get(i + 1);
}
rowsHandled += phase.getRowsHandled();
rowsImported += phase.getRowsImported();
phaseErrors += phase.getErrors();
errors += phase.getErrors();
}
private void collectIndexStats(final CopyTask task) {
updateStatus(task);
final CopyTask.PhaseIndexing phaseIndexing = task.getBuildPartitionIndexPhase();
final LongList keys = phaseIndexing.getPartitionKeysAndSizes();
this.partitionKeysAndSizes.add(keys);
this.linesIndexed += phaseIndexing.getLineCount();
this.phaseErrors += phaseIndexing.getErrorCount();
this.errors += phaseIndexing.getErrorCount();
}
private void collectStub(final CopyTask task) {
updateStatus(task);
}
private void createWorkDir() {
// First, create the work root dir, if it doesn't exist.
Path workDirPath = tmpPath.of(inputWorkRoot).slash();
if (!ff.exists(workDirPath.$())) {
int result = ff.mkdir(workDirPath.$(), configuration.getMkDirMode());
if (result != 0) {
throw CairoException.critical(ff.errno()).put("could not create import work root directory [path='").put(workDirPath).put("']");
}
}
// Next, remove and recreate the per-table sub-dir.
removeWorkDir();
workDirPath = tmpPath.of(importRoot).slash();
int result = ff.mkdir(workDirPath.$(), configuration.getMkDirMode());
if (result != 0) {
throw CairoException.critical(ff.errno()).put("could not create temporary import work directory [path='").put(workDirPath).put("']");
}
createdWorkDir = true;
LOG.info().$("temporary import directory [path='").$(workDirPath).I$();
}
private long getCurrentTimeMs() {
return configuration.getMillisecondClock().getTicks();
}
private int getTaskCount() {
return taskDistribution.size() / 3;
}
private void initWriterAndOverrideImportMetadata(
ObjList<CharSequence> names,
ObjList<TypeAdapter> types,
TypeManager typeManager,
SecurityContext securityContext) throws TextException {
final TableWriter writer = cairoEngine.getWriter(tableToken, LOCK_REASON);
final RecordMetadata metadata = GenericRecordMetadata.copyDense(writer.getMetadata());
if (metadata.getColumnCount() < types.size()) {
writer.close();
throw TextException.$("column count mismatch [textColumnCount=").put(types.size())
.put(", tableColumnCount=").put(metadata.getColumnCount())
.put(", table=").put(tableName)
.put(']');
}
// remap index is only needed to adjust names and types
// workers will import data into temp tables without remapping
final IntList remapIndex = new IntList();
remapIndex.setPos(types.size());
for (int i = 0, n = types.size(); i < n; i++) {
final int columnIndex = metadata.getColumnIndexQuiet(names.getQuick(i));
final int idx = columnIndex > -1 ? columnIndex : i; // check for strict match ?
remapIndex.set(i, idx);
final int columnType = metadata.getColumnType(idx);
final TypeAdapter detectedAdapter = types.getQuick(i);
final int detectedType = detectedAdapter.getType();
if (detectedType != columnType) {
// when a DATE column is mis-detected as STRING we have
// neither a date format nor a locale to use
// when populating this field
switch (ColumnType.tagOf(columnType)) {
case ColumnType.DATE:
logTypeError(i, detectedType);
types.setQuick(i, BadDateAdapter.INSTANCE);
break;
case ColumnType.TIMESTAMP:
if (detectedAdapter instanceof TimestampCompatibleAdapter) {
types.setQuick(i, otherToTimestampAdapterPool.next().of((TimestampCompatibleAdapter) detectedAdapter));
} else {
logTypeError(i, detectedType);
types.setQuick(i, BadTimestampAdapter.INSTANCE);
}
break;
case ColumnType.BINARY:
writer.close();
throw TextException.$("cannot import text into BINARY column [index=").put(i).put(']');
default:
types.setQuick(i, typeManager.getTypeAdapter(columnType));
break;
}
}
}
// at this point we have to use the target table's column names, otherwise
// partition attach could fail on metadata differences
// (if header names or synthetic names differ from the table's)
for (int i = 0, n = remapIndex.size(); i < n; i++) {
names.set(i, metadata.getColumnName(remapIndex.get(i)));
}
this.metadata = metadata;
this.writer = writer; // the next call can throw an exception
// authorize only columns present in the file
securityContext.authorizeInsert(tableToken);
// add table columns missing in input file
if (names.size() < metadata.getColumnCount()) {
for (int i = 0, n = metadata.getColumnCount(); i < n; i++) {
boolean unused = true;
for (int r = 0, rn = remapIndex.size(); r < rn; r++) {
if (remapIndex.get(r) == i) {
unused = false;
break;
}
}
if (unused) {
names.add(metadata.getColumnName(i));
types.add(typeManager.getTypeAdapter(metadata.getColumnType(i)));
remapIndex.add(i);
}
}
}
// copy symbol capacities from the destination table to avoid
// having default, undersized capacities in temporary tables
symbolCapacities.setAll(remapIndex.size(), -1);
for (int i = 0, n = remapIndex.size(); i < n; i++) {
final int columnIndex = remapIndex.getQuick(i);
if (ColumnType.isSymbol(metadata.getColumnType(columnIndex))) {
final int columnWriterIndex = metadata.getWriterIndex(columnIndex);
final MapWriter symbolWriter = writer.getSymbolMapWriter(columnWriterIndex);
symbolCapacities.set(i, symbolWriter.getSymbolCapacity());
}
}
}
private boolean isOneOfMainDirectories(CharSequence p) {
String path = normalize(p);
if (path == null) {
return false;
}
return path.equals(normalize(configuration.getConfRoot())) ||
path.equals(normalize(configuration.getRoot())) ||
path.equals(normalize(configuration.getDbDirectory())) ||
path.equals(normalize(configuration.getCheckpointRoot())) ||
path.equals(normalize(configuration.getBackupRoot()));
}
private void logTypeError(int i, int type) {
LOG.info()
.$("mis-detected [table=").$(tableName)
.$(", column=").$(i)
.$(", type=").$(ColumnType.nameOf(type))
.$(", workerCount=").$(workerCount)
.I$();
}
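// Moves partition directories from the per-task temporary tables into the
// target table directory. When source and destination are on different
// filesystems (FILES_RENAME_ERR_EXDEV), falls back to copying the files.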
private void movePartitions() {
phasePrologue(CopyTask.PHASE_MOVE_PARTITIONS);
final int taskCount = getTaskCount();
try {
for (int i = 0; i < taskCount; i++) {
int index = taskDistribution.getQuick(i * 3);
int lo = taskDistribution.getQuick(i * 3 + 1);
int hi = taskDistribution.getQuick(i * 3 + 2);
final Path srcPath = localImportJob.getTmpPath1().of(importRoot).concat(tableName).put('_').put(index);
final Path dstPath = localImportJob.getTmpPath2().of(configuration.getRoot()).concat(tableToken);
final int srcPlen = srcPath.size();
final int dstPlen = dstPath.size();
if (!ff.exists(dstPath.slash$())) {
if (ff.mkdirs(dstPath, configuration.getMkDirMode()) != 0) {
throw TextException.$("could not create partition directory [path='").put(dstPath).put("', errno=").put(ff.errno()).put(']');
}
}
for (int j = lo; j < hi; j++) {
PartitionInfo partition = partitions.get(j);
if (partition.importedRows == 0) {
continue;
}
final CharSequence partitionName = partition.name;
srcPath.trimTo(srcPlen).concat(partitionName);
dstPath.trimTo(dstPlen).concat(partitionName).put(configuration.getAttachPartitionSuffix());
int res = ff.rename(srcPath.slash$(), dstPath.slash$());
if (res == Files.FILES_RENAME_ERR_EXDEV) {
LOG.info().$(srcPath).$(" and ").$(dstPath).$(" are not on the same mounted filesystem. Partitions will be copied.").$();
if (ff.mkdirs(dstPath, configuration.getMkDirMode()) != 0) {
throw TextException.$("could not create partition directory [path='").put(dstPath).put("', errno=").put(ff.errno()).put(']');
}
ff.iterateDir(srcPath.$(), (long name, int type) -> {
if (type == Files.DT_FILE) {
srcPath.trimTo(srcPlen).concat(partitionName).concat(name);
dstPath.trimTo(dstPlen).concat(partitionName).put(configuration.getAttachPartitionSuffix()).concat(name);
if (ff.copy(srcPath.$(), dstPath.$()) < 0) {
throw TextException.$("could not copy partition file [to='").put(dstPath).put("', errno=").put(ff.errno()).put(']');
}
}
});
srcPath.parent();
} else if (res != Files.FILES_RENAME_OK) {
throw CairoException.critical(ff.errno()).put("could not copy partition file [to=").put(dstPath).put(']');
}
}
}
} catch (CairoException e) {
throw TextImportException.instance(CopyTask.PHASE_MOVE_PARTITIONS, e.getFlyweightMessage(), e.getErrno());
} catch (TextException e) {
throw TextImportException.instance(CopyTask.PHASE_MOVE_PARTITIONS, e.getFlyweightMessage());
}
phaseEpilogue(CopyTask.PHASE_MOVE_PARTITIONS);
}
private String normalize(CharSequence c) {
try {
if (c == null) {
return null;
}
return new File(c.toString()).getCanonicalPath().replace(File.separatorChar, '/');
} catch (IOException e) {
LOG.error().$("could not normalize [path='").$(c).$("', message=").$(e.getMessage()).I$();
return null;
}
}
private void phaseBuildSymbolIndex() throws TextImportException {
phasePrologue(CopyTask.PHASE_BUILD_SYMBOL_INDEX);
final int columnCount = metadata.getColumnCount();
final int tmpTableCount = getTaskCount();
boolean isAnyIndexed = false;
for (int i = 0; i < columnCount; i++) {
isAnyIndexed |= metadata.isColumnIndexed(i);
}
if (isAnyIndexed) {
int queuedCount = 0;
int collectedCount = 0;
for (int t = 0; t < tmpTableCount; ++t) {
while (true) {
final long seq = pubSeq.next();
if (seq > -1) {
final CopyTask task = queue.get(seq);
task.setChunkIndex(t);
task.setCircuitBreaker(circuitBreaker);
// this task will create its own copy of TableWriter to build indexes concurrently
task.ofPhaseBuildSymbolIndex(cairoEngine, targetTableStructure, importRoot, t, metadata);
pubSeq.done(seq);
queuedCount++;
break;
} else {
collectedCount += collect(queuedCount - collectedCount, checkStatusRef);
}
}
}
collectedCount += collect(queuedCount - collectedCount, checkStatusRef);
assert collectedCount == queuedCount;
}
phaseEpilogue(CopyTask.PHASE_BUILD_SYMBOL_INDEX);
}
private void phaseEpilogue(byte phase) {
throwErrorIfNotOk();
long endMs = getCurrentTimeMs();
LOG.info()
.$("finished [importId=").$hexPadded(importId)
.$(", phase=").$(CopyTask.getPhaseName(phase))
.$(", file=`").$(inputFilePath)
.$("`, duration=").$((endMs - startMs) / 1000).$('s')
.$(", errors=").$(phaseErrors)
.I$();
updatePhaseStatus(phase, CopyTask.STATUS_FINISHED, null);
}
private void phasePartitionImport() throws TextImportException {
if (partitions.size() == 0) {
if (linesIndexed > 0) {
throw TextImportException.instance(CopyTask.PHASE_PARTITION_IMPORT,
"All rows were skipped. Possible reasons: timestamp format mismatch or rows exceed maximum line length (65k).");
} else {
throw TextImportException.instance(CopyTask.PHASE_PARTITION_IMPORT,
"No rows in input file to import.");
}
}
phasePrologue(CopyTask.PHASE_PARTITION_IMPORT);
this.taskCount = assignPartitions(partitions, workerCount);
int queuedCount = 0;
int collectedCount = 0;
taskDistribution.clear();
for (int i = 0; i < taskCount; ++i) {
int lo = 0;
while (lo < partitions.size() && partitions.getQuick(lo).taskId != i) {
lo++;
}
int hi = lo + 1;
while (hi < partitions.size() && partitions.getQuick(hi).taskId == i) {
hi++;
}
while (true) {
final long seq = pubSeq.next();
if (seq > -1) {
final CopyTask task = queue.get(seq);
task.setChunkIndex(i);
task.setCircuitBreaker(circuitBreaker);
task.ofPhasePartitionImport(
cairoEngine,
targetTableStructure,
textMetadataDetector.getColumnTypes(),
atomicity,
columnDelimiter,
importRoot,
inputFileName,
i,
lo,
hi,
partitions
);
pubSeq.done(seq);
queuedCount++;
break;
} else {
collectedCount += collect(queuedCount - collectedCount, collectDataImportStatsRef);
}
}
taskDistribution.add(i);
taskDistribution.add(lo);
taskDistribution.add(hi);
}
collectedCount += collect(queuedCount - collectedCount, collectDataImportStatsRef);
assert collectedCount == queuedCount;
phaseEpilogue(CopyTask.PHASE_PARTITION_IMPORT);
}
private void phasePrologue(byte phase) {
phaseErrors = 0;
LOG.info()
.$("started [importId=").$hexPadded(importId)
.$(", phase=").$(CopyTask.getPhaseName(phase))
.$(", file=`").$(inputFilePath)
.$("`, workerCount=").$(workerCount).I$();
updatePhaseStatus(phase, CopyTask.STATUS_STARTED, null);
startMs = getCurrentTimeMs();
}
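// Publishes one task per SYMBOL column; each task merges the per-worker (temp
// table) symbol tables into a single global symbol table for that column.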
private void phaseSymbolTableMerge() throws TextImportException {
phasePrologue(CopyTask.PHASE_SYMBOL_TABLE_MERGE);
final int tmpTableCount = getTaskCount();
int queuedCount = 0;
int collectedCount = 0;
for (int columnIndex = 0, size = metadata.getColumnCount(); columnIndex < size; columnIndex++) {
if (ColumnType.isSymbol(metadata.getColumnType(columnIndex))) {
final CharSequence symbolColumnName = metadata.getColumnName(columnIndex);
int tmpTableSymbolColumnIndex = targetTableStructure.getSymbolColumnIndex(symbolColumnName);
while (true) {
final long seq = pubSeq.next();
if (seq > -1) {
final CopyTask task = queue.get(seq);
task.setChunkIndex(columnIndex);
task.ofPhaseSymbolTableMerge(
configuration,
importRoot,
writer,
tableToken,
symbolColumnName,
metadata.getWriterIndex(columnIndex),
tmpTableSymbolColumnIndex,
tmpTableCount,
partitionBy
);
pubSeq.done(seq);
queuedCount++;
break;
} else {
collectedCount += collect(queuedCount - collectedCount, collectStubRef);
}
}
}
}
collectedCount += collect(queuedCount - collectedCount, collectStubRef);
assert collectedCount == queuedCount;
phaseEpilogue(CopyTask.PHASE_SYMBOL_TABLE_MERGE);
}
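// For every non-empty partition of every temp table, publishes tasks that remap
// local symbol keys to the global symbol table merged in the previous phase.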
private void phaseUpdateSymbolKeys() throws TextImportException {
phasePrologue(CopyTask.PHASE_UPDATE_SYMBOL_KEYS);
final int tmpTableCount = getTaskCount();
int queuedCount = 0;
int collectedCount = 0;
for (int t = 0; t < tmpTableCount; ++t) {
tmpPath.of(importRoot).concat(tableToken.getTableName()).put('_').put(t);
try (TxReader txFile = new TxReader(ff).ofRO(tmpPath.concat(TXN_FILE_NAME).$(), partitionBy)) {
txFile.unsafeLoadAll();
final int partitionCount = txFile.getPartitionCount();
for (int p = 0; p < partitionCount; p++) {
final long partitionSize = txFile.getPartitionSize(p);
final long partitionTimestamp = txFile.getPartitionTimestampByIndex(p);
int symbolColumnIndex = 0;
if (partitionSize == 0) {
continue;
}
for (int c = 0, size = metadata.getColumnCount(); c < size; c++) {
if (ColumnType.isSymbol(metadata.getColumnType(c))) {
final CharSequence symbolColumnName = metadata.getColumnName(c);
final int symbolCount = txFile.getSymbolValueCount(symbolColumnIndex++);
while (true) {
final long seq = pubSeq.next();
if (seq > -1) {
final CopyTask task = queue.get(seq);
task.setChunkIndex(t);
task.setCircuitBreaker(circuitBreaker);
task.ofPhaseUpdateSymbolKeys(
cairoEngine,
targetTableStructure,
t,
partitionSize,
partitionTimestamp,
importRoot,
symbolColumnName,
symbolCount
);
pubSeq.done(seq);
queuedCount++;
break;
} else {
collectedCount += collect(queuedCount - collectedCount, collectStubRef);
}
}
}
}
}
}
}
collectedCount += collect(queuedCount - collectedCount, collectStubRef);
assert collectedCount == queuedCount;
phaseEpilogue(CopyTask.PHASE_UPDATE_SYMBOL_KEYS);
}
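// Converts the raw boundary-scan counters into (offset, lineNumber) pairs that
// mark safe chunk starts. Quote parity picks the real line ending: when an odd
// number of quotes precedes a chunk, its boundary falls inside a quoted field,
// so the "odd" newline offset/count must be used instead of the "even" one.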
private void processChunkStats(long fileLength, int chunks) {
long quotes = chunkStats.get(0);
indexChunkStats.setPos(0);
// set the first chunk's offset and line number
indexChunkStats.add(0);
indexChunkStats.add(0);
long lines;
long totalLines = chunks > 0 ? chunkStats.get(1) + 1 : 1;
for (int i = 1; i < chunks; i++) {
long startPos;
if ((quotes & 1) == 1) { // if number of quotes is odd then use odd starter
startPos = chunkStats.get(5 * i + 4);
lines = chunkStats.get(5 * i + 2);
} else {
startPos = chunkStats.get(5 * i + 3);
lines = chunkStats.get(5 * i + 1);
}
// if the whole chunk belongs to a huge quoted string, or contains one very long line,
// then it is ignored here and merged with the previous chunk
if (startPos > -1) {
indexChunkStats.add(startPos);
indexChunkStats.add(totalLines);
}
quotes += chunkStats.get(5 * i);
totalLines += lines;
}
if (indexChunkStats.get(indexChunkStats.size() - 2) < fileLength) {
indexChunkStats.add(fileLength);
indexChunkStats.add(totalLines); // value is not used
}
}
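// Deduplicates partition keys reported by the indexing workers, sums the sizes
// per key, and renders each partition's directory name with the partition-by
// unit's directory format.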
private void processIndexStats() {
LongHashSet set = new LongHashSet();
for (int i = 0, n = partitionKeysAndSizes.size(); i < n; i += 2) {
set.add(partitionKeysAndSizes.get(i));
}
LongList distinctKeys = new LongList();
for (int i = 0, n = set.size(); i < n; i++) {
distinctKeys.add(set.get(i));
}
distinctKeys.sort();
LongList totalSizes = new LongList();
for (int i = 0, n = distinctKeys.size(); i < n; i++) {
long key = distinctKeys.getQuick(i);
long size = 0;
for (int j = 0, m = partitionKeysAndSizes.size(); j < m; j += 2) {
if (partitionKeysAndSizes.getQuick(j) == key) {
size += partitionKeysAndSizes.get(j + 1);
}
}
totalSizes.add(size);
}
DateFormat dirFormat = PartitionBy.getPartitionDirFormatMethod(partitionBy);
for (int i = 0, n = distinctKeys.size(); i < n; i++) {
long key = distinctKeys.getQuick(i);
long size = totalSizes.getQuick(i);
partitionNameSink.clear();
dirFormat.format(distinctKeys.get(i), DateFormatUtils.EN_LOCALE, null, partitionNameSink);
String dirName = partitionNameSink.toString();
partitions.add(new PartitionInfo(key, dirName, size));
}
}
private void removeWorkDir() {
Path workDirPath = tmpPath.of(importRoot);
if (ff.exists(workDirPath.$())) {
if (isOneOfMainDirectories(importRoot)) {
throw TextException.$("could not remove import work directory because it points to one of main directories [path='").put(workDirPath).put("'] .");
}
LOG.info().$("removing import work directory [path='").$(workDirPath).$("']").$();
if (!ff.rmdir(workDirPath)) {
throw TextException.$("could not remove import work directory [path='").put(workDirPath).put("', errno=").put(ff.errno()).put(']');
}
}
}
private void stealWork() {
if (localImportJob.run(0, Job.RUNNING_STATUS)) {
return;
}
Os.pause();
}
private void throwErrorIfNotOk() {
if (status == CopyTask.STATUS_FAILED) {
throw TextImportException.instance(phase, "import failed [phase=")
.put(CopyTask.getPhaseName(phase))
.put(", msg=`").put(errorMessage).put("`]");
} else if (status == CopyTask.STATUS_CANCELLED) {
TextImportException ex = TextImportException.instance(phase, "import cancelled [phase=")
.put(CopyTask.getPhaseName(phase))
.put(", msg=`").put(errorMessage).put("`]");
ex.setCancelled(true);
throw ex;
}
}
private void updateStatus(final CopyTask task) {
boolean cancelledOrFailed = status == CopyTask.STATUS_FAILED || status == CopyTask.STATUS_CANCELLED;
if (!cancelledOrFailed && (task.isFailed() || task.isCancelled())) {
status = task.getStatus();
phase = task.getPhase();
errorMessage = task.getErrorMessage();
}
}
void prepareTable(ObjList<CharSequence> names,
ObjList<TypeAdapter> types,
Path path,
TypeManager typeManager,
SecurityContext securityContext)
throws TextException {
if (types.size() == 0) {
throw CairoException.nonCritical().put("cannot determine text structure");
}
if (partitionBy == PartitionBy.NONE) {
throw CairoException.nonCritical().put("partition strategy for parallel import cannot be NONE");
}
if (partitionBy < 0) {
partitionBy = PartitionBy.NONE;
}
if (timestampIndex == -1 && timestampColumn != null) {
for (int i = 0, n = names.size(); i < n; i++) {
if (Chars.equalsIgnoreCase(names.get(i), timestampColumn)) {
timestampIndex = i;
break;
}
}
}
try {
targetTableStatus = cairoEngine.getTableStatus(path, tableToken);
switch (targetTableStatus) {
case TableUtils.TABLE_DOES_NOT_EXIST:
if (partitionBy == PartitionBy.NONE) {
throw TextException.$("partition by unit must be set when importing to new table");
}
if (timestampColumn == null) {
throw TextException.$("timestamp column must be set when importing to new table");
}
if (timestampIndex == -1) {
throw TextException.$("timestamp column '").put(timestampColumn).put("' not found in file header");
}
validate(names, types, null, NO_INDEX);
symbolCapacities.setAll(types.size(), -1);
targetTableStructure.of(tableName, names, types, symbolCapacities, timestampIndex, partitionBy);
createTable(
ff,
configuration.getMkDirMode(),
configuration.getRoot(),
tableToken.getDirName(),
targetTableStructure.getTableName(),
targetTableStructure,
tableToken.getTableId(),
securityContext
);
cairoEngine.registerTableToken(tableToken);
targetTableCreated = true;
writer = cairoEngine.getWriter(tableToken, LOCK_REASON);
cairoEngine.metadataCacheHydrateTable(tableToken, true, true);
metadata = GenericRecordMetadata.copyDense(writer.getMetadata());
partitionBy = writer.getPartitionBy();
break;
case TableUtils.TABLE_EXISTS:
initWriterAndOverrideImportMetadata(names, types, typeManager, securityContext);
if (writer.getRowCount() > 0) {
throw TextException.$("target table must be empty [table=").put(tableName).put(']');
}
CharSequence designatedTimestampColumnName = writer.getDesignatedTimestampColumnName();
int designatedTimestampIndex = metadata.getTimestampIndex();
if (PartitionBy.isPartitioned(partitionBy) && partitionBy != writer.getPartitionBy()) {
throw TextException.$("declared partition by unit doesn't match table's");
}
partitionBy = writer.getPartitionBy();
if (!PartitionBy.isPartitioned(partitionBy)) {
throw TextException.$("target table is not partitioned");
}
validate(names, types, designatedTimestampColumnName, designatedTimestampIndex);
targetTableStructure.of(tableName, names, types, symbolCapacities, timestampIndex, partitionBy);
break;
default:
throw TextException.$("name is reserved [table=").put(tableName).put(']');
}
inputFilePath.of(inputRoot).concat(inputFileName).$(); // getStatus might override it
targetTableStructure.setIgnoreColumnIndexedFlag(true);
if (timestampAdapter == null && ColumnType.isTimestamp(types.getQuick(timestampIndex).getType())) {
timestampAdapter = (TimestampAdapter) types.getQuick(timestampIndex);
}
} catch (Throwable t) {
closeWriter();
throw t;
}
}
void validate(
ObjList<CharSequence> names,
ObjList<TypeAdapter> types,
CharSequence designatedTimestampColumnName,
int designatedTimestampIndex
) throws TextException {
if (timestampColumn == null && designatedTimestampColumnName == null) {
timestampIndex = NO_INDEX;
} else if (timestampColumn != null) {
timestampIndex = names.indexOf(timestampColumn);
if (timestampIndex == NO_INDEX) {
throw TextException.$("invalid timestamp column [name='").put(timestampColumn).put("']");
}
} else {
timestampIndex = names.indexOf(designatedTimestampColumnName);
if (timestampIndex == NO_INDEX) {
// columns in the imported file may not have headers, then use writer timestamp index
timestampIndex = designatedTimestampIndex;
}
}
if (timestampIndex != NO_INDEX) {
final TypeAdapter timestampAdapter = types.getQuick(timestampIndex);
final int typeTag = ColumnType.tagOf(timestampAdapter.getType());
if ((typeTag != ColumnType.LONG && typeTag != ColumnType.TIMESTAMP) || timestampAdapter == BadTimestampAdapter.INSTANCE) {
throw TextException.$("column is not a timestamp [no=").put(timestampIndex)
.put(", name='").put(timestampColumn).put("']");
}
}
}
@FunctionalInterface
public interface PhaseStatusReporter {
void report(byte phase, byte status, @Nullable final CharSequence msg, long rowsHandled, long rowsImported, long errors);
}
public static class PartitionInfo {
final long bytes;
final long key;
final CharSequence name;
long importedRows; // used to detect partitions that need skipping (because e.g. no data was imported for them)
int taskId; // assigned worker/task id
public PartitionInfo(long key, CharSequence name, long bytes) {
this.key = key;
this.name = name;
this.bytes = bytes;
}
public PartitionInfo(long key, CharSequence name, long bytes, int taskId) {
this.key = key;
this.name = name;
this.bytes = bytes;
this.taskId = taskId;
}
@Override
public boolean equals(Object o) {
if (this == o) return true;
if (o == null || getClass() != o.getClass()) return false;
PartitionInfo that = (PartitionInfo) o;
return key == that.key && bytes == that.bytes && taskId == that.taskId && importedRows == that.importedRows && name.equals(that.name);
}
@Override
public String toString() {
return "PartitionInfo{" +
"key=" + key +
", name=" + name +
", bytes=" + bytes +
", taskId=" + taskId +
", importedRows=" + importedRows +
'}';
}
}
public static class TableStructureAdapter implements TableStructure {
private final LongList columnBits = new LongList();
private final CairoConfiguration configuration;
private ObjList<CharSequence> columnNames;
private boolean ignoreColumnIndexedFlag;
private int partitionBy;
private IntList symbolCapacities;
private CharSequence tableName;
private int timestampColumnIndex;
public TableStructureAdapter(CairoConfiguration configuration) {
this.configuration = configuration;
}
@Override
public int getColumnCount() {
return columnNames.size();
}
@Override
public CharSequence getColumnName(int columnIndex) {
return columnNames.getQuick(columnIndex);
}
@Override
public int getColumnType(int columnIndex) {
return Numbers.decodeLowInt(columnBits.getQuick(columnIndex));
}
@Override
public int getIndexBlockCapacity(int columnIndex) {
return configuration.getIndexValueBlockSize();
}
@Override
public int getMaxUncommittedRows() {
return configuration.getMaxUncommittedRows();
}
@Override
public long getO3MaxLag() {
return configuration.getO3MaxLag();
}
@Override
public int getPartitionBy() {
return partitionBy;
}
@Override
public boolean getSymbolCacheFlag(int columnIndex) {
return false;
}
@Override
public int getSymbolCapacity(int columnIndex) {
final int capacity = symbolCapacities.getQuick(columnIndex);
return capacity != -1 ? capacity : configuration.getDefaultSymbolCapacity();
}
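// Returns the zero-based ordinal of the named column among SYMBOL columns
// (callers pass names of known SYMBOL columns); -1 if the name is not found.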
public int getSymbolColumnIndex(CharSequence symbolColumnName) {
int index = -1;
for (int i = 0, n = columnNames.size(); i < n; i++) {
if (getColumnType(i) == ColumnType.SYMBOL) {
index++;
}
if (symbolColumnName.equals(columnNames.get(i))) {
return index;
}
}
return -1;
}
@Override
public CharSequence getTableName() {
return tableName;
}
@Override
public int getTimestampIndex() {
return timestampColumnIndex;
}
@Override
public boolean isDedupKey(int columnIndex) {
return false;
}
@Override
public boolean isIndexed(int columnIndex) {
return !ignoreColumnIndexedFlag && Numbers.decodeHighInt(columnBits.getQuick(columnIndex)) != 0;
}
@Override
public boolean isSequential(int columnIndex) {
return false;
}
@Override
public boolean isWalEnabled() {
return configuration.getWalEnabledDefault() && PartitionBy.isPartitioned(partitionBy);
}
public void of(
final CharSequence tableName,
final ObjList<CharSequence> names,
final ObjList<TypeAdapter> types,
final IntList symbolCapacities,
final int timestampColumnIndex,
final int partitionBy
) {
this.tableName = tableName;
this.columnNames = names;
this.symbolCapacities = symbolCapacities;
this.ignoreColumnIndexedFlag = false;
this.columnBits.clear();
for (int i = 0, size = types.size(); i < size; i++) {
final TypeAdapter adapter = types.getQuick(i);
this.columnBits.add(Numbers.encodeLowHighInts(adapter.getType(), adapter.isIndexed() ? 1 : 0));
}
this.timestampColumnIndex = timestampColumnIndex;
this.partitionBy = partitionBy;
}
public void setIgnoreColumnIndexedFlag(boolean flag) {
this.ignoreColumnIndexedFlag = flag;
}
}
}