
/*******************************************************************************
 *     ___                  _   ____  ____
 *    / _ \ _   _  ___  ___| |_|  _ \| __ )
 *   | | | | | | |/ _ \/ __| __| | | |  _ \
 *   | |_| | |_| |  __/\__ \ |_| |_| | |_) |
 *    \__\_\\__,_|\___||___/\__|____/|____/
*
* Copyright (c) 2014-2019 Appsicle
* Copyright (c) 2019-2024 QuestDB
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*
******************************************************************************/
package io.questdb.cairo.mig;

import io.questdb.cairo.CairoConfiguration;
import io.questdb.cairo.CairoException;
import io.questdb.cairo.PartitionBy;
import io.questdb.cairo.vm.Vm;
import io.questdb.cairo.vm.api.MemoryMARW;
import io.questdb.log.Log;
import io.questdb.log.LogFactory;
import io.questdb.std.*;
import io.questdb.std.str.LPSZ;
import io.questdb.std.str.Path;

import static io.questdb.cairo.TableUtils.setPathForPartition;
import static io.questdb.cairo.mig.MigrationUtils.openFileSafe;

public class Mig620 {
private static final int COLUMN_VERSION_FILE_HEADER_SIZE_MIG = 40;
private static final String COLUMN_VERSION_FILE_NAME_MIG = "_cv";
private static final long CV_COL_TOP_DEFAULT_PARTITION_MIG = Long.MIN_VALUE;
private static final int CV_OFFSET_VERSION_64 = 0;
private static final int CV_OFFSET_OFFSET_A_64 = CV_OFFSET_VERSION_64 + 8;
private static final int CV_OFFSET_SIZE_A_64 = CV_OFFSET_OFFSET_A_64 + 8;
private static final int CV_OFFSET_OFFSET_B_64 = CV_OFFSET_SIZE_A_64 + 8;
private static final int CV_OFFSET_SIZE_B_64 = CV_OFFSET_OFFSET_B_64 + 8;
private static final int CV_HEADER_SIZE = CV_OFFSET_SIZE_B_64 + 8;
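// Derived from the constants above, the 40-byte _cv header is double-buffered:
//
//   offset  0: version (8 bytes)  - selects which area holds the current records
//   offset  8: offset of area A (8 bytes)
//   offset 16: size of area A (8 bytes)
//   offset 24: offset of area B (8 bytes)
//   offset 32: size of area B (8 bytes)
//
// This migration zeroes the header and publishes its records in area A
// (see createColumnVersionFile below).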
private static final Log LOG = LogFactory.getLog(EngineMigration.class);
private static final long META_COLUMN_DATA_SIZE_MIG = 32;
private static final String META_FILE_NAME_MIG = "_meta";
private static final long META_OFFSET_COLUMN_TYPES_MIG = 128;
private static final long META_OFFSET_COUNT_MIG = 0;
private static final long META_OFFSET_PARTITION_BY_MIG = 4;
private static final long PARTITION_NAME_TX_OFFSET_MIG = 2;
private static final String TXN_FILE_NAME_MIG = "_txn";
private static final long TXN_OFFSET_MIG = 0;
private static final int TX_BASE_HEADER_SECTION_PADDING_MIG = 12; // Add some free space into header for future use
private static final long TX_BASE_OFFSET_VERSION_MIG = 0;
private static final long TX_BASE_OFFSET_A_MIG = TX_BASE_OFFSET_VERSION_MIG + 8;
private static final long TX_BASE_OFFSET_SYMBOLS_SIZE_A_MIG = TX_BASE_OFFSET_A_MIG + 4;
private static final long TX_BASE_OFFSET_PARTITIONS_SIZE_A_MIG = TX_BASE_OFFSET_SYMBOLS_SIZE_A_MIG + 4;
private static final long TX_BASE_OFFSET_B_MIG = TX_BASE_OFFSET_PARTITIONS_SIZE_A_MIG + 4 + TX_BASE_HEADER_SECTION_PADDING_MIG;
private static final long TX_BASE_OFFSET_SYMBOLS_SIZE_B_MIG = TX_BASE_OFFSET_B_MIG + 4;
private static final long TX_BASE_OFFSET_PARTITIONS_SIZE_B_MIG = TX_BASE_OFFSET_SYMBOLS_SIZE_B_MIG + 4;
private static final int TX_BASE_HEADER_SIZE_MIG = (int) Math.max(TX_BASE_OFFSET_PARTITIONS_SIZE_B_MIG + 4 + TX_BASE_HEADER_SECTION_PADDING_MIG, 64);
private static final long TX_DEFAULT_PARTITION_TIMESTAMP_MIG = 0L;
private static final long TX_OFFSET_COLUMN_VERSION_MIG = 64;
private static final long TX_OFFSET_MAP_WRITER_COUNT_MIG = 128;
private static final long TX_OFFSET_TRUNCATE_VERSION_MIG = 72;
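// Derived from the constants above, the new base header prepended to _txn is 64 bytes:
//
//   offset  0: version (8 bytes)  - even values select section A, odd select section B
//   offset  8: section A - record offset (4), symbols size (4), partitions size (4), padding (12)
//   offset 32: section B - record offset (4), symbols size (4), partitions size (4), padding (12)
//   offset 56: rounded up to TX_BASE_HEADER_SIZE_MIG = max(56, 64) = 64 bytes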
private static void createColumnVersionFile(MemoryMARW txMemory, long partitionSizeOffset, int partitionTableSize, MigrationContext migrationContext, Path path, int pathLen) {
final FilesFacade ff = migrationContext.getFf();
try (MemoryMARW cvMemory = Vm.getCMARWInstance(
ff,
path.trimTo(pathLen).concat(COLUMN_VERSION_FILE_NAME_MIG).$(),
Files.PAGE_SIZE,
COLUMN_VERSION_FILE_HEADER_SIZE_MIG,
MemoryTag.NATIVE_MIG_MMAP,
CairoConfiguration.O_NONE
)) {
cvMemory.extend(COLUMN_VERSION_FILE_HEADER_SIZE_MIG);
cvMemory.jumpTo(COLUMN_VERSION_FILE_HEADER_SIZE_MIG);
cvMemory.zero();
try (MemoryMARW metaMem = openFileSafe(
ff,
path.trimTo(pathLen).concat(META_FILE_NAME_MIG).$(),
META_OFFSET_COLUMN_TYPES_MIG
)) {
int partitionBy = metaMem.getInt(META_OFFSET_PARTITION_BY_MIG);
ObjList<String> columnNames = readColumnNames(metaMem);
int columnCount = columnNames.size();
LongList columnTops = readColumnTops(columnCount, partitionBy, partitionSizeOffset, partitionTableSize, txMemory, ff, path, pathLen, columnNames);
path.trimTo(pathLen);
long sizeBytes = writeColumnVersion(path, columnTops, columnCount, columnNames, cvMemory);
cvMemory.putLong(CV_OFFSET_OFFSET_A_64, CV_HEADER_SIZE);
cvMemory.putLong(CV_OFFSET_SIZE_A_64, sizeBytes);
cvMemory.jumpTo(CV_HEADER_SIZE + sizeBytes);
}
}
}
private static LPSZ dFile(Path path, CharSequence columnName) {
return path.concat(columnName).put('.').put('d').$();
}
private static long getColumnNameOffset(int columnCount) {
return META_OFFSET_COLUMN_TYPES_MIG + columnCount * META_COLUMN_DATA_SIZE_MIG;
}
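// Worked example for getColumnNameOffset: with 3 columns the name list starts at
// 128 + 3 * 32 = 224, immediately after the fixed-size per-column metadata records.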
private static void migrateTxn(MemoryMARW txMemory, int symbolCount, int partitionTableSize, long existingTotalSize, long txn) {
// column version and truncate version are 8-byte fields (offsets 64 and 72)
txMemory.putLong(TX_OFFSET_COLUMN_VERSION_MIG, 0L);
txMemory.putLong(TX_OFFSET_TRUNCATE_VERSION_MIG, 0L);
long pageAddress = txMemory.getPageAddress(0);
// shift the existing txn data down to make room for the new base header, then zero the header area
Vect.memmove(pageAddress + TX_BASE_HEADER_SIZE_MIG, pageAddress, existingTotalSize);
Vect.memset(pageAddress, TX_BASE_HEADER_SIZE_MIG, 0);
txMemory.putLong(TX_BASE_OFFSET_VERSION_MIG, txn);
boolean currentIsA = txn % 2 == 0; // even txn values select section A, odd select section B
long offsetOffset = currentIsA ? TX_BASE_OFFSET_A_MIG : TX_BASE_OFFSET_B_MIG;
long symbolSizeOffset = currentIsA ? TX_BASE_OFFSET_SYMBOLS_SIZE_A_MIG : TX_BASE_OFFSET_SYMBOLS_SIZE_B_MIG;
long partitionsSizeOffset = currentIsA ? TX_BASE_OFFSET_PARTITIONS_SIZE_A_MIG : TX_BASE_OFFSET_PARTITIONS_SIZE_B_MIG;
txMemory.putInt(offsetOffset, TX_BASE_HEADER_SIZE_MIG);
txMemory.putInt(symbolSizeOffset, symbolCount * 8);
txMemory.putInt(partitionsSizeOffset, partitionTableSize);
txMemory.jumpTo(TX_BASE_HEADER_SIZE_MIG + existingTotalSize);
}
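// Illustrative sketch of how a reader of the migrated format would locate the current
// record; "mem" is an assumed read-only view over the migrated _txn file, and this
// snippet is not part of the migration itself:
//
//   long version = mem.getLong(TX_BASE_OFFSET_VERSION_MIG);
//   boolean isA = version % 2 == 0;
//   int recordOffset = mem.getInt(isA ? TX_BASE_OFFSET_A_MIG : TX_BASE_OFFSET_B_MIG);
//   long txn = mem.getLong(recordOffset + TXN_OFFSET_MIG);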
private static long openRO(FilesFacade ff, LPSZ path) {
final long fd = ff.openRO(path);
if (fd > -1) {
Mig620.LOG.debug().$("open [file=").$(path).$(", fd=").$(fd).$(']').$();
return fd;
}
throw CairoException.critical(ff.errno()).put("could not open read-only [file=").put(path).put(']');
}
private static ObjList<String> readColumnNames(MemoryMARW metaMem) {
ObjList<String> columnNames = new ObjList<>();
final int columnCount = metaMem.getInt(META_OFFSET_COUNT_MIG);
long offset = getColumnNameOffset(columnCount);
for (int metaIndex = 0; metaIndex < columnCount; metaIndex++) {
String name = Chars.toString(metaMem.getStrA(offset));
columnNames.add(name);
offset += Vm.getStorageLength(name);
}
return columnNames;
}
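// Note on readColumnNames above: Vm.getStorageLength(name) accounts for the 4-byte
// length prefix plus the UTF-16 payload of each stored name. For example (hypothetical
// name), a 5-character column "price" advances the offset by 4 + 5 * 2 = 14 bytes.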
/**
 * Reads 8 bytes from the column's "top" file.
 *
 * @param ff   files facade, an intermediary that intercepts OS file system calls
 * @param path path set to the directory containing the "top" file, excluding the file name; zero-terminated string
 * @param name name of the top file
 * @param plen path length to truncate "path" back to; path is reusable
 * @return number of rows the column is missing because it was added to a table that already contained data
 */
private static long readColumnTop(FilesFacade ff, Path path, CharSequence name, int plen) {
try {
if (ff.exists(topFile(path, name))) {
final long fd = openRO(ff, path.$());
try {
long n;
if ((n = ff.readNonNegativeLong(fd, 0)) < 0) {
return 0L;
}
return n;
} finally {
ff.close(fd);
}
}
return 0L;
} finally {
path.trimTo(plen);
}
}
private static LongList readColumnTops(int columnCount, int partitionBy, long partitionSizeOffset, int partitionTableSize, MemoryMARW txMemory, FilesFacade ff, Path path, int pathLen, ObjList<String> columnNames) {
if (!PartitionBy.isPartitioned(partitionBy)) {
LongList result = new LongList();
readColumnTopsForPartition(result, columnNames, columnCount, partitionBy, TX_DEFAULT_PARTITION_TIMESTAMP_MIG, -1L, ff, path, pathLen);
return result;
}
return readColumnTopsAllPartitions(columnCount, partitionBy, partitionSizeOffset, partitionTableSize, txMemory, ff, path, pathLen, columnNames);
}
private static LongList readColumnTopsAllPartitions(int columnCount, int partitionBy, long partitionSizeOffset, int partitionTableSize, MemoryMARW txMemory, FilesFacade ff, Path path, int pathLen, ObjList<String> columnNames) {
LongList result = new LongList();
int partitionCount = partitionTableSize / 8 / 4; // each attached partition is recorded as 4 longs (32 bytes)
long offset = partitionSizeOffset + 4; // skip the 4-byte partition table size prefix
long prevPartition = Long.MIN_VALUE;
long txSize = txMemory.size() - 4 * 8;
for (int partitionIndex = 0; partitionIndex < partitionCount; partitionIndex++) {
if (offset > txSize) {
throw CairoException.critical(0).put("corrupt _txn file ").put(path.trimTo(pathLen).$()).put(", file is too small to read offset ").put(offset);
}
long partitionTs = txMemory.getLong(offset);
if (partitionTs <= prevPartition) {
throw CairoException.critical(0).put("corrupt _txn file, partitions are not ordered at ").put(path.trimTo(pathLen).$());
}
long partitionNameTxn = txMemory.getLong(offset + PARTITION_NAME_TX_OFFSET_MIG * 8);
readColumnTopsForPartition(result, columnNames, columnCount, partitionBy, partitionTs, partitionNameTxn, ff, path, pathLen);
offset += 4 * 8;
prevPartition = partitionTs;
}
return result;
}
private static void readColumnTopsForPartition(LongList tops, ObjList<String> columnNames, int columnCount, int partitionBy, long partitionTimestamp, long partitionNameTxn, FilesFacade ff, Path path, int pathLen) {
tops.add(partitionTimestamp);
path.trimTo(pathLen);
setPathForPartition(path, partitionBy, partitionTimestamp, partitionNameTxn);
int partitionPathLen = path.size();
if (ff.exists(path.put(Files.SEPARATOR).$())) {
for (int i = 0; i < columnCount; i++) {
path.trimTo(partitionPathLen);
String columnName = columnNames.get(i);
long columnTop = -1;
if (ff.exists(dFile(path, columnName))) {
columnTop = readColumnTop(ff, path.trimTo(partitionPathLen), columnName, partitionPathLen);
}
tops.add(columnTop);
}
} else {
// Sometimes the _txn file does not match the table directories, e.g. when a snapshot is inconsistent.
// Assume the column files are present exactly as in the previous partition,
// except that a column which existed there with a non-zero column top gets a top of 0 here.
if (tops.size() > columnCount) {
tops.add(tops, tops.size() - columnCount - 1, tops.size() - 1);
for (int i = tops.size() - columnCount, n = tops.size(); i < n; i++) {
if (tops.getQuick(i) > 0) {
tops.setQuick(i, 0);
}
}
} else {
for (int i = 0; i < columnCount; i++) {
tops.add(-1L);
}
}
}
}
private static LPSZ topFile(Path path, CharSequence columnName) {
return path.concat(columnName).put(".top").$();
}
private static long writeColumnVersion(Path tablePath, LongList columnTops, int columnCount, ObjList<String> columnNames, MemoryMARW cvMemory) {
int topStep = columnCount + 1;
LongList columnVersions = new LongList();
LongList maxPartitionIndexWithNoColumnList = new LongList();
for (int columnIndex = 0; columnIndex < columnCount; columnIndex++) {
// The column tops list stores long values as follows:
// partition timestamp, then one column top value per column:
// -1: column does not exist in the partition
//  0: column is present with no column top
// >0: column is present and has a column top
int maxPartitionIndexWithNoColumn = -1;
for (int partitionIndex = 0; partitionIndex < columnTops.size(); partitionIndex += topStep) {
long columnTop = columnTops.getQuick(partitionIndex + columnIndex + 1);
if (columnTop < 0) {
maxPartitionIndexWithNoColumn = partitionIndex;
}
}
if (maxPartitionIndexWithNoColumn != -1) {
if (maxPartitionIndexWithNoColumn + topStep >= columnTops.size()) {
throw CairoException.critical(0).put("Table ").put(tablePath).put(" column '").put(columnNames.getQuick(columnIndex)).put("' is not present in the last partition.");
}
long columnAddedPartitionTs = columnTops.getQuick(maxPartitionIndexWithNoColumn + topStep);
columnVersions.add(CV_COL_TOP_DEFAULT_PARTITION_MIG, columnIndex, -1L, columnAddedPartitionTs);
}
maxPartitionIndexWithNoColumnList.add(maxPartitionIndexWithNoColumn);
}
// Column Version file must be sorted by Partition Timestamp first and then by Column Index
// Go partition by partition to keep adding records sorted
for (int partitionIndex = 0; partitionIndex < columnTops.size(); partitionIndex += topStep) {
for (int columnIndex = 0; columnIndex < columnCount; columnIndex++) {
long maxPartitionIndexWithNoColumn = maxPartitionIndexWithNoColumnList.getQuick(columnIndex);
long partitionTs = columnTops.getQuick(partitionIndex);
long columnTop = columnTops.getQuick(partitionIndex + columnIndex + 1);
if (columnTop > 0 || (columnTop == 0 && partitionIndex < maxPartitionIndexWithNoColumn)) {
columnVersions.add(partitionTs, columnIndex, -1, columnTop);
}
}
}
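// Each record appended above is 4 longs: partition timestamp, column index,
// column name txn (always -1 here, i.e. the default), and column top. For example
// (hypothetical values), a column at index 2 with a 42-row top in the partition
// at timestamp 1640995200000000 is stored as the quad (1640995200000000, 2, -1, 42).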
// Flush column tops to the file.
for (int i = 0; i < columnVersions.size(); i++) {
cvMemory.putLong(Mig620.CV_HEADER_SIZE + i * 8L, columnVersions.getQuick(i));
}
int sizeBytes = columnVersions.size() * 8;
cvMemory.jumpTo(sizeBytes + Mig620.CV_HEADER_SIZE);
return sizeBytes;
}
static void migrate(MigrationContext migrationContext) {
final FilesFacade ff = migrationContext.getFf();
final Path path = migrationContext.getTablePath();
int pathLen = path.size();
path.concat(TXN_FILE_NAME_MIG);
EngineMigration.backupFile(
ff,
path,
migrationContext.getTablePath2(),
TXN_FILE_NAME_MIG,
425 // pre-migration table version identifying the backup
);
try (MemoryMARW txMemory = openFileSafe(ff, path.$(), TX_OFFSET_MAP_WRITER_COUNT_MIG + 8)) {
int symbolCount = txMemory.getInt(TX_OFFSET_MAP_WRITER_COUNT_MIG);
long partitionSizeOffset = TX_OFFSET_MAP_WRITER_COUNT_MIG + 4 + symbolCount * 8L; // 4-byte symbol count followed by 8 bytes per symbol column
int partitionTableSize = txMemory.size() > partitionSizeOffset ? txMemory.getInt(partitionSizeOffset) : 0;
long existingTotalSize = partitionSizeOffset + 4 + partitionTableSize;
long txn = txMemory.getLong(TXN_OFFSET_MIG);
createColumnVersionFile(txMemory, partitionSizeOffset, partitionTableSize, migrationContext, path, pathLen);
migrateTxn(txMemory, symbolCount, partitionTableSize, existingTotalSize, txn);
}
}
}