All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.h2.mvstore.RandomAccessStore Maven / Gradle / Ivy

The newest version!
/*
 * Copyright 2004-2023 H2 Group. Multiple-Licensed under the MPL 2.0,
 * and the EPL 1.0 (https://h2database.com/html/license.html).
 * Initial Developer: H2 Group
 */
package org.h2.mvstore;

import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.PriorityQueue;
import java.util.Queue;

/**
 * Class RandomAccessStore.
 *
 * <UL>
 * <LI> 4/5/20 2:51 PM initial creation
 * </UL>
* * @author Andrei Tokar */ public abstract class RandomAccessStore extends FileStore { /** * The free spaces between the chunks. The first block to use is block 2 * (the first two blocks are the store header). */ protected final FreeSpaceBitSet freeSpace = new FreeSpaceBitSet(2, BLOCK_SIZE); /** * Allocation mode: * false - new chunk is always allocated at the end of file * true - new chunk is allocated as close to the beginning of file, as possible */ private volatile boolean reuseSpace = true; private long reservedLow; private long reservedHigh; public RandomAccessStore(Map config) { super(config); } @Override protected final SFChunk createChunk(int newChunkId) { return new SFChunk(newChunkId); } @Override public SFChunk createChunk(String s) { return new SFChunk(s); } @Override protected SFChunk createChunk(Map map) { return new SFChunk(map); } /** * Mark the space as in use. * * @param pos the position in bytes * @param length the number of bytes */ @Override public void markUsed(long pos, int length) { freeSpace.markUsed(pos, length); } /** * Allocate a number of blocks and mark them as used. * * @param length the number of bytes to allocate * @param reservedLow start block index of the reserved area (inclusive) * @param reservedHigh end block index of the reserved area (exclusive), * special value -1 means beginning of the infinite free area * @return the start position in bytes */ private long allocate(int length, long reservedLow, long reservedHigh) { return freeSpace.allocate(length, reservedLow, reservedHigh); } /** * Calculate starting position of the prospective allocation. 
* * @param blocks the number of blocks to allocate * @param reservedLow start block index of the reserved area (inclusive) * @param reservedHigh end block index of the reserved area (exclusive), * special value -1 means beginning of the infinite free area * @return the starting block index */ private long predictAllocation(int blocks, long reservedLow, long reservedHigh) { return freeSpace.predictAllocation(blocks, reservedLow, reservedHigh); } @Override public boolean shouldSaveNow(int unsavedMemory, int autoCommitMemory) { return unsavedMemory > autoCommitMemory; } private boolean isFragmented() { return freeSpace.isFragmented(); } @Override public boolean isSpaceReused() { return reuseSpace; } @Override public void setReuseSpace(boolean reuseSpace) { this.reuseSpace = reuseSpace; } @Override protected void freeChunkSpace(Iterable chunks) { for (SFChunk chunk : chunks) { freeChunkSpace(chunk); } assert validateFileLength(String.valueOf(chunks)); } private void freeChunkSpace(SFChunk chunk) { long start = chunk.block * BLOCK_SIZE; int length = chunk.len * BLOCK_SIZE; free(start, length); } /** * Mark the space as free. 
* * @param pos the position in bytes * @param length the number of bytes */ protected void free(long pos, int length) { freeSpace.free(pos, length); } @Override public int getFillRate() { saveChunkLock.lock(); try { return freeSpace.getFillRate(); } finally { saveChunkLock.unlock(); } } @Override protected final boolean validateFileLength(String msg) { assert saveChunkLock.isHeldByCurrentThread(); assert getFileLengthInUse() == measureFileLengthInUse() : getFileLengthInUse() + " != " + measureFileLengthInUse() + " " + msg; return true; } private long measureFileLengthInUse() { assert saveChunkLock.isHeldByCurrentThread(); long size = 2; for (SFChunk c : getChunks().values()) { if (c.isAllocated()) { size = Math.max(size, c.block + c.len); } } return size * BLOCK_SIZE; } long getFirstFree() { return freeSpace.getFirstFree(); } long getFileLengthInUse() { return freeSpace.getLastFree(); } @Override protected void readStoreHeader(boolean recoveryMode) { SFChunk newest = null; boolean assumeCleanShutdown = true; boolean validStoreHeader = false; // find out which chunk and version are the newest // read the first two blocks ByteBuffer fileHeaderBlocks = readFully((SFChunk)null, 0, 2 * FileStore.BLOCK_SIZE); byte[] buff = new byte[FileStore.BLOCK_SIZE]; for (int i = 0; i <= FileStore.BLOCK_SIZE; i += FileStore.BLOCK_SIZE) { fileHeaderBlocks.get(buff); // the following can fail for various reasons try { HashMap m = DataUtils.parseChecksummedMap(buff); if (m == null) { assumeCleanShutdown = false; continue; } long version = DataUtils.readHexLong(m, FileStore.HDR_VERSION, 0); // if both header blocks do agree on version // we'll continue on happy path - assume that previous shutdown was clean assumeCleanShutdown = assumeCleanShutdown && (newest == null || version == newest.version); if (newest == null || version > newest.version) { validStoreHeader = true; storeHeader.putAll(m); int chunkId = DataUtils.readHexInt(m, FileStore.HDR_CHUNK, 0); long block = 
DataUtils.readHexLong(m, FileStore.HDR_BLOCK, 2); SFChunk test = readChunkHeaderAndFooter(block, chunkId); if (test != null) { newest = test; } } } catch (Exception ignore) { assumeCleanShutdown = false; } } if (!validStoreHeader) { throw DataUtils.newMVStoreException( DataUtils.ERROR_FILE_CORRUPT, "Store header is corrupt: {0}", this); } processCommonHeaderAttributes(); assumeCleanShutdown = assumeCleanShutdown && newest != null && !recoveryMode; if (assumeCleanShutdown) { assumeCleanShutdown = DataUtils.readHexInt(storeHeader, FileStore.HDR_CLEAN, 0) != 0; } // assert getChunks().size() <= 1 : getChunks().size(); long fileSize = size(); long blocksInStore = fileSize / FileStore.BLOCK_SIZE; Comparator chunkComparator = (one, two) -> { int result = Long.compare(two.version, one.version); if (result == 0) { // out of two copies of the same chunk we prefer the one // close to the beginning of file (presumably later version) result = Long.compare(one.block, two.block); } return result; }; Map validChunksByLocation = new HashMap<>(); if (assumeCleanShutdown) { // quickly check latest 20 chunks referenced in meta table Queue chunksToVerify = new PriorityQueue<>(20, Collections.reverseOrder(chunkComparator)); try { setLastChunk(newest); // load the chunk metadata: although meta's root page resides in the lastChunk, // traversing meta map might recursively load another chunk(s) for (SFChunk c : getChunksFromLayoutMap()) { // might be there already, due to meta traversal // see readPage() ... 
getChunkIfFound() chunksToVerify.offer(c); if (chunksToVerify.size() == 20) { chunksToVerify.poll(); } } SFChunk c; while (assumeCleanShutdown && (c = chunksToVerify.poll()) != null) { SFChunk test = readChunkHeaderAndFooter(c.block, c.id); assumeCleanShutdown = test != null; if (assumeCleanShutdown) { validChunksByLocation.put(test.block, test); } } } catch(IllegalStateException ignored) { assumeCleanShutdown = false; } } else { SFChunk tailChunk = discoverChunk(blocksInStore); if (tailChunk != null) { blocksInStore = tailChunk.block; // for a possible full scan later on validChunksByLocation.put(blocksInStore, tailChunk); if (newest == null || tailChunk.version > newest.version) { newest = tailChunk; } } if (newest != null) { // read the chunk header and footer, // and follow the chain of next chunks while (true) { validChunksByLocation.put(newest.block, newest); if (newest.next == 0 || newest.next >= blocksInStore) { // no (valid) next break; } SFChunk test = readChunkHeaderAndFooter(newest.next, newest.id + 1); if (test == null || test.version <= newest.version) { break; } newest = test; } } } if (!assumeCleanShutdown) { // now we know, that previous shutdown did not go well and file // is possibly corrupted but there is still hope for a quick // recovery boolean quickRecovery = !recoveryMode && findLastChunkWithCompleteValidChunkSet(chunkComparator, validChunksByLocation, false); if (!quickRecovery) { // scan whole file and try to fetch chunk header and/or footer out of every block // matching pairs with nothing in-between are considered as valid chunk long block = blocksInStore; SFChunk tailChunk; while ((tailChunk = discoverChunk(block)) != null) { block = tailChunk.block; validChunksByLocation.put(block, tailChunk); } if (!findLastChunkWithCompleteValidChunkSet(chunkComparator, validChunksByLocation, true) && hasPersistentData()) { throw DataUtils.newMVStoreException( DataUtils.ERROR_FILE_CORRUPT, "File is corrupted - unable to recover a valid set of 
chunks"); } } } clear(); // build the free space list for (SFChunk c : getChunks().values()) { if (c.isAllocated()) { long start = c.block * FileStore.BLOCK_SIZE; int length = c.len * FileStore.BLOCK_SIZE; markUsed(start, length); } if (!c.isLive()) { registerDeadChunk(c); } } assert validateFileLength("on open"); } @Override protected void initializeStoreHeader(long time) { initializeCommonHeaderAttributes(time); writeStoreHeader(); } @Override protected final void allocateChunkSpace(SFChunk chunk, WriteBuffer buff) { long reservedLow = this.reservedLow; long reservedHigh = this.reservedHigh > 0 ? this.reservedHigh : isSpaceReused() ? 0 : getAfterLastBlock(); long filePos = allocate(buff.limit(), reservedLow, reservedHigh); // calculate and set the likely next position if (reservedLow > 0 || reservedHigh == reservedLow) { chunk.next = predictAllocation(chunk.len, 0, 0); } else { // just after this chunk chunk.next = 0; } chunk.block = filePos / BLOCK_SIZE; } @Override protected final void writeChunk(SFChunk chunk, WriteBuffer buffer) { long filePos = chunk.block * BLOCK_SIZE; writeFully(chunk, filePos, buffer.getBuffer()); // end of the used space is not necessarily the end of the file boolean storeAtEndOfFile = filePos + buffer.limit() >= size(); boolean shouldWriteStoreHeader = shouldWriteStoreHeader(chunk, storeAtEndOfFile); lastChunk = chunk; if (shouldWriteStoreHeader) { writeStoreHeader(); } if (!storeAtEndOfFile) { // may only shrink after the store header was written shrinkStoreIfPossible(1); } } private boolean shouldWriteStoreHeader(SFChunk c, boolean storeAtEndOfFile) { // whether we need to write the store header boolean writeStoreHeader = false; if (!storeAtEndOfFile) { SFChunk chunk = lastChunk; if (chunk == null) { writeStoreHeader = true; } else if (chunk.next != c.block) { // the last prediction did not matched writeStoreHeader = true; } else { long headerVersion = DataUtils.readHexLong(storeHeader, HDR_VERSION, 0); if (chunk.version - 
headerVersion > 20) { // we write after at least every 20 versions writeStoreHeader = true; } else { for (int chunkId = DataUtils.readHexInt(storeHeader, HDR_CHUNK, 0); !writeStoreHeader && chunkId <= chunk.id; ++chunkId) { // one of the chunks in between // was removed writeStoreHeader = !getChunks().containsKey(chunkId); } } } } if (storeHeader.remove(HDR_CLEAN) != null) { writeStoreHeader = true; } return writeStoreHeader; } @Override protected final void writeCleanShutdownMark() { shrinkStoreIfPossible(0); storeHeader.put(HDR_CLEAN, 1); writeStoreHeader(); } @Override protected final void adjustStoreToLastChunk() { storeHeader.put(HDR_CLEAN, 1); writeStoreHeader(); readStoreHeader(false); } /** * Compact store file, that is, compact blocks that have a low * fill rate, and move chunks next to each other. This will typically * shrink the file. Changes are flushed to the file, and old * chunks are overwritten. * * @param thresholdFillRate do not compact if store fill rate above this value (0-100) * @param maxCompactTime the maximum time in milliseconds to compact * @param maxWriteSize the maximum amount of data to be written as part of this call */ @Override protected void compactStore(int thresholdFillRate, long maxCompactTime, int maxWriteSize, MVStore mvStore) { setRetentionTime(0); long stopAt = System.nanoTime() + maxCompactTime * 1_000_000L; while (compact(thresholdFillRate, maxWriteSize)) { sync(); compactMoveChunks(thresholdFillRate, maxWriteSize, mvStore); if (System.nanoTime() - stopAt > 0L) { break; } } } /** * Compact the store by moving all chunks next to each other, if there is * free space between chunks. This might temporarily increase the file size. * Chunks are overwritten irrespective of the current retention time. Before * overwriting chunks and before resizing the file, syncFile() is called. 
* * @param targetFillRate do nothing if the file store fill rate is higher * than this * @param moveSize the number of bytes to move * @param mvStore owner of this store */ public void compactMoveChunks(int targetFillRate, long moveSize, MVStore mvStore) { if (isSpaceReused()) { mvStore.executeFilestoreOperation(() -> { dropUnusedChunks(); saveChunkLock.lock(); try { if (hasPersistentData() && getFillRate() <= targetFillRate) { compactMoveChunks(moveSize); } } finally { saveChunkLock.unlock(); } }); } } private void compactMoveChunks(long moveSize) { long start = getFirstFree() / FileStore.BLOCK_SIZE; Iterable chunksToMove = findChunksToMove(start, moveSize); if (chunksToMove != null) { compactMoveChunks(chunksToMove); } } private Iterable findChunksToMove(long startBlock, long moveSize) { long maxBlocksToMove = moveSize / FileStore.BLOCK_SIZE; Iterable result = null; if (maxBlocksToMove > 0) { PriorityQueue queue = new PriorityQueue<>(getChunks().size() / 2 + 1, (o1, o2) -> { // instead of selection just closest to beginning of the file, // pick smaller chunk(s) which sit in between bigger holes int res = Integer.compare(o2.collectPriority, o1.collectPriority); if (res != 0) { return res; } return Long.signum(o2.block - o1.block); }); long size = 0; for (SFChunk chunk : getChunks().values()) { if (chunk.isAllocated() && chunk.block > startBlock) { chunk.collectPriority = getMovePriority(chunk); queue.offer(chunk); size += chunk.len; while (size > maxBlocksToMove) { Chunk removed = queue.poll(); if (removed == null) { break; } size -= removed.len; } } } if (!queue.isEmpty()) { ArrayList list = new ArrayList<>(queue); list.sort(Chunk.PositionComparator.instance()); result = list; } } return result; } private int getMovePriority(SFChunk chunk) { return getMovePriority((int)chunk.block); } private void compactMoveChunks(Iterable move) { assert saveChunkLock.isHeldByCurrentThread(); if (move != null) { // this will ensure better recognition of the last chunk // in case 
of power failure, since we are going to move older chunks // to the end of the file writeStoreHeader(); sync(); Iterator iterator = move.iterator(); assert iterator.hasNext(); long leftmostBlock = iterator.next().block; long originalBlockCount = getAfterLastBlock(); // we need to ensure that chunks moved within the following loop // do not overlap with space just released by chunks moved before them, // hence the need to reserve this area [leftmostBlock, originalBlockCount) for (SFChunk chunk : move) { moveChunk(chunk, leftmostBlock, originalBlockCount); } // update the metadata (hopefully within the file) store(leftmostBlock, originalBlockCount); sync(); SFChunk chunkToMove = lastChunk; assert chunkToMove != null; long postEvacuationBlockCount = getAfterLastBlock(); boolean chunkToMoveIsAlreadyInside = chunkToMove.block < leftmostBlock; boolean movedToEOF = !chunkToMoveIsAlreadyInside; // move all chunks, which previously did not fit before reserved area // now we can re-use previously reserved area [leftmostBlock, originalBlockCount), // but need to reserve [originalBlockCount, postEvacuationBlockCount) for (SFChunk c : move) { if (c.block >= originalBlockCount && moveChunk(c, originalBlockCount, postEvacuationBlockCount)) { assert c.block < originalBlockCount; movedToEOF = true; } } assert postEvacuationBlockCount >= getAfterLastBlock(); if (movedToEOF) { boolean moved = moveChunkInside(chunkToMove, originalBlockCount); // store a new chunk with updated metadata (hopefully within a file) store(originalBlockCount, postEvacuationBlockCount); sync(); // if chunkToMove did not fit within originalBlockCount (move is // false), and since now previously reserved area // [originalBlockCount, postEvacuationBlockCount) also can be // used, lets try to move that chunk into this area, closer to // the beginning of the file long lastBoundary = moved || chunkToMoveIsAlreadyInside ? 
postEvacuationBlockCount : chunkToMove.block; moved = !moved && moveChunkInside(chunkToMove, lastBoundary); if (moveChunkInside(lastChunk, lastBoundary) || moved) { store(lastBoundary, -1); } } shrinkStoreIfPossible(0); sync(); } } private void writeStoreHeader() { StringBuilder buff = new StringBuilder(112); if (hasPersistentData()) { storeHeader.put(HDR_BLOCK, lastChunk.block); storeHeader.put(HDR_CHUNK, lastChunk.id); storeHeader.put(HDR_VERSION, lastChunk.version); } DataUtils.appendMap(buff, storeHeader); byte[] bytes = buff.toString().getBytes(StandardCharsets.ISO_8859_1); int checksum = DataUtils.getFletcher32(bytes, 0, bytes.length); DataUtils.appendMap(buff, HDR_FLETCHER, checksum); buff.append('\n'); bytes = buff.toString().getBytes(StandardCharsets.ISO_8859_1); ByteBuffer header = ByteBuffer.allocate(2 * BLOCK_SIZE); header.put(bytes); header.position(BLOCK_SIZE); header.put(bytes); header.rewind(); writeFully(null, 0, header); } private void store(long reservedLow, long reservedHigh) { this.reservedLow = reservedLow; this.reservedHigh = reservedHigh; saveChunkLock.unlock(); try { store(); } finally { saveChunkLock.lock(); this.reservedLow = 0; this.reservedHigh = 0; } } private boolean moveChunkInside(SFChunk chunkToMove, long boundary) { boolean res = chunkToMove.block >= boundary && predictAllocation(chunkToMove.len, boundary, -1) < boundary && moveChunk(chunkToMove, boundary, -1); assert !res || chunkToMove.block + chunkToMove.len <= boundary; return res; } /** * Move specified chunk into free area of the file. "Reserved" area * specifies file interval to be avoided, when un-allocated space will be * chosen for a new chunk's location. 
* * @param chunk to move * @param reservedAreaLow low boundary of reserved area, inclusive * @param reservedAreaHigh high boundary of reserved area, exclusive * @return true if block was moved, false otherwise */ private boolean moveChunk(SFChunk chunk, long reservedAreaLow, long reservedAreaHigh) { // ignore if already removed during the previous store operations // those are possible either as explicit commit calls // or from meta map updates at the end of this method if (!getChunks().containsKey(chunk.id)) { return false; } long start = chunk.block * FileStore.BLOCK_SIZE; int length = chunk.len * FileStore.BLOCK_SIZE; long pos = allocate(length, reservedAreaLow, reservedAreaHigh); long block = pos / FileStore.BLOCK_SIZE; // in the absence of a reserved area, // block should always move closer to the beginning of the file assert reservedAreaHigh > 0 || block <= chunk.block : block + " " + chunk; ByteBuffer readBuff = readFully(chunk, start, length); writeFully(null, pos, readBuff); free(start, length); // can not set chunk's new block/len until it's fully written at new location, // because concurrent reader can pick it up prematurely, chunk.block = block; chunk.next = 0; saveChunkMetadataChanges(chunk); return true; } /** * Shrink the store if possible, and if at least a given percentage can be * saved. 
* * @param minPercent the minimum percentage to save */ @Override protected void shrinkStoreIfPossible(int minPercent) { assert saveChunkLock.isHeldByCurrentThread(); long result = getFileLengthInUse(); assert result == measureFileLengthInUse() : result + " != " + measureFileLengthInUse(); shrinkIfPossible(minPercent); } private void shrinkIfPossible(int minPercent) { if (isReadOnly()) { return; } long end = getFileLengthInUse(); long fileSize = size(); if (end >= fileSize) { return; } if (minPercent > 0 && fileSize - end < BLOCK_SIZE) { return; } int savedPercent = (int) (100 - (end * 100 / fileSize)); if (savedPercent < minPercent) { return; } sync(); truncate(end); } @Override protected void doHousekeeping(MVStore mvStore) throws InterruptedException { int autoCommitMemory = mvStore.getAutoCommitMemory(); int fillRate = getFillRate(); if (isFragmented() && fillRate < getAutoCompactFillRate()) { mvStore.tryExecuteUnderStoreLock(() -> { int moveSize = 2 * autoCommitMemory; if (isIdle()) { moveSize *= 4; } compactMoveChunks(101, moveSize, mvStore); return true; }); } int chunksFillRate = getRewritableChunksFillRate(); int adjustedChunksFillRate = 100 - (100 - chunksFillRate) / 2; int fillRateToCompare = isIdle() ? chunksFillRate : adjustedChunksFillRate; if (fillRateToCompare < getTargetFillRate()) { mvStore.tryExecuteUnderStoreLock(() -> { int writeLimit = autoCommitMemory; if (!isIdle()) { writeLimit /= 4; } if (rewriteChunks(writeLimit, isIdle() ? adjustedChunksFillRate : chunksFillRate)) { dropUnusedChunks(); } return true; }); } } private int getTargetFillRate() { int targetRate = getAutoCompactFillRate(); // use a lower fill rate if there were any file operations since the last time if (!isIdle()) { targetRate /= 2; } return targetRate; } protected abstract void truncate(long size); /** * Mark the file as empty. */ @Override public void clear() { freeSpace.clear(); } /** * Calculates relative "priority" for chunk to be moved. 
* * @param block where chunk starts * @return priority, bigger number indicate that chunk need to be moved sooner */ public int getMovePriority(int block) { return freeSpace.getMovePriority(block); } /** * Get the index of the first block after last occupied one. * It marks the beginning of the last (infinite) free space. * * @return block index */ private long getAfterLastBlock() { assert saveChunkLock.isHeldByCurrentThread(); return getAfterLastBlock_(); } protected long getAfterLastBlock_() { return freeSpace.getAfterLastBlock(); } @Override public Collection getRewriteCandidates() { return isSpaceReused() ? null : Collections.emptyList(); } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy