/*
* Copyright 2004-2019 H2 Group. Multiple-Licensed under the MPL 2.0,
* and the EPL 1.0 (http://h2database.com/html/license.html).
* Initial Developer: H2 Group
*/
package org.h2.mvstore;
import static org.h2.mvstore.MVMap.INITIAL_VERSION;
import java.lang.Thread.UncaughtExceptionHandler;
import java.nio.ByteBuffer;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.Deque;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.Map;
import java.util.PriorityQueue;
import java.util.Queue;
import java.util.Set;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;
import java.util.concurrent.atomic.AtomicReference;
import java.util.concurrent.locks.ReentrantLock;
import org.h2.compress.CompressDeflate;
import org.h2.compress.CompressLZF;
import org.h2.compress.Compressor;
import org.h2.engine.Constants;
import org.h2.mvstore.cache.CacheLongKeyLIRS;
import org.h2.util.MathUtils;
import org.h2.util.Utils;
/*
TODO:
Documentation
- rolling docs review: at "Metadata Map"
- better document that writes are in background thread
- better document how to do non-unique indexes
- document pluggable store and OffHeapStore
TransactionStore:
- ability to disable the transaction log,
if there is only one connection
MVStore:
- better and clearer memory usage accounting rules
(heap memory versus disk memory), so that there is
never an out-of-memory error even for a small heap,
and so that chunks are still relatively big on average
- make sure serialization / deserialization errors don't corrupt the file
- test and possibly improve compact operation (for large dbs)
- automated 'kill process' and 'power failure' test
- defragment (re-creating maps, especially those with small pages)
- store number of write operations per page (maybe defragment
if much different than count)
- r-tree: nearest neighbor search
- use a small object value cache (StringCache), test on Android
for default serialization
- MVStoreTool.dump should dump the data if possible;
possibly using a callback for serialization
- implement a sharded map (in one store, multiple stores)
to support concurrent updates and writes, and very large maps
- to save space when persisting very small transactions,
use a transaction log where only the deltas are stored
- serialization for lists, sets, sorted sets, maps, sorted maps
- maybe rename 'rollback' to 'revert' to distinguish from transactions
- support other compression algorithms (deflate, LZ4,...)
- remove features that are not really needed; simplify the code
possibly using a separate layer or tools
(retainVersion?)
- optional pluggable checksum mechanism (per page), which
requires that everything is a page (including headers)
- rename "store" to "save", as "store" is used in "storeVersion"
- rename setStoreVersion to setDataVersion, setSchemaVersion or similar
- temporary file storage
- simple rollback method (rollback to last committed version)
- MVMap to implement SortedMap, then NavigableMap
- storage that splits database into multiple files,
to speed up compact and allow using trim
(by truncating / deleting empty files)
- add new feature to the file system API to avoid copying data
(reads that returns a ByteBuffer instead of writing into one)
for memory mapped files and off-heap storage
- support log structured merge style operations (blind writes)
using one map per level plus bloom filter
- have a strict call order MVStore -> MVMap -> Page -> FileStore
- autocommit commits, stores, and compacts from time to time;
the background thread should wait at least 90% of the
configured write delay to store changes
- compact* should also store uncommitted changes (if there are any)
- write a LSM-tree (log structured merge tree) utility on top of the MVStore
with blind writes and/or a bloom filter that
internally uses regular maps and merge sort
- chunk metadata: maybe split into static and variable,
or use a small page size for metadata
- data type "string": maybe use prefix compression for keys
- test chunk id rollover
- feature to auto-compact from time to time and on close
- compact very small chunks
- Page: to save memory, combine keys & values into one array
(also children & counts). Maybe remove some other
fields (childrenCount for example)
- Support SortedMap for MVMap
- compact: copy whole pages (without having to open all maps)
- maybe change the length code to have lower gaps
- test with very low limits (such as: short chunks, small pages)
- maybe allow to read beyond the retention time:
when compacting, move live pages in old chunks
to a map (possibly the metadata map) -
this requires a change in the compaction code, plus
a map lookup when reading old data; also, this
old data map needs to be cleaned up somehow;
maybe using an additional timeout
- rollback of removeMap should restore the data -
which has big consequences, as the metadata map
would probably need references to the root nodes of all maps
*/
/**
* A persistent storage for maps.
*/
public class MVStore implements AutoCloseable {
/**
* The block size (physical sector size) of the disk. The store header is
* written twice, one copy in each block, to ensure it survives a crash.
*/
static final int BLOCK_SIZE = 4 * 1024;
private static final int FORMAT_WRITE = 1;
private static final int FORMAT_READ = 1;
/**
* Used to mark a chunk as free, when it was detected that live bookkeeping
* is incorrect.
*/
private static final int MARKED_FREE = 10_000_000;
/**
* Store is open.
*/
private static final int STATE_OPEN = 0;
/**
* Store is about to close now, but is still operational.
* An outstanding store operation by the background writer or another thread may be in progress.
* New updates must not be initiated, unless they are part of the closing procedure itself.
*/
private static final int STATE_STOPPING = 1;
/**
* Store is closing now, and any operation on it may fail.
*/
private static final int STATE_CLOSING = 2;
/**
* Store is closed.
*/
private static final int STATE_CLOSED = 3;
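// Lifecycle sketch, derived from the constants above: the state only moves
// forward, STATE_OPEN -> STATE_STOPPING -> STATE_CLOSING -> STATE_CLOSED,
// and all transitions happen under storeLock (see closeStore() below).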
/**
* Lock which governs access to major store operations: store(), close(), ...
* It should be used in a non-reentrant fashion.
* It serves as a replacement for synchronized(this), except it allows for
* non-blocking lock attempts.
*/
private final ReentrantLock storeLock = new ReentrantLock(true);
/**
* Reference to a background thread, which is expected to be running, if any.
*/
private final AtomicReference<BackgroundWriterThread> backgroundWriterThread = new AtomicReference<>();
private volatile boolean reuseSpace = true;
private volatile int state;
private final FileStore fileStore;
private final boolean fileStoreIsProvided;
private final int pageSplitSize;
private final int keysPerPage;
/**
* The page cache. The default size is 16 MB, and the average size is 2 KB.
* It is split in 16 segments. The stack move distance is 2% of the expected
* number of entries.
*/
final CacheLongKeyLIRS<Page> cache;
/**
* The page chunk references cache. The default size is 4 MB, and the
* average size is 2 KB. It is split in 16 segments. The stack move distance
* is 2% of the expected number of entries.
*/
final CacheLongKeyLIRS<int[]> cacheChunkRef;
/**
* The newest chunk. If nothing was stored yet, this field is not set.
*/
private Chunk lastChunk;
/**
* The map of chunks.
*/
private final ConcurrentHashMap<Integer, Chunk> chunks =
new ConcurrentHashMap<>();
private long updateCounter = 0;
private long updateAttemptCounter = 0;
/**
* The map of temporarily freed storage space caused by freed pages.
* It contains the number of freed entries per chunk.
*/
private final Map<Integer, Chunk> freedPageSpace = new HashMap<>();
/**
* The metadata map. Write access to this map needs to be done under storeLock.
*/
private final MVMap<String, String> meta;
private final ConcurrentHashMap<Integer, MVMap<?, ?>> maps =
new ConcurrentHashMap<>();
private final HashMap<String, Object> storeHeader = new HashMap<>();
private WriteBuffer writeBuffer;
private final AtomicInteger lastMapId = new AtomicInteger();
private int versionsToKeep = 5;
/**
* The compression level for new pages (0 for disabled, 1 for fast, 2 for
* high). Even if disabled, the store may contain (old) compressed pages.
*/
private final int compressionLevel;
private Compressor compressorFast;
private Compressor compressorHigh;
private final UncaughtExceptionHandler backgroundExceptionHandler;
private volatile long currentVersion;
/**
* The version of the last stored chunk, or -1 if nothing was stored so far.
*/
private long lastStoredVersion = INITIAL_VERSION;
/**
* Oldest store version in use. All versions older than this one can be safely dropped.
*/
private final AtomicLong oldestVersionToKeep = new AtomicLong();
/**
* Ordered collection of all version usage counters for all versions starting
* from oldestVersionToKeep and up to current.
*/
private final Deque<TxCounter> versions = new LinkedList<>();
/**
* Counter of open transactions for the latest (current) store version
*/
private volatile TxCounter currentTxCounter = new TxCounter(currentVersion);
/**
* The estimated memory used by unsaved pages. This number is not accurate,
* because it may be changed concurrently, and because temporary pages are
* counted as well.
*/
private int unsavedMemory;
private final int autoCommitMemory;
private volatile boolean saveNeeded;
/**
* The time the store was created, in milliseconds since 1970.
*/
private long creationTime;
/**
* How long to retain old, persisted chunks, in milliseconds. If the value
* is larger than or equal to zero, a chunk is never directly overwritten
* while unused; instead, its unused field is set. If the value is smaller
* than zero, chunks are directly overwritten once unused.
*/
private int retentionTime;
private long lastCommitTime;
/**
* The version of the current store operation (if any).
*/
private volatile long currentStoreVersion = -1;
private volatile boolean metaChanged;
/**
* The delay in milliseconds to automatically commit and write changes.
*/
private int autoCommitDelay;
private final int autoCompactFillRate;
private long autoCompactLastFileOpCount;
private volatile IllegalStateException panicException;
private long lastTimeAbsolute;
private long lastFreeUnusedChunks;
/**
* Create and open the store.
*
* @param config the configuration to use
* @throws IllegalStateException if the file is corrupt, or an exception
* occurred while opening
* @throws IllegalArgumentException if the directory does not exist
*/
MVStore(Map<String, Object> config) {
this.compressionLevel = DataUtils.getConfigParam(config, "compress", 0);
String fileName = (String) config.get("fileName");
FileStore fileStore = (FileStore) config.get("fileStore");
fileStoreIsProvided = fileStore != null;
if(fileStore == null && fileName != null) {
fileStore = new FileStore();
}
this.fileStore = fileStore;
int pgSplitSize = 48; // for "mem:" case it is # of keys
CacheLongKeyLIRS.Config cc = null;
if (this.fileStore != null) {
int mb = DataUtils.getConfigParam(config, "cacheSize", 16);
if (mb > 0) {
cc = new CacheLongKeyLIRS.Config();
cc.maxMemory = mb * 1024L * 1024L;
Object o = config.get("cacheConcurrency");
if (o != null) {
cc.segmentCount = (Integer)o;
}
}
pgSplitSize = 16 * 1024;
}
if (cc != null) {
cache = new CacheLongKeyLIRS<>(cc);
cc.maxMemory /= 4;
cacheChunkRef = new CacheLongKeyLIRS<>(cc);
} else {
cache = null;
cacheChunkRef = null;
}
pgSplitSize = DataUtils.getConfigParam(config, "pageSplitSize", pgSplitSize);
// Make sure pages will fit into cache
if (cache != null && pgSplitSize > cache.getMaxItemSize()) {
pgSplitSize = (int)cache.getMaxItemSize();
}
pageSplitSize = pgSplitSize;
keysPerPage = DataUtils.getConfigParam(config, "keysPerPage", 48);
backgroundExceptionHandler =
(UncaughtExceptionHandler)config.get("backgroundExceptionHandler");
meta = new MVMap<>(this);
if (this.fileStore != null) {
retentionTime = this.fileStore.getDefaultRetentionTime();
// 19 KB memory is about 1 KB storage
int kb = Math.max(1, Math.min(19, Utils.scaleForAvailableMemory(64))) * 1024;
kb = DataUtils.getConfigParam(config, "autoCommitBufferSize", kb);
autoCommitMemory = kb * 1024;
autoCompactFillRate = DataUtils.getConfigParam(config, "autoCompactFillRate", 40);
char[] encryptionKey = (char[]) config.get("encryptionKey");
try {
if (!fileStoreIsProvided) {
boolean readOnly = config.containsKey("readOnly");
this.fileStore.open(fileName, readOnly, encryptionKey);
}
if (this.fileStore.size() == 0) {
creationTime = getTimeAbsolute();
lastCommitTime = creationTime;
storeHeader.put("H", 2);
storeHeader.put("blockSize", BLOCK_SIZE);
storeHeader.put("format", FORMAT_WRITE);
storeHeader.put("created", creationTime);
writeStoreHeader();
} else {
readStoreHeader();
}
} catch (IllegalStateException e) {
panic(e);
} finally {
if (encryptionKey != null) {
Arrays.fill(encryptionKey, (char) 0);
}
}
lastCommitTime = getTimeSinceCreation();
Set<String> rootsToRemove = new HashSet<>();
for (Iterator<String> it = meta.keyIterator("root."); it.hasNext();) {
String key = it.next();
if (!key.startsWith("root.")) {
break;
}
String mapId = key.substring(key.lastIndexOf('.') + 1);
if(!meta.containsKey("map."+mapId)) {
rootsToRemove.add(key);
}
}
for (String key : rootsToRemove) {
meta.remove(key);
markMetaChanged();
}
// setAutoCommitDelay starts the thread, but only if
// the parameter is different from the old value
int delay = DataUtils.getConfigParam(config, "autoCommitDelay", 1000);
setAutoCommitDelay(delay);
} else {
autoCommitMemory = 0;
autoCompactFillRate = 0;
}
}
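// Configuration sketch: this constructor is package-private and the config
// map is normally assembled by a builder rather than by hand; the keys shown
// are the ones read above, and "data.mv" is a hypothetical file name.
//
//   HashMap<String, Object> config = new HashMap<>();
//   config.put("fileName", "data.mv");   // file-backed store (omit for in-memory)
//   config.put("cacheSize", 16);         // page cache size, in MB
//   config.put("compress", 1);           // 0 = off, 1 = fast, 2 = high
//   config.put("autoCommitDelay", 1000); // background commit delay, in ms
//   MVStore store = new MVStore(config);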
private void panic(IllegalStateException e) {
if (isOpen()) {
handleException(e);
panicException = e;
closeImmediately();
}
throw e;
}
public IllegalStateException getPanicException() {
return panicException;
}
/**
* Open a store in exclusive mode. For a file-based store, the parent
* directory must already exist.
*
* @param fileName the file name (null for in-memory)
* @return the store
*/
public static MVStore open(String fileName) {
HashMap<String, Object> config = new HashMap<>();
config.put("fileName", fileName);
return new MVStore(config);
}
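// Usage sketch for open(): the store is AutoCloseable, so try-with-resources
// works, and passing null opens a purely in-memory store ("data.mv" is a
// hypothetical file name):
//
//   try (MVStore store = MVStore.open("data.mv")) {
//       MVMap<Integer, String> map = store.openMap("data");
//       map.put(1, "Hello");
//       store.commit();
//   }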
/**
* Find position of the root page for historical version of the map.
*
* @param mapId to find the old version for
* @param version the version
* @return position of the root Page
*/
long getRootPos(int mapId, long version) {
MVMap<String, String> oldMeta = getMetaMap(version);
return getRootPos(oldMeta, mapId);
}
/**
* Open a map with the default settings. The map is automatically created if
* it does not yet exist. If a map with this name is already open, this map
* is returned.
*
* @param <K> the key type
* @param <V> the value type
* @param name the name of the map
* @return the map
*/
public <K, V> MVMap<K, V> openMap(String name) {
return openMap(name, new MVMap.Builder<K, V>());
}
/**
* Open a map with the given builder. The map is automatically created if it
* does not yet exist. If a map with this name is already open, this map is
* returned.
*
* @param <M> the map type
* @param <K> the key type
* @param <V> the value type
* @param name the name of the map
* @param builder the map builder
* @return the map
*/
public <M extends MVMap<K, V>, K, V> M openMap(String name, MVMap.MapBuilder<M, K, V> builder) {
int id = getMapId(name);
M map;
if (id >= 0) {
map = openMap(id, builder);
assert builder.getKeyType() == null || map.getKeyType().getClass().equals(builder.getKeyType().getClass());
assert builder.getValueType() == null || map.getValueType().getClass().equals(builder.getValueType()
.getClass());
} else {
HashMap<String, Object> c = new HashMap<>();
id = lastMapId.incrementAndGet();
assert getMap(id) == null;
c.put("id", id);
c.put("createVersion", currentVersion);
map = builder.create(this, c);
String x = Integer.toHexString(id);
meta.put(MVMap.getMapKey(id), map.asString(name));
meta.put("name." + name, x);
map.setRootPos(0, lastStoredVersion);
markMetaChanged();
@SuppressWarnings("unchecked")
M existingMap = (M) maps.putIfAbsent(id, map);
if (existingMap != null) {
map = existingMap;
}
}
return map;
}
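// Builder sketch: openMap(String) above is shorthand for passing a default
// MVMap.Builder; a typed builder pins the key/value types explicitly, which
// the asserts above then re-check when an existing map is reopened:
//
//   MVMap<String, Integer> counters =
//           store.openMap("counters", new MVMap.Builder<String, Integer>());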
private <M extends MVMap<K, V>, K, V> M openMap(int id, MVMap.MapBuilder<M, K, V> builder) {
storeLock.lock();
try {
@SuppressWarnings("unchecked")
M map = (M) getMap(id);
if (map == null) {
String configAsString = meta.get(MVMap.getMapKey(id));
HashMap<String, Object> config;
if (configAsString != null) {
config = new HashMap<String, Object>(DataUtils.parseMap(configAsString));
} else {
config = new HashMap<>();
}
config.put("id", id);
map = builder.create(this, config);
long root = getRootPos(meta, id);
map.setRootPos(root, lastStoredVersion);
maps.put(id, map);
}
return map;
} finally {
storeLock.unlock();
}
}
/**
* Get map by id.
*
* @param <K> the key type
* @param <V> the value type
* @param id map id
* @return Map
*/
public <K, V> MVMap<K, V> getMap(int id) {
checkOpen();
@SuppressWarnings("unchecked")
MVMap<K, V> map = (MVMap<K, V>) maps.get(id);
return map;
}
/**
* Get the set of all map names.
*
* @return the set of names
*/
public Set<String> getMapNames() {
HashSet<String> set = new HashSet<>();
checkOpen();
for (Iterator<String> it = meta.keyIterator("name."); it.hasNext();) {
String x = it.next();
if (!x.startsWith("name.")) {
break;
}
String mapName = x.substring("name.".length());
set.add(mapName);
}
return set;
}
/**
* Get the metadata map. This data is for informational purposes only. The
* data is subject to change in future versions.
*
* The data in this map should not be modified (changing system data may
* corrupt the store). If modifications are needed, they need to be
* synchronized on the store.
*
* The metadata map contains the following entries:
*
* chunk.{chunkId} = {chunk metadata}
* name.{name} = {mapId}
* map.{mapId} = {map metadata}
* root.{mapId} = {root position}
* setting.storeVersion = {version}
*
*
* @return the metadata map
*/
public MVMap<String, String> getMetaMap() {
checkOpen();
return meta;
}
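// Inspection sketch, using the entry layout documented above and the same
// keyIterator(prefix) pattern this class itself uses:
//
//   MVMap<String, String> metaMap = store.getMetaMap();
//   for (Iterator<String> it = metaMap.keyIterator("chunk."); it.hasNext();) {
//       String key = it.next();
//       if (!key.startsWith("chunk.")) {
//           break;
//       }
//       System.out.println(key + " = " + metaMap.get(key));
//   }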
private MVMap<String, String> getMetaMap(long version) {
Chunk c = getChunkForVersion(version);
DataUtils.checkArgument(c != null, "Unknown version {0}", version);
c = readChunkHeader(c.block);
MVMap<String, String> oldMeta = meta.openReadOnly(c.metaRootPos, version);
return oldMeta;
}
private Chunk getChunkForVersion(long version) {
Chunk newest = null;
for (Chunk c : chunks.values()) {
if (c.version <= version) {
if (newest == null || c.id > newest.id) {
newest = c;
}
}
}
return newest;
}
/**
* Check whether a given map exists.
*
* @param name the map name
* @return true if it exists
*/
public boolean hasMap(String name) {
return meta.containsKey("name." + name);
}
/**
* Check whether a given map exists and has data.
*
* @param name the map name
* @return true if it exists and has data.
*/
public boolean hasData(String name) {
return hasMap(name) && getRootPos(meta, getMapId(name)) != 0;
}
private void markMetaChanged() {
// changes in the metadata alone are usually not detected, as the meta
// map is changed after storing
metaChanged = true;
}
private void readStoreHeader() {
Chunk newest = null;
boolean validStoreHeader = false;
// find out which chunk and version are the newest
// read the first two blocks
ByteBuffer fileHeaderBlocks = fileStore.readFully(0, 2 * BLOCK_SIZE);
byte[] buff = new byte[BLOCK_SIZE];
for (int i = 0; i <= BLOCK_SIZE; i += BLOCK_SIZE) {
fileHeaderBlocks.get(buff);
// the following can fail for various reasons
try {
HashMap<String, String> m = DataUtils.parseChecksummedMap(buff);
if (m == null) {
continue;
}
int blockSize = DataUtils.readHexInt(
m, "blockSize", BLOCK_SIZE);
if (blockSize != BLOCK_SIZE) {
throw DataUtils.newIllegalStateException(
DataUtils.ERROR_UNSUPPORTED_FORMAT,
"Block size {0} is currently not supported",
blockSize);
}
long version = DataUtils.readHexLong(m, "version", 0);
if (newest == null || version > newest.version) {
validStoreHeader = true;
storeHeader.putAll(m);
creationTime = DataUtils.readHexLong(m, "created", 0);
int chunkId = DataUtils.readHexInt(m, "chunk", 0);
long block = DataUtils.readHexLong(m, "block", 0);
Chunk test = readChunkHeaderAndFooter(block);
if (test != null && test.id == chunkId) {
newest = test;
}
}
} catch (Exception ignore) {/**/}
}
if (!validStoreHeader) {
throw DataUtils.newIllegalStateException(
DataUtils.ERROR_FILE_CORRUPT,
"Store header is corrupt: {0}", fileStore);
}
long format = DataUtils.readHexLong(storeHeader, "format", 1);
if (format > FORMAT_WRITE && !fileStore.isReadOnly()) {
throw DataUtils.newIllegalStateException(
DataUtils.ERROR_UNSUPPORTED_FORMAT,
"The write format {0} is larger " +
"than the supported format {1}, " +
"and the file was not opened in read-only mode",
format, FORMAT_WRITE);
}
format = DataUtils.readHexLong(storeHeader, "formatRead", format);
if (format > FORMAT_READ) {
throw DataUtils.newIllegalStateException(
DataUtils.ERROR_UNSUPPORTED_FORMAT,
"The read format {0} is larger " +
"than the supported format {1}",
format, FORMAT_READ);
}
lastStoredVersion = INITIAL_VERSION;
chunks.clear();
long now = System.currentTimeMillis();
// calculate the year (doesn't have to be exact;
// we assume 365.25 days per year, * 4 = 1461)
int year = 1970 + (int) (now / (1000L * 60 * 60 * 6 * 1461));
if (year < 2014) {
// if the year is before 2014,
// we assume the system doesn't have a real-time clock,
// and we set the creationTime to the past, so that
// existing chunks are overwritten
creationTime = now - fileStore.getDefaultRetentionTime();
} else if (now < creationTime) {
// the system time was set to the past:
// we change the creation time
creationTime = now;
storeHeader.put("created", creationTime);
}
Chunk test = readChunkFooter(fileStore.size());
if (test != null) {
test = readChunkHeaderAndFooter(test.block);
if (test != null) {
if (newest == null || test.version > newest.version) {
newest = test;
}
}
}
long blocksInStore = fileStore.size() / BLOCK_SIZE;
// this queue will hold potential candidates for lastChunk to fall back to
Queue<Chunk> lastChunkCandidates = new PriorityQueue<>(Math.max(32, (int)(blocksInStore / 4)),
new Comparator<Chunk>() {
@Override
public int compare(Chunk one, Chunk two) {
int result = Long.compare(two.version, one.version);
if (result == 0) {
// out of two versions of the same chunk we prefer the one
// close to the beginning of file (presumably later version)
result = Long.compare(one.block, two.block);
}
return result;
}
});
Map<Long, Chunk> validChunkCacheByLocation = new HashMap<>();
if (newest != null) {
// read the chunk header and footer,
// and follow the chain of next chunks
while (true) {
validChunkCacheByLocation.put(newest.block, newest);
lastChunkCandidates.add(newest);
if (newest.next == 0 ||
newest.next >= blocksInStore) {
// no (valid) next
break;
}
test = readChunkHeaderAndFooter(newest.next);
if (test == null || test.id <= newest.id) {
break;
}
newest = test;
}
}
// Try candidates for "last chunk" in order from newest to oldest
// until a suitable one is found. A suitable candidate has a meta map
// in which all chunk references point to valid locations.
boolean verified = false;
while(!verified && setLastChunk(lastChunkCandidates.poll()) != null) {
verified = true;
// load the chunk metadata: although the meta root page resides in the lastChunk,
// traversing the meta map might recursively load other chunks
Cursor<String, String> cursor = meta.cursor("chunk.");
while (cursor.hasNext() && cursor.next().startsWith("chunk.")) {
Chunk c = Chunk.fromString(cursor.getValue());
assert c.version <= currentVersion;
// might be there already, due to meta traversal
// see readPage() ... getChunkIfFound()
chunks.putIfAbsent(c.id, c);
long block = c.block;
test = validChunkCacheByLocation.get(block);
if (test == null) {
test = readChunkHeaderAndFooter(block);
if (test != null && test.id == c.id) { // chunk is valid
validChunkCacheByLocation.put(block, test);
lastChunkCandidates.offer(test);
continue;
}
} else if (test.id == c.id) { // chunk is valid
// nothing to do, since chunk was already verified
// and registered as potential "last chunk" candidate
continue;
}
// chunk reference is invalid
// this "last chunk" candidate is not suitable
// but we continue to process all references
// to find other potential candidates
verified = false;
}
}
fileStore.clear();
// build the free space list
for (Chunk c : chunks.values()) {
long start = c.block * BLOCK_SIZE;
int length = c.len * BLOCK_SIZE;
fileStore.markUsed(start, length);
}
assert fileStore.getFileLengthInUse() == measureFileLengthInUse() :
fileStore.getFileLengthInUse() + " != " + measureFileLengthInUse();
setWriteVersion(currentVersion);
if (lastStoredVersion == INITIAL_VERSION) {
lastStoredVersion = currentVersion - 1;
}
}
private Chunk setLastChunk(Chunk last) {
chunks.clear();
lastChunk = last;
if (last == null) {
// no valid chunk
lastMapId.set(0);
currentVersion = 0;
lastStoredVersion = INITIAL_VERSION;
meta.setRootPos(0, INITIAL_VERSION);
} else {
lastMapId.set(last.mapId);
currentVersion = last.version;
chunks.put(last.id, last);
lastStoredVersion = currentVersion - 1;
meta.setRootPos(last.metaRootPos, lastStoredVersion);
}
return last;
}
/**
* Read a chunk header and footer, and verify the stored data is consistent.
*
* @param block the block
* @return the chunk, or null if the header or footer don't match or are not
* consistent
*/
private Chunk readChunkHeaderAndFooter(long block) {
Chunk header;
try {
header = readChunkHeader(block);
} catch (Exception e) {
// invalid chunk header: ignore, but stop
return null;
}
if (header == null) {
return null;
}
Chunk footer = readChunkFooter((block + header.len) * BLOCK_SIZE);
if (footer == null || footer.id != header.id) {
return null;
}
return header;
}
/**
* Try to read a chunk footer.
*
* @param end the end of the chunk
* @return the chunk, or null if not successful
*/
private Chunk readChunkFooter(long end) {
// the following can fail for various reasons
try {
// read the chunk footer of the last block of the file
long pos = end - Chunk.FOOTER_LENGTH;
if(pos < 0) {
return null;
}
ByteBuffer lastBlock = fileStore.readFully(pos, Chunk.FOOTER_LENGTH);
byte[] buff = new byte[Chunk.FOOTER_LENGTH];
lastBlock.get(buff);
HashMap<String, String> m = DataUtils.parseChecksummedMap(buff);
if (m != null) {
int chunk = DataUtils.readHexInt(m, "chunk", 0);
Chunk c = new Chunk(chunk);
c.version = DataUtils.readHexLong(m, "version", 0);
c.block = DataUtils.readHexLong(m, "block", 0);
return c;
}
} catch (Exception e) {
// ignore
}
return null;
}
private void writeStoreHeader() {
StringBuilder buff = new StringBuilder(112);
if (lastChunk != null) {
storeHeader.put("block", lastChunk.block);
storeHeader.put("chunk", lastChunk.id);
storeHeader.put("version", lastChunk.version);
}
DataUtils.appendMap(buff, storeHeader);
byte[] bytes = buff.toString().getBytes(StandardCharsets.ISO_8859_1);
int checksum = DataUtils.getFletcher32(bytes, 0, bytes.length);
DataUtils.appendMap(buff, "fletcher", checksum);
buff.append('\n');
bytes = buff.toString().getBytes(StandardCharsets.ISO_8859_1);
ByteBuffer header = ByteBuffer.allocate(2 * BLOCK_SIZE);
header.put(bytes);
header.position(BLOCK_SIZE);
header.put(bytes);
header.rewind();
write(0, header);
}
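// On-disk sketch of what writeStoreHeader() produces: a plain-text map of
// key:value pairs (numbers in hex) followed by a "fletcher" checksum of the
// preceding bytes and a newline, for example something like
//   H:2,block:...,blockSize:1000,chunk:...,format:1,version:...,fletcher:...
// The same bytes are written at offset 0 and at offset BLOCK_SIZE, so a torn
// write of one copy still leaves a valid, checksummed header in the other block.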
private void write(long pos, ByteBuffer buffer) {
try {
fileStore.writeFully(pos, buffer);
} catch (IllegalStateException e) {
panic(e);
}
}
/**
* Close the file and the store. Unsaved changes are written to disk first.
*/
@Override
public void close() {
closeStore(true);
}
/**
* Close the file and the store, without writing anything.
* This will try to stop the background thread (without waiting for it).
* This method ignores all errors.
*/
public void closeImmediately() {
try {
closeStore(false);
} catch (Throwable e) {
handleException(e);
}
}
private void closeStore(boolean normalShutdown) {
// If any other thread has already initiated the closure procedure,
// isClosed() will wait until closure is done, and then we jump out of the loop.
// This is a subtle difference between !isClosed() and isOpen().
while (!isClosed()) {
stopBackgroundThread(normalShutdown);
storeLock.lock();
try {
if (state == STATE_OPEN) {
state = STATE_STOPPING;
try {
try {
if (normalShutdown && fileStore != null && !fileStore.isReadOnly()) {
for (MVMap<?, ?> map : maps.values()) {
if (map.isClosed()) {
if (meta.remove(MVMap.getMapRootKey(map.getId())) != null) {
markMetaChanged();
}
}
}
commit();
shrinkFileIfPossible(0);
}
state = STATE_CLOSING;
// release memory early - this is important when called
// because of out of memory
if (cache != null) {
cache.clear();
}
if (cacheChunkRef != null) {
cacheChunkRef.clear();
}
for (MVMap<?, ?> m : new ArrayList<>(maps.values())) {
m.close();
}
chunks.clear();
maps.clear();
} finally {
if (fileStore != null && !fileStoreIsProvided) {
fileStore.close();
}
}
} finally {
state = STATE_CLOSED;
}
}
} finally {
storeLock.unlock();
}
}
}
/**
* Read a page of data into a ByteBuffer.
*
* @param pos page pos
* @param expectedMapId expected map id for the page
* @return ByteBuffer containing page data.
*/
ByteBuffer readBufferForPage(long pos, int expectedMapId) {
Chunk c = getChunk(pos);
long filePos = c.block * BLOCK_SIZE;
filePos += DataUtils.getPageOffset(pos);
if (filePos < 0) {
throw DataUtils.newIllegalStateException(
DataUtils.ERROR_FILE_CORRUPT,
"Negative position {0}; p={1}, c={2}", filePos, pos, c.toString());
}
long maxPos = (c.block + c.len) * BLOCK_SIZE;
ByteBuffer buff;
int maxLength = DataUtils.getPageMaxLength(pos);
if (maxLength == DataUtils.PAGE_LARGE) {
buff = fileStore.readFully(filePos, 128);
maxLength = buff.getInt();
// read the first bytes again
}
maxLength = (int) Math.min(maxPos - filePos, maxLength);
int length = maxLength;
if (length < 0) {
throw DataUtils.newIllegalStateException(DataUtils.ERROR_FILE_CORRUPT,
"Illegal page length {0} reading at {1}; max pos {2} ", length, filePos, maxPos);
}
buff = fileStore.readFully(filePos, length);
int chunkId = DataUtils.getPageChunkId(pos);
int offset = DataUtils.getPageOffset(pos);
int start = buff.position();
int remaining = buff.remaining();
int pageLength = buff.getInt();
if (pageLength > remaining || pageLength < 4) {
throw DataUtils.newIllegalStateException(DataUtils.ERROR_FILE_CORRUPT,
"File corrupted in chunk {0}, expected page length 4..{1}, got {2}", chunkId, remaining,
pageLength);
}
buff.limit(start + pageLength);
short check = buff.getShort();
int mapId = DataUtils.readVarInt(buff);
if (mapId != expectedMapId) {
throw DataUtils.newIllegalStateException(DataUtils.ERROR_FILE_CORRUPT,
"File corrupted in chunk {0}, expected map id {1}, got {2}", chunkId, expectedMapId, mapId);
}
int checkTest = DataUtils.getCheckValue(chunkId)
^ DataUtils.getCheckValue(offset)
^ DataUtils.getCheckValue(pageLength);
if (check != (short) checkTest) {
throw DataUtils.newIllegalStateException(DataUtils.ERROR_FILE_CORRUPT,
"File corrupted in chunk {0}, expected check value {1}, got {2}", chunkId, checkTest, check);
}
return buff;
}
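// Page prefix layout, as consumed above: an int page length (counted from the
// start of the page), a short check value, then a varint map id. The expected
// check value is the XOR of check values derived from the chunk id, the page
// offset and the page length, so a page read from the wrong position or with
// a wrong length fails fast, before its payload is deserialized.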
/**
* Get the chunk for the given position.
*
* @param pos the position
* @return the chunk
*/
private Chunk getChunk(long pos) {
int chunkId = DataUtils.getPageChunkId(pos);
Chunk c = chunks.get(chunkId);
if (c == null) {
checkOpen();
String s = meta.get(Chunk.getMetaKey(chunkId));
if (s == null) {
throw DataUtils.newIllegalStateException(
DataUtils.ERROR_CHUNK_NOT_FOUND,
"Chunk {0} not found", chunkId);
}
c = Chunk.fromString(s);
if (c.block == Long.MAX_VALUE) {
throw DataUtils.newIllegalStateException(
DataUtils.ERROR_FILE_CORRUPT,
"Chunk {0} is invalid", chunkId);
}
chunks.put(c.id, c);
}
return c;
}
private void setWriteVersion(long version) {
for (Iterator<MVMap<?, ?>> iter = maps.values().iterator(); iter.hasNext(); ) {
MVMap<?, ?> map = iter.next();
if (map.setWriteVersion(version) == null) {
assert map.isClosed();
assert map.getVersion() < getOldestVersionToKeep();
meta.remove(MVMap.getMapRootKey(map.getId()));
markMetaChanged();
iter.remove();
}
}
meta.setWriteVersion(version);
onVersionChange(version);
}
/**
* Unlike regular commit, this method returns immediately if there is a
* commit in progress on another thread; otherwise it acts as a regular commit.
*
* This method may return BEFORE this thread's changes are actually persisted!
*
* @return the new version (incremented if there were changes)
*/
public long tryCommit() {
// we need to prevent re-entrance, which may be possible,
// because meta map is modified within storeNow() and that
// causes beforeWrite() call with possibility of going back here
if ((!storeLock.isHeldByCurrentThread() || currentStoreVersion < 0) &&
storeLock.tryLock()) {
try {
store();
} finally {
storeLock.unlock();
}
}
return currentVersion;
}
/**
* Commit the changes.
*
* This method does nothing if there are no unsaved changes,
* otherwise it increments the current version
* and stores the data (for file based stores).
*
* It is not necessary to call this method when auto-commit is enabled (the default
* setting), as in this case it is automatically called from time to time or
* when enough changes have accumulated. However, it may still be called to
* flush all changes to disk.
*
* At most one store operation may run at any time.
*
* @return the new version (incremented if there were changes)
*/
public long commit() {
// we need to prevent re-entrance, which may be possible,
// because meta map is modified within storeNow() and that
// causes beforeWrite() call with possibility of going back here
if(!storeLock.isHeldByCurrentThread() || currentStoreVersion < 0) {
storeLock.lock();
try {
store();
} finally {
storeLock.unlock();
}
}
return currentVersion;
}
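// Versioning sketch: each effective commit increments the store version, and
// recent versions stay readable until they age out (see versionsToKeep);
// rollbackTo() is defined later in this class, outside this excerpt:
//
//   MVMap<Integer, String> map = store.openMap("data");
//   map.put(1, "a");
//   long v1 = store.commit(); // version containing "a"
//   map.put(1, "b");
//   store.rollbackTo(v1);     // undoes the second put, map.get(1) is "a"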
private void store() {
try {
if (isOpenOrStopping() && hasUnsavedChangesInternal()) {
currentStoreVersion = currentVersion;
if (fileStore == null) {
lastStoredVersion = currentVersion;
//noinspection NonAtomicOperationOnVolatileField
++currentVersion;
setWriteVersion(currentVersion);
metaChanged = false;
} else {
if (fileStore.isReadOnly()) {
throw DataUtils.newIllegalStateException(
DataUtils.ERROR_WRITING_FAILED, "This store is read-only");
}
try {
storeNow();
} catch (IllegalStateException e) {
panic(e);
} catch (Throwable e) {
panic(DataUtils.newIllegalStateException(DataUtils.ERROR_INTERNAL, e.toString(), e));
}
}
}
} finally {
// in any case reset the current store version,
// to allow closing the store
currentStoreVersion = -1;
}
}
private void storeNow() {
assert storeLock.isHeldByCurrentThread();
long time = getTimeSinceCreation();
freeUnusedIfNeeded(time);
int currentUnsavedPageCount = unsavedMemory;
long storeVersion = currentStoreVersion;
long version = ++currentVersion;
lastCommitTime = time;
// the metadata of the last chunk was not stored so far, and needs to be
// set now (it's better not to update right after storing, because that
// would modify the meta map again)
int lastChunkId;
if (lastChunk == null) {
lastChunkId = 0;
} else {
lastChunkId = lastChunk.id;
meta.put(Chunk.getMetaKey(lastChunkId), lastChunk.asString());
markMetaChanged();
// never go backward in time
time = Math.max(lastChunk.time, time);
}
int newChunkId = lastChunkId;
while (true) {
newChunkId = (newChunkId + 1) & Chunk.MAX_ID;
Chunk old = chunks.get(newChunkId);
if (old == null) {
break;
}
if (old.block == Long.MAX_VALUE) {
IllegalStateException e = DataUtils.newIllegalStateException(
DataUtils.ERROR_INTERNAL,
"Last block {0} not stored, possibly due to out-of-memory", old);
panic(e);
}
}
Chunk c = new Chunk(newChunkId);
c.pageCount = Integer.MAX_VALUE;
c.pageCountLive = Integer.MAX_VALUE;
c.maxLen = Long.MAX_VALUE;
c.maxLenLive = Long.MAX_VALUE;
c.metaRootPos = Long.MAX_VALUE;
c.block = Long.MAX_VALUE;
c.len = Integer.MAX_VALUE;
c.time = time;
c.version = version;
c.mapId = lastMapId.get();
c.next = Long.MAX_VALUE;
chunks.put(c.id, c);
ArrayList<Page> changed = new ArrayList<>();
for (Iterator<MVMap<?, ?>> iter = maps.values().iterator(); iter.hasNext(); ) {
MVMap<?, ?> map = iter.next();
RootReference rootReference = map.setWriteVersion(version);
if (rootReference == null) {
assert map.isClosed();
assert map.getVersion() < getOldestVersionToKeep();
meta.remove(MVMap.getMapRootKey(map.getId()));
iter.remove();
} else if (map.getCreateVersion() <= storeVersion && // if map was created after storing started, skip it
!map.isVolatile() &&
map.hasChangesSince(lastStoredVersion)) {
assert rootReference.version <= version : rootReference.version + " > " + version;
Page rootPage = rootReference.root;
if (!rootPage.isSaved() ||
// after deletion previously saved leaf
// may pop up as a root, but we still need
// to save new root pos in meta
rootPage.isLeaf()) {
changed.add(rootPage);
}
}
}
WriteBuffer buff = getWriteBuffer();
// need to patch the header later
c.writeChunkHeader(buff, 0);
int headerLength = buff.position();
c.pageCount = 0;
c.pageCountLive = 0;
c.maxLen = 0;
c.maxLenLive = 0;
for (Page p : changed) {
String key = MVMap.getMapRootKey(p.getMapId());
if (p.getTotalCount() == 0) {
meta.remove(key);
} else {
p.writeUnsavedRecursive(c, buff);
long root = p.getPos();
meta.put(key, Long.toHexString(root));
}
}
applyFreedSpace();
RootReference metaRootReference = meta.setWriteVersion(version);
assert metaRootReference != null;
assert metaRootReference.version == version : metaRootReference.version + " != " + version;
metaChanged = false;
onVersionChange(version);
Page metaRoot = metaRootReference.root;
metaRoot.writeUnsavedRecursive(c, buff);
int chunkLength = buff.position();
// add the store header and round to the next block
int length = MathUtils.roundUpInt(chunkLength +
Chunk.FOOTER_LENGTH, BLOCK_SIZE);
buff.limit(length);
long filePos = allocateFileSpace(length, !reuseSpace);
c.block = filePos / BLOCK_SIZE;
c.len = length / BLOCK_SIZE;
assert fileStore.getFileLengthInUse() == measureFileLengthInUse() :
fileStore.getFileLengthInUse() + " != " + measureFileLengthInUse() + " " + c;
c.metaRootPos = metaRoot.getPos();
// calculate and set the likely next position
if (reuseSpace) {
c.next = fileStore.predictAllocation(c.len * BLOCK_SIZE) / BLOCK_SIZE;
} else {
// just after this chunk
c.next = 0;
}
buff.position(0);
c.writeChunkHeader(buff, headerLength);
buff.position(buff.limit() - Chunk.FOOTER_LENGTH);
buff.put(c.getFooterBytes());
buff.position(0);
write(filePos, buff.getBuffer());
releaseWriteBuffer(buff);
// whether we need to write the store header
boolean writeStoreHeader = false;
// end of the used space is not necessarily the end of the file
boolean storeAtEndOfFile = filePos + length >= fileStore.size();
if (!storeAtEndOfFile) {
if (lastChunk == null) {
writeStoreHeader = true;
} else if (lastChunk.next != c.block) {
// the last prediction did not match
writeStoreHeader = true;
} else {
long headerVersion = DataUtils.readHexLong(
storeHeader, "version", 0);
if (lastChunk.version - headerVersion > 20) {
// we write the store header at least once in 20 versions
writeStoreHeader = true;
} else {
int chunkId = DataUtils.readHexInt(storeHeader, "chunk", 0);
while (true) {
Chunk old = chunks.get(chunkId);
if (old == null) {
// one of the chunks in between
// was removed
writeStoreHeader = true;
break;
}
if (chunkId == lastChunk.id) {
break;
}
chunkId++;
}
}
}
}
lastChunk = c;
if (writeStoreHeader) {
writeStoreHeader();
}
if (!storeAtEndOfFile) {
// may only shrink after the store header was written
shrinkFileIfPossible(1);
}
for (Page p : changed) {
p.writeEnd();
}
metaRoot.writeEnd();
// some pages might have been changed in the meantime (in the newest
// version)
unsavedMemory = Math.max(0, unsavedMemory
- currentUnsavedPageCount);
lastStoredVersion = storeVersion;
}
/**
* Try to free unused chunks. This method doesn't directly write, but can
* change the metadata, and therefore cause a background write.
*/
private void freeUnusedIfNeeded(long time) {
int freeDelay = retentionTime / 5;
if (time - lastFreeUnusedChunks >= freeDelay) {
// set early in case it fails (out of memory or so)
lastFreeUnusedChunks = time;
freeUnusedChunks(true);
}
}
private void freeUnusedChunks(boolean fast) {
assert storeLock.isHeldByCurrentThread();
if (lastChunk != null && reuseSpace) {
Set<Integer> referenced = collectReferencedChunks(fast);
long time = getTimeSinceCreation();
for (Iterator iterator = chunks.values().iterator(); iterator.hasNext(); ) {
Chunk c = iterator.next();
if (c.block != Long.MAX_VALUE && !referenced.contains(c.id)) {
if (canOverwriteChunk(c, time)) {
iterator.remove();
if (meta.remove(Chunk.getMetaKey(c.id)) != null) {
markMetaChanged();
}
long start = c.block * BLOCK_SIZE;
int length = c.len * BLOCK_SIZE;
fileStore.free(start, length);
assert fileStore.getFileLengthInUse() == measureFileLengthInUse() :
fileStore.getFileLengthInUse() + " != " + measureFileLengthInUse();
} else {
if (c.unused == 0) {
c.unused = time;
meta.put(Chunk.getMetaKey(c.id), c.asString());
markMetaChanged();
}
}
}
}
// set it here, to avoid calling it often if it was slow
lastFreeUnusedChunks = getTimeSinceCreation();
}
}
/**
* Collect ids for chunks that are in use.
* @param fast if true, a simplified version is used, which assumes that recent chunks
* are still in-use and does not scan recent versions of the store.
* Also, in this case only the oldest available version of the store is scanned.
* @return set of chunk ids in-use, or null if all chunks should be considered in-use
*/
private Set<Integer> collectReferencedChunks(boolean fast) {
assert lastChunk != null;
final ThreadPoolExecutor executorService = new ThreadPoolExecutor(10, 10, 10L, TimeUnit.SECONDS,
new ArrayBlockingQueue<Runnable>(keysPerPage + 1));
final AtomicInteger executingThreadCounter = new AtomicInteger();
try {
ChunkIdsCollector collector = new ChunkIdsCollector(meta.getId());
long oldestVersionToKeep = getOldestVersionToKeep();
RootReference rootReference = meta.flushAndGetRoot();
if (fast) {
RootReference previous;
while (rootReference.version >= oldestVersionToKeep && (previous = rootReference.previous) != null) {
rootReference = previous;
}
inspectVersion(rootReference, collector, executorService, executingThreadCounter, null);
Page rootPage = rootReference.root;
long pos = rootPage.getPos();
assert rootPage.isSaved();
int chunkId = DataUtils.getPageChunkId(pos);
while (++chunkId <= lastChunk.id) {
collector.registerChunk(chunkId);
}
} else {
Set<Long> inspectedRoots = new HashSet<>();
do {
inspectVersion(rootReference, collector, executorService, executingThreadCounter, inspectedRoots);
} while (rootReference.version >= oldestVersionToKeep
&& (rootReference = rootReference.previous) != null);
}
return collector.getReferenced();
} finally {
executorService.shutdownNow();
}
}
/**
* Scans all maps of a particular store version and marks visited chunks as in-use.
* @param rootReference of the meta map of the version
* @param collector to report visited chunks to
* @param executorService to use for parallel processing
* @param executingThreadCounter counter for threads already in use
* @param inspectedRoots set of page positions for map's roots already inspected
* or null if not to be used
*/
private void inspectVersion(RootReference rootReference, ChunkIdsCollector collector,
ThreadPoolExecutor executorService,
AtomicInteger executingThreadCounter,
Set<Long> inspectedRoots) {
Page rootPage = rootReference.root;
long pos = rootPage.getPos();
if (rootPage.isSaved()) {
if (inspectedRoots != null && !inspectedRoots.add(pos)) {
return;
}
collector.setMapId(meta.getId());
collector.visit(pos, executorService, executingThreadCounter);
}
for (Cursor<String, String> c = new Cursor<>(rootPage, "root."); c.hasNext(); ) {
String key = c.next();
if (!key.startsWith("root.")) {
break;
}
pos = DataUtils.parseHexLong(c.getValue());
if (DataUtils.isPageSaved(pos)) {
if (inspectedRoots == null || inspectedRoots.add(pos)) {
// to allow for something like "root.tmp.123" to be processed
int mapId = DataUtils.parseHexInt(key.substring(key.lastIndexOf('.') + 1));
collector.setMapId(mapId);
collector.visit(pos, executorService, executingThreadCounter);
}
}
}
}
final class ChunkIdsCollector {
/** really a set */
private final ConcurrentHashMap<Integer, Integer> referencedChunks = new ConcurrentHashMap<>();
private final ChunkIdsCollector parent;
private int mapId;
ChunkIdsCollector(int mapId) {
this.parent = null;
this.mapId = mapId;
}
private ChunkIdsCollector(ChunkIdsCollector parent) {
this.parent = parent;
this.mapId = parent.mapId;
}
public int getMapId() {
return mapId;
}
public void setMapId(int mapId) {
this.mapId = mapId;
}
public Set<Integer> getReferenced() {
return new HashSet<>(referencedChunks.keySet());
}
/**
* Visit a page on a chunk and collect ids for it and its children.
*
* @param page the page to visit
* @param executorService the service to use when doing visit in parallel
* @param executingThreadCounter number of threads currently active
*/
public void visit(Page page, ThreadPoolExecutor executorService, AtomicInteger executingThreadCounter) {
long pos = page.getPos();
if (DataUtils.isPageSaved(pos)) {
registerChunk(DataUtils.getPageChunkId(pos));
}
int count = page.map.getChildPageCount(page);
if (count == 0) {
return;
}
ChunkIdsCollector childCollector = DataUtils.isPageSaved(pos) && cacheChunkRef != null ?
new ChunkIdsCollector(this) : this;
for (int i = 0; i < count; i++) {
Page childPage = page.getChildPageIfLoaded(i);
if (childPage != null) {
childCollector.visit(childPage, executorService, executingThreadCounter);
} else {
childCollector.visit(page.getChildPagePos(i), executorService, executingThreadCounter);
}
}
cacheCollectedChunkIds(pos, childCollector);
}
/**
* Visit a page on a chunk and collect ids for it and its children.
*
* @param pos position of the page to visit
* @param executorService the service to use when doing visit in parallel
* @param executingThreadCounter number of threads currently active
*/
public void visit(long pos, ThreadPoolExecutor executorService, AtomicInteger executingThreadCounter) {
if (!DataUtils.isPageSaved(pos)) {
return;
}
registerChunk(DataUtils.getPageChunkId(pos));
if (DataUtils.getPageType(pos) == DataUtils.PAGE_TYPE_LEAF) {
return;
}
int[] chunkIds;
if (cacheChunkRef != null && (chunkIds = cacheChunkRef.get(pos)) != null) {
// there is a cached set of chunk ids for this position
for (int chunkId : chunkIds) {
registerChunk(chunkId);
}
} else {
ChunkIdsCollector childCollector = cacheChunkRef != null ? new ChunkIdsCollector(this) : this;
Page page;
if (cache != null && (page = cache.get(pos)) != null) {
// there is a full page in cache, use it
childCollector.visit(page, executorService, executingThreadCounter);
} else {
// page was not cached: read the data
ByteBuffer buff = readBufferForPage(pos, getMapId());
Page.readChildrenPositions(buff, pos, childCollector, executorService, executingThreadCounter);
}
cacheCollectedChunkIds(pos, childCollector);
}
}
/**
* Add chunk to list of referenced chunks.
*
* @param chunkId chunk id
*/
void registerChunk(int chunkId) {
if (referencedChunks.put(chunkId, 1) == null && parent != null) {
parent.registerChunk(chunkId);
}
}
private void cacheCollectedChunkIds(long pos, ChunkIdsCollector childCollector) {
if (childCollector != this) {
int[] chunkIds = new int[childCollector.referencedChunks.size()];
int index = 0;
for (Integer chunkId : childCollector.referencedChunks.keySet()) {
chunkIds[index++] = chunkId;
}
cacheChunkRef.put(pos, chunkIds, Constants.MEMORY_ARRAY + 4 * chunkIds.length);
}
}
}
/**
* Get a buffer for writing. The caller must synchronize on the store
* before calling the method and until after using the buffer.
*
* @return the buffer
*/
private WriteBuffer getWriteBuffer() {
WriteBuffer buff;
if (writeBuffer != null) {
buff = writeBuffer;
buff.clear();
} else {
buff = new WriteBuffer();
}
return buff;
}
/**
* Release a buffer for writing. The caller must synchronize on the store
* before calling the method and until after using the buffer.
*
* @param buff the buffer that can be re-used
*/
private void releaseWriteBuffer(WriteBuffer buff) {
if (buff.capacity() <= 4 * 1024 * 1024) {
writeBuffer = buff;
}
}
private boolean canOverwriteChunk(Chunk c, long time) {
if (retentionTime >= 0) {
if (c.time + retentionTime > time) {
return false;
}
if (c.unused == 0 || c.unused + retentionTime / 2 > time) {
return false;
}
}
return true;
}
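// Worked example for the checks above, assuming the default retention time of
// 45000 ms: if a chunk's pages all become garbage at t=100_000, unused is set
// to 100_000 on the next free pass, and the chunk can be overwritten only once
// time >= c.time + 45_000 and time >= c.unused + 22_500 both hold, i.e. at the
// earliest 22.5 seconds after it was first seen unused.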
private long getTimeSinceCreation() {
return Math.max(0, getTimeAbsolute() - creationTime);
}
private long getTimeAbsolute() {
long now = System.currentTimeMillis();
if (lastTimeAbsolute != 0 && now < lastTimeAbsolute) {
// time seems to have run backwards - this can happen
// when the system time is adjusted, for example
// on a leap second
now = lastTimeAbsolute;
} else {
lastTimeAbsolute = now;
}
return now;
}
/**
* Apply the freed space to the chunk metadata. The metadata is updated, but
* completely free chunks are not removed from the set of chunks, and the
* disk space is not yet marked as free.
*/
private void applyFreedSpace() {
while (true) {
ArrayList modified = new ArrayList<>();
synchronized (freedPageSpace) {
for (Chunk f : freedPageSpace.values()) {
Chunk c = chunks.get(f.id);
if (c != null) { // skip if was already removed
c.maxLenLive += f.maxLenLive;
c.pageCountLive += f.pageCountLive;
if (c.pageCountLive < 0 && c.pageCountLive > -MARKED_FREE) {
// can happen after a rollback
c.pageCountLive = 0;
}
if (c.maxLenLive < 0 && c.maxLenLive > -MARKED_FREE) {
// can happen after a rollback
c.maxLenLive = 0;
}
modified.add(c);
}
}
freedPageSpace.clear();
}
if (modified.isEmpty()) {
break;
}
for (Chunk c : modified) {
meta.put(Chunk.getMetaKey(c.id), c.asString());
}
markMetaChanged();
}
}
/**
* Shrink the file if possible, and if at least a given percentage can be
* saved.
*
* @param minPercent the minimum percentage to save
*/
private void shrinkFileIfPossible(int minPercent) {
if (fileStore.isReadOnly()) {
return;
}
long end = getFileLengthInUse();
long fileSize = fileStore.size();
if (end >= fileSize) {
return;
}
if (minPercent > 0 && fileSize - end < BLOCK_SIZE) {
return;
}
int savedPercent = (int) (100 - (end * 100 / fileSize));
if (savedPercent < minPercent) {
return;
}
if (isOpenOrStopping()) {
sync();
}
fileStore.truncate(end);
}
/**
* Get the position right after the last used byte.
*
* @return the position
*/
private long getFileLengthInUse() {
long result = fileStore.getFileLengthInUse();
assert result == measureFileLengthInUse() : result + " != " + measureFileLengthInUse();
return result;
}
private long measureFileLengthInUse() {
long size = 2;
for (Chunk c : chunks.values()) {
if (c.len != Integer.MAX_VALUE) {
size = Math.max(size, c.block + c.len);
}
}
return size * BLOCK_SIZE;
}
/**
* Check whether there are any unsaved changes.
*
* @return if there are any changes
*/
public boolean hasUnsavedChanges() {
if (metaChanged) {
return true;
}
for (MVMap<?, ?> m : maps.values()) {
if (!m.isClosed()) {
if(m.hasChangesSince(lastStoredVersion)) {
return true;
}
}
}
return false;
}
private boolean hasUnsavedChangesInternal() {
if (meta.hasChangesSince(lastStoredVersion)) {
return true;
}
return hasUnsavedChanges();
}
private Chunk readChunkHeader(long block) {
long p = block * BLOCK_SIZE;
ByteBuffer buff = fileStore.readFully(p, Chunk.MAX_HEADER_LENGTH);
return Chunk.readChunkHeader(buff, p);
}
/**
* Compact the store by moving all live pages to new chunks.
*
* @return if anything was written
*/
public boolean compactRewriteFully() {
storeLock.lock();
try {
checkOpen();
if (lastChunk == null) {
// nothing to do
return false;
}
for (MVMap<?, ?> m : maps.values()) {
@SuppressWarnings("unchecked")
MVMap