All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.arcadedb.index.lsm.LSMTreeIndexAbstract Maven / Gradle / Ivy

/*
 * Copyright © 2021-present Arcade Data Ltd (info@arcadedata.com)
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * SPDX-FileCopyrightText: 2021-present Arcade Data Ltd (info@arcadedata.com)
 * SPDX-License-Identifier: Apache-2.0
 */
package com.arcadedb.index.lsm;

import com.arcadedb.database.Binary;
import com.arcadedb.database.DatabaseFactory;
import com.arcadedb.database.DatabaseInternal;
import com.arcadedb.database.RID;
import com.arcadedb.engine.BasePage;
import com.arcadedb.engine.MutablePage;
import com.arcadedb.engine.PaginatedComponent;
import com.arcadedb.engine.PaginatedFile;
import com.arcadedb.index.IndexCursorEntry;
import com.arcadedb.index.IndexException;
import com.arcadedb.log.LogManager;
import com.arcadedb.schema.Type;
import com.arcadedb.serializer.BinaryComparator;
import com.arcadedb.serializer.BinarySerializer;
import com.arcadedb.serializer.BinaryTypes;

import java.io.*;
import java.util.*;
import java.util.logging.*;

import static com.arcadedb.database.Binary.BYTE_SERIALIZED_SIZE;
import static com.arcadedb.database.Binary.INT_SERIALIZED_SIZE;

/**
 * Abstract class for LSM-based indexes. The first page contains 2 bytes to store key and value types. The pages are populated from the head of the page
 * with the pointers to the pair key/value that starts from the tail. A page is full when there is no space between the head
 * (key pointers) and the tail (key/value pairs).
 * 
* When a page is full, another page is created, waiting for a compaction. *
* HEADER ROOT PAGE (1st) = [offsetFreeKeyValueContent(int:4),numberOfEntries(int:4),mutable(boolean:1),compactedPageNumberOfSeries(int:4),subIndexFileId(int:4),numberOfKeys(byte:1),keyType(byte:1)*] *
* HEADER Nst PAGE = [offsetFreeKeyValueContent(int:4),numberOfEntries(int:4),mutable(boolean:1),compactedPageNumberOfSeries(int:4)] *

*

* The page content size and available space API are not valid in the index pages, because the whole page is used from start to end. */ public abstract class LSMTreeIndexAbstract extends PaginatedComponent { public enum NULL_STRATEGY {ERROR, SKIP} public static final int DEF_PAGE_SIZE = 2 * 1024 * 1024; public final RID REMOVED_ENTRY_RID; protected static final String TEMP_EXT = "temp_"; protected static final LSMTreeIndexCompacted.LookupResult LOWER = new LSMTreeIndexCompacted.LookupResult(false, true, 0, null); protected static final LSMTreeIndexCompacted.LookupResult HIGHER = new LSMTreeIndexCompacted.LookupResult(false, true, 0, null); protected static final byte valueType = BinaryTypes.TYPE_COMPRESSED_RID; protected LSMTreeIndex mainIndex; protected final BinaryComparator comparator; protected final BinarySerializer serializer; protected final boolean unique; protected Type[] keyTypes; protected byte[] binaryKeyTypes; protected NULL_STRATEGY nullStrategy = NULL_STRATEGY.SKIP; public enum COMPACTING_STATUS {NO, SCHEDULED, IN_PROGRESS} protected static class LookupResult { public final boolean found; public final boolean outside; public final int keyIndex; public final int[] valueBeginPositions; public LookupResult(final boolean found, final boolean outside, final int keyIndex, final int[] valueBeginPositions) { this.found = found; this.outside = outside; this.keyIndex = keyIndex; this.valueBeginPositions = valueBeginPositions; } } /** * Called at creation time. 
*/ protected LSMTreeIndexAbstract(final LSMTreeIndex mainIndex, final DatabaseInternal database, final String name, final boolean unique, final String filePath, final String ext, final PaginatedFile.MODE mode, final Type[] keyTypes, final int pageSize, final int version, final NULL_STRATEGY nullStrategy) throws IOException { super(database, name, filePath, ext, mode, pageSize, version); if (nullStrategy == null) throw new IllegalArgumentException("Index null strategy is null "); this.mainIndex = mainIndex; this.serializer = database.getSerializer(); this.comparator = serializer.getComparator(); this.unique = unique; this.keyTypes = keyTypes; this.binaryKeyTypes = new byte[keyTypes.length]; for (int i = 0; i < keyTypes.length; i++) this.binaryKeyTypes[i] = keyTypes[i].getBinaryType(); this.nullStrategy = nullStrategy; REMOVED_ENTRY_RID = new RID(database, -1, -1L); } /** * Called at cloning time. */ protected LSMTreeIndexAbstract(final LSMTreeIndex mainIndex, final DatabaseInternal database, final String name, final boolean unique, final String filePath, final String ext, final Type[] keyTypes, final byte[] binaryKeyTypes, final int pageSize, final int version) throws IOException { super(database, name, filePath, TEMP_EXT + ext, PaginatedFile.MODE.READ_WRITE, pageSize, version); this.mainIndex = mainIndex; this.serializer = database.getSerializer(); this.comparator = serializer.getComparator(); this.unique = unique; this.keyTypes = keyTypes; this.binaryKeyTypes = binaryKeyTypes; REMOVED_ENTRY_RID = new RID(database, -1, -1L); } /** * Called at load time (1st page only). 
*/ protected LSMTreeIndexAbstract(final LSMTreeIndex mainIndex, final DatabaseInternal database, final String name, final boolean unique, final String filePath, final int id, final PaginatedFile.MODE mode, final int pageSize, final int version) throws IOException { super(database, name, filePath, id, mode, pageSize, version); this.mainIndex = mainIndex; this.serializer = database.getSerializer(); this.comparator = serializer.getComparator(); this.unique = unique; REMOVED_ENTRY_RID = new RID(database, -1, -1L); } /** * @param purpose 0 = exists, 1 = retrieve, 2 = ascending iterator, 3 = descending iterator */ protected abstract LookupResult compareKey(final Binary currentPageBuffer, final int startIndexArray, final Object[] convertedKeys, int mid, final int count, final int purpose); public PaginatedComponent getPaginatedComponent() { return this; } public boolean isUnique() { return unique; } public int getFileId() { return file.getFileId(); } public NULL_STRATEGY getNullStrategy() { return nullStrategy; } @Override public String toString() { return name + "(" + getFileId() + ")"; } public Type[] getKeyTypes() { return keyTypes; } public byte[] getBinaryKeyTypes() { return binaryKeyTypes; } public boolean isDeletedEntry(final RID rid) { return rid.getBucketId() < 0; } public void removeTempSuffix() { final String fileName = file.getFilePath(); final int extPos = fileName.lastIndexOf('.'); if (fileName.substring(extPos + 1).startsWith(TEMP_EXT)) { final String newFileName = fileName.substring(0, extPos) + "." 
+ fileName.substring(extPos + TEMP_EXT.length() + 1); try { file.rename(newFileName); } catch (final IOException e) { throw new IndexException( "Cannot rename index file '" + file.getFilePath() + "' into temp file '" + newFileName + "' (exists=" + (new File(file.getFilePath()).exists()) + ")", e); } } } public void drop() throws IOException { if (database.isOpen()) { database.getPageManager().deleteFile(file.getFileId()); database.getFileManager().dropFile(file.getFileId()); database.getSchema().getEmbedded().removeFile(file.getFileId()); } else { if (!new File(file.getFilePath()).delete()) LogManager.instance().log(this, Level.WARNING, "Error on deleting index file '%s'", null, file.getFilePath()); } } public Map getStats() { final Map stats = new HashMap<>(); stats.put("pages", (long) getTotalPages()); return stats; } @Override public Object getMainComponent() { return mainIndex; } /** * Checks if all the key values are NULL. */ public static boolean isKeyNull(final Object[] keys) { if (keys == null) return true; for (int i = 0; i < keys.length; ++i) if (keys[i] != null) return false; return true; } /** * Lookups for an entry in the index by using dichotomy search. 
* * @param purpose 0 = exists, 1 = retrieve, 2 = ascending iterator, 3 = descending iterator * * @return LookupResult object, never null */ protected LookupResult lookupInPage(final int pageNum, final int count, final Binary currentPageBuffer, final Object[] convertedKeys, final int purpose) { if (binaryKeyTypes.length == 0) throw new IllegalArgumentException("No key types found"); if (convertedKeys.length > binaryKeyTypes.length) throw new IllegalArgumentException("key is composed of " + convertedKeys.length + " items, while the index defined " + binaryKeyTypes.length + " items"); if ((purpose == 0 || purpose == 1) && convertedKeys.length != binaryKeyTypes.length) throw new IllegalArgumentException("key is composed of " + convertedKeys.length + " items, while the index defined " + binaryKeyTypes.length + " items"); if (count == 0) // EMPTY, NOT FOUND return new LookupResult(false, true, 0, null); int low = 0; int high = count - 1; final int startIndexArray = getHeaderSize(pageNum); LookupResult result; // CHECK THE BOUNDARIES FIRST (LOWER THAN THE FIRST) result = compareKey(currentPageBuffer, startIndexArray, convertedKeys, low, count, purpose); if (result == LOWER) { if (purpose == 2) // BROWSE ASCENDING return new LookupResult(false, false, low, new int[] { currentPageBuffer.position() }); return new LookupResult(false, true, low, null); } else if (result != HIGHER) return result; // CHECK THE BOUNDARIES FIRST (HIGHER THAN THE LAST) result = compareKey(currentPageBuffer, startIndexArray, convertedKeys, high, count, purpose); if (result == HIGHER) { if (purpose == 3) // BROWSE ASCENDING return new LookupResult(false, false, high, new int[] { currentPageBuffer.position() }); return new LookupResult(false, true, count, null); } else if (result != LOWER) return result; int mid; while (low <= high) { mid = (low + high) / 2; result = compareKey(currentPageBuffer, startIndexArray, convertedKeys, mid, count, purpose); if (result == HIGHER) low = mid + 1; else if (result 
== LOWER) high = mid - 1; else return result; } if (purpose == 3) { return new LookupResult(false, false, high, null); } return new LookupResult(false, false, low, null); } protected void writeEntry(final Binary buffer, final Object[] keys, final Object rid) { buffer.clear(); writeKeys(buffer, keys); writeEntryValue(buffer, rid); } protected void writeEntry(final Binary buffer, final Object[] keys, final Object[] rids) { buffer.clear(); writeKeys(buffer, keys); writeEntryValues(buffer, rids); } /** * Reads the keys and returns the serialized size. */ protected int getSerializedKeySize(final Binary buffer, final int keyLength) { final int startsAt = buffer.position(); for (int keyIndex = 0; keyIndex < keyLength; ++keyIndex) { final boolean notNull = version < 1 || buffer.getByte() == 1; if (notNull) serializer.deserializeValue(database, buffer, binaryKeyTypes[keyIndex], null); } return buffer.position() - startsAt; } protected Object[] convertKeys(final Object[] keys, final byte[] keyTypes) { if (keys == null) return null; final Object[] convertedKeys = new Object[keys.length]; for (int i = 0; i < keys.length; ++i) { if (keys[i] == null) continue; convertedKeys[i] = Type.convert(database, keys[i], BinaryTypes.getClassFromType(keyTypes[i])); if (convertedKeys[i] instanceof String) // OPTIMIZATION: ALWAYS CONVERT STRINGS TO BYTE[] convertedKeys[i] = ((String) convertedKeys[i]).getBytes(DatabaseFactory.getDefaultCharset()); } return convertedKeys; } protected Object[] getPageKeyRange(final BasePage currentPage) { final Binary currentPageBuffer = new Binary(currentPage.slice()); final Object[] min = getKeyInPagePosition(currentPage.getPageId().getPageNumber(), currentPageBuffer, 0); final int count = getCount(currentPage); if (count > 1) return new Object[] { min, getKeyInPagePosition(currentPage.getPageId().getPageNumber(), currentPageBuffer, count - 1) }; else return new Object[] { min, min }; } protected Object[] getKeyInPagePosition(final int pageNum, final Binary 
currentPageBuffer, final int position) { final int startIndexArray = getHeaderSize(pageNum); final int contentPos = currentPageBuffer.getInt(startIndexArray + (position * INT_SERIALIZED_SIZE)); currentPageBuffer.position(contentPos); final Object[] key = new Object[binaryKeyTypes.length]; for (int keyIndex = 0; keyIndex < binaryKeyTypes.length; ++keyIndex) { final boolean notNull = version < 1 || currentPageBuffer.getByte() == 1; if (notNull) key[keyIndex] = serializer.deserializeValue(database, currentPageBuffer, binaryKeyTypes[keyIndex], null); else key[keyIndex] = null; } return key; } protected int compareKey(final Binary currentPageBuffer, final int startIndexArray, final Object[] keys, final int mid, final int count) { final int contentPos = currentPageBuffer.getInt(startIndexArray + (mid * INT_SERIALIZED_SIZE)); if (contentPos < startIndexArray + (count * INT_SERIALIZED_SIZE)) throw new IndexException("Internal error: invalid content position " + contentPos + " is < of " + (startIndexArray + (count * INT_SERIALIZED_SIZE))); currentPageBuffer.position(contentPos); int result = -1; for (int keyIndex = 0; keyIndex < keys.length; ++keyIndex) { // GET THE KEY final Object key = keys[keyIndex]; final boolean notNull = version < 1 || currentPageBuffer.getByte() == 1; if (!notNull) { if (key == null) // BOTH NULL continue; else return 1; } if (key == null) return -1; if (binaryKeyTypes[keyIndex] == BinaryTypes.TYPE_STRING) { // OPTIMIZATION: SPECIAL CASE, LAZY EVALUATE BYTE PER BYTE THE STRING result = comparator.compareBytes((byte[]) key, currentPageBuffer); } else { final Object keyValue = serializer.deserializeValue(database, currentPageBuffer, binaryKeyTypes[keyIndex], null); result = comparator.compare(key, binaryKeyTypes[keyIndex], keyValue, binaryKeyTypes[keyIndex]); } if (result != 0) break; } return result; } protected int getHeaderSize(final int pageNum) { int size = INT_SERIALIZED_SIZE + INT_SERIALIZED_SIZE + BYTE_SERIALIZED_SIZE + INT_SERIALIZED_SIZE; if 
(pageNum == 0) size += INT_SERIALIZED_SIZE + BYTE_SERIALIZED_SIZE + binaryKeyTypes.length; return size; } public static int compareKeys(final BinaryComparator comparator, final byte[] keyTypes, final Object[] keys1, final Object[] keys2) { final int minKeySize = Math.min(keys1.length, keys2.length); for (int k = 0; k < minKeySize; ++k) { final int result = comparator.compare(keys1[k], keyTypes[k], keys2[k], keyTypes[k]); if (result < 0) return -1; else if (result > 0) return 1; } return 0; } private void writeEntryValues(final Binary buffer, final Object[] values) { // WRITE NUMBER OF VALUES serializer.serializeValue(database, buffer, BinaryTypes.TYPE_INT, values.length); // WRITE VALUES for (int i = 0; i < values.length; ++i) serializer.serializeValue(database, buffer, valueType, values[i]); } private void writeEntryValue(final Binary buffer, final Object value) { // WRITE NUMBER OF VALUES serializer.serializeValue(database, buffer, BinaryTypes.TYPE_INT, 1); // WRITE VALUES serializer.serializeValue(database, buffer, valueType, value); } protected RID[] readEntryValues(final Binary buffer) { final int items = (int) serializer.deserializeValue(database, buffer, BinaryTypes.TYPE_INT, null); final RID[] rids = new RID[items]; for (int i = 0; i < rids.length; ++i) rids[i] = (RID) serializer.deserializeValue(database, buffer, valueType, null); return rids; } private void readEntryValues(final Binary buffer, final List list) { final int items = (int) serializer.deserializeValue(database, buffer, BinaryTypes.TYPE_INT, null); final Object[] rids = new Object[items]; for (int i = 0; i < rids.length; ++i) list.add((RID) serializer.deserializeValue(database, buffer, valueType, null)); } private List readAllValuesFromResult(final Binary currentPageBuffer, final LookupResult result) { final List allValues = new ArrayList<>(); for (int i = 0; i < result.valueBeginPositions.length; ++i) { currentPageBuffer.position(result.valueBeginPositions[i]); 
readEntryValues(currentPageBuffer, allValues); } return allValues; } protected int getCount(final BasePage currentPage) { return currentPage.readInt(INT_SERIALIZED_SIZE); } protected void setCount(final MutablePage currentPage, final int newCount) { currentPage.writeInt(INT_SERIALIZED_SIZE, newCount); } protected boolean isMutable(final BasePage currentPage) { return currentPage.readByte(INT_SERIALIZED_SIZE + INT_SERIALIZED_SIZE) == 1; } protected void setMutable(final MutablePage currentPage, final boolean mutable) { currentPage.writeByte(INT_SERIALIZED_SIZE + INT_SERIALIZED_SIZE, (byte) (mutable ? 1 : 0)); } protected void checkForNulls(final Object[] keys) { if (nullStrategy != NULL_STRATEGY.ERROR) return; if (keys != null) for (int i = 0; i < keys.length; ++i) if (keys[i] == null) throw new IllegalArgumentException( "Indexed key " + mainIndex.getTypeName() + mainIndex.propertyNames + " cannot be NULL (" + Arrays.toString(keys) + ")"); } protected boolean lookupInPageAndAddInResultset(final BasePage currentPage, final Binary currentPageBuffer, final int count, final Object[] originalKeys, final Object[] convertedKeys, final int limit, final Set set, final Set removedRIDs) { final LookupResult result = lookupInPage(currentPage.getPageId().getPageNumber(), count, currentPageBuffer, convertedKeys, 1); if (result.found) { // REAL ALL THE ENTRIES final List allValues = readAllValuesFromResult(currentPageBuffer, result); final Set validRIDs = new HashSet<>(); // START FROM THE LAST ENTRY for (int i = allValues.size() - 1; i > -1; --i) { final RID rid = allValues.get(i); if (REMOVED_ENTRY_RID.equals(rid)) { // DELETED ITEM return false; } if (rid.getBucketId() < 0) { // RID DELETED, SKIP THE RID final RID originalRID = getOriginalRID(rid); if (!validRIDs.contains(originalRID)) removedRIDs.add(originalRID); continue; } if (removedRIDs.contains(rid)) // HAS BEEN DELETED continue; validRIDs.add(rid); set.add(new IndexCursorEntry(originalKeys, rid, 1)); if (limit > -1 && 
set.size() >= limit) { return false; } } } return true; } protected int getValuesFreePosition(final BasePage currentPage) { return currentPage.readInt(0); } protected void setValuesFreePosition(final MutablePage currentPage, final int newValuesFreePosition) { currentPage.writeInt(0, newValuesFreePosition); } protected RID getOriginalRID(final RID rid) { return new RID(database, (rid.getBucketId() * -1) - 2, rid.getPosition()); } private void writeKeys(final Binary buffer, final Object[] keys) { // WRITE KEYS for (int i = 0; i < binaryKeyTypes.length; ++i) { final Object value = keys[i]; if (value == null) // WRITE 0 IF NULL buffer.putByte((byte) 0); else { // WRITE 1 + THE ACTUAL VALUE buffer.putByte((byte) 1); serializer.serializeValue(database, buffer, binaryKeyTypes[i], value); } } } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy