All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.arcadedb.index.lsm.LSMTreeIndexCompactor Maven / Gradle / Ivy

There is a newer version: 24.11.2
Show newest version
/*
 * Copyright © 2021-present Arcade Data Ltd (info@arcadedata.com)
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * SPDX-FileCopyrightText: 2021-present Arcade Data Ltd (info@arcadedata.com)
 * SPDX-License-Identifier: Apache-2.0
 */
package com.arcadedb.index.lsm;

import com.arcadedb.GlobalConfiguration;
import com.arcadedb.database.Binary;
import com.arcadedb.database.DatabaseInternal;
import com.arcadedb.database.RID;
import com.arcadedb.database.TrackableBinary;
import com.arcadedb.engine.ImmutablePage;
import com.arcadedb.engine.MutablePage;
import com.arcadedb.engine.PageId;
import com.arcadedb.log.LogManager;
import com.arcadedb.serializer.BinaryComparator;
import com.arcadedb.serializer.BinarySerializer;
import com.arcadedb.utility.FileUtils;

import java.io.*;
import java.util.*;
import java.util.logging.*;

public class LSMTreeIndexCompactor {
  private boolean debug = false;

  public LSMTreeIndexCompactor setDebug(final boolean debug) {
    this.debug = debug;
    return this;
  }

  public boolean compact(final LSMTreeIndex mainIndex) throws IOException, InterruptedException {
    final LSMTreeIndexMutable mutableIndex = mainIndex.getMutableIndex();

    final DatabaseInternal database = mutableIndex.getDatabase();

    final int totalPages = mutableIndex.getTotalPages();
    LogManager.instance()
        .log(mainIndex, Level.INFO, "Compacting index '%s' (pages=%d pageSize=%d threadId=%d)...", null, mutableIndex, totalPages, mutableIndex.getPageSize(),
            Thread.currentThread().getId());

    if (totalPages < 2)
      return false;

    final long startTime = System.currentTimeMillis();

    LSMTreeIndexCompacted compactedIndex = mutableIndex.getSubIndex();
    if (compactedIndex == null) {
      // CREATE A NEW INDEX
      compactedIndex = mutableIndex.createNewForCompaction();
      mutableIndex.getDatabase().getSchema().getEmbedded().registerFile(compactedIndex);
      LogManager.instance()
          .log(mainIndex, Level.WARNING, "- Creating sub-index '%s' with fileId=%d (threadId=%d)...", null, compactedIndex, compactedIndex.getFileId(),
              Thread.currentThread().getId());
    }

    final byte[] keyTypes = mutableIndex.getBinaryKeyTypes();

    long indexCompactionRAM = database.getConfiguration().getValueAsLong(GlobalConfiguration.INDEX_COMPACTION_RAM_MB) * 1024 * 1024;

    final long maxUsableRAM = Runtime.getRuntime().maxMemory() * 30 / 100;
    if (indexCompactionRAM > maxUsableRAM) {
      LogManager.instance()
          .log(mainIndex, Level.INFO, "Configured RAM for compaction (%dMB) is more than 1/3 of the max heap (%s). Forcing to %s", null, indexCompactionRAM,
              FileUtils.getSizeAsString(Runtime.getRuntime().maxMemory()), maxUsableRAM);
      indexCompactionRAM = maxUsableRAM;
    }

    long iterations = 1;
    long totalKeys = 0;
    long totalValues = 0;
    long totalMergedKeys = 0;
    long totalMergedValues = 0;

    final Binary keyValueContent = new Binary();

    int pagesToCompact;
    int compactedPages = 0;

    if (debug) {
      System.out.println("BEFORE COMPACTING:");
      LSMTreeIndexDebugger.printIndex(mainIndex);
    }

    // FIND LAST IMMUTABLE PAGE TO COMPACT
    int lastImmutablePage = totalPages - 1;
    for (int pageIndex = totalPages - 1; pageIndex > -1; --pageIndex) {
      final ImmutablePage page = database.getPageManager().getImmutablePage(new PageId(mutableIndex.getFileId(), pageIndex), mutableIndex.getPageSize(), false, true);
      if (!mutableIndex.isMutable(page)) {
        lastImmutablePage = pageIndex;
        break;
      }
    }

    LogManager.instance().log(mainIndex, Level.WARNING, "- Compacting pages 0-%d (threadId=%d)", null, lastImmutablePage, Thread.currentThread().getId());

    for (int pageIndex = 0; pageIndex <= lastImmutablePage; ) {
      final long totalRAMNeeded = (lastImmutablePage - pageIndex + 1L) * mutableIndex.getPageSize();

      if (totalRAMNeeded > indexCompactionRAM) {
        pagesToCompact = (int) (indexCompactionRAM / mutableIndex.getPageSize());
        LogManager.instance()
            .log(mainIndex, Level.WARNING, "- Creating partial index with %d pages by using %s (totalRAMNeeded=%s, threadId=%d)", null, pagesToCompact,
                FileUtils.getSizeAsString(indexCompactionRAM), FileUtils.getSizeAsString(totalRAMNeeded), Thread.currentThread().getId());
      } else
        pagesToCompact = lastImmutablePage - pageIndex + 1;

      // CREATE ROOT PAGE
      final MutablePage rootPage = compactedIndex.createNewPage(0);
      final TrackableBinary rootPageBuffer = rootPage.getTrackable();
      Object[] lastPageMaxKey = null;

      LogManager.instance()
          .log(mainIndex, Level.WARNING, "- This turn compacting %d pages using root page %s v.%d (threadId=%d)", null, pagesToCompact, rootPage.getPageId(),
              rootPage.getVersion(), Thread.currentThread().getId());

      int compactedPageNumberInSeries = 1;

      final LSMTreeIndexUnderlyingPageCursor[] iterators = new LSMTreeIndexUnderlyingPageCursor[pagesToCompact];
      for (int i = 0; i < pagesToCompact; ++i)
        iterators[i] = mutableIndex.newPageIterator(pageIndex + i, -1, true);

      final Object[][] keys = new Object[pagesToCompact][keyTypes.length];

      for (int p = 0; p < pagesToCompact; ++p) {
        if (iterators[p].hasNext()) {
          iterators[p].next();
          keys[p] = iterators[p].getKeys();
        } else {
          iterators[p].close();
          iterators[p] = null;
          keys[p] = null;
        }
      }

      final BinarySerializer serializer = database.getSerializer();
      final BinaryComparator comparator = serializer.getComparator();

      MutablePage lastPage = null;
      TrackableBinary currentPageBuffer = null;

      final Set rids = new LinkedHashSet<>();

      for (boolean moreItems = true; moreItems; ++iterations) {
        moreItems = false;

        Object[] minorKey = null;
        final List minorKeyIndexes = new ArrayList<>();

        // FIND THE MINOR KEY
        for (int p = 0; p < pagesToCompact; ++p) {
          if (minorKey == null) {
            minorKey = keys[p];
            if (minorKey != null) {
              moreItems = true;
              minorKeyIndexes.add(p);
            }
          } else {
            if (keys[p] != null) {
              moreItems = true;
              final int cmp = LSMTreeIndexMutable.compareKeys(comparator, keyTypes, keys[p], minorKey);
              if (cmp == 0) {
                minorKeyIndexes.add(p);
                ++totalMergedKeys;
              } else if (cmp < 0) {
                minorKey = keys[p];
                if (minorKey != null) {
                  minorKeyIndexes.clear();
                  minorKeyIndexes.add(p);
                }
              }
            }
          }
        }

        rids.clear();
        for (int i = 0; i < minorKeyIndexes.size(); ++i) {
          final int idx = minorKeyIndexes.get(i);
          final LSMTreeIndexUnderlyingPageCursor iter = iterators[idx];

          // BROWSE THE SAME ITERATOR TO CHECK IF NEXT VALUES HAVE THE SAME KEY
          while (true) {
            if (iter == null)
              break;

            final Object[] value = iter.getValue();
            if (value != null) {
              // NOT DELETED
              for (int r = 0; r < value.length; ++r) {
                final RID rid = (RID) value[r];
                // ADD ALSO REMOVED RIDS. ONCE THE COMPACTING OF COMPACTED INDEXES (2nd LEVEL) IS DONE, REMOVED ENTRIES CAN BE REMOVED
                rids.add(rid);
              }

              if (!rids.isEmpty())
                totalMergedValues += rids.size();
            }

            // CHECK IF THE NEXT ELEMENT HAS THE SAME KEY
            if (iter.hasNext()) {
              iter.next();
              keys[idx] = iter.getKeys();

              if (LSMTreeIndexMutable.compareKeys(comparator, keyTypes, keys[idx], minorKey) != 0)
                break;

            } else {
              iterators[idx].close();
              iterators[idx] = null;
              keys[idx] = null;
              break;
            }
          }
        }

        if (!rids.isEmpty()) {
          final RID[] ridsArray = new RID[rids.size()];
          rids.toArray(ridsArray);

          final MutablePage newPage = compactedIndex.appendDuringCompaction(keyValueContent, lastPage, currentPageBuffer, compactedPageNumberInSeries, minorKey,
              ridsArray);

          if (newPage != lastPage) {
            ++compactedPageNumberInSeries;

            if (rootPage != null) {
              // NEW PAGE: STORE THE MIN KEY IN THE ROOT PAGE
              final int newPageNum = newPage.getPageId().getPageNumber();

              final MutablePage newRootPage = compactedIndex.appendDuringCompaction(keyValueContent, rootPage, rootPageBuffer, compactedPageNumberInSeries,
                  minorKey, new RID[] { new RID(database, 0, newPageNum) });

              LogManager.instance()
                  .log(mainIndex, Level.WARNING, "- Creating a new entry in index '%s' root page %s->%d (entry in page=%d threadId=%d)", null, mutableIndex,
                      Arrays.toString(minorKey), newPageNum, mutableIndex.getCount(rootPage) - 1, Thread.currentThread().getId());

              if (newRootPage != rootPage) {
                throw new UnsupportedOperationException("Root index page overflow");
//
//                // TODO: MANAGE A LINKED LIST OF ROOT PAGES INSTEAD
//                ++compactedPageNumberInSeries;
//
//                LogManager.instance().info(mainIndex, "- End of space in root index page for index '%s' (rootEntries=%d)", compactedIndex.getName(),
//                    compactedIndex.getCount(rootPage));
//                database.getPageManager().updatePage(rootPage, true, false);
//                rootPage = null;
//                rootPageBuffer = null;
              }
            }

            currentPageBuffer = newPage.getTrackable();
            lastPage = newPage;
          }

          // UPDATE LAST PAGE'S KEY
          if (minorKey != null)
            lastPageMaxKey = minorKey;

          ++totalKeys;
          totalValues += rids.size();

          if (totalKeys % 1_000_000 == 0)
            LogManager.instance()
                .log(mainIndex, Level.WARNING, "- Keys %d values %d - iterations %d (entriesInRootPage=%d, threadId=%d)", null, totalKeys, totalValues,
                    iterations, compactedIndex.getCount(rootPage), Thread.currentThread().getId());
        }
      }

      if (rootPage != null && lastPageMaxKey != null) {
        // WRITE THE MAX KEY
        compactedIndex.appendDuringCompaction(keyValueContent, rootPage, rootPageBuffer, compactedPageNumberInSeries, lastPageMaxKey,
            new RID[] { new RID(database, 0, 0) });
        LogManager.instance()
            .log(mainIndex, Level.WARNING, "- Creating last entry in index '%s' root page %s (entriesInRootPage=%d, threadId=%d)", null, mutableIndex,
                Arrays.toString(lastPageMaxKey), compactedIndex.getCount(rootPage), Thread.currentThread().getId());
      }

      final List modifiedPages = new ArrayList<>(1);

      if (lastPage != null)
        modifiedPages.add(database.getPageManager().updatePageVersion(lastPage, true));
      if (rootPage != null)
        modifiedPages.add(database.getPageManager().updatePageVersion(rootPage, true));

      database.getPageManager().writePages(modifiedPages, false);

      compactedPages += pagesToCompact;

      LogManager.instance().log(mainIndex, Level.WARNING,
          "- compacted %d pages, remaining %d pages (totalKeys=%d totalValues=%d totalMergedKeys=%d totalMergedValues=%d, threadId=%d)", null, compactedPages,
          (lastImmutablePage - compactedPages + 1), totalKeys, totalValues, totalMergedKeys, totalMergedValues, Thread.currentThread().getId());

      pageIndex += pagesToCompact;
    }

    final String oldMutableFileName = mutableIndex.getName();
    final int oldMutableFileId = mutableIndex.getFileId();

    final LSMTreeIndexMutable newIndex = mainIndex.splitIndex(lastImmutablePage + 1, compactedIndex);

    LogManager.instance().log(mainIndex, Level.WARNING, String.format(
        "Index '%s' compacted in %dms (keys=%d values=%d mutablePages=%d immutablePages=%d iterations=%d oldLevel0File=%s(%d) newLevel0File=%s(%d) newLevel1File=%s(%d) threadId=%d)",
        mainIndex.getName(), (System.currentTimeMillis() - startTime), totalKeys, totalValues, newIndex.getTotalPages(), compactedIndex.getTotalPages(),
        iterations, oldMutableFileName, oldMutableFileId, mainIndex.getMutableIndex().getName(), mainIndex.getMutableIndex().getFileId(),
        compactedIndex.getName(), compactedIndex.getFileId(), Thread.currentThread().getId()));

    if (debug) {
      System.out.println("AFTER COMPACTING:");
      LSMTreeIndexDebugger.printIndex(mainIndex);
    }

    return true;
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy