All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.neo4j.kernel.impl.index.schema.PartMerger Maven / Gradle / Ivy

Go to download

Neo4j kernel is a lightweight, embedded Java database designed to store data structured as graphs rather than tables. For more information, see http://neo4j.org.

There is a newer version: 5.25.1
Show newest version
/*
 * Copyright (c) "Neo4j"
 * Neo4j Sweden AB [http://neo4j.com]
 *
 * This file is part of Neo4j.
 *
 * Neo4j is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
package org.neo4j.kernel.impl.index.schema;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
import java.util.concurrent.ExecutionException;

import org.neo4j.index.internal.gbptree.Layout;
import org.neo4j.kernel.api.index.IndexPopulator.PopulationWorkScheduler;
import org.neo4j.scheduler.JobHandle;
import org.neo4j.scheduler.JobHandles;

import static org.neo4j.io.IOUtils.closeAll;
import static org.neo4j.kernel.impl.index.schema.BlockEntryStreamMerger.QUEUE_SIZE;

/**
 * The idea is to merge multiple (already individually sorted) parts of {@link BlockEntry block entries} into one stream.
 * Input is the parts. One or more threads will be spawned to merge these parts with the given merge factor, making the number
 * of threads by roughly log4(numberOfParts). Output is a {@link BlockEntryCursor} which is a stream which gets populated over time.
 *
 * The part merging will look something like this:
 * 
 *   (1) (2) (3) (4) (5) (6) (7) (8) (9) (10)
 *    \  |   |  /     \  |   |  /   /   /
 *    (---A---)       (---B---)   /   /
 *          \           |       /   /
 *         (----------C-------------)
 * 
*/ class PartMerger implements AutoCloseable { static final int DEFAULT_BATCH_SIZE = 100; private static final int MERGE_FACTOR = 4; private final PopulationWorkScheduler populationWorkScheduler; private final List> parts; private final Layout layout; private final BlockStorage.Cancellation cancellation; private final int batchSize; private final Comparator samplingComparator; private final List> allMergers = new ArrayList<>(); private final List> mergeHandles = new ArrayList<>(); PartMerger( PopulationWorkScheduler populationWorkScheduler, List> parts, Layout layout, Comparator samplingComparator, BlockStorage.Cancellation cancellation, int batchSize ) { this.populationWorkScheduler = populationWorkScheduler; this.parts = parts; this.layout = layout; this.cancellation = cancellation; this.batchSize = batchSize; this.samplingComparator = samplingComparator; } BlockEntryStreamMerger startMerge() { List> remainingParts = new ArrayList<>( parts ); while ( remainingParts.size() > MERGE_FACTOR ) { // Build one "level" of mergers, each merger in this level merging "merge factor" number of streams List> current = new ArrayList<>(); List> levelParts = new ArrayList<>(); for ( BlockEntryCursor remainingPart : remainingParts ) { current.add( remainingPart ); if ( current.size() == MERGE_FACTOR ) { BlockEntryStreamMerger merger = new BlockEntryStreamMerger<>( current, layout, null, cancellation, batchSize, QUEUE_SIZE ); allMergers.add( merger ); levelParts.add( merger ); current = new ArrayList<>(); } } levelParts.addAll( current ); remainingParts = levelParts; } BlockEntryStreamMerger merger = new BlockEntryStreamMerger<>( remainingParts, layout, samplingComparator, cancellation, batchSize, QUEUE_SIZE ); allMergers.add( merger ); allMergers.forEach( merge -> mergeHandles.add( populationWorkScheduler.schedule( indexName -> "Part merger while writing scan update for " + indexName, merge ) ) ); return merger; } @Override public void close() throws IOException { 
allMergers.forEach( BlockEntryStreamMerger::halt ); try { JobHandles.getAllResults( mergeHandles ); } catch ( ExecutionException e ) { if ( e.getCause() instanceof IOException ) { throw (IOException) e.getCause(); } throw new IOException( e.getCause() ); } finally { closeAll( () -> closeAll( allMergers ), () -> closeAll( parts ) ); } } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy