org.neo4j.kernel.impl.index.schema.PartMerger Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of neo4j-kernel Show documentation
Show all versions of neo4j-kernel Show documentation
Neo4j kernel is a lightweight, embedded Java database designed to
store data structured as graphs rather than tables. For more
information, see http://neo4j.org.
/*
* Copyright (c) "Neo4j"
* Neo4j Sweden AB [http://neo4j.com]
*
* This file is part of Neo4j.
*
* Neo4j is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package org.neo4j.kernel.impl.index.schema;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
import java.util.concurrent.ExecutionException;
import org.neo4j.index.internal.gbptree.Layout;
import org.neo4j.kernel.api.index.IndexPopulator.PopulationWorkScheduler;
import org.neo4j.scheduler.JobHandle;
import org.neo4j.scheduler.JobHandles;
import static org.neo4j.io.IOUtils.closeAll;
import static org.neo4j.kernel.impl.index.schema.BlockEntryStreamMerger.QUEUE_SIZE;
/**
* The idea is to merge multiple (already individually sorted) parts of {@link BlockEntry block entries} into one stream.
* Input is the parts. One merge job is scheduled per merger, and the parts are merged level by level with the given merge factor,
* making the number of merge levels roughly log4(numberOfParts). Output is a {@link BlockEntryCursor}, a stream which gets populated over time.
*
* The part merging will look something like this:
*
* <pre>
*     (1) (2) (3) (4)   (5) (6) (7) (8)   (9) (10)
*      \   |   |   /     \   |   |   /     /   /
*       (---A---)         (---B---)       /   /
*           \                 |          /   /
*            (------------C--------------)
* </pre>
*
*/
class PartMerger implements AutoCloseable
{
static final int DEFAULT_BATCH_SIZE = 100;
private static final int MERGE_FACTOR = 4;
private final PopulationWorkScheduler populationWorkScheduler;
private final List> parts;
private final Layout layout;
private final BlockStorage.Cancellation cancellation;
private final int batchSize;
private final Comparator samplingComparator;
private final List> allMergers = new ArrayList<>();
private final List> mergeHandles = new ArrayList<>();
PartMerger( PopulationWorkScheduler populationWorkScheduler, List> parts,
Layout layout, Comparator samplingComparator, BlockStorage.Cancellation cancellation, int batchSize )
{
this.populationWorkScheduler = populationWorkScheduler;
this.parts = parts;
this.layout = layout;
this.cancellation = cancellation;
this.batchSize = batchSize;
this.samplingComparator = samplingComparator;
}
BlockEntryStreamMerger startMerge()
{
List> remainingParts = new ArrayList<>( parts );
while ( remainingParts.size() > MERGE_FACTOR )
{
// Build one "level" of mergers, each merger in this level merging "merge factor" number of streams
List> current = new ArrayList<>();
List> levelParts = new ArrayList<>();
for ( BlockEntryCursor remainingPart : remainingParts )
{
current.add( remainingPart );
if ( current.size() == MERGE_FACTOR )
{
BlockEntryStreamMerger merger = new BlockEntryStreamMerger<>( current, layout, null, cancellation, batchSize, QUEUE_SIZE );
allMergers.add( merger );
levelParts.add( merger );
current = new ArrayList<>();
}
}
levelParts.addAll( current );
remainingParts = levelParts;
}
BlockEntryStreamMerger merger =
new BlockEntryStreamMerger<>( remainingParts, layout, samplingComparator, cancellation, batchSize, QUEUE_SIZE );
allMergers.add( merger );
allMergers.forEach( merge -> mergeHandles.add(
populationWorkScheduler.schedule( indexName -> "Part merger while writing scan update for " + indexName, merge ) ) );
return merger;
}
@Override
public void close() throws IOException
{
allMergers.forEach( BlockEntryStreamMerger::halt );
try
{
JobHandles.getAllResults( mergeHandles );
}
catch ( ExecutionException e )
{
if ( e.getCause() instanceof IOException )
{
throw (IOException) e.getCause();
}
throw new IOException( e.getCause() );
}
finally
{
closeAll( () -> closeAll( allMergers ), () -> closeAll( parts ) );
}
}
}