All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.neo4j.unsafe.impl.batchimport.Configuration Maven / Gradle / Ivy

Go to download

Neo4j kernel is a lightweight, embedded Java database designed to store data structured as graphs rather than tables. For more information, see http://neo4j.org.

There is a newer version: 2025.02.0
Show newest version
/*
 * Copyright (c) 2002-2015 "Neo Technology,"
 * Network Engine for Objects in Lund AB [http://neotechnology.com]
 *
 * This file is part of Neo4j.
 *
 * Neo4j is free software: you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation, either version 3 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */
package org.neo4j.unsafe.impl.batchimport;

import org.neo4j.graphdb.factory.GraphDatabaseSettings;
import org.neo4j.kernel.configuration.Config;

import static java.lang.Math.round;

/**
 * User controlled configuration for a {@link BatchImporter}.
 */
public interface Configuration extends org.neo4j.unsafe.impl.batchimport.staging.Configuration
{
    /**
     * File name in which bad entries from the import will end up. This file will be created in the
     * database directory of the imported database, i.e. {@code <database directory>/bad.log}.
     */
    String BAD_FILE_NAME = "bad.log";

    /**
     * Memory dedicated to buffering data to be written.
     *
     * @return the write buffer size.
     */
    int writeBufferSize();

    /**
     * The number of relationships threshold for considering a node dense.
     *
     * @return the relationship count threshold.
     */
    int denseNodeThreshold();

    /**
     * Rough max number of processors (CPU cores) simultaneously used in total by importer at any given time.
     * This value should be set while taking the necessary IO threads into account; the page cache and the operating
     * system will require a couple of threads between them, to handle the IO workload the importer generates.
     * Defaults to the value provided by the {@link Runtime#availableProcessors() jvm}. There's a discrete
     * number of threads that needs to be used just to get the very basics of the import working,
     * so for that reason there's no lower bound to this value.
     *   "Processor" in the context of the batch importer is different from "thread" since when discovering
     * how many processors are fully in use there's a calculation where one thread takes up 0 < fraction <= 1
     * of a processor.
     *
     * @return the rough maximum number of processors to use.
     */
    int maxNumberOfProcessors();

    /**
     * Default values for a {@link Configuration}. The class is open for extension; values derived from
     * other settings (see {@link #writeBufferSize()}) are computed from the overridable getters.
     */
    class Default
            extends org.neo4j.unsafe.impl.batchimport.staging.Configuration.Default
            implements Configuration
    {
        private static final int DEFAULT_PAGE_SIZE = 1024 * 8;

        /**
         * @return {@code 10_000}.
         */
        @Override
        public int batchSize()
        {
            return 10_000;
        }

        /**
         * Derives the write buffer size from {@link #batchSize()}, capped at one fifth of
         * {@link Runtime#maxMemory()} and rounded to the closest multiple of 30 pages of
         * {@code DEFAULT_PAGE_SIZE}.
         *
         * @return the computed write buffer size.
         */
        @Override
        public int writeBufferSize()
        {
            // Do a little calculation here where the goal of the returned value is that if a file channel
            // would be seen as a batch itself (think asynchronous writing) there would be created roughly
            // as many as the other types of batches.
            int averageRecordSize = 40; // Gut-feel estimate
            int batchesToBuffer = 1000;

            // batchSize() is overridable, so do the multiplication in long: with the constants above an
            // int product silently wraps around for any batch size larger than ~53k.
            long maxWriteBufferSize = (long) batchSize() * averageRecordSize * batchesToBuffer;
            long memoryBound = Runtime.getRuntime().maxMemory() / 5;

            // Clamp to the int range before narrowing so that the cast below can never wrap.
            long writeBufferSize = Math.min( Math.min( maxWriteBufferSize, memoryBound ), Integer.MAX_VALUE );
            return roundToClosest( (int) writeBufferSize, DEFAULT_PAGE_SIZE * 30 );
        }

        /**
         * Rounds {@code value} to the closest multiple of {@code divisible}.
         *
         * @param value the value to round.
         * @param divisible the unit the result must be a multiple of.
         * @return the multiple of {@code divisible} closest to {@code value}; if rounding up would exceed
         * {@link Integer#MAX_VALUE} the next lower multiple is returned instead.
         */
        private int roundToClosest( int value, int divisible )
        {
            double roughCount = (double) value / divisible;
            long count = round( roughCount );
            long rounded = count * divisible;
            if ( rounded > Integer.MAX_VALUE )
            {
                // Rounding up moves at most half a unit past value, so one step down is back in int range.
                rounded -= divisible;
            }
            return (int) rounded;
        }

        /**
         * @return {@code 20}.
         */
        @Override
        public int workAheadSize()
        {
            return 20;
        }

        /**
         * @return the default value of {@link GraphDatabaseSettings#dense_node_threshold}, parsed as an int.
         */
        @Override
        public int denseNodeThreshold()
        {
            return Integer.parseInt( GraphDatabaseSettings.dense_node_threshold.getDefaultValue() );
        }

        /**
         * @return the number of processors reported by {@link Runtime#availableProcessors()}.
         */
        @Override
        public int maxNumberOfProcessors()
        {
            return Runtime.getRuntime().availableProcessors();
        }

        /**
         * @return {@code 100}.
         */
        @Override
        public int movingAverageSize()
        {
            return 100;
        }
    }

    /**
     * Shared instance of the {@link Default} configuration.
     */
    Configuration DEFAULT = new Default();

    /**
     * A {@link Configuration} which reads {@link #denseNodeThreshold()} from a {@link Config} and
     * delegates the other values defined here to a supplied set of defaults.
     */
    class Overridden
            extends org.neo4j.unsafe.impl.batchimport.staging.Configuration.Overridden
            implements Configuration
    {
        private final Configuration defaults;
        private final Config config;

        /**
         * @param defaults configuration to delegate to for values not read from {@code config}.
         * @param config database configuration to read overriding values from.
         */
        public Overridden( Configuration defaults, Config config )
        {
            super( defaults );
            this.defaults = defaults;
            this.config = config;
        }

        /**
         * Uses {@link Configuration#DEFAULT} as the defaults to delegate to.
         *
         * @param config database configuration to read overriding values from.
         */
        public Overridden( Config config )
        {
            this( Configuration.DEFAULT, config );
        }

        /**
         * @return the write buffer size of the supplied defaults.
         */
        @Override
        public int writeBufferSize()
        {
            return defaults.writeBufferSize();
        }

        /**
         * @return the value of {@link GraphDatabaseSettings#dense_node_threshold} in the supplied {@link Config}.
         */
        @Override
        public int denseNodeThreshold()
        {
            return config.get( GraphDatabaseSettings.dense_node_threshold );
        }

        /**
         * @return the max number of processors of the supplied defaults.
         */
        @Override
        public int maxNumberOfProcessors()
        {
            return defaults.maxNumberOfProcessors();
        }

        /**
         * @return the moving average size of the supplied defaults.
         */
        @Override
        public int movingAverageSize()
        {
            return defaults.movingAverageSize();
        }
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy