org.neo4j.tooling.ImportTool Maven / Gradle / Ivy
/*
* Copyright (c) 2002-2016 "Neo Technology,"
* Network Engine for Objects in Lund AB [http://neotechnology.com]
*
* This file is part of Neo4j.
*
* Neo4j is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package org.neo4j.tooling;
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.IOException;
import java.io.OutputStream;
import java.io.PrintStream;
import java.nio.charset.Charset;
import java.util.Arrays;
import java.util.Collection;
import java.util.Map.Entry;
import java.util.function.BiFunction;
import java.util.function.Function;
import org.neo4j.csv.reader.IllegalMultilineFieldException;
import org.neo4j.graphdb.factory.GraphDatabaseSettings;
import org.neo4j.helpers.Args;
import org.neo4j.helpers.Args.Option;
import org.neo4j.helpers.ArrayUtil;
import org.neo4j.helpers.Exceptions;
import org.neo4j.helpers.Strings;
import org.neo4j.helpers.collection.IterableWrapper;
import org.neo4j.helpers.collection.Iterables;
import org.neo4j.helpers.collection.MapUtil;
import org.neo4j.io.fs.DefaultFileSystemAbstraction;
import org.neo4j.io.fs.FileSystemAbstraction;
import org.neo4j.kernel.configuration.Config;
import org.neo4j.kernel.impl.logging.LogService;
import org.neo4j.kernel.impl.logging.StoreLogService;
import org.neo4j.kernel.impl.storemigration.ExistingTargetStrategy;
import org.neo4j.kernel.impl.storemigration.FileOperation;
import org.neo4j.kernel.impl.storemigration.StoreFile;
import org.neo4j.kernel.impl.storemigration.StoreFileType;
import org.neo4j.kernel.impl.util.Converters;
import org.neo4j.kernel.impl.util.OsBeanUtil;
import org.neo4j.kernel.impl.util.Validator;
import org.neo4j.kernel.impl.util.Validators;
import org.neo4j.kernel.internal.Version;
import org.neo4j.kernel.lifecycle.LifeSupport;
import org.neo4j.unsafe.impl.batchimport.BatchImporter;
import org.neo4j.unsafe.impl.batchimport.ParallelBatchImporter;
import org.neo4j.unsafe.impl.batchimport.cache.idmapping.string.DuplicateInputIdException;
import org.neo4j.unsafe.impl.batchimport.input.Collector;
import org.neo4j.unsafe.impl.batchimport.input.Input;
import org.neo4j.unsafe.impl.batchimport.input.InputNode;
import org.neo4j.unsafe.impl.batchimport.input.InputRelationship;
import org.neo4j.unsafe.impl.batchimport.input.MissingRelationshipDataException;
import org.neo4j.unsafe.impl.batchimport.input.csv.Configuration;
import org.neo4j.unsafe.impl.batchimport.input.csv.CsvInput;
import org.neo4j.unsafe.impl.batchimport.input.csv.DataFactory;
import org.neo4j.unsafe.impl.batchimport.input.csv.IdType;
import org.neo4j.unsafe.impl.batchimport.staging.ExecutionMonitors;
import static java.nio.charset.Charset.defaultCharset;
import static org.neo4j.helpers.Exceptions.launderedException;
import static org.neo4j.helpers.Format.bytes;
import static org.neo4j.helpers.Strings.TAB;
import static org.neo4j.io.ByteUnit.mebiBytes;
import static org.neo4j.kernel.impl.util.Converters.withDefault;
import static org.neo4j.unsafe.impl.batchimport.Configuration.BAD_FILE_NAME;
import static org.neo4j.unsafe.impl.batchimport.input.Collectors.badCollector;
import static org.neo4j.unsafe.impl.batchimport.input.Collectors.collect;
import static org.neo4j.unsafe.impl.batchimport.input.InputEntityDecorators.NO_NODE_DECORATOR;
import static org.neo4j.unsafe.impl.batchimport.input.InputEntityDecorators.additiveLabels;
import static org.neo4j.unsafe.impl.batchimport.input.InputEntityDecorators.defaultRelationshipType;
import static org.neo4j.unsafe.impl.batchimport.input.csv.Configuration.COMMAS;
import static org.neo4j.unsafe.impl.batchimport.input.csv.DataFactories.data;
import static org.neo4j.unsafe.impl.batchimport.input.csv.DataFactories.defaultFormatNodeFileHeader;
import static org.neo4j.unsafe.impl.batchimport.input.csv.DataFactories.defaultFormatRelationshipFileHeader;
/**
* User-facing command line tool around a {@link BatchImporter}.
*/
public class ImportTool
{
/**
 * Options accepted on the command line by the import tool.
 *
 * Each option carries the key it is specified with (rendered as {@code --key}), an optional default value,
 * a usage string describing the expected value and a human readable description. From these the console
 * usage ({@link #printUsage(PrintStream)}) and the documentation entries ({@link #manPageEntry()},
 * {@link #manualEntry()}) are rendered.
 */
enum Options
{
    STORE_DIR( "into", null,
            "",
            "Database directory to import into. " + "Must not contain existing database." ),
    NODE_DATA( "nodes", null,
            "[:Label1:Label2] \"" + MULTI_FILE_DELIMITER + "" + MULTI_FILE_DELIMITER + "...\"",
            "Node CSV header and data. Multiple files will be logically seen as one big file "
                    + "from the perspective of the importer. "
                    + "The first line must contain the header. "
                    + "Multiple data sources like these can be specified in one import, "
                    + "where each data source has its own header. "
                    + "Note that file groups must be enclosed in quotation marks.",
            true ),
    RELATIONSHIP_DATA( "relationships", null,
            "[:RELATIONSHIP_TYPE] \"" + MULTI_FILE_DELIMITER + "" +
                    MULTI_FILE_DELIMITER + "...\"",
            "Relationship CSV header and data. Multiple files will be logically seen as one big file "
                    + "from the perspective of the importer. "
                    + "The first line must contain the header. "
                    + "Multiple data sources like these can be specified in one import, "
                    + "where each data source has its own header. "
                    + "Note that file groups must be enclosed in quotation marks.",
            true ),
    DELIMITER( "delimiter", null,
            "",
            "Delimiter character, or 'TAB', between values in CSV data. The default option is `" + COMMAS.delimiter() + "`." ),
    ARRAY_DELIMITER( "array-delimiter", null,
            "",
            "Delimiter character, or 'TAB', between array elements within a value in CSV data. The default option is `" + COMMAS.arrayDelimiter() + "`." ),
    QUOTE( "quote", null,
            "",
            "Character to treat as quotation character for values in CSV data. "
                    + "The default option is `" + COMMAS.quotationCharacter() + "`. "
                    + "Quotes inside quotes escaped like `\"\"\"Go away\"\", he said.\"` and "
                    + "`\"\\\"Go away\\\", he said.\"` are supported. "
                    + "If you have set \"`'`\" to be used as the quotation character, "
                    + "you could write the previous example like this instead: " + "`'\"Go away\", he said.'`" ),
    MULTILINE_FIELDS( "multiline-fields", org.neo4j.csv.reader.Configuration.DEFAULT.multilineFields(),
            "",
            "Whether or not fields from input source can span multiple lines, i.e. contain newline characters." ),
    INPUT_ENCODING( "input-encoding", null,
            "",
            "Character set that input data is encoded in. Provided value must be one out of the available "
                    + "character sets in the JVM, as provided by Charset#availableCharsets(). "
                    + "If no input encoding is provided, the default character set of the JVM will be used." ),
    IGNORE_EMPTY_STRINGS( "ignore-empty-strings", org.neo4j.csv.reader.Configuration.DEFAULT.emptyQuotedStringsAsNull(),
            "",
            "Whether or not empty string fields, i.e. \"\" from input source are ignored, i.e. treated as null." ),
    ID_TYPE( "id-type", IdType.STRING,
            "",
            "One out of " + Arrays.toString( IdType.values() )
                    + " and specifies how ids in node/relationship "
                    + "input files are treated.\n"
                    + IdType.STRING + ": arbitrary strings for identifying nodes.\n"
                    + IdType.INTEGER + ": arbitrary integer values for identifying nodes.\n"
                    + IdType.ACTUAL + ": (advanced) actual node ids. The default option is `" + IdType.STRING + "`." ),
    PROCESSORS( "processors", null,
            "",
            "(advanced) Max number of processors used by the importer. Defaults to the number of "
                    + "available processors reported by the JVM"
                    + availableProcessorsHint()
                    + ". There is a certain amount of minimum threads needed so for that reason there "
                    + "is no lower bound for this value. For optimal performance this value shouldn't be "
                    + "greater than the number of available processors." ),
    STACKTRACE( "stacktrace", null,
            "",
            "Enable printing of error stack traces." ),
    BAD_TOLERANCE( "bad-tolerance", 1000,
            "",
            "Number of bad entries before the import is considered failed. This tolerance threshold is "
                    + "about relationships referring to missing nodes. Format errors in input data are "
                    + "still treated as errors" ),
    SKIP_BAD_RELATIONSHIPS( "skip-bad-relationships", Boolean.TRUE,
            "",
            "Whether or not to skip importing relationships that refer to missing node ids, i.e. either "
                    + "start or end node id/group referring to node that wasn't specified by the "
                    + "node input data. "
                    + "Skipped relationships will be logged"
                    + ", containing at most number of entities specified by " + BAD_TOLERANCE.key() + "." ),
    SKIP_DUPLICATE_NODES( "skip-duplicate-nodes", Boolean.FALSE,
            "",
            "Whether or not to skip importing nodes that have the same id/group. In the event of multiple "
                    + "nodes within the same group having the same id, the first encountered will be imported "
                    + "whereas consecutive such nodes will be skipped. "
                    + "Skipped nodes will be logged"
                    + ", containing at most number of entities specified by " + BAD_TOLERANCE.key() + "." ),
    IGNORE_EXTRA_COLUMNS( "ignore-extra-columns", Boolean.FALSE,
            "",
            "Whether or not to ignore extra columns in the data not specified by the header. "
                    + "Skipped columns will be logged, containing at most number of entities specified by "
                    + BAD_TOLERANCE.key() + "." ),
    DATABASE_CONFIG( "db-config", null,
            "",
            "(advanced) File specifying database-specific configuration. For more information consult "
                    + "manual about available configuration options for a neo4j configuration file. "
                    + "Only configuration affecting store at time of creation will be read. "
                    + "Examples of supported config are:\n"
                    + GraphDatabaseSettings.dense_node_threshold.name() + "\n"
                    + GraphDatabaseSettings.string_block_size.name() + "\n"
                    + GraphDatabaseSettings.array_block_size.name() );

    // Name of the option as given on the command line, without the leading "--".
    private final String key;
    // Value reported as default in descriptions, or null if no default is advertised.
    private final Object defaultValue;
    // Short text describing the expected value; may be empty.
    private final String usage;
    // Human readable description, may contain backticks which are stripped for console output.
    private final String description;
    // If true the usage is appended directly to the argument, without a separating space.
    private final boolean keyAndUsageGoTogether;

    /**
     * Creates an option whose usage is separated from the argument by a space.
     */
    Options( String key, Object defaultValue, String usage, String description )
    {
        this( key, defaultValue, usage, description, false );
    }

    /**
     * @param key name of the option, without leading dashes.
     * @param defaultValue default value to mention in the description, or {@code null} for none.
     * @param usage text describing the expected value.
     * @param description human readable description of the option.
     * @param keyAndUsageGoTogether whether the usage follows the argument directly, without a space in between.
     */
    Options( String key, Object defaultValue, String usage, String description, boolean keyAndUsageGoTogether )
    {
        this.key = key;
        this.defaultValue = defaultValue;
        this.usage = usage;
        this.description = description;
        this.keyAndUsageGoTogether = keyAndUsageGoTogether;
    }

    /**
     * @return the name of this option, without leading dashes.
     */
    String key()
    {
        return key;
    }

    /**
     * @return the option as written on the command line, i.e. the key prefixed with {@code --}.
     */
    String argument()
    {
        return "--" + key();
    }

    /**
     * Prints the argument with its usage on one line, followed by the description (including default value)
     * split into lines by {@link Args#splitLongLine(String, int)} with a length of 80, each prefixed by a tab.
     * Backticks in the description are removed for this console output.
     *
     * @param out stream to print to.
     */
    void printUsage( PrintStream out )
    {
        out.println( argument() + usageSuffix() );
        for ( String line : Args.splitLongLine( descriptionWithDefaultValue().replace( "`", "" ), 80 ) )
        {
            out.println( "\t" + line );
        }
    }

    private String spaceInBetweenArgumentAndUsage()
    {
        return keyAndUsageGoTogether ? "" : " ";
    }

    /**
     * @return the usage preceded by its separator, or an empty string if this option has no usage text,
     * so that no dangling separator is rendered after the argument.
     */
    private String usageSuffix()
    {
        return usage.isEmpty() ? "" : spaceInBetweenArgumentAndUsage() + usage;
    }

    /**
     * @return the description, and if a default value exists also a trailing {@code Default value: ...}
     * sentence. A full stop is added after the description first, if it doesn't already end with one.
     */
    String descriptionWithDefaultValue()
    {
        String result = description;
        if ( defaultValue != null )
        {
            if ( !result.endsWith( "." ) )
            {
                result += ".";
            }
            result += " Default value: " + defaultValue;
        }
        return result;
    }

    /**
     * @return an entry of the form {@code *--key usage*::}, followed by the description on the next line
     * and a blank line. The hint about the number of processors of the current machine is removed from
     * the description, presumably so that generated documentation doesn't depend on where it was generated.
     */
    String manPageEntry()
    {
        String filteredDescription = descriptionWithDefaultValue().replace( availableProcessorsHint(), "" );
        return "*" + argument() + usageSuffix() + "*::\n" + filteredDescription + "\n\n";
    }

    /**
     * @return the {@link #manPageEntry()} preceded by an anchor line of the form
     * {@code [[import-tool-option-KEY]]} and followed by a {@code //^} line.
     */
    String manualEntry()
    {
        return "[[import-tool-option-" + key() + "]]\n" + manPageEntry() + "//^\n\n";
    }

    /**
     * @return the default value of this option, or {@code null} if there is none.
     */
    Object defaultValue()
    {
        return defaultValue;
    }

    /**
     * @return a parenthesized hint with the number of available processors reported by the current JVM.
     */
    private static String availableProcessorsHint()
    {
        return " (in your case " + Runtime.getRuntime().availableProcessors() + ")";
    }
}
/**
 * Delimiter used between the files of one input group, i.e. between the file names given in the value
 * of a single {@code --nodes} or {@code --relationships} argument.
 */
static final String MULTI_FILE_DELIMITER = ",";
/**
 * Command line entry point of the import tool. Delegates to {@link #main(String[], boolean)} with the
 * test-oriented default settings switched off.
 *
 * @param incomingArguments arguments for specifying input and configuration for the import.
 * @throws IOException if thrown by the delegated call.
 */
public static void main( String[] incomingArguments ) throws IOException
{
    // Regular command line runs never use the defaults geared towards tests.
    final boolean defaultSettingsSuitableForTests = false;
    main( incomingArguments, defaultSettingsSuitableForTests );
}
/**
* Runs the import tool given the supplied arguments.
*
* @param incomingArguments arguments for specifying input and configuration for the import.
* @param defaultSettingsSuitableForTests default configuration geared towards unit/integration
* test environments, for example lower default buffer sizes.
*/
public static void main( String[] incomingArguments, boolean defaultSettingsSuitableForTests ) throws IOException
{
Args args = Args.parse( incomingArguments );
if ( ArrayUtil.isEmpty( incomingArguments ) || asksForUsage( args ) )
{
printUsage( System.out );
return;
}
FileSystemAbstraction fs = new DefaultFileSystemAbstraction();
File storeDir;
Collection
© 2015 - 2025 Weber Informatics LLC | Privacy Policy