apoc.export.csv.CsvEntityLoader Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of apoc-core Show documentation
Show all versions of apoc-core Show documentation
Core package for Neo4j Procedures
/*
* Copyright (c) "Neo4j"
* Neo4j Sweden AB [http://neo4j.com]
*
* This file is part of Neo4j.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package apoc.export.csv;
import apoc.export.util.BatchTransaction;
import apoc.export.util.CountingReader;
import apoc.export.util.ProgressReporter;
import apoc.load.CSVResult;
import apoc.load.Mapping;
import apoc.load.util.Results;
import apoc.util.FileUtils;
import com.opencsv.CSVReader;
import com.opencsv.CSVReaderBuilder;
import com.opencsv.RFC4180ParserBuilder;
import java.io.IOException;
import java.net.URISyntaxException;
import java.util.*;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.neo4j.graphdb.*;
import org.neo4j.graphdb.security.URLAccessChecker;
import org.neo4j.graphdb.security.URLAccessValidationError;
import org.neo4j.logging.Log;
public class CsvEntityLoader {
private final CsvLoaderConfig clc;
private final ProgressReporter reporter;
private final Log log;
private final URLAccessChecker urlAccessChecker;
/**
* @param clc configuration object
* @param reporter
*/
public CsvEntityLoader(CsvLoaderConfig clc, ProgressReporter reporter, Log log, URLAccessChecker urlAccessChecker) {
this.clc = clc;
this.reporter = reporter;
this.log = log;
this.urlAccessChecker = urlAccessChecker;
}
/**
* Loads nodes from a CSV file with given labels to an online database, and fills the {@code idMapping},
* which will be used by the {@link #loadRelationships(Object, String, GraphDatabaseService, Map)}
* method.
*
* @param fileName URI/Binary of the CSV file representing the node
* @param labels list of node labels to be applied to each node
* @param db running database instance
* @param idMapping to be filled with the mapping between the CSV ids and the DB's internal node ids
* @throws IOException
*/
public void loadNodes(
final Object fileName,
final List labels,
final GraphDatabaseService db,
final Map> idMapping)
throws IOException, URISyntaxException, URLAccessValidationError {
try (final CountingReader reader = FileUtils.readerFor(fileName, clc.getCompressionAlgo(), urlAccessChecker)) {
final String header = readFirstLine(reader);
final List fields =
CsvHeaderFields.processHeader(header, clc.getDelimiter(), clc.getQuotationCharacter());
final Optional idField = fields.stream()
.filter(f -> CsvLoaderConstants.ID_FIELD.equals(f.getType()))
.findFirst();
if (!idField.isPresent()) {
log.warn(
"Please note that if no ID is specified, the node will be imported but it will not be able to be connected by any relationships during the import");
}
final Optional idAttribute =
idField.isPresent() ? Optional.of(idField.get().getName()) : Optional.empty();
final String idSpace =
idField.isPresent() ? idField.get().getIdSpace() : CsvLoaderConstants.DEFAULT_IDSPACE;
idMapping.putIfAbsent(idSpace, new HashMap<>());
final Map idspaceIdMapping = idMapping.get(idSpace);
final Map mapping = getMapping(fields);
final CSVReader csv = new CSVReaderBuilder(reader)
.withCSVParser(new RFC4180ParserBuilder()
.withSeparator(clc.getDelimiter())
.withQuoteChar(clc.getQuotationCharacter())
.build())
.withSkipLines(clc.getSkipLines() - 1)
.build();
final String[] loadCsvCompatibleHeader =
fields.stream().map(f -> f.getName()).toArray(String[]::new);
AtomicInteger lineNo = new AtomicInteger();
BatchTransaction btx = new BatchTransaction(db, clc.getBatchSize(), reporter);
try {
csv.forEach(line -> {
lineNo.getAndIncrement();
final EnumSet results = EnumSet.of(Results.map);
final CSVResult result = new CSVResult(
loadCsvCompatibleHeader,
line,
lineNo.get(),
false,
mapping,
Collections.emptyList(),
results);
final String nodeCsvId =
(String) idAttribute.map(result.map::get).orElse(null);
// if 'ignore duplicate nodes' is false, there is an id field and the mapping already has the
// current id,
// we either fail the loading process or skip it depending on the 'ignore duplicate nodes' setting
if (idField.isPresent() && idspaceIdMapping.containsKey(nodeCsvId)) {
if (clc.getIgnoreDuplicateNodes()) {
return;
} else {
throw new IllegalStateException("Duplicate node with id " + nodeCsvId + " found on line "
+ lineNo + "\n" + Arrays.toString(line));
}
}
// create node and add its id to the mapping
final Node node = btx.getTransaction().createNode();
if (idField.isPresent()) {
idspaceIdMapping.put(nodeCsvId, node.getElementId());
}
// add labels
for (String label : labels) {
node.addLabel(Label.label(label));
}
// add properties
int props = 0;
for (CsvHeaderField field : fields) {
final String name = field.getName();
Object value = result.map.get(name);
if (field.isMeta()) {
final List customLabels = (List) value;
for (String customLabel : customLabels) {
node.addLabel(Label.label(customLabel));
}
} else if (field.isId()) {
final Object idValue;
if (clc.getStringIds()) {
idValue = value;
} else {
idValue = Long.valueOf((String) value);
}
node.setProperty(field.getName(), idValue);
props++;
} else {
boolean propertyAdded =
CsvPropertyConverter.addPropertyToGraphEntity(node, field, value, clc);
props += propertyAdded ? 1 : 0;
}
}
btx.increment();
reporter.update(1, 0, props++);
});
btx.doCommit();
} catch (RuntimeException e) {
btx.rollback();
throw e;
} finally {
btx.close();
}
}
}
/**
* Loads relationships from a CSV file with given relationship types to an online database,
* using the {@code idMapping} created by the
* {@link #loadNodes(Object, List, GraphDatabaseService, Map)} method.
*
* @param data URI / Binary of the CSV file representing the relationship
* @param type relationship type to be applied to each relationships
* and a key type, that is the relationship type to be applied to each relationships
* @param db running database instance
* @param idMapping stores mapping between the CSV ids and the DB's internal node ids
* @throws IOException
*/
public void loadRelationships(
final Object data,
final String type,
final GraphDatabaseService db,
final Map> idMapping)
throws IOException, URISyntaxException, URLAccessValidationError {
try (final CountingReader reader = FileUtils.readerFor(data, clc.getCompressionAlgo(), urlAccessChecker)) {
final String header = readFirstLine(reader);
final List fields =
CsvHeaderFields.processHeader(header, clc.getDelimiter(), clc.getQuotationCharacter());
final CsvHeaderField startIdField = fields.stream()
.filter(f -> CsvLoaderConstants.START_ID_FIELD.equals(f.getType()))
.findFirst()
.get();
final CsvHeaderField endIdField = fields.stream()
.filter(f -> CsvLoaderConstants.END_ID_FIELD.equals(f.getType()))
.findFirst()
.get();
final List edgePropertiesFields = fields.stream()
.filter(field -> !CsvLoaderConstants.START_ID_FIELD.equals(field.getType()))
.filter(field -> !CsvLoaderConstants.END_ID_FIELD.equals(field.getType()))
.collect(Collectors.toList());
final Map mapping = getMapping(fields);
try (final var csv = new CSVReaderBuilder(reader)
.withCSVParser(new RFC4180ParserBuilder()
.withSeparator(clc.getDelimiter())
.withQuoteChar(clc.getQuotationCharacter())
.build())
.withSkipLines(clc.getSkipLines() - 1)
.build()) {
final String[] loadCsvCompatibleHeader =
fields.stream().map(f -> f.getName()).toArray(String[]::new);
AtomicInteger lineNo = new AtomicInteger();
BatchTransaction btx = new BatchTransaction(db, clc.getBatchSize(), reporter);
try {
csv.forEach(line -> {
lineNo.getAndIncrement();
final EnumSet results = EnumSet.of(Results.map);
final CSVResult result = new CSVResult(
loadCsvCompatibleHeader,
line,
lineNo.get(),
false,
mapping,
Collections.emptyList(),
results);
final Object startId = result.map.get(CsvLoaderConstants.START_ID_ATTR);
final Object startInternalId =
idMapping.get(startIdField.getIdSpace()).get(startId.toString());
if (startInternalId == null) {
throw new IllegalStateException("Node for id space " + endIdField.getIdSpace() + " and id "
+ startId + " not found");
}
final Node source = btx.getTransaction().getNodeByElementId(startInternalId.toString());
final Object endId = result.map.get(CsvLoaderConstants.END_ID_ATTR);
final Object endInternalId =
idMapping.get(endIdField.getIdSpace()).get(endId.toString());
if (endInternalId == null) {
throw new IllegalStateException(
"Node for id space " + endIdField.getIdSpace() + " and id " + endId + " not found");
}
final Node target = btx.getTransaction().getNodeByElementId(endInternalId.toString());
final String currentType;
final Object overridingType = result.map.get(CsvLoaderConstants.TYPE_ATTR);
if (overridingType != null && !((String) overridingType).isEmpty()) {
currentType = (String) overridingType;
} else {
currentType = type;
}
final Relationship rel =
source.createRelationshipTo(target, RelationshipType.withName(currentType));
// add properties
int props = 0;
for (CsvHeaderField field : edgePropertiesFields) {
final String name = field.getName();
Object value = result.map.get(name);
boolean propertyAdded =
CsvPropertyConverter.addPropertyToGraphEntity(rel, field, value, clc);
props += propertyAdded ? 1 : 0;
}
btx.increment();
reporter.update(0, 1, props);
});
btx.doCommit();
} catch (RuntimeException e) {
btx.rollback();
throw e;
} finally {
btx.close();
}
}
}
}
private Map getMapping(List fields) {
return fields.stream().collect(Collectors.toMap(CsvHeaderField::getName, f -> {
final Map mappingMap = Collections.unmodifiableMap(Stream.of(
new AbstractMap.SimpleEntry<>("type", f.getType()),
new AbstractMap.SimpleEntry<>("array", f.isArray()),
new AbstractMap.SimpleEntry<>("optionalData", f.getOptionalData()))
.collect(Collectors.toMap(AbstractMap.SimpleEntry::getKey, AbstractMap.SimpleEntry::getValue)));
return new Mapping(f.getName(), mappingMap, clc.getArrayDelimiter(), false);
}));
}
private static String readFirstLine(CountingReader reader) throws IOException {
String line = "";
int i;
while ((i = reader.read()) != 0) {
char c = (char) i;
if (c == '\n') break;
line += c;
}
return line;
}
}