apoc.export.csv.CsvEntityLoader Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of apoc Show documentation
Show all versions of apoc Show documentation
A collection of useful Neo4j Procedures
package apoc.export.csv;
import apoc.export.util.BatchTransaction;
import apoc.export.util.CountingReader;
import apoc.export.util.ProgressReporter;
import apoc.load.LoadCsv;
import apoc.load.util.LoadCsvConfig;
import apoc.util.FileUtils;
import com.opencsv.CSVReader;
import org.neo4j.graphdb.GraphDatabaseService;
import org.neo4j.graphdb.Label;
import org.neo4j.graphdb.Node;
import org.neo4j.graphdb.Relationship;
import org.neo4j.graphdb.RelationshipType;
import java.io.IOException;
import java.util.AbstractMap;
import java.util.Collections;
import java.util.EnumSet;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import java.util.Arrays;
public class CsvEntityLoader {
private final CsvLoaderConfig clc;
private final ProgressReporter reporter;
/**
* @param clc configuration object
* @param reporter
*/
public CsvEntityLoader(CsvLoaderConfig clc, ProgressReporter reporter) {
this.clc = clc;
this.reporter = reporter;
}
/**
* Loads nodes from a CSV file with given labels to an online database, and fills the {@code idMapping},
* which will be used by the {@link #loadRelationships(String, String, GraphDatabaseService, Map)}
* method.
*
* @param fileName URI of the CSV file representing the node
* @param labels list of node labels to be applied to each node
* @param db running database instance
* @param idMapping to be filled with the mapping between the CSV ids and the DB's internal node ids
* @throws IOException
*/
public void loadNodes(final String fileName, final List labels, final GraphDatabaseService db,
final Map> idMapping) throws IOException {
final CountingReader reader = FileUtils.readerFor(fileName);
final String header = readFirstLine(reader);
reader.skip(clc.getSkipLines() - 1);
final List fields = CsvHeaderFields.processHeader(header, clc.getDelimiter(), clc.getQuotationCharacter());
final Optional idField = fields.stream()
.filter(f -> CsvLoaderConstants.ID_FIELD.equals(f.getType()))
.findFirst();
final Optional idAttribute = idField.isPresent() ? Optional.of(idField.get().getName()) : Optional.empty();
final String idSpace = idField.isPresent() ? idField.get().getIdSpace() : CsvLoaderConstants.DEFAULT_IDSPACE;
idMapping.putIfAbsent(idSpace, new HashMap<>());
final Map idspaceIdMapping = idMapping.get(idSpace);
final Map mapping = fields.stream().collect(
Collectors.toMap(
CsvHeaderField::getName,
f -> {
final Map mappingMap = Collections
.unmodifiableMap(Stream.of(
new AbstractMap.SimpleEntry<>("type", f.getType()),
new AbstractMap.SimpleEntry<>("array", f.isArray())
).collect(Collectors.toMap(AbstractMap.SimpleEntry::getKey, AbstractMap.SimpleEntry::getValue)));
return new LoadCsv.Mapping(f.getName(), mappingMap, clc.getArrayDelimiter(), false);
}
)
);
final CSVReader csv = new CSVReader(reader, clc.getDelimiter(), clc.getQuotationCharacter());
final String[] loadCsvCompatibleHeader = fields.stream().map(f -> f.getName()).toArray(String[]::new);
int lineNo = 0;
try (BatchTransaction tx = new BatchTransaction(db, clc.getBatchSize(), reporter)) {
for (String[] line : csv.readAll()) {
lineNo++;
final EnumSet results = EnumSet.of(LoadCsvConfig.Results.map);
final LoadCsv.CSVResult result = new LoadCsv.CSVResult(
loadCsvCompatibleHeader, line, lineNo, false, mapping, Collections.emptyList(), results
);
final String nodeCsvId = result.map.get(idAttribute.get()).toString();
// if 'ignore duplicate nodes' is false, there is an id field and the mapping already has the current id,
// we either fail the loading process or skip it depending on the 'ignore duplicate nodes' setting
if (idField.isPresent() && idspaceIdMapping.containsKey(nodeCsvId)) {
if (clc.getIgnoreDuplicateNodes()) {
continue;
} else {
throw new IllegalStateException("Duplicate node with id " + nodeCsvId + " found on line "+lineNo+"\n"
+Arrays.toString(line));
}
}
// create node and add its id to the mapping
final Node node = db.createNode();
if (idField.isPresent()) {
idspaceIdMapping.put(nodeCsvId, node.getId());
}
// add labels
for (String label : labels) {
node.addLabel(Label.label(label));
}
// add properties
int props = 0;
for (CsvHeaderField field : fields) {
final String name = field.getName();
Object value = result.map.get(name);
if (field.isMeta()) {
final List customLabels = (List) value;
for (String customLabel : customLabels) {
node.addLabel(Label.label(customLabel));
}
} else if (field.isId()) {
final Object idValue;
if (clc.getStringIds()) {
idValue = value;
} else {
idValue = Long.valueOf((String) value);
}
node.setProperty(field.getName(), idValue);
props++;
} else {
boolean propertyAdded = CsvPropertyConverter.addPropertyToGraphEntity(node, field, value);
props += propertyAdded ? 1 : 0;
}
}
reporter.update(1, 0, props++);
}
}
}
/**
* Loads relationships from a CSV file with given relationship types to an online database,
* using the {@code idMapping} created by the
* {@link #loadNodes(String, List, GraphDatabaseService, Map)} method.
*
* @param fileName URI of the CSV file representing the relationship
* @param type relationship type to be applied to each relationships
* @param db running database instance
* @param idMapping stores mapping between the CSV ids and the DB's internal node ids
* @throws IOException
*/
public void loadRelationships(
final String fileName, final String type, final GraphDatabaseService db,
final Map> idMapping) throws IOException {
final CountingReader reader = FileUtils.readerFor(fileName);
final String header = readFirstLine(reader);
final List fields = CsvHeaderFields.processHeader(header, clc.getDelimiter(), clc.getQuotationCharacter());
final CsvHeaderField startIdField = fields.stream()
.filter(f -> CsvLoaderConstants.START_ID_FIELD.equals(f.getType()))
.findFirst().get();
final CsvHeaderField endIdField = fields.stream()
.filter(f -> CsvLoaderConstants.END_ID_FIELD.equals(f.getType()))
.findFirst().get();
final List edgePropertiesFields = fields.stream()
.filter(field -> !CsvLoaderConstants.START_ID_FIELD.equals(field.getType()))
.filter(field -> !CsvLoaderConstants.END_ID_FIELD.equals(field.getType()))
.collect(Collectors.toList());
final Map mapping = fields.stream().collect(
Collectors.toMap(
CsvHeaderField::getName,
f -> {
final Map mappingMap = Collections
.unmodifiableMap(Stream.of(
new AbstractMap.SimpleEntry<>("type", f.getType()),
new AbstractMap.SimpleEntry<>("array", f.isArray())
).collect(Collectors.toMap(AbstractMap.SimpleEntry::getKey, AbstractMap.SimpleEntry::getValue)));
return new LoadCsv.Mapping(f.getName(), mappingMap, clc.getArrayDelimiter(), false);
}
)
);
final CSVReader csv = new CSVReader(reader, clc.getDelimiter());
final String[] loadCsvCompatibleHeader = fields.stream().map(f -> f.getName()).toArray(String[]::new);
int lineNo = 0;
try (BatchTransaction tx = new BatchTransaction(db, clc.getBatchSize(), reporter)) {
for (String[] line : csv.readAll()) {
lineNo++;
final EnumSet results = EnumSet.of(LoadCsvConfig.Results.map);
final LoadCsv.CSVResult result = new LoadCsv.CSVResult(
loadCsvCompatibleHeader, line, lineNo, false, mapping, Collections.emptyList(), results
);
final Object startId = result.map.get(CsvLoaderConstants.START_ID_ATTR);
final Object startInternalId = idMapping.get(startIdField.getIdSpace()).get(startId);
if (startInternalId == null) {
throw new IllegalStateException("Node for id space " + endIdField.getIdSpace() + " and id " + startId + " not found");
}
final Node source = db.getNodeById((long) startInternalId);
final Object endId = result.map.get(CsvLoaderConstants.END_ID_ATTR);
final Object endInternalId = idMapping.get(endIdField.getIdSpace()).get(endId);
if (endInternalId == null) {
throw new IllegalStateException("Node for id space " + endIdField.getIdSpace() + " and id " + endId + " not found");
}
final Node target = db.getNodeById((long) endInternalId);
final String currentType;
final Object overridingType = result.map.get(CsvLoaderConstants.TYPE_ATTR);
if (overridingType != null && !((String) overridingType).isEmpty()) {
currentType = (String) overridingType;
} else {
currentType = type;
}
final Relationship rel = source.createRelationshipTo(target, RelationshipType.withName(currentType));
// add properties
int props = 0;
for (CsvHeaderField field : edgePropertiesFields) {
final String name = field.getName();
Object value = result.map.get(name);
boolean propertyAdded = CsvPropertyConverter.addPropertyToGraphEntity(rel, field, value);
props += propertyAdded ? 1 : 0;
}
reporter.update(0, 1, props);
}
}
}
private static String readFirstLine(CountingReader reader) throws IOException {
String line = "";
int i;
while ((i = reader.read()) != 0) {
char c = (char) i;
if (c == '\n') break;
line += c;
}
return line;
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy