
apoc.load.LoadCsv
package apoc.load;
import apoc.export.util.CountingReader;
import apoc.export.util.FileUtils;
import apoc.meta.Meta;
import au.com.bytecode.opencsv.CSVReader;
import org.neo4j.graphdb.GraphDatabaseService;
import org.neo4j.procedure.Context;
import org.neo4j.procedure.Description;
import org.neo4j.procedure.Name;
import org.neo4j.procedure.Procedure;
import java.io.IOException;
import java.util.*;
import java.util.function.Consumer;
import java.util.regex.Pattern;
import java.util.stream.Stream;
import java.util.stream.StreamSupport;
import static apoc.util.Util.cleanUrl;
import static java.util.Arrays.asList;
public class LoadCsv {
    public static final char DEFAULT_ARRAY_SEP = ';';
    public static final char DEFAULT_SEP = ',';

    @Context
    public GraphDatabaseService db;

    @Procedure
    @Description("apoc.load.csv('url',{config}) YIELD lineNo, list, map - load CSV from URL as stream of values,\n config contains any of: {skip:1,limit:5,header:false,sep:'TAB',ignore:['tmp'],arraySep:';',mapping:{years:{type:'int',arraySep:'-',array:false,name:'age',ignore:false}}}")
    public Stream<CSVResult> csv(@Name("url") String url, @Name("config") Map<String, Object> config) {
        try {
            CountingReader reader = FileUtils.readerFor(url);
            char separator = separator(config, "sep", DEFAULT_SEP);
            char arraySep = separator(config, "arraySep", DEFAULT_ARRAY_SEP);
            long skip = longValue(config, "skip", 0L);
            boolean hasHeader = booleanValue(config, "header", true);
            long limit = longValue(config, "limit", Long.MAX_VALUE);
            List<String> ignore = value(config, "ignore", Collections.emptyList());
            Map<String, Mapping> mappings = createMapping(value(config, "mapping", Collections.emptyMap()), arraySep, ignore);
            CSVReader csv = new CSVReader(reader, separator);
            String[] header = getHeader(hasHeader, csv, ignore, mappings);
            boolean checkIgnore = !ignore.isEmpty() || mappings.values().stream().anyMatch(m -> m.ignore);
            return StreamSupport.stream(new CSVSpliterator(csv, header, url, skip, limit, checkIgnore, mappings), false);
        } catch (IOException e) {
            throw new RuntimeException("Can't read CSV from URL " + cleanUrl(url), e);
        }
    }
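    // Example call from Cypher, following the @Description above. This is only a
    // sketch: the URL and the "years"/"age" column names are placeholders, not
    // part of this class.
    //
    //   CALL apoc.load.csv('https://example.com/people.csv',
    //        {skip:1, limit:100, sep:',', header:true,
    //         mapping:{years:{type:'int', name:'age'}}})
    //   YIELD lineNo, list, map
    //   RETURN lineNo, map.age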
    private Map<String, Mapping> createMapping(Map<String, Map<String, Object>> mapping, char arraySep, List<String> ignore) {
        if (mapping.isEmpty()) return Collections.emptyMap();
        HashMap<String, Mapping> result = new HashMap<>(mapping.size());
        for (Map.Entry<String, Map<String, Object>> entry : mapping.entrySet()) {
            String name = entry.getKey();
            result.put(name, new Mapping(name, entry.getValue(), arraySep, ignore.contains(name)));
        }
        return result;
    }
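    // Shape of the "mapping" config this method consumes (keys taken from the
    // @Description; the column name "years" is illustrative only):
    //   {years: {type:'int', arraySep:'-', array:false, name:'age', ignore:false}}
    // Each entry is turned into one Mapping, keyed by the original column name.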
    static class Mapping {
        public static final Mapping EMPTY = new Mapping("", Collections.emptyMap(), DEFAULT_ARRAY_SEP, false);
        final String name;
        final Meta.Types type;
        final boolean array;
        final boolean ignore;
        final char arraySep;
        private final String arrayPattern;
        public Mapping(String name, Map<String, Object> mapping, char arraySep, boolean ignore) {
            this.name = mapping.getOrDefault("name", name).toString();
            this.array = (Boolean) mapping.getOrDefault("array", false);
            this.ignore = (Boolean) mapping.getOrDefault("ignore", ignore);
            this.arraySep = separator(mapping.getOrDefault("arraySep", arraySep).toString(), DEFAULT_ARRAY_SEP);
            this.type = Meta.Types.from(mapping.getOrDefault("type", "STRING").toString());
            this.arrayPattern = Pattern.compile(String.valueOf(this.arraySep), Pattern.LITERAL).toString();
        }
        public Object convert(String value) {
            return array ? convertArray(value) : convertType(value);
        }
        private Object convertArray(String value) {
            String[] values = value.split(arrayPattern);
            List<Object> result = new ArrayList<>(values.length);
            for (String v : values) {
                result.add(convertType(v));
            }
            return result;
        }