All Downloads are FREE. Search and download functionalities are using the official Maven repository.

apoc.load.LoadXls Maven / Gradle / Ivy

There is a newer version: 4.4.0.34
Show newest version
/*
 * Copyright (c) "Neo4j"
 * Neo4j Sweden AB [http://neo4j.com]
 *
 * This file is part of Neo4j.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package apoc.load;

import static apoc.export.xls.ExportXlsHandler.XLS_MISSING_DEPS_ERROR;
import static apoc.load.LoadXlsHandler.getXlsSpliterator;
import static apoc.util.DateParseUtil.dateParse;
import static apoc.util.Util.*;
import static java.util.Arrays.asList;
import static java.util.Collections.emptyList;

import apoc.Extended;
import apoc.export.util.CountingInputStream;
import apoc.meta.Meta;
import apoc.util.FileUtils;
import apoc.util.MissingDependencyException;
import apoc.util.Util;
import java.time.*;
import java.time.temporal.TemporalAccessor;
import java.util.*;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import java.util.stream.StreamSupport;
import org.apache.commons.lang3.StringUtils;
import org.neo4j.graphdb.GraphDatabaseService;
import org.neo4j.procedure.Context;
import org.neo4j.procedure.Description;
import org.neo4j.procedure.Name;
import org.neo4j.procedure.Procedure;

@Extended
public class LoadXls {

    public static final char DEFAULT_ARRAY_SEP = ';';

    @Context
    public GraphDatabaseService db;

    enum Results {
        map,
        list,
        strings,
        stringMap
    }

    // Sheet1!$A$1:$F$1
    static class Selection {
        private static final Pattern PATTERN =
                Pattern.compile("([a-z]+)(\\d+)?(?::([a-z]+)(\\d+)?)?", Pattern.CASE_INSENSITIVE);
        private static final int DEFAULT = -1;
        String sheet;
        int top = DEFAULT;
        int left = DEFAULT;
        int bottom = DEFAULT;
        int right = DEFAULT;

        public Selection(String selector) {
            String[] parts = selector.split("!");
            sheet = parts[0];
            if (parts.length > 1 && !parts[1].trim().isEmpty()) {
                String range = parts[1].trim().replace("$", "");
                Matcher matcher = PATTERN.matcher(range);
                if (matcher.matches()) {
                    left = toCol(matcher.group(1), DEFAULT);
                    top = toRow(matcher.group(2), DEFAULT);
                    right = toCol(matcher.group(3), left);
                    if (right != DEFAULT) right++;
                    bottom = toRow(matcher.group(4), DEFAULT);
                }
            }
        }

        int getOrDefault(int value, int given) {
            return value == DEFAULT ? given : value;
        }

        private int toCol(String col, int defaultValue) {
            if (col == null || col.trim().isEmpty()) return defaultValue;
            AtomicInteger index = new AtomicInteger(0);
            return Stream.of(col.trim().toUpperCase().split(""))
                    .map(str -> new AbstractMap.SimpleEntry<>(index.getAndIncrement(), str.charAt(0) - 65))
                    .map(e -> (26 * e.getKey()) + e.getValue())
                    .reduce(0, Math::addExact);
        }

        private int toRow(String row, int defaultValue) {
            if (row == null || row.trim().isEmpty()) return defaultValue;
            row = row.trim();
            try {
                return Integer.parseInt(row) - 1;
            } catch (NumberFormatException nfe) {
                return (short) defaultValue;
            }
        }

        public void updateVertical(int firstRowNum, int lastRowNum) {
            if (top == DEFAULT) top = firstRowNum;
            if (bottom == DEFAULT) bottom = lastRowNum;
        }

        public void updateHorizontal(short firstCellNum, short lastCellNum) {
            if (left == DEFAULT) left = firstCellNum;
            if (right == DEFAULT) right = lastCellNum;
        }
    }

    @Procedure("apoc.load.xls")
    @Description(
            "apoc.load.xls('url','selector',{config}) YIELD lineNo, list, map - load XLS fom URL as stream of row values,\n config contains any of: {skip:1,limit:5,header:false,ignore:['tmp'],arraySep:';',mapping:{years:{type:'int',arraySep:'-',array:false,name:'age',ignore:false, dateFormat:'iso_date', dateParse:['dd-MM-yyyy']}}")
    public Stream xls(
            @Name("url") String url,
            @Name("selector") String selector,
            @Name(value = "config", defaultValue = "{}") Map config) {
        boolean failOnError = booleanValue(config, "failOnError", true);
        try (CountingInputStream stream = FileUtils.inputStreamFor(url, null, null, null)) {
            Selection selection = new Selection(selector);

            char arraySep = separator(config, "arraySep", DEFAULT_ARRAY_SEP);
            long skip = longValue(config, "skip", 0L);
            boolean hasHeader = booleanValue(config, "header", true);
            boolean skipNulls = booleanValue(config, "skipNulls", false);
            long limit = longValue(config, "limit", Long.MAX_VALUE);

            List ignore = value(config, "ignore", emptyList());
            List nullValues = value(config, "nullValues", emptyList());
            Map> mapping = value(config, "mapping", Collections.emptyMap());
            Map mappings = createMapping(mapping, arraySep, ignore);

            LoadXlsHandler.XLSSpliterator xlsSpliterator = getXlsSpliterator(
                    url, stream, selection, skip, hasHeader, limit, ignore, nullValues, mappings, skipNulls);
            return StreamSupport.stream(xlsSpliterator, false);
        } catch (NoClassDefFoundError e) {
            throw new MissingDependencyException(XLS_MISSING_DEPS_ERROR);
        } catch (Exception e) {
            if (!failOnError)
                return Stream.of(
                        new XLSResult(new String[0], new Object[0], 0, true, Collections.emptyMap(), emptyList()));
            else throw new RuntimeException("Can't read XLS from URL " + cleanUrl(url), e);
        }
    }

    private Map createMapping(
            Map> mapping, char arraySep, List ignore) {
        if (mapping.isEmpty()) return Collections.emptyMap();
        HashMap result = new HashMap<>(mapping.size());
        for (Map.Entry> entry : mapping.entrySet()) {
            String name = entry.getKey();
            result.put(name, new Mapping(name, entry.getValue(), arraySep, ignore.contains(name)));
        }
        return result;
    }

    static class Mapping {
        public static final Mapping EMPTY = new Mapping("", Collections.emptyMap(), DEFAULT_ARRAY_SEP, false);
        final String name;
        final Collection nullValues;
        final Meta.Types type;
        final boolean array;
        final boolean ignore;
        final char arraySep;
        final String dateFormat;
        private final String[] dateParse;
        private final Pattern arrayPattern;

        public Mapping(String name, Map mapping, char arraySep, boolean ignore) {
            this.name = mapping.getOrDefault("name", name).toString();
            this.array = (Boolean) mapping.getOrDefault("array", false);
            this.ignore = (Boolean) mapping.getOrDefault("ignore", ignore);
            this.nullValues = (Collection) mapping.getOrDefault("nullValues", emptyList());
            this.arraySep = separator(mapping.getOrDefault("arraySep", arraySep).toString(), DEFAULT_ARRAY_SEP);
            this.type = Meta.Types.from(mapping.getOrDefault("type", "STRING").toString());
            this.arrayPattern = Pattern.compile(String.valueOf(this.arraySep), Pattern.LITERAL);
            this.dateFormat =
                    mapping.getOrDefault("dateFormat", StringUtils.EMPTY).toString();
            this.dateParse = convertFormat(mapping.getOrDefault("dateParse", null));
        }

        public Object convert(Object value) {
            return array ? convertArray(value) : convertType(value);
        }

        private Object convertArray(Object value) {
            if (value == null) return emptyList();
            String[] values = arrayPattern.split(value.toString());
            List result = new ArrayList<>(values.length);
            for (String v : values) {
                result.add(convertType(v));
            }
            return result;
        }

        private Object convertType(Object value) {
            if (nullValues.contains(value) || value == null) return null;
            switch (type) { // Destination Type
                case STRING:
                    if (value instanceof TemporalAccessor && !dateFormat.isEmpty()) {
                        return dateFormat((TemporalAccessor) value, dateFormat);
                    } else {
                        return value.toString();
                    }
                case INTEGER:
                    return Util.toLong(value);
                case FLOAT:
                    return Util.toDouble(value);
                case BOOLEAN:
                    return Util.toBoolean(value);
                case NULL:
                    return null;
                case LIST:
                    return Arrays.stream(arrayPattern.split(value.toString()))
                            .map(this::convertType)
                            .collect(Collectors.toList());
                case DATE:
                    return dateParse(value.toString(), LocalDate.class, dateParse);
                case DATE_TIME:
                    return dateParse(value.toString(), ZonedDateTime.class, dateParse);
                case LOCAL_DATE_TIME:
                    return dateParse(value.toString(), LocalDateTime.class, dateParse);
                case LOCAL_TIME:
                    return dateParse(value.toString(), LocalTime.class, dateParse);
                case TIME:
                    return dateParse(value.toString(), OffsetTime.class, dateParse);
                case DURATION:
                    return durationParse(value.toString());
                default:
                    return value;
            }
        }
    }

    private static String[] convertFormat(Object value) {
        if (value == null) return null;
        if (!(value instanceof List)) throw new RuntimeException("Only array of Strings are allowed!");
        List strings = (List) value;
        return strings.toArray(new String[strings.size()]);
    }

    private boolean booleanValue(Map config, String key, boolean defaultValue) {
        if (config == null || !config.containsKey(key)) return defaultValue;
        Object value = config.get(key);
        if (value instanceof Boolean) return ((Boolean) value);
        return Boolean.parseBoolean(value.toString());
    }

    private long longValue(Map config, String key, long defaultValue) {
        if (config == null || !config.containsKey(key)) return defaultValue;
        Object value = config.get(key);
        if (value instanceof Number) return ((Number) value).longValue();
        return Long.parseLong(value.toString());
    }

    private  T value(Map config, String key, T defaultValue) {
        if (config == null || !config.containsKey(key)) return defaultValue;
        return (T) config.get(key);
    }

    private char separator(Map config, String key, char defaultValue) {
        if (config == null) return defaultValue;
        Object value = config.get(key);
        if (value == null) return defaultValue;
        return separator(value.toString(), defaultValue);
    }

    private static char separator(String separator, char defaultSep) {
        if (separator == null) return defaultSep;
        if ("TAB".equalsIgnoreCase(separator)) return '\t';
        return separator.charAt(0);
    }

    public static class XLSResult {
        public long lineNo;
        public List list;
        public Map map;

        public XLSResult(
                String[] header,
                Object[] list,
                long lineNo,
                boolean ignore,
                Map mapping,
                List nullValues) {
            this.lineNo = lineNo;
            removeNullValues(list, nullValues);

            this.map = createMap(header, list, ignore, mapping);
            this.list = createList(header, list, ignore, mapping);
        }

        public void removeNullValues(Object[] list, List nullValues) {
            if (nullValues.isEmpty()) return;
            for (int i = 0; i < list.length; i++) {
                if (list[i] != null && nullValues.contains(list[i])) list[i] = null;
            }
        }

        private List createList(String[] header, Object[] list, boolean ignore, Map mappings) {
            if (!ignore && mappings.isEmpty()) return asList((Object[]) list);
            ArrayList result = new ArrayList<>(list.length);
            for (int i = 0; i < header.length; i++) {
                String name = header[i];
                if (name == null) continue;
                Mapping mapping = mappings.get(name);
                if (mapping != null) {
                    if (mapping.ignore) continue;
                    result.add(mapping.convert(list[i]));
                } else {
                    result.add(list[i]);
                }
            }
            return result;
        }

        private Map createMap(
                String[] header, Object[] list, boolean ignore, Map mappings) {
            if (header == null) return null;
            Map map = new LinkedHashMap<>(header.length, 1f);
            for (int i = 0; i < header.length; i++) {
                String name = header[i];
                if (ignore && name == null) continue;
                Mapping mapping = mappings.get(name);
                if (mapping == null) {
                    map.put(name, list[i]);
                } else {
                    if (mapping.ignore) continue;
                    map.put(mapping.name, mapping.convert(list[i]));
                }
            }
            return map;
        }
    }
}