All downloads are free. Search and download functionality uses the official Maven repository.

com.datasonnet.plugins.DefaultCSVFormatPlugin Maven / Gradle / Ivy

package com.datasonnet.plugins;

/*-
 * Copyright 2019-2021 the original author or authors.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import com.datasonnet.document.DefaultDocument;
import com.datasonnet.document.Document;
import com.datasonnet.document.MediaType;
import com.datasonnet.document.MediaTypes;
import com.datasonnet.spi.PluginException;
import com.datasonnet.spi.ujsonUtils;
import com.fasterxml.jackson.core.JsonProcessingException;
import com.fasterxml.jackson.databind.JsonNode;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.fasterxml.jackson.dataformat.csv.CsvMapper;
import com.fasterxml.jackson.dataformat.csv.CsvParser;
import com.fasterxml.jackson.dataformat.csv.CsvSchema;
import ujson.Value;

import javax.swing.text.html.Option;
import java.io.BufferedOutputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.List;
import java.util.Map;
import java.util.Optional;

/**
 * Data format plugin that converts between CSV documents and ujson values using
 * Jackson's {@link CsvMapper}.
 *
 * <p>The plugin registers itself for {@code MediaTypes.APPLICATION_CSV} and {@code text/csv}.
 * The same set of media type parameters is accepted for reading and for writing:
 * <ul>
 *   <li>{@value #DS_PARAM_USE_HEADER} - whether a header row is used; treated as {@code true}
 *       when the parameter is absent</li>
 *   <li>{@value #DS_PARAM_QUOTE_CHAR} - quote character (first character of the value)</li>
 *   <li>{@value #DS_PARAM_SEPARATOR_CHAR} - column separator (first character of the value)</li>
 *   <li>{@value #DS_PARAM_ESCAPE_CHAR} - escape character (first character of the value)</li>
 *   <li>{@value #DS_PARAM_NEW_LINE} - line separator; the tokens {@code LF} and {@code CR}
 *       are replaced by the corresponding control characters</li>
 *   <li>{@value #DS_PARAM_HEADERS} - comma separated column names, consulted by
 *       {@code write} when headers are enabled</li>
 *   <li>{@value #DS_PARAM_DISABLE_QUOTES} - when {@code true} quoting is disabled and
 *       {@value #DS_PARAM_QUOTE_CHAR} is ignored</li>
 * </ul>
 */
public class DefaultCSVFormatPlugin extends BaseJacksonDataFormatPlugin {
    public static final String DS_PARAM_USE_HEADER = "useheader";
    public static final String DS_PARAM_QUOTE_CHAR = "quote";
    public static final String DS_PARAM_SEPARATOR_CHAR = "separator";
    public static final String DS_PARAM_ESCAPE_CHAR = "escape";
    public static final String DS_PARAM_NEW_LINE = "newline";
    public static final String DS_PARAM_HEADERS = "headers";
    public static final String DS_PARAM_DISABLE_QUOTES = "disablequotes";

    // Mappers are created once and shared by all plugin instances.
    private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper();
    private static final CsvMapper CSV_MAPPER = new CsvMapper();

    static {
        CSV_MAPPER.enable(CsvParser.Feature.WRAP_AS_ARRAY);
    }

    /**
     * Registers the supported media types, the accepted reader/writer parameters and the
     * Java classes this plugin can read from and write to.
     */
    public DefaultCSVFormatPlugin() {
        supportedTypes.add(MediaTypes.APPLICATION_CSV);
        supportedTypes.add(MediaType.parseMediaType("text/csv"));

        readerParams.add(DS_PARAM_USE_HEADER);
        readerParams.add(DS_PARAM_QUOTE_CHAR);
        readerParams.add(DS_PARAM_SEPARATOR_CHAR);
        readerParams.add(DS_PARAM_ESCAPE_CHAR);
        readerParams.add(DS_PARAM_NEW_LINE);
        readerParams.add(DS_PARAM_HEADERS);
        readerParams.add(DS_PARAM_DISABLE_QUOTES);

        // The writer accepts exactly the same parameters as the reader.
        writerParams.addAll(readerParams);

        readerSupportedClasses.add(InputStream.class);
        readerSupportedClasses.add(byte[].class);
        readerSupportedClasses.add(String.class);

        writerSupportedClasses.add(OutputStream.class);
        writerSupportedClasses.add(byte[].class);
        writerSupportedClasses.add(String.class);
    }

    /**
     * Tells whether a header row is in use for the given media type.
     *
     * @param mediaType media type whose {@value #DS_PARAM_USE_HEADER} parameter is inspected
     * @return the parsed parameter value, or {@code true} when the parameter is absent
     */
    private boolean isUseHeader(MediaType mediaType) {
        String value = mediaType.getParameter(DS_PARAM_USE_HEADER);
        return value == null || Boolean.parseBoolean(value);
    }

    /**
     * Parses the CSV content of a document into a ujson value.
     *
     * <p>Rows are bound as {@link Map}s when headers are enabled and as {@link List}s
     * otherwise. Supported content classes are {@link String}, {@code byte[]} and
     * {@link InputStream}.
     *
     * @param doc document holding the CSV content and the media type that configures parsing
     * @return the parsed value, or ujson null when the document has no content
     * @throws PluginException if the content class is unsupported or the CSV cannot be read
     */
    @Override
    public Value read(Document doc) throws PluginException {
        Object content = doc.getContent();
        if (content == null) {
            return ujson.Null$.MODULE$;
        }

        CsvSchema csvSchema = this.getBuilder(doc.getMediaType()).build();
        Class<?> rowType = isUseHeader(doc.getMediaType()) ? Map.class : List.class;

        // Read data from CSV file
        try {
            return ujsonFrom(parse(content, csvSchema, rowType));
        } catch (JsonProcessingException jpe) {
            throw new PluginException("Unable to convert CSV to JSON", jpe);
        } catch (IOException ioe) {
            throw new PluginException("Unable to read CSV input", ioe);
        }
    }

    /**
     * Dispatches to the Jackson {@code readTree} overload matching the runtime content type.
     */
    private JsonNode parse(Object content, CsvSchema csvSchema, Class<?> rowType) throws IOException {
        if (content instanceof String) {
            return CSV_MAPPER
                    .readerFor(rowType)
                    .with(csvSchema)
                    .readTree((String) content);
        }
        if (content instanceof byte[]) {
            return CSV_MAPPER
                    .readerFor(rowType)
                    .with(csvSchema)
                    .readTree((byte[]) content);
        }
        if (content instanceof InputStream) {
            return CSV_MAPPER
                    .readerFor(rowType)
                    .with(csvSchema)
                    .readTree((InputStream) content);
        }
        throw new PluginException(new IllegalArgumentException("Unsupported document content class, use the test method canRead before invoking read"));
    }

    /**
     * Serializes a ujson value as CSV into a document of the requested target type.
     *
     * <p>When headers are enabled the columns come from the {@value #DS_PARAM_HEADERS}
     * parameter if present, otherwise from the field names of the first element of the input.
     * The target type is matched against {@link String}, {@link OutputStream} and
     * {@code byte[]}, in that order. For {@link OutputStream} the document content is a
     * {@link ByteArrayOutputStream} holding the generated CSV.
     *
     * @param input      value to serialize
     * @param mediaType  media type whose parameters configure the CSV schema
     * @param targetType class of the document content to produce
     * @return a document with media type {@code MediaTypes.APPLICATION_CSV}
     * @throws PluginException if the target type is unsupported or serialization fails
     */
    @SuppressWarnings("unchecked")
    @Override
    public  Document write(Value input, MediaType mediaType, Class targetType) throws PluginException {
        CsvSchema.Builder builder = this.getBuilder(mediaType);

        try {
            final JsonNode jsonTree = OBJECT_MAPPER.valueToTree(ujsonUtils.javaObjectFrom(input));
            if (isUseHeader(mediaType)) {
                addColumns(builder, mediaType, jsonTree);
            }

            CsvSchema csvSchema = builder.build();

            if (targetType.isAssignableFrom(String.class)) {
                return (Document) new DefaultDocument<>(CSV_MAPPER.writerFor(JsonNode.class)
                        .with(csvSchema)
                        .writeValueAsString(jsonTree), MediaTypes.APPLICATION_CSV);
            }

            if (targetType.isAssignableFrom(OutputStream.class)) {
                // Hand back the in-memory stream itself: a BufferedOutputStream wrapper would
                // hide the written bytes from the caller and adds nothing for an in-memory sink.
                OutputStream out = new ByteArrayOutputStream();
                CSV_MAPPER.writerFor(JsonNode.class)
                        .with(csvSchema)
                        .writeValue(out, jsonTree);
                return (Document) new DefaultDocument<>(out, MediaTypes.APPLICATION_CSV);
            }

            if (targetType.isAssignableFrom(byte[].class)) {
                return (Document) new DefaultDocument<>(CSV_MAPPER.writerFor(JsonNode.class)
                        .with(csvSchema)
                        .writeValueAsBytes(jsonTree), MediaTypes.APPLICATION_CSV);
            }

            throw new PluginException(new IllegalArgumentException("Unsupported document content class, use the test method canWrite before invoking write"));

        } catch (IOException e) {
            throw new PluginException("Unable to processing CSV", e);
        }
    }

    /**
     * Adds the output columns to the schema builder: explicit names from the
     * {@value #DS_PARAM_HEADERS} parameter when given, otherwise the field names of the
     * first element of {@code jsonTree}. An input without elements contributes no columns.
     */
    private void addColumns(CsvSchema.Builder builder, MediaType mediaType, JsonNode jsonTree) {
        String headersParam = mediaType.getParameter(DS_PARAM_HEADERS);
        if (headersParam != null) {
            for (String header : headersParam.split(",")) {
                builder.addColumn(header);
            }
            return;
        }

        // Guard against an empty payload; calling next() on an exhausted iterator would
        // throw an unchecked NoSuchElementException that bypasses the PluginException contract.
        if (jsonTree.elements().hasNext()) {
            JsonNode firstObject = jsonTree.elements().next();
            firstObject.fieldNames().forEachRemaining(builder::addColumn);
        }
    }

    /**
     * Creates a CSV schema builder configured from the parameters of the given media type.
     *
     * @param mediaType media type carrying the CSV parameters
     * @return a builder with header usage, quoting, separator, escape character and line
     *         separator applied for every parameter that is present
     */
    private CsvSchema.Builder getBuilder(MediaType mediaType) {
        CsvSchema.Builder builder = CsvSchema.builder();
        builder.setUseHeader(isUseHeader(mediaType));

        // Boolean.parseBoolean(null) is false, so an absent parameter keeps quoting enabled.
        boolean disableQuotes = Boolean.parseBoolean(mediaType.getParameter(DS_PARAM_DISABLE_QUOTES));
        String quote = mediaType.getParameter(DS_PARAM_QUOTE_CHAR);
        if (disableQuotes) {
            builder.disableQuoteChar();
        } else if (quote != null) {
            builder.setQuoteChar(quote.charAt(0));
        }

        String separator = mediaType.getParameter(DS_PARAM_SEPARATOR_CHAR);
        if (separator != null) {
            builder.setColumnSeparator(separator.charAt(0));
        }

        String escape = mediaType.getParameter(DS_PARAM_ESCAPE_CHAR);
        if (escape != null) {
            builder.setEscapeChar(escape.charAt(0));
        }

        String newLine = mediaType.getParameter(DS_PARAM_NEW_LINE);
        if (newLine != null) {
            // Literal token substitution, e.g. "CRLF" becomes "\r\n"; no regex needed.
            builder.setLineSeparator(newLine
                    .replace("LF", "\n")
                    .replace("CR", "\r")
            );
        }

        return builder;
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy