All downloads are free. Search and download functionality uses the official Maven repository.

com.marklogic.flux.impl.custom.CustomImportCommand Maven / Gradle / Ivy

There is a newer version: 1.1.3
Show newest version
/*
 * Copyright © 2024 MarkLogic Corporation. All Rights Reserved.
 */
package com.marklogic.flux.impl.custom;

import com.marklogic.flux.api.CustomImporter;
import com.marklogic.flux.api.WriteStructuredDocumentsOptions;
import com.marklogic.flux.impl.AbstractCommand;
import com.marklogic.flux.impl.S3Params;
import com.marklogic.flux.impl.importdata.WriteStructuredDocumentParams;
import org.apache.spark.sql.*;
import picocli.CommandLine;

import java.util.HashMap;
import java.util.Map;
import java.util.function.Consumer;

@CommandLine.Command(
    name = "custom-import",
    description = "Read data via a custom Spark connector or data source and write JSON or XML documents to MarkLogic."
)
public class CustomImportCommand extends AbstractCommand implements CustomImporter {

    @CommandLine.Mixin
    private CustomReadParams readParams = new CustomReadParams();

    @CommandLine.Mixin
    private WriteStructuredDocumentParams writeParams = new WriteStructuredDocumentParams();

    @Override
    protected Dataset loadDataset(SparkSession session, DataFrameReader reader) {
        readParams.s3Params.addToHadoopConfiguration(session.sparkContext().hadoopConfiguration());
        return reader.format(readParams.source)
            .options(readParams.additionalOptions)
            .load();
    }

    @Override
    protected void applyWriter(SparkSession session, DataFrameWriter writer) {
        writer.format(MARKLOGIC_CONNECTOR)
            .options(getConnectionParams().makeOptions())
            .options(writeParams.makeOptions())
            .mode(SaveMode.Append)
            .save();
    }

    public static class CustomReadParams implements CustomReadOptions {

        @CommandLine.Option(
            names = "--source",
            description = "Identifier for the Spark connector or data source that is used to read data.",
            required = true
        )
        private String source;

        @CommandLine.Option(
            names = "-P",
            description = "Specify any number of options to be passed to the connector identified by '--source' - e.g. -PmyOption=someValue."
        )
        private Map additionalOptions = new HashMap<>();

        @CommandLine.Mixin
        private S3Params s3Params = new S3Params();

        @Override
        public CustomReadOptions source(String source) {
            this.source = source;
            return this;
        }

        @Override
        public CustomReadOptions additionalOptions(Map additionalOptions) {
            this.additionalOptions = additionalOptions;
            return this;
        }

        public CustomReadOptions s3AddCredentials() {
            this.s3Params.setAddCredentials(true);
            return this;
        }

        @Override
        public CustomReadOptions s3AccessKeyId(String accessKeyId) {
            this.s3Params.setAccessKeyId(accessKeyId);
            return this;
        }

        @Override
        public CustomReadOptions s3SecretAccessKey(String secretAccessKey) {
            this.s3Params.setSecretAccessKey(secretAccessKey);
            return this;
        }

        @Override
        public CustomReadOptions s3Endpoint(String endpoint) {
            this.s3Params.setEndpoint(endpoint);
            return this;
        }
    }

    @Override
    public CustomImporter from(Consumer consumer) {
        consumer.accept(readParams);
        return this;
    }

    @Override
    public CustomImporter to(Consumer consumer) {
        consumer.accept(writeParams);
        return this;
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy