All downloads are free. Search and download functionality uses the official Maven repository.

com.marklogic.flux.impl.importdata.AbstractImportFilesCommand Maven / Gradle / Ivy

There is a newer version: 1.0.0.ea1
Show newest version
/*
 * Copyright © 2024 MarkLogic Corporation. All Rights Reserved.
 */
package com.marklogic.flux.impl.importdata;

import com.marklogic.flux.api.Executor;
import com.marklogic.flux.api.FluxException;
import com.marklogic.flux.impl.AbstractCommand;
import org.apache.spark.sql.*;

import java.util.Map;
import java.util.function.Supplier;

/**
 * Base class for commands that import files and write to MarkLogic.
 */
public abstract class AbstractImportFilesCommand extends AbstractCommand {

    /**
     * Subclass must define the format used for reading - e.g. "csv", "marklogic", etc.
     *
     * @return the name of the Spark data source or connector to pass to the Spark 'format(String)' method
     */
    protected abstract String getReadFormat();

    protected abstract 

P getReadParams(); protected abstract Supplier> getWriteParams(); @Override protected void validateDuringApiUsage() { if (!getReadParams().hasAtLeastOnePath()) { throw new FluxException("Must specify one or more file paths"); } } @Override protected final Dataset loadDataset(SparkSession session, DataFrameReader reader) { ReadFilesParams readFilesParams = getReadParams(); if (logger.isInfoEnabled()) { logger.info("Importing files from: {}", readFilesParams.getPaths()); } readFilesParams.getS3Params().addToHadoopConfiguration(session.sparkContext().hadoopConfiguration()); return reader .format(getReadFormat()) .options(readFilesParams.makeOptions()) .load(readFilesParams.getPaths().toArray(new String[]{})); } @Override protected final void applyWriter(SparkSession session, DataFrameWriter writer) { writer.format(MARKLOGIC_CONNECTOR) .options(getConnectionParams().makeOptions()) .options(getWriteParams().get()) .mode(SaveMode.Append) .save(); } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy