All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.hpccsystems.spark.datasource.HpccRelationProvider Maven / Gradle / Ivy

The newest version!
package org.hpccsystems.spark.datasource;

import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.Row;
import org.apache.spark.sql.SQLContext;
import org.apache.spark.sql.SaveMode;

import org.apache.spark.sql.sources.BaseRelation;
import org.apache.spark.sql.sources.CreatableRelationProvider;
import org.apache.spark.sql.sources.DataSourceRegister;
import org.apache.spark.sql.sources.RelationProvider;

import org.hpccsystems.ws.client.wrappers.ArrayOfEspExceptionWrapper;
import org.hpccsystems.spark.HpccFileWriter;

import scala.collection.JavaConversions;
import scala.collection.immutable.Map;

import java.util.TreeMap;

import org.apache.logging.log4j.Logger;
import org.apache.logging.log4j.LogManager;

/**
 * Registers HPCC Systems as a DataSource. Allows reading of a dataset in HPCC Systems and writing a new dataset to HPCC Systems.
 */
public class HpccRelationProvider implements RelationProvider, CreatableRelationProvider, DataSourceRegister
{
    private static Logger log = LogManager.getLogger(HpccRelationProvider.class);

    @Override
    public String shortName()
    {
        return "hpcc";
    }

    @Override
    public BaseRelation createRelation(SQLContext sqlContext, Map parameters)
    {
        HpccOptions options = null;
        try
        {
            options = extractOptions(parameters);
        }
        catch (Exception e)
        {
            String error = "Error while attempting to extract HpccOptions: " + e.getMessage();
            log.error(error);
            throw new RuntimeException(error);
        }
        return new HpccRelation(sqlContext, options);
    }

    @Override
    public BaseRelation createRelation(SQLContext sqlContext, SaveMode mode, Map parameters, Dataset data)
    {
        boolean overwrite = false;
        if (mode == SaveMode.Append)
        {
            String error = "Append mode is not supported in HPCC.";
            log.error(error);
            throw new RuntimeException(error);
        }
        else if (mode == SaveMode.Overwrite)
        {
            overwrite = true;
        }

        HpccOptions options = null;
        try
        {
            options = extractOptions(parameters);
        }
        catch (Exception e)
        {
            String error = "Error while attempting to extract HpccOptions: " + e.getMessage();
            log.error(error);
            throw new RuntimeException(error);
        }

        try
        {
            HpccFileWriter fileWriter = new HpccFileWriter(options.connectionInfo);
            fileWriter.setTraceContext(options.traceID, options.spanID);
            fileWriter.saveToHPCC(sqlContext.sparkContext(), data.schema(), data.rdd().toJavaRDD(), options.clusterName, options.fileName,
                    options.compression, overwrite);

            // Attempt to open the new file
            return createRelation(sqlContext, parameters);
        }
        catch (Exception e)
        {
            String error = "Unable to save file to HPCC with error: " + e.getMessage();
            log.error(error);
            throw new RuntimeException(error,e);
        }
    }

    private HpccOptions extractOptions(Map scalaParams) throws Exception
    {
        java.util.Map parameters = JavaConversions.mapAsJavaMap(scalaParams);

        // Use TreeMap to avoid case sensitivity
        TreeMap paramTreeMap = new TreeMap<>(String.CASE_INSENSITIVE_ORDER);
        paramTreeMap.putAll(parameters);

        HpccOptions options = new HpccOptions(paramTreeMap);
        return options;
    }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy