All downloads are free. Search and download functionality uses the official Maven repository.

io.cdap.plugin.sfmc.source.MarketingCloudSource Maven / Gradle / Ivy

The newest version!
/*
 * Copyright © 2021 Cask Data, Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not
 * use this file except in compliance with the License. You may obtain a copy of
 * the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations under
 * the License.
 */

package io.cdap.plugin.sfmc.source;

import io.cdap.cdap.api.annotation.Description;
import io.cdap.cdap.api.annotation.Name;
import io.cdap.cdap.api.annotation.Plugin;
import io.cdap.cdap.api.data.batch.Input;
import io.cdap.cdap.api.data.format.StructuredRecord;
import io.cdap.cdap.api.data.schema.Schema;
import io.cdap.cdap.api.dataset.lib.KeyValue;
import io.cdap.cdap.etl.api.Emitter;
import io.cdap.cdap.etl.api.Engine;
import io.cdap.cdap.etl.api.FailureCollector;
import io.cdap.cdap.etl.api.PipelineConfigurer;
import io.cdap.cdap.etl.api.StageConfigurer;
import io.cdap.cdap.etl.api.action.SettableArguments;
import io.cdap.cdap.etl.api.batch.BatchSource;
import io.cdap.cdap.etl.api.batch.BatchSourceContext;
import io.cdap.plugin.common.LineageRecorder;
import io.cdap.plugin.common.SourceInputFormatProvider;
import io.cdap.plugin.sfmc.source.util.MarketingCloudConstants;
import io.cdap.plugin.sfmc.source.util.MarketingCloudObjectInfo;
import io.cdap.plugin.sfmc.source.util.SourceQueryMode;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.NullWritable;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.stream.Collectors;

/**
 * A {@link BatchSource} that reads data from one or more Salesforce Marketing Cloud objects
 * and emits every record it receives unchanged.
 */
@Plugin(type = BatchSource.PLUGIN_TYPE)
@Name(MarketingCloudConstants.PLUGIN_NAME)
@Description("Read marketing data from Salesforce Marketing cloud.")
public class MarketingCloudSource extends BatchSource<NullWritable, StructuredRecord, StructuredRecord> {
  private static final Logger LOG = LoggerFactory.getLogger(MarketingCloudSource.class);

  private final MarketingCloudSourceConfig conf;

  /**
   * Creates the source.
   *
   * @param conf the plugin configuration used for validation and for building the input format
   */
  public MarketingCloudSource(MarketingCloudSourceConfig conf) {
    this.conf = conf;
  }

  /**
   * Validates the configuration, sets the output schema and limits task attempts to one.
   *
   * @param pipelineConfigurer the configurer supplying the stage configurer and the engine
   */
  @Override
  public void configurePipeline(PipelineConfigurer pipelineConfigurer) {
    super.configurePipeline(pipelineConfigurer);

    StageConfigurer stageConfigurer = pipelineConfigurer.getStageConfigurer();
    FailureCollector collector = stageConfigurer.getFailureCollector();

    conf.validate(collector);
    // Since we have validated all the properties, throw an exception if there are any
    // errors in the collector. This is to avoid adding same validation errors again in
    // getSchema method call
    collector.getOrThrowException();

    // The schema is null unless the query mode is SINGLE_OBJECT (see getSchema).
    stageConfigurer.setOutputSchema(getSchema(conf.getQueryMode()));

    // Allow a single attempt per task on either engine.
    // NOTE(review): presumably to avoid re-reading from Marketing Cloud on retries - confirm.
    if (pipelineConfigurer.getEngine() == Engine.SPARK) {
      pipelineConfigurer.setPipelineProperties(Collections.singletonMap("spark.task.maxFailures", "1"));
    } else if (pipelineConfigurer.getEngine() == Engine.MAPREDUCE) {
      Map<String, String> properties = new HashMap<>();
      properties.put("mapreduce.reduce.maxattempts", "1");
      properties.put("mapreduce.map.maxattempts", "1");
      pipelineConfigurer.setPipelineProperties(properties);
    }
  }

  /**
   * Validates the configuration again, publishes the schema of every object as a pipeline
   * argument, records lineage for each object and registers the input format.
   *
   * @param context the batch source context of the current run
   * @throws Exception declared by the overridden method
   */
  @Override
  public void prepareRun(BatchSourceContext context) throws Exception {
    FailureCollector collector = context.getFailureCollector();
    conf.validate(collector);
    collector.getOrThrowException();

    SourceQueryMode mode = conf.getQueryMode(collector);

    Configuration hConf = new Configuration();
    Collection<MarketingCloudObjectInfo> tables = MarketingCloudInputFormat.setInput(hConf, mode, conf);
    SettableArguments arguments = context.getArguments();
    for (MarketingCloudObjectInfo tableInfo : tables) {
      // Key is TABLE_PREFIX + formatted table name, value is the schema rendered as a string.
      arguments.set(MarketingCloudConstants.TABLE_PREFIX + tableInfo.getFormattedTableName(),
                    tableInfo.getSchema().toString());
      recordLineage(context, tableInfo);
    }

    context.setInput(Input.of(conf.getReferenceName(),
                              new SourceInputFormatProvider(MarketingCloudInputFormat.class, hConf)));
  }

  /**
   * Emits the value of the incoming key/value pair without modification.
   *
   * @param input the key/value pair read from the input format
   * @param emitter the emitter receiving the record
   */
  @Override
  public void transform(KeyValue<NullWritable, StructuredRecord> input, Emitter<StructuredRecord> emitter) {
    emitter.emit(input.getValue());
  }

  /**
   * Returns the schema of the first object reported by the input format when the mode is
   * {@link SourceQueryMode#SINGLE_OBJECT}.
   *
   * @param mode the configured query mode
   * @return the schema of the first object, or {@code null} for any other mode or when no
   *         objects are reported
   */
  private Schema getSchema(SourceQueryMode mode) {
    Schema schema = null;
    if (mode == SourceQueryMode.SINGLE_OBJECT) {
      Configuration hConf = new Configuration();
      Collection<MarketingCloudObjectInfo> tables = MarketingCloudInputFormat.setInput(hConf, mode, conf);
      if (tables != null && !tables.isEmpty()) {
        schema = tables.iterator().next().getSchema();
      }
    }
    return schema;
  }

  /**
   * Creates an external dataset named {@code <referenceName>-<formattedTableName>} and, when
   * the schema has fields, records a read operation listing the field names.
   *
   * @param context the batch source context used by the lineage recorder
   * @param tableInfo the object whose name and schema are recorded
   */
  private void recordLineage(BatchSourceContext context, MarketingCloudObjectInfo tableInfo) {
    String tableName = tableInfo.getFormattedTableName();
    String outputName = String.format("%s-%s", conf.getReferenceName(), tableName);
    Schema schema = tableInfo.getSchema();
    LineageRecorder lineageRecorder = new LineageRecorder(context, outputName);
    lineageRecorder.createExternalDataset(schema);
    List<Schema.Field> fields = Objects.requireNonNull(schema).getFields();
    if (fields != null && !fields.isEmpty()) {
      lineageRecorder.recordRead("Read",
                                 String.format("Read from '%s' Marketing Cloud object.", tableName),
                                 fields.stream().map(Schema.Field::getName).collect(Collectors.toList()));
    }
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy