All downloads are free. The search and download features use the official Maven repository.

gobblin.source.extractor.extract.AbstractSource Maven / Gradle / Ivy

There is a newer version: 0.8.0
Show newest version
/*
 * Copyright (C) 2014-2016 LinkedIn Corp. All rights reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use
 * this file except in compliance with the License. You may obtain a copy of the
 * License at  http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed
 * under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
 * CONDITIONS OF ANY KIND, either express or implied.
 */

package gobblin.source.extractor.extract;

import java.util.List;

import com.google.common.collect.ImmutableList;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;

import gobblin.configuration.ConfigurationKeys;
import gobblin.configuration.SourceState;
import gobblin.configuration.WorkUnitState;
import gobblin.source.Source;
import gobblin.source.extractor.JobCommitPolicy;
import gobblin.source.extractor.WorkUnitRetryPolicy;
import gobblin.source.workunit.ExtractFactory;
import gobblin.source.workunit.WorkUnit;
import gobblin.source.workunit.Extract;
import gobblin.source.workunit.Extract.TableType;


/**
 * A base implementation of {@link gobblin.source.Source} that provides default behavior.
 *
 * @author Yinan Li
 */
public abstract class AbstractSource implements Source {

  private final ExtractFactory extractFactory = new ExtractFactory("yyyyMMddHHmmss");

  /**
   * Get a list of {@link WorkUnitState}s of previous {@link WorkUnit}s subject for retries.
   *
   * 

* We use two keys for configuring work unit retries. The first one specifies * whether work unit retries are enabled or not. This is for individual jobs * or a group of jobs that following the same rule for work unit retries. * The second one that is more advanced is for specifying a retry policy. * This one is particularly useful for being a global policy for a group of * jobs that have different job commit policies and want work unit retries only * for a specific job commit policy. The first one probably is sufficient for * most jobs that only need a way to enable/disable work unit retries. The * second one gives users more flexibilities. *

* * @param state Source state * @return list of {@link WorkUnitState}s of previous {@link WorkUnit}s subject for retries */ protected List getPreviousWorkUnitStatesForRetry(SourceState state) { if (Iterables.isEmpty(state.getPreviousWorkUnitStates())) { return ImmutableList.of(); } // Determine a work unit retry policy WorkUnitRetryPolicy workUnitRetryPolicy; if (state.contains(ConfigurationKeys.WORK_UNIT_RETRY_POLICY_KEY)) { // Use the given work unit retry policy if specified workUnitRetryPolicy = WorkUnitRetryPolicy.forName(state.getProp(ConfigurationKeys.WORK_UNIT_RETRY_POLICY_KEY)); } else { // Otherwise set the retry policy based on if work unit retry is enabled boolean retryFailedWorkUnits = state.getPropAsBoolean(ConfigurationKeys.WORK_UNIT_RETRY_ENABLED_KEY, true); workUnitRetryPolicy = retryFailedWorkUnits ? WorkUnitRetryPolicy.ALWAYS : WorkUnitRetryPolicy.NEVER; } if (workUnitRetryPolicy == WorkUnitRetryPolicy.NEVER) { return ImmutableList.of(); } List previousWorkUnitStates = Lists.newArrayList(); // Get previous work units that were not successfully committed (subject for retries) for (WorkUnitState workUnitState : state.getPreviousWorkUnitStates()) { if (workUnitState.getWorkingState() != WorkUnitState.WorkingState.COMMITTED) { if (state.getPropAsBoolean(ConfigurationKeys.OVERWRITE_CONFIGS_IN_STATESTORE, ConfigurationKeys.DEFAULT_OVERWRITE_CONFIGS_IN_STATESTORE)) { // We need to make a copy here since getPreviousWorkUnitStates returns ImmutableWorkUnitStates // for which addAll is not supported WorkUnitState workUnitStateCopy = new WorkUnitState(workUnitState.getWorkunit(), state); workUnitStateCopy.addAll(workUnitState); workUnitStateCopy.overrideWith(state); previousWorkUnitStates.add(workUnitStateCopy); } else { previousWorkUnitStates.add(workUnitState); } } } if (workUnitRetryPolicy == WorkUnitRetryPolicy.ALWAYS) { return previousWorkUnitStates; } JobCommitPolicy jobCommitPolicy = JobCommitPolicy 
.forName(state.getProp(ConfigurationKeys.JOB_COMMIT_POLICY_KEY, ConfigurationKeys.DEFAULT_JOB_COMMIT_POLICY)); if ((workUnitRetryPolicy == WorkUnitRetryPolicy.ON_COMMIT_ON_PARTIAL_SUCCESS && jobCommitPolicy == JobCommitPolicy.COMMIT_ON_PARTIAL_SUCCESS) || (workUnitRetryPolicy == WorkUnitRetryPolicy.ON_COMMIT_ON_FULL_SUCCESS && jobCommitPolicy == JobCommitPolicy.COMMIT_ON_FULL_SUCCESS)) { return previousWorkUnitStates; } // Return an empty list if job commit policy and work unit retry policy do not match return ImmutableList.of(); } /** * Get a list of previous {@link WorkUnit}s subject for retries. * *

* This method uses {@link AbstractSource#getPreviousWorkUnitStatesForRetry(SourceState)}. *

* * @param state Source state * @return list of previous {@link WorkUnit}s subject for retries */ protected List getPreviousWorkUnitsForRetry(SourceState state) { List workUnits = Lists.newArrayList(); for (WorkUnitState workUnitState : getPreviousWorkUnitStatesForRetry(state)) { // Make a copy here as getWorkUnit() below returns an ImmutableWorkUnit workUnits.add(WorkUnit.copyOf(workUnitState.getWorkunit())); } return workUnits; } public Extract createExtract(TableType type, String namespace, String table) { return this.extractFactory.getUniqueExtract(type, namespace, table); } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy