/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hudi.common.engine;

import org.apache.hudi.common.config.SerializableConfiguration;
import org.apache.hudi.common.data.HoodieAccumulator;
import org.apache.hudi.common.data.HoodieData;
import org.apache.hudi.common.data.HoodieData.HoodieDataCacheKey;
import org.apache.hudi.common.function.SerializableBiFunction;
import org.apache.hudi.common.function.SerializableConsumer;
import org.apache.hudi.common.function.SerializableFunction;
import org.apache.hudi.common.function.SerializablePairFlatMapFunction;
import org.apache.hudi.common.function.SerializablePairFunction;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.collection.ImmutablePair;
import org.apache.hudi.common.util.collection.Pair;

import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.stream.Stream;

/**
* Base class that contains the context information needed by the engine at runtime. It will be extended by
* different engine implementations if needed.
*/
public abstract class HoodieEngineContext {

/**
* A wrapped hadoop configuration which can be serialized.
*/
private SerializableConfiguration hadoopConf;

protected TaskContextSupplier taskContextSupplier;

public HoodieEngineContext(SerializableConfiguration hadoopConf, TaskContextSupplier taskContextSupplier) {
this.hadoopConf = hadoopConf;
this.taskContextSupplier = taskContextSupplier;
}

public SerializableConfiguration getHadoopConf() {
return hadoopConf;
}

public TaskContextSupplier getTaskContextSupplier() {
return taskContextSupplier;
}
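
/**
* Creates a new engine-specific accumulator for aggregating counts across tasks.
*/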
public abstract HoodieAccumulator newAccumulator();
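
/**
* Returns an empty {@link HoodieData}.
*/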
public abstract <T> HoodieData<T> emptyHoodieData();
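
/**
* Distributes the given list as a {@link HoodieData}, using the size of the list as the parallelism.
*/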
public <T> HoodieData<T> parallelize(List<T> data) {
if (data.isEmpty()) {
return emptyHoodieData();
} else {
return parallelize(data, data.size());
}
}
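
/**
* Distributes the given list as a {@link HoodieData} with the given parallelism.
*/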
public abstract <T> HoodieData<T> parallelize(List<T> data, int parallelism);
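
/**
* Applies the given function to every element of the list in parallel and returns the results.
*/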
public abstract <I, O> List<O> map(List<I> data, SerializableFunction<I, O> func, int parallelism);
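
/**
* Maps every element of the list to a key/value pair and reduces the values by key using the given
* reduce function, returning the reduced values.
*/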
public abstract <I, K, V> List<V> mapToPairAndReduceByKey(
    List<I> data, SerializablePairFunction<I, K, V> mapToPairFunc, SerializableBiFunction<V, V, V> reduceFunc, int parallelism);
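
/**
* Flat-maps partitions (iterators) of the input stream to key/value pairs and reduces the values by key,
* returning the reduced pairs as a stream.
*/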
public abstract <I, K, V> Stream<ImmutablePair<K, V>> mapPartitionsToPairAndReduceByKey(
    Stream<I> data, SerializablePairFlatMapFunction<Iterator<I>, K, V> flatMapToPairFunc,
    SerializableBiFunction<V, V, V> reduceFunc, int parallelism);
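
/**
* Reduces the values of the given key/value pairs by key using the given reduce function.
*/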
public abstract <K, V> List<V> reduceByKey(
    List<Pair<K, V>> data, SerializableBiFunction<V, V, V> reduceFunc, int parallelism);
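
/**
* Applies the given function to every element of the list and flattens the resulting streams into a single list.
*/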
public abstract <I, O> List<O> flatMap(List<I> data, SerializableFunction<I, Stream<O>> func, int parallelism);
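
/**
* Applies the given consumer to every element of the list in parallel, for side effects only.
*/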
public abstract <I> void foreach(List<I> data, SerializableConsumer<I> consumer, int parallelism);
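
/**
* Maps every element of the list to a key/value pair and collects the pairs into a map.
*/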
public abstract <I, K, V> Map<K, V> mapToPair(List<I> data, SerializablePairFunction<I, K, V> func, Integer parallelism);
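
/**
* Sets an engine-specific property for this context.
*/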
public abstract void setProperty(EngineProperty key, String value);
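
/**
* Returns the value of the given engine-specific property, if it is set.
*/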
public abstract Option<String> getProperty(EngineProperty key);
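
/**
* Sets the active module and a human-readable description for the currently running job.
*/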
public abstract void setJobStatus(String activeModule, String activityDescription);
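
/**
* Records the ids of cached {@link HoodieData} under the given cache key so they can be looked up and released later.
*/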
public abstract void putCachedDataIds(HoodieDataCacheKey cacheKey, int... ids);
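
/**
* Returns the ids of the cached {@link HoodieData} recorded under the given cache key.
*/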
public abstract List<Integer> getCachedDataIds(HoodieDataCacheKey cacheKey);
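
/**
* Removes and returns the ids of the cached {@link HoodieData} recorded under the given cache key.
*/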
public abstract List<Integer> removeCachedDataIds(HoodieDataCacheKey cacheKey);
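
/**
* Cancels the job with the given id.
*/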
public abstract void cancelJob(String jobId);
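
/**
* Cancels all jobs running in this context.
*/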
public abstract void cancelAllJobs();
}