/*
 * Copyright 2015 data Artisans GmbH
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.dataartisans.flink.cascading.runtime.util;

import cascading.CascadingException;
import cascading.flow.FlowProcess;
import cascading.flow.FlowSession;
import cascading.flow.hadoop.util.HadoopUtil;
import cascading.tap.Tap;
import cascading.tap.hadoop.Hfs;
import cascading.tuple.TupleEntryCollector;
import cascading.tuple.TupleEntryIterator;

import com.dataartisans.flink.cascading.runtime.stats.EnumStringConverter;

import org.apache.flink.api.common.accumulators.LongCounter;
import org.apache.flink.api.common.functions.RuntimeContext;
import org.apache.flink.util.InstantiationUtil;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapred.JobConf;

import java.io.IOException;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Set;
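
/**
 * A Cascading {@link FlowProcess} that runs on Apache Flink. Parallelism information is
 * taken from the Flink {@link RuntimeContext}, Cascading counters are mapped onto Flink
 * {@link LongCounter} accumulators, and properties are served from a Hadoop
 * {@link Configuration}.
 *
 * <p>A minimal usage sketch, assuming the process is created inside a Flink rich function
 * (the surrounding {@code open()} method and the choice of task id below are hypothetical,
 * not part of this class):
 *
 * <pre>{@code
 * public void open(org.apache.flink.configuration.Configuration parameters) {
 *     FlowProcess<Configuration> process = new FlinkFlowProcess(
 *         new Configuration(), getRuntimeContext(), getRuntimeContext().getTaskName());
 * }
 * }</pre>
 */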
public class FlinkFlowProcess extends FlowProcess<Configuration> {

    private transient RuntimeContext runtimeContext;
    private Configuration conf;
    private String taskId;
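
    // Four construction paths: a no-arg variant (e.g. for reflective instantiation),
    // configuration-only variants for use outside a running task, and the full variant
    // used at runtime, which wires in the Flink RuntimeContext and a task identifier.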
    public FlinkFlowProcess() {
        this.conf = new Configuration();
    }

    public FlinkFlowProcess(Configuration conf) {
        this.conf = conf;
    }

    public FlinkFlowProcess(FlowSession flowSession, Configuration conf) {
        super(flowSession);
        this.conf = conf;
    }

    public FlinkFlowProcess(Configuration conf, RuntimeContext runtimeContext, String taskId) {
        this(conf);
        this.runtimeContext = runtimeContext;
        this.taskId = taskId;
    }
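
    // Cascading's notion of process "slices" maps directly onto Flink's parallel
    // subtasks: the slice count is the operator parallelism, and the current slice
    // number is the index of this subtask.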
    @Override
    public int getNumProcessSlices() {
        return this.runtimeContext.getNumberOfParallelSubtasks();
    }

    @Override
    public int getCurrentSliceNum() {
        return this.runtimeContext.getIndexOfThisSubtask();
    }

    @Override
    public FlowProcess<Configuration> copyWith(Configuration config) {
        return new FlinkFlowProcess(config, this.runtimeContext, this.taskId);
    }
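
    // Properties are served straight from the Hadoop Configuration; getPropertyKeys()
    // snapshots all configuration keys into an unmodifiable set.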
    @Override
    public Object getProperty( String key ) {
        return this.conf.get(key);
    }

    @Override
    public Collection<String> getPropertyKeys() {
        Set<String> keys = new HashSet<>();
        for( Map.Entry<String, String> entry : this.conf ) {
            keys.add(entry.getKey());
        }
        return Collections.unmodifiableSet( keys );
    }
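
    // Reflectively instantiates a class by name via Flink's InstantiationUtil, which
    // creates the instance through its no-argument constructor; a null or empty class
    // name yields null.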
    @Override
    public Object newInstance(String className) {
        if(className == null || className.isEmpty()) {
            return null;
        }
        try {
            Class<?> clazz = Class.forName(className);
            return InstantiationUtil.instantiate(clazz);
        }
        catch( ClassNotFoundException exception ) {
            throw new CascadingException( "unable to load class: " + className, exception );
        }
    }

    @Override
    public void keepAlive() {
        // Nothing to do: Hadoop reports progress on its own, and Cascading's Tez
        // planner is a no-op here as well.
    }
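
    // Cascading counters are backed by Flink accumulators: group and counter name are
    // merged into a single accumulator key (see EnumStringConverter), and each key is
    // tracked by one LongCounter. Without a RuntimeContext, increments are dropped and
    // counter reads return 0.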
    @Override
    public void increment(Enum e, long l) {
        increment(EnumStringConverter.enumToGroup(e), EnumStringConverter.enumToCounter(e), l);
    }

    @Override
    public void increment(String group, String counter, long l) {
        if(this.runtimeContext != null) {
            LongCounter flinkCounter = getOrInitCounter(EnumStringConverter.mergeGroupCounter(group, counter));
            flinkCounter.add(l);
        }
    }

    @Override
    public long getCounterValue(Enum e) {
        return getCounterValue(EnumStringConverter.enumToGroup(e), EnumStringConverter.enumToCounter(e));
    }

    @Override
    public long getCounterValue(String group, String counter) {
        if(this.runtimeContext != null) {
            return getOrInitCounter(EnumStringConverter.mergeGroupCounter(group, counter)).getLocalValue();
        }
        else {
            return 0L;
        }
    }

    @Override
    public void setStatus(String s) {
        // Status reporting is a no-op, as in Cascading's Tez planner.
    }

    @Override
    public boolean isCounterStatusInitialized() {
        return this.runtimeContext != null;
    }
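
    // Source and sink taps are opened directly with this flow process, so their
    // iterators and collectors see the Hadoop Configuration held by this instance.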
    @Override
    public TupleEntryIterator openTapForRead(Tap tap) throws IOException {
        return tap.openForRead( this );
    }

    @Override
    public TupleEntryCollector openTapForWrite(Tap tap) throws IOException {
        return tap.openForWrite( this, null ); // do not honor sink mode, as the tap may be opened across tasks
    }
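
    // Traps are only supported for Hfs taps. Since Flink tasks do not run inside a
    // Hadoop task context, a synthetic task attempt id and a slice-specific part name
    // are set on the JobConf, which lets the Hadoop-based trap collector write distinct,
    // non-colliding files per parallel subtask.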
    @Override
    public TupleEntryCollector openTrapForWrite(Tap trap) throws IOException {
        if (trap instanceof Hfs) {
            JobConf jobConf = new JobConf(this.getConfigCopy());

            int stepNum = jobConf.getInt( "cascading.flow.step.num", 0 );
            int nodeNum = jobConf.getInt( "cascading.flow.node.num", 0 );

            String partname = String.format( "-%05d-%05d-%05d", stepNum, nodeNum, this.getCurrentSliceNum() );
            jobConf.set( "cascading.tapcollector.partname", "%s%spart" + partname );

            String value = String.format( "attempt_%012d_0000_m_%06d_0", (int) Math.rint( System.currentTimeMillis() ), this.getCurrentSliceNum() );
            jobConf.set( "mapred.task.id", value );
            jobConf.set( "mapreduce.task.id", value );

            return trap.openForWrite( new FlinkFlowProcess( jobConf ), null );
        }
        else {
            throw new UnsupportedOperationException("Only Hfs taps are supported as traps");
        }
    }

    @Override
    public TupleEntryCollector openSystemIntermediateForWrite() throws IOException {
        return null; // not required for Flink
    }

    @Override
    public Configuration getConfig() {
        return conf;
    }

    @Override
    public Configuration getConfigCopy() {
        return HadoopUtil.copyJobConf(this.conf);
    }
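
    // The diff/merge pair lets Cascading ship only the configuration entries that
    // deviate from a default: diffConfigIntoMap() collects keys whose values differ
    // (or exist on only one side), and mergeMapIntoConfig() applies such a map back
    // onto a copy of a default configuration.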
    @Override
    public <C> C copyConfig(C conf) {
        return HadoopUtil.copyJobConf(conf);
    }

    @Override
    public <C> Map<String, String> diffConfigIntoMap(C defaultConfig, C updatedConfig) {
        Map<String, String> newConf = new HashMap<>();
        for(Map.Entry<String, String> e : ((Configuration) updatedConfig)) {
            String key = e.getKey();
            String val = ((Configuration) updatedConfig).get(key);
            String defaultVal = ((Configuration) defaultConfig).get(key);
            // add keys whose values differ from the default or are present on only one side
            if(val != null && defaultVal != null && !val.equals(defaultVal)) {
                newConf.put(key, val);
            }
            else if((val == null && defaultVal != null) || (val != null && defaultVal == null)) {
                newConf.put(key, val);
            }
        }
        return newConf;
    }

    @Override
    public Configuration mergeMapIntoConfig(Configuration defaultConfig, Map<String, String> map) {
        Configuration mergedConf = HadoopUtil.copyJobConf(defaultConfig);
        for(Map.Entry<String, String> entry : map.entrySet()) {
            mergedConf.set(entry.getKey(), entry.getValue());
        }
        return mergedConf;
    }
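
    // Returns the Flink accumulator registered under the merged counter name,
    // registering a fresh LongCounter with the runtime context if none exists yet.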
    private LongCounter getOrInitCounter(String counterName) {
        LongCounter lc = this.runtimeContext.getLongCounter(counterName);
        if (lc == null) {
            lc = new LongCounter();
            this.runtimeContext.addAccumulator(counterName, lc);
        }
        return lc;
    }
}