
com.dataartisans.flink.cascading.runtime.groupBy.GroupByReducer

/*
 * Copyright 2015 data Artisans GmbH
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *  http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package com.dataartisans.flink.cascading.runtime.groupBy;

import cascading.CascadingException;
import cascading.flow.FlowElement;
import cascading.flow.FlowException;
import cascading.flow.FlowNode;
import cascading.flow.SliceCounters;
import cascading.flow.stream.duct.Duct;
import cascading.flow.stream.element.ElementDuct;
import cascading.pipe.GroupBy;
import cascading.tuple.Tuple;
import com.dataartisans.flink.cascading.runtime.util.FlinkFlowProcess;
import com.dataartisans.flink.cascading.util.FlinkConfigConverter;
import org.apache.flink.api.common.functions.RichGroupReduceFunction;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.util.Collector;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.Set;

import static cascading.util.LogUtil.logCounters;
import static cascading.util.LogUtil.logMemory;

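/**
 * Flink {@link RichGroupReduceFunction} that executes the reduce side of a Cascading
 * {@link GroupBy} inside a Flink group-reduce operator. The operator chain of the
 * enclosing {@link FlowNode} is rebuilt as a {@link GroupByStreamGraph}, and every
 * key group handed to {@link #reduce} is pushed through its {@link GroupByInGate}.
 */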
@SuppressWarnings("unused")
public class GroupByReducer extends RichGroupReduceFunction<Tuple, Tuple> {

	private static final Logger LOG = LoggerFactory.getLogger(GroupByReducer.class);

	private FlowNode flowNode;
	private GroupByStreamGraph streamGraph;
	private GroupByInGate groupSource;
	private FlinkFlowProcess currentProcess;

	private boolean calledPrepare;
	private long processBeginTime;

	public GroupByReducer() {}

	public GroupByReducer(FlowNode flowNode) {
		this.flowNode = flowNode;
	}

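	/**
	 * Builds the Cascading stream graph for the flow node, verifies that its single
	 * source element is a {@link GroupBy}, and logs the graph's heads and tails.
	 */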
	@Override
	public void open(Configuration config) {

		this.calledPrepare = false;

		try {

			currentProcess = new FlinkFlowProcess(FlinkConfigConverter.toHadoopConfig(config), getRuntimeContext(), flowNode.getID());

			Set<FlowElement> sources = flowNode.getSourceElements();
			if(sources.size() != 1) {
				throw new RuntimeException("FlowNode for GroupByReducer may only have a single source");
			}
			FlowElement sourceElement = sources.iterator().next();
			if(!(sourceElement instanceof GroupBy)) {
				throw new RuntimeException("Source of GroupByReducer must be a GroupBy");
			}
			GroupBy source = (GroupBy)sourceElement;

			streamGraph = new GroupByStreamGraph( currentProcess, flowNode, source );
			groupSource = this.streamGraph.getGroupSource();

			for( Duct head : streamGraph.getHeads() ) {
				LOG.info("sourcing from: " + ((ElementDuct) head).getFlowElement());
			}

			for( Duct tail : streamGraph.getTails() ) {
				LOG.info("sinking to: " + ((ElementDuct) tail).getFlowElement());
			}
		}
		catch( Throwable throwable ) {

			if( throwable instanceof CascadingException) {
				throw (CascadingException) throwable;
			}

			throw new FlowException( "internal error during GroupByReducer configuration", throwable );
		}

	}

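	/**
	 * Processes one key group. The stream graph is prepared lazily on the first call;
	 * afterwards all tuples of the group are pushed through the GroupBy in-gate.
	 */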
	@Override
	public void reduce(Iterable<Tuple> input, Collector<Tuple> output) throws Exception {

		this.streamGraph.setTupleCollector(output);

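		// Prepare the operator chain once, on the first key group processed by this task,
		// and record the process begin time in the slice counters.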
		if(! this.calledPrepare) {
			this.streamGraph.prepare();
			this.calledPrepare = true;

			this.groupSource.start(this.groupSource);

			processBeginTime = System.currentTimeMillis();
			currentProcess.increment( SliceCounters.Process_Begin_Time, processBeginTime );
		}


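		// Push all tuples of this key group through the Cascading operator chain.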
		try {
			this.groupSource.run(input.iterator());
		}
		catch( OutOfMemoryError error ) {
			throw error;
		}
		catch( Throwable throwable ) {

			if( throwable instanceof CascadingException ) {
				throw (CascadingException) throwable;
			}

			throw new FlowException( "internal error during GroupByReducer execution", throwable );
		}
	}


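	/**
	 * Completes the in-gate and cleans up the stream graph, then records the process
	 * end time and duration counters and logs memory and counter state.
	 */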
	@Override
	public void close() {

		try {
			if( this.calledPrepare) {
				this.groupSource.complete(this.groupSource);
				this.streamGraph.cleanup();
			}
		}
		finally {
			if( currentProcess != null ) {
				long processEndTime = System.currentTimeMillis();
				currentProcess.increment( SliceCounters.Process_End_Time, processEndTime );
				currentProcess.increment( SliceCounters.Process_Duration, processEndTime - processBeginTime );
			}

			String message = "flow node id: " + flowNode.getID();
			logMemory( LOG, message + ", mem on close" );
			logCounters( LOG, message + ", counter:", currentProcess );
		}
	}

}