All Downloads are FREE. Search and download functionalities are using the official Maven repository.

eu.stratosphere.api.java.operators.SingleInputUdfOperator Maven / Gradle / Ivy

There is a newer version: 0.5.2-hadoop2
Show newest version
/***********************************************************************************************************************
 *
 * Copyright (C) 2010-2013 by the Stratosphere project (http://stratosphere.eu)
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
 * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations under the License.
 *
 **********************************************************************************************************************/
package eu.stratosphere.api.java.operators;

import java.lang.annotation.Annotation;
import java.util.Collections;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;

import eu.stratosphere.api.common.operators.SingleInputSemanticProperties;
import eu.stratosphere.api.java.DataSet;
import eu.stratosphere.api.java.functions.FunctionAnnotation;
import eu.stratosphere.api.java.functions.SemanticPropUtil;
import eu.stratosphere.types.TypeInformation;
import eu.stratosphere.configuration.Configuration;

/**
 * The SingleInputUdfOperator is the base class of all unary operators that execute
 * user-defined functions (UDFs). The UDFs encapsulated by this operator are naturally UDFs that
 * have one input (such as {@link MapFunction} or {@link ReduceFunction}).
 * 

* This class encapsulates utilities for the UDFs, such as broadcast variables, parameterization * through configuration objects, and semantic properties. * @param The data type of the input data set. * @param The data type of the returned data set. */ public abstract class SingleInputUdfOperator> extends SingleInputOperator implements UdfOperator { private Configuration parameters; private Map> broadcastVariables; private SingleInputSemanticProperties udfSemantics; // -------------------------------------------------------------------------------------------- /** * Creates a new operators with the given data set as input. The given result type * describes the data type of the elements in the data set produced by the operator. * * @param input The data set that is the input to the operator. * @param resultType The type of the elements in the resulting data set. */ protected SingleInputUdfOperator(DataSet input, TypeInformation resultType) { super(input, resultType); } protected void extractSemanticAnnotationsFromUdf(Class udfClass) { Set annotations = FunctionAnnotation.readSingleConstantAnnotations(udfClass); SingleInputSemanticProperties sp = SemanticPropUtil.getSemanticPropsSingle(annotations, getInputType(), getResultType()); setSemanticProperties(sp); } // -------------------------------------------------------------------------------------------- // Fluent API methods // -------------------------------------------------------------------------------------------- @Override public O withParameters(Configuration parameters) { this.parameters = parameters; @SuppressWarnings("unchecked") O returnType = (O) this; return returnType; } @Override public O withBroadcastSet(DataSet data, String name) { if (this.broadcastVariables == null) { this.broadcastVariables = new HashMap>(); } this.broadcastVariables.put(name, data); @SuppressWarnings("unchecked") O returnType = (O) this; return returnType; } /** * Adds a constant-set annotation for the UDF. * *

* Constant set annotations are used by the optimizer to infer the existence of data properties (sorted, partitioned, grouped). * In certain cases, these annotations allow the optimizer to generate a more efficient execution plan which can lead to improved performance. * Constant set annotations can only be specified if the second input and the output type of the UDF are of {@link Tuple} data types. * *

* A constant-set annotation is a set of constant field specifications. The constant field specification String "4->3" specifies, that this UDF copies the fourth field of * an input tuple to the third field of the output tuple. Field references are zero-indexed. * *

* NOTICE: Constant set annotations are optional, but if given need to be correct. Otherwise, the program might produce wrong results! * * @param constantSet A list of constant field specification Strings. * @return This operator with an annotated constant field set. */ public O withConstantSet(String... constantSet) { SingleInputSemanticProperties props = SemanticPropUtil.getSemanticPropsSingleFromString(constantSet, null, null, this.getInputType(), this.getResultType()); this.setSemanticProperties(props); @SuppressWarnings("unchecked") O returnType = (O) this; return returnType; } // -------------------------------------------------------------------------------------------- // Accessors // -------------------------------------------------------------------------------------------- @Override public Map> getBroadcastSets() { return this.broadcastVariables == null ? Collections.>emptyMap() : Collections.unmodifiableMap(this.broadcastVariables); } @Override public Configuration getParameters() { return this.parameters; } @Override public SingleInputSemanticProperties getSematicProperties() { return this.udfSemantics; } /** * Sets the semantic properties for the user-defined function (UDF). The semantic properties * define how fields of tuples and other objects are modified or preserved through this UDF. * The configured properties can be retrieved via {@link UdfOperator#getSematicProperties()}. * * @param properties The semantic properties for the UDF. * @see UdfOperator#getSematicProperties() */ public void setSemanticProperties(SingleInputSemanticProperties properties) { this.udfSemantics = properties; } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy