All Downloads are FREE. The search and download features use the official Maven repository.

eu.stratosphere.api.java.operators.DataSource Maven / Gradle / Ivy

There is a newer version: 0.5.2-hadoop2
Show newest version
/***********************************************************************************************************************
 *
 * Copyright (C) 2010-2013 by the Stratosphere project (http://stratosphere.eu)
 *
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
 * an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
 * specific language governing permissions and limitations under the License.
 *
 **********************************************************************************************************************/
package eu.stratosphere.api.java.operators;

import eu.stratosphere.api.common.io.InputFormat;
import eu.stratosphere.api.common.operators.OperatorInformation;
import eu.stratosphere.api.common.operators.base.GenericDataSourceBase;
import eu.stratosphere.api.java.DataSet;
import eu.stratosphere.api.java.ExecutionEnvironment;
import eu.stratosphere.types.TypeInformation;

/**
 * A data set whose elements are produced by an {@link InputFormat} (a data source).
 * Besides acting as the data set on which further transformations are applied, it
 * carries two optional settings for its execution: a name and a degree of parallelism.
 * <p>
 * The type of the produced elements is described by the {@link TypeInformation} that is
 * handed to the constructor.
 */
public class DataSource extends DataSet {

	/** Longest operator name that is passed on to the data flow; longer names are cut off. */
	private static final int MAX_NAME_LENGTH = 100;

	/** The format executed by this source; never null once construction succeeded. */
	private final InputFormat inputFormat;

	/** Name given by the user, or null while no name has been set. */
	private String name;

	/** Degree of parallelism; stays at -1 until {@link #setParallelism(int)} is called. */
	private int dop = -1;

	// --------------------------------------------------------------------------------------------

	/**
	 * Creates a new data source.
	 *
	 * @param context The environment in which the data source gets executed.
	 * @param inputFormat The input format that the data source executes. Must not be null.
	 * @param type The type of the elements produced by this input format.
	 * @throws IllegalArgumentException Thrown, if the input format is null.
	 */
	public DataSource(ExecutionEnvironment context, InputFormat inputFormat, TypeInformation type) {
		super(context, type);

		if (inputFormat == null) {
			throw new IllegalArgumentException("The input format may not be null.");
		}

		this.inputFormat = inputFormat;
	}

	/**
	 * Gets the input format that is executed by this data source.
	 *
	 * @return The input format that was passed to the constructor.
	 */
	public InputFormat getInputFormat() {
		return inputFormat;
	}

	// --------------------------------------------------------------------------------------------

	/**
	 * Sets the name of the data source operation. If no name is set, the string
	 * representation of the input format is used instead when the source is translated
	 * to the data flow.
	 *
	 * @param name The name for the data source.
	 * @return The data source object itself, to allow for function call chaining.
	 */
	public DataSource name(String name) {
		this.name = name;
		return this;
	}

	/**
	 * Returns the degree of parallelism of this data source.
	 *
	 * @return The value last given to {@link #setParallelism(int)}, or -1 if it was never called.
	 */
	public int getParallelism() {
		return dop;
	}

	/**
	 * Sets the degree of parallelism for this data source.
	 * The degree must be 1 or more.
	 *
	 * @param dop The degree of parallelism for this data source.
	 * @return This data source with set degree of parallelism.
	 * @throws IllegalArgumentException Thrown, if the given degree is smaller than 1.
	 */
	public DataSource setParallelism(int dop) {
		if (dop < 1) {
			throw new IllegalArgumentException("The parallelism of an operator must be at least 1.");
		}

		this.dop = dop;
		return this;
	}

	// --------------------------------------------------------------------------------------------

	/**
	 * Creates the {@link GenericDataSourceBase} that represents this data source.
	 * <p>
	 * The operator name is the name set via {@link #name(String)}, or the string
	 * representation of the input format if no name was set. Names longer than
	 * 100 characters are truncated to their first 100 characters. The stored degree of
	 * parallelism is passed on unchanged, including the initial -1.
	 *
	 * @return The newly created data source operator.
	 */
	protected GenericDataSourceBase translateToDataFlow() {
		String operatorName;
		if (this.name == null) {
			operatorName = this.inputFormat.toString();
		} else {
			operatorName = this.name;
		}

		if (operatorName.length() > MAX_NAME_LENGTH) {
			operatorName = operatorName.substring(0, MAX_NAME_LENGTH);
		}

		@SuppressWarnings({ "unchecked", "rawtypes" })
		GenericDataSourceBase operator = new GenericDataSourceBase(this.inputFormat,
				new OperatorInformation(getType()), operatorName);
		operator.setDegreeOfParallelism(this.dop);

		return operator;
	}
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy