// Many resources are needed to download a project. Please understand that we have to cover our server costs. Thank you in advance. The project price is only $1.
// You can buy this project and download/modify it as often as you want.
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.api.java.operators;
import org.apache.flink.annotation.Public;
import org.apache.flink.api.common.InvalidProgramException;
import org.apache.flink.api.common.operators.Keys;
import org.apache.flink.api.common.operators.Operator;
import org.apache.flink.api.common.operators.Order;
import org.apache.flink.api.common.operators.Ordering;
import org.apache.flink.api.common.operators.UnaryOperatorInformation;
import org.apache.flink.api.common.operators.base.SortPartitionOperatorBase;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.java.DataSet;
import org.apache.flink.api.java.functions.KeySelector;
import org.apache.flink.api.java.tuple.Tuple2;
import java.util.ArrayList;
import java.util.List;
/**
* This operator represents a DataSet with locally sorted partitions.
*
* @param The type of the DataSet with locally sorted partitions.
*/
@Public
public class SortPartitionOperator extends SingleInputOperator> {
private List> keys;
private List orders;
private final String sortLocationName;
private boolean useKeySelector;
private SortPartitionOperator(DataSet dataSet, String sortLocationName) {
super(dataSet, dataSet.getType());
keys = new ArrayList<>();
orders = new ArrayList<>();
this.sortLocationName = sortLocationName;
}
public SortPartitionOperator(DataSet dataSet, int sortField, Order sortOrder, String sortLocationName) {
this(dataSet, sortLocationName);
this.useKeySelector = false;
ensureSortableKey(sortField);
keys.add(new Keys.ExpressionKeys<>(sortField, getType()));
orders.add(sortOrder);
}
public SortPartitionOperator(DataSet dataSet, String sortField, Order sortOrder, String sortLocationName) {
this(dataSet, sortLocationName);
this.useKeySelector = false;
ensureSortableKey(sortField);
keys.add(new Keys.ExpressionKeys<>(sortField, getType()));
orders.add(sortOrder);
}
public SortPartitionOperator(DataSet dataSet, Keys.SelectorFunctionKeys sortKey, Order sortOrder, String sortLocationName) {
this(dataSet, sortLocationName);
this.useKeySelector = true;
ensureSortableKey(sortKey);
keys.add(sortKey);
orders.add(sortOrder);
}
/**
* Returns whether using key selector or not.
*/
public boolean useKeySelector() {
return useKeySelector;
}
/**
* Appends an additional sort order with the specified field in the specified order to the
* local partition sorting of the DataSet.
*
* @param field The field index of the additional sort order of the local partition sorting.
* @param order The order of the additional sort order of the local partition sorting.
* @return The DataSet with sorted local partitions.
*/
public SortPartitionOperator sortPartition(int field, Order order) {
if (useKeySelector) {
throw new InvalidProgramException("Expression keys cannot be appended after a KeySelector");
}
ensureSortableKey(field);
keys.add(new Keys.ExpressionKeys<>(field, getType()));
orders.add(order);
return this;
}
/**
* Appends an additional sort order with the specified field in the specified order to the
* local partition sorting of the DataSet.
*
* @param field The field expression referring to the field of the additional sort order of
* the local partition sorting.
* @param order The order of the additional sort order of the local partition sorting.
* @return The DataSet with sorted local partitions.
*/
public SortPartitionOperator sortPartition(String field, Order order) {
if (useKeySelector) {
throw new InvalidProgramException("Expression keys cannot be appended after a KeySelector");
}
ensureSortableKey(field);
keys.add(new Keys.ExpressionKeys<>(field, getType()));
orders.add(order);
return this;
}
public SortPartitionOperator sortPartition(KeySelector keyExtractor, Order order) {
throw new InvalidProgramException("KeySelector cannot be chained.");
}
private void ensureSortableKey(int field) throws InvalidProgramException {
if (!Keys.ExpressionKeys.isSortKey(field, getType())) {
throw new InvalidProgramException("Selected sort key is not a sortable type");
}
}
private void ensureSortableKey(String field) throws InvalidProgramException {
if (!Keys.ExpressionKeys.isSortKey(field, getType())) {
throw new InvalidProgramException("Selected sort key is not a sortable type");
}
}
private void ensureSortableKey(Keys.SelectorFunctionKeys sortKey) {
if (!sortKey.getKeyType().isSortKeyType()) {
throw new InvalidProgramException("Selected sort key is not a sortable type");
}
}
// --------------------------------------------------------------------------------------------
// Translation
// --------------------------------------------------------------------------------------------
protected org.apache.flink.api.common.operators.SingleInputOperator translateToDataFlow(Operator input) {
String name = "Sort at " + sortLocationName;
if (useKeySelector) {
return translateToDataFlowWithKeyExtractor(input, (Keys.SelectorFunctionKeys) keys.get(0), orders.get(0), name);
}
// flatten sort key positions
List allKeyPositions = new ArrayList<>();
List allOrders = new ArrayList<>();
for (int i = 0, length = keys.size(); i < length; i++) {
int[] sortKeyPositions = keys.get(i).computeLogicalKeyPositions();
Order order = orders.get(i);
for (int sortKeyPosition : sortKeyPositions) {
allKeyPositions.add(sortKeyPosition);
allOrders.add(order);
}
}
Ordering partitionOrdering = new Ordering();
for (int i = 0, length = allKeyPositions.size(); i < length; i++) {
partitionOrdering.appendOrdering(allKeyPositions.get(i), null, allOrders.get(i));
}
// distinguish between partition types
UnaryOperatorInformation operatorInfo = new UnaryOperatorInformation<>(getType(), getType());
SortPartitionOperatorBase noop = new SortPartitionOperatorBase<>(operatorInfo, partitionOrdering, name);
noop.setInput(input);
if (this.getParallelism() < 0) {
// use parallelism of input if not explicitly specified
noop.setParallelism(input.getParallelism());
} else {
// use explicitly specified parallelism
noop.setParallelism(this.getParallelism());
}
return noop;
}
private org.apache.flink.api.common.operators.SingleInputOperator translateToDataFlowWithKeyExtractor(
Operator input, Keys.SelectorFunctionKeys keys, Order order, String name) {
TypeInformation> typeInfoWithKey = KeyFunctions.createTypeWithKey(keys);
Keys.ExpressionKeys> newKey = new Keys.ExpressionKeys<>(0, typeInfoWithKey);
Operator> keyedInput = KeyFunctions.appendKeyExtractor(input, keys);
int[] sortKeyPositions = newKey.computeLogicalKeyPositions();
Ordering partitionOrdering = new Ordering();
for (int keyPosition : sortKeyPositions) {
partitionOrdering.appendOrdering(keyPosition, null, order);
}
// distinguish between partition types
UnaryOperatorInformation, Tuple2> operatorInfo = new UnaryOperatorInformation<>(typeInfoWithKey, typeInfoWithKey);
SortPartitionOperatorBase> noop = new SortPartitionOperatorBase<>(operatorInfo, partitionOrdering, name);
noop.setInput(keyedInput);
if (this.getParallelism() < 0) {
// use parallelism of input if not explicitly specified
noop.setParallelism(input.getParallelism());
} else {
// use explicitly specified parallelism
noop.setParallelism(this.getParallelism());
}
return KeyFunctions.appendKeyRemover(noop, keys);
}
}