All downloads are free. Search and download functionality uses the official Maven repository.

org.apache.flink.runtime.operators.sort.DefaultFileMergePolicy Maven / Gradle / Ivy

There is a newer version: 1.5.1
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.runtime.operators.sort;

import org.apache.flink.util.Preconditions;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.ArrayList;
import java.util.Comparator;
import java.util.LinkedList;
import java.util.List;

/**
 * The default merge policy which supports both sync and async merging. When merging synchronously,
 * no file will be merged before the starting of final merge.
 */
public class DefaultFileMergePolicy implements MergePolicy {
	private static final Logger LOG = LoggerFactory.getLogger(DefaultFileMergePolicy.class);

	/** Merge factor which indicates the max file handles per merge and the max number of final files. */
	private final int mergeFactor;

	/** Whether all files should be merged to one file at last. */
	private final boolean mergeToOneFile;

	/** Whether to merge asynchronously or not. */
	private final boolean enableAsyncMerging;

	/** Stores the merging candidates, and layer n stores files that has been merged for n round(s). */
	private List>> layeredDataFiles = new ArrayList<>();

	/** Identifies whether the final merging has been started. */
	private boolean isFinalMergeStarted = false;

	public DefaultFileMergePolicy(int mergeFactor, boolean enableAsyncMerging, boolean mergeToOneFile) {
		Preconditions.checkArgument(mergeFactor >= 2, "Illegal merge factor: " + mergeFactor);

		this.mergeFactor = mergeFactor;
		this.enableAsyncMerging = enableAsyncMerging;
		this.mergeToOneFile = mergeToOneFile;
	}

	@Override
	public void addNewCandidate(DataFileInfo dataFileInfo) {
		if (isFinalMergeStarted) {
			// after starting final merger, there should be only one layer of candidate files
			Preconditions.checkState(layeredDataFiles.size() == 1,
				"Illegal layer size: " + layeredDataFiles.size());
			layeredDataFiles.get(0).addLast(dataFileInfo);
		} else {
			int mergeRound = dataFileInfo.getMergeRound();
			Preconditions.checkArgument(layeredDataFiles.size() >= mergeRound,
				"Illegal merge round: (" + mergeRound + " " + layeredDataFiles.size() + ")");
			if (layeredDataFiles.size() == mergeRound) {
				LinkedList> dataFiles = new LinkedList<>();
				layeredDataFiles.add(dataFiles);
			}
			layeredDataFiles.get(mergeRound).addLast(dataFileInfo);
		}
	}

	@Override
	public void startFinalMerge() {
		Preconditions.checkState(!isFinalMergeStarted, "Final merge has been started");
		isFinalMergeStarted = true;
		// merge all remaining data files to one list
		for (int i = 1; i < layeredDataFiles.size(); ++i) {
			layeredDataFiles.get(0).addAll(layeredDataFiles.get(i));
		}
		if (layeredDataFiles.size() > 1) {
			layeredDataFiles = layeredDataFiles.subList(0, 1);
		}
	}

	@Override
	public List> selectMergeCandidates(int numMergeReadMemory) {
		int numFileHandles = Math.min(mergeFactor, numMergeReadMemory / 2);
		ArrayList> candidates = new ArrayList<>(numFileHandles);
		if (layeredDataFiles.isEmpty()) {
			// this can happen when no spilled file is added and the final merge has been started
			Preconditions.checkState(isFinalMergeStarted, "Final merge should have been started.");
			return null;
		} else if (isFinalMergeStarted) {
			LinkedList> dataFiles = layeredDataFiles.get(0);
			Preconditions.checkArgument(numMergeReadMemory >= 4 || dataFiles.size() <= 1,
				"At least 4 read buffers is needed, but actual is " + numMergeReadMemory);
			// sort the file list so the smaller files can be picked out and got merged
			dataFiles.sort(new FileLengthComparator<>());
			if (dataFiles.size() > mergeFactor && !mergeToOneFile) {
				// merge as few files as possible
				numFileHandles = Math.min(numFileHandles, dataFiles.size() - mergeFactor + 1);
			} else if (mergeToOneFile && dataFiles.size() > 1) {
				if (dataFiles.size() <= numFileHandles) {
					// can be merged in one round
					numFileHandles = dataFiles.size();
				} else {
					// merge as few files as possible
					numFileHandles = Math.min(numFileHandles, dataFiles.size() - numFileHandles + 1);
				}
			} else {
				// there is no need to merge
				return null;
			}

			// remove and return the chosen candidates
			for (int i = 0; i < numFileHandles; ++i) {
				candidates.add(dataFiles.removeFirst());
			}
			return candidates;
		} else if (enableAsyncMerging) {
			Preconditions.checkArgument(numMergeReadMemory >= 4,
				"At least 4 read buffers is needed, but actual is " + numMergeReadMemory);
			for (LinkedList> dataFiles: layeredDataFiles) {
				if (dataFiles.size() > mergeFactor) {
					for (int i = 0; i < numFileHandles; ++i) {
						candidates.add(dataFiles.removeFirst());
					}
					return candidates;
				}
			}
		}
		return null;
	}

	@Override
	public List getFinalMergeResult() {
		Preconditions.checkState(layeredDataFiles.size() <= 1, "Illegal merge state: " + layeredDataFiles.size());
		ArrayList dataFiles = new ArrayList<>();
		if (layeredDataFiles.size() > 0) {
			for (DataFileInfo fileInfo : layeredDataFiles.get(0)) {
				dataFiles.add(fileInfo.getDataFile());
			}
		}
		return dataFiles;
	}

	/**
	 * The comparator used to sort the file candidates in ascending order of file length.
	 */
	private static class FileLengthComparator implements Comparator> {
		@Override
		public int compare(DataFileInfo file1, DataFileInfo file2) {
			if (file1.getFileLength() == file2.getFileLength()) {
				return 0;
			} else {
				return file1.getFileLength() >= file2.getFileLength() ? 1 : -1;
			}
		}
	}
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy