// Source listing: org.apache.flink.table.filesystem.stream.compact.CompactCoordinator
// Artifact: flink-table-runtime-blink_2.12
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.table.filesystem.stream.compact;
import org.apache.flink.api.common.state.ListState;
import org.apache.flink.api.common.state.ListStateDescriptor;
import org.apache.flink.api.common.typeutils.base.ListSerializer;
import org.apache.flink.api.common.typeutils.base.LongSerializer;
import org.apache.flink.api.common.typeutils.base.MapSerializer;
import org.apache.flink.api.common.typeutils.base.StringSerializer;
import org.apache.flink.api.java.typeutils.runtime.kryo.KryoSerializer;
import org.apache.flink.core.fs.FileSystem;
import org.apache.flink.core.fs.Path;
import org.apache.flink.runtime.state.StateInitializationContext;
import org.apache.flink.runtime.state.StateSnapshotContext;
import org.apache.flink.streaming.api.operators.AbstractStreamOperator;
import org.apache.flink.streaming.api.operators.OneInputStreamOperator;
import org.apache.flink.streaming.runtime.streamrecord.StreamRecord;
import org.apache.flink.table.filesystem.stream.TaskTracker;
import org.apache.flink.table.filesystem.stream.compact.CompactMessages.CompactionUnit;
import org.apache.flink.table.filesystem.stream.compact.CompactMessages.CoordinatorInput;
import org.apache.flink.table.filesystem.stream.compact.CompactMessages.CoordinatorOutput;
import org.apache.flink.table.filesystem.stream.compact.CompactMessages.EndCheckpoint;
import org.apache.flink.table.filesystem.stream.compact.CompactMessages.EndCompaction;
import org.apache.flink.table.filesystem.stream.compact.CompactMessages.InputFile;
import org.apache.flink.table.runtime.util.BinPacking;
import org.apache.flink.util.function.SupplierWithException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.io.UncheckedIOException;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;
import java.util.function.Function;
/**
* This is the single (non-parallel) monitoring task which coordinates input files into compaction
* units:
*
* <ul>
*   <li>Receives in-flight input files inside a checkpoint.
*   <li>Receives all upstream end-input messages after the checkpoint completes successfully,
*       then starts coordination.
* </ul>
*
* <p>{@link CompactionUnit} and {@link EndCompaction} must be sent downstream in order. Because
* {@link EndCompaction} is emitted by broadcast, the unit and the endCompaction both use the
* broadcast emitting mechanism. Since each unit is broadcast but should be processed by a single
* task, we carry the ID in the unit and let each downstream task select its own units.
*
* <p>NOTE: The coordination is a stable algorithm, which ensures that the downstream can perform
* compaction at any time without worrying about failover.
*
* <p>STATE: This operator stores input files in state; after the checkpoint completes
* successfully, the input files are taken out of the state for coordination.
*/
public class CompactCoordinator extends AbstractStreamOperator
implements OneInputStreamOperator {
private static final long serialVersionUID = 1L;
private static final Logger LOG = LoggerFactory.getLogger(CompactCoordinator.class);
private final SupplierWithException fsFactory;
private final long targetFileSize;
private transient FileSystem fileSystem;
private transient ListState