All downloads are FREE. The search and download functionality uses the official Maven repository.
Please wait. This can take some minutes ...
Downloading a project requires a lot of resources. Please understand that we have to cover our server costs. Thank you in advance.
Project price: only $1
You can buy this project and download/modify it as often as you want.
org.apache.inlong.sort.redis.sink.AbstractRedisSinkFunction Maven / Gradle / Ivy
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.inlong.sort.redis.sink;
import org.apache.inlong.sort.base.metric.MetricOption;
import org.apache.inlong.sort.base.metric.MetricState;
import org.apache.inlong.sort.base.metric.SinkMetricData;
import org.apache.inlong.sort.base.util.MetricStateUtils;
import org.apache.inlong.sort.redis.common.container.InlongRedisCommandsContainer;
import org.apache.inlong.sort.redis.common.container.RedisCommandsContainerBuilder;
import org.apache.inlong.sort.redis.common.schema.StateEncoder;
import org.apache.commons.lang3.time.StopWatch;
import org.apache.flink.api.common.serialization.SerializationSchema;
import org.apache.flink.api.common.state.ListState;
import org.apache.flink.api.common.state.ListStateDescriptor;
import org.apache.flink.api.common.typeinfo.TypeHint;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.runtime.state.FunctionInitializationContext;
import org.apache.flink.runtime.state.FunctionSnapshotContext;
import org.apache.flink.streaming.api.checkpoint.CheckpointedFunction;
import org.apache.flink.streaming.api.functions.sink.RichSinkFunction;
import org.apache.flink.streaming.connectors.redis.common.config.FlinkJedisConfigBase;
import org.apache.flink.table.data.RowData;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import javax.annotation.concurrent.GuardedBy;
import java.nio.charset.StandardCharsets;
import java.time.Duration;
import java.util.ArrayList;
import java.util.List;
import java.util.Optional;
import static org.apache.flink.util.Preconditions.checkNotNull;
import static org.apache.inlong.sort.base.Constants.*;
/**
 * Base class for Flink sink functions that buffer encoded rows and hand them to Redis in batches.
 *
 * <p>Each incoming row is encoded by the {@link StateEncoder} and appended to an in-memory buffer.
 * The buffer is passed to {@link #flushInternal(List)} when it holds at least {@code batchSize}
 * entries, when the background {@code OutputFlusher} thread wakes up, or when the sink is closed.
 * On every checkpoint the current buffer content is written to operator state, and it is read back
 * into the buffer when the job is restored.
 *
 * <p>Once {@link #open(Configuration)} has run, the buffer is only touched while holding
 * {@code lock}.
 *
 * <p>NOTE(review): the public and protected signatures in this copy of the file carry no generic
 * type arguments. They are deliberately left exactly as found so that existing subclasses keep
 * compiling; only private state uses explicit type arguments.
 */
public abstract class AbstractRedisSinkFunction
        extends
            RichSinkFunction
        implements
            CheckpointedFunction {

    private static final long serialVersionUID = 1L;
    private static final Logger LOG = LoggerFactory.getLogger(AbstractRedisSinkFunction.class);
    private static final String DEFAULT_OUTPUT_FLUSH_THREAD_NAME = "OutputFlusher";

    /**
     * The output type info.
     */
    private final TypeInformation outputType;
    /**
     * The serializer for values.
     */
    protected final SerializationSchema serializationSchema;
    protected final FlinkJedisConfigBase flinkJedisConfigBase;
    /**
     * The redis record expired time.
     */
    protected transient Integer expireTime;
    /**
     * Set by the flusher thread when a background flush failed, so that the next
     * {@link #invoke(RowData, Context)} retries the flush on the calling thread.
     * Volatile because the flusher thread writes it without holding {@code lock}.
     */
    private transient volatile boolean forceFlush;
    /**
     * Operator state holding the buffered rows between checkpoints.
     */
    private ListState<Object> listState;
    /**
     * Monitor protecting {@code rows}; created in {@link #open(Configuration)}.
     */
    private transient Object lock;
    private final long batchSize;
    private final long flushIntervalInMillis;
    /**
     * Rows that were encoded but not yet flushed.
     */
    private final List<Object> rows;
    /**
     * The container for all available Redis commands.
     */
    protected InlongRedisCommandsContainer redisCommandsContainer;
    /**
     * The stop watch to measure time duration.
     */
    @GuardedBy("lock")
    protected transient StopWatch stopWatch;
    protected StateEncoder stateEncoder;
    private final String auditHostAndPorts;
    private final String inLongMetric;
    private transient MetricState metricState;
    private transient ListState<MetricState> metricStateListState;
    private SinkMetricData sinkMetricData;
    /**
     * Periodic flusher started by {@link #open(Configuration)}; stays {@code null} when periodic
     * flushing is disabled. Kept as a field so that {@link #close()} can stop the thread.
     */
    private transient OutputFlusher outputFlusher;

    /**
     * Creates the sink function.
     *
     * @param outputType type information used for the {@code "rowState"} operator state
     * @param serializationSchema schema handed to the state encoder for every row
     * @param stateEncoder encoder turning a {@link RowData} into the entries that are buffered
     * @param batchSize buffer size at which {@code invoke} triggers a flush
     * @param flushInterval sleep time of the background flusher; zero disables the flusher
     * @param configuration only checked for {@code null} here, not otherwise used by this class
     * @param flinkJedisConfigBase configuration used to build the Redis commands container
     * @param inLongMetric InLong metric labels; when {@code null} no metric state is registered
     * @param auditHostAndPorts audit address passed to the metric option builder
     * @throws NullPointerException if {@code configuration} or {@code flushInterval} is {@code null}
     */
    public AbstractRedisSinkFunction(
            TypeInformation outputType,
            SerializationSchema serializationSchema,
            StateEncoder stateEncoder,
            long batchSize,
            Duration flushInterval,
            Duration configuration,
            FlinkJedisConfigBase flinkJedisConfigBase,
            String inLongMetric,
            String auditHostAndPorts) {
        checkNotNull(configuration, "The configuration must not be null.");
        checkNotNull(flushInterval, "The flush interval must not be null.");
        this.stateEncoder = stateEncoder;
        this.outputType = outputType;
        this.serializationSchema = serializationSchema;
        this.batchSize = batchSize;
        this.flushIntervalInMillis = flushInterval.toMillis();
        this.rows = new ArrayList<>();
        this.flinkJedisConfigBase = flinkJedisConfigBase;
        this.inLongMetric = inLongMetric;
        this.auditHostAndPorts = auditHostAndPorts;
    }

    /**
     * Creates the buffer lock, opens the Redis commands container, starts the periodic flusher
     * when one is configured, and registers the sink metrics.
     *
     * @throws RuntimeException wrapping any exception raised while building or opening the container
     */
    @Override
    public void open(Configuration parameters) {
        LOG.info("Opening redis sink.");
        lock = new Object();
        stopWatch = new StopWatch();
        try {
            this.redisCommandsContainer = RedisCommandsContainerBuilder.build(this.flinkJedisConfigBase);
            this.redisCommandsContainer.open();
        } catch (Exception e) {
            LOG.error("Redis has not been properly initialized: ", e);
            throw new RuntimeException(e);
        }
        startOutputFlusherIfConfigured();
        registerMetrics();
    }

    /**
     * Starts the background flusher unless the batch size is 1 or the flush interval is 0.
     */
    private void startOutputFlusherIfConfigured() {
        if (this.batchSize == 1 || this.flushIntervalInMillis == 0) {
            LOG.info("Flush records immediately.");
            return;
        }
        String threadName = DEFAULT_OUTPUT_FLUSH_THREAD_NAME + " for "
                + getRuntimeContext().getTaskNameWithSubtasks();
        outputFlusher = new OutputFlusher(threadName, flushIntervalInMillis);
        outputFlusher.start();
    }

    /**
     * Builds the metric option, seeded with restored counters when available, and creates the
     * sink metric data from it.
     */
    private void registerMetrics() {
        MetricOption metricOption = MetricOption.builder()
                .withInlongLabels(inLongMetric)
                .withAuditAddress(auditHostAndPorts)
                .withInitRecords(metricState != null ? metricState.getMetricValue(NUM_RECORDS_OUT) : 0L)
                .withInitBytes(metricState != null ? metricState.getMetricValue(NUM_BYTES_OUT) : 0L)
                .withInitDirtyRecords(metricState != null ? metricState.getMetricValue(DIRTY_RECORDS_OUT) : 0L)
                .withInitDirtyBytes(metricState != null ? metricState.getMetricValue(DIRTY_BYTES_OUT) : 0L)
                .withRegisterMetric(MetricOption.RegisteredMetric.ALL)
                .build();
        // NOTE(review): the null check suggests build() can return null (presumably when no
        // InLong labels are configured) - confirm against MetricOption.
        if (metricOption != null) {
            sinkMetricData = new SinkMetricData(metricOption, getRuntimeContext().getMetricGroup());
        }
    }

    /**
     * Registers the metric state (only when InLong metric labels are set) and the row state, and
     * on restore reloads the metric counters and the previously buffered rows.
     */
    @Override
    public void initializeState(FunctionInitializationContext context) throws Exception {
        if (this.inLongMetric != null) {
            this.metricStateListState = context.getOperatorStateStore().getUnionListState(
                    new ListStateDescriptor<>(
                            INLONG_METRIC_STATE_NAME, TypeInformation.of(new TypeHint<MetricState>() {
                            })));
        }
        // Without metric labels there is no metric state to restore from.
        if (context.isRestored() && metricStateListState != null) {
            metricState = MetricStateUtils.restoreMetricState(metricStateListState,
                    getRuntimeContext().getIndexOfThisSubtask(), getRuntimeContext().getNumberOfParallelSubtasks());
        }
        final ListStateDescriptor stateDescriptor = new ListStateDescriptor<>(
                "rowState", outputType);
        this.listState = context.getOperatorStateStore().getListState(stateDescriptor);
        if (context.isRestored() && listState != null) {
            listState.get().forEach(rows::add);
        }
    }

    /**
     * Replaces the row state with the current buffer content and snapshots the metric counters.
     */
    @Override
    public void snapshotState(FunctionSnapshotContext functionSnapshotContext) throws Exception {
        LOG.info("redis start snapshotState, id: {}", functionSnapshotContext.getCheckpointId());
        synchronized (lock) {
            listState.clear();
            listState.addAll(rows);
        }
        if (sinkMetricData != null && metricStateListState != null) {
            MetricStateUtils.snapshotMetricStateForSinkMetricData(metricStateListState, sinkMetricData,
                    getRuntimeContext().getIndexOfThisSubtask());
        }
        LOG.info("redis end snapshotState, id: {}", functionSnapshotContext.getCheckpointId());
    }

    /**
     * Encodes one row with the configured state encoder.
     *
     * @param in the row to encode
     * @return the entries produced by the encoder
     * @throws RuntimeException wrapping any exception thrown by the encoder
     */
    protected List serialize(RowData in) {
        try {
            return stateEncoder.serialize(in, serializationSchema);
        } catch (Exception e) {
            // Pass the exception to the logger so the stack trace is not lost.
            LOG.error("Error when serializing data: {}", in, e);
            throw new RuntimeException(e);
        }
    }

    /**
     * Encodes the row, reports it to the metrics, buffers the result, and flushes when the buffer
     * reached {@code batchSize} or a background flush failed earlier. In the latter case the
     * retry runs here, so a repeated failure is thrown to the caller of this method.
     *
     * @param in the incoming row
     * @param context the sink context; not used by this method
     */
    public void invoke(RowData in, Context context) {
        List redisOutputs = serialize(in);
        // Explicit charset keeps the reported byte count independent of the platform default.
        sendMetrics(in.toString().getBytes(StandardCharsets.UTF_8));
        synchronized (lock) {
            rows.addAll(redisOutputs);
            if (forceFlush || rows.size() >= batchSize) {
                flush();
            }
        }
    }

    /**
     * Stops the periodic flusher, flushes what is still buffered, and closes the Redis container.
     */
    @Override
    public void close() throws Exception {
        try {
            // Stop the background thread first so it cannot flush against a closed container.
            stopOutputFlusher();
            closeClient();
        } finally {
            super.close();
        }
        LOG.info("Closed redis sink.");
    }

    /**
     * Terminates the periodic flusher thread if one was started.
     */
    private void stopOutputFlusher() {
        if (outputFlusher != null) {
            outputFlusher.terminate();
            outputFlusher = null;
        }
    }

    /**
     * Flushes the remaining rows and closes the Redis container. The container is closed even if
     * the final flush throws; the flush exception still propagates to the caller.
     */
    private void closeClient() {
        if (lock == null) {
            // open() never ran: there is no lock to take and no container to close.
            return;
        }
        synchronized (lock) {
            if (redisCommandsContainer == null) {
                return;
            }
            try {
                flush();
            } finally {
                try {
                    redisCommandsContainer.close();
                } catch (Throwable t) {
                    LOG.warn("Could not properly close the redis client.", t);
                } finally {
                    redisCommandsContainer = null;
                }
            }
        }
    }

    /**
     * Daemon thread that flushes the buffer every {@code timeoutInMillis} milliseconds.
     */
    private class OutputFlusher extends Thread {

        private final long timeoutInMillis;
        private volatile boolean running = true;

        OutputFlusher(String name, long timeoutInMillis) {
            super(name);
            setDaemon(true);
            this.timeoutInMillis = timeoutInMillis;
        }

        /**
         * Asks the thread to stop and interrupts a pending sleep.
         */
        public void terminate() {
            running = false;
            interrupt();
        }

        @Override
        public void run() {
            while (running) {
                try {
                    try {
                        Thread.sleep(timeoutInMillis);
                    } catch (InterruptedException e) {
                        if (running) {
                            // Interrupted without terminate(): handled like a failed flush below.
                            throw new Exception(e);
                        }
                    }
                    if (!running) {
                        // terminate() was called; close() performs the final flush itself.
                        return;
                    }
                    synchronized (lock) {
                        // Re-check under the lock: close() may have finished while we waited.
                        if (running && !rows.isEmpty()) {
                            flush();
                        }
                    }
                } catch (Throwable t) {
                    LOG.error("An exception happened while flushing the outputs", t);
                    // The failure is not rethrown from this thread. Setting forceFlush makes the
                    // next invoke() retry the flush, so a persistent error fails the task there;
                    // until then the buffered data is only delayed.
                    forceFlush = true;
                    LOG.error("Set the forceFlush to true, it will retry in the main thread.");
                }
            }
        }
    }

    /**
     * Writes the given buffered rows out. Called while {@code lock} is held; the buffer is cleared
     * by the caller only after this method returns normally, so throwing keeps the rows buffered.
     *
     * @param rows the rows buffered since the last successful flush; never empty
     */
    protected abstract void flushInternal(List rows);

    /**
     * Flushes the buffer if it is not empty and always resets {@code forceFlush}.
     */
    private void flush() {
        synchronized (lock) {
            try {
                if (!rows.isEmpty()) {
                    LOG.debug("Flushing {} records to redis...", rows.size());
                    flushInternal(rows);
                    LOG.debug("Flushed {} records to redis...", rows.size());
                    rows.clear();
                }
            } finally {
                forceFlush = false;
            }
        }
    }

    /**
     * Reports one record of {@code document.length} bytes to the sink metrics, if metrics are
     * registered.
     *
     * @param document the bytes whose length is reported
     */
    protected void sendMetrics(byte[] document) {
        if (sinkMetricData != null) {
            sinkMetricData.invoke(1, document.length);
        }
    }
}