All downloads are free. Search and download functionality uses the official Maven repository.

io.delta.flink.source.internal.state.DeltaPendingSplitsCheckpointSerializer Maven / Gradle / Ivy

There is a newer version: 3.2.1
Show newest version
package io.delta.flink.source.internal.state;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.nio.charset.StandardCharsets;

import io.delta.flink.source.internal.utils.SourceUtils;
import org.apache.flink.api.connector.source.SourceSplit;
import org.apache.flink.api.connector.source.SplitEnumerator;
import org.apache.flink.connector.file.src.PendingSplitsCheckpoint;
import org.apache.flink.connector.file.src.PendingSplitsCheckpointSerializer;
import org.apache.flink.core.fs.Path;
import org.apache.flink.core.io.SimpleVersionedSerializer;
import org.apache.flink.core.memory.DataInputViewStreamWrapper;
import org.apache.flink.core.memory.DataOutputViewStreamWrapper;
import static org.apache.flink.util.Preconditions.checkArgument;

/**
 * 

A de/serializer for objects of class {@link DeltaEnumeratorStateCheckpoint}. * *

This class provides methods for Flink core to serialize and deserialize {@code * DeltaPendingSplitsCheckpointSerializer} objects. * *

Serialization of {@code DeltaPendingSplitsCheckpointSerializer} object takes place during * checkpoint operation. * *

Deserialization of {@code DeltaPendingSplitsCheckpointSerializer} object takes place during * recovering from checkpoint when {@link SplitEnumerator} is being recreated. */ public class DeltaPendingSplitsCheckpointSerializer implements SimpleVersionedSerializer> { /** * The version of the serialization schema. *

* The {@link org.apache.flink.runtime.source.coordinator.SourceCoordinator} adds the version * number to {@link SplitEnumerator} checkpoint data. *

* During recovery from checkpoint, this value is deserialize and used as a version argument of * {@link DeltaPendingSplitsCheckpointSerializer#deserialize(int, byte[])} method. *

* It can be used to choose proper deserialization schema. */ private static final int VERSION = 1; /** * A de/serializer for {@link org.apache.flink.connector.file.src.FileSourceSplit} that {@link * DeltaSourceSplit} extends. It handles de/serialization all fields inherited from {@code * FileSourceSplit} */ private final PendingSplitsCheckpointSerializer decoratedSerDe; /** * Creates DeltaPendingSplitsCheckpointSerializer with given Split De/Serializer. * * @param splitSerDe A serializer for {@link SourceSplit} since {@link SplitEnumerator} state * checkpoint has to serialize unsigned splits. */ public DeltaPendingSplitsCheckpointSerializer( SimpleVersionedSerializer splitSerDe) { this.decoratedSerDe = new PendingSplitsCheckpointSerializer<>(splitSerDe); } @Override public int getVersion() { return VERSION; } @Override public byte[] serialize(DeltaEnumeratorStateCheckpoint state) throws IOException { checkArgument( state.getClass() == DeltaEnumeratorStateCheckpoint.class, "Only supports %s", DeltaEnumeratorStateCheckpoint.class.getName()); PendingSplitsCheckpoint decoratedCheckPoint = state.getPendingSplitsCheckpoint(); byte[] decoratedBytes = decoratedSerDe.serialize(decoratedCheckPoint); ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream(); try (DataOutputViewStreamWrapper outputWrapper = new DataOutputViewStreamWrapper(byteArrayOutputStream)) { outputWrapper.writeInt(decoratedBytes.length); outputWrapper.write(decoratedBytes); outputWrapper.writeLong(state.getSnapshotVersion()); outputWrapper.writeBoolean(state.isMonitoringForChanges()); final byte[] serPath = SourceUtils.pathToString(state.getDeltaTablePath()) .getBytes(StandardCharsets.UTF_8); outputWrapper.writeInt(serPath.length); outputWrapper.write(serPath); } return byteArrayOutputStream.toByteArray(); } @Override public DeltaEnumeratorStateCheckpoint deserialize(int version, byte[] serialized) throws IOException { if (version == 1) { return tryDeserializeV1(serialized); } throw new 
IOException("Unknown version: " + version); } private DeltaEnumeratorStateCheckpoint tryDeserializeV1(byte[] serialized) throws IOException { try (DataInputViewStreamWrapper inputWrapper = new DataInputViewStreamWrapper(new ByteArrayInputStream(serialized))) { return deserializeV1(inputWrapper); } } private DeltaEnumeratorStateCheckpoint deserializeV1( DataInputViewStreamWrapper inputWrapper) throws IOException { byte[] decoratedBytes = new byte[inputWrapper.readInt()]; inputWrapper.readFully(decoratedBytes); PendingSplitsCheckpoint decoratedCheckPoint = decoratedSerDe.deserialize(decoratedSerDe.getVersion(), decoratedBytes); long snapshotVersion = inputWrapper.readLong(); boolean monitoringForChanges = inputWrapper.readBoolean(); final byte[] bytes = new byte[inputWrapper.readInt()]; inputWrapper.readFully(bytes); Path deltaTablePath = new Path(new String(bytes, StandardCharsets.UTF_8)); return new DeltaEnumeratorStateCheckpoint<>( deltaTablePath, snapshotVersion, monitoringForChanges, decoratedCheckPoint); } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy