org.apache.flink.connector.pulsar.source.util.SerdeUtils Maven / Gradle / Ivy
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.connector.pulsar.source.util;
import org.apache.flink.api.connector.source.SourceSplit;
import org.apache.flink.core.io.SimpleVersionedSerializer;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.util.Collection;
import java.util.HashMap;
import java.util.Map;
import java.util.function.Function;
/**
* A util class with some helper method for serde in the sources.
*/
public class SerdeUtils {
/**
* Private constructor for util class.
*/
private SerdeUtils() {
}
/**
* Serialize a mapping from subtask ids to lists of assigned splits.
* The serialized format is following:
*
* 4 bytes - number of subtasks
* 4 bytes - split serializer version
* N bytes - [assignment_for_subtask]
* 4 bytes - subtask id
* 4 bytes - number of assigned splits
* N bytes - [assigned_splits]
* 4 bytes - serialized split length
* N bytes - serialized splits
*
*
* @param splitAssignments a mapping from subtask ids to lists of assigned splits.
* @param splitSerializer the serializer of the split.
* @param the type of the splits.
* @param the type of the collection to hold the assigned splits for a subtask.
* @return the serialized bytes of the given subtask to splits assignment mapping.
* @throws IOException when serialization failed.
*/
public static > byte[] serializeSplitAssignments(
Map splitAssignments,
SimpleVersionedSerializer splitSerializer) throws IOException {
try (
ByteArrayOutputStream baos = new ByteArrayOutputStream();
DataOutputStream out = new DataOutputStream(baos)) {
out.writeInt(splitAssignments.size());
// Split serializer version.
out.writeInt(splitSerializer.getVersion());
// Write assignments for subtasks.
for (Map.Entry entry : splitAssignments.entrySet()) {
// Subtask ID
int subtaskId = entry.getKey();
Collection splitsForSubtask = entry.getValue();
// Number of the splits.
out.writeInt(subtaskId);
out.writeInt(splitsForSubtask.size());
for (SplitT split : splitsForSubtask) {
byte[] serializedSplit = splitSerializer.serialize(split);
out.writeInt(serializedSplit.length);
out.write(serializedSplit);
}
}
return baos.toByteArray();
}
}
/**
* Deserialize the given bytes returned by {@link #serializeSplitAssignments(Map, SimpleVersionedSerializer)}.
*
* @param serialized the serialized bytes returned by
* {@link #serializeSplitAssignments(Map, SimpleVersionedSerializer)}.
* @param splitSerializer the split serializer for the splits.
* @param collectionSupplier the supplier for the {@link Collection} instance to hold the assigned splits for a
* subtask.
* @param the type of the splits.
* @param the type of the collection to hold the assigned splits for a subtask.
* @return A mapping from subtask id to its assigned splits.
* @throws IOException when deserialization failed.
*/
public static > Map deserializeSplitAssignments(
byte[] serialized,
SimpleVersionedSerializer splitSerializer,
Function collectionSupplier) throws IOException {
try (
ByteArrayInputStream bais = new ByteArrayInputStream(serialized);
DataInputStream in = new DataInputStream(bais)) {
int numSubtasks = in.readInt();
Map splitsAssignments = new HashMap<>(numSubtasks);
int serializerVersion = in.readInt();
for (int i = 0; i < numSubtasks; i++) {
int subtaskId = in.readInt();
int numAssignedSplits = in.readInt();
C assignedSplits = collectionSupplier.apply(numAssignedSplits);
for (int j = 0; j < numAssignedSplits; j++) {
int serializedSplitSize = in.readInt();
byte[] serializedSplit = new byte[serializedSplitSize];
in.readFully(serializedSplit);
SplitT split = splitSerializer.deserialize(serializerVersion, serializedSplit);
assignedSplits.add(split);
}
splitsAssignments.put(subtaskId, assignedSplits);
}
return splitsAssignments;
}
}
}