All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.flink.connector.pulsar.source.util.SerdeUtils Maven / Gradle / Ivy

There is a newer version: 1.12.0
Show newest version
/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.connector.pulsar.source.util;

import org.apache.flink.api.connector.source.SourceSplit;
import org.apache.flink.core.io.SimpleVersionedSerializer;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.util.Collection;
import java.util.HashMap;
import java.util.Map;
import java.util.function.Function;

/**
 * A util class with some helper method for serde in the sources.
 */
public class SerdeUtils {

    /**
     * Private constructor for util class.
     */
    private SerdeUtils() {
    }

    /**
     * Serialize a mapping from subtask ids to lists of assigned splits.
     * The serialized format is following:
     * 
     * 4 bytes - number of subtasks
     * 4 bytes - split serializer version
     * N bytes - [assignment_for_subtask]
     * 		4 bytes - subtask id
     * 		4 bytes - number of assigned splits
     * 		N bytes - [assigned_splits]
     * 			4 bytes - serialized split length
     * 			N bytes - serialized splits
     * 
* * @param splitAssignments a mapping from subtask ids to lists of assigned splits. * @param splitSerializer the serializer of the split. * @param the type of the splits. * @param the type of the collection to hold the assigned splits for a subtask. * @return the serialized bytes of the given subtask to splits assignment mapping. * @throws IOException when serialization failed. */ public static > byte[] serializeSplitAssignments( Map splitAssignments, SimpleVersionedSerializer splitSerializer) throws IOException { try ( ByteArrayOutputStream baos = new ByteArrayOutputStream(); DataOutputStream out = new DataOutputStream(baos)) { out.writeInt(splitAssignments.size()); // Split serializer version. out.writeInt(splitSerializer.getVersion()); // Write assignments for subtasks. for (Map.Entry entry : splitAssignments.entrySet()) { // Subtask ID int subtaskId = entry.getKey(); Collection splitsForSubtask = entry.getValue(); // Number of the splits. out.writeInt(subtaskId); out.writeInt(splitsForSubtask.size()); for (SplitT split : splitsForSubtask) { byte[] serializedSplit = splitSerializer.serialize(split); out.writeInt(serializedSplit.length); out.write(serializedSplit); } } return baos.toByteArray(); } } /** * Deserialize the given bytes returned by {@link #serializeSplitAssignments(Map, SimpleVersionedSerializer)}. * * @param serialized the serialized bytes returned by * {@link #serializeSplitAssignments(Map, SimpleVersionedSerializer)}. * @param splitSerializer the split serializer for the splits. * @param collectionSupplier the supplier for the {@link Collection} instance to hold the assigned splits for a * subtask. * @param the type of the splits. * @param the type of the collection to hold the assigned splits for a subtask. * @return A mapping from subtask id to its assigned splits. * @throws IOException when deserialization failed. */ public static > Map deserializeSplitAssignments( byte[] serialized, SimpleVersionedSerializer splitSerializer, Function collectionSupplier) throws IOException { try ( ByteArrayInputStream bais = new ByteArrayInputStream(serialized); DataInputStream in = new DataInputStream(bais)) { int numSubtasks = in.readInt(); Map splitsAssignments = new HashMap<>(numSubtasks); int serializerVersion = in.readInt(); for (int i = 0; i < numSubtasks; i++) { int subtaskId = in.readInt(); int numAssignedSplits = in.readInt(); C assignedSplits = collectionSupplier.apply(numAssignedSplits); for (int j = 0; j < numAssignedSplits; j++) { int serializedSplitSize = in.readInt(); byte[] serializedSplit = new byte[serializedSplitSize]; in.readFully(serializedSplit); SplitT split = splitSerializer.deserialize(serializerVersion, serializedSplit); assignedSplits.add(split); } splitsAssignments.put(subtaskId, assignedSplits); } return splitsAssignments; } } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy