org.dinky.shaded.paimon.table.source.DataSplit

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.dinky.shaded.paimon.table.source;

import org.dinky.shaded.paimon.data.BinaryRow;
import org.dinky.shaded.paimon.io.DataFileMeta;
import org.dinky.shaded.paimon.io.DataFileMetaSerializer;
import org.dinky.shaded.paimon.io.DataInputView;
import org.dinky.shaded.paimon.io.DataInputViewStreamWrapper;
import org.dinky.shaded.paimon.io.DataOutputView;
import org.dinky.shaded.paimon.io.DataOutputViewStreamWrapper;
import org.dinky.shaded.paimon.utils.SerializationUtils;

import java.io.IOException;
import java.io.ObjectInputStream;
import java.io.ObjectOutputStream;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Objects;
import java.util.Optional;
import java.util.OptionalLong;

import static org.dinky.shaded.paimon.utils.Preconditions.checkArgument;

/** An input split covering one bucket of one partition. Needed by most batch computation engines; a usage sketch follows the listing. */
public class DataSplit implements Split {

    private static final long serialVersionUID = 5L;

    private long snapshotId = 0;
    private boolean isStreaming = false;
    private List<DataFileMeta> beforeFiles = new ArrayList<>();

    private BinaryRow partition;
    private int bucket = -1;
    private List<DataFileMeta> dataFiles;

    private List<RawFile> rawFiles = Collections.emptyList();

    public DataSplit() {}

    public long snapshotId() {
        return snapshotId;
    }

    public BinaryRow partition() {
        return partition;
    }

    public int bucket() {
        return bucket;
    }

    public List<DataFileMeta> beforeFiles() {
        return beforeFiles;
    }

    public List<DataFileMeta> dataFiles() {
        return dataFiles;
    }

    public boolean isStreaming() {
        return isStreaming;
    }

    public OptionalLong getLatestFileCreationEpochMillis() {
        return this.dataFiles.stream().mapToLong(DataFileMeta::creationTimeEpochMillis).max();
    }

    @Override
    public long rowCount() {
        long rowCount = 0;
        for (DataFileMeta file : dataFiles) {
            rowCount += file.rowCount();
        }
        return rowCount;
    }

    @Override
    public Optional<List<RawFile>> convertToRawFiles() {
        if (rawFiles.isEmpty()) {
            return Optional.empty();
        } else {
            return Optional.of(rawFiles);
        }
    }

    @Override
    public boolean equals(Object o) {
        if (this == o) {
            return true;
        }
        if (o == null || getClass() != o.getClass()) {
            return false;
        }
        DataSplit split = (DataSplit) o;
        return bucket == split.bucket
                && Objects.equals(partition, split.partition)
                && Objects.equals(beforeFiles, split.beforeFiles)
                && Objects.equals(dataFiles, split.dataFiles)
                && isStreaming == split.isStreaming
                && Objects.equals(rawFiles, split.rawFiles);
    }

    @Override
    public int hashCode() {
        return Objects.hash(partition, bucket, beforeFiles, dataFiles, isStreaming, rawFiles);
    }

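    // Java serialization hooks: delegate to the custom serialize()/deserialize()
    // format below rather than default field-by-field serialization.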
    private void writeObject(ObjectOutputStream out) throws IOException {
        serialize(new DataOutputViewStreamWrapper(out));
    }

    private void readObject(ObjectInputStream in) throws IOException, ClassNotFoundException {
        assign(deserialize(new DataInputViewStreamWrapper(in)));
    }

    private void assign(DataSplit other) {
        this.snapshotId = other.snapshotId;
        this.partition = other.partition;
        this.bucket = other.bucket;
        this.beforeFiles = other.beforeFiles;
        this.dataFiles = other.dataFiles;
        this.isStreaming = other.isStreaming;
        this.rawFiles = other.rawFiles;
    }

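    // Custom wire format: snapshotId, partition (BinaryRow), bucket,
    // beforeFiles (count + entries), dataFiles (count + entries),
    // isStreaming flag, rawFiles (count + entries).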
    public void serialize(DataOutputView out) throws IOException {
        out.writeLong(snapshotId);
        SerializationUtils.serializeBinaryRow(partition, out);
        out.writeInt(bucket);

        DataFileMetaSerializer dataFileSer = new DataFileMetaSerializer();
        out.writeInt(beforeFiles.size());
        for (DataFileMeta file : beforeFiles) {
            dataFileSer.serialize(file, out);
        }

        out.writeInt(dataFiles.size());
        for (DataFileMeta file : dataFiles) {
            dataFileSer.serialize(file, out);
        }

        out.writeBoolean(isStreaming);

        out.writeInt(rawFiles.size());
        for (RawFile rawFile : rawFiles) {
            rawFile.serialize(out);
        }
    }

    public static DataSplit deserialize(DataInputView in) throws IOException {
        long snapshotId = in.readLong();
        BinaryRow partition = SerializationUtils.deserializeBinaryRow(in);
        int bucket = in.readInt();

        DataFileMetaSerializer dataFileSer = new DataFileMetaSerializer();
        int beforeNumber = in.readInt();
        List<DataFileMeta> beforeFiles = new ArrayList<>(beforeNumber);
        for (int i = 0; i < beforeNumber; i++) {
            beforeFiles.add(dataFileSer.deserialize(in));
        }

        int fileNumber = in.readInt();
        List<DataFileMeta> dataFiles = new ArrayList<>(fileNumber);
        for (int i = 0; i < fileNumber; i++) {
            dataFiles.add(dataFileSer.deserialize(in));
        }

        boolean isStreaming = in.readBoolean();

        int rawFileNum = in.readInt();
        List<RawFile> rawFiles = new ArrayList<>();
        for (int i = 0; i < rawFileNum; i++) {
            rawFiles.add(RawFile.deserialize(in));
        }

        return builder()
                .withSnapshot(snapshotId)
                .withPartition(partition)
                .withBucket(bucket)
                .withBeforeFiles(beforeFiles)
                .withDataFiles(dataFiles)
                .isStreaming(isStreaming)
                .rawFiles(rawFiles)
                .build();
    }

    public static Builder builder() {
        return new Builder();
    }

    /** Builder for {@link DataSplit}. */
    public static class Builder {

        private final DataSplit split = new DataSplit();

        public Builder withSnapshot(long snapshot) {
            this.split.snapshotId = snapshot;
            return this;
        }

        public Builder withPartition(BinaryRow partition) {
            this.split.partition = partition;
            return this;
        }

        public Builder withBucket(int bucket) {
            this.split.bucket = bucket;
            return this;
        }

        public Builder withBeforeFiles(List<DataFileMeta> beforeFiles) {
            this.split.beforeFiles = beforeFiles;
            return this;
        }

        public Builder withDataFiles(List<DataFileMeta> dataFiles) {
            this.split.dataFiles = dataFiles;
            return this;
        }

        public Builder isStreaming(boolean isStreaming) {
            this.split.isStreaming = isStreaming;
            return this;
        }

        public Builder rawFiles(List<RawFile> rawFiles) {
            this.split.rawFiles = rawFiles;
            return this;
        }

        public DataSplit build() {
            checkArgument(split.partition != null);
            checkArgument(split.bucket != -1);
            checkArgument(split.dataFiles != null);

            // Copy into a fresh instance so the Builder can be reused safely.
            DataSplit newSplit = new DataSplit();
            newSplit.assign(this.split);
            return newSplit;
        }
    }
}
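A minimal usage sketch, not part of the class above: it assumes a `BinaryRow` partition and a `List<DataFileMeta>` obtained elsewhere (e.g. from a table scan), and `DataSplitDemo`, `build` and `roundTrip` are hypothetical names introduced only for illustration.

import org.dinky.shaded.paimon.data.BinaryRow;
import org.dinky.shaded.paimon.io.DataFileMeta;
import org.dinky.shaded.paimon.io.DataInputViewStreamWrapper;
import org.dinky.shaded.paimon.io.DataOutputViewStreamWrapper;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.List;

// Hypothetical demo class, not part of Paimon.
class DataSplitDemo {

    // partition and files are placeholders; a real caller obtains them from a scan.
    static DataSplit build(BinaryRow partition, List<DataFileMeta> files) {
        // partition, bucket and dataFiles are required; build() enforces this.
        return DataSplit.builder()
                .withSnapshot(1L)
                .withPartition(partition)
                .withBucket(0)
                .withDataFiles(files)
                .build();
    }

    // Round-trips a split through the custom wire format shown above.
    static DataSplit roundTrip(DataSplit split) throws IOException {
        ByteArrayOutputStream bytes = new ByteArrayOutputStream();
        split.serialize(new DataOutputViewStreamWrapper(bytes));
        return DataSplit.deserialize(
                new DataInputViewStreamWrapper(new ByteArrayInputStream(bytes.toByteArray())));
    }
}

After a round trip, roundTrip(split).equals(split) holds: equals() compares partition, bucket, the file lists, the streaming flag and raw files (snapshotId is not compared).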



