All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.iceberg.flink.sink.shuffle.AggregatedStatisticsSerializer Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.iceberg.flink.sink.shuffle;

import java.io.IOException;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import org.apache.flink.api.common.typeutils.CompositeTypeSerializerSnapshot;
import org.apache.flink.api.common.typeutils.TypeSerializer;
import org.apache.flink.api.common.typeutils.TypeSerializerSnapshot;
import org.apache.flink.api.common.typeutils.base.EnumSerializer;
import org.apache.flink.api.common.typeutils.base.ListSerializer;
import org.apache.flink.api.common.typeutils.base.LongSerializer;
import org.apache.flink.api.common.typeutils.base.MapSerializer;
import org.apache.flink.core.memory.DataInputView;
import org.apache.flink.core.memory.DataOutputView;
import org.apache.iceberg.SortKey;

/**
 * Flink {@link TypeSerializer} for {@link AggregatedStatistics}.
 *
 * <p>Binary layout written by {@link #serialize(AggregatedStatistics, DataOutputView)}:
 *
 * <ol>
 *   <li>the checkpoint id as a {@code long}
 *   <li>the {@link StatisticsType}, written with an {@link EnumSerializer}
 *   <li>the payload: the key-frequency map when the type is {@code StatisticsType.Map}, otherwise
 *       the range bounds written as a list of {@link SortKey}
 * </ol>
 */
public class AggregatedStatisticsSerializer extends TypeSerializer<AggregatedStatistics> {
  private final TypeSerializer<SortKey> sortKeySerializer;
  private final EnumSerializer<StatisticsType> statisticsTypeSerializer;
  private final MapSerializer<SortKey, Long> keyFrequencySerializer;
  private final ListSerializer<SortKey> rangeBoundsSerializer;

  /**
   * Creates a serializer whose map and list payload serializers are built on top of the given
   * sort key serializer.
   *
   * @param sortKeySerializer serializer used for every {@link SortKey} in the payload
   */
  AggregatedStatisticsSerializer(TypeSerializer<SortKey> sortKeySerializer) {
    this.sortKeySerializer = sortKeySerializer;
    this.statisticsTypeSerializer = new EnumSerializer<>(StatisticsType.class);
    this.keyFrequencySerializer = new MapSerializer<>(sortKeySerializer, LongSerializer.INSTANCE);
    this.rangeBoundsSerializer = new ListSerializer<>(sortKeySerializer);
  }

  @Override
  public boolean isImmutableType() {
    return false;
  }

  /**
   * Returns a new serializer built on a duplicate of the nested sort key serializer.
   *
   * <p>The nested serializer is duplicated rather than shared so that a stateful sort key
   * serializer is never used by two serializer instances (and therefore potentially two threads)
   * at once. Stateless serializers may simply return themselves from {@code duplicate()}.
   */
  @Override
  public TypeSerializer<AggregatedStatistics> duplicate() {
    return new AggregatedStatisticsSerializer(sortKeySerializer.duplicate());
  }

  /**
   * Creates a placeholder instance: checkpoint id 0, type {@code StatisticsType.Map}, an empty
   * key-frequency map and {@code null} range bounds.
   */
  @Override
  public AggregatedStatistics createInstance() {
    return new AggregatedStatistics(0, StatisticsType.Map, Collections.emptyMap(), null);
  }

  /**
   * Builds a new {@link AggregatedStatistics} from the accessor values of {@code from}.
   *
   * <p>The values returned by the accessors are handed to the constructor as-is; no additional
   * copying of the map or the array is performed here.
   */
  @Override
  public AggregatedStatistics copy(AggregatedStatistics from) {
    return new AggregatedStatistics(
        from.checkpointId(), from.type(), from.keyFrequency(), from.rangeBounds());
  }

  @Override
  public AggregatedStatistics copy(AggregatedStatistics from, AggregatedStatistics reuse) {
    // no benefit of reuse
    return copy(from);
  }

  /** Returns -1 because the serialized form has variable length. */
  @Override
  public int getLength() {
    return -1;
  }

  /**
   * Writes the checkpoint id, the statistics type, and then the payload matching that type.
   *
   * @param record statistics to write
   * @param target output view to write to
   * @throws IOException if writing to {@code target} fails
   */
  @Override
  public void serialize(AggregatedStatistics record, DataOutputView target) throws IOException {
    target.writeLong(record.checkpointId());
    statisticsTypeSerializer.serialize(record.type(), target);
    if (record.type() == StatisticsType.Map) {
      keyFrequencySerializer.serialize(record.keyFrequency(), target);
    } else {
      // range bounds are held as an array; the list view avoids copying before serialization
      rangeBoundsSerializer.serialize(Arrays.asList(record.rangeBounds()), target);
    }
  }

  /**
   * Reads the fields in the order written by {@link #serialize(AggregatedStatistics,
   * DataOutputView)}. Only the payload matching the decoded type is populated; the other one is
   * passed to the constructor as {@code null}.
   *
   * @param source input view to read from
   * @return the decoded statistics
   * @throws IOException if reading from {@code source} fails
   */
  @Override
  public AggregatedStatistics deserialize(DataInputView source) throws IOException {
    long checkpointId = source.readLong();
    StatisticsType type = statisticsTypeSerializer.deserialize(source);
    Map<SortKey, Long> keyFrequency = null;
    SortKey[] rangeBounds = null;
    if (type == StatisticsType.Map) {
      keyFrequency = keyFrequencySerializer.deserialize(source);
    } else {
      List<SortKey> sortKeys = rangeBoundsSerializer.deserialize(source);
      rangeBounds = sortKeys.toArray(new SortKey[0]);
    }

    return new AggregatedStatistics(checkpointId, type, keyFrequency, rangeBounds);
  }

  @Override
  public AggregatedStatistics deserialize(AggregatedStatistics reuse, DataInputView source)
      throws IOException {
    // not much benefit to reuse
    return deserialize(source);
  }

  /** Copies one record from {@code source} to {@code target} via a deserialize/serialize round trip. */
  @Override
  public void copy(DataInputView source, DataOutputView target) throws IOException {
    serialize(deserialize(source), target);
  }

  /** Two serializers are equal when their nested sort key serializers are equal. */
  @Override
  public boolean equals(Object obj) {
    if (!(obj instanceof AggregatedStatisticsSerializer)) {
      return false;
    }

    AggregatedStatisticsSerializer other = (AggregatedStatisticsSerializer) obj;
    return Objects.equals(sortKeySerializer, other.sortKeySerializer);
  }

  @Override
  public int hashCode() {
    return sortKeySerializer.hashCode();
  }

  @Override
  public TypeSerializerSnapshot<AggregatedStatistics> snapshotConfiguration() {
    return new AggregatedStatisticsSerializerSnapshot(this);
  }

  /**
   * Snapshot of {@link AggregatedStatisticsSerializer}. The sort key serializer is the single
   * nested serializer recorded in the snapshot.
   */
  public static class AggregatedStatisticsSerializerSnapshot
      extends CompositeTypeSerializerSnapshot<
          AggregatedStatistics, AggregatedStatisticsSerializer> {
    private static final int CURRENT_VERSION = 1;

    /** Constructor for read instantiation. */
    @SuppressWarnings({"unused", "checkstyle:RedundantModifier"})
    public AggregatedStatisticsSerializerSnapshot() {}

    /** Constructor for snapshotting the given serializer. */
    @SuppressWarnings("checkstyle:RedundantModifier")
    public AggregatedStatisticsSerializerSnapshot(AggregatedStatisticsSerializer serializer) {
      super(serializer);
    }

    @Override
    protected int getCurrentOuterSnapshotVersion() {
      return CURRENT_VERSION;
    }

    /** Exposes the sort key serializer as the only nested serializer. */
    @Override
    protected TypeSerializer<?>[] getNestedSerializers(
        AggregatedStatisticsSerializer outerSerializer) {
      return new TypeSerializer<?>[] {outerSerializer.sortKeySerializer};
    }

    /**
     * Rebuilds the outer serializer from the restored nested serializers; index 0 is expected to
     * be the {@link SortKeySerializer} recorded by {@link
     * #getNestedSerializers(AggregatedStatisticsSerializer)}.
     */
    @Override
    protected AggregatedStatisticsSerializer createOuterSerializerWithNestedSerializers(
        TypeSerializer<?>[] nestedSerializers) {
      SortKeySerializer sortKeySerializer = (SortKeySerializer) nestedSerializers[0];
      return new AggregatedStatisticsSerializer(sortKeySerializer);
    }
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy