All Downloads are FREE. Search and download functionality uses the official Maven repository.

org.apache.beam.sdk.extensions.smb.AvroBucketMetadata Maven / Gradle / Ivy

The newest version!
/*
 * Copyright 2019 Spotify AB.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.beam.sdk.extensions.smb;

import static com.google.common.base.Verify.verify;
import static com.google.common.base.Verify.verifyNotNull;
import static org.apache.beam.sdk.coders.Coder.NonDeterministicException;

import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonIgnore;
import com.fasterxml.jackson.annotation.JsonInclude;
import com.fasterxml.jackson.annotation.JsonProperty;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.concurrent.atomic.AtomicReference;
import javax.annotation.Nullable;
import org.apache.avro.Schema;
import org.apache.avro.generic.IndexedRecord;
import org.apache.beam.sdk.coders.CannotProvideCoderException;
import org.apache.beam.sdk.coders.Coder;
import org.apache.beam.sdk.transforms.display.DisplayData;
import org.apache.beam.sdk.transforms.display.DisplayData.Builder;
import org.apache.beam.vendor.guava.v32_1_2_jre.com.google.common.collect.ImmutableSet;

/**
 * {@link org.apache.beam.sdk.extensions.smb.BucketMetadata} for Avro {@link IndexedRecord} records.
 */
public class AvroBucketMetadata extends BucketMetadata {

  @JsonProperty private final String keyField;

  @JsonProperty
  @JsonInclude(JsonInclude.Include.NON_NULL)
  private final String keyFieldSecondary;

  @JsonIgnore private final AtomicReference keyPath = new AtomicReference<>();
  @JsonIgnore private final AtomicReference keyPathSecondary = new AtomicReference<>();

  public AvroBucketMetadata(
      int numBuckets,
      int numShards,
      Class keyClassPrimary,
      String keyField,
      Class keyClassSecondary,
      String keyFieldSecondary,
      HashType hashType,
      String filenamePrefix,
      Schema schema)
      throws CannotProvideCoderException, NonDeterministicException {
    this(
        BucketMetadata.CURRENT_VERSION,
        numBuckets,
        numShards,
        keyClassPrimary,
        AvroUtils.validateKeyField(keyField, keyClassPrimary, schema),
        keyClassSecondary,
        keyFieldSecondary == null
            ? null
            : AvroUtils.validateKeyField(keyFieldSecondary, keyClassSecondary, schema),
        hashType,
        filenamePrefix);
  }

  AvroBucketMetadata(
      int version,
      int numBuckets,
      int numShards,
      Class keyClassPrimary,
      String keyField,
      Class keyClassSecondary,
      String keyFieldSecondary,
      HashType hashType,
      String filenamePrefix)
      throws CannotProvideCoderException, NonDeterministicException {
    this(
        version,
        numBuckets,
        numShards,
        keyClassPrimary,
        keyField,
        keyClassSecondary,
        keyFieldSecondary,
        BucketMetadata.serializeHashType(hashType),
        filenamePrefix);
  }

  @JsonCreator
  AvroBucketMetadata(
      @JsonProperty("version") int version,
      @JsonProperty("numBuckets") int numBuckets,
      @JsonProperty("numShards") int numShards,
      @JsonProperty("keyClass") Class keyClassPrimary,
      @JsonProperty("keyField") String keyField,
      @Nullable @JsonProperty("keyClassSecondary") Class keyClassSecondary,
      @Nullable @JsonProperty("keyFieldSecondary") String keyFieldSecondary,
      @JsonProperty("hashType") String hashType,
      @JsonProperty(value = "filenamePrefix", required = false) String filenamePrefix)
      throws CannotProvideCoderException, NonDeterministicException {
    super(
        version,
        numBuckets,
        numShards,
        keyClassPrimary,
        keyClassSecondary,
        hashType,
        filenamePrefix);
    verify(
        (keyClassSecondary != null && keyFieldSecondary != null)
            || (keyClassSecondary == null && keyFieldSecondary == null));
    this.keyField = keyField;
    this.keyFieldSecondary = keyFieldSecondary;
  }

  @Override
  public Map, Coder> coderOverrides() {
    return AvroUtils.coderOverrides();
  }

  @Override
  int hashPrimaryKeyMetadata() {
    return Objects.hash(keyField, AvroUtils.castToComparableStringClass(getKeyClass()));
  }

  @Override
  int hashSecondaryKeyMetadata() {
    return Objects.hash(
        keyFieldSecondary, AvroUtils.castToComparableStringClass(getKeyClassSecondary()));
  }

  @Override
  public Set> compatibleMetadataTypes() {
    return ImmutableSet.of(ParquetBucketMetadata.class);
  }

  @Override
  public K1 extractKeyPrimary(V value) {
    int[] path = keyPath.get();
    if (path == null) {
      path = AvroUtils.toKeyPath(keyField, getKeyClass(), value.getSchema());
      keyPath.compareAndSet(null, path);
    }
    return extractKey(getKeyClass(), path, value);
  }

  @Override
  public K2 extractKeySecondary(V value) {
    verifyNotNull(keyFieldSecondary);
    verifyNotNull(getKeyClassSecondary());
    int[] path = keyPathSecondary.get();
    if (path == null) {
      path = AvroUtils.toKeyPath(keyFieldSecondary, getKeyClassSecondary(), value.getSchema());
      keyPathSecondary.compareAndSet(null, path);
    }
    return extractKey(getKeyClassSecondary(), path, value);
  }

  static  K extractKey(Class keyClazz, int[] keyPath, IndexedRecord value) {
    IndexedRecord node = value;
    for (int i = 0; i < keyPath.length - 1; i++) {
      node = (IndexedRecord) node.get(keyPath[i]);
    }
    Object keyObj = node.get(keyPath[keyPath.length - 1]);
    // Always convert CharSequence to String, in case reader and writer disagree
    if (keyObj != null && (keyClazz == CharSequence.class || keyClazz == String.class)) {
      keyObj = keyObj.toString();
    }
    @SuppressWarnings("unchecked")
    K key = (K) keyObj;
    return key;
  }

  @Override
  public void populateDisplayData(Builder builder) {
    super.populateDisplayData(builder);
    builder.add(DisplayData.item("keyFieldPrimary", keyField));
    if (keyFieldSecondary != null)
      builder.add(DisplayData.item("keyFieldSecondary", keyFieldSecondary));
  }

  @Override
   boolean keyClassMatches(Class requestedReadType) {
    return super.keyClassMatches(requestedReadType)
        || AvroUtils.castToComparableStringClass(getKeyClass()) == requestedReadType
        || AvroUtils.castToComparableStringClass(requestedReadType) == getKeyClass();
  }

  @Override
   boolean keyClassSecondaryMatches(Class requestedReadType) {
    return super.keyClassSecondaryMatches(requestedReadType)
        || AvroUtils.castToComparableStringClass(getKeyClassSecondary()) == requestedReadType
        || AvroUtils.castToComparableStringClass(requestedReadType) == getKeyClassSecondary();
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy