All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.hudi.common.data.HoodiePairData Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.hudi.common.data;

import org.apache.hudi.common.function.SerializableFunction;
import org.apache.hudi.common.function.SerializablePairFunction;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.collection.Pair;

import java.io.Serializable;
import java.util.Map;

/**
 * An abstraction for pairs of key in type K and value in type V to store the reference
 * and do transformation.
 *
 * @param  type of key.
 * @param  type of value.
 */
public abstract class HoodiePairData implements Serializable {
  /**
   * @return the collection of pairs.
   */
  public abstract Object get();

  /**
   * Caches the data.
   *
   * @param cacheConfig config value for caching.
   */
  public abstract void persist(String cacheConfig);

  /**
   * Removes the cached data.
   */
  public abstract void unpersist();

  /**
   * @return all keys in {@link HoodieData}.
   */
  public abstract HoodieData keys();

  /**
   * @return all values in {@link HoodieData}.
   */
  public abstract HoodieData values();

  /**
   * @return the number of pairs.
   */
  public abstract long count();

  /**
   * @return the number of pairs per key in a {@link Map}.
   */
  public abstract Map countByKey();

  /**
   * @param func serializable map function.
   * @param   output object type.
   * @return {@link HoodieData} containing the result. Actual execution may be deferred.
   */
  public abstract  HoodieData map(SerializableFunction, O> func);

  /**
   * @param mapToPairFunc serializable map function to generate another pair.
   * @param            new key type.
   * @param            new value type.
   * @return {@link HoodiePairData} containing the result. Actual execution may be deferred.
   */
  public abstract  HoodiePairData mapToPair(
      SerializablePairFunction, L, W> mapToPairFunc);

  /**
   * Performs a left outer join of this and other. For each element (k, v) in this,
   * the resulting HoodiePairData will either contain all pairs (k, (v, Some(w))) for w in other,
   * or the pair (k, (v, None)) if no elements in other have key k.
   *
   * @param other the other {@link HoodiePairData}
   * @param    value type of the other {@link HoodiePairData}
   * @return {@link HoodiePairData>>} containing the left outer join result.
   * Actual execution may be deferred.
   */
  public abstract  HoodiePairData>> leftOuterJoin(HoodiePairData other);
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy