// org.apache.hudi.common.data.HoodiePairData (source listing retrieved from a Maven / Gradle / Ivy artifact viewer)
// The newest version!
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.hudi.common.data;
import org.apache.hudi.common.function.SerializableFunction;
import org.apache.hudi.common.function.SerializablePairFunction;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.collection.Pair;
import java.io.Serializable;
import java.util.Map;
/**
* An abstraction for pairs of key in type K and value in type V to store the reference
* and do transformation.
*
* @param type of key.
* @param type of value.
*/
public abstract class HoodiePairData implements Serializable {
/**
* @return the collection of pairs.
*/
public abstract Object get();
/**
* Caches the data.
*
* @param cacheConfig config value for caching.
*/
public abstract void persist(String cacheConfig);
/**
* Removes the cached data.
*/
public abstract void unpersist();
/**
* @return all keys in {@link HoodieData}.
*/
public abstract HoodieData keys();
/**
* @return all values in {@link HoodieData}.
*/
public abstract HoodieData values();
/**
* @return the number of pairs.
*/
public abstract long count();
/**
* @return the number of pairs per key in a {@link Map}.
*/
public abstract Map countByKey();
/**
* @param func serializable map function.
* @param output object type.
* @return {@link HoodieData} containing the result. Actual execution may be deferred.
*/
public abstract HoodieData map(SerializableFunction, O> func);
/**
* @param mapToPairFunc serializable map function to generate another pair.
* @param new key type.
* @param new value type.
* @return {@link HoodiePairData} containing the result. Actual execution may be deferred.
*/
public abstract HoodiePairData mapToPair(
SerializablePairFunction, L, W> mapToPairFunc);
/**
* Performs a left outer join of this and other. For each element (k, v) in this,
* the resulting HoodiePairData will either contain all pairs (k, (v, Some(w))) for w in other,
* or the pair (k, (v, None)) if no elements in other have key k.
*
* @param other the other {@link HoodiePairData}
* @param value type of the other {@link HoodiePairData}
* @return {@link HoodiePairData>>} containing the left outer join result.
* Actual execution may be deferred.
*/
public abstract HoodiePairData>> leftOuterJoin(HoodiePairData other);
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy