org.apache.hudi.common.data.HoodieMapPair Maven / Gradle / Ivy
The newest version!
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.hudi.common.data;
import org.apache.hudi.common.function.FunctionWrapper;
import org.apache.hudi.common.function.SerializableFunction;
import org.apache.hudi.common.function.SerializablePairFunction;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.collection.ImmutablePair;
import org.apache.hudi.common.util.collection.Pair;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.function.Function;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import static org.apache.hudi.common.function.FunctionWrapper.throwingMapWrapper;
/**
* Implementation of {@link HoodiePairData} using Java {@link Map}.
* The pairs are organized by the key in the Map and values for the same key
* are stored in a list as the value corresponding to the key in the Map.
*
* @param type of key.
* @param type of value.
*/
public class HoodieMapPair extends HoodiePairData {
private final Map> mapPairData;
private HoodieMapPair(Map> mapPairData) {
this.mapPairData = mapPairData;
}
/**
* @param mapPairData a {@link Map} of pairs.
* @param type of key.
* @param type of value.
* @return a new instance containing the {@link Map>} reference.
*/
public static HoodieMapPair of(Map> mapPairData) {
return new HoodieMapPair<>(mapPairData);
}
/**
* @param hoodiePairData {@link HoodieMapPair } instance containing the {@link Map} of pairs.
* @param type of key.
* @param type of value.
* @return the {@link Map} of pairs.
*/
public static Map> getMapPair(HoodiePairData hoodiePairData) {
return ((HoodieMapPair) hoodiePairData).get();
}
@Override
public Map> get() {
return mapPairData;
}
@Override
public void persist(String cacheConfig) {
// No OP
}
@Override
public void unpersist() {
// No OP
}
@Override
public HoodieData keys() {
return HoodieList.of(new ArrayList<>(mapPairData.keySet()));
}
@Override
public HoodieData values() {
return HoodieList.of(
mapPairData.values().stream().flatMap(List::stream).collect(Collectors.toList()));
}
@Override
public long count() {
return mapPairData.values().stream().map(
list -> (long) list.size()).reduce(Long::sum).orElse(0L);
}
@Override
public Map countByKey() {
return mapPairData.entrySet().stream().collect(
Collectors.toMap(Map.Entry::getKey, entry -> (long) entry.getValue().size()));
}
@Override
public HoodieData map(SerializableFunction, O> func) {
Function, O> throwableFunc = throwingMapWrapper(func);
return HoodieList.of(
streamAllPairs().map(throwableFunc).collect(Collectors.toList()));
}
@Override
public HoodiePairData mapToPair(SerializablePairFunction, L, W> mapToPairFunc) {
Map> newMap = new HashMap<>();
Function, Pair> throwableMapToPairFunc =
FunctionWrapper.throwingMapToPairWrapper(mapToPairFunc);
streamAllPairs().map(pair -> throwableMapToPairFunc.apply(pair)).forEach(newPair -> {
List list = newMap.computeIfAbsent(newPair.getKey(), k -> new ArrayList<>());
list.add(newPair.getValue());
});
return HoodieMapPair.of(newMap);
}
@Override
public HoodiePairData>> leftOuterJoin(HoodiePairData other) {
Map> otherMapPairData = HoodieMapPair.getMapPair(other);
Stream>>>> pairs = streamAllPairs()
.map(pair -> new ImmutablePair<>(pair.getKey(), new ImmutablePair<>(
pair.getValue(), Option.ofNullable(otherMapPairData.get(pair.getKey())))));
Map>>> resultMap = new HashMap<>();
pairs.forEach(pair -> {
K key = pair.getKey();
ImmutablePair>> valuePair = pair.getValue();
List>> resultList = resultMap.computeIfAbsent(key, k -> new ArrayList<>());
if (!valuePair.getRight().isPresent()) {
resultList.add(new ImmutablePair<>(valuePair.getLeft(), Option.empty()));
} else {
resultList.addAll(valuePair.getRight().get().stream().map(
w -> new ImmutablePair<>(valuePair.getLeft(), Option.of(w))).collect(Collectors.toList()));
}
});
return HoodieMapPair.of(resultMap);
}
private Stream> streamAllPairs() {
return mapPairData.entrySet().stream().flatMap(
entry -> entry.getValue().stream().map(e -> new ImmutablePair<>(entry.getKey(), e)));
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy