All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.hudi.common.data.HoodieMapPair Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.hudi.common.data;

import org.apache.hudi.common.function.FunctionWrapper;
import org.apache.hudi.common.function.SerializableFunction;
import org.apache.hudi.common.function.SerializablePairFunction;
import org.apache.hudi.common.util.Option;
import org.apache.hudi.common.util.collection.ImmutablePair;
import org.apache.hudi.common.util.collection.Pair;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.function.Function;
import java.util.stream.Collectors;
import java.util.stream.Stream;

import static org.apache.hudi.common.function.FunctionWrapper.throwingMapWrapper;

/**
 * Implementation of {@link HoodiePairData} using Java {@link Map}.
 * The pairs are organized by the key in the Map and values for the same key
 * are stored in a list as the value corresponding to the key in the Map.
 *
 * @param  type of key.
 * @param  type of value.
 */
public class HoodieMapPair extends HoodiePairData {

  private final Map> mapPairData;

  private HoodieMapPair(Map> mapPairData) {
    this.mapPairData = mapPairData;
  }

  /**
   * @param mapPairData a {@link Map} of pairs.
   * @param          type of key.
   * @param          type of value.
   * @return a new instance containing the {@link Map>} reference.
   */
  public static  HoodieMapPair of(Map> mapPairData) {
    return new HoodieMapPair<>(mapPairData);
  }

  /**
   * @param hoodiePairData {@link HoodieMapPair } instance containing the {@link Map} of pairs.
   * @param             type of key.
   * @param             type of value.
   * @return the {@link Map} of pairs.
   */
  public static  Map> getMapPair(HoodiePairData hoodiePairData) {
    return ((HoodieMapPair) hoodiePairData).get();
  }

  @Override
  public Map> get() {
    return mapPairData;
  }

  @Override
  public void persist(String cacheConfig) {
    // No OP
  }

  @Override
  public void unpersist() {
    // No OP
  }

  @Override
  public HoodieData keys() {
    return HoodieList.of(new ArrayList<>(mapPairData.keySet()));
  }

  @Override
  public HoodieData values() {
    return HoodieList.of(
        mapPairData.values().stream().flatMap(List::stream).collect(Collectors.toList()));
  }

  @Override
  public long count() {
    return mapPairData.values().stream().map(
        list -> (long) list.size()).reduce(Long::sum).orElse(0L);
  }

  @Override
  public Map countByKey() {
    return mapPairData.entrySet().stream().collect(
        Collectors.toMap(Map.Entry::getKey, entry -> (long) entry.getValue().size()));
  }

  @Override
  public  HoodieData map(SerializableFunction, O> func) {
    Function, O> throwableFunc = throwingMapWrapper(func);
    return HoodieList.of(
        streamAllPairs().map(throwableFunc).collect(Collectors.toList()));
  }

  @Override
  public  HoodiePairData mapToPair(SerializablePairFunction, L, W> mapToPairFunc) {
    Map> newMap = new HashMap<>();
    Function, Pair> throwableMapToPairFunc =
        FunctionWrapper.throwingMapToPairWrapper(mapToPairFunc);
    streamAllPairs().map(pair -> throwableMapToPairFunc.apply(pair)).forEach(newPair -> {
      List list = newMap.computeIfAbsent(newPair.getKey(), k -> new ArrayList<>());
      list.add(newPair.getValue());
    });
    return HoodieMapPair.of(newMap);
  }

  @Override
  public  HoodiePairData>> leftOuterJoin(HoodiePairData other) {
    Map> otherMapPairData = HoodieMapPair.getMapPair(other);
    Stream>>>> pairs = streamAllPairs()
        .map(pair -> new ImmutablePair<>(pair.getKey(), new ImmutablePair<>(
            pair.getValue(), Option.ofNullable(otherMapPairData.get(pair.getKey())))));
    Map>>> resultMap = new HashMap<>();
    pairs.forEach(pair -> {
      K key = pair.getKey();
      ImmutablePair>> valuePair = pair.getValue();
      List>> resultList = resultMap.computeIfAbsent(key, k -> new ArrayList<>());
      if (!valuePair.getRight().isPresent()) {
        resultList.add(new ImmutablePair<>(valuePair.getLeft(), Option.empty()));
      } else {
        resultList.addAll(valuePair.getRight().get().stream().map(
            w -> new ImmutablePair<>(valuePair.getLeft(), Option.of(w))).collect(Collectors.toList()));
      }
    });
    return HoodieMapPair.of(resultMap);
  }

  private Stream> streamAllPairs() {
    return mapPairData.entrySet().stream().flatMap(
        entry -> entry.getValue().stream().map(e -> new ImmutablePair<>(entry.getKey(), e)));
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy