All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.spark.util.kvstore.InMemoryStore Maven / Gradle / Ivy

There is a newer version: 3.5.3
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.spark.util.kvstore;

import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Iterator;
import java.util.HashSet;
import java.util.List;
import java.util.NoSuchElementException;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import java.util.function.BiConsumer;
import java.util.function.Predicate;
import java.util.stream.Collectors;
import java.util.stream.Stream;

import com.google.common.base.Objects;
import com.google.common.base.Preconditions;

import org.apache.spark.annotation.Private;

/**
 * Implementation of KVStore that keeps data deserialized in memory. This store does not index
 * data; instead, whenever iterating over an indexed field, the stored data is copied and sorted
 * according to the index. This saves memory but makes iteration more expensive.
 */
@Private
public class InMemoryStore implements KVStore {

  private Object metadata;
  private InMemoryLists inMemoryLists = new InMemoryLists();

  @Override
  public  T getMetadata(Class klass) {
    return klass.cast(metadata);
  }

  @Override
  public void setMetadata(Object value) {
    this.metadata = value;
  }

  @Override
  public long count(Class type) {
    InstanceList list = inMemoryLists.get(type);
    return list != null ? list.size() : 0;
  }

  @Override
  public long count(Class type, String index, Object indexedValue) throws Exception {
    InstanceList list = inMemoryLists.get(type);
    int count = 0;
    Object comparable = asKey(indexedValue);
    KVTypeInfo.Accessor accessor = list.getIndexAccessor(index);
    for (Object o : view(type)) {
      if (Objects.equal(comparable, asKey(accessor.get(o)))) {
        count++;
      }
    }
    return count;
  }

  @Override
  public  T read(Class klass, Object naturalKey) {
    InstanceList list = inMemoryLists.get(klass);
    T value = list != null ? list.get(naturalKey) : null;
    if (value == null) {
      throw new NoSuchElementException();
    }
    return value;
  }

  @Override
  public void write(Object value) throws Exception {
    inMemoryLists.write(value);
  }

  @Override
  public void delete(Class type, Object naturalKey) {
    InstanceList list = inMemoryLists.get(type);
    if (list != null) {
      list.delete(naturalKey);
    }
  }

  @Override
  public  KVStoreView view(Class type){
    InstanceList list = inMemoryLists.get(type);
    return list != null ? list.view() : emptyView();
  }

  @Override
  public void close() {
    metadata = null;
    inMemoryLists.clear();
  }

  @Override
  public  boolean removeAllByIndexValues(
      Class klass,
      String index,
      Collection indexValues) {
    InstanceList list = inMemoryLists.get(klass);

    if (list != null) {
      return list.countingRemoveAllByIndexValues(index, indexValues) > 0;
    } else {
      return false;
    }
  }

  @SuppressWarnings("unchecked")
  private static Comparable asKey(Object in) {
    if (in.getClass().isArray()) {
      in = ArrayWrappers.forArray(in);
    }
    return (Comparable) in;
  }

  @SuppressWarnings("unchecked")
  private static  KVStoreView emptyView() {
    return (InMemoryView) InMemoryView.EMPTY_VIEW;
  }

  /**
   * Encapsulates ConcurrentHashMap so that the typing in and out of the map strictly maps a
   * class of type T to an InstanceList of type T.
   */
  private static class InMemoryLists {
    private final ConcurrentMap, InstanceList> data = new ConcurrentHashMap<>();

    @SuppressWarnings("unchecked")
    public  InstanceList get(Class type) {
      return (InstanceList) data.get(type);
    }

    @SuppressWarnings("unchecked")
    public  void write(T value) throws Exception {
      InstanceList list =
        (InstanceList) data.computeIfAbsent(value.getClass(), InstanceList::new);
      list.put(value);
    }

    public void clear() {
      data.clear();
    }
  }

  /**
   * An alias class for the type "ConcurrentHashMap, Boolean>", which is used
   * as a concurrent hashset for storing natural keys and the boolean value doesn't matter.
   */
  private static class NaturalKeys extends ConcurrentHashMap, Boolean> {}

  private static class InstanceList {

    /**
     * A BiConsumer to control multi-entity removal.  We use this in a forEach rather than an
     * iterator because there is a bug in jdk8 which affects remove() on all concurrent map
     * iterators.  https://bugs.openjdk.java.net/browse/JDK-8078645
     */
    private static class CountingRemoveIfForEach implements BiConsumer, T> {
      private final InstanceList instanceList;
      private final Predicate filter;

      /**
       * Keeps a count of the number of elements removed.  This count is not currently surfaced
       * to clients of KVStore as Java's generic removeAll() construct returns only a boolean,
       * but I found it handy to have the count of elements removed while debugging; a count being
       * no more complicated than a boolean, I've retained that behavior here, even though there
       * is no current requirement.
       */
      private int count = 0;

      CountingRemoveIfForEach(InstanceList instanceList, Predicate filter) {
        this.instanceList = instanceList;
        this.filter = filter;
      }

      @Override
      public void accept(Comparable key, T value) {
        if (filter.test(value)) {
          if (instanceList.delete(key, value)) {
            count++;
          }
        }
      }

      public int count() { return count; }
    }

    private final KVTypeInfo ti;
    private final KVTypeInfo.Accessor naturalKey;
    private final ConcurrentMap, T> data;
    private final String naturalParentIndexName;
    private final Boolean hasNaturalParentIndex;
    // A mapping from parent to the natural keys of its children.
    // For example, a mapping from a stage ID to all the task IDs in the stage.
    private final ConcurrentMap, NaturalKeys> parentToChildrenMap;

    private InstanceList(Class klass) {
      this.ti = new KVTypeInfo(klass);
      this.naturalKey = ti.getAccessor(KVIndex.NATURAL_INDEX_NAME);
      this.data = new ConcurrentHashMap<>();
      this.naturalParentIndexName = ti.getParentIndexName(KVIndex.NATURAL_INDEX_NAME);
      this.parentToChildrenMap = new ConcurrentHashMap<>();
      this.hasNaturalParentIndex = !naturalParentIndexName.isEmpty();
    }

    KVTypeInfo.Accessor getIndexAccessor(String indexName) {
      return ti.getAccessor(indexName);
    }

    int countingRemoveAllByIndexValues(String index, Collection indexValues) {
      int count = 0;
      if (KVIndex.NATURAL_INDEX_NAME.equals(index)) {
        for (Object naturalKey : indexValues) {
          count += delete(asKey(naturalKey)) ? 1 : 0;
        }
        return count;
      } else if (hasNaturalParentIndex && naturalParentIndexName.equals(index)) {
        // If there is a parent index for the natural index and `index` happens to be it,
        // Spark can use the `parentToChildrenMap` to get the related natural keys, and then
        // delete them from `data`.
        for (Object indexValue : indexValues) {
          Comparable parentKey = asKey(indexValue);
          NaturalKeys children = parentToChildrenMap.getOrDefault(parentKey, new NaturalKeys());
          for (Comparable naturalKey : children.keySet()) {
            data.remove(naturalKey);
            count ++;
          }
          parentToChildrenMap.remove(parentKey);
        }
        return count;
      } else {
        Predicate filter = getPredicate(ti.getAccessor(index), indexValues);
        CountingRemoveIfForEach callback = new CountingRemoveIfForEach<>(this, filter);

        // Go through all the values in `data` and delete objects that meets the predicate `filter`.
        // This can be slow when there is a large number of entries in `data`.
        data.forEach(callback);
        return callback.count();
      }
    }

    public T get(Object key) {
      return data.get(asKey(key));
    }

    public void put(T value) throws Exception {
      data.put(asKey(naturalKey.get(value)), value);
      if (hasNaturalParentIndex) {
        Comparable parentKey = asKey(getIndexAccessor(naturalParentIndexName).get(value));
        NaturalKeys children =
          parentToChildrenMap.computeIfAbsent(parentKey, k -> new NaturalKeys());
        children.put(asKey(naturalKey.get(value)), true);
      }
    }

    public boolean delete(Object key) {
      boolean entryExists = data.remove(asKey(key)) != null;
      if (entryExists) {
        deleteParentIndex(key);
      }
      return entryExists;
    }

    public boolean delete(Object key, T value) {
      boolean entryExists = data.remove(asKey(key), value);
      if (entryExists) {
        deleteParentIndex(key);
      }
      return entryExists;
    }

    private void deleteParentIndex(Object key) {
      if (hasNaturalParentIndex) {
        for (NaturalKeys v : parentToChildrenMap.values()) {
          if (v.remove(asKey(key)) != null) {
            // `v` can be empty after removing the natural key and we can remove it from
            // `parentToChildrenMap`. However, `parentToChildrenMap` is a ConcurrentMap and such
            // checking and deleting can be slow.
            // This method is to delete one object with certain key, let's make it simple here.
            break;
          }
        }
      }
    }

    public int size() {
      return data.size();
    }

    public InMemoryView view() {
      return new InMemoryView<>(data, ti, naturalParentIndexName, parentToChildrenMap);
    }

    private static  Predicate getPredicate(
        KVTypeInfo.Accessor getter,
        Collection values) {
      if (Comparable.class.isAssignableFrom(getter.getType())) {
        HashSet set = new HashSet<>(values);

        return (value) -> set.contains(indexValueForEntity(getter, value));
      } else {
        HashSet> set = new HashSet<>(values.size());
        for (Object key : values) {
          set.add(asKey(key));
        }
        return (value) -> set.contains(asKey(indexValueForEntity(getter, value)));
      }
    }

    private static Object indexValueForEntity(KVTypeInfo.Accessor getter, Object entity) {
      try {
        return getter.get(entity);
      } catch (ReflectiveOperationException e) {
        throw new RuntimeException(e);
      }
    }
  }

  private static class InMemoryView extends KVStoreView {
    private static final InMemoryView EMPTY_VIEW =
      new InMemoryView<>(new ConcurrentHashMap<>(), null, "", new ConcurrentHashMap<>());

    private final ConcurrentMap, T> data;
    private final KVTypeInfo ti;
    private final KVTypeInfo.Accessor natural;
    private final ConcurrentMap, NaturalKeys> parentToChildrenMap;
    private final String naturalParentIndexName;
    private final Boolean hasNaturalParentIndex;

    InMemoryView(
        ConcurrentMap, T> data,
        KVTypeInfo ti,
        String naturalParentIndexName,
        ConcurrentMap, NaturalKeys> parentToChildrenMap) {
      this.data = data;
      this.ti = ti;
      this.natural = ti != null ? ti.getAccessor(KVIndex.NATURAL_INDEX_NAME) : null;
      this.naturalParentIndexName = naturalParentIndexName;
      this.parentToChildrenMap = parentToChildrenMap;
      this.hasNaturalParentIndex = !naturalParentIndexName.isEmpty();
    }

    @Override
    public Iterator iterator() {
      if (data.isEmpty()) {
        return new InMemoryIterator<>(Collections.emptyIterator());
      }

      KVTypeInfo.Accessor getter = index != null ? ti.getAccessor(index) : null;
      int modifier = ascending ? 1 : -1;

      final List sorted = copyElements();
      sorted.sort((e1, e2) -> modifier * compare(e1, e2, getter));
      Stream stream = sorted.stream();

      if (first != null) {
        Comparable firstKey = asKey(first);
        stream = stream.filter(e -> modifier * compare(e, getter, firstKey) >= 0);
      }

      if (last != null) {
        Comparable lastKey = asKey(last);
        stream = stream.filter(e -> modifier * compare(e, getter, lastKey) <= 0);
      }

      if (skip > 0) {
        stream = stream.skip(skip);
      }

      if (max < sorted.size()) {
        stream = stream.limit((int) max);
      }

      return new InMemoryIterator<>(stream.iterator());
    }

    /**
     * Create a copy of the input elements, filtering the values for child indices if needed.
     */
    private List copyElements() {
      if (parent != null) {
        Comparable parentKey = asKey(parent);
        if (hasNaturalParentIndex && naturalParentIndexName.equals(ti.getParentIndexName(index))) {
          // If there is a parent index for the natural index and the parent of `index` happens to
          // be it, Spark can use the `parentToChildrenMap` to get the related natural keys, and
          // then copy them from `data`.
          NaturalKeys children = parentToChildrenMap.getOrDefault(parentKey, new NaturalKeys());
          ArrayList elements = new ArrayList<>();
          for (Comparable naturalKey : children.keySet()) {
            data.computeIfPresent(naturalKey, (k, v) -> {
              elements.add(v);
              return v;
            });
          }
          return elements;
        } else {
          // Go through all the values in `data` and collect all the objects has certain parent
          // value. This can be slow when there is a large number of entries in `data`.
          KVTypeInfo.Accessor parentGetter = ti.getParentAccessor(index);
          Preconditions.checkArgument(parentGetter != null, "Parent filter for non-child index.");
          return data.values().stream()
            .filter(e -> compare(e, parentGetter, parentKey) == 0)
            .collect(Collectors.toList());
        }
      } else {
        return new ArrayList<>(data.values());
      }
    }

    private int compare(T e1, T e2, KVTypeInfo.Accessor getter) {
      try {
        int diff = compare(e1, getter, asKey(getter.get(e2)));
        if (diff == 0 && getter != natural) {
          diff = compare(e1, natural, asKey(natural.get(e2)));
        }
        return diff;
      } catch (ReflectiveOperationException e) {
        throw new RuntimeException(e);
      }
    }

    private int compare(T e1, KVTypeInfo.Accessor getter, Comparable v2) {
      try {
        return asKey(getter.get(e1)).compareTo(v2);
      } catch (ReflectiveOperationException e) {
        throw new RuntimeException(e);
      }
    }
  }

  private static class InMemoryIterator implements KVStoreIterator {

    private final Iterator iter;

    InMemoryIterator(Iterator iter) {
      this.iter = iter;
    }

    @Override
    public boolean hasNext() {
      return iter.hasNext();
    }

    @Override
    public T next() {
      return iter.next();
    }

    @Override
    public void remove() {
      throw new UnsupportedOperationException();
    }

    @Override
    public List next(int max) {
      List list = new ArrayList<>(max);
      while (hasNext() && list.size() < max) {
        list.add(next());
      }
      return list;
    }

    @Override
    public boolean skip(long n) {
      long skipped = 0;
      while (skipped < n) {
        if (hasNext()) {
          next();
          skipped++;
        } else {
          return false;
        }
      }

      return hasNext();
    }

    @Override
    public void close() {
      // no op.
    }

  }

}