All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.hadoop.util.Sets Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.util;

import org.apache.hadoop.classification.InterfaceAudience;

import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.EnumSet;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.SortedSet;
import java.util.TreeSet;
import java.util.concurrent.ConcurrentHashMap;

/**
 * Static utility methods pertaining to {@link Set} instances.
 * This class is Hadoop's internal use alternative to Guava's Sets
 * utility class.
 * Javadocs for majority of APIs in this class are taken from Guava's Sets
 * class from Guava release version 27.0-jre.
 */
@InterfaceAudience.Private
public final class Sets {

  private static final int MAX_POWER_OF_TWO = 1 << (Integer.SIZE - 2);

  private Sets() {
    // empty
  }

  /**
   * Creates a mutable, initially empty {@code HashSet} instance.
   *
   * 

Note: if mutability is not required, use ImmutableSet#of() * instead. If {@code E} is an {@link Enum} type, use {@link EnumSet#noneOf} * instead. Otherwise, strongly consider using a {@code LinkedHashSet} * instead, at the cost of increased memory footprint, to get * deterministic iteration behavior.

* * @param Generics Type E. * @return a new, empty {@code TreeSet} */ public static HashSet newHashSet() { return new HashSet(); } /** * Creates a mutable, empty {@code TreeSet} instance sorted by the * natural sort ordering of its elements. * *

Note: if mutability is not required, use ImmutableSortedSet#of() * instead.

* * @param Generics Type E * @return a new, empty {@code TreeSet} */ public static TreeSet newTreeSet() { return new TreeSet(); } /** * Creates a mutable {@code HashSet} instance initially containing * the given elements. * *

Note: if elements are non-null and won't be added or removed * after this point, use ImmutableSet#of() or ImmutableSet#copyOf(Object[]) * instead. If {@code E} is an {@link Enum} type, use * {@link EnumSet#of(Enum, Enum[])} instead. Otherwise, strongly consider * using a {@code LinkedHashSet} instead, at the cost of increased memory * footprint, to get deterministic iteration behavior.

* *

This method is just a small convenience, either for * {@code newHashSet(}{@link Arrays#asList}{@code (...))}, or for creating an * empty set then calling {@link Collections#addAll}.

* * @param Generics Type E. * @param elements the elements that the set should contain. * @return a new, empty thread-safe {@code Set} */ @SafeVarargs public static HashSet newHashSet(E... elements) { HashSet set = newHashSetWithExpectedSize(elements.length); Collections.addAll(set, elements); return set; } /** * Creates a mutable {@code HashSet} instance containing the given * elements. A very thin convenience for creating an empty set then calling * {@link Collection#addAll} or Iterables#addAll. * *

Note: if mutability is not required and the elements are * non-null, use ImmutableSet#copyOf(Iterable) instead. (Or, change * {@code elements} to be a FluentIterable and call {@code elements.toSet()}.)

* *

Note: if {@code E} is an {@link Enum} type, use * newEnumSet(Iterable, Class) instead.

* * @param Generics Type E. * @param elements the elements that the set should contain. * @return a new, empty thread-safe {@code Set}. */ public static HashSet newHashSet(Iterable elements) { return (elements instanceof Collection) ? new HashSet(cast(elements)) : newHashSet(elements.iterator()); } /** * Creates a mutable {@code TreeSet} instance containing the given * elements sorted by their natural ordering. * *

Note: if mutability is not required, use * ImmutableSortedSet#copyOf(Iterable) instead. * *

Note: If {@code elements} is a {@code SortedSet} with an * explicit comparator, this method has different behavior than * {@link TreeSet#TreeSet(SortedSet)}, which returns a {@code TreeSet} * with that comparator. * *

Note for Java 7 and later: this method is now unnecessary and * should be treated as deprecated. Instead, use the {@code TreeSet} * constructor directly, taking advantage of the new * "diamond" syntax. * *

This method is just a small convenience for creating an empty set and * then calling Iterables#addAll. This method is not very useful and will * likely be deprecated in the future. * * @param Generics Type E. * @param elements the elements that the set should contain * @return a new {@code TreeSet} containing those elements (minus duplicates) */ public static TreeSet newTreeSet( Iterable elements) { TreeSet set = newTreeSet(); addAll(set, elements); return set; } private static boolean addAll(TreeSet addTo, Iterable elementsToAdd) { if (elementsToAdd instanceof Collection) { Collection c = cast(elementsToAdd); return addTo.addAll(c); } if (elementsToAdd == null) { throw new NullPointerException(); } return addAll(addTo, elementsToAdd.iterator()); } /** * Creates a mutable {@code HashSet} instance containing the given * elements. A very thin convenience for creating an empty set and then * calling Iterators#addAll. * *

Note: if mutability is not required and the elements are * non-null, use ImmutableSet#copyOf(Iterator) instead.

* *

Note: if {@code E} is an {@link Enum} type, you should create * an {@link EnumSet} instead.

* *

Overall, this method is not very useful and will likely be deprecated * in the future.

* * @param Generics Type E. * @param elements elements. * @return a new, empty thread-safe {@code Set}. */ public static HashSet newHashSet(Iterator elements) { HashSet set = newHashSet(); addAll(set, elements); return set; } /** * Returns a new hash set using the smallest initial table size that can hold * {@code expectedSize} elements without resizing. Note that this is not what * {@link HashSet#HashSet(int)} does, but it is what most users want and * expect it to do. * *

This behavior can't be broadly guaranteed, but has been tested with * OpenJDK 1.7 and 1.8.

* * @param expectedSize the number of elements you expect to add to the * returned set * @param Generics Type E. * @return a new, empty hash set with enough capacity to hold * {@code expectedSize} elements without resizing * @throws IllegalArgumentException if {@code expectedSize} is negative */ public static HashSet newHashSetWithExpectedSize(int expectedSize) { return new HashSet(capacity(expectedSize)); } private static Collection cast(Iterable iterable) { return (Collection) iterable; } private static boolean addAll(Collection addTo, Iterator iterator) { if (addTo == null) { throw new NullPointerException(); } if (iterator == null) { throw new NullPointerException(); } boolean wasModified = false; while (iterator.hasNext()) { wasModified |= addTo.add(iterator.next()); } return wasModified; } /** * Returns the intersection of two sets as an unmodifiable set. * The returned set contains all elements that are contained by both backing * sets. * *

Results are undefined if {@code set1} and {@code set2} are sets based * on different equivalence relations (as {@code HashSet}, {@code TreeSet}, * and the keySet of an {@code IdentityHashMap} all are). * * @param set1 set1. * @param set2 set2. * @param Generics Type E. * @return a new, empty thread-safe {@code Set}. */ public static Set intersection(final Set set1, final Set set2) { if (set1 == null) { throw new NullPointerException("set1"); } if (set2 == null) { throw new NullPointerException("set2"); } Set newSet = new HashSet<>(set1); newSet.retainAll(set2); return Collections.unmodifiableSet(newSet); } /** * Returns the union of two sets as an unmodifiable set. * The returned set contains all elements that are contained in either * backing set. * *

Results are undefined if {@code set1} and {@code set2} are sets * based on different equivalence relations (as {@link HashSet}, * {@link TreeSet}, and the {@link Map#keySet} of an * {@code IdentityHashMap} all are). * * @param set1 set1. * @param set2 set2. * @param Generics Type E. * @return a new, empty thread-safe {@code Set}. */ public static Set union( final Set set1, final Set set2) { if (set1 == null) { throw new NullPointerException("set1"); } if (set2 == null) { throw new NullPointerException("set2"); } Set newSet = new HashSet<>(set1); newSet.addAll(set2); return Collections.unmodifiableSet(newSet); } /** * Returns the difference of two sets as an unmodifiable set. * The returned set contains all elements that are contained by {@code set1} * and not contained by {@code set2}. * *

Results are undefined if {@code set1} and {@code set2} are sets based * on different equivalence relations (as {@code HashSet}, {@code TreeSet}, * and the keySet of an {@code IdentityHashMap} all are). * * This method is used to find difference for HashSets. For TreeSets with * strict order requirement, recommended method is * {@link #differenceInTreeSets(Set, Set)}. * * @param set1 set1. * @param set2 set2. * @param Generics Type E. * @return a new, empty thread-safe {@code Set}. */ public static Set difference( final Set set1, final Set set2) { if (set1 == null) { throw new NullPointerException("set1"); } if (set2 == null) { throw new NullPointerException("set2"); } Set newSet = new HashSet<>(set1); newSet.removeAll(set2); return Collections.unmodifiableSet(newSet); } /** * Returns the difference of two sets as an unmodifiable set. * The returned set contains all elements that are contained by {@code set1} * and not contained by {@code set2}. * *

Results are undefined if {@code set1} and {@code set2} are sets based * on different equivalence relations (as {@code HashSet}, {@code TreeSet}, * and the keySet of an {@code IdentityHashMap} all are). * * This method is used to find difference for TreeSets. For HashSets, * recommended method is {@link #difference(Set, Set)}. * * @param Generics Type E. * @param set1 set1. * @param set2 set2. * @return a new, empty thread-safe {@code Set}. */ public static Set differenceInTreeSets( final Set set1, final Set set2) { if (set1 == null) { throw new NullPointerException("set1"); } if (set2 == null) { throw new NullPointerException("set2"); } Set newSet = new TreeSet<>(set1); newSet.removeAll(set2); return Collections.unmodifiableSet(newSet); } /** * Returns the symmetric difference of two sets as an unmodifiable set. * The returned set contains all elements that are contained in either * {@code set1} or {@code set2} but not in both. The iteration order of the * returned set is undefined. * *

Results are undefined if {@code set1} and {@code set2} are sets based * on different equivalence relations (as {@code HashSet}, {@code TreeSet}, * and the keySet of an {@code IdentityHashMap} all are). * * @param set1 set1. * @param set2 set2. * @param Generics Type E. * @return a new, empty thread-safe {@code Set}. */ public static Set symmetricDifference( final Set set1, final Set set2) { if (set1 == null) { throw new NullPointerException("set1"); } if (set2 == null) { throw new NullPointerException("set2"); } Set intersection = new HashSet<>(set1); intersection.retainAll(set2); Set symmetricDifference = new HashSet<>(set1); symmetricDifference.addAll(set2); symmetricDifference.removeAll(intersection); return Collections.unmodifiableSet(symmetricDifference); } /** * Creates a thread-safe set backed by a hash map. The set is backed by a * {@link ConcurrentHashMap} instance, and thus carries the same concurrency * guarantees. * *

Unlike {@code HashSet}, this class does NOT allow {@code null} to be * used as an element. The set is serializable. * * @param Generics Type. * @return a new, empty thread-safe {@code Set} */ public static Set newConcurrentHashSet() { return Collections.newSetFromMap(new ConcurrentHashMap()); } /** * Returns a capacity that is sufficient to keep the map from being resized * as long as it grows no larger than expectedSize and the load factor * is ≥ its default (0.75). * The implementation of this method is adapted from Guava version 27.0-jre. */ private static int capacity(int expectedSize) { if (expectedSize < 3) { if (expectedSize < 0) { throw new IllegalArgumentException( "expectedSize cannot be negative but was: " + expectedSize); } return expectedSize + 1; } if (expectedSize < MAX_POWER_OF_TWO) { // This is the calculation used in JDK8 to resize when a putAll // happens; it seems to be the most conservative calculation we // can make. 0.75 is the default load factor. return (int) ((float) expectedSize / 0.75F + 1.0F); } return Integer.MAX_VALUE; // any large value } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy