All Downloads are FREE. Search and download functionalities are using the official Maven repository.

io.trino.spi.predicate.TupleDomain Maven / Gradle / Ivy

There is a newer version: 468
Show newest version
/*
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package io.trino.spi.predicate;

import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonIgnore;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.google.errorprone.annotations.DoNotCall;
import io.trino.spi.connector.ConnectorSession;
import io.trino.spi.type.Type;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.function.BiFunction;
import java.util.function.BiPredicate;
import java.util.function.Function;
import java.util.function.Predicate;
import java.util.function.ToLongFunction;
import java.util.stream.Collector;

import static io.airlift.slice.SizeOf.estimatedSizeOf;
import static io.airlift.slice.SizeOf.instanceSize;
import static io.airlift.slice.SizeOf.sizeOf;
import static java.lang.String.format;
import static java.util.Collections.emptyMap;
import static java.util.Collections.unmodifiableList;
import static java.util.Objects.requireNonNull;
import static java.util.stream.Collectors.toList;
import static java.util.stream.Collectors.toMap;
import static java.util.stream.Collectors.toUnmodifiableList;

/**
 * Defines a set of valid tuples according to the constraints on each of its constituent columns
 */
public final class TupleDomain
{
    private static final int INSTANCE_SIZE = instanceSize(TupleDomain.class);

    private static final TupleDomain NONE = new TupleDomain<>(Optional.empty());
    private static final TupleDomain ALL = new TupleDomain<>(Optional.of(emptyMap()));

    /**
     * TupleDomain is internally represented as a normalized map of each column to its
     * respective allowable value Domain. Conceptually, these Domains can be thought of
     * as being AND'ed together to form the representative predicate.
     * 

* This map is normalized in the following ways: * 1) The map will not contain Domain.none() as any of its values. If any of the Domain * values are Domain.none(), then the whole map will instead be null. This enforces the fact that * any single Domain.none() value effectively turns this TupleDomain into "none" as well. * 2) The map will not contain Domain.all() as any of its values. Our convention here is that * any unmentioned column is equivalent to having Domain.all(). To normalize this structure, * we remove any Domain.all() values from the map. */ private final Optional> domains; private TupleDomain(Optional> domains) { requireNonNull(domains, "domains is null"); this.domains = domains.flatMap(map -> { if (containsNoneDomain(map)) { return Optional.empty(); } return Optional.of(Collections.unmodifiableMap(normalizeAndCopy(map))); }); } public static TupleDomain withColumnDomains(Map domains) { requireNonNull(domains, "domains is null"); if (domains.isEmpty()) { return all(); } return new TupleDomain<>(Optional.of(domains)); } @SuppressWarnings("unchecked") public static TupleDomain none() { return (TupleDomain) NONE; } @SuppressWarnings("unchecked") public static TupleDomain all() { return (TupleDomain) ALL; } /** * Extract all column constraints that require exactly one value or only null in their respective Domains. * Returns an empty Optional if the Domain is none. */ public static Optional> extractFixedValues(TupleDomain tupleDomain) { if (tupleDomain.getDomains().isEmpty()) { return Optional.empty(); } return Optional.of(tupleDomain.getDomains().get() .entrySet().stream() .filter(entry -> entry.getValue().isNullableSingleValue()) .collect(toLinkedMap(Map.Entry::getKey, entry -> new NullableValue(entry.getValue().getType(), entry.getValue().getNullableSingleValue())))); } /** * Extract all column constraints that define a non-empty set of discrete values allowed for the columns in their respective Domains. * Returns an empty Optional if the Domain is none. */ public static Optional>> extractDiscreteValues(TupleDomain tupleDomain) { if (tupleDomain.getDomains().isEmpty()) { return Optional.empty(); } return Optional.of(tupleDomain.getDomains().get() .entrySet().stream() .filter(entry -> entry.getValue().isNullableDiscreteSet()) .collect(toLinkedMap( Map.Entry::getKey, entry -> { Domain.DiscreteSet discreteValues = entry.getValue().getNullableDiscreteSet(); List nullableValues = new ArrayList<>(); for (Object value : discreteValues.getNonNullValues()) { nullableValues.add(new NullableValue(entry.getValue().getType(), value)); } if (discreteValues.containsNull()) { nullableValues.add(new NullableValue(entry.getValue().getType(), null)); } return unmodifiableList(nullableValues); }))); } /** * Convert a map of columns to values into the TupleDomain which requires * those columns to be fixed to those values. Null is allowed as a fixed value. */ public static TupleDomain fromFixedValues(Map fixedValues) { return TupleDomain.withColumnDomains(fixedValues.entrySet().stream() .collect(toLinkedMap( Map.Entry::getKey, entry -> { Type type = entry.getValue().getType(); Object value = entry.getValue().getValue(); return value == null ? Domain.onlyNull(type) : Domain.singleValue(type, value); }))); } @JsonCreator @DoNotCall // For JSON deserialization only @Deprecated // Discourage usages in SPI consumers public static TupleDomain fromColumnDomains(@JsonProperty("columnDomains") Optional>> columnDomains) { if (columnDomains.isEmpty()) { return none(); } return withColumnDomains(columnDomains.get().stream() .collect(toLinkedMap(ColumnDomain::getColumn, ColumnDomain::getDomain))); } @JsonProperty @DoNotCall // For JSON serialization only @Deprecated // Discourage usages in SPI consumers public Optional>> getColumnDomains() { return domains.map(map -> map.entrySet().stream() .map(entry -> new ColumnDomain<>(entry.getKey(), entry.getValue())) .collect(toUnmodifiableList())); } private static boolean containsNoneDomain(Map domains) { return domains.values().stream().anyMatch(Domain::isNone); } private static Map normalizeAndCopy(Map domains) { return domains.entrySet().stream() .filter(entry -> !entry.getValue().isAll()) .collect(toLinkedMap(Map.Entry::getKey, Map.Entry::getValue)); } /** * Returns true if any tuples would satisfy this TupleDomain */ public boolean isAll() { return domains.isPresent() && domains.get().isEmpty(); } /** * Returns true if no tuple could ever satisfy this TupleDomain */ public boolean isNone() { return domains.isEmpty(); } /** * Gets the TupleDomain as a map of each column to its respective Domain. * - Will return an Optional.empty() if this is a 'none' TupleDomain. * - Unmentioned columns have an implicit value of Domain.all() * - The column Domains can be thought of as AND'ed to together to form the whole predicate */ @JsonIgnore public Optional> getDomains() { return domains; } public Domain getDomain(T column, Type type) { if (domains.isEmpty()) { return Domain.none(type); } Domain domain = domains.get().get(column); if (domain != null && !domain.getType().equals(type)) { throw new IllegalArgumentException("Provided type %s does not match domain type %s for column %s".formatted(type, domain.getType(), column)); } if (domain == null) { return Domain.all(type); } return domain; } /** * Returns the strict intersection of the TupleDomains. * The resulting TupleDomain represents the set of tuples that would be valid * in both TupleDomains. */ public TupleDomain intersect(TupleDomain other) { return intersect(List.of(this, other)); } public static TupleDomain intersect(List> domains) { if (domains.isEmpty()) { return all(); } if (domains.size() == 1) { return upcast(domains.get(0)); } if (domains.stream().anyMatch(TupleDomain::isNone)) { return none(); } if (domains.stream().allMatch(domain -> domain.equals(domains.get(0)))) { return upcast(domains.get(0)); } List> candidates = domains.stream() .filter(domain -> !domain.isAll()) .collect(toList()); if (candidates.isEmpty()) { return all(); } if (candidates.size() == 1) { return upcast(candidates.get(0)); } Map intersected = new LinkedHashMap<>(candidates.get(0).getDomains().get()); for (int i = 1; i < candidates.size(); i++) { for (Map.Entry entry : candidates.get(i).getDomains().get().entrySet()) { Domain intersectionDomain = intersected.get(entry.getKey()); if (intersectionDomain == null) { intersected.put(entry.getKey(), entry.getValue()); } else { Domain intersect = intersectionDomain.intersect(entry.getValue()); if (intersect.isNone()) { return TupleDomain.none(); } intersected.put(entry.getKey(), intersect); } } } return withColumnDomains(intersected); } @SuppressWarnings("unchecked") private static TupleDomain upcast(TupleDomain domain) { // TupleDomain is covariant with respect to T (because it's immutable), so it's a safe operation return (TupleDomain) domain; } @SafeVarargs public static TupleDomain columnWiseUnion(TupleDomain first, TupleDomain second, TupleDomain... rest) { List> domains = new ArrayList<>(rest.length + 2); domains.add(first); domains.add(second); domains.addAll(Arrays.asList(rest)); return columnWiseUnion(domains); } /** * Returns the tuple domain that contains all other tuple domains, or {@code Optional.empty()} if they * are not supersets of each other. */ public static Optional> maximal(List> domains) { if (domains.isEmpty()) { return Optional.empty(); } TupleDomain largest = domains.get(0); for (int i = 1; i < domains.size(); i++) { TupleDomain current = domains.get(i); if (current.contains(largest)) { largest = current; } else if (!largest.contains(current)) { return Optional.empty(); } } return Optional.of(largest); } /** * Returns a TupleDomain in which corresponding column Domains are unioned together. *

* Note that this is NOT equivalent to a strict union as the final result may allow tuples * that do not exist in either TupleDomain. * Example 1: *

    *
  • TupleDomain X: a => 1, b => 2 *
  • TupleDomain Y: a => 2, b => 3 *
  • Column-wise unioned TupleDomain: a => 1 OR 2, b => 2 OR 3 *
*

* In the above resulting TupleDomain, tuple (a => 1, b => 3) would be considered valid but would * not be valid for either TupleDomain X or TupleDomain Y. * Example 2: *

* Let a be of type DOUBLE *

    *
  • TupleDomain X: {@code (a < 5)} *
  • TupleDomain Y: {@code (a > 0)} *
  • Column-wise unioned TupleDomain: {@code (a IS NOT NULL)} *
* In the above resulting TupleDomain, tuple (a => NaN) would be considered valid but would * not be valid for either TupleDomain X or TupleDomain Y. * However, this result is guaranteed to be a superset of the strict union. */ public static TupleDomain columnWiseUnion(List> tupleDomains) { if (tupleDomains.isEmpty()) { throw new IllegalArgumentException("tupleDomains must have at least one element"); } if (tupleDomains.size() == 1) { return tupleDomains.get(0); } // gather all common columns Set commonColumns = new HashSet<>(); // first, find a non-none domain boolean found = false; Iterator> domains = tupleDomains.iterator(); while (domains.hasNext()) { TupleDomain domain = domains.next(); if (domain.isAll()) { return TupleDomain.all(); } if (!domain.isNone()) { found = true; commonColumns.addAll(domain.getDomains().get().keySet()); break; } } if (!found) { return TupleDomain.none(); } // then, get the common columns while (domains.hasNext()) { TupleDomain domain = domains.next(); if (!domain.isNone()) { commonColumns.retainAll(domain.getDomains().get().keySet()); } } // group domains by column (only for common columns) Map> domainsByColumn = new LinkedHashMap<>(tupleDomains.size()); for (TupleDomain domain : tupleDomains) { if (!domain.isNone()) { for (Map.Entry entry : domain.getDomains().get().entrySet()) { if (commonColumns.contains(entry.getKey())) { List domainForColumn = domainsByColumn.computeIfAbsent(entry.getKey(), _ -> new ArrayList<>()); domainForColumn.add(entry.getValue()); } } } } // finally, do the column-wise union Map result = new LinkedHashMap<>(domainsByColumn.size()); for (Map.Entry> entry : domainsByColumn.entrySet()) { result.put(entry.getKey(), Domain.union(entry.getValue())); } return withColumnDomains(result); } /** * Returns true only if there exists a strict intersection between the TupleDomains. * i.e. there exists some potential tuple that would be allowable in both TupleDomains. */ public boolean overlaps(TupleDomain other) { requireNonNull(other, "other is null"); if (this.isNone() || other.isNone()) { return false; } if (this == other || this.isAll() || other.isAll()) { return true; } Map thisDomains = this.domains.orElseThrow(); Map otherDomains = other.getDomains().orElseThrow(); for (Map.Entry entry : otherDomains.entrySet()) { Domain commonColumnDomain = thisDomains.get(entry.getKey()); if (commonColumnDomain != null) { if (!commonColumnDomain.overlaps(entry.getValue())) { return false; } } } // All the common columns have overlapping domains return true; } /** * Returns true only if the this TupleDomain contains all possible tuples that would be allowable by * the other TupleDomain. */ public boolean contains(TupleDomain other) { if (other.isNone() || this == other) { return true; } if (isNone()) { return false; } Map thisDomains = domains.orElseThrow(); Map otherDomains = other.getDomains().orElseThrow(); for (Map.Entry entry : thisDomains.entrySet()) { Domain otherDomain = otherDomains.get(entry.getKey()); if (otherDomain == null || !entry.getValue().contains(otherDomain)) { return false; } } return true; } @Override public boolean equals(Object obj) { if (this == obj) { return true; } if (obj == null || getClass() != obj.getClass()) { return false; } TupleDomain other = (TupleDomain) obj; return Objects.equals(this.domains, other.domains); } @Override public int hashCode() { return Objects.hash(domains); } @Override public String toString() { return toString(ToStringSession.INSTANCE); } public String toString(ConnectorSession session) { if (isAll()) { return "ALL"; } if (isNone()) { return "NONE"; } return domains.orElseThrow().entrySet().stream() .collect(toLinkedMap(Map.Entry::getKey, entry -> entry.getValue().toString(session))) .toString(); } public TupleDomain filter(BiPredicate predicate) { requireNonNull(predicate, "predicate is null"); return transformDomains((key, domain) -> { if (!predicate.test(key, domain)) { return Domain.all(domain.getType()); } return domain; }); } public TupleDomain transformKeys(Function function) { if (isNone()) { return none(); } if (isAll()) { return all(); } Map domains = this.domains.orElseThrow(); HashMap result = new LinkedHashMap<>(domains.size()); for (Map.Entry entry : domains.entrySet()) { U key = function.apply(entry.getKey()); requireNonNull(key, () -> format("mapping function %s returned null for %s", function, entry.getKey())); Domain previous = result.put(key, entry.getValue()); if (previous != null) { throw new IllegalArgumentException(format("Every argument must have a unique mapping. %s maps to %s and %s", entry.getKey(), entry.getValue(), previous)); } } return TupleDomain.withColumnDomains(result); } public TupleDomain simplify() { return transformDomains((key, domain) -> domain.simplify()); } public TupleDomain simplify(int threshold) { return transformDomains((key, domain) -> domain.simplify(threshold)); } public TupleDomain transformDomains(BiFunction transformation) { requireNonNull(transformation, "transformation is null"); if (isNone() || isAll()) { return this; } return withColumnDomains(domains.get().entrySet().stream() .collect(toLinkedMap( Map.Entry::getKey, entry -> { Domain newDomain = transformation.apply(entry.getKey(), entry.getValue()); return requireNonNull(newDomain, "newDomain is null"); }))); } public Predicate> asPredicate() { if (isNone()) { return bindings -> false; } Map domains = this.domains.orElseThrow(); return bindings -> { for (Map.Entry entry : bindings.entrySet()) { Domain domain = domains.get(entry.getKey()); if (domain != null && !domain.includesNullableValue(entry.getValue().getValue())) { return false; } } return true; }; } // Available for Jackson serialization only! public static class ColumnDomain { private final C column; private final Domain domain; @JsonCreator public ColumnDomain( @JsonProperty("column") C column, @JsonProperty("domain") Domain domain) { this.column = requireNonNull(column, "column is null"); this.domain = requireNonNull(domain, "domain is null"); } @JsonProperty public C getColumn() { return column; } @JsonProperty public Domain getDomain() { return domain; } } private static Collector> toLinkedMap(Function keyMapper, Function valueMapper) { return toMap( keyMapper, valueMapper, (u, v) -> { throw new IllegalStateException(format("Duplicate values for a key: %s and %s", u, v)); }, LinkedHashMap::new); } public long getRetainedSizeInBytes(ToLongFunction keySize) { return INSTANCE_SIZE + sizeOf(domains, value -> estimatedSizeOf(value, keySize, Domain::getRetainedSizeInBytes)); } }