All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.iceberg.SortOrder Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.iceberg;

import java.io.Serializable;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.stream.IntStream;
import org.apache.iceberg.exceptions.ValidationException;
import org.apache.iceberg.expressions.BoundReference;
import org.apache.iceberg.expressions.BoundTerm;
import org.apache.iceberg.expressions.BoundTransform;
import org.apache.iceberg.expressions.Expressions;
import org.apache.iceberg.expressions.Term;
import org.apache.iceberg.expressions.UnboundTerm;
import org.apache.iceberg.relocated.com.google.common.base.Preconditions;
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList;
import org.apache.iceberg.relocated.com.google.common.collect.Lists;
import org.apache.iceberg.transforms.Transform;
import org.apache.iceberg.transforms.Transforms;
import org.apache.iceberg.types.Type;
import org.apache.iceberg.types.Types;

/**
 * A sort order that defines how data and delete files should be ordered in a table.
 */
public class SortOrder implements Serializable {
  private static final SortOrder UNSORTED_ORDER = new SortOrder(new Schema(), 0, Collections.emptyList());

  private final Schema schema;
  private final int orderId;
  private final SortField[] fields;

  private transient volatile List fieldList;

  private SortOrder(Schema schema, int orderId, List fields) {
    this.schema = schema;
    this.orderId = orderId;
    this.fields = fields.toArray(new SortField[0]);
  }

  /**
   * Returns the {@link Schema} for this sort order
   */
  public Schema schema() {
    return schema;
  }

  /**
   * Returns the ID of this sort order
   */
  public int orderId() {
    return orderId;
  }

  /**
   * Returns the list of {@link SortField sort fields} for this sort order
   */
  public List fields() {
    return lazyFieldList();
  }

  /**
   * Returns true if the sort order is sorted
   */
  public boolean isSorted() {
    return fields.length >= 1;
  }

  /**
   * Returns true if the sort order is unsorted
   */
  public boolean isUnsorted() {
    return fields.length < 1;
  }

  /**
   * Checks whether this order satisfies another order.
   *
   * @param anotherSortOrder a different sort order
   * @return true if this order satisfies the given order
   */
  public boolean satisfies(SortOrder anotherSortOrder) {
    // any ordering satisfies an unsorted ordering
    if (anotherSortOrder.isUnsorted()) {
      return true;
    }

    // this ordering cannot satisfy an ordering with more sort fields
    if (anotherSortOrder.fields.length > fields.length) {
      return false;
    }

    // this ordering has either more or the same number of sort fields
    return IntStream.range(0, anotherSortOrder.fields.length)
        .allMatch(index -> fields[index].satisfies(anotherSortOrder.fields[index]));
  }

  /**
   * Checks whether this order is equivalent to another order while ignoring the order id.
   *
   * @param anotherSortOrder a different sort order
   * @return true if this order is equivalent to the given order
   */
  public boolean sameOrder(SortOrder anotherSortOrder) {
    return Arrays.equals(fields, anotherSortOrder.fields);
  }

  private List lazyFieldList() {
    if (fieldList == null) {
      synchronized (this) {
        if (fieldList == null) {
          this.fieldList = ImmutableList.copyOf(fields);
        }
      }
    }
    return fieldList;
  }

  @Override
  public String toString() {
    StringBuilder sb = new StringBuilder();
    sb.append("[");
    for (SortField field : fields) {
      sb.append("\n");
      sb.append("  ").append(field);
    }
    if (fields.length > 0) {
      sb.append("\n");
    }
    sb.append("]");
    return sb.toString();
  }

  @Override
  public boolean equals(Object other) {
    if (this == other) {
      return true;
    } else if (other == null || getClass() != other.getClass()) {
      return false;
    }

    SortOrder that = (SortOrder) other;
    return orderId == that.orderId && sameOrder(that);
  }

  @Override
  public int hashCode() {
    return 31 * Integer.hashCode(orderId) + Arrays.hashCode(fields);
  }

  /**
   * Returns a sort order for unsorted tables.
   *
   * @return an unsorted order
   */
  public static SortOrder unsorted() {
    return UNSORTED_ORDER;
  }

  /**
   * Creates a new {@link Builder sort order builder} for the given {@link Schema}.
   *
   * @param schema a schema
   * @return a sort order builder for the given schema
   */
  public static Builder builderFor(Schema schema) {
    return new Builder(schema);
  }

  /**
   * A builder used to create valid {@link SortOrder sort orders}.
   * 

* Call {@link #builderFor(Schema)} to create a new builder. */ public static class Builder implements SortOrderBuilder { private final Schema schema; private final List fields = Lists.newArrayList(); private Integer orderId = null; private boolean caseSensitive = true; private Builder(Schema schema) { this.schema = schema; } /** * Add an expression term to the sort, ascending with the given null order. * * @param term an expression term * @param nullOrder a null order (first or last) * @return this for method chaining */ @Override public Builder asc(Term term, NullOrder nullOrder) { return addSortField(term, SortDirection.ASC, nullOrder); } /** * Add an expression term to the sort, ascending with the given null order. * * @param term an expression term * @param nullOrder a null order (first or last) * @return this for method chaining */ @Override public Builder desc(Term term, NullOrder nullOrder) { return addSortField(term, SortDirection.DESC, nullOrder); } public Builder sortBy(String name, SortDirection direction, NullOrder nullOrder) { return addSortField(Expressions.ref(name), direction, nullOrder); } public Builder sortBy(Term term, SortDirection direction, NullOrder nullOrder) { return addSortField(term, direction, nullOrder); } public Builder withOrderId(int newOrderId) { this.orderId = newOrderId; return this; } public Builder caseSensitive(boolean sortCaseSensitive) { this.caseSensitive = sortCaseSensitive; return this; } Builder addSortField(Term term, SortDirection direction, NullOrder nullOrder) { Preconditions.checkArgument(term instanceof UnboundTerm, "Term must be unbound"); // ValidationException is thrown by bind if binding fails so we assume that boundTerm is correct BoundTerm boundTerm = ((UnboundTerm) term).bind(schema.asStruct(), caseSensitive); int sourceId = boundTerm.ref().fieldId(); SortField sortField = new SortField(toTransform(boundTerm), sourceId, direction, nullOrder); fields.add(sortField); return this; } Builder addSortField(String transformAsString, int sourceId, SortDirection direction, NullOrder nullOrder) { Types.NestedField column = schema.findField(sourceId); Preconditions.checkNotNull(column, "Cannot find source column: %s", sourceId); Transform transform = Transforms.fromString(column.type(), transformAsString); SortField sortField = new SortField(transform, sourceId, direction, nullOrder); fields.add(sortField); return this; } Builder addSortField(Transform transform, int sourceId, SortDirection direction, NullOrder nullOrder) { fields.add(new SortField(transform, sourceId, direction, nullOrder)); return this; } public SortOrder build() { SortOrder sortOrder = buildUnchecked(); checkCompatibility(sortOrder, schema); return sortOrder; } SortOrder buildUnchecked() { if (fields.isEmpty()) { if (orderId != null && orderId != 0) { throw new IllegalArgumentException("Unsorted order ID must be 0"); } return SortOrder.unsorted(); } if (orderId != null && orderId == 0) { throw new IllegalArgumentException("Sort order ID 0 is reserved for unsorted order"); } // default ID to 1 as 0 is reserved for unsorted order int actualOrderId = orderId != null ? orderId : 1; return new SortOrder(schema, actualOrderId, fields); } private Transform toTransform(BoundTerm term) { if (term instanceof BoundReference) { return Transforms.identity(term.type()); } else if (term instanceof BoundTransform) { return ((BoundTransform) term).transform(); } else { throw new ValidationException("Invalid term: %s, expected either a bound reference or transform", term); } } } public static void checkCompatibility(SortOrder sortOrder, Schema schema) { for (SortField field : sortOrder.fields) { Type sourceType = schema.findType(field.sourceId()); ValidationException.check( sourceType != null, "Cannot find source column for sort field: %s", field); ValidationException.check( sourceType.isPrimitiveType(), "Cannot sort by non-primitive source field: %s", sourceType); ValidationException.check( field.transform().canTransform(sourceType), "Invalid source type %s for transform: %s", sourceType, field.transform()); } } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy