org.apache.kudu.client.PartitionSchema Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of camel-quarkus-kudu-client
Show all versions of camel-quarkus-kudu-client
org.apache.kudu:kudu-client with netty package relocations reverted and netty classes stripped away
so that camel-quarkus-kudu can use quarkus-netty as a replacement
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.kudu.client;
import java.io.Serializable;
import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
import java.util.TreeSet;
import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableList;
import com.google.common.primitives.UnsignedBytes;
import org.apache.yetus.audience.InterfaceAudience;
import org.apache.yetus.audience.InterfaceStability;
import org.apache.kudu.Schema;
/**
* A partition schema describes how the rows of a table are distributed among
* tablets.
*
* Primarily, a table's partition schema is responsible for translating the
* primary key column values of a row into a partition key that can be used to
* find the tablet containing the key.
*
* In case of table-wide hash partitioning, the partition schema is made up of
* zero or more hash bucket components, followed by a single range component.
* In case of custom hash bucketing per range, the partition schema contains
* information on hash bucket components per range.
*
* Each hash bucket component includes one or more columns from the primary key
* column set, with the restriction that an individual primary key column may
* only be included in a single hash component.
*
* This class is new, and not considered stable or suitable for public use.
*/
@InterfaceAudience.LimitedPrivate("Impala")
@InterfaceStability.Unstable
public class PartitionSchema {
private static final class BoundsComparator
implements Comparator, Serializable {
private static final long serialVersionUID = 36028797018963969L;
private static final Comparator comparator =
UnsignedBytes.lexicographicalComparator();
@Override
public int compare(EncodedRangeBoundsWithHashSchema lhs,
EncodedRangeBoundsWithHashSchema rhs) {
return comparator.compare(lhs.lower, rhs.lower);
}
}
private static final Comparator COMPARATOR =
new BoundsComparator();
private final RangeSchema rangeSchema;
private final List hashBucketSchemas;
private final List rangesWithHashSchemas;
private final List encodedRangesWithHashSchemas;
private TreeSet hashSchemasPerRange;
private final boolean isSimple;
static class EncodedRangeBoundsWithHashSchema {
final byte[] lower;
final byte[] upper;
final List hashSchemas;
public EncodedRangeBoundsWithHashSchema(
byte[] lower,
byte[] upper,
List hashSchemas) {
Preconditions.checkNotNull(lower);
Preconditions.checkNotNull(upper);
Preconditions.checkState(upper.length == 0 || Bytes.memcmp(lower, upper) < 0);
this.lower = lower;
this.upper = upper;
this.hashSchemas = hashSchemas;
}
}
/**
* Creates a new partition schema from the range and hash bucket schemas.
*
* @param rangeSchema the range schema
* @param hashBucketSchemas the table-wide hash schema
* @param schema the table schema
*/
public PartitionSchema(RangeSchema rangeSchema,
List hashBucketSchemas,
List rangesWithHashSchemas,
Schema schema) {
this.rangeSchema = rangeSchema;
this.hashBucketSchemas = hashBucketSchemas;
this.rangesWithHashSchemas = rangesWithHashSchemas;
this.hashSchemasPerRange = new TreeSet<>(COMPARATOR);
this.encodedRangesWithHashSchemas = new ArrayList<>(rangesWithHashSchemas.size());
for (RangeWithHashSchema rhs : this.rangesWithHashSchemas) {
final boolean isLowerBoundEmpty =
rhs.lowerBound == null || rhs.lowerBound.getColumnsBitSet().isEmpty();
byte[] lower = isLowerBoundEmpty ? new byte[0]
: KeyEncoder.encodeRangePartitionKey(rhs.lowerBound, this.rangeSchema);
final boolean isUpperBoundEmpty =
rhs.upperBound == null || rhs.upperBound.getColumnsBitSet().isEmpty();
byte[] upper = isUpperBoundEmpty ? new byte[0]
: KeyEncoder.encodeRangePartitionKey(rhs.upperBound, this.rangeSchema);
if (!hashSchemasPerRange.add(
new EncodedRangeBoundsWithHashSchema(lower, upper, rhs.hashSchemas))) {
throw new IllegalArgumentException(
rhs.lowerBound.toString() + ": duplicate lower range boundary");
}
}
// Populate the convenience collection storing the information on ranges
// with encoded bounds sorted in ascending order by lower bounds.
encodedRangesWithHashSchemas.addAll(this.hashSchemasPerRange);
boolean isSimple =
rangesWithHashSchemas.isEmpty() &&
hashBucketSchemas.isEmpty() &&
rangeSchema.columns.size() == schema.getPrimaryKeyColumnCount();
if (isSimple) {
int i = 0;
for (Integer id : rangeSchema.columns) {
if (schema.getColumnIndex(id) != i++) {
isSimple = false;
break;
}
}
}
this.isSimple = isSimple;
}
/**
* Creates a new partition schema from the range and hash bucket schemas.
*
* @param rangeSchema the range schema
* @param hashBucketSchemas the table-wide hash schema
* @param schema the table schema
*/
public PartitionSchema(RangeSchema rangeSchema,
List hashBucketSchemas,
Schema schema) {
this(rangeSchema, hashBucketSchemas, ImmutableList.of(), schema);
}
/**
* Returns the encoded partition key of the row.
* @return a byte array containing the encoded partition key of the row
*/
public byte[] encodePartitionKey(PartialRow row) {
return KeyEncoder.encodePartitionKey(row, this);
}
public RangeSchema getRangeSchema() {
return rangeSchema;
}
public List getHashBucketSchemas() {
return hashBucketSchemas;
}
public List getRangesWithHashSchemas() {
return rangesWithHashSchemas;
}
List getEncodedRangesWithHashSchemas() {
return encodedRangesWithHashSchemas;
}
/**
* Returns true if the partition schema if the partition schema does not include any hash
* components, and the range columns match the table's primary key columns.
*
* @return whether the partition schema is the default simple range partitioning.
*/
boolean isSimpleRangePartitioning() {
return isSimple;
}
/**
* @return whether the partition schema has ranges with custom hash schemas.
*/
boolean hasCustomHashSchemas() {
return !rangesWithHashSchemas.isEmpty();
}
/**
* Find hash schema for the given encoded range key. Depending on the
* partition schema and the key, it might be either table-wide or a custom
* hash schema for a particular range. Just as a convention, this method
* returns the table-wide hash schema for keys in non-covered ranges.
*
* @return hash bucket schema for the encoded range key
*/
List getHashSchemaForRange(byte[] rangeKey) {
if (!hasCustomHashSchemas()) {
// By definition, the table-wide hash schema provides the hash bucketing
// structure in the absence of per-range custom hash schemas.
return hashBucketSchemas;
}
final EncodedRangeBoundsWithHashSchema entry = hashSchemasPerRange.floor(
new EncodedRangeBoundsWithHashSchema(rangeKey, new byte[0], ImmutableList.of()));
if (entry == null) {
return hashBucketSchemas;
}
// Check if 'rangeKey' is in the range.
// NOTE: the right boundary is exclusive; an empty array for upper boundary
// means that the range partition is unbounded.
final byte[] upper = entry.upper;
Preconditions.checkNotNull(upper);
if (upper.length == 0 || Bytes.memcmp(rangeKey, upper) < 0) {
return entry.hashSchemas;
}
return hashBucketSchemas;
}
public static class RangeSchema {
private final List columns;
public RangeSchema(List columns) {
this.columns = columns;
}
/**
* Gets the column IDs of the columns in the range partition.
* @return the column IDs of the columns in the range partition
* @deprecated Use {@link #getColumnIds} instead.
*/
@Deprecated
public List getColumns() {
return columns;
}
/**
* Gets the column IDs of the columns in the range partition.
* @return the column IDs of the columns in the range partition
*/
public List getColumnIds() {
return columns;
}
}
public static class HashBucketSchema {
private final List columnIds;
private int numBuckets;
private int seed;
public HashBucketSchema(List columnIds, int numBuckets, int seed) {
this.columnIds = columnIds;
this.numBuckets = numBuckets;
this.seed = seed;
}
/**
* Gets the column IDs of the columns in the hash partition.
* @return the column IDs of the columns in the hash partition
*/
public List getColumnIds() {
return columnIds;
}
public int getNumBuckets() {
return numBuckets;
}
public int getSeed() {
return seed;
}
}
/**
* This utility class is used to represent information on a custom hash schema
* for a particular range.
*/
public static class RangeWithHashSchema {
public PartialRow lowerBound;
public PartialRow upperBound;
public List hashSchemas;
public RangeWithHashSchema(
PartialRow lowerBound,
PartialRow upperBound,
List hashSchemas) {
Preconditions.checkNotNull(lowerBound);
Preconditions.checkNotNull(upperBound);
Preconditions.checkArgument(
lowerBound.getSchema().equals(upperBound.getSchema()));
this.lowerBound = lowerBound;
this.upperBound = upperBound;
this.hashSchemas = hashSchemas;
}
}
}