All Downloads are FREE. The search and download functionality uses the official Maven repository.

org.dinky.shaded.paimon.utils.Projection Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.dinky.shaded.paimon.utils;

import org.dinky.shaded.paimon.data.InternalRow;
import org.dinky.shaded.paimon.types.DataField;
import org.dinky.shaded.paimon.types.DataType;
import org.dinky.shaded.paimon.types.RowType;

import java.lang.reflect.Array;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashSet;
import java.util.List;
import java.util.ListIterator;
import java.util.Set;
import java.util.stream.Collectors;
import java.util.stream.IntStream;

import static org.dinky.shaded.paimon.types.DataTypeRoot.ROW;

/**
 * {@link Projection} represents a list of (possibly nested) indexes that can be used to project
 * data types. A row projection includes both reducing the accessible fields and reordering them.
 */
public abstract class Projection {

    // sealed class
    private Projection() {}

    public abstract RowType project(RowType rowType);

    /** Project array. */
    public  T[] project(T[] array) {
        int[] project = toTopLevelIndexes();
        @SuppressWarnings("unchecked")
        T[] ret = (T[]) Array.newInstance(array.getClass().getComponentType(), project.length);
        for (int i = 0; i < project.length; i++) {
            ret[i] = array[project[i]];
        }
        return ret;
    }

    /** Project list. */
    public  List project(List list) {
        int[] project = toTopLevelIndexes();
        List ret = new ArrayList<>();
        for (int i : project) {
            ret.add(list.get(i));
        }
        return ret;
    }

    /** @return {@code true} whether this projection is nested or not. */
    public abstract boolean isNested();

    /**
     * Perform a difference of this {@link Projection} with another {@link Projection}. The result
     * of this operation is a new {@link Projection} retaining the same ordering of this instance
     * but with the indexes from {@code other} removed. For example:
     *
     * 
     * 
     * [4, 1, 0, 3, 2] - [4, 2] = [1, 0, 2]
     * 
     * 
* *

Note how the index {@code 3} in the minuend becomes {@code 2} because it's rescaled to * project correctly a {@link InternalRow} or arity 3. * * @param other the subtrahend * @throws IllegalArgumentException when {@code other} is nested. */ public abstract Projection difference(Projection other); /** * Complement this projection. The returned projection is an ordered projection of fields from 0 * to {@code fieldsNumber} except the indexes in this {@link Projection}. For example: * *

     * 
     * [4, 2].complement(5) = [0, 1, 3]
     * 
     * 
* * @param fieldsNumber the size of the universe * @throws IllegalStateException if this projection is nested. */ public abstract Projection complement(int fieldsNumber); /** * Convert this instance to a projection of top level indexes. The array represents the mapping * of the fields of the original {@link DataType}. For example, {@code [0, 2, 1]} specifies to * include in the following order the 1st field, the 3rd field and the 2nd field of the row. * * @throws IllegalStateException if this projection is nested. */ public abstract int[] toTopLevelIndexes(); /** * Convert this instance to a nested projection index paths. The array represents the mapping of * the fields of the original {@link DataType}, including nested rows. For example, {@code [[0, * 2, 1], ...]} specifies to include the 2nd field of the 3rd field of the 1st field in the * top-level row. */ public abstract int[][] toNestedIndexes(); /** * Create an empty {@link Projection}, that is a projection that projects no fields, returning * an empty {@link DataType}. */ public static Projection empty() { return EmptyProjection.INSTANCE; } /** * Create a {@link Projection} of the provided {@code indexes}. * * @see #toTopLevelIndexes() */ public static Projection of(int[] indexes) { if (indexes.length == 0) { return empty(); } return new TopLevelProjection(indexes); } /** * Create a {@link Projection} of the provided {@code indexes}. * * @see #toNestedIndexes() */ public static Projection of(int[][] indexes) { if (indexes.length == 0) { return empty(); } return new NestedProjection(indexes); } /** Create a {@link Projection} of a field range. 
*/ public static Projection range(int startInclusive, int endExclusive) { return new TopLevelProjection(IntStream.range(startInclusive, endExclusive).toArray()); } @Override public boolean equals(Object o) { if (this == o) { return true; } if (!(o instanceof Projection)) { return false; } Projection other = (Projection) o; if (!this.isNested() && !other.isNested()) { return Arrays.equals(this.toTopLevelIndexes(), other.toTopLevelIndexes()); } return Arrays.deepEquals(this.toNestedIndexes(), other.toNestedIndexes()); } @Override public int hashCode() { if (isNested()) { return Arrays.deepHashCode(toNestedIndexes()); } return Arrays.hashCode(toTopLevelIndexes()); } @Override public String toString() { if (isNested()) { return "Nested projection = " + Arrays.deepToString(toNestedIndexes()); } return "Top level projection = " + Arrays.toString(toTopLevelIndexes()); } private static class EmptyProjection extends Projection { static final EmptyProjection INSTANCE = new EmptyProjection(); private EmptyProjection() {} @Override public RowType project(RowType rowType) { return new NestedProjection(toNestedIndexes()).project(rowType); } @Override public boolean isNested() { return false; } @Override public Projection difference(Projection projection) { return this; } @Override public Projection complement(int fieldsNumber) { return new TopLevelProjection(IntStream.range(0, fieldsNumber).toArray()); } @Override public int[] toTopLevelIndexes() { return new int[0]; } @Override public int[][] toNestedIndexes() { return new int[0][]; } } private static class NestedProjection extends Projection { final int[][] projection; final boolean nested; NestedProjection(int[][] projection) { this.projection = projection; this.nested = Arrays.stream(projection).anyMatch(arr -> arr.length > 1); } @Override public RowType project(RowType rowType) { final List updatedFields = new ArrayList<>(); Set nameDomain = new HashSet<>(); int duplicateCount = 0; for (int[] indexPath : this.projection) { 
DataField field = rowType.getFields().get(indexPath[0]); StringBuilder builder = new StringBuilder(rowType.getFieldNames().get(indexPath[0])); for (int index = 1; index < indexPath.length; index++) { Preconditions.checkArgument( field.type().getTypeRoot() == ROW, "Row data type expected."); RowType rowtype = ((RowType) field.type()); builder.append("_").append(rowtype.getFieldNames().get(indexPath[index])); field = rowtype.getFields().get(indexPath[index]); } String path = builder.toString(); while (nameDomain.contains(path)) { path = builder.append("_$").append(duplicateCount++).toString(); } updatedFields.add(field.newName(path)); nameDomain.add(path); } return new RowType(rowType.isNullable(), updatedFields); } @Override public boolean isNested() { return nested; } @Override public Projection difference(Projection other) { if (other.isNested()) { throw new IllegalArgumentException( "Cannot perform difference between nested projection and nested projection"); } if (other instanceof EmptyProjection) { return this; } if (!this.isNested()) { return new TopLevelProjection(toTopLevelIndexes()).difference(other); } // Extract the indexes to exclude and sort them int[] indexesToExclude = other.toTopLevelIndexes(); indexesToExclude = Arrays.copyOf(indexesToExclude, indexesToExclude.length); Arrays.sort(indexesToExclude); List resultProjection = Arrays.stream(projection).collect(Collectors.toCollection(ArrayList::new)); ListIterator resultProjectionIterator = resultProjection.listIterator(); while (resultProjectionIterator.hasNext()) { int[] indexArr = resultProjectionIterator.next(); // Let's check if the index is inside the indexesToExclude array int searchResult = Arrays.binarySearch(indexesToExclude, indexArr[0]); if (searchResult >= 0) { // Found, we need to remove it resultProjectionIterator.remove(); } else { // Not found, let's compute the offset. 
// Offset is the index where the projection index should be inserted in the // indexesToExclude array int offset = (-(searchResult) - 1); if (offset != 0) { indexArr[0] = indexArr[0] - offset; } } } return new NestedProjection(resultProjection.toArray(new int[0][])); } @Override public Projection complement(int fieldsNumber) { if (isNested()) { throw new IllegalStateException("Cannot perform complement of a nested projection"); } return new TopLevelProjection(toTopLevelIndexes()).complement(fieldsNumber); } @Override public int[] toTopLevelIndexes() { if (isNested()) { throw new IllegalStateException( "Cannot convert a nested projection to a top level projection"); } return Arrays.stream(projection).mapToInt(arr -> arr[0]).toArray(); } @Override public int[][] toNestedIndexes() { return projection; } } private static class TopLevelProjection extends Projection { final int[] projection; TopLevelProjection(int[] projection) { this.projection = projection; } @Override public RowType project(RowType rowType) { return new NestedProjection(toNestedIndexes()).project(rowType); } @Override public boolean isNested() { return false; } @Override public Projection difference(Projection other) { if (other.isNested()) { throw new IllegalArgumentException( "Cannot perform difference between top level projection and nested projection"); } if (other instanceof EmptyProjection) { return this; } // Extract the indexes to exclude and sort them int[] indexesToExclude = other.toTopLevelIndexes(); indexesToExclude = Arrays.copyOf(indexesToExclude, indexesToExclude.length); Arrays.sort(indexesToExclude); List resultProjection = Arrays.stream(projection) .boxed() .collect(Collectors.toCollection(ArrayList::new)); ListIterator resultProjectionIterator = resultProjection.listIterator(); while (resultProjectionIterator.hasNext()) { int index = resultProjectionIterator.next(); // Let's check if the index is inside the indexesToExclude array int searchResult = 
Arrays.binarySearch(indexesToExclude, index); if (searchResult >= 0) { // Found, we need to remove it resultProjectionIterator.remove(); } else { // Not found, let's compute the offset. // Offset is the index where the projection index should be inserted in the // indexesToExclude array int offset = (-(searchResult) - 1); if (offset != 0) { resultProjectionIterator.set(index - offset); } } } return new TopLevelProjection(resultProjection.stream().mapToInt(i -> i).toArray()); } @Override public Projection complement(int fieldsNumber) { int[] indexesToExclude = Arrays.copyOf(projection, projection.length); Arrays.sort(indexesToExclude); return new TopLevelProjection( IntStream.range(0, fieldsNumber) .filter(i -> Arrays.binarySearch(indexesToExclude, i) < 0) .toArray()); } @Override public int[] toTopLevelIndexes() { return projection; } @Override public int[][] toNestedIndexes() { return Arrays.stream(projection).mapToObj(i -> new int[] {i}).toArray(int[][]::new); } } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy