All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.paimon.flink.Projection Maven / Gradle / Ivy

There is a newer version: 0.9.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.paimon.flink;

import org.apache.paimon.types.DataField;
import org.apache.paimon.types.DataTypeRoot;
import org.apache.paimon.types.DataTypeVisitor;

import org.apache.flink.table.types.DataType;
import org.apache.flink.table.types.logical.RowType;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

import static org.apache.paimon.flink.LogicalTypeConversion.toLogicalType;

/**
 * {@link Projection} represents a list of (possibly nested) indexes that can be used to project
 * data types. A row projection includes both reducing the accessible fields and reordering them.
 */
public abstract class Projection {

    // Sealed class: the private constructor means only the nested classes declared in this
    // file can extend Projection.
    private Projection() {}

    /**
     * Projects the given Flink logical row type according to this projection.
     *
     * @param logicalType the Flink row type to project
     * @return a row type holding the selected fields, in projection order
     */
    public abstract RowType project(RowType logicalType);

    /**
     * Projects the given Paimon row type according to this projection.
     *
     * @param rowType the Paimon row type to project
     * @return the projected Paimon row type
     */
    public abstract org.apache.paimon.types.RowType project(
            org.apache.paimon.types.RowType rowType);

    /**
     * Builds the {@link NestedProjectedRowData} matching the projected form of the given Paimon
     * row type.
     *
     * @param rowType the Paimon row type this projection is applied to
     * @return the row data wrapper, or {@code null} when this projection is not nested
     */
    public abstract NestedProjectedRowData getOuterProjectRow(
            org.apache.paimon.types.RowType rowType);

    /**
     * Tells whether this projection is nested, i.e. whether it reaches into fields of nested rows.
     *
     * @return {@code true} if this projection is nested, {@code false} otherwise
     */
    public abstract boolean isNested();

    /**
     * Converts this instance to a projection of top level indexes. The array represents the
     * mapping of the fields of the original {@link DataType}. For example, {@code [0, 2, 1]}
     * specifies to include, in this order, the 1st field, the 3rd field and the 2nd field of the
     * row.
     *
     * @return the top level field indexes, in projection order
     * @throws IllegalStateException if this projection is nested.
     */
    public abstract int[] toTopLevelIndexes();

    /**
     * Converts this instance to nested projection index paths. The array represents the mapping
     * of the fields of the original {@link DataType}, including nested rows. For example, {@code
     * [[0, 2, 1], ...]} specifies to include the 2nd field of the 3rd field of the 1st field in
     * the top-level row.
     *
     * @return one index path per projected field, in projection order
     */
    public abstract int[][] toNestedIndexes();

    /**
     * Creates an empty {@link Projection}, that is, a projection that projects no fields and
     * therefore returns an empty {@link DataType}.
     *
     * @return the shared {@code EmptyProjection.INSTANCE}
     */
    public static Projection empty() {
        return EmptyProjection.INSTANCE;
    }

    /**
     * Builds a {@link Projection} over the given top level field {@code indexes}. An empty array
     * yields {@link #empty()}.
     *
     * @see #toTopLevelIndexes()
     */
    public static Projection of(int[] indexes) {
        return indexes.length == 0 ? empty() : new TopLevelProjection(indexes);
    }

    /**
     * Builds a {@link Projection} over the given nested index paths. An empty array yields {@link
     * #empty()}.
     *
     * @see #toNestedIndexes()
     */
    public static Projection of(int[][] indexes) {
        return indexes.length == 0 ? empty() : new NestedProjection(indexes);
    }

    /**
     * Two projections are equal when they select the same indexes. If neither side is nested the
     * top level indexes are compared; otherwise the nested index paths are compared.
     */
    @Override
    public boolean equals(Object o) {
        if (o == this) {
            return true;
        }
        if (o instanceof Projection) {
            Projection that = (Projection) o;
            // Short-circuits exactly like "!this.isNested() && !that.isNested()".
            boolean anyNested = isNested() || that.isNested();
            return anyNested
                    ? Arrays.deepEquals(toNestedIndexes(), that.toNestedIndexes())
                    : Arrays.equals(toTopLevelIndexes(), that.toTopLevelIndexes());
        }
        return false;
    }

    /** Hashes the nested index paths for a nested projection, the top level indexes otherwise. */
    @Override
    public int hashCode() {
        return isNested()
                ? Arrays.deepHashCode(toNestedIndexes())
                : Arrays.hashCode(toTopLevelIndexes());
    }

    /** Renders the projection kind followed by its indexes. */
    @Override
    public String toString() {
        return isNested()
                ? "Nested projection = " + Arrays.deepToString(toNestedIndexes())
                : "Top level projection = " + Arrays.toString(toTopLevelIndexes());
    }

    /** Projection that selects no fields; shared through {@link #INSTANCE}. */
    private static class EmptyProjection extends Projection {

        static final EmptyProjection INSTANCE = new EmptyProjection();

        private EmptyProjection() {}

        /** Wraps the (empty) index paths in a fresh {@link NestedProjection} to delegate to. */
        private Projection delegate() {
            return new NestedProjection(toNestedIndexes());
        }

        @Override
        public boolean isNested() {
            return false;
        }

        @Override
        public int[] toTopLevelIndexes() {
            return new int[0];
        }

        @Override
        public int[][] toNestedIndexes() {
            return new int[0][];
        }

        @Override
        public RowType project(RowType dataType) {
            return delegate().project(dataType);
        }

        @Override
        public org.apache.paimon.types.RowType project(org.apache.paimon.types.RowType rowType) {
            return delegate().project(rowType);
        }

        @Override
        public NestedProjectedRowData getOuterProjectRow(org.apache.paimon.types.RowType rowType) {
            return delegate().getOuterProjectRow(rowType);
        }
    }

    /**
     * Projection described by index paths, one path per projected field. The first index of a
     * path selects a top-level field; every further index selects a field of the row reached so
     * far.
     */
    private static class NestedProjection extends Projection {

        final int[][] projection;
        // true when at least one path has more than one index, i.e. reaches into a nested row
        final boolean nested;

        NestedProjection(int[][] projection) {
            this.projection = projection;
            this.nested = Arrays.stream(projection).anyMatch(arr -> arr.length > 1);
        }

        /**
         * Projects a Flink row type into a flat row: every index path yields one output field
         * whose name is the field names along the path joined by {@code "_"}. A name that is
         * already taken gets {@code "_$<n>"} suffixes appended until it is unique.
         *
         * @param rowType the Flink row type to project
         * @return the flattened, projected row type with the nullability of {@code rowType}
         */
        @Override
        public RowType project(RowType rowType) {
            final List<RowType.RowField> updatedFields = new ArrayList<>();
            Set<String> nameDomain = new HashSet<>();
            // Shared by all paths: the counter is never reset between output fields.
            int duplicateCount = 0;
            for (int[] indexPath : this.projection) {
                RowType.RowField field = rowType.getFields().get(indexPath[0]);
                StringBuilder builder =
                        new StringBuilder(rowType.getFieldNames().get(indexPath[0]));
                // Walk down the path, extending the name with each nested field name.
                for (int index = 1; index < indexPath.length; index++) {
                    RowType nestedRow = ((RowType) field.getType());
                    builder.append("_").append(nestedRow.getFieldNames().get(indexPath[index]));
                    field = nestedRow.getFields().get(indexPath[index]);
                }
                String path = builder.toString();
                while (nameDomain.contains(path)) {
                    path = builder.append("_$").append(duplicateCount++).toString();
                }
                updatedFields.add(
                        new RowType.RowField(
                                path, field.getType(), field.getDescription().orElse(null)));
                nameDomain.add(path);
            }
            return new RowType(rowType.isNullable(), updatedFields);
        }

        /**
         * Projects a Paimon row type. A non-nested projection is a plain top-level projection. A
         * nested projection keeps the row hierarchy: intermediate rows along each path are
         * re-created (once per field name) and only the selected leaf fields are appended to them.
         *
         * @param rowType the Paimon row type to project
         * @return the projected Paimon row type
         */
        @Override
        public org.apache.paimon.types.RowType project(org.apache.paimon.types.RowType rowType) {
            if (!nested) {
                return rowType.project(toTopLevelIndexes());
            }

            MutableRowType result =
                    new MutableRowType(rowType.isNullable(), Collections.emptyList());
            for (int[] indexPath : this.projection) {
                org.apache.paimon.types.RowType sourceType = rowType;
                MutableRowType targetType = result;
                int index;
                // Descend through every index but the last, creating missing intermediate rows.
                for (index = 0; index < indexPath.length - 1; index++) {
                    String fieldName = sourceType.getFieldNames().get(indexPath[index]);
                    DataField field = sourceType.getField(fieldName);
                    sourceType = (org.apache.paimon.types.RowType) field.type();
                    if (!targetType.containsField(fieldName)) {
                        targetType.appendDataField(
                                fieldName,
                                field.id(),
                                new MutableRowType(
                                        sourceType.isNullable(), Collections.emptyList()),
                                field.description());
                    }
                    targetType = (MutableRowType) targetType.getField(fieldName).type();
                }

                // The last index of the path designates the field that is actually selected.
                String fieldName = sourceType.getFieldNames().get(indexPath[index]);
                DataField field = sourceType.getField(fieldName);
                targetType.appendDataField(
                        fieldName, field.id(), field.type(), field.description());
            }
            return result.toRowType();
        }

        /**
         * Builds the {@link NestedProjectedRowData} for this projection: for each index path of
         * the source type, resolves the position of the same-named fields inside the projected
         * type returned by {@link #project(org.apache.paimon.types.RowType)}.
         *
         * @param rowType the Paimon row type this projection is applied to
         * @return the row data wrapper, or {@code null} when this projection is not nested
         */
        @Override
        public NestedProjectedRowData getOuterProjectRow(org.apache.paimon.types.RowType rowType) {
            if (!nested) {
                return null;
            }

            org.apache.paimon.types.RowType resultType = project(rowType);

            int[][] resultIndices = new int[this.projection.length][];
            for (int i = 0; i < this.projection.length; i++) {
                org.apache.paimon.types.RowType sourceType = rowType;
                org.apache.paimon.types.RowType targetType = resultType;
                resultIndices[i] = new int[this.projection[i].length];
                for (int j = 0; j < this.projection[i].length; j++) {
                    DataField sourceField = sourceType.getFields().get(this.projection[i][j]);
                    String fieldName = sourceField.name();
                    resultIndices[i][j] = targetType.getFieldIndex(fieldName);
                    // Only descend while there are further indexes left in the path.
                    if (j < this.projection[i].length - 1) {
                        targetType =
                                (org.apache.paimon.types.RowType)
                                        targetType.getField(fieldName).type();
                        sourceType = (org.apache.paimon.types.RowType) sourceField.type();
                    }
                }
            }

            return new NestedProjectedRowData(toLogicalType(resultType), resultIndices);
        }

        @Override
        public boolean isNested() {
            return nested;
        }

        /**
         * Returns the first index of every path.
         *
         * @throws IllegalStateException if any path has more than one index
         */
        @Override
        public int[] toTopLevelIndexes() {
            if (isNested()) {
                throw new IllegalStateException(
                        "Cannot convert a nested projection to a top level projection");
            }
            return Arrays.stream(projection).mapToInt(arr -> arr[0]).toArray();
        }

        /** Returns the backing array of index paths (not a copy). */
        @Override
        public int[][] toNestedIndexes() {
            return projection;
        }
    }

    /** Projection over top level field indexes only; never nested. */
    private static class TopLevelProjection extends Projection {

        final int[] projection;

        TopLevelProjection(int[] projection) {
            this.projection = projection;
        }

        /** Wraps the indexes as single-element paths in a fresh {@link NestedProjection}. */
        private Projection delegate() {
            return new NestedProjection(toNestedIndexes());
        }

        @Override
        public boolean isNested() {
            return false;
        }

        /** Returns the backing index array (not a copy). */
        @Override
        public int[] toTopLevelIndexes() {
            return projection;
        }

        /** Turns every top level index {@code i} into the single-element path {@code [i]}. */
        @Override
        public int[][] toNestedIndexes() {
            int[][] paths = new int[projection.length][];
            for (int i = 0; i < projection.length; i++) {
                paths[i] = new int[] {projection[i]};
            }
            return paths;
        }

        @Override
        public RowType project(RowType dataType) {
            return delegate().project(dataType);
        }

        @Override
        public org.apache.paimon.types.RowType project(org.apache.paimon.types.RowType rowType) {
            return delegate().project(rowType);
        }

        @Override
        public NestedProjectedRowData getOuterProjectRow(org.apache.paimon.types.RowType rowType) {
            return delegate().getOuterProjectRow(rowType);
        }
    }

    /**
     * A mutable version of {@link org.apache.paimon.types.RowType} to facilitate the building
     * process of projections.
     *
     * 

It is mutable in aspect of the {@link #appendDataField} method. */ private static class MutableRowType extends org.apache.paimon.types.DataType { private final List fields; private final boolean isNullable; private MutableRowType(org.apache.paimon.types.RowType rowType) { this(rowType.isNullable(), rowType.getFields()); } private MutableRowType(boolean isNullable, List fields) { super(isNullable, DataTypeRoot.ROW); this.fields = new ArrayList<>(fields); this.isNullable = isNullable; } private org.apache.paimon.types.RowType toRowType() { for (int i = 0; i < fields.size(); i++) { DataField field = fields.get(i); if (field.type() instanceof MutableRowType) { fields.set( i, new DataField( field.id(), field.name(), ((MutableRowType) field.type()).toRowType(), field.description())); } } return new org.apache.paimon.types.RowType(isNullable, fields); } private boolean containsField(String fieldName) { for (DataField field : fields) { if (field.name().equals(fieldName)) { return true; } } return false; } private DataField getField(String fieldName) { for (DataField field : fields) { if (field.name().equals(fieldName)) { return field; } } throw new RuntimeException("Cannot find field: " + fieldName); } private void appendDataField( String name, int newId, org.apache.paimon.types.DataType type, String description) { for (DataField field : fields) { if (field.name().equals(name)) { throw new IllegalStateException( String.format( "A field with name %s has already been appended. Existing fields: %s", name, fields)); } if (field.id() == newId) { throw new IllegalStateException( String.format( "A field with id %s has already been appended. 
Existing fields: %s", newId, fields)); } } if (type instanceof org.apache.paimon.types.RowType) { type = new MutableRowType((org.apache.paimon.types.RowType) type); } fields.add(new DataField(newId, name, type, description)); } @Override public int defaultSize() { throw new UnsupportedOperationException(); } @Override public org.apache.paimon.types.DataType copy(boolean isNullable) { throw new UnsupportedOperationException(); } @Override public String asSQLString() { throw new UnsupportedOperationException(); } @Override public R accept(DataTypeVisitor visitor) { throw new UnsupportedOperationException(); } } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy