All downloads are free. Search and download functionality uses the official Maven repository.

org.dinky.shaded.paimon.utils.BulkFormatMapping Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.dinky.shaded.paimon.utils;

import org.dinky.shaded.paimon.KeyValue;
import org.dinky.shaded.paimon.casting.CastFieldGetter;
import org.dinky.shaded.paimon.format.FileFormatDiscover;
import org.dinky.shaded.paimon.format.FormatReaderFactory;
import org.dinky.shaded.paimon.partition.PartitionUtils;
import org.dinky.shaded.paimon.predicate.Predicate;
import org.dinky.shaded.paimon.schema.IndexCastMapping;
import org.dinky.shaded.paimon.schema.KeyValueFieldsExtractor;
import org.dinky.shaded.paimon.schema.SchemaEvolutionUtil;
import org.dinky.shaded.paimon.schema.TableSchema;
import org.dinky.shaded.paimon.types.DataField;
import org.dinky.shaded.paimon.types.RowType;

import javax.annotation.Nullable;

import java.util.List;

/** Class with index mapping and bulk format. */
public class BulkFormatMapping {

    @Nullable private final int[] indexMapping;
    @Nullable private final CastFieldGetter[] castMapping;
    @Nullable private final Pair partitionPair;
    private final FormatReaderFactory bulkFormat;

    public BulkFormatMapping(
            int[] indexMapping,
            @Nullable CastFieldGetter[] castMapping,
            @Nullable Pair partitionPair,
            FormatReaderFactory bulkFormat) {
        this.indexMapping = indexMapping;
        this.castMapping = castMapping;
        this.bulkFormat = bulkFormat;
        this.partitionPair = partitionPair;
    }

    @Nullable
    public int[] getIndexMapping() {
        return indexMapping;
    }

    @Nullable
    public CastFieldGetter[] getCastMapping() {
        return castMapping;
    }

    public Pair getPartitionPair() {
        return partitionPair;
    }

    public FormatReaderFactory getReaderFactory() {
        return bulkFormat;
    }

    public static BulkFormatMappingBuilder newBuilder(
            FileFormatDiscover formatDiscover,
            KeyValueFieldsExtractor extractor,
            int[][] keyProjection,
            int[][] valueProjection,
            @Nullable List filters) {
        return new BulkFormatMappingBuilder(
                formatDiscover, extractor, keyProjection, valueProjection, filters);
    }

    /** Builder to build {@link BulkFormatMapping}. */
    public static class BulkFormatMappingBuilder {

        private final FileFormatDiscover formatDiscover;
        private final KeyValueFieldsExtractor extractor;
        private final int[][] keyProjection;
        private final int[][] valueProjection;
        @Nullable private final List filters;

        private BulkFormatMappingBuilder(
                FileFormatDiscover formatDiscover,
                KeyValueFieldsExtractor extractor,
                int[][] keyProjection,
                int[][] valueProjection,
                @Nullable List filters) {
            this.formatDiscover = formatDiscover;
            this.extractor = extractor;
            this.keyProjection = keyProjection;
            this.valueProjection = valueProjection;
            this.filters = filters;
        }

        public BulkFormatMapping build(
                String formatIdentifier, TableSchema tableSchema, TableSchema dataSchema) {
            List tableKeyFields = extractor.keyFields(tableSchema);
            List tableValueFields = extractor.valueFields(tableSchema);
            int[][] tableProjection =
                    KeyValue.project(keyProjection, valueProjection, tableKeyFields.size());

            List dataKeyFields = extractor.keyFields(dataSchema);
            List dataValueFields = extractor.valueFields(dataSchema);

            RowType keyType = new RowType(dataKeyFields);
            RowType valueType = new RowType(dataValueFields);
            RowType dataRecordType = KeyValue.schema(keyType, valueType);

            int[][] dataKeyProjection =
                    SchemaEvolutionUtil.createDataProjection(
                            tableKeyFields, dataKeyFields, keyProjection);
            int[][] dataValueProjection =
                    SchemaEvolutionUtil.createDataProjection(
                            tableValueFields, dataValueFields, valueProjection);
            int[][] dataProjection =
                    KeyValue.project(dataKeyProjection, dataValueProjection, dataKeyFields.size());

            /*
             * We need to create index mapping on projection instead of key and value separately
             * here, for example
             *
             * 
    *
  • the table key fields: 1->d, 3->a, 4->b, 5->c *
  • the data key fields: 1->a, 2->b, 3->c *
* *

The value fields of table and data are 0->value_count, the key and value * projections are as follows * *

    *
  • table key projection: [0, 1, 2, 3], value projection: [0], data projection: [0, * 1, 2, 3, 4, 5, 6] which 4/5 is seq/kind and 6 is value *
  • data key projection: [0, 1, 2], value projection: [0], data projection: [0, 1, * 2, 3, 4, 5] where 3/4 is seq/kind and 5 is value *
* *

We will get value index mapping null from above and we can't create projection * index mapping based on key and value index mapping any more. */ IndexCastMapping indexCastMapping = SchemaEvolutionUtil.createIndexCastMapping( Projection.of(tableProjection).toTopLevelIndexes(), tableKeyFields, tableValueFields, Projection.of(dataProjection).toTopLevelIndexes(), dataKeyFields, dataValueFields); List dataFilters = tableSchema.id() == dataSchema.id() ? filters : SchemaEvolutionUtil.createDataFilters( tableSchema.fields(), dataSchema.fields(), filters); Pair partitionPair = null; if (!dataSchema.partitionKeys().isEmpty()) { Pair partitionMappping = PartitionUtils.constructPartitionMapping( dataRecordType, dataSchema.partitionKeys(), dataProjection); // is partition fields are not selected, we just do nothing. if (partitionMappping != null) { dataProjection = partitionMappping.getRight(); partitionPair = Pair.of( partitionMappping.getLeft(), dataSchema.projectedLogicalRowType(dataSchema.partitionKeys())); } } RowType projectedRowType = Projection.of(dataProjection).project(dataRecordType); return new BulkFormatMapping( indexCastMapping.getIndexMapping(), indexCastMapping.getCastMapping(), partitionPair, formatDiscover .discover(formatIdentifier) .createReaderFactory(projectedRowType, dataFilters)); } } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy