org.dinky.shaded.paimon.utils.BulkFormatMapping Maven / Gradle / Ivy
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.dinky.shaded.paimon.utils;
import org.dinky.shaded.paimon.KeyValue;
import org.dinky.shaded.paimon.casting.CastFieldGetter;
import org.dinky.shaded.paimon.format.FileFormatDiscover;
import org.dinky.shaded.paimon.format.FormatReaderFactory;
import org.dinky.shaded.paimon.partition.PartitionUtils;
import org.dinky.shaded.paimon.predicate.Predicate;
import org.dinky.shaded.paimon.schema.IndexCastMapping;
import org.dinky.shaded.paimon.schema.KeyValueFieldsExtractor;
import org.dinky.shaded.paimon.schema.SchemaEvolutionUtil;
import org.dinky.shaded.paimon.schema.TableSchema;
import org.dinky.shaded.paimon.types.DataField;
import org.dinky.shaded.paimon.types.RowType;
import javax.annotation.Nullable;
import java.util.List;
/** Class with index mapping and bulk format. */
public class BulkFormatMapping {
@Nullable private final int[] indexMapping;
@Nullable private final CastFieldGetter[] castMapping;
@Nullable private final Pair partitionPair;
private final FormatReaderFactory bulkFormat;
public BulkFormatMapping(
int[] indexMapping,
@Nullable CastFieldGetter[] castMapping,
@Nullable Pair partitionPair,
FormatReaderFactory bulkFormat) {
this.indexMapping = indexMapping;
this.castMapping = castMapping;
this.bulkFormat = bulkFormat;
this.partitionPair = partitionPair;
}
@Nullable
public int[] getIndexMapping() {
return indexMapping;
}
@Nullable
public CastFieldGetter[] getCastMapping() {
return castMapping;
}
public Pair getPartitionPair() {
return partitionPair;
}
public FormatReaderFactory getReaderFactory() {
return bulkFormat;
}
public static BulkFormatMappingBuilder newBuilder(
FileFormatDiscover formatDiscover,
KeyValueFieldsExtractor extractor,
int[][] keyProjection,
int[][] valueProjection,
@Nullable List filters) {
return new BulkFormatMappingBuilder(
formatDiscover, extractor, keyProjection, valueProjection, filters);
}
/** Builder to build {@link BulkFormatMapping}. */
public static class BulkFormatMappingBuilder {
private final FileFormatDiscover formatDiscover;
private final KeyValueFieldsExtractor extractor;
private final int[][] keyProjection;
private final int[][] valueProjection;
@Nullable private final List filters;
private BulkFormatMappingBuilder(
FileFormatDiscover formatDiscover,
KeyValueFieldsExtractor extractor,
int[][] keyProjection,
int[][] valueProjection,
@Nullable List filters) {
this.formatDiscover = formatDiscover;
this.extractor = extractor;
this.keyProjection = keyProjection;
this.valueProjection = valueProjection;
this.filters = filters;
}
public BulkFormatMapping build(
String formatIdentifier, TableSchema tableSchema, TableSchema dataSchema) {
List tableKeyFields = extractor.keyFields(tableSchema);
List tableValueFields = extractor.valueFields(tableSchema);
int[][] tableProjection =
KeyValue.project(keyProjection, valueProjection, tableKeyFields.size());
List dataKeyFields = extractor.keyFields(dataSchema);
List dataValueFields = extractor.valueFields(dataSchema);
RowType keyType = new RowType(dataKeyFields);
RowType valueType = new RowType(dataValueFields);
RowType dataRecordType = KeyValue.schema(keyType, valueType);
int[][] dataKeyProjection =
SchemaEvolutionUtil.createDataProjection(
tableKeyFields, dataKeyFields, keyProjection);
int[][] dataValueProjection =
SchemaEvolutionUtil.createDataProjection(
tableValueFields, dataValueFields, valueProjection);
int[][] dataProjection =
KeyValue.project(dataKeyProjection, dataValueProjection, dataKeyFields.size());
/*
* We need to create index mapping on projection instead of key and value separately
* here, for example
*
*
* - the table key fields: 1->d, 3->a, 4->b, 5->c
*
- the data key fields: 1->a, 2->b, 3->c
*
*
* The value fields of table and data are 0->value_count, the key and value
* projections are as follows
*
*
* - table key projection: [0, 1, 2, 3], value projection: [0], data projection: [0,
* 1, 2, 3, 4, 5, 6] which 4/5 is seq/kind and 6 is value
*
- data key projection: [0, 1, 2], value projection: [0], data projection: [0, 1,
* 2, 3, 4, 5] where 3/4 is seq/kind and 5 is value
*
*
* We will get value index mapping null from above and we can't create projection
* index mapping based on key and value index mapping any more.
*/
IndexCastMapping indexCastMapping =
SchemaEvolutionUtil.createIndexCastMapping(
Projection.of(tableProjection).toTopLevelIndexes(),
tableKeyFields,
tableValueFields,
Projection.of(dataProjection).toTopLevelIndexes(),
dataKeyFields,
dataValueFields);
List dataFilters =
tableSchema.id() == dataSchema.id()
? filters
: SchemaEvolutionUtil.createDataFilters(
tableSchema.fields(), dataSchema.fields(), filters);
Pair partitionPair = null;
if (!dataSchema.partitionKeys().isEmpty()) {
Pair partitionMappping =
PartitionUtils.constructPartitionMapping(
dataRecordType, dataSchema.partitionKeys(), dataProjection);
// is partition fields are not selected, we just do nothing.
if (partitionMappping != null) {
dataProjection = partitionMappping.getRight();
partitionPair =
Pair.of(
partitionMappping.getLeft(),
dataSchema.projectedLogicalRowType(dataSchema.partitionKeys()));
}
}
RowType projectedRowType = Projection.of(dataProjection).project(dataRecordType);
return new BulkFormatMapping(
indexCastMapping.getIndexMapping(),
indexCastMapping.getCastMapping(),
partitionPair,
formatDiscover
.discover(formatIdentifier)
.createReaderFactory(projectedRowType, dataFilters));
}
}
}