All Downloads are FREE. Search and download functionality uses the official Maven repository.

org.dinky.shaded.paimon.table.sink.SequenceGenerator Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.dinky.shaded.paimon.table.sink;

import org.dinky.shaded.paimon.CoreOptions;
import org.dinky.shaded.paimon.CoreOptions.SequenceAutoPadding;
import org.dinky.shaded.paimon.data.InternalRow;
import org.dinky.shaded.paimon.schema.TableSchema;
import org.dinky.shaded.paimon.types.BigIntType;
import org.dinky.shaded.paimon.types.CharType;
import org.dinky.shaded.paimon.types.DataType;
import org.dinky.shaded.paimon.types.DataTypeDefaultVisitor;
import org.dinky.shaded.paimon.types.DataTypeFamily;
import org.dinky.shaded.paimon.types.DateType;
import org.dinky.shaded.paimon.types.DecimalType;
import org.dinky.shaded.paimon.types.DoubleType;
import org.dinky.shaded.paimon.types.FloatType;
import org.dinky.shaded.paimon.types.IntType;
import org.dinky.shaded.paimon.types.LocalZonedTimestampType;
import org.dinky.shaded.paimon.types.RowKind;
import org.dinky.shaded.paimon.types.RowType;
import org.dinky.shaded.paimon.types.SmallIntType;
import org.dinky.shaded.paimon.types.TimestampType;
import org.dinky.shaded.paimon.types.TinyIntType;
import org.dinky.shaded.paimon.types.VarCharType;
import org.dinky.shaded.paimon.utils.InternalRowUtils;

import javax.annotation.Nullable;

import java.util.Collections;
import java.util.List;
import java.util.concurrent.TimeUnit;
import java.util.stream.Collectors;

/**
 * Generate sequence number.
 *
 * <p>Reads one field (the "sequence field") from an {@link InternalRow}, converts it to a {@code
 * long} according to the field's {@link DataType}, and, in {@link #generate(InternalRow)},
 * applies the configured {@link SequenceAutoPadding} steps in list order.
 */
public class SequenceGenerator {

    /** Position of the sequence field inside the row. */
    private final int index;

    /** Padding steps applied, in order, by {@link #generate(InternalRow)}. */
    private final List<SequenceAutoPadding> paddings;

    /** Type-specific extractor chosen once from {@link #fieldType}. */
    private final Generator generator;

    private final DataType fieldType;

    /**
     * Creates a generator for the named field without any padding.
     *
     * @param field name of the sequence field
     * @param rowType row type that must contain {@code field}
     * @throws RuntimeException if {@code field} is not part of {@code rowType}
     */
    public SequenceGenerator(String field, RowType rowType) {
        this(field, rowType, Collections.emptyList());
    }

    /**
     * Creates a generator for the named field with the given padding steps.
     *
     * @param field name of the sequence field
     * @param rowType row type that must contain {@code field}
     * @param paddings padding steps applied in iteration order by {@link #generate(InternalRow)}
     * @throws RuntimeException if {@code field} is not part of {@code rowType}
     * @throws UnsupportedOperationException if the field's type has no sequence conversion
     */
    public SequenceGenerator(String field, RowType rowType, List<SequenceAutoPadding> paddings) {
        index = rowType.getFieldNames().indexOf(field);
        this.paddings = paddings;

        if (index == -1) {
            throw new RuntimeException(
                    String.format(
                            "Can not find sequence field %s in table schema: %s", field, rowType));
        }
        fieldType = rowType.getTypeAt(index);
        generator = fieldType.accept(new SequenceGeneratorVisitor());
    }

    /**
     * Creates a generator for the field at a known position, without any padding.
     *
     * @param index position of the sequence field in the row; must not be negative
     * @param dataType type of the sequence field
     * @throws RuntimeException if {@code index} is negative
     * @throws UnsupportedOperationException if {@code dataType} has no sequence conversion
     */
    public SequenceGenerator(int index, DataType dataType) {
        // Any negative position is unusable, not only the -1 "not found" sentinel.
        if (index < 0) {
            throw new RuntimeException(String.format("Index : %s is invalid", index));
        }
        this.index = index;
        this.paddings = Collections.emptyList();
        this.fieldType = dataType;
        generator = fieldType.accept(new SequenceGeneratorVisitor());
    }

    /**
     * Builds a generator from table options.
     *
     * @param schema table schema whose logical row type contains the sequence field
     * @param options options providing the sequence field name and the padding modes
     * @return the generator, or {@code null} when {@code options.sequenceField()} is empty
     */
    public static SequenceGenerator create(TableSchema schema, CoreOptions options) {
        List<SequenceAutoPadding> sequenceAutoPadding =
                options.sequenceAutoPadding().stream()
                        .map(SequenceAutoPadding::fromString)
                        .collect(Collectors.toList());
        return options.sequenceField()
                .map(
                        field ->
                                new SequenceGenerator(
                                        field, schema.logicalRowType(), sequenceAutoPadding))
                .orElse(null);
    }

    /** Returns the position of the sequence field in the row. */
    public int index() {
        return index;
    }

    /** Returns the data type of the sequence field. */
    public DataType fieldType() {
        return fieldType;
    }

    /**
     * Extracts the raw sequence value, tolerating a null field.
     *
     * <p>No padding is applied by this method.
     *
     * @param row row to read the sequence field from
     * @return the converted field value, or {@code null} if the field is null in {@code row}
     */
    @Nullable
    public Long generateNullable(InternalRow row) {
        return generator.generateNullable(row, index);
    }

    /**
     * Extracts the sequence value and applies every configured padding step in order.
     *
     * @param row row to read the sequence field (and, for the row-kind flag, the row kind) from
     * @return the padded sequence number
     * @throws UnsupportedOperationException if a padding mode is not handled here
     */
    public long generate(InternalRow row) {
        long sequence = generator.generate(row, index);
        for (SequenceAutoPadding padding : paddings) {
            switch (padding) {
                case ROW_KIND_FLAG:
                    sequence = addRowKindFlag(sequence, row.getRowKind());
                    break;
                case SECOND_TO_MICRO:
                    sequence = secondToMicro(sequence);
                    break;
                case MILLIS_TO_MICRO:
                    sequence = millisToMicro(sequence);
                    break;
                default:
                    throw new UnsupportedOperationException(
                            "Unknown sequence padding mode " + padding);
            }
        }
        return sequence;
    }

    /** Shifts the sequence left by one bit and sets the lowest bit when the row kind is an add. */
    private long addRowKindFlag(long sequence, RowKind rowKind) {
        return (sequence << 1) | (rowKind.isAdd() ? 1 : 0);
    }

    /** Scales a millisecond value to microseconds and fills the sub-millisecond digits. */
    private long millisToMicro(long sequence) {
        // Generated value is millis
        return sequence * 1_000 + getCurrentMicroOfMillis();
    }

    /** Scales a second value to microseconds and fills the sub-second digits. */
    private long secondToMicro(long sequence) {
        // timestamp returns millis
        long second = fieldType.is(DataTypeFamily.TIMESTAMP) ? sequence / 1000 : sequence;
        return second * 1_000_000 + getCurrentMicroOfSeconds();
    }

    /**
     * Returns the microsecond part (0..999) of the current {@link System#nanoTime()} reading
     * within its millisecond.
     */
    private static long getCurrentMicroOfMillis() {
        // nanoTime() may be negative; floorMod keeps the remainder non-negative so the padding
        // never subtracts from the scaled sequence.
        long nanosWithinMilli =
                Math.floorMod(System.nanoTime(), TimeUnit.MILLISECONDS.toNanos(1));
        return TimeUnit.NANOSECONDS.toMicros(nanosWithinMilli);
    }

    /**
     * Returns the microsecond part (0..999999) of the current {@link System#nanoTime()} reading
     * within its second.
     */
    private static long getCurrentMicroOfSeconds() {
        // See getCurrentMicroOfMillis: floorMod guards against a negative nanoTime() origin.
        long nanosWithinSecond = Math.floorMod(System.nanoTime(), TimeUnit.SECONDS.toNanos(1));
        return TimeUnit.NANOSECONDS.toMicros(nanosWithinSecond);
    }

    /** Converts the field at a given position of a row to a {@code long} sequence value. */
    private interface Generator {

        /** Reads field {@code i} of {@code row}; the field is expected to be non-null. */
        long generate(InternalRow row, int i);

        /** Like {@link #generate}, but returns {@code null} when field {@code i} is null. */
        @Nullable
        default Long generateNullable(InternalRow row, int i) {
            if (row.isNullAt(i)) {
                return null;
            }
            return generate(row, i);
        }
    }

    /**
     * Picks the {@link Generator} matching a field's data type; types without an override are
     * rejected by {@link #defaultMethod(DataType)}.
     */
    private static class SequenceGeneratorVisitor extends DataTypeDefaultVisitor<Generator> {

        @Override
        public Generator visit(CharType charType) {
            return stringGenerator();
        }

        @Override
        public Generator visit(VarCharType varCharType) {
            return stringGenerator();
        }

        /** String fields are parsed as decimal {@code long} values. */
        private Generator stringGenerator() {
            return (row, i) -> Long.parseLong(row.getString(i).toString());
        }

        @Override
        public Generator visit(DecimalType decimalType) {
            return (row, i) ->
                    InternalRowUtils.castToIntegral(
                            row.getDecimal(i, decimalType.getPrecision(), decimalType.getScale()));
        }

        @Override
        public Generator visit(TinyIntType tinyIntType) {
            return InternalRow::getByte;
        }

        @Override
        public Generator visit(SmallIntType smallIntType) {
            return InternalRow::getShort;
        }

        @Override
        public Generator visit(IntType intType) {
            return InternalRow::getInt;
        }

        @Override
        public Generator visit(BigIntType bigIntType) {
            return InternalRow::getLong;
        }

        @Override
        public Generator visit(FloatType floatType) {
            // Narrowing cast: the fractional part is discarded.
            return (row, i) -> (long) row.getFloat(i);
        }

        @Override
        public Generator visit(DoubleType doubleType) {
            // Narrowing cast: the fractional part is discarded.
            return (row, i) -> (long) row.getDouble(i);
        }

        @Override
        public Generator visit(DateType dateType) {
            // Date fields are read through their int representation.
            return InternalRow::getInt;
        }

        @Override
        public Generator visit(TimestampType timestampType) {
            return (row, i) -> row.getTimestamp(i, timestampType.getPrecision()).getMillisecond();
        }

        @Override
        public Generator visit(LocalZonedTimestampType localZonedTimestampType) {
            return (row, i) ->
                    row.getTimestamp(i, localZonedTimestampType.getPrecision()).getMillisecond();
        }

        @Override
        protected Generator defaultMethod(DataType dataType) {
            throw new UnsupportedOperationException("Unsupported type: " + dataType);
        }
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy