All Downloads are FREE. Search and download functionalities are using the official Maven repository.

io.delta.flink.source.internal.utils.SourceSchema Maven / Gradle / Ivy

There is a newer version: 3.2.1
Show newest version
package io.delta.flink.source.internal.utils;

import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.List;

import io.delta.flink.source.internal.SchemaConverter;
import org.apache.flink.table.types.logical.LogicalType;

import io.delta.standalone.Snapshot;
import io.delta.standalone.actions.Metadata;
import io.delta.standalone.types.StructField;
import io.delta.standalone.types.StructType;

/**
 * Schema information about column names and their types that should be read from Delta table.
 */
public class SourceSchema {

    /**
     * Delta table column names to read.
     */
    private final String[] columnNames;

    /**
     * Data types for {@link #columnNames}; the element at index {@code i} is the type of the
     * column name at index {@code i}.
     */
    private final LogicalType[] columnTypes;

    /**
     * Delta table {@link io.delta.standalone.Snapshot} version from which this schema (column
     * names and types) was acquired.
     */
    private final long snapshotVersion;

    /**
     * {@link List} with names of partition columns. If empty, then no partition columns were found
     * for given schema version.
     */
    private final List<String> partitionColumns;

    private SourceSchema(
            String[] columnNames,
            LogicalType[] columnTypes,
            long snapshotVersion,
            Collection<String> partitionColumns) {
        this.columnNames = columnNames;
        this.columnTypes = columnTypes;
        this.snapshotVersion = snapshotVersion;
        // Private copy, so later changes to the caller's collection cannot affect this schema.
        this.partitionColumns = new ArrayList<>(partitionColumns);
    }

    /**
     * Creates {@link SourceSchema} instance using {@link Metadata} information from Delta's
     * {@link Snapshot}.
     *
     * <p>In case {@code userColumnNames} parameter is defined, this method will extract type
     * information for every provided column. The created {@link SourceSchema} object will
     * contain only columns defined in {@code userColumnNames}, in the iteration order of that
     * collection.
     *
     * <p>If {@code userColumnNames} is empty or null, then created {@link SourceSchema} will
     * contain all table columns from Snapshot's metadata.
     *
     * <p>NOTE(review): a user column name that is not present in the table schema is handled by
     * {@link StructType#get(String)}; this method adds no check of its own -- confirm the failure
     * mode against the Delta Standalone documentation.
     *
     * @param userColumnNames user defined columns that if defined, should be read from Delta
     *                        table.
     * @param snapshot        {@link Snapshot} to extract schema information from.
     * @return A {@link SourceSchema} with column names and their {@link LogicalType}.
     * @throws IllegalArgumentException if the snapshot's metadata contains no schema.
     */
    public static SourceSchema fromSnapshot(
            Collection<String> userColumnNames,
            Snapshot snapshot) {

        Metadata metadata = snapshot.getMetadata();
        StructType tableSchema = metadata.getSchema();

        if (tableSchema == null) {
            throw new IllegalArgumentException(
                String.format(
                    "Unable to find Schema information in Delta log for Snapshot version [%d]",
                    snapshot.getVersion()
                ));
        }

        String[] columnNames;
        LogicalType[] columnTypes;

        if (userColumnNames != null && !userColumnNames.isEmpty()) {
            // Projection requested: keep only the user's columns, in the user's order.
            columnNames = new String[userColumnNames.size()];
            columnTypes = new LogicalType[userColumnNames.size()];
            int i = 0;
            for (String columnName : userColumnNames) {
                columnNames[i] = columnName;
                columnTypes[i] = toLogicalType(tableSchema.get(columnName));
                i++;
            }
        } else {
            // No projection: take every column of the table schema, in schema order.
            StructField[] fields = tableSchema.getFields();
            columnNames = new String[fields.length];
            columnTypes = new LogicalType[fields.length];
            for (int i = 0; i < fields.length; i++) {
                columnNames[i] = fields[i].getName();
                columnTypes[i] = toLogicalType(fields[i]);
            }
        }

        return new SourceSchema(
            columnNames,
            columnTypes,
            snapshot.getVersion(),
            metadata.getPartitionColumns()
        );
    }

    /**
     * Converts a Delta {@link StructField} to a Flink {@link LogicalType} using the field's data
     * type and nullability.
     */
    private static LogicalType toLogicalType(StructField field) {
        return SchemaConverter.toFlinkDataType(field.getDataType(), field.isNullable());
    }

    /**
     * @return an unmodifiable view of the partition column names; empty if none were found for
     * the schema version.
     */
    public List<String> getPartitionColumns() {
        return Collections.unmodifiableList(partitionColumns);
    }

    /**
     * @return Delta table column names that should be read from Delta table row. The returned
     * array is a copy; modifying it does not affect this schema.
     */
    public String[] getColumnNames() {
        return columnNames.clone();
    }

    /**
     * @return An array with {@link LogicalType} objects for column names returned by {@link
     * #getColumnNames()}. The returned array is a copy; modifying it does not affect this schema.
     */
    public LogicalType[] getColumnTypes() {
        return columnTypes.clone();
    }

    /**
     * @return a {@link io.delta.standalone.Snapshot} version for which this schema is valid.
     */
    public long getSnapshotVersion() {
        return snapshotVersion;
    }
}





© 2015 - 2024 Weber Informatics LLC | Privacy Policy