All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.flink.formats.csv.CsvRowSchemaConverter Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.formats.csv;

import org.apache.flink.api.common.typeinfo.BasicArrayTypeInfo;
import org.apache.flink.api.common.typeinfo.PrimitiveArrayTypeInfo;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.common.typeinfo.Types;
import org.apache.flink.api.java.typeutils.ObjectArrayTypeInfo;
import org.apache.flink.api.java.typeutils.RowTypeInfo;
import org.apache.flink.table.types.logical.ArrayType;
import org.apache.flink.table.types.logical.LogicalType;
import org.apache.flink.table.types.logical.LogicalTypeRoot;
import org.apache.flink.table.types.logical.RowType;

import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.dataformat.csv.CsvSchema;
import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.dataformat.csv.CsvSchema.Builder;
import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.dataformat.csv.CsvSchema.Column;

import java.util.Arrays;
import java.util.HashSet;
import java.util.List;

/**
 * Converter functions that covert Flink's type information to Jackson's {@link CsvSchema}.
 *
 * 

In {@link CsvSchema}, there are four types (string, number, boolean, and array). In order to * satisfy various Flink types, this class sorts out instances of {@link TypeInformation} and {@link * LogicalType} that are not supported. It converts supported types to one of CsvSchema's types. * *

Note: Changes in this class need to be kept in sync with the corresponding runtime classes * {@link CsvRowDeserializationSchema} and {@link CsvRowSerializationSchema}. */ public final class CsvRowSchemaConverter { private CsvRowSchemaConverter() { // private } /** * Types that can be converted to ColumnType.NUMBER. * *

From Jackson: Value should be a number, but literals "null", "true" and "false" are also * understood, and an empty String is considered null. Values are also trimmed (leading/trailing * white space). Other non-numeric Strings may cause parsing exception. */ private static final HashSet> NUMBER_TYPES = new HashSet<>( Arrays.asList( Types.BYTE, Types.SHORT, Types.INT, Types.LONG, Types.DOUBLE, Types.FLOAT, Types.BIG_DEC, Types.BIG_INT)); /** * Types that can be converted to ColumnType.NUMBER. * *

From Jackson: Value should be a number, but literals "null", "true" and "false" are also * understood, and an empty String is considered null. Values are also trimmed (leading/trailing * white space). Other non-numeric Strings may cause parsing exception. */ private static final HashSet NUMBER_TYPE_ROOTS = new HashSet<>( Arrays.asList( LogicalTypeRoot.TINYINT, LogicalTypeRoot.SMALLINT, LogicalTypeRoot.INTEGER, LogicalTypeRoot.BIGINT, LogicalTypeRoot.DOUBLE, LogicalTypeRoot.FLOAT, LogicalTypeRoot.DECIMAL)); /** * Types that can be converted to ColumnType.STRING. * *

From Jackson: Default type if not explicitly defined; no type-inference is performed, and * value is not trimmed. */ private static final HashSet> STRING_TYPES = new HashSet<>( Arrays.asList( Types.STRING, Types.SQL_DATE, Types.SQL_TIME, Types.SQL_TIMESTAMP, Types.LOCAL_DATE, Types.LOCAL_TIME, Types.LOCAL_DATE_TIME, Types.INSTANT)); /** * Types that can be converted to ColumnType.STRING. * *

From Jackson: Default type if not explicitly defined; no type-inference is performed, and * value is not trimmed. */ private static final HashSet STRING_TYPE_ROOTS = new HashSet<>( Arrays.asList( LogicalTypeRoot.CHAR, LogicalTypeRoot.VARCHAR, LogicalTypeRoot.BINARY, LogicalTypeRoot.VARBINARY, LogicalTypeRoot.DATE, LogicalTypeRoot.TIME_WITHOUT_TIME_ZONE, LogicalTypeRoot.TIMESTAMP_WITHOUT_TIME_ZONE, LogicalTypeRoot.TIMESTAMP_WITH_LOCAL_TIME_ZONE, LogicalTypeRoot.TIMESTAMP_WITH_TIME_ZONE)); /** * Types that can be converted to ColumnType.BOOLEAN. * *

From Jackson: Value is expected to be a boolean ("true", "false") String, or "null", or * empty String (equivalent to null). Values are trimmed (leading/trailing white space). Values * other than indicated above may result in an exception. */ private static final HashSet> BOOLEAN_TYPES = new HashSet<>(Arrays.asList(Types.BOOLEAN, Types.VOID)); /** * Types that can be converted to ColumnType.BOOLEAN. * *

From Jackson: Value is expected to be a boolean ("true", "false") String, or "null", or * empty String (equivalent to null). Values are trimmed (leading/trailing white space). Values * other than indicated above may result in an exception. */ private static final HashSet BOOLEAN_TYPE_ROOTS = new HashSet<>(Arrays.asList(LogicalTypeRoot.BOOLEAN, LogicalTypeRoot.NULL)); /** Convert {@link RowTypeInfo} to {@link CsvSchema}. */ public static CsvSchema convert(RowTypeInfo rowType) { final Builder builder = new CsvSchema.Builder(); final String[] fields = rowType.getFieldNames(); final TypeInformation[] types = rowType.getFieldTypes(); for (int i = 0; i < rowType.getArity(); i++) { builder.addColumn(new Column(i, fields[i], convertType(fields[i], types[i]))); } return builder.build(); } /** Convert {@link RowType} to {@link CsvSchema}. */ public static CsvSchema convert(RowType rowType) { Builder builder = new CsvSchema.Builder(); List fields = rowType.getFields(); for (int i = 0; i < rowType.getFieldCount(); i++) { String fieldName = fields.get(i).getName(); LogicalType fieldType = fields.get(i).getType(); builder.addColumn(new Column(i, fieldName, convertType(fieldName, fieldType))); } return builder.build(); } /** * Convert {@link TypeInformation} to {@link CsvSchema.ColumnType} based on Jackson's * categories. */ private static CsvSchema.ColumnType convertType(String fieldName, TypeInformation info) { if (STRING_TYPES.contains(info)) { return CsvSchema.ColumnType.STRING; } else if (NUMBER_TYPES.contains(info)) { return CsvSchema.ColumnType.NUMBER; } else if (BOOLEAN_TYPES.contains(info)) { return CsvSchema.ColumnType.BOOLEAN; } else if (info instanceof ObjectArrayTypeInfo) { validateNestedField(fieldName, ((ObjectArrayTypeInfo) info).getComponentInfo()); return CsvSchema.ColumnType.ARRAY; } else if (info instanceof BasicArrayTypeInfo) { validateNestedField(fieldName, ((BasicArrayTypeInfo) info).getComponentInfo()); return CsvSchema.ColumnType.ARRAY; } else if (info instanceof RowTypeInfo) { final TypeInformation[] types = ((RowTypeInfo) info).getFieldTypes(); for (TypeInformation type : types) { validateNestedField(fieldName, type); } return CsvSchema.ColumnType.ARRAY; } else if (info instanceof PrimitiveArrayTypeInfo && ((PrimitiveArrayTypeInfo) info).getComponentType() == Types.BYTE) { return CsvSchema.ColumnType.STRING; } else { throw new IllegalArgumentException( "Unsupported type information '" + info.toString() + "' for field '" + fieldName + "'."); } } /** * Convert {@link LogicalType} to {@link CsvSchema.ColumnType} based on Jackson's categories. */ private static CsvSchema.ColumnType convertType(String fieldName, LogicalType type) { if (STRING_TYPE_ROOTS.contains(type.getTypeRoot())) { return CsvSchema.ColumnType.STRING; } else if (NUMBER_TYPE_ROOTS.contains(type.getTypeRoot())) { return CsvSchema.ColumnType.NUMBER; } else if (BOOLEAN_TYPE_ROOTS.contains(type.getTypeRoot())) { return CsvSchema.ColumnType.BOOLEAN; } else if (type.getTypeRoot() == LogicalTypeRoot.ARRAY) { validateNestedField(fieldName, ((ArrayType) type).getElementType()); return CsvSchema.ColumnType.ARRAY; } else if (type.getTypeRoot() == LogicalTypeRoot.ROW) { RowType rowType = (RowType) type; for (LogicalType fieldType : rowType.getChildren()) { validateNestedField(fieldName, fieldType); } return CsvSchema.ColumnType.ARRAY; } else { throw new IllegalArgumentException( "Unsupported type '" + type.asSummaryString() + "' for field '" + fieldName + "'."); } } private static void validateNestedField(String fieldName, TypeInformation info) { if (!NUMBER_TYPES.contains(info) && !STRING_TYPES.contains(info) && !BOOLEAN_TYPES.contains(info)) { throw new IllegalArgumentException( "Only simple types are supported in the second level nesting of fields '" + fieldName + "' but was: " + info); } } private static void validateNestedField(String fieldName, LogicalType type) { if (!NUMBER_TYPE_ROOTS.contains(type.getTypeRoot()) && !STRING_TYPE_ROOTS.contains(type.getTypeRoot()) && !BOOLEAN_TYPE_ROOTS.contains(type.getTypeRoot())) { throw new IllegalArgumentException( "Only simple types are supported in the second level nesting of fields '" + fieldName + "' but was: " + type.asSummaryString()); } } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy