/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.formats.csv;

import org.apache.flink.annotation.Internal;
import org.apache.flink.formats.common.Converter;
import org.apache.flink.table.data.ArrayData;
import org.apache.flink.table.data.DecimalData;
import org.apache.flink.table.data.RowData;
import org.apache.flink.table.data.TimestampData;
import org.apache.flink.table.types.logical.ArrayType;
import org.apache.flink.table.types.logical.DecimalType;
import org.apache.flink.table.types.logical.LocalZonedTimestampType;
import org.apache.flink.table.types.logical.LogicalType;
import org.apache.flink.table.types.logical.RowType;
import org.apache.flink.table.types.logical.TimestampType;

import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.JsonNode;
import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.node.ArrayNode;
import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.node.ContainerNode;
import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.databind.node.ObjectNode;
import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.dataformat.csv.CsvMapper;

import java.io.Serializable;
import java.math.BigDecimal;
import java.time.LocalDate;
import java.time.LocalTime;
import java.time.format.DateTimeFormatter;
import java.util.Arrays;

import static java.time.format.DateTimeFormatter.ISO_LOCAL_DATE;
import static java.time.format.DateTimeFormatter.ISO_LOCAL_TIME;
import static org.apache.flink.formats.common.TimeFormats.SQL_TIMESTAMP_FORMAT;
import static org.apache.flink.formats.common.TimeFormats.SQL_TIMESTAMP_WITH_LOCAL_TIMEZONE_FORMAT;
import static org.apache.flink.shaded.jackson2.com.fasterxml.jackson.core.StreamWriteFeature.WRITE_BIGDECIMAL_AS_PLAIN;

/** Tool class used to convert from {@link RowData} to CSV-format {@link JsonNode}. */
@Internal
public class RowDataToCsvConverters implements Serializable {
private static final long serialVersionUID = 1L;
/**
* Runtime converter that converts objects of Flink Table & SQL internal data structures to
* corresponding {@link JsonNode}s.
*/
interface RowDataToCsvConverter
extends Converter<
RowData, JsonNode, RowDataToCsvConverter.RowDataToCsvFormatConverterContext> {
/**
* Converter context for passing the {@code CsvMapper} and the {@code container} that can be
* reused between transformations of the individual elements for performance reasons.
*/
class RowDataToCsvFormatConverterContext {
CsvMapper csvMapper;
ContainerNode<?> container;
public RowDataToCsvFormatConverterContext(
CsvMapper csvMapper, ContainerNode<?> container) {
this.csvMapper = csvMapper;
this.container = container;
}
}
}
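/** Converts a single field of a {@link RowData} at the given position into a {@link JsonNode}. */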
private interface RowFieldConverter extends Serializable {
JsonNode convert(CsvMapper csvMapper, ContainerNode<?> container, RowData row, int pos);
}
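/** Converts a single element of an {@link ArrayData} at the given position into a {@link JsonNode}. */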
private interface ArrayElementConverter extends Serializable {
JsonNode convert(CsvMapper csvMapper, ContainerNode<?> container, ArrayData array, int pos);
}
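// A minimal usage sketch (illustrative only; "rowType" and "rowData" are assumed to be supplied
// by the caller, and the CsvMapper/ObjectNode pair is created once and reused across rows for
// performance, as the context Javadoc above describes):
//
// RowDataToCsvConverter converter = RowDataToCsvConverters.createRowConverter(rowType);
// CsvMapper csvMapper = new CsvMapper();
// ObjectNode container = csvMapper.createObjectNode();
// RowDataToCsvConverter.RowDataToCsvFormatConverterContext context =
//         new RowDataToCsvConverter.RowDataToCsvFormatConverterContext(csvMapper, container);
// JsonNode node = converter.convert(rowData, context);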
public static RowDataToCsvConverter createRowConverter(RowType type) {
LogicalType[] fieldTypes =
type.getFields().stream()
.map(RowType.RowField::getType)
.toArray(LogicalType[]::new);
final String[] fieldNames = type.getFieldNames().toArray(new String[0]);
final RowFieldConverter[] fieldConverters =
Arrays.stream(fieldTypes)
.map(RowDataToCsvConverters::createNullableRowFieldConverter)
.toArray(RowFieldConverter[]::new);
final int rowArity = type.getFieldCount();
return (row, context) -> {
// top level reuses the object node container
final ObjectNode objectNode = (ObjectNode) context.container;
for (int i = 0; i < rowArity; i++) {
try {
objectNode.set(
fieldNames[i],
fieldConverters[i].convert(
context.csvMapper, context.container, row, i));
} catch (Throwable t) {
throw new RuntimeException(
String.format("Fail to serialize at field: %s.", fieldNames[i]), t);
}
}
return objectNode;
};
}
private static RowFieldConverter createNullableRowFieldConverter(LogicalType fieldType) {
final RowFieldConverter fieldConverter = createRowFieldConverter(fieldType);
return (csvMapper, container, row, pos) -> {
if (row.isNullAt(pos)) {
return container.nullNode();
}
return fieldConverter.convert(csvMapper, container, row, pos);
};
}
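/**
 * Creates a converter for a single non-null row field, dispatching on the root of its logical
 * type. MAP, MULTISET, and RAW fall through to the default branch and are rejected, as this CSV
 * format has no representation for them.
 */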
private static RowFieldConverter createRowFieldConverter(LogicalType fieldType) {
switch (fieldType.getTypeRoot()) {
case NULL:
return (csvMapper, container, row, pos) -> container.nullNode();
case BOOLEAN:
return (csvMapper, container, row, pos) ->
container.booleanNode(row.getBoolean(pos));
case TINYINT:
return (csvMapper, container, row, pos) -> container.numberNode(row.getByte(pos));
case SMALLINT:
return (csvMapper, container, row, pos) -> container.numberNode(row.getShort(pos));
case INTEGER:
case INTERVAL_YEAR_MONTH:
return (csvMapper, container, row, pos) -> container.numberNode(row.getInt(pos));
case BIGINT:
case INTERVAL_DAY_TIME:
return (csvMapper, container, row, pos) -> container.numberNode(row.getLong(pos));
case FLOAT:
return (csvMapper, container, row, pos) -> container.numberNode(row.getFloat(pos));
case DOUBLE:
return (csvMapper, container, row, pos) -> container.numberNode(row.getDouble(pos));
case CHAR:
case VARCHAR:
return (csvMapper, container, row, pos) ->
container.textNode(row.getString(pos).toString());
case BINARY:
case VARBINARY:
return (csvMapper, container, row, pos) -> container.binaryNode(row.getBinary(pos));
case DATE:
return (csvMapper, container, row, pos) -> convertDate(row.getInt(pos), container);
case TIME_WITHOUT_TIME_ZONE:
return (csvMapper, container, row, pos) -> convertTime(row.getInt(pos), container);
case TIMESTAMP_WITHOUT_TIME_ZONE:
final int timestampPrecision = ((TimestampType) fieldType).getPrecision();
return (csvMapper, container, row, pos) ->
convertTimestamp(
row.getTimestamp(pos, timestampPrecision),
container,
SQL_TIMESTAMP_FORMAT);
case TIMESTAMP_WITH_LOCAL_TIME_ZONE:
final int zonedTimestampPrecision =
((LocalZonedTimestampType) fieldType).getPrecision();
return (csvMapper, container, row, pos) ->
convertTimestamp(
row.getTimestamp(pos, zonedTimestampPrecision),
container,
SQL_TIMESTAMP_WITH_LOCAL_TIMEZONE_FORMAT);
case DECIMAL:
return createDecimalRowFieldConverter((DecimalType) fieldType);
case ARRAY:
return createArrayRowFieldConverter((ArrayType) fieldType);
case ROW:
return createRowRowFieldConverter((RowType) fieldType);
case MAP:
case MULTISET:
case RAW:
default:
throw new UnsupportedOperationException("Unsupported type: " + fieldType);
}
}
private static ArrayElementConverter createNullableArrayElementConverter(
LogicalType fieldType) {
final ArrayElementConverter elementConverter = createArrayElementConverter(fieldType);
return (csvMapper, container, array, pos) -> {
if (array.isNullAt(pos)) {
return container.nullNode();
}
return elementConverter.convert(csvMapper, container, array, pos);
};
}
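/**
 * Creates a converter for a single non-null array element. Unlike for row fields, nested ARRAY
 * and ROW elements are rejected here; see the note on the corresponding switch cases below.
 */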
private static ArrayElementConverter createArrayElementConverter(LogicalType fieldType) {
switch (fieldType.getTypeRoot()) {
case NULL:
return (csvMapper, container, array, pos) -> container.nullNode();
case BOOLEAN:
return (csvMapper, container, array, pos) ->
container.booleanNode(array.getBoolean(pos));
case TINYINT:
return (csvMapper, container, array, pos) ->
container.numberNode(array.getByte(pos));
case SMALLINT:
return (csvMapper, container, array, pos) ->
container.numberNode(array.getShort(pos));
case INTEGER:
case INTERVAL_YEAR_MONTH:
return (csvMapper, container, array, pos) ->
container.numberNode(array.getInt(pos));
case BIGINT:
case INTERVAL_DAY_TIME:
return (csvMapper, container, array, pos) ->
container.numberNode(array.getLong(pos));
case FLOAT:
return (csvMapper, container, array, pos) ->
container.numberNode(array.getFloat(pos));
case DOUBLE:
return (csvMapper, container, array, pos) ->
container.numberNode(array.getDouble(pos));
case CHAR:
case VARCHAR:
return (csvMapper, container, array, pos) ->
container.textNode(array.getString(pos).toString());
case BINARY:
case VARBINARY:
return (csvMapper, container, array, pos) ->
container.binaryNode(array.getBinary(pos));
case DATE:
return (csvMapper, container, array, pos) ->
convertDate(array.getInt(pos), container);
case TIME_WITHOUT_TIME_ZONE:
return (csvMapper, container, array, pos) ->
convertTime(array.getInt(pos), container);
case TIMESTAMP_WITHOUT_TIME_ZONE:
final int timestampPrecision = ((TimestampType) fieldType).getPrecision();
return (csvMapper, container, array, pos) ->
convertTimestamp(
array.getTimestamp(pos, timestampPrecision),
container,
SQL_TIMESTAMP_FORMAT);
case TIMESTAMP_WITH_LOCAL_TIME_ZONE:
final int localZonedTimestampPrecision =
((LocalZonedTimestampType) fieldType).getPrecision();
return (csvMapper, container, array, pos) ->
convertTimestamp(
array.getTimestamp(pos, localZonedTimestampPrecision),
container,
SQL_TIMESTAMP_WITH_LOCAL_TIMEZONE_FORMAT);
case DECIMAL:
return createDecimalArrayElementConverter((DecimalType) fieldType);
// we don't support ARRAY and ROW in an ARRAY, see
// CsvRowSchemaConverter#validateNestedField
case ARRAY:
case ROW:
case MAP:
case MULTISET:
case RAW:
default:
throw new UnsupportedOperationException("Unsupported type: " + fieldType);
}
}
// ------------------------------------------------------------------------------------------
// Field/Element Converters
// ------------------------------------------------------------------------------------------
private static RowFieldConverter createDecimalRowFieldConverter(DecimalType decimalType) {
final int precision = decimalType.getPrecision();
final int scale = decimalType.getScale();
return (csvMapper, container, row, pos) -> {
DecimalData decimal = row.getDecimal(pos, precision, scale);
return convertDecimal(csvMapper, decimal, container);
};
}
private static ArrayElementConverter createDecimalArrayElementConverter(
DecimalType decimalType) {
final int precision = decimalType.getPrecision();
final int scale = decimalType.getScale();
return (csvMapper, container, array, pos) -> {
DecimalData decimal = array.getDecimal(pos, precision, scale);
return convertDecimal(csvMapper, decimal, container);
};
}
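// Unless WRITE_BIGDECIMAL_AS_PLAIN is enabled on the mapper, trailing zeros are stripped first,
// e.g. 12.3400 is written as 12.34. Note that stripTrailingZeros() may switch to scientific
// notation, e.g. 100.00 becomes 1E+2.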
private static JsonNode convertDecimal(
CsvMapper csvMapper, DecimalData decimal, ContainerNode<?> container) {
BigDecimal bigDecimal = decimal.toBigDecimal();
return container.numberNode(
csvMapper.isEnabled(WRITE_BIGDECIMAL_AS_PLAIN)
? bigDecimal
: bigDecimal.stripTrailingZeros());
}
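// DATE values arrive as days since the Unix epoch, e.g. 18262 is formatted as "2020-01-01".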
private static JsonNode convertDate(int days, ContainerNode<?> container) {
LocalDate date = LocalDate.ofEpochDay(days);
return container.textNode(ISO_LOCAL_DATE.format(date));
}
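// TIME values arrive as milliseconds of the day, e.g. 3600000 is formatted as "01:00"
// (ISO_LOCAL_TIME omits seconds and fractions when they are zero).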
private static JsonNode convertTime(int millisecond, ContainerNode<?> container) {
LocalTime time = LocalTime.ofNanoOfDay(millisecond * 1_000_000L);
return container.textNode(ISO_LOCAL_TIME.format(time));
}
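// Used for both TIMESTAMP and TIMESTAMP_LTZ fields; the call sites above choose
// SQL_TIMESTAMP_FORMAT or SQL_TIMESTAMP_WITH_LOCAL_TIMEZONE_FORMAT accordingly.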
private static JsonNode convertTimestamp(
TimestampData timestamp, ContainerNode<?> container, DateTimeFormatter formatter) {
return container.textNode(formatter.format(timestamp.toLocalDateTime()));
}
private static RowFieldConverter createArrayRowFieldConverter(ArrayType type) {
LogicalType elementType = type.getElementType();
final ArrayElementConverter elementConverter =
createNullableArrayElementConverter(elementType);
return (csvMapper, container, row, pos) -> {
ArrayNode arrayNode = csvMapper.createArrayNode();
ArrayData arrayData = row.getArray(pos);
int numElements = arrayData.size();
for (int i = 0; i < numElements; i++) {
arrayNode.add(elementConverter.convert(csvMapper, arrayNode, arrayData, i));
}
return arrayNode;
};
}
private static RowFieldConverter createRowRowFieldConverter(RowType type) {
LogicalType[] fieldTypes =
type.getFields().stream()
.map(RowType.RowField::getType)
.toArray(LogicalType[]::new);
final RowFieldConverter[] fieldConverters =
Arrays.stream(fieldTypes)
.map(RowDataToCsvConverters::createNullableRowFieldConverter)
.toArray(RowFieldConverter[]::new);
final int rowArity = type.getFieldCount();
return (csvMapper, container, row, pos) -> {
final RowData value = row.getRow(pos, rowArity);
// nested rows use array node container
final ArrayNode arrayNode = csvMapper.createArrayNode();
for (int i = 0; i < rowArity; i++) {
arrayNode.add(fieldConverters[i].convert(csvMapper, arrayNode, value, i));
}
return arrayNode;
};
}
}