
com.hazelcast.jet.hadoop.impl.CsvInputFormat Maven / Gradle / Ivy
/*
* Copyright 2021 Hazelcast Inc.
*
* Licensed under the Hazelcast Community License (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://hazelcast.com/hazelcast-community-license
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.hazelcast.jet.hadoop.impl;
import com.hazelcast.com.fasterxml.jackson.databind.DeserializationFeature;
import com.hazelcast.com.fasterxml.jackson.databind.MappingIterator;
import com.hazelcast.com.fasterxml.jackson.databind.ObjectReader;
import com.hazelcast.com.fasterxml.jackson.dataformat.csv.CsvMapper;
import com.hazelcast.com.fasterxml.jackson.dataformat.csv.CsvParser.Feature;
import com.hazelcast.com.fasterxml.jackson.dataformat.csv.CsvSchema;
import com.hazelcast.com.fasterxml.jackson.dataformat.csv.CsvSchema.Builder;
import com.hazelcast.jet.JetException;
import com.hazelcast.jet.impl.util.ReflectionUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.compress.CompressionCodec;
import org.apache.hadoop.io.compress.CompressionCodecFactory;
import org.apache.hadoop.io.compress.SplittableCompressionCodec;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.JobContext;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.lib.input.LineRecordReader;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.List;
import java.util.function.Function;
import static com.hazelcast.jet.impl.util.Util.createFieldProjection;
import static java.util.function.Function.identity;
public class CsvInputFormat extends FileInputFormat {
/**
 * Public string constant with the value {@code "csv.bean.class"}.
 * NOTE(review): presumably a Hadoop configuration key naming the bean class
 * that CSV records are mapped to; its usage is not visible in this part of
 * the file, so confirm against the code that reads it.
 */
public static final String CSV_INPUT_FORMAT_BEAN_CLASS = "csv.bean.class";
/**
 * Public string constant with the value {@code "csv.field.list."}.
 * NOTE(review): the trailing dot and the {@code _PREFIX} suffix suggest that
 * a further key segment (e.g. an index) is appended to form complete
 * configuration keys for the field list; confirm against the code that
 * reads it.
 */
public static final String CSV_INPUT_FORMAT_FIELD_LIST_PREFIX = "csv.field.list.";
@Override
public RecordReader createRecordReader(InputSplit split, TaskAttemptContext context) {
return new RecordReader() {
private Class> formatClazz;
private final LineRecordReader reader = new LineRecordReader();
private ObjectReader objectReader;
private Function
© 2015 - 2025 Weber Informatics LLC | Privacy Policy