/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.parquet;

import java.util.Map;

import org.apache.hadoop.conf.Configuration;

import org.apache.parquet.bytes.ByteBufferAllocator;
import org.apache.parquet.compression.CompressionCodecFactory;
import org.apache.parquet.filter2.compat.FilterCompat;
import org.apache.parquet.format.converter.ParquetMetadataConverter.MetadataFilter;
import org.apache.parquet.hadoop.util.HadoopCodecs;

import static org.apache.parquet.hadoop.ParquetInputFormat.DICTIONARY_FILTERING_ENABLED;
import static org.apache.parquet.hadoop.ParquetInputFormat.RECORD_FILTERING_ENABLED;
import static org.apache.parquet.hadoop.ParquetInputFormat.STATS_FILTERING_ENABLED;
import static org.apache.parquet.hadoop.ParquetInputFormat.getFilter;
import static org.apache.parquet.hadoop.UnmaterializableRecordCounter.BAD_RECORD_THRESHOLD_CONF_KEY;
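/**
 * {@link ParquetReadOptions} backed by a Hadoop {@link Configuration}: property lookups
 * that miss the options' own property map fall through to the wrapped Configuration.
 *
 * <p>A minimal usage sketch (the input path and reader wiring below are illustrative,
 * assuming parquet-hadoop's {@code HadoopInputFile} and {@code ParquetFileReader} are
 * on the classpath):
 * <pre>{@code
 *   Configuration conf = new Configuration();
 *   ParquetReadOptions options = HadoopReadOptions.builder(conf).build();
 *   try (ParquetFileReader reader = ParquetFileReader.open(
 *       HadoopInputFile.fromPath(new Path("/tmp/example.parquet"), conf), options)) {
 *     // iterate over row groups ...
 *   }
 * }</pre>
 */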
public class HadoopReadOptions extends ParquetReadOptions {

  private static final String ALLOCATION_SIZE = "parquet.read.allocation.size";

  private final Configuration conf;

  private HadoopReadOptions(boolean useSignedStringMinMax,
                            boolean useStatsFilter,
                            boolean useDictionaryFilter,
                            boolean useRecordFilter,
                            FilterCompat.Filter recordFilter,
                            MetadataFilter metadataFilter,
                            CompressionCodecFactory codecFactory,
                            ByteBufferAllocator allocator,
                            int maxAllocationSize,
                            Map<String, String> properties,
                            Configuration conf) {
    super(
        useSignedStringMinMax, useStatsFilter, useDictionaryFilter, useRecordFilter, recordFilter,
        metadataFilter, codecFactory, allocator, maxAllocationSize, properties
    );
    this.conf = conf;
  }
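
  /**
   * Returns the value set directly on these options, falling back to the wrapped
   * Hadoop Configuration when the property has not been set here.
   */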
  @Override
  public String getProperty(String property) {
    String value = super.getProperty(property);
    if (value != null) {
      return value;
    }
    return conf.get(property);
  }
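
  /** Returns the Hadoop Configuration these options were built from. */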
  public Configuration getConf() {
    return conf;
  }
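
  /** Creates a {@link Builder} whose defaults are read from the given Configuration. */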
  public static Builder builder(Configuration conf) {
    return new Builder(conf);
  }
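
  /**
   * Builder that seeds every option from Hadoop configuration keys (signed string
   * min/max comparison, stats/dictionary/record filtering, codec factory, allocation
   * size, and the bad-record threshold) before allowing programmatic overrides.
   */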
  public static class Builder extends ParquetReadOptions.Builder {
    private final Configuration conf;

    public Builder(Configuration conf) {
      this.conf = conf;
      // Signed string min/max comparison is off by default; each filtering
      // feature is toggled by its own configuration key and defaults to on.
      useSignedStringMinMax(conf.getBoolean("parquet.strings.signed-min-max.enabled", false));
      useStatsFilter(conf.getBoolean(STATS_FILTERING_ENABLED, true));
      useDictionaryFilter(conf.getBoolean(DICTIONARY_FILTERING_ENABLED, true));
      useRecordFilter(conf.getBoolean(RECORD_FILTERING_ENABLED, true));
      withCodecFactory(HadoopCodecs.newFactory(conf, 0));
      withRecordFilter(getFilter(conf));
      withMaxAllocationInBytes(conf.getInt(ALLOCATION_SIZE, 8 * 1024 * 1024)); // 8 MB default
      // Carry the bad-record threshold through to the options' property map.
      String badRecordThresh = conf.get(BAD_RECORD_THRESHOLD_CONF_KEY);
      if (badRecordThresh != null) {
        set(BAD_RECORD_THRESHOLD_CONF_KEY, badRecordThresh);
      }
    }
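
    /** Builds a HadoopReadOptions that retains the Configuration for property fallback. */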
    @Override
    public ParquetReadOptions build() {
      return new HadoopReadOptions(
          useSignedStringMinMax, useStatsFilter, useDictionaryFilter, useRecordFilter,
          recordFilter, metadataFilter, codecFactory, allocator, maxAllocationSize, properties,
          conf);
    }
  }
}