All downloads are free. Search and download functionality uses the official Maven repository.

org.apache.parquet.HadoopReadOptions Maven / Gradle / Ivy

There is a newer version: 1.14.4
Show newest version
/*
 *  Licensed to the Apache Software Foundation (ASF) under one
 *  or more contributor license agreements.  See the NOTICE file
 *  distributed with this work for additional information
 *  regarding copyright ownership.  The ASF licenses this file
 *  to you under the Apache License, Version 2.0 (the
 *  "License"); you may not use this file except in compliance
 *  with the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 *  Unless required by applicable law or agreed to in writing,
 *  software distributed under the License is distributed on an
 *  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 *  KIND, either express or implied.  See the License for the
 *  specific language governing permissions and limitations
 *  under the License.
 */

package org.apache.parquet;

import org.apache.hadoop.conf.Configuration;
import org.apache.parquet.bytes.ByteBufferAllocator;
import org.apache.parquet.compression.CompressionCodecFactory;
import org.apache.parquet.filter2.compat.FilterCompat;
import org.apache.parquet.format.converter.ParquetMetadataConverter.MetadataFilter;
import org.apache.parquet.hadoop.util.HadoopCodecs;

import java.util.Map;

import static org.apache.parquet.hadoop.ParquetInputFormat.DICTIONARY_FILTERING_ENABLED;
import static org.apache.parquet.hadoop.ParquetInputFormat.RECORD_FILTERING_ENABLED;
import static org.apache.parquet.hadoop.ParquetInputFormat.STATS_FILTERING_ENABLED;
import static org.apache.parquet.hadoop.ParquetInputFormat.getFilter;
import static org.apache.parquet.hadoop.UnmaterializableRecordCounter.BAD_RECORD_THRESHOLD_CONF_KEY;

/**
 * Hadoop-specific {@link ParquetReadOptions} whose defaults and additional properties
 * are sourced from a Hadoop {@link Configuration}. Instances are created via
 * {@link #builder(Configuration)}.
 */
public class HadoopReadOptions extends ParquetReadOptions {
  private final Configuration conf;

  // Configuration key for the maximum read buffer allocation size, in bytes.
  private static final String ALLOCATION_SIZE = "parquet.read.allocation.size";

  private HadoopReadOptions(boolean useSignedStringMinMax,
                            boolean useStatsFilter,
                            boolean useDictionaryFilter,
                            boolean useRecordFilter,
                            FilterCompat.Filter recordFilter,
                            MetadataFilter metadataFilter,
                            CompressionCodecFactory codecFactory,
                            ByteBufferAllocator allocator,
                            int maxAllocationSize,
                            Map properties,
                            Configuration conf) {
    super(
        useSignedStringMinMax, useStatsFilter, useDictionaryFilter, useRecordFilter, recordFilter,
        metadataFilter, codecFactory, allocator, maxAllocationSize, properties
    );
    this.conf = conf;
  }

  /**
   * Looks up a property, preferring explicitly-set options and falling back to the
   * underlying Hadoop configuration when the option layer has no value.
   *
   * @param property the property name to look up
   * @return the property value, or {@code null} if it is set in neither source
   */
  @Override
  public String getProperty(String property) {
    String value = super.getProperty(property);
    if (value != null) {
      return value;
    }
    return conf.get(property);
  }

  /** Returns the Hadoop configuration backing these options. */
  public Configuration getConf() {
    return conf;
  }

  /** Creates a builder whose defaults are read from the given Hadoop configuration. */
  public static Builder builder(Configuration conf) {
    return new Builder(conf);
  }

  /** Builder that initializes every option from Hadoop configuration keys. */
  public static class Builder extends ParquetReadOptions.Builder {
    private final Configuration conf;

    public Builder(Configuration conf) {
      this.conf = conf;
      useSignedStringMinMax(conf.getBoolean("parquet.strings.signed-min-max.enabled", false));
      // FIX: the original swapped these two keys — dictionary filtering was controlled
      // by STATS_FILTERING_ENABLED and vice versa. Each option now reads its own key.
      useStatsFilter(conf.getBoolean(STATS_FILTERING_ENABLED, true));
      useDictionaryFilter(conf.getBoolean(DICTIONARY_FILTERING_ENABLED, true));
      useRecordFilter(conf.getBoolean(RECORD_FILTERING_ENABLED, true));
      withCodecFactory(HadoopCodecs.newFactory(conf, 0));
      withRecordFilter(getFilter(conf));
      withMaxAllocationInBytes(conf.getInt(ALLOCATION_SIZE, 8388608)); // default 8 MiB
      // Propagate the bad-record threshold only when the user actually set it,
      // so the option layer does not shadow an absent configuration value.
      String badRecordThresh = conf.get(BAD_RECORD_THRESHOLD_CONF_KEY);
      if (badRecordThresh != null) {
        set(BAD_RECORD_THRESHOLD_CONF_KEY, badRecordThresh);
      }
    }

    @Override
    public ParquetReadOptions build() {
      return new HadoopReadOptions(
          useSignedStringMinMax, useStatsFilter, useDictionaryFilter, useRecordFilter,
          recordFilter, metadataFilter, codecFactory, allocator, maxAllocationSize, properties,
          conf);
    }
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy