org.apache.parquet.cascading.ParquetValueScheme Maven / Gradle / Ivy
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.parquet.cascading;
import java.io.IOException;
import java.io.Serializable;
import org.apache.hadoop.mapred.JobConf;
import org.apache.hadoop.mapred.OutputCollector;
import org.apache.hadoop.mapred.RecordReader;
import cascading.flow.FlowProcess;
import cascading.scheme.Scheme;
import cascading.scheme.SinkCall;
import cascading.scheme.SourceCall;
import cascading.tap.Tap;
import cascading.tuple.Tuple;
import cascading.tuple.TupleEntry;
import org.apache.parquet.filter2.predicate.FilterPredicate;
import org.apache.parquet.hadoop.ParquetInputFormat;
import org.apache.parquet.hadoop.mapred.Container;
import org.apache.parquet.hadoop.thrift.ParquetThriftInputFormat;
import org.apache.parquet.hadoop.thrift.ThriftReadSupport;
import static org.apache.parquet.Preconditions.checkNotNull;
/**
* A Cascading Scheme that returns a simple Tuple with a single value: the "value" object
* coming out of the underlying InputFormat.
*
* This is an abstract class; implementations are expected to set up their input/output formats
* correctly in their respective conf-init methods (for example {@code sourceConfInit}).
*/
@Deprecated // The parquet-cascading module depends on Cascading 2.x, and is being superseded with parquet-cascading3 for Cascading 3.x
public abstract class ParquetValueScheme extends Scheme{
/**
 * Immutable, serializable holder for the optional read-side settings of a scheme:
 * a filter predicate, a deprecated projection string, a strict projection string
 * and a record class.
 *
 * Every setting starts out as {@code null}, which the enclosing scheme treats as
 * "not configured". The {@code with*} methods never modify the receiver; each one
 * returns a new {@code Config} that differs in exactly one setting.
 */
public static final class Config implements Serializable {
  private final FilterPredicate filterPredicate;
  private final String deprecatedProjectionString;
  private final String strictProjectionString;
  private final Class klass;

  /** Single point where all four settings are assigned; used by every {@code with*} method. */
  private Config(Class klass, FilterPredicate filterPredicate, String deprecatedProjectionString, String strictProjectionString) {
    this.filterPredicate = filterPredicate;
    this.deprecatedProjectionString = deprecatedProjectionString;
    this.strictProjectionString = strictProjectionString;
    this.klass = klass;
  }

  /** Creates a configuration in which no setting is configured (all values are {@code null}). */
  public Config() {
    this(null, null, null, null);
  }

  /**
   * @return the configured filter predicate, or {@code null} if none was set
   */
  public FilterPredicate getFilterPredicate() {
    return filterPredicate;
  }

  /**
   * @return the deprecated projection string, or {@code null} if none was set
   * @deprecated {@link #getStrictProjectionString()} is the non-deprecated counterpart
   */
  @Deprecated
  public String getProjectionString() {
    return deprecatedProjectionString;
  }

  /**
   * @return the strict projection string, or {@code null} if none was set
   */
  public String getStrictProjectionString() {
    return strictProjectionString;
  }

  /**
   * @return the configured record class, or {@code null} if none was set
   */
  public Class getKlass() {
    return klass;
  }

  /**
   * Returns a copy of this configuration with the filter predicate replaced.
   *
   * @param f the new filter predicate; validated with {@code checkNotNull}
   * @return a new {@code Config}; all other settings are carried over unchanged
   */
  public Config withFilterPredicate(FilterPredicate f) {
    return new Config(this.klass, checkNotNull(f, "filterPredicate"), this.deprecatedProjectionString, this.strictProjectionString);
  }

  /**
   * Returns a copy of this configuration with the deprecated projection string replaced.
   *
   * @param p the new projection string; validated with {@code checkNotNull}
   * @return a new {@code Config}; all other settings are carried over unchanged
   * @deprecated {@link #withStrictProjectionString(String)} is the non-deprecated counterpart
   */
  @Deprecated
  public Config withProjectionString(String p) {
    return new Config(this.klass, this.filterPredicate, checkNotNull(p, "projectionString"), this.strictProjectionString);
  }

  /**
   * Returns a copy of this configuration with the strict projection string replaced.
   *
   * @param p the new strict projection string; validated with {@code checkNotNull}
   * @return a new {@code Config}; all other settings are carried over unchanged
   */
  public Config withStrictProjectionString(String p) {
    // Use a label distinct from withProjectionString so a failed null check
    // identifies which of the two projection settings was at fault.
    return new Config(this.klass, this.filterPredicate, this.deprecatedProjectionString, checkNotNull(p, "strictProjectionString"));
  }

  /**
   * Returns a copy of this configuration with the record class replaced.
   *
   * @param klass the new record class; validated with {@code checkNotNull}
   * @return a new {@code Config}; all other settings are carried over unchanged
   */
  public Config withRecordClass(Class klass) {
    return new Config(checkNotNull(klass, "recordClass"), this.filterPredicate, this.deprecatedProjectionString, this.strictProjectionString);
  }
}
// Fixed serial version UID for this scheme (presumably the Scheme superclass is Serializable — confirm).
private static final long serialVersionUID = 157560846420730043L;
// Read-side settings that sourceConfInit copies into the JobConf; protected, so subclasses can read it.
protected final Config config;
/**
* Creates a scheme with an empty {@link Config}: no filter predicate, no projection
* strings and no record class.
*/
public ParquetValueScheme() {
this(new Config());
}
/**
* Creates a scheme whose {@link Config} carries only the given filter predicate.
*
* @param filterPredicate predicate handed to {@link Config#withFilterPredicate}, which
* validates it with {@code checkNotNull}
*/
public ParquetValueScheme(FilterPredicate filterPredicate) {
this(new Config().withFilterPredicate(filterPredicate));
}
/**
* Creates a scheme backed by the given configuration.
*
* @param config settings stored in the {@code config} field; it is stored as-is with no
* null check here, although the private pushdown helpers called from
* {@code sourceConfInit} dereference it
*/
public ParquetValueScheme(Config config) {
this.config = config;
}
/**
 * Forwards the deprecated projection string to {@code ThriftReadSupport}, if one was configured.
 *
 * @param jobConf job configuration that receives the projection setting
 */
@Deprecated
private void setProjectionPushdown(JobConf jobConf) {
  final String projection = config.deprecatedProjectionString;
  if (projection == null) {
    // Nothing configured: leave the job configuration untouched.
    return;
  }
  ThriftReadSupport.setProjectionPushdown(jobConf, projection);
}
/**
 * Forwards the strict projection string to {@code ThriftReadSupport}, if one was configured.
 *
 * @param jobConf job configuration that receives the strict projection filter
 */
private void setStrictProjectionPushdown(JobConf jobConf) {
  final String strictProjection = config.strictProjectionString;
  if (strictProjection == null) {
    // Nothing configured: leave the job configuration untouched.
    return;
  }
  ThriftReadSupport.setStrictFieldProjectionFilter(jobConf, strictProjection);
}
/**
 * Forwards the filter predicate to {@code ParquetInputFormat}, if one was configured.
 *
 * @param jobConf job configuration that receives the filter predicate
 */
private void setPredicatePushdown(JobConf jobConf) {
  final FilterPredicate predicate = config.filterPredicate;
  if (predicate == null) {
    // Nothing configured: leave the job configuration untouched.
    return;
  }
  ParquetInputFormat.setFilterPredicate(jobConf, predicate);
}
/**
* Copies this scheme's {@link Config} into the job configuration used for reading.
* Applies, in this order, the filter predicate, the deprecated projection string, the
* strict projection string and the record class; each helper does nothing when its
* setting is {@code null}.
*
* @param jobConfFlowProcess not used by this implementation
* @param jobConfRecordReaderOutputCollectorTap not used by this implementation
* @param jobConf job configuration passed to each of the private setters
*/
@Override
public void sourceConfInit(FlowProcess jobConfFlowProcess, Tap jobConfRecordReaderOutputCollectorTap, final JobConf jobConf) {
setPredicatePushdown(jobConf);
setProjectionPushdown(jobConf);
setStrictProjectionPushdown(jobConf);
setRecordClass(jobConf);
}
/**
 * Forwards the record class to {@code ParquetThriftInputFormat}, if one was configured.
 *
 * @param jobConf job configuration that receives the thrift class
 */
private void setRecordClass(JobConf jobConf) {
  if (config.klass == null) {
    // Nothing configured: leave the job configuration untouched.
    return;
  }
  ParquetThriftInputFormat.setThriftClass(jobConf, config.klass);
}
@SuppressWarnings("unchecked")
@Override
public boolean source(FlowProcess fp, SourceCall
© 2015 - 2025 Weber Informatics LLC | Privacy Policy