// org.apache.parquet.filter2.bloomfilterlevel.BloomFilterImpl (Maven / Gradle / Ivy)
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.parquet.filter2.bloomfilterlevel;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.parquet.column.values.bloomfilter.BloomFilter;
import org.apache.parquet.filter2.predicate.FilterPredicate;
import org.apache.parquet.filter2.predicate.Operators;
import org.apache.parquet.filter2.predicate.UserDefinedPredicate;
import org.apache.parquet.hadoop.BloomFilterReader;
import org.apache.parquet.hadoop.metadata.ColumnChunkMetaData;
import org.apache.parquet.hadoop.metadata.ColumnPath;
import static org.apache.parquet.Preconditions.checkNotNull;
public class BloomFilterImpl implements FilterPredicate.Visitor{
private static final Logger LOG = LoggerFactory.getLogger(BloomFilterImpl.class);
private static final boolean BLOCK_MIGHT_MATCH = false;
private static final boolean BLOCK_CANNOT_MATCH = true;
private final Map columns = new HashMap();
public static boolean canDrop(FilterPredicate pred, List columns, BloomFilterReader bloomFilterReader) {
checkNotNull(pred, "pred");
checkNotNull(columns, "columns");
return pred.accept(new BloomFilterImpl(columns, bloomFilterReader));
}
private BloomFilterImpl(List columnsList, BloomFilterReader bloomFilterReader) {
for (ColumnChunkMetaData chunk : columnsList) {
columns.put(chunk.getPath(), chunk);
}
this.bloomFilterReader = bloomFilterReader;
}
private BloomFilterReader bloomFilterReader;
private ColumnChunkMetaData getColumnChunk(ColumnPath columnPath) {
return columns.get(columnPath);
}
@Override
public > Boolean visit(Operators.Eq eq) {
T value = eq.getValue();
if (value == null) {
// the bloom filter bitset contains only non-null values so isn't helpful. this
// could check the column stats, but the StatisticsFilter is responsible
return BLOCK_MIGHT_MATCH;
}
Operators.Column filterColumn = eq.getColumn();
ColumnChunkMetaData meta = getColumnChunk(filterColumn.getColumnPath());
if (meta == null) {
// the column isn't in this file so all values are null, but the value
// must be non-null because of the above check.
return BLOCK_CANNOT_MATCH;
}
try {
BloomFilter bloomFilter = bloomFilterReader.readBloomFilter(meta);
if (bloomFilter != null && !bloomFilter.findHash(bloomFilter.hash(value))) {
return BLOCK_CANNOT_MATCH;
}
} catch (RuntimeException e) {
LOG.warn(e.getMessage());
return BLOCK_MIGHT_MATCH;
}
return BLOCK_MIGHT_MATCH;
}
@Override
public > Boolean visit(Operators.NotEq notEq) {
return BLOCK_MIGHT_MATCH;
}
@Override
public > Boolean visit(Operators.Lt lt) {
return BLOCK_MIGHT_MATCH;
}
@Override
public > Boolean visit(Operators.LtEq ltEq) {
return BLOCK_MIGHT_MATCH;
}
@Override
public > Boolean visit(Operators.Gt gt) {
return BLOCK_MIGHT_MATCH;
}
@Override
public > Boolean visit(Operators.GtEq gtEq) {
return BLOCK_MIGHT_MATCH;
}
@Override
public Boolean visit(Operators.And and) {
return and.getLeft().accept(this) || and.getRight().accept(this);
}
@Override
public Boolean visit(Operators.Or or) {
return or.getLeft().accept(this) && or.getRight().accept(this);
}
@Override
public Boolean visit(Operators.Not not) {
throw new IllegalArgumentException(
"This predicate contains a not! Did you forget to run this predicate through LogicalInverseRewriter? " + not);
}
private , U extends UserDefinedPredicate> Boolean visit(Operators.UserDefined ud, boolean inverted) {
return BLOCK_MIGHT_MATCH;
}
@Override
public , U extends UserDefinedPredicate> Boolean visit(Operators.UserDefined udp) {
return visit(udp, false);
}
@Override
public , U extends UserDefinedPredicate> Boolean visit(Operators.LogicalNotUserDefined udp) {
return visit(udp.getUserDefined(), true);
}
}