com.nvidia.spark.rapids.iceberg.parquet.ParquetUtil Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of rapids-4-spark_2.13 Show documentation
Show all versions of rapids-4-spark_2.13 Show documentation
Creates the distribution package of the RAPIDS plugin for Apache Spark
/*
* Copyright (c) 2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.nvidia.spark.rapids.iceberg.parquet;
import java.util.Set;
import org.apache.iceberg.relocated.com.google.common.collect.Sets;
import org.apache.parquet.column.Encoding;
import org.apache.parquet.column.EncodingStats;
import org.apache.parquet.hadoop.metadata.ColumnChunkMetaData;
/**
 * Static helpers for inspecting Parquet column chunk metadata.
 *
 * <p>Derived from Apache Iceberg's ParquetUtil class.
 */
public final class ParquetUtil {

  // not meant to be instantiated
  private ParquetUtil() {
  }

  /**
   * Reports whether a column chunk may contain pages that are not dictionary encoded.
   *
   * <p>When the chunk carries {@link EncodingStats}, the answer is taken directly from
   * {@link EncodingStats#hasNonDictionaryEncodedPages()}. Otherwise the chunk's encoding
   * list is examined, and the method answers conservatively: it returns {@code true}
   * whenever the list cannot prove that every data page is dictionary encoded.
   *
   * @param meta metadata of the column chunk to inspect
   * @return {@code false} only when the available metadata shows that no encoding other
   *     than dictionary (plus RLE / BIT_PACKED, which the fallback treats as level
   *     encodings) is present; {@code true} otherwise
   */
  // NOTE(review): the suppression presumably covers the deprecated Encoding constants
  // referenced below (PLAIN_DICTIONARY, BIT_PACKED) -- confirm against the Parquet version in use.
  @SuppressWarnings("deprecation")
  public static boolean hasNonDictionaryPages(ColumnChunkMetaData meta) {
    EncodingStats stats = meta.getEncodingStats();
    if (stats != null) {
      return stats.hasNonDictionaryEncodedPages();
    }

    // without EncodingStats, fall back to testing the encoding list.
    // Work on a private, typed copy so the removals below never touch the
    // collection owned by the metadata object.
    Set<Encoding> encodings = Sets.newHashSet(meta.getEncodings());

    if (encodings.remove(Encoding.PLAIN_DICTIONARY)) {
      // if remove returned true, PLAIN_DICTIONARY was present, which means at
      // least one page was dictionary encoded and 1.0 encodings are used

      // RLE and BIT_PACKED are only used for repetition or definition levels
      encodings.remove(Encoding.RLE);
      encodings.remove(Encoding.BIT_PACKED);

      // when empty, no encodings other than dictionary or rep/def levels
      return !encodings.isEmpty();
    } else {
      // if PLAIN_DICTIONARY wasn't present, then either the column is not
      // dictionary-encoded, or the 2.0 encoding, RLE_DICTIONARY, was used.
      // for 2.0, this cannot determine whether a page fell back without
      // page encoding stats
      return true;
    }
  }
}