// Listing of io.prestosql.parquet.reader.PageReader (Maven / Gradle / Ivy)
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.prestosql.parquet.reader;
import io.prestosql.parquet.DataPage;
import io.prestosql.parquet.DataPageV1;
import io.prestosql.parquet.DataPageV2;
import io.prestosql.parquet.DictionaryPage;
import org.apache.parquet.hadoop.metadata.CompressionCodecName;
import java.io.IOException;
import java.util.LinkedList;
import java.util.List;
import static io.prestosql.parquet.ParquetCompressionUtils.decompress;
import static java.lang.Math.toIntExact;
/**
 * Hands out the data pages and the optional dictionary page of a single
 * sequence of Parquet pages, decompressing each one with the configured codec
 * at the moment it is requested.
 *
 * <p>Data pages are consumed: every successful {@link #readPage()} call removes
 * the returned page from the reader's internal list.
 */
class PageReader
{
    private final CompressionCodecName codec;
    private final long valueCount;
    private final List<DataPage> compressedPages;
    private final DictionaryPage compressedDictionaryPage;

    /**
     * Creates a reader over the given pages.
     *
     * @param codec codec passed to {@code decompress} for every page
     * @param compressedPages data pages in the order they should be returned;
     *        the list is copied, so the caller's list is never modified
     * @param compressedDictionaryPage dictionary page, or {@code null} when
     *        there is none
     */
    public PageReader(CompressionCodecName codec, List<DataPage> compressedPages, DictionaryPage compressedDictionaryPage)
    {
        this.codec = codec;
        // Private copy: readPage() removes elements from the head of this list.
        this.compressedPages = new LinkedList<>(compressedPages);
        this.compressedDictionaryPage = compressedDictionaryPage;

        // Accumulate in a long so the sum of per-page counts cannot wrap
        // around before it is stored in the long field.
        long count = 0;
        for (DataPage page : compressedPages) {
            count += page.getValueCount();
        }
        this.valueCount = count;
    }

    /**
     * Returns the sum of the value counts of all data pages passed to the
     * constructor. The result does not change as pages are read.
     */
    public long getTotalValueCount()
    {
        return valueCount;
    }

    /**
     * Removes the next data page from the reader and returns it in
     * decompressed form.
     *
     * <p>A {@link DataPageV2} whose {@code isCompressed()} flag is false is
     * returned unchanged. Any page that is not a {@link DataPageV1} is treated
     * as a {@link DataPageV2}.
     *
     * @return the next page, or {@code null} when no pages remain
     * @throws RuntimeException if decompression fails with an {@link IOException}
     * @throws ArithmeticException if the computed V2 data size does not fit in an {@code int}
     */
    public DataPage readPage()
    {
        if (compressedPages.isEmpty()) {
            return null;
        }
        DataPage compressedPage = compressedPages.remove(0);
        try {
            if (compressedPage instanceof DataPageV1) {
                DataPageV1 dataPageV1 = (DataPageV1) compressedPage;
                return new DataPageV1(
                        decompress(codec, dataPageV1.getSlice(), dataPageV1.getUncompressedSize()),
                        dataPageV1.getValueCount(),
                        dataPageV1.getUncompressedSize(),
                        dataPageV1.getRepetitionLevelEncoding(),
                        dataPageV1.getDefinitionLevelEncoding(),
                        dataPageV1.getValueEncoding());
            }

            DataPageV2 dataPageV2 = (DataPageV2) compressedPage;
            if (!dataPageV2.isCompressed()) {
                return dataPageV2;
            }
            // Only the data slice goes through decompress(); the repetition and
            // definition levels are passed along as-is, so their lengths are
            // subtracted from the page's total uncompressed size.
            int uncompressedSize = toIntExact(dataPageV2.getUncompressedSize()
                    - dataPageV2.getDefinitionLevels().length()
                    - dataPageV2.getRepetitionLevels().length());
            return new DataPageV2(
                    dataPageV2.getRowCount(),
                    dataPageV2.getNullCount(),
                    dataPageV2.getValueCount(),
                    dataPageV2.getRepetitionLevels(),
                    dataPageV2.getDefinitionLevels(),
                    dataPageV2.getDataEncoding(),
                    decompress(codec, dataPageV2.getSlice(), uncompressedSize),
                    dataPageV2.getUncompressedSize(),
                    dataPageV2.getStatistics(),
                    false);
        }
        catch (IOException e) {
            throw new RuntimeException("Could not decompress page", e);
        }
    }

    /**
     * Returns a decompressed copy of the dictionary page. The stored page is
     * not consumed, so every call decompresses it again.
     *
     * @return the decompressed dictionary page, or {@code null} when the
     *         reader was created without one
     * @throws RuntimeException if decompression fails with an {@link IOException}
     */
    public DictionaryPage readDictionaryPage()
    {
        if (compressedDictionaryPage == null) {
            return null;
        }
        try {
            return new DictionaryPage(
                    decompress(codec, compressedDictionaryPage.getSlice(), compressedDictionaryPage.getUncompressedSize()),
                    compressedDictionaryPage.getDictionarySize(),
                    compressedDictionaryPage.getEncoding());
        }
        catch (IOException e) {
            throw new RuntimeException("Error reading dictionary page", e);
        }
    }
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy