// parquet.column.values.fallback.FallbackValuesWriter (Maven / Gradle / Ivy artifact listing)
// The newest version!
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package parquet.column.values.fallback;
import parquet.bytes.BytesInput;
import parquet.column.Encoding;
import parquet.column.page.DictionaryPage;
import parquet.column.values.RequiresFallback;
import parquet.column.values.ValuesWriter;
import parquet.io.api.Binary;
/**
 * A {@link ValuesWriter} that starts out delegating to an initial writer and
 * switches to a fallback writer when either
 * <ul>
 *   <li>the initial writer reports {@code shouldFallBack()} after a value is written, or</li>
 *   <li>on the first page, the initial writer reports that its compression is not
 *       satisfying when {@link #getBytes()} is called.</li>
 * </ul>
 *
 * @param <I> type of the initial writer; it must be a {@link ValuesWriter} that also
 *            implements {@link RequiresFallback}
 * @param <F> type of the writer to fall back to
 */
public class FallbackValuesWriter<I extends ValuesWriter & RequiresFallback, F extends ValuesWriter> extends ValuesWriter {

  /**
   * Static factory that lets the compiler infer the type parameters.
   *
   * @param initialWriter  writer to start with
   * @param fallBackWriter writer to switch to on fall back
   * @param <I> type of the initial writer
   * @param <F> type of the fallback writer
   * @return a new writer wrapping both
   */
  public static <I extends ValuesWriter & RequiresFallback, F extends ValuesWriter> FallbackValuesWriter<I, F> of(I initialWriter, F fallBackWriter) {
    return new FallbackValuesWriter<I, F>(initialWriter, fallBackWriter);
  }

  /** writer to start with */
  public final I initialWriter;

  /** fallback */
  public final F fallBackWriter;

  /** true once values have been handed over to the fallback writer */
  private boolean fellBackAlready = false;

  /** writer currently written to */
  private ValuesWriter currentWriter;

  /**
   * set by {@link #getEncoding()} when, before any fall back, the current encoding
   * used a dictionary; the dictionary operations then keep going to the initial writer
   */
  private boolean initialUsedAndHadDictionary = false;

  /*
   * Size of the raw data. Using a dictionary has no effect on this value. It is compared
   * with the encoded data size to decide whether falling back is better, and it is also
   * what getBufferedSize() returns, so pages are cut based on raw data size.
   */
  private long rawDataByteSize = 0;

  /** indicates if this is the first page being processed */
  private boolean firstPage = true;

  /**
   * @param initialWriter  writer to start with
   * @param fallBackWriter writer to switch to on fall back
   */
  public FallbackValuesWriter(I initialWriter, F fallBackWriter) {
    super();
    this.initialWriter = initialWriter;
    this.fallBackWriter = fallBackWriter;
    this.currentWriter = initialWriter;
  }

  /**
   * @return the raw (unencoded) size of the values written since the last {@link #reset()}
   */
  @Override
  public long getBufferedSize() {
    // use raw data size to decide if we want to flush the page
    // so the actual size of the page written could be much smaller
    // due to dictionary encoding. This prevents page being too big when fallback happens.
    return rawDataByteSize;
  }

  /**
   * Returns the bytes of the current page. On the first page, and only if no fall back
   * happened yet, the initial writer's output is checked against the raw data size and
   * a fall back is triggered when the initial writer deems the compression unsatisfying.
   *
   * @return the encoded bytes from whichever writer is current after that check
   */
  @Override
  public BytesInput getBytes() {
    if (!fellBackAlready && firstPage) {
      // we use the first page to decide if we're going to use this encoding
      BytesInput bytes = initialWriter.getBytes();
      if (!initialWriter.isCompressionSatisfying(rawDataByteSize, bytes.size())) {
        fallBack();
      } else {
        return bytes;
      }
    }
    return currentWriter.getBytes();
  }

  /**
   * Returns the encoding of the current writer and, as long as no fall back happened,
   * remembers whether that encoding used a dictionary.
   *
   * @return the encoding reported by the current writer
   */
  @Override
  public Encoding getEncoding() {
    Encoding encoding = currentWriter.getEncoding();
    if (!fellBackAlready && !initialUsedAndHadDictionary) {
      initialUsedAndHadDictionary = encoding.usesDictionary();
    }
    return encoding;
  }

  /**
   * Resets the raw size counter and the current writer; any later page is no longer
   * treated as the first page.
   */
  @Override
  public void reset() {
    rawDataByteSize = 0;
    firstPage = false;
    currentWriter.reset();
  }

  /**
   * @return the dictionary page of the initial writer if its dictionary encoding was
   *         observed by {@link #getEncoding()}, otherwise that of the current writer
   */
  public DictionaryPage createDictionaryPage() {
    if (initialUsedAndHadDictionary) {
      return initialWriter.createDictionaryPage();
    } else {
      return currentWriter.createDictionaryPage();
    }
  }

  /**
   * Resets the dictionary on the same writer {@link #createDictionaryPage()} would use,
   * then puts this writer back into its starting state: the initial writer is current
   * again and the next page counts as the first page.
   */
  public void resetDictionary() {
    if (initialUsedAndHadDictionary) {
      initialWriter.resetDictionary();
    } else {
      currentWriter.resetDictionary();
    }
    currentWriter = initialWriter;
    fellBackAlready = false;
    initialUsedAndHadDictionary = false;
    firstPage = true;
  }

  /**
   * @return the allocated size reported by the current writer only
   */
  @Override
  public long getAllocatedSize() {
    return currentWriter.getAllocatedSize();
  }

  /**
   * @param prefix text placed in front of each reported line
   * @return a description combining the memory usage strings of both wrapped writers
   */
  @Override
  public String memUsageString(String prefix) {
    return String.format(
        "%s FallbackValuesWriter{\n"
        + "%s\n"
        + "%s\n"
        + "%s}\n",
        prefix,
        initialWriter.memUsageString(prefix + " initial:"),
        fallBackWriter.memUsageString(prefix + " fallback:"),
        prefix
        );
  }

  /** Falls back if that has not happened yet and the initial writer asks for it. */
  private void checkFallback() {
    if (!fellBackAlready && initialWriter.shouldFallBack()) {
      fallBack();
    }
  }

  /**
   * Marks the fall back as done, asks the initial writer to hand its values over via
   * {@code fallBackAllValuesTo}, and makes the fallback writer the current one.
   */
  private void fallBack() {
    fellBackAlready = true;
    initialWriter.fallBackAllValuesTo(fallBackWriter);
    currentWriter = fallBackWriter;
  }

  // passthrough writing the value:
  // each method adds the raw size of the value, delegates to the current writer,
  // then checks whether the initial writer now wants to fall back

  public void writeByte(int value) {
    rawDataByteSize += 1;
    currentWriter.writeByte(value);
    checkFallback();
  }

  public void writeBytes(Binary v) {
    // for raw data, the length (4 bytes int) is stored, followed by the binary content itself
    rawDataByteSize += v.length() + 4;
    currentWriter.writeBytes(v);
    checkFallback();
  }

  public void writeInteger(int v) {
    rawDataByteSize += 4;
    currentWriter.writeInteger(v);
    checkFallback();
  }

  public void writeLong(long v) {
    rawDataByteSize += 8;
    currentWriter.writeLong(v);
    checkFallback();
  }

  public void writeFloat(float v) {
    rawDataByteSize += 4;
    currentWriter.writeFloat(v);
    checkFallback();
  }

  public void writeDouble(double v) {
    rawDataByteSize += 8;
    currentWriter.writeDouble(v);
    checkFallback();
  }
}