All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.iotdb.tsfile.encoding.encoder.RleEncoder Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.iotdb.tsfile.encoding.encoder;

import org.apache.iotdb.tsfile.common.conf.TSFileConfig;
import org.apache.iotdb.tsfile.common.conf.TSFileDescriptor;
import org.apache.iotdb.tsfile.exception.encoding.TsFileEncodingException;
import org.apache.iotdb.tsfile.file.metadata.enums.TSEncoding;
import org.apache.iotdb.tsfile.utils.Binary;
import org.apache.iotdb.tsfile.utils.ReadWriteForEncodingUtils;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.math.BigDecimal;
import java.util.ArrayList;
import java.util.List;

/**
 * Encodes values using a combination of run length encoding and bit packing, according to the
 * following grammar:
 *
 * 
{@code
 * rle-bit-packing-hybrid:   
 * length := length of the   in bytes stored as 4 bytes little endian
 * bitwidth := bitwidth for all encoded data in 
 * encoded-data := *
 * run :=  | 
 * bit-packed-run :=   
 * bit-packed-header := varint-encode( << 1 | 1)
 * lastBitPackedNum := the number of useful value in last bit-pack may be less than 8, so
 * lastBitPackedNum indicates how many values are useful
 * bit-packed-values :=  bit packed
 * rle-run :=  
 * rle-header := varint-encode( (number of times repeated) << 1)
 * repeated-value := value that is repeated, using a fixed-width of round-up-to-next-byte(bit-width)
 * }
* * @param data type T for RLE */ public abstract class RleEncoder> extends Encoder { private static final Logger logger = LoggerFactory.getLogger(RleEncoder.class); /** we save all value in a list and calculate its bitwidth. */ protected List values; /** the bit width used for bit-packing and rle. */ protected int bitWidth; /** for a given value now buffered, how many times it occurs. */ protected int repeatCount; /** the number of group which using bit packing, it is saved in header. */ protected int bitPackedGroupCount; /** the number of buffered value in array. */ protected int numBufferedValues; /** * we will write all bytes using bit-packing to OutputStream once. Before that, all bytes are * saved in list. */ protected List bytesBuffer; /** flag which indicate encoding mode false -- rle true -- bit-packing. */ protected boolean isBitPackRun; /** previous value written, used to detect repeated values. */ protected T preValue; /** array to buffer values temporarily. */ protected T[] bufferedValues; protected boolean isBitWidthSaved; /** output stream to buffer {@code }. */ protected ByteArrayOutputStream byteCache; protected TSFileConfig config = TSFileDescriptor.getInstance().getConfig(); /** constructor. */ protected RleEncoder() { super(TSEncoding.RLE); bytesBuffer = new ArrayList<>(); isBitPackRun = false; isBitWidthSaved = false; byteCache = new ByteArrayOutputStream(); } protected void reset() { numBufferedValues = 0; repeatCount = 0; bitPackedGroupCount = 0; bytesBuffer.clear(); isBitPackRun = false; isBitWidthSaved = false; byteCache.reset(); values.clear(); } /** * Write all values buffered in cache to OutputStream. * * @param out - byteArrayOutputStream * @throws IOException cannot flush to OutputStream */ @Override public void flush(ByteArrayOutputStream out) throws IOException { int lastBitPackedNum = numBufferedValues; if (repeatCount >= TSFileConfig.RLE_MIN_REPEATED_NUM) { try { writeRleRun(); } catch (IOException e) { logger.error( "tsfile-encoding RleEncoder : error occurs when writing nums to OutputStram " + "when flushing left nums. " + "numBufferedValues {}, repeatCount {}, bitPackedGroupCount{}, " + "isBitPackRun {}, isBitWidthSaved {}", numBufferedValues, repeatCount, bitPackedGroupCount, isBitPackRun, isBitWidthSaved, e); throw e; } } else if (numBufferedValues > 0) { clearBuffer(); writeOrAppendBitPackedRun(); endPreviousBitPackedRun(lastBitPackedNum); } else { endPreviousBitPackedRun(TSFileConfig.RLE_MIN_REPEATED_NUM); } // write length ReadWriteForEncodingUtils.writeUnsignedVarInt(byteCache.size(), out); byteCache.writeTo(out); reset(); } /** * Write bytes to OutputStream using rle. rle format: {@code [header][value] header: (repeated * value) << 1} * * @throws IOException cannot write RLE run */ protected abstract void writeRleRun() throws IOException; /** Start a bit-packing run transform values to bytes and buffer them in cache. */ public void writeOrAppendBitPackedRun() { if (bitPackedGroupCount >= TSFileConfig.RLE_MAX_BIT_PACKED_NUM) { // we've packed as many values as we can for this run, // end it and start a new one endPreviousBitPackedRun(TSFileConfig.RLE_MIN_REPEATED_NUM); } if (!isBitPackRun) { isBitPackRun = true; } convertBuffer(); numBufferedValues = 0; repeatCount = 0; ++bitPackedGroupCount; } /** * End a bit-packing run write all bit-packing group to OutputStream bit-packing format: {@code * [header][lastBitPackedNum][bit-packing group]+ [bit-packing group]+ are saved in List * bytesBuffer }. * * @param lastBitPackedNum - in last bit-packing group, it may have useful values less than 8. * This param indicates how many values are useful */ protected void endPreviousBitPackedRun(int lastBitPackedNum) { if (!isBitPackRun) { return; } byte bitPackHeader = (byte) ((bitPackedGroupCount << 1) | 1); byteCache.write(bitPackHeader); byteCache.write(lastBitPackedNum); for (byte[] bytes : bytesBuffer) { byteCache.write(bytes, 0, bytes.length); } bytesBuffer.clear(); isBitPackRun = false; bitPackedGroupCount = 0; } /** * Encode T value using rle or bit-packing. It may not write to OutputStream immediately * * @param value - value to encode */ @SuppressWarnings("squid:S3776") // Suppress high Cognitive Complexity warning protected void encodeValue(T value) { if (!isBitWidthSaved) { // save bit width in header, // perpare for read byteCache.write(bitWidth); isBitWidthSaved = true; } if (value.equals(preValue)) { repeatCount++; if (repeatCount >= TSFileConfig.RLE_MIN_REPEATED_NUM && repeatCount <= TSFileConfig.RLE_MAX_REPEATED_NUM) { // value occurs more than RLE_MIN_REPEATED_NUM times but less than // EncodingConfig.RLE_MAX_REPEATED_NUM // we'll use rle, so just keep on counting repeats for now // we'll write current value to OutputStream when we encounter a different value return; } else if (repeatCount == TSFileConfig.RLE_MAX_REPEATED_NUM + 1) { // value occurs more than EncodingConfig.RLE_MAX_REPEATED_NUM // we'll write current rle run to stream and keep on counting current value repeatCount = TSFileConfig.RLE_MAX_REPEATED_NUM; try { writeRleRun(); logger.debug("tsfile-encoding RleEncoder : write full rle run to stream"); } catch (IOException e) { logger.error( " error occurs when writing full rle run to OutputStram when repeatCount = {}." + "numBufferedValues {}, repeatCount {}, bitPackedGroupCount{}, " + "isBitPackRun {}, isBitWidthSaved {}", TSFileConfig.RLE_MAX_REPEATED_NUM + 1, numBufferedValues, repeatCount, bitPackedGroupCount, isBitPackRun, isBitWidthSaved, e); } repeatCount = 1; preValue = value; } } else { // we encounter a differnt value if (repeatCount >= TSFileConfig.RLE_MIN_REPEATED_NUM) { try { writeRleRun(); } catch (IOException e) { logger.error( "tsfile-encoding RleEncoder : error occurs when writing num to OutputStram " + "when repeatCount > {}." + "numBufferedValues {}, repeatCount {}, bitPackedGroupCount{}, isBitPackRun {}, " + "isBitWidthSaved {}", TSFileConfig.RLE_MIN_REPEATED_NUM, numBufferedValues, repeatCount, bitPackedGroupCount, isBitPackRun, isBitWidthSaved, e); } } repeatCount = 1; preValue = value; } bufferedValues[numBufferedValues] = value; numBufferedValues++; // if none of value we encountered occurs more MAX_REPEATED_NUM times // we'll use bit-packing if (numBufferedValues == TSFileConfig.RLE_MIN_REPEATED_NUM) { writeOrAppendBitPackedRun(); } } /** clean all useless value in bufferedValues and set 0. */ protected abstract void clearBuffer(); protected abstract void convertBuffer(); @Override public void encode(boolean value, ByteArrayOutputStream out) { throw new TsFileEncodingException(getClass().getName()); } @Override public void encode(short value, ByteArrayOutputStream out) { throw new TsFileEncodingException(getClass().getName()); } @Override public void encode(int value, ByteArrayOutputStream out) { throw new TsFileEncodingException(getClass().getName()); } @Override public void encode(long value, ByteArrayOutputStream out) { throw new TsFileEncodingException(getClass().getName()); } @Override public void encode(float value, ByteArrayOutputStream out) { throw new TsFileEncodingException(getClass().getName()); } @Override public void encode(double value, ByteArrayOutputStream out) { throw new TsFileEncodingException(getClass().getName()); } @Override public void encode(Binary value, ByteArrayOutputStream out) { throw new TsFileEncodingException(getClass().getName()); } @Override public void encode(BigDecimal value, ByteArrayOutputStream out) { throw new TsFileEncodingException(getClass().getName()); } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy