Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance. Project price only 1 $
You can buy this project and download/modify it as often as you want.
/*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.facebook.presto.orc.writer;
import com.facebook.presto.common.block.AbstractVariableWidthBlockBuilder;
import com.facebook.presto.common.block.Block;
import com.facebook.presto.common.block.BlockBuilder;
import com.facebook.presto.common.block.BlockBuilderStatus;
import com.google.common.annotations.VisibleForTesting;
import io.airlift.slice.DynamicSliceOutput;
import io.airlift.slice.Slice;
import io.airlift.slice.SliceInput;
import io.airlift.slice.XxHash64;
import org.openjdk.jol.info.ClassLayout;
import java.util.Arrays;
import java.util.OptionalInt;
import java.util.function.ObjLongConsumer;
import static com.facebook.presto.orc.writer.SegmentedSliceBlockBuilder.Segments.INITIAL_SEGMENTS;
import static com.facebook.presto.orc.writer.SegmentedSliceBlockBuilder.Segments.SEGMENT_SIZE;
import static com.facebook.presto.orc.writer.SegmentedSliceBlockBuilder.Segments.offset;
import static com.facebook.presto.orc.writer.SegmentedSliceBlockBuilder.Segments.segment;
import static io.airlift.slice.SizeOf.sizeOf;
import static java.lang.String.format;
/**
* Custom Block Builder implementation for use with SliceDictionaryBuilder.
* Instead of using one large contiguous Slice for storing the unique Strings
* in String dictionary, this class uses Segmented Slices. The main advantage
* of this class over VariableWidthBlockBuilder is memory. Non-contiguous
* memory is more likely to be available, which reduces the chance of OOMs.
*
* Why implement a block builder ?
* SliceDictionaryBuilder takes in a Block and Position to write.
* 1. It can create a slice for the position and write it. This does not
* require a block builder, but the temporary slices produce a lot of
* short-lived garbage.
* 2. A block and position can be copied to BlockBuilder using the method
* Block.writeBytesTo . But this requires implementing the BlockBuilder interface.
* Most of the methods are going to be unused and left as Unsupported.
*
* What's the difference between this class and VariableWidthBlockBuilder?
* This class is different from VariableWidthBlockBuilder in the following ways
* 1. It does not support nulls. (So null byte array and management is avoided).
* 2. Instead of using one contiguous chunk for storing all the entries,
* they are segmented.
*
* How is it implemented ?
* The strings from 0 to SEGMENT_SIZE - 1 are stored in the first segment.
* The strings from SEGMENT_SIZE to 2 * SEGMENT_SIZE - 1 go to the second.
* Each segment has a Slice (data is concatenated and stored in one slice)
* and offsets to capture the start offset and length. New slices are appended
* to the open segment. Once the segment is full the segment is
* finalized and appended to the closed segments. A new open segment is
* created for further appends.
*/
public class SegmentedSliceBlockBuilder
extends AbstractVariableWidthBlockBuilder
{
// Instance size of this class as computed by ClassLayout; added to getRetainedSizeInBytes().
private static final int INSTANCE_SIZE = ClassLayout.parseClass(SegmentedSliceBlockBuilder.class).instanceSize();
// Value handed to every DynamicSliceOutput this builder creates (constructor and reset()).
private final int expectedBytes;
// Output whose underlying slice is returned as the raw slice of the open segment.
private DynamicSliceOutput openSliceOutput;
// One offsets array per segment; within a segment, entry i has length offsets[segment][i + 1] - offsets[segment][i].
private int[][] offsets;
// Raw slices of the segments other than the open one, indexed by segment.
private Slice[] closedSlices;
// Running total that getRetainedSizeInBytes() adds for the closed slices; zeroed by reset().
private long closedSlicesRetainedSize;
// Running total that getSizeInBytes() adds for the closed slices; zeroed by reset().
private long closedSlicesSizeInBytes;
// Index of the segment currently served by openSliceOutput.
private int openSegmentIndex;
// Combined with openSegmentIndex by Segments.getPositions to produce the position count.
// NOTE(review): presumably the number of entries written to the open segment - confirm.
private int openSegmentOffset;
/**
 * Creates an empty builder with its first segment ready for appends.
 *
 * @param expectedEntries expected number of entries; used to pre-size the per-segment arrays
 * @param expectedBytes value handed to the {@link DynamicSliceOutput} created for the open segment
 */
public SegmentedSliceBlockBuilder(int expectedEntries, int expectedBytes)
{
    this.expectedBytes = expectedBytes;
    this.openSliceOutput = new DynamicSliceOutput(expectedBytes);

    // Reserve slots for at least INITIAL_SEGMENTS segments, or as many as expectedEntries maps to.
    int segmentSlots = Math.max(INITIAL_SEGMENTS, segment(expectedEntries) + 1);
    this.closedSlices = new Slice[segmentSlots];
    this.offsets = new int[segmentSlots][];

    // Only the first segment's offsets are allocated up front.
    this.offsets[0] = new int[SEGMENT_SIZE + 1];
}
/**
 * Returns the builder to its empty state, dropping references to everything
 * except the first offsets array.
 */
public void reset()
{
    // DynamicSliceOutput.reset() does not shrink memory. When the dictionary is converted
    // to direct, the output must give its memory up to reduce memory pressure, so a
    // fresh one is allocated instead.
    openSliceOutput = new DynamicSliceOutput(expectedBytes);

    // Forget every closed segment and its accounting.
    Arrays.fill(closedSlices, null);
    closedSlicesSizeInBytes = 0;
    closedSlicesRetainedSize = 0;

    // Keep the first offsets array (zeroed for reuse) and release all the others.
    int[] firstSegmentOffsets = offsets[0];
    Arrays.fill(firstSegmentOffsets, 0);
    Arrays.fill(offsets, 1, offsets.length, null);

    // Appends start again at the beginning of segment 0.
    openSegmentOffset = 0;
    openSegmentIndex = 0;
}
/**
 * Returns the offset of the given position, as reported by {@code getOffset}.
 *
 * @param position position to look up
 * @return the value of {@code getOffset(position)}
 */
@Override
public int getPositionOffset(int position)
{
    int positionOffset = getOffset(position);
    return positionOffset;
}
/**
 * Returns the length of the entry at the given position, computed as the
 * difference between two consecutive offsets within the entry's segment.
 *
 * @param position position to look up
 * @return end offset minus start offset of the entry
 */
@Override
public int getSliceLength(int position)
{
    int indexInSegment = offset(position);
    int[] segmentOffsets = offsets[segment(position)];
    int start = segmentOffsets[indexInSegment];
    int end = segmentOffsets[indexInSegment + 1];
    return end - start;
}
/**
 * Returns the slice that stores the data of the given segment: the stored
 * closed slice, or the open output's underlying slice for the open segment.
 *
 * @param segment segment index
 */
private Slice getSegmentRawSlice(int segment)
{
    if (segment != openSegmentIndex) {
        return closedSlices[segment];
    }
    return openSliceOutput.getUnderlyingSlice();
}
/**
 * Returns the raw slice of the segment that contains the given position.
 *
 * @param position position to look up
 */
@Override
public Slice getRawSlice(int position)
{
    int segmentIndex = segment(position);
    return getSegmentRawSlice(segmentIndex);
}
/**
 * Returns the position count that {@code Segments.getPositions} derives from
 * the open segment's index and offset.
 */
@Override
public int getPositionCount()
{
    int segmentIndex = openSegmentIndex;
    int offsetInSegment = openSegmentOffset;
    return Segments.getPositions(segmentIndex, offsetInSegment);
}
/**
 * Returns the logical size of the builder: one int of offset per position,
 * plus the bytes written to the open output, plus the tracked size of the
 * closed slices.
 */
@Override
public long getSizeInBytes()
{
    long positionCount = getPositionCount();
    long offsetBytes = positionCount * Integer.BYTES;
    long dataBytes = openSliceOutput.size() + closedSlicesSizeInBytes;
    return dataBytes + offsetBytes;
}
/**
 * Reports that positions have no fixed size.
 *
 * @return always {@link OptionalInt#empty()}
 */
@Override
public OptionalInt fixedSizeInBytesPerPosition()
{
    // The size varies with the length of each element.
    OptionalInt noFixedSize = OptionalInt.empty();
    return noFixedSize;
}
/**
 * Not supported by this builder.
 *
 * @throws UnsupportedOperationException always
 */
@Override
public long getRegionSizeInBytes(int position, int length)
{
    String message = "getRegionSizeInBytes is not supported by SegmentedSliceBlockBuilder";
    throw new UnsupportedOperationException(message);
}
/**
 * Not supported by this builder.
 *
 * @throws UnsupportedOperationException always
 */
@Override
public long getPositionsSizeInBytes(boolean[] positions, int usedPositionCount)
{
    String message = "getPositionsSizeInBytes is not supported by SegmentedSliceBlockBuilder";
    throw new UnsupportedOperationException(message);
}
/**
 * Returns the retained size of the builder: the instance itself, the open
 * output, the offsets arrays and the closed slices.
 */
@Override
public long getRetainedSizeInBytes()
{
    // openSegmentIndex + 1 offsets arrays are counted, each at the size of offsets[0].
    long countedOffsetArrays = openSegmentIndex + 1;
    long offsetsBytes = sizeOf(offsets) + countedOffsetArrays * sizeOf(offsets[0]);

    // The array of slice references plus the tracked retained size of the closed slices.
    long closedBytes = sizeOf(closedSlices) + closedSlicesRetainedSize;

    return INSTANCE_SIZE + openSliceOutput.getRetainedSize() + offsetsBytes + closedBytes;
}
@Override
public void retainedBytesForEachPart(ObjLongConsumer