Please wait. This can take some minutes ...
Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance.
Project price only 1 $
You can buy this project and download/modify it how often you want.
jp.co.yahoo.yosegi.binary.maker.OptimizedNullArrayStringColumnBinaryMaker Maven / Gradle / Ivy
/**
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package jp.co.yahoo.yosegi.binary.maker;
import jp.co.yahoo.yosegi.binary.ColumnBinary;
import jp.co.yahoo.yosegi.binary.ColumnBinaryMakerConfig;
import jp.co.yahoo.yosegi.binary.ColumnBinaryMakerCustomConfigNode;
import jp.co.yahoo.yosegi.binary.CompressResultNode;
import jp.co.yahoo.yosegi.binary.maker.index.RangeStringIndex;
import jp.co.yahoo.yosegi.blockindex.BlockIndexNode;
import jp.co.yahoo.yosegi.blockindex.StringRangeBlockIndex;
import jp.co.yahoo.yosegi.compressor.CompressResult;
import jp.co.yahoo.yosegi.compressor.FindCompressor;
import jp.co.yahoo.yosegi.compressor.ICompressor;
import jp.co.yahoo.yosegi.inmemory.IDictionary;
import jp.co.yahoo.yosegi.inmemory.IMemoryAllocator;
import jp.co.yahoo.yosegi.message.objects.PrimitiveObject;
import jp.co.yahoo.yosegi.message.objects.StringObj;
import jp.co.yahoo.yosegi.message.objects.Utf8BytesLinkObj;
import jp.co.yahoo.yosegi.spread.analyzer.IColumnAnalizeResult;
import jp.co.yahoo.yosegi.spread.analyzer.StringColumnAnalizeResult;
import jp.co.yahoo.yosegi.spread.column.ColumnType;
import jp.co.yahoo.yosegi.spread.column.ICell;
import jp.co.yahoo.yosegi.spread.column.IColumn;
import jp.co.yahoo.yosegi.spread.column.PrimitiveCell;
import jp.co.yahoo.yosegi.spread.column.PrimitiveColumn;
import jp.co.yahoo.yosegi.util.DetermineMinMax;
import jp.co.yahoo.yosegi.util.DetermineMinMaxFactory;
import jp.co.yahoo.yosegi.util.io.IReadSupporter;
import jp.co.yahoo.yosegi.util.io.IWriteSupporter;
import jp.co.yahoo.yosegi.util.io.NumberToBinaryUtils;
import jp.co.yahoo.yosegi.util.io.nullencoder.NullBinaryEncoder;
import jp.co.yahoo.yosegi.util.io.unsafe.ByteBufferSupporterFactory;
import java.io.IOException;
import java.io.UncheckedIOException;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
public class OptimizedNullArrayStringColumnBinaryMaker implements IColumnBinaryMaker {
// Metadata layout
// byteOrder, ColumnStart, rowCount, minLength , maxLength ,
// dicSize , nullLength, indexLength, lengthByteLength
private static final int META_LENGTH = Byte.BYTES + Integer.BYTES * 8;
@Override
public ColumnBinary toBinary(
final ColumnBinaryMakerConfig commonConfig ,
final ColumnBinaryMakerCustomConfigNode currentConfigNode ,
final CompressResultNode compressResultNode ,
final IColumn column ) throws IOException {
if ( column.size() == 0 ) {
return new UnsupportedColumnBinaryMaker()
.toBinary( commonConfig , currentConfigNode , compressResultNode , column );
}
ColumnBinaryMakerConfig currentConfig = commonConfig;
if ( currentConfigNode != null ) {
currentConfig = currentConfigNode.getCurrentConfig();
}
Map dicMap = new HashMap();
byte[][] dicArray = new byte[column.size()][];
int totalLength = 0;
int logicalDataLength = 0;
int[] indexArray = new int[column.size()];
boolean[] isNullArray = new boolean[column.size()];
DetermineMinMax detemineMinMax = DetermineMinMaxFactory.createString();
DetermineMinMax lengthMinMax = DetermineMinMaxFactory.createInt();
int rowCount = 0;
int nullCount = 0;
int nullMaxIndex = 0;
int notNullMaxIndex = 0;
int startIndex = 0;
for ( ; startIndex < column.size() ; startIndex++ ) {
ICell cell = column.get(startIndex);
if ( cell.getType() != ColumnType.NULL ) {
break;
}
}
for ( int i = startIndex,arrayIndex = 0 ; i < column.size() ; i++ ,arrayIndex++) {
ICell cell = column.get(i);
if ( cell.getType() == ColumnType.NULL ) {
nullCount++;
nullMaxIndex = arrayIndex;
isNullArray[arrayIndex] = true;
continue;
}
PrimitiveCell byteCell = (PrimitiveCell) cell;
String target = byteCell.getRow().getString();
if ( target == null ) {
nullCount++;
nullMaxIndex = arrayIndex;
isNullArray[arrayIndex] = true;
continue;
}
byte[] obj = byteCell.getRow().getBytes();
if ( ! dicMap.containsKey( target ) ) {
detemineMinMax.set( target );
lengthMinMax.set( obj.length );
int dicIndex = dicMap.size();
dicMap.put( target , dicIndex );
dicArray[dicIndex] = obj;
totalLength += obj.length;
}
logicalDataLength += Integer.BYTES + obj.length;
indexArray[rowCount] = dicMap.get( target );
notNullMaxIndex = arrayIndex;
rowCount++;
}
if ( nullCount == 0
&& detemineMinMax.getMin().equals( detemineMinMax.getMax() )
&& startIndex == 0 ) {
return ConstantColumnBinaryMaker.createColumnBinary(
new StringObj( detemineMinMax.getMin() ) , column.getColumnName() , column.size() );
}
NumberToBinaryUtils.IIntConverter indexConverter =
NumberToBinaryUtils.getIntConverter( 0 , dicMap.size() );
int indexLength = indexConverter.calcBinarySize( rowCount );
int lengthByteLength = 0;
NumberToBinaryUtils.IIntConverter lengthConverter =
NumberToBinaryUtils.getIntConverter(
lengthMinMax.getMin() , lengthMinMax.getMax() );
if ( ! lengthMinMax.getMin().equals( lengthMinMax.getMax() ) ) {
lengthByteLength = lengthConverter.calcBinarySize( dicMap.size() );
}
int dicLength = totalLength;
int nullLength = NullBinaryEncoder.getBinarySize(
nullCount , rowCount , nullMaxIndex , notNullMaxIndex );
byte[] binaryRaw =
new byte[ META_LENGTH + nullLength + indexLength + lengthByteLength + dicLength ];
ByteOrder order = ByteOrder.nativeOrder();
ByteBuffer wrapBuffer = ByteBuffer.wrap( binaryRaw );
wrapBuffer.put( order == ByteOrder.BIG_ENDIAN ? (byte)0 : (byte)1 );
wrapBuffer.putInt( startIndex );
wrapBuffer.putInt( rowCount );
wrapBuffer.putInt( lengthMinMax.getMin() );
wrapBuffer.putInt( lengthMinMax.getMax() );
wrapBuffer.putInt( dicMap.size() );
wrapBuffer.putInt( nullLength );
wrapBuffer.putInt( indexLength );
wrapBuffer.putInt( lengthByteLength );
NullBinaryEncoder.toBinary(
binaryRaw ,
META_LENGTH ,
nullLength ,
isNullArray ,
nullCount ,
rowCount ,
nullMaxIndex ,
notNullMaxIndex );
IWriteSupporter indexWriter = indexConverter.toWriteSuppoter(
rowCount , binaryRaw , META_LENGTH + nullLength , indexLength );
for ( int i = 0 ; i < rowCount ; i++ ) {
indexWriter.putInt( indexArray[i] );
}
if ( ! lengthMinMax.getMin().equals( lengthMinMax.getMax() ) ) {
IWriteSupporter lengthWriter = lengthConverter.toWriteSuppoter(
dicMap.size() , binaryRaw , META_LENGTH + nullLength + indexLength , lengthByteLength );
for ( int i = 0 ; i < dicMap.size(); i++ ) {
lengthWriter.putInt( dicArray[i].length );
}
}
ByteBuffer valueBuffer = ByteBuffer.wrap(
binaryRaw , META_LENGTH + nullLength + indexLength + lengthByteLength , totalLength );
for ( int i = 0 ; i < dicMap.size() ; i++ ) {
valueBuffer.put( dicArray[i] );
}
CompressResult compressResult = compressResultNode.getCompressResult(
this.getClass().getName() ,
"c0" ,
currentConfig.compressionPolicy ,
currentConfig.allowedRatio );
byte[] compressBinary = currentConfig.compressorClass.compress(
binaryRaw , 0 , binaryRaw.length , compressResult );
int minCharLength = Character.BYTES * detemineMinMax.getMin().length();
int maxCharLength = Character.BYTES * detemineMinMax.getMax().length();
int headerSize = Integer.BYTES + minCharLength + Integer.BYTES + maxCharLength;
byte[] binary = new byte[headerSize + compressBinary.length];
wrapBuffer = ByteBuffer.wrap( binary , 0 , binary.length );
ByteBuffer binaryWrapBuffer = ByteBuffer.wrap( binary );
binaryWrapBuffer.putInt( minCharLength );
binaryWrapBuffer.asCharBuffer().put( detemineMinMax.getMin() );
binaryWrapBuffer.position( binaryWrapBuffer.position() + minCharLength );
binaryWrapBuffer.putInt( maxCharLength );
binaryWrapBuffer.asCharBuffer().put( detemineMinMax.getMax() );
binaryWrapBuffer.position( binaryWrapBuffer.position() + maxCharLength );
binaryWrapBuffer.put( compressBinary );
return new ColumnBinary(
this.getClass().getName() ,
currentConfig.compressorClass.getClass().getName() ,
column.getColumnName() ,
column.getColumnType() ,
column.size() ,
binaryRaw.length ,
logicalDataLength ,
dicMap.size() ,
binary ,
0 ,
binary.length ,
null );
}
@Override
public int calcBinarySize( final IColumnAnalizeResult analizeResult ) {
StringColumnAnalizeResult stringAnalizeResult = (StringColumnAnalizeResult)analizeResult;
int startIndex = analizeResult.getRowStart();
int maxIndex = analizeResult.getRowEnd();
int nullCount = analizeResult.getNullCount() - startIndex;
int notNullCount = analizeResult.getRowCount();
int nullLength =
NullBinaryEncoder.getBinarySize( nullCount , notNullCount , maxIndex , maxIndex );
NumberToBinaryUtils.IIntConverter indexConverter =
NumberToBinaryUtils.getIntConverter( 0 , analizeResult.getUniqCount() );
int indexLength = indexConverter.calcBinarySize( notNullCount );
int minLength = stringAnalizeResult.getMinUtf8Bytes();
int maxLength = stringAnalizeResult.getMaxUtf8Bytes();
int lengthBinaryLength = 0;
NumberToBinaryUtils.IIntConverter lengthConverter =
NumberToBinaryUtils.getIntConverter( minLength , maxLength );
if ( ! ( minLength == maxLength ) ) {
lengthBinaryLength = lengthConverter.calcBinarySize( notNullCount );
}
int dicLength = stringAnalizeResult.getUniqUtf8ByteSize();
return META_LENGTH + nullLength + indexLength + lengthBinaryLength + dicLength;
}
@Override
public IColumn toColumn( final ColumnBinary columnBinary ) throws IOException {
ByteBuffer wrapBuffer = ByteBuffer.wrap(
columnBinary.binary ,
columnBinary.binaryStart ,
columnBinary.binaryLength );
int minLength = wrapBuffer.getInt();
char[] minCharArray = new char[minLength / Character.BYTES];
wrapBuffer.asCharBuffer().get( minCharArray );
wrapBuffer.position( wrapBuffer.position() + minLength );
int maxLength = wrapBuffer.getInt();
char[] maxCharArray = new char[maxLength / Character.BYTES];
wrapBuffer.asCharBuffer().get( maxCharArray );
wrapBuffer.position( wrapBuffer.position() + maxLength );
String min = new String( minCharArray );
String max = new String( maxCharArray );
int headerSize = Integer.BYTES + minLength + Integer.BYTES + maxLength;
return new HeaderIndexLazyColumn(
columnBinary.columnName ,
columnBinary.columnType ,
new ColumnManager(
columnBinary ,
columnBinary.binaryStart + headerSize ,
columnBinary.binaryLength - headerSize ) ,
new RangeStringIndex( min , max )
);
}
@Override
public void loadInMemoryStorage(
final ColumnBinary columnBinary ,
final IMemoryAllocator allocator ) throws IOException {
ByteBuffer rawBuffer = ByteBuffer.wrap(
columnBinary.binary , columnBinary.binaryStart , columnBinary.binaryLength );
int minBinaryLength = rawBuffer.getInt();
rawBuffer.position( rawBuffer.position() + minBinaryLength );
int maxBinaryLength = rawBuffer.getInt();
rawBuffer.position( rawBuffer.position() + maxBinaryLength );
int headerSize = Integer.BYTES + minBinaryLength + Integer.BYTES + maxBinaryLength;
ICompressor compressor = FindCompressor.get( columnBinary.compressorClassName );
byte[] binary = compressor.decompress(
columnBinary.binary ,
columnBinary.binaryStart + headerSize ,
columnBinary.binaryLength - headerSize );
ByteBuffer wrapBuffer = ByteBuffer.wrap( binary , 0 , binary.length );
ByteOrder order = wrapBuffer.get() == (byte)0
? ByteOrder.BIG_ENDIAN : ByteOrder.LITTLE_ENDIAN;
int startIndex = wrapBuffer.getInt();
final int rowCount = wrapBuffer.getInt();
int minLength = wrapBuffer.getInt();
int maxLength = wrapBuffer.getInt();
int dicSize = wrapBuffer.getInt();
int nullLength = wrapBuffer.getInt();
int indexLength = wrapBuffer.getInt();
int lengthBinaryLength = wrapBuffer.getInt();
int dicLength = binary.length - META_LENGTH - nullLength - indexLength - lengthBinaryLength;
boolean[] isNullArray =
NullBinaryEncoder.toIsNullArray( binary , META_LENGTH , nullLength );
IReadSupporter lengthReader;
if ( minLength == maxLength ) {
lengthReader = NumberToBinaryUtils.getFixedIntConverter( minLength );
} else {
NumberToBinaryUtils.IIntConverter lengthConverter =
NumberToBinaryUtils.getIntConverter( minLength , maxLength );
lengthReader = lengthConverter.toReadSupporter(
binary ,
META_LENGTH + nullLength + indexLength ,
lengthBinaryLength );
}
IDictionary dic = allocator.createDictionary( dicSize );
int currentStart = META_LENGTH + nullLength + indexLength + lengthBinaryLength;
for ( int i = 0 ; i < dicSize ; i++ ) {
int currentLength = lengthReader.getInt();
dic.setBytes( i , binary , currentStart , currentLength );
currentStart += currentLength;
}
allocator.setValueCount( startIndex + isNullArray.length );
NumberToBinaryUtils.IIntConverter indexConverter =
NumberToBinaryUtils.getIntConverter( 0 , dicSize );
IReadSupporter indexReader =
indexConverter.toReadSupporter( binary , META_LENGTH + nullLength , indexLength );
for ( int i = 0; i < startIndex ; i++ ) {
allocator.setNull( i );
}
for ( int i = 0 ; i < isNullArray.length ; i++ ) {
if ( isNullArray[i] ) {
allocator.setNull( i + startIndex );
} else {
allocator.setFromDictionary( i + startIndex , indexReader.getInt() , dic );
}
}
}
@Override
public void setBlockIndexNode(
final BlockIndexNode parentNode ,
final ColumnBinary columnBinary ,
final int spreadIndex ) throws IOException {
ByteBuffer wrapBuffer = ByteBuffer.wrap(
columnBinary.binary , columnBinary.binaryStart , columnBinary.binaryLength );
int minLength = wrapBuffer.getInt();
char[] minCharArray = new char[minLength / Character.BYTES];
wrapBuffer.asCharBuffer().get( minCharArray );
wrapBuffer.position( wrapBuffer.position() + minLength );
int maxLength = wrapBuffer.getInt();
char[] maxCharArray = new char[maxLength / Character.BYTES];
wrapBuffer.asCharBuffer().get( maxCharArray );
wrapBuffer.position( wrapBuffer.position() + maxLength );
String min = new String( minCharArray );
String max = new String( maxCharArray );
BlockIndexNode currentNode = parentNode.getChildNode( columnBinary.columnName );
currentNode.setBlockIndex( new StringRangeBlockIndex( min , max ) );
}
public class ColumnManager implements IColumnManager {
private final ColumnBinary columnBinary;
private final int binaryStart;
private final int binaryLength;
private PrimitiveColumn column;
private boolean isCreate;
/**
* Init.
**/
public ColumnManager(
final ColumnBinary columnBinary ,
final int binaryStart ,
final int binaryLength ) {
this.columnBinary = columnBinary;
this.binaryStart = binaryStart;
this.binaryLength = binaryLength;
}
private void create() throws IOException {
if ( isCreate ) {
return;
}
ICompressor compressor = FindCompressor.get( columnBinary.compressorClassName );
byte[] binary = compressor.decompress( columnBinary.binary , binaryStart , binaryLength );
ByteBuffer wrapBuffer = ByteBuffer.wrap( binary , 0 , binary.length );
ByteOrder order = wrapBuffer.get() == (byte)0
? ByteOrder.BIG_ENDIAN : ByteOrder.LITTLE_ENDIAN;
final int startIndex = wrapBuffer.getInt();
final int rowCount = wrapBuffer.getInt();
int minLength = wrapBuffer.getInt();
int maxLength = wrapBuffer.getInt();
int dicSize = wrapBuffer.getInt();
int nullLength = wrapBuffer.getInt();
int indexLength = wrapBuffer.getInt();
int lengthByteLength = wrapBuffer.getInt();
int dicLength = binary.length - META_LENGTH - nullLength - indexLength - lengthByteLength;
NumberToBinaryUtils.IIntConverter indexConverter =
NumberToBinaryUtils.getIntConverter( 0 , dicSize );
boolean[] isNullArray =
NullBinaryEncoder.toIsNullArray( binary , META_LENGTH , nullLength );
IReadSupporter indexReader =
indexConverter.toReadSupporter( binary , META_LENGTH + nullLength , indexLength );
int[] indexArray = new int[isNullArray.length];
for ( int i = 0 ; i < indexArray.length ; i++ ) {
if ( ! isNullArray[i] ) {
indexArray[i] = indexReader.getInt();
}
}
IReadSupporter lengthReader;
if ( minLength == maxLength ) {
lengthReader = NumberToBinaryUtils.getFixedIntConverter( minLength );
} else {
NumberToBinaryUtils.IIntConverter lengthConverter =
NumberToBinaryUtils.getIntConverter( minLength , maxLength );
lengthReader = lengthConverter.toReadSupporter(
binary ,
META_LENGTH + nullLength + indexLength ,
lengthByteLength );
}
PrimitiveObject[] dicArray = new Utf8BytesLinkObj[ dicSize ];
int currentStart = META_LENGTH + nullLength + indexLength + lengthByteLength;
for ( int i = 0 ; i < dicArray.length ; i++ ) {
int currentLength = lengthReader.getInt();
dicArray[i] = new Utf8BytesLinkObj( binary , currentStart , currentLength );
currentStart += currentLength;
}
column = new PrimitiveColumn( columnBinary.columnType , columnBinary.columnName );
column.setCellManager( new OptimizedNullArrayDicCellManager(
columnBinary.columnType , startIndex , isNullArray , indexArray , dicArray ) );
isCreate = true;
}
@Override
public IColumn get() {
if ( ! isCreate ) {
try {
create();
} catch ( IOException ex ) {
throw new UncheckedIOException( ex );
}
}
return column;
}
@Override
public List getColumnKeys() {
return new ArrayList();
}
@Override
public int getColumnSize() {
return 0;
}
}
}