All Downloads are FREE. Search and download functionalities are using the official Maven repository.

jp.co.yahoo.yosegi.binary.maker.OptimizedNullArrayStringColumnBinaryMaker Maven / Gradle / Ivy

/**
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 * 

* http://www.apache.org/licenses/LICENSE-2.0 *

* Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ package jp.co.yahoo.yosegi.binary.maker; import jp.co.yahoo.yosegi.binary.ColumnBinary; import jp.co.yahoo.yosegi.binary.ColumnBinaryMakerConfig; import jp.co.yahoo.yosegi.binary.ColumnBinaryMakerCustomConfigNode; import jp.co.yahoo.yosegi.binary.CompressResultNode; import jp.co.yahoo.yosegi.binary.maker.index.RangeStringIndex; import jp.co.yahoo.yosegi.blockindex.BlockIndexNode; import jp.co.yahoo.yosegi.blockindex.StringRangeBlockIndex; import jp.co.yahoo.yosegi.compressor.CompressResult; import jp.co.yahoo.yosegi.compressor.FindCompressor; import jp.co.yahoo.yosegi.compressor.ICompressor; import jp.co.yahoo.yosegi.inmemory.IDictionary; import jp.co.yahoo.yosegi.inmemory.IMemoryAllocator; import jp.co.yahoo.yosegi.message.objects.PrimitiveObject; import jp.co.yahoo.yosegi.message.objects.StringObj; import jp.co.yahoo.yosegi.message.objects.Utf8BytesLinkObj; import jp.co.yahoo.yosegi.spread.analyzer.IColumnAnalizeResult; import jp.co.yahoo.yosegi.spread.analyzer.StringColumnAnalizeResult; import jp.co.yahoo.yosegi.spread.column.ColumnType; import jp.co.yahoo.yosegi.spread.column.ICell; import jp.co.yahoo.yosegi.spread.column.IColumn; import jp.co.yahoo.yosegi.spread.column.PrimitiveCell; import jp.co.yahoo.yosegi.spread.column.PrimitiveColumn; import jp.co.yahoo.yosegi.util.DetermineMinMax; import jp.co.yahoo.yosegi.util.DetermineMinMaxFactory; import jp.co.yahoo.yosegi.util.io.IReadSupporter; import jp.co.yahoo.yosegi.util.io.IWriteSupporter; import jp.co.yahoo.yosegi.util.io.NumberToBinaryUtils; import jp.co.yahoo.yosegi.util.io.nullencoder.NullBinaryEncoder; import jp.co.yahoo.yosegi.util.io.unsafe.ByteBufferSupporterFactory; import java.io.IOException; import java.io.UncheckedIOException; import java.nio.ByteBuffer; import java.nio.ByteOrder; import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; public class OptimizedNullArrayStringColumnBinaryMaker implements IColumnBinaryMaker { // Metadata layout // byteOrder, ColumnStart, rowCount, minLength , maxLength , // dicSize , nullLength, indexLength, lengthByteLength private static final int META_LENGTH = Byte.BYTES + Integer.BYTES * 8; @Override public ColumnBinary toBinary( final ColumnBinaryMakerConfig commonConfig , final ColumnBinaryMakerCustomConfigNode currentConfigNode , final CompressResultNode compressResultNode , final IColumn column ) throws IOException { if ( column.size() == 0 ) { return new UnsupportedColumnBinaryMaker() .toBinary( commonConfig , currentConfigNode , compressResultNode , column ); } ColumnBinaryMakerConfig currentConfig = commonConfig; if ( currentConfigNode != null ) { currentConfig = currentConfigNode.getCurrentConfig(); } Map dicMap = new HashMap(); byte[][] dicArray = new byte[column.size()][]; int totalLength = 0; int logicalDataLength = 0; int[] indexArray = new int[column.size()]; boolean[] isNullArray = new boolean[column.size()]; DetermineMinMax detemineMinMax = DetermineMinMaxFactory.createString(); DetermineMinMax lengthMinMax = DetermineMinMaxFactory.createInt(); int rowCount = 0; int nullCount = 0; int nullMaxIndex = 0; int notNullMaxIndex = 0; int startIndex = 0; for ( ; startIndex < column.size() ; startIndex++ ) { ICell cell = column.get(startIndex); if ( cell.getType() != ColumnType.NULL ) { break; } } for ( int i = startIndex,arrayIndex = 0 ; i < column.size() ; i++ ,arrayIndex++) { ICell cell = column.get(i); if ( cell.getType() == ColumnType.NULL ) { nullCount++; nullMaxIndex = arrayIndex; isNullArray[arrayIndex] = true; continue; } PrimitiveCell byteCell = (PrimitiveCell) cell; String target = byteCell.getRow().getString(); if ( target == null ) { nullCount++; nullMaxIndex = arrayIndex; isNullArray[arrayIndex] = true; continue; } byte[] obj = byteCell.getRow().getBytes(); if ( ! dicMap.containsKey( target ) ) { detemineMinMax.set( target ); lengthMinMax.set( obj.length ); int dicIndex = dicMap.size(); dicMap.put( target , dicIndex ); dicArray[dicIndex] = obj; totalLength += obj.length; } logicalDataLength += Integer.BYTES + obj.length; indexArray[rowCount] = dicMap.get( target ); notNullMaxIndex = arrayIndex; rowCount++; } if ( nullCount == 0 && detemineMinMax.getMin().equals( detemineMinMax.getMax() ) && startIndex == 0 ) { return ConstantColumnBinaryMaker.createColumnBinary( new StringObj( detemineMinMax.getMin() ) , column.getColumnName() , column.size() ); } NumberToBinaryUtils.IIntConverter indexConverter = NumberToBinaryUtils.getIntConverter( 0 , dicMap.size() ); int indexLength = indexConverter.calcBinarySize( rowCount ); int lengthByteLength = 0; NumberToBinaryUtils.IIntConverter lengthConverter = NumberToBinaryUtils.getIntConverter( lengthMinMax.getMin() , lengthMinMax.getMax() ); if ( ! lengthMinMax.getMin().equals( lengthMinMax.getMax() ) ) { lengthByteLength = lengthConverter.calcBinarySize( dicMap.size() ); } int dicLength = totalLength; int nullLength = NullBinaryEncoder.getBinarySize( nullCount , rowCount , nullMaxIndex , notNullMaxIndex ); byte[] binaryRaw = new byte[ META_LENGTH + nullLength + indexLength + lengthByteLength + dicLength ]; ByteOrder order = ByteOrder.nativeOrder(); ByteBuffer wrapBuffer = ByteBuffer.wrap( binaryRaw ); wrapBuffer.put( order == ByteOrder.BIG_ENDIAN ? (byte)0 : (byte)1 ); wrapBuffer.putInt( startIndex ); wrapBuffer.putInt( rowCount ); wrapBuffer.putInt( lengthMinMax.getMin() ); wrapBuffer.putInt( lengthMinMax.getMax() ); wrapBuffer.putInt( dicMap.size() ); wrapBuffer.putInt( nullLength ); wrapBuffer.putInt( indexLength ); wrapBuffer.putInt( lengthByteLength ); NullBinaryEncoder.toBinary( binaryRaw , META_LENGTH , nullLength , isNullArray , nullCount , rowCount , nullMaxIndex , notNullMaxIndex ); IWriteSupporter indexWriter = indexConverter.toWriteSuppoter( rowCount , binaryRaw , META_LENGTH + nullLength , indexLength ); for ( int i = 0 ; i < rowCount ; i++ ) { indexWriter.putInt( indexArray[i] ); } if ( ! lengthMinMax.getMin().equals( lengthMinMax.getMax() ) ) { IWriteSupporter lengthWriter = lengthConverter.toWriteSuppoter( dicMap.size() , binaryRaw , META_LENGTH + nullLength + indexLength , lengthByteLength ); for ( int i = 0 ; i < dicMap.size(); i++ ) { lengthWriter.putInt( dicArray[i].length ); } } ByteBuffer valueBuffer = ByteBuffer.wrap( binaryRaw , META_LENGTH + nullLength + indexLength + lengthByteLength , totalLength ); for ( int i = 0 ; i < dicMap.size() ; i++ ) { valueBuffer.put( dicArray[i] ); } CompressResult compressResult = compressResultNode.getCompressResult( this.getClass().getName() , "c0" , currentConfig.compressionPolicy , currentConfig.allowedRatio ); byte[] compressBinary = currentConfig.compressorClass.compress( binaryRaw , 0 , binaryRaw.length , compressResult ); int minCharLength = Character.BYTES * detemineMinMax.getMin().length(); int maxCharLength = Character.BYTES * detemineMinMax.getMax().length(); int headerSize = Integer.BYTES + minCharLength + Integer.BYTES + maxCharLength; byte[] binary = new byte[headerSize + compressBinary.length]; wrapBuffer = ByteBuffer.wrap( binary , 0 , binary.length ); ByteBuffer binaryWrapBuffer = ByteBuffer.wrap( binary ); binaryWrapBuffer.putInt( minCharLength ); binaryWrapBuffer.asCharBuffer().put( detemineMinMax.getMin() ); binaryWrapBuffer.position( binaryWrapBuffer.position() + minCharLength ); binaryWrapBuffer.putInt( maxCharLength ); binaryWrapBuffer.asCharBuffer().put( detemineMinMax.getMax() ); binaryWrapBuffer.position( binaryWrapBuffer.position() + maxCharLength ); binaryWrapBuffer.put( compressBinary ); return new ColumnBinary( this.getClass().getName() , currentConfig.compressorClass.getClass().getName() , column.getColumnName() , column.getColumnType() , column.size() , binaryRaw.length , logicalDataLength , dicMap.size() , binary , 0 , binary.length , null ); } @Override public int calcBinarySize( final IColumnAnalizeResult analizeResult ) { StringColumnAnalizeResult stringAnalizeResult = (StringColumnAnalizeResult)analizeResult; int startIndex = analizeResult.getRowStart(); int maxIndex = analizeResult.getRowEnd(); int nullCount = analizeResult.getNullCount() - startIndex; int notNullCount = analizeResult.getRowCount(); int nullLength = NullBinaryEncoder.getBinarySize( nullCount , notNullCount , maxIndex , maxIndex ); NumberToBinaryUtils.IIntConverter indexConverter = NumberToBinaryUtils.getIntConverter( 0 , analizeResult.getUniqCount() ); int indexLength = indexConverter.calcBinarySize( notNullCount ); int minLength = stringAnalizeResult.getMinUtf8Bytes(); int maxLength = stringAnalizeResult.getMaxUtf8Bytes(); int lengthBinaryLength = 0; NumberToBinaryUtils.IIntConverter lengthConverter = NumberToBinaryUtils.getIntConverter( minLength , maxLength ); if ( ! ( minLength == maxLength ) ) { lengthBinaryLength = lengthConverter.calcBinarySize( notNullCount ); } int dicLength = stringAnalizeResult.getUniqUtf8ByteSize(); return META_LENGTH + nullLength + indexLength + lengthBinaryLength + dicLength; } @Override public IColumn toColumn( final ColumnBinary columnBinary ) throws IOException { ByteBuffer wrapBuffer = ByteBuffer.wrap( columnBinary.binary , columnBinary.binaryStart , columnBinary.binaryLength ); int minLength = wrapBuffer.getInt(); char[] minCharArray = new char[minLength / Character.BYTES]; wrapBuffer.asCharBuffer().get( minCharArray ); wrapBuffer.position( wrapBuffer.position() + minLength ); int maxLength = wrapBuffer.getInt(); char[] maxCharArray = new char[maxLength / Character.BYTES]; wrapBuffer.asCharBuffer().get( maxCharArray ); wrapBuffer.position( wrapBuffer.position() + maxLength ); String min = new String( minCharArray ); String max = new String( maxCharArray ); int headerSize = Integer.BYTES + minLength + Integer.BYTES + maxLength; return new HeaderIndexLazyColumn( columnBinary.columnName , columnBinary.columnType , new ColumnManager( columnBinary , columnBinary.binaryStart + headerSize , columnBinary.binaryLength - headerSize ) , new RangeStringIndex( min , max ) ); } @Override public void loadInMemoryStorage( final ColumnBinary columnBinary , final IMemoryAllocator allocator ) throws IOException { ByteBuffer rawBuffer = ByteBuffer.wrap( columnBinary.binary , columnBinary.binaryStart , columnBinary.binaryLength ); int minBinaryLength = rawBuffer.getInt(); rawBuffer.position( rawBuffer.position() + minBinaryLength ); int maxBinaryLength = rawBuffer.getInt(); rawBuffer.position( rawBuffer.position() + maxBinaryLength ); int headerSize = Integer.BYTES + minBinaryLength + Integer.BYTES + maxBinaryLength; ICompressor compressor = FindCompressor.get( columnBinary.compressorClassName ); byte[] binary = compressor.decompress( columnBinary.binary , columnBinary.binaryStart + headerSize , columnBinary.binaryLength - headerSize ); ByteBuffer wrapBuffer = ByteBuffer.wrap( binary , 0 , binary.length ); ByteOrder order = wrapBuffer.get() == (byte)0 ? ByteOrder.BIG_ENDIAN : ByteOrder.LITTLE_ENDIAN; int startIndex = wrapBuffer.getInt(); final int rowCount = wrapBuffer.getInt(); int minLength = wrapBuffer.getInt(); int maxLength = wrapBuffer.getInt(); int dicSize = wrapBuffer.getInt(); int nullLength = wrapBuffer.getInt(); int indexLength = wrapBuffer.getInt(); int lengthBinaryLength = wrapBuffer.getInt(); int dicLength = binary.length - META_LENGTH - nullLength - indexLength - lengthBinaryLength; boolean[] isNullArray = NullBinaryEncoder.toIsNullArray( binary , META_LENGTH , nullLength ); IReadSupporter lengthReader; if ( minLength == maxLength ) { lengthReader = NumberToBinaryUtils.getFixedIntConverter( minLength ); } else { NumberToBinaryUtils.IIntConverter lengthConverter = NumberToBinaryUtils.getIntConverter( minLength , maxLength ); lengthReader = lengthConverter.toReadSupporter( binary , META_LENGTH + nullLength + indexLength , lengthBinaryLength ); } IDictionary dic = allocator.createDictionary( dicSize ); int currentStart = META_LENGTH + nullLength + indexLength + lengthBinaryLength; for ( int i = 0 ; i < dicSize ; i++ ) { int currentLength = lengthReader.getInt(); dic.setBytes( i , binary , currentStart , currentLength ); currentStart += currentLength; } allocator.setValueCount( startIndex + isNullArray.length ); NumberToBinaryUtils.IIntConverter indexConverter = NumberToBinaryUtils.getIntConverter( 0 , dicSize ); IReadSupporter indexReader = indexConverter.toReadSupporter( binary , META_LENGTH + nullLength , indexLength ); for ( int i = 0; i < startIndex ; i++ ) { allocator.setNull( i ); } for ( int i = 0 ; i < isNullArray.length ; i++ ) { if ( isNullArray[i] ) { allocator.setNull( i + startIndex ); } else { allocator.setFromDictionary( i + startIndex , indexReader.getInt() , dic ); } } } @Override public void setBlockIndexNode( final BlockIndexNode parentNode , final ColumnBinary columnBinary , final int spreadIndex ) throws IOException { ByteBuffer wrapBuffer = ByteBuffer.wrap( columnBinary.binary , columnBinary.binaryStart , columnBinary.binaryLength ); int minLength = wrapBuffer.getInt(); char[] minCharArray = new char[minLength / Character.BYTES]; wrapBuffer.asCharBuffer().get( minCharArray ); wrapBuffer.position( wrapBuffer.position() + minLength ); int maxLength = wrapBuffer.getInt(); char[] maxCharArray = new char[maxLength / Character.BYTES]; wrapBuffer.asCharBuffer().get( maxCharArray ); wrapBuffer.position( wrapBuffer.position() + maxLength ); String min = new String( minCharArray ); String max = new String( maxCharArray ); BlockIndexNode currentNode = parentNode.getChildNode( columnBinary.columnName ); currentNode.setBlockIndex( new StringRangeBlockIndex( min , max ) ); } public class ColumnManager implements IColumnManager { private final ColumnBinary columnBinary; private final int binaryStart; private final int binaryLength; private PrimitiveColumn column; private boolean isCreate; /** * Init. **/ public ColumnManager( final ColumnBinary columnBinary , final int binaryStart , final int binaryLength ) { this.columnBinary = columnBinary; this.binaryStart = binaryStart; this.binaryLength = binaryLength; } private void create() throws IOException { if ( isCreate ) { return; } ICompressor compressor = FindCompressor.get( columnBinary.compressorClassName ); byte[] binary = compressor.decompress( columnBinary.binary , binaryStart , binaryLength ); ByteBuffer wrapBuffer = ByteBuffer.wrap( binary , 0 , binary.length ); ByteOrder order = wrapBuffer.get() == (byte)0 ? ByteOrder.BIG_ENDIAN : ByteOrder.LITTLE_ENDIAN; final int startIndex = wrapBuffer.getInt(); final int rowCount = wrapBuffer.getInt(); int minLength = wrapBuffer.getInt(); int maxLength = wrapBuffer.getInt(); int dicSize = wrapBuffer.getInt(); int nullLength = wrapBuffer.getInt(); int indexLength = wrapBuffer.getInt(); int lengthByteLength = wrapBuffer.getInt(); int dicLength = binary.length - META_LENGTH - nullLength - indexLength - lengthByteLength; NumberToBinaryUtils.IIntConverter indexConverter = NumberToBinaryUtils.getIntConverter( 0 , dicSize ); boolean[] isNullArray = NullBinaryEncoder.toIsNullArray( binary , META_LENGTH , nullLength ); IReadSupporter indexReader = indexConverter.toReadSupporter( binary , META_LENGTH + nullLength , indexLength ); int[] indexArray = new int[isNullArray.length]; for ( int i = 0 ; i < indexArray.length ; i++ ) { if ( ! isNullArray[i] ) { indexArray[i] = indexReader.getInt(); } } IReadSupporter lengthReader; if ( minLength == maxLength ) { lengthReader = NumberToBinaryUtils.getFixedIntConverter( minLength ); } else { NumberToBinaryUtils.IIntConverter lengthConverter = NumberToBinaryUtils.getIntConverter( minLength , maxLength ); lengthReader = lengthConverter.toReadSupporter( binary , META_LENGTH + nullLength + indexLength , lengthByteLength ); } PrimitiveObject[] dicArray = new Utf8BytesLinkObj[ dicSize ]; int currentStart = META_LENGTH + nullLength + indexLength + lengthByteLength; for ( int i = 0 ; i < dicArray.length ; i++ ) { int currentLength = lengthReader.getInt(); dicArray[i] = new Utf8BytesLinkObj( binary , currentStart , currentLength ); currentStart += currentLength; } column = new PrimitiveColumn( columnBinary.columnType , columnBinary.columnName ); column.setCellManager( new OptimizedNullArrayDicCellManager( columnBinary.columnType , startIndex , isNullArray , indexArray , dicArray ) ); isCreate = true; } @Override public IColumn get() { if ( ! isCreate ) { try { create(); } catch ( IOException ex ) { throw new UncheckedIOException( ex ); } } return column; } @Override public List getColumnKeys() { return new ArrayList(); } @Override public int getColumnSize() { return 0; } } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy