org.apache.druid.frame.read.columnar.StringFrameColumnReader Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of druid-processing Show documentation
Show all versions of druid-processing Show documentation
A module that contains everything required to understand Druid segments
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.druid.frame.read.columnar;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.primitives.Ints;
import it.unimi.dsi.fastutil.objects.ObjectArrays;
import org.apache.datasketches.memory.Memory;
import org.apache.druid.common.config.NullHandling;
import org.apache.druid.error.DruidException;
import org.apache.druid.frame.Frame;
import org.apache.druid.frame.read.FrameReaderUtils;
import org.apache.druid.frame.write.FrameWriterUtils;
import org.apache.druid.frame.write.columnar.FrameColumnWriters;
import org.apache.druid.frame.write.columnar.StringFrameColumnWriter;
import org.apache.druid.java.util.common.ISE;
import org.apache.druid.java.util.common.StringUtils;
import org.apache.druid.query.extraction.ExtractionFn;
import org.apache.druid.query.filter.DruidPredicateFactory;
import org.apache.druid.query.filter.ValueMatcher;
import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector;
import org.apache.druid.query.rowsandcols.column.Column;
import org.apache.druid.query.rowsandcols.column.ColumnAccessorBasedColumn;
import org.apache.druid.query.rowsandcols.column.accessor.ObjectColumnAccessorBase;
import org.apache.druid.segment.BaseSingleValueDimensionSelector;
import org.apache.druid.segment.ColumnValueSelector;
import org.apache.druid.segment.DimensionDictionarySelector;
import org.apache.druid.segment.DimensionSelector;
import org.apache.druid.segment.DimensionSelectorUtils;
import org.apache.druid.segment.IdLookup;
import org.apache.druid.segment.column.BaseColumn;
import org.apache.druid.segment.column.ColumnCapabilities;
import org.apache.druid.segment.column.ColumnCapabilitiesImpl;
import org.apache.druid.segment.column.ColumnType;
import org.apache.druid.segment.column.DictionaryEncodedColumn;
import org.apache.druid.segment.data.IndexedInts;
import org.apache.druid.segment.data.RangeIndexedInts;
import org.apache.druid.segment.data.ReadableOffset;
import org.apache.druid.segment.vector.MultiValueDimensionVectorSelector;
import org.apache.druid.segment.vector.ReadableVectorInspector;
import org.apache.druid.segment.vector.ReadableVectorOffset;
import org.apache.druid.segment.vector.SingleValueDimensionVectorSelector;
import org.apache.druid.segment.vector.VectorObjectSelector;
import javax.annotation.Nullable;
import java.nio.ByteBuffer;
import java.util.Arrays;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
/**
* Reader for {@link StringFrameColumnWriter}, types {@link ColumnType#STRING} and {@link ColumnType#STRING_ARRAY}.
*/
public class StringFrameColumnReader implements FrameColumnReader
{
/**
 * Index of the column inside the frame; passed to {@link Frame#region(int)} to obtain the column's memory.
 */
private final int columnNumber;

/**
 * True when the column is read as {@link ColumnType#STRING_ARRAY}, false when read as {@link ColumnType#STRING}.
 */
private final boolean asArray;

/**
 * Builds a reader bound to a single column of a frame.
 *
 * @param columnNumber index of the column within the frame
 * @param asArray      true to read as {@link ColumnType#STRING_ARRAY}, false to read as {@link ColumnType#STRING}
 */
StringFrameColumnReader(int columnNumber, boolean asArray)
{
  this.asArray = asArray;
  this.columnNumber = columnNumber;
}
/**
 * Reads this column of the given frame as a rows-and-columns {@link Column}.
 *
 * The column region is validated first. Only columns stored in single-value format are supported here.
 *
 * @param frame frame to read from
 * @return an accessor-based column over the frame's string data
 * @throws ISE if the column is stored in multi-value format
 */
@Override
public Column readRACColumn(Frame frame)
{
  final Memory region = frame.region(columnNumber);
  validate(region);

  if (!isMultiValue(region)) {
    final long lengthsStart = getStartOfStringLengthSection(frame.numRows(), false);
    final long payloadsStart = getStartOfStringDataSection(region, frame.numRows(), false);

    return new ColumnAccessorBasedColumn(
        new StringFrameColumn(frame, false, region, lengthsStart, payloadsStart, false)
    );
  }

  // When we implement handling of multi-value, we should actually make this look like an Array of String instead
  // of perpetuating the multi-value idea. Thus, when we add support for Arrays to the RAC stuff, that's when
  // we can start supporting multi-value.
  throw new ISE("Multivalue not yet handled by RAC");
}
/**
 * Reads this column of the given frame, returning the column together with its capabilities and row count.
 *
 * The concrete column class depends on {@code asArray}: a {@code StringArrayFrameColumn} when reading as
 * {@link ColumnType#STRING_ARRAY}, otherwise a {@link StringFrameColumn}. The capabilities report multiple values
 * only for non-array columns stored in multi-value format, and report null presence as unknown.
 *
 * @param frame frame to read from
 * @return the column, its capabilities, and the number of rows in the frame
 */
@Override
public ColumnPlus readColumn(final Frame frame)
{
  final Memory region = frame.region(columnNumber);
  validate(region);

  final boolean storedAsMultiValue = isMultiValue(region);
  final long lengthsStart = getStartOfStringLengthSection(frame.numRows(), storedAsMultiValue);
  final long dataStart = getStartOfStringDataSection(region, frame.numRows(), storedAsMultiValue);

  final ColumnType columnType;
  final BaseColumn column;

  if (!asArray) {
    columnType = ColumnType.STRING;
    column = new StringFrameColumn(frame, storedAsMultiValue, region, lengthsStart, dataStart, false);
  } else {
    columnType = ColumnType.STRING_ARRAY;
    column = new StringArrayFrameColumn(frame, storedAsMultiValue, region, lengthsStart, dataStart);
  }

  // Arrays are never reported as "multi-value"; that notion only applies to plain string columns.
  final boolean reportMultipleValues = !asArray && storedAsMultiValue;

  return new ColumnPlus(
      column,
      new ColumnCapabilitiesImpl().setType(columnType)
                                  .setHasMultipleValues(reportMultipleValues)
                                  .setDictionaryEncoded(false)
                                  .setHasBitmapIndexes(false)
                                  .setHasSpatialIndexes(false)
                                  .setHasNulls(ColumnCapabilities.Capable.UNKNOWN),
      frame.numRows()
  );
}
/**
 * Sanity-checks a column region before it is read.
 *
 * Verifies that the region is at least {@link StringFrameColumnWriter#DATA_OFFSET} bytes long, and that the
 * type code stored in its first byte matches the type this reader was created for.
 *
 * @param region memory region of the column
 * @throws DruidException (defensive) if the region is too small or carries an unexpected type code
 */
private void validate(final Memory region)
{
  // The header must fit before any of its bytes are read.
  if (region.getCapacity() < StringFrameColumnWriter.DATA_OFFSET) {
    throw DruidException.defensive("Column[%s] is not big enough for a header", columnNumber);
  }

  final byte expected;
  if (asArray) {
    expected = FrameColumnWriters.TYPE_STRING_ARRAY;
  } else {
    expected = FrameColumnWriters.TYPE_STRING;
  }

  final byte actual = region.getByte(0);
  if (actual == expected) {
    return;
  }

  throw DruidException.defensive(
      "Column[%s] does not have the correct type code; expected[%s], got[%s]",
      columnNumber,
      expected,
      actual
  );
}
/**
 * Returns whether the byte at position 1 of the column region equals 1, which this reader treats as
 * "stored in multi-value format".
 */
private static boolean isMultiValue(final Memory memory)
{
  final byte multiValueFlag = memory.getByte(1);
  return multiValueFlag == 1;
}
/**
 * Reads the cumulative row length entry for a physical row. The entry is the cumulative row length itself when
 * the row is not null, or -(cumulative row length) - 1 when the row itself is null.
 *
 * To check whether the returned value indicates a null row, use {@link #isNullRow(int)}.
 *
 * To get the actual cumulative row length, use {@link #adjustCumulativeRowLength(int)}.
 */
private static int getCumulativeRowLength(final Memory memory, final int physicalRow)
{
  // Note: only valid to call this if multiValue = true.
  // Entries are ints laid out back to back, starting at DATA_OFFSET.
  final long entryPosition = StringFrameColumnWriter.DATA_OFFSET + (long) Integer.BYTES * physicalRow;
  return memory.getInt(entryPosition);
}
/**
 * Tells whether a value returned by {@link #getCumulativeRowLength(Memory, int)} marks a row that is null
 * itself (i.e. a null array). Null rows are encoded as negative values.
 */
private static boolean isNullRow(final int cumulativeRowLength)
{
  final boolean encodedAsNull = cumulativeRowLength < 0;
  return encodedAsNull;
}
/**
 * Decodes a value returned by {@link #getCumulativeRowLength(Memory, int)} into the actual, non-negative
 * cumulative row length. Negative inputs (null rows) are mapped back via -(value + 1); non-negative inputs
 * are returned unchanged.
 */
private static int adjustCumulativeRowLength(final int cumulativeRowLength)
{
  return cumulativeRowLength >= 0 ? cumulativeRowLength : -(cumulativeRowLength + 1);
}
/**
 * Computes the position, within the column region, where the string length section begins.
 *
 * In multi-value format one int per row (the cumulative row lengths) sits between the header and the string
 * lengths; in single-value format the string lengths start right at
 * {@link StringFrameColumnWriter#DATA_OFFSET}.
 *
 * @param numRows    number of rows in the frame
 * @param multiValue whether the column is stored in multi-value format
 * @return position of the first string length
 */
private static long getStartOfStringLengthSection(
    final int numRows,
    final boolean multiValue
)
{
  final long rowLengthSectionSize = multiValue ? (long) Integer.BYTES * numRows : 0;
  return StringFrameColumnWriter.DATA_OFFSET + rowLengthSectionSize;
}
/**
 * Computes the position, within the column region, where the string data section begins. This is the start of
 * the string length section plus one int per string value.
 *
 * In single-value format there is exactly one value per row. In multi-value format the total number of values
 * is the (adjusted) cumulative row length of the last row.
 *
 * @param memory     memory region of the column
 * @param numRows    number of rows in the frame
 * @param multiValue whether the column is stored in multi-value format
 * @return position of the first byte of string data
 */
private static long getStartOfStringDataSection(
    final Memory memory,
    final int numRows,
    final boolean multiValue
)
{
  final int totalNumValues;

  if (!multiValue) {
    totalNumValues = numRows;
  } else if (numRows > 0) {
    totalNumValues = adjustCumulativeRowLength(getCumulativeRowLength(memory, numRows - 1));
  } else {
    // No rows means no last row to consult: asking for row (numRows - 1) would compute a read position
    // before the start of the cumulative row length section. A column without rows holds no values.
    totalNumValues = 0;
  }

  return getStartOfStringLengthSection(numRows, multiValue) + (long) Integer.BYTES * totalNumValues;
}
@VisibleForTesting
static class StringFrameColumn extends ObjectColumnAccessorBase implements DictionaryEncodedColumn
{
/**
 * Frame this column belongs to; used for the row count and for translating row numbers to physical rows.
 */
private final Frame frame;

/**
 * Memory region of the column.
 */
private final Memory memory;

/**
 * Position in {@link #memory} where the string length section begins.
 */
private final long startOfStringLengthSection;

/**
 * Position in {@link #memory} where the string data section begins.
 */
private final long startOfStringDataSection;

/**
 * Whether the column is stored in multi-value format.
 */
private final boolean multiValue;

/**
 * Whether the column is being read as {@link ColumnType#STRING_ARRAY} (true) or {@link ColumnType#STRING} (false).
 */
private final boolean asArray;

/**
 * Creates a column view; every argument is stored as-is in the field of the same name.
 */
private StringFrameColumn(
    Frame frame,
    boolean multiValue,
    Memory memory,
    long startOfStringLengthSection,
    long startOfStringDataSection,
    final boolean asArray
)
{
  this.frame = frame;
  this.memory = memory;
  this.startOfStringLengthSection = startOfStringLengthSection;
  this.startOfStringDataSection = startOfStringDataSection;
  this.multiValue = multiValue;
  this.asArray = asArray;
}
/**
 * Not supported by this frame-backed column.
 *
 * @throws UnsupportedOperationException always
 */
@Override
public boolean hasMultipleValues()
{
// Only used in segment tests that don't run on frames.
throw new UnsupportedOperationException();
}
/**
 * Not supported by this frame-backed column.
 *
 * @throws UnsupportedOperationException always
 */
@Override
public int getSingleValueRow(int rowNum)
{
// Only used in segment tests that don't run on frames.
throw new UnsupportedOperationException();
}
/**
 * Not supported by this frame-backed column.
 *
 * @throws UnsupportedOperationException always
 */
@Override
public IndexedInts getMultiValueRow(int rowNum)
{
// Only used in segment tests that don't run on frames.
throw new UnsupportedOperationException();
}
/**
 * Not supported by this frame-backed column.
 *
 * @throws UnsupportedOperationException always
 */
@Nullable
@Override
public String lookupName(int id)
{
// Only used on columns from segments, not frames.
throw new UnsupportedOperationException();
}
/**
 * Not supported by this frame-backed column.
 *
 * @throws UnsupportedOperationException always
 */
@Override
public int lookupId(String name)
{
// Only used on columns from segments, not frames.
throw new UnsupportedOperationException();
}
/**
 * Always reports {@link DimensionDictionarySelector#CARDINALITY_UNKNOWN}.
 */
@Override
public int getCardinality()
{
return DimensionDictionarySelector.CARDINALITY_UNKNOWN;
}
/**
 * Creates a dimension selector over this column by delegating to {@code makeDimensionSelectorInternal}.
 *
 * @param offset       offset that determines the current row
 * @param extractionFn optional extraction function, may be null
 * @return a dimension selector for this column
 * @throws ISE if this column is being read as {@link ColumnType#STRING_ARRAY}
 */
@Override
public DimensionSelector makeDimensionSelector(ReadableOffset offset, @Nullable ExtractionFn extractionFn)
{
  if (!asArray) {
    return makeDimensionSelectorInternal(offset, extractionFn);
  }

  throw new ISE("Cannot call makeDimensionSelector on field of type [%s]", ColumnType.STRING_ARRAY);
}
/**
 * Not supported by this frame-backed column; see {@link #makeVectorObjectSelector(ReadableVectorOffset)}.
 *
 * @throws UnsupportedOperationException always
 */
@Override
public SingleValueDimensionVectorSelector makeSingleValueDimensionVectorSelector(ReadableVectorOffset offset)
{
// Callers should use object selectors, because we have no dictionary.
throw new UnsupportedOperationException();
}
/**
 * Not supported by this frame-backed column; see {@link #makeVectorObjectSelector(ReadableVectorOffset)}.
 *
 * @throws UnsupportedOperationException always
 */
@Override
public MultiValueDimensionVectorSelector makeMultiValueDimensionVectorSelector(ReadableVectorOffset vectorOffset)
{
// Callers should use object selectors, because we have no dictionary.
throw new UnsupportedOperationException();
}
/**
 * Creates a vector object selector over this column.
 *
 * The selector keeps one reusable array sized to the offset's maximum vector size. The array is refilled
 * only when the offset's id differs from the id seen at the previous fill; each slot is populated with
 * {@code getRowAsObject(physicalRow, true)} for the corresponding row of the current vector.
 *
 * @param offset vector offset that determines the current batch of rows
 * @return a selector whose object vector reflects the rows currently addressed by {@code offset}
 */
@Override
public VectorObjectSelector makeVectorObjectSelector(final ReadableVectorOffset offset)
{
  class StringFrameVectorObjectSelector implements VectorObjectSelector
  {
    private final Object[] vector = new Object[offset.getMaxVectorSize()];

    // Id of the offset state that "vector" was last filled for.
    private int id = ReadableVectorInspector.NULL_ID;

    @Override
    public Object[] getObjectVector()
    {
      computeVectorIfNeeded();
      return vector;
    }

    @Override
    public int getMaxVectorSize()
    {
      return offset.getMaxVectorSize();
    }

    @Override
    public int getCurrentVectorSize()
    {
      return offset.getCurrentVectorSize();
    }

    /**
     * Refills the vector unless it was already filled for the offset's current id.
     */
    private void computeVectorIfNeeded()
    {
      if (id != offset.getId()) {
        if (offset.isContiguous()) {
          fillFromContiguousRange();
        } else {
          fillFromOffsetArray();
        }

        id = offset.getId();
      }
    }

    /**
     * Fills the vector from consecutive rows beginning at the offset's start offset.
     */
    private void fillFromContiguousRange()
    {
      final int firstRow = offset.getStartOffset();
      int slot = 0;
      while (slot < offset.getCurrentVectorSize()) {
        final int physicalRow = frame.physicalRow(slot + firstRow);
        vector[slot] = getRowAsObject(physicalRow, true);
        slot++;
      }
    }

    /**
     * Fills the vector from the explicit row numbers provided by the offset.
     */
    private void fillFromOffsetArray()
    {
      final int[] rowNumbers = offset.getOffsets();
      int slot = 0;
      while (slot < offset.getCurrentVectorSize()) {
        final int physicalRow = frame.physicalRow(rowNumbers[slot]);
        vector[slot] = getRowAsObject(physicalRow, true);
        slot++;
      }
    }
  }

  return new StringFrameVectorObjectSelector();
}
/**
 * Returns the number of rows in the underlying frame.
 */
@Override
public int length()
{
return frame.numRows();
}
/**
 * No-op: this method performs no work.
 */
@Override
public void close()
{
// Do nothing.
}
/**
 * Returns the type this column is being read as: {@link ColumnType#STRING_ARRAY} when {@code asArray} is set,
 * otherwise {@link ColumnType#STRING}.
 */
@Override
public ColumnType getType()
{
  if (asArray) {
    return ColumnType.STRING_ARRAY;
  }
  return ColumnType.STRING;
}
/**
 * Returns the number of rows; delegates to {@link #length()}.
 */
@Override
public int numRows()
{
return length();
}
/**
 * Returns the value for a row: the row number is first translated to a physical row of the frame, and the
 * string stored for that physical row is returned.
 *
 * @param rowNum row number
 * @return result of {@code getString} for the corresponding physical row
 */
@Override
protected Object getVal(int rowNum)
{
  final int physicalRow = frame.physicalRow(rowNum);
  return getString(physicalRow);
}
@Override
protected Comparator