Many resources are needed to download a project. Please understand that we have to cover our server costs. Thank you in advance. Project price: only $1.
You can buy this project and download or modify it as often as you want.
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.druid.segment.virtual;
import com.fasterxml.jackson.annotation.JsonCreator;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
import com.google.common.primitives.Doubles;
import org.apache.druid.common.guava.GuavaUtils;
import org.apache.druid.java.util.common.IAE;
import org.apache.druid.java.util.common.Numbers;
import org.apache.druid.math.expr.Evals;
import org.apache.druid.math.expr.ExprEval;
import org.apache.druid.math.expr.ExpressionType;
import org.apache.druid.query.cache.CacheKeyBuilder;
import org.apache.druid.query.dimension.DimensionSpec;
import org.apache.druid.query.extraction.ExtractionFn;
import org.apache.druid.query.filter.ColumnIndexSelector;
import org.apache.druid.query.filter.DruidPredicateFactory;
import org.apache.druid.query.filter.ValueMatcher;
import org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector;
import org.apache.druid.segment.BaseSingleValueDimensionSelector;
import org.apache.druid.segment.ColumnInspector;
import org.apache.druid.segment.ColumnSelector;
import org.apache.druid.segment.ColumnSelectorFactory;
import org.apache.druid.segment.ColumnValueSelector;
import org.apache.druid.segment.DimensionSelector;
import org.apache.druid.segment.IdLookup;
import org.apache.druid.segment.NilColumnValueSelector;
import org.apache.druid.segment.VirtualColumn;
import org.apache.druid.segment.column.BaseColumn;
import org.apache.druid.segment.column.ColumnCapabilities;
import org.apache.druid.segment.column.ColumnCapabilitiesImpl;
import org.apache.druid.segment.column.ColumnHolder;
import org.apache.druid.segment.column.ColumnIndexSupplier;
import org.apache.druid.segment.column.ColumnType;
import org.apache.druid.segment.column.DictionaryEncodedColumn;
import org.apache.druid.segment.column.NumericColumn;
import org.apache.druid.segment.column.Types;
import org.apache.druid.segment.column.ValueType;
import org.apache.druid.segment.column.ValueTypes;
import org.apache.druid.segment.data.IndexedInts;
import org.apache.druid.segment.data.ReadableOffset;
import org.apache.druid.segment.nested.CompressedNestedDataComplexColumn;
import org.apache.druid.segment.nested.NestedCommonFormatColumn;
import org.apache.druid.segment.nested.NestedDataComplexColumn;
import org.apache.druid.segment.nested.NestedDataComplexTypeSerde;
import org.apache.druid.segment.nested.NestedFieldDictionaryEncodedColumn;
import org.apache.druid.segment.nested.NestedPathArrayElement;
import org.apache.druid.segment.nested.NestedPathFinder;
import org.apache.druid.segment.nested.NestedPathPart;
import org.apache.druid.segment.nested.StructuredData;
import org.apache.druid.segment.nested.VariantColumn;
import org.apache.druid.segment.serde.NoIndexesColumnIndexSupplier;
import org.apache.druid.segment.vector.BaseDoubleVectorValueSelector;
import org.apache.druid.segment.vector.BaseFloatVectorValueSelector;
import org.apache.druid.segment.vector.BaseLongVectorValueSelector;
import org.apache.druid.segment.vector.NilVectorSelector;
import org.apache.druid.segment.vector.ReadableVectorInspector;
import org.apache.druid.segment.vector.ReadableVectorOffset;
import org.apache.druid.segment.vector.SingleValueDimensionVectorSelector;
import org.apache.druid.segment.vector.VectorObjectSelector;
import org.apache.druid.segment.vector.VectorValueSelector;
import javax.annotation.Nullable;
import java.nio.ByteBuffer;
import java.util.Collections;
import java.util.List;
import java.util.Objects;
import java.util.Set;
/**
* Optimized virtual column that can make direct selectors into a {@link NestedDataComplexColumn} or any associated
* nested fields ({@link NestedFieldDictionaryEncodedColumn}) including using
* their indexes.
*
* This virtual column is used for the SQL operators JSON_VALUE (if {@link #processFromRaw} is set to false) or
* JSON_QUERY (if it is true), and accepts 'JSONPath' or 'jq' syntax string representations of paths, or a parsed
* list of {@link NestedPathPart} in order to determine what should be selected from the column.
*
* Type information for nested fields is completely absent in the SQL planner, so it guesses the best it can to set
* {@link #expectedType} from the context of how something is being used, e.g. an aggregator's default type, an
* explicit cast, or the 'RETURNING' syntax, which explicitly specifies the type. This might not be the same as
* if it had actual type information, but we try to stick with whatever we chose there to do the best we can for now.
*
* Since {@link #capabilities(ColumnInspector, String)} is determined by the {@link #expectedType}, the results will
* be best-effort cast to the expected type if the column is not natively the expected type, so that this column can
* fulfill the contract of the type of selector that is likely to be created to read this column.
*/
public class NestedFieldVirtualColumn implements VirtualColumn
{
/** Name of the underlying column that selectors made by this virtual column read from. */
private final String columnName;
/** Name under which this virtual column is exposed, as returned by {@link #getOutputName()}. */
private final String outputName;
/** Type the caller expects values to have; {@code null} when no expectation was supplied. */
@Nullable
private final ColumnType expectedType;
/** Parsed path into the column; an empty list is treated as the 'root' of the column. */
private final List<NestedPathPart> parts;
/** When true, values are always processed from the 'raw' structured data (JSON_QUERY) rather than field columns. */
private final boolean processFromRaw;
/** True when at least one {@link NestedPathArrayElement} in {@link #parts} has a negative index. */
private final boolean hasNegativeArrayIndex;
@JsonCreator
public NestedFieldVirtualColumn(
@JsonProperty("columnName") String columnName,
@JsonProperty("outputName") String outputName,
@JsonProperty("expectedType") @Nullable ColumnType expectedType,
@JsonProperty("pathParts") @Nullable List parts,
@JsonProperty("processFromRaw") @Nullable Boolean processFromRaw,
@JsonProperty("path") @Nullable String path,
@JsonProperty("useJqSyntax") @Nullable Boolean useJqSyntax
)
{
this.columnName = columnName;
this.outputName = outputName;
if (path != null) {
Preconditions.checkArgument(parts == null, "Cannot define both 'path' and 'pathParts'");
} else if (parts == null) {
throw new IllegalArgumentException("Must define exactly one of 'path' or 'pathParts'");
}
if (parts != null) {
this.parts = parts;
} else {
boolean isInputJq = useJqSyntax != null && useJqSyntax;
this.parts = isInputJq ? NestedPathFinder.parseJqPath(path) : NestedPathFinder.parseJsonPath(path);
}
boolean hasNegative = false;
for (NestedPathPart part : this.parts) {
if (part instanceof NestedPathArrayElement) {
NestedPathArrayElement elementPart = (NestedPathArrayElement) part;
if (elementPart.getIndex() < 0) {
hasNegative = true;
break;
}
}
}
this.hasNegativeArrayIndex = hasNegative;
this.expectedType = expectedType;
this.processFromRaw = processFromRaw == null ? false : processFromRaw;
}
/**
 * Test convenience constructor: a 'JSONPath' syntax {@code path} with no expected type, no pre-parsed path parts and
 * the default (false) raw processing mode.
 */
@VisibleForTesting
public NestedFieldVirtualColumn(String columnName, String path, String outputName)
{
  this(columnName, outputName, null, null, null, path, Boolean.FALSE);
}
/**
 * Test convenience constructor: a 'JSONPath' syntax {@code path} with an optional expected type, no pre-parsed path
 * parts and the default (false) raw processing mode.
 */
@VisibleForTesting
public NestedFieldVirtualColumn(String columnName, String path, String outputName, @Nullable ColumnType expectedType)
{
  this(columnName, outputName, expectedType, null, null, path, Boolean.FALSE);
}
/**
 * Builds the cache key from, in order: the literal tag "nested-field", the output name, the column name, the
 * normalized JSON path form of the path parts, and the raw processing flag.
 */
@Override
public byte[] getCacheKey()
{
  final String normalizedPath = NestedPathFinder.toNormalizedJsonPath(parts);
  final CacheKeyBuilder keyBuilder = new CacheKeyBuilder(VirtualColumnCacheHelper.CACHE_TYPE_ID_USER_DEFINED);
  // NOTE(review): expectedType is not part of the key - confirm that this is intentional
  return keyBuilder.appendString("nested-field")
                   .appendString(outputName)
                   .appendString(columnName)
                   .appendString(normalizedPath)
                   .appendBoolean(processFromRaw)
                   .build();
}
/**
 * @return the name under which this virtual column is exposed
 */
@JsonProperty
@Override
public String getOutputName()
{
  return this.outputName;
}
/**
 * @return the name of the underlying column that this virtual column reads from
 */
@JsonProperty
public String getColumnName()
{
  return this.columnName;
}
/**
 * @return the parsed path parts, serialized under the "pathParts" JSON property; this is the same list instance
 * held by this virtual column, not a copy
 */
@JsonProperty("pathParts")
public List<NestedPathPart> getPathParts()
{
  return parts;
}
/**
 * @return the type the caller expects values to have, or {@code null} if none was supplied at construction
 */
@Nullable
@JsonProperty
public ColumnType getExpectedType()
{
  return expectedType;
}
/**
 * @return true if values are always processed from the 'raw' data; false when the flag was omitted or set to false
 */
@JsonProperty
public boolean isProcessFromRaw()
{
  return this.processFromRaw;
}
/**
 * Makes a dimension selector from a {@link ColumnSelectorFactory} by wrapping the column value selector of this
 * virtual column in a {@code FieldDimensionSelector} and decorating it with the dimension spec.
 *
 * @param dimensionSpec spec whose output name is passed along and whose decoration is applied to the result
 * @param factory       factory used to make the underlying column value selector
 */
@Override
public DimensionSelector makeDimensionSelector(
    DimensionSpec dimensionSpec,
    ColumnSelectorFactory factory
)
{
  // this dimension selector is used for realtime queries, nested paths are not themselves dictionary encoded until
  // written to segment, so we fall back to processing the structured data from a column value selector on the
  // complex column
  final ColumnValueSelector<?> valueSelector = makeColumnValueSelector(dimensionSpec.getOutputName(), factory);
  return dimensionSpec.decorate(new FieldDimensionSelector(valueSelector));
}
@Override
public ColumnValueSelector> makeColumnValueSelector(
String columnName,
ColumnSelectorFactory factory
)
{
// this column value selector is used for realtime queries, so we always process StructuredData
final ColumnValueSelector> baseSelector = factory.makeColumnValueSelector(this.columnName);
// processFromRaw is true that means JSON_QUERY, which can return partial results, otherwise this virtual column
// is JSON_VALUE which only returns literals, so use the literal value selector instead
return processFromRaw
? new RawFieldColumnSelector(baseSelector, parts)
: new RawFieldLiteralColumnValueSelector(baseSelector, parts);
}
/**
 * Makes a dimension selector directly against a {@link ColumnSelector}.
 *
 * @return a decorated constant-null selector if the underlying column does not exist; {@code null} if the path
 * contains a negative array index; otherwise the decorated result of the undecorated selector helper
 */
@Nullable
@Override
public DimensionSelector makeDimensionSelector(
    DimensionSpec dimensionSpec,
    ColumnSelector columnSelector,
    ReadableOffset offset
)
{
  final ColumnHolder columnHolder = columnSelector.getColumnHolder(columnName);
  final DimensionSelector undecorated;
  if (columnHolder == null) {
    // the column is missing entirely, so every row is null
    undecorated = DimensionSelector.constant(null, dimensionSpec.getExtractionFn());
  } else if (hasNegativeArrayIndex) {
    // a negative array element means the value must be fetched 'from the end' of the array, which the optimized
    // nested field column cannot do; returning null falls through to the 'raw' processing of the
    // FieldDimensionSelector created with the column selector factory
    return null;
  } else {
    undecorated = makeDimensionSelectorUndecorated(columnHolder, offset, dimensionSpec.getExtractionFn());
  }
  return dimensionSpec.decorate(undecorated);
}
private DimensionSelector makeDimensionSelectorUndecorated(
ColumnHolder holder,
ReadableOffset offset,
@Nullable ExtractionFn extractionFn
)
{
BaseColumn theColumn = holder.getColumn();
if (theColumn instanceof NestedDataComplexColumn) {
final NestedDataComplexColumn column = (NestedDataComplexColumn) theColumn;
return column.makeDimensionSelector(parts, offset, extractionFn);
}
// not a nested column, but we can still do stuff if the path is the 'root', indicated by an empty path parts
if (parts.isEmpty()) {
// dictionary encoded columns do not typically implement the value selector methods (getLong, getDouble, getFloat)
// nothing *should* be using a dimension selector to call the numeric getters, but just in case... wrap their
// selector in a "best effort" casting selector to implement them
if (theColumn instanceof DictionaryEncodedColumn) {
final DictionaryEncodedColumn> column = (DictionaryEncodedColumn>) theColumn;
return new BestEffortCastingValueSelector(column.makeDimensionSelector(offset, extractionFn));
}
// for non-dictionary encoded columns, wrap a value selector to make it appear as a dimension selector
return ValueTypes.makeNumericWrappingDimensionSelector(
holder.getCapabilities().getType(),
theColumn.makeColumnValueSelector(offset),
extractionFn
);
}
if (parts.size() == 1 && parts.get(0) instanceof NestedPathArrayElement && theColumn instanceof VariantColumn) {
final VariantColumn> arrayColumn = (VariantColumn>) theColumn;
ColumnValueSelector> arraySelector = arrayColumn.makeColumnValueSelector(offset);
final int elementNumber = ((NestedPathArrayElement) parts.get(0)).getIndex();
if (elementNumber < 0) {
throw new IAE("Cannot make array element selector, negative array index not supported");
}
return new BaseSingleValueDimensionSelector()
{
@Nullable
@Override
protected String getValue()
{
Object o = arraySelector.getObject();
if (o instanceof Object[]) {
Object[] array = (Object[]) o;
if (elementNumber < array.length) {
Object element = array[elementNumber];
if (element == null) {
return null;
}
return String.valueOf(element);
}
}
return null;
}
@Override
public void inspectRuntimeShape(RuntimeShapeInspector inspector)
{
arraySelector.inspectRuntimeShape(inspector);
}
};
}
// we are not a nested column and are being asked for a path that will never exist, so we are nil selector
return DimensionSelector.constant(null, extractionFn);
}
@Nullable
@Override
public ColumnValueSelector> makeColumnValueSelector(
String columnName,
ColumnSelector columnSelector,
ReadableOffset offset
)
{
ColumnHolder holder = columnSelector.getColumnHolder(this.columnName);
if (holder == null) {
return NilColumnValueSelector.instance();
}
BaseColumn theColumn = holder.getColumn();
if (processFromRaw || hasNegativeArrayIndex) {
// if the path has negative array elements, or has set the flag to process 'raw' values explicitly (JSON_QUERY),
// then we use the 'raw' processing of the RawFieldColumnSelector/RawFieldLiteralColumnValueSelector created
// with the column selector factory instead of using the optimized nested field column
return null;
}
// "JSON_VALUE", which only returns literals, on a NestedDataComplexColumn, so we can use the fields value selector
if (theColumn instanceof NestedDataComplexColumn) {
final NestedDataComplexColumn column = (NestedDataComplexColumn) theColumn;
return column.makeColumnValueSelector(parts, offset);
}
// not a nested column, but we can still do stuff if the path is the 'root', indicated by an empty path parts
if (parts.isEmpty()) {
// dictionary encoded columns do not typically implement the value selector methods (getLong, getDouble, getFloat)
// so we want to wrap their selector in a "best effort" casting selector to implement them
if (theColumn instanceof DictionaryEncodedColumn && !(theColumn instanceof VariantColumn)) {
final DictionaryEncodedColumn> column = (DictionaryEncodedColumn>) theColumn;
return new BestEffortCastingValueSelector(column.makeDimensionSelector(offset, null));
}
// otherwise it is probably cool to pass through the value selector directly, if numbers make sense the selector
// very likely implemented them, and everyone implements getObject if not
return theColumn.makeColumnValueSelector(offset);
}
if (parts.size() == 1 && parts.get(0) instanceof NestedPathArrayElement && theColumn instanceof VariantColumn) {
final VariantColumn> arrayColumn = (VariantColumn>) theColumn;
ColumnValueSelector> arraySelector = arrayColumn.makeColumnValueSelector(offset);
final int elementNumber = ((NestedPathArrayElement) parts.get(0)).getIndex();
if (elementNumber < 0) {
throw new IAE("Cannot make array element selector, negative array index not supported");
}
return new ColumnValueSelector